{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999530215004933, "eval_steps": 500, "global_step": 15964, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.263799934230101e-05, "grad_norm": 3.858246462582563, "learning_rate": 2.0876826722338203e-08, "loss": 0.6556, "step": 1 }, { "epoch": 0.00012527599868460203, "grad_norm": 2.8119121815342027, "learning_rate": 4.1753653444676405e-08, "loss": 0.6526, "step": 2 }, { "epoch": 0.000187913998026903, "grad_norm": 4.37314039414459, "learning_rate": 6.263048016701463e-08, "loss": 0.6576, "step": 3 }, { "epoch": 0.00025055199736920405, "grad_norm": 3.3691798271712194, "learning_rate": 8.350730688935281e-08, "loss": 0.6632, "step": 4 }, { "epoch": 0.000313189996711505, "grad_norm": 3.8850758063243602, "learning_rate": 1.0438413361169103e-07, "loss": 0.6251, "step": 5 }, { "epoch": 0.000375827996053806, "grad_norm": 4.282646414197021, "learning_rate": 1.2526096033402926e-07, "loss": 0.6821, "step": 6 }, { "epoch": 0.00043846599539610704, "grad_norm": 3.255403937244552, "learning_rate": 1.4613778705636743e-07, "loss": 0.7077, "step": 7 }, { "epoch": 0.0005011039947384081, "grad_norm": 3.804064909558229, "learning_rate": 1.6701461377870562e-07, "loss": 0.6412, "step": 8 }, { "epoch": 0.0005637419940807091, "grad_norm": 4.310035823534526, "learning_rate": 1.8789144050104384e-07, "loss": 0.6659, "step": 9 }, { "epoch": 0.00062637999342301, "grad_norm": 3.8627229422269194, "learning_rate": 2.0876826722338207e-07, "loss": 0.6959, "step": 10 }, { "epoch": 0.000689017992765311, "grad_norm": 3.2331488181219994, "learning_rate": 2.2964509394572026e-07, "loss": 0.6538, "step": 11 }, { "epoch": 0.000751655992107612, "grad_norm": 3.348925964280946, "learning_rate": 2.505219206680585e-07, "loss": 0.6417, "step": 12 }, { "epoch": 0.0008142939914499131, "grad_norm": 4.1015297664563795, "learning_rate": 2.713987473903967e-07, "loss": 0.6999, "step": 13 }, { "epoch": 0.0008769319907922141, "grad_norm": 3.8277600827741574, "learning_rate": 2.9227557411273485e-07, "loss": 0.7035, "step": 14 }, { "epoch": 0.0009395699901345151, "grad_norm": 3.491662087714972, "learning_rate": 3.131524008350731e-07, "loss": 0.6543, "step": 15 }, { "epoch": 0.0010022079894768162, "grad_norm": 3.0109277959531355, "learning_rate": 3.3402922755741124e-07, "loss": 0.5837, "step": 16 }, { "epoch": 0.0010648459888191171, "grad_norm": 3.2620424982792846, "learning_rate": 3.549060542797495e-07, "loss": 0.6755, "step": 17 }, { "epoch": 0.0011274839881614182, "grad_norm": 3.1272000178049226, "learning_rate": 3.757828810020877e-07, "loss": 0.5891, "step": 18 }, { "epoch": 0.0011901219875037191, "grad_norm": 3.1006815712132973, "learning_rate": 3.9665970772442594e-07, "loss": 0.6114, "step": 19 }, { "epoch": 0.00125275998684602, "grad_norm": 2.9217370821323105, "learning_rate": 4.1753653444676413e-07, "loss": 0.6507, "step": 20 }, { "epoch": 0.0013153979861883212, "grad_norm": 0.7513542919000871, "learning_rate": 4.384133611691024e-07, "loss": 0.4267, "step": 21 }, { "epoch": 0.001378035985530622, "grad_norm": 2.2397548636664775, "learning_rate": 4.592901878914405e-07, "loss": 0.574, "step": 22 }, { "epoch": 0.0014406739848729232, "grad_norm": 2.8393767549719575, "learning_rate": 4.801670146137788e-07, "loss": 0.5616, "step": 23 }, { "epoch": 0.001503311984215224, "grad_norm": 2.61776792866924, "learning_rate": 5.01043841336117e-07, "loss": 0.5732, "step": 24 }, { "epoch": 0.0015659499835575252, "grad_norm": 2.836607102546222, "learning_rate": 5.219206680584552e-07, "loss": 0.6257, "step": 25 }, { "epoch": 0.0016285879828998261, "grad_norm": 2.5856733948576514, "learning_rate": 5.427974947807934e-07, "loss": 0.5937, "step": 26 }, { "epoch": 0.0016912259822421272, "grad_norm": 1.9897448861760498, "learning_rate": 5.636743215031316e-07, "loss": 0.5951, "step": 27 }, { "epoch": 0.0017538639815844281, "grad_norm": 2.592384218210653, "learning_rate": 5.845511482254697e-07, "loss": 0.5927, "step": 28 }, { "epoch": 0.0018165019809267293, "grad_norm": 2.346491595611199, "learning_rate": 6.05427974947808e-07, "loss": 0.599, "step": 29 }, { "epoch": 0.0018791399802690302, "grad_norm": 1.9743054351340714, "learning_rate": 6.263048016701462e-07, "loss": 0.6316, "step": 30 }, { "epoch": 0.0019417779796113313, "grad_norm": 1.7460246039341718, "learning_rate": 6.471816283924843e-07, "loss": 0.5596, "step": 31 }, { "epoch": 0.0020044159789536324, "grad_norm": 1.8949712382002544, "learning_rate": 6.680584551148225e-07, "loss": 0.5927, "step": 32 }, { "epoch": 0.002067053978295933, "grad_norm": 1.70591562542313, "learning_rate": 6.889352818371608e-07, "loss": 0.5877, "step": 33 }, { "epoch": 0.0021296919776382342, "grad_norm": 1.5314484579364067, "learning_rate": 7.09812108559499e-07, "loss": 0.6348, "step": 34 }, { "epoch": 0.0021923299769805353, "grad_norm": 1.5378350081382752, "learning_rate": 7.306889352818372e-07, "loss": 0.5205, "step": 35 }, { "epoch": 0.0022549679763228365, "grad_norm": 1.6154955839392315, "learning_rate": 7.515657620041754e-07, "loss": 0.5898, "step": 36 }, { "epoch": 0.002317605975665137, "grad_norm": 1.7852969381289066, "learning_rate": 7.724425887265135e-07, "loss": 0.6176, "step": 37 }, { "epoch": 0.0023802439750074383, "grad_norm": 1.4697421973459197, "learning_rate": 7.933194154488519e-07, "loss": 0.5358, "step": 38 }, { "epoch": 0.0024428819743497394, "grad_norm": 1.4585534357045287, "learning_rate": 8.1419624217119e-07, "loss": 0.6065, "step": 39 }, { "epoch": 0.00250551997369204, "grad_norm": 1.2129466315801778, "learning_rate": 8.350730688935283e-07, "loss": 0.5301, "step": 40 }, { "epoch": 0.002568157973034341, "grad_norm": 1.3484335839163204, "learning_rate": 8.559498956158664e-07, "loss": 0.5794, "step": 41 }, { "epoch": 0.0026307959723766423, "grad_norm": 1.2904046929597606, "learning_rate": 8.768267223382048e-07, "loss": 0.5232, "step": 42 }, { "epoch": 0.0026934339717189434, "grad_norm": 1.424258702134985, "learning_rate": 8.977035490605429e-07, "loss": 0.5588, "step": 43 }, { "epoch": 0.002756071971061244, "grad_norm": 1.2645675381197703, "learning_rate": 9.18580375782881e-07, "loss": 0.4987, "step": 44 }, { "epoch": 0.0028187099704035453, "grad_norm": 1.420063779488067, "learning_rate": 9.394572025052193e-07, "loss": 0.5568, "step": 45 }, { "epoch": 0.0028813479697458464, "grad_norm": 1.4144535548841484, "learning_rate": 9.603340292275575e-07, "loss": 0.609, "step": 46 }, { "epoch": 0.0029439859690881475, "grad_norm": 1.303609161047398, "learning_rate": 9.812108559498957e-07, "loss": 0.5267, "step": 47 }, { "epoch": 0.003006623968430448, "grad_norm": 1.33649767185354, "learning_rate": 1.002087682672234e-06, "loss": 0.5555, "step": 48 }, { "epoch": 0.0030692619677727493, "grad_norm": 0.9550151992620151, "learning_rate": 1.0229645093945722e-06, "loss": 0.457, "step": 49 }, { "epoch": 0.0031318999671150504, "grad_norm": 1.2737717691864179, "learning_rate": 1.0438413361169103e-06, "loss": 0.5109, "step": 50 }, { "epoch": 0.0031945379664573515, "grad_norm": 1.0950004816688281, "learning_rate": 1.0647181628392485e-06, "loss": 0.52, "step": 51 }, { "epoch": 0.0032571759657996522, "grad_norm": 1.343756537925798, "learning_rate": 1.0855949895615868e-06, "loss": 0.5379, "step": 52 }, { "epoch": 0.0033198139651419534, "grad_norm": 1.2812735912510242, "learning_rate": 1.106471816283925e-06, "loss": 0.606, "step": 53 }, { "epoch": 0.0033824519644842545, "grad_norm": 1.2786892396300105, "learning_rate": 1.1273486430062631e-06, "loss": 0.6238, "step": 54 }, { "epoch": 0.0034450899638265556, "grad_norm": 1.2363156753261304, "learning_rate": 1.1482254697286013e-06, "loss": 0.5229, "step": 55 }, { "epoch": 0.0035077279631688563, "grad_norm": 1.283827338191788, "learning_rate": 1.1691022964509394e-06, "loss": 0.5884, "step": 56 }, { "epoch": 0.0035703659625111574, "grad_norm": 1.2157241813865975, "learning_rate": 1.1899791231732778e-06, "loss": 0.568, "step": 57 }, { "epoch": 0.0036330039618534585, "grad_norm": 1.0672728629706603, "learning_rate": 1.210855949895616e-06, "loss": 0.5057, "step": 58 }, { "epoch": 0.003695641961195759, "grad_norm": 1.097644549879363, "learning_rate": 1.2317327766179543e-06, "loss": 0.5258, "step": 59 }, { "epoch": 0.0037582799605380603, "grad_norm": 1.1658496382624082, "learning_rate": 1.2526096033402924e-06, "loss": 0.4955, "step": 60 }, { "epoch": 0.0038209179598803615, "grad_norm": 0.6906370577819059, "learning_rate": 1.2734864300626308e-06, "loss": 0.4645, "step": 61 }, { "epoch": 0.0038835559592226626, "grad_norm": 1.149519635390912, "learning_rate": 1.2943632567849687e-06, "loss": 0.5185, "step": 62 }, { "epoch": 0.003946193958564963, "grad_norm": 1.09158686448251, "learning_rate": 1.315240083507307e-06, "loss": 0.5245, "step": 63 }, { "epoch": 0.004008831957907265, "grad_norm": 1.2265208264731924, "learning_rate": 1.336116910229645e-06, "loss": 0.5647, "step": 64 }, { "epoch": 0.0040714699572495655, "grad_norm": 1.1873778162492044, "learning_rate": 1.3569937369519833e-06, "loss": 0.5121, "step": 65 }, { "epoch": 0.004134107956591866, "grad_norm": 1.0719873274622074, "learning_rate": 1.3778705636743217e-06, "loss": 0.5123, "step": 66 }, { "epoch": 0.004196745955934168, "grad_norm": 1.0698863903712088, "learning_rate": 1.3987473903966598e-06, "loss": 0.5288, "step": 67 }, { "epoch": 0.0042593839552764684, "grad_norm": 1.139092848507996, "learning_rate": 1.419624217118998e-06, "loss": 0.5314, "step": 68 }, { "epoch": 0.004322021954618769, "grad_norm": 1.1485930477303963, "learning_rate": 1.4405010438413361e-06, "loss": 0.4948, "step": 69 }, { "epoch": 0.004384659953961071, "grad_norm": 1.1071073623227474, "learning_rate": 1.4613778705636745e-06, "loss": 0.5292, "step": 70 }, { "epoch": 0.004447297953303371, "grad_norm": 1.1328930083981115, "learning_rate": 1.4822546972860128e-06, "loss": 0.5836, "step": 71 }, { "epoch": 0.004509935952645673, "grad_norm": 0.643986939147151, "learning_rate": 1.5031315240083507e-06, "loss": 0.4517, "step": 72 }, { "epoch": 0.004572573951987974, "grad_norm": 1.313623730850765, "learning_rate": 1.524008350730689e-06, "loss": 0.578, "step": 73 }, { "epoch": 0.004635211951330274, "grad_norm": 1.088069049930434, "learning_rate": 1.544885177453027e-06, "loss": 0.4735, "step": 74 }, { "epoch": 0.004697849950672576, "grad_norm": 1.0588882839597165, "learning_rate": 1.5657620041753654e-06, "loss": 0.5297, "step": 75 }, { "epoch": 0.0047604879500148765, "grad_norm": 1.190794928783865, "learning_rate": 1.5866388308977037e-06, "loss": 0.5103, "step": 76 }, { "epoch": 0.004823125949357177, "grad_norm": 1.189382189840441, "learning_rate": 1.6075156576200419e-06, "loss": 0.5664, "step": 77 }, { "epoch": 0.004885763948699479, "grad_norm": 1.093919638250216, "learning_rate": 1.62839248434238e-06, "loss": 0.5365, "step": 78 }, { "epoch": 0.0049484019480417795, "grad_norm": 1.0546717208154948, "learning_rate": 1.6492693110647182e-06, "loss": 0.513, "step": 79 }, { "epoch": 0.00501103994738408, "grad_norm": 1.0111072255357463, "learning_rate": 1.6701461377870565e-06, "loss": 0.4488, "step": 80 }, { "epoch": 0.005073677946726382, "grad_norm": 1.1199797827857312, "learning_rate": 1.6910229645093949e-06, "loss": 0.5234, "step": 81 }, { "epoch": 0.005136315946068682, "grad_norm": 1.0238803544330657, "learning_rate": 1.7118997912317328e-06, "loss": 0.5295, "step": 82 }, { "epoch": 0.005198953945410984, "grad_norm": 1.140501133074839, "learning_rate": 1.7327766179540712e-06, "loss": 0.513, "step": 83 }, { "epoch": 0.005261591944753285, "grad_norm": 1.116573825073439, "learning_rate": 1.7536534446764095e-06, "loss": 0.4995, "step": 84 }, { "epoch": 0.005324229944095585, "grad_norm": 0.9404777653188781, "learning_rate": 1.7745302713987475e-06, "loss": 0.5193, "step": 85 }, { "epoch": 0.005386867943437887, "grad_norm": 1.1415096511283775, "learning_rate": 1.7954070981210858e-06, "loss": 0.564, "step": 86 }, { "epoch": 0.005449505942780188, "grad_norm": 1.0667730622010472, "learning_rate": 1.816283924843424e-06, "loss": 0.507, "step": 87 }, { "epoch": 0.005512143942122488, "grad_norm": 0.9856125126706705, "learning_rate": 1.837160751565762e-06, "loss": 0.5057, "step": 88 }, { "epoch": 0.00557478194146479, "grad_norm": 1.1089144753383688, "learning_rate": 1.8580375782881005e-06, "loss": 0.4994, "step": 89 }, { "epoch": 0.0056374199408070905, "grad_norm": 0.7913516821595887, "learning_rate": 1.8789144050104386e-06, "loss": 0.4285, "step": 90 }, { "epoch": 0.005700057940149392, "grad_norm": 0.9949016658406422, "learning_rate": 1.899791231732777e-06, "loss": 0.4574, "step": 91 }, { "epoch": 0.005762695939491693, "grad_norm": 1.0172594220652533, "learning_rate": 1.920668058455115e-06, "loss": 0.4798, "step": 92 }, { "epoch": 0.005825333938833993, "grad_norm": 1.0235201684444175, "learning_rate": 1.941544885177453e-06, "loss": 0.4932, "step": 93 }, { "epoch": 0.005887971938176295, "grad_norm": 1.1681954155230496, "learning_rate": 1.9624217118997914e-06, "loss": 0.4854, "step": 94 }, { "epoch": 0.005950609937518596, "grad_norm": 1.1234057763473662, "learning_rate": 1.9832985386221297e-06, "loss": 0.4924, "step": 95 }, { "epoch": 0.006013247936860896, "grad_norm": 1.0170230053571934, "learning_rate": 2.004175365344468e-06, "loss": 0.496, "step": 96 }, { "epoch": 0.006075885936203198, "grad_norm": 1.1423557307019825, "learning_rate": 2.025052192066806e-06, "loss": 0.488, "step": 97 }, { "epoch": 0.006138523935545499, "grad_norm": 1.045937875225304, "learning_rate": 2.0459290187891444e-06, "loss": 0.549, "step": 98 }, { "epoch": 0.006201161934887799, "grad_norm": 1.0921519402561854, "learning_rate": 2.0668058455114827e-06, "loss": 0.5141, "step": 99 }, { "epoch": 0.006263799934230101, "grad_norm": 1.1121118003367094, "learning_rate": 2.0876826722338207e-06, "loss": 0.5147, "step": 100 }, { "epoch": 0.0063264379335724015, "grad_norm": 1.076945062360127, "learning_rate": 2.108559498956159e-06, "loss": 0.5592, "step": 101 }, { "epoch": 0.006389075932914703, "grad_norm": 0.9224163935797927, "learning_rate": 2.129436325678497e-06, "loss": 0.4746, "step": 102 }, { "epoch": 0.006451713932257004, "grad_norm": 1.093744856443408, "learning_rate": 2.1503131524008353e-06, "loss": 0.5046, "step": 103 }, { "epoch": 0.0065143519315993045, "grad_norm": 1.0544106225205734, "learning_rate": 2.1711899791231737e-06, "loss": 0.4844, "step": 104 }, { "epoch": 0.006576989930941606, "grad_norm": 1.001594521955387, "learning_rate": 2.1920668058455116e-06, "loss": 0.4719, "step": 105 }, { "epoch": 0.006639627930283907, "grad_norm": 1.0678206659208174, "learning_rate": 2.21294363256785e-06, "loss": 0.5393, "step": 106 }, { "epoch": 0.006702265929626207, "grad_norm": 1.086739825505249, "learning_rate": 2.233820459290188e-06, "loss": 0.4939, "step": 107 }, { "epoch": 0.006764903928968509, "grad_norm": 1.0078869385132547, "learning_rate": 2.2546972860125262e-06, "loss": 0.4751, "step": 108 }, { "epoch": 0.00682754192831081, "grad_norm": 0.7356807226316928, "learning_rate": 2.2755741127348646e-06, "loss": 0.4496, "step": 109 }, { "epoch": 0.006890179927653111, "grad_norm": 1.050764047209418, "learning_rate": 2.2964509394572025e-06, "loss": 0.4603, "step": 110 }, { "epoch": 0.006952817926995412, "grad_norm": 1.0394967666623012, "learning_rate": 2.317327766179541e-06, "loss": 0.5315, "step": 111 }, { "epoch": 0.007015455926337713, "grad_norm": 1.0602497947938774, "learning_rate": 2.338204592901879e-06, "loss": 0.4901, "step": 112 }, { "epoch": 0.007078093925680014, "grad_norm": 1.0257070634927614, "learning_rate": 2.359081419624217e-06, "loss": 0.4984, "step": 113 }, { "epoch": 0.007140731925022315, "grad_norm": 1.0760008010073892, "learning_rate": 2.3799582463465555e-06, "loss": 0.5352, "step": 114 }, { "epoch": 0.0072033699243646155, "grad_norm": 0.9764566829688709, "learning_rate": 2.400835073068894e-06, "loss": 0.466, "step": 115 }, { "epoch": 0.007266007923706917, "grad_norm": 1.0211556850587806, "learning_rate": 2.421711899791232e-06, "loss": 0.4691, "step": 116 }, { "epoch": 0.007328645923049218, "grad_norm": 0.962038893509024, "learning_rate": 2.44258872651357e-06, "loss": 0.4704, "step": 117 }, { "epoch": 0.007391283922391518, "grad_norm": 1.0246135137326515, "learning_rate": 2.4634655532359085e-06, "loss": 0.5183, "step": 118 }, { "epoch": 0.00745392192173382, "grad_norm": 0.9762498734515195, "learning_rate": 2.484342379958247e-06, "loss": 0.4542, "step": 119 }, { "epoch": 0.007516559921076121, "grad_norm": 0.9531644787025032, "learning_rate": 2.505219206680585e-06, "loss": 0.4976, "step": 120 }, { "epoch": 0.007579197920418422, "grad_norm": 1.0088685912113438, "learning_rate": 2.5260960334029227e-06, "loss": 0.4795, "step": 121 }, { "epoch": 0.007641835919760723, "grad_norm": 1.0407768835043478, "learning_rate": 2.5469728601252615e-06, "loss": 0.4531, "step": 122 }, { "epoch": 0.007704473919103024, "grad_norm": 1.0531424574218735, "learning_rate": 2.5678496868475994e-06, "loss": 0.5124, "step": 123 }, { "epoch": 0.007767111918445325, "grad_norm": 1.0592814918464573, "learning_rate": 2.5887265135699374e-06, "loss": 0.4739, "step": 124 }, { "epoch": 0.007829749917787626, "grad_norm": 0.7704512808980731, "learning_rate": 2.609603340292276e-06, "loss": 0.4555, "step": 125 }, { "epoch": 0.007892387917129927, "grad_norm": 0.9619284652004104, "learning_rate": 2.630480167014614e-06, "loss": 0.4299, "step": 126 }, { "epoch": 0.007955025916472227, "grad_norm": 1.0598976965787394, "learning_rate": 2.651356993736952e-06, "loss": 0.4904, "step": 127 }, { "epoch": 0.00801766391581453, "grad_norm": 1.0881172096160947, "learning_rate": 2.67223382045929e-06, "loss": 0.468, "step": 128 }, { "epoch": 0.00808030191515683, "grad_norm": 1.098405838065741, "learning_rate": 2.6931106471816287e-06, "loss": 0.5234, "step": 129 }, { "epoch": 0.008142939914499131, "grad_norm": 0.9619111243894403, "learning_rate": 2.7139874739039666e-06, "loss": 0.4677, "step": 130 }, { "epoch": 0.008205577913841432, "grad_norm": 1.1488886855077485, "learning_rate": 2.734864300626305e-06, "loss": 0.4932, "step": 131 }, { "epoch": 0.008268215913183732, "grad_norm": 1.0453855110191648, "learning_rate": 2.7557411273486434e-06, "loss": 0.465, "step": 132 }, { "epoch": 0.008330853912526033, "grad_norm": 0.9985773191691992, "learning_rate": 2.7766179540709813e-06, "loss": 0.4622, "step": 133 }, { "epoch": 0.008393491911868336, "grad_norm": 1.1367074352126274, "learning_rate": 2.7974947807933196e-06, "loss": 0.4806, "step": 134 }, { "epoch": 0.008456129911210636, "grad_norm": 1.0587405507050538, "learning_rate": 2.818371607515658e-06, "loss": 0.5026, "step": 135 }, { "epoch": 0.008518767910552937, "grad_norm": 1.0763655377426442, "learning_rate": 2.839248434237996e-06, "loss": 0.5284, "step": 136 }, { "epoch": 0.008581405909895238, "grad_norm": 1.0218345924762122, "learning_rate": 2.8601252609603343e-06, "loss": 0.48, "step": 137 }, { "epoch": 0.008644043909237538, "grad_norm": 0.7448265515180501, "learning_rate": 2.8810020876826722e-06, "loss": 0.4456, "step": 138 }, { "epoch": 0.00870668190857984, "grad_norm": 0.9921108695414547, "learning_rate": 2.901878914405011e-06, "loss": 0.4483, "step": 139 }, { "epoch": 0.008769319907922141, "grad_norm": 1.0620431780420618, "learning_rate": 2.922755741127349e-06, "loss": 0.5347, "step": 140 }, { "epoch": 0.008831957907264442, "grad_norm": 1.0126834686491157, "learning_rate": 2.943632567849687e-06, "loss": 0.4786, "step": 141 }, { "epoch": 0.008894595906606743, "grad_norm": 1.038905561380666, "learning_rate": 2.9645093945720256e-06, "loss": 0.4727, "step": 142 }, { "epoch": 0.008957233905949043, "grad_norm": 0.7620131311160847, "learning_rate": 2.9853862212943636e-06, "loss": 0.4033, "step": 143 }, { "epoch": 0.009019871905291346, "grad_norm": 1.0652460741176468, "learning_rate": 3.0062630480167015e-06, "loss": 0.4968, "step": 144 }, { "epoch": 0.009082509904633647, "grad_norm": 1.0328029376865766, "learning_rate": 3.0271398747390403e-06, "loss": 0.4824, "step": 145 }, { "epoch": 0.009145147903975947, "grad_norm": 0.9919628673284245, "learning_rate": 3.048016701461378e-06, "loss": 0.4649, "step": 146 }, { "epoch": 0.009207785903318248, "grad_norm": 0.9737211345001473, "learning_rate": 3.068893528183716e-06, "loss": 0.4715, "step": 147 }, { "epoch": 0.009270423902660549, "grad_norm": 1.076543346506063, "learning_rate": 3.089770354906054e-06, "loss": 0.4938, "step": 148 }, { "epoch": 0.00933306190200285, "grad_norm": 1.0915990957003543, "learning_rate": 3.110647181628393e-06, "loss": 0.4543, "step": 149 }, { "epoch": 0.009395699901345152, "grad_norm": 1.0927755550760756, "learning_rate": 3.1315240083507308e-06, "loss": 0.4948, "step": 150 }, { "epoch": 0.009458337900687452, "grad_norm": 1.0079496628204152, "learning_rate": 3.152400835073069e-06, "loss": 0.4684, "step": 151 }, { "epoch": 0.009520975900029753, "grad_norm": 1.0592697805564688, "learning_rate": 3.1732776617954075e-06, "loss": 0.4772, "step": 152 }, { "epoch": 0.009583613899372054, "grad_norm": 1.0720240701225303, "learning_rate": 3.1941544885177454e-06, "loss": 0.5035, "step": 153 }, { "epoch": 0.009646251898714354, "grad_norm": 1.0294206211900345, "learning_rate": 3.2150313152400838e-06, "loss": 0.4689, "step": 154 }, { "epoch": 0.009708889898056657, "grad_norm": 1.1395191077812998, "learning_rate": 3.235908141962422e-06, "loss": 0.515, "step": 155 }, { "epoch": 0.009771527897398958, "grad_norm": 1.023581244241528, "learning_rate": 3.25678496868476e-06, "loss": 0.4957, "step": 156 }, { "epoch": 0.009834165896741258, "grad_norm": 1.1054189648835393, "learning_rate": 3.2776617954070984e-06, "loss": 0.4947, "step": 157 }, { "epoch": 0.009896803896083559, "grad_norm": 1.2034941276502231, "learning_rate": 3.2985386221294363e-06, "loss": 0.4652, "step": 158 }, { "epoch": 0.00995944189542586, "grad_norm": 1.033781018890459, "learning_rate": 3.3194154488517747e-06, "loss": 0.4807, "step": 159 }, { "epoch": 0.01002207989476816, "grad_norm": 1.0468799975085483, "learning_rate": 3.340292275574113e-06, "loss": 0.4619, "step": 160 }, { "epoch": 0.010084717894110463, "grad_norm": 1.1275812080268477, "learning_rate": 3.361169102296451e-06, "loss": 0.5194, "step": 161 }, { "epoch": 0.010147355893452763, "grad_norm": 1.0224551987015758, "learning_rate": 3.3820459290187898e-06, "loss": 0.5319, "step": 162 }, { "epoch": 0.010209993892795064, "grad_norm": 1.028785125398643, "learning_rate": 3.4029227557411277e-06, "loss": 0.466, "step": 163 }, { "epoch": 0.010272631892137365, "grad_norm": 1.0549527110026886, "learning_rate": 3.4237995824634656e-06, "loss": 0.4706, "step": 164 }, { "epoch": 0.010335269891479665, "grad_norm": 1.0925069832539478, "learning_rate": 3.4446764091858044e-06, "loss": 0.4652, "step": 165 }, { "epoch": 0.010397907890821968, "grad_norm": 1.0217216587012772, "learning_rate": 3.4655532359081423e-06, "loss": 0.4979, "step": 166 }, { "epoch": 0.010460545890164269, "grad_norm": 1.0633692728915884, "learning_rate": 3.4864300626304803e-06, "loss": 0.4994, "step": 167 }, { "epoch": 0.01052318388950657, "grad_norm": 0.9799541517896768, "learning_rate": 3.507306889352819e-06, "loss": 0.4484, "step": 168 }, { "epoch": 0.01058582188884887, "grad_norm": 1.0241758059586266, "learning_rate": 3.528183716075157e-06, "loss": 0.53, "step": 169 }, { "epoch": 0.01064845988819117, "grad_norm": 1.0730025392115772, "learning_rate": 3.549060542797495e-06, "loss": 0.4777, "step": 170 }, { "epoch": 0.010711097887533471, "grad_norm": 1.0851197827633212, "learning_rate": 3.569937369519833e-06, "loss": 0.4798, "step": 171 }, { "epoch": 0.010773735886875774, "grad_norm": 0.9858276946348559, "learning_rate": 3.5908141962421716e-06, "loss": 0.4618, "step": 172 }, { "epoch": 0.010836373886218074, "grad_norm": 0.923299798253563, "learning_rate": 3.6116910229645096e-06, "loss": 0.4611, "step": 173 }, { "epoch": 0.010899011885560375, "grad_norm": 1.034134739823511, "learning_rate": 3.632567849686848e-06, "loss": 0.5058, "step": 174 }, { "epoch": 0.010961649884902676, "grad_norm": 0.7398001387044641, "learning_rate": 3.6534446764091863e-06, "loss": 0.4597, "step": 175 }, { "epoch": 0.011024287884244977, "grad_norm": 0.9368124731619292, "learning_rate": 3.674321503131524e-06, "loss": 0.495, "step": 176 }, { "epoch": 0.011086925883587279, "grad_norm": 1.0924136049020585, "learning_rate": 3.6951983298538625e-06, "loss": 0.4231, "step": 177 }, { "epoch": 0.01114956388292958, "grad_norm": 1.0969746891242427, "learning_rate": 3.716075156576201e-06, "loss": 0.5264, "step": 178 }, { "epoch": 0.01121220188227188, "grad_norm": 0.980930696280059, "learning_rate": 3.736951983298539e-06, "loss": 0.4286, "step": 179 }, { "epoch": 0.011274839881614181, "grad_norm": 1.0404716326236667, "learning_rate": 3.757828810020877e-06, "loss": 0.4803, "step": 180 }, { "epoch": 0.011337477880956482, "grad_norm": 1.050052718634254, "learning_rate": 3.778705636743215e-06, "loss": 0.4789, "step": 181 }, { "epoch": 0.011400115880298784, "grad_norm": 1.0947033180482668, "learning_rate": 3.799582463465554e-06, "loss": 0.4995, "step": 182 }, { "epoch": 0.011462753879641085, "grad_norm": 1.0559025368136914, "learning_rate": 3.820459290187892e-06, "loss": 0.4069, "step": 183 }, { "epoch": 0.011525391878983385, "grad_norm": 1.0043055688658917, "learning_rate": 3.84133611691023e-06, "loss": 0.4521, "step": 184 }, { "epoch": 0.011588029878325686, "grad_norm": 1.1338024311268484, "learning_rate": 3.8622129436325685e-06, "loss": 0.4891, "step": 185 }, { "epoch": 0.011650667877667987, "grad_norm": 0.9601268987873947, "learning_rate": 3.883089770354906e-06, "loss": 0.4802, "step": 186 }, { "epoch": 0.011713305877010288, "grad_norm": 0.9529681904012965, "learning_rate": 3.903966597077244e-06, "loss": 0.4939, "step": 187 }, { "epoch": 0.01177594387635259, "grad_norm": 0.9866849998776137, "learning_rate": 3.924843423799583e-06, "loss": 0.4364, "step": 188 }, { "epoch": 0.01183858187569489, "grad_norm": 1.031337705595775, "learning_rate": 3.945720250521921e-06, "loss": 0.5092, "step": 189 }, { "epoch": 0.011901219875037191, "grad_norm": 0.8743091787107913, "learning_rate": 3.9665970772442595e-06, "loss": 0.4575, "step": 190 }, { "epoch": 0.011963857874379492, "grad_norm": 0.975697332586742, "learning_rate": 3.987473903966597e-06, "loss": 0.4648, "step": 191 }, { "epoch": 0.012026495873721793, "grad_norm": 1.126906857623645, "learning_rate": 4.008350730688936e-06, "loss": 0.52, "step": 192 }, { "epoch": 0.012089133873064095, "grad_norm": 0.9989527488750611, "learning_rate": 4.029227557411274e-06, "loss": 0.4691, "step": 193 }, { "epoch": 0.012151771872406396, "grad_norm": 1.1050127624680133, "learning_rate": 4.050104384133612e-06, "loss": 0.4697, "step": 194 }, { "epoch": 0.012214409871748697, "grad_norm": 1.1104494143040955, "learning_rate": 4.07098121085595e-06, "loss": 0.4984, "step": 195 }, { "epoch": 0.012277047871090997, "grad_norm": 1.0041829747409963, "learning_rate": 4.091858037578289e-06, "loss": 0.4786, "step": 196 }, { "epoch": 0.012339685870433298, "grad_norm": 0.9592971161986891, "learning_rate": 4.112734864300626e-06, "loss": 0.4733, "step": 197 }, { "epoch": 0.012402323869775599, "grad_norm": 1.0826668523833276, "learning_rate": 4.1336116910229655e-06, "loss": 0.4441, "step": 198 }, { "epoch": 0.012464961869117901, "grad_norm": 0.9944836197780231, "learning_rate": 4.154488517745303e-06, "loss": 0.5008, "step": 199 }, { "epoch": 0.012527599868460202, "grad_norm": 1.0539514839279558, "learning_rate": 4.175365344467641e-06, "loss": 0.459, "step": 200 }, { "epoch": 0.012590237867802502, "grad_norm": 1.053781464605634, "learning_rate": 4.196242171189979e-06, "loss": 0.4788, "step": 201 }, { "epoch": 0.012652875867144803, "grad_norm": 1.0211614699426739, "learning_rate": 4.217118997912318e-06, "loss": 0.4503, "step": 202 }, { "epoch": 0.012715513866487104, "grad_norm": 1.0427517617292166, "learning_rate": 4.2379958246346555e-06, "loss": 0.4924, "step": 203 }, { "epoch": 0.012778151865829406, "grad_norm": 1.0502804896506799, "learning_rate": 4.258872651356994e-06, "loss": 0.4832, "step": 204 }, { "epoch": 0.012840789865171707, "grad_norm": 0.9587067322996741, "learning_rate": 4.279749478079332e-06, "loss": 0.4718, "step": 205 }, { "epoch": 0.012903427864514008, "grad_norm": 1.029138473291132, "learning_rate": 4.300626304801671e-06, "loss": 0.465, "step": 206 }, { "epoch": 0.012966065863856308, "grad_norm": 1.0081575809405636, "learning_rate": 4.321503131524009e-06, "loss": 0.4482, "step": 207 }, { "epoch": 0.013028703863198609, "grad_norm": 1.0829791811044427, "learning_rate": 4.342379958246347e-06, "loss": 0.4797, "step": 208 }, { "epoch": 0.01309134186254091, "grad_norm": 1.0996774869825248, "learning_rate": 4.363256784968685e-06, "loss": 0.4574, "step": 209 }, { "epoch": 0.013153979861883212, "grad_norm": 1.092451142863826, "learning_rate": 4.384133611691023e-06, "loss": 0.4613, "step": 210 }, { "epoch": 0.013216617861225513, "grad_norm": 1.0926739930862424, "learning_rate": 4.4050104384133615e-06, "loss": 0.5453, "step": 211 }, { "epoch": 0.013279255860567813, "grad_norm": 0.9401689358719709, "learning_rate": 4.4258872651357e-06, "loss": 0.4912, "step": 212 }, { "epoch": 0.013341893859910114, "grad_norm": 1.020876926317057, "learning_rate": 4.446764091858038e-06, "loss": 0.437, "step": 213 }, { "epoch": 0.013404531859252415, "grad_norm": 0.9842030498288622, "learning_rate": 4.467640918580376e-06, "loss": 0.4654, "step": 214 }, { "epoch": 0.013467169858594717, "grad_norm": 1.0174471998772452, "learning_rate": 4.488517745302715e-06, "loss": 0.4453, "step": 215 }, { "epoch": 0.013529807857937018, "grad_norm": 1.0487063050004148, "learning_rate": 4.5093945720250525e-06, "loss": 0.4665, "step": 216 }, { "epoch": 0.013592445857279319, "grad_norm": 1.1077749997986306, "learning_rate": 4.530271398747391e-06, "loss": 0.4611, "step": 217 }, { "epoch": 0.01365508385662162, "grad_norm": 0.9738701078308191, "learning_rate": 4.551148225469729e-06, "loss": 0.4538, "step": 218 }, { "epoch": 0.01371772185596392, "grad_norm": 1.028132684002268, "learning_rate": 4.5720250521920675e-06, "loss": 0.4791, "step": 219 }, { "epoch": 0.013780359855306222, "grad_norm": 1.0299871015594273, "learning_rate": 4.592901878914405e-06, "loss": 0.4507, "step": 220 }, { "epoch": 0.013842997854648523, "grad_norm": 0.8168442302528722, "learning_rate": 4.613778705636743e-06, "loss": 0.4681, "step": 221 }, { "epoch": 0.013905635853990824, "grad_norm": 1.078313143025839, "learning_rate": 4.634655532359082e-06, "loss": 0.4852, "step": 222 }, { "epoch": 0.013968273853333124, "grad_norm": 1.0961526819838456, "learning_rate": 4.65553235908142e-06, "loss": 0.4442, "step": 223 }, { "epoch": 0.014030911852675425, "grad_norm": 0.9563744096049571, "learning_rate": 4.676409185803758e-06, "loss": 0.459, "step": 224 }, { "epoch": 0.014093549852017726, "grad_norm": 1.092222764494014, "learning_rate": 4.697286012526097e-06, "loss": 0.4911, "step": 225 }, { "epoch": 0.014156187851360028, "grad_norm": 1.0095658512782304, "learning_rate": 4.718162839248434e-06, "loss": 0.4814, "step": 226 }, { "epoch": 0.014218825850702329, "grad_norm": 1.0450061681710097, "learning_rate": 4.739039665970773e-06, "loss": 0.4972, "step": 227 }, { "epoch": 0.01428146385004463, "grad_norm": 0.9572559475918908, "learning_rate": 4.759916492693111e-06, "loss": 0.4452, "step": 228 }, { "epoch": 0.01434410184938693, "grad_norm": 0.9827017531097282, "learning_rate": 4.780793319415449e-06, "loss": 0.4687, "step": 229 }, { "epoch": 0.014406739848729231, "grad_norm": 1.124894650077553, "learning_rate": 4.801670146137788e-06, "loss": 0.4823, "step": 230 }, { "epoch": 0.014469377848071533, "grad_norm": 0.9979297806384481, "learning_rate": 4.822546972860125e-06, "loss": 0.4256, "step": 231 }, { "epoch": 0.014532015847413834, "grad_norm": 0.9390284419190013, "learning_rate": 4.843423799582464e-06, "loss": 0.4633, "step": 232 }, { "epoch": 0.014594653846756135, "grad_norm": 1.0807868026451068, "learning_rate": 4.864300626304802e-06, "loss": 0.4666, "step": 233 }, { "epoch": 0.014657291846098435, "grad_norm": 1.0250484260298092, "learning_rate": 4.88517745302714e-06, "loss": 0.4637, "step": 234 }, { "epoch": 0.014719929845440736, "grad_norm": 1.0496425969341063, "learning_rate": 4.906054279749479e-06, "loss": 0.445, "step": 235 }, { "epoch": 0.014782567844783037, "grad_norm": 1.1014218572219718, "learning_rate": 4.926931106471817e-06, "loss": 0.4995, "step": 236 }, { "epoch": 0.01484520584412534, "grad_norm": 1.0436644342582813, "learning_rate": 4.9478079331941545e-06, "loss": 0.4479, "step": 237 }, { "epoch": 0.01490784384346764, "grad_norm": 1.025364509020331, "learning_rate": 4.968684759916494e-06, "loss": 0.4943, "step": 238 }, { "epoch": 0.01497048184280994, "grad_norm": 1.0115353991579583, "learning_rate": 4.989561586638831e-06, "loss": 0.4563, "step": 239 }, { "epoch": 0.015033119842152241, "grad_norm": 1.1112352476579102, "learning_rate": 5.01043841336117e-06, "loss": 0.4971, "step": 240 }, { "epoch": 0.015095757841494542, "grad_norm": 0.9930497304035641, "learning_rate": 5.031315240083507e-06, "loss": 0.4736, "step": 241 }, { "epoch": 0.015158395840836844, "grad_norm": 1.1491353417688646, "learning_rate": 5.0521920668058454e-06, "loss": 0.5277, "step": 242 }, { "epoch": 0.015221033840179145, "grad_norm": 0.9693151423138642, "learning_rate": 5.073068893528185e-06, "loss": 0.4509, "step": 243 }, { "epoch": 0.015283671839521446, "grad_norm": 1.003465901462217, "learning_rate": 5.093945720250523e-06, "loss": 0.4503, "step": 244 }, { "epoch": 0.015346309838863747, "grad_norm": 1.0782380380736178, "learning_rate": 5.1148225469728605e-06, "loss": 0.4671, "step": 245 }, { "epoch": 0.015408947838206047, "grad_norm": 1.0348471612655046, "learning_rate": 5.135699373695199e-06, "loss": 0.482, "step": 246 }, { "epoch": 0.015471585837548348, "grad_norm": 1.0801449483388263, "learning_rate": 5.156576200417537e-06, "loss": 0.4937, "step": 247 }, { "epoch": 0.01553422383689065, "grad_norm": 1.1709948799670535, "learning_rate": 5.177453027139875e-06, "loss": 0.4665, "step": 248 }, { "epoch": 0.015596861836232951, "grad_norm": 1.1696299371705117, "learning_rate": 5.198329853862213e-06, "loss": 0.4685, "step": 249 }, { "epoch": 0.01565949983557525, "grad_norm": 1.015865638859858, "learning_rate": 5.219206680584552e-06, "loss": 0.4601, "step": 250 }, { "epoch": 0.015722137834917552, "grad_norm": 1.0898355356334632, "learning_rate": 5.24008350730689e-06, "loss": 0.489, "step": 251 }, { "epoch": 0.015784775834259853, "grad_norm": 1.2561835047068353, "learning_rate": 5.260960334029228e-06, "loss": 0.457, "step": 252 }, { "epoch": 0.015847413833602154, "grad_norm": 1.1268966625675128, "learning_rate": 5.2818371607515665e-06, "loss": 0.4358, "step": 253 }, { "epoch": 0.015910051832944454, "grad_norm": 1.0205344319262977, "learning_rate": 5.302713987473904e-06, "loss": 0.4615, "step": 254 }, { "epoch": 0.015972689832286755, "grad_norm": 1.260319243673335, "learning_rate": 5.323590814196242e-06, "loss": 0.4771, "step": 255 }, { "epoch": 0.01603532783162906, "grad_norm": 1.0125043246026921, "learning_rate": 5.34446764091858e-06, "loss": 0.4568, "step": 256 }, { "epoch": 0.01609796583097136, "grad_norm": 5.783045109394937, "learning_rate": 5.365344467640919e-06, "loss": 0.4561, "step": 257 }, { "epoch": 0.01616060383031366, "grad_norm": 0.9627982924180688, "learning_rate": 5.3862212943632574e-06, "loss": 0.4527, "step": 258 }, { "epoch": 0.01622324182965596, "grad_norm": 1.2279708942831664, "learning_rate": 5.407098121085596e-06, "loss": 0.4674, "step": 259 }, { "epoch": 0.016285879828998262, "grad_norm": 0.9597868260368048, "learning_rate": 5.427974947807933e-06, "loss": 0.4673, "step": 260 }, { "epoch": 0.016348517828340563, "grad_norm": 1.006917661253672, "learning_rate": 5.448851774530272e-06, "loss": 0.4924, "step": 261 }, { "epoch": 0.016411155827682863, "grad_norm": 0.9424211662503136, "learning_rate": 5.46972860125261e-06, "loss": 0.4403, "step": 262 }, { "epoch": 0.016473793827025164, "grad_norm": 1.3849835440518103, "learning_rate": 5.490605427974948e-06, "loss": 0.49, "step": 263 }, { "epoch": 0.016536431826367465, "grad_norm": 1.0593460404904704, "learning_rate": 5.511482254697287e-06, "loss": 0.4197, "step": 264 }, { "epoch": 0.016599069825709765, "grad_norm": 1.1157084062962488, "learning_rate": 5.532359081419625e-06, "loss": 0.4865, "step": 265 }, { "epoch": 0.016661707825052066, "grad_norm": 1.1000867158785046, "learning_rate": 5.553235908141963e-06, "loss": 0.4675, "step": 266 }, { "epoch": 0.01672434582439437, "grad_norm": 1.0770863171528489, "learning_rate": 5.574112734864301e-06, "loss": 0.4957, "step": 267 }, { "epoch": 0.01678698382373667, "grad_norm": 0.9714024490747837, "learning_rate": 5.594989561586639e-06, "loss": 0.4538, "step": 268 }, { "epoch": 0.01684962182307897, "grad_norm": 1.165988927555262, "learning_rate": 5.615866388308977e-06, "loss": 0.4951, "step": 269 }, { "epoch": 0.016912259822421272, "grad_norm": 0.9730974309557999, "learning_rate": 5.636743215031316e-06, "loss": 0.5038, "step": 270 }, { "epoch": 0.016974897821763573, "grad_norm": 0.9606155679232153, "learning_rate": 5.657620041753654e-06, "loss": 0.4488, "step": 271 }, { "epoch": 0.017037535821105874, "grad_norm": 0.804596948948002, "learning_rate": 5.678496868475992e-06, "loss": 0.4338, "step": 272 }, { "epoch": 0.017100173820448174, "grad_norm": 0.82495372513188, "learning_rate": 5.69937369519833e-06, "loss": 0.4594, "step": 273 }, { "epoch": 0.017162811819790475, "grad_norm": 1.2140370923133463, "learning_rate": 5.7202505219206686e-06, "loss": 0.4423, "step": 274 }, { "epoch": 0.017225449819132776, "grad_norm": 1.0746583875902784, "learning_rate": 5.741127348643006e-06, "loss": 0.4638, "step": 275 }, { "epoch": 0.017288087818475077, "grad_norm": 1.1261341351323662, "learning_rate": 5.7620041753653444e-06, "loss": 0.4986, "step": 276 }, { "epoch": 0.017350725817817377, "grad_norm": 1.0389684736927427, "learning_rate": 5.782881002087684e-06, "loss": 0.4936, "step": 277 }, { "epoch": 0.01741336381715968, "grad_norm": 0.948839026108661, "learning_rate": 5.803757828810022e-06, "loss": 0.4362, "step": 278 }, { "epoch": 0.017476001816501982, "grad_norm": 0.9552582088939211, "learning_rate": 5.8246346555323595e-06, "loss": 0.4629, "step": 279 }, { "epoch": 0.017538639815844283, "grad_norm": 0.9962394884434483, "learning_rate": 5.845511482254698e-06, "loss": 0.4312, "step": 280 }, { "epoch": 0.017601277815186583, "grad_norm": 1.0011099403035508, "learning_rate": 5.866388308977035e-06, "loss": 0.4656, "step": 281 }, { "epoch": 0.017663915814528884, "grad_norm": 0.908268892804034, "learning_rate": 5.887265135699374e-06, "loss": 0.4397, "step": 282 }, { "epoch": 0.017726553813871185, "grad_norm": 1.0817853192596105, "learning_rate": 5.908141962421713e-06, "loss": 0.4829, "step": 283 }, { "epoch": 0.017789191813213485, "grad_norm": 1.0326547122871144, "learning_rate": 5.929018789144051e-06, "loss": 0.4826, "step": 284 }, { "epoch": 0.017851829812555786, "grad_norm": 1.0256726962101932, "learning_rate": 5.949895615866389e-06, "loss": 0.4502, "step": 285 }, { "epoch": 0.017914467811898087, "grad_norm": 0.9322448247089512, "learning_rate": 5.970772442588727e-06, "loss": 0.4498, "step": 286 }, { "epoch": 0.017977105811240388, "grad_norm": 1.0891047466866584, "learning_rate": 5.991649269311065e-06, "loss": 0.4523, "step": 287 }, { "epoch": 0.01803974381058269, "grad_norm": 1.0227559762408969, "learning_rate": 6.012526096033403e-06, "loss": 0.457, "step": 288 }, { "epoch": 0.018102381809924992, "grad_norm": 1.072883506924392, "learning_rate": 6.033402922755741e-06, "loss": 0.4818, "step": 289 }, { "epoch": 0.018165019809267293, "grad_norm": 0.9846795894752909, "learning_rate": 6.0542797494780806e-06, "loss": 0.4944, "step": 290 }, { "epoch": 0.018227657808609594, "grad_norm": 1.02540808295984, "learning_rate": 6.075156576200418e-06, "loss": 0.479, "step": 291 }, { "epoch": 0.018290295807951894, "grad_norm": 1.0847320806696585, "learning_rate": 6.096033402922756e-06, "loss": 0.4698, "step": 292 }, { "epoch": 0.018352933807294195, "grad_norm": 0.9891368008387524, "learning_rate": 6.116910229645095e-06, "loss": 0.4667, "step": 293 }, { "epoch": 0.018415571806636496, "grad_norm": 1.0530699761424416, "learning_rate": 6.137787056367432e-06, "loss": 0.4979, "step": 294 }, { "epoch": 0.018478209805978797, "grad_norm": 1.0643990774249072, "learning_rate": 6.158663883089771e-06, "loss": 0.4731, "step": 295 }, { "epoch": 0.018540847805321097, "grad_norm": 1.0328454767590418, "learning_rate": 6.179540709812108e-06, "loss": 0.4376, "step": 296 }, { "epoch": 0.018603485804663398, "grad_norm": 1.0017426844796349, "learning_rate": 6.200417536534447e-06, "loss": 0.4748, "step": 297 }, { "epoch": 0.0186661238040057, "grad_norm": 0.8399297779633434, "learning_rate": 6.221294363256786e-06, "loss": 0.4697, "step": 298 }, { "epoch": 0.018728761803348003, "grad_norm": 0.9658580400707074, "learning_rate": 6.242171189979124e-06, "loss": 0.421, "step": 299 }, { "epoch": 0.018791399802690303, "grad_norm": 1.0710821607185472, "learning_rate": 6.2630480167014616e-06, "loss": 0.4711, "step": 300 }, { "epoch": 0.018854037802032604, "grad_norm": 1.0924650153830093, "learning_rate": 6.2839248434238e-06, "loss": 0.4777, "step": 301 }, { "epoch": 0.018916675801374905, "grad_norm": 1.1367377343788119, "learning_rate": 6.304801670146138e-06, "loss": 0.4882, "step": 302 }, { "epoch": 0.018979313800717205, "grad_norm": 0.9941537493168215, "learning_rate": 6.325678496868477e-06, "loss": 0.4388, "step": 303 }, { "epoch": 0.019041951800059506, "grad_norm": 0.7827888700809601, "learning_rate": 6.346555323590815e-06, "loss": 0.4575, "step": 304 }, { "epoch": 0.019104589799401807, "grad_norm": 0.9769272407633175, "learning_rate": 6.367432150313153e-06, "loss": 0.4957, "step": 305 }, { "epoch": 0.019167227798744108, "grad_norm": 1.2174281887361098, "learning_rate": 6.388308977035491e-06, "loss": 0.4948, "step": 306 }, { "epoch": 0.019229865798086408, "grad_norm": 0.8228105578333935, "learning_rate": 6.409185803757829e-06, "loss": 0.4717, "step": 307 }, { "epoch": 0.01929250379742871, "grad_norm": 1.0140585185309565, "learning_rate": 6.4300626304801676e-06, "loss": 0.4698, "step": 308 }, { "epoch": 0.01935514179677101, "grad_norm": 1.0156574282778859, "learning_rate": 6.450939457202505e-06, "loss": 0.521, "step": 309 }, { "epoch": 0.019417779796113314, "grad_norm": 1.0524019675196545, "learning_rate": 6.471816283924844e-06, "loss": 0.4367, "step": 310 }, { "epoch": 0.019480417795455614, "grad_norm": 0.9262597784208584, "learning_rate": 6.492693110647183e-06, "loss": 0.4538, "step": 311 }, { "epoch": 0.019543055794797915, "grad_norm": 0.9426327634140615, "learning_rate": 6.51356993736952e-06, "loss": 0.3656, "step": 312 }, { "epoch": 0.019605693794140216, "grad_norm": 1.0964103544154795, "learning_rate": 6.5344467640918585e-06, "loss": 0.4674, "step": 313 }, { "epoch": 0.019668331793482517, "grad_norm": 0.994367132604669, "learning_rate": 6.555323590814197e-06, "loss": 0.494, "step": 314 }, { "epoch": 0.019730969792824817, "grad_norm": 1.1428090786125302, "learning_rate": 6.576200417536534e-06, "loss": 0.4932, "step": 315 }, { "epoch": 0.019793607792167118, "grad_norm": 1.0107334886798172, "learning_rate": 6.597077244258873e-06, "loss": 0.5024, "step": 316 }, { "epoch": 0.01985624579150942, "grad_norm": 0.9526133922658374, "learning_rate": 6.617954070981212e-06, "loss": 0.4813, "step": 317 }, { "epoch": 0.01991888379085172, "grad_norm": 1.030159601731955, "learning_rate": 6.638830897703549e-06, "loss": 0.4851, "step": 318 }, { "epoch": 0.01998152179019402, "grad_norm": 0.9932425525343781, "learning_rate": 6.659707724425888e-06, "loss": 0.4296, "step": 319 }, { "epoch": 0.02004415978953632, "grad_norm": 1.0137186265445013, "learning_rate": 6.680584551148226e-06, "loss": 0.4551, "step": 320 }, { "epoch": 0.020106797788878625, "grad_norm": 0.9362823546186946, "learning_rate": 6.701461377870564e-06, "loss": 0.4011, "step": 321 }, { "epoch": 0.020169435788220925, "grad_norm": 1.0016938508387971, "learning_rate": 6.722338204592902e-06, "loss": 0.4513, "step": 322 }, { "epoch": 0.020232073787563226, "grad_norm": 0.9830197209703241, "learning_rate": 6.743215031315241e-06, "loss": 0.4194, "step": 323 }, { "epoch": 0.020294711786905527, "grad_norm": 1.0832431016339947, "learning_rate": 6.7640918580375795e-06, "loss": 0.4661, "step": 324 }, { "epoch": 0.020357349786247828, "grad_norm": 1.0961203160099944, "learning_rate": 6.784968684759917e-06, "loss": 0.4592, "step": 325 }, { "epoch": 0.020419987785590128, "grad_norm": 1.0853213801865442, "learning_rate": 6.805845511482255e-06, "loss": 0.4669, "step": 326 }, { "epoch": 0.02048262578493243, "grad_norm": 0.9817851639344812, "learning_rate": 6.826722338204593e-06, "loss": 0.455, "step": 327 }, { "epoch": 0.02054526378427473, "grad_norm": 0.9745276162862645, "learning_rate": 6.847599164926931e-06, "loss": 0.4672, "step": 328 }, { "epoch": 0.02060790178361703, "grad_norm": 0.9485597310391845, "learning_rate": 6.86847599164927e-06, "loss": 0.4692, "step": 329 }, { "epoch": 0.02067053978295933, "grad_norm": 0.9728458860153021, "learning_rate": 6.889352818371609e-06, "loss": 0.4455, "step": 330 }, { "epoch": 0.02073317778230163, "grad_norm": 1.0720947528693843, "learning_rate": 6.910229645093946e-06, "loss": 0.5045, "step": 331 }, { "epoch": 0.020795815781643936, "grad_norm": 1.042613020058983, "learning_rate": 6.931106471816285e-06, "loss": 0.4667, "step": 332 }, { "epoch": 0.020858453780986237, "grad_norm": 1.0063932841881253, "learning_rate": 6.951983298538623e-06, "loss": 0.4681, "step": 333 }, { "epoch": 0.020921091780328537, "grad_norm": 1.0059503424200802, "learning_rate": 6.9728601252609605e-06, "loss": 0.4821, "step": 334 }, { "epoch": 0.020983729779670838, "grad_norm": 0.9824906716228554, "learning_rate": 6.993736951983299e-06, "loss": 0.4453, "step": 335 }, { "epoch": 0.02104636777901314, "grad_norm": 0.9233078581058215, "learning_rate": 7.014613778705638e-06, "loss": 0.4423, "step": 336 }, { "epoch": 0.02110900577835544, "grad_norm": 1.0265948474360445, "learning_rate": 7.035490605427976e-06, "loss": 0.4581, "step": 337 }, { "epoch": 0.02117164377769774, "grad_norm": 1.0193498228285711, "learning_rate": 7.056367432150314e-06, "loss": 0.454, "step": 338 }, { "epoch": 0.02123428177704004, "grad_norm": 0.9715118948039183, "learning_rate": 7.077244258872652e-06, "loss": 0.4881, "step": 339 }, { "epoch": 0.02129691977638234, "grad_norm": 0.9476429877580987, "learning_rate": 7.09812108559499e-06, "loss": 0.484, "step": 340 }, { "epoch": 0.021359557775724642, "grad_norm": 1.092216360482922, "learning_rate": 7.118997912317328e-06, "loss": 0.4814, "step": 341 }, { "epoch": 0.021422195775066943, "grad_norm": 1.0432762013730783, "learning_rate": 7.139874739039666e-06, "loss": 0.4688, "step": 342 }, { "epoch": 0.021484833774409247, "grad_norm": 1.1255772116234146, "learning_rate": 7.160751565762005e-06, "loss": 0.4873, "step": 343 }, { "epoch": 0.021547471773751548, "grad_norm": 1.0350980392828582, "learning_rate": 7.181628392484343e-06, "loss": 0.4634, "step": 344 }, { "epoch": 0.021610109773093848, "grad_norm": 1.0601640408558772, "learning_rate": 7.202505219206682e-06, "loss": 0.4744, "step": 345 }, { "epoch": 0.02167274777243615, "grad_norm": 1.0061908449669457, "learning_rate": 7.223382045929019e-06, "loss": 0.4412, "step": 346 }, { "epoch": 0.02173538577177845, "grad_norm": 1.0508727300316898, "learning_rate": 7.2442588726513575e-06, "loss": 0.4838, "step": 347 }, { "epoch": 0.02179802377112075, "grad_norm": 1.0903114376290823, "learning_rate": 7.265135699373696e-06, "loss": 0.5477, "step": 348 }, { "epoch": 0.02186066177046305, "grad_norm": 1.1430748532007675, "learning_rate": 7.286012526096033e-06, "loss": 0.5006, "step": 349 }, { "epoch": 0.02192329976980535, "grad_norm": 1.1784332576331287, "learning_rate": 7.3068893528183725e-06, "loss": 0.4369, "step": 350 }, { "epoch": 0.021985937769147652, "grad_norm": 0.9108783379481976, "learning_rate": 7.327766179540711e-06, "loss": 0.4981, "step": 351 }, { "epoch": 0.022048575768489953, "grad_norm": 0.9892401630581342, "learning_rate": 7.348643006263048e-06, "loss": 0.4387, "step": 352 }, { "epoch": 0.022111213767832254, "grad_norm": 0.892853676376306, "learning_rate": 7.369519832985387e-06, "loss": 0.4402, "step": 353 }, { "epoch": 0.022173851767174558, "grad_norm": 1.0114216292799079, "learning_rate": 7.390396659707725e-06, "loss": 0.4749, "step": 354 }, { "epoch": 0.02223648976651686, "grad_norm": 0.9922434476456627, "learning_rate": 7.411273486430063e-06, "loss": 0.4171, "step": 355 }, { "epoch": 0.02229912776585916, "grad_norm": 0.9772069829255501, "learning_rate": 7.432150313152402e-06, "loss": 0.4822, "step": 356 }, { "epoch": 0.02236176576520146, "grad_norm": 0.9399639872125047, "learning_rate": 7.45302713987474e-06, "loss": 0.4378, "step": 357 }, { "epoch": 0.02242440376454376, "grad_norm": 1.009429971197964, "learning_rate": 7.473903966597078e-06, "loss": 0.4977, "step": 358 }, { "epoch": 0.02248704176388606, "grad_norm": 0.9942475303092013, "learning_rate": 7.494780793319416e-06, "loss": 0.5264, "step": 359 }, { "epoch": 0.022549679763228362, "grad_norm": 0.9505252607304049, "learning_rate": 7.515657620041754e-06, "loss": 0.4614, "step": 360 }, { "epoch": 0.022612317762570663, "grad_norm": 1.0349102788798663, "learning_rate": 7.536534446764092e-06, "loss": 0.4749, "step": 361 }, { "epoch": 0.022674955761912963, "grad_norm": 0.9967059255639978, "learning_rate": 7.55741127348643e-06, "loss": 0.4495, "step": 362 }, { "epoch": 0.022737593761255264, "grad_norm": 0.9691161651484356, "learning_rate": 7.5782881002087694e-06, "loss": 0.4231, "step": 363 }, { "epoch": 0.022800231760597568, "grad_norm": 1.0471392917543423, "learning_rate": 7.599164926931108e-06, "loss": 0.4804, "step": 364 }, { "epoch": 0.02286286975993987, "grad_norm": 1.1466097172133765, "learning_rate": 7.620041753653445e-06, "loss": 0.4777, "step": 365 }, { "epoch": 0.02292550775928217, "grad_norm": 1.0810933896664272, "learning_rate": 7.640918580375784e-06, "loss": 0.4708, "step": 366 }, { "epoch": 0.02298814575862447, "grad_norm": 0.9477066851146424, "learning_rate": 7.661795407098122e-06, "loss": 0.4618, "step": 367 }, { "epoch": 0.02305078375796677, "grad_norm": 1.0526356549997242, "learning_rate": 7.68267223382046e-06, "loss": 0.4857, "step": 368 }, { "epoch": 0.02311342175730907, "grad_norm": 1.0137781803367414, "learning_rate": 7.703549060542797e-06, "loss": 0.4601, "step": 369 }, { "epoch": 0.023176059756651372, "grad_norm": 0.9035822104579848, "learning_rate": 7.724425887265137e-06, "loss": 0.4423, "step": 370 }, { "epoch": 0.023238697755993673, "grad_norm": 0.9890150970382936, "learning_rate": 7.745302713987475e-06, "loss": 0.4892, "step": 371 }, { "epoch": 0.023301335755335974, "grad_norm": 0.9752754153103337, "learning_rate": 7.766179540709812e-06, "loss": 0.4647, "step": 372 }, { "epoch": 0.023363973754678274, "grad_norm": 1.0294150072952917, "learning_rate": 7.78705636743215e-06, "loss": 0.4585, "step": 373 }, { "epoch": 0.023426611754020575, "grad_norm": 1.0368743151752813, "learning_rate": 7.807933194154489e-06, "loss": 0.4911, "step": 374 }, { "epoch": 0.02348924975336288, "grad_norm": 0.9236772394686724, "learning_rate": 7.828810020876827e-06, "loss": 0.4694, "step": 375 }, { "epoch": 0.02355188775270518, "grad_norm": 0.9764081240870652, "learning_rate": 7.849686847599166e-06, "loss": 0.4672, "step": 376 }, { "epoch": 0.02361452575204748, "grad_norm": 1.025613826615659, "learning_rate": 7.870563674321504e-06, "loss": 0.4618, "step": 377 }, { "epoch": 0.02367716375138978, "grad_norm": 1.0120143895301574, "learning_rate": 7.891440501043842e-06, "loss": 0.4473, "step": 378 }, { "epoch": 0.023739801750732082, "grad_norm": 0.8105118221530545, "learning_rate": 7.91231732776618e-06, "loss": 0.4704, "step": 379 }, { "epoch": 0.023802439750074383, "grad_norm": 1.0360834970242987, "learning_rate": 7.933194154488519e-06, "loss": 0.4632, "step": 380 }, { "epoch": 0.023865077749416683, "grad_norm": 0.9131812803758628, "learning_rate": 7.954070981210856e-06, "loss": 0.3959, "step": 381 }, { "epoch": 0.023927715748758984, "grad_norm": 1.0862417475805177, "learning_rate": 7.974947807933194e-06, "loss": 0.4341, "step": 382 }, { "epoch": 0.023990353748101285, "grad_norm": 1.1857066699203984, "learning_rate": 7.995824634655534e-06, "loss": 0.3944, "step": 383 }, { "epoch": 0.024052991747443585, "grad_norm": 1.0318652268164963, "learning_rate": 8.016701461377872e-06, "loss": 0.4907, "step": 384 }, { "epoch": 0.024115629746785886, "grad_norm": 1.150685862822361, "learning_rate": 8.037578288100209e-06, "loss": 0.483, "step": 385 }, { "epoch": 0.02417826774612819, "grad_norm": 1.1380555353977218, "learning_rate": 8.058455114822547e-06, "loss": 0.4554, "step": 386 }, { "epoch": 0.02424090574547049, "grad_norm": 0.9755624010943118, "learning_rate": 8.079331941544886e-06, "loss": 0.4541, "step": 387 }, { "epoch": 0.02430354374481279, "grad_norm": 1.039565472122167, "learning_rate": 8.100208768267224e-06, "loss": 0.4325, "step": 388 }, { "epoch": 0.024366181744155092, "grad_norm": 0.8816276176909026, "learning_rate": 8.121085594989562e-06, "loss": 0.4616, "step": 389 }, { "epoch": 0.024428819743497393, "grad_norm": 1.037750704492878, "learning_rate": 8.1419624217119e-06, "loss": 0.5099, "step": 390 }, { "epoch": 0.024491457742839694, "grad_norm": 1.0139945790367009, "learning_rate": 8.162839248434239e-06, "loss": 0.5087, "step": 391 }, { "epoch": 0.024554095742181994, "grad_norm": 0.8376043073774213, "learning_rate": 8.183716075156577e-06, "loss": 0.4529, "step": 392 }, { "epoch": 0.024616733741524295, "grad_norm": 1.0786410074917778, "learning_rate": 8.204592901878914e-06, "loss": 0.4523, "step": 393 }, { "epoch": 0.024679371740866596, "grad_norm": 1.0480518416738163, "learning_rate": 8.225469728601253e-06, "loss": 0.4616, "step": 394 }, { "epoch": 0.024742009740208896, "grad_norm": 1.0260583482672316, "learning_rate": 8.246346555323591e-06, "loss": 0.427, "step": 395 }, { "epoch": 0.024804647739551197, "grad_norm": 1.1264105332455978, "learning_rate": 8.267223382045931e-06, "loss": 0.4976, "step": 396 }, { "epoch": 0.0248672857388935, "grad_norm": 1.032063917510832, "learning_rate": 8.288100208768268e-06, "loss": 0.5019, "step": 397 }, { "epoch": 0.024929923738235802, "grad_norm": 0.9715164719205722, "learning_rate": 8.308977035490606e-06, "loss": 0.4455, "step": 398 }, { "epoch": 0.024992561737578103, "grad_norm": 0.9814933514219619, "learning_rate": 8.329853862212944e-06, "loss": 0.4456, "step": 399 }, { "epoch": 0.025055199736920403, "grad_norm": 1.0230326730865775, "learning_rate": 8.350730688935283e-06, "loss": 0.4471, "step": 400 }, { "epoch": 0.025117837736262704, "grad_norm": 1.0595310751493696, "learning_rate": 8.371607515657621e-06, "loss": 0.4952, "step": 401 }, { "epoch": 0.025180475735605005, "grad_norm": 0.9772124143996185, "learning_rate": 8.392484342379958e-06, "loss": 0.4424, "step": 402 }, { "epoch": 0.025243113734947305, "grad_norm": 1.0524528330565162, "learning_rate": 8.413361169102298e-06, "loss": 0.4521, "step": 403 }, { "epoch": 0.025305751734289606, "grad_norm": 0.9482437461981857, "learning_rate": 8.434237995824636e-06, "loss": 0.4439, "step": 404 }, { "epoch": 0.025368389733631907, "grad_norm": 0.9526838767880685, "learning_rate": 8.455114822546974e-06, "loss": 0.4119, "step": 405 }, { "epoch": 0.025431027732974208, "grad_norm": 1.0310039595839549, "learning_rate": 8.475991649269311e-06, "loss": 0.4911, "step": 406 }, { "epoch": 0.025493665732316508, "grad_norm": 1.2591414480657233, "learning_rate": 8.49686847599165e-06, "loss": 0.503, "step": 407 }, { "epoch": 0.025556303731658812, "grad_norm": 1.1191338580026253, "learning_rate": 8.517745302713988e-06, "loss": 0.4951, "step": 408 }, { "epoch": 0.025618941731001113, "grad_norm": 1.02116195368937, "learning_rate": 8.538622129436326e-06, "loss": 0.4613, "step": 409 }, { "epoch": 0.025681579730343414, "grad_norm": 1.0671134830736575, "learning_rate": 8.559498956158664e-06, "loss": 0.4676, "step": 410 }, { "epoch": 0.025744217729685714, "grad_norm": 1.071283682343229, "learning_rate": 8.580375782881003e-06, "loss": 0.4814, "step": 411 }, { "epoch": 0.025806855729028015, "grad_norm": 0.9395657309429192, "learning_rate": 8.601252609603341e-06, "loss": 0.4845, "step": 412 }, { "epoch": 0.025869493728370316, "grad_norm": 1.1414152937755306, "learning_rate": 8.62212943632568e-06, "loss": 0.4562, "step": 413 }, { "epoch": 0.025932131727712616, "grad_norm": 0.9606318805371331, "learning_rate": 8.643006263048018e-06, "loss": 0.382, "step": 414 }, { "epoch": 0.025994769727054917, "grad_norm": 0.9775244840413565, "learning_rate": 8.663883089770355e-06, "loss": 0.4717, "step": 415 }, { "epoch": 0.026057407726397218, "grad_norm": 1.0218755935329171, "learning_rate": 8.684759916492695e-06, "loss": 0.4704, "step": 416 }, { "epoch": 0.02612004572573952, "grad_norm": 1.0144329241935746, "learning_rate": 8.705636743215033e-06, "loss": 0.5093, "step": 417 }, { "epoch": 0.02618268372508182, "grad_norm": 1.0379790069097439, "learning_rate": 8.72651356993737e-06, "loss": 0.4927, "step": 418 }, { "epoch": 0.026245321724424123, "grad_norm": 1.1413911809593913, "learning_rate": 8.747390396659708e-06, "loss": 0.4624, "step": 419 }, { "epoch": 0.026307959723766424, "grad_norm": 1.090739939005204, "learning_rate": 8.768267223382046e-06, "loss": 0.4669, "step": 420 }, { "epoch": 0.026370597723108725, "grad_norm": 1.359887219753948, "learning_rate": 8.789144050104385e-06, "loss": 0.4675, "step": 421 }, { "epoch": 0.026433235722451025, "grad_norm": 1.1462638444121762, "learning_rate": 8.810020876826723e-06, "loss": 0.4464, "step": 422 }, { "epoch": 0.026495873721793326, "grad_norm": 1.0289071005825086, "learning_rate": 8.830897703549061e-06, "loss": 0.4464, "step": 423 }, { "epoch": 0.026558511721135627, "grad_norm": 1.1092552650257865, "learning_rate": 8.8517745302714e-06, "loss": 0.488, "step": 424 }, { "epoch": 0.026621149720477928, "grad_norm": 0.9723700364332956, "learning_rate": 8.872651356993738e-06, "loss": 0.4403, "step": 425 }, { "epoch": 0.026683787719820228, "grad_norm": 1.0884467003196372, "learning_rate": 8.893528183716076e-06, "loss": 0.4897, "step": 426 }, { "epoch": 0.02674642571916253, "grad_norm": 1.0339845314560088, "learning_rate": 8.914405010438413e-06, "loss": 0.4508, "step": 427 }, { "epoch": 0.02680906371850483, "grad_norm": 1.0215559406421708, "learning_rate": 8.935281837160751e-06, "loss": 0.4455, "step": 428 }, { "epoch": 0.02687170171784713, "grad_norm": 1.3859299271398258, "learning_rate": 8.95615866388309e-06, "loss": 0.4688, "step": 429 }, { "epoch": 0.026934339717189434, "grad_norm": 0.9658495508125703, "learning_rate": 8.97703549060543e-06, "loss": 0.4715, "step": 430 }, { "epoch": 0.026996977716531735, "grad_norm": 0.8392638969134845, "learning_rate": 8.997912317327767e-06, "loss": 0.4373, "step": 431 }, { "epoch": 0.027059615715874036, "grad_norm": 1.0236434732686934, "learning_rate": 9.018789144050105e-06, "loss": 0.4407, "step": 432 }, { "epoch": 0.027122253715216336, "grad_norm": 0.9506770278455658, "learning_rate": 9.039665970772443e-06, "loss": 0.4303, "step": 433 }, { "epoch": 0.027184891714558637, "grad_norm": 0.9762568425740348, "learning_rate": 9.060542797494782e-06, "loss": 0.4551, "step": 434 }, { "epoch": 0.027247529713900938, "grad_norm": 0.9493415685861777, "learning_rate": 9.08141962421712e-06, "loss": 0.4291, "step": 435 }, { "epoch": 0.02731016771324324, "grad_norm": 0.98682363472654, "learning_rate": 9.102296450939458e-06, "loss": 0.4223, "step": 436 }, { "epoch": 0.02737280571258554, "grad_norm": 1.0583860429633773, "learning_rate": 9.123173277661797e-06, "loss": 0.5016, "step": 437 }, { "epoch": 0.02743544371192784, "grad_norm": 1.094915578684592, "learning_rate": 9.144050104384135e-06, "loss": 0.4812, "step": 438 }, { "epoch": 0.02749808171127014, "grad_norm": 1.1489910075375958, "learning_rate": 9.164926931106473e-06, "loss": 0.4448, "step": 439 }, { "epoch": 0.027560719710612445, "grad_norm": 1.0651067721530127, "learning_rate": 9.18580375782881e-06, "loss": 0.451, "step": 440 }, { "epoch": 0.027623357709954745, "grad_norm": 0.984838143778904, "learning_rate": 9.206680584551148e-06, "loss": 0.4447, "step": 441 }, { "epoch": 0.027685995709297046, "grad_norm": 0.9919782890265614, "learning_rate": 9.227557411273487e-06, "loss": 0.4561, "step": 442 }, { "epoch": 0.027748633708639347, "grad_norm": 1.0154264201915884, "learning_rate": 9.248434237995825e-06, "loss": 0.4458, "step": 443 }, { "epoch": 0.027811271707981648, "grad_norm": 1.0792354186623045, "learning_rate": 9.269311064718163e-06, "loss": 0.4239, "step": 444 }, { "epoch": 0.027873909707323948, "grad_norm": 1.34376289454518, "learning_rate": 9.290187891440502e-06, "loss": 0.4765, "step": 445 }, { "epoch": 0.02793654770666625, "grad_norm": 1.024778963163423, "learning_rate": 9.31106471816284e-06, "loss": 0.4298, "step": 446 }, { "epoch": 0.02799918570600855, "grad_norm": 1.0031450242070024, "learning_rate": 9.331941544885179e-06, "loss": 0.4481, "step": 447 }, { "epoch": 0.02806182370535085, "grad_norm": 1.0626565643549075, "learning_rate": 9.352818371607515e-06, "loss": 0.4604, "step": 448 }, { "epoch": 0.02812446170469315, "grad_norm": 0.9551350468949873, "learning_rate": 9.373695198329854e-06, "loss": 0.4503, "step": 449 }, { "epoch": 0.02818709970403545, "grad_norm": 1.504403744828678, "learning_rate": 9.394572025052194e-06, "loss": 0.4255, "step": 450 }, { "epoch": 0.028249737703377756, "grad_norm": 0.9780721403717871, "learning_rate": 9.415448851774532e-06, "loss": 0.4795, "step": 451 }, { "epoch": 0.028312375702720056, "grad_norm": 0.9786691712435661, "learning_rate": 9.436325678496869e-06, "loss": 0.4787, "step": 452 }, { "epoch": 0.028375013702062357, "grad_norm": 1.0643419116158872, "learning_rate": 9.457202505219207e-06, "loss": 0.4649, "step": 453 }, { "epoch": 0.028437651701404658, "grad_norm": 1.0986803047589564, "learning_rate": 9.478079331941545e-06, "loss": 0.4881, "step": 454 }, { "epoch": 0.02850028970074696, "grad_norm": 2.2652590419375427, "learning_rate": 9.498956158663884e-06, "loss": 0.4163, "step": 455 }, { "epoch": 0.02856292770008926, "grad_norm": 1.0581246843333987, "learning_rate": 9.519832985386222e-06, "loss": 0.4753, "step": 456 }, { "epoch": 0.02862556569943156, "grad_norm": 0.9339112351436439, "learning_rate": 9.54070981210856e-06, "loss": 0.4728, "step": 457 }, { "epoch": 0.02868820369877386, "grad_norm": 1.0919109524183666, "learning_rate": 9.561586638830899e-06, "loss": 0.453, "step": 458 }, { "epoch": 0.02875084169811616, "grad_norm": 1.1234203968235812, "learning_rate": 9.582463465553237e-06, "loss": 0.4842, "step": 459 }, { "epoch": 0.028813479697458462, "grad_norm": 0.8195552089319333, "learning_rate": 9.603340292275575e-06, "loss": 0.4697, "step": 460 }, { "epoch": 0.028876117696800763, "grad_norm": 0.9771454273604905, "learning_rate": 9.624217118997912e-06, "loss": 0.468, "step": 461 }, { "epoch": 0.028938755696143067, "grad_norm": 0.999655777687778, "learning_rate": 9.64509394572025e-06, "loss": 0.4898, "step": 462 }, { "epoch": 0.029001393695485368, "grad_norm": 1.050439874886279, "learning_rate": 9.66597077244259e-06, "loss": 0.4998, "step": 463 }, { "epoch": 0.029064031694827668, "grad_norm": 1.038096993298549, "learning_rate": 9.686847599164927e-06, "loss": 0.4304, "step": 464 }, { "epoch": 0.02912666969416997, "grad_norm": 0.9590823159093111, "learning_rate": 9.707724425887266e-06, "loss": 0.4569, "step": 465 }, { "epoch": 0.02918930769351227, "grad_norm": 1.0126144191437996, "learning_rate": 9.728601252609604e-06, "loss": 0.416, "step": 466 }, { "epoch": 0.02925194569285457, "grad_norm": 0.990069353375383, "learning_rate": 9.749478079331942e-06, "loss": 0.4583, "step": 467 }, { "epoch": 0.02931458369219687, "grad_norm": 1.0380118484199026, "learning_rate": 9.77035490605428e-06, "loss": 0.4956, "step": 468 }, { "epoch": 0.02937722169153917, "grad_norm": 1.1014460595693187, "learning_rate": 9.791231732776619e-06, "loss": 0.4977, "step": 469 }, { "epoch": 0.029439859690881472, "grad_norm": 1.0085092508405116, "learning_rate": 9.812108559498957e-06, "loss": 0.4648, "step": 470 }, { "epoch": 0.029502497690223773, "grad_norm": 0.9907624588013442, "learning_rate": 9.832985386221296e-06, "loss": 0.4884, "step": 471 }, { "epoch": 0.029565135689566074, "grad_norm": 1.0051374056097386, "learning_rate": 9.853862212943634e-06, "loss": 0.5023, "step": 472 }, { "epoch": 0.029627773688908378, "grad_norm": 0.9108677857108926, "learning_rate": 9.87473903966597e-06, "loss": 0.4439, "step": 473 }, { "epoch": 0.02969041168825068, "grad_norm": 1.0280437212636164, "learning_rate": 9.895615866388309e-06, "loss": 0.465, "step": 474 }, { "epoch": 0.02975304968759298, "grad_norm": 0.9906976080255074, "learning_rate": 9.916492693110647e-06, "loss": 0.5124, "step": 475 }, { "epoch": 0.02981568768693528, "grad_norm": 1.0164079870710059, "learning_rate": 9.937369519832987e-06, "loss": 0.4487, "step": 476 }, { "epoch": 0.02987832568627758, "grad_norm": 1.214954161070769, "learning_rate": 9.958246346555324e-06, "loss": 0.4621, "step": 477 }, { "epoch": 0.02994096368561988, "grad_norm": 0.9466834711550224, "learning_rate": 9.979123173277662e-06, "loss": 0.4288, "step": 478 }, { "epoch": 0.030003601684962182, "grad_norm": 1.0107912584126701, "learning_rate": 1e-05, "loss": 0.4327, "step": 479 }, { "epoch": 0.030066239684304483, "grad_norm": 1.0436277987152458, "learning_rate": 9.999999897099536e-06, "loss": 0.4946, "step": 480 }, { "epoch": 0.030128877683646783, "grad_norm": 1.1407797723300557, "learning_rate": 9.999999588398148e-06, "loss": 0.5431, "step": 481 }, { "epoch": 0.030191515682989084, "grad_norm": 0.9581165982334768, "learning_rate": 9.99999907389585e-06, "loss": 0.433, "step": 482 }, { "epoch": 0.030254153682331385, "grad_norm": 1.0943637577641834, "learning_rate": 9.99999835359266e-06, "loss": 0.4752, "step": 483 }, { "epoch": 0.03031679168167369, "grad_norm": 0.9923849428331267, "learning_rate": 9.999997427488612e-06, "loss": 0.4821, "step": 484 }, { "epoch": 0.03037942968101599, "grad_norm": 0.9175734193478818, "learning_rate": 9.999996295583741e-06, "loss": 0.4388, "step": 485 }, { "epoch": 0.03044206768035829, "grad_norm": 1.0024348356344028, "learning_rate": 9.999994957878094e-06, "loss": 0.4839, "step": 486 }, { "epoch": 0.03050470567970059, "grad_norm": 0.9854766442151004, "learning_rate": 9.999993414371726e-06, "loss": 0.46, "step": 487 }, { "epoch": 0.03056734367904289, "grad_norm": 0.9245639832385998, "learning_rate": 9.999991665064703e-06, "loss": 0.4281, "step": 488 }, { "epoch": 0.030629981678385192, "grad_norm": 0.9248227741637166, "learning_rate": 9.999989709957093e-06, "loss": 0.4771, "step": 489 }, { "epoch": 0.030692619677727493, "grad_norm": 0.92059840901639, "learning_rate": 9.99998754904898e-06, "loss": 0.4139, "step": 490 }, { "epoch": 0.030755257677069794, "grad_norm": 1.0609216515151922, "learning_rate": 9.99998518234045e-06, "loss": 0.466, "step": 491 }, { "epoch": 0.030817895676412094, "grad_norm": 0.9465076246325514, "learning_rate": 9.999982609831602e-06, "loss": 0.4717, "step": 492 }, { "epoch": 0.030880533675754395, "grad_norm": 1.1400113555055207, "learning_rate": 9.999979831522542e-06, "loss": 0.4905, "step": 493 }, { "epoch": 0.030943171675096696, "grad_norm": 0.969603267295193, "learning_rate": 9.999976847413384e-06, "loss": 0.4653, "step": 494 }, { "epoch": 0.031005809674439, "grad_norm": 1.0096136673356448, "learning_rate": 9.999973657504252e-06, "loss": 0.4862, "step": 495 }, { "epoch": 0.0310684476737813, "grad_norm": 1.00760839191466, "learning_rate": 9.999970261795275e-06, "loss": 0.48, "step": 496 }, { "epoch": 0.0311310856731236, "grad_norm": 0.9691567894864449, "learning_rate": 9.999966660286595e-06, "loss": 0.4521, "step": 497 }, { "epoch": 0.031193723672465902, "grad_norm": 1.0578340506578705, "learning_rate": 9.999962852978358e-06, "loss": 0.4849, "step": 498 }, { "epoch": 0.0312563616718082, "grad_norm": 0.9764114767764127, "learning_rate": 9.999958839870722e-06, "loss": 0.4267, "step": 499 }, { "epoch": 0.0313189996711505, "grad_norm": 1.0074380395987035, "learning_rate": 9.999954620963855e-06, "loss": 0.4799, "step": 500 }, { "epoch": 0.031381637670492804, "grad_norm": 1.1811927042275638, "learning_rate": 9.999950196257924e-06, "loss": 0.4904, "step": 501 }, { "epoch": 0.031444275669835105, "grad_norm": 1.0041882768132266, "learning_rate": 9.999945565753116e-06, "loss": 0.4806, "step": 502 }, { "epoch": 0.031506913669177405, "grad_norm": 1.0273531123174042, "learning_rate": 9.999940729449622e-06, "loss": 0.454, "step": 503 }, { "epoch": 0.031569551668519706, "grad_norm": 0.9722350000020039, "learning_rate": 9.999935687347638e-06, "loss": 0.4295, "step": 504 }, { "epoch": 0.03163218966786201, "grad_norm": 0.92030913206606, "learning_rate": 9.999930439447372e-06, "loss": 0.4687, "step": 505 }, { "epoch": 0.03169482766720431, "grad_norm": 1.0924693890570083, "learning_rate": 9.999924985749044e-06, "loss": 0.4898, "step": 506 }, { "epoch": 0.03175746566654661, "grad_norm": 1.0646972647631245, "learning_rate": 9.999919326252874e-06, "loss": 0.4895, "step": 507 }, { "epoch": 0.03182010366588891, "grad_norm": 0.8652912409826867, "learning_rate": 9.999913460959097e-06, "loss": 0.4549, "step": 508 }, { "epoch": 0.03188274166523121, "grad_norm": 0.9624685363914474, "learning_rate": 9.999907389867954e-06, "loss": 0.4506, "step": 509 }, { "epoch": 0.03194537966457351, "grad_norm": 1.0315788411209288, "learning_rate": 9.999901112979692e-06, "loss": 0.5095, "step": 510 }, { "epoch": 0.03200801766391582, "grad_norm": 0.9846071396915839, "learning_rate": 9.999894630294576e-06, "loss": 0.5179, "step": 511 }, { "epoch": 0.03207065566325812, "grad_norm": 1.0627532866412681, "learning_rate": 9.999887941812866e-06, "loss": 0.488, "step": 512 }, { "epoch": 0.03213329366260042, "grad_norm": 0.9742328408810338, "learning_rate": 9.999881047534842e-06, "loss": 0.4739, "step": 513 }, { "epoch": 0.03219593166194272, "grad_norm": 0.9507339577337984, "learning_rate": 9.999873947460787e-06, "loss": 0.4452, "step": 514 }, { "epoch": 0.03225856966128502, "grad_norm": 0.9704327396775589, "learning_rate": 9.99986664159099e-06, "loss": 0.4349, "step": 515 }, { "epoch": 0.03232120766062732, "grad_norm": 0.9318856938466582, "learning_rate": 9.999859129925755e-06, "loss": 0.465, "step": 516 }, { "epoch": 0.03238384565996962, "grad_norm": 1.018203564786288, "learning_rate": 9.99985141246539e-06, "loss": 0.4696, "step": 517 }, { "epoch": 0.03244648365931192, "grad_norm": 1.0303214212289362, "learning_rate": 9.999843489210214e-06, "loss": 0.4796, "step": 518 }, { "epoch": 0.03250912165865422, "grad_norm": 0.8916298851440834, "learning_rate": 9.999835360160552e-06, "loss": 0.4438, "step": 519 }, { "epoch": 0.032571759657996524, "grad_norm": 1.075470573489586, "learning_rate": 9.999827025316737e-06, "loss": 0.4633, "step": 520 }, { "epoch": 0.032634397657338825, "grad_norm": 0.9824834115931615, "learning_rate": 9.999818484679115e-06, "loss": 0.4318, "step": 521 }, { "epoch": 0.032697035656681125, "grad_norm": 0.9773623813408532, "learning_rate": 9.999809738248035e-06, "loss": 0.4415, "step": 522 }, { "epoch": 0.032759673656023426, "grad_norm": 0.9665533542360624, "learning_rate": 9.999800786023858e-06, "loss": 0.4053, "step": 523 }, { "epoch": 0.03282231165536573, "grad_norm": 0.9851886697033954, "learning_rate": 9.999791628006953e-06, "loss": 0.5264, "step": 524 }, { "epoch": 0.03288494965470803, "grad_norm": 1.0079922070618188, "learning_rate": 9.999782264197699e-06, "loss": 0.4498, "step": 525 }, { "epoch": 0.03294758765405033, "grad_norm": 0.9543285247903758, "learning_rate": 9.999772694596475e-06, "loss": 0.4461, "step": 526 }, { "epoch": 0.03301022565339263, "grad_norm": 1.011477439861251, "learning_rate": 9.999762919203681e-06, "loss": 0.4603, "step": 527 }, { "epoch": 0.03307286365273493, "grad_norm": 1.1059121496488515, "learning_rate": 9.999752938019718e-06, "loss": 0.472, "step": 528 }, { "epoch": 0.03313550165207723, "grad_norm": 0.9578748311593964, "learning_rate": 9.999742751044993e-06, "loss": 0.4874, "step": 529 }, { "epoch": 0.03319813965141953, "grad_norm": 1.0079423308019362, "learning_rate": 9.999732358279932e-06, "loss": 0.4868, "step": 530 }, { "epoch": 0.03326077765076183, "grad_norm": 0.9469607821182577, "learning_rate": 9.999721759724958e-06, "loss": 0.4356, "step": 531 }, { "epoch": 0.03332341565010413, "grad_norm": 1.0053609209392569, "learning_rate": 9.999710955380509e-06, "loss": 0.4679, "step": 532 }, { "epoch": 0.03338605364944644, "grad_norm": 0.9762542388891873, "learning_rate": 9.999699945247028e-06, "loss": 0.4402, "step": 533 }, { "epoch": 0.03344869164878874, "grad_norm": 0.9384813781953285, "learning_rate": 9.999688729324967e-06, "loss": 0.4742, "step": 534 }, { "epoch": 0.03351132964813104, "grad_norm": 0.9897891615559052, "learning_rate": 9.999677307614793e-06, "loss": 0.4712, "step": 535 }, { "epoch": 0.03357396764747334, "grad_norm": 0.9885448983496022, "learning_rate": 9.999665680116973e-06, "loss": 0.4195, "step": 536 }, { "epoch": 0.03363660564681564, "grad_norm": 0.991051960999574, "learning_rate": 9.999653846831985e-06, "loss": 0.4998, "step": 537 }, { "epoch": 0.03369924364615794, "grad_norm": 1.0361053951553014, "learning_rate": 9.999641807760316e-06, "loss": 0.4431, "step": 538 }, { "epoch": 0.033761881645500244, "grad_norm": 1.0285461505097981, "learning_rate": 9.999629562902463e-06, "loss": 0.4672, "step": 539 }, { "epoch": 0.033824519644842545, "grad_norm": 0.965644470727518, "learning_rate": 9.99961711225893e-06, "loss": 0.4202, "step": 540 }, { "epoch": 0.033887157644184845, "grad_norm": 0.9560817790554014, "learning_rate": 9.999604455830228e-06, "loss": 0.4671, "step": 541 }, { "epoch": 0.033949795643527146, "grad_norm": 1.1812162057199627, "learning_rate": 9.999591593616878e-06, "loss": 0.4952, "step": 542 }, { "epoch": 0.03401243364286945, "grad_norm": 1.0043295465511102, "learning_rate": 9.99957852561941e-06, "loss": 0.4761, "step": 543 }, { "epoch": 0.03407507164221175, "grad_norm": 1.0737894947783835, "learning_rate": 9.999565251838361e-06, "loss": 0.5041, "step": 544 }, { "epoch": 0.03413770964155405, "grad_norm": 0.8868052825081272, "learning_rate": 9.999551772274281e-06, "loss": 0.4753, "step": 545 }, { "epoch": 0.03420034764089635, "grad_norm": 1.0044086592696277, "learning_rate": 9.99953808692772e-06, "loss": 0.4926, "step": 546 }, { "epoch": 0.03426298564023865, "grad_norm": 0.9703547332596356, "learning_rate": 9.999524195799244e-06, "loss": 0.4543, "step": 547 }, { "epoch": 0.03432562363958095, "grad_norm": 0.9492467948678193, "learning_rate": 9.999510098889425e-06, "loss": 0.4406, "step": 548 }, { "epoch": 0.03438826163892325, "grad_norm": 0.9101995244343161, "learning_rate": 9.999495796198842e-06, "loss": 0.4883, "step": 549 }, { "epoch": 0.03445089963826555, "grad_norm": 0.9384182648318252, "learning_rate": 9.999481287728083e-06, "loss": 0.4536, "step": 550 }, { "epoch": 0.03451353763760785, "grad_norm": 1.0059085044870621, "learning_rate": 9.999466573477748e-06, "loss": 0.4657, "step": 551 }, { "epoch": 0.03457617563695015, "grad_norm": 1.0104732065656994, "learning_rate": 9.999451653448438e-06, "loss": 0.457, "step": 552 }, { "epoch": 0.034638813636292454, "grad_norm": 0.9040445433360217, "learning_rate": 9.999436527640772e-06, "loss": 0.3829, "step": 553 }, { "epoch": 0.034701451635634754, "grad_norm": 0.9930828924993765, "learning_rate": 9.99942119605537e-06, "loss": 0.4404, "step": 554 }, { "epoch": 0.03476408963497706, "grad_norm": 0.9292860631002692, "learning_rate": 9.999405658692864e-06, "loss": 0.4676, "step": 555 }, { "epoch": 0.03482672763431936, "grad_norm": 0.964658134385176, "learning_rate": 9.999389915553894e-06, "loss": 0.4461, "step": 556 }, { "epoch": 0.03488936563366166, "grad_norm": 0.9930273170704527, "learning_rate": 9.999373966639106e-06, "loss": 0.4422, "step": 557 }, { "epoch": 0.034952003633003964, "grad_norm": 0.914622629657591, "learning_rate": 9.999357811949158e-06, "loss": 0.4806, "step": 558 }, { "epoch": 0.035014641632346265, "grad_norm": 1.0415850723617697, "learning_rate": 9.999341451484714e-06, "loss": 0.4781, "step": 559 }, { "epoch": 0.035077279631688565, "grad_norm": 0.9259754796628213, "learning_rate": 9.999324885246449e-06, "loss": 0.462, "step": 560 }, { "epoch": 0.035139917631030866, "grad_norm": 0.905892572369413, "learning_rate": 9.999308113235043e-06, "loss": 0.4208, "step": 561 }, { "epoch": 0.03520255563037317, "grad_norm": 1.0308834747625186, "learning_rate": 9.999291135451188e-06, "loss": 0.4785, "step": 562 }, { "epoch": 0.03526519362971547, "grad_norm": 1.0378162844826708, "learning_rate": 9.99927395189558e-06, "loss": 0.4321, "step": 563 }, { "epoch": 0.03532783162905777, "grad_norm": 1.019531433765153, "learning_rate": 9.999256562568932e-06, "loss": 0.4782, "step": 564 }, { "epoch": 0.03539046962840007, "grad_norm": 1.00416111298788, "learning_rate": 9.999238967471952e-06, "loss": 0.4604, "step": 565 }, { "epoch": 0.03545310762774237, "grad_norm": 0.9412528474484246, "learning_rate": 9.99922116660537e-06, "loss": 0.4733, "step": 566 }, { "epoch": 0.03551574562708467, "grad_norm": 1.015456751882925, "learning_rate": 9.999203159969916e-06, "loss": 0.476, "step": 567 }, { "epoch": 0.03557838362642697, "grad_norm": 0.9795878241595918, "learning_rate": 9.999184947566333e-06, "loss": 0.4406, "step": 568 }, { "epoch": 0.03564102162576927, "grad_norm": 1.0006474992202883, "learning_rate": 9.99916652939537e-06, "loss": 0.4526, "step": 569 }, { "epoch": 0.03570365962511157, "grad_norm": 0.9384636498456076, "learning_rate": 9.999147905457782e-06, "loss": 0.438, "step": 570 }, { "epoch": 0.03576629762445387, "grad_norm": 0.8966149020553249, "learning_rate": 9.99912907575434e-06, "loss": 0.4327, "step": 571 }, { "epoch": 0.035828935623796174, "grad_norm": 1.0951653468182427, "learning_rate": 9.99911004028582e-06, "loss": 0.49, "step": 572 }, { "epoch": 0.035891573623138474, "grad_norm": 0.902266576135075, "learning_rate": 9.999090799053e-06, "loss": 0.4528, "step": 573 }, { "epoch": 0.035954211622480775, "grad_norm": 1.0421962777080522, "learning_rate": 9.999071352056676e-06, "loss": 0.4651, "step": 574 }, { "epoch": 0.036016849621823076, "grad_norm": 0.9597946218997044, "learning_rate": 9.999051699297646e-06, "loss": 0.4208, "step": 575 }, { "epoch": 0.03607948762116538, "grad_norm": 0.8637534029130581, "learning_rate": 9.999031840776721e-06, "loss": 0.4403, "step": 576 }, { "epoch": 0.036142125620507684, "grad_norm": 1.0033480009493443, "learning_rate": 9.999011776494719e-06, "loss": 0.4952, "step": 577 }, { "epoch": 0.036204763619849985, "grad_norm": 1.090690976714218, "learning_rate": 9.998991506452462e-06, "loss": 0.4763, "step": 578 }, { "epoch": 0.036267401619192285, "grad_norm": 1.0459510154470744, "learning_rate": 9.998971030650788e-06, "loss": 0.4714, "step": 579 }, { "epoch": 0.036330039618534586, "grad_norm": 1.1860738438237561, "learning_rate": 9.998950349090536e-06, "loss": 0.4746, "step": 580 }, { "epoch": 0.03639267761787689, "grad_norm": 0.9451826114718472, "learning_rate": 9.998929461772564e-06, "loss": 0.431, "step": 581 }, { "epoch": 0.03645531561721919, "grad_norm": 1.0399613778743524, "learning_rate": 9.998908368697725e-06, "loss": 0.4568, "step": 582 }, { "epoch": 0.03651795361656149, "grad_norm": 0.9938401747308981, "learning_rate": 9.998887069866889e-06, "loss": 0.4957, "step": 583 }, { "epoch": 0.03658059161590379, "grad_norm": 0.9713714096755157, "learning_rate": 9.998865565280935e-06, "loss": 0.4548, "step": 584 }, { "epoch": 0.03664322961524609, "grad_norm": 0.8986037559771676, "learning_rate": 9.998843854940746e-06, "loss": 0.438, "step": 585 }, { "epoch": 0.03670586761458839, "grad_norm": 1.0918995669339344, "learning_rate": 9.998821938847216e-06, "loss": 0.4701, "step": 586 }, { "epoch": 0.03676850561393069, "grad_norm": 1.0070802225957607, "learning_rate": 9.998799817001247e-06, "loss": 0.4809, "step": 587 }, { "epoch": 0.03683114361327299, "grad_norm": 0.9910427723137986, "learning_rate": 9.99877748940375e-06, "loss": 0.4601, "step": 588 }, { "epoch": 0.03689378161261529, "grad_norm": 1.0890501289461592, "learning_rate": 9.998754956055645e-06, "loss": 0.5149, "step": 589 }, { "epoch": 0.03695641961195759, "grad_norm": 0.9331195552361714, "learning_rate": 9.998732216957856e-06, "loss": 0.4744, "step": 590 }, { "epoch": 0.037019057611299894, "grad_norm": 0.973348073584033, "learning_rate": 9.998709272111322e-06, "loss": 0.476, "step": 591 }, { "epoch": 0.037081695610642194, "grad_norm": 1.0317916428820801, "learning_rate": 9.998686121516988e-06, "loss": 0.5082, "step": 592 }, { "epoch": 0.037144333609984495, "grad_norm": 1.0142032508682037, "learning_rate": 9.998662765175803e-06, "loss": 0.4474, "step": 593 }, { "epoch": 0.037206971609326796, "grad_norm": 0.9769792523750915, "learning_rate": 9.998639203088732e-06, "loss": 0.4842, "step": 594 }, { "epoch": 0.037269609608669096, "grad_norm": 0.9487157168800099, "learning_rate": 9.998615435256744e-06, "loss": 0.4103, "step": 595 }, { "epoch": 0.0373322476080114, "grad_norm": 0.8861090868687992, "learning_rate": 9.998591461680815e-06, "loss": 0.4585, "step": 596 }, { "epoch": 0.0373948856073537, "grad_norm": 0.9031499065684753, "learning_rate": 9.998567282361935e-06, "loss": 0.4237, "step": 597 }, { "epoch": 0.037457523606696005, "grad_norm": 0.9088413311422079, "learning_rate": 9.998542897301097e-06, "loss": 0.4415, "step": 598 }, { "epoch": 0.037520161606038306, "grad_norm": 0.9777998365073011, "learning_rate": 9.998518306499307e-06, "loss": 0.5075, "step": 599 }, { "epoch": 0.03758279960538061, "grad_norm": 0.9600785925598556, "learning_rate": 9.998493509957577e-06, "loss": 0.4243, "step": 600 }, { "epoch": 0.03764543760472291, "grad_norm": 1.112337269143292, "learning_rate": 9.998468507676924e-06, "loss": 0.5082, "step": 601 }, { "epoch": 0.03770807560406521, "grad_norm": 0.9255230529345939, "learning_rate": 9.99844329965838e-06, "loss": 0.432, "step": 602 }, { "epoch": 0.03777071360340751, "grad_norm": 0.9657390450813839, "learning_rate": 9.998417885902983e-06, "loss": 0.503, "step": 603 }, { "epoch": 0.03783335160274981, "grad_norm": 0.9914077051594874, "learning_rate": 9.998392266411776e-06, "loss": 0.5022, "step": 604 }, { "epoch": 0.03789598960209211, "grad_norm": 0.9615120100875046, "learning_rate": 9.998366441185816e-06, "loss": 0.4413, "step": 605 }, { "epoch": 0.03795862760143441, "grad_norm": 0.9907185647151263, "learning_rate": 9.998340410226166e-06, "loss": 0.4439, "step": 606 }, { "epoch": 0.03802126560077671, "grad_norm": 1.0645631392736339, "learning_rate": 9.998314173533899e-06, "loss": 0.4211, "step": 607 }, { "epoch": 0.03808390360011901, "grad_norm": 1.1165331680035415, "learning_rate": 9.99828773111009e-06, "loss": 0.4752, "step": 608 }, { "epoch": 0.03814654159946131, "grad_norm": 0.9976949663430053, "learning_rate": 9.99826108295583e-06, "loss": 0.4809, "step": 609 }, { "epoch": 0.038209179598803614, "grad_norm": 0.9060148437491219, "learning_rate": 9.998234229072219e-06, "loss": 0.4385, "step": 610 }, { "epoch": 0.038271817598145914, "grad_norm": 0.9165908417039417, "learning_rate": 9.998207169460358e-06, "loss": 0.4656, "step": 611 }, { "epoch": 0.038334455597488215, "grad_norm": 1.0031112514335099, "learning_rate": 9.998179904121361e-06, "loss": 0.4856, "step": 612 }, { "epoch": 0.038397093596830516, "grad_norm": 1.1455165579127387, "learning_rate": 9.998152433056354e-06, "loss": 0.4747, "step": 613 }, { "epoch": 0.038459731596172816, "grad_norm": 1.0934454697254627, "learning_rate": 9.998124756266463e-06, "loss": 0.468, "step": 614 }, { "epoch": 0.03852236959551512, "grad_norm": 1.0091767455467406, "learning_rate": 9.99809687375283e-06, "loss": 0.4955, "step": 615 }, { "epoch": 0.03858500759485742, "grad_norm": 1.1526015635320352, "learning_rate": 9.998068785516601e-06, "loss": 0.5141, "step": 616 }, { "epoch": 0.03864764559419972, "grad_norm": 0.9864017088146733, "learning_rate": 9.998040491558936e-06, "loss": 0.4895, "step": 617 }, { "epoch": 0.03871028359354202, "grad_norm": 1.0044950574183737, "learning_rate": 9.998011991880994e-06, "loss": 0.4432, "step": 618 }, { "epoch": 0.03877292159288432, "grad_norm": 0.9998424825193412, "learning_rate": 9.997983286483953e-06, "loss": 0.4293, "step": 619 }, { "epoch": 0.03883555959222663, "grad_norm": 1.034036172985092, "learning_rate": 9.997954375368989e-06, "loss": 0.4835, "step": 620 }, { "epoch": 0.03889819759156893, "grad_norm": 1.0515823173171122, "learning_rate": 9.997925258537298e-06, "loss": 0.5218, "step": 621 }, { "epoch": 0.03896083559091123, "grad_norm": 1.0685245540989072, "learning_rate": 9.997895935990075e-06, "loss": 0.4448, "step": 622 }, { "epoch": 0.03902347359025353, "grad_norm": 0.9460773359178397, "learning_rate": 9.997866407728527e-06, "loss": 0.4359, "step": 623 }, { "epoch": 0.03908611158959583, "grad_norm": 0.9143083826136245, "learning_rate": 9.997836673753868e-06, "loss": 0.456, "step": 624 }, { "epoch": 0.03914874958893813, "grad_norm": 0.9963318489173749, "learning_rate": 9.997806734067328e-06, "loss": 0.5058, "step": 625 }, { "epoch": 0.03921138758828043, "grad_norm": 1.0096796677534479, "learning_rate": 9.997776588670133e-06, "loss": 0.4329, "step": 626 }, { "epoch": 0.03927402558762273, "grad_norm": 1.0502358728595, "learning_rate": 9.997746237563526e-06, "loss": 0.4813, "step": 627 }, { "epoch": 0.03933666358696503, "grad_norm": 1.5333964417376655, "learning_rate": 9.997715680748757e-06, "loss": 0.5102, "step": 628 }, { "epoch": 0.039399301586307334, "grad_norm": 1.114156203998625, "learning_rate": 9.997684918227083e-06, "loss": 0.4615, "step": 629 }, { "epoch": 0.039461939585649634, "grad_norm": 1.1449257459646056, "learning_rate": 9.997653949999769e-06, "loss": 0.4957, "step": 630 }, { "epoch": 0.039524577584991935, "grad_norm": 1.0123482057200637, "learning_rate": 9.997622776068092e-06, "loss": 0.4804, "step": 631 }, { "epoch": 0.039587215584334236, "grad_norm": 1.3042639852064444, "learning_rate": 9.997591396433333e-06, "loss": 0.5095, "step": 632 }, { "epoch": 0.039649853583676536, "grad_norm": 1.0845242278561564, "learning_rate": 9.997559811096785e-06, "loss": 0.4896, "step": 633 }, { "epoch": 0.03971249158301884, "grad_norm": 0.9102289622479706, "learning_rate": 9.997528020059748e-06, "loss": 0.4297, "step": 634 }, { "epoch": 0.03977512958236114, "grad_norm": 0.959861066428182, "learning_rate": 9.99749602332353e-06, "loss": 0.4941, "step": 635 }, { "epoch": 0.03983776758170344, "grad_norm": 0.9190787546617656, "learning_rate": 9.997463820889447e-06, "loss": 0.4645, "step": 636 }, { "epoch": 0.03990040558104574, "grad_norm": 0.9388601258662337, "learning_rate": 9.997431412758827e-06, "loss": 0.4955, "step": 637 }, { "epoch": 0.03996304358038804, "grad_norm": 1.0037848348482277, "learning_rate": 9.997398798933002e-06, "loss": 0.4714, "step": 638 }, { "epoch": 0.04002568157973034, "grad_norm": 0.9213487559779558, "learning_rate": 9.997365979413314e-06, "loss": 0.4566, "step": 639 }, { "epoch": 0.04008831957907264, "grad_norm": 1.4342791385857256, "learning_rate": 9.997332954201115e-06, "loss": 0.5175, "step": 640 }, { "epoch": 0.04015095757841495, "grad_norm": 0.9184833642187181, "learning_rate": 9.997299723297765e-06, "loss": 0.4374, "step": 641 }, { "epoch": 0.04021359557775725, "grad_norm": 0.9976762961925686, "learning_rate": 9.99726628670463e-06, "loss": 0.4289, "step": 642 }, { "epoch": 0.04027623357709955, "grad_norm": 1.068931329314942, "learning_rate": 9.997232644423089e-06, "loss": 0.5068, "step": 643 }, { "epoch": 0.04033887157644185, "grad_norm": 0.9900129083905248, "learning_rate": 9.997198796454522e-06, "loss": 0.442, "step": 644 }, { "epoch": 0.04040150957578415, "grad_norm": 0.8981927838528493, "learning_rate": 9.997164742800327e-06, "loss": 0.4626, "step": 645 }, { "epoch": 0.04046414757512645, "grad_norm": 0.8916856873102379, "learning_rate": 9.997130483461903e-06, "loss": 0.4588, "step": 646 }, { "epoch": 0.04052678557446875, "grad_norm": 1.0013684128942837, "learning_rate": 9.997096018440661e-06, "loss": 0.4628, "step": 647 }, { "epoch": 0.040589423573811054, "grad_norm": 0.9528620086703701, "learning_rate": 9.997061347738019e-06, "loss": 0.4967, "step": 648 }, { "epoch": 0.040652061573153354, "grad_norm": 0.9902420493385229, "learning_rate": 9.997026471355404e-06, "loss": 0.4409, "step": 649 }, { "epoch": 0.040714699572495655, "grad_norm": 0.8399190444522047, "learning_rate": 9.996991389294253e-06, "loss": 0.423, "step": 650 }, { "epoch": 0.040777337571837956, "grad_norm": 0.9908145594898994, "learning_rate": 9.996956101556009e-06, "loss": 0.4565, "step": 651 }, { "epoch": 0.040839975571180256, "grad_norm": 0.9793276007170698, "learning_rate": 9.996920608142123e-06, "loss": 0.4278, "step": 652 }, { "epoch": 0.04090261357052256, "grad_norm": 0.9463422669458154, "learning_rate": 9.996884909054059e-06, "loss": 0.4493, "step": 653 }, { "epoch": 0.04096525156986486, "grad_norm": 0.9818598444410425, "learning_rate": 9.996849004293283e-06, "loss": 0.473, "step": 654 }, { "epoch": 0.04102788956920716, "grad_norm": 1.0508552153246706, "learning_rate": 9.996812893861276e-06, "loss": 0.5317, "step": 655 }, { "epoch": 0.04109052756854946, "grad_norm": 0.9274853335495093, "learning_rate": 9.996776577759522e-06, "loss": 0.429, "step": 656 }, { "epoch": 0.04115316556789176, "grad_norm": 0.8521552724711785, "learning_rate": 9.996740055989516e-06, "loss": 0.434, "step": 657 }, { "epoch": 0.04121580356723406, "grad_norm": 0.9248493874067197, "learning_rate": 9.996703328552764e-06, "loss": 0.4437, "step": 658 }, { "epoch": 0.04127844156657636, "grad_norm": 0.912593040838753, "learning_rate": 9.996666395450773e-06, "loss": 0.4893, "step": 659 }, { "epoch": 0.04134107956591866, "grad_norm": 0.9605858141682788, "learning_rate": 9.996629256685067e-06, "loss": 0.4195, "step": 660 }, { "epoch": 0.04140371756526096, "grad_norm": 1.2225560143239458, "learning_rate": 9.996591912257175e-06, "loss": 0.4545, "step": 661 }, { "epoch": 0.04146635556460326, "grad_norm": 0.8644472926459017, "learning_rate": 9.99655436216863e-06, "loss": 0.4011, "step": 662 }, { "epoch": 0.04152899356394557, "grad_norm": 1.1243880529160646, "learning_rate": 9.99651660642098e-06, "loss": 0.4948, "step": 663 }, { "epoch": 0.04159163156328787, "grad_norm": 0.9128976792932832, "learning_rate": 9.996478645015779e-06, "loss": 0.4293, "step": 664 }, { "epoch": 0.04165426956263017, "grad_norm": 1.0085742032382932, "learning_rate": 9.996440477954592e-06, "loss": 0.49, "step": 665 }, { "epoch": 0.04171690756197247, "grad_norm": 0.9138971856367806, "learning_rate": 9.996402105238985e-06, "loss": 0.4422, "step": 666 }, { "epoch": 0.041779545561314774, "grad_norm": 1.0633114091813225, "learning_rate": 9.996363526870541e-06, "loss": 0.4967, "step": 667 }, { "epoch": 0.041842183560657074, "grad_norm": 0.9610155186509937, "learning_rate": 9.996324742850848e-06, "loss": 0.4577, "step": 668 }, { "epoch": 0.041904821559999375, "grad_norm": 0.9824864826920999, "learning_rate": 9.996285753181499e-06, "loss": 0.4655, "step": 669 }, { "epoch": 0.041967459559341676, "grad_norm": 0.934706462278132, "learning_rate": 9.996246557864102e-06, "loss": 0.4839, "step": 670 }, { "epoch": 0.042030097558683976, "grad_norm": 0.9835437236070101, "learning_rate": 9.99620715690027e-06, "loss": 0.4699, "step": 671 }, { "epoch": 0.04209273555802628, "grad_norm": 0.9486628364755014, "learning_rate": 9.996167550291624e-06, "loss": 0.4678, "step": 672 }, { "epoch": 0.04215537355736858, "grad_norm": 1.0128997596890357, "learning_rate": 9.996127738039793e-06, "loss": 0.4497, "step": 673 }, { "epoch": 0.04221801155671088, "grad_norm": 1.049530930318942, "learning_rate": 9.99608772014642e-06, "loss": 0.4507, "step": 674 }, { "epoch": 0.04228064955605318, "grad_norm": 1.06903445550272, "learning_rate": 9.996047496613146e-06, "loss": 0.4874, "step": 675 }, { "epoch": 0.04234328755539548, "grad_norm": 0.9642058510071302, "learning_rate": 9.99600706744163e-06, "loss": 0.4969, "step": 676 }, { "epoch": 0.04240592555473778, "grad_norm": 1.0214254623919068, "learning_rate": 9.995966432633537e-06, "loss": 0.506, "step": 677 }, { "epoch": 0.04246856355408008, "grad_norm": 0.9421611011238127, "learning_rate": 9.995925592190537e-06, "loss": 0.4824, "step": 678 }, { "epoch": 0.04253120155342238, "grad_norm": 0.925849155242043, "learning_rate": 9.995884546114315e-06, "loss": 0.4865, "step": 679 }, { "epoch": 0.04259383955276468, "grad_norm": 1.0146283781562906, "learning_rate": 9.995843294406554e-06, "loss": 0.4322, "step": 680 }, { "epoch": 0.04265647755210698, "grad_norm": 1.0042717374500967, "learning_rate": 9.995801837068958e-06, "loss": 0.4626, "step": 681 }, { "epoch": 0.042719115551449284, "grad_norm": 1.0130780964908586, "learning_rate": 9.99576017410323e-06, "loss": 0.4568, "step": 682 }, { "epoch": 0.042781753550791585, "grad_norm": 0.9802978347810047, "learning_rate": 9.995718305511088e-06, "loss": 0.4683, "step": 683 }, { "epoch": 0.042844391550133885, "grad_norm": 0.9630203858770743, "learning_rate": 9.995676231294251e-06, "loss": 0.488, "step": 684 }, { "epoch": 0.04290702954947619, "grad_norm": 1.035245011056049, "learning_rate": 9.995633951454456e-06, "loss": 0.4757, "step": 685 }, { "epoch": 0.042969667548818494, "grad_norm": 0.9806765192994227, "learning_rate": 9.995591465993438e-06, "loss": 0.4137, "step": 686 }, { "epoch": 0.043032305548160794, "grad_norm": 1.0582608361824482, "learning_rate": 9.99554877491295e-06, "loss": 0.4664, "step": 687 }, { "epoch": 0.043094943547503095, "grad_norm": 0.950494578421443, "learning_rate": 9.995505878214744e-06, "loss": 0.487, "step": 688 }, { "epoch": 0.043157581546845396, "grad_norm": 1.0051273453706044, "learning_rate": 9.995462775900591e-06, "loss": 0.4444, "step": 689 }, { "epoch": 0.043220219546187696, "grad_norm": 0.9189436909288832, "learning_rate": 9.995419467972262e-06, "loss": 0.4604, "step": 690 }, { "epoch": 0.04328285754553, "grad_norm": 0.9722365439422429, "learning_rate": 9.99537595443154e-06, "loss": 0.4444, "step": 691 }, { "epoch": 0.0433454955448723, "grad_norm": 1.0125307967345447, "learning_rate": 9.995332235280218e-06, "loss": 0.4161, "step": 692 }, { "epoch": 0.0434081335442146, "grad_norm": 0.7909757593408102, "learning_rate": 9.995288310520094e-06, "loss": 0.4613, "step": 693 }, { "epoch": 0.0434707715435569, "grad_norm": 1.003720395867555, "learning_rate": 9.995244180152975e-06, "loss": 0.4664, "step": 694 }, { "epoch": 0.0435334095428992, "grad_norm": 0.9804291823667496, "learning_rate": 9.99519984418068e-06, "loss": 0.4237, "step": 695 }, { "epoch": 0.0435960475422415, "grad_norm": 1.0764437751444076, "learning_rate": 9.99515530260503e-06, "loss": 0.4445, "step": 696 }, { "epoch": 0.0436586855415838, "grad_norm": 0.9552801235216631, "learning_rate": 9.995110555427863e-06, "loss": 0.415, "step": 697 }, { "epoch": 0.0437213235409261, "grad_norm": 0.8476546466671363, "learning_rate": 9.995065602651017e-06, "loss": 0.4556, "step": 698 }, { "epoch": 0.0437839615402684, "grad_norm": 0.9968752408477135, "learning_rate": 9.995020444276345e-06, "loss": 0.5059, "step": 699 }, { "epoch": 0.0438465995396107, "grad_norm": 0.9772301507710963, "learning_rate": 9.994975080305703e-06, "loss": 0.4134, "step": 700 }, { "epoch": 0.043909237538953004, "grad_norm": 1.013029721280459, "learning_rate": 9.994929510740958e-06, "loss": 0.5126, "step": 701 }, { "epoch": 0.043971875538295305, "grad_norm": 0.9894687006271955, "learning_rate": 9.99488373558399e-06, "loss": 0.5247, "step": 702 }, { "epoch": 0.044034513537637605, "grad_norm": 1.0058284374838626, "learning_rate": 9.99483775483668e-06, "loss": 0.4572, "step": 703 }, { "epoch": 0.044097151536979906, "grad_norm": 0.9553474741911481, "learning_rate": 9.99479156850092e-06, "loss": 0.4717, "step": 704 }, { "epoch": 0.04415978953632221, "grad_norm": 0.9099344162592342, "learning_rate": 9.994745176578611e-06, "loss": 0.4335, "step": 705 }, { "epoch": 0.04422242753566451, "grad_norm": 0.8999173419831605, "learning_rate": 9.994698579071666e-06, "loss": 0.4386, "step": 706 }, { "epoch": 0.044285065535006815, "grad_norm": 0.8937404180947458, "learning_rate": 9.994651775981998e-06, "loss": 0.4454, "step": 707 }, { "epoch": 0.044347703534349116, "grad_norm": 0.8901454169732728, "learning_rate": 9.994604767311538e-06, "loss": 0.4259, "step": 708 }, { "epoch": 0.044410341533691416, "grad_norm": 1.0008631391069158, "learning_rate": 9.994557553062218e-06, "loss": 0.4313, "step": 709 }, { "epoch": 0.04447297953303372, "grad_norm": 0.8902744588342272, "learning_rate": 9.994510133235982e-06, "loss": 0.4418, "step": 710 }, { "epoch": 0.04453561753237602, "grad_norm": 0.956393726859102, "learning_rate": 9.994462507834782e-06, "loss": 0.4208, "step": 711 }, { "epoch": 0.04459825553171832, "grad_norm": 0.9444992577474345, "learning_rate": 9.994414676860578e-06, "loss": 0.4711, "step": 712 }, { "epoch": 0.04466089353106062, "grad_norm": 0.9681633570757133, "learning_rate": 9.994366640315338e-06, "loss": 0.478, "step": 713 }, { "epoch": 0.04472353153040292, "grad_norm": 0.901541455168364, "learning_rate": 9.994318398201042e-06, "loss": 0.4011, "step": 714 }, { "epoch": 0.04478616952974522, "grad_norm": 0.9857170835026026, "learning_rate": 9.994269950519672e-06, "loss": 0.4395, "step": 715 }, { "epoch": 0.04484880752908752, "grad_norm": 0.9652941667425703, "learning_rate": 9.994221297273224e-06, "loss": 0.5097, "step": 716 }, { "epoch": 0.04491144552842982, "grad_norm": 0.9798649933997702, "learning_rate": 9.9941724384637e-06, "loss": 0.4546, "step": 717 }, { "epoch": 0.04497408352777212, "grad_norm": 0.9877602517758866, "learning_rate": 9.994123374093114e-06, "loss": 0.4366, "step": 718 }, { "epoch": 0.04503672152711442, "grad_norm": 0.9090927155754608, "learning_rate": 9.99407410416348e-06, "loss": 0.4284, "step": 719 }, { "epoch": 0.045099359526456724, "grad_norm": 0.8169542326975429, "learning_rate": 9.99402462867683e-06, "loss": 0.4561, "step": 720 }, { "epoch": 0.045161997525799025, "grad_norm": 0.9611329299292285, "learning_rate": 9.993974947635201e-06, "loss": 0.4674, "step": 721 }, { "epoch": 0.045224635525141325, "grad_norm": 0.934907586188618, "learning_rate": 9.993925061040634e-06, "loss": 0.5143, "step": 722 }, { "epoch": 0.045287273524483626, "grad_norm": 0.9222254506898022, "learning_rate": 9.993874968895186e-06, "loss": 0.4294, "step": 723 }, { "epoch": 0.04534991152382593, "grad_norm": 0.9256901510340121, "learning_rate": 9.993824671200916e-06, "loss": 0.4591, "step": 724 }, { "epoch": 0.04541254952316823, "grad_norm": 0.9583011173465659, "learning_rate": 9.993774167959897e-06, "loss": 0.4778, "step": 725 }, { "epoch": 0.04547518752251053, "grad_norm": 1.044269474115197, "learning_rate": 9.993723459174206e-06, "loss": 0.4518, "step": 726 }, { "epoch": 0.04553782552185283, "grad_norm": 1.0144672018012593, "learning_rate": 9.993672544845931e-06, "loss": 0.491, "step": 727 }, { "epoch": 0.045600463521195136, "grad_norm": 0.9517287607116042, "learning_rate": 9.993621424977166e-06, "loss": 0.4622, "step": 728 }, { "epoch": 0.04566310152053744, "grad_norm": 0.8889620316072456, "learning_rate": 9.993570099570017e-06, "loss": 0.4592, "step": 729 }, { "epoch": 0.04572573951987974, "grad_norm": 0.9687359871375144, "learning_rate": 9.993518568626595e-06, "loss": 0.4899, "step": 730 }, { "epoch": 0.04578837751922204, "grad_norm": 0.9592521453505902, "learning_rate": 9.993466832149024e-06, "loss": 0.4645, "step": 731 }, { "epoch": 0.04585101551856434, "grad_norm": 1.0372334623343942, "learning_rate": 9.99341489013943e-06, "loss": 0.4612, "step": 732 }, { "epoch": 0.04591365351790664, "grad_norm": 0.910917121050653, "learning_rate": 9.99336274259995e-06, "loss": 0.4619, "step": 733 }, { "epoch": 0.04597629151724894, "grad_norm": 0.9595627785906364, "learning_rate": 9.993310389532736e-06, "loss": 0.4699, "step": 734 }, { "epoch": 0.04603892951659124, "grad_norm": 0.9665070308084366, "learning_rate": 9.993257830939938e-06, "loss": 0.4912, "step": 735 }, { "epoch": 0.04610156751593354, "grad_norm": 0.9931497511221541, "learning_rate": 9.993205066823722e-06, "loss": 0.4597, "step": 736 }, { "epoch": 0.04616420551527584, "grad_norm": 0.9811041344546328, "learning_rate": 9.993152097186258e-06, "loss": 0.4506, "step": 737 }, { "epoch": 0.04622684351461814, "grad_norm": 1.0118203248379027, "learning_rate": 9.993098922029728e-06, "loss": 0.4833, "step": 738 }, { "epoch": 0.046289481513960444, "grad_norm": 1.0123269803984023, "learning_rate": 9.993045541356318e-06, "loss": 0.4309, "step": 739 }, { "epoch": 0.046352119513302745, "grad_norm": 0.8153290511181605, "learning_rate": 9.992991955168227e-06, "loss": 0.4786, "step": 740 }, { "epoch": 0.046414757512645045, "grad_norm": 1.001069671521246, "learning_rate": 9.99293816346766e-06, "loss": 0.4542, "step": 741 }, { "epoch": 0.046477395511987346, "grad_norm": 1.0118245256071083, "learning_rate": 9.992884166256833e-06, "loss": 0.4529, "step": 742 }, { "epoch": 0.04654003351132965, "grad_norm": 0.9863031122078769, "learning_rate": 9.992829963537965e-06, "loss": 0.4479, "step": 743 }, { "epoch": 0.04660267151067195, "grad_norm": 0.949604468946847, "learning_rate": 9.992775555313291e-06, "loss": 0.4768, "step": 744 }, { "epoch": 0.04666530951001425, "grad_norm": 0.9515086186659695, "learning_rate": 9.992720941585048e-06, "loss": 0.4538, "step": 745 }, { "epoch": 0.04672794750935655, "grad_norm": 0.8925033977623502, "learning_rate": 9.992666122355483e-06, "loss": 0.4277, "step": 746 }, { "epoch": 0.04679058550869885, "grad_norm": 0.8449580007111595, "learning_rate": 9.992611097626856e-06, "loss": 0.4191, "step": 747 }, { "epoch": 0.04685322350804115, "grad_norm": 0.9315575193510275, "learning_rate": 9.992555867401428e-06, "loss": 0.4507, "step": 748 }, { "epoch": 0.04691586150738345, "grad_norm": 0.9715945762235036, "learning_rate": 9.992500431681473e-06, "loss": 0.4849, "step": 749 }, { "epoch": 0.04697849950672576, "grad_norm": 0.8807236630022497, "learning_rate": 9.992444790469276e-06, "loss": 0.4454, "step": 750 }, { "epoch": 0.04704113750606806, "grad_norm": 0.951856879460865, "learning_rate": 9.992388943767122e-06, "loss": 0.4617, "step": 751 }, { "epoch": 0.04710377550541036, "grad_norm": 0.859940008081188, "learning_rate": 9.992332891577314e-06, "loss": 0.4456, "step": 752 }, { "epoch": 0.04716641350475266, "grad_norm": 1.0223444075246404, "learning_rate": 9.992276633902158e-06, "loss": 0.4691, "step": 753 }, { "epoch": 0.04722905150409496, "grad_norm": 0.9318242058375885, "learning_rate": 9.992220170743968e-06, "loss": 0.5017, "step": 754 }, { "epoch": 0.04729168950343726, "grad_norm": 0.917414841019299, "learning_rate": 9.99216350210507e-06, "loss": 0.4723, "step": 755 }, { "epoch": 0.04735432750277956, "grad_norm": 0.9312686007650006, "learning_rate": 9.992106627987795e-06, "loss": 0.4175, "step": 756 }, { "epoch": 0.04741696550212186, "grad_norm": 0.9841511116003078, "learning_rate": 9.992049548394484e-06, "loss": 0.4829, "step": 757 }, { "epoch": 0.047479603501464164, "grad_norm": 0.9177329234363769, "learning_rate": 9.99199226332749e-06, "loss": 0.4303, "step": 758 }, { "epoch": 0.047542241500806465, "grad_norm": 0.9677126011431414, "learning_rate": 9.991934772789164e-06, "loss": 0.486, "step": 759 }, { "epoch": 0.047604879500148765, "grad_norm": 0.9842216279546642, "learning_rate": 9.99187707678188e-06, "loss": 0.4701, "step": 760 }, { "epoch": 0.047667517499491066, "grad_norm": 0.9663022413044057, "learning_rate": 9.991819175308007e-06, "loss": 0.4969, "step": 761 }, { "epoch": 0.04773015549883337, "grad_norm": 0.9782789835477755, "learning_rate": 9.99176106836993e-06, "loss": 0.4745, "step": 762 }, { "epoch": 0.04779279349817567, "grad_norm": 0.9719607484345565, "learning_rate": 9.991702755970041e-06, "loss": 0.4475, "step": 763 }, { "epoch": 0.04785543149751797, "grad_norm": 0.9974508358469623, "learning_rate": 9.991644238110741e-06, "loss": 0.4725, "step": 764 }, { "epoch": 0.04791806949686027, "grad_norm": 1.0211469862632896, "learning_rate": 9.991585514794436e-06, "loss": 0.4966, "step": 765 }, { "epoch": 0.04798070749620257, "grad_norm": 0.9183209412355604, "learning_rate": 9.991526586023546e-06, "loss": 0.4582, "step": 766 }, { "epoch": 0.04804334549554487, "grad_norm": 0.9453055985536284, "learning_rate": 9.991467451800493e-06, "loss": 0.4516, "step": 767 }, { "epoch": 0.04810598349488717, "grad_norm": 0.8598246741597764, "learning_rate": 9.991408112127717e-06, "loss": 0.4725, "step": 768 }, { "epoch": 0.04816862149422947, "grad_norm": 0.9239525051110906, "learning_rate": 9.991348567007655e-06, "loss": 0.4651, "step": 769 }, { "epoch": 0.04823125949357177, "grad_norm": 0.9315007100535981, "learning_rate": 9.99128881644276e-06, "loss": 0.4251, "step": 770 }, { "epoch": 0.04829389749291407, "grad_norm": 0.9110274864053051, "learning_rate": 9.991228860435489e-06, "loss": 0.437, "step": 771 }, { "epoch": 0.04835653549225638, "grad_norm": 0.9624368296403017, "learning_rate": 9.991168698988315e-06, "loss": 0.4491, "step": 772 }, { "epoch": 0.04841917349159868, "grad_norm": 0.9525438308942169, "learning_rate": 9.991108332103707e-06, "loss": 0.4364, "step": 773 }, { "epoch": 0.04848181149094098, "grad_norm": 0.9521232787190272, "learning_rate": 9.991047759784156e-06, "loss": 0.4995, "step": 774 }, { "epoch": 0.04854444949028328, "grad_norm": 0.9392138333898377, "learning_rate": 9.990986982032152e-06, "loss": 0.4826, "step": 775 }, { "epoch": 0.04860708748962558, "grad_norm": 0.9677686606535415, "learning_rate": 9.9909259988502e-06, "loss": 0.4423, "step": 776 }, { "epoch": 0.048669725488967884, "grad_norm": 0.934787773706382, "learning_rate": 9.990864810240803e-06, "loss": 0.506, "step": 777 }, { "epoch": 0.048732363488310185, "grad_norm": 0.9413665900296336, "learning_rate": 9.990803416206487e-06, "loss": 0.4535, "step": 778 }, { "epoch": 0.048795001487652485, "grad_norm": 0.952415742292826, "learning_rate": 9.990741816749776e-06, "loss": 0.4603, "step": 779 }, { "epoch": 0.048857639486994786, "grad_norm": 0.8699301034896143, "learning_rate": 9.990680011873204e-06, "loss": 0.4536, "step": 780 }, { "epoch": 0.04892027748633709, "grad_norm": 1.0321118027045681, "learning_rate": 9.990618001579318e-06, "loss": 0.5103, "step": 781 }, { "epoch": 0.04898291548567939, "grad_norm": 0.9887789670760242, "learning_rate": 9.990555785870667e-06, "loss": 0.4659, "step": 782 }, { "epoch": 0.04904555348502169, "grad_norm": 1.0482374261301903, "learning_rate": 9.990493364749815e-06, "loss": 0.5034, "step": 783 }, { "epoch": 0.04910819148436399, "grad_norm": 0.914568086350897, "learning_rate": 9.99043073821933e-06, "loss": 0.4346, "step": 784 }, { "epoch": 0.04917082948370629, "grad_norm": 0.9274842904337175, "learning_rate": 9.990367906281789e-06, "loss": 0.4173, "step": 785 }, { "epoch": 0.04923346748304859, "grad_norm": 0.9490526339521382, "learning_rate": 9.990304868939778e-06, "loss": 0.4902, "step": 786 }, { "epoch": 0.04929610548239089, "grad_norm": 0.9628472319649374, "learning_rate": 9.990241626195893e-06, "loss": 0.4308, "step": 787 }, { "epoch": 0.04935874348173319, "grad_norm": 0.9811908804895358, "learning_rate": 9.990178178052737e-06, "loss": 0.475, "step": 788 }, { "epoch": 0.04942138148107549, "grad_norm": 0.9317354366306052, "learning_rate": 9.990114524512919e-06, "loss": 0.4255, "step": 789 }, { "epoch": 0.04948401948041779, "grad_norm": 1.0361662580161788, "learning_rate": 9.990050665579063e-06, "loss": 0.4812, "step": 790 }, { "epoch": 0.049546657479760094, "grad_norm": 0.9852770433881092, "learning_rate": 9.989986601253796e-06, "loss": 0.4348, "step": 791 }, { "epoch": 0.049609295479102394, "grad_norm": 0.934261336623812, "learning_rate": 9.989922331539752e-06, "loss": 0.4523, "step": 792 }, { "epoch": 0.049671933478444695, "grad_norm": 0.911230732617379, "learning_rate": 9.98985785643958e-06, "loss": 0.4472, "step": 793 }, { "epoch": 0.049734571477787, "grad_norm": 0.9331601883361844, "learning_rate": 9.989793175955933e-06, "loss": 0.4502, "step": 794 }, { "epoch": 0.0497972094771293, "grad_norm": 0.953988623327564, "learning_rate": 9.989728290091471e-06, "loss": 0.4361, "step": 795 }, { "epoch": 0.049859847476471604, "grad_norm": 1.0521302596067241, "learning_rate": 9.98966319884887e-06, "loss": 0.4775, "step": 796 }, { "epoch": 0.049922485475813905, "grad_norm": 0.9059320786382553, "learning_rate": 9.989597902230801e-06, "loss": 0.4153, "step": 797 }, { "epoch": 0.049985123475156205, "grad_norm": 0.8649718726457603, "learning_rate": 9.989532400239958e-06, "loss": 0.4515, "step": 798 }, { "epoch": 0.050047761474498506, "grad_norm": 0.9678164424910579, "learning_rate": 9.989466692879036e-06, "loss": 0.4794, "step": 799 }, { "epoch": 0.05011039947384081, "grad_norm": 1.0188879945578437, "learning_rate": 9.989400780150739e-06, "loss": 0.4725, "step": 800 }, { "epoch": 0.05017303747318311, "grad_norm": 0.9543983584099914, "learning_rate": 9.98933466205778e-06, "loss": 0.497, "step": 801 }, { "epoch": 0.05023567547252541, "grad_norm": 0.9448764313321409, "learning_rate": 9.989268338602878e-06, "loss": 0.5037, "step": 802 }, { "epoch": 0.05029831347186771, "grad_norm": 0.9253155645686162, "learning_rate": 9.989201809788767e-06, "loss": 0.4321, "step": 803 }, { "epoch": 0.05036095147121001, "grad_norm": 0.9180777847225409, "learning_rate": 9.989135075618183e-06, "loss": 0.4593, "step": 804 }, { "epoch": 0.05042358947055231, "grad_norm": 0.9269281045104778, "learning_rate": 9.989068136093873e-06, "loss": 0.4277, "step": 805 }, { "epoch": 0.05048622746989461, "grad_norm": 0.9467219408399294, "learning_rate": 9.989000991218594e-06, "loss": 0.466, "step": 806 }, { "epoch": 0.05054886546923691, "grad_norm": 0.9998168099637439, "learning_rate": 9.988933640995107e-06, "loss": 0.4618, "step": 807 }, { "epoch": 0.05061150346857921, "grad_norm": 0.9775540978994027, "learning_rate": 9.988866085426185e-06, "loss": 0.442, "step": 808 }, { "epoch": 0.05067414146792151, "grad_norm": 0.9209643442921713, "learning_rate": 9.988798324514608e-06, "loss": 0.4618, "step": 809 }, { "epoch": 0.050736779467263814, "grad_norm": 1.0642043301623647, "learning_rate": 9.98873035826317e-06, "loss": 0.4825, "step": 810 }, { "epoch": 0.050799417466606114, "grad_norm": 0.9583716071947113, "learning_rate": 9.98866218667466e-06, "loss": 0.4331, "step": 811 }, { "epoch": 0.050862055465948415, "grad_norm": 0.8924599796374214, "learning_rate": 9.98859380975189e-06, "loss": 0.4594, "step": 812 }, { "epoch": 0.050924693465290716, "grad_norm": 0.9701799397023545, "learning_rate": 9.988525227497674e-06, "loss": 0.4786, "step": 813 }, { "epoch": 0.050987331464633016, "grad_norm": 0.9657838502930279, "learning_rate": 9.988456439914834e-06, "loss": 0.4902, "step": 814 }, { "epoch": 0.051049969463975324, "grad_norm": 1.0839392919680693, "learning_rate": 9.988387447006198e-06, "loss": 0.4925, "step": 815 }, { "epoch": 0.051112607463317625, "grad_norm": 0.9235329106765405, "learning_rate": 9.988318248774611e-06, "loss": 0.4379, "step": 816 }, { "epoch": 0.051175245462659925, "grad_norm": 0.9615940648106116, "learning_rate": 9.988248845222919e-06, "loss": 0.4472, "step": 817 }, { "epoch": 0.051237883462002226, "grad_norm": 0.8971279133130212, "learning_rate": 9.98817923635398e-06, "loss": 0.4361, "step": 818 }, { "epoch": 0.05130052146134453, "grad_norm": 0.9104794496195636, "learning_rate": 9.988109422170655e-06, "loss": 0.4103, "step": 819 }, { "epoch": 0.05136315946068683, "grad_norm": 0.9698776167517091, "learning_rate": 9.988039402675823e-06, "loss": 0.464, "step": 820 }, { "epoch": 0.05142579746002913, "grad_norm": 0.9925907447436046, "learning_rate": 9.98796917787236e-06, "loss": 0.438, "step": 821 }, { "epoch": 0.05148843545937143, "grad_norm": 0.9585101506953108, "learning_rate": 9.987898747763163e-06, "loss": 0.4626, "step": 822 }, { "epoch": 0.05155107345871373, "grad_norm": 0.9918104652586125, "learning_rate": 9.987828112351125e-06, "loss": 0.4248, "step": 823 }, { "epoch": 0.05161371145805603, "grad_norm": 1.0231031762527514, "learning_rate": 9.987757271639158e-06, "loss": 0.4792, "step": 824 }, { "epoch": 0.05167634945739833, "grad_norm": 0.837210004291008, "learning_rate": 9.987686225630176e-06, "loss": 0.449, "step": 825 }, { "epoch": 0.05173898745674063, "grad_norm": 0.8395411293023756, "learning_rate": 9.987614974327102e-06, "loss": 0.4864, "step": 826 }, { "epoch": 0.05180162545608293, "grad_norm": 0.9803945422060287, "learning_rate": 9.987543517732871e-06, "loss": 0.4612, "step": 827 }, { "epoch": 0.05186426345542523, "grad_norm": 0.8911917159864156, "learning_rate": 9.987471855850424e-06, "loss": 0.4387, "step": 828 }, { "epoch": 0.051926901454767534, "grad_norm": 0.7484370429750258, "learning_rate": 9.987399988682707e-06, "loss": 0.5085, "step": 829 }, { "epoch": 0.051989539454109834, "grad_norm": 0.895369309064475, "learning_rate": 9.987327916232683e-06, "loss": 0.405, "step": 830 }, { "epoch": 0.052052177453452135, "grad_norm": 1.001566331005194, "learning_rate": 9.987255638503314e-06, "loss": 0.484, "step": 831 }, { "epoch": 0.052114815452794436, "grad_norm": 0.908769636797958, "learning_rate": 9.98718315549758e-06, "loss": 0.4376, "step": 832 }, { "epoch": 0.052177453452136736, "grad_norm": 0.9083590569556048, "learning_rate": 9.98711046721846e-06, "loss": 0.451, "step": 833 }, { "epoch": 0.05224009145147904, "grad_norm": 0.9863352411103733, "learning_rate": 9.987037573668948e-06, "loss": 0.4654, "step": 834 }, { "epoch": 0.05230272945082134, "grad_norm": 0.985803687548831, "learning_rate": 9.986964474852043e-06, "loss": 0.4475, "step": 835 }, { "epoch": 0.05236536745016364, "grad_norm": 0.9554757157724206, "learning_rate": 9.986891170770756e-06, "loss": 0.461, "step": 836 }, { "epoch": 0.052428005449505946, "grad_norm": 1.0269100791115995, "learning_rate": 9.986817661428102e-06, "loss": 0.4801, "step": 837 }, { "epoch": 0.05249064344884825, "grad_norm": 0.9334353419448908, "learning_rate": 9.986743946827106e-06, "loss": 0.4732, "step": 838 }, { "epoch": 0.05255328144819055, "grad_norm": 1.0003128880954313, "learning_rate": 9.986670026970806e-06, "loss": 0.4629, "step": 839 }, { "epoch": 0.05261591944753285, "grad_norm": 0.9072894332531359, "learning_rate": 9.986595901862242e-06, "loss": 0.4555, "step": 840 }, { "epoch": 0.05267855744687515, "grad_norm": 0.991975393674197, "learning_rate": 9.986521571504464e-06, "loss": 0.4895, "step": 841 }, { "epoch": 0.05274119544621745, "grad_norm": 0.9343772633504821, "learning_rate": 9.986447035900533e-06, "loss": 0.4545, "step": 842 }, { "epoch": 0.05280383344555975, "grad_norm": 0.8907924597464965, "learning_rate": 9.986372295053517e-06, "loss": 0.4212, "step": 843 }, { "epoch": 0.05286647144490205, "grad_norm": 0.8620055570097864, "learning_rate": 9.98629734896649e-06, "loss": 0.4314, "step": 844 }, { "epoch": 0.05292910944424435, "grad_norm": 0.9587957974432815, "learning_rate": 9.98622219764254e-06, "loss": 0.4893, "step": 845 }, { "epoch": 0.05299174744358665, "grad_norm": 1.0441473687464549, "learning_rate": 9.986146841084758e-06, "loss": 0.4968, "step": 846 }, { "epoch": 0.05305438544292895, "grad_norm": 0.9638521498249859, "learning_rate": 9.986071279296248e-06, "loss": 0.4265, "step": 847 }, { "epoch": 0.053117023442271254, "grad_norm": 0.9828339058826837, "learning_rate": 9.985995512280118e-06, "loss": 0.4817, "step": 848 }, { "epoch": 0.053179661441613554, "grad_norm": 0.9666892967486925, "learning_rate": 9.985919540039487e-06, "loss": 0.464, "step": 849 }, { "epoch": 0.053242299440955855, "grad_norm": 0.9658057572395716, "learning_rate": 9.985843362577484e-06, "loss": 0.5194, "step": 850 }, { "epoch": 0.053304937440298156, "grad_norm": 0.9196795697942185, "learning_rate": 9.98576697989724e-06, "loss": 0.4478, "step": 851 }, { "epoch": 0.053367575439640456, "grad_norm": 0.8624988862586199, "learning_rate": 9.985690392001904e-06, "loss": 0.405, "step": 852 }, { "epoch": 0.05343021343898276, "grad_norm": 0.9801863545193211, "learning_rate": 9.985613598894624e-06, "loss": 0.4377, "step": 853 }, { "epoch": 0.05349285143832506, "grad_norm": 0.8735660036909861, "learning_rate": 9.985536600578563e-06, "loss": 0.4472, "step": 854 }, { "epoch": 0.05355548943766736, "grad_norm": 0.8404351626312561, "learning_rate": 9.985459397056891e-06, "loss": 0.4476, "step": 855 }, { "epoch": 0.05361812743700966, "grad_norm": 0.8751077488814104, "learning_rate": 9.985381988332785e-06, "loss": 0.4488, "step": 856 }, { "epoch": 0.05368076543635196, "grad_norm": 1.0022121546005114, "learning_rate": 9.985304374409432e-06, "loss": 0.437, "step": 857 }, { "epoch": 0.05374340343569426, "grad_norm": 0.8948434350195908, "learning_rate": 9.985226555290025e-06, "loss": 0.4449, "step": 858 }, { "epoch": 0.05380604143503657, "grad_norm": 0.8913901078583684, "learning_rate": 9.985148530977767e-06, "loss": 0.4427, "step": 859 }, { "epoch": 0.05386867943437887, "grad_norm": 1.0072950677411912, "learning_rate": 9.98507030147587e-06, "loss": 0.5063, "step": 860 }, { "epoch": 0.05393131743372117, "grad_norm": 0.8816308876687692, "learning_rate": 9.984991866787555e-06, "loss": 0.431, "step": 861 }, { "epoch": 0.05399395543306347, "grad_norm": 0.9390691830159609, "learning_rate": 9.98491322691605e-06, "loss": 0.4671, "step": 862 }, { "epoch": 0.05405659343240577, "grad_norm": 0.8913814263454453, "learning_rate": 9.98483438186459e-06, "loss": 0.4358, "step": 863 }, { "epoch": 0.05411923143174807, "grad_norm": 0.9158210914677264, "learning_rate": 9.984755331636423e-06, "loss": 0.5372, "step": 864 }, { "epoch": 0.05418186943109037, "grad_norm": 0.8308840233581802, "learning_rate": 9.984676076234799e-06, "loss": 0.4347, "step": 865 }, { "epoch": 0.05424450743043267, "grad_norm": 0.9063069192921868, "learning_rate": 9.984596615662986e-06, "loss": 0.4644, "step": 866 }, { "epoch": 0.054307145429774974, "grad_norm": 0.9177900298965735, "learning_rate": 9.984516949924246e-06, "loss": 0.442, "step": 867 }, { "epoch": 0.054369783429117274, "grad_norm": 0.9840136431814384, "learning_rate": 9.984437079021867e-06, "loss": 0.5021, "step": 868 }, { "epoch": 0.054432421428459575, "grad_norm": 0.9519764128475355, "learning_rate": 9.984357002959131e-06, "loss": 0.4747, "step": 869 }, { "epoch": 0.054495059427801876, "grad_norm": 1.1098340702992966, "learning_rate": 9.984276721739337e-06, "loss": 0.449, "step": 870 }, { "epoch": 0.054557697427144176, "grad_norm": 0.8875167203147291, "learning_rate": 9.984196235365788e-06, "loss": 0.4639, "step": 871 }, { "epoch": 0.05462033542648648, "grad_norm": 1.0493588142636467, "learning_rate": 9.984115543841797e-06, "loss": 0.472, "step": 872 }, { "epoch": 0.05468297342582878, "grad_norm": 0.9217014604405765, "learning_rate": 9.984034647170684e-06, "loss": 0.4496, "step": 873 }, { "epoch": 0.05474561142517108, "grad_norm": 0.8737062924409414, "learning_rate": 9.983953545355781e-06, "loss": 0.4287, "step": 874 }, { "epoch": 0.05480824942451338, "grad_norm": 0.9701724464473825, "learning_rate": 9.983872238400423e-06, "loss": 0.4578, "step": 875 }, { "epoch": 0.05487088742385568, "grad_norm": 0.9732272823294963, "learning_rate": 9.98379072630796e-06, "loss": 0.5021, "step": 876 }, { "epoch": 0.05493352542319798, "grad_norm": 0.9609475935736345, "learning_rate": 9.983709009081745e-06, "loss": 0.432, "step": 877 }, { "epoch": 0.05499616342254028, "grad_norm": 0.9556755710942036, "learning_rate": 9.983627086725145e-06, "loss": 0.4591, "step": 878 }, { "epoch": 0.05505880142188258, "grad_norm": 1.015899606016432, "learning_rate": 9.983544959241525e-06, "loss": 0.4976, "step": 879 }, { "epoch": 0.05512143942122489, "grad_norm": 0.8956964380893778, "learning_rate": 9.983462626634273e-06, "loss": 0.4256, "step": 880 }, { "epoch": 0.05518407742056719, "grad_norm": 1.1966224134027057, "learning_rate": 9.98338008890677e-06, "loss": 0.4785, "step": 881 }, { "epoch": 0.05524671541990949, "grad_norm": 0.9558357551207638, "learning_rate": 9.983297346062422e-06, "loss": 0.427, "step": 882 }, { "epoch": 0.05530935341925179, "grad_norm": 1.0492479887215465, "learning_rate": 9.983214398104628e-06, "loss": 0.4975, "step": 883 }, { "epoch": 0.05537199141859409, "grad_norm": 1.0580169935887045, "learning_rate": 9.983131245036805e-06, "loss": 0.49, "step": 884 }, { "epoch": 0.05543462941793639, "grad_norm": 0.8940482994434045, "learning_rate": 9.983047886862375e-06, "loss": 0.4355, "step": 885 }, { "epoch": 0.055497267417278694, "grad_norm": 0.9454529619124102, "learning_rate": 9.982964323584769e-06, "loss": 0.466, "step": 886 }, { "epoch": 0.055559905416620994, "grad_norm": 0.9282112449017146, "learning_rate": 9.982880555207428e-06, "loss": 0.4499, "step": 887 }, { "epoch": 0.055622543415963295, "grad_norm": 0.9838921053676487, "learning_rate": 9.982796581733796e-06, "loss": 0.4896, "step": 888 }, { "epoch": 0.055685181415305596, "grad_norm": 0.8960840755270871, "learning_rate": 9.982712403167333e-06, "loss": 0.4381, "step": 889 }, { "epoch": 0.055747819414647896, "grad_norm": 0.9376876245939268, "learning_rate": 9.982628019511502e-06, "loss": 0.4421, "step": 890 }, { "epoch": 0.0558104574139902, "grad_norm": 0.9416246103319993, "learning_rate": 9.982543430769778e-06, "loss": 0.4838, "step": 891 }, { "epoch": 0.0558730954133325, "grad_norm": 0.9405139792428872, "learning_rate": 9.982458636945642e-06, "loss": 0.4673, "step": 892 }, { "epoch": 0.0559357334126748, "grad_norm": 0.9870481542883106, "learning_rate": 9.982373638042581e-06, "loss": 0.4406, "step": 893 }, { "epoch": 0.0559983714120171, "grad_norm": 0.8814328797603032, "learning_rate": 9.9822884340641e-06, "loss": 0.465, "step": 894 }, { "epoch": 0.0560610094113594, "grad_norm": 0.8608494393820718, "learning_rate": 9.982203025013698e-06, "loss": 0.4297, "step": 895 }, { "epoch": 0.0561236474107017, "grad_norm": 0.8372965359533916, "learning_rate": 9.982117410894899e-06, "loss": 0.442, "step": 896 }, { "epoch": 0.056186285410044, "grad_norm": 0.8889810623459387, "learning_rate": 9.982031591711222e-06, "loss": 0.4296, "step": 897 }, { "epoch": 0.0562489234093863, "grad_norm": 0.9423282543438809, "learning_rate": 9.981945567466198e-06, "loss": 0.4275, "step": 898 }, { "epoch": 0.0563115614087286, "grad_norm": 0.9520083825622397, "learning_rate": 9.98185933816337e-06, "loss": 0.4521, "step": 899 }, { "epoch": 0.0563741994080709, "grad_norm": 1.0332149006199065, "learning_rate": 9.981772903806288e-06, "loss": 0.4889, "step": 900 }, { "epoch": 0.056436837407413204, "grad_norm": 0.9363211714614416, "learning_rate": 9.981686264398508e-06, "loss": 0.4741, "step": 901 }, { "epoch": 0.05649947540675551, "grad_norm": 0.9231417419350555, "learning_rate": 9.981599419943597e-06, "loss": 0.4186, "step": 902 }, { "epoch": 0.05656211340609781, "grad_norm": 1.0143888894010589, "learning_rate": 9.981512370445128e-06, "loss": 0.4909, "step": 903 }, { "epoch": 0.05662475140544011, "grad_norm": 0.8755061832538629, "learning_rate": 9.981425115906687e-06, "loss": 0.4723, "step": 904 }, { "epoch": 0.056687389404782414, "grad_norm": 0.9589821237731164, "learning_rate": 9.981337656331863e-06, "loss": 0.4487, "step": 905 }, { "epoch": 0.056750027404124714, "grad_norm": 0.9268258671708899, "learning_rate": 9.981249991724256e-06, "loss": 0.4775, "step": 906 }, { "epoch": 0.056812665403467015, "grad_norm": 0.89910845295711, "learning_rate": 9.981162122087475e-06, "loss": 0.433, "step": 907 }, { "epoch": 0.056875303402809316, "grad_norm": 0.9124447783041417, "learning_rate": 9.981074047425138e-06, "loss": 0.5107, "step": 908 }, { "epoch": 0.056937941402151616, "grad_norm": 0.846643295064803, "learning_rate": 9.980985767740867e-06, "loss": 0.419, "step": 909 }, { "epoch": 0.05700057940149392, "grad_norm": 1.0178178220466456, "learning_rate": 9.980897283038297e-06, "loss": 0.4145, "step": 910 }, { "epoch": 0.05706321740083622, "grad_norm": 0.9162565092013449, "learning_rate": 9.980808593321072e-06, "loss": 0.4579, "step": 911 }, { "epoch": 0.05712585540017852, "grad_norm": 1.1043123941048623, "learning_rate": 9.98071969859284e-06, "loss": 0.507, "step": 912 }, { "epoch": 0.05718849339952082, "grad_norm": 0.9251331955374069, "learning_rate": 9.98063059885726e-06, "loss": 0.4665, "step": 913 }, { "epoch": 0.05725113139886312, "grad_norm": 0.984266513566876, "learning_rate": 9.980541294118e-06, "loss": 0.459, "step": 914 }, { "epoch": 0.05731376939820542, "grad_norm": 1.2232139114826488, "learning_rate": 9.980451784378738e-06, "loss": 0.4246, "step": 915 }, { "epoch": 0.05737640739754772, "grad_norm": 0.9881560985954707, "learning_rate": 9.980362069643155e-06, "loss": 0.4447, "step": 916 }, { "epoch": 0.05743904539689002, "grad_norm": 0.9332074440127756, "learning_rate": 9.980272149914943e-06, "loss": 0.5025, "step": 917 }, { "epoch": 0.05750168339623232, "grad_norm": 0.9456667678809924, "learning_rate": 9.980182025197806e-06, "loss": 0.4473, "step": 918 }, { "epoch": 0.05756432139557462, "grad_norm": 0.9203097111735135, "learning_rate": 9.980091695495453e-06, "loss": 0.4746, "step": 919 }, { "epoch": 0.057626959394916924, "grad_norm": 0.9906186872856871, "learning_rate": 9.980001160811603e-06, "loss": 0.4589, "step": 920 }, { "epoch": 0.057689597394259225, "grad_norm": 1.1380749997020274, "learning_rate": 9.979910421149979e-06, "loss": 0.4207, "step": 921 }, { "epoch": 0.057752235393601525, "grad_norm": 0.9553277263333892, "learning_rate": 9.979819476514318e-06, "loss": 0.4428, "step": 922 }, { "epoch": 0.057814873392943826, "grad_norm": 0.9094571556056714, "learning_rate": 9.979728326908361e-06, "loss": 0.4612, "step": 923 }, { "epoch": 0.057877511392286134, "grad_norm": 0.960454323871676, "learning_rate": 9.979636972335865e-06, "loss": 0.4668, "step": 924 }, { "epoch": 0.057940149391628434, "grad_norm": 0.9756658565003925, "learning_rate": 9.979545412800587e-06, "loss": 0.4712, "step": 925 }, { "epoch": 0.058002787390970735, "grad_norm": 0.8829836319876834, "learning_rate": 9.979453648306295e-06, "loss": 0.4634, "step": 926 }, { "epoch": 0.058065425390313036, "grad_norm": 0.9486913241601279, "learning_rate": 9.979361678856764e-06, "loss": 0.4469, "step": 927 }, { "epoch": 0.058128063389655336, "grad_norm": 0.9857651049633375, "learning_rate": 9.979269504455784e-06, "loss": 0.454, "step": 928 }, { "epoch": 0.05819070138899764, "grad_norm": 1.0210617396080277, "learning_rate": 9.979177125107147e-06, "loss": 0.4698, "step": 929 }, { "epoch": 0.05825333938833994, "grad_norm": 0.9926905640115624, "learning_rate": 9.979084540814657e-06, "loss": 0.4737, "step": 930 }, { "epoch": 0.05831597738768224, "grad_norm": 0.9465138558091019, "learning_rate": 9.978991751582122e-06, "loss": 0.4674, "step": 931 }, { "epoch": 0.05837861538702454, "grad_norm": 0.9575094352989362, "learning_rate": 9.97889875741336e-06, "loss": 0.4414, "step": 932 }, { "epoch": 0.05844125338636684, "grad_norm": 0.9949242789836218, "learning_rate": 9.978805558312203e-06, "loss": 0.4555, "step": 933 }, { "epoch": 0.05850389138570914, "grad_norm": 0.9797992284721461, "learning_rate": 9.978712154282487e-06, "loss": 0.4223, "step": 934 }, { "epoch": 0.05856652938505144, "grad_norm": 0.8474539995753507, "learning_rate": 9.978618545328052e-06, "loss": 0.4591, "step": 935 }, { "epoch": 0.05862916738439374, "grad_norm": 0.9561551229840582, "learning_rate": 9.978524731452754e-06, "loss": 0.4463, "step": 936 }, { "epoch": 0.05869180538373604, "grad_norm": 0.9569443626413291, "learning_rate": 9.978430712660453e-06, "loss": 0.4098, "step": 937 }, { "epoch": 0.05875444338307834, "grad_norm": 0.9030274133337753, "learning_rate": 9.978336488955023e-06, "loss": 0.4606, "step": 938 }, { "epoch": 0.058817081382420644, "grad_norm": 0.9174697376126015, "learning_rate": 9.978242060340335e-06, "loss": 0.4227, "step": 939 }, { "epoch": 0.058879719381762945, "grad_norm": 1.0669177349351004, "learning_rate": 9.978147426820284e-06, "loss": 0.5048, "step": 940 }, { "epoch": 0.058942357381105245, "grad_norm": 0.947858376392348, "learning_rate": 9.97805258839876e-06, "loss": 0.4332, "step": 941 }, { "epoch": 0.059004995380447546, "grad_norm": 0.9612461179689937, "learning_rate": 9.977957545079665e-06, "loss": 0.455, "step": 942 }, { "epoch": 0.05906763337978985, "grad_norm": 0.9218968962769395, "learning_rate": 9.977862296866914e-06, "loss": 0.4072, "step": 943 }, { "epoch": 0.05913027137913215, "grad_norm": 0.9476948402645348, "learning_rate": 9.977766843764428e-06, "loss": 0.4408, "step": 944 }, { "epoch": 0.05919290937847445, "grad_norm": 0.9946133471206838, "learning_rate": 9.977671185776135e-06, "loss": 0.4723, "step": 945 }, { "epoch": 0.059255547377816756, "grad_norm": 0.9595749695167028, "learning_rate": 9.977575322905972e-06, "loss": 0.4577, "step": 946 }, { "epoch": 0.059318185377159056, "grad_norm": 0.965937191803051, "learning_rate": 9.977479255157884e-06, "loss": 0.4605, "step": 947 }, { "epoch": 0.05938082337650136, "grad_norm": 0.8788612669870901, "learning_rate": 9.977382982535826e-06, "loss": 0.4566, "step": 948 }, { "epoch": 0.05944346137584366, "grad_norm": 0.9309368059991531, "learning_rate": 9.977286505043761e-06, "loss": 0.4714, "step": 949 }, { "epoch": 0.05950609937518596, "grad_norm": 0.9512980053668874, "learning_rate": 9.97718982268566e-06, "loss": 0.4479, "step": 950 }, { "epoch": 0.05956873737452826, "grad_norm": 0.9334506816075641, "learning_rate": 9.977092935465502e-06, "loss": 0.4515, "step": 951 }, { "epoch": 0.05963137537387056, "grad_norm": 1.053954600954042, "learning_rate": 9.976995843387276e-06, "loss": 0.4595, "step": 952 }, { "epoch": 0.05969401337321286, "grad_norm": 1.0296916802194738, "learning_rate": 9.976898546454977e-06, "loss": 0.4754, "step": 953 }, { "epoch": 0.05975665137255516, "grad_norm": 1.1303383150369335, "learning_rate": 9.976801044672608e-06, "loss": 0.4914, "step": 954 }, { "epoch": 0.05981928937189746, "grad_norm": 0.906029510011733, "learning_rate": 9.976703338044185e-06, "loss": 0.4221, "step": 955 }, { "epoch": 0.05988192737123976, "grad_norm": 0.8865745701687702, "learning_rate": 9.97660542657373e-06, "loss": 0.4453, "step": 956 }, { "epoch": 0.05994456537058206, "grad_norm": 0.9387242878975702, "learning_rate": 9.976507310265271e-06, "loss": 0.4934, "step": 957 }, { "epoch": 0.060007203369924364, "grad_norm": 0.9049891398671451, "learning_rate": 9.976408989122848e-06, "loss": 0.4732, "step": 958 }, { "epoch": 0.060069841369266665, "grad_norm": 0.9367180008559177, "learning_rate": 9.976310463150508e-06, "loss": 0.4639, "step": 959 }, { "epoch": 0.060132479368608965, "grad_norm": 0.8989167063628697, "learning_rate": 9.976211732352304e-06, "loss": 0.446, "step": 960 }, { "epoch": 0.060195117367951266, "grad_norm": 1.0014260967778823, "learning_rate": 9.976112796732301e-06, "loss": 0.453, "step": 961 }, { "epoch": 0.06025775536729357, "grad_norm": 0.9659638953722826, "learning_rate": 9.976013656294575e-06, "loss": 0.4238, "step": 962 }, { "epoch": 0.06032039336663587, "grad_norm": 0.9039177971765535, "learning_rate": 9.9759143110432e-06, "loss": 0.4552, "step": 963 }, { "epoch": 0.06038303136597817, "grad_norm": 0.880517260929041, "learning_rate": 9.975814760982269e-06, "loss": 0.3886, "step": 964 }, { "epoch": 0.06044566936532047, "grad_norm": 0.9166192865662124, "learning_rate": 9.97571500611588e-06, "loss": 0.4598, "step": 965 }, { "epoch": 0.06050830736466277, "grad_norm": 0.8944555417296142, "learning_rate": 9.975615046448137e-06, "loss": 0.4945, "step": 966 }, { "epoch": 0.06057094536400508, "grad_norm": 0.9709797180527064, "learning_rate": 9.975514881983154e-06, "loss": 0.4231, "step": 967 }, { "epoch": 0.06063358336334738, "grad_norm": 0.954999533709772, "learning_rate": 9.975414512725058e-06, "loss": 0.4579, "step": 968 }, { "epoch": 0.06069622136268968, "grad_norm": 0.8931842778492058, "learning_rate": 9.975313938677975e-06, "loss": 0.4304, "step": 969 }, { "epoch": 0.06075885936203198, "grad_norm": 0.8642292049908066, "learning_rate": 9.975213159846048e-06, "loss": 0.4202, "step": 970 }, { "epoch": 0.06082149736137428, "grad_norm": 0.8977775615294807, "learning_rate": 9.975112176233424e-06, "loss": 0.4585, "step": 971 }, { "epoch": 0.06088413536071658, "grad_norm": 0.9659720478773683, "learning_rate": 9.975010987844258e-06, "loss": 0.4706, "step": 972 }, { "epoch": 0.06094677336005888, "grad_norm": 0.9324741276768244, "learning_rate": 9.974909594682716e-06, "loss": 0.4271, "step": 973 }, { "epoch": 0.06100941135940118, "grad_norm": 0.9268965987266379, "learning_rate": 9.974807996752974e-06, "loss": 0.4587, "step": 974 }, { "epoch": 0.06107204935874348, "grad_norm": 0.8108900599759947, "learning_rate": 9.97470619405921e-06, "loss": 0.4358, "step": 975 }, { "epoch": 0.06113468735808578, "grad_norm": 0.9732965503219843, "learning_rate": 9.974604186605616e-06, "loss": 0.435, "step": 976 }, { "epoch": 0.061197325357428084, "grad_norm": 1.1011568168130204, "learning_rate": 9.97450197439639e-06, "loss": 0.4677, "step": 977 }, { "epoch": 0.061259963356770385, "grad_norm": 0.8867602174200493, "learning_rate": 9.97439955743574e-06, "loss": 0.4802, "step": 978 }, { "epoch": 0.061322601356112685, "grad_norm": 0.8921255658508139, "learning_rate": 9.97429693572788e-06, "loss": 0.4501, "step": 979 }, { "epoch": 0.061385239355454986, "grad_norm": 0.935648865338509, "learning_rate": 9.974194109277036e-06, "loss": 0.4718, "step": 980 }, { "epoch": 0.06144787735479729, "grad_norm": 0.8770261663061795, "learning_rate": 9.97409107808744e-06, "loss": 0.3942, "step": 981 }, { "epoch": 0.06151051535413959, "grad_norm": 0.8714295095284562, "learning_rate": 9.97398784216333e-06, "loss": 0.4799, "step": 982 }, { "epoch": 0.06157315335348189, "grad_norm": 0.9293139694323811, "learning_rate": 9.973884401508958e-06, "loss": 0.4552, "step": 983 }, { "epoch": 0.06163579135282419, "grad_norm": 0.9655245783214117, "learning_rate": 9.97378075612858e-06, "loss": 0.4652, "step": 984 }, { "epoch": 0.06169842935216649, "grad_norm": 0.8889073839939376, "learning_rate": 9.973676906026464e-06, "loss": 0.4775, "step": 985 }, { "epoch": 0.06176106735150879, "grad_norm": 0.7771725776282543, "learning_rate": 9.973572851206882e-06, "loss": 0.4946, "step": 986 }, { "epoch": 0.06182370535085109, "grad_norm": 0.920354583074144, "learning_rate": 9.973468591674121e-06, "loss": 0.4686, "step": 987 }, { "epoch": 0.06188634335019339, "grad_norm": 0.9105984779556204, "learning_rate": 9.973364127432468e-06, "loss": 0.4635, "step": 988 }, { "epoch": 0.0619489813495357, "grad_norm": 0.9050315508186861, "learning_rate": 9.973259458486225e-06, "loss": 0.4596, "step": 989 }, { "epoch": 0.062011619348878, "grad_norm": 0.8844632321456114, "learning_rate": 9.973154584839698e-06, "loss": 0.4277, "step": 990 }, { "epoch": 0.0620742573482203, "grad_norm": 0.9141800110896054, "learning_rate": 9.973049506497205e-06, "loss": 0.4341, "step": 991 }, { "epoch": 0.0621368953475626, "grad_norm": 0.911077269839778, "learning_rate": 9.972944223463073e-06, "loss": 0.4416, "step": 992 }, { "epoch": 0.0621995333469049, "grad_norm": 0.978684313220983, "learning_rate": 9.972838735741633e-06, "loss": 0.3992, "step": 993 }, { "epoch": 0.0622621713462472, "grad_norm": 0.9362980721917727, "learning_rate": 9.972733043337228e-06, "loss": 0.4737, "step": 994 }, { "epoch": 0.0623248093455895, "grad_norm": 0.9751142882755939, "learning_rate": 9.972627146254208e-06, "loss": 0.4559, "step": 995 }, { "epoch": 0.062387447344931804, "grad_norm": 1.009280100128346, "learning_rate": 9.972521044496932e-06, "loss": 0.4666, "step": 996 }, { "epoch": 0.062450085344274105, "grad_norm": 0.9057171229464022, "learning_rate": 9.972414738069766e-06, "loss": 0.4133, "step": 997 }, { "epoch": 0.0625127233436164, "grad_norm": 0.8771620729376317, "learning_rate": 9.972308226977086e-06, "loss": 0.4736, "step": 998 }, { "epoch": 0.0625753613429587, "grad_norm": 0.9838228405145389, "learning_rate": 9.972201511223278e-06, "loss": 0.4627, "step": 999 }, { "epoch": 0.062637999342301, "grad_norm": 0.8821466566844415, "learning_rate": 9.972094590812734e-06, "loss": 0.4423, "step": 1000 }, { "epoch": 0.06270063734164331, "grad_norm": 0.9644488339449918, "learning_rate": 9.971987465749851e-06, "loss": 0.4878, "step": 1001 }, { "epoch": 0.06276327534098561, "grad_norm": 0.9673194926596151, "learning_rate": 9.971880136039042e-06, "loss": 0.4626, "step": 1002 }, { "epoch": 0.06282591334032792, "grad_norm": 0.9178048293334611, "learning_rate": 9.971772601684724e-06, "loss": 0.4788, "step": 1003 }, { "epoch": 0.06288855133967021, "grad_norm": 0.9296480613615702, "learning_rate": 9.971664862691322e-06, "loss": 0.4383, "step": 1004 }, { "epoch": 0.06295118933901252, "grad_norm": 0.903606460617527, "learning_rate": 9.971556919063273e-06, "loss": 0.4506, "step": 1005 }, { "epoch": 0.06301382733835481, "grad_norm": 0.9209403957513969, "learning_rate": 9.971448770805017e-06, "loss": 0.4542, "step": 1006 }, { "epoch": 0.06307646533769712, "grad_norm": 0.9397220390110198, "learning_rate": 9.971340417921008e-06, "loss": 0.4388, "step": 1007 }, { "epoch": 0.06313910333703941, "grad_norm": 0.9904803669120282, "learning_rate": 9.971231860415704e-06, "loss": 0.442, "step": 1008 }, { "epoch": 0.06320174133638172, "grad_norm": 0.9131840867728517, "learning_rate": 9.971123098293574e-06, "loss": 0.4408, "step": 1009 }, { "epoch": 0.06326437933572401, "grad_norm": 0.8986131443348198, "learning_rate": 9.971014131559094e-06, "loss": 0.4649, "step": 1010 }, { "epoch": 0.06332701733506632, "grad_norm": 1.0056334920104384, "learning_rate": 9.970904960216751e-06, "loss": 0.4705, "step": 1011 }, { "epoch": 0.06338965533440861, "grad_norm": 0.9071066584465742, "learning_rate": 9.970795584271036e-06, "loss": 0.4621, "step": 1012 }, { "epoch": 0.06345229333375092, "grad_norm": 0.9036766481701008, "learning_rate": 9.970686003726453e-06, "loss": 0.4327, "step": 1013 }, { "epoch": 0.06351493133309322, "grad_norm": 0.9077034810405973, "learning_rate": 9.970576218587511e-06, "loss": 0.4538, "step": 1014 }, { "epoch": 0.06357756933243552, "grad_norm": 0.957117245868161, "learning_rate": 9.97046622885873e-06, "loss": 0.4604, "step": 1015 }, { "epoch": 0.06364020733177782, "grad_norm": 0.9110448941827765, "learning_rate": 9.970356034544636e-06, "loss": 0.455, "step": 1016 }, { "epoch": 0.06370284533112013, "grad_norm": 0.8248256897242008, "learning_rate": 9.970245635649764e-06, "loss": 0.4003, "step": 1017 }, { "epoch": 0.06376548333046242, "grad_norm": 0.9991489189376284, "learning_rate": 9.97013503217866e-06, "loss": 0.4794, "step": 1018 }, { "epoch": 0.06382812132980473, "grad_norm": 0.9409702245941902, "learning_rate": 9.970024224135877e-06, "loss": 0.4774, "step": 1019 }, { "epoch": 0.06389075932914702, "grad_norm": 0.9028339116343745, "learning_rate": 9.969913211525972e-06, "loss": 0.4802, "step": 1020 }, { "epoch": 0.06395339732848933, "grad_norm": 0.8781710156893328, "learning_rate": 9.969801994353519e-06, "loss": 0.4277, "step": 1021 }, { "epoch": 0.06401603532783164, "grad_norm": 0.8541983134215585, "learning_rate": 9.969690572623092e-06, "loss": 0.4217, "step": 1022 }, { "epoch": 0.06407867332717393, "grad_norm": 0.7660410972984174, "learning_rate": 9.969578946339278e-06, "loss": 0.4877, "step": 1023 }, { "epoch": 0.06414131132651624, "grad_norm": 0.8484072247725958, "learning_rate": 9.969467115506674e-06, "loss": 0.4517, "step": 1024 }, { "epoch": 0.06420394932585853, "grad_norm": 0.8873983109178231, "learning_rate": 9.969355080129881e-06, "loss": 0.4833, "step": 1025 }, { "epoch": 0.06426658732520084, "grad_norm": 0.9061388665907908, "learning_rate": 9.96924284021351e-06, "loss": 0.4201, "step": 1026 }, { "epoch": 0.06432922532454313, "grad_norm": 0.9243026718581335, "learning_rate": 9.969130395762182e-06, "loss": 0.4285, "step": 1027 }, { "epoch": 0.06439186332388544, "grad_norm": 0.8853667719331157, "learning_rate": 9.969017746780523e-06, "loss": 0.4659, "step": 1028 }, { "epoch": 0.06445450132322773, "grad_norm": 0.9006652863226411, "learning_rate": 9.968904893273172e-06, "loss": 0.468, "step": 1029 }, { "epoch": 0.06451713932257004, "grad_norm": 0.8372570179293093, "learning_rate": 9.968791835244773e-06, "loss": 0.4477, "step": 1030 }, { "epoch": 0.06457977732191233, "grad_norm": 0.9128770938664015, "learning_rate": 9.968678572699978e-06, "loss": 0.4654, "step": 1031 }, { "epoch": 0.06464241532125464, "grad_norm": 0.8855725196001685, "learning_rate": 9.968565105643456e-06, "loss": 0.4609, "step": 1032 }, { "epoch": 0.06470505332059694, "grad_norm": 0.9001299675168565, "learning_rate": 9.968451434079868e-06, "loss": 0.452, "step": 1033 }, { "epoch": 0.06476769131993924, "grad_norm": 0.9316845603520797, "learning_rate": 9.968337558013898e-06, "loss": 0.4792, "step": 1034 }, { "epoch": 0.06483032931928154, "grad_norm": 0.87638039153309, "learning_rate": 9.968223477450231e-06, "loss": 0.4416, "step": 1035 }, { "epoch": 0.06489296731862385, "grad_norm": 0.8877389999941736, "learning_rate": 9.968109192393564e-06, "loss": 0.4432, "step": 1036 }, { "epoch": 0.06495560531796614, "grad_norm": 0.9182093475014985, "learning_rate": 9.9679947028486e-06, "loss": 0.4341, "step": 1037 }, { "epoch": 0.06501824331730845, "grad_norm": 0.8484111084510647, "learning_rate": 9.967880008820053e-06, "loss": 0.4561, "step": 1038 }, { "epoch": 0.06508088131665074, "grad_norm": 0.8244522808702833, "learning_rate": 9.967765110312642e-06, "loss": 0.4406, "step": 1039 }, { "epoch": 0.06514351931599305, "grad_norm": 0.9040459731092966, "learning_rate": 9.967650007331098e-06, "loss": 0.4344, "step": 1040 }, { "epoch": 0.06520615731533534, "grad_norm": 0.9244436619548205, "learning_rate": 9.967534699880157e-06, "loss": 0.4565, "step": 1041 }, { "epoch": 0.06526879531467765, "grad_norm": 0.8542188210499615, "learning_rate": 9.967419187964567e-06, "loss": 0.4203, "step": 1042 }, { "epoch": 0.06533143331401996, "grad_norm": 0.9084288471077621, "learning_rate": 9.96730347158908e-06, "loss": 0.4148, "step": 1043 }, { "epoch": 0.06539407131336225, "grad_norm": 1.0000715132494038, "learning_rate": 9.967187550758459e-06, "loss": 0.4318, "step": 1044 }, { "epoch": 0.06545670931270456, "grad_norm": 0.9084262834924828, "learning_rate": 9.967071425477477e-06, "loss": 0.4506, "step": 1045 }, { "epoch": 0.06551934731204685, "grad_norm": 0.9551499610852525, "learning_rate": 9.966955095750915e-06, "loss": 0.4475, "step": 1046 }, { "epoch": 0.06558198531138916, "grad_norm": 0.923131266093865, "learning_rate": 9.966838561583559e-06, "loss": 0.4447, "step": 1047 }, { "epoch": 0.06564462331073145, "grad_norm": 0.873682475985493, "learning_rate": 9.966721822980203e-06, "loss": 0.4422, "step": 1048 }, { "epoch": 0.06570726131007376, "grad_norm": 1.0050883055046294, "learning_rate": 9.966604879945659e-06, "loss": 0.4771, "step": 1049 }, { "epoch": 0.06576989930941605, "grad_norm": 0.9193923258615352, "learning_rate": 9.966487732484733e-06, "loss": 0.4458, "step": 1050 }, { "epoch": 0.06583253730875836, "grad_norm": 0.9431915540285041, "learning_rate": 9.966370380602252e-06, "loss": 0.4814, "step": 1051 }, { "epoch": 0.06589517530810066, "grad_norm": 0.9187129039029993, "learning_rate": 9.966252824303044e-06, "loss": 0.4401, "step": 1052 }, { "epoch": 0.06595781330744296, "grad_norm": 0.8963055310063858, "learning_rate": 9.966135063591946e-06, "loss": 0.4159, "step": 1053 }, { "epoch": 0.06602045130678526, "grad_norm": 0.9744156641165728, "learning_rate": 9.96601709847381e-06, "loss": 0.5086, "step": 1054 }, { "epoch": 0.06608308930612757, "grad_norm": 0.9857139727399274, "learning_rate": 9.965898928953486e-06, "loss": 0.4799, "step": 1055 }, { "epoch": 0.06614572730546986, "grad_norm": 0.8686198172842436, "learning_rate": 9.965780555035842e-06, "loss": 0.4018, "step": 1056 }, { "epoch": 0.06620836530481217, "grad_norm": 0.862618625627495, "learning_rate": 9.965661976725748e-06, "loss": 0.4551, "step": 1057 }, { "epoch": 0.06627100330415446, "grad_norm": 0.9424355279202712, "learning_rate": 9.965543194028086e-06, "loss": 0.4766, "step": 1058 }, { "epoch": 0.06633364130349677, "grad_norm": 0.9353845625708711, "learning_rate": 9.965424206947745e-06, "loss": 0.4057, "step": 1059 }, { "epoch": 0.06639627930283906, "grad_norm": 0.9827181201359431, "learning_rate": 9.96530501548962e-06, "loss": 0.4888, "step": 1060 }, { "epoch": 0.06645891730218137, "grad_norm": 0.9315698778385604, "learning_rate": 9.965185619658621e-06, "loss": 0.4099, "step": 1061 }, { "epoch": 0.06652155530152366, "grad_norm": 0.8863635864541215, "learning_rate": 9.96506601945966e-06, "loss": 0.4431, "step": 1062 }, { "epoch": 0.06658419330086597, "grad_norm": 0.9578244485706328, "learning_rate": 9.964946214897659e-06, "loss": 0.4743, "step": 1063 }, { "epoch": 0.06664683130020826, "grad_norm": 1.0078022609933255, "learning_rate": 9.96482620597755e-06, "loss": 0.4279, "step": 1064 }, { "epoch": 0.06670946929955057, "grad_norm": 0.8946946824065742, "learning_rate": 9.964705992704273e-06, "loss": 0.4785, "step": 1065 }, { "epoch": 0.06677210729889288, "grad_norm": 0.9246503965463188, "learning_rate": 9.964585575082777e-06, "loss": 0.4267, "step": 1066 }, { "epoch": 0.06683474529823517, "grad_norm": 0.8731057352785752, "learning_rate": 9.964464953118016e-06, "loss": 0.454, "step": 1067 }, { "epoch": 0.06689738329757748, "grad_norm": 0.9961925471354837, "learning_rate": 9.964344126814958e-06, "loss": 0.4336, "step": 1068 }, { "epoch": 0.06696002129691978, "grad_norm": 0.9713635591582898, "learning_rate": 9.964223096178572e-06, "loss": 0.4508, "step": 1069 }, { "epoch": 0.06702265929626208, "grad_norm": 0.9571056815436522, "learning_rate": 9.964101861213844e-06, "loss": 0.4316, "step": 1070 }, { "epoch": 0.06708529729560438, "grad_norm": 0.9482825518772832, "learning_rate": 9.963980421925761e-06, "loss": 0.474, "step": 1071 }, { "epoch": 0.06714793529494668, "grad_norm": 0.9170432560258359, "learning_rate": 9.963858778319322e-06, "loss": 0.4516, "step": 1072 }, { "epoch": 0.06721057329428898, "grad_norm": 0.9166343971621109, "learning_rate": 9.963736930399535e-06, "loss": 0.4589, "step": 1073 }, { "epoch": 0.06727321129363129, "grad_norm": 0.8559648795578971, "learning_rate": 9.963614878171416e-06, "loss": 0.4113, "step": 1074 }, { "epoch": 0.06733584929297358, "grad_norm": 0.8612745386116809, "learning_rate": 9.963492621639982e-06, "loss": 0.463, "step": 1075 }, { "epoch": 0.06739848729231589, "grad_norm": 0.9310823501186235, "learning_rate": 9.963370160810275e-06, "loss": 0.4287, "step": 1076 }, { "epoch": 0.06746112529165818, "grad_norm": 0.88813870771171, "learning_rate": 9.963247495687331e-06, "loss": 0.4282, "step": 1077 }, { "epoch": 0.06752376329100049, "grad_norm": 0.9097374814041197, "learning_rate": 9.963124626276199e-06, "loss": 0.4434, "step": 1078 }, { "epoch": 0.06758640129034278, "grad_norm": 0.9612153673565844, "learning_rate": 9.963001552581935e-06, "loss": 0.4513, "step": 1079 }, { "epoch": 0.06764903928968509, "grad_norm": 0.9333535480026945, "learning_rate": 9.962878274609605e-06, "loss": 0.4479, "step": 1080 }, { "epoch": 0.06771167728902738, "grad_norm": 0.8827605912405209, "learning_rate": 9.962754792364287e-06, "loss": 0.4936, "step": 1081 }, { "epoch": 0.06777431528836969, "grad_norm": 0.9584891305113715, "learning_rate": 9.962631105851058e-06, "loss": 0.4306, "step": 1082 }, { "epoch": 0.06783695328771198, "grad_norm": 0.9311372333280447, "learning_rate": 9.962507215075012e-06, "loss": 0.4072, "step": 1083 }, { "epoch": 0.06789959128705429, "grad_norm": 0.9068682767925895, "learning_rate": 9.962383120041248e-06, "loss": 0.4411, "step": 1084 }, { "epoch": 0.06796222928639659, "grad_norm": 0.9137223296077437, "learning_rate": 9.962258820754875e-06, "loss": 0.474, "step": 1085 }, { "epoch": 0.0680248672857389, "grad_norm": 0.8972665858279727, "learning_rate": 9.962134317221007e-06, "loss": 0.3916, "step": 1086 }, { "epoch": 0.0680875052850812, "grad_norm": 0.9548936070325101, "learning_rate": 9.962009609444768e-06, "loss": 0.4522, "step": 1087 }, { "epoch": 0.0681501432844235, "grad_norm": 1.0181335766040742, "learning_rate": 9.961884697431294e-06, "loss": 0.4348, "step": 1088 }, { "epoch": 0.0682127812837658, "grad_norm": 0.8984773758659099, "learning_rate": 9.961759581185725e-06, "loss": 0.4483, "step": 1089 }, { "epoch": 0.0682754192831081, "grad_norm": 0.8649458717935091, "learning_rate": 9.961634260713212e-06, "loss": 0.4396, "step": 1090 }, { "epoch": 0.0683380572824504, "grad_norm": 0.9806224292421863, "learning_rate": 9.961508736018908e-06, "loss": 0.4714, "step": 1091 }, { "epoch": 0.0684006952817927, "grad_norm": 1.014288825771017, "learning_rate": 9.961383007107988e-06, "loss": 0.476, "step": 1092 }, { "epoch": 0.068463333281135, "grad_norm": 0.913084649542963, "learning_rate": 9.961257073985619e-06, "loss": 0.4614, "step": 1093 }, { "epoch": 0.0685259712804773, "grad_norm": 0.8344525450440529, "learning_rate": 9.961130936656989e-06, "loss": 0.4319, "step": 1094 }, { "epoch": 0.0685886092798196, "grad_norm": 0.896882409207514, "learning_rate": 9.961004595127291e-06, "loss": 0.4215, "step": 1095 }, { "epoch": 0.0686512472791619, "grad_norm": 0.9791635581839561, "learning_rate": 9.96087804940172e-06, "loss": 0.4479, "step": 1096 }, { "epoch": 0.06871388527850421, "grad_norm": 0.8448729645162987, "learning_rate": 9.960751299485489e-06, "loss": 0.4781, "step": 1097 }, { "epoch": 0.0687765232778465, "grad_norm": 0.9599394220550328, "learning_rate": 9.960624345383813e-06, "loss": 0.4645, "step": 1098 }, { "epoch": 0.06883916127718881, "grad_norm": 0.9654315503801658, "learning_rate": 9.96049718710192e-06, "loss": 0.4732, "step": 1099 }, { "epoch": 0.0689017992765311, "grad_norm": 0.9964694285345661, "learning_rate": 9.960369824645042e-06, "loss": 0.4457, "step": 1100 }, { "epoch": 0.06896443727587341, "grad_norm": 0.9405536948237645, "learning_rate": 9.96024225801842e-06, "loss": 0.4447, "step": 1101 }, { "epoch": 0.0690270752752157, "grad_norm": 0.9593962207457893, "learning_rate": 9.960114487227307e-06, "loss": 0.4461, "step": 1102 }, { "epoch": 0.06908971327455801, "grad_norm": 0.8650910378535112, "learning_rate": 9.95998651227696e-06, "loss": 0.4013, "step": 1103 }, { "epoch": 0.0691523512739003, "grad_norm": 0.9822624090824733, "learning_rate": 9.959858333172647e-06, "loss": 0.4565, "step": 1104 }, { "epoch": 0.06921498927324261, "grad_norm": 1.0289869184405083, "learning_rate": 9.959729949919646e-06, "loss": 0.4375, "step": 1105 }, { "epoch": 0.06927762727258491, "grad_norm": 0.9297178769887658, "learning_rate": 9.959601362523238e-06, "loss": 0.4444, "step": 1106 }, { "epoch": 0.06934026527192722, "grad_norm": 0.9825713951325267, "learning_rate": 9.95947257098872e-06, "loss": 0.4598, "step": 1107 }, { "epoch": 0.06940290327126951, "grad_norm": 1.0194870028196827, "learning_rate": 9.959343575321389e-06, "loss": 0.4253, "step": 1108 }, { "epoch": 0.06946554127061182, "grad_norm": 0.9502963612492574, "learning_rate": 9.959214375526556e-06, "loss": 0.428, "step": 1109 }, { "epoch": 0.06952817926995412, "grad_norm": 0.8670379745980935, "learning_rate": 9.95908497160954e-06, "loss": 0.4599, "step": 1110 }, { "epoch": 0.06959081726929642, "grad_norm": 0.9238120039163563, "learning_rate": 9.958955363575661e-06, "loss": 0.4309, "step": 1111 }, { "epoch": 0.06965345526863873, "grad_norm": 0.9775400357781101, "learning_rate": 9.958825551430265e-06, "loss": 0.4628, "step": 1112 }, { "epoch": 0.06971609326798102, "grad_norm": 0.925876157090268, "learning_rate": 9.958695535178685e-06, "loss": 0.4871, "step": 1113 }, { "epoch": 0.06977873126732333, "grad_norm": 0.9621446605084049, "learning_rate": 9.958565314826279e-06, "loss": 0.4714, "step": 1114 }, { "epoch": 0.06984136926666562, "grad_norm": 1.0005025514571129, "learning_rate": 9.958434890378404e-06, "loss": 0.4501, "step": 1115 }, { "epoch": 0.06990400726600793, "grad_norm": 0.9935280843101303, "learning_rate": 9.958304261840427e-06, "loss": 0.4573, "step": 1116 }, { "epoch": 0.06996664526535022, "grad_norm": 0.9415024323495434, "learning_rate": 9.958173429217727e-06, "loss": 0.4385, "step": 1117 }, { "epoch": 0.07002928326469253, "grad_norm": 0.9914079369023342, "learning_rate": 9.958042392515688e-06, "loss": 0.4234, "step": 1118 }, { "epoch": 0.07009192126403482, "grad_norm": 0.9035275437428306, "learning_rate": 9.957911151739705e-06, "loss": 0.4474, "step": 1119 }, { "epoch": 0.07015455926337713, "grad_norm": 1.0076690702132527, "learning_rate": 9.957779706895177e-06, "loss": 0.4693, "step": 1120 }, { "epoch": 0.07021719726271942, "grad_norm": 0.9182887107050397, "learning_rate": 9.957648057987517e-06, "loss": 0.4597, "step": 1121 }, { "epoch": 0.07027983526206173, "grad_norm": 0.815784218228752, "learning_rate": 9.957516205022144e-06, "loss": 0.4545, "step": 1122 }, { "epoch": 0.07034247326140403, "grad_norm": 0.973652260628064, "learning_rate": 9.957384148004483e-06, "loss": 0.4199, "step": 1123 }, { "epoch": 0.07040511126074633, "grad_norm": 0.9553005309946088, "learning_rate": 9.957251886939969e-06, "loss": 0.4401, "step": 1124 }, { "epoch": 0.07046774926008863, "grad_norm": 0.9483020112212504, "learning_rate": 9.957119421834048e-06, "loss": 0.4563, "step": 1125 }, { "epoch": 0.07053038725943094, "grad_norm": 0.9882903751244959, "learning_rate": 9.956986752692172e-06, "loss": 0.442, "step": 1126 }, { "epoch": 0.07059302525877323, "grad_norm": 0.907178779271339, "learning_rate": 9.9568538795198e-06, "loss": 0.3905, "step": 1127 }, { "epoch": 0.07065566325811554, "grad_norm": 0.9573002407315578, "learning_rate": 9.956720802322403e-06, "loss": 0.4589, "step": 1128 }, { "epoch": 0.07071830125745783, "grad_norm": 0.8445772019176783, "learning_rate": 9.95658752110546e-06, "loss": 0.4699, "step": 1129 }, { "epoch": 0.07078093925680014, "grad_norm": 0.9549087782177684, "learning_rate": 9.956454035874452e-06, "loss": 0.4649, "step": 1130 }, { "epoch": 0.07084357725614245, "grad_norm": 0.9568305742933522, "learning_rate": 9.956320346634877e-06, "loss": 0.4826, "step": 1131 }, { "epoch": 0.07090621525548474, "grad_norm": 0.9096135562349444, "learning_rate": 9.956186453392235e-06, "loss": 0.4942, "step": 1132 }, { "epoch": 0.07096885325482705, "grad_norm": 0.8795459110597155, "learning_rate": 9.956052356152041e-06, "loss": 0.4328, "step": 1133 }, { "epoch": 0.07103149125416934, "grad_norm": 0.9841528295810771, "learning_rate": 9.95591805491981e-06, "loss": 0.4739, "step": 1134 }, { "epoch": 0.07109412925351165, "grad_norm": 0.9301455559437479, "learning_rate": 9.955783549701075e-06, "loss": 0.4814, "step": 1135 }, { "epoch": 0.07115676725285394, "grad_norm": 0.8548161740828436, "learning_rate": 9.955648840501366e-06, "loss": 0.4115, "step": 1136 }, { "epoch": 0.07121940525219625, "grad_norm": 0.9510972828463805, "learning_rate": 9.955513927326233e-06, "loss": 0.4569, "step": 1137 }, { "epoch": 0.07128204325153854, "grad_norm": 0.86684128366375, "learning_rate": 9.955378810181225e-06, "loss": 0.4396, "step": 1138 }, { "epoch": 0.07134468125088085, "grad_norm": 1.0117445198628543, "learning_rate": 9.955243489071906e-06, "loss": 0.4851, "step": 1139 }, { "epoch": 0.07140731925022314, "grad_norm": 0.9510666838591837, "learning_rate": 9.955107964003847e-06, "loss": 0.465, "step": 1140 }, { "epoch": 0.07146995724956545, "grad_norm": 0.8990530930038272, "learning_rate": 9.954972234982622e-06, "loss": 0.4296, "step": 1141 }, { "epoch": 0.07153259524890775, "grad_norm": 0.8603565891570338, "learning_rate": 9.954836302013822e-06, "loss": 0.4578, "step": 1142 }, { "epoch": 0.07159523324825005, "grad_norm": 0.9302194647674883, "learning_rate": 9.95470016510304e-06, "loss": 0.4744, "step": 1143 }, { "epoch": 0.07165787124759235, "grad_norm": 0.9316150826999832, "learning_rate": 9.954563824255879e-06, "loss": 0.4912, "step": 1144 }, { "epoch": 0.07172050924693466, "grad_norm": 0.9718984299637353, "learning_rate": 9.954427279477951e-06, "loss": 0.4586, "step": 1145 }, { "epoch": 0.07178314724627695, "grad_norm": 0.990188305555676, "learning_rate": 9.954290530774877e-06, "loss": 0.4922, "step": 1146 }, { "epoch": 0.07184578524561926, "grad_norm": 0.9100260376288748, "learning_rate": 9.954153578152286e-06, "loss": 0.4557, "step": 1147 }, { "epoch": 0.07190842324496155, "grad_norm": 0.8663613440671539, "learning_rate": 9.954016421615813e-06, "loss": 0.462, "step": 1148 }, { "epoch": 0.07197106124430386, "grad_norm": 0.9142179382806003, "learning_rate": 9.953879061171106e-06, "loss": 0.4562, "step": 1149 }, { "epoch": 0.07203369924364615, "grad_norm": 0.9397616857576695, "learning_rate": 9.953741496823817e-06, "loss": 0.4575, "step": 1150 }, { "epoch": 0.07209633724298846, "grad_norm": 0.913691245532332, "learning_rate": 9.953603728579607e-06, "loss": 0.4117, "step": 1151 }, { "epoch": 0.07215897524233077, "grad_norm": 0.8825543135902681, "learning_rate": 9.953465756444149e-06, "loss": 0.4309, "step": 1152 }, { "epoch": 0.07222161324167306, "grad_norm": 0.8923141296343525, "learning_rate": 9.95332758042312e-06, "loss": 0.4331, "step": 1153 }, { "epoch": 0.07228425124101537, "grad_norm": 0.9238709178108122, "learning_rate": 9.95318920052221e-06, "loss": 0.4363, "step": 1154 }, { "epoch": 0.07234688924035766, "grad_norm": 0.9459094201711958, "learning_rate": 9.953050616747113e-06, "loss": 0.4498, "step": 1155 }, { "epoch": 0.07240952723969997, "grad_norm": 0.9396152795557088, "learning_rate": 9.952911829103532e-06, "loss": 0.5072, "step": 1156 }, { "epoch": 0.07247216523904226, "grad_norm": 0.8390940859911431, "learning_rate": 9.952772837597184e-06, "loss": 0.4305, "step": 1157 }, { "epoch": 0.07253480323838457, "grad_norm": 0.9714988848845277, "learning_rate": 9.952633642233785e-06, "loss": 0.5019, "step": 1158 }, { "epoch": 0.07259744123772686, "grad_norm": 0.9698975596699668, "learning_rate": 9.952494243019064e-06, "loss": 0.4481, "step": 1159 }, { "epoch": 0.07266007923706917, "grad_norm": 0.998845851989552, "learning_rate": 9.952354639958763e-06, "loss": 0.4691, "step": 1160 }, { "epoch": 0.07272271723641147, "grad_norm": 0.8250987923827157, "learning_rate": 9.952214833058624e-06, "loss": 0.5005, "step": 1161 }, { "epoch": 0.07278535523575377, "grad_norm": 0.9587165557526699, "learning_rate": 9.952074822324405e-06, "loss": 0.4952, "step": 1162 }, { "epoch": 0.07284799323509607, "grad_norm": 0.9679028423552817, "learning_rate": 9.951934607761866e-06, "loss": 0.447, "step": 1163 }, { "epoch": 0.07291063123443838, "grad_norm": 0.8418214590437509, "learning_rate": 9.95179418937678e-06, "loss": 0.4726, "step": 1164 }, { "epoch": 0.07297326923378067, "grad_norm": 0.871812113618552, "learning_rate": 9.951653567174926e-06, "loss": 0.439, "step": 1165 }, { "epoch": 0.07303590723312298, "grad_norm": 1.0064238103868552, "learning_rate": 9.951512741162093e-06, "loss": 0.4579, "step": 1166 }, { "epoch": 0.07309854523246527, "grad_norm": 1.0404181207242498, "learning_rate": 9.951371711344076e-06, "loss": 0.47, "step": 1167 }, { "epoch": 0.07316118323180758, "grad_norm": 0.8798062741709393, "learning_rate": 9.951230477726678e-06, "loss": 0.4195, "step": 1168 }, { "epoch": 0.07322382123114987, "grad_norm": 0.8846510464585108, "learning_rate": 9.951089040315718e-06, "loss": 0.4369, "step": 1169 }, { "epoch": 0.07328645923049218, "grad_norm": 0.7998512183174141, "learning_rate": 9.950947399117012e-06, "loss": 0.4409, "step": 1170 }, { "epoch": 0.07334909722983447, "grad_norm": 0.9490126707161151, "learning_rate": 9.950805554136393e-06, "loss": 0.4449, "step": 1171 }, { "epoch": 0.07341173522917678, "grad_norm": 0.9469023842744038, "learning_rate": 9.950663505379699e-06, "loss": 0.4668, "step": 1172 }, { "epoch": 0.07347437322851907, "grad_norm": 0.9921476776184577, "learning_rate": 9.950521252852775e-06, "loss": 0.4653, "step": 1173 }, { "epoch": 0.07353701122786138, "grad_norm": 0.8606166100754216, "learning_rate": 9.950378796561478e-06, "loss": 0.4442, "step": 1174 }, { "epoch": 0.07359964922720369, "grad_norm": 1.001117763064365, "learning_rate": 9.95023613651167e-06, "loss": 0.4428, "step": 1175 }, { "epoch": 0.07366228722654598, "grad_norm": 0.894424089470169, "learning_rate": 9.950093272709224e-06, "loss": 0.4829, "step": 1176 }, { "epoch": 0.07372492522588829, "grad_norm": 0.9090861431087188, "learning_rate": 9.949950205160022e-06, "loss": 0.4565, "step": 1177 }, { "epoch": 0.07378756322523058, "grad_norm": 0.9723573552227865, "learning_rate": 9.949806933869949e-06, "loss": 0.5018, "step": 1178 }, { "epoch": 0.07385020122457289, "grad_norm": 0.9381742406911614, "learning_rate": 9.949663458844904e-06, "loss": 0.433, "step": 1179 }, { "epoch": 0.07391283922391519, "grad_norm": 1.0242736448952792, "learning_rate": 9.949519780090792e-06, "loss": 0.4634, "step": 1180 }, { "epoch": 0.0739754772232575, "grad_norm": 0.8610151495552374, "learning_rate": 9.94937589761353e-06, "loss": 0.4226, "step": 1181 }, { "epoch": 0.07403811522259979, "grad_norm": 0.8747923216137132, "learning_rate": 9.949231811419036e-06, "loss": 0.4745, "step": 1182 }, { "epoch": 0.0741007532219421, "grad_norm": 0.9318842555194711, "learning_rate": 9.94908752151324e-06, "loss": 0.4799, "step": 1183 }, { "epoch": 0.07416339122128439, "grad_norm": 0.9225389818245735, "learning_rate": 9.948943027902085e-06, "loss": 0.4577, "step": 1184 }, { "epoch": 0.0742260292206267, "grad_norm": 0.8808203161591768, "learning_rate": 9.948798330591516e-06, "loss": 0.3909, "step": 1185 }, { "epoch": 0.07428866721996899, "grad_norm": 0.8694371232544301, "learning_rate": 9.948653429587487e-06, "loss": 0.4351, "step": 1186 }, { "epoch": 0.0743513052193113, "grad_norm": 0.8385342897620334, "learning_rate": 9.948508324895967e-06, "loss": 0.4181, "step": 1187 }, { "epoch": 0.07441394321865359, "grad_norm": 0.9709958983575211, "learning_rate": 9.948363016522926e-06, "loss": 0.4157, "step": 1188 }, { "epoch": 0.0744765812179959, "grad_norm": 0.8604417110685101, "learning_rate": 9.948217504474343e-06, "loss": 0.4138, "step": 1189 }, { "epoch": 0.07453921921733819, "grad_norm": 0.900180203594728, "learning_rate": 9.948071788756211e-06, "loss": 0.4126, "step": 1190 }, { "epoch": 0.0746018572166805, "grad_norm": 0.8048510443890734, "learning_rate": 9.947925869374524e-06, "loss": 0.4343, "step": 1191 }, { "epoch": 0.0746644952160228, "grad_norm": 0.8686742237974772, "learning_rate": 9.947779746335291e-06, "loss": 0.4321, "step": 1192 }, { "epoch": 0.0747271332153651, "grad_norm": 0.936783405606959, "learning_rate": 9.947633419644525e-06, "loss": 0.4558, "step": 1193 }, { "epoch": 0.0747897712147074, "grad_norm": 0.9124320706529733, "learning_rate": 9.947486889308249e-06, "loss": 0.4546, "step": 1194 }, { "epoch": 0.0748524092140497, "grad_norm": 0.9117615591529733, "learning_rate": 9.947340155332495e-06, "loss": 0.4121, "step": 1195 }, { "epoch": 0.07491504721339201, "grad_norm": 0.9744141301753017, "learning_rate": 9.9471932177233e-06, "loss": 0.4412, "step": 1196 }, { "epoch": 0.0749776852127343, "grad_norm": 1.0059087199773467, "learning_rate": 9.947046076486715e-06, "loss": 0.4682, "step": 1197 }, { "epoch": 0.07504032321207661, "grad_norm": 0.8194743559016224, "learning_rate": 9.946898731628795e-06, "loss": 0.4458, "step": 1198 }, { "epoch": 0.0751029612114189, "grad_norm": 0.9752424015351329, "learning_rate": 9.946751183155607e-06, "loss": 0.4375, "step": 1199 }, { "epoch": 0.07516559921076121, "grad_norm": 0.9549432557534264, "learning_rate": 9.946603431073221e-06, "loss": 0.4359, "step": 1200 }, { "epoch": 0.07522823721010351, "grad_norm": 0.9941861250100175, "learning_rate": 9.946455475387717e-06, "loss": 0.4687, "step": 1201 }, { "epoch": 0.07529087520944582, "grad_norm": 0.9022724180212576, "learning_rate": 9.94630731610519e-06, "loss": 0.4194, "step": 1202 }, { "epoch": 0.07535351320878811, "grad_norm": 0.8446332738052422, "learning_rate": 9.946158953231737e-06, "loss": 0.4484, "step": 1203 }, { "epoch": 0.07541615120813042, "grad_norm": 0.7636256876765098, "learning_rate": 9.946010386773462e-06, "loss": 0.5083, "step": 1204 }, { "epoch": 0.07547878920747271, "grad_norm": 0.9112928294176557, "learning_rate": 9.94586161673648e-06, "loss": 0.4392, "step": 1205 }, { "epoch": 0.07554142720681502, "grad_norm": 0.8813121173335156, "learning_rate": 9.945712643126919e-06, "loss": 0.421, "step": 1206 }, { "epoch": 0.07560406520615731, "grad_norm": 0.9123075791013374, "learning_rate": 9.945563465950905e-06, "loss": 0.4364, "step": 1207 }, { "epoch": 0.07566670320549962, "grad_norm": 0.8805298541638007, "learning_rate": 9.945414085214582e-06, "loss": 0.4412, "step": 1208 }, { "epoch": 0.07572934120484191, "grad_norm": 0.8699009932271607, "learning_rate": 9.945264500924097e-06, "loss": 0.4373, "step": 1209 }, { "epoch": 0.07579197920418422, "grad_norm": 0.943685212097875, "learning_rate": 9.945114713085607e-06, "loss": 0.4486, "step": 1210 }, { "epoch": 0.07585461720352651, "grad_norm": 0.8801967481923897, "learning_rate": 9.944964721705278e-06, "loss": 0.3975, "step": 1211 }, { "epoch": 0.07591725520286882, "grad_norm": 0.8223723930928067, "learning_rate": 9.944814526789284e-06, "loss": 0.477, "step": 1212 }, { "epoch": 0.07597989320221112, "grad_norm": 0.8875098819083301, "learning_rate": 9.944664128343806e-06, "loss": 0.4286, "step": 1213 }, { "epoch": 0.07604253120155342, "grad_norm": 0.8960322748018014, "learning_rate": 9.944513526375032e-06, "loss": 0.455, "step": 1214 }, { "epoch": 0.07610516920089572, "grad_norm": 0.9350673298224462, "learning_rate": 9.944362720889167e-06, "loss": 0.4672, "step": 1215 }, { "epoch": 0.07616780720023802, "grad_norm": 0.9466788065437881, "learning_rate": 9.944211711892414e-06, "loss": 0.4503, "step": 1216 }, { "epoch": 0.07623044519958033, "grad_norm": 0.9328923339335707, "learning_rate": 9.944060499390989e-06, "loss": 0.4613, "step": 1217 }, { "epoch": 0.07629308319892263, "grad_norm": 0.9121181139680061, "learning_rate": 9.943909083391115e-06, "loss": 0.4205, "step": 1218 }, { "epoch": 0.07635572119826493, "grad_norm": 0.8687352844174967, "learning_rate": 9.943757463899027e-06, "loss": 0.4938, "step": 1219 }, { "epoch": 0.07641835919760723, "grad_norm": 0.9719311931696941, "learning_rate": 9.943605640920962e-06, "loss": 0.457, "step": 1220 }, { "epoch": 0.07648099719694954, "grad_norm": 0.9217159937198494, "learning_rate": 9.943453614463174e-06, "loss": 0.4905, "step": 1221 }, { "epoch": 0.07654363519629183, "grad_norm": 0.9358187811657068, "learning_rate": 9.943301384531916e-06, "loss": 0.4387, "step": 1222 }, { "epoch": 0.07660627319563414, "grad_norm": 0.917428881965497, "learning_rate": 9.943148951133455e-06, "loss": 0.4739, "step": 1223 }, { "epoch": 0.07666891119497643, "grad_norm": 0.9622594182104627, "learning_rate": 9.942996314274067e-06, "loss": 0.455, "step": 1224 }, { "epoch": 0.07673154919431874, "grad_norm": 0.9184884607029525, "learning_rate": 9.942843473960033e-06, "loss": 0.4587, "step": 1225 }, { "epoch": 0.07679418719366103, "grad_norm": 0.9332928338547054, "learning_rate": 9.942690430197644e-06, "loss": 0.4077, "step": 1226 }, { "epoch": 0.07685682519300334, "grad_norm": 0.9500450505788541, "learning_rate": 9.9425371829932e-06, "loss": 0.4284, "step": 1227 }, { "epoch": 0.07691946319234563, "grad_norm": 0.814791174051221, "learning_rate": 9.942383732353007e-06, "loss": 0.4808, "step": 1228 }, { "epoch": 0.07698210119168794, "grad_norm": 0.987964401462945, "learning_rate": 9.942230078283383e-06, "loss": 0.465, "step": 1229 }, { "epoch": 0.07704473919103023, "grad_norm": 0.9806123709421981, "learning_rate": 9.94207622079065e-06, "loss": 0.4017, "step": 1230 }, { "epoch": 0.07710737719037254, "grad_norm": 0.868216845376054, "learning_rate": 9.941922159881146e-06, "loss": 0.441, "step": 1231 }, { "epoch": 0.07717001518971484, "grad_norm": 0.9290924344040146, "learning_rate": 9.941767895561205e-06, "loss": 0.4347, "step": 1232 }, { "epoch": 0.07723265318905714, "grad_norm": 0.761634539050343, "learning_rate": 9.941613427837182e-06, "loss": 0.4793, "step": 1233 }, { "epoch": 0.07729529118839944, "grad_norm": 0.9008101631017644, "learning_rate": 9.941458756715433e-06, "loss": 0.4637, "step": 1234 }, { "epoch": 0.07735792918774174, "grad_norm": 0.8718394908016194, "learning_rate": 9.941303882202325e-06, "loss": 0.4229, "step": 1235 }, { "epoch": 0.07742056718708404, "grad_norm": 0.9839948034352733, "learning_rate": 9.94114880430423e-06, "loss": 0.4419, "step": 1236 }, { "epoch": 0.07748320518642635, "grad_norm": 0.8509985578547442, "learning_rate": 9.940993523027535e-06, "loss": 0.4221, "step": 1237 }, { "epoch": 0.07754584318576864, "grad_norm": 0.8655700800330552, "learning_rate": 9.940838038378628e-06, "loss": 0.4801, "step": 1238 }, { "epoch": 0.07760848118511095, "grad_norm": 0.9774996127021218, "learning_rate": 9.940682350363913e-06, "loss": 0.4794, "step": 1239 }, { "epoch": 0.07767111918445326, "grad_norm": 0.9627491963876326, "learning_rate": 9.940526458989794e-06, "loss": 0.4618, "step": 1240 }, { "epoch": 0.07773375718379555, "grad_norm": 0.8609968737583685, "learning_rate": 9.940370364262689e-06, "loss": 0.4305, "step": 1241 }, { "epoch": 0.07779639518313786, "grad_norm": 0.8664149335450184, "learning_rate": 9.940214066189021e-06, "loss": 0.4321, "step": 1242 }, { "epoch": 0.07785903318248015, "grad_norm": 0.8928766383367529, "learning_rate": 9.940057564775225e-06, "loss": 0.4621, "step": 1243 }, { "epoch": 0.07792167118182246, "grad_norm": 0.8795047640106914, "learning_rate": 9.939900860027745e-06, "loss": 0.4803, "step": 1244 }, { "epoch": 0.07798430918116475, "grad_norm": 0.9309412281283268, "learning_rate": 9.93974395195303e-06, "loss": 0.4722, "step": 1245 }, { "epoch": 0.07804694718050706, "grad_norm": 0.800948587336539, "learning_rate": 9.939586840557533e-06, "loss": 0.4114, "step": 1246 }, { "epoch": 0.07810958517984935, "grad_norm": 0.906322814345792, "learning_rate": 9.939429525847728e-06, "loss": 0.4435, "step": 1247 }, { "epoch": 0.07817222317919166, "grad_norm": 0.9269134628177128, "learning_rate": 9.939272007830086e-06, "loss": 0.4992, "step": 1248 }, { "epoch": 0.07823486117853395, "grad_norm": 0.8863414158141384, "learning_rate": 9.93911428651109e-06, "loss": 0.4422, "step": 1249 }, { "epoch": 0.07829749917787626, "grad_norm": 0.8877966588323426, "learning_rate": 9.938956361897235e-06, "loss": 0.4551, "step": 1250 }, { "epoch": 0.07836013717721856, "grad_norm": 0.87633714189077, "learning_rate": 9.938798233995018e-06, "loss": 0.4741, "step": 1251 }, { "epoch": 0.07842277517656086, "grad_norm": 0.8763069140427436, "learning_rate": 9.938639902810951e-06, "loss": 0.4299, "step": 1252 }, { "epoch": 0.07848541317590316, "grad_norm": 0.8774112362888605, "learning_rate": 9.938481368351547e-06, "loss": 0.4309, "step": 1253 }, { "epoch": 0.07854805117524546, "grad_norm": 1.1502880938645852, "learning_rate": 9.938322630623334e-06, "loss": 0.3964, "step": 1254 }, { "epoch": 0.07861068917458776, "grad_norm": 0.8727638113279288, "learning_rate": 9.938163689632844e-06, "loss": 0.4306, "step": 1255 }, { "epoch": 0.07867332717393007, "grad_norm": 0.9779276797264517, "learning_rate": 9.938004545386622e-06, "loss": 0.4282, "step": 1256 }, { "epoch": 0.07873596517327236, "grad_norm": 0.8820157332683263, "learning_rate": 9.937845197891215e-06, "loss": 0.398, "step": 1257 }, { "epoch": 0.07879860317261467, "grad_norm": 0.9529335465898677, "learning_rate": 9.937685647153184e-06, "loss": 0.4527, "step": 1258 }, { "epoch": 0.07886124117195696, "grad_norm": 0.9581635974408398, "learning_rate": 9.937525893179095e-06, "loss": 0.4644, "step": 1259 }, { "epoch": 0.07892387917129927, "grad_norm": 0.9282909793018385, "learning_rate": 9.937365935975523e-06, "loss": 0.4515, "step": 1260 }, { "epoch": 0.07898651717064158, "grad_norm": 0.9965946658055406, "learning_rate": 9.937205775549052e-06, "loss": 0.5241, "step": 1261 }, { "epoch": 0.07904915516998387, "grad_norm": 0.8895398121273224, "learning_rate": 9.937045411906276e-06, "loss": 0.4938, "step": 1262 }, { "epoch": 0.07911179316932618, "grad_norm": 0.927802947928921, "learning_rate": 9.936884845053796e-06, "loss": 0.4549, "step": 1263 }, { "epoch": 0.07917443116866847, "grad_norm": 0.9068969745148321, "learning_rate": 9.936724074998216e-06, "loss": 0.4416, "step": 1264 }, { "epoch": 0.07923706916801078, "grad_norm": 0.9291953169177368, "learning_rate": 9.93656310174616e-06, "loss": 0.4261, "step": 1265 }, { "epoch": 0.07929970716735307, "grad_norm": 0.8863548289019633, "learning_rate": 9.93640192530425e-06, "loss": 0.4519, "step": 1266 }, { "epoch": 0.07936234516669538, "grad_norm": 0.9249054663138242, "learning_rate": 9.936240545679119e-06, "loss": 0.4502, "step": 1267 }, { "epoch": 0.07942498316603767, "grad_norm": 0.8953713920354981, "learning_rate": 9.936078962877411e-06, "loss": 0.4572, "step": 1268 }, { "epoch": 0.07948762116537998, "grad_norm": 0.9130688534532823, "learning_rate": 9.935917176905778e-06, "loss": 0.4346, "step": 1269 }, { "epoch": 0.07955025916472228, "grad_norm": 0.8785494083673833, "learning_rate": 9.935755187770876e-06, "loss": 0.4787, "step": 1270 }, { "epoch": 0.07961289716406458, "grad_norm": 0.9881268488793725, "learning_rate": 9.935592995479376e-06, "loss": 0.4542, "step": 1271 }, { "epoch": 0.07967553516340688, "grad_norm": 0.885360927274521, "learning_rate": 9.935430600037952e-06, "loss": 0.413, "step": 1272 }, { "epoch": 0.07973817316274918, "grad_norm": 0.9006873165108479, "learning_rate": 9.935268001453289e-06, "loss": 0.4212, "step": 1273 }, { "epoch": 0.07980081116209148, "grad_norm": 0.900047285868892, "learning_rate": 9.935105199732078e-06, "loss": 0.3857, "step": 1274 }, { "epoch": 0.07986344916143379, "grad_norm": 0.8897202616806049, "learning_rate": 9.934942194881021e-06, "loss": 0.465, "step": 1275 }, { "epoch": 0.07992608716077608, "grad_norm": 0.9246008928186844, "learning_rate": 9.934778986906828e-06, "loss": 0.449, "step": 1276 }, { "epoch": 0.07998872516011839, "grad_norm": 0.9131053405363146, "learning_rate": 9.934615575816215e-06, "loss": 0.4119, "step": 1277 }, { "epoch": 0.08005136315946068, "grad_norm": 1.0304607358113678, "learning_rate": 9.934451961615909e-06, "loss": 0.4719, "step": 1278 }, { "epoch": 0.08011400115880299, "grad_norm": 0.9664580827729918, "learning_rate": 9.934288144312645e-06, "loss": 0.4717, "step": 1279 }, { "epoch": 0.08017663915814528, "grad_norm": 0.9119044109810605, "learning_rate": 9.934124123913164e-06, "loss": 0.4421, "step": 1280 }, { "epoch": 0.08023927715748759, "grad_norm": 0.9049418121460919, "learning_rate": 9.93395990042422e-06, "loss": 0.4213, "step": 1281 }, { "epoch": 0.0803019151568299, "grad_norm": 0.9481312747839066, "learning_rate": 9.93379547385257e-06, "loss": 0.429, "step": 1282 }, { "epoch": 0.08036455315617219, "grad_norm": 0.8311396097185462, "learning_rate": 9.933630844204982e-06, "loss": 0.4728, "step": 1283 }, { "epoch": 0.0804271911555145, "grad_norm": 0.9587217944843172, "learning_rate": 9.933466011488233e-06, "loss": 0.4413, "step": 1284 }, { "epoch": 0.08048982915485679, "grad_norm": 0.9248487172111027, "learning_rate": 9.933300975709109e-06, "loss": 0.4306, "step": 1285 }, { "epoch": 0.0805524671541991, "grad_norm": 0.9379860293473867, "learning_rate": 9.933135736874399e-06, "loss": 0.4703, "step": 1286 }, { "epoch": 0.0806151051535414, "grad_norm": 0.9893959058113893, "learning_rate": 9.932970294990907e-06, "loss": 0.4706, "step": 1287 }, { "epoch": 0.0806777431528837, "grad_norm": 0.9474194553271569, "learning_rate": 9.932804650065442e-06, "loss": 0.4774, "step": 1288 }, { "epoch": 0.080740381152226, "grad_norm": 0.9244822708182773, "learning_rate": 9.932638802104822e-06, "loss": 0.4958, "step": 1289 }, { "epoch": 0.0808030191515683, "grad_norm": 0.7866499122523637, "learning_rate": 9.932472751115873e-06, "loss": 0.4303, "step": 1290 }, { "epoch": 0.0808656571509106, "grad_norm": 0.8189372240258482, "learning_rate": 9.93230649710543e-06, "loss": 0.404, "step": 1291 }, { "epoch": 0.0809282951502529, "grad_norm": 0.9481294493303128, "learning_rate": 9.932140040080337e-06, "loss": 0.4409, "step": 1292 }, { "epoch": 0.0809909331495952, "grad_norm": 0.9847854211311461, "learning_rate": 9.931973380047446e-06, "loss": 0.4796, "step": 1293 }, { "epoch": 0.0810535711489375, "grad_norm": 0.8757564692839778, "learning_rate": 9.931806517013612e-06, "loss": 0.4291, "step": 1294 }, { "epoch": 0.0811162091482798, "grad_norm": 0.786392225815852, "learning_rate": 9.931639450985708e-06, "loss": 0.386, "step": 1295 }, { "epoch": 0.08117884714762211, "grad_norm": 0.882097348131154, "learning_rate": 9.93147218197061e-06, "loss": 0.4324, "step": 1296 }, { "epoch": 0.0812414851469644, "grad_norm": 0.7867297251863316, "learning_rate": 9.9313047099752e-06, "loss": 0.4454, "step": 1297 }, { "epoch": 0.08130412314630671, "grad_norm": 0.8409534747623244, "learning_rate": 9.931137035006373e-06, "loss": 0.4469, "step": 1298 }, { "epoch": 0.081366761145649, "grad_norm": 0.9400948533190642, "learning_rate": 9.93096915707103e-06, "loss": 0.4318, "step": 1299 }, { "epoch": 0.08142939914499131, "grad_norm": 0.9088292303879242, "learning_rate": 9.930801076176083e-06, "loss": 0.4493, "step": 1300 }, { "epoch": 0.0814920371443336, "grad_norm": 0.9081349800162409, "learning_rate": 9.930632792328446e-06, "loss": 0.4276, "step": 1301 }, { "epoch": 0.08155467514367591, "grad_norm": 0.9634553241936091, "learning_rate": 9.930464305535051e-06, "loss": 0.4517, "step": 1302 }, { "epoch": 0.0816173131430182, "grad_norm": 0.8289762816688031, "learning_rate": 9.930295615802827e-06, "loss": 0.3868, "step": 1303 }, { "epoch": 0.08167995114236051, "grad_norm": 0.8967478684707443, "learning_rate": 9.930126723138722e-06, "loss": 0.4571, "step": 1304 }, { "epoch": 0.08174258914170282, "grad_norm": 0.8963523149097937, "learning_rate": 9.929957627549686e-06, "loss": 0.4417, "step": 1305 }, { "epoch": 0.08180522714104511, "grad_norm": 0.9101153980554354, "learning_rate": 9.929788329042679e-06, "loss": 0.4509, "step": 1306 }, { "epoch": 0.08186786514038742, "grad_norm": 0.8317269853181682, "learning_rate": 9.92961882762467e-06, "loss": 0.4175, "step": 1307 }, { "epoch": 0.08193050313972972, "grad_norm": 0.8959554798876624, "learning_rate": 9.929449123302633e-06, "loss": 0.4938, "step": 1308 }, { "epoch": 0.08199314113907202, "grad_norm": 0.8904192439558701, "learning_rate": 9.929279216083556e-06, "loss": 0.4159, "step": 1309 }, { "epoch": 0.08205577913841432, "grad_norm": 0.9513386395678146, "learning_rate": 9.929109105974432e-06, "loss": 0.4604, "step": 1310 }, { "epoch": 0.08211841713775662, "grad_norm": 0.9133337279862005, "learning_rate": 9.928938792982263e-06, "loss": 0.4739, "step": 1311 }, { "epoch": 0.08218105513709892, "grad_norm": 0.9067196757715565, "learning_rate": 9.928768277114059e-06, "loss": 0.474, "step": 1312 }, { "epoch": 0.08224369313644123, "grad_norm": 0.8665390477708504, "learning_rate": 9.928597558376836e-06, "loss": 0.4165, "step": 1313 }, { "epoch": 0.08230633113578352, "grad_norm": 0.8480357180498844, "learning_rate": 9.928426636777622e-06, "loss": 0.4452, "step": 1314 }, { "epoch": 0.08236896913512583, "grad_norm": 0.9529430768707674, "learning_rate": 9.928255512323455e-06, "loss": 0.4561, "step": 1315 }, { "epoch": 0.08243160713446812, "grad_norm": 0.8525850547021465, "learning_rate": 9.928084185021374e-06, "loss": 0.3786, "step": 1316 }, { "epoch": 0.08249424513381043, "grad_norm": 0.8727674759196122, "learning_rate": 9.927912654878436e-06, "loss": 0.4265, "step": 1317 }, { "epoch": 0.08255688313315272, "grad_norm": 0.8859945258540228, "learning_rate": 9.927740921901696e-06, "loss": 0.435, "step": 1318 }, { "epoch": 0.08261952113249503, "grad_norm": 0.8579271657598861, "learning_rate": 9.927568986098227e-06, "loss": 0.4167, "step": 1319 }, { "epoch": 0.08268215913183732, "grad_norm": 0.9209203133023071, "learning_rate": 9.927396847475103e-06, "loss": 0.4008, "step": 1320 }, { "epoch": 0.08274479713117963, "grad_norm": 0.950579500825011, "learning_rate": 9.92722450603941e-06, "loss": 0.4381, "step": 1321 }, { "epoch": 0.08280743513052193, "grad_norm": 0.8841469672713104, "learning_rate": 9.927051961798244e-06, "loss": 0.44, "step": 1322 }, { "epoch": 0.08287007312986423, "grad_norm": 0.9776301900190234, "learning_rate": 9.926879214758703e-06, "loss": 0.4132, "step": 1323 }, { "epoch": 0.08293271112920653, "grad_norm": 0.8789002799842064, "learning_rate": 9.926706264927898e-06, "loss": 0.445, "step": 1324 }, { "epoch": 0.08299534912854883, "grad_norm": 0.8238934117054088, "learning_rate": 9.92653311231295e-06, "loss": 0.3836, "step": 1325 }, { "epoch": 0.08305798712789114, "grad_norm": 0.8682642135144991, "learning_rate": 9.926359756920985e-06, "loss": 0.4378, "step": 1326 }, { "epoch": 0.08312062512723344, "grad_norm": 0.9400329264055638, "learning_rate": 9.926186198759136e-06, "loss": 0.4377, "step": 1327 }, { "epoch": 0.08318326312657574, "grad_norm": 0.8131060153557612, "learning_rate": 9.926012437834552e-06, "loss": 0.4829, "step": 1328 }, { "epoch": 0.08324590112591804, "grad_norm": 0.7975465088742627, "learning_rate": 9.92583847415438e-06, "loss": 0.4992, "step": 1329 }, { "epoch": 0.08330853912526034, "grad_norm": 0.9033009330790217, "learning_rate": 9.92566430772578e-06, "loss": 0.4689, "step": 1330 }, { "epoch": 0.08337117712460264, "grad_norm": 0.9395848150659001, "learning_rate": 9.925489938555926e-06, "loss": 0.4447, "step": 1331 }, { "epoch": 0.08343381512394495, "grad_norm": 0.8972050483590744, "learning_rate": 9.92531536665199e-06, "loss": 0.4537, "step": 1332 }, { "epoch": 0.08349645312328724, "grad_norm": 0.8545733599726286, "learning_rate": 9.925140592021161e-06, "loss": 0.4396, "step": 1333 }, { "epoch": 0.08355909112262955, "grad_norm": 0.8728196893996324, "learning_rate": 9.924965614670629e-06, "loss": 0.4726, "step": 1334 }, { "epoch": 0.08362172912197184, "grad_norm": 0.8394858913774375, "learning_rate": 9.924790434607599e-06, "loss": 0.4388, "step": 1335 }, { "epoch": 0.08368436712131415, "grad_norm": 0.8583505947935397, "learning_rate": 9.924615051839281e-06, "loss": 0.4035, "step": 1336 }, { "epoch": 0.08374700512065644, "grad_norm": 0.8982565530363668, "learning_rate": 9.924439466372894e-06, "loss": 0.4209, "step": 1337 }, { "epoch": 0.08380964311999875, "grad_norm": 0.9092155741048061, "learning_rate": 9.924263678215664e-06, "loss": 0.4382, "step": 1338 }, { "epoch": 0.08387228111934104, "grad_norm": 0.8799977148733958, "learning_rate": 9.924087687374826e-06, "loss": 0.4319, "step": 1339 }, { "epoch": 0.08393491911868335, "grad_norm": 0.8798112161212216, "learning_rate": 9.923911493857627e-06, "loss": 0.4663, "step": 1340 }, { "epoch": 0.08399755711802565, "grad_norm": 0.833742595524073, "learning_rate": 9.923735097671315e-06, "loss": 0.416, "step": 1341 }, { "epoch": 0.08406019511736795, "grad_norm": 0.8446554140721072, "learning_rate": 9.923558498823154e-06, "loss": 0.4383, "step": 1342 }, { "epoch": 0.08412283311671025, "grad_norm": 0.8598310260829837, "learning_rate": 9.92338169732041e-06, "loss": 0.4416, "step": 1343 }, { "epoch": 0.08418547111605255, "grad_norm": 0.9235150111263606, "learning_rate": 9.923204693170362e-06, "loss": 0.4812, "step": 1344 }, { "epoch": 0.08424810911539485, "grad_norm": 0.9270111754100334, "learning_rate": 9.923027486380296e-06, "loss": 0.5047, "step": 1345 }, { "epoch": 0.08431074711473716, "grad_norm": 0.8926653877662165, "learning_rate": 9.922850076957505e-06, "loss": 0.415, "step": 1346 }, { "epoch": 0.08437338511407945, "grad_norm": 0.8142186964379081, "learning_rate": 9.922672464909289e-06, "loss": 0.4081, "step": 1347 }, { "epoch": 0.08443602311342176, "grad_norm": 0.8869852971594008, "learning_rate": 9.922494650242962e-06, "loss": 0.455, "step": 1348 }, { "epoch": 0.08449866111276406, "grad_norm": 0.9369135446675806, "learning_rate": 9.922316632965841e-06, "loss": 0.4474, "step": 1349 }, { "epoch": 0.08456129911210636, "grad_norm": 0.8888503030737285, "learning_rate": 9.922138413085254e-06, "loss": 0.4679, "step": 1350 }, { "epoch": 0.08462393711144867, "grad_norm": 0.8820582315879134, "learning_rate": 9.921959990608536e-06, "loss": 0.382, "step": 1351 }, { "epoch": 0.08468657511079096, "grad_norm": 0.9652105790874645, "learning_rate": 9.921781365543033e-06, "loss": 0.4272, "step": 1352 }, { "epoch": 0.08474921311013327, "grad_norm": 0.8993888060598315, "learning_rate": 9.921602537896093e-06, "loss": 0.3956, "step": 1353 }, { "epoch": 0.08481185110947556, "grad_norm": 0.9249119667557419, "learning_rate": 9.921423507675081e-06, "loss": 0.4624, "step": 1354 }, { "epoch": 0.08487448910881787, "grad_norm": 0.8673504778088543, "learning_rate": 9.921244274887363e-06, "loss": 0.4481, "step": 1355 }, { "epoch": 0.08493712710816016, "grad_norm": 0.9904119354222318, "learning_rate": 9.921064839540318e-06, "loss": 0.4897, "step": 1356 }, { "epoch": 0.08499976510750247, "grad_norm": 0.9422069615993155, "learning_rate": 9.920885201641329e-06, "loss": 0.4228, "step": 1357 }, { "epoch": 0.08506240310684476, "grad_norm": 0.8877408695069506, "learning_rate": 9.920705361197793e-06, "loss": 0.4772, "step": 1358 }, { "epoch": 0.08512504110618707, "grad_norm": 0.885646440879506, "learning_rate": 9.920525318217111e-06, "loss": 0.4647, "step": 1359 }, { "epoch": 0.08518767910552937, "grad_norm": 0.875424777824192, "learning_rate": 9.920345072706695e-06, "loss": 0.4456, "step": 1360 }, { "epoch": 0.08525031710487167, "grad_norm": 0.9034189648162483, "learning_rate": 9.92016462467396e-06, "loss": 0.4691, "step": 1361 }, { "epoch": 0.08531295510421397, "grad_norm": 0.9726799929197013, "learning_rate": 9.919983974126336e-06, "loss": 0.4846, "step": 1362 }, { "epoch": 0.08537559310355627, "grad_norm": 0.9755037733795654, "learning_rate": 9.91980312107126e-06, "loss": 0.4397, "step": 1363 }, { "epoch": 0.08543823110289857, "grad_norm": 0.8138341896033927, "learning_rate": 9.919622065516173e-06, "loss": 0.405, "step": 1364 }, { "epoch": 0.08550086910224088, "grad_norm": 0.917193139745396, "learning_rate": 9.91944080746853e-06, "loss": 0.4345, "step": 1365 }, { "epoch": 0.08556350710158317, "grad_norm": 0.8377839301030301, "learning_rate": 9.91925934693579e-06, "loss": 0.4406, "step": 1366 }, { "epoch": 0.08562614510092548, "grad_norm": 0.8899823493585649, "learning_rate": 9.919077683925422e-06, "loss": 0.467, "step": 1367 }, { "epoch": 0.08568878310026777, "grad_norm": 0.8999683179078123, "learning_rate": 9.918895818444903e-06, "loss": 0.4971, "step": 1368 }, { "epoch": 0.08575142109961008, "grad_norm": 0.8940180727154063, "learning_rate": 9.91871375050172e-06, "loss": 0.4056, "step": 1369 }, { "epoch": 0.08581405909895239, "grad_norm": 0.9125008930517028, "learning_rate": 9.918531480103365e-06, "loss": 0.4538, "step": 1370 }, { "epoch": 0.08587669709829468, "grad_norm": 0.9183663677437494, "learning_rate": 9.918349007257341e-06, "loss": 0.4976, "step": 1371 }, { "epoch": 0.08593933509763699, "grad_norm": 0.8691737245020468, "learning_rate": 9.918166331971162e-06, "loss": 0.4454, "step": 1372 }, { "epoch": 0.08600197309697928, "grad_norm": 0.8946216830516855, "learning_rate": 9.91798345425234e-06, "loss": 0.4577, "step": 1373 }, { "epoch": 0.08606461109632159, "grad_norm": 0.9250315219000506, "learning_rate": 9.917800374108408e-06, "loss": 0.4001, "step": 1374 }, { "epoch": 0.08612724909566388, "grad_norm": 0.8183727896455611, "learning_rate": 9.9176170915469e-06, "loss": 0.4971, "step": 1375 }, { "epoch": 0.08618988709500619, "grad_norm": 0.9525050032681599, "learning_rate": 9.91743360657536e-06, "loss": 0.4912, "step": 1376 }, { "epoch": 0.08625252509434848, "grad_norm": 0.9217854526680355, "learning_rate": 9.91724991920134e-06, "loss": 0.4685, "step": 1377 }, { "epoch": 0.08631516309369079, "grad_norm": 0.8973605701124336, "learning_rate": 9.9170660294324e-06, "loss": 0.4683, "step": 1378 }, { "epoch": 0.08637780109303309, "grad_norm": 0.8564823704358066, "learning_rate": 9.91688193727611e-06, "loss": 0.5028, "step": 1379 }, { "epoch": 0.08644043909237539, "grad_norm": 0.9878513312830047, "learning_rate": 9.916697642740047e-06, "loss": 0.4667, "step": 1380 }, { "epoch": 0.08650307709171769, "grad_norm": 0.9783682440705049, "learning_rate": 9.916513145831797e-06, "loss": 0.4422, "step": 1381 }, { "epoch": 0.08656571509106, "grad_norm": 0.8592352311945686, "learning_rate": 9.916328446558951e-06, "loss": 0.4457, "step": 1382 }, { "epoch": 0.08662835309040229, "grad_norm": 0.8591974827790301, "learning_rate": 9.916143544929115e-06, "loss": 0.3973, "step": 1383 }, { "epoch": 0.0866909910897446, "grad_norm": 0.8699453794572631, "learning_rate": 9.915958440949899e-06, "loss": 0.455, "step": 1384 }, { "epoch": 0.08675362908908689, "grad_norm": 0.806697157483979, "learning_rate": 9.915773134628921e-06, "loss": 0.3977, "step": 1385 }, { "epoch": 0.0868162670884292, "grad_norm": 0.9189914250062717, "learning_rate": 9.915587625973807e-06, "loss": 0.4466, "step": 1386 }, { "epoch": 0.08687890508777149, "grad_norm": 0.8733466181604178, "learning_rate": 9.915401914992196e-06, "loss": 0.4361, "step": 1387 }, { "epoch": 0.0869415430871138, "grad_norm": 0.9079970862157909, "learning_rate": 9.915216001691728e-06, "loss": 0.456, "step": 1388 }, { "epoch": 0.08700418108645609, "grad_norm": 0.8694976093234688, "learning_rate": 9.915029886080058e-06, "loss": 0.4943, "step": 1389 }, { "epoch": 0.0870668190857984, "grad_norm": 0.9637066595052954, "learning_rate": 9.914843568164845e-06, "loss": 0.4647, "step": 1390 }, { "epoch": 0.08712945708514071, "grad_norm": 0.8263936324208578, "learning_rate": 9.914657047953758e-06, "loss": 0.4377, "step": 1391 }, { "epoch": 0.087192095084483, "grad_norm": 0.93944505450495, "learning_rate": 9.914470325454476e-06, "loss": 0.4696, "step": 1392 }, { "epoch": 0.08725473308382531, "grad_norm": 0.810240495384967, "learning_rate": 9.914283400674683e-06, "loss": 0.3892, "step": 1393 }, { "epoch": 0.0873173710831676, "grad_norm": 0.9182884733742914, "learning_rate": 9.914096273622072e-06, "loss": 0.4375, "step": 1394 }, { "epoch": 0.08738000908250991, "grad_norm": 0.9046982775831647, "learning_rate": 9.913908944304346e-06, "loss": 0.4347, "step": 1395 }, { "epoch": 0.0874426470818522, "grad_norm": 0.8632876319351713, "learning_rate": 9.913721412729218e-06, "loss": 0.4199, "step": 1396 }, { "epoch": 0.08750528508119451, "grad_norm": 0.9100003424736227, "learning_rate": 9.913533678904403e-06, "loss": 0.4757, "step": 1397 }, { "epoch": 0.0875679230805368, "grad_norm": 0.9096000087088054, "learning_rate": 9.91334574283763e-06, "loss": 0.4497, "step": 1398 }, { "epoch": 0.08763056107987911, "grad_norm": 0.7716644198701434, "learning_rate": 9.913157604536636e-06, "loss": 0.4921, "step": 1399 }, { "epoch": 0.0876931990792214, "grad_norm": 0.9124959333369587, "learning_rate": 9.912969264009162e-06, "loss": 0.4347, "step": 1400 }, { "epoch": 0.08775583707856371, "grad_norm": 0.9667904475252258, "learning_rate": 9.91278072126296e-06, "loss": 0.4828, "step": 1401 }, { "epoch": 0.08781847507790601, "grad_norm": 0.7857968307570329, "learning_rate": 9.912591976305794e-06, "loss": 0.5058, "step": 1402 }, { "epoch": 0.08788111307724832, "grad_norm": 0.8812932983947236, "learning_rate": 9.912403029145428e-06, "loss": 0.4393, "step": 1403 }, { "epoch": 0.08794375107659061, "grad_norm": 0.7834902756080955, "learning_rate": 9.912213879789643e-06, "loss": 0.4096, "step": 1404 }, { "epoch": 0.08800638907593292, "grad_norm": 0.9606828582098004, "learning_rate": 9.912024528246222e-06, "loss": 0.488, "step": 1405 }, { "epoch": 0.08806902707527521, "grad_norm": 0.9592104482722726, "learning_rate": 9.91183497452296e-06, "loss": 0.4315, "step": 1406 }, { "epoch": 0.08813166507461752, "grad_norm": 0.8799191410829605, "learning_rate": 9.911645218627659e-06, "loss": 0.4281, "step": 1407 }, { "epoch": 0.08819430307395981, "grad_norm": 0.8866016612584962, "learning_rate": 9.91145526056813e-06, "loss": 0.4863, "step": 1408 }, { "epoch": 0.08825694107330212, "grad_norm": 0.8490503006788033, "learning_rate": 9.911265100352188e-06, "loss": 0.4789, "step": 1409 }, { "epoch": 0.08831957907264441, "grad_norm": 0.8544828785789507, "learning_rate": 9.911074737987665e-06, "loss": 0.4077, "step": 1410 }, { "epoch": 0.08838221707198672, "grad_norm": 0.8324897233725786, "learning_rate": 9.910884173482395e-06, "loss": 0.445, "step": 1411 }, { "epoch": 0.08844485507132901, "grad_norm": 0.9380870185865814, "learning_rate": 9.91069340684422e-06, "loss": 0.4337, "step": 1412 }, { "epoch": 0.08850749307067132, "grad_norm": 0.9284636077504759, "learning_rate": 9.910502438080993e-06, "loss": 0.457, "step": 1413 }, { "epoch": 0.08857013107001363, "grad_norm": 0.9537935376045981, "learning_rate": 9.910311267200574e-06, "loss": 0.4604, "step": 1414 }, { "epoch": 0.08863276906935592, "grad_norm": 0.8210276047848626, "learning_rate": 9.910119894210833e-06, "loss": 0.4312, "step": 1415 }, { "epoch": 0.08869540706869823, "grad_norm": 0.8788326891700143, "learning_rate": 9.909928319119644e-06, "loss": 0.4299, "step": 1416 }, { "epoch": 0.08875804506804053, "grad_norm": 0.9051767739822573, "learning_rate": 9.909736541934893e-06, "loss": 0.457, "step": 1417 }, { "epoch": 0.08882068306738283, "grad_norm": 0.9201199948958599, "learning_rate": 9.909544562664477e-06, "loss": 0.4669, "step": 1418 }, { "epoch": 0.08888332106672513, "grad_norm": 0.8803563775633492, "learning_rate": 9.909352381316295e-06, "loss": 0.4063, "step": 1419 }, { "epoch": 0.08894595906606743, "grad_norm": 0.8903313042330325, "learning_rate": 9.90915999789826e-06, "loss": 0.437, "step": 1420 }, { "epoch": 0.08900859706540973, "grad_norm": 0.9590001200987133, "learning_rate": 9.908967412418285e-06, "loss": 0.46, "step": 1421 }, { "epoch": 0.08907123506475204, "grad_norm": 0.8755420771248582, "learning_rate": 9.908774624884301e-06, "loss": 0.4144, "step": 1422 }, { "epoch": 0.08913387306409433, "grad_norm": 0.9814485717329147, "learning_rate": 9.908581635304241e-06, "loss": 0.4806, "step": 1423 }, { "epoch": 0.08919651106343664, "grad_norm": 0.8078865184386038, "learning_rate": 9.908388443686053e-06, "loss": 0.4408, "step": 1424 }, { "epoch": 0.08925914906277893, "grad_norm": 0.86630409433829, "learning_rate": 9.908195050037683e-06, "loss": 0.4376, "step": 1425 }, { "epoch": 0.08932178706212124, "grad_norm": 0.9222234286120634, "learning_rate": 9.908001454367096e-06, "loss": 0.4613, "step": 1426 }, { "epoch": 0.08938442506146353, "grad_norm": 0.9260909584738123, "learning_rate": 9.907807656682256e-06, "loss": 0.4783, "step": 1427 }, { "epoch": 0.08944706306080584, "grad_norm": 0.9026631278853621, "learning_rate": 9.907613656991143e-06, "loss": 0.4452, "step": 1428 }, { "epoch": 0.08950970106014813, "grad_norm": 0.8165509404055101, "learning_rate": 9.90741945530174e-06, "loss": 0.4284, "step": 1429 }, { "epoch": 0.08957233905949044, "grad_norm": 0.8317573036912767, "learning_rate": 9.907225051622042e-06, "loss": 0.4415, "step": 1430 }, { "epoch": 0.08963497705883273, "grad_norm": 0.9601241569944564, "learning_rate": 9.90703044596005e-06, "loss": 0.4912, "step": 1431 }, { "epoch": 0.08969761505817504, "grad_norm": 0.8710323141700268, "learning_rate": 9.906835638323775e-06, "loss": 0.4729, "step": 1432 }, { "epoch": 0.08976025305751734, "grad_norm": 0.8429993595610012, "learning_rate": 9.906640628721234e-06, "loss": 0.4313, "step": 1433 }, { "epoch": 0.08982289105685964, "grad_norm": 0.8917161274637533, "learning_rate": 9.906445417160452e-06, "loss": 0.4105, "step": 1434 }, { "epoch": 0.08988552905620195, "grad_norm": 0.8862198213063751, "learning_rate": 9.906250003649469e-06, "loss": 0.4122, "step": 1435 }, { "epoch": 0.08994816705554425, "grad_norm": 0.9225951552117314, "learning_rate": 9.906054388196324e-06, "loss": 0.4775, "step": 1436 }, { "epoch": 0.09001080505488655, "grad_norm": 0.9221453391753531, "learning_rate": 9.90585857080907e-06, "loss": 0.4715, "step": 1437 }, { "epoch": 0.09007344305422885, "grad_norm": 0.9735940052682739, "learning_rate": 9.905662551495766e-06, "loss": 0.4651, "step": 1438 }, { "epoch": 0.09013608105357115, "grad_norm": 0.9276860588843439, "learning_rate": 9.905466330264481e-06, "loss": 0.464, "step": 1439 }, { "epoch": 0.09019871905291345, "grad_norm": 0.864940857680695, "learning_rate": 9.90526990712329e-06, "loss": 0.4238, "step": 1440 }, { "epoch": 0.09026135705225576, "grad_norm": 0.9008822748292902, "learning_rate": 9.905073282080281e-06, "loss": 0.4235, "step": 1441 }, { "epoch": 0.09032399505159805, "grad_norm": 0.8855782072529085, "learning_rate": 9.904876455143545e-06, "loss": 0.3939, "step": 1442 }, { "epoch": 0.09038663305094036, "grad_norm": 0.788617731769824, "learning_rate": 9.904679426321184e-06, "loss": 0.485, "step": 1443 }, { "epoch": 0.09044927105028265, "grad_norm": 0.8366924188789073, "learning_rate": 9.904482195621308e-06, "loss": 0.4081, "step": 1444 }, { "epoch": 0.09051190904962496, "grad_norm": 0.9083772926928925, "learning_rate": 9.904284763052032e-06, "loss": 0.4131, "step": 1445 }, { "epoch": 0.09057454704896725, "grad_norm": 0.8951185972984395, "learning_rate": 9.904087128621487e-06, "loss": 0.4466, "step": 1446 }, { "epoch": 0.09063718504830956, "grad_norm": 0.9161670139002976, "learning_rate": 9.903889292337804e-06, "loss": 0.421, "step": 1447 }, { "epoch": 0.09069982304765185, "grad_norm": 0.7664988393569152, "learning_rate": 9.90369125420913e-06, "loss": 0.4715, "step": 1448 }, { "epoch": 0.09076246104699416, "grad_norm": 0.8829658941645271, "learning_rate": 9.903493014243613e-06, "loss": 0.4458, "step": 1449 }, { "epoch": 0.09082509904633645, "grad_norm": 0.9648936179194268, "learning_rate": 9.903294572449412e-06, "loss": 0.4711, "step": 1450 }, { "epoch": 0.09088773704567876, "grad_norm": 0.8828475881410391, "learning_rate": 9.903095928834696e-06, "loss": 0.4517, "step": 1451 }, { "epoch": 0.09095037504502106, "grad_norm": 0.9297508385704868, "learning_rate": 9.902897083407642e-06, "loss": 0.4316, "step": 1452 }, { "epoch": 0.09101301304436336, "grad_norm": 0.9711919905025587, "learning_rate": 9.902698036176437e-06, "loss": 0.4717, "step": 1453 }, { "epoch": 0.09107565104370566, "grad_norm": 0.8949900602784688, "learning_rate": 9.902498787149268e-06, "loss": 0.4497, "step": 1454 }, { "epoch": 0.09113828904304797, "grad_norm": 0.9260906587721149, "learning_rate": 9.90229933633434e-06, "loss": 0.4408, "step": 1455 }, { "epoch": 0.09120092704239027, "grad_norm": 0.9368634417647993, "learning_rate": 9.902099683739859e-06, "loss": 0.4411, "step": 1456 }, { "epoch": 0.09126356504173257, "grad_norm": 0.912966210524351, "learning_rate": 9.901899829374048e-06, "loss": 0.4453, "step": 1457 }, { "epoch": 0.09132620304107487, "grad_norm": 0.9242540766879641, "learning_rate": 9.901699773245128e-06, "loss": 0.3862, "step": 1458 }, { "epoch": 0.09138884104041717, "grad_norm": 0.96048669816582, "learning_rate": 9.901499515361338e-06, "loss": 0.4759, "step": 1459 }, { "epoch": 0.09145147903975948, "grad_norm": 0.9207268526676665, "learning_rate": 9.901299055730916e-06, "loss": 0.4117, "step": 1460 }, { "epoch": 0.09151411703910177, "grad_norm": 0.8984347536281135, "learning_rate": 9.901098394362117e-06, "loss": 0.481, "step": 1461 }, { "epoch": 0.09157675503844408, "grad_norm": 0.9894226969579926, "learning_rate": 9.900897531263197e-06, "loss": 0.4655, "step": 1462 }, { "epoch": 0.09163939303778637, "grad_norm": 0.848282126916537, "learning_rate": 9.900696466442425e-06, "loss": 0.5055, "step": 1463 }, { "epoch": 0.09170203103712868, "grad_norm": 0.9072287605583648, "learning_rate": 9.900495199908077e-06, "loss": 0.4234, "step": 1464 }, { "epoch": 0.09176466903647097, "grad_norm": 0.9812647787164273, "learning_rate": 9.900293731668437e-06, "loss": 0.4999, "step": 1465 }, { "epoch": 0.09182730703581328, "grad_norm": 0.9044640721637041, "learning_rate": 9.900092061731799e-06, "loss": 0.4497, "step": 1466 }, { "epoch": 0.09188994503515557, "grad_norm": 0.9120064060300129, "learning_rate": 9.899890190106461e-06, "loss": 0.4305, "step": 1467 }, { "epoch": 0.09195258303449788, "grad_norm": 0.8626294196444891, "learning_rate": 9.899688116800732e-06, "loss": 0.448, "step": 1468 }, { "epoch": 0.09201522103384017, "grad_norm": 0.9443738868187155, "learning_rate": 9.899485841822932e-06, "loss": 0.419, "step": 1469 }, { "epoch": 0.09207785903318248, "grad_norm": 0.9149288654865164, "learning_rate": 9.899283365181384e-06, "loss": 0.4553, "step": 1470 }, { "epoch": 0.09214049703252478, "grad_norm": 0.8862757443050963, "learning_rate": 9.899080686884426e-06, "loss": 0.4609, "step": 1471 }, { "epoch": 0.09220313503186708, "grad_norm": 0.9397660849885395, "learning_rate": 9.898877806940396e-06, "loss": 0.4614, "step": 1472 }, { "epoch": 0.09226577303120938, "grad_norm": 0.931526896673016, "learning_rate": 9.898674725357646e-06, "loss": 0.4592, "step": 1473 }, { "epoch": 0.09232841103055169, "grad_norm": 0.7917876619276295, "learning_rate": 9.898471442144535e-06, "loss": 0.4122, "step": 1474 }, { "epoch": 0.09239104902989398, "grad_norm": 0.9339296768327562, "learning_rate": 9.898267957309432e-06, "loss": 0.423, "step": 1475 }, { "epoch": 0.09245368702923629, "grad_norm": 1.096678304025624, "learning_rate": 9.898064270860709e-06, "loss": 0.4617, "step": 1476 }, { "epoch": 0.09251632502857858, "grad_norm": 1.2474738436209039, "learning_rate": 9.897860382806752e-06, "loss": 0.47, "step": 1477 }, { "epoch": 0.09257896302792089, "grad_norm": 0.9793132893795026, "learning_rate": 9.897656293155953e-06, "loss": 0.4646, "step": 1478 }, { "epoch": 0.0926416010272632, "grad_norm": 0.9849391562004103, "learning_rate": 9.89745200191671e-06, "loss": 0.4388, "step": 1479 }, { "epoch": 0.09270423902660549, "grad_norm": 1.0037002442017227, "learning_rate": 9.897247509097436e-06, "loss": 0.4717, "step": 1480 }, { "epoch": 0.0927668770259478, "grad_norm": 0.8125584336111052, "learning_rate": 9.897042814706543e-06, "loss": 0.4244, "step": 1481 }, { "epoch": 0.09282951502529009, "grad_norm": 0.9190290288335197, "learning_rate": 9.896837918752463e-06, "loss": 0.435, "step": 1482 }, { "epoch": 0.0928921530246324, "grad_norm": 1.0200006074645942, "learning_rate": 9.896632821243622e-06, "loss": 0.4771, "step": 1483 }, { "epoch": 0.09295479102397469, "grad_norm": 0.8917018157143095, "learning_rate": 9.896427522188466e-06, "loss": 0.457, "step": 1484 }, { "epoch": 0.093017429023317, "grad_norm": 0.9446787976929322, "learning_rate": 9.896222021595443e-06, "loss": 0.4872, "step": 1485 }, { "epoch": 0.0930800670226593, "grad_norm": 1.0377954352135303, "learning_rate": 9.896016319473015e-06, "loss": 0.4613, "step": 1486 }, { "epoch": 0.0931427050220016, "grad_norm": 0.8622481990808284, "learning_rate": 9.895810415829645e-06, "loss": 0.5145, "step": 1487 }, { "epoch": 0.0932053430213439, "grad_norm": 0.9456294006331782, "learning_rate": 9.895604310673809e-06, "loss": 0.4091, "step": 1488 }, { "epoch": 0.0932679810206862, "grad_norm": 0.9435843121809769, "learning_rate": 9.895398004013994e-06, "loss": 0.4865, "step": 1489 }, { "epoch": 0.0933306190200285, "grad_norm": 0.9306929711031157, "learning_rate": 9.895191495858686e-06, "loss": 0.4319, "step": 1490 }, { "epoch": 0.0933932570193708, "grad_norm": 0.8789036358482651, "learning_rate": 9.894984786216389e-06, "loss": 0.4356, "step": 1491 }, { "epoch": 0.0934558950187131, "grad_norm": 1.0070410068059266, "learning_rate": 9.894777875095609e-06, "loss": 0.4554, "step": 1492 }, { "epoch": 0.0935185330180554, "grad_norm": 0.9637700323071658, "learning_rate": 9.894570762504863e-06, "loss": 0.4487, "step": 1493 }, { "epoch": 0.0935811710173977, "grad_norm": 0.8355788205250919, "learning_rate": 9.894363448452675e-06, "loss": 0.4279, "step": 1494 }, { "epoch": 0.09364380901674, "grad_norm": 1.0170388582004242, "learning_rate": 9.89415593294758e-06, "loss": 0.4357, "step": 1495 }, { "epoch": 0.0937064470160823, "grad_norm": 0.8979517974333173, "learning_rate": 9.893948215998118e-06, "loss": 0.4788, "step": 1496 }, { "epoch": 0.09376908501542461, "grad_norm": 1.0560972505172115, "learning_rate": 9.89374029761284e-06, "loss": 0.4457, "step": 1497 }, { "epoch": 0.0938317230147669, "grad_norm": 0.9359166153795132, "learning_rate": 9.893532177800302e-06, "loss": 0.4361, "step": 1498 }, { "epoch": 0.09389436101410921, "grad_norm": 0.9549406530855985, "learning_rate": 9.893323856569071e-06, "loss": 0.4835, "step": 1499 }, { "epoch": 0.09395699901345152, "grad_norm": 0.8119845808565949, "learning_rate": 9.893115333927723e-06, "loss": 0.4816, "step": 1500 }, { "epoch": 0.09401963701279381, "grad_norm": 0.8421225662717794, "learning_rate": 9.892906609884838e-06, "loss": 0.4532, "step": 1501 }, { "epoch": 0.09408227501213612, "grad_norm": 0.8562898607430842, "learning_rate": 9.892697684449011e-06, "loss": 0.4535, "step": 1502 }, { "epoch": 0.09414491301147841, "grad_norm": 0.8571592201766343, "learning_rate": 9.892488557628838e-06, "loss": 0.4526, "step": 1503 }, { "epoch": 0.09420755101082072, "grad_norm": 0.8016828152124434, "learning_rate": 9.892279229432929e-06, "loss": 0.4342, "step": 1504 }, { "epoch": 0.09427018901016301, "grad_norm": 0.9507099778753813, "learning_rate": 9.892069699869897e-06, "loss": 0.4887, "step": 1505 }, { "epoch": 0.09433282700950532, "grad_norm": 0.8658002651569511, "learning_rate": 9.891859968948368e-06, "loss": 0.4856, "step": 1506 }, { "epoch": 0.09439546500884761, "grad_norm": 0.9233232235498693, "learning_rate": 9.891650036676976e-06, "loss": 0.5101, "step": 1507 }, { "epoch": 0.09445810300818992, "grad_norm": 0.8993673258117713, "learning_rate": 9.891439903064361e-06, "loss": 0.4646, "step": 1508 }, { "epoch": 0.09452074100753222, "grad_norm": 0.8886501405166498, "learning_rate": 9.89122956811917e-06, "loss": 0.4157, "step": 1509 }, { "epoch": 0.09458337900687452, "grad_norm": 0.9656494805603429, "learning_rate": 9.891019031850063e-06, "loss": 0.5207, "step": 1510 }, { "epoch": 0.09464601700621682, "grad_norm": 0.8889575971982786, "learning_rate": 9.890808294265706e-06, "loss": 0.4679, "step": 1511 }, { "epoch": 0.09470865500555913, "grad_norm": 0.8374231232526697, "learning_rate": 9.890597355374769e-06, "loss": 0.4794, "step": 1512 }, { "epoch": 0.09477129300490142, "grad_norm": 0.9428455338207213, "learning_rate": 9.89038621518594e-06, "loss": 0.4849, "step": 1513 }, { "epoch": 0.09483393100424373, "grad_norm": 0.9317924947339054, "learning_rate": 9.890174873707905e-06, "loss": 0.4472, "step": 1514 }, { "epoch": 0.09489656900358602, "grad_norm": 0.8867083688940639, "learning_rate": 9.889963330949366e-06, "loss": 0.4402, "step": 1515 }, { "epoch": 0.09495920700292833, "grad_norm": 0.9445734555186007, "learning_rate": 9.889751586919028e-06, "loss": 0.4344, "step": 1516 }, { "epoch": 0.09502184500227062, "grad_norm": 0.9875804337068883, "learning_rate": 9.889539641625605e-06, "loss": 0.45, "step": 1517 }, { "epoch": 0.09508448300161293, "grad_norm": 0.8842639210716592, "learning_rate": 9.889327495077824e-06, "loss": 0.4652, "step": 1518 }, { "epoch": 0.09514712100095522, "grad_norm": 0.890292067193579, "learning_rate": 9.889115147284417e-06, "loss": 0.476, "step": 1519 }, { "epoch": 0.09520975900029753, "grad_norm": 0.8141322385731111, "learning_rate": 9.888902598254122e-06, "loss": 0.3792, "step": 1520 }, { "epoch": 0.09527239699963984, "grad_norm": 0.8322514190258518, "learning_rate": 9.88868984799569e-06, "loss": 0.4303, "step": 1521 }, { "epoch": 0.09533503499898213, "grad_norm": 0.8904831853136833, "learning_rate": 9.888476896517875e-06, "loss": 0.4367, "step": 1522 }, { "epoch": 0.09539767299832444, "grad_norm": 0.863877405150155, "learning_rate": 9.888263743829445e-06, "loss": 0.4587, "step": 1523 }, { "epoch": 0.09546031099766673, "grad_norm": 0.8658056910905525, "learning_rate": 9.888050389939172e-06, "loss": 0.4269, "step": 1524 }, { "epoch": 0.09552294899700904, "grad_norm": 0.9134606048027469, "learning_rate": 9.887836834855836e-06, "loss": 0.4463, "step": 1525 }, { "epoch": 0.09558558699635133, "grad_norm": 0.8933774988349712, "learning_rate": 9.88762307858823e-06, "loss": 0.417, "step": 1526 }, { "epoch": 0.09564822499569364, "grad_norm": 0.8838948085061537, "learning_rate": 9.88740912114515e-06, "loss": 0.4637, "step": 1527 }, { "epoch": 0.09571086299503594, "grad_norm": 0.8540811656528821, "learning_rate": 9.887194962535405e-06, "loss": 0.4077, "step": 1528 }, { "epoch": 0.09577350099437824, "grad_norm": 0.9069828909742581, "learning_rate": 9.886980602767806e-06, "loss": 0.4535, "step": 1529 }, { "epoch": 0.09583613899372054, "grad_norm": 0.8791266051327843, "learning_rate": 9.886766041851178e-06, "loss": 0.4633, "step": 1530 }, { "epoch": 0.09589877699306285, "grad_norm": 0.9516000875499592, "learning_rate": 9.886551279794356e-06, "loss": 0.4509, "step": 1531 }, { "epoch": 0.09596141499240514, "grad_norm": 0.8829923576823713, "learning_rate": 9.886336316606175e-06, "loss": 0.4411, "step": 1532 }, { "epoch": 0.09602405299174745, "grad_norm": 0.9865985601591539, "learning_rate": 9.886121152295483e-06, "loss": 0.4435, "step": 1533 }, { "epoch": 0.09608669099108974, "grad_norm": 0.9586963515089149, "learning_rate": 9.885905786871138e-06, "loss": 0.46, "step": 1534 }, { "epoch": 0.09614932899043205, "grad_norm": 0.8626284828213061, "learning_rate": 9.885690220342005e-06, "loss": 0.5006, "step": 1535 }, { "epoch": 0.09621196698977434, "grad_norm": 0.8488840284700826, "learning_rate": 9.885474452716955e-06, "loss": 0.4228, "step": 1536 }, { "epoch": 0.09627460498911665, "grad_norm": 0.9429534463056131, "learning_rate": 9.885258484004869e-06, "loss": 0.4967, "step": 1537 }, { "epoch": 0.09633724298845894, "grad_norm": 0.95221214117684, "learning_rate": 9.885042314214637e-06, "loss": 0.5043, "step": 1538 }, { "epoch": 0.09639988098780125, "grad_norm": 0.9219411365073961, "learning_rate": 9.884825943355158e-06, "loss": 0.4555, "step": 1539 }, { "epoch": 0.09646251898714354, "grad_norm": 0.8950474938169982, "learning_rate": 9.884609371435335e-06, "loss": 0.4329, "step": 1540 }, { "epoch": 0.09652515698648585, "grad_norm": 0.9578574923298676, "learning_rate": 9.884392598464086e-06, "loss": 0.4499, "step": 1541 }, { "epoch": 0.09658779498582815, "grad_norm": 0.8723696327015126, "learning_rate": 9.884175624450328e-06, "loss": 0.4313, "step": 1542 }, { "epoch": 0.09665043298517045, "grad_norm": 0.858397950929006, "learning_rate": 9.883958449402997e-06, "loss": 0.4645, "step": 1543 }, { "epoch": 0.09671307098451276, "grad_norm": 0.9091150484480857, "learning_rate": 9.883741073331027e-06, "loss": 0.4314, "step": 1544 }, { "epoch": 0.09677570898385505, "grad_norm": 0.8805184215137215, "learning_rate": 9.883523496243372e-06, "loss": 0.4548, "step": 1545 }, { "epoch": 0.09683834698319736, "grad_norm": 0.8937919293605776, "learning_rate": 9.883305718148981e-06, "loss": 0.4455, "step": 1546 }, { "epoch": 0.09690098498253966, "grad_norm": 0.9354055691986288, "learning_rate": 9.88308773905682e-06, "loss": 0.4881, "step": 1547 }, { "epoch": 0.09696362298188196, "grad_norm": 0.8898781625685626, "learning_rate": 9.882869558975863e-06, "loss": 0.4452, "step": 1548 }, { "epoch": 0.09702626098122426, "grad_norm": 0.927365062960007, "learning_rate": 9.882651177915087e-06, "loss": 0.49, "step": 1549 }, { "epoch": 0.09708889898056657, "grad_norm": 0.9936598088360935, "learning_rate": 9.882432595883482e-06, "loss": 0.4522, "step": 1550 }, { "epoch": 0.09715153697990886, "grad_norm": 0.911308392594863, "learning_rate": 9.882213812890048e-06, "loss": 0.4617, "step": 1551 }, { "epoch": 0.09721417497925117, "grad_norm": 0.8789885262684483, "learning_rate": 9.881994828943784e-06, "loss": 0.4469, "step": 1552 }, { "epoch": 0.09727681297859346, "grad_norm": 0.8387014288547737, "learning_rate": 9.881775644053708e-06, "loss": 0.4396, "step": 1553 }, { "epoch": 0.09733945097793577, "grad_norm": 0.9624855766607733, "learning_rate": 9.881556258228841e-06, "loss": 0.4692, "step": 1554 }, { "epoch": 0.09740208897727806, "grad_norm": 0.9625210172598724, "learning_rate": 9.881336671478211e-06, "loss": 0.4763, "step": 1555 }, { "epoch": 0.09746472697662037, "grad_norm": 0.9882852689752175, "learning_rate": 9.881116883810858e-06, "loss": 0.5004, "step": 1556 }, { "epoch": 0.09752736497596266, "grad_norm": 0.8163529937277837, "learning_rate": 9.880896895235828e-06, "loss": 0.4058, "step": 1557 }, { "epoch": 0.09759000297530497, "grad_norm": 0.8903999132465609, "learning_rate": 9.880676705762178e-06, "loss": 0.4232, "step": 1558 }, { "epoch": 0.09765264097464726, "grad_norm": 0.8480173531934881, "learning_rate": 9.880456315398967e-06, "loss": 0.4272, "step": 1559 }, { "epoch": 0.09771527897398957, "grad_norm": 0.9598374480060924, "learning_rate": 9.880235724155267e-06, "loss": 0.4562, "step": 1560 }, { "epoch": 0.09777791697333187, "grad_norm": 0.9188573023833896, "learning_rate": 9.880014932040161e-06, "loss": 0.4419, "step": 1561 }, { "epoch": 0.09784055497267417, "grad_norm": 0.8885909492222273, "learning_rate": 9.879793939062734e-06, "loss": 0.4344, "step": 1562 }, { "epoch": 0.09790319297201647, "grad_norm": 0.9671803805328165, "learning_rate": 9.879572745232082e-06, "loss": 0.4431, "step": 1563 }, { "epoch": 0.09796583097135877, "grad_norm": 0.8867169527735584, "learning_rate": 9.879351350557311e-06, "loss": 0.4514, "step": 1564 }, { "epoch": 0.09802846897070108, "grad_norm": 0.8305946197757244, "learning_rate": 9.879129755047532e-06, "loss": 0.4188, "step": 1565 }, { "epoch": 0.09809110697004338, "grad_norm": 0.8997359923356064, "learning_rate": 9.878907958711867e-06, "loss": 0.4135, "step": 1566 }, { "epoch": 0.09815374496938568, "grad_norm": 0.9292691686082959, "learning_rate": 9.878685961559445e-06, "loss": 0.477, "step": 1567 }, { "epoch": 0.09821638296872798, "grad_norm": 0.9524964470534331, "learning_rate": 9.878463763599401e-06, "loss": 0.4739, "step": 1568 }, { "epoch": 0.09827902096807029, "grad_norm": 0.8832153990349538, "learning_rate": 9.878241364840887e-06, "loss": 0.4855, "step": 1569 }, { "epoch": 0.09834165896741258, "grad_norm": 0.996866276785187, "learning_rate": 9.878018765293052e-06, "loss": 0.4967, "step": 1570 }, { "epoch": 0.09840429696675489, "grad_norm": 0.8977586599449298, "learning_rate": 9.877795964965058e-06, "loss": 0.4396, "step": 1571 }, { "epoch": 0.09846693496609718, "grad_norm": 0.8102605556571321, "learning_rate": 9.877572963866075e-06, "loss": 0.4343, "step": 1572 }, { "epoch": 0.09852957296543949, "grad_norm": 0.8653819066194124, "learning_rate": 9.877349762005286e-06, "loss": 0.4118, "step": 1573 }, { "epoch": 0.09859221096478178, "grad_norm": 0.8802786482958497, "learning_rate": 9.877126359391875e-06, "loss": 0.4677, "step": 1574 }, { "epoch": 0.09865484896412409, "grad_norm": 0.8809539880059191, "learning_rate": 9.876902756035038e-06, "loss": 0.445, "step": 1575 }, { "epoch": 0.09871748696346638, "grad_norm": 0.8227556671158442, "learning_rate": 9.87667895194398e-06, "loss": 0.4327, "step": 1576 }, { "epoch": 0.09878012496280869, "grad_norm": 0.8990257252871325, "learning_rate": 9.87645494712791e-06, "loss": 0.4274, "step": 1577 }, { "epoch": 0.09884276296215098, "grad_norm": 0.8561470829658842, "learning_rate": 9.876230741596047e-06, "loss": 0.4271, "step": 1578 }, { "epoch": 0.09890540096149329, "grad_norm": 0.8582507836484214, "learning_rate": 9.876006335357625e-06, "loss": 0.4381, "step": 1579 }, { "epoch": 0.09896803896083559, "grad_norm": 0.9126165872093309, "learning_rate": 9.875781728421874e-06, "loss": 0.4579, "step": 1580 }, { "epoch": 0.0990306769601779, "grad_norm": 0.8722106874453135, "learning_rate": 9.875556920798043e-06, "loss": 0.4144, "step": 1581 }, { "epoch": 0.09909331495952019, "grad_norm": 0.9145094480176127, "learning_rate": 9.875331912495385e-06, "loss": 0.4456, "step": 1582 }, { "epoch": 0.0991559529588625, "grad_norm": 0.8706458609675202, "learning_rate": 9.875106703523161e-06, "loss": 0.4286, "step": 1583 }, { "epoch": 0.09921859095820479, "grad_norm": 0.8802434336539445, "learning_rate": 9.87488129389064e-06, "loss": 0.4032, "step": 1584 }, { "epoch": 0.0992812289575471, "grad_norm": 0.8730111395571392, "learning_rate": 9.874655683607099e-06, "loss": 0.4378, "step": 1585 }, { "epoch": 0.09934386695688939, "grad_norm": 0.9418277769097348, "learning_rate": 9.874429872681826e-06, "loss": 0.4733, "step": 1586 }, { "epoch": 0.0994065049562317, "grad_norm": 0.8731033196585996, "learning_rate": 9.874203861124114e-06, "loss": 0.4454, "step": 1587 }, { "epoch": 0.099469142955574, "grad_norm": 0.8737559782897221, "learning_rate": 9.873977648943267e-06, "loss": 0.4726, "step": 1588 }, { "epoch": 0.0995317809549163, "grad_norm": 0.9210275981827408, "learning_rate": 9.873751236148596e-06, "loss": 0.4898, "step": 1589 }, { "epoch": 0.0995944189542586, "grad_norm": 1.0915458781321115, "learning_rate": 9.87352462274942e-06, "loss": 0.4408, "step": 1590 }, { "epoch": 0.0996570569536009, "grad_norm": 0.8422571111739536, "learning_rate": 9.873297808755065e-06, "loss": 0.4492, "step": 1591 }, { "epoch": 0.09971969495294321, "grad_norm": 0.8808428287782618, "learning_rate": 9.873070794174866e-06, "loss": 0.4247, "step": 1592 }, { "epoch": 0.0997823329522855, "grad_norm": 0.9735610810916026, "learning_rate": 9.87284357901817e-06, "loss": 0.4776, "step": 1593 }, { "epoch": 0.09984497095162781, "grad_norm": 0.9390131059833663, "learning_rate": 9.872616163294328e-06, "loss": 0.4992, "step": 1594 }, { "epoch": 0.0999076089509701, "grad_norm": 0.8827399506654423, "learning_rate": 9.872388547012701e-06, "loss": 0.4202, "step": 1595 }, { "epoch": 0.09997024695031241, "grad_norm": 0.8987906903067735, "learning_rate": 9.872160730182656e-06, "loss": 0.4048, "step": 1596 }, { "epoch": 0.1000328849496547, "grad_norm": 0.8138361145827581, "learning_rate": 9.871932712813572e-06, "loss": 0.4297, "step": 1597 }, { "epoch": 0.10009552294899701, "grad_norm": 0.8130679581312211, "learning_rate": 9.871704494914832e-06, "loss": 0.4752, "step": 1598 }, { "epoch": 0.1001581609483393, "grad_norm": 0.975517713522263, "learning_rate": 9.871476076495831e-06, "loss": 0.4406, "step": 1599 }, { "epoch": 0.10022079894768161, "grad_norm": 0.8951589086817716, "learning_rate": 9.871247457565971e-06, "loss": 0.439, "step": 1600 }, { "epoch": 0.10028343694702391, "grad_norm": 0.9146858997152447, "learning_rate": 9.871018638134661e-06, "loss": 0.4925, "step": 1601 }, { "epoch": 0.10034607494636621, "grad_norm": 0.928208667574649, "learning_rate": 9.87078961821132e-06, "loss": 0.4433, "step": 1602 }, { "epoch": 0.10040871294570851, "grad_norm": 0.997420475943222, "learning_rate": 9.870560397805375e-06, "loss": 0.4543, "step": 1603 }, { "epoch": 0.10047135094505082, "grad_norm": 0.9061383374389848, "learning_rate": 9.870330976926261e-06, "loss": 0.4476, "step": 1604 }, { "epoch": 0.10053398894439311, "grad_norm": 0.8410419745164147, "learning_rate": 9.870101355583416e-06, "loss": 0.4325, "step": 1605 }, { "epoch": 0.10059662694373542, "grad_norm": 0.8533857346363363, "learning_rate": 9.8698715337863e-06, "loss": 0.439, "step": 1606 }, { "epoch": 0.10065926494307771, "grad_norm": 0.8295931134816784, "learning_rate": 9.869641511544366e-06, "loss": 0.4136, "step": 1607 }, { "epoch": 0.10072190294242002, "grad_norm": 0.9321156041665808, "learning_rate": 9.86941128886708e-06, "loss": 0.4681, "step": 1608 }, { "epoch": 0.10078454094176233, "grad_norm": 0.8710737149967902, "learning_rate": 9.869180865763926e-06, "loss": 0.4439, "step": 1609 }, { "epoch": 0.10084717894110462, "grad_norm": 0.8640181492809873, "learning_rate": 9.868950242244382e-06, "loss": 0.4625, "step": 1610 }, { "epoch": 0.10090981694044693, "grad_norm": 0.9643987223518166, "learning_rate": 9.868719418317944e-06, "loss": 0.4697, "step": 1611 }, { "epoch": 0.10097245493978922, "grad_norm": 0.8626840173643526, "learning_rate": 9.868488393994109e-06, "loss": 0.4334, "step": 1612 }, { "epoch": 0.10103509293913153, "grad_norm": 0.950042206748387, "learning_rate": 9.86825716928239e-06, "loss": 0.4606, "step": 1613 }, { "epoch": 0.10109773093847382, "grad_norm": 0.886719828764984, "learning_rate": 9.8680257441923e-06, "loss": 0.47, "step": 1614 }, { "epoch": 0.10116036893781613, "grad_norm": 0.8588788137389599, "learning_rate": 9.867794118733368e-06, "loss": 0.4278, "step": 1615 }, { "epoch": 0.10122300693715842, "grad_norm": 0.8227576623418922, "learning_rate": 9.867562292915128e-06, "loss": 0.4028, "step": 1616 }, { "epoch": 0.10128564493650073, "grad_norm": 0.8981278258606616, "learning_rate": 9.867330266747118e-06, "loss": 0.4281, "step": 1617 }, { "epoch": 0.10134828293584303, "grad_norm": 1.0052308750920933, "learning_rate": 9.867098040238892e-06, "loss": 0.4357, "step": 1618 }, { "epoch": 0.10141092093518533, "grad_norm": 0.907873859413351, "learning_rate": 9.866865613400008e-06, "loss": 0.4747, "step": 1619 }, { "epoch": 0.10147355893452763, "grad_norm": 0.986479635281565, "learning_rate": 9.86663298624003e-06, "loss": 0.4813, "step": 1620 }, { "epoch": 0.10153619693386993, "grad_norm": 0.8610221096948076, "learning_rate": 9.866400158768536e-06, "loss": 0.4468, "step": 1621 }, { "epoch": 0.10159883493321223, "grad_norm": 0.9057490033619454, "learning_rate": 9.866167130995108e-06, "loss": 0.425, "step": 1622 }, { "epoch": 0.10166147293255454, "grad_norm": 0.8898577268240097, "learning_rate": 9.86593390292934e-06, "loss": 0.4476, "step": 1623 }, { "epoch": 0.10172411093189683, "grad_norm": 0.8840136506244884, "learning_rate": 9.865700474580827e-06, "loss": 0.4379, "step": 1624 }, { "epoch": 0.10178674893123914, "grad_norm": 0.9451869427096309, "learning_rate": 9.86546684595918e-06, "loss": 0.4514, "step": 1625 }, { "epoch": 0.10184938693058143, "grad_norm": 0.8657267730157551, "learning_rate": 9.865233017074014e-06, "loss": 0.4767, "step": 1626 }, { "epoch": 0.10191202492992374, "grad_norm": 0.8145133597527424, "learning_rate": 9.864998987934954e-06, "loss": 0.4214, "step": 1627 }, { "epoch": 0.10197466292926603, "grad_norm": 0.8903203735523892, "learning_rate": 9.864764758551632e-06, "loss": 0.4254, "step": 1628 }, { "epoch": 0.10203730092860834, "grad_norm": 0.8914813918201505, "learning_rate": 9.864530328933692e-06, "loss": 0.4749, "step": 1629 }, { "epoch": 0.10209993892795065, "grad_norm": 0.8999176548241218, "learning_rate": 9.864295699090779e-06, "loss": 0.4487, "step": 1630 }, { "epoch": 0.10216257692729294, "grad_norm": 0.870678370015496, "learning_rate": 9.864060869032553e-06, "loss": 0.4349, "step": 1631 }, { "epoch": 0.10222521492663525, "grad_norm": 0.9111666325552786, "learning_rate": 9.863825838768678e-06, "loss": 0.4421, "step": 1632 }, { "epoch": 0.10228785292597754, "grad_norm": 0.8509554426305704, "learning_rate": 9.863590608308828e-06, "loss": 0.446, "step": 1633 }, { "epoch": 0.10235049092531985, "grad_norm": 0.8715878297339201, "learning_rate": 9.863355177662689e-06, "loss": 0.4531, "step": 1634 }, { "epoch": 0.10241312892466214, "grad_norm": 0.8239743870641836, "learning_rate": 9.863119546839945e-06, "loss": 0.4358, "step": 1635 }, { "epoch": 0.10247576692400445, "grad_norm": 0.8742994438071928, "learning_rate": 9.862883715850299e-06, "loss": 0.4716, "step": 1636 }, { "epoch": 0.10253840492334675, "grad_norm": 0.9677685685369322, "learning_rate": 9.862647684703457e-06, "loss": 0.4637, "step": 1637 }, { "epoch": 0.10260104292268905, "grad_norm": 0.8736481579576624, "learning_rate": 9.862411453409133e-06, "loss": 0.4356, "step": 1638 }, { "epoch": 0.10266368092203135, "grad_norm": 0.9229859984915212, "learning_rate": 9.862175021977053e-06, "loss": 0.4186, "step": 1639 }, { "epoch": 0.10272631892137365, "grad_norm": 0.9646057437216833, "learning_rate": 9.861938390416944e-06, "loss": 0.486, "step": 1640 }, { "epoch": 0.10278895692071595, "grad_norm": 0.9016565734915767, "learning_rate": 9.861701558738548e-06, "loss": 0.4742, "step": 1641 }, { "epoch": 0.10285159492005826, "grad_norm": 0.918072803481721, "learning_rate": 9.861464526951616e-06, "loss": 0.4272, "step": 1642 }, { "epoch": 0.10291423291940055, "grad_norm": 0.9346532552311103, "learning_rate": 9.861227295065901e-06, "loss": 0.437, "step": 1643 }, { "epoch": 0.10297687091874286, "grad_norm": 0.9147588719545384, "learning_rate": 9.860989863091168e-06, "loss": 0.4312, "step": 1644 }, { "epoch": 0.10303950891808515, "grad_norm": 0.881686582456261, "learning_rate": 9.860752231037189e-06, "loss": 0.4022, "step": 1645 }, { "epoch": 0.10310214691742746, "grad_norm": 0.9275460045466786, "learning_rate": 9.860514398913748e-06, "loss": 0.4599, "step": 1646 }, { "epoch": 0.10316478491676975, "grad_norm": 0.8935640524063782, "learning_rate": 9.86027636673063e-06, "loss": 0.4288, "step": 1647 }, { "epoch": 0.10322742291611206, "grad_norm": 0.9292660249184228, "learning_rate": 9.860038134497635e-06, "loss": 0.5014, "step": 1648 }, { "epoch": 0.10329006091545435, "grad_norm": 0.8641883373382413, "learning_rate": 9.859799702224571e-06, "loss": 0.4229, "step": 1649 }, { "epoch": 0.10335269891479666, "grad_norm": 0.7863905206583155, "learning_rate": 9.859561069921246e-06, "loss": 0.5183, "step": 1650 }, { "epoch": 0.10341533691413896, "grad_norm": 0.9634587644794977, "learning_rate": 9.859322237597488e-06, "loss": 0.4605, "step": 1651 }, { "epoch": 0.10347797491348126, "grad_norm": 0.9455039075557116, "learning_rate": 9.859083205263122e-06, "loss": 0.4123, "step": 1652 }, { "epoch": 0.10354061291282357, "grad_norm": 0.9692545167239823, "learning_rate": 9.858843972927992e-06, "loss": 0.4586, "step": 1653 }, { "epoch": 0.10360325091216586, "grad_norm": 0.8539553312350842, "learning_rate": 9.858604540601941e-06, "loss": 0.4044, "step": 1654 }, { "epoch": 0.10366588891150817, "grad_norm": 0.8611053881468008, "learning_rate": 9.858364908294826e-06, "loss": 0.4754, "step": 1655 }, { "epoch": 0.10372852691085047, "grad_norm": 0.8098606389588296, "learning_rate": 9.858125076016507e-06, "loss": 0.413, "step": 1656 }, { "epoch": 0.10379116491019277, "grad_norm": 0.8928764148044713, "learning_rate": 9.857885043776862e-06, "loss": 0.4532, "step": 1657 }, { "epoch": 0.10385380290953507, "grad_norm": 0.8560461608484777, "learning_rate": 9.857644811585764e-06, "loss": 0.4711, "step": 1658 }, { "epoch": 0.10391644090887737, "grad_norm": 1.0205851565970248, "learning_rate": 9.857404379453106e-06, "loss": 0.4214, "step": 1659 }, { "epoch": 0.10397907890821967, "grad_norm": 0.9777910485960564, "learning_rate": 9.85716374738878e-06, "loss": 0.4682, "step": 1660 }, { "epoch": 0.10404171690756198, "grad_norm": 1.1197939040366751, "learning_rate": 9.856922915402695e-06, "loss": 0.4949, "step": 1661 }, { "epoch": 0.10410435490690427, "grad_norm": 0.9511377142908352, "learning_rate": 9.85668188350476e-06, "loss": 0.4832, "step": 1662 }, { "epoch": 0.10416699290624658, "grad_norm": 0.8800495258207905, "learning_rate": 9.856440651704897e-06, "loss": 0.4681, "step": 1663 }, { "epoch": 0.10422963090558887, "grad_norm": 0.9522932188927004, "learning_rate": 9.856199220013037e-06, "loss": 0.478, "step": 1664 }, { "epoch": 0.10429226890493118, "grad_norm": 1.054456042521886, "learning_rate": 9.855957588439114e-06, "loss": 0.5094, "step": 1665 }, { "epoch": 0.10435490690427347, "grad_norm": 0.9388326455503359, "learning_rate": 9.855715756993074e-06, "loss": 0.4092, "step": 1666 }, { "epoch": 0.10441754490361578, "grad_norm": 1.1016712697483182, "learning_rate": 9.855473725684875e-06, "loss": 0.5068, "step": 1667 }, { "epoch": 0.10448018290295807, "grad_norm": 0.8790681057726103, "learning_rate": 9.855231494524475e-06, "loss": 0.4233, "step": 1668 }, { "epoch": 0.10454282090230038, "grad_norm": 0.8998848505308056, "learning_rate": 9.854989063521846e-06, "loss": 0.4077, "step": 1669 }, { "epoch": 0.10460545890164268, "grad_norm": 0.9353527747599285, "learning_rate": 9.854746432686965e-06, "loss": 0.4597, "step": 1670 }, { "epoch": 0.10466809690098498, "grad_norm": 0.9155795233593754, "learning_rate": 9.854503602029821e-06, "loss": 0.3945, "step": 1671 }, { "epoch": 0.10473073490032728, "grad_norm": 0.8980389780944664, "learning_rate": 9.854260571560408e-06, "loss": 0.4396, "step": 1672 }, { "epoch": 0.10479337289966958, "grad_norm": 0.8565074431516235, "learning_rate": 9.854017341288728e-06, "loss": 0.4275, "step": 1673 }, { "epoch": 0.10485601089901189, "grad_norm": 0.8903065673027977, "learning_rate": 9.853773911224794e-06, "loss": 0.4332, "step": 1674 }, { "epoch": 0.10491864889835419, "grad_norm": 0.8599586996257699, "learning_rate": 9.853530281378623e-06, "loss": 0.4441, "step": 1675 }, { "epoch": 0.1049812868976965, "grad_norm": 0.8708904812350786, "learning_rate": 9.853286451760246e-06, "loss": 0.4173, "step": 1676 }, { "epoch": 0.10504392489703879, "grad_norm": 0.9565777861560476, "learning_rate": 9.853042422379698e-06, "loss": 0.4502, "step": 1677 }, { "epoch": 0.1051065628963811, "grad_norm": 0.9157878551033322, "learning_rate": 9.852798193247024e-06, "loss": 0.4912, "step": 1678 }, { "epoch": 0.10516920089572339, "grad_norm": 0.9041663690420485, "learning_rate": 9.852553764372275e-06, "loss": 0.4337, "step": 1679 }, { "epoch": 0.1052318388950657, "grad_norm": 0.9531355914253057, "learning_rate": 9.85230913576551e-06, "loss": 0.4314, "step": 1680 }, { "epoch": 0.10529447689440799, "grad_norm": 0.8303868159760173, "learning_rate": 9.852064307436803e-06, "loss": 0.4241, "step": 1681 }, { "epoch": 0.1053571148937503, "grad_norm": 0.8009388689403389, "learning_rate": 9.851819279396228e-06, "loss": 0.3715, "step": 1682 }, { "epoch": 0.10541975289309259, "grad_norm": 0.911416249993186, "learning_rate": 9.851574051653871e-06, "loss": 0.4084, "step": 1683 }, { "epoch": 0.1054823908924349, "grad_norm": 0.9012310852151912, "learning_rate": 9.851328624219826e-06, "loss": 0.4853, "step": 1684 }, { "epoch": 0.10554502889177719, "grad_norm": 0.8130258725707079, "learning_rate": 9.851082997104194e-06, "loss": 0.4034, "step": 1685 }, { "epoch": 0.1056076668911195, "grad_norm": 0.8600995074922225, "learning_rate": 9.850837170317084e-06, "loss": 0.4274, "step": 1686 }, { "epoch": 0.1056703048904618, "grad_norm": 0.9493935449244991, "learning_rate": 9.850591143868618e-06, "loss": 0.4726, "step": 1687 }, { "epoch": 0.1057329428898041, "grad_norm": 0.9932795429894357, "learning_rate": 9.85034491776892e-06, "loss": 0.4548, "step": 1688 }, { "epoch": 0.1057955808891464, "grad_norm": 0.8984649291418174, "learning_rate": 9.850098492028122e-06, "loss": 0.4618, "step": 1689 }, { "epoch": 0.1058582188884887, "grad_norm": 0.855911141728444, "learning_rate": 9.849851866656373e-06, "loss": 0.4335, "step": 1690 }, { "epoch": 0.105920856887831, "grad_norm": 0.9604817416382047, "learning_rate": 9.84960504166382e-06, "loss": 0.4599, "step": 1691 }, { "epoch": 0.1059834948871733, "grad_norm": 0.9105369643646842, "learning_rate": 9.849358017060622e-06, "loss": 0.417, "step": 1692 }, { "epoch": 0.1060461328865156, "grad_norm": 1.2907743981203437, "learning_rate": 9.84911079285695e-06, "loss": 0.4412, "step": 1693 }, { "epoch": 0.1061087708858579, "grad_norm": 0.8565494530855996, "learning_rate": 9.848863369062977e-06, "loss": 0.4341, "step": 1694 }, { "epoch": 0.10617140888520021, "grad_norm": 0.9214419807626887, "learning_rate": 9.848615745688888e-06, "loss": 0.4332, "step": 1695 }, { "epoch": 0.10623404688454251, "grad_norm": 0.8397219296147146, "learning_rate": 9.848367922744874e-06, "loss": 0.4161, "step": 1696 }, { "epoch": 0.10629668488388481, "grad_norm": 0.8741640542570748, "learning_rate": 9.848119900241138e-06, "loss": 0.3901, "step": 1697 }, { "epoch": 0.10635932288322711, "grad_norm": 0.8831692285470235, "learning_rate": 9.847871678187885e-06, "loss": 0.4401, "step": 1698 }, { "epoch": 0.10642196088256942, "grad_norm": 0.8007718646736729, "learning_rate": 9.847623256595336e-06, "loss": 0.4134, "step": 1699 }, { "epoch": 0.10648459888191171, "grad_norm": 0.9303942601553514, "learning_rate": 9.847374635473713e-06, "loss": 0.4877, "step": 1700 }, { "epoch": 0.10654723688125402, "grad_norm": 0.8978394903296093, "learning_rate": 9.84712581483325e-06, "loss": 0.4511, "step": 1701 }, { "epoch": 0.10660987488059631, "grad_norm": 0.9147950077820368, "learning_rate": 9.846876794684188e-06, "loss": 0.4291, "step": 1702 }, { "epoch": 0.10667251287993862, "grad_norm": 0.8631361194654336, "learning_rate": 9.846627575036779e-06, "loss": 0.4442, "step": 1703 }, { "epoch": 0.10673515087928091, "grad_norm": 0.9856092630483868, "learning_rate": 9.84637815590128e-06, "loss": 0.5205, "step": 1704 }, { "epoch": 0.10679778887862322, "grad_norm": 0.8835227542571434, "learning_rate": 9.846128537287954e-06, "loss": 0.4223, "step": 1705 }, { "epoch": 0.10686042687796551, "grad_norm": 0.898073908912535, "learning_rate": 9.84587871920708e-06, "loss": 0.4831, "step": 1706 }, { "epoch": 0.10692306487730782, "grad_norm": 0.9163370923733389, "learning_rate": 9.845628701668938e-06, "loss": 0.4674, "step": 1707 }, { "epoch": 0.10698570287665012, "grad_norm": 0.9204438506746777, "learning_rate": 9.845378484683818e-06, "loss": 0.4128, "step": 1708 }, { "epoch": 0.10704834087599242, "grad_norm": 0.8284473774769301, "learning_rate": 9.845128068262023e-06, "loss": 0.4274, "step": 1709 }, { "epoch": 0.10711097887533472, "grad_norm": 0.843847485275788, "learning_rate": 9.844877452413855e-06, "loss": 0.4366, "step": 1710 }, { "epoch": 0.10717361687467702, "grad_norm": 0.9245624314526578, "learning_rate": 9.844626637149633e-06, "loss": 0.4577, "step": 1711 }, { "epoch": 0.10723625487401932, "grad_norm": 0.9086965655883255, "learning_rate": 9.844375622479678e-06, "loss": 0.4517, "step": 1712 }, { "epoch": 0.10729889287336163, "grad_norm": 0.8766304336561018, "learning_rate": 9.844124408414325e-06, "loss": 0.4533, "step": 1713 }, { "epoch": 0.10736153087270392, "grad_norm": 0.8873359125326057, "learning_rate": 9.843872994963912e-06, "loss": 0.4555, "step": 1714 }, { "epoch": 0.10742416887204623, "grad_norm": 0.8632025998144776, "learning_rate": 9.843621382138786e-06, "loss": 0.4096, "step": 1715 }, { "epoch": 0.10748680687138852, "grad_norm": 0.851485841302409, "learning_rate": 9.843369569949305e-06, "loss": 0.4682, "step": 1716 }, { "epoch": 0.10754944487073083, "grad_norm": 0.8881083174316393, "learning_rate": 9.843117558405836e-06, "loss": 0.4451, "step": 1717 }, { "epoch": 0.10761208287007314, "grad_norm": 0.8988576874504488, "learning_rate": 9.842865347518748e-06, "loss": 0.4268, "step": 1718 }, { "epoch": 0.10767472086941543, "grad_norm": 0.9628945791785181, "learning_rate": 9.842612937298422e-06, "loss": 0.4303, "step": 1719 }, { "epoch": 0.10773735886875774, "grad_norm": 0.9973766002851463, "learning_rate": 9.84236032775525e-06, "loss": 0.4435, "step": 1720 }, { "epoch": 0.10779999686810003, "grad_norm": 0.9011858740406491, "learning_rate": 9.842107518899628e-06, "loss": 0.4349, "step": 1721 }, { "epoch": 0.10786263486744234, "grad_norm": 0.8859749054523879, "learning_rate": 9.841854510741962e-06, "loss": 0.4333, "step": 1722 }, { "epoch": 0.10792527286678463, "grad_norm": 0.8594753411421615, "learning_rate": 9.841601303292664e-06, "loss": 0.4222, "step": 1723 }, { "epoch": 0.10798791086612694, "grad_norm": 0.9130734500730797, "learning_rate": 9.84134789656216e-06, "loss": 0.4623, "step": 1724 }, { "epoch": 0.10805054886546923, "grad_norm": 0.8043543914739791, "learning_rate": 9.841094290560878e-06, "loss": 0.4331, "step": 1725 }, { "epoch": 0.10811318686481154, "grad_norm": 0.7725050648198681, "learning_rate": 9.840840485299255e-06, "loss": 0.47, "step": 1726 }, { "epoch": 0.10817582486415384, "grad_norm": 0.8644720260195033, "learning_rate": 9.840586480787741e-06, "loss": 0.4829, "step": 1727 }, { "epoch": 0.10823846286349614, "grad_norm": 0.9566882242234561, "learning_rate": 9.840332277036786e-06, "loss": 0.4273, "step": 1728 }, { "epoch": 0.10830110086283844, "grad_norm": 0.9761980142861405, "learning_rate": 9.840077874056859e-06, "loss": 0.452, "step": 1729 }, { "epoch": 0.10836373886218074, "grad_norm": 0.8719169850149026, "learning_rate": 9.839823271858426e-06, "loss": 0.4648, "step": 1730 }, { "epoch": 0.10842637686152304, "grad_norm": 0.8969050921120731, "learning_rate": 9.83956847045197e-06, "loss": 0.4516, "step": 1731 }, { "epoch": 0.10848901486086535, "grad_norm": 0.828756635376854, "learning_rate": 9.839313469847978e-06, "loss": 0.4608, "step": 1732 }, { "epoch": 0.10855165286020764, "grad_norm": 0.9081884583948283, "learning_rate": 9.839058270056945e-06, "loss": 0.4678, "step": 1733 }, { "epoch": 0.10861429085954995, "grad_norm": 0.9144341872695031, "learning_rate": 9.838802871089374e-06, "loss": 0.5011, "step": 1734 }, { "epoch": 0.10867692885889224, "grad_norm": 0.9119872641598379, "learning_rate": 9.83854727295578e-06, "loss": 0.4492, "step": 1735 }, { "epoch": 0.10873956685823455, "grad_norm": 0.854625886087694, "learning_rate": 9.838291475666684e-06, "loss": 0.4779, "step": 1736 }, { "epoch": 0.10880220485757684, "grad_norm": 0.9165727896008979, "learning_rate": 9.83803547923261e-06, "loss": 0.4366, "step": 1737 }, { "epoch": 0.10886484285691915, "grad_norm": 1.0070723316251193, "learning_rate": 9.837779283664099e-06, "loss": 0.4722, "step": 1738 }, { "epoch": 0.10892748085626146, "grad_norm": 0.8887584049947701, "learning_rate": 9.837522888971694e-06, "loss": 0.4933, "step": 1739 }, { "epoch": 0.10899011885560375, "grad_norm": 1.0945554215099949, "learning_rate": 9.837266295165949e-06, "loss": 0.4461, "step": 1740 }, { "epoch": 0.10905275685494606, "grad_norm": 0.9280059674521232, "learning_rate": 9.837009502257426e-06, "loss": 0.47, "step": 1741 }, { "epoch": 0.10911539485428835, "grad_norm": 0.9009191779341991, "learning_rate": 9.836752510256694e-06, "loss": 0.4866, "step": 1742 }, { "epoch": 0.10917803285363066, "grad_norm": 0.9573147622727864, "learning_rate": 9.836495319174331e-06, "loss": 0.4811, "step": 1743 }, { "epoch": 0.10924067085297295, "grad_norm": 0.8880653453612596, "learning_rate": 9.836237929020922e-06, "loss": 0.4774, "step": 1744 }, { "epoch": 0.10930330885231526, "grad_norm": 0.9006539444317118, "learning_rate": 9.835980339807063e-06, "loss": 0.4445, "step": 1745 }, { "epoch": 0.10936594685165756, "grad_norm": 0.8500135235833722, "learning_rate": 9.835722551543357e-06, "loss": 0.4486, "step": 1746 }, { "epoch": 0.10942858485099986, "grad_norm": 1.015211072333527, "learning_rate": 9.83546456424041e-06, "loss": 0.5279, "step": 1747 }, { "epoch": 0.10949122285034216, "grad_norm": 1.0234548982738485, "learning_rate": 9.835206377908845e-06, "loss": 0.4641, "step": 1748 }, { "epoch": 0.10955386084968446, "grad_norm": 0.8749914250972367, "learning_rate": 9.834947992559291e-06, "loss": 0.4098, "step": 1749 }, { "epoch": 0.10961649884902676, "grad_norm": 0.952860909544951, "learning_rate": 9.834689408202378e-06, "loss": 0.4473, "step": 1750 }, { "epoch": 0.10967913684836907, "grad_norm": 0.8539255868737581, "learning_rate": 9.834430624848752e-06, "loss": 0.4826, "step": 1751 }, { "epoch": 0.10974177484771136, "grad_norm": 0.8779646998854063, "learning_rate": 9.834171642509063e-06, "loss": 0.4498, "step": 1752 }, { "epoch": 0.10980441284705367, "grad_norm": 0.9406805526413583, "learning_rate": 9.833912461193973e-06, "loss": 0.4226, "step": 1753 }, { "epoch": 0.10986705084639596, "grad_norm": 0.8777633701092064, "learning_rate": 9.833653080914149e-06, "loss": 0.4091, "step": 1754 }, { "epoch": 0.10992968884573827, "grad_norm": 0.8776340875297767, "learning_rate": 9.833393501680266e-06, "loss": 0.4565, "step": 1755 }, { "epoch": 0.10999232684508056, "grad_norm": 0.8082185189860553, "learning_rate": 9.83313372350301e-06, "loss": 0.4007, "step": 1756 }, { "epoch": 0.11005496484442287, "grad_norm": 0.9346803961552255, "learning_rate": 9.832873746393073e-06, "loss": 0.4306, "step": 1757 }, { "epoch": 0.11011760284376516, "grad_norm": 0.8504990623853856, "learning_rate": 9.832613570361157e-06, "loss": 0.4223, "step": 1758 }, { "epoch": 0.11018024084310747, "grad_norm": 0.9068985625592745, "learning_rate": 9.832353195417967e-06, "loss": 0.4426, "step": 1759 }, { "epoch": 0.11024287884244978, "grad_norm": 0.8855831518129471, "learning_rate": 9.832092621574224e-06, "loss": 0.4122, "step": 1760 }, { "epoch": 0.11030551684179207, "grad_norm": 0.9219516343181697, "learning_rate": 9.83183184884065e-06, "loss": 0.4282, "step": 1761 }, { "epoch": 0.11036815484113438, "grad_norm": 0.8799376637555896, "learning_rate": 9.831570877227983e-06, "loss": 0.4723, "step": 1762 }, { "epoch": 0.11043079284047667, "grad_norm": 0.87486849346039, "learning_rate": 9.831309706746959e-06, "loss": 0.4365, "step": 1763 }, { "epoch": 0.11049343083981898, "grad_norm": 0.8654277231578429, "learning_rate": 9.831048337408333e-06, "loss": 0.4623, "step": 1764 }, { "epoch": 0.11055606883916128, "grad_norm": 0.8444809191854944, "learning_rate": 9.83078676922286e-06, "loss": 0.4785, "step": 1765 }, { "epoch": 0.11061870683850358, "grad_norm": 1.1293142437366843, "learning_rate": 9.830525002201307e-06, "loss": 0.4515, "step": 1766 }, { "epoch": 0.11068134483784588, "grad_norm": 0.8900615490833907, "learning_rate": 9.830263036354448e-06, "loss": 0.4466, "step": 1767 }, { "epoch": 0.11074398283718818, "grad_norm": 0.8652821195140946, "learning_rate": 9.830000871693065e-06, "loss": 0.4259, "step": 1768 }, { "epoch": 0.11080662083653048, "grad_norm": 0.8840333613642132, "learning_rate": 9.829738508227953e-06, "loss": 0.4629, "step": 1769 }, { "epoch": 0.11086925883587279, "grad_norm": 0.8125703140841185, "learning_rate": 9.829475945969904e-06, "loss": 0.4061, "step": 1770 }, { "epoch": 0.11093189683521508, "grad_norm": 0.8641317274558977, "learning_rate": 9.82921318492973e-06, "loss": 0.4103, "step": 1771 }, { "epoch": 0.11099453483455739, "grad_norm": 0.8877273206625376, "learning_rate": 9.828950225118245e-06, "loss": 0.434, "step": 1772 }, { "epoch": 0.11105717283389968, "grad_norm": 0.8753463300069221, "learning_rate": 9.828687066546273e-06, "loss": 0.4823, "step": 1773 }, { "epoch": 0.11111981083324199, "grad_norm": 0.8665560786318306, "learning_rate": 9.828423709224644e-06, "loss": 0.4659, "step": 1774 }, { "epoch": 0.11118244883258428, "grad_norm": 0.8536529725184699, "learning_rate": 9.8281601531642e-06, "loss": 0.451, "step": 1775 }, { "epoch": 0.11124508683192659, "grad_norm": 0.94319748321395, "learning_rate": 9.827896398375787e-06, "loss": 0.428, "step": 1776 }, { "epoch": 0.11130772483126888, "grad_norm": 0.9329199143995318, "learning_rate": 9.827632444870264e-06, "loss": 0.4979, "step": 1777 }, { "epoch": 0.11137036283061119, "grad_norm": 1.0859475793029072, "learning_rate": 9.82736829265849e-06, "loss": 0.4507, "step": 1778 }, { "epoch": 0.11143300082995349, "grad_norm": 0.8469204274642447, "learning_rate": 9.827103941751344e-06, "loss": 0.427, "step": 1779 }, { "epoch": 0.11149563882929579, "grad_norm": 0.7489782166092357, "learning_rate": 9.826839392159702e-06, "loss": 0.4747, "step": 1780 }, { "epoch": 0.11155827682863809, "grad_norm": 0.9021802736852541, "learning_rate": 9.826574643894457e-06, "loss": 0.4451, "step": 1781 }, { "epoch": 0.1116209148279804, "grad_norm": 0.9054750028292631, "learning_rate": 9.826309696966503e-06, "loss": 0.4714, "step": 1782 }, { "epoch": 0.1116835528273227, "grad_norm": 0.9119500267574243, "learning_rate": 9.826044551386743e-06, "loss": 0.4595, "step": 1783 }, { "epoch": 0.111746190826665, "grad_norm": 0.8365916935888272, "learning_rate": 9.825779207166098e-06, "loss": 0.4039, "step": 1784 }, { "epoch": 0.1118088288260073, "grad_norm": 0.8685061048231827, "learning_rate": 9.825513664315482e-06, "loss": 0.496, "step": 1785 }, { "epoch": 0.1118714668253496, "grad_norm": 0.8631829656525766, "learning_rate": 9.825247922845829e-06, "loss": 0.4512, "step": 1786 }, { "epoch": 0.1119341048246919, "grad_norm": 0.9195597415227157, "learning_rate": 9.824981982768076e-06, "loss": 0.4241, "step": 1787 }, { "epoch": 0.1119967428240342, "grad_norm": 0.8202935239095179, "learning_rate": 9.824715844093169e-06, "loss": 0.416, "step": 1788 }, { "epoch": 0.1120593808233765, "grad_norm": 0.8842601017209399, "learning_rate": 9.824449506832063e-06, "loss": 0.4618, "step": 1789 }, { "epoch": 0.1121220188227188, "grad_norm": 0.8668653731511038, "learning_rate": 9.82418297099572e-06, "loss": 0.4359, "step": 1790 }, { "epoch": 0.11218465682206111, "grad_norm": 0.8767730634735029, "learning_rate": 9.823916236595108e-06, "loss": 0.4416, "step": 1791 }, { "epoch": 0.1122472948214034, "grad_norm": 0.9289844726162338, "learning_rate": 9.82364930364121e-06, "loss": 0.4888, "step": 1792 }, { "epoch": 0.11230993282074571, "grad_norm": 0.8563483511828351, "learning_rate": 9.823382172145012e-06, "loss": 0.4164, "step": 1793 }, { "epoch": 0.112372570820088, "grad_norm": 0.780248997620945, "learning_rate": 9.823114842117507e-06, "loss": 0.4463, "step": 1794 }, { "epoch": 0.11243520881943031, "grad_norm": 0.8111741736554748, "learning_rate": 9.822847313569702e-06, "loss": 0.423, "step": 1795 }, { "epoch": 0.1124978468187726, "grad_norm": 0.8404707136833853, "learning_rate": 9.822579586512603e-06, "loss": 0.4513, "step": 1796 }, { "epoch": 0.11256048481811491, "grad_norm": 0.8454849622200288, "learning_rate": 9.822311660957238e-06, "loss": 0.4078, "step": 1797 }, { "epoch": 0.1126231228174572, "grad_norm": 0.8107745083119459, "learning_rate": 9.822043536914627e-06, "loss": 0.4208, "step": 1798 }, { "epoch": 0.11268576081679951, "grad_norm": 0.8383428811095872, "learning_rate": 9.821775214395807e-06, "loss": 0.4327, "step": 1799 }, { "epoch": 0.1127483988161418, "grad_norm": 0.9677545367218037, "learning_rate": 9.821506693411829e-06, "loss": 0.4616, "step": 1800 }, { "epoch": 0.11281103681548411, "grad_norm": 0.8924349795888481, "learning_rate": 9.821237973973736e-06, "loss": 0.4334, "step": 1801 }, { "epoch": 0.11287367481482641, "grad_norm": 0.9338246917050314, "learning_rate": 9.820969056092597e-06, "loss": 0.4324, "step": 1802 }, { "epoch": 0.11293631281416872, "grad_norm": 0.9125304707959679, "learning_rate": 9.820699939779475e-06, "loss": 0.4436, "step": 1803 }, { "epoch": 0.11299895081351102, "grad_norm": 0.8977270023078138, "learning_rate": 9.820430625045448e-06, "loss": 0.4443, "step": 1804 }, { "epoch": 0.11306158881285332, "grad_norm": 0.8553912431816587, "learning_rate": 9.820161111901604e-06, "loss": 0.3957, "step": 1805 }, { "epoch": 0.11312422681219562, "grad_norm": 0.9137765647527338, "learning_rate": 9.819891400359031e-06, "loss": 0.4865, "step": 1806 }, { "epoch": 0.11318686481153792, "grad_norm": 0.9094275204817321, "learning_rate": 9.819621490428835e-06, "loss": 0.4605, "step": 1807 }, { "epoch": 0.11324950281088023, "grad_norm": 0.8915574852316523, "learning_rate": 9.819351382122125e-06, "loss": 0.4228, "step": 1808 }, { "epoch": 0.11331214081022252, "grad_norm": 0.8394056472734308, "learning_rate": 9.819081075450014e-06, "loss": 0.4213, "step": 1809 }, { "epoch": 0.11337477880956483, "grad_norm": 0.9438529646140494, "learning_rate": 9.818810570423634e-06, "loss": 0.4446, "step": 1810 }, { "epoch": 0.11343741680890712, "grad_norm": 0.8567747173963876, "learning_rate": 9.818539867054117e-06, "loss": 0.4417, "step": 1811 }, { "epoch": 0.11350005480824943, "grad_norm": 0.8865814135217905, "learning_rate": 9.818268965352604e-06, "loss": 0.4586, "step": 1812 }, { "epoch": 0.11356269280759172, "grad_norm": 0.8615889312188874, "learning_rate": 9.817997865330246e-06, "loss": 0.441, "step": 1813 }, { "epoch": 0.11362533080693403, "grad_norm": 0.8075814745515107, "learning_rate": 9.817726566998202e-06, "loss": 0.4497, "step": 1814 }, { "epoch": 0.11368796880627632, "grad_norm": 0.9338161023367897, "learning_rate": 9.817455070367637e-06, "loss": 0.4573, "step": 1815 }, { "epoch": 0.11375060680561863, "grad_norm": 0.8819476649983128, "learning_rate": 9.817183375449729e-06, "loss": 0.4471, "step": 1816 }, { "epoch": 0.11381324480496093, "grad_norm": 0.839917304445873, "learning_rate": 9.816911482255658e-06, "loss": 0.4249, "step": 1817 }, { "epoch": 0.11387588280430323, "grad_norm": 0.800213665244549, "learning_rate": 9.816639390796616e-06, "loss": 0.4507, "step": 1818 }, { "epoch": 0.11393852080364553, "grad_norm": 0.9242563535970489, "learning_rate": 9.816367101083803e-06, "loss": 0.431, "step": 1819 }, { "epoch": 0.11400115880298783, "grad_norm": 0.8499915501654253, "learning_rate": 9.816094613128427e-06, "loss": 0.4161, "step": 1820 }, { "epoch": 0.11406379680233013, "grad_norm": 0.9061046785795988, "learning_rate": 9.815821926941703e-06, "loss": 0.4407, "step": 1821 }, { "epoch": 0.11412643480167244, "grad_norm": 0.8782420244272203, "learning_rate": 9.815549042534854e-06, "loss": 0.4231, "step": 1822 }, { "epoch": 0.11418907280101473, "grad_norm": 0.877207147384595, "learning_rate": 9.815275959919112e-06, "loss": 0.4747, "step": 1823 }, { "epoch": 0.11425171080035704, "grad_norm": 0.9444735375534098, "learning_rate": 9.815002679105718e-06, "loss": 0.447, "step": 1824 }, { "epoch": 0.11431434879969933, "grad_norm": 0.9840523683625316, "learning_rate": 9.81472920010592e-06, "loss": 0.4798, "step": 1825 }, { "epoch": 0.11437698679904164, "grad_norm": 0.888144664773925, "learning_rate": 9.814455522930975e-06, "loss": 0.4664, "step": 1826 }, { "epoch": 0.11443962479838395, "grad_norm": 0.8732070494029672, "learning_rate": 9.814181647592146e-06, "loss": 0.4329, "step": 1827 }, { "epoch": 0.11450226279772624, "grad_norm": 0.8915971083823488, "learning_rate": 9.813907574100709e-06, "loss": 0.4463, "step": 1828 }, { "epoch": 0.11456490079706855, "grad_norm": 0.7935610782977627, "learning_rate": 9.81363330246794e-06, "loss": 0.4885, "step": 1829 }, { "epoch": 0.11462753879641084, "grad_norm": 0.9061445728996753, "learning_rate": 9.813358832705133e-06, "loss": 0.4571, "step": 1830 }, { "epoch": 0.11469017679575315, "grad_norm": 1.0162545580906128, "learning_rate": 9.813084164823583e-06, "loss": 0.4604, "step": 1831 }, { "epoch": 0.11475281479509544, "grad_norm": 0.8368103257381461, "learning_rate": 9.812809298834594e-06, "loss": 0.4417, "step": 1832 }, { "epoch": 0.11481545279443775, "grad_norm": 0.9126132060443275, "learning_rate": 9.812534234749482e-06, "loss": 0.4939, "step": 1833 }, { "epoch": 0.11487809079378004, "grad_norm": 0.8453153253313673, "learning_rate": 9.812258972579567e-06, "loss": 0.4521, "step": 1834 }, { "epoch": 0.11494072879312235, "grad_norm": 0.7965715860216054, "learning_rate": 9.811983512336178e-06, "loss": 0.4736, "step": 1835 }, { "epoch": 0.11500336679246465, "grad_norm": 0.8912920536376826, "learning_rate": 9.811707854030656e-06, "loss": 0.4519, "step": 1836 }, { "epoch": 0.11506600479180695, "grad_norm": 0.9492911375361305, "learning_rate": 9.811431997674347e-06, "loss": 0.4384, "step": 1837 }, { "epoch": 0.11512864279114925, "grad_norm": 0.8292140892331655, "learning_rate": 9.811155943278603e-06, "loss": 0.4338, "step": 1838 }, { "epoch": 0.11519128079049155, "grad_norm": 0.7740482895225712, "learning_rate": 9.810879690854787e-06, "loss": 0.4802, "step": 1839 }, { "epoch": 0.11525391878983385, "grad_norm": 0.8095614305250138, "learning_rate": 9.81060324041427e-06, "loss": 0.4345, "step": 1840 }, { "epoch": 0.11531655678917616, "grad_norm": 0.8746284578008088, "learning_rate": 9.810326591968431e-06, "loss": 0.4382, "step": 1841 }, { "epoch": 0.11537919478851845, "grad_norm": 0.8577072766322007, "learning_rate": 9.810049745528656e-06, "loss": 0.4574, "step": 1842 }, { "epoch": 0.11544183278786076, "grad_norm": 1.02853805132689, "learning_rate": 9.809772701106342e-06, "loss": 0.4618, "step": 1843 }, { "epoch": 0.11550447078720305, "grad_norm": 0.9983689663120909, "learning_rate": 9.80949545871289e-06, "loss": 0.407, "step": 1844 }, { "epoch": 0.11556710878654536, "grad_norm": 1.0555470442906645, "learning_rate": 9.809218018359712e-06, "loss": 0.4574, "step": 1845 }, { "epoch": 0.11562974678588765, "grad_norm": 0.8873253127472138, "learning_rate": 9.808940380058226e-06, "loss": 0.4692, "step": 1846 }, { "epoch": 0.11569238478522996, "grad_norm": 0.9117737700902694, "learning_rate": 9.808662543819864e-06, "loss": 0.4317, "step": 1847 }, { "epoch": 0.11575502278457227, "grad_norm": 0.8092209775339054, "learning_rate": 9.808384509656059e-06, "loss": 0.387, "step": 1848 }, { "epoch": 0.11581766078391456, "grad_norm": 0.8869226847897876, "learning_rate": 9.808106277578255e-06, "loss": 0.4172, "step": 1849 }, { "epoch": 0.11588029878325687, "grad_norm": 0.8657560217405227, "learning_rate": 9.807827847597903e-06, "loss": 0.4242, "step": 1850 }, { "epoch": 0.11594293678259916, "grad_norm": 0.8826669184181659, "learning_rate": 9.807549219726467e-06, "loss": 0.4508, "step": 1851 }, { "epoch": 0.11600557478194147, "grad_norm": 0.9204109756552029, "learning_rate": 9.807270393975412e-06, "loss": 0.4877, "step": 1852 }, { "epoch": 0.11606821278128376, "grad_norm": 0.8920022310876107, "learning_rate": 9.806991370356213e-06, "loss": 0.4083, "step": 1853 }, { "epoch": 0.11613085078062607, "grad_norm": 0.827755862476535, "learning_rate": 9.806712148880359e-06, "loss": 0.4348, "step": 1854 }, { "epoch": 0.11619348877996837, "grad_norm": 0.8843952033547905, "learning_rate": 9.80643272955934e-06, "loss": 0.4017, "step": 1855 }, { "epoch": 0.11625612677931067, "grad_norm": 0.9583351269670265, "learning_rate": 9.806153112404658e-06, "loss": 0.4529, "step": 1856 }, { "epoch": 0.11631876477865297, "grad_norm": 1.0011670577368086, "learning_rate": 9.805873297427822e-06, "loss": 0.4808, "step": 1857 }, { "epoch": 0.11638140277799527, "grad_norm": 0.9404093424506966, "learning_rate": 9.805593284640349e-06, "loss": 0.4333, "step": 1858 }, { "epoch": 0.11644404077733757, "grad_norm": 0.8522502729715666, "learning_rate": 9.805313074053764e-06, "loss": 0.4097, "step": 1859 }, { "epoch": 0.11650667877667988, "grad_norm": 0.8655955617406346, "learning_rate": 9.805032665679601e-06, "loss": 0.4388, "step": 1860 }, { "epoch": 0.11656931677602217, "grad_norm": 0.928818914049864, "learning_rate": 9.804752059529401e-06, "loss": 0.4673, "step": 1861 }, { "epoch": 0.11663195477536448, "grad_norm": 0.8535197691834867, "learning_rate": 9.804471255614716e-06, "loss": 0.4163, "step": 1862 }, { "epoch": 0.11669459277470677, "grad_norm": 0.9325410239357725, "learning_rate": 9.8041902539471e-06, "loss": 0.4467, "step": 1863 }, { "epoch": 0.11675723077404908, "grad_norm": 0.8673886746712364, "learning_rate": 9.803909054538125e-06, "loss": 0.3982, "step": 1864 }, { "epoch": 0.11681986877339137, "grad_norm": 0.7962051903006204, "learning_rate": 9.80362765739936e-06, "loss": 0.4719, "step": 1865 }, { "epoch": 0.11688250677273368, "grad_norm": 0.8752645488158937, "learning_rate": 9.803346062542388e-06, "loss": 0.4664, "step": 1866 }, { "epoch": 0.11694514477207597, "grad_norm": 0.8464256411165557, "learning_rate": 9.803064269978802e-06, "loss": 0.426, "step": 1867 }, { "epoch": 0.11700778277141828, "grad_norm": 0.9449243371978524, "learning_rate": 9.802782279720198e-06, "loss": 0.4817, "step": 1868 }, { "epoch": 0.11707042077076059, "grad_norm": 0.8327541168629713, "learning_rate": 9.802500091778185e-06, "loss": 0.4563, "step": 1869 }, { "epoch": 0.11713305877010288, "grad_norm": 0.8782458761950669, "learning_rate": 9.802217706164376e-06, "loss": 0.4533, "step": 1870 }, { "epoch": 0.11719569676944519, "grad_norm": 0.8329608458563408, "learning_rate": 9.801935122890394e-06, "loss": 0.4038, "step": 1871 }, { "epoch": 0.11725833476878748, "grad_norm": 0.8669713938456914, "learning_rate": 9.801652341967872e-06, "loss": 0.4711, "step": 1872 }, { "epoch": 0.11732097276812979, "grad_norm": 0.8908329448807645, "learning_rate": 9.801369363408448e-06, "loss": 0.4449, "step": 1873 }, { "epoch": 0.11738361076747209, "grad_norm": 0.9484261268105042, "learning_rate": 9.801086187223772e-06, "loss": 0.4623, "step": 1874 }, { "epoch": 0.11744624876681439, "grad_norm": 0.779672442016922, "learning_rate": 9.800802813425495e-06, "loss": 0.4073, "step": 1875 }, { "epoch": 0.11750888676615669, "grad_norm": 0.8137531800030036, "learning_rate": 9.800519242025285e-06, "loss": 0.4152, "step": 1876 }, { "epoch": 0.117571524765499, "grad_norm": 0.8557024115057666, "learning_rate": 9.80023547303481e-06, "loss": 0.4374, "step": 1877 }, { "epoch": 0.11763416276484129, "grad_norm": 0.9681301507168295, "learning_rate": 9.799951506465753e-06, "loss": 0.4323, "step": 1878 }, { "epoch": 0.1176968007641836, "grad_norm": 0.8669353901520958, "learning_rate": 9.7996673423298e-06, "loss": 0.4491, "step": 1879 }, { "epoch": 0.11775943876352589, "grad_norm": 0.7389160667306787, "learning_rate": 9.79938298063865e-06, "loss": 0.4874, "step": 1880 }, { "epoch": 0.1178220767628682, "grad_norm": 0.7794703842238357, "learning_rate": 9.799098421404006e-06, "loss": 0.4162, "step": 1881 }, { "epoch": 0.11788471476221049, "grad_norm": 0.9019050748078803, "learning_rate": 9.79881366463758e-06, "loss": 0.4332, "step": 1882 }, { "epoch": 0.1179473527615528, "grad_norm": 0.9161745479607387, "learning_rate": 9.798528710351089e-06, "loss": 0.4401, "step": 1883 }, { "epoch": 0.11800999076089509, "grad_norm": 0.8642639711281483, "learning_rate": 9.79824355855627e-06, "loss": 0.4262, "step": 1884 }, { "epoch": 0.1180726287602374, "grad_norm": 0.9304407248712889, "learning_rate": 9.797958209264853e-06, "loss": 0.4638, "step": 1885 }, { "epoch": 0.1181352667595797, "grad_norm": 0.894351778227182, "learning_rate": 9.797672662488588e-06, "loss": 0.4211, "step": 1886 }, { "epoch": 0.118197904758922, "grad_norm": 0.8523632438665623, "learning_rate": 9.797386918239225e-06, "loss": 0.4567, "step": 1887 }, { "epoch": 0.1182605427582643, "grad_norm": 0.8522848105714573, "learning_rate": 9.797100976528525e-06, "loss": 0.4344, "step": 1888 }, { "epoch": 0.1183231807576066, "grad_norm": 0.8555568071677059, "learning_rate": 9.796814837368258e-06, "loss": 0.4483, "step": 1889 }, { "epoch": 0.1183858187569489, "grad_norm": 0.8514662446840497, "learning_rate": 9.796528500770203e-06, "loss": 0.4128, "step": 1890 }, { "epoch": 0.1184484567562912, "grad_norm": 0.8662943577478178, "learning_rate": 9.796241966746145e-06, "loss": 0.423, "step": 1891 }, { "epoch": 0.11851109475563351, "grad_norm": 0.8472479170835439, "learning_rate": 9.795955235307876e-06, "loss": 0.4383, "step": 1892 }, { "epoch": 0.1185737327549758, "grad_norm": 0.880638112145388, "learning_rate": 9.7956683064672e-06, "loss": 0.4611, "step": 1893 }, { "epoch": 0.11863637075431811, "grad_norm": 0.8723754376340148, "learning_rate": 9.795381180235929e-06, "loss": 0.4536, "step": 1894 }, { "epoch": 0.1186990087536604, "grad_norm": 0.8538393903052174, "learning_rate": 9.795093856625875e-06, "loss": 0.436, "step": 1895 }, { "epoch": 0.11876164675300271, "grad_norm": 0.9587448332065376, "learning_rate": 9.794806335648868e-06, "loss": 0.4761, "step": 1896 }, { "epoch": 0.11882428475234501, "grad_norm": 0.8565138521963223, "learning_rate": 9.794518617316745e-06, "loss": 0.4841, "step": 1897 }, { "epoch": 0.11888692275168732, "grad_norm": 0.8705235098323003, "learning_rate": 9.794230701641343e-06, "loss": 0.4416, "step": 1898 }, { "epoch": 0.11894956075102961, "grad_norm": 0.8202464581466415, "learning_rate": 9.793942588634518e-06, "loss": 0.4538, "step": 1899 }, { "epoch": 0.11901219875037192, "grad_norm": 0.8908521424222513, "learning_rate": 9.793654278308125e-06, "loss": 0.4776, "step": 1900 }, { "epoch": 0.11907483674971421, "grad_norm": 0.9511568025639996, "learning_rate": 9.793365770674031e-06, "loss": 0.4009, "step": 1901 }, { "epoch": 0.11913747474905652, "grad_norm": 0.882567568100085, "learning_rate": 9.793077065744114e-06, "loss": 0.4779, "step": 1902 }, { "epoch": 0.11920011274839881, "grad_norm": 0.8571723577326055, "learning_rate": 9.792788163530254e-06, "loss": 0.4201, "step": 1903 }, { "epoch": 0.11926275074774112, "grad_norm": 0.8760476129173063, "learning_rate": 9.792499064044343e-06, "loss": 0.4523, "step": 1904 }, { "epoch": 0.11932538874708341, "grad_norm": 0.7843825700816958, "learning_rate": 9.792209767298282e-06, "loss": 0.4921, "step": 1905 }, { "epoch": 0.11938802674642572, "grad_norm": 0.8116121003920735, "learning_rate": 9.79192027330398e-06, "loss": 0.4327, "step": 1906 }, { "epoch": 0.11945066474576801, "grad_norm": 0.8585154877457689, "learning_rate": 9.791630582073345e-06, "loss": 0.4264, "step": 1907 }, { "epoch": 0.11951330274511032, "grad_norm": 0.7052824910939224, "learning_rate": 9.79134069361831e-06, "loss": 0.4666, "step": 1908 }, { "epoch": 0.11957594074445262, "grad_norm": 0.9319054263297472, "learning_rate": 9.7910506079508e-06, "loss": 0.4515, "step": 1909 }, { "epoch": 0.11963857874379492, "grad_norm": 0.6880721951419185, "learning_rate": 9.79076032508276e-06, "loss": 0.4598, "step": 1910 }, { "epoch": 0.11970121674313722, "grad_norm": 0.8738478308714286, "learning_rate": 9.790469845026136e-06, "loss": 0.4492, "step": 1911 }, { "epoch": 0.11976385474247953, "grad_norm": 0.9009560904629528, "learning_rate": 9.790179167792882e-06, "loss": 0.4826, "step": 1912 }, { "epoch": 0.11982649274182183, "grad_norm": 0.9450338411082336, "learning_rate": 9.789888293394965e-06, "loss": 0.452, "step": 1913 }, { "epoch": 0.11988913074116413, "grad_norm": 0.9554951015526164, "learning_rate": 9.789597221844358e-06, "loss": 0.4706, "step": 1914 }, { "epoch": 0.11995176874050643, "grad_norm": 0.7910428100332522, "learning_rate": 9.789305953153038e-06, "loss": 0.4817, "step": 1915 }, { "epoch": 0.12001440673984873, "grad_norm": 0.8702468802902968, "learning_rate": 9.789014487332999e-06, "loss": 0.4023, "step": 1916 }, { "epoch": 0.12007704473919104, "grad_norm": 0.9150530353493025, "learning_rate": 9.788722824396232e-06, "loss": 0.4697, "step": 1917 }, { "epoch": 0.12013968273853333, "grad_norm": 0.8164398242538387, "learning_rate": 9.788430964354747e-06, "loss": 0.42, "step": 1918 }, { "epoch": 0.12020232073787564, "grad_norm": 0.8910891412318117, "learning_rate": 9.788138907220553e-06, "loss": 0.4772, "step": 1919 }, { "epoch": 0.12026495873721793, "grad_norm": 0.8812851136303951, "learning_rate": 9.787846653005675e-06, "loss": 0.4645, "step": 1920 }, { "epoch": 0.12032759673656024, "grad_norm": 0.8956053236953402, "learning_rate": 9.787554201722138e-06, "loss": 0.4388, "step": 1921 }, { "epoch": 0.12039023473590253, "grad_norm": 0.7617935476842171, "learning_rate": 9.787261553381981e-06, "loss": 0.5035, "step": 1922 }, { "epoch": 0.12045287273524484, "grad_norm": 0.8906855387705271, "learning_rate": 9.786968707997253e-06, "loss": 0.4784, "step": 1923 }, { "epoch": 0.12051551073458713, "grad_norm": 0.877625738642612, "learning_rate": 9.786675665580002e-06, "loss": 0.4548, "step": 1924 }, { "epoch": 0.12057814873392944, "grad_norm": 0.9435432054605591, "learning_rate": 9.786382426142292e-06, "loss": 0.4458, "step": 1925 }, { "epoch": 0.12064078673327173, "grad_norm": 0.8520552182203984, "learning_rate": 9.786088989696194e-06, "loss": 0.4434, "step": 1926 }, { "epoch": 0.12070342473261404, "grad_norm": 0.9017825108790051, "learning_rate": 9.785795356253784e-06, "loss": 0.454, "step": 1927 }, { "epoch": 0.12076606273195634, "grad_norm": 0.8562990074599104, "learning_rate": 9.78550152582715e-06, "loss": 0.3969, "step": 1928 }, { "epoch": 0.12082870073129864, "grad_norm": 0.847255016459296, "learning_rate": 9.785207498428384e-06, "loss": 0.4704, "step": 1929 }, { "epoch": 0.12089133873064094, "grad_norm": 0.7905320037355367, "learning_rate": 9.78491327406959e-06, "loss": 0.4265, "step": 1930 }, { "epoch": 0.12095397672998325, "grad_norm": 0.9115492611743168, "learning_rate": 9.784618852762876e-06, "loss": 0.448, "step": 1931 }, { "epoch": 0.12101661472932554, "grad_norm": 0.8503515998889217, "learning_rate": 9.784324234520363e-06, "loss": 0.4392, "step": 1932 }, { "epoch": 0.12107925272866785, "grad_norm": 0.6695028803100502, "learning_rate": 9.784029419354177e-06, "loss": 0.4793, "step": 1933 }, { "epoch": 0.12114189072801015, "grad_norm": 0.8590726281894936, "learning_rate": 9.783734407276452e-06, "loss": 0.4175, "step": 1934 }, { "epoch": 0.12120452872735245, "grad_norm": 0.8614399218486347, "learning_rate": 9.78343919829933e-06, "loss": 0.4079, "step": 1935 }, { "epoch": 0.12126716672669476, "grad_norm": 0.8852876121090753, "learning_rate": 9.783143792434964e-06, "loss": 0.413, "step": 1936 }, { "epoch": 0.12132980472603705, "grad_norm": 0.8800285009052519, "learning_rate": 9.78284818969551e-06, "loss": 0.4151, "step": 1937 }, { "epoch": 0.12139244272537936, "grad_norm": 0.8580730050358212, "learning_rate": 9.782552390093139e-06, "loss": 0.4198, "step": 1938 }, { "epoch": 0.12145508072472165, "grad_norm": 0.8882665352039608, "learning_rate": 9.782256393640022e-06, "loss": 0.433, "step": 1939 }, { "epoch": 0.12151771872406396, "grad_norm": 0.8309596894341328, "learning_rate": 9.781960200348343e-06, "loss": 0.4174, "step": 1940 }, { "epoch": 0.12158035672340625, "grad_norm": 0.8910879768307287, "learning_rate": 9.781663810230298e-06, "loss": 0.4145, "step": 1941 }, { "epoch": 0.12164299472274856, "grad_norm": 0.9255918586421936, "learning_rate": 9.781367223298082e-06, "loss": 0.4305, "step": 1942 }, { "epoch": 0.12170563272209085, "grad_norm": 0.8610108274097095, "learning_rate": 9.781070439563902e-06, "loss": 0.4798, "step": 1943 }, { "epoch": 0.12176827072143316, "grad_norm": 0.9032538311887406, "learning_rate": 9.780773459039977e-06, "loss": 0.445, "step": 1944 }, { "epoch": 0.12183090872077545, "grad_norm": 0.8850903931272996, "learning_rate": 9.780476281738528e-06, "loss": 0.4375, "step": 1945 }, { "epoch": 0.12189354672011776, "grad_norm": 0.9689633369027005, "learning_rate": 9.780178907671788e-06, "loss": 0.4555, "step": 1946 }, { "epoch": 0.12195618471946006, "grad_norm": 0.7389075555406811, "learning_rate": 9.779881336851998e-06, "loss": 0.4843, "step": 1947 }, { "epoch": 0.12201882271880236, "grad_norm": 0.8998760281243913, "learning_rate": 9.779583569291405e-06, "loss": 0.4481, "step": 1948 }, { "epoch": 0.12208146071814466, "grad_norm": 0.8530380878704589, "learning_rate": 9.779285605002265e-06, "loss": 0.4965, "step": 1949 }, { "epoch": 0.12214409871748697, "grad_norm": 0.8930443159422766, "learning_rate": 9.778987443996841e-06, "loss": 0.4475, "step": 1950 }, { "epoch": 0.12220673671682926, "grad_norm": 0.9361500358245101, "learning_rate": 9.778689086287407e-06, "loss": 0.477, "step": 1951 }, { "epoch": 0.12226937471617157, "grad_norm": 0.8832521336121351, "learning_rate": 9.778390531886245e-06, "loss": 0.4284, "step": 1952 }, { "epoch": 0.12233201271551386, "grad_norm": 0.8431885223713479, "learning_rate": 9.77809178080564e-06, "loss": 0.4245, "step": 1953 }, { "epoch": 0.12239465071485617, "grad_norm": 0.8831172994188349, "learning_rate": 9.777792833057893e-06, "loss": 0.4174, "step": 1954 }, { "epoch": 0.12245728871419846, "grad_norm": 0.7621422290422518, "learning_rate": 9.777493688655303e-06, "loss": 0.4947, "step": 1955 }, { "epoch": 0.12251992671354077, "grad_norm": 0.9511591613154979, "learning_rate": 9.777194347610189e-06, "loss": 0.4689, "step": 1956 }, { "epoch": 0.12258256471288308, "grad_norm": 0.9092383095725791, "learning_rate": 9.776894809934866e-06, "loss": 0.4595, "step": 1957 }, { "epoch": 0.12264520271222537, "grad_norm": 0.8280430115830753, "learning_rate": 9.776595075641667e-06, "loss": 0.4387, "step": 1958 }, { "epoch": 0.12270784071156768, "grad_norm": 0.8346567794777724, "learning_rate": 9.77629514474293e-06, "loss": 0.4913, "step": 1959 }, { "epoch": 0.12277047871090997, "grad_norm": 0.8939037252726106, "learning_rate": 9.775995017250995e-06, "loss": 0.4191, "step": 1960 }, { "epoch": 0.12283311671025228, "grad_norm": 0.954339365250822, "learning_rate": 9.775694693178222e-06, "loss": 0.4842, "step": 1961 }, { "epoch": 0.12289575470959457, "grad_norm": 0.872884066048064, "learning_rate": 9.775394172536967e-06, "loss": 0.4448, "step": 1962 }, { "epoch": 0.12295839270893688, "grad_norm": 0.9347014330890309, "learning_rate": 9.775093455339601e-06, "loss": 0.4729, "step": 1963 }, { "epoch": 0.12302103070827917, "grad_norm": 0.8399675614208689, "learning_rate": 9.774792541598504e-06, "loss": 0.4048, "step": 1964 }, { "epoch": 0.12308366870762148, "grad_norm": 0.8991716812353074, "learning_rate": 9.774491431326058e-06, "loss": 0.4933, "step": 1965 }, { "epoch": 0.12314630670696378, "grad_norm": 0.9256635214755898, "learning_rate": 9.77419012453466e-06, "loss": 0.4597, "step": 1966 }, { "epoch": 0.12320894470630608, "grad_norm": 0.8562747610961979, "learning_rate": 9.773888621236708e-06, "loss": 0.4536, "step": 1967 }, { "epoch": 0.12327158270564838, "grad_norm": 0.8358971795133557, "learning_rate": 9.773586921444616e-06, "loss": 0.4104, "step": 1968 }, { "epoch": 0.12333422070499069, "grad_norm": 0.7596544452598072, "learning_rate": 9.773285025170801e-06, "loss": 0.4831, "step": 1969 }, { "epoch": 0.12339685870433298, "grad_norm": 0.7928230186685794, "learning_rate": 9.772982932427689e-06, "loss": 0.4108, "step": 1970 }, { "epoch": 0.12345949670367529, "grad_norm": 0.8698664952868713, "learning_rate": 9.77268064322771e-06, "loss": 0.4213, "step": 1971 }, { "epoch": 0.12352213470301758, "grad_norm": 0.8598034842677503, "learning_rate": 9.772378157583314e-06, "loss": 0.4244, "step": 1972 }, { "epoch": 0.12358477270235989, "grad_norm": 0.8597939166270584, "learning_rate": 9.772075475506943e-06, "loss": 0.429, "step": 1973 }, { "epoch": 0.12364741070170218, "grad_norm": 0.8559225370518411, "learning_rate": 9.771772597011063e-06, "loss": 0.4629, "step": 1974 }, { "epoch": 0.12371004870104449, "grad_norm": 1.0404646663152395, "learning_rate": 9.771469522108137e-06, "loss": 0.4765, "step": 1975 }, { "epoch": 0.12377268670038678, "grad_norm": 0.8881557984884741, "learning_rate": 9.771166250810638e-06, "loss": 0.4317, "step": 1976 }, { "epoch": 0.12383532469972909, "grad_norm": 0.9135083737814151, "learning_rate": 9.770862783131051e-06, "loss": 0.4721, "step": 1977 }, { "epoch": 0.1238979626990714, "grad_norm": 0.8299129373524771, "learning_rate": 9.770559119081869e-06, "loss": 0.3922, "step": 1978 }, { "epoch": 0.12396060069841369, "grad_norm": 1.0097811319466954, "learning_rate": 9.770255258675585e-06, "loss": 0.4691, "step": 1979 }, { "epoch": 0.124023238697756, "grad_norm": 0.8589812941746178, "learning_rate": 9.76995120192471e-06, "loss": 0.471, "step": 1980 }, { "epoch": 0.1240858766970983, "grad_norm": 0.8892097782697058, "learning_rate": 9.769646948841759e-06, "loss": 0.411, "step": 1981 }, { "epoch": 0.1241485146964406, "grad_norm": 0.836382394889718, "learning_rate": 9.769342499439253e-06, "loss": 0.4179, "step": 1982 }, { "epoch": 0.1242111526957829, "grad_norm": 0.9701303555126329, "learning_rate": 9.769037853729726e-06, "loss": 0.4337, "step": 1983 }, { "epoch": 0.1242737906951252, "grad_norm": 0.8025952015137189, "learning_rate": 9.768733011725715e-06, "loss": 0.4411, "step": 1984 }, { "epoch": 0.1243364286944675, "grad_norm": 0.8717624177547921, "learning_rate": 9.768427973439769e-06, "loss": 0.45, "step": 1985 }, { "epoch": 0.1243990666938098, "grad_norm": 0.9064800083155948, "learning_rate": 9.76812273888444e-06, "loss": 0.4542, "step": 1986 }, { "epoch": 0.1244617046931521, "grad_norm": 0.9797016817669671, "learning_rate": 9.767817308072296e-06, "loss": 0.4272, "step": 1987 }, { "epoch": 0.1245243426924944, "grad_norm": 0.9332144097959241, "learning_rate": 9.767511681015907e-06, "loss": 0.4648, "step": 1988 }, { "epoch": 0.1245869806918367, "grad_norm": 0.8784522514899924, "learning_rate": 9.76720585772785e-06, "loss": 0.4258, "step": 1989 }, { "epoch": 0.124649618691179, "grad_norm": 0.8439068846303541, "learning_rate": 9.766899838220718e-06, "loss": 0.4425, "step": 1990 }, { "epoch": 0.1247122566905213, "grad_norm": 0.7321479731781613, "learning_rate": 9.766593622507103e-06, "loss": 0.4435, "step": 1991 }, { "epoch": 0.12477489468986361, "grad_norm": 0.8417725018487341, "learning_rate": 9.76628721059961e-06, "loss": 0.4215, "step": 1992 }, { "epoch": 0.1248375326892059, "grad_norm": 0.8796977911200452, "learning_rate": 9.765980602510849e-06, "loss": 0.4233, "step": 1993 }, { "epoch": 0.12490017068854821, "grad_norm": 0.7878704244519861, "learning_rate": 9.765673798253443e-06, "loss": 0.4696, "step": 1994 }, { "epoch": 0.1249628086878905, "grad_norm": 0.9176408645587596, "learning_rate": 9.76536679784002e-06, "loss": 0.4831, "step": 1995 }, { "epoch": 0.1250254466872328, "grad_norm": 0.888170379551344, "learning_rate": 9.765059601283212e-06, "loss": 0.4068, "step": 1996 }, { "epoch": 0.12508808468657512, "grad_norm": 0.8629017296950021, "learning_rate": 9.764752208595669e-06, "loss": 0.4641, "step": 1997 }, { "epoch": 0.1251507226859174, "grad_norm": 0.8697727302216106, "learning_rate": 9.764444619790041e-06, "loss": 0.4255, "step": 1998 }, { "epoch": 0.1252133606852597, "grad_norm": 0.8133250765798806, "learning_rate": 9.764136834878987e-06, "loss": 0.3907, "step": 1999 }, { "epoch": 0.125275998684602, "grad_norm": 0.9080153321639883, "learning_rate": 9.763828853875177e-06, "loss": 0.4315, "step": 2000 }, { "epoch": 0.12533863668394432, "grad_norm": 0.6909455843220903, "learning_rate": 9.763520676791287e-06, "loss": 0.4859, "step": 2001 }, { "epoch": 0.12540127468328663, "grad_norm": 0.7896755186114948, "learning_rate": 9.763212303640001e-06, "loss": 0.4853, "step": 2002 }, { "epoch": 0.1254639126826289, "grad_norm": 0.933548381592713, "learning_rate": 9.762903734434014e-06, "loss": 0.4565, "step": 2003 }, { "epoch": 0.12552655068197122, "grad_norm": 0.982648658820914, "learning_rate": 9.762594969186024e-06, "loss": 0.4907, "step": 2004 }, { "epoch": 0.12558918868131352, "grad_norm": 0.9947701639144322, "learning_rate": 9.762286007908741e-06, "loss": 0.43, "step": 2005 }, { "epoch": 0.12565182668065583, "grad_norm": 0.846969810884745, "learning_rate": 9.761976850614882e-06, "loss": 0.4448, "step": 2006 }, { "epoch": 0.1257144646799981, "grad_norm": 0.9013533073968757, "learning_rate": 9.761667497317173e-06, "loss": 0.4621, "step": 2007 }, { "epoch": 0.12577710267934042, "grad_norm": 0.8147084444742309, "learning_rate": 9.761357948028347e-06, "loss": 0.3919, "step": 2008 }, { "epoch": 0.12583974067868273, "grad_norm": 0.8916963068247526, "learning_rate": 9.761048202761142e-06, "loss": 0.4169, "step": 2009 }, { "epoch": 0.12590237867802503, "grad_norm": 0.8260349160941491, "learning_rate": 9.76073826152831e-06, "loss": 0.4063, "step": 2010 }, { "epoch": 0.12596501667736731, "grad_norm": 0.8735279711176607, "learning_rate": 9.760428124342608e-06, "loss": 0.4459, "step": 2011 }, { "epoch": 0.12602765467670962, "grad_norm": 0.9168879496020104, "learning_rate": 9.7601177912168e-06, "loss": 0.4092, "step": 2012 }, { "epoch": 0.12609029267605193, "grad_norm": 0.8548067308993058, "learning_rate": 9.759807262163662e-06, "loss": 0.4423, "step": 2013 }, { "epoch": 0.12615293067539424, "grad_norm": 0.89348098665832, "learning_rate": 9.75949653719597e-06, "loss": 0.4313, "step": 2014 }, { "epoch": 0.12621556867473652, "grad_norm": 0.8598510032170522, "learning_rate": 9.75918561632652e-06, "loss": 0.4458, "step": 2015 }, { "epoch": 0.12627820667407882, "grad_norm": 0.8778185840376277, "learning_rate": 9.758874499568106e-06, "loss": 0.4361, "step": 2016 }, { "epoch": 0.12634084467342113, "grad_norm": 0.8324730732058352, "learning_rate": 9.758563186933535e-06, "loss": 0.44, "step": 2017 }, { "epoch": 0.12640348267276344, "grad_norm": 0.8808563990140768, "learning_rate": 9.758251678435617e-06, "loss": 0.4678, "step": 2018 }, { "epoch": 0.12646612067210572, "grad_norm": 0.9712151362074406, "learning_rate": 9.75793997408718e-06, "loss": 0.4884, "step": 2019 }, { "epoch": 0.12652875867144803, "grad_norm": 0.883091683707926, "learning_rate": 9.757628073901049e-06, "loss": 0.411, "step": 2020 }, { "epoch": 0.12659139667079033, "grad_norm": 0.8657294110379281, "learning_rate": 9.757315977890064e-06, "loss": 0.4453, "step": 2021 }, { "epoch": 0.12665403467013264, "grad_norm": 0.955509872734563, "learning_rate": 9.75700368606707e-06, "loss": 0.4347, "step": 2022 }, { "epoch": 0.12671667266947495, "grad_norm": 0.8545629708570182, "learning_rate": 9.756691198444923e-06, "loss": 0.4388, "step": 2023 }, { "epoch": 0.12677931066881723, "grad_norm": 0.9323397915705518, "learning_rate": 9.756378515036481e-06, "loss": 0.4408, "step": 2024 }, { "epoch": 0.12684194866815954, "grad_norm": 0.9153132757586695, "learning_rate": 9.756065635854619e-06, "loss": 0.4125, "step": 2025 }, { "epoch": 0.12690458666750185, "grad_norm": 0.8580141862048717, "learning_rate": 9.75575256091221e-06, "loss": 0.4062, "step": 2026 }, { "epoch": 0.12696722466684415, "grad_norm": 0.8394931159853855, "learning_rate": 9.755439290222144e-06, "loss": 0.4564, "step": 2027 }, { "epoch": 0.12702986266618643, "grad_norm": 0.8441399555243873, "learning_rate": 9.755125823797315e-06, "loss": 0.4884, "step": 2028 }, { "epoch": 0.12709250066552874, "grad_norm": 0.8803089097707018, "learning_rate": 9.754812161650624e-06, "loss": 0.46, "step": 2029 }, { "epoch": 0.12715513866487105, "grad_norm": 0.9066221584711328, "learning_rate": 9.754498303794981e-06, "loss": 0.4084, "step": 2030 }, { "epoch": 0.12721777666421336, "grad_norm": 0.8310112819886916, "learning_rate": 9.754184250243306e-06, "loss": 0.4511, "step": 2031 }, { "epoch": 0.12728041466355564, "grad_norm": 0.8726795696019146, "learning_rate": 9.753870001008526e-06, "loss": 0.456, "step": 2032 }, { "epoch": 0.12734305266289794, "grad_norm": 0.8023723634057087, "learning_rate": 9.75355555610357e-06, "loss": 0.3957, "step": 2033 }, { "epoch": 0.12740569066224025, "grad_norm": 0.8050130343961314, "learning_rate": 9.75324091554139e-06, "loss": 0.3921, "step": 2034 }, { "epoch": 0.12746832866158256, "grad_norm": 0.9124820458098558, "learning_rate": 9.752926079334931e-06, "loss": 0.4375, "step": 2035 }, { "epoch": 0.12753096666092484, "grad_norm": 0.9110556984116792, "learning_rate": 9.75261104749715e-06, "loss": 0.419, "step": 2036 }, { "epoch": 0.12759360466026715, "grad_norm": 0.8706164931849545, "learning_rate": 9.752295820041017e-06, "loss": 0.4439, "step": 2037 }, { "epoch": 0.12765624265960945, "grad_norm": 0.9570859734750722, "learning_rate": 9.751980396979507e-06, "loss": 0.4632, "step": 2038 }, { "epoch": 0.12771888065895176, "grad_norm": 0.7886553351442371, "learning_rate": 9.751664778325602e-06, "loss": 0.4388, "step": 2039 }, { "epoch": 0.12778151865829404, "grad_norm": 0.8383529158615958, "learning_rate": 9.751348964092291e-06, "loss": 0.4357, "step": 2040 }, { "epoch": 0.12784415665763635, "grad_norm": 0.8799563225724358, "learning_rate": 9.751032954292576e-06, "loss": 0.4553, "step": 2041 }, { "epoch": 0.12790679465697866, "grad_norm": 0.8462974517060525, "learning_rate": 9.750716748939463e-06, "loss": 0.3834, "step": 2042 }, { "epoch": 0.12796943265632096, "grad_norm": 0.9089764984716264, "learning_rate": 9.750400348045966e-06, "loss": 0.4617, "step": 2043 }, { "epoch": 0.12803207065566327, "grad_norm": 0.9237026023491177, "learning_rate": 9.75008375162511e-06, "loss": 0.4738, "step": 2044 }, { "epoch": 0.12809470865500555, "grad_norm": 0.7842969209751549, "learning_rate": 9.749766959689923e-06, "loss": 0.4474, "step": 2045 }, { "epoch": 0.12815734665434786, "grad_norm": 0.8528907040512711, "learning_rate": 9.749449972253449e-06, "loss": 0.4537, "step": 2046 }, { "epoch": 0.12821998465369017, "grad_norm": 0.979993972059209, "learning_rate": 9.749132789328732e-06, "loss": 0.5089, "step": 2047 }, { "epoch": 0.12828262265303247, "grad_norm": 0.8513663397782814, "learning_rate": 9.748815410928827e-06, "loss": 0.4048, "step": 2048 }, { "epoch": 0.12834526065237475, "grad_norm": 0.9671122965066332, "learning_rate": 9.748497837066798e-06, "loss": 0.4225, "step": 2049 }, { "epoch": 0.12840789865171706, "grad_norm": 0.8311741757958695, "learning_rate": 9.748180067755717e-06, "loss": 0.5078, "step": 2050 }, { "epoch": 0.12847053665105937, "grad_norm": 0.8592665184978404, "learning_rate": 9.747862103008664e-06, "loss": 0.4271, "step": 2051 }, { "epoch": 0.12853317465040168, "grad_norm": 0.767003818152189, "learning_rate": 9.747543942838723e-06, "loss": 0.4459, "step": 2052 }, { "epoch": 0.12859581264974396, "grad_norm": 0.8746017007166439, "learning_rate": 9.747225587258995e-06, "loss": 0.4516, "step": 2053 }, { "epoch": 0.12865845064908626, "grad_norm": 0.9243591429428598, "learning_rate": 9.74690703628258e-06, "loss": 0.4305, "step": 2054 }, { "epoch": 0.12872108864842857, "grad_norm": 0.8867291733512515, "learning_rate": 9.746588289922591e-06, "loss": 0.4531, "step": 2055 }, { "epoch": 0.12878372664777088, "grad_norm": 0.9791520813199897, "learning_rate": 9.746269348192145e-06, "loss": 0.4587, "step": 2056 }, { "epoch": 0.12884636464711316, "grad_norm": 0.8915441619125695, "learning_rate": 9.745950211104374e-06, "loss": 0.4153, "step": 2057 }, { "epoch": 0.12890900264645547, "grad_norm": 0.8665394804766472, "learning_rate": 9.74563087867241e-06, "loss": 0.4283, "step": 2058 }, { "epoch": 0.12897164064579777, "grad_norm": 0.8335906962051938, "learning_rate": 9.745311350909399e-06, "loss": 0.4171, "step": 2059 }, { "epoch": 0.12903427864514008, "grad_norm": 0.8843820658663534, "learning_rate": 9.74499162782849e-06, "loss": 0.4611, "step": 2060 }, { "epoch": 0.12909691664448236, "grad_norm": 0.8935214659814371, "learning_rate": 9.744671709442847e-06, "loss": 0.4535, "step": 2061 }, { "epoch": 0.12915955464382467, "grad_norm": 0.8880458998206955, "learning_rate": 9.744351595765635e-06, "loss": 0.4439, "step": 2062 }, { "epoch": 0.12922219264316698, "grad_norm": 0.9103310682799571, "learning_rate": 9.74403128681003e-06, "loss": 0.4321, "step": 2063 }, { "epoch": 0.12928483064250929, "grad_norm": 0.8564431268135086, "learning_rate": 9.743710782589217e-06, "loss": 0.4615, "step": 2064 }, { "epoch": 0.1293474686418516, "grad_norm": 0.8644586404774781, "learning_rate": 9.743390083116389e-06, "loss": 0.3982, "step": 2065 }, { "epoch": 0.12941010664119387, "grad_norm": 0.7673740818181908, "learning_rate": 9.743069188404744e-06, "loss": 0.4786, "step": 2066 }, { "epoch": 0.12947274464053618, "grad_norm": 0.9348737651679134, "learning_rate": 9.742748098467492e-06, "loss": 0.4401, "step": 2067 }, { "epoch": 0.1295353826398785, "grad_norm": 0.8383828559393218, "learning_rate": 9.742426813317847e-06, "loss": 0.4062, "step": 2068 }, { "epoch": 0.1295980206392208, "grad_norm": 0.8917451892204629, "learning_rate": 9.742105332969033e-06, "loss": 0.4334, "step": 2069 }, { "epoch": 0.12966065863856308, "grad_norm": 0.8683136765903321, "learning_rate": 9.741783657434286e-06, "loss": 0.4332, "step": 2070 }, { "epoch": 0.12972329663790538, "grad_norm": 0.9014577489559453, "learning_rate": 9.74146178672684e-06, "loss": 0.4246, "step": 2071 }, { "epoch": 0.1297859346372477, "grad_norm": 0.9589528320282293, "learning_rate": 9.741139720859948e-06, "loss": 0.4479, "step": 2072 }, { "epoch": 0.12984857263659, "grad_norm": 0.7468152516383746, "learning_rate": 9.740817459846867e-06, "loss": 0.4856, "step": 2073 }, { "epoch": 0.12991121063593228, "grad_norm": 0.908066838412693, "learning_rate": 9.740495003700859e-06, "loss": 0.4287, "step": 2074 }, { "epoch": 0.12997384863527459, "grad_norm": 0.8219531984459191, "learning_rate": 9.740172352435194e-06, "loss": 0.4127, "step": 2075 }, { "epoch": 0.1300364866346169, "grad_norm": 0.7768455925740888, "learning_rate": 9.739849506063157e-06, "loss": 0.4686, "step": 2076 }, { "epoch": 0.1300991246339592, "grad_norm": 0.9383218971072638, "learning_rate": 9.739526464598034e-06, "loss": 0.4391, "step": 2077 }, { "epoch": 0.13016176263330148, "grad_norm": 0.8450537384981881, "learning_rate": 9.739203228053123e-06, "loss": 0.4332, "step": 2078 }, { "epoch": 0.1302244006326438, "grad_norm": 0.7596182859917648, "learning_rate": 9.738879796441728e-06, "loss": 0.502, "step": 2079 }, { "epoch": 0.1302870386319861, "grad_norm": 0.9353950815726088, "learning_rate": 9.73855616977716e-06, "loss": 0.4476, "step": 2080 }, { "epoch": 0.1303496766313284, "grad_norm": 0.9957119013716053, "learning_rate": 9.73823234807274e-06, "loss": 0.4787, "step": 2081 }, { "epoch": 0.13041231463067068, "grad_norm": 0.9250461280177235, "learning_rate": 9.737908331341798e-06, "loss": 0.4398, "step": 2082 }, { "epoch": 0.130474952630013, "grad_norm": 0.8722862695734025, "learning_rate": 9.73758411959767e-06, "loss": 0.4461, "step": 2083 }, { "epoch": 0.1305375906293553, "grad_norm": 0.8344402196138767, "learning_rate": 9.7372597128537e-06, "loss": 0.4028, "step": 2084 }, { "epoch": 0.1306002286286976, "grad_norm": 0.9218798034550804, "learning_rate": 9.73693511112324e-06, "loss": 0.4773, "step": 2085 }, { "epoch": 0.13066286662803991, "grad_norm": 0.7687854386751745, "learning_rate": 9.736610314419651e-06, "loss": 0.4224, "step": 2086 }, { "epoch": 0.1307255046273822, "grad_norm": 0.8934784212119946, "learning_rate": 9.736285322756305e-06, "loss": 0.4482, "step": 2087 }, { "epoch": 0.1307881426267245, "grad_norm": 0.9355473488871526, "learning_rate": 9.735960136146576e-06, "loss": 0.4454, "step": 2088 }, { "epoch": 0.1308507806260668, "grad_norm": 0.979981554988764, "learning_rate": 9.735634754603846e-06, "loss": 0.4655, "step": 2089 }, { "epoch": 0.13091341862540912, "grad_norm": 0.9142567062089845, "learning_rate": 9.735309178141513e-06, "loss": 0.4552, "step": 2090 }, { "epoch": 0.1309760566247514, "grad_norm": 0.8741745846148111, "learning_rate": 9.734983406772973e-06, "loss": 0.4272, "step": 2091 }, { "epoch": 0.1310386946240937, "grad_norm": 1.1000956883010664, "learning_rate": 9.734657440511638e-06, "loss": 0.4095, "step": 2092 }, { "epoch": 0.131101332623436, "grad_norm": 0.8415160776035115, "learning_rate": 9.734331279370925e-06, "loss": 0.4462, "step": 2093 }, { "epoch": 0.13116397062277832, "grad_norm": 0.7782170463323025, "learning_rate": 9.734004923364258e-06, "loss": 0.4942, "step": 2094 }, { "epoch": 0.1312266086221206, "grad_norm": 0.7984043822282996, "learning_rate": 9.733678372505068e-06, "loss": 0.3905, "step": 2095 }, { "epoch": 0.1312892466214629, "grad_norm": 0.9109716182097731, "learning_rate": 9.733351626806797e-06, "loss": 0.4634, "step": 2096 }, { "epoch": 0.13135188462080521, "grad_norm": 0.8242302375418116, "learning_rate": 9.733024686282898e-06, "loss": 0.4369, "step": 2097 }, { "epoch": 0.13141452262014752, "grad_norm": 0.9602117753687835, "learning_rate": 9.732697550946822e-06, "loss": 0.4961, "step": 2098 }, { "epoch": 0.1314771606194898, "grad_norm": 0.9457929286076865, "learning_rate": 9.732370220812035e-06, "loss": 0.4603, "step": 2099 }, { "epoch": 0.1315397986188321, "grad_norm": 0.9272297008909214, "learning_rate": 9.732042695892014e-06, "loss": 0.4219, "step": 2100 }, { "epoch": 0.13160243661817442, "grad_norm": 0.9167402340628865, "learning_rate": 9.731714976200236e-06, "loss": 0.4319, "step": 2101 }, { "epoch": 0.13166507461751673, "grad_norm": 0.8560026137950737, "learning_rate": 9.731387061750193e-06, "loss": 0.401, "step": 2102 }, { "epoch": 0.131727712616859, "grad_norm": 0.8833413241614748, "learning_rate": 9.731058952555378e-06, "loss": 0.4513, "step": 2103 }, { "epoch": 0.1317903506162013, "grad_norm": 0.808418940493975, "learning_rate": 9.730730648629298e-06, "loss": 0.4548, "step": 2104 }, { "epoch": 0.13185298861554362, "grad_norm": 0.8659482965154802, "learning_rate": 9.730402149985469e-06, "loss": 0.4045, "step": 2105 }, { "epoch": 0.13191562661488593, "grad_norm": 0.9478601818802093, "learning_rate": 9.730073456637408e-06, "loss": 0.4629, "step": 2106 }, { "epoch": 0.1319782646142282, "grad_norm": 0.8276304840136891, "learning_rate": 9.729744568598647e-06, "loss": 0.4156, "step": 2107 }, { "epoch": 0.13204090261357052, "grad_norm": 0.8405966541006353, "learning_rate": 9.72941548588272e-06, "loss": 0.4153, "step": 2108 }, { "epoch": 0.13210354061291282, "grad_norm": 0.8642937532057018, "learning_rate": 9.729086208503174e-06, "loss": 0.4368, "step": 2109 }, { "epoch": 0.13216617861225513, "grad_norm": 0.8864839571976726, "learning_rate": 9.728756736473562e-06, "loss": 0.441, "step": 2110 }, { "epoch": 0.13222881661159744, "grad_norm": 0.8918146164092077, "learning_rate": 9.728427069807445e-06, "loss": 0.4166, "step": 2111 }, { "epoch": 0.13229145461093972, "grad_norm": 0.8422247028600206, "learning_rate": 9.728097208518393e-06, "loss": 0.453, "step": 2112 }, { "epoch": 0.13235409261028203, "grad_norm": 0.8733152189483934, "learning_rate": 9.727767152619982e-06, "loss": 0.4331, "step": 2113 }, { "epoch": 0.13241673060962433, "grad_norm": 0.9782218870159587, "learning_rate": 9.727436902125798e-06, "loss": 0.4818, "step": 2114 }, { "epoch": 0.13247936860896664, "grad_norm": 0.8379718918800763, "learning_rate": 9.727106457049432e-06, "loss": 0.4522, "step": 2115 }, { "epoch": 0.13254200660830892, "grad_norm": 0.9283562791051059, "learning_rate": 9.726775817404488e-06, "loss": 0.5075, "step": 2116 }, { "epoch": 0.13260464460765123, "grad_norm": 0.8440467725165138, "learning_rate": 9.726444983204574e-06, "loss": 0.4354, "step": 2117 }, { "epoch": 0.13266728260699354, "grad_norm": 0.9012323417565896, "learning_rate": 9.726113954463307e-06, "loss": 0.4331, "step": 2118 }, { "epoch": 0.13272992060633584, "grad_norm": 1.9517194563470441, "learning_rate": 9.725782731194312e-06, "loss": 0.4242, "step": 2119 }, { "epoch": 0.13279255860567812, "grad_norm": 0.9405411734135994, "learning_rate": 9.725451313411223e-06, "loss": 0.4392, "step": 2120 }, { "epoch": 0.13285519660502043, "grad_norm": 0.8593712235727949, "learning_rate": 9.725119701127681e-06, "loss": 0.4425, "step": 2121 }, { "epoch": 0.13291783460436274, "grad_norm": 0.8768825684428532, "learning_rate": 9.724787894357335e-06, "loss": 0.4017, "step": 2122 }, { "epoch": 0.13298047260370505, "grad_norm": 0.8792789583527646, "learning_rate": 9.724455893113842e-06, "loss": 0.4536, "step": 2123 }, { "epoch": 0.13304311060304733, "grad_norm": 0.8799351477849479, "learning_rate": 9.724123697410867e-06, "loss": 0.4196, "step": 2124 }, { "epoch": 0.13310574860238963, "grad_norm": 0.8782838784237665, "learning_rate": 9.723791307262086e-06, "loss": 0.4627, "step": 2125 }, { "epoch": 0.13316838660173194, "grad_norm": 0.846872776750436, "learning_rate": 9.723458722681176e-06, "loss": 0.4432, "step": 2126 }, { "epoch": 0.13323102460107425, "grad_norm": 0.7840915035669741, "learning_rate": 9.72312594368183e-06, "loss": 0.4002, "step": 2127 }, { "epoch": 0.13329366260041653, "grad_norm": 0.8263191234037119, "learning_rate": 9.722792970277741e-06, "loss": 0.4216, "step": 2128 }, { "epoch": 0.13335630059975884, "grad_norm": 0.8476495730573337, "learning_rate": 9.72245980248262e-06, "loss": 0.4269, "step": 2129 }, { "epoch": 0.13341893859910114, "grad_norm": 0.8305705966718464, "learning_rate": 9.722126440310173e-06, "loss": 0.4047, "step": 2130 }, { "epoch": 0.13348157659844345, "grad_norm": 0.8449023565602183, "learning_rate": 9.721792883774128e-06, "loss": 0.4315, "step": 2131 }, { "epoch": 0.13354421459778576, "grad_norm": 0.8798460524317174, "learning_rate": 9.721459132888212e-06, "loss": 0.4597, "step": 2132 }, { "epoch": 0.13360685259712804, "grad_norm": 0.8249873330353092, "learning_rate": 9.721125187666161e-06, "loss": 0.4339, "step": 2133 }, { "epoch": 0.13366949059647035, "grad_norm": 0.9279176723886595, "learning_rate": 9.72079104812172e-06, "loss": 0.4601, "step": 2134 }, { "epoch": 0.13373212859581265, "grad_norm": 0.936381955049695, "learning_rate": 9.720456714268643e-06, "loss": 0.4548, "step": 2135 }, { "epoch": 0.13379476659515496, "grad_norm": 0.8995382763071785, "learning_rate": 9.720122186120693e-06, "loss": 0.5048, "step": 2136 }, { "epoch": 0.13385740459449724, "grad_norm": 0.8385715440903346, "learning_rate": 9.719787463691637e-06, "loss": 0.3964, "step": 2137 }, { "epoch": 0.13392004259383955, "grad_norm": 0.8257095846577366, "learning_rate": 9.719452546995252e-06, "loss": 0.485, "step": 2138 }, { "epoch": 0.13398268059318186, "grad_norm": 1.040061344649781, "learning_rate": 9.719117436045327e-06, "loss": 0.458, "step": 2139 }, { "epoch": 0.13404531859252417, "grad_norm": 0.7992353174131153, "learning_rate": 9.71878213085565e-06, "loss": 0.4938, "step": 2140 }, { "epoch": 0.13410795659186645, "grad_norm": 0.8048263921102105, "learning_rate": 9.718446631440025e-06, "loss": 0.4078, "step": 2141 }, { "epoch": 0.13417059459120875, "grad_norm": 0.8937308109052531, "learning_rate": 9.718110937812262e-06, "loss": 0.4387, "step": 2142 }, { "epoch": 0.13423323259055106, "grad_norm": 0.8307589142258672, "learning_rate": 9.717775049986176e-06, "loss": 0.3744, "step": 2143 }, { "epoch": 0.13429587058989337, "grad_norm": 0.8097826014244687, "learning_rate": 9.717438967975593e-06, "loss": 0.4269, "step": 2144 }, { "epoch": 0.13435850858923565, "grad_norm": 0.8944994559635281, "learning_rate": 9.717102691794347e-06, "loss": 0.4648, "step": 2145 }, { "epoch": 0.13442114658857796, "grad_norm": 0.9658348627088638, "learning_rate": 9.716766221456278e-06, "loss": 0.4717, "step": 2146 }, { "epoch": 0.13448378458792026, "grad_norm": 0.9393262925904621, "learning_rate": 9.716429556975237e-06, "loss": 0.4466, "step": 2147 }, { "epoch": 0.13454642258726257, "grad_norm": 0.8234086764367133, "learning_rate": 9.71609269836508e-06, "loss": 0.448, "step": 2148 }, { "epoch": 0.13460906058660485, "grad_norm": 0.7971101830165479, "learning_rate": 9.715755645639671e-06, "loss": 0.4575, "step": 2149 }, { "epoch": 0.13467169858594716, "grad_norm": 0.9360481326285544, "learning_rate": 9.715418398812886e-06, "loss": 0.4167, "step": 2150 }, { "epoch": 0.13473433658528947, "grad_norm": 0.8971146854140705, "learning_rate": 9.715080957898604e-06, "loss": 0.4418, "step": 2151 }, { "epoch": 0.13479697458463177, "grad_norm": 0.8213756117406368, "learning_rate": 9.714743322910714e-06, "loss": 0.4235, "step": 2152 }, { "epoch": 0.13485961258397408, "grad_norm": 0.8428002272901317, "learning_rate": 9.714405493863112e-06, "loss": 0.419, "step": 2153 }, { "epoch": 0.13492225058331636, "grad_norm": 1.0022217124068777, "learning_rate": 9.714067470769708e-06, "loss": 0.4504, "step": 2154 }, { "epoch": 0.13498488858265867, "grad_norm": 0.8954731967577586, "learning_rate": 9.71372925364441e-06, "loss": 0.4316, "step": 2155 }, { "epoch": 0.13504752658200098, "grad_norm": 0.9022354644447992, "learning_rate": 9.713390842501143e-06, "loss": 0.4303, "step": 2156 }, { "epoch": 0.13511016458134328, "grad_norm": 0.8228634737975942, "learning_rate": 9.713052237353833e-06, "loss": 0.4387, "step": 2157 }, { "epoch": 0.13517280258068556, "grad_norm": 0.9650432420131635, "learning_rate": 9.712713438216417e-06, "loss": 0.4721, "step": 2158 }, { "epoch": 0.13523544058002787, "grad_norm": 0.9045021161912801, "learning_rate": 9.712374445102842e-06, "loss": 0.4444, "step": 2159 }, { "epoch": 0.13529807857937018, "grad_norm": 0.8342035994388977, "learning_rate": 9.712035258027061e-06, "loss": 0.4275, "step": 2160 }, { "epoch": 0.1353607165787125, "grad_norm": 0.8807765871049515, "learning_rate": 9.711695877003033e-06, "loss": 0.4098, "step": 2161 }, { "epoch": 0.13542335457805477, "grad_norm": 0.8575262193259905, "learning_rate": 9.711356302044728e-06, "loss": 0.444, "step": 2162 }, { "epoch": 0.13548599257739707, "grad_norm": 0.8334092422121091, "learning_rate": 9.711016533166125e-06, "loss": 0.4427, "step": 2163 }, { "epoch": 0.13554863057673938, "grad_norm": 0.7888257518994014, "learning_rate": 9.710676570381205e-06, "loss": 0.4622, "step": 2164 }, { "epoch": 0.1356112685760817, "grad_norm": 0.94374525229091, "learning_rate": 9.710336413703964e-06, "loss": 0.4899, "step": 2165 }, { "epoch": 0.13567390657542397, "grad_norm": 0.8434059067039544, "learning_rate": 9.709996063148401e-06, "loss": 0.3999, "step": 2166 }, { "epoch": 0.13573654457476628, "grad_norm": 0.9040013981548884, "learning_rate": 9.709655518728526e-06, "loss": 0.4743, "step": 2167 }, { "epoch": 0.13579918257410858, "grad_norm": 0.8596217902106368, "learning_rate": 9.709314780458357e-06, "loss": 0.4224, "step": 2168 }, { "epoch": 0.1358618205734509, "grad_norm": 0.8824956620121363, "learning_rate": 9.708973848351917e-06, "loss": 0.4297, "step": 2169 }, { "epoch": 0.13592445857279317, "grad_norm": 0.9976441827894507, "learning_rate": 9.708632722423238e-06, "loss": 0.4558, "step": 2170 }, { "epoch": 0.13598709657213548, "grad_norm": 0.8811244329160518, "learning_rate": 9.708291402686365e-06, "loss": 0.4358, "step": 2171 }, { "epoch": 0.1360497345714778, "grad_norm": 0.9030085495743074, "learning_rate": 9.707949889155341e-06, "loss": 0.4073, "step": 2172 }, { "epoch": 0.1361123725708201, "grad_norm": 0.9428667186128505, "learning_rate": 9.707608181844228e-06, "loss": 0.456, "step": 2173 }, { "epoch": 0.1361750105701624, "grad_norm": 0.8664669935624583, "learning_rate": 9.707266280767086e-06, "loss": 0.4247, "step": 2174 }, { "epoch": 0.13623764856950468, "grad_norm": 0.8447772226283399, "learning_rate": 9.706924185937993e-06, "loss": 0.446, "step": 2175 }, { "epoch": 0.136300286568847, "grad_norm": 0.8795845193358592, "learning_rate": 9.706581897371025e-06, "loss": 0.4119, "step": 2176 }, { "epoch": 0.1363629245681893, "grad_norm": 0.8624702789665037, "learning_rate": 9.706239415080275e-06, "loss": 0.4659, "step": 2177 }, { "epoch": 0.1364255625675316, "grad_norm": 0.8349453064877617, "learning_rate": 9.705896739079834e-06, "loss": 0.437, "step": 2178 }, { "epoch": 0.13648820056687389, "grad_norm": 0.827452215672811, "learning_rate": 9.705553869383811e-06, "loss": 0.4811, "step": 2179 }, { "epoch": 0.1365508385662162, "grad_norm": 0.9400347248623704, "learning_rate": 9.705210806006317e-06, "loss": 0.4919, "step": 2180 }, { "epoch": 0.1366134765655585, "grad_norm": 0.8717365463924438, "learning_rate": 9.704867548961474e-06, "loss": 0.3825, "step": 2181 }, { "epoch": 0.1366761145649008, "grad_norm": 0.9083129104353631, "learning_rate": 9.704524098263409e-06, "loss": 0.48, "step": 2182 }, { "epoch": 0.1367387525642431, "grad_norm": 0.8940436638516562, "learning_rate": 9.704180453926259e-06, "loss": 0.4296, "step": 2183 }, { "epoch": 0.1368013905635854, "grad_norm": 0.9084071348706758, "learning_rate": 9.703836615964167e-06, "loss": 0.4674, "step": 2184 }, { "epoch": 0.1368640285629277, "grad_norm": 0.8560383043156078, "learning_rate": 9.703492584391287e-06, "loss": 0.4124, "step": 2185 }, { "epoch": 0.13692666656227, "grad_norm": 0.9280471870093759, "learning_rate": 9.70314835922178e-06, "loss": 0.4821, "step": 2186 }, { "epoch": 0.1369893045616123, "grad_norm": 0.9253114159966243, "learning_rate": 9.702803940469815e-06, "loss": 0.474, "step": 2187 }, { "epoch": 0.1370519425609546, "grad_norm": 0.9186481620054546, "learning_rate": 9.702459328149565e-06, "loss": 0.4561, "step": 2188 }, { "epoch": 0.1371145805602969, "grad_norm": 0.8745714419647423, "learning_rate": 9.702114522275216e-06, "loss": 0.3969, "step": 2189 }, { "epoch": 0.1371772185596392, "grad_norm": 0.8594754579311678, "learning_rate": 9.701769522860963e-06, "loss": 0.3859, "step": 2190 }, { "epoch": 0.1372398565589815, "grad_norm": 0.8158871232871461, "learning_rate": 9.701424329921e-06, "loss": 0.4359, "step": 2191 }, { "epoch": 0.1373024945583238, "grad_norm": 0.9492692006820466, "learning_rate": 9.701078943469541e-06, "loss": 0.4509, "step": 2192 }, { "epoch": 0.1373651325576661, "grad_norm": 0.9081073910917724, "learning_rate": 9.700733363520802e-06, "loss": 0.4883, "step": 2193 }, { "epoch": 0.13742777055700842, "grad_norm": 0.910381231696085, "learning_rate": 9.700387590089002e-06, "loss": 0.4467, "step": 2194 }, { "epoch": 0.13749040855635072, "grad_norm": 0.878024607270846, "learning_rate": 9.70004162318838e-06, "loss": 0.432, "step": 2195 }, { "epoch": 0.137553046555693, "grad_norm": 0.87043172632439, "learning_rate": 9.699695462833169e-06, "loss": 0.4187, "step": 2196 }, { "epoch": 0.1376156845550353, "grad_norm": 1.0506428282059537, "learning_rate": 9.69934910903762e-06, "loss": 0.4683, "step": 2197 }, { "epoch": 0.13767832255437762, "grad_norm": 0.9082872528130357, "learning_rate": 9.699002561815992e-06, "loss": 0.4021, "step": 2198 }, { "epoch": 0.13774096055371993, "grad_norm": 0.8044506720399468, "learning_rate": 9.698655821182545e-06, "loss": 0.3913, "step": 2199 }, { "epoch": 0.1378035985530622, "grad_norm": 0.9201163411558048, "learning_rate": 9.698308887151553e-06, "loss": 0.4585, "step": 2200 }, { "epoch": 0.13786623655240451, "grad_norm": 0.8784158770071561, "learning_rate": 9.697961759737296e-06, "loss": 0.4432, "step": 2201 }, { "epoch": 0.13792887455174682, "grad_norm": 0.9178607453899356, "learning_rate": 9.697614438954059e-06, "loss": 0.4665, "step": 2202 }, { "epoch": 0.13799151255108913, "grad_norm": 0.9245095424322415, "learning_rate": 9.69726692481614e-06, "loss": 0.4411, "step": 2203 }, { "epoch": 0.1380541505504314, "grad_norm": 0.8446563137255306, "learning_rate": 9.696919217337843e-06, "loss": 0.4222, "step": 2204 }, { "epoch": 0.13811678854977372, "grad_norm": 0.935595206797833, "learning_rate": 9.69657131653348e-06, "loss": 0.5224, "step": 2205 }, { "epoch": 0.13817942654911602, "grad_norm": 0.8359433085844524, "learning_rate": 9.696223222417369e-06, "loss": 0.4253, "step": 2206 }, { "epoch": 0.13824206454845833, "grad_norm": 0.8792681929527693, "learning_rate": 9.69587493500384e-06, "loss": 0.4279, "step": 2207 }, { "epoch": 0.1383047025478006, "grad_norm": 0.7472703270081213, "learning_rate": 9.695526454307224e-06, "loss": 0.477, "step": 2208 }, { "epoch": 0.13836734054714292, "grad_norm": 0.8463356530499532, "learning_rate": 9.69517778034187e-06, "loss": 0.4436, "step": 2209 }, { "epoch": 0.13842997854648523, "grad_norm": 0.8470588751770831, "learning_rate": 9.694828913122128e-06, "loss": 0.4511, "step": 2210 }, { "epoch": 0.13849261654582753, "grad_norm": 0.8999146246378132, "learning_rate": 9.694479852662355e-06, "loss": 0.4707, "step": 2211 }, { "epoch": 0.13855525454516981, "grad_norm": 0.7896702992132011, "learning_rate": 9.694130598976919e-06, "loss": 0.5006, "step": 2212 }, { "epoch": 0.13861789254451212, "grad_norm": 0.9847868740422208, "learning_rate": 9.693781152080197e-06, "loss": 0.4565, "step": 2213 }, { "epoch": 0.13868053054385443, "grad_norm": 0.812871343013172, "learning_rate": 9.693431511986572e-06, "loss": 0.3874, "step": 2214 }, { "epoch": 0.13874316854319674, "grad_norm": 0.8985133757051832, "learning_rate": 9.693081678710435e-06, "loss": 0.4683, "step": 2215 }, { "epoch": 0.13880580654253902, "grad_norm": 0.8695409837128261, "learning_rate": 9.692731652266184e-06, "loss": 0.415, "step": 2216 }, { "epoch": 0.13886844454188133, "grad_norm": 0.9286666645304484, "learning_rate": 9.692381432668228e-06, "loss": 0.4774, "step": 2217 }, { "epoch": 0.13893108254122363, "grad_norm": 0.8496265809508814, "learning_rate": 9.692031019930981e-06, "loss": 0.4513, "step": 2218 }, { "epoch": 0.13899372054056594, "grad_norm": 0.8483157555482311, "learning_rate": 9.691680414068867e-06, "loss": 0.4923, "step": 2219 }, { "epoch": 0.13905635853990825, "grad_norm": 0.8693471719479456, "learning_rate": 9.691329615096315e-06, "loss": 0.4921, "step": 2220 }, { "epoch": 0.13911899653925053, "grad_norm": 0.8789127971424072, "learning_rate": 9.690978623027767e-06, "loss": 0.4712, "step": 2221 }, { "epoch": 0.13918163453859284, "grad_norm": 0.8489954412603635, "learning_rate": 9.690627437877667e-06, "loss": 0.4244, "step": 2222 }, { "epoch": 0.13924427253793514, "grad_norm": 0.8608051690058997, "learning_rate": 9.690276059660471e-06, "loss": 0.4863, "step": 2223 }, { "epoch": 0.13930691053727745, "grad_norm": 0.8178938207427763, "learning_rate": 9.689924488390643e-06, "loss": 0.396, "step": 2224 }, { "epoch": 0.13936954853661973, "grad_norm": 0.7575938072179592, "learning_rate": 9.689572724082652e-06, "loss": 0.4776, "step": 2225 }, { "epoch": 0.13943218653596204, "grad_norm": 0.9320278124664211, "learning_rate": 9.689220766750977e-06, "loss": 0.412, "step": 2226 }, { "epoch": 0.13949482453530435, "grad_norm": 0.7923980175478729, "learning_rate": 9.688868616410105e-06, "loss": 0.4326, "step": 2227 }, { "epoch": 0.13955746253464665, "grad_norm": 0.8303756920232234, "learning_rate": 9.68851627307453e-06, "loss": 0.4713, "step": 2228 }, { "epoch": 0.13962010053398893, "grad_norm": 0.8249966179827555, "learning_rate": 9.688163736758755e-06, "loss": 0.3996, "step": 2229 }, { "epoch": 0.13968273853333124, "grad_norm": 0.8058591671027405, "learning_rate": 9.68781100747729e-06, "loss": 0.4192, "step": 2230 }, { "epoch": 0.13974537653267355, "grad_norm": 0.8307514861322093, "learning_rate": 9.687458085244654e-06, "loss": 0.4429, "step": 2231 }, { "epoch": 0.13980801453201586, "grad_norm": 0.8414700697569526, "learning_rate": 9.687104970075374e-06, "loss": 0.4442, "step": 2232 }, { "epoch": 0.13987065253135814, "grad_norm": 0.9248765424615424, "learning_rate": 9.686751661983985e-06, "loss": 0.4428, "step": 2233 }, { "epoch": 0.13993329053070044, "grad_norm": 0.8320833495374479, "learning_rate": 9.686398160985024e-06, "loss": 0.3943, "step": 2234 }, { "epoch": 0.13999592853004275, "grad_norm": 0.943264199577162, "learning_rate": 9.686044467093048e-06, "loss": 0.4594, "step": 2235 }, { "epoch": 0.14005856652938506, "grad_norm": 0.8929484255142927, "learning_rate": 9.685690580322612e-06, "loss": 0.4667, "step": 2236 }, { "epoch": 0.14012120452872734, "grad_norm": 0.8932555578946941, "learning_rate": 9.685336500688279e-06, "loss": 0.4765, "step": 2237 }, { "epoch": 0.14018384252806965, "grad_norm": 0.8786751361475417, "learning_rate": 9.684982228204627e-06, "loss": 0.4297, "step": 2238 }, { "epoch": 0.14024648052741195, "grad_norm": 0.8313577809048117, "learning_rate": 9.684627762886239e-06, "loss": 0.4041, "step": 2239 }, { "epoch": 0.14030911852675426, "grad_norm": 0.9980954618342258, "learning_rate": 9.684273104747702e-06, "loss": 0.4774, "step": 2240 }, { "epoch": 0.14037175652609657, "grad_norm": 0.9047714497836662, "learning_rate": 9.683918253803612e-06, "loss": 0.4895, "step": 2241 }, { "epoch": 0.14043439452543885, "grad_norm": 0.8658790837920405, "learning_rate": 9.683563210068581e-06, "loss": 0.4129, "step": 2242 }, { "epoch": 0.14049703252478116, "grad_norm": 0.9048304258728651, "learning_rate": 9.683207973557216e-06, "loss": 0.4093, "step": 2243 }, { "epoch": 0.14055967052412346, "grad_norm": 0.8785226375530205, "learning_rate": 9.682852544284143e-06, "loss": 0.4509, "step": 2244 }, { "epoch": 0.14062230852346577, "grad_norm": 0.8029841911522505, "learning_rate": 9.68249692226399e-06, "loss": 0.4208, "step": 2245 }, { "epoch": 0.14068494652280805, "grad_norm": 0.9905500653961982, "learning_rate": 9.682141107511394e-06, "loss": 0.4844, "step": 2246 }, { "epoch": 0.14074758452215036, "grad_norm": 0.8420769461406663, "learning_rate": 9.681785100041001e-06, "loss": 0.4461, "step": 2247 }, { "epoch": 0.14081022252149267, "grad_norm": 0.787049299246364, "learning_rate": 9.681428899867466e-06, "loss": 0.3771, "step": 2248 }, { "epoch": 0.14087286052083497, "grad_norm": 0.8946956054677999, "learning_rate": 9.681072507005446e-06, "loss": 0.4543, "step": 2249 }, { "epoch": 0.14093549852017725, "grad_norm": 0.8698706447603998, "learning_rate": 9.680715921469615e-06, "loss": 0.4233, "step": 2250 }, { "epoch": 0.14099813651951956, "grad_norm": 0.8173812336374683, "learning_rate": 9.680359143274646e-06, "loss": 0.3999, "step": 2251 }, { "epoch": 0.14106077451886187, "grad_norm": 0.9048758681462872, "learning_rate": 9.680002172435228e-06, "loss": 0.4622, "step": 2252 }, { "epoch": 0.14112341251820418, "grad_norm": 0.8488149212689138, "learning_rate": 9.67964500896605e-06, "loss": 0.3992, "step": 2253 }, { "epoch": 0.14118605051754646, "grad_norm": 0.7987622880014729, "learning_rate": 9.679287652881815e-06, "loss": 0.3994, "step": 2254 }, { "epoch": 0.14124868851688877, "grad_norm": 0.874658805330724, "learning_rate": 9.678930104197233e-06, "loss": 0.4336, "step": 2255 }, { "epoch": 0.14131132651623107, "grad_norm": 0.9343589650360308, "learning_rate": 9.678572362927019e-06, "loss": 0.4979, "step": 2256 }, { "epoch": 0.14137396451557338, "grad_norm": 0.7799738041422156, "learning_rate": 9.678214429085898e-06, "loss": 0.4706, "step": 2257 }, { "epoch": 0.14143660251491566, "grad_norm": 0.8247002457649069, "learning_rate": 9.677856302688603e-06, "loss": 0.4175, "step": 2258 }, { "epoch": 0.14149924051425797, "grad_norm": 0.8171683842298494, "learning_rate": 9.677497983749874e-06, "loss": 0.414, "step": 2259 }, { "epoch": 0.14156187851360028, "grad_norm": 0.9030862319938098, "learning_rate": 9.677139472284461e-06, "loss": 0.4389, "step": 2260 }, { "epoch": 0.14162451651294258, "grad_norm": 0.9153877789719159, "learning_rate": 9.676780768307117e-06, "loss": 0.4421, "step": 2261 }, { "epoch": 0.1416871545122849, "grad_norm": 0.8751589506147975, "learning_rate": 9.67642187183261e-06, "loss": 0.5034, "step": 2262 }, { "epoch": 0.14174979251162717, "grad_norm": 0.8585508014159221, "learning_rate": 9.67606278287571e-06, "loss": 0.3833, "step": 2263 }, { "epoch": 0.14181243051096948, "grad_norm": 0.7931676884838392, "learning_rate": 9.675703501451199e-06, "loss": 0.3535, "step": 2264 }, { "epoch": 0.14187506851031179, "grad_norm": 0.8367221322271434, "learning_rate": 9.675344027573862e-06, "loss": 0.4186, "step": 2265 }, { "epoch": 0.1419377065096541, "grad_norm": 0.905681798062385, "learning_rate": 9.6749843612585e-06, "loss": 0.4276, "step": 2266 }, { "epoch": 0.14200034450899637, "grad_norm": 0.9101200639522679, "learning_rate": 9.674624502519912e-06, "loss": 0.4274, "step": 2267 }, { "epoch": 0.14206298250833868, "grad_norm": 0.7942583281151744, "learning_rate": 9.674264451372912e-06, "loss": 0.4034, "step": 2268 }, { "epoch": 0.142125620507681, "grad_norm": 0.8092464420763034, "learning_rate": 9.67390420783232e-06, "loss": 0.4418, "step": 2269 }, { "epoch": 0.1421882585070233, "grad_norm": 0.8938713170876549, "learning_rate": 9.673543771912964e-06, "loss": 0.4477, "step": 2270 }, { "epoch": 0.14225089650636558, "grad_norm": 0.8638147614957213, "learning_rate": 9.673183143629677e-06, "loss": 0.3924, "step": 2271 }, { "epoch": 0.14231353450570788, "grad_norm": 0.7781302978492965, "learning_rate": 9.672822322997305e-06, "loss": 0.4937, "step": 2272 }, { "epoch": 0.1423761725050502, "grad_norm": 0.7949022480335897, "learning_rate": 9.6724613100307e-06, "loss": 0.4228, "step": 2273 }, { "epoch": 0.1424388105043925, "grad_norm": 0.8515484193980477, "learning_rate": 9.67210010474472e-06, "loss": 0.4449, "step": 2274 }, { "epoch": 0.14250144850373478, "grad_norm": 0.8726739340231814, "learning_rate": 9.67173870715423e-06, "loss": 0.4657, "step": 2275 }, { "epoch": 0.1425640865030771, "grad_norm": 0.8310758017733653, "learning_rate": 9.67137711727411e-06, "loss": 0.3929, "step": 2276 }, { "epoch": 0.1426267245024194, "grad_norm": 0.8435275578307975, "learning_rate": 9.67101533511924e-06, "loss": 0.4631, "step": 2277 }, { "epoch": 0.1426893625017617, "grad_norm": 0.8883069312600281, "learning_rate": 9.670653360704514e-06, "loss": 0.4367, "step": 2278 }, { "epoch": 0.14275200050110398, "grad_norm": 0.7980096992000741, "learning_rate": 9.670291194044828e-06, "loss": 0.4154, "step": 2279 }, { "epoch": 0.1428146385004463, "grad_norm": 0.9082874766371772, "learning_rate": 9.669928835155088e-06, "loss": 0.3995, "step": 2280 }, { "epoch": 0.1428772764997886, "grad_norm": 0.8583952157488394, "learning_rate": 9.66956628405021e-06, "loss": 0.4017, "step": 2281 }, { "epoch": 0.1429399144991309, "grad_norm": 0.8180762784395122, "learning_rate": 9.669203540745117e-06, "loss": 0.4772, "step": 2282 }, { "epoch": 0.1430025524984732, "grad_norm": 0.9187110631959239, "learning_rate": 9.668840605254742e-06, "loss": 0.4502, "step": 2283 }, { "epoch": 0.1430651904978155, "grad_norm": 0.894525408282474, "learning_rate": 9.668477477594021e-06, "loss": 0.4464, "step": 2284 }, { "epoch": 0.1431278284971578, "grad_norm": 0.8900166911968236, "learning_rate": 9.6681141577779e-06, "loss": 0.4304, "step": 2285 }, { "epoch": 0.1431904664965001, "grad_norm": 0.9252136322129633, "learning_rate": 9.667750645821332e-06, "loss": 0.4769, "step": 2286 }, { "epoch": 0.14325310449584241, "grad_norm": 0.8215437699935754, "learning_rate": 9.667386941739283e-06, "loss": 0.4374, "step": 2287 }, { "epoch": 0.1433157424951847, "grad_norm": 0.9114618504940122, "learning_rate": 9.66702304554672e-06, "loss": 0.4105, "step": 2288 }, { "epoch": 0.143378380494527, "grad_norm": 0.7957094822203379, "learning_rate": 9.66665895725862e-06, "loss": 0.4086, "step": 2289 }, { "epoch": 0.1434410184938693, "grad_norm": 0.8895002181398354, "learning_rate": 9.666294676889976e-06, "loss": 0.4286, "step": 2290 }, { "epoch": 0.14350365649321162, "grad_norm": 0.8611753123260439, "learning_rate": 9.665930204455773e-06, "loss": 0.4382, "step": 2291 }, { "epoch": 0.1435662944925539, "grad_norm": 0.8581690724377423, "learning_rate": 9.66556553997102e-06, "loss": 0.4256, "step": 2292 }, { "epoch": 0.1436289324918962, "grad_norm": 0.8692570522050129, "learning_rate": 9.665200683450721e-06, "loss": 0.4123, "step": 2293 }, { "epoch": 0.1436915704912385, "grad_norm": 0.8439746226052689, "learning_rate": 9.664835634909897e-06, "loss": 0.4337, "step": 2294 }, { "epoch": 0.14375420849058082, "grad_norm": 0.8431104044401114, "learning_rate": 9.664470394363571e-06, "loss": 0.4435, "step": 2295 }, { "epoch": 0.1438168464899231, "grad_norm": 1.013648199738999, "learning_rate": 9.664104961826781e-06, "loss": 0.4667, "step": 2296 }, { "epoch": 0.1438794844892654, "grad_norm": 0.8978294004178626, "learning_rate": 9.663739337314563e-06, "loss": 0.43, "step": 2297 }, { "epoch": 0.14394212248860772, "grad_norm": 0.8349920122563275, "learning_rate": 9.663373520841968e-06, "loss": 0.3829, "step": 2298 }, { "epoch": 0.14400476048795002, "grad_norm": 0.8114681141504396, "learning_rate": 9.663007512424053e-06, "loss": 0.3863, "step": 2299 }, { "epoch": 0.1440673984872923, "grad_norm": 0.8545206705842632, "learning_rate": 9.662641312075884e-06, "loss": 0.4271, "step": 2300 }, { "epoch": 0.1441300364866346, "grad_norm": 0.820906023156429, "learning_rate": 9.662274919812535e-06, "loss": 0.4151, "step": 2301 }, { "epoch": 0.14419267448597692, "grad_norm": 0.8755671797092478, "learning_rate": 9.661908335649082e-06, "loss": 0.4466, "step": 2302 }, { "epoch": 0.14425531248531923, "grad_norm": 0.9095921462898913, "learning_rate": 9.661541559600619e-06, "loss": 0.4432, "step": 2303 }, { "epoch": 0.14431795048466153, "grad_norm": 0.8438632242623119, "learning_rate": 9.661174591682238e-06, "loss": 0.4295, "step": 2304 }, { "epoch": 0.1443805884840038, "grad_norm": 0.8169418421448084, "learning_rate": 9.660807431909047e-06, "loss": 0.4224, "step": 2305 }, { "epoch": 0.14444322648334612, "grad_norm": 0.8801112277840498, "learning_rate": 9.660440080296156e-06, "loss": 0.4388, "step": 2306 }, { "epoch": 0.14450586448268843, "grad_norm": 0.8805325506577353, "learning_rate": 9.660072536858687e-06, "loss": 0.4697, "step": 2307 }, { "epoch": 0.14456850248203074, "grad_norm": 0.9083380947183523, "learning_rate": 9.659704801611767e-06, "loss": 0.4516, "step": 2308 }, { "epoch": 0.14463114048137302, "grad_norm": 0.8757638135949098, "learning_rate": 9.659336874570533e-06, "loss": 0.4121, "step": 2309 }, { "epoch": 0.14469377848071532, "grad_norm": 0.9218877110706876, "learning_rate": 9.658968755750129e-06, "loss": 0.4009, "step": 2310 }, { "epoch": 0.14475641648005763, "grad_norm": 0.8843122645902163, "learning_rate": 9.658600445165705e-06, "loss": 0.5088, "step": 2311 }, { "epoch": 0.14481905447939994, "grad_norm": 0.8845793209583669, "learning_rate": 9.658231942832423e-06, "loss": 0.4751, "step": 2312 }, { "epoch": 0.14488169247874222, "grad_norm": 0.8151727438848101, "learning_rate": 9.657863248765448e-06, "loss": 0.4258, "step": 2313 }, { "epoch": 0.14494433047808453, "grad_norm": 0.8910249107779263, "learning_rate": 9.657494362979958e-06, "loss": 0.4364, "step": 2314 }, { "epoch": 0.14500696847742683, "grad_norm": 0.7873203021090587, "learning_rate": 9.657125285491136e-06, "loss": 0.4892, "step": 2315 }, { "epoch": 0.14506960647676914, "grad_norm": 0.8857021235148637, "learning_rate": 9.656756016314173e-06, "loss": 0.4235, "step": 2316 }, { "epoch": 0.14513224447611142, "grad_norm": 0.7737993176409519, "learning_rate": 9.656386555464267e-06, "loss": 0.3882, "step": 2317 }, { "epoch": 0.14519488247545373, "grad_norm": 0.7799514869719378, "learning_rate": 9.656016902956628e-06, "loss": 0.4917, "step": 2318 }, { "epoch": 0.14525752047479604, "grad_norm": 0.8973805988302151, "learning_rate": 9.655647058806468e-06, "loss": 0.4447, "step": 2319 }, { "epoch": 0.14532015847413834, "grad_norm": 0.8802062428070762, "learning_rate": 9.65527702302901e-06, "loss": 0.4706, "step": 2320 }, { "epoch": 0.14538279647348062, "grad_norm": 0.9330544171777992, "learning_rate": 9.654906795639487e-06, "loss": 0.4395, "step": 2321 }, { "epoch": 0.14544543447282293, "grad_norm": 0.885867749513151, "learning_rate": 9.654536376653135e-06, "loss": 0.4048, "step": 2322 }, { "epoch": 0.14550807247216524, "grad_norm": 0.8209454098008089, "learning_rate": 9.654165766085203e-06, "loss": 0.4522, "step": 2323 }, { "epoch": 0.14557071047150755, "grad_norm": 0.8851687136485553, "learning_rate": 9.653794963950944e-06, "loss": 0.4232, "step": 2324 }, { "epoch": 0.14563334847084985, "grad_norm": 0.8694756484928869, "learning_rate": 9.65342397026562e-06, "loss": 0.4378, "step": 2325 }, { "epoch": 0.14569598647019213, "grad_norm": 0.9481573664617065, "learning_rate": 9.653052785044502e-06, "loss": 0.4719, "step": 2326 }, { "epoch": 0.14575862446953444, "grad_norm": 0.8907635164538238, "learning_rate": 9.652681408302867e-06, "loss": 0.4383, "step": 2327 }, { "epoch": 0.14582126246887675, "grad_norm": 0.8000978403199045, "learning_rate": 9.652309840056002e-06, "loss": 0.4739, "step": 2328 }, { "epoch": 0.14588390046821906, "grad_norm": 0.9085038364201059, "learning_rate": 9.651938080319202e-06, "loss": 0.4207, "step": 2329 }, { "epoch": 0.14594653846756134, "grad_norm": 0.8020984860767632, "learning_rate": 9.651566129107765e-06, "loss": 0.3948, "step": 2330 }, { "epoch": 0.14600917646690365, "grad_norm": 0.8541056741698139, "learning_rate": 9.651193986437004e-06, "loss": 0.4457, "step": 2331 }, { "epoch": 0.14607181446624595, "grad_norm": 0.769231972553952, "learning_rate": 9.650821652322236e-06, "loss": 0.4207, "step": 2332 }, { "epoch": 0.14613445246558826, "grad_norm": 0.8262319782302551, "learning_rate": 9.650449126778785e-06, "loss": 0.4316, "step": 2333 }, { "epoch": 0.14619709046493054, "grad_norm": 0.8754341312897409, "learning_rate": 9.650076409821984e-06, "loss": 0.4194, "step": 2334 }, { "epoch": 0.14625972846427285, "grad_norm": 0.9158670363535011, "learning_rate": 9.649703501467174e-06, "loss": 0.4103, "step": 2335 }, { "epoch": 0.14632236646361516, "grad_norm": 0.8282031772876082, "learning_rate": 9.649330401729707e-06, "loss": 0.4312, "step": 2336 }, { "epoch": 0.14638500446295746, "grad_norm": 0.9525534426862259, "learning_rate": 9.648957110624937e-06, "loss": 0.461, "step": 2337 }, { "epoch": 0.14644764246229974, "grad_norm": 0.8017443857930013, "learning_rate": 9.648583628168229e-06, "loss": 0.409, "step": 2338 }, { "epoch": 0.14651028046164205, "grad_norm": 0.8782016625081, "learning_rate": 9.648209954374956e-06, "loss": 0.5175, "step": 2339 }, { "epoch": 0.14657291846098436, "grad_norm": 0.8865723502101954, "learning_rate": 9.647836089260497e-06, "loss": 0.4587, "step": 2340 }, { "epoch": 0.14663555646032667, "grad_norm": 0.8038028190721217, "learning_rate": 9.647462032840245e-06, "loss": 0.4277, "step": 2341 }, { "epoch": 0.14669819445966895, "grad_norm": 0.8834983866029281, "learning_rate": 9.647087785129591e-06, "loss": 0.4094, "step": 2342 }, { "epoch": 0.14676083245901125, "grad_norm": 0.8870826228719058, "learning_rate": 9.646713346143943e-06, "loss": 0.484, "step": 2343 }, { "epoch": 0.14682347045835356, "grad_norm": 0.8482045660304712, "learning_rate": 9.64633871589871e-06, "loss": 0.4639, "step": 2344 }, { "epoch": 0.14688610845769587, "grad_norm": 0.8628826819093957, "learning_rate": 9.645963894409314e-06, "loss": 0.4274, "step": 2345 }, { "epoch": 0.14694874645703815, "grad_norm": 0.8866958023314192, "learning_rate": 9.64558888169118e-06, "loss": 0.4596, "step": 2346 }, { "epoch": 0.14701138445638046, "grad_norm": 0.9431210913352419, "learning_rate": 9.645213677759746e-06, "loss": 0.4865, "step": 2347 }, { "epoch": 0.14707402245572276, "grad_norm": 0.7302403598809101, "learning_rate": 9.644838282630456e-06, "loss": 0.4779, "step": 2348 }, { "epoch": 0.14713666045506507, "grad_norm": 0.9049631395261968, "learning_rate": 9.644462696318759e-06, "loss": 0.4214, "step": 2349 }, { "epoch": 0.14719929845440738, "grad_norm": 0.7340839389877459, "learning_rate": 9.644086918840116e-06, "loss": 0.4771, "step": 2350 }, { "epoch": 0.14726193645374966, "grad_norm": 0.8747651804543148, "learning_rate": 9.643710950209993e-06, "loss": 0.4562, "step": 2351 }, { "epoch": 0.14732457445309197, "grad_norm": 0.9470050537399178, "learning_rate": 9.643334790443866e-06, "loss": 0.4577, "step": 2352 }, { "epoch": 0.14738721245243427, "grad_norm": 0.8376829792620915, "learning_rate": 9.642958439557217e-06, "loss": 0.3923, "step": 2353 }, { "epoch": 0.14744985045177658, "grad_norm": 0.8470474210482474, "learning_rate": 9.642581897565535e-06, "loss": 0.4779, "step": 2354 }, { "epoch": 0.14751248845111886, "grad_norm": 0.847232525861142, "learning_rate": 9.642205164484323e-06, "loss": 0.4355, "step": 2355 }, { "epoch": 0.14757512645046117, "grad_norm": 0.8684491595549861, "learning_rate": 9.641828240329083e-06, "loss": 0.4951, "step": 2356 }, { "epoch": 0.14763776444980348, "grad_norm": 0.9849481700290545, "learning_rate": 9.641451125115333e-06, "loss": 0.4755, "step": 2357 }, { "epoch": 0.14770040244914578, "grad_norm": 0.8506513466862283, "learning_rate": 9.641073818858591e-06, "loss": 0.4153, "step": 2358 }, { "epoch": 0.14776304044848806, "grad_norm": 0.8373563000863815, "learning_rate": 9.64069632157439e-06, "loss": 0.4438, "step": 2359 }, { "epoch": 0.14782567844783037, "grad_norm": 0.9019029667949023, "learning_rate": 9.640318633278266e-06, "loss": 0.4265, "step": 2360 }, { "epoch": 0.14788831644717268, "grad_norm": 0.8266657327243677, "learning_rate": 9.639940753985768e-06, "loss": 0.4273, "step": 2361 }, { "epoch": 0.147950954446515, "grad_norm": 0.8581105506802421, "learning_rate": 9.639562683712446e-06, "loss": 0.4517, "step": 2362 }, { "epoch": 0.14801359244585727, "grad_norm": 0.8033684802975174, "learning_rate": 9.639184422473865e-06, "loss": 0.3757, "step": 2363 }, { "epoch": 0.14807623044519957, "grad_norm": 0.8270348263420059, "learning_rate": 9.63880597028559e-06, "loss": 0.433, "step": 2364 }, { "epoch": 0.14813886844454188, "grad_norm": 1.1928836555392626, "learning_rate": 9.6384273271632e-06, "loss": 0.5043, "step": 2365 }, { "epoch": 0.1482015064438842, "grad_norm": 0.8478188482045678, "learning_rate": 9.638048493122281e-06, "loss": 0.4759, "step": 2366 }, { "epoch": 0.14826414444322647, "grad_norm": 0.869010494005975, "learning_rate": 9.637669468178425e-06, "loss": 0.3903, "step": 2367 }, { "epoch": 0.14832678244256878, "grad_norm": 0.8759184839374334, "learning_rate": 9.637290252347233e-06, "loss": 0.3931, "step": 2368 }, { "epoch": 0.14838942044191109, "grad_norm": 0.9058054514862914, "learning_rate": 9.636910845644315e-06, "loss": 0.4364, "step": 2369 }, { "epoch": 0.1484520584412534, "grad_norm": 0.9176614748611075, "learning_rate": 9.636531248085284e-06, "loss": 0.4255, "step": 2370 }, { "epoch": 0.1485146964405957, "grad_norm": 0.8907698815948322, "learning_rate": 9.636151459685765e-06, "loss": 0.4879, "step": 2371 }, { "epoch": 0.14857733443993798, "grad_norm": 0.8698524916142856, "learning_rate": 9.635771480461393e-06, "loss": 0.4476, "step": 2372 }, { "epoch": 0.1486399724392803, "grad_norm": 0.8676108149051293, "learning_rate": 9.635391310427807e-06, "loss": 0.4554, "step": 2373 }, { "epoch": 0.1487026104386226, "grad_norm": 0.8811445140313584, "learning_rate": 9.635010949600654e-06, "loss": 0.4775, "step": 2374 }, { "epoch": 0.1487652484379649, "grad_norm": 0.9492777427326136, "learning_rate": 9.634630397995589e-06, "loss": 0.4233, "step": 2375 }, { "epoch": 0.14882788643730718, "grad_norm": 0.9036401016459509, "learning_rate": 9.634249655628278e-06, "loss": 0.4633, "step": 2376 }, { "epoch": 0.1488905244366495, "grad_norm": 0.833608396397723, "learning_rate": 9.63386872251439e-06, "loss": 0.4371, "step": 2377 }, { "epoch": 0.1489531624359918, "grad_norm": 0.9504202152230048, "learning_rate": 9.633487598669605e-06, "loss": 0.4615, "step": 2378 }, { "epoch": 0.1490158004353341, "grad_norm": 0.8516445229515681, "learning_rate": 9.633106284109612e-06, "loss": 0.4481, "step": 2379 }, { "epoch": 0.14907843843467639, "grad_norm": 0.7756864689633367, "learning_rate": 9.632724778850103e-06, "loss": 0.4192, "step": 2380 }, { "epoch": 0.1491410764340187, "grad_norm": 0.8075483737715474, "learning_rate": 9.632343082906782e-06, "loss": 0.4214, "step": 2381 }, { "epoch": 0.149203714433361, "grad_norm": 0.8495226140789064, "learning_rate": 9.631961196295361e-06, "loss": 0.4407, "step": 2382 }, { "epoch": 0.1492663524327033, "grad_norm": 0.8627696535186514, "learning_rate": 9.631579119031557e-06, "loss": 0.4603, "step": 2383 }, { "epoch": 0.1493289904320456, "grad_norm": 0.9833427205260712, "learning_rate": 9.631196851131097e-06, "loss": 0.48, "step": 2384 }, { "epoch": 0.1493916284313879, "grad_norm": 0.8499499621237054, "learning_rate": 9.630814392609715e-06, "loss": 0.4374, "step": 2385 }, { "epoch": 0.1494542664307302, "grad_norm": 0.7928901697601555, "learning_rate": 9.630431743483152e-06, "loss": 0.425, "step": 2386 }, { "epoch": 0.1495169044300725, "grad_norm": 0.789312482596649, "learning_rate": 9.63004890376716e-06, "loss": 0.3997, "step": 2387 }, { "epoch": 0.1495795424294148, "grad_norm": 0.8128200432999614, "learning_rate": 9.629665873477497e-06, "loss": 0.4733, "step": 2388 }, { "epoch": 0.1496421804287571, "grad_norm": 0.7615365811678368, "learning_rate": 9.629282652629926e-06, "loss": 0.4114, "step": 2389 }, { "epoch": 0.1497048184280994, "grad_norm": 0.8589118116776328, "learning_rate": 9.62889924124022e-06, "loss": 0.4227, "step": 2390 }, { "epoch": 0.14976745642744171, "grad_norm": 0.8291738206322965, "learning_rate": 9.628515639324165e-06, "loss": 0.4203, "step": 2391 }, { "epoch": 0.14983009442678402, "grad_norm": 0.8593183521226162, "learning_rate": 9.628131846897546e-06, "loss": 0.4076, "step": 2392 }, { "epoch": 0.1498927324261263, "grad_norm": 0.8611805723601197, "learning_rate": 9.62774786397616e-06, "loss": 0.4547, "step": 2393 }, { "epoch": 0.1499553704254686, "grad_norm": 0.8722064297884812, "learning_rate": 9.627363690575815e-06, "loss": 0.4357, "step": 2394 }, { "epoch": 0.15001800842481092, "grad_norm": 0.6701532249785436, "learning_rate": 9.626979326712321e-06, "loss": 0.4846, "step": 2395 }, { "epoch": 0.15008064642415322, "grad_norm": 0.8677723601696344, "learning_rate": 9.626594772401498e-06, "loss": 0.4274, "step": 2396 }, { "epoch": 0.1501432844234955, "grad_norm": 0.824606491064819, "learning_rate": 9.626210027659177e-06, "loss": 0.4092, "step": 2397 }, { "epoch": 0.1502059224228378, "grad_norm": 1.007816523083989, "learning_rate": 9.62582509250119e-06, "loss": 0.4573, "step": 2398 }, { "epoch": 0.15026856042218012, "grad_norm": 0.9008596761939137, "learning_rate": 9.625439966943388e-06, "loss": 0.4367, "step": 2399 }, { "epoch": 0.15033119842152243, "grad_norm": 0.8481409811805928, "learning_rate": 9.625054651001615e-06, "loss": 0.4408, "step": 2400 }, { "epoch": 0.1503938364208647, "grad_norm": 0.885431701320464, "learning_rate": 9.624669144691735e-06, "loss": 0.4359, "step": 2401 }, { "epoch": 0.15045647442020701, "grad_norm": 0.8792346507499824, "learning_rate": 9.624283448029614e-06, "loss": 0.4319, "step": 2402 }, { "epoch": 0.15051911241954932, "grad_norm": 0.8356309190399823, "learning_rate": 9.623897561031129e-06, "loss": 0.416, "step": 2403 }, { "epoch": 0.15058175041889163, "grad_norm": 0.8227366621498904, "learning_rate": 9.623511483712163e-06, "loss": 0.47, "step": 2404 }, { "epoch": 0.1506443884182339, "grad_norm": 0.8651954493910214, "learning_rate": 9.623125216088604e-06, "loss": 0.4446, "step": 2405 }, { "epoch": 0.15070702641757622, "grad_norm": 0.8956096972733216, "learning_rate": 9.622738758176353e-06, "loss": 0.4505, "step": 2406 }, { "epoch": 0.15076966441691853, "grad_norm": 0.7718114856052984, "learning_rate": 9.62235210999132e-06, "loss": 0.5232, "step": 2407 }, { "epoch": 0.15083230241626083, "grad_norm": 0.9553808029684103, "learning_rate": 9.621965271549413e-06, "loss": 0.4899, "step": 2408 }, { "epoch": 0.1508949404156031, "grad_norm": 0.8595795702334038, "learning_rate": 9.621578242866558e-06, "loss": 0.4228, "step": 2409 }, { "epoch": 0.15095757841494542, "grad_norm": 0.9160868208238513, "learning_rate": 9.621191023958685e-06, "loss": 0.4552, "step": 2410 }, { "epoch": 0.15102021641428773, "grad_norm": 0.861894225923158, "learning_rate": 9.620803614841732e-06, "loss": 0.4725, "step": 2411 }, { "epoch": 0.15108285441363004, "grad_norm": 1.0112295695950564, "learning_rate": 9.620416015531644e-06, "loss": 0.4231, "step": 2412 }, { "epoch": 0.15114549241297234, "grad_norm": 0.8482139232390409, "learning_rate": 9.620028226044375e-06, "loss": 0.4362, "step": 2413 }, { "epoch": 0.15120813041231462, "grad_norm": 0.9044934825325542, "learning_rate": 9.619640246395889e-06, "loss": 0.4288, "step": 2414 }, { "epoch": 0.15127076841165693, "grad_norm": 0.8497697181108379, "learning_rate": 9.61925207660215e-06, "loss": 0.4106, "step": 2415 }, { "epoch": 0.15133340641099924, "grad_norm": 0.8482769671397066, "learning_rate": 9.61886371667914e-06, "loss": 0.433, "step": 2416 }, { "epoch": 0.15139604441034155, "grad_norm": 0.814045684880283, "learning_rate": 9.618475166642842e-06, "loss": 0.4373, "step": 2417 }, { "epoch": 0.15145868240968383, "grad_norm": 0.8727904097314203, "learning_rate": 9.618086426509248e-06, "loss": 0.4555, "step": 2418 }, { "epoch": 0.15152132040902613, "grad_norm": 0.835947309276288, "learning_rate": 9.61769749629436e-06, "loss": 0.4032, "step": 2419 }, { "epoch": 0.15158395840836844, "grad_norm": 0.7115955313179162, "learning_rate": 9.617308376014185e-06, "loss": 0.466, "step": 2420 }, { "epoch": 0.15164659640771075, "grad_norm": 0.8135974158689896, "learning_rate": 9.616919065684742e-06, "loss": 0.4271, "step": 2421 }, { "epoch": 0.15170923440705303, "grad_norm": 0.6969122467335835, "learning_rate": 9.616529565322052e-06, "loss": 0.482, "step": 2422 }, { "epoch": 0.15177187240639534, "grad_norm": 0.9241893841615136, "learning_rate": 9.61613987494215e-06, "loss": 0.4669, "step": 2423 }, { "epoch": 0.15183451040573764, "grad_norm": 0.8772083729573474, "learning_rate": 9.615749994561073e-06, "loss": 0.4335, "step": 2424 }, { "epoch": 0.15189714840507995, "grad_norm": 0.9350423972844112, "learning_rate": 9.61535992419487e-06, "loss": 0.4994, "step": 2425 }, { "epoch": 0.15195978640442223, "grad_norm": 0.8936695294273029, "learning_rate": 9.614969663859593e-06, "loss": 0.4445, "step": 2426 }, { "epoch": 0.15202242440376454, "grad_norm": 0.9665407637378294, "learning_rate": 9.614579213571311e-06, "loss": 0.4593, "step": 2427 }, { "epoch": 0.15208506240310685, "grad_norm": 0.8440668994360617, "learning_rate": 9.614188573346091e-06, "loss": 0.433, "step": 2428 }, { "epoch": 0.15214770040244915, "grad_norm": 0.9084944508370191, "learning_rate": 9.613797743200013e-06, "loss": 0.4476, "step": 2429 }, { "epoch": 0.15221033840179143, "grad_norm": 0.8668678670879548, "learning_rate": 9.613406723149164e-06, "loss": 0.4325, "step": 2430 }, { "epoch": 0.15227297640113374, "grad_norm": 0.8912780229729776, "learning_rate": 9.613015513209638e-06, "loss": 0.4573, "step": 2431 }, { "epoch": 0.15233561440047605, "grad_norm": 0.8119934631695337, "learning_rate": 9.612624113397536e-06, "loss": 0.4576, "step": 2432 }, { "epoch": 0.15239825239981836, "grad_norm": 0.9263038513500895, "learning_rate": 9.61223252372897e-06, "loss": 0.4561, "step": 2433 }, { "epoch": 0.15246089039916066, "grad_norm": 0.8680718733685372, "learning_rate": 9.611840744220057e-06, "loss": 0.4414, "step": 2434 }, { "epoch": 0.15252352839850294, "grad_norm": 0.8579754161401948, "learning_rate": 9.611448774886925e-06, "loss": 0.4218, "step": 2435 }, { "epoch": 0.15258616639784525, "grad_norm": 0.9626726311010398, "learning_rate": 9.611056615745705e-06, "loss": 0.4636, "step": 2436 }, { "epoch": 0.15264880439718756, "grad_norm": 0.8446558390407034, "learning_rate": 9.610664266812537e-06, "loss": 0.4647, "step": 2437 }, { "epoch": 0.15271144239652987, "grad_norm": 0.8934984853225806, "learning_rate": 9.610271728103571e-06, "loss": 0.4555, "step": 2438 }, { "epoch": 0.15277408039587215, "grad_norm": 0.7864004236749215, "learning_rate": 9.609878999634968e-06, "loss": 0.3946, "step": 2439 }, { "epoch": 0.15283671839521445, "grad_norm": 0.8560483680413262, "learning_rate": 9.609486081422887e-06, "loss": 0.4561, "step": 2440 }, { "epoch": 0.15289935639455676, "grad_norm": 0.8856045989333212, "learning_rate": 9.609092973483507e-06, "loss": 0.4333, "step": 2441 }, { "epoch": 0.15296199439389907, "grad_norm": 0.8942781358042332, "learning_rate": 9.608699675833002e-06, "loss": 0.4621, "step": 2442 }, { "epoch": 0.15302463239324135, "grad_norm": 0.7929863034452728, "learning_rate": 9.608306188487562e-06, "loss": 0.414, "step": 2443 }, { "epoch": 0.15308727039258366, "grad_norm": 0.8458806410002347, "learning_rate": 9.607912511463387e-06, "loss": 0.4532, "step": 2444 }, { "epoch": 0.15314990839192597, "grad_norm": 0.8789685769306063, "learning_rate": 9.607518644776675e-06, "loss": 0.4139, "step": 2445 }, { "epoch": 0.15321254639126827, "grad_norm": 0.8081039208801672, "learning_rate": 9.607124588443642e-06, "loss": 0.391, "step": 2446 }, { "epoch": 0.15327518439061055, "grad_norm": 0.8191526921874455, "learning_rate": 9.606730342480506e-06, "loss": 0.4219, "step": 2447 }, { "epoch": 0.15333782238995286, "grad_norm": 0.8461760390351218, "learning_rate": 9.606335906903493e-06, "loss": 0.4443, "step": 2448 }, { "epoch": 0.15340046038929517, "grad_norm": 0.9170891887360324, "learning_rate": 9.60594128172884e-06, "loss": 0.4832, "step": 2449 }, { "epoch": 0.15346309838863748, "grad_norm": 0.8338661583762684, "learning_rate": 9.605546466972788e-06, "loss": 0.4289, "step": 2450 }, { "epoch": 0.15352573638797976, "grad_norm": 0.8765851636256804, "learning_rate": 9.60515146265159e-06, "loss": 0.4152, "step": 2451 }, { "epoch": 0.15358837438732206, "grad_norm": 0.8129336329477193, "learning_rate": 9.6047562687815e-06, "loss": 0.4403, "step": 2452 }, { "epoch": 0.15365101238666437, "grad_norm": 0.8859911281838742, "learning_rate": 9.604360885378789e-06, "loss": 0.4171, "step": 2453 }, { "epoch": 0.15371365038600668, "grad_norm": 0.9306802696233231, "learning_rate": 9.603965312459732e-06, "loss": 0.465, "step": 2454 }, { "epoch": 0.15377628838534896, "grad_norm": 0.8579268760034917, "learning_rate": 9.603569550040606e-06, "loss": 0.4326, "step": 2455 }, { "epoch": 0.15383892638469127, "grad_norm": 0.8885256382651785, "learning_rate": 9.603173598137702e-06, "loss": 0.4694, "step": 2456 }, { "epoch": 0.15390156438403357, "grad_norm": 0.8727621140846769, "learning_rate": 9.602777456767319e-06, "loss": 0.4411, "step": 2457 }, { "epoch": 0.15396420238337588, "grad_norm": 0.8016710456984827, "learning_rate": 9.60238112594576e-06, "loss": 0.3963, "step": 2458 }, { "epoch": 0.1540268403827182, "grad_norm": 0.8367745093692693, "learning_rate": 9.601984605689342e-06, "loss": 0.4231, "step": 2459 }, { "epoch": 0.15408947838206047, "grad_norm": 0.9247589241881171, "learning_rate": 9.601587896014383e-06, "loss": 0.4816, "step": 2460 }, { "epoch": 0.15415211638140278, "grad_norm": 0.8317591122459586, "learning_rate": 9.601190996937213e-06, "loss": 0.4404, "step": 2461 }, { "epoch": 0.15421475438074508, "grad_norm": 0.7865379627024374, "learning_rate": 9.600793908474166e-06, "loss": 0.4171, "step": 2462 }, { "epoch": 0.1542773923800874, "grad_norm": 0.8324815284051958, "learning_rate": 9.600396630641589e-06, "loss": 0.4265, "step": 2463 }, { "epoch": 0.15434003037942967, "grad_norm": 0.9061957444680231, "learning_rate": 9.59999916345583e-06, "loss": 0.4033, "step": 2464 }, { "epoch": 0.15440266837877198, "grad_norm": 0.8212193129654404, "learning_rate": 9.599601506933256e-06, "loss": 0.4149, "step": 2465 }, { "epoch": 0.1544653063781143, "grad_norm": 0.8900114906296487, "learning_rate": 9.599203661090226e-06, "loss": 0.4571, "step": 2466 }, { "epoch": 0.1545279443774566, "grad_norm": 0.9398715076321317, "learning_rate": 9.598805625943122e-06, "loss": 0.4354, "step": 2467 }, { "epoch": 0.15459058237679887, "grad_norm": 0.8760707797943218, "learning_rate": 9.598407401508325e-06, "loss": 0.4667, "step": 2468 }, { "epoch": 0.15465322037614118, "grad_norm": 0.7735964406430383, "learning_rate": 9.598008987802226e-06, "loss": 0.3842, "step": 2469 }, { "epoch": 0.1547158583754835, "grad_norm": 0.9276637589597163, "learning_rate": 9.597610384841222e-06, "loss": 0.4919, "step": 2470 }, { "epoch": 0.1547784963748258, "grad_norm": 0.8137203146992326, "learning_rate": 9.597211592641721e-06, "loss": 0.4407, "step": 2471 }, { "epoch": 0.15484113437416808, "grad_norm": 0.8715943761363466, "learning_rate": 9.596812611220139e-06, "loss": 0.4599, "step": 2472 }, { "epoch": 0.15490377237351038, "grad_norm": 0.9028416295026567, "learning_rate": 9.596413440592897e-06, "loss": 0.4177, "step": 2473 }, { "epoch": 0.1549664103728527, "grad_norm": 0.864732484161656, "learning_rate": 9.596014080776424e-06, "loss": 0.4412, "step": 2474 }, { "epoch": 0.155029048372195, "grad_norm": 0.9034319011181509, "learning_rate": 9.595614531787159e-06, "loss": 0.4455, "step": 2475 }, { "epoch": 0.15509168637153728, "grad_norm": 0.8204208193529091, "learning_rate": 9.595214793641546e-06, "loss": 0.405, "step": 2476 }, { "epoch": 0.1551543243708796, "grad_norm": 0.8500643692932301, "learning_rate": 9.594814866356039e-06, "loss": 0.4322, "step": 2477 }, { "epoch": 0.1552169623702219, "grad_norm": 0.8211132613232758, "learning_rate": 9.594414749947099e-06, "loss": 0.4461, "step": 2478 }, { "epoch": 0.1552796003695642, "grad_norm": 0.8656297821536006, "learning_rate": 9.594014444431196e-06, "loss": 0.4543, "step": 2479 }, { "epoch": 0.1553422383689065, "grad_norm": 0.8931750320715225, "learning_rate": 9.593613949824806e-06, "loss": 0.4087, "step": 2480 }, { "epoch": 0.1554048763682488, "grad_norm": 0.891000085506786, "learning_rate": 9.593213266144412e-06, "loss": 0.463, "step": 2481 }, { "epoch": 0.1554675143675911, "grad_norm": 0.9658209563212905, "learning_rate": 9.592812393406509e-06, "loss": 0.4494, "step": 2482 }, { "epoch": 0.1555301523669334, "grad_norm": 0.852049062205769, "learning_rate": 9.592411331627593e-06, "loss": 0.4391, "step": 2483 }, { "epoch": 0.1555927903662757, "grad_norm": 0.9895709281234618, "learning_rate": 9.592010080824177e-06, "loss": 0.4898, "step": 2484 }, { "epoch": 0.155655428365618, "grad_norm": 1.001154502257721, "learning_rate": 9.591608641012773e-06, "loss": 0.4654, "step": 2485 }, { "epoch": 0.1557180663649603, "grad_norm": 0.8621538640480553, "learning_rate": 9.591207012209905e-06, "loss": 0.4575, "step": 2486 }, { "epoch": 0.1557807043643026, "grad_norm": 0.8592081363336459, "learning_rate": 9.590805194432103e-06, "loss": 0.4342, "step": 2487 }, { "epoch": 0.15584334236364492, "grad_norm": 0.9036978090961402, "learning_rate": 9.59040318769591e-06, "loss": 0.4761, "step": 2488 }, { "epoch": 0.1559059803629872, "grad_norm": 0.8386593291023011, "learning_rate": 9.590000992017867e-06, "loss": 0.4824, "step": 2489 }, { "epoch": 0.1559686183623295, "grad_norm": 0.8750595485449342, "learning_rate": 9.589598607414531e-06, "loss": 0.3938, "step": 2490 }, { "epoch": 0.1560312563616718, "grad_norm": 0.9135826174710197, "learning_rate": 9.589196033902465e-06, "loss": 0.4757, "step": 2491 }, { "epoch": 0.15609389436101412, "grad_norm": 0.8668451770535498, "learning_rate": 9.58879327149824e-06, "loss": 0.4689, "step": 2492 }, { "epoch": 0.1561565323603564, "grad_norm": 0.8848214317959661, "learning_rate": 9.58839032021843e-06, "loss": 0.466, "step": 2493 }, { "epoch": 0.1562191703596987, "grad_norm": 0.8415634066432458, "learning_rate": 9.587987180079626e-06, "loss": 0.4266, "step": 2494 }, { "epoch": 0.156281808359041, "grad_norm": 0.8438172158473972, "learning_rate": 9.587583851098414e-06, "loss": 0.491, "step": 2495 }, { "epoch": 0.15634444635838332, "grad_norm": 0.9542748483301218, "learning_rate": 9.587180333291402e-06, "loss": 0.3992, "step": 2496 }, { "epoch": 0.1564070843577256, "grad_norm": 0.8051876077893441, "learning_rate": 9.586776626675195e-06, "loss": 0.4045, "step": 2497 }, { "epoch": 0.1564697223570679, "grad_norm": 0.8581728797269185, "learning_rate": 9.586372731266412e-06, "loss": 0.3971, "step": 2498 }, { "epoch": 0.15653236035641022, "grad_norm": 0.8369257112601043, "learning_rate": 9.585968647081674e-06, "loss": 0.424, "step": 2499 }, { "epoch": 0.15659499835575252, "grad_norm": 0.8503933370686396, "learning_rate": 9.585564374137617e-06, "loss": 0.4581, "step": 2500 }, { "epoch": 0.15665763635509483, "grad_norm": 0.8279242480143701, "learning_rate": 9.58515991245088e-06, "loss": 0.4087, "step": 2501 }, { "epoch": 0.1567202743544371, "grad_norm": 0.7982035839231489, "learning_rate": 9.584755262038106e-06, "loss": 0.3957, "step": 2502 }, { "epoch": 0.15678291235377942, "grad_norm": 0.8035537044448254, "learning_rate": 9.584350422915959e-06, "loss": 0.4113, "step": 2503 }, { "epoch": 0.15684555035312173, "grad_norm": 0.8769068026035574, "learning_rate": 9.583945395101095e-06, "loss": 0.4605, "step": 2504 }, { "epoch": 0.15690818835246403, "grad_norm": 0.8176030744920416, "learning_rate": 9.583540178610189e-06, "loss": 0.4266, "step": 2505 }, { "epoch": 0.15697082635180631, "grad_norm": 0.912999599012218, "learning_rate": 9.583134773459917e-06, "loss": 0.4341, "step": 2506 }, { "epoch": 0.15703346435114862, "grad_norm": 0.8424329714768108, "learning_rate": 9.582729179666967e-06, "loss": 0.4471, "step": 2507 }, { "epoch": 0.15709610235049093, "grad_norm": 0.8735793136809137, "learning_rate": 9.582323397248035e-06, "loss": 0.475, "step": 2508 }, { "epoch": 0.15715874034983324, "grad_norm": 0.8269125150035503, "learning_rate": 9.58191742621982e-06, "loss": 0.4137, "step": 2509 }, { "epoch": 0.15722137834917552, "grad_norm": 0.8464295914905088, "learning_rate": 9.581511266599036e-06, "loss": 0.453, "step": 2510 }, { "epoch": 0.15728401634851782, "grad_norm": 0.9307725791557825, "learning_rate": 9.581104918402395e-06, "loss": 0.4758, "step": 2511 }, { "epoch": 0.15734665434786013, "grad_norm": 0.8682580263120265, "learning_rate": 9.580698381646626e-06, "loss": 0.4341, "step": 2512 }, { "epoch": 0.15740929234720244, "grad_norm": 0.8535748257768625, "learning_rate": 9.580291656348462e-06, "loss": 0.4323, "step": 2513 }, { "epoch": 0.15747193034654472, "grad_norm": 0.9471617759836005, "learning_rate": 9.579884742524644e-06, "loss": 0.5298, "step": 2514 }, { "epoch": 0.15753456834588703, "grad_norm": 0.8418125312666805, "learning_rate": 9.579477640191917e-06, "loss": 0.425, "step": 2515 }, { "epoch": 0.15759720634522933, "grad_norm": 0.8691617366885266, "learning_rate": 9.579070349367044e-06, "loss": 0.4347, "step": 2516 }, { "epoch": 0.15765984434457164, "grad_norm": 0.8279703310738359, "learning_rate": 9.578662870066783e-06, "loss": 0.415, "step": 2517 }, { "epoch": 0.15772248234391392, "grad_norm": 0.8138718972081685, "learning_rate": 9.578255202307909e-06, "loss": 0.4152, "step": 2518 }, { "epoch": 0.15778512034325623, "grad_norm": 0.8575941917600187, "learning_rate": 9.5778473461072e-06, "loss": 0.3966, "step": 2519 }, { "epoch": 0.15784775834259854, "grad_norm": 0.8118657391396819, "learning_rate": 9.577439301481448e-06, "loss": 0.422, "step": 2520 }, { "epoch": 0.15791039634194085, "grad_norm": 0.8920240296353231, "learning_rate": 9.577031068447441e-06, "loss": 0.4285, "step": 2521 }, { "epoch": 0.15797303434128315, "grad_norm": 0.8470642872928104, "learning_rate": 9.576622647021988e-06, "loss": 0.4639, "step": 2522 }, { "epoch": 0.15803567234062543, "grad_norm": 0.8956732117373375, "learning_rate": 9.576214037221895e-06, "loss": 0.4508, "step": 2523 }, { "epoch": 0.15809831033996774, "grad_norm": 0.8853283669132453, "learning_rate": 9.575805239063985e-06, "loss": 0.4811, "step": 2524 }, { "epoch": 0.15816094833931005, "grad_norm": 0.824320677832317, "learning_rate": 9.575396252565083e-06, "loss": 0.3958, "step": 2525 }, { "epoch": 0.15822358633865236, "grad_norm": 0.8085501844761102, "learning_rate": 9.574987077742019e-06, "loss": 0.4232, "step": 2526 }, { "epoch": 0.15828622433799464, "grad_norm": 0.8160468566400376, "learning_rate": 9.57457771461164e-06, "loss": 0.4292, "step": 2527 }, { "epoch": 0.15834886233733694, "grad_norm": 0.9197057185278228, "learning_rate": 9.574168163190792e-06, "loss": 0.4714, "step": 2528 }, { "epoch": 0.15841150033667925, "grad_norm": 0.911108862126394, "learning_rate": 9.573758423496334e-06, "loss": 0.471, "step": 2529 }, { "epoch": 0.15847413833602156, "grad_norm": 0.9216576968304545, "learning_rate": 9.57334849554513e-06, "loss": 0.4666, "step": 2530 }, { "epoch": 0.15853677633536384, "grad_norm": 0.844955688177425, "learning_rate": 9.572938379354053e-06, "loss": 0.4315, "step": 2531 }, { "epoch": 0.15859941433470615, "grad_norm": 0.9138515211089059, "learning_rate": 9.572528074939985e-06, "loss": 0.4552, "step": 2532 }, { "epoch": 0.15866205233404845, "grad_norm": 0.8223979468975695, "learning_rate": 9.57211758231981e-06, "loss": 0.426, "step": 2533 }, { "epoch": 0.15872469033339076, "grad_norm": 0.8850696018432389, "learning_rate": 9.571706901510429e-06, "loss": 0.4484, "step": 2534 }, { "epoch": 0.15878732833273304, "grad_norm": 0.8804621297362388, "learning_rate": 9.571296032528744e-06, "loss": 0.4432, "step": 2535 }, { "epoch": 0.15884996633207535, "grad_norm": 0.8425866609014727, "learning_rate": 9.570884975391662e-06, "loss": 0.4449, "step": 2536 }, { "epoch": 0.15891260433141766, "grad_norm": 0.8562809460415205, "learning_rate": 9.57047373011611e-06, "loss": 0.4351, "step": 2537 }, { "epoch": 0.15897524233075996, "grad_norm": 0.840665576556803, "learning_rate": 9.570062296719009e-06, "loss": 0.4128, "step": 2538 }, { "epoch": 0.15903788033010224, "grad_norm": 0.8165823805791668, "learning_rate": 9.569650675217295e-06, "loss": 0.3964, "step": 2539 }, { "epoch": 0.15910051832944455, "grad_norm": 0.8931891364782731, "learning_rate": 9.569238865627913e-06, "loss": 0.423, "step": 2540 }, { "epoch": 0.15916315632878686, "grad_norm": 0.8270748795527492, "learning_rate": 9.56882686796781e-06, "loss": 0.4155, "step": 2541 }, { "epoch": 0.15922579432812917, "grad_norm": 0.8369468855852289, "learning_rate": 9.568414682253946e-06, "loss": 0.4627, "step": 2542 }, { "epoch": 0.15928843232747147, "grad_norm": 0.8036040809493677, "learning_rate": 9.568002308503285e-06, "loss": 0.4903, "step": 2543 }, { "epoch": 0.15935107032681375, "grad_norm": 0.8780574109551809, "learning_rate": 9.5675897467328e-06, "loss": 0.4483, "step": 2544 }, { "epoch": 0.15941370832615606, "grad_norm": 0.88611768799201, "learning_rate": 9.567176996959475e-06, "loss": 0.4309, "step": 2545 }, { "epoch": 0.15947634632549837, "grad_norm": 0.7403056035678959, "learning_rate": 9.566764059200296e-06, "loss": 0.4967, "step": 2546 }, { "epoch": 0.15953898432484068, "grad_norm": 0.8779093338235935, "learning_rate": 9.566350933472262e-06, "loss": 0.3897, "step": 2547 }, { "epoch": 0.15960162232418296, "grad_norm": 0.835235471937895, "learning_rate": 9.565937619792375e-06, "loss": 0.4165, "step": 2548 }, { "epoch": 0.15966426032352526, "grad_norm": 0.8403329879364821, "learning_rate": 9.56552411817765e-06, "loss": 0.4379, "step": 2549 }, { "epoch": 0.15972689832286757, "grad_norm": 0.8695072504748369, "learning_rate": 9.5651104286451e-06, "loss": 0.4467, "step": 2550 }, { "epoch": 0.15978953632220988, "grad_norm": 0.8002200594838434, "learning_rate": 9.564696551211763e-06, "loss": 0.3966, "step": 2551 }, { "epoch": 0.15985217432155216, "grad_norm": 0.8944586092047815, "learning_rate": 9.564282485894665e-06, "loss": 0.409, "step": 2552 }, { "epoch": 0.15991481232089447, "grad_norm": 0.8037714890905621, "learning_rate": 9.563868232710855e-06, "loss": 0.4496, "step": 2553 }, { "epoch": 0.15997745032023677, "grad_norm": 0.839842913644358, "learning_rate": 9.56345379167738e-06, "loss": 0.4222, "step": 2554 }, { "epoch": 0.16004008831957908, "grad_norm": 0.8167535139595054, "learning_rate": 9.563039162811301e-06, "loss": 0.4217, "step": 2555 }, { "epoch": 0.16010272631892136, "grad_norm": 0.8519238483656325, "learning_rate": 9.562624346129683e-06, "loss": 0.4406, "step": 2556 }, { "epoch": 0.16016536431826367, "grad_norm": 0.8680749249414098, "learning_rate": 9.562209341649598e-06, "loss": 0.4526, "step": 2557 }, { "epoch": 0.16022800231760598, "grad_norm": 0.8789017541723526, "learning_rate": 9.561794149388132e-06, "loss": 0.4417, "step": 2558 }, { "epoch": 0.16029064031694829, "grad_norm": 0.8723504599285197, "learning_rate": 9.561378769362372e-06, "loss": 0.4274, "step": 2559 }, { "epoch": 0.16035327831629057, "grad_norm": 0.79947706288344, "learning_rate": 9.560963201589416e-06, "loss": 0.4562, "step": 2560 }, { "epoch": 0.16041591631563287, "grad_norm": 0.8652676750965085, "learning_rate": 9.560547446086366e-06, "loss": 0.4509, "step": 2561 }, { "epoch": 0.16047855431497518, "grad_norm": 0.9135956823633377, "learning_rate": 9.560131502870335e-06, "loss": 0.4638, "step": 2562 }, { "epoch": 0.1605411923143175, "grad_norm": 0.9596051762974733, "learning_rate": 9.55971537195845e-06, "loss": 0.4404, "step": 2563 }, { "epoch": 0.1606038303136598, "grad_norm": 0.8404367915446014, "learning_rate": 9.55929905336783e-06, "loss": 0.4272, "step": 2564 }, { "epoch": 0.16066646831300208, "grad_norm": 0.8639319970923328, "learning_rate": 9.558882547115616e-06, "loss": 0.4671, "step": 2565 }, { "epoch": 0.16072910631234438, "grad_norm": 0.8775147043363833, "learning_rate": 9.55846585321895e-06, "loss": 0.4421, "step": 2566 }, { "epoch": 0.1607917443116867, "grad_norm": 0.8676708367020642, "learning_rate": 9.558048971694984e-06, "loss": 0.3875, "step": 2567 }, { "epoch": 0.160854382311029, "grad_norm": 0.8915820871769075, "learning_rate": 9.557631902560876e-06, "loss": 0.4102, "step": 2568 }, { "epoch": 0.16091702031037128, "grad_norm": 0.8349371890279598, "learning_rate": 9.557214645833792e-06, "loss": 0.4208, "step": 2569 }, { "epoch": 0.16097965830971359, "grad_norm": 0.8912421591791062, "learning_rate": 9.556797201530908e-06, "loss": 0.4547, "step": 2570 }, { "epoch": 0.1610422963090559, "grad_norm": 0.8836316722571188, "learning_rate": 9.556379569669406e-06, "loss": 0.458, "step": 2571 }, { "epoch": 0.1611049343083982, "grad_norm": 0.9030332939927537, "learning_rate": 9.555961750266475e-06, "loss": 0.5007, "step": 2572 }, { "epoch": 0.16116757230774048, "grad_norm": 0.8871465403605112, "learning_rate": 9.555543743339312e-06, "loss": 0.454, "step": 2573 }, { "epoch": 0.1612302103070828, "grad_norm": 0.8847752193321187, "learning_rate": 9.555125548905123e-06, "loss": 0.3685, "step": 2574 }, { "epoch": 0.1612928483064251, "grad_norm": 0.8632241759768705, "learning_rate": 9.554707166981122e-06, "loss": 0.4133, "step": 2575 }, { "epoch": 0.1613554863057674, "grad_norm": 1.0240420403273873, "learning_rate": 9.55428859758453e-06, "loss": 0.4521, "step": 2576 }, { "epoch": 0.16141812430510968, "grad_norm": 0.9089817435399401, "learning_rate": 9.55386984073257e-06, "loss": 0.3985, "step": 2577 }, { "epoch": 0.161480762304452, "grad_norm": 0.8665045743026693, "learning_rate": 9.553450896442484e-06, "loss": 0.4591, "step": 2578 }, { "epoch": 0.1615434003037943, "grad_norm": 0.9349600363179519, "learning_rate": 9.553031764731516e-06, "loss": 0.5081, "step": 2579 }, { "epoch": 0.1616060383031366, "grad_norm": 0.8236522905540098, "learning_rate": 9.552612445616912e-06, "loss": 0.4414, "step": 2580 }, { "epoch": 0.1616686763024789, "grad_norm": 0.8376150834385812, "learning_rate": 9.552192939115936e-06, "loss": 0.3956, "step": 2581 }, { "epoch": 0.1617313143018212, "grad_norm": 0.838462065433609, "learning_rate": 9.551773245245854e-06, "loss": 0.4226, "step": 2582 }, { "epoch": 0.1617939523011635, "grad_norm": 0.8599953816256034, "learning_rate": 9.55135336402394e-06, "loss": 0.4549, "step": 2583 }, { "epoch": 0.1618565903005058, "grad_norm": 0.9240649040907671, "learning_rate": 9.550933295467478e-06, "loss": 0.472, "step": 2584 }, { "epoch": 0.1619192282998481, "grad_norm": 0.9273151361803083, "learning_rate": 9.550513039593756e-06, "loss": 0.4412, "step": 2585 }, { "epoch": 0.1619818662991904, "grad_norm": 0.9272536058986685, "learning_rate": 9.550092596420073e-06, "loss": 0.504, "step": 2586 }, { "epoch": 0.1620445042985327, "grad_norm": 0.8247764956278907, "learning_rate": 9.549671965963734e-06, "loss": 0.4164, "step": 2587 }, { "epoch": 0.162107142297875, "grad_norm": 0.9091803752434323, "learning_rate": 9.549251148242051e-06, "loss": 0.4595, "step": 2588 }, { "epoch": 0.16216978029721732, "grad_norm": 0.8278794013319789, "learning_rate": 9.548830143272348e-06, "loss": 0.4109, "step": 2589 }, { "epoch": 0.1622324182965596, "grad_norm": 0.9085954953181858, "learning_rate": 9.548408951071953e-06, "loss": 0.4271, "step": 2590 }, { "epoch": 0.1622950562959019, "grad_norm": 0.8953640267383759, "learning_rate": 9.5479875716582e-06, "loss": 0.4448, "step": 2591 }, { "epoch": 0.16235769429524421, "grad_norm": 0.8471312579845293, "learning_rate": 9.547566005048433e-06, "loss": 0.4646, "step": 2592 }, { "epoch": 0.16242033229458652, "grad_norm": 1.0128670955935837, "learning_rate": 9.547144251260007e-06, "loss": 0.4663, "step": 2593 }, { "epoch": 0.1624829702939288, "grad_norm": 0.9052457765957664, "learning_rate": 9.54672231031028e-06, "loss": 0.4446, "step": 2594 }, { "epoch": 0.1625456082932711, "grad_norm": 0.9113257736885182, "learning_rate": 9.546300182216616e-06, "loss": 0.4452, "step": 2595 }, { "epoch": 0.16260824629261342, "grad_norm": 0.8564936462335121, "learning_rate": 9.545877866996396e-06, "loss": 0.4437, "step": 2596 }, { "epoch": 0.16267088429195573, "grad_norm": 0.9273631353503378, "learning_rate": 9.545455364666998e-06, "loss": 0.4315, "step": 2597 }, { "epoch": 0.162733522291298, "grad_norm": 0.8558732329447561, "learning_rate": 9.545032675245814e-06, "loss": 0.4476, "step": 2598 }, { "epoch": 0.1627961602906403, "grad_norm": 0.9069014959222792, "learning_rate": 9.54460979875024e-06, "loss": 0.4671, "step": 2599 }, { "epoch": 0.16285879828998262, "grad_norm": 0.832777761286989, "learning_rate": 9.544186735197686e-06, "loss": 0.4284, "step": 2600 }, { "epoch": 0.16292143628932493, "grad_norm": 0.9340500413851691, "learning_rate": 9.54376348460556e-06, "loss": 0.4597, "step": 2601 }, { "epoch": 0.1629840742886672, "grad_norm": 0.8483945898265481, "learning_rate": 9.543340046991286e-06, "loss": 0.449, "step": 2602 }, { "epoch": 0.16304671228800952, "grad_norm": 0.8406446581781889, "learning_rate": 9.542916422372293e-06, "loss": 0.4243, "step": 2603 }, { "epoch": 0.16310935028735182, "grad_norm": 0.9238174578296955, "learning_rate": 9.542492610766016e-06, "loss": 0.4623, "step": 2604 }, { "epoch": 0.16317198828669413, "grad_norm": 0.7945145737787388, "learning_rate": 9.5420686121899e-06, "loss": 0.4174, "step": 2605 }, { "epoch": 0.1632346262860364, "grad_norm": 0.8896004678867944, "learning_rate": 9.541644426661397e-06, "loss": 0.4264, "step": 2606 }, { "epoch": 0.16329726428537872, "grad_norm": 0.8898627299334262, "learning_rate": 9.541220054197966e-06, "loss": 0.423, "step": 2607 }, { "epoch": 0.16335990228472103, "grad_norm": 0.8763471913489947, "learning_rate": 9.540795494817076e-06, "loss": 0.4193, "step": 2608 }, { "epoch": 0.16342254028406333, "grad_norm": 0.8436600810945617, "learning_rate": 9.540370748536201e-06, "loss": 0.4205, "step": 2609 }, { "epoch": 0.16348517828340564, "grad_norm": 0.9350106878042955, "learning_rate": 9.539945815372822e-06, "loss": 0.4369, "step": 2610 }, { "epoch": 0.16354781628274792, "grad_norm": 0.8913550842322173, "learning_rate": 9.53952069534443e-06, "loss": 0.4521, "step": 2611 }, { "epoch": 0.16361045428209023, "grad_norm": 0.8629102139337645, "learning_rate": 9.539095388468525e-06, "loss": 0.4195, "step": 2612 }, { "epoch": 0.16367309228143254, "grad_norm": 0.7844790895617535, "learning_rate": 9.538669894762611e-06, "loss": 0.4785, "step": 2613 }, { "epoch": 0.16373573028077484, "grad_norm": 0.9244115302495114, "learning_rate": 9.5382442142442e-06, "loss": 0.4502, "step": 2614 }, { "epoch": 0.16379836828011712, "grad_norm": 0.8810955780893498, "learning_rate": 9.537818346930819e-06, "loss": 0.4661, "step": 2615 }, { "epoch": 0.16386100627945943, "grad_norm": 0.9217815303113598, "learning_rate": 9.537392292839989e-06, "loss": 0.4545, "step": 2616 }, { "epoch": 0.16392364427880174, "grad_norm": 0.9217952262472913, "learning_rate": 9.536966051989252e-06, "loss": 0.4404, "step": 2617 }, { "epoch": 0.16398628227814405, "grad_norm": 0.9715426676094361, "learning_rate": 9.536539624396149e-06, "loss": 0.4415, "step": 2618 }, { "epoch": 0.16404892027748633, "grad_norm": 0.94715564517307, "learning_rate": 9.536113010078232e-06, "loss": 0.4662, "step": 2619 }, { "epoch": 0.16411155827682863, "grad_norm": 0.8472923294929718, "learning_rate": 9.535686209053064e-06, "loss": 0.4293, "step": 2620 }, { "epoch": 0.16417419627617094, "grad_norm": 0.7983988285100745, "learning_rate": 9.535259221338206e-06, "loss": 0.4267, "step": 2621 }, { "epoch": 0.16423683427551325, "grad_norm": 0.8463324824907011, "learning_rate": 9.534832046951241e-06, "loss": 0.4417, "step": 2622 }, { "epoch": 0.16429947227485553, "grad_norm": 0.8748217691673758, "learning_rate": 9.534404685909744e-06, "loss": 0.4401, "step": 2623 }, { "epoch": 0.16436211027419784, "grad_norm": 0.8878847194572562, "learning_rate": 9.533977138231312e-06, "loss": 0.4396, "step": 2624 }, { "epoch": 0.16442474827354014, "grad_norm": 0.8168905382465736, "learning_rate": 9.533549403933538e-06, "loss": 0.4187, "step": 2625 }, { "epoch": 0.16448738627288245, "grad_norm": 0.9100798866096558, "learning_rate": 9.533121483034027e-06, "loss": 0.4907, "step": 2626 }, { "epoch": 0.16455002427222473, "grad_norm": 0.949333722626153, "learning_rate": 9.532693375550396e-06, "loss": 0.4053, "step": 2627 }, { "epoch": 0.16461266227156704, "grad_norm": 0.8532791262632705, "learning_rate": 9.532265081500264e-06, "loss": 0.4271, "step": 2628 }, { "epoch": 0.16467530027090935, "grad_norm": 0.8356120182006105, "learning_rate": 9.53183660090126e-06, "loss": 0.4581, "step": 2629 }, { "epoch": 0.16473793827025165, "grad_norm": 0.8085402242895724, "learning_rate": 9.53140793377102e-06, "loss": 0.431, "step": 2630 }, { "epoch": 0.16480057626959396, "grad_norm": 0.8556743000547639, "learning_rate": 9.530979080127189e-06, "loss": 0.4607, "step": 2631 }, { "epoch": 0.16486321426893624, "grad_norm": 0.8221051175245874, "learning_rate": 9.530550039987419e-06, "loss": 0.4075, "step": 2632 }, { "epoch": 0.16492585226827855, "grad_norm": 0.878864268450772, "learning_rate": 9.530120813369368e-06, "loss": 0.4407, "step": 2633 }, { "epoch": 0.16498849026762086, "grad_norm": 0.880353809754111, "learning_rate": 9.529691400290703e-06, "loss": 0.448, "step": 2634 }, { "epoch": 0.16505112826696317, "grad_norm": 0.8898731108843905, "learning_rate": 9.529261800769099e-06, "loss": 0.4413, "step": 2635 }, { "epoch": 0.16511376626630545, "grad_norm": 0.8350597809636049, "learning_rate": 9.528832014822238e-06, "loss": 0.4338, "step": 2636 }, { "epoch": 0.16517640426564775, "grad_norm": 0.9264286934249504, "learning_rate": 9.528402042467812e-06, "loss": 0.4787, "step": 2637 }, { "epoch": 0.16523904226499006, "grad_norm": 0.9142159447880784, "learning_rate": 9.527971883723518e-06, "loss": 0.4764, "step": 2638 }, { "epoch": 0.16530168026433237, "grad_norm": 0.7918535403814716, "learning_rate": 9.527541538607059e-06, "loss": 0.4087, "step": 2639 }, { "epoch": 0.16536431826367465, "grad_norm": 0.8617239167269862, "learning_rate": 9.52711100713615e-06, "loss": 0.4401, "step": 2640 }, { "epoch": 0.16542695626301696, "grad_norm": 0.9004091311975695, "learning_rate": 9.526680289328513e-06, "loss": 0.4709, "step": 2641 }, { "epoch": 0.16548959426235926, "grad_norm": 0.8504394300225159, "learning_rate": 9.526249385201873e-06, "loss": 0.4256, "step": 2642 }, { "epoch": 0.16555223226170157, "grad_norm": 0.9693514701140441, "learning_rate": 9.525818294773971e-06, "loss": 0.4475, "step": 2643 }, { "epoch": 0.16561487026104385, "grad_norm": 0.8409492227760244, "learning_rate": 9.525387018062547e-06, "loss": 0.4326, "step": 2644 }, { "epoch": 0.16567750826038616, "grad_norm": 0.9348518632782661, "learning_rate": 9.524955555085352e-06, "loss": 0.4497, "step": 2645 }, { "epoch": 0.16574014625972847, "grad_norm": 0.8815572919295553, "learning_rate": 9.524523905860147e-06, "loss": 0.4317, "step": 2646 }, { "epoch": 0.16580278425907077, "grad_norm": 0.926449605694904, "learning_rate": 9.5240920704047e-06, "loss": 0.4473, "step": 2647 }, { "epoch": 0.16586542225841305, "grad_norm": 0.8319380504598587, "learning_rate": 9.523660048736782e-06, "loss": 0.4298, "step": 2648 }, { "epoch": 0.16592806025775536, "grad_norm": 0.8248268655507827, "learning_rate": 9.523227840874177e-06, "loss": 0.4162, "step": 2649 }, { "epoch": 0.16599069825709767, "grad_norm": 0.8375105822311147, "learning_rate": 9.522795446834673e-06, "loss": 0.4442, "step": 2650 }, { "epoch": 0.16605333625643998, "grad_norm": 0.8101901315404193, "learning_rate": 9.522362866636073e-06, "loss": 0.4353, "step": 2651 }, { "epoch": 0.16611597425578228, "grad_norm": 0.7448247633079829, "learning_rate": 9.521930100296175e-06, "loss": 0.4198, "step": 2652 }, { "epoch": 0.16617861225512456, "grad_norm": 0.9079934609694781, "learning_rate": 9.521497147832794e-06, "loss": 0.4672, "step": 2653 }, { "epoch": 0.16624125025446687, "grad_norm": 0.8201563333136723, "learning_rate": 9.521064009263755e-06, "loss": 0.4243, "step": 2654 }, { "epoch": 0.16630388825380918, "grad_norm": 0.9101177370600885, "learning_rate": 9.520630684606877e-06, "loss": 0.4413, "step": 2655 }, { "epoch": 0.1663665262531515, "grad_norm": 0.8752368488643526, "learning_rate": 9.520197173880005e-06, "loss": 0.4775, "step": 2656 }, { "epoch": 0.16642916425249377, "grad_norm": 0.8923787384681517, "learning_rate": 9.519763477100976e-06, "loss": 0.4395, "step": 2657 }, { "epoch": 0.16649180225183607, "grad_norm": 0.915052711413404, "learning_rate": 9.519329594287645e-06, "loss": 0.463, "step": 2658 }, { "epoch": 0.16655444025117838, "grad_norm": 0.8501612124796961, "learning_rate": 9.518895525457869e-06, "loss": 0.5256, "step": 2659 }, { "epoch": 0.1666170782505207, "grad_norm": 0.7703010151970023, "learning_rate": 9.518461270629514e-06, "loss": 0.4225, "step": 2660 }, { "epoch": 0.16667971624986297, "grad_norm": 0.8146315471089793, "learning_rate": 9.518026829820454e-06, "loss": 0.4052, "step": 2661 }, { "epoch": 0.16674235424920528, "grad_norm": 0.8318083223507846, "learning_rate": 9.517592203048571e-06, "loss": 0.4081, "step": 2662 }, { "epoch": 0.16680499224854758, "grad_norm": 0.7587490568793648, "learning_rate": 9.517157390331756e-06, "loss": 0.4274, "step": 2663 }, { "epoch": 0.1668676302478899, "grad_norm": 0.8581095895237623, "learning_rate": 9.516722391687903e-06, "loss": 0.4273, "step": 2664 }, { "epoch": 0.16693026824723217, "grad_norm": 0.8705821254322631, "learning_rate": 9.516287207134918e-06, "loss": 0.4131, "step": 2665 }, { "epoch": 0.16699290624657448, "grad_norm": 0.9580113294793938, "learning_rate": 9.515851836690713e-06, "loss": 0.4966, "step": 2666 }, { "epoch": 0.1670555442459168, "grad_norm": 0.8113628762092572, "learning_rate": 9.515416280373209e-06, "loss": 0.4188, "step": 2667 }, { "epoch": 0.1671181822452591, "grad_norm": 0.7980668413514465, "learning_rate": 9.514980538200333e-06, "loss": 0.4154, "step": 2668 }, { "epoch": 0.16718082024460137, "grad_norm": 0.8152126449134496, "learning_rate": 9.51454461019002e-06, "loss": 0.4581, "step": 2669 }, { "epoch": 0.16724345824394368, "grad_norm": 0.8660753701274988, "learning_rate": 9.514108496360212e-06, "loss": 0.4442, "step": 2670 }, { "epoch": 0.167306096243286, "grad_norm": 0.8772952846645293, "learning_rate": 9.51367219672886e-06, "loss": 0.4644, "step": 2671 }, { "epoch": 0.1673687342426283, "grad_norm": 0.8730339863017681, "learning_rate": 9.513235711313923e-06, "loss": 0.4669, "step": 2672 }, { "epoch": 0.1674313722419706, "grad_norm": 0.8528428467283024, "learning_rate": 9.512799040133368e-06, "loss": 0.4409, "step": 2673 }, { "epoch": 0.16749401024131289, "grad_norm": 0.8536404598700664, "learning_rate": 9.512362183205166e-06, "loss": 0.4746, "step": 2674 }, { "epoch": 0.1675566482406552, "grad_norm": 0.861086522853339, "learning_rate": 9.511925140547299e-06, "loss": 0.4101, "step": 2675 }, { "epoch": 0.1676192862399975, "grad_norm": 0.8307207154292103, "learning_rate": 9.511487912177755e-06, "loss": 0.4161, "step": 2676 }, { "epoch": 0.1676819242393398, "grad_norm": 0.8297401662783211, "learning_rate": 9.511050498114532e-06, "loss": 0.4321, "step": 2677 }, { "epoch": 0.1677445622386821, "grad_norm": 0.869918407861152, "learning_rate": 9.510612898375634e-06, "loss": 0.4053, "step": 2678 }, { "epoch": 0.1678072002380244, "grad_norm": 0.9072667327835706, "learning_rate": 9.51017511297907e-06, "loss": 0.4396, "step": 2679 }, { "epoch": 0.1678698382373667, "grad_norm": 0.9539621929203498, "learning_rate": 9.509737141942863e-06, "loss": 0.4668, "step": 2680 }, { "epoch": 0.167932476236709, "grad_norm": 0.8496498280095651, "learning_rate": 9.509298985285037e-06, "loss": 0.4209, "step": 2681 }, { "epoch": 0.1679951142360513, "grad_norm": 0.8775222447178712, "learning_rate": 9.508860643023627e-06, "loss": 0.4248, "step": 2682 }, { "epoch": 0.1680577522353936, "grad_norm": 0.9159841707679621, "learning_rate": 9.50842211517668e-06, "loss": 0.3993, "step": 2683 }, { "epoch": 0.1681203902347359, "grad_norm": 0.8386196347112842, "learning_rate": 9.507983401762238e-06, "loss": 0.4002, "step": 2684 }, { "epoch": 0.1681830282340782, "grad_norm": 0.8466563462701431, "learning_rate": 9.507544502798363e-06, "loss": 0.4236, "step": 2685 }, { "epoch": 0.1682456662334205, "grad_norm": 0.9458750776593242, "learning_rate": 9.50710541830312e-06, "loss": 0.4014, "step": 2686 }, { "epoch": 0.1683083042327628, "grad_norm": 0.9449676815239555, "learning_rate": 9.506666148294582e-06, "loss": 0.4403, "step": 2687 }, { "epoch": 0.1683709422321051, "grad_norm": 0.8812754302280255, "learning_rate": 9.506226692790828e-06, "loss": 0.4784, "step": 2688 }, { "epoch": 0.16843358023144742, "grad_norm": 0.797327249113039, "learning_rate": 9.505787051809947e-06, "loss": 0.358, "step": 2689 }, { "epoch": 0.1684962182307897, "grad_norm": 0.8452235159112569, "learning_rate": 9.505347225370035e-06, "loss": 0.4018, "step": 2690 }, { "epoch": 0.168558856230132, "grad_norm": 0.8531581737489607, "learning_rate": 9.504907213489196e-06, "loss": 0.4723, "step": 2691 }, { "epoch": 0.1686214942294743, "grad_norm": 0.8329996344794001, "learning_rate": 9.50446701618554e-06, "loss": 0.4222, "step": 2692 }, { "epoch": 0.16868413222881662, "grad_norm": 0.8464989725103249, "learning_rate": 9.504026633477183e-06, "loss": 0.4399, "step": 2693 }, { "epoch": 0.1687467702281589, "grad_norm": 0.9063625773411863, "learning_rate": 9.503586065382255e-06, "loss": 0.4005, "step": 2694 }, { "epoch": 0.1688094082275012, "grad_norm": 0.9013622227876102, "learning_rate": 9.503145311918889e-06, "loss": 0.4342, "step": 2695 }, { "epoch": 0.16887204622684351, "grad_norm": 0.943849925110537, "learning_rate": 9.502704373105225e-06, "loss": 0.4753, "step": 2696 }, { "epoch": 0.16893468422618582, "grad_norm": 0.8987283216433049, "learning_rate": 9.502263248959417e-06, "loss": 0.4347, "step": 2697 }, { "epoch": 0.16899732222552813, "grad_norm": 0.8638821865809057, "learning_rate": 9.501821939499614e-06, "loss": 0.4277, "step": 2698 }, { "epoch": 0.1690599602248704, "grad_norm": 0.823803854902869, "learning_rate": 9.501380444743985e-06, "loss": 0.4105, "step": 2699 }, { "epoch": 0.16912259822421272, "grad_norm": 0.909702353811993, "learning_rate": 9.500938764710703e-06, "loss": 0.4519, "step": 2700 }, { "epoch": 0.16918523622355502, "grad_norm": 0.8804065773834213, "learning_rate": 9.500496899417944e-06, "loss": 0.4274, "step": 2701 }, { "epoch": 0.16924787422289733, "grad_norm": 0.8241543590731245, "learning_rate": 9.500054848883899e-06, "loss": 0.4164, "step": 2702 }, { "epoch": 0.1693105122222396, "grad_norm": 0.8126760789446797, "learning_rate": 9.499612613126761e-06, "loss": 0.5008, "step": 2703 }, { "epoch": 0.16937315022158192, "grad_norm": 0.8794467976569355, "learning_rate": 9.499170192164733e-06, "loss": 0.4393, "step": 2704 }, { "epoch": 0.16943578822092423, "grad_norm": 0.8812069353238116, "learning_rate": 9.498727586016023e-06, "loss": 0.4231, "step": 2705 }, { "epoch": 0.16949842622026653, "grad_norm": 0.9063087877813988, "learning_rate": 9.49828479469885e-06, "loss": 0.4776, "step": 2706 }, { "epoch": 0.16956106421960881, "grad_norm": 0.9483955436697055, "learning_rate": 9.497841818231443e-06, "loss": 0.482, "step": 2707 }, { "epoch": 0.16962370221895112, "grad_norm": 0.9781055189651422, "learning_rate": 9.49739865663203e-06, "loss": 0.4661, "step": 2708 }, { "epoch": 0.16968634021829343, "grad_norm": 0.8911000308079827, "learning_rate": 9.496955309918855e-06, "loss": 0.4456, "step": 2709 }, { "epoch": 0.16974897821763574, "grad_norm": 0.8666668137747247, "learning_rate": 9.496511778110164e-06, "loss": 0.3971, "step": 2710 }, { "epoch": 0.16981161621697802, "grad_norm": 0.849975326328914, "learning_rate": 9.496068061224214e-06, "loss": 0.3987, "step": 2711 }, { "epoch": 0.16987425421632033, "grad_norm": 0.9100904248557948, "learning_rate": 9.495624159279267e-06, "loss": 0.4896, "step": 2712 }, { "epoch": 0.16993689221566263, "grad_norm": 0.7778734698318733, "learning_rate": 9.495180072293595e-06, "loss": 0.3832, "step": 2713 }, { "epoch": 0.16999953021500494, "grad_norm": 0.9285297469187793, "learning_rate": 9.494735800285477e-06, "loss": 0.4686, "step": 2714 }, { "epoch": 0.17006216821434722, "grad_norm": 0.8726131270779746, "learning_rate": 9.4942913432732e-06, "loss": 0.4309, "step": 2715 }, { "epoch": 0.17012480621368953, "grad_norm": 0.8841817844661076, "learning_rate": 9.493846701275058e-06, "loss": 0.432, "step": 2716 }, { "epoch": 0.17018744421303184, "grad_norm": 0.8645684167309261, "learning_rate": 9.49340187430935e-06, "loss": 0.408, "step": 2717 }, { "epoch": 0.17025008221237414, "grad_norm": 0.8105416051029012, "learning_rate": 9.492956862394386e-06, "loss": 0.4905, "step": 2718 }, { "epoch": 0.17031272021171645, "grad_norm": 0.8654478671981315, "learning_rate": 9.492511665548487e-06, "loss": 0.4359, "step": 2719 }, { "epoch": 0.17037535821105873, "grad_norm": 0.8017937602920091, "learning_rate": 9.492066283789972e-06, "loss": 0.4054, "step": 2720 }, { "epoch": 0.17043799621040104, "grad_norm": 0.6993839182729844, "learning_rate": 9.491620717137176e-06, "loss": 0.4743, "step": 2721 }, { "epoch": 0.17050063420974335, "grad_norm": 0.9004845970824682, "learning_rate": 9.491174965608434e-06, "loss": 0.4779, "step": 2722 }, { "epoch": 0.17056327220908565, "grad_norm": 0.7967138422540898, "learning_rate": 9.490729029222101e-06, "loss": 0.3717, "step": 2723 }, { "epoch": 0.17062591020842793, "grad_norm": 0.8278599102603033, "learning_rate": 9.490282907996526e-06, "loss": 0.4477, "step": 2724 }, { "epoch": 0.17068854820777024, "grad_norm": 0.8503512400342429, "learning_rate": 9.489836601950073e-06, "loss": 0.4184, "step": 2725 }, { "epoch": 0.17075118620711255, "grad_norm": 0.8520475271558471, "learning_rate": 9.489390111101112e-06, "loss": 0.4305, "step": 2726 }, { "epoch": 0.17081382420645486, "grad_norm": 0.9185083585985825, "learning_rate": 9.488943435468021e-06, "loss": 0.4432, "step": 2727 }, { "epoch": 0.17087646220579714, "grad_norm": 1.0039507144890025, "learning_rate": 9.488496575069185e-06, "loss": 0.4315, "step": 2728 }, { "epoch": 0.17093910020513944, "grad_norm": 0.8745137918401572, "learning_rate": 9.488049529922997e-06, "loss": 0.4414, "step": 2729 }, { "epoch": 0.17100173820448175, "grad_norm": 0.942480677025271, "learning_rate": 9.487602300047858e-06, "loss": 0.4418, "step": 2730 }, { "epoch": 0.17106437620382406, "grad_norm": 0.8169426452277193, "learning_rate": 9.487154885462173e-06, "loss": 0.4172, "step": 2731 }, { "epoch": 0.17112701420316634, "grad_norm": 0.8454200600634748, "learning_rate": 9.486707286184361e-06, "loss": 0.4035, "step": 2732 }, { "epoch": 0.17118965220250865, "grad_norm": 0.7587290809751136, "learning_rate": 9.486259502232847e-06, "loss": 0.4335, "step": 2733 }, { "epoch": 0.17125229020185095, "grad_norm": 0.854226458644086, "learning_rate": 9.485811533626057e-06, "loss": 0.4357, "step": 2734 }, { "epoch": 0.17131492820119326, "grad_norm": 0.8425139435119016, "learning_rate": 9.485363380382432e-06, "loss": 0.4003, "step": 2735 }, { "epoch": 0.17137756620053554, "grad_norm": 0.8781491663441114, "learning_rate": 9.484915042520418e-06, "loss": 0.4351, "step": 2736 }, { "epoch": 0.17144020419987785, "grad_norm": 0.8331286137971406, "learning_rate": 9.484466520058467e-06, "loss": 0.3917, "step": 2737 }, { "epoch": 0.17150284219922016, "grad_norm": 0.9094144066104425, "learning_rate": 9.484017813015043e-06, "loss": 0.4217, "step": 2738 }, { "epoch": 0.17156548019856246, "grad_norm": 0.8973977739483414, "learning_rate": 9.483568921408614e-06, "loss": 0.4936, "step": 2739 }, { "epoch": 0.17162811819790477, "grad_norm": 0.8476288079547359, "learning_rate": 9.483119845257655e-06, "loss": 0.3856, "step": 2740 }, { "epoch": 0.17169075619724705, "grad_norm": 0.8337823245474411, "learning_rate": 9.48267058458065e-06, "loss": 0.4538, "step": 2741 }, { "epoch": 0.17175339419658936, "grad_norm": 0.9531192064031861, "learning_rate": 9.482221139396093e-06, "loss": 0.4412, "step": 2742 }, { "epoch": 0.17181603219593167, "grad_norm": 0.8336104062088713, "learning_rate": 9.481771509722483e-06, "loss": 0.4548, "step": 2743 }, { "epoch": 0.17187867019527397, "grad_norm": 0.8487844568902482, "learning_rate": 9.481321695578324e-06, "loss": 0.4277, "step": 2744 }, { "epoch": 0.17194130819461625, "grad_norm": 0.911879637973743, "learning_rate": 9.480871696982133e-06, "loss": 0.4371, "step": 2745 }, { "epoch": 0.17200394619395856, "grad_norm": 0.8445294927748577, "learning_rate": 9.480421513952432e-06, "loss": 0.4306, "step": 2746 }, { "epoch": 0.17206658419330087, "grad_norm": 0.8269585946964968, "learning_rate": 9.47997114650775e-06, "loss": 0.4169, "step": 2747 }, { "epoch": 0.17212922219264318, "grad_norm": 0.8925985873628597, "learning_rate": 9.479520594666622e-06, "loss": 0.4355, "step": 2748 }, { "epoch": 0.17219186019198546, "grad_norm": 0.8741637289002904, "learning_rate": 9.479069858447596e-06, "loss": 0.4138, "step": 2749 }, { "epoch": 0.17225449819132777, "grad_norm": 0.9312310454691328, "learning_rate": 9.478618937869223e-06, "loss": 0.5024, "step": 2750 }, { "epoch": 0.17231713619067007, "grad_norm": 0.847729700839907, "learning_rate": 9.478167832950066e-06, "loss": 0.4512, "step": 2751 }, { "epoch": 0.17237977419001238, "grad_norm": 0.9050249952715486, "learning_rate": 9.477716543708687e-06, "loss": 0.431, "step": 2752 }, { "epoch": 0.17244241218935466, "grad_norm": 0.859880386048491, "learning_rate": 9.477265070163663e-06, "loss": 0.457, "step": 2753 }, { "epoch": 0.17250505018869697, "grad_norm": 0.9960221039365595, "learning_rate": 9.47681341233358e-06, "loss": 0.5147, "step": 2754 }, { "epoch": 0.17256768818803928, "grad_norm": 0.8452680497858814, "learning_rate": 9.476361570237025e-06, "loss": 0.3847, "step": 2755 }, { "epoch": 0.17263032618738158, "grad_norm": 0.8968075224592105, "learning_rate": 9.475909543892596e-06, "loss": 0.4517, "step": 2756 }, { "epoch": 0.17269296418672386, "grad_norm": 0.8870725030714036, "learning_rate": 9.4754573333189e-06, "loss": 0.4688, "step": 2757 }, { "epoch": 0.17275560218606617, "grad_norm": 0.8094623476099095, "learning_rate": 9.475004938534551e-06, "loss": 0.4497, "step": 2758 }, { "epoch": 0.17281824018540848, "grad_norm": 0.8628631723628966, "learning_rate": 9.474552359558167e-06, "loss": 0.4352, "step": 2759 }, { "epoch": 0.17288087818475079, "grad_norm": 0.9329117817565882, "learning_rate": 9.474099596408377e-06, "loss": 0.4253, "step": 2760 }, { "epoch": 0.1729435161840931, "grad_norm": 0.9187457255463044, "learning_rate": 9.473646649103819e-06, "loss": 0.4617, "step": 2761 }, { "epoch": 0.17300615418343537, "grad_norm": 0.8009654569402445, "learning_rate": 9.473193517663132e-06, "loss": 0.368, "step": 2762 }, { "epoch": 0.17306879218277768, "grad_norm": 0.8853783868513775, "learning_rate": 9.472740202104971e-06, "loss": 0.4421, "step": 2763 }, { "epoch": 0.17313143018212, "grad_norm": 0.793431769538641, "learning_rate": 9.472286702447993e-06, "loss": 0.4091, "step": 2764 }, { "epoch": 0.1731940681814623, "grad_norm": 0.823293861170222, "learning_rate": 9.471833018710865e-06, "loss": 0.3717, "step": 2765 }, { "epoch": 0.17325670618080458, "grad_norm": 0.8377959642619103, "learning_rate": 9.471379150912258e-06, "loss": 0.4236, "step": 2766 }, { "epoch": 0.17331934418014688, "grad_norm": 0.8982659090698357, "learning_rate": 9.470925099070855e-06, "loss": 0.4762, "step": 2767 }, { "epoch": 0.1733819821794892, "grad_norm": 0.8310244779299916, "learning_rate": 9.470470863205347e-06, "loss": 0.4017, "step": 2768 }, { "epoch": 0.1734446201788315, "grad_norm": 0.8057244123377411, "learning_rate": 9.470016443334429e-06, "loss": 0.4205, "step": 2769 }, { "epoch": 0.17350725817817378, "grad_norm": 0.8551586268753306, "learning_rate": 9.469561839476803e-06, "loss": 0.4545, "step": 2770 }, { "epoch": 0.1735698961775161, "grad_norm": 0.8641409538149325, "learning_rate": 9.469107051651181e-06, "loss": 0.3876, "step": 2771 }, { "epoch": 0.1736325341768584, "grad_norm": 0.789457684916307, "learning_rate": 9.468652079876286e-06, "loss": 0.4547, "step": 2772 }, { "epoch": 0.1736951721762007, "grad_norm": 0.8886053300130071, "learning_rate": 9.468196924170841e-06, "loss": 0.4329, "step": 2773 }, { "epoch": 0.17375781017554298, "grad_norm": 0.8955429908153815, "learning_rate": 9.467741584553581e-06, "loss": 0.4553, "step": 2774 }, { "epoch": 0.1738204481748853, "grad_norm": 0.862489380304807, "learning_rate": 9.467286061043247e-06, "loss": 0.3831, "step": 2775 }, { "epoch": 0.1738830861742276, "grad_norm": 0.8418746714757002, "learning_rate": 9.466830353658591e-06, "loss": 0.5016, "step": 2776 }, { "epoch": 0.1739457241735699, "grad_norm": 0.8954949212290697, "learning_rate": 9.466374462418368e-06, "loss": 0.4669, "step": 2777 }, { "epoch": 0.17400836217291218, "grad_norm": 0.8080812341076752, "learning_rate": 9.465918387341344e-06, "loss": 0.4205, "step": 2778 }, { "epoch": 0.1740710001722545, "grad_norm": 0.884648161494236, "learning_rate": 9.465462128446289e-06, "loss": 0.487, "step": 2779 }, { "epoch": 0.1741336381715968, "grad_norm": 0.7887060982060315, "learning_rate": 9.465005685751984e-06, "loss": 0.4307, "step": 2780 }, { "epoch": 0.1741962761709391, "grad_norm": 1.1289092877607154, "learning_rate": 9.464549059277217e-06, "loss": 0.4513, "step": 2781 }, { "epoch": 0.17425891417028141, "grad_norm": 0.8192169774867829, "learning_rate": 9.464092249040781e-06, "loss": 0.451, "step": 2782 }, { "epoch": 0.1743215521696237, "grad_norm": 0.8729983913783754, "learning_rate": 9.463635255061481e-06, "loss": 0.3735, "step": 2783 }, { "epoch": 0.174384190168966, "grad_norm": 0.8995998442579233, "learning_rate": 9.463178077358126e-06, "loss": 0.4022, "step": 2784 }, { "epoch": 0.1744468281683083, "grad_norm": 0.888759431566588, "learning_rate": 9.46272071594953e-06, "loss": 0.4482, "step": 2785 }, { "epoch": 0.17450946616765062, "grad_norm": 0.7934948779256831, "learning_rate": 9.462263170854522e-06, "loss": 0.4811, "step": 2786 }, { "epoch": 0.1745721041669929, "grad_norm": 0.8790555270031397, "learning_rate": 9.461805442091935e-06, "loss": 0.4376, "step": 2787 }, { "epoch": 0.1746347421663352, "grad_norm": 0.8616073419403925, "learning_rate": 9.461347529680607e-06, "loss": 0.4664, "step": 2788 }, { "epoch": 0.1746973801656775, "grad_norm": 0.8974259888646205, "learning_rate": 9.460889433639386e-06, "loss": 0.4454, "step": 2789 }, { "epoch": 0.17476001816501982, "grad_norm": 0.8223820250563709, "learning_rate": 9.46043115398713e-06, "loss": 0.458, "step": 2790 }, { "epoch": 0.1748226561643621, "grad_norm": 0.8259213603597525, "learning_rate": 9.459972690742698e-06, "loss": 0.4133, "step": 2791 }, { "epoch": 0.1748852941637044, "grad_norm": 0.8420746608188033, "learning_rate": 9.459514043924963e-06, "loss": 0.4729, "step": 2792 }, { "epoch": 0.17494793216304672, "grad_norm": 0.8587065882725121, "learning_rate": 9.4590552135528e-06, "loss": 0.4445, "step": 2793 }, { "epoch": 0.17501057016238902, "grad_norm": 0.8709269432032266, "learning_rate": 9.4585961996451e-06, "loss": 0.4236, "step": 2794 }, { "epoch": 0.1750732081617313, "grad_norm": 0.9131214558216991, "learning_rate": 9.458137002220752e-06, "loss": 0.4556, "step": 2795 }, { "epoch": 0.1751358461610736, "grad_norm": 0.826543672410774, "learning_rate": 9.457677621298658e-06, "loss": 0.4584, "step": 2796 }, { "epoch": 0.17519848416041592, "grad_norm": 0.8375402310535915, "learning_rate": 9.457218056897725e-06, "loss": 0.4153, "step": 2797 }, { "epoch": 0.17526112215975823, "grad_norm": 0.905849911479791, "learning_rate": 9.456758309036868e-06, "loss": 0.4474, "step": 2798 }, { "epoch": 0.1753237601591005, "grad_norm": 0.8860930809865608, "learning_rate": 9.456298377735014e-06, "loss": 0.4638, "step": 2799 }, { "epoch": 0.1753863981584428, "grad_norm": 0.896778413302155, "learning_rate": 9.45583826301109e-06, "loss": 0.4266, "step": 2800 }, { "epoch": 0.17544903615778512, "grad_norm": 0.8123398971167703, "learning_rate": 9.455377964884038e-06, "loss": 0.4434, "step": 2801 }, { "epoch": 0.17551167415712743, "grad_norm": 0.8512884033411453, "learning_rate": 9.4549174833728e-06, "loss": 0.4677, "step": 2802 }, { "epoch": 0.17557431215646974, "grad_norm": 0.9270304309028553, "learning_rate": 9.454456818496331e-06, "loss": 0.4417, "step": 2803 }, { "epoch": 0.17563695015581202, "grad_norm": 0.9387575558591479, "learning_rate": 9.453995970273596e-06, "loss": 0.407, "step": 2804 }, { "epoch": 0.17569958815515432, "grad_norm": 0.8445830601206818, "learning_rate": 9.453534938723557e-06, "loss": 0.4696, "step": 2805 }, { "epoch": 0.17576222615449663, "grad_norm": 0.8711887470237691, "learning_rate": 9.453073723865194e-06, "loss": 0.399, "step": 2806 }, { "epoch": 0.17582486415383894, "grad_norm": 0.825965198227466, "learning_rate": 9.45261232571749e-06, "loss": 0.4477, "step": 2807 }, { "epoch": 0.17588750215318122, "grad_norm": 0.8331262519124429, "learning_rate": 9.452150744299437e-06, "loss": 0.4349, "step": 2808 }, { "epoch": 0.17595014015252353, "grad_norm": 0.7787306675730276, "learning_rate": 9.451688979630035e-06, "loss": 0.4063, "step": 2809 }, { "epoch": 0.17601277815186583, "grad_norm": 0.9135230522854533, "learning_rate": 9.451227031728285e-06, "loss": 0.4202, "step": 2810 }, { "epoch": 0.17607541615120814, "grad_norm": 0.8437755878750033, "learning_rate": 9.450764900613205e-06, "loss": 0.3796, "step": 2811 }, { "epoch": 0.17613805415055042, "grad_norm": 0.8057037271838987, "learning_rate": 9.450302586303814e-06, "loss": 0.49, "step": 2812 }, { "epoch": 0.17620069214989273, "grad_norm": 0.828301859404482, "learning_rate": 9.449840088819145e-06, "loss": 0.4062, "step": 2813 }, { "epoch": 0.17626333014923504, "grad_norm": 0.850083355037954, "learning_rate": 9.44937740817823e-06, "loss": 0.4217, "step": 2814 }, { "epoch": 0.17632596814857734, "grad_norm": 0.843891222765461, "learning_rate": 9.448914544400117e-06, "loss": 0.4028, "step": 2815 }, { "epoch": 0.17638860614791962, "grad_norm": 0.8672263190467742, "learning_rate": 9.448451497503855e-06, "loss": 0.4394, "step": 2816 }, { "epoch": 0.17645124414726193, "grad_norm": 0.8158743153755569, "learning_rate": 9.447988267508503e-06, "loss": 0.4346, "step": 2817 }, { "epoch": 0.17651388214660424, "grad_norm": 0.9168055384326477, "learning_rate": 9.44752485443313e-06, "loss": 0.4264, "step": 2818 }, { "epoch": 0.17657652014594655, "grad_norm": 0.8956509399189175, "learning_rate": 9.447061258296807e-06, "loss": 0.4107, "step": 2819 }, { "epoch": 0.17663915814528883, "grad_norm": 0.792628270539246, "learning_rate": 9.446597479118617e-06, "loss": 0.4133, "step": 2820 }, { "epoch": 0.17670179614463113, "grad_norm": 0.8286522723441212, "learning_rate": 9.446133516917651e-06, "loss": 0.4371, "step": 2821 }, { "epoch": 0.17676443414397344, "grad_norm": 0.8683794813476353, "learning_rate": 9.445669371713003e-06, "loss": 0.4481, "step": 2822 }, { "epoch": 0.17682707214331575, "grad_norm": 0.9925243733252437, "learning_rate": 9.44520504352378e-06, "loss": 0.4363, "step": 2823 }, { "epoch": 0.17688971014265803, "grad_norm": 0.782966325279666, "learning_rate": 9.44474053236909e-06, "loss": 0.4946, "step": 2824 }, { "epoch": 0.17695234814200034, "grad_norm": 0.8242972771198651, "learning_rate": 9.444275838268056e-06, "loss": 0.3919, "step": 2825 }, { "epoch": 0.17701498614134265, "grad_norm": 0.8619044867299531, "learning_rate": 9.443810961239804e-06, "loss": 0.4303, "step": 2826 }, { "epoch": 0.17707762414068495, "grad_norm": 0.7616622044804653, "learning_rate": 9.443345901303466e-06, "loss": 0.4933, "step": 2827 }, { "epoch": 0.17714026214002726, "grad_norm": 0.948440716515132, "learning_rate": 9.442880658478188e-06, "loss": 0.4801, "step": 2828 }, { "epoch": 0.17720290013936954, "grad_norm": 0.8486653291847763, "learning_rate": 9.442415232783115e-06, "loss": 0.4442, "step": 2829 }, { "epoch": 0.17726553813871185, "grad_norm": 0.8411110166353309, "learning_rate": 9.44194962423741e-06, "loss": 0.4065, "step": 2830 }, { "epoch": 0.17732817613805416, "grad_norm": 0.8628570874826459, "learning_rate": 9.441483832860232e-06, "loss": 0.4446, "step": 2831 }, { "epoch": 0.17739081413739646, "grad_norm": 0.857989848281793, "learning_rate": 9.441017858670756e-06, "loss": 0.4405, "step": 2832 }, { "epoch": 0.17745345213673874, "grad_norm": 0.7579267365614566, "learning_rate": 9.440551701688158e-06, "loss": 0.4634, "step": 2833 }, { "epoch": 0.17751609013608105, "grad_norm": 0.822222747566777, "learning_rate": 9.44008536193163e-06, "loss": 0.4357, "step": 2834 }, { "epoch": 0.17757872813542336, "grad_norm": 0.9098380473054793, "learning_rate": 9.439618839420365e-06, "loss": 0.4403, "step": 2835 }, { "epoch": 0.17764136613476567, "grad_norm": 0.852518820025479, "learning_rate": 9.439152134173564e-06, "loss": 0.4137, "step": 2836 }, { "epoch": 0.17770400413410795, "grad_norm": 0.9247550472097025, "learning_rate": 9.438685246210437e-06, "loss": 0.4324, "step": 2837 }, { "epoch": 0.17776664213345025, "grad_norm": 0.8987367142105294, "learning_rate": 9.4382181755502e-06, "loss": 0.4918, "step": 2838 }, { "epoch": 0.17782928013279256, "grad_norm": 0.8070093594699421, "learning_rate": 9.437750922212081e-06, "loss": 0.4008, "step": 2839 }, { "epoch": 0.17789191813213487, "grad_norm": 0.9239780016626321, "learning_rate": 9.43728348621531e-06, "loss": 0.5182, "step": 2840 }, { "epoch": 0.17795455613147715, "grad_norm": 0.7977501741351619, "learning_rate": 9.436815867579126e-06, "loss": 0.4097, "step": 2841 }, { "epoch": 0.17801719413081946, "grad_norm": 0.7947489602946767, "learning_rate": 9.436348066322777e-06, "loss": 0.4292, "step": 2842 }, { "epoch": 0.17807983213016176, "grad_norm": 0.862281816504563, "learning_rate": 9.435880082465521e-06, "loss": 0.404, "step": 2843 }, { "epoch": 0.17814247012950407, "grad_norm": 0.911535096273376, "learning_rate": 9.435411916026616e-06, "loss": 0.4318, "step": 2844 }, { "epoch": 0.17820510812884635, "grad_norm": 0.8708106213340135, "learning_rate": 9.434943567025333e-06, "loss": 0.4174, "step": 2845 }, { "epoch": 0.17826774612818866, "grad_norm": 0.798989367874429, "learning_rate": 9.43447503548095e-06, "loss": 0.4405, "step": 2846 }, { "epoch": 0.17833038412753097, "grad_norm": 0.8357742468813493, "learning_rate": 9.43400632141275e-06, "loss": 0.4376, "step": 2847 }, { "epoch": 0.17839302212687327, "grad_norm": 0.8629926561248715, "learning_rate": 9.433537424840029e-06, "loss": 0.4464, "step": 2848 }, { "epoch": 0.17845566012621558, "grad_norm": 0.8038463870496617, "learning_rate": 9.433068345782084e-06, "loss": 0.4067, "step": 2849 }, { "epoch": 0.17851829812555786, "grad_norm": 0.8049265263579096, "learning_rate": 9.432599084258222e-06, "loss": 0.4333, "step": 2850 }, { "epoch": 0.17858093612490017, "grad_norm": 0.8883625454407474, "learning_rate": 9.43212964028776e-06, "loss": 0.4201, "step": 2851 }, { "epoch": 0.17864357412424248, "grad_norm": 0.8607998775153426, "learning_rate": 9.431660013890021e-06, "loss": 0.4312, "step": 2852 }, { "epoch": 0.17870621212358478, "grad_norm": 0.761797237350617, "learning_rate": 9.43119020508433e-06, "loss": 0.4701, "step": 2853 }, { "epoch": 0.17876885012292706, "grad_norm": 0.8411655619516173, "learning_rate": 9.43072021389003e-06, "loss": 0.5054, "step": 2854 }, { "epoch": 0.17883148812226937, "grad_norm": 0.8413482757433943, "learning_rate": 9.430250040326461e-06, "loss": 0.3894, "step": 2855 }, { "epoch": 0.17889412612161168, "grad_norm": 0.8531479443896964, "learning_rate": 9.42977968441298e-06, "loss": 0.426, "step": 2856 }, { "epoch": 0.178956764120954, "grad_norm": 0.8449469254188176, "learning_rate": 9.429309146168946e-06, "loss": 0.4044, "step": 2857 }, { "epoch": 0.17901940212029627, "grad_norm": 0.836893767284663, "learning_rate": 9.428838425613725e-06, "loss": 0.4067, "step": 2858 }, { "epoch": 0.17908204011963857, "grad_norm": 0.8868661967464119, "learning_rate": 9.428367522766691e-06, "loss": 0.4258, "step": 2859 }, { "epoch": 0.17914467811898088, "grad_norm": 0.895977773883451, "learning_rate": 9.427896437647229e-06, "loss": 0.4481, "step": 2860 }, { "epoch": 0.1792073161183232, "grad_norm": 0.7936359204716122, "learning_rate": 9.427425170274726e-06, "loss": 0.3662, "step": 2861 }, { "epoch": 0.17926995411766547, "grad_norm": 0.8752654475941211, "learning_rate": 9.426953720668582e-06, "loss": 0.4411, "step": 2862 }, { "epoch": 0.17933259211700778, "grad_norm": 0.8584262426256167, "learning_rate": 9.4264820888482e-06, "loss": 0.4254, "step": 2863 }, { "epoch": 0.17939523011635009, "grad_norm": 0.8254954394337519, "learning_rate": 9.426010274832997e-06, "loss": 0.4025, "step": 2864 }, { "epoch": 0.1794578681156924, "grad_norm": 0.8328592322490375, "learning_rate": 9.425538278642388e-06, "loss": 0.4184, "step": 2865 }, { "epoch": 0.17952050611503467, "grad_norm": 0.8678064483885254, "learning_rate": 9.425066100295802e-06, "loss": 0.444, "step": 2866 }, { "epoch": 0.17958314411437698, "grad_norm": 0.977970753065322, "learning_rate": 9.424593739812673e-06, "loss": 0.418, "step": 2867 }, { "epoch": 0.1796457821137193, "grad_norm": 0.8013993485080579, "learning_rate": 9.424121197212446e-06, "loss": 0.3942, "step": 2868 }, { "epoch": 0.1797084201130616, "grad_norm": 0.8778343748958704, "learning_rate": 9.423648472514569e-06, "loss": 0.4257, "step": 2869 }, { "epoch": 0.1797710581124039, "grad_norm": 0.8394865740162272, "learning_rate": 9.423175565738498e-06, "loss": 0.4718, "step": 2870 }, { "epoch": 0.17983369611174618, "grad_norm": 0.8269695835160127, "learning_rate": 9.4227024769037e-06, "loss": 0.4401, "step": 2871 }, { "epoch": 0.1798963341110885, "grad_norm": 0.858561275214598, "learning_rate": 9.42222920602965e-06, "loss": 0.4224, "step": 2872 }, { "epoch": 0.1799589721104308, "grad_norm": 0.8660332551503425, "learning_rate": 9.421755753135823e-06, "loss": 0.4525, "step": 2873 }, { "epoch": 0.1800216101097731, "grad_norm": 0.8648491452766954, "learning_rate": 9.421282118241708e-06, "loss": 0.4422, "step": 2874 }, { "epoch": 0.18008424810911539, "grad_norm": 0.787454484238567, "learning_rate": 9.420808301366801e-06, "loss": 0.4787, "step": 2875 }, { "epoch": 0.1801468861084577, "grad_norm": 0.8967174709994419, "learning_rate": 9.420334302530606e-06, "loss": 0.4315, "step": 2876 }, { "epoch": 0.1802095241078, "grad_norm": 0.8651576552281631, "learning_rate": 9.419860121752628e-06, "loss": 0.4574, "step": 2877 }, { "epoch": 0.1802721621071423, "grad_norm": 0.8037049688445972, "learning_rate": 9.419385759052388e-06, "loss": 0.3864, "step": 2878 }, { "epoch": 0.1803348001064846, "grad_norm": 0.8328448744908445, "learning_rate": 9.418911214449409e-06, "loss": 0.417, "step": 2879 }, { "epoch": 0.1803974381058269, "grad_norm": 0.7397566097714897, "learning_rate": 9.418436487963226e-06, "loss": 0.4712, "step": 2880 }, { "epoch": 0.1804600761051692, "grad_norm": 0.8595115761754731, "learning_rate": 9.417961579613374e-06, "loss": 0.414, "step": 2881 }, { "epoch": 0.1805227141045115, "grad_norm": 0.8452072564129594, "learning_rate": 9.417486489419405e-06, "loss": 0.384, "step": 2882 }, { "epoch": 0.1805853521038538, "grad_norm": 0.8976933965269407, "learning_rate": 9.417011217400874e-06, "loss": 0.4733, "step": 2883 }, { "epoch": 0.1806479901031961, "grad_norm": 0.8039587613600406, "learning_rate": 9.416535763577338e-06, "loss": 0.469, "step": 2884 }, { "epoch": 0.1807106281025384, "grad_norm": 0.8352348530938727, "learning_rate": 9.416060127968372e-06, "loss": 0.4281, "step": 2885 }, { "epoch": 0.18077326610188071, "grad_norm": 0.8250796357720096, "learning_rate": 9.415584310593553e-06, "loss": 0.4059, "step": 2886 }, { "epoch": 0.180835904101223, "grad_norm": 0.8439640688781531, "learning_rate": 9.415108311472462e-06, "loss": 0.4615, "step": 2887 }, { "epoch": 0.1808985421005653, "grad_norm": 0.8554557800597767, "learning_rate": 9.414632130624695e-06, "loss": 0.4245, "step": 2888 }, { "epoch": 0.1809611800999076, "grad_norm": 0.7912134757309447, "learning_rate": 9.414155768069848e-06, "loss": 0.4134, "step": 2889 }, { "epoch": 0.18102381809924992, "grad_norm": 0.87151258581928, "learning_rate": 9.413679223827532e-06, "loss": 0.4144, "step": 2890 }, { "epoch": 0.18108645609859222, "grad_norm": 0.8511841827892873, "learning_rate": 9.41320249791736e-06, "loss": 0.445, "step": 2891 }, { "epoch": 0.1811490940979345, "grad_norm": 0.7991164386137517, "learning_rate": 9.412725590358953e-06, "loss": 0.4356, "step": 2892 }, { "epoch": 0.1812117320972768, "grad_norm": 0.8428514090080735, "learning_rate": 9.412248501171942e-06, "loss": 0.4138, "step": 2893 }, { "epoch": 0.18127437009661912, "grad_norm": 0.7554550063865975, "learning_rate": 9.411771230375963e-06, "loss": 0.3926, "step": 2894 }, { "epoch": 0.18133700809596143, "grad_norm": 0.8073982552754833, "learning_rate": 9.411293777990662e-06, "loss": 0.4169, "step": 2895 }, { "epoch": 0.1813996460953037, "grad_norm": 0.8900754904224207, "learning_rate": 9.410816144035691e-06, "loss": 0.4382, "step": 2896 }, { "epoch": 0.18146228409464601, "grad_norm": 0.7997583427126023, "learning_rate": 9.410338328530708e-06, "loss": 0.4096, "step": 2897 }, { "epoch": 0.18152492209398832, "grad_norm": 0.9072504660088666, "learning_rate": 9.409860331495382e-06, "loss": 0.4676, "step": 2898 }, { "epoch": 0.18158756009333063, "grad_norm": 0.8336212490615138, "learning_rate": 9.409382152949384e-06, "loss": 0.4259, "step": 2899 }, { "epoch": 0.1816501980926729, "grad_norm": 0.8768546934258616, "learning_rate": 9.408903792912401e-06, "loss": 0.4495, "step": 2900 }, { "epoch": 0.18171283609201522, "grad_norm": 0.9177380048201401, "learning_rate": 9.408425251404118e-06, "loss": 0.4647, "step": 2901 }, { "epoch": 0.18177547409135753, "grad_norm": 0.8182244865170593, "learning_rate": 9.407946528444234e-06, "loss": 0.4078, "step": 2902 }, { "epoch": 0.18183811209069983, "grad_norm": 0.7827699658159359, "learning_rate": 9.407467624052451e-06, "loss": 0.487, "step": 2903 }, { "epoch": 0.1819007500900421, "grad_norm": 0.8956690249530793, "learning_rate": 9.406988538248484e-06, "loss": 0.4162, "step": 2904 }, { "epoch": 0.18196338808938442, "grad_norm": 0.9460942378647424, "learning_rate": 9.406509271052051e-06, "loss": 0.4689, "step": 2905 }, { "epoch": 0.18202602608872673, "grad_norm": 0.8976945259041498, "learning_rate": 9.406029822482878e-06, "loss": 0.477, "step": 2906 }, { "epoch": 0.18208866408806904, "grad_norm": 0.8785565276364077, "learning_rate": 9.4055501925607e-06, "loss": 0.4374, "step": 2907 }, { "epoch": 0.18215130208741132, "grad_norm": 0.8893313334011034, "learning_rate": 9.405070381305258e-06, "loss": 0.445, "step": 2908 }, { "epoch": 0.18221394008675362, "grad_norm": 0.8211317253180791, "learning_rate": 9.4045903887363e-06, "loss": 0.4289, "step": 2909 }, { "epoch": 0.18227657808609593, "grad_norm": 0.7702010576733063, "learning_rate": 9.404110214873588e-06, "loss": 0.5123, "step": 2910 }, { "epoch": 0.18233921608543824, "grad_norm": 0.8595713954134403, "learning_rate": 9.40362985973688e-06, "loss": 0.4664, "step": 2911 }, { "epoch": 0.18240185408478055, "grad_norm": 0.8976749879976275, "learning_rate": 9.403149323345948e-06, "loss": 0.4389, "step": 2912 }, { "epoch": 0.18246449208412283, "grad_norm": 0.8538049360669636, "learning_rate": 9.402668605720575e-06, "loss": 0.4677, "step": 2913 }, { "epoch": 0.18252713008346513, "grad_norm": 0.8548285793926348, "learning_rate": 9.402187706880544e-06, "loss": 0.4368, "step": 2914 }, { "epoch": 0.18258976808280744, "grad_norm": 0.8856824206866444, "learning_rate": 9.40170662684565e-06, "loss": 0.441, "step": 2915 }, { "epoch": 0.18265240608214975, "grad_norm": 0.8200693911174156, "learning_rate": 9.401225365635693e-06, "loss": 0.4228, "step": 2916 }, { "epoch": 0.18271504408149203, "grad_norm": 0.8604082865241042, "learning_rate": 9.400743923270485e-06, "loss": 0.4238, "step": 2917 }, { "epoch": 0.18277768208083434, "grad_norm": 0.8954173561928156, "learning_rate": 9.40026229976984e-06, "loss": 0.4486, "step": 2918 }, { "epoch": 0.18284032008017664, "grad_norm": 0.6334809910949074, "learning_rate": 9.399780495153582e-06, "loss": 0.4717, "step": 2919 }, { "epoch": 0.18290295807951895, "grad_norm": 0.864278795779149, "learning_rate": 9.399298509441542e-06, "loss": 0.4388, "step": 2920 }, { "epoch": 0.18296559607886123, "grad_norm": 0.9127449344398062, "learning_rate": 9.398816342653558e-06, "loss": 0.4264, "step": 2921 }, { "epoch": 0.18302823407820354, "grad_norm": 2.1628699486887126, "learning_rate": 9.398333994809476e-06, "loss": 0.4706, "step": 2922 }, { "epoch": 0.18309087207754585, "grad_norm": 0.9025094413982775, "learning_rate": 9.397851465929153e-06, "loss": 0.4445, "step": 2923 }, { "epoch": 0.18315351007688815, "grad_norm": 0.7407921573300011, "learning_rate": 9.397368756032445e-06, "loss": 0.4674, "step": 2924 }, { "epoch": 0.18321614807623043, "grad_norm": 0.8916199476887927, "learning_rate": 9.396885865139224e-06, "loss": 0.4261, "step": 2925 }, { "epoch": 0.18327878607557274, "grad_norm": 1.5859046344609846, "learning_rate": 9.396402793269366e-06, "loss": 0.425, "step": 2926 }, { "epoch": 0.18334142407491505, "grad_norm": 0.8655470953818517, "learning_rate": 9.395919540442751e-06, "loss": 0.4189, "step": 2927 }, { "epoch": 0.18340406207425736, "grad_norm": 0.6917055174060588, "learning_rate": 9.395436106679273e-06, "loss": 0.4844, "step": 2928 }, { "epoch": 0.18346670007359964, "grad_norm": 0.9378696082175291, "learning_rate": 9.39495249199883e-06, "loss": 0.4413, "step": 2929 }, { "epoch": 0.18352933807294194, "grad_norm": 0.9942924155626044, "learning_rate": 9.394468696421323e-06, "loss": 0.4717, "step": 2930 }, { "epoch": 0.18359197607228425, "grad_norm": 0.8958081820314538, "learning_rate": 9.393984719966672e-06, "loss": 0.4359, "step": 2931 }, { "epoch": 0.18365461407162656, "grad_norm": 0.8522338588049286, "learning_rate": 9.393500562654794e-06, "loss": 0.4197, "step": 2932 }, { "epoch": 0.18371725207096884, "grad_norm": 0.9449682451833215, "learning_rate": 9.393016224505615e-06, "loss": 0.4945, "step": 2933 }, { "epoch": 0.18377989007031115, "grad_norm": 0.9126896300961488, "learning_rate": 9.392531705539077e-06, "loss": 0.4257, "step": 2934 }, { "epoch": 0.18384252806965345, "grad_norm": 0.9119960203125869, "learning_rate": 9.392047005775116e-06, "loss": 0.4505, "step": 2935 }, { "epoch": 0.18390516606899576, "grad_norm": 0.8643492757836909, "learning_rate": 9.391562125233683e-06, "loss": 0.4576, "step": 2936 }, { "epoch": 0.18396780406833807, "grad_norm": 0.9330487371855269, "learning_rate": 9.39107706393474e-06, "loss": 0.4411, "step": 2937 }, { "epoch": 0.18403044206768035, "grad_norm": 0.887143126141096, "learning_rate": 9.390591821898252e-06, "loss": 0.4278, "step": 2938 }, { "epoch": 0.18409308006702266, "grad_norm": 0.8796575869447298, "learning_rate": 9.390106399144188e-06, "loss": 0.4347, "step": 2939 }, { "epoch": 0.18415571806636497, "grad_norm": 0.9200348302726111, "learning_rate": 9.389620795692529e-06, "loss": 0.5477, "step": 2940 }, { "epoch": 0.18421835606570727, "grad_norm": 0.7887600016340023, "learning_rate": 9.389135011563264e-06, "loss": 0.393, "step": 2941 }, { "epoch": 0.18428099406504955, "grad_norm": 0.8448794658187407, "learning_rate": 9.388649046776387e-06, "loss": 0.433, "step": 2942 }, { "epoch": 0.18434363206439186, "grad_norm": 0.7997852961128792, "learning_rate": 9.3881629013519e-06, "loss": 0.3936, "step": 2943 }, { "epoch": 0.18440627006373417, "grad_norm": 0.8489304337681914, "learning_rate": 9.387676575309815e-06, "loss": 0.4468, "step": 2944 }, { "epoch": 0.18446890806307648, "grad_norm": 0.8634599276500341, "learning_rate": 9.387190068670148e-06, "loss": 0.4258, "step": 2945 }, { "epoch": 0.18453154606241876, "grad_norm": 0.8196736139182754, "learning_rate": 9.386703381452922e-06, "loss": 0.4519, "step": 2946 }, { "epoch": 0.18459418406176106, "grad_norm": 0.9049834472261471, "learning_rate": 9.386216513678171e-06, "loss": 0.4325, "step": 2947 }, { "epoch": 0.18465682206110337, "grad_norm": 0.835697972354992, "learning_rate": 9.385729465365935e-06, "loss": 0.4497, "step": 2948 }, { "epoch": 0.18471946006044568, "grad_norm": 0.833240097602765, "learning_rate": 9.385242236536259e-06, "loss": 0.413, "step": 2949 }, { "epoch": 0.18478209805978796, "grad_norm": 0.9557401177745967, "learning_rate": 9.3847548272092e-06, "loss": 0.4854, "step": 2950 }, { "epoch": 0.18484473605913027, "grad_norm": 0.982758957231048, "learning_rate": 9.384267237404818e-06, "loss": 0.4423, "step": 2951 }, { "epoch": 0.18490737405847257, "grad_norm": 0.9634258666302347, "learning_rate": 9.38377946714318e-06, "loss": 0.4844, "step": 2952 }, { "epoch": 0.18497001205781488, "grad_norm": 0.8482169402729168, "learning_rate": 9.383291516444368e-06, "loss": 0.4335, "step": 2953 }, { "epoch": 0.18503265005715716, "grad_norm": 0.9175892677814198, "learning_rate": 9.382803385328465e-06, "loss": 0.472, "step": 2954 }, { "epoch": 0.18509528805649947, "grad_norm": 0.8377373251993687, "learning_rate": 9.38231507381556e-06, "loss": 0.4123, "step": 2955 }, { "epoch": 0.18515792605584178, "grad_norm": 0.823708739515769, "learning_rate": 9.381826581925753e-06, "loss": 0.3999, "step": 2956 }, { "epoch": 0.18522056405518408, "grad_norm": 0.8812213953802267, "learning_rate": 9.381337909679149e-06, "loss": 0.4312, "step": 2957 }, { "epoch": 0.1852832020545264, "grad_norm": 0.8536729184972439, "learning_rate": 9.380849057095864e-06, "loss": 0.4024, "step": 2958 }, { "epoch": 0.18534584005386867, "grad_norm": 0.8436501449561388, "learning_rate": 9.380360024196021e-06, "loss": 0.4787, "step": 2959 }, { "epoch": 0.18540847805321098, "grad_norm": 0.8237958752191988, "learning_rate": 9.379870810999745e-06, "loss": 0.4286, "step": 2960 }, { "epoch": 0.1854711160525533, "grad_norm": 0.8960366627067327, "learning_rate": 9.379381417527173e-06, "loss": 0.4458, "step": 2961 }, { "epoch": 0.1855337540518956, "grad_norm": 0.891391677304874, "learning_rate": 9.378891843798448e-06, "loss": 0.3994, "step": 2962 }, { "epoch": 0.18559639205123787, "grad_norm": 0.970085035752376, "learning_rate": 9.378402089833724e-06, "loss": 0.4587, "step": 2963 }, { "epoch": 0.18565903005058018, "grad_norm": 0.8744739985532543, "learning_rate": 9.377912155653158e-06, "loss": 0.4547, "step": 2964 }, { "epoch": 0.1857216680499225, "grad_norm": 0.8742935489839214, "learning_rate": 9.377422041276913e-06, "loss": 0.4078, "step": 2965 }, { "epoch": 0.1857843060492648, "grad_norm": 0.8912496562547669, "learning_rate": 9.376931746725164e-06, "loss": 0.4327, "step": 2966 }, { "epoch": 0.18584694404860708, "grad_norm": 0.839985620737831, "learning_rate": 9.376441272018093e-06, "loss": 0.4309, "step": 2967 }, { "epoch": 0.18590958204794938, "grad_norm": 0.7672454734835588, "learning_rate": 9.375950617175888e-06, "loss": 0.4918, "step": 2968 }, { "epoch": 0.1859722200472917, "grad_norm": 0.9069760899794105, "learning_rate": 9.375459782218741e-06, "loss": 0.4185, "step": 2969 }, { "epoch": 0.186034858046634, "grad_norm": 0.9345623575664578, "learning_rate": 9.37496876716686e-06, "loss": 0.4242, "step": 2970 }, { "epoch": 0.18609749604597628, "grad_norm": 0.8668019833864888, "learning_rate": 9.37447757204045e-06, "loss": 0.3957, "step": 2971 }, { "epoch": 0.1861601340453186, "grad_norm": 0.8752182708023317, "learning_rate": 9.373986196859732e-06, "loss": 0.4488, "step": 2972 }, { "epoch": 0.1862227720446609, "grad_norm": 0.8885281290053582, "learning_rate": 9.37349464164493e-06, "loss": 0.437, "step": 2973 }, { "epoch": 0.1862854100440032, "grad_norm": 0.95950485753342, "learning_rate": 9.373002906416278e-06, "loss": 0.4716, "step": 2974 }, { "epoch": 0.18634804804334548, "grad_norm": 0.9114173054859926, "learning_rate": 9.372510991194014e-06, "loss": 0.4504, "step": 2975 }, { "epoch": 0.1864106860426878, "grad_norm": 0.9440123688671054, "learning_rate": 9.372018895998386e-06, "loss": 0.415, "step": 2976 }, { "epoch": 0.1864733240420301, "grad_norm": 0.8161747567552697, "learning_rate": 9.371526620849647e-06, "loss": 0.4707, "step": 2977 }, { "epoch": 0.1865359620413724, "grad_norm": 0.8532871456519749, "learning_rate": 9.371034165768063e-06, "loss": 0.4252, "step": 2978 }, { "epoch": 0.1865986000407147, "grad_norm": 0.9578869452254659, "learning_rate": 9.370541530773903e-06, "loss": 0.4909, "step": 2979 }, { "epoch": 0.186661238040057, "grad_norm": 0.8176618222697978, "learning_rate": 9.370048715887439e-06, "loss": 0.3946, "step": 2980 }, { "epoch": 0.1867238760393993, "grad_norm": 0.8648413080781157, "learning_rate": 9.36955572112896e-06, "loss": 0.4198, "step": 2981 }, { "epoch": 0.1867865140387416, "grad_norm": 0.6306535793710252, "learning_rate": 9.369062546518757e-06, "loss": 0.4795, "step": 2982 }, { "epoch": 0.18684915203808392, "grad_norm": 0.8435039767011422, "learning_rate": 9.368569192077129e-06, "loss": 0.4263, "step": 2983 }, { "epoch": 0.1869117900374262, "grad_norm": 0.8736246991365362, "learning_rate": 9.36807565782438e-06, "loss": 0.416, "step": 2984 }, { "epoch": 0.1869744280367685, "grad_norm": 0.82716405157532, "learning_rate": 9.367581943780828e-06, "loss": 0.4467, "step": 2985 }, { "epoch": 0.1870370660361108, "grad_norm": 0.8168820413821022, "learning_rate": 9.367088049966793e-06, "loss": 0.4125, "step": 2986 }, { "epoch": 0.18709970403545312, "grad_norm": 0.7441061061638214, "learning_rate": 9.366593976402602e-06, "loss": 0.4688, "step": 2987 }, { "epoch": 0.1871623420347954, "grad_norm": 0.888836178150615, "learning_rate": 9.366099723108592e-06, "loss": 0.4814, "step": 2988 }, { "epoch": 0.1872249800341377, "grad_norm": 0.9384958088507253, "learning_rate": 9.36560529010511e-06, "loss": 0.4374, "step": 2989 }, { "epoch": 0.18728761803348, "grad_norm": 0.9193705942675995, "learning_rate": 9.3651106774125e-06, "loss": 0.4515, "step": 2990 }, { "epoch": 0.18735025603282232, "grad_norm": 0.8772112727189686, "learning_rate": 9.364615885051128e-06, "loss": 0.4317, "step": 2991 }, { "epoch": 0.1874128940321646, "grad_norm": 0.8649184439628993, "learning_rate": 9.364120913041353e-06, "loss": 0.4139, "step": 2992 }, { "epoch": 0.1874755320315069, "grad_norm": 0.8915792829843158, "learning_rate": 9.363625761403554e-06, "loss": 0.4714, "step": 2993 }, { "epoch": 0.18753817003084922, "grad_norm": 0.8256879377176778, "learning_rate": 9.363130430158105e-06, "loss": 0.4128, "step": 2994 }, { "epoch": 0.18760080803019152, "grad_norm": 0.7146391496164469, "learning_rate": 9.362634919325402e-06, "loss": 0.4619, "step": 2995 }, { "epoch": 0.1876634460295338, "grad_norm": 0.9080143543201559, "learning_rate": 9.362139228925833e-06, "loss": 0.4686, "step": 2996 }, { "epoch": 0.1877260840288761, "grad_norm": 0.7937751368453742, "learning_rate": 9.361643358979805e-06, "loss": 0.3848, "step": 2997 }, { "epoch": 0.18778872202821842, "grad_norm": 0.7862591022586987, "learning_rate": 9.361147309507726e-06, "loss": 0.3868, "step": 2998 }, { "epoch": 0.18785136002756073, "grad_norm": 0.8508209448196775, "learning_rate": 9.360651080530015e-06, "loss": 0.4658, "step": 2999 }, { "epoch": 0.18791399802690303, "grad_norm": 0.8186512091890964, "learning_rate": 9.360154672067094e-06, "loss": 0.4935, "step": 3000 }, { "epoch": 0.18797663602624531, "grad_norm": 0.6817637541848756, "learning_rate": 9.3596580841394e-06, "loss": 0.4679, "step": 3001 }, { "epoch": 0.18803927402558762, "grad_norm": 0.7866681133054536, "learning_rate": 9.35916131676737e-06, "loss": 0.3969, "step": 3002 }, { "epoch": 0.18810191202492993, "grad_norm": 0.7920781804804674, "learning_rate": 9.358664369971449e-06, "loss": 0.4206, "step": 3003 }, { "epoch": 0.18816455002427224, "grad_norm": 0.9429228546412305, "learning_rate": 9.358167243772095e-06, "loss": 0.4603, "step": 3004 }, { "epoch": 0.18822718802361452, "grad_norm": 0.8241467257869807, "learning_rate": 9.357669938189768e-06, "loss": 0.4069, "step": 3005 }, { "epoch": 0.18828982602295682, "grad_norm": 0.9468794548996553, "learning_rate": 9.357172453244939e-06, "loss": 0.4457, "step": 3006 }, { "epoch": 0.18835246402229913, "grad_norm": 0.8358511365434284, "learning_rate": 9.356674788958082e-06, "loss": 0.4502, "step": 3007 }, { "epoch": 0.18841510202164144, "grad_norm": 0.7933254255397642, "learning_rate": 9.356176945349682e-06, "loss": 0.4082, "step": 3008 }, { "epoch": 0.18847774002098372, "grad_norm": 0.909375788219589, "learning_rate": 9.35567892244023e-06, "loss": 0.4803, "step": 3009 }, { "epoch": 0.18854037802032603, "grad_norm": 0.8796628997122837, "learning_rate": 9.355180720250225e-06, "loss": 0.4961, "step": 3010 }, { "epoch": 0.18860301601966833, "grad_norm": 0.8845721909827922, "learning_rate": 9.354682338800175e-06, "loss": 0.4476, "step": 3011 }, { "epoch": 0.18866565401901064, "grad_norm": 0.8119657244959361, "learning_rate": 9.354183778110589e-06, "loss": 0.4131, "step": 3012 }, { "epoch": 0.18872829201835292, "grad_norm": 0.7953609806754641, "learning_rate": 9.353685038201991e-06, "loss": 0.4139, "step": 3013 }, { "epoch": 0.18879093001769523, "grad_norm": 0.820104067979418, "learning_rate": 9.35318611909491e-06, "loss": 0.4348, "step": 3014 }, { "epoch": 0.18885356801703754, "grad_norm": 0.8687014629323044, "learning_rate": 9.352687020809879e-06, "loss": 0.4514, "step": 3015 }, { "epoch": 0.18891620601637985, "grad_norm": 0.8681136033298055, "learning_rate": 9.352187743367444e-06, "loss": 0.4223, "step": 3016 }, { "epoch": 0.18897884401572212, "grad_norm": 0.8246536256739053, "learning_rate": 9.351688286788152e-06, "loss": 0.3869, "step": 3017 }, { "epoch": 0.18904148201506443, "grad_norm": 0.6632715269860872, "learning_rate": 9.351188651092565e-06, "loss": 0.4837, "step": 3018 }, { "epoch": 0.18910412001440674, "grad_norm": 0.932282230158109, "learning_rate": 9.350688836301242e-06, "loss": 0.4384, "step": 3019 }, { "epoch": 0.18916675801374905, "grad_norm": 0.9073391397791334, "learning_rate": 9.350188842434762e-06, "loss": 0.4854, "step": 3020 }, { "epoch": 0.18922939601309136, "grad_norm": 0.9263084792757222, "learning_rate": 9.3496886695137e-06, "loss": 0.4496, "step": 3021 }, { "epoch": 0.18929203401243364, "grad_norm": 0.8647271345446552, "learning_rate": 9.349188317558646e-06, "loss": 0.4278, "step": 3022 }, { "epoch": 0.18935467201177594, "grad_norm": 0.9155470716860798, "learning_rate": 9.348687786590194e-06, "loss": 0.4197, "step": 3023 }, { "epoch": 0.18941731001111825, "grad_norm": 0.824650837703512, "learning_rate": 9.348187076628942e-06, "loss": 0.441, "step": 3024 }, { "epoch": 0.18947994801046056, "grad_norm": 0.7397877795798785, "learning_rate": 9.347686187695507e-06, "loss": 0.5043, "step": 3025 }, { "epoch": 0.18954258600980284, "grad_norm": 0.7637500217145495, "learning_rate": 9.3471851198105e-06, "loss": 0.4533, "step": 3026 }, { "epoch": 0.18960522400914515, "grad_norm": 0.830366539560474, "learning_rate": 9.346683872994546e-06, "loss": 0.4106, "step": 3027 }, { "epoch": 0.18966786200848745, "grad_norm": 0.9305339062995966, "learning_rate": 9.346182447268278e-06, "loss": 0.4439, "step": 3028 }, { "epoch": 0.18973050000782976, "grad_norm": 0.8704920758100215, "learning_rate": 9.345680842652333e-06, "loss": 0.4082, "step": 3029 }, { "epoch": 0.18979313800717204, "grad_norm": 0.7461920725349197, "learning_rate": 9.34517905916736e-06, "loss": 0.4132, "step": 3030 }, { "epoch": 0.18985577600651435, "grad_norm": 0.9042773976597814, "learning_rate": 9.344677096834009e-06, "loss": 0.4587, "step": 3031 }, { "epoch": 0.18991841400585666, "grad_norm": 0.8770806743502902, "learning_rate": 9.344174955672942e-06, "loss": 0.4107, "step": 3032 }, { "epoch": 0.18998105200519896, "grad_norm": 0.871111615664811, "learning_rate": 9.343672635704827e-06, "loss": 0.4342, "step": 3033 }, { "epoch": 0.19004369000454124, "grad_norm": 0.8275954110803647, "learning_rate": 9.343170136950342e-06, "loss": 0.4226, "step": 3034 }, { "epoch": 0.19010632800388355, "grad_norm": 0.9443312696641138, "learning_rate": 9.342667459430167e-06, "loss": 0.4461, "step": 3035 }, { "epoch": 0.19016896600322586, "grad_norm": 0.8744675249752464, "learning_rate": 9.34216460316499e-06, "loss": 0.4204, "step": 3036 }, { "epoch": 0.19023160400256817, "grad_norm": 0.8748358302711422, "learning_rate": 9.341661568175517e-06, "loss": 0.4361, "step": 3037 }, { "epoch": 0.19029424200191045, "grad_norm": 0.8472694503526759, "learning_rate": 9.341158354482446e-06, "loss": 0.4211, "step": 3038 }, { "epoch": 0.19035688000125275, "grad_norm": 0.8815773984721492, "learning_rate": 9.34065496210649e-06, "loss": 0.45, "step": 3039 }, { "epoch": 0.19041951800059506, "grad_norm": 0.8727655266516333, "learning_rate": 9.340151391068373e-06, "loss": 0.4739, "step": 3040 }, { "epoch": 0.19048215599993737, "grad_norm": 0.8247862257973978, "learning_rate": 9.339647641388818e-06, "loss": 0.4379, "step": 3041 }, { "epoch": 0.19054479399927968, "grad_norm": 0.8690850717342572, "learning_rate": 9.33914371308856e-06, "loss": 0.4019, "step": 3042 }, { "epoch": 0.19060743199862196, "grad_norm": 0.8722235390323724, "learning_rate": 9.338639606188342e-06, "loss": 0.4327, "step": 3043 }, { "epoch": 0.19067006999796426, "grad_norm": 0.8638857467415523, "learning_rate": 9.338135320708912e-06, "loss": 0.4287, "step": 3044 }, { "epoch": 0.19073270799730657, "grad_norm": 0.8390084625866391, "learning_rate": 9.337630856671027e-06, "loss": 0.4033, "step": 3045 }, { "epoch": 0.19079534599664888, "grad_norm": 0.9346052336881862, "learning_rate": 9.337126214095452e-06, "loss": 0.4435, "step": 3046 }, { "epoch": 0.19085798399599116, "grad_norm": 0.8397197979585252, "learning_rate": 9.336621393002954e-06, "loss": 0.4705, "step": 3047 }, { "epoch": 0.19092062199533347, "grad_norm": 0.9403805279334988, "learning_rate": 9.336116393414318e-06, "loss": 0.473, "step": 3048 }, { "epoch": 0.19098325999467577, "grad_norm": 0.9396772597370433, "learning_rate": 9.335611215350325e-06, "loss": 0.5023, "step": 3049 }, { "epoch": 0.19104589799401808, "grad_norm": 0.8989161833086503, "learning_rate": 9.33510585883177e-06, "loss": 0.4141, "step": 3050 }, { "epoch": 0.19110853599336036, "grad_norm": 0.7931722682600236, "learning_rate": 9.334600323879453e-06, "loss": 0.4112, "step": 3051 }, { "epoch": 0.19117117399270267, "grad_norm": 0.8469773260607972, "learning_rate": 9.334094610514182e-06, "loss": 0.4596, "step": 3052 }, { "epoch": 0.19123381199204498, "grad_norm": 0.8601726239985092, "learning_rate": 9.333588718756772e-06, "loss": 0.4573, "step": 3053 }, { "epoch": 0.19129644999138729, "grad_norm": 0.8076133329432712, "learning_rate": 9.333082648628047e-06, "loss": 0.4119, "step": 3054 }, { "epoch": 0.19135908799072956, "grad_norm": 0.8281470383041887, "learning_rate": 9.332576400148836e-06, "loss": 0.4105, "step": 3055 }, { "epoch": 0.19142172599007187, "grad_norm": 0.9742558317217965, "learning_rate": 9.332069973339975e-06, "loss": 0.3979, "step": 3056 }, { "epoch": 0.19148436398941418, "grad_norm": 0.8974588680086785, "learning_rate": 9.33156336822231e-06, "loss": 0.4608, "step": 3057 }, { "epoch": 0.1915470019887565, "grad_norm": 0.8569994082953107, "learning_rate": 9.331056584816693e-06, "loss": 0.436, "step": 3058 }, { "epoch": 0.19160963998809877, "grad_norm": 0.8824115500720264, "learning_rate": 9.330549623143983e-06, "loss": 0.4213, "step": 3059 }, { "epoch": 0.19167227798744108, "grad_norm": 0.8930722385912591, "learning_rate": 9.330042483225046e-06, "loss": 0.4592, "step": 3060 }, { "epoch": 0.19173491598678338, "grad_norm": 0.857486055719045, "learning_rate": 9.329535165080758e-06, "loss": 0.4185, "step": 3061 }, { "epoch": 0.1917975539861257, "grad_norm": 0.7602386009832971, "learning_rate": 9.329027668731999e-06, "loss": 0.4153, "step": 3062 }, { "epoch": 0.19186019198546797, "grad_norm": 0.8430855886037426, "learning_rate": 9.328519994199658e-06, "loss": 0.4217, "step": 3063 }, { "epoch": 0.19192282998481028, "grad_norm": 0.8327093938161867, "learning_rate": 9.32801214150463e-06, "loss": 0.4296, "step": 3064 }, { "epoch": 0.19198546798415259, "grad_norm": 0.8305535191422616, "learning_rate": 9.327504110667818e-06, "loss": 0.4158, "step": 3065 }, { "epoch": 0.1920481059834949, "grad_norm": 0.8276445484004731, "learning_rate": 9.326995901710134e-06, "loss": 0.4219, "step": 3066 }, { "epoch": 0.1921107439828372, "grad_norm": 0.8609994942086974, "learning_rate": 9.326487514652496e-06, "loss": 0.4094, "step": 3067 }, { "epoch": 0.19217338198217948, "grad_norm": 0.8260368366916747, "learning_rate": 9.325978949515831e-06, "loss": 0.4444, "step": 3068 }, { "epoch": 0.1922360199815218, "grad_norm": 0.8659415993784343, "learning_rate": 9.325470206321067e-06, "loss": 0.435, "step": 3069 }, { "epoch": 0.1922986579808641, "grad_norm": 0.875401372109644, "learning_rate": 9.324961285089147e-06, "loss": 0.4609, "step": 3070 }, { "epoch": 0.1923612959802064, "grad_norm": 0.8621830778910594, "learning_rate": 9.324452185841018e-06, "loss": 0.4267, "step": 3071 }, { "epoch": 0.19242393397954868, "grad_norm": 0.7687702660913278, "learning_rate": 9.323942908597633e-06, "loss": 0.4045, "step": 3072 }, { "epoch": 0.192486571978891, "grad_norm": 0.7842446178429626, "learning_rate": 9.323433453379955e-06, "loss": 0.4028, "step": 3073 }, { "epoch": 0.1925492099782333, "grad_norm": 0.8261225683715523, "learning_rate": 9.322923820208955e-06, "loss": 0.4128, "step": 3074 }, { "epoch": 0.1926118479775756, "grad_norm": 0.830413934992402, "learning_rate": 9.32241400910561e-06, "loss": 0.3932, "step": 3075 }, { "epoch": 0.1926744859769179, "grad_norm": 0.8572303864007756, "learning_rate": 9.321904020090899e-06, "loss": 0.4381, "step": 3076 }, { "epoch": 0.1927371239762602, "grad_norm": 0.7847889895551254, "learning_rate": 9.321393853185818e-06, "loss": 0.4821, "step": 3077 }, { "epoch": 0.1927997619756025, "grad_norm": 0.7726649272218009, "learning_rate": 9.320883508411363e-06, "loss": 0.4274, "step": 3078 }, { "epoch": 0.1928623999749448, "grad_norm": 0.8052274932004644, "learning_rate": 9.320372985788541e-06, "loss": 0.4064, "step": 3079 }, { "epoch": 0.1929250379742871, "grad_norm": 0.8320172656088374, "learning_rate": 9.319862285338366e-06, "loss": 0.4027, "step": 3080 }, { "epoch": 0.1929876759736294, "grad_norm": 0.9300903982577783, "learning_rate": 9.319351407081856e-06, "loss": 0.4503, "step": 3081 }, { "epoch": 0.1930503139729717, "grad_norm": 0.8869817265084241, "learning_rate": 9.318840351040041e-06, "loss": 0.4508, "step": 3082 }, { "epoch": 0.193112951972314, "grad_norm": 0.8871352622775628, "learning_rate": 9.318329117233955e-06, "loss": 0.4614, "step": 3083 }, { "epoch": 0.1931755899716563, "grad_norm": 0.8239932151646637, "learning_rate": 9.317817705684643e-06, "loss": 0.3987, "step": 3084 }, { "epoch": 0.1932382279709986, "grad_norm": 0.8534177520613482, "learning_rate": 9.317306116413152e-06, "loss": 0.4111, "step": 3085 }, { "epoch": 0.1933008659703409, "grad_norm": 0.9193232929662934, "learning_rate": 9.31679434944054e-06, "loss": 0.4366, "step": 3086 }, { "epoch": 0.19336350396968321, "grad_norm": 0.9016515072439987, "learning_rate": 9.31628240478787e-06, "loss": 0.4948, "step": 3087 }, { "epoch": 0.19342614196902552, "grad_norm": 0.8881390049184057, "learning_rate": 9.315770282476216e-06, "loss": 0.4193, "step": 3088 }, { "epoch": 0.1934887799683678, "grad_norm": 0.8721170851459044, "learning_rate": 9.315257982526656e-06, "loss": 0.4502, "step": 3089 }, { "epoch": 0.1935514179677101, "grad_norm": 0.7865021705055056, "learning_rate": 9.314745504960276e-06, "loss": 0.3728, "step": 3090 }, { "epoch": 0.19361405596705242, "grad_norm": 0.8438787786968912, "learning_rate": 9.31423284979817e-06, "loss": 0.4022, "step": 3091 }, { "epoch": 0.19367669396639473, "grad_norm": 0.8842098687395605, "learning_rate": 9.31372001706144e-06, "loss": 0.4193, "step": 3092 }, { "epoch": 0.193739331965737, "grad_norm": 0.8991501517033545, "learning_rate": 9.313207006771195e-06, "loss": 0.4519, "step": 3093 }, { "epoch": 0.1938019699650793, "grad_norm": 0.7673330457476106, "learning_rate": 9.312693818948546e-06, "loss": 0.4889, "step": 3094 }, { "epoch": 0.19386460796442162, "grad_norm": 0.920314176984053, "learning_rate": 9.312180453614621e-06, "loss": 0.4446, "step": 3095 }, { "epoch": 0.19392724596376393, "grad_norm": 0.8154490329474425, "learning_rate": 9.311666910790547e-06, "loss": 0.4578, "step": 3096 }, { "epoch": 0.1939898839631062, "grad_norm": 0.7941508689007526, "learning_rate": 9.311153190497464e-06, "loss": 0.4339, "step": 3097 }, { "epoch": 0.19405252196244852, "grad_norm": 0.8812251868502797, "learning_rate": 9.310639292756513e-06, "loss": 0.4487, "step": 3098 }, { "epoch": 0.19411515996179082, "grad_norm": 0.8686273501612921, "learning_rate": 9.31012521758885e-06, "loss": 0.4361, "step": 3099 }, { "epoch": 0.19417779796113313, "grad_norm": 0.852728731195404, "learning_rate": 9.309610965015633e-06, "loss": 0.4438, "step": 3100 }, { "epoch": 0.1942404359604754, "grad_norm": 0.827399043181498, "learning_rate": 9.30909653505803e-06, "loss": 0.4285, "step": 3101 }, { "epoch": 0.19430307395981772, "grad_norm": 0.8448094151863449, "learning_rate": 9.308581927737212e-06, "loss": 0.4545, "step": 3102 }, { "epoch": 0.19436571195916003, "grad_norm": 0.8780476772392917, "learning_rate": 9.308067143074364e-06, "loss": 0.4406, "step": 3103 }, { "epoch": 0.19442834995850233, "grad_norm": 0.7413438733132895, "learning_rate": 9.307552181090671e-06, "loss": 0.4962, "step": 3104 }, { "epoch": 0.1944909879578446, "grad_norm": 0.8410074268283168, "learning_rate": 9.307037041807333e-06, "loss": 0.3969, "step": 3105 }, { "epoch": 0.19455362595718692, "grad_norm": 0.7786434635251416, "learning_rate": 9.306521725245548e-06, "loss": 0.3902, "step": 3106 }, { "epoch": 0.19461626395652923, "grad_norm": 0.879452395694447, "learning_rate": 9.30600623142653e-06, "loss": 0.446, "step": 3107 }, { "epoch": 0.19467890195587154, "grad_norm": 0.6604266472150047, "learning_rate": 9.305490560371497e-06, "loss": 0.4738, "step": 3108 }, { "epoch": 0.19474153995521384, "grad_norm": 0.9069115229409969, "learning_rate": 9.304974712101675e-06, "loss": 0.4319, "step": 3109 }, { "epoch": 0.19480417795455612, "grad_norm": 0.8996658026964673, "learning_rate": 9.304458686638293e-06, "loss": 0.4323, "step": 3110 }, { "epoch": 0.19486681595389843, "grad_norm": 0.9001088578831788, "learning_rate": 9.303942484002592e-06, "loss": 0.4632, "step": 3111 }, { "epoch": 0.19492945395324074, "grad_norm": 0.8537822160934425, "learning_rate": 9.303426104215821e-06, "loss": 0.4694, "step": 3112 }, { "epoch": 0.19499209195258305, "grad_norm": 0.9374478332940329, "learning_rate": 9.30290954729923e-06, "loss": 0.4833, "step": 3113 }, { "epoch": 0.19505472995192533, "grad_norm": 0.8657315094607193, "learning_rate": 9.302392813274087e-06, "loss": 0.4321, "step": 3114 }, { "epoch": 0.19511736795126763, "grad_norm": 0.8537650889141112, "learning_rate": 9.301875902161656e-06, "loss": 0.4591, "step": 3115 }, { "epoch": 0.19518000595060994, "grad_norm": 0.8338046585366955, "learning_rate": 9.301358813983215e-06, "loss": 0.4408, "step": 3116 }, { "epoch": 0.19524264394995225, "grad_norm": 0.9011751406171186, "learning_rate": 9.300841548760044e-06, "loss": 0.405, "step": 3117 }, { "epoch": 0.19530528194929453, "grad_norm": 0.9890854714662722, "learning_rate": 9.300324106513438e-06, "loss": 0.4679, "step": 3118 }, { "epoch": 0.19536791994863684, "grad_norm": 0.8406900942536542, "learning_rate": 9.299806487264695e-06, "loss": 0.418, "step": 3119 }, { "epoch": 0.19543055794797914, "grad_norm": 0.8130164889679083, "learning_rate": 9.29928869103512e-06, "loss": 0.4337, "step": 3120 }, { "epoch": 0.19549319594732145, "grad_norm": 0.8887683263091417, "learning_rate": 9.298770717846023e-06, "loss": 0.454, "step": 3121 }, { "epoch": 0.19555583394666373, "grad_norm": 0.6773303924239191, "learning_rate": 9.298252567718723e-06, "loss": 0.4796, "step": 3122 }, { "epoch": 0.19561847194600604, "grad_norm": 0.842330051828045, "learning_rate": 9.297734240674551e-06, "loss": 0.4332, "step": 3123 }, { "epoch": 0.19568110994534835, "grad_norm": 0.9042877565429607, "learning_rate": 9.297215736734842e-06, "loss": 0.4419, "step": 3124 }, { "epoch": 0.19574374794469065, "grad_norm": 0.8203520722144101, "learning_rate": 9.296697055920934e-06, "loss": 0.4265, "step": 3125 }, { "epoch": 0.19580638594403293, "grad_norm": 0.9572551405527563, "learning_rate": 9.296178198254176e-06, "loss": 0.4267, "step": 3126 }, { "epoch": 0.19586902394337524, "grad_norm": 0.844319016557032, "learning_rate": 9.295659163755929e-06, "loss": 0.4305, "step": 3127 }, { "epoch": 0.19593166194271755, "grad_norm": 0.8561080305539454, "learning_rate": 9.295139952447551e-06, "loss": 0.4379, "step": 3128 }, { "epoch": 0.19599429994205986, "grad_norm": 0.8476124050192345, "learning_rate": 9.294620564350418e-06, "loss": 0.4503, "step": 3129 }, { "epoch": 0.19605693794140217, "grad_norm": 0.881782620911456, "learning_rate": 9.294100999485903e-06, "loss": 0.448, "step": 3130 }, { "epoch": 0.19611957594074444, "grad_norm": 0.9425507279915378, "learning_rate": 9.293581257875395e-06, "loss": 0.4501, "step": 3131 }, { "epoch": 0.19618221394008675, "grad_norm": 0.856093067212581, "learning_rate": 9.293061339540286e-06, "loss": 0.4139, "step": 3132 }, { "epoch": 0.19624485193942906, "grad_norm": 0.7933404525687834, "learning_rate": 9.292541244501974e-06, "loss": 0.4048, "step": 3133 }, { "epoch": 0.19630748993877137, "grad_norm": 0.9054864883396326, "learning_rate": 9.292020972781868e-06, "loss": 0.4067, "step": 3134 }, { "epoch": 0.19637012793811365, "grad_norm": 0.8832017306985076, "learning_rate": 9.291500524401381e-06, "loss": 0.4877, "step": 3135 }, { "epoch": 0.19643276593745596, "grad_norm": 0.8268120920333365, "learning_rate": 9.29097989938194e-06, "loss": 0.4076, "step": 3136 }, { "epoch": 0.19649540393679826, "grad_norm": 0.8155807045690256, "learning_rate": 9.290459097744966e-06, "loss": 0.4396, "step": 3137 }, { "epoch": 0.19655804193614057, "grad_norm": 0.9247682438397257, "learning_rate": 9.289938119511901e-06, "loss": 0.4344, "step": 3138 }, { "epoch": 0.19662067993548285, "grad_norm": 0.8159477092472619, "learning_rate": 9.289416964704186e-06, "loss": 0.4337, "step": 3139 }, { "epoch": 0.19668331793482516, "grad_norm": 0.9148352085674376, "learning_rate": 9.288895633343273e-06, "loss": 0.4676, "step": 3140 }, { "epoch": 0.19674595593416747, "grad_norm": 0.8103297484839199, "learning_rate": 9.288374125450619e-06, "loss": 0.4571, "step": 3141 }, { "epoch": 0.19680859393350977, "grad_norm": 0.8566518695257539, "learning_rate": 9.28785244104769e-06, "loss": 0.4526, "step": 3142 }, { "epoch": 0.19687123193285205, "grad_norm": 0.8209376032432789, "learning_rate": 9.28733058015596e-06, "loss": 0.426, "step": 3143 }, { "epoch": 0.19693386993219436, "grad_norm": 0.9198633631968351, "learning_rate": 9.286808542796906e-06, "loss": 0.4618, "step": 3144 }, { "epoch": 0.19699650793153667, "grad_norm": 0.846720034104665, "learning_rate": 9.286286328992018e-06, "loss": 0.4688, "step": 3145 }, { "epoch": 0.19705914593087898, "grad_norm": 0.819176995191718, "learning_rate": 9.285763938762789e-06, "loss": 0.4008, "step": 3146 }, { "epoch": 0.19712178393022126, "grad_norm": 0.7627090205884608, "learning_rate": 9.28524137213072e-06, "loss": 0.4845, "step": 3147 }, { "epoch": 0.19718442192956356, "grad_norm": 0.8898614351117483, "learning_rate": 9.284718629117322e-06, "loss": 0.4558, "step": 3148 }, { "epoch": 0.19724705992890587, "grad_norm": 0.8387937521707625, "learning_rate": 9.284195709744109e-06, "loss": 0.4188, "step": 3149 }, { "epoch": 0.19730969792824818, "grad_norm": 0.7874626621048965, "learning_rate": 9.283672614032605e-06, "loss": 0.3769, "step": 3150 }, { "epoch": 0.1973723359275905, "grad_norm": 0.8926433930245751, "learning_rate": 9.283149342004342e-06, "loss": 0.4355, "step": 3151 }, { "epoch": 0.19743497392693277, "grad_norm": 0.8043407498379131, "learning_rate": 9.282625893680857e-06, "loss": 0.4086, "step": 3152 }, { "epoch": 0.19749761192627507, "grad_norm": 0.8291650828070337, "learning_rate": 9.282102269083695e-06, "loss": 0.4086, "step": 3153 }, { "epoch": 0.19756024992561738, "grad_norm": 0.8074363365845668, "learning_rate": 9.281578468234408e-06, "loss": 0.4109, "step": 3154 }, { "epoch": 0.1976228879249597, "grad_norm": 0.8530288343061533, "learning_rate": 9.281054491154557e-06, "loss": 0.4275, "step": 3155 }, { "epoch": 0.19768552592430197, "grad_norm": 0.8959180341427273, "learning_rate": 9.280530337865709e-06, "loss": 0.4826, "step": 3156 }, { "epoch": 0.19774816392364428, "grad_norm": 0.777317562085768, "learning_rate": 9.280006008389439e-06, "loss": 0.4258, "step": 3157 }, { "epoch": 0.19781080192298658, "grad_norm": 0.9230634956881277, "learning_rate": 9.279481502747325e-06, "loss": 0.4665, "step": 3158 }, { "epoch": 0.1978734399223289, "grad_norm": 0.8141987078557152, "learning_rate": 9.27895682096096e-06, "loss": 0.4197, "step": 3159 }, { "epoch": 0.19793607792167117, "grad_norm": 0.8870271054472848, "learning_rate": 9.278431963051937e-06, "loss": 0.4762, "step": 3160 }, { "epoch": 0.19799871592101348, "grad_norm": 0.9262466578813463, "learning_rate": 9.27790692904186e-06, "loss": 0.4832, "step": 3161 }, { "epoch": 0.1980613539203558, "grad_norm": 0.8159137525607528, "learning_rate": 9.27738171895234e-06, "loss": 0.4447, "step": 3162 }, { "epoch": 0.1981239919196981, "grad_norm": 0.8754869567535126, "learning_rate": 9.276856332804993e-06, "loss": 0.4528, "step": 3163 }, { "epoch": 0.19818662991904037, "grad_norm": 0.9495649549166846, "learning_rate": 9.27633077062145e-06, "loss": 0.4149, "step": 3164 }, { "epoch": 0.19824926791838268, "grad_norm": 0.8897951199391345, "learning_rate": 9.275805032423336e-06, "loss": 0.4682, "step": 3165 }, { "epoch": 0.198311905917725, "grad_norm": 0.8798812981728719, "learning_rate": 9.275279118232293e-06, "loss": 0.4526, "step": 3166 }, { "epoch": 0.1983745439170673, "grad_norm": 0.8839182292217876, "learning_rate": 9.274753028069967e-06, "loss": 0.4829, "step": 3167 }, { "epoch": 0.19843718191640958, "grad_norm": 0.9028428960041952, "learning_rate": 9.274226761958015e-06, "loss": 0.3922, "step": 3168 }, { "epoch": 0.19849981991575189, "grad_norm": 0.8344018444820097, "learning_rate": 9.273700319918094e-06, "loss": 0.4218, "step": 3169 }, { "epoch": 0.1985624579150942, "grad_norm": 0.8420940623702444, "learning_rate": 9.273173701971876e-06, "loss": 0.4112, "step": 3170 }, { "epoch": 0.1986250959144365, "grad_norm": 0.8342319100506808, "learning_rate": 9.272646908141034e-06, "loss": 0.4435, "step": 3171 }, { "epoch": 0.19868773391377878, "grad_norm": 0.8424750120724059, "learning_rate": 9.272119938447254e-06, "loss": 0.405, "step": 3172 }, { "epoch": 0.1987503719131211, "grad_norm": 0.8288627218022735, "learning_rate": 9.271592792912224e-06, "loss": 0.4999, "step": 3173 }, { "epoch": 0.1988130099124634, "grad_norm": 0.7953699725498034, "learning_rate": 9.27106547155764e-06, "loss": 0.4162, "step": 3174 }, { "epoch": 0.1988756479118057, "grad_norm": 0.9566658860155697, "learning_rate": 9.270537974405212e-06, "loss": 0.4237, "step": 3175 }, { "epoch": 0.198938285911148, "grad_norm": 0.810101936134063, "learning_rate": 9.270010301476646e-06, "loss": 0.4604, "step": 3176 }, { "epoch": 0.1990009239104903, "grad_norm": 0.8274138954533432, "learning_rate": 9.269482452793663e-06, "loss": 0.4189, "step": 3177 }, { "epoch": 0.1990635619098326, "grad_norm": 0.8164427442389796, "learning_rate": 9.26895442837799e-06, "loss": 0.3801, "step": 3178 }, { "epoch": 0.1991261999091749, "grad_norm": 0.9139564246320285, "learning_rate": 9.268426228251361e-06, "loss": 0.4539, "step": 3179 }, { "epoch": 0.1991888379085172, "grad_norm": 0.8691302713033695, "learning_rate": 9.267897852435516e-06, "loss": 0.4337, "step": 3180 }, { "epoch": 0.1992514759078595, "grad_norm": 0.7958028492366991, "learning_rate": 9.267369300952204e-06, "loss": 0.4158, "step": 3181 }, { "epoch": 0.1993141139072018, "grad_norm": 0.9454790769861078, "learning_rate": 9.266840573823178e-06, "loss": 0.4807, "step": 3182 }, { "epoch": 0.1993767519065441, "grad_norm": 0.8894327380089813, "learning_rate": 9.266311671070204e-06, "loss": 0.4607, "step": 3183 }, { "epoch": 0.19943938990588642, "grad_norm": 0.8876185127534177, "learning_rate": 9.26578259271505e-06, "loss": 0.3958, "step": 3184 }, { "epoch": 0.1995020279052287, "grad_norm": 0.8875544257289466, "learning_rate": 9.26525333877949e-06, "loss": 0.4507, "step": 3185 }, { "epoch": 0.199564665904571, "grad_norm": 0.6949227126025407, "learning_rate": 9.264723909285311e-06, "loss": 0.4957, "step": 3186 }, { "epoch": 0.1996273039039133, "grad_norm": 0.8929264250545792, "learning_rate": 9.264194304254306e-06, "loss": 0.3702, "step": 3187 }, { "epoch": 0.19968994190325562, "grad_norm": 0.7597806520684258, "learning_rate": 9.263664523708273e-06, "loss": 0.4836, "step": 3188 }, { "epoch": 0.1997525799025979, "grad_norm": 0.8566010782520077, "learning_rate": 9.263134567669014e-06, "loss": 0.4194, "step": 3189 }, { "epoch": 0.1998152179019402, "grad_norm": 0.8118780445845543, "learning_rate": 9.262604436158347e-06, "loss": 0.4363, "step": 3190 }, { "epoch": 0.19987785590128251, "grad_norm": 0.8042127339111369, "learning_rate": 9.262074129198089e-06, "loss": 0.4308, "step": 3191 }, { "epoch": 0.19994049390062482, "grad_norm": 0.8253447963679993, "learning_rate": 9.26154364681007e-06, "loss": 0.4072, "step": 3192 }, { "epoch": 0.2000031318999671, "grad_norm": 0.8913013118748445, "learning_rate": 9.261012989016123e-06, "loss": 0.4284, "step": 3193 }, { "epoch": 0.2000657698993094, "grad_norm": 0.8176330209204661, "learning_rate": 9.26048215583809e-06, "loss": 0.3742, "step": 3194 }, { "epoch": 0.20012840789865172, "grad_norm": 0.8677222756749469, "learning_rate": 9.25995114729782e-06, "loss": 0.4035, "step": 3195 }, { "epoch": 0.20019104589799402, "grad_norm": 0.8287920022063134, "learning_rate": 9.259419963417172e-06, "loss": 0.4721, "step": 3196 }, { "epoch": 0.20025368389733633, "grad_norm": 0.9025430016251274, "learning_rate": 9.258888604218006e-06, "loss": 0.4444, "step": 3197 }, { "epoch": 0.2003163218966786, "grad_norm": 0.9165437458903829, "learning_rate": 9.258357069722196e-06, "loss": 0.4452, "step": 3198 }, { "epoch": 0.20037895989602092, "grad_norm": 0.8302601797190812, "learning_rate": 9.257825359951618e-06, "loss": 0.4166, "step": 3199 }, { "epoch": 0.20044159789536323, "grad_norm": 0.96728417349147, "learning_rate": 9.257293474928158e-06, "loss": 0.4405, "step": 3200 }, { "epoch": 0.20050423589470553, "grad_norm": 0.8672100633651084, "learning_rate": 9.256761414673708e-06, "loss": 0.468, "step": 3201 }, { "epoch": 0.20056687389404781, "grad_norm": 0.9579957403327793, "learning_rate": 9.256229179210168e-06, "loss": 0.4285, "step": 3202 }, { "epoch": 0.20062951189339012, "grad_norm": 0.8303615479066816, "learning_rate": 9.255696768559445e-06, "loss": 0.4172, "step": 3203 }, { "epoch": 0.20069214989273243, "grad_norm": 0.8309464596197392, "learning_rate": 9.255164182743454e-06, "loss": 0.4288, "step": 3204 }, { "epoch": 0.20075478789207474, "grad_norm": 0.8561357556257311, "learning_rate": 9.254631421784114e-06, "loss": 0.461, "step": 3205 }, { "epoch": 0.20081742589141702, "grad_norm": 0.8769505334600178, "learning_rate": 9.254098485703355e-06, "loss": 0.4626, "step": 3206 }, { "epoch": 0.20088006389075933, "grad_norm": 0.9284051091790393, "learning_rate": 9.253565374523115e-06, "loss": 0.4525, "step": 3207 }, { "epoch": 0.20094270189010163, "grad_norm": 0.8464030756434526, "learning_rate": 9.253032088265333e-06, "loss": 0.3803, "step": 3208 }, { "epoch": 0.20100533988944394, "grad_norm": 0.8410659166290186, "learning_rate": 9.25249862695196e-06, "loss": 0.4142, "step": 3209 }, { "epoch": 0.20106797788878622, "grad_norm": 0.8636949289835598, "learning_rate": 9.251964990604956e-06, "loss": 0.5105, "step": 3210 }, { "epoch": 0.20113061588812853, "grad_norm": 0.86710828236121, "learning_rate": 9.251431179246284e-06, "loss": 0.4, "step": 3211 }, { "epoch": 0.20119325388747084, "grad_norm": 0.8523340207468089, "learning_rate": 9.250897192897915e-06, "loss": 0.3634, "step": 3212 }, { "epoch": 0.20125589188681314, "grad_norm": 0.807578275802673, "learning_rate": 9.250363031581828e-06, "loss": 0.4277, "step": 3213 }, { "epoch": 0.20131852988615542, "grad_norm": 0.8571194887649893, "learning_rate": 9.24982869532001e-06, "loss": 0.4046, "step": 3214 }, { "epoch": 0.20138116788549773, "grad_norm": 0.7626605227259577, "learning_rate": 9.249294184134453e-06, "loss": 0.397, "step": 3215 }, { "epoch": 0.20144380588484004, "grad_norm": 0.8355847717459856, "learning_rate": 9.24875949804716e-06, "loss": 0.3957, "step": 3216 }, { "epoch": 0.20150644388418235, "grad_norm": 0.879600757167357, "learning_rate": 9.248224637080137e-06, "loss": 0.4268, "step": 3217 }, { "epoch": 0.20156908188352465, "grad_norm": 0.9111939623188466, "learning_rate": 9.2476896012554e-06, "loss": 0.4807, "step": 3218 }, { "epoch": 0.20163171988286693, "grad_norm": 0.8109071899228582, "learning_rate": 9.24715439059497e-06, "loss": 0.4128, "step": 3219 }, { "epoch": 0.20169435788220924, "grad_norm": 0.9227355069528325, "learning_rate": 9.246619005120876e-06, "loss": 0.4712, "step": 3220 }, { "epoch": 0.20175699588155155, "grad_norm": 0.9238201195393845, "learning_rate": 9.246083444855158e-06, "loss": 0.5004, "step": 3221 }, { "epoch": 0.20181963388089386, "grad_norm": 0.8242325885402408, "learning_rate": 9.245547709819856e-06, "loss": 0.4306, "step": 3222 }, { "epoch": 0.20188227188023614, "grad_norm": 0.8711866329206238, "learning_rate": 9.245011800037022e-06, "loss": 0.431, "step": 3223 }, { "epoch": 0.20194490987957844, "grad_norm": 0.8334503514142803, "learning_rate": 9.244475715528715e-06, "loss": 0.4013, "step": 3224 }, { "epoch": 0.20200754787892075, "grad_norm": 0.8202788861741351, "learning_rate": 9.243939456317e-06, "loss": 0.4334, "step": 3225 }, { "epoch": 0.20207018587826306, "grad_norm": 0.8629029070680639, "learning_rate": 9.243403022423947e-06, "loss": 0.4303, "step": 3226 }, { "epoch": 0.20213282387760534, "grad_norm": 0.7871267553124784, "learning_rate": 9.242866413871641e-06, "loss": 0.5018, "step": 3227 }, { "epoch": 0.20219546187694765, "grad_norm": 0.8930075060036754, "learning_rate": 9.242329630682163e-06, "loss": 0.4582, "step": 3228 }, { "epoch": 0.20225809987628995, "grad_norm": 0.8623281922507666, "learning_rate": 9.241792672877612e-06, "loss": 0.4269, "step": 3229 }, { "epoch": 0.20232073787563226, "grad_norm": 0.7790640768760101, "learning_rate": 9.241255540480088e-06, "loss": 0.4177, "step": 3230 }, { "epoch": 0.20238337587497454, "grad_norm": 0.9752679352726026, "learning_rate": 9.240718233511697e-06, "loss": 0.4704, "step": 3231 }, { "epoch": 0.20244601387431685, "grad_norm": 0.8225161919442645, "learning_rate": 9.240180751994557e-06, "loss": 0.4115, "step": 3232 }, { "epoch": 0.20250865187365916, "grad_norm": 0.8606692760182363, "learning_rate": 9.239643095950789e-06, "loss": 0.423, "step": 3233 }, { "epoch": 0.20257128987300146, "grad_norm": 0.8647871189373134, "learning_rate": 9.239105265402525e-06, "loss": 0.4176, "step": 3234 }, { "epoch": 0.20263392787234374, "grad_norm": 0.8899592350668667, "learning_rate": 9.238567260371902e-06, "loss": 0.4147, "step": 3235 }, { "epoch": 0.20269656587168605, "grad_norm": 0.8719673353782664, "learning_rate": 9.238029080881063e-06, "loss": 0.4457, "step": 3236 }, { "epoch": 0.20275920387102836, "grad_norm": 0.8459635230956709, "learning_rate": 9.237490726952161e-06, "loss": 0.4106, "step": 3237 }, { "epoch": 0.20282184187037067, "grad_norm": 0.8488295857027797, "learning_rate": 9.236952198607356e-06, "loss": 0.4745, "step": 3238 }, { "epoch": 0.20288447986971297, "grad_norm": 0.9335063355701069, "learning_rate": 9.23641349586881e-06, "loss": 0.4474, "step": 3239 }, { "epoch": 0.20294711786905525, "grad_norm": 0.8168319726807147, "learning_rate": 9.235874618758698e-06, "loss": 0.3928, "step": 3240 }, { "epoch": 0.20300975586839756, "grad_norm": 0.80422984559777, "learning_rate": 9.235335567299202e-06, "loss": 0.4371, "step": 3241 }, { "epoch": 0.20307239386773987, "grad_norm": 0.7659351550655774, "learning_rate": 9.234796341512506e-06, "loss": 0.3891, "step": 3242 }, { "epoch": 0.20313503186708218, "grad_norm": 0.8406051253424243, "learning_rate": 9.234256941420807e-06, "loss": 0.4175, "step": 3243 }, { "epoch": 0.20319766986642446, "grad_norm": 0.8844273279916403, "learning_rate": 9.233717367046308e-06, "loss": 0.4783, "step": 3244 }, { "epoch": 0.20326030786576677, "grad_norm": 0.8526448568375605, "learning_rate": 9.233177618411215e-06, "loss": 0.417, "step": 3245 }, { "epoch": 0.20332294586510907, "grad_norm": 0.7946169068535859, "learning_rate": 9.232637695537745e-06, "loss": 0.4299, "step": 3246 }, { "epoch": 0.20338558386445138, "grad_norm": 0.9540225842460709, "learning_rate": 9.232097598448123e-06, "loss": 0.457, "step": 3247 }, { "epoch": 0.20344822186379366, "grad_norm": 0.8606512705446555, "learning_rate": 9.231557327164577e-06, "loss": 0.4391, "step": 3248 }, { "epoch": 0.20351085986313597, "grad_norm": 0.8171470401099932, "learning_rate": 9.231016881709349e-06, "loss": 0.3958, "step": 3249 }, { "epoch": 0.20357349786247828, "grad_norm": 0.8559540311928551, "learning_rate": 9.230476262104678e-06, "loss": 0.4844, "step": 3250 }, { "epoch": 0.20363613586182058, "grad_norm": 0.8252453001044721, "learning_rate": 9.22993546837282e-06, "loss": 0.413, "step": 3251 }, { "epoch": 0.20369877386116286, "grad_norm": 0.9498664625832934, "learning_rate": 9.229394500536031e-06, "loss": 0.4814, "step": 3252 }, { "epoch": 0.20376141186050517, "grad_norm": 0.8522777405365718, "learning_rate": 9.228853358616583e-06, "loss": 0.4238, "step": 3253 }, { "epoch": 0.20382404985984748, "grad_norm": 0.838087611597928, "learning_rate": 9.228312042636742e-06, "loss": 0.3963, "step": 3254 }, { "epoch": 0.20388668785918979, "grad_norm": 0.786745448684298, "learning_rate": 9.227770552618796e-06, "loss": 0.4205, "step": 3255 }, { "epoch": 0.20394932585853207, "grad_norm": 0.8465129928236776, "learning_rate": 9.227228888585026e-06, "loss": 0.5101, "step": 3256 }, { "epoch": 0.20401196385787437, "grad_norm": 0.8491993551880797, "learning_rate": 9.226687050557733e-06, "loss": 0.4435, "step": 3257 }, { "epoch": 0.20407460185721668, "grad_norm": 0.8986361216456349, "learning_rate": 9.226145038559216e-06, "loss": 0.4338, "step": 3258 }, { "epoch": 0.204137239856559, "grad_norm": 0.9148932098197372, "learning_rate": 9.225602852611783e-06, "loss": 0.4312, "step": 3259 }, { "epoch": 0.2041998778559013, "grad_norm": 0.7926063305066009, "learning_rate": 9.225060492737754e-06, "loss": 0.4331, "step": 3260 }, { "epoch": 0.20426251585524358, "grad_norm": 0.8215270707517172, "learning_rate": 9.224517958959452e-06, "loss": 0.448, "step": 3261 }, { "epoch": 0.20432515385458588, "grad_norm": 0.8474693878494831, "learning_rate": 9.223975251299204e-06, "loss": 0.4412, "step": 3262 }, { "epoch": 0.2043877918539282, "grad_norm": 0.8880981535723546, "learning_rate": 9.223432369779353e-06, "loss": 0.4665, "step": 3263 }, { "epoch": 0.2044504298532705, "grad_norm": 0.8453393922475091, "learning_rate": 9.22288931442224e-06, "loss": 0.3856, "step": 3264 }, { "epoch": 0.20451306785261278, "grad_norm": 0.8093588130340836, "learning_rate": 9.22234608525022e-06, "loss": 0.4084, "step": 3265 }, { "epoch": 0.2045757058519551, "grad_norm": 0.8433584698680905, "learning_rate": 9.22180268228565e-06, "loss": 0.4576, "step": 3266 }, { "epoch": 0.2046383438512974, "grad_norm": 0.8801963038246357, "learning_rate": 9.221259105550899e-06, "loss": 0.4288, "step": 3267 }, { "epoch": 0.2047009818506397, "grad_norm": 0.8521240231599905, "learning_rate": 9.22071535506834e-06, "loss": 0.4275, "step": 3268 }, { "epoch": 0.20476361984998198, "grad_norm": 0.8885123942630785, "learning_rate": 9.220171430860354e-06, "loss": 0.4712, "step": 3269 }, { "epoch": 0.2048262578493243, "grad_norm": 0.8883169945183278, "learning_rate": 9.219627332949329e-06, "loss": 0.4407, "step": 3270 }, { "epoch": 0.2048888958486666, "grad_norm": 0.841542583321638, "learning_rate": 9.219083061357657e-06, "loss": 0.4266, "step": 3271 }, { "epoch": 0.2049515338480089, "grad_norm": 0.8823332386938695, "learning_rate": 9.218538616107745e-06, "loss": 0.4497, "step": 3272 }, { "epoch": 0.20501417184735118, "grad_norm": 0.8038784409286737, "learning_rate": 9.217993997222e-06, "loss": 0.435, "step": 3273 }, { "epoch": 0.2050768098466935, "grad_norm": 0.9349981274869598, "learning_rate": 9.217449204722839e-06, "loss": 0.4378, "step": 3274 }, { "epoch": 0.2051394478460358, "grad_norm": 0.7841121310918725, "learning_rate": 9.216904238632687e-06, "loss": 0.3912, "step": 3275 }, { "epoch": 0.2052020858453781, "grad_norm": 0.8398200865770461, "learning_rate": 9.216359098973974e-06, "loss": 0.3856, "step": 3276 }, { "epoch": 0.2052647238447204, "grad_norm": 0.9763858015361466, "learning_rate": 9.215813785769136e-06, "loss": 0.4571, "step": 3277 }, { "epoch": 0.2053273618440627, "grad_norm": 0.868716076287844, "learning_rate": 9.215268299040622e-06, "loss": 0.433, "step": 3278 }, { "epoch": 0.205389999843405, "grad_norm": 0.9024598783588731, "learning_rate": 9.214722638810882e-06, "loss": 0.4288, "step": 3279 }, { "epoch": 0.2054526378427473, "grad_norm": 0.8869001389003607, "learning_rate": 9.214176805102375e-06, "loss": 0.5048, "step": 3280 }, { "epoch": 0.20551527584208962, "grad_norm": 0.986829687202644, "learning_rate": 9.213630797937569e-06, "loss": 0.4409, "step": 3281 }, { "epoch": 0.2055779138414319, "grad_norm": 0.7048444942591653, "learning_rate": 9.213084617338938e-06, "loss": 0.4989, "step": 3282 }, { "epoch": 0.2056405518407742, "grad_norm": 0.8741414315116655, "learning_rate": 9.212538263328961e-06, "loss": 0.439, "step": 3283 }, { "epoch": 0.2057031898401165, "grad_norm": 0.8525264120550041, "learning_rate": 9.211991735930129e-06, "loss": 0.4327, "step": 3284 }, { "epoch": 0.20576582783945882, "grad_norm": 0.924715723932997, "learning_rate": 9.211445035164935e-06, "loss": 0.4395, "step": 3285 }, { "epoch": 0.2058284658388011, "grad_norm": 0.8775756381474793, "learning_rate": 9.21089816105588e-06, "loss": 0.443, "step": 3286 }, { "epoch": 0.2058911038381434, "grad_norm": 0.8981642660236029, "learning_rate": 9.210351113625475e-06, "loss": 0.4215, "step": 3287 }, { "epoch": 0.20595374183748572, "grad_norm": 0.9184965540830196, "learning_rate": 9.209803892896238e-06, "loss": 0.4483, "step": 3288 }, { "epoch": 0.20601637983682802, "grad_norm": 0.8195331560864743, "learning_rate": 9.209256498890691e-06, "loss": 0.4412, "step": 3289 }, { "epoch": 0.2060790178361703, "grad_norm": 0.8338889371574072, "learning_rate": 9.208708931631365e-06, "loss": 0.4134, "step": 3290 }, { "epoch": 0.2061416558355126, "grad_norm": 0.9905878520585092, "learning_rate": 9.208161191140799e-06, "loss": 0.4957, "step": 3291 }, { "epoch": 0.20620429383485492, "grad_norm": 0.8642057550673428, "learning_rate": 9.207613277441536e-06, "loss": 0.4516, "step": 3292 }, { "epoch": 0.20626693183419723, "grad_norm": 0.8035659820464052, "learning_rate": 9.207065190556131e-06, "loss": 0.4857, "step": 3293 }, { "epoch": 0.2063295698335395, "grad_norm": 0.9401476363303195, "learning_rate": 9.20651693050714e-06, "loss": 0.4558, "step": 3294 }, { "epoch": 0.2063922078328818, "grad_norm": 0.9528509689532044, "learning_rate": 9.205968497317133e-06, "loss": 0.4633, "step": 3295 }, { "epoch": 0.20645484583222412, "grad_norm": 0.9759149737339148, "learning_rate": 9.205419891008682e-06, "loss": 0.4653, "step": 3296 }, { "epoch": 0.20651748383156643, "grad_norm": 0.934037209336129, "learning_rate": 9.204871111604368e-06, "loss": 0.4266, "step": 3297 }, { "epoch": 0.2065801218309087, "grad_norm": 0.975892419618638, "learning_rate": 9.204322159126777e-06, "loss": 0.4817, "step": 3298 }, { "epoch": 0.20664275983025102, "grad_norm": 0.8682236893656792, "learning_rate": 9.203773033598506e-06, "loss": 0.4429, "step": 3299 }, { "epoch": 0.20670539782959332, "grad_norm": 0.8525460811615844, "learning_rate": 9.203223735042157e-06, "loss": 0.4394, "step": 3300 }, { "epoch": 0.20676803582893563, "grad_norm": 0.8573448814381951, "learning_rate": 9.202674263480338e-06, "loss": 0.4211, "step": 3301 }, { "epoch": 0.2068306738282779, "grad_norm": 0.8449327165079669, "learning_rate": 9.202124618935669e-06, "loss": 0.3768, "step": 3302 }, { "epoch": 0.20689331182762022, "grad_norm": 0.9380842442114333, "learning_rate": 9.201574801430768e-06, "loss": 0.4225, "step": 3303 }, { "epoch": 0.20695594982696253, "grad_norm": 0.8428742377560019, "learning_rate": 9.201024810988271e-06, "loss": 0.4131, "step": 3304 }, { "epoch": 0.20701858782630483, "grad_norm": 0.90573157748622, "learning_rate": 9.20047464763081e-06, "loss": 0.4676, "step": 3305 }, { "epoch": 0.20708122582564714, "grad_norm": 0.9036935557734621, "learning_rate": 9.199924311381035e-06, "loss": 0.4496, "step": 3306 }, { "epoch": 0.20714386382498942, "grad_norm": 1.3376934777703455, "learning_rate": 9.199373802261594e-06, "loss": 0.5168, "step": 3307 }, { "epoch": 0.20720650182433173, "grad_norm": 0.8684602183382305, "learning_rate": 9.198823120295149e-06, "loss": 0.419, "step": 3308 }, { "epoch": 0.20726913982367404, "grad_norm": 0.8163476239561485, "learning_rate": 9.198272265504364e-06, "loss": 0.4247, "step": 3309 }, { "epoch": 0.20733177782301634, "grad_norm": 0.8377682146518003, "learning_rate": 9.197721237911915e-06, "loss": 0.375, "step": 3310 }, { "epoch": 0.20739441582235862, "grad_norm": 0.804596309568741, "learning_rate": 9.19717003754048e-06, "loss": 0.3952, "step": 3311 }, { "epoch": 0.20745705382170093, "grad_norm": 0.8208002583251885, "learning_rate": 9.196618664412746e-06, "loss": 0.4195, "step": 3312 }, { "epoch": 0.20751969182104324, "grad_norm": 0.8287930929769609, "learning_rate": 9.19606711855141e-06, "loss": 0.4512, "step": 3313 }, { "epoch": 0.20758232982038555, "grad_norm": 0.8652480848829999, "learning_rate": 9.195515399979174e-06, "loss": 0.4339, "step": 3314 }, { "epoch": 0.20764496781972783, "grad_norm": 0.878380307756373, "learning_rate": 9.194963508718743e-06, "loss": 0.4529, "step": 3315 }, { "epoch": 0.20770760581907013, "grad_norm": 0.862769453760316, "learning_rate": 9.194411444792836e-06, "loss": 0.4764, "step": 3316 }, { "epoch": 0.20777024381841244, "grad_norm": 0.892252823798781, "learning_rate": 9.193859208224176e-06, "loss": 0.4357, "step": 3317 }, { "epoch": 0.20783288181775475, "grad_norm": 0.8502293822839186, "learning_rate": 9.193306799035494e-06, "loss": 0.4272, "step": 3318 }, { "epoch": 0.20789551981709703, "grad_norm": 0.8407019195103553, "learning_rate": 9.192754217249523e-06, "loss": 0.4417, "step": 3319 }, { "epoch": 0.20795815781643934, "grad_norm": 0.8690700985593811, "learning_rate": 9.192201462889012e-06, "loss": 0.4968, "step": 3320 }, { "epoch": 0.20802079581578165, "grad_norm": 0.8343541475030292, "learning_rate": 9.19164853597671e-06, "loss": 0.4244, "step": 3321 }, { "epoch": 0.20808343381512395, "grad_norm": 0.8785686486060936, "learning_rate": 9.191095436535376e-06, "loss": 0.4039, "step": 3322 }, { "epoch": 0.20814607181446623, "grad_norm": 0.879600344827115, "learning_rate": 9.190542164587776e-06, "loss": 0.4128, "step": 3323 }, { "epoch": 0.20820870981380854, "grad_norm": 0.8402476629803167, "learning_rate": 9.189988720156682e-06, "loss": 0.4718, "step": 3324 }, { "epoch": 0.20827134781315085, "grad_norm": 0.7082814964233262, "learning_rate": 9.189435103264876e-06, "loss": 0.4816, "step": 3325 }, { "epoch": 0.20833398581249316, "grad_norm": 0.8883845935650176, "learning_rate": 9.188881313935143e-06, "loss": 0.4543, "step": 3326 }, { "epoch": 0.20839662381183546, "grad_norm": 0.8621083407413281, "learning_rate": 9.188327352190279e-06, "loss": 0.4551, "step": 3327 }, { "epoch": 0.20845926181117774, "grad_norm": 0.8554286413378765, "learning_rate": 9.187773218053082e-06, "loss": 0.4267, "step": 3328 }, { "epoch": 0.20852189981052005, "grad_norm": 0.8102099193001588, "learning_rate": 9.187218911546363e-06, "loss": 0.4446, "step": 3329 }, { "epoch": 0.20858453780986236, "grad_norm": 0.812868169457042, "learning_rate": 9.186664432692935e-06, "loss": 0.4057, "step": 3330 }, { "epoch": 0.20864717580920467, "grad_norm": 0.8445059810374039, "learning_rate": 9.186109781515623e-06, "loss": 0.4629, "step": 3331 }, { "epoch": 0.20870981380854695, "grad_norm": 0.9196187841604291, "learning_rate": 9.185554958037255e-06, "loss": 0.4398, "step": 3332 }, { "epoch": 0.20877245180788925, "grad_norm": 0.9281409503617892, "learning_rate": 9.184999962280667e-06, "loss": 0.4453, "step": 3333 }, { "epoch": 0.20883508980723156, "grad_norm": 0.7906767513896703, "learning_rate": 9.184444794268707e-06, "loss": 0.4824, "step": 3334 }, { "epoch": 0.20889772780657387, "grad_norm": 0.9014624503120523, "learning_rate": 9.18388945402422e-06, "loss": 0.4707, "step": 3335 }, { "epoch": 0.20896036580591615, "grad_norm": 0.7901486618255228, "learning_rate": 9.183333941570067e-06, "loss": 0.4539, "step": 3336 }, { "epoch": 0.20902300380525846, "grad_norm": 0.772541668779061, "learning_rate": 9.182778256929112e-06, "loss": 0.4208, "step": 3337 }, { "epoch": 0.20908564180460076, "grad_norm": 0.8619055389279087, "learning_rate": 9.182222400124229e-06, "loss": 0.4491, "step": 3338 }, { "epoch": 0.20914827980394307, "grad_norm": 0.8095572377089879, "learning_rate": 9.181666371178295e-06, "loss": 0.394, "step": 3339 }, { "epoch": 0.20921091780328535, "grad_norm": 0.8972102760495322, "learning_rate": 9.181110170114197e-06, "loss": 0.4437, "step": 3340 }, { "epoch": 0.20927355580262766, "grad_norm": 0.9373634758214595, "learning_rate": 9.18055379695483e-06, "loss": 0.4523, "step": 3341 }, { "epoch": 0.20933619380196997, "grad_norm": 0.8112106002338824, "learning_rate": 9.17999725172309e-06, "loss": 0.4329, "step": 3342 }, { "epoch": 0.20939883180131227, "grad_norm": 0.8979828896218737, "learning_rate": 9.179440534441887e-06, "loss": 0.4406, "step": 3343 }, { "epoch": 0.20946146980065455, "grad_norm": 0.8810433535787447, "learning_rate": 9.178883645134138e-06, "loss": 0.4744, "step": 3344 }, { "epoch": 0.20952410779999686, "grad_norm": 0.836520095203867, "learning_rate": 9.178326583822762e-06, "loss": 0.4357, "step": 3345 }, { "epoch": 0.20958674579933917, "grad_norm": 0.8520675726483377, "learning_rate": 9.177769350530688e-06, "loss": 0.4076, "step": 3346 }, { "epoch": 0.20964938379868148, "grad_norm": 0.7338945331881971, "learning_rate": 9.177211945280852e-06, "loss": 0.3886, "step": 3347 }, { "epoch": 0.20971202179802378, "grad_norm": 0.8532459415734124, "learning_rate": 9.176654368096198e-06, "loss": 0.4692, "step": 3348 }, { "epoch": 0.20977465979736606, "grad_norm": 0.8463146254488535, "learning_rate": 9.176096618999673e-06, "loss": 0.4255, "step": 3349 }, { "epoch": 0.20983729779670837, "grad_norm": 0.8256090493144704, "learning_rate": 9.175538698014237e-06, "loss": 0.4447, "step": 3350 }, { "epoch": 0.20989993579605068, "grad_norm": 0.8743625546416893, "learning_rate": 9.174980605162855e-06, "loss": 0.4125, "step": 3351 }, { "epoch": 0.209962573795393, "grad_norm": 0.8357571253111237, "learning_rate": 9.174422340468495e-06, "loss": 0.4407, "step": 3352 }, { "epoch": 0.21002521179473527, "grad_norm": 0.8138941396973759, "learning_rate": 9.173863903954135e-06, "loss": 0.4585, "step": 3353 }, { "epoch": 0.21008784979407757, "grad_norm": 0.854330156929319, "learning_rate": 9.173305295642763e-06, "loss": 0.4326, "step": 3354 }, { "epoch": 0.21015048779341988, "grad_norm": 0.9123262484525455, "learning_rate": 9.172746515557371e-06, "loss": 0.4583, "step": 3355 }, { "epoch": 0.2102131257927622, "grad_norm": 0.8136257241081035, "learning_rate": 9.172187563720958e-06, "loss": 0.4248, "step": 3356 }, { "epoch": 0.21027576379210447, "grad_norm": 0.8141656766190709, "learning_rate": 9.171628440156531e-06, "loss": 0.4422, "step": 3357 }, { "epoch": 0.21033840179144678, "grad_norm": 0.8429831484959491, "learning_rate": 9.171069144887101e-06, "loss": 0.4396, "step": 3358 }, { "epoch": 0.21040103979078909, "grad_norm": 0.9087373046967302, "learning_rate": 9.170509677935694e-06, "loss": 0.4107, "step": 3359 }, { "epoch": 0.2104636777901314, "grad_norm": 0.8009442604051205, "learning_rate": 9.169950039325331e-06, "loss": 0.4462, "step": 3360 }, { "epoch": 0.21052631578947367, "grad_norm": 0.8334550670730261, "learning_rate": 9.169390229079054e-06, "loss": 0.4115, "step": 3361 }, { "epoch": 0.21058895378881598, "grad_norm": 0.802683878193965, "learning_rate": 9.168830247219898e-06, "loss": 0.4074, "step": 3362 }, { "epoch": 0.2106515917881583, "grad_norm": 0.8820812276645772, "learning_rate": 9.168270093770918e-06, "loss": 0.4428, "step": 3363 }, { "epoch": 0.2107142297875006, "grad_norm": 0.7827213323641778, "learning_rate": 9.167709768755165e-06, "loss": 0.4162, "step": 3364 }, { "epoch": 0.21077686778684288, "grad_norm": 0.8065461297720375, "learning_rate": 9.167149272195706e-06, "loss": 0.4138, "step": 3365 }, { "epoch": 0.21083950578618518, "grad_norm": 0.8585240458389404, "learning_rate": 9.166588604115607e-06, "loss": 0.4578, "step": 3366 }, { "epoch": 0.2109021437855275, "grad_norm": 0.8132387600523127, "learning_rate": 9.166027764537951e-06, "loss": 0.4493, "step": 3367 }, { "epoch": 0.2109647817848698, "grad_norm": 0.8312605688871612, "learning_rate": 9.165466753485816e-06, "loss": 0.439, "step": 3368 }, { "epoch": 0.2110274197842121, "grad_norm": 0.8531031008912388, "learning_rate": 9.164905570982299e-06, "loss": 0.4701, "step": 3369 }, { "epoch": 0.21109005778355439, "grad_norm": 0.8580918254940981, "learning_rate": 9.164344217050494e-06, "loss": 0.4499, "step": 3370 }, { "epoch": 0.2111526957828967, "grad_norm": 0.7994863909437608, "learning_rate": 9.16378269171351e-06, "loss": 0.422, "step": 3371 }, { "epoch": 0.211215333782239, "grad_norm": 0.8427078754987986, "learning_rate": 9.163220994994455e-06, "loss": 0.402, "step": 3372 }, { "epoch": 0.2112779717815813, "grad_norm": 0.8410486643059157, "learning_rate": 9.162659126916453e-06, "loss": 0.4258, "step": 3373 }, { "epoch": 0.2113406097809236, "grad_norm": 0.6816545654416111, "learning_rate": 9.162097087502627e-06, "loss": 0.4902, "step": 3374 }, { "epoch": 0.2114032477802659, "grad_norm": 0.8111489812756513, "learning_rate": 9.161534876776114e-06, "loss": 0.415, "step": 3375 }, { "epoch": 0.2114658857796082, "grad_norm": 0.8102885315253535, "learning_rate": 9.160972494760052e-06, "loss": 0.4018, "step": 3376 }, { "epoch": 0.2115285237789505, "grad_norm": 0.9098676367842284, "learning_rate": 9.160409941477591e-06, "loss": 0.4523, "step": 3377 }, { "epoch": 0.2115911617782928, "grad_norm": 0.8730323812458546, "learning_rate": 9.159847216951882e-06, "loss": 0.4845, "step": 3378 }, { "epoch": 0.2116537997776351, "grad_norm": 0.8724320333098525, "learning_rate": 9.159284321206091e-06, "loss": 0.4708, "step": 3379 }, { "epoch": 0.2117164377769774, "grad_norm": 0.9169340995621772, "learning_rate": 9.158721254263387e-06, "loss": 0.477, "step": 3380 }, { "epoch": 0.21177907577631971, "grad_norm": 0.8337585608992336, "learning_rate": 9.158158016146942e-06, "loss": 0.3943, "step": 3381 }, { "epoch": 0.211841713775662, "grad_norm": 0.7313403140342521, "learning_rate": 9.157594606879942e-06, "loss": 0.4842, "step": 3382 }, { "epoch": 0.2119043517750043, "grad_norm": 0.8831134488675165, "learning_rate": 9.157031026485577e-06, "loss": 0.4753, "step": 3383 }, { "epoch": 0.2119669897743466, "grad_norm": 0.8362287423138655, "learning_rate": 9.156467274987042e-06, "loss": 0.4163, "step": 3384 }, { "epoch": 0.21202962777368892, "grad_norm": 0.8407491728843567, "learning_rate": 9.155903352407542e-06, "loss": 0.4053, "step": 3385 }, { "epoch": 0.2120922657730312, "grad_norm": 0.8798734287286547, "learning_rate": 9.15533925877029e-06, "loss": 0.4131, "step": 3386 }, { "epoch": 0.2121549037723735, "grad_norm": 0.9198286098846054, "learning_rate": 9.154774994098504e-06, "loss": 0.4418, "step": 3387 }, { "epoch": 0.2122175417717158, "grad_norm": 0.8261926059944933, "learning_rate": 9.154210558415407e-06, "loss": 0.4065, "step": 3388 }, { "epoch": 0.21228017977105812, "grad_norm": 0.8595585050570093, "learning_rate": 9.153645951744232e-06, "loss": 0.4509, "step": 3389 }, { "epoch": 0.21234281777040043, "grad_norm": 0.8950249323086904, "learning_rate": 9.153081174108219e-06, "loss": 0.4481, "step": 3390 }, { "epoch": 0.2124054557697427, "grad_norm": 0.7710459287174753, "learning_rate": 9.152516225530613e-06, "loss": 0.3856, "step": 3391 }, { "epoch": 0.21246809376908501, "grad_norm": 0.8487066930226548, "learning_rate": 9.151951106034668e-06, "loss": 0.3947, "step": 3392 }, { "epoch": 0.21253073176842732, "grad_norm": 0.8615629119560242, "learning_rate": 9.151385815643647e-06, "loss": 0.4332, "step": 3393 }, { "epoch": 0.21259336976776963, "grad_norm": 0.8957225434087194, "learning_rate": 9.150820354380815e-06, "loss": 0.4696, "step": 3394 }, { "epoch": 0.2126560077671119, "grad_norm": 0.7660150640114933, "learning_rate": 9.150254722269446e-06, "loss": 0.4167, "step": 3395 }, { "epoch": 0.21271864576645422, "grad_norm": 0.8930365066662044, "learning_rate": 9.149688919332822e-06, "loss": 0.392, "step": 3396 }, { "epoch": 0.21278128376579653, "grad_norm": 0.8674537154207222, "learning_rate": 9.149122945594233e-06, "loss": 0.438, "step": 3397 }, { "epoch": 0.21284392176513883, "grad_norm": 0.7604979436586703, "learning_rate": 9.148556801076974e-06, "loss": 0.3891, "step": 3398 }, { "epoch": 0.2129065597644811, "grad_norm": 0.8342086321245238, "learning_rate": 9.147990485804345e-06, "loss": 0.402, "step": 3399 }, { "epoch": 0.21296919776382342, "grad_norm": 0.8426458998916536, "learning_rate": 9.147423999799659e-06, "loss": 0.4443, "step": 3400 }, { "epoch": 0.21303183576316573, "grad_norm": 0.870501162670089, "learning_rate": 9.14685734308623e-06, "loss": 0.4013, "step": 3401 }, { "epoch": 0.21309447376250804, "grad_norm": 0.8717720572077022, "learning_rate": 9.146290515687384e-06, "loss": 0.4312, "step": 3402 }, { "epoch": 0.21315711176185032, "grad_norm": 0.8758666299979452, "learning_rate": 9.14572351762645e-06, "loss": 0.4481, "step": 3403 }, { "epoch": 0.21321974976119262, "grad_norm": 0.8267187233206539, "learning_rate": 9.145156348926767e-06, "loss": 0.427, "step": 3404 }, { "epoch": 0.21328238776053493, "grad_norm": 0.8835038883106409, "learning_rate": 9.144589009611681e-06, "loss": 0.4115, "step": 3405 }, { "epoch": 0.21334502575987724, "grad_norm": 0.8807430083547967, "learning_rate": 9.14402149970454e-06, "loss": 0.4046, "step": 3406 }, { "epoch": 0.21340766375921952, "grad_norm": 0.8671787824107875, "learning_rate": 9.143453819228706e-06, "loss": 0.4238, "step": 3407 }, { "epoch": 0.21347030175856183, "grad_norm": 0.8902092699295743, "learning_rate": 9.142885968207542e-06, "loss": 0.4296, "step": 3408 }, { "epoch": 0.21353293975790413, "grad_norm": 0.8710251170218991, "learning_rate": 9.142317946664426e-06, "loss": 0.4681, "step": 3409 }, { "epoch": 0.21359557775724644, "grad_norm": 0.8458930132793274, "learning_rate": 9.14174975462273e-06, "loss": 0.427, "step": 3410 }, { "epoch": 0.21365821575658872, "grad_norm": 0.861633021515958, "learning_rate": 9.141181392105849e-06, "loss": 0.4373, "step": 3411 }, { "epoch": 0.21372085375593103, "grad_norm": 0.8821957298733285, "learning_rate": 9.140612859137173e-06, "loss": 0.4364, "step": 3412 }, { "epoch": 0.21378349175527334, "grad_norm": 0.8493858490604816, "learning_rate": 9.140044155740102e-06, "loss": 0.4182, "step": 3413 }, { "epoch": 0.21384612975461564, "grad_norm": 0.8059782421793328, "learning_rate": 9.139475281938043e-06, "loss": 0.4341, "step": 3414 }, { "epoch": 0.21390876775395795, "grad_norm": 0.9382274459637886, "learning_rate": 9.138906237754416e-06, "loss": 0.4907, "step": 3415 }, { "epoch": 0.21397140575330023, "grad_norm": 0.8336303229672425, "learning_rate": 9.13833702321264e-06, "loss": 0.417, "step": 3416 }, { "epoch": 0.21403404375264254, "grad_norm": 1.0127760028686827, "learning_rate": 9.137767638336144e-06, "loss": 0.4474, "step": 3417 }, { "epoch": 0.21409668175198485, "grad_norm": 0.8295996979571515, "learning_rate": 9.137198083148363e-06, "loss": 0.4415, "step": 3418 }, { "epoch": 0.21415931975132715, "grad_norm": 0.8187180235820053, "learning_rate": 9.136628357672742e-06, "loss": 0.4258, "step": 3419 }, { "epoch": 0.21422195775066943, "grad_norm": 0.8707674465480058, "learning_rate": 9.136058461932729e-06, "loss": 0.4446, "step": 3420 }, { "epoch": 0.21428459575001174, "grad_norm": 0.8177326928670063, "learning_rate": 9.135488395951782e-06, "loss": 0.414, "step": 3421 }, { "epoch": 0.21434723374935405, "grad_norm": 0.9291643140778464, "learning_rate": 9.134918159753365e-06, "loss": 0.4578, "step": 3422 }, { "epoch": 0.21440987174869636, "grad_norm": 0.8431285196978607, "learning_rate": 9.134347753360951e-06, "loss": 0.4595, "step": 3423 }, { "epoch": 0.21447250974803864, "grad_norm": 0.9038861576905716, "learning_rate": 9.133777176798013e-06, "loss": 0.4235, "step": 3424 }, { "epoch": 0.21453514774738094, "grad_norm": 0.8534990208814436, "learning_rate": 9.133206430088042e-06, "loss": 0.4517, "step": 3425 }, { "epoch": 0.21459778574672325, "grad_norm": 0.8640343961304656, "learning_rate": 9.132635513254526e-06, "loss": 0.4212, "step": 3426 }, { "epoch": 0.21466042374606556, "grad_norm": 0.8837559071623254, "learning_rate": 9.132064426320964e-06, "loss": 0.4505, "step": 3427 }, { "epoch": 0.21472306174540784, "grad_norm": 0.8476600762400688, "learning_rate": 9.131493169310866e-06, "loss": 0.435, "step": 3428 }, { "epoch": 0.21478569974475015, "grad_norm": 0.7953202400025011, "learning_rate": 9.13092174224774e-06, "loss": 0.3938, "step": 3429 }, { "epoch": 0.21484833774409245, "grad_norm": 0.8536921668703055, "learning_rate": 9.13035014515511e-06, "loss": 0.4023, "step": 3430 }, { "epoch": 0.21491097574343476, "grad_norm": 0.8312522508748729, "learning_rate": 9.1297783780565e-06, "loss": 0.4085, "step": 3431 }, { "epoch": 0.21497361374277704, "grad_norm": 0.8346208927733847, "learning_rate": 9.129206440975446e-06, "loss": 0.4252, "step": 3432 }, { "epoch": 0.21503625174211935, "grad_norm": 0.9240573651414102, "learning_rate": 9.128634333935488e-06, "loss": 0.4665, "step": 3433 }, { "epoch": 0.21509888974146166, "grad_norm": 0.8722491223873804, "learning_rate": 9.128062056960175e-06, "loss": 0.4224, "step": 3434 }, { "epoch": 0.21516152774080397, "grad_norm": 0.8557705129082309, "learning_rate": 9.127489610073061e-06, "loss": 0.4455, "step": 3435 }, { "epoch": 0.21522416574014627, "grad_norm": 0.9396296021169515, "learning_rate": 9.12691699329771e-06, "loss": 0.4608, "step": 3436 }, { "epoch": 0.21528680373948855, "grad_norm": 0.8594037577770807, "learning_rate": 9.126344206657689e-06, "loss": 0.4306, "step": 3437 }, { "epoch": 0.21534944173883086, "grad_norm": 0.8457043378197814, "learning_rate": 9.125771250176572e-06, "loss": 0.4422, "step": 3438 }, { "epoch": 0.21541207973817317, "grad_norm": 0.8962233555005542, "learning_rate": 9.125198123877947e-06, "loss": 0.4551, "step": 3439 }, { "epoch": 0.21547471773751548, "grad_norm": 0.7780312415204476, "learning_rate": 9.124624827785402e-06, "loss": 0.3997, "step": 3440 }, { "epoch": 0.21553735573685776, "grad_norm": 0.9376612362411576, "learning_rate": 9.124051361922532e-06, "loss": 0.4518, "step": 3441 }, { "epoch": 0.21559999373620006, "grad_norm": 0.7554140446957337, "learning_rate": 9.123477726312943e-06, "loss": 0.422, "step": 3442 }, { "epoch": 0.21566263173554237, "grad_norm": 0.682904661201874, "learning_rate": 9.122903920980247e-06, "loss": 0.4793, "step": 3443 }, { "epoch": 0.21572526973488468, "grad_norm": 0.8542650851805832, "learning_rate": 9.122329945948059e-06, "loss": 0.3996, "step": 3444 }, { "epoch": 0.21578790773422696, "grad_norm": 0.8456624103392347, "learning_rate": 9.121755801240005e-06, "loss": 0.4354, "step": 3445 }, { "epoch": 0.21585054573356927, "grad_norm": 0.840919459859079, "learning_rate": 9.121181486879718e-06, "loss": 0.4513, "step": 3446 }, { "epoch": 0.21591318373291157, "grad_norm": 0.80663331109863, "learning_rate": 9.120607002890835e-06, "loss": 0.4202, "step": 3447 }, { "epoch": 0.21597582173225388, "grad_norm": 0.79542519780374, "learning_rate": 9.120032349297004e-06, "loss": 0.4479, "step": 3448 }, { "epoch": 0.21603845973159616, "grad_norm": 0.8111201591089079, "learning_rate": 9.119457526121876e-06, "loss": 0.4051, "step": 3449 }, { "epoch": 0.21610109773093847, "grad_norm": 0.8347062592676378, "learning_rate": 9.118882533389111e-06, "loss": 0.433, "step": 3450 }, { "epoch": 0.21616373573028078, "grad_norm": 0.8467809575940478, "learning_rate": 9.11830737112238e-06, "loss": 0.4349, "step": 3451 }, { "epoch": 0.21622637372962308, "grad_norm": 0.8027466730788757, "learning_rate": 9.117732039345348e-06, "loss": 0.4355, "step": 3452 }, { "epoch": 0.21628901172896536, "grad_norm": 0.8517242027847985, "learning_rate": 9.117156538081704e-06, "loss": 0.4204, "step": 3453 }, { "epoch": 0.21635164972830767, "grad_norm": 0.8424873794675967, "learning_rate": 9.116580867355133e-06, "loss": 0.3881, "step": 3454 }, { "epoch": 0.21641428772764998, "grad_norm": 0.8704989857756612, "learning_rate": 9.116005027189328e-06, "loss": 0.4474, "step": 3455 }, { "epoch": 0.2164769257269923, "grad_norm": 0.8030020603232793, "learning_rate": 9.115429017607992e-06, "loss": 0.4217, "step": 3456 }, { "epoch": 0.2165395637263346, "grad_norm": 0.8410652944040922, "learning_rate": 9.114852838634836e-06, "loss": 0.4435, "step": 3457 }, { "epoch": 0.21660220172567687, "grad_norm": 0.8946209317534654, "learning_rate": 9.114276490293572e-06, "loss": 0.4431, "step": 3458 }, { "epoch": 0.21666483972501918, "grad_norm": 0.8453618464522995, "learning_rate": 9.113699972607924e-06, "loss": 0.3895, "step": 3459 }, { "epoch": 0.2167274777243615, "grad_norm": 0.816534635989571, "learning_rate": 9.113123285601621e-06, "loss": 0.4081, "step": 3460 }, { "epoch": 0.2167901157237038, "grad_norm": 0.8076378981435558, "learning_rate": 9.1125464292984e-06, "loss": 0.4187, "step": 3461 }, { "epoch": 0.21685275372304608, "grad_norm": 0.9298844050294148, "learning_rate": 9.111969403722007e-06, "loss": 0.4238, "step": 3462 }, { "epoch": 0.21691539172238838, "grad_norm": 0.8132141821312064, "learning_rate": 9.111392208896187e-06, "loss": 0.4209, "step": 3463 }, { "epoch": 0.2169780297217307, "grad_norm": 0.8333801230299253, "learning_rate": 9.110814844844703e-06, "loss": 0.4486, "step": 3464 }, { "epoch": 0.217040667721073, "grad_norm": 0.9083940731014362, "learning_rate": 9.110237311591316e-06, "loss": 0.4409, "step": 3465 }, { "epoch": 0.21710330572041528, "grad_norm": 0.742053958746206, "learning_rate": 9.109659609159798e-06, "loss": 0.485, "step": 3466 }, { "epoch": 0.2171659437197576, "grad_norm": 0.8054236064154258, "learning_rate": 9.109081737573927e-06, "loss": 0.4052, "step": 3467 }, { "epoch": 0.2172285817190999, "grad_norm": 0.8442103222480151, "learning_rate": 9.108503696857491e-06, "loss": 0.4381, "step": 3468 }, { "epoch": 0.2172912197184422, "grad_norm": 0.8034897770058345, "learning_rate": 9.10792548703428e-06, "loss": 0.4086, "step": 3469 }, { "epoch": 0.21735385771778448, "grad_norm": 0.8625131783662776, "learning_rate": 9.107347108128093e-06, "loss": 0.4306, "step": 3470 }, { "epoch": 0.2174164957171268, "grad_norm": 0.8534591226171914, "learning_rate": 9.106768560162737e-06, "loss": 0.4369, "step": 3471 }, { "epoch": 0.2174791337164691, "grad_norm": 0.7169922369454246, "learning_rate": 9.106189843162023e-06, "loss": 0.4826, "step": 3472 }, { "epoch": 0.2175417717158114, "grad_norm": 0.8444900854642773, "learning_rate": 9.105610957149775e-06, "loss": 0.4386, "step": 3473 }, { "epoch": 0.21760440971515368, "grad_norm": 0.9066087267780036, "learning_rate": 9.105031902149816e-06, "loss": 0.4266, "step": 3474 }, { "epoch": 0.217667047714496, "grad_norm": 0.8088454150074316, "learning_rate": 9.104452678185985e-06, "loss": 0.4044, "step": 3475 }, { "epoch": 0.2177296857138383, "grad_norm": 0.8369484351419599, "learning_rate": 9.103873285282117e-06, "loss": 0.4466, "step": 3476 }, { "epoch": 0.2177923237131806, "grad_norm": 0.6720668789350304, "learning_rate": 9.103293723462064e-06, "loss": 0.4658, "step": 3477 }, { "epoch": 0.21785496171252292, "grad_norm": 0.8278959808920889, "learning_rate": 9.10271399274968e-06, "loss": 0.4747, "step": 3478 }, { "epoch": 0.2179175997118652, "grad_norm": 0.8807131223641256, "learning_rate": 9.102134093168827e-06, "loss": 0.4489, "step": 3479 }, { "epoch": 0.2179802377112075, "grad_norm": 0.8640073296424367, "learning_rate": 9.101554024743373e-06, "loss": 0.4022, "step": 3480 }, { "epoch": 0.2180428757105498, "grad_norm": 0.823051949220351, "learning_rate": 9.100973787497193e-06, "loss": 0.4376, "step": 3481 }, { "epoch": 0.21810551370989212, "grad_norm": 0.7331856074552733, "learning_rate": 9.10039338145417e-06, "loss": 0.3878, "step": 3482 }, { "epoch": 0.2181681517092344, "grad_norm": 0.8782470633456324, "learning_rate": 9.099812806638197e-06, "loss": 0.4036, "step": 3483 }, { "epoch": 0.2182307897085767, "grad_norm": 0.8555190935855986, "learning_rate": 9.099232063073167e-06, "loss": 0.408, "step": 3484 }, { "epoch": 0.218293427707919, "grad_norm": 0.8261633707583838, "learning_rate": 9.098651150782985e-06, "loss": 0.4606, "step": 3485 }, { "epoch": 0.21835606570726132, "grad_norm": 0.7966133572348397, "learning_rate": 9.098070069791559e-06, "loss": 0.4417, "step": 3486 }, { "epoch": 0.2184187037066036, "grad_norm": 0.7574391340404741, "learning_rate": 9.097488820122808e-06, "loss": 0.3865, "step": 3487 }, { "epoch": 0.2184813417059459, "grad_norm": 0.9170723547892927, "learning_rate": 9.096907401800658e-06, "loss": 0.4403, "step": 3488 }, { "epoch": 0.21854397970528822, "grad_norm": 0.8594080587540511, "learning_rate": 9.096325814849039e-06, "loss": 0.4296, "step": 3489 }, { "epoch": 0.21860661770463052, "grad_norm": 0.7661977675186527, "learning_rate": 9.09574405929189e-06, "loss": 0.4744, "step": 3490 }, { "epoch": 0.2186692557039728, "grad_norm": 0.8183689386478324, "learning_rate": 9.095162135153153e-06, "loss": 0.4354, "step": 3491 }, { "epoch": 0.2187318937033151, "grad_norm": 0.8428987961649365, "learning_rate": 9.094580042456784e-06, "loss": 0.4168, "step": 3492 }, { "epoch": 0.21879453170265742, "grad_norm": 0.7865785162284066, "learning_rate": 9.09399778122674e-06, "loss": 0.3909, "step": 3493 }, { "epoch": 0.21885716970199973, "grad_norm": 0.8349117376245395, "learning_rate": 9.09341535148699e-06, "loss": 0.3959, "step": 3494 }, { "epoch": 0.218919807701342, "grad_norm": 0.842257947610258, "learning_rate": 9.0928327532615e-06, "loss": 0.4185, "step": 3495 }, { "epoch": 0.21898244570068431, "grad_norm": 0.8643290731523918, "learning_rate": 9.092249986574258e-06, "loss": 0.4149, "step": 3496 }, { "epoch": 0.21904508370002662, "grad_norm": 0.8540558447650581, "learning_rate": 9.091667051449245e-06, "loss": 0.4211, "step": 3497 }, { "epoch": 0.21910772169936893, "grad_norm": 0.8759626452252091, "learning_rate": 9.091083947910455e-06, "loss": 0.422, "step": 3498 }, { "epoch": 0.21917035969871124, "grad_norm": 0.9130135007877531, "learning_rate": 9.090500675981893e-06, "loss": 0.4451, "step": 3499 }, { "epoch": 0.21923299769805352, "grad_norm": 0.8420195345887747, "learning_rate": 9.089917235687564e-06, "loss": 0.4122, "step": 3500 }, { "epoch": 0.21929563569739582, "grad_norm": 0.8485558215500045, "learning_rate": 9.08933362705148e-06, "loss": 0.4266, "step": 3501 }, { "epoch": 0.21935827369673813, "grad_norm": 0.8368509502320544, "learning_rate": 9.088749850097667e-06, "loss": 0.432, "step": 3502 }, { "epoch": 0.21942091169608044, "grad_norm": 1.1342222713436856, "learning_rate": 9.088165904850152e-06, "loss": 0.4938, "step": 3503 }, { "epoch": 0.21948354969542272, "grad_norm": 0.8719998307040342, "learning_rate": 9.087581791332968e-06, "loss": 0.4458, "step": 3504 }, { "epoch": 0.21954618769476503, "grad_norm": 0.8392854343309148, "learning_rate": 9.086997509570159e-06, "loss": 0.4251, "step": 3505 }, { "epoch": 0.21960882569410733, "grad_norm": 0.7877223605912422, "learning_rate": 9.086413059585775e-06, "loss": 0.3917, "step": 3506 }, { "epoch": 0.21967146369344964, "grad_norm": 0.8994424070247724, "learning_rate": 9.085828441403868e-06, "loss": 0.4455, "step": 3507 }, { "epoch": 0.21973410169279192, "grad_norm": 0.8583773951892993, "learning_rate": 9.085243655048507e-06, "loss": 0.4365, "step": 3508 }, { "epoch": 0.21979673969213423, "grad_norm": 0.838636095076454, "learning_rate": 9.084658700543759e-06, "loss": 0.404, "step": 3509 }, { "epoch": 0.21985937769147654, "grad_norm": 0.7854355973859403, "learning_rate": 9.0840735779137e-06, "loss": 0.452, "step": 3510 }, { "epoch": 0.21992201569081885, "grad_norm": 0.8541503412710453, "learning_rate": 9.083488287182414e-06, "loss": 0.4725, "step": 3511 }, { "epoch": 0.21998465369016112, "grad_norm": 0.8766287396866455, "learning_rate": 9.082902828373992e-06, "loss": 0.443, "step": 3512 }, { "epoch": 0.22004729168950343, "grad_norm": 0.7896997642543903, "learning_rate": 9.082317201512532e-06, "loss": 0.4657, "step": 3513 }, { "epoch": 0.22010992968884574, "grad_norm": 0.7540936268555365, "learning_rate": 9.081731406622139e-06, "loss": 0.483, "step": 3514 }, { "epoch": 0.22017256768818805, "grad_norm": 0.8387532595957249, "learning_rate": 9.081145443726922e-06, "loss": 0.3781, "step": 3515 }, { "epoch": 0.22023520568753033, "grad_norm": 0.8250549216746454, "learning_rate": 9.080559312851003e-06, "loss": 0.434, "step": 3516 }, { "epoch": 0.22029784368687264, "grad_norm": 0.8189301924777083, "learning_rate": 9.079973014018504e-06, "loss": 0.3675, "step": 3517 }, { "epoch": 0.22036048168621494, "grad_norm": 0.8081565401916514, "learning_rate": 9.079386547253559e-06, "loss": 0.4546, "step": 3518 }, { "epoch": 0.22042311968555725, "grad_norm": 0.8933957294437657, "learning_rate": 9.078799912580305e-06, "loss": 0.4165, "step": 3519 }, { "epoch": 0.22048575768489956, "grad_norm": 0.8678317704148982, "learning_rate": 9.078213110022893e-06, "loss": 0.4355, "step": 3520 }, { "epoch": 0.22054839568424184, "grad_norm": 0.8913713955263155, "learning_rate": 9.077626139605469e-06, "loss": 0.4442, "step": 3521 }, { "epoch": 0.22061103368358415, "grad_norm": 0.7965550493765535, "learning_rate": 9.077039001352198e-06, "loss": 0.364, "step": 3522 }, { "epoch": 0.22067367168292645, "grad_norm": 0.7786881766915114, "learning_rate": 9.076451695287245e-06, "loss": 0.4072, "step": 3523 }, { "epoch": 0.22073630968226876, "grad_norm": 0.8122057398501212, "learning_rate": 9.075864221434782e-06, "loss": 0.4067, "step": 3524 }, { "epoch": 0.22079894768161104, "grad_norm": 0.8262303010894142, "learning_rate": 9.075276579818993e-06, "loss": 0.4323, "step": 3525 }, { "epoch": 0.22086158568095335, "grad_norm": 0.857947800928926, "learning_rate": 9.074688770464063e-06, "loss": 0.4224, "step": 3526 }, { "epoch": 0.22092422368029566, "grad_norm": 0.8601311900350356, "learning_rate": 9.074100793394188e-06, "loss": 0.4576, "step": 3527 }, { "epoch": 0.22098686167963796, "grad_norm": 0.869531032885354, "learning_rate": 9.073512648633565e-06, "loss": 0.4519, "step": 3528 }, { "epoch": 0.22104949967898024, "grad_norm": 0.7917673211552705, "learning_rate": 9.072924336206408e-06, "loss": 0.4222, "step": 3529 }, { "epoch": 0.22111213767832255, "grad_norm": 0.8132525170543327, "learning_rate": 9.072335856136929e-06, "loss": 0.4412, "step": 3530 }, { "epoch": 0.22117477567766486, "grad_norm": 0.8183050254380189, "learning_rate": 9.07174720844935e-06, "loss": 0.4207, "step": 3531 }, { "epoch": 0.22123741367700717, "grad_norm": 0.8137215100378563, "learning_rate": 9.0711583931679e-06, "loss": 0.4394, "step": 3532 }, { "epoch": 0.22130005167634945, "grad_norm": 0.8338738408115987, "learning_rate": 9.070569410316815e-06, "loss": 0.4221, "step": 3533 }, { "epoch": 0.22136268967569175, "grad_norm": 0.8675327922517426, "learning_rate": 9.069980259920337e-06, "loss": 0.445, "step": 3534 }, { "epoch": 0.22142532767503406, "grad_norm": 0.7715128703811805, "learning_rate": 9.069390942002718e-06, "loss": 0.4032, "step": 3535 }, { "epoch": 0.22148796567437637, "grad_norm": 0.7762014601525519, "learning_rate": 9.06880145658821e-06, "loss": 0.4259, "step": 3536 }, { "epoch": 0.22155060367371865, "grad_norm": 0.8532224722321907, "learning_rate": 9.068211803701079e-06, "loss": 0.4657, "step": 3537 }, { "epoch": 0.22161324167306096, "grad_norm": 0.8792214686957324, "learning_rate": 9.067621983365597e-06, "loss": 0.4676, "step": 3538 }, { "epoch": 0.22167587967240326, "grad_norm": 0.7652470054453844, "learning_rate": 9.067031995606038e-06, "loss": 0.4346, "step": 3539 }, { "epoch": 0.22173851767174557, "grad_norm": 0.8323376523279931, "learning_rate": 9.066441840446687e-06, "loss": 0.4431, "step": 3540 }, { "epoch": 0.22180115567108785, "grad_norm": 0.9189318788021077, "learning_rate": 9.065851517911836e-06, "loss": 0.5086, "step": 3541 }, { "epoch": 0.22186379367043016, "grad_norm": 0.8732893366796302, "learning_rate": 9.065261028025782e-06, "loss": 0.4096, "step": 3542 }, { "epoch": 0.22192643166977247, "grad_norm": 0.8732373234277946, "learning_rate": 9.064670370812828e-06, "loss": 0.4619, "step": 3543 }, { "epoch": 0.22198906966911477, "grad_norm": 0.8519115218465824, "learning_rate": 9.064079546297289e-06, "loss": 0.4122, "step": 3544 }, { "epoch": 0.22205170766845708, "grad_norm": 0.8896210186449152, "learning_rate": 9.06348855450348e-06, "loss": 0.4028, "step": 3545 }, { "epoch": 0.22211434566779936, "grad_norm": 0.8770545894921812, "learning_rate": 9.062897395455728e-06, "loss": 0.3954, "step": 3546 }, { "epoch": 0.22217698366714167, "grad_norm": 0.917849304025567, "learning_rate": 9.062306069178366e-06, "loss": 0.4602, "step": 3547 }, { "epoch": 0.22223962166648398, "grad_norm": 0.8670614496144201, "learning_rate": 9.061714575695731e-06, "loss": 0.4034, "step": 3548 }, { "epoch": 0.22230225966582629, "grad_norm": 0.8334009102478156, "learning_rate": 9.061122915032172e-06, "loss": 0.434, "step": 3549 }, { "epoch": 0.22236489766516856, "grad_norm": 0.8870824317082204, "learning_rate": 9.060531087212039e-06, "loss": 0.4356, "step": 3550 }, { "epoch": 0.22242753566451087, "grad_norm": 0.8046687835409454, "learning_rate": 9.05993909225969e-06, "loss": 0.3884, "step": 3551 }, { "epoch": 0.22249017366385318, "grad_norm": 0.8805309782215427, "learning_rate": 9.0593469301995e-06, "loss": 0.4826, "step": 3552 }, { "epoch": 0.2225528116631955, "grad_norm": 0.9151182408337164, "learning_rate": 9.058754601055833e-06, "loss": 0.5034, "step": 3553 }, { "epoch": 0.22261544966253777, "grad_norm": 0.8624281356703499, "learning_rate": 9.058162104853074e-06, "loss": 0.4481, "step": 3554 }, { "epoch": 0.22267808766188008, "grad_norm": 0.8214631364137688, "learning_rate": 9.05756944161561e-06, "loss": 0.4112, "step": 3555 }, { "epoch": 0.22274072566122238, "grad_norm": 0.8769581053994999, "learning_rate": 9.056976611367833e-06, "loss": 0.4549, "step": 3556 }, { "epoch": 0.2228033636605647, "grad_norm": 0.8615884019885124, "learning_rate": 9.056383614134147e-06, "loss": 0.4164, "step": 3557 }, { "epoch": 0.22286600165990697, "grad_norm": 0.6767465379553247, "learning_rate": 9.05579044993896e-06, "loss": 0.4799, "step": 3558 }, { "epoch": 0.22292863965924928, "grad_norm": 0.9212694619652625, "learning_rate": 9.055197118806684e-06, "loss": 0.4557, "step": 3559 }, { "epoch": 0.22299127765859159, "grad_norm": 0.8131195213615494, "learning_rate": 9.054603620761742e-06, "loss": 0.4528, "step": 3560 }, { "epoch": 0.2230539156579339, "grad_norm": 0.7280578765614866, "learning_rate": 9.054009955828562e-06, "loss": 0.4892, "step": 3561 }, { "epoch": 0.22311655365727617, "grad_norm": 0.8800246093595495, "learning_rate": 9.053416124031579e-06, "loss": 0.4505, "step": 3562 }, { "epoch": 0.22317919165661848, "grad_norm": 0.8456636246968394, "learning_rate": 9.052822125395238e-06, "loss": 0.4387, "step": 3563 }, { "epoch": 0.2232418296559608, "grad_norm": 0.8141798536308668, "learning_rate": 9.052227959943986e-06, "loss": 0.3947, "step": 3564 }, { "epoch": 0.2233044676553031, "grad_norm": 0.9817574131738336, "learning_rate": 9.051633627702278e-06, "loss": 0.4268, "step": 3565 }, { "epoch": 0.2233671056546454, "grad_norm": 0.8393628019501299, "learning_rate": 9.051039128694578e-06, "loss": 0.3848, "step": 3566 }, { "epoch": 0.22342974365398768, "grad_norm": 0.8590844618084081, "learning_rate": 9.050444462945355e-06, "loss": 0.4373, "step": 3567 }, { "epoch": 0.22349238165333, "grad_norm": 0.9214391222263101, "learning_rate": 9.049849630479086e-06, "loss": 0.4208, "step": 3568 }, { "epoch": 0.2235550196526723, "grad_norm": 0.9100006291748037, "learning_rate": 9.049254631320255e-06, "loss": 0.4721, "step": 3569 }, { "epoch": 0.2236176576520146, "grad_norm": 0.8552548573373048, "learning_rate": 9.048659465493353e-06, "loss": 0.4122, "step": 3570 }, { "epoch": 0.2236802956513569, "grad_norm": 0.9282594252677706, "learning_rate": 9.048064133022874e-06, "loss": 0.4502, "step": 3571 }, { "epoch": 0.2237429336506992, "grad_norm": 0.7715374686384755, "learning_rate": 9.047468633933326e-06, "loss": 0.4001, "step": 3572 }, { "epoch": 0.2238055716500415, "grad_norm": 0.8182428274860272, "learning_rate": 9.046872968249217e-06, "loss": 0.3953, "step": 3573 }, { "epoch": 0.2238682096493838, "grad_norm": 0.8972927719198263, "learning_rate": 9.046277135995064e-06, "loss": 0.4193, "step": 3574 }, { "epoch": 0.2239308476487261, "grad_norm": 0.8232806534029271, "learning_rate": 9.045681137195394e-06, "loss": 0.4541, "step": 3575 }, { "epoch": 0.2239934856480684, "grad_norm": 0.8753724604429279, "learning_rate": 9.045084971874738e-06, "loss": 0.4725, "step": 3576 }, { "epoch": 0.2240561236474107, "grad_norm": 0.8946521396492207, "learning_rate": 9.044488640057635e-06, "loss": 0.427, "step": 3577 }, { "epoch": 0.224118761646753, "grad_norm": 0.8968621426419141, "learning_rate": 9.043892141768626e-06, "loss": 0.4484, "step": 3578 }, { "epoch": 0.2241813996460953, "grad_norm": 0.7818726911530873, "learning_rate": 9.043295477032266e-06, "loss": 0.3982, "step": 3579 }, { "epoch": 0.2242440376454376, "grad_norm": 0.8948651865936197, "learning_rate": 9.042698645873116e-06, "loss": 0.4388, "step": 3580 }, { "epoch": 0.2243066756447799, "grad_norm": 0.8154687905268586, "learning_rate": 9.042101648315737e-06, "loss": 0.3933, "step": 3581 }, { "epoch": 0.22436931364412221, "grad_norm": 0.9178305582296664, "learning_rate": 9.041504484384707e-06, "loss": 0.4444, "step": 3582 }, { "epoch": 0.2244319516434645, "grad_norm": 0.8870594082026341, "learning_rate": 9.0409071541046e-06, "loss": 0.4479, "step": 3583 }, { "epoch": 0.2244945896428068, "grad_norm": 0.8757963562963595, "learning_rate": 9.040309657500003e-06, "loss": 0.4555, "step": 3584 }, { "epoch": 0.2245572276421491, "grad_norm": 0.8546885720926737, "learning_rate": 9.039711994595513e-06, "loss": 0.467, "step": 3585 }, { "epoch": 0.22461986564149142, "grad_norm": 0.8235052791578358, "learning_rate": 9.039114165415727e-06, "loss": 0.3976, "step": 3586 }, { "epoch": 0.22468250364083373, "grad_norm": 0.8406702126396312, "learning_rate": 9.038516169985252e-06, "loss": 0.4101, "step": 3587 }, { "epoch": 0.224745141640176, "grad_norm": 0.859588675685274, "learning_rate": 9.037918008328703e-06, "loss": 0.4126, "step": 3588 }, { "epoch": 0.2248077796395183, "grad_norm": 0.8545704971318758, "learning_rate": 9.037319680470699e-06, "loss": 0.4132, "step": 3589 }, { "epoch": 0.22487041763886062, "grad_norm": 0.8583795738651745, "learning_rate": 9.036721186435867e-06, "loss": 0.4087, "step": 3590 }, { "epoch": 0.22493305563820293, "grad_norm": 0.8464809106548128, "learning_rate": 9.036122526248841e-06, "loss": 0.4238, "step": 3591 }, { "epoch": 0.2249956936375452, "grad_norm": 0.8125138545710179, "learning_rate": 9.035523699934263e-06, "loss": 0.4394, "step": 3592 }, { "epoch": 0.22505833163688752, "grad_norm": 0.8773698418755456, "learning_rate": 9.034924707516781e-06, "loss": 0.4569, "step": 3593 }, { "epoch": 0.22512096963622982, "grad_norm": 0.852973862653054, "learning_rate": 9.03432554902105e-06, "loss": 0.4678, "step": 3594 }, { "epoch": 0.22518360763557213, "grad_norm": 0.857960585047986, "learning_rate": 9.033726224471728e-06, "loss": 0.4093, "step": 3595 }, { "epoch": 0.2252462456349144, "grad_norm": 0.9377309539076115, "learning_rate": 9.033126733893489e-06, "loss": 0.4439, "step": 3596 }, { "epoch": 0.22530888363425672, "grad_norm": 0.8178875687126334, "learning_rate": 9.032527077311004e-06, "loss": 0.4359, "step": 3597 }, { "epoch": 0.22537152163359903, "grad_norm": 0.8309884363364699, "learning_rate": 9.031927254748955e-06, "loss": 0.4556, "step": 3598 }, { "epoch": 0.22543415963294133, "grad_norm": 0.8892743770215512, "learning_rate": 9.031327266232034e-06, "loss": 0.4171, "step": 3599 }, { "epoch": 0.2254967976322836, "grad_norm": 0.8141032607997513, "learning_rate": 9.030727111784932e-06, "loss": 0.4281, "step": 3600 }, { "epoch": 0.22555943563162592, "grad_norm": 0.8993256858410686, "learning_rate": 9.030126791432354e-06, "loss": 0.4488, "step": 3601 }, { "epoch": 0.22562207363096823, "grad_norm": 0.918306813247655, "learning_rate": 9.02952630519901e-06, "loss": 0.4522, "step": 3602 }, { "epoch": 0.22568471163031054, "grad_norm": 0.8367776861586605, "learning_rate": 9.028925653109615e-06, "loss": 0.4097, "step": 3603 }, { "epoch": 0.22574734962965282, "grad_norm": 0.8846374185660012, "learning_rate": 9.028324835188893e-06, "loss": 0.4178, "step": 3604 }, { "epoch": 0.22580998762899512, "grad_norm": 0.727796092650797, "learning_rate": 9.027723851461572e-06, "loss": 0.4906, "step": 3605 }, { "epoch": 0.22587262562833743, "grad_norm": 0.8237345762759789, "learning_rate": 9.02712270195239e-06, "loss": 0.4152, "step": 3606 }, { "epoch": 0.22593526362767974, "grad_norm": 0.7922443345674449, "learning_rate": 9.026521386686092e-06, "loss": 0.4123, "step": 3607 }, { "epoch": 0.22599790162702205, "grad_norm": 0.8440250170093321, "learning_rate": 9.025919905687424e-06, "loss": 0.4358, "step": 3608 }, { "epoch": 0.22606053962636433, "grad_norm": 0.89195818915605, "learning_rate": 9.025318258981148e-06, "loss": 0.4518, "step": 3609 }, { "epoch": 0.22612317762570663, "grad_norm": 0.8998229010206983, "learning_rate": 9.024716446592023e-06, "loss": 0.4057, "step": 3610 }, { "epoch": 0.22618581562504894, "grad_norm": 0.9912987269987427, "learning_rate": 9.024114468544822e-06, "loss": 0.4364, "step": 3611 }, { "epoch": 0.22624845362439125, "grad_norm": 0.8830502862595292, "learning_rate": 9.023512324864325e-06, "loss": 0.4293, "step": 3612 }, { "epoch": 0.22631109162373353, "grad_norm": 0.8593596591472704, "learning_rate": 9.022910015575312e-06, "loss": 0.4441, "step": 3613 }, { "epoch": 0.22637372962307584, "grad_norm": 0.8619290218066976, "learning_rate": 9.022307540702576e-06, "loss": 0.4657, "step": 3614 }, { "epoch": 0.22643636762241814, "grad_norm": 0.8683566535220671, "learning_rate": 9.021704900270916e-06, "loss": 0.4211, "step": 3615 }, { "epoch": 0.22649900562176045, "grad_norm": 0.7806352379146742, "learning_rate": 9.021102094305134e-06, "loss": 0.3834, "step": 3616 }, { "epoch": 0.22656164362110273, "grad_norm": 0.8695393513515355, "learning_rate": 9.020499122830045e-06, "loss": 0.4369, "step": 3617 }, { "epoch": 0.22662428162044504, "grad_norm": 0.8349845769735094, "learning_rate": 9.019895985870466e-06, "loss": 0.3852, "step": 3618 }, { "epoch": 0.22668691961978735, "grad_norm": 0.8545860122953967, "learning_rate": 9.019292683451222e-06, "loss": 0.47, "step": 3619 }, { "epoch": 0.22674955761912965, "grad_norm": 0.7814150720087838, "learning_rate": 9.018689215597145e-06, "loss": 0.4153, "step": 3620 }, { "epoch": 0.22681219561847193, "grad_norm": 0.9190723226545144, "learning_rate": 9.018085582333074e-06, "loss": 0.4441, "step": 3621 }, { "epoch": 0.22687483361781424, "grad_norm": 0.8217340878291491, "learning_rate": 9.017481783683854e-06, "loss": 0.4484, "step": 3622 }, { "epoch": 0.22693747161715655, "grad_norm": 0.8241610497968725, "learning_rate": 9.016877819674337e-06, "loss": 0.4368, "step": 3623 }, { "epoch": 0.22700010961649886, "grad_norm": 0.847841036551124, "learning_rate": 9.016273690329385e-06, "loss": 0.446, "step": 3624 }, { "epoch": 0.22706274761584114, "grad_norm": 0.8229904034180477, "learning_rate": 9.015669395673863e-06, "loss": 0.4417, "step": 3625 }, { "epoch": 0.22712538561518344, "grad_norm": 0.8548250159269043, "learning_rate": 9.015064935732643e-06, "loss": 0.434, "step": 3626 }, { "epoch": 0.22718802361452575, "grad_norm": 0.8159545581228318, "learning_rate": 9.014460310530604e-06, "loss": 0.3945, "step": 3627 }, { "epoch": 0.22725066161386806, "grad_norm": 0.8727113804774039, "learning_rate": 9.013855520092635e-06, "loss": 0.4566, "step": 3628 }, { "epoch": 0.22731329961321037, "grad_norm": 0.8353332483878763, "learning_rate": 9.013250564443627e-06, "loss": 0.4394, "step": 3629 }, { "epoch": 0.22737593761255265, "grad_norm": 0.8624163550950759, "learning_rate": 9.01264544360848e-06, "loss": 0.461, "step": 3630 }, { "epoch": 0.22743857561189496, "grad_norm": 0.8486252892448428, "learning_rate": 9.012040157612103e-06, "loss": 0.3931, "step": 3631 }, { "epoch": 0.22750121361123726, "grad_norm": 0.8179914168690471, "learning_rate": 9.011434706479406e-06, "loss": 0.377, "step": 3632 }, { "epoch": 0.22756385161057957, "grad_norm": 0.8321579826468115, "learning_rate": 9.010829090235316e-06, "loss": 0.3845, "step": 3633 }, { "epoch": 0.22762648960992185, "grad_norm": 0.7961289461798637, "learning_rate": 9.010223308904754e-06, "loss": 0.3939, "step": 3634 }, { "epoch": 0.22768912760926416, "grad_norm": 0.8432789609437337, "learning_rate": 9.009617362512657e-06, "loss": 0.4285, "step": 3635 }, { "epoch": 0.22775176560860647, "grad_norm": 0.8548105518309141, "learning_rate": 9.009011251083964e-06, "loss": 0.4404, "step": 3636 }, { "epoch": 0.22781440360794877, "grad_norm": 0.891173280719501, "learning_rate": 9.008404974643625e-06, "loss": 0.4701, "step": 3637 }, { "epoch": 0.22787704160729105, "grad_norm": 0.8529117588625832, "learning_rate": 9.007798533216591e-06, "loss": 0.4339, "step": 3638 }, { "epoch": 0.22793967960663336, "grad_norm": 0.8525482471791082, "learning_rate": 9.007191926827828e-06, "loss": 0.4079, "step": 3639 }, { "epoch": 0.22800231760597567, "grad_norm": 0.8350564781465142, "learning_rate": 9.006585155502301e-06, "loss": 0.4063, "step": 3640 }, { "epoch": 0.22806495560531798, "grad_norm": 0.8691396553844191, "learning_rate": 9.005978219264985e-06, "loss": 0.4014, "step": 3641 }, { "epoch": 0.22812759360466026, "grad_norm": 0.8671149230432913, "learning_rate": 9.005371118140861e-06, "loss": 0.443, "step": 3642 }, { "epoch": 0.22819023160400256, "grad_norm": 0.895613361996229, "learning_rate": 9.00476385215492e-06, "loss": 0.4258, "step": 3643 }, { "epoch": 0.22825286960334487, "grad_norm": 0.8591192489273214, "learning_rate": 9.004156421332156e-06, "loss": 0.4507, "step": 3644 }, { "epoch": 0.22831550760268718, "grad_norm": 0.8512531113734823, "learning_rate": 9.003548825697567e-06, "loss": 0.4518, "step": 3645 }, { "epoch": 0.22837814560202946, "grad_norm": 0.814154493244863, "learning_rate": 9.00294106527617e-06, "loss": 0.4565, "step": 3646 }, { "epoch": 0.22844078360137177, "grad_norm": 0.8988306919368988, "learning_rate": 9.002333140092972e-06, "loss": 0.4898, "step": 3647 }, { "epoch": 0.22850342160071407, "grad_norm": 0.8543412422958783, "learning_rate": 9.001725050173e-06, "loss": 0.4379, "step": 3648 }, { "epoch": 0.22856605960005638, "grad_norm": 0.9015357731751348, "learning_rate": 9.001116795541282e-06, "loss": 0.4868, "step": 3649 }, { "epoch": 0.22862869759939866, "grad_norm": 0.8030811000272579, "learning_rate": 9.000508376222856e-06, "loss": 0.4616, "step": 3650 }, { "epoch": 0.22869133559874097, "grad_norm": 0.7979115769066775, "learning_rate": 8.99989979224276e-06, "loss": 0.4071, "step": 3651 }, { "epoch": 0.22875397359808328, "grad_norm": 0.886059446643181, "learning_rate": 8.999291043626048e-06, "loss": 0.3925, "step": 3652 }, { "epoch": 0.22881661159742558, "grad_norm": 0.8560719629244559, "learning_rate": 8.998682130397773e-06, "loss": 0.3936, "step": 3653 }, { "epoch": 0.2288792495967679, "grad_norm": 0.8103702380942743, "learning_rate": 8.998073052583001e-06, "loss": 0.4242, "step": 3654 }, { "epoch": 0.22894188759611017, "grad_norm": 0.8459386323519481, "learning_rate": 8.9974638102068e-06, "loss": 0.4471, "step": 3655 }, { "epoch": 0.22900452559545248, "grad_norm": 0.9311299842616617, "learning_rate": 8.996854403294246e-06, "loss": 0.4491, "step": 3656 }, { "epoch": 0.2290671635947948, "grad_norm": 0.7953068491551474, "learning_rate": 8.996244831870421e-06, "loss": 0.4263, "step": 3657 }, { "epoch": 0.2291298015941371, "grad_norm": 0.8209280593774693, "learning_rate": 8.99563509596042e-06, "loss": 0.4063, "step": 3658 }, { "epoch": 0.22919243959347937, "grad_norm": 0.8630643221499814, "learning_rate": 8.995025195589334e-06, "loss": 0.4336, "step": 3659 }, { "epoch": 0.22925507759282168, "grad_norm": 0.8805229550459962, "learning_rate": 8.994415130782272e-06, "loss": 0.4343, "step": 3660 }, { "epoch": 0.229317715592164, "grad_norm": 0.787656389980031, "learning_rate": 8.99380490156434e-06, "loss": 0.4129, "step": 3661 }, { "epoch": 0.2293803535915063, "grad_norm": 0.7847861060729685, "learning_rate": 8.993194507960658e-06, "loss": 0.4, "step": 3662 }, { "epoch": 0.22944299159084858, "grad_norm": 0.8631948816226814, "learning_rate": 8.992583949996349e-06, "loss": 0.4831, "step": 3663 }, { "epoch": 0.22950562959019088, "grad_norm": 0.835542580361206, "learning_rate": 8.991973227696543e-06, "loss": 0.4329, "step": 3664 }, { "epoch": 0.2295682675895332, "grad_norm": 0.9374668403490802, "learning_rate": 8.991362341086378e-06, "loss": 0.4341, "step": 3665 }, { "epoch": 0.2296309055888755, "grad_norm": 0.7640931525883586, "learning_rate": 8.990751290190999e-06, "loss": 0.4965, "step": 3666 }, { "epoch": 0.22969354358821778, "grad_norm": 0.8432421447935764, "learning_rate": 8.990140075035555e-06, "loss": 0.4429, "step": 3667 }, { "epoch": 0.2297561815875601, "grad_norm": 0.8700845524036463, "learning_rate": 8.989528695645206e-06, "loss": 0.4329, "step": 3668 }, { "epoch": 0.2298188195869024, "grad_norm": 0.9078597897264133, "learning_rate": 8.988917152045114e-06, "loss": 0.4735, "step": 3669 }, { "epoch": 0.2298814575862447, "grad_norm": 0.7924441469914381, "learning_rate": 8.988305444260453e-06, "loss": 0.4153, "step": 3670 }, { "epoch": 0.22994409558558698, "grad_norm": 0.8367374938706016, "learning_rate": 8.9876935723164e-06, "loss": 0.4118, "step": 3671 }, { "epoch": 0.2300067335849293, "grad_norm": 0.9022932997261867, "learning_rate": 8.987081536238138e-06, "loss": 0.4515, "step": 3672 }, { "epoch": 0.2300693715842716, "grad_norm": 0.8849605245440336, "learning_rate": 8.98646933605086e-06, "loss": 0.4414, "step": 3673 }, { "epoch": 0.2301320095836139, "grad_norm": 0.8391350947148194, "learning_rate": 8.985856971779765e-06, "loss": 0.4564, "step": 3674 }, { "epoch": 0.2301946475829562, "grad_norm": 0.8756518010482653, "learning_rate": 8.985244443450056e-06, "loss": 0.4508, "step": 3675 }, { "epoch": 0.2302572855822985, "grad_norm": 0.8750934179465625, "learning_rate": 8.984631751086949e-06, "loss": 0.4473, "step": 3676 }, { "epoch": 0.2303199235816408, "grad_norm": 0.7306329782285383, "learning_rate": 8.984018894715657e-06, "loss": 0.4914, "step": 3677 }, { "epoch": 0.2303825615809831, "grad_norm": 0.6600640693909249, "learning_rate": 8.983405874361408e-06, "loss": 0.4639, "step": 3678 }, { "epoch": 0.23044519958032542, "grad_norm": 0.8420698375494975, "learning_rate": 8.982792690049434e-06, "loss": 0.4087, "step": 3679 }, { "epoch": 0.2305078375796677, "grad_norm": 0.8690032093019414, "learning_rate": 8.982179341804975e-06, "loss": 0.4705, "step": 3680 }, { "epoch": 0.23057047557901, "grad_norm": 0.8790340420396889, "learning_rate": 8.981565829653274e-06, "loss": 0.4335, "step": 3681 }, { "epoch": 0.2306331135783523, "grad_norm": 0.9066763166144028, "learning_rate": 8.980952153619584e-06, "loss": 0.4362, "step": 3682 }, { "epoch": 0.23069575157769462, "grad_norm": 0.8774478353005137, "learning_rate": 8.980338313729164e-06, "loss": 0.4339, "step": 3683 }, { "epoch": 0.2307583895770369, "grad_norm": 0.8827137421941279, "learning_rate": 8.979724310007281e-06, "loss": 0.4673, "step": 3684 }, { "epoch": 0.2308210275763792, "grad_norm": 0.8005373431857583, "learning_rate": 8.979110142479208e-06, "loss": 0.42, "step": 3685 }, { "epoch": 0.23088366557572151, "grad_norm": 0.8508724699571976, "learning_rate": 8.978495811170221e-06, "loss": 0.4053, "step": 3686 }, { "epoch": 0.23094630357506382, "grad_norm": 0.8448282506290096, "learning_rate": 8.977881316105609e-06, "loss": 0.4229, "step": 3687 }, { "epoch": 0.2310089415744061, "grad_norm": 0.7681157991176513, "learning_rate": 8.977266657310664e-06, "loss": 0.4142, "step": 3688 }, { "epoch": 0.2310715795737484, "grad_norm": 0.83437527374813, "learning_rate": 8.976651834810683e-06, "loss": 0.4084, "step": 3689 }, { "epoch": 0.23113421757309072, "grad_norm": 0.8922894232275208, "learning_rate": 8.976036848630977e-06, "loss": 0.441, "step": 3690 }, { "epoch": 0.23119685557243302, "grad_norm": 0.80720800699337, "learning_rate": 8.975421698796855e-06, "loss": 0.4433, "step": 3691 }, { "epoch": 0.2312594935717753, "grad_norm": 0.8447285505251921, "learning_rate": 8.974806385333638e-06, "loss": 0.4303, "step": 3692 }, { "epoch": 0.2313221315711176, "grad_norm": 0.8223564555337609, "learning_rate": 8.974190908266654e-06, "loss": 0.4001, "step": 3693 }, { "epoch": 0.23138476957045992, "grad_norm": 0.8967267771608749, "learning_rate": 8.973575267621234e-06, "loss": 0.4383, "step": 3694 }, { "epoch": 0.23144740756980223, "grad_norm": 0.8496258839207315, "learning_rate": 8.972959463422716e-06, "loss": 0.4142, "step": 3695 }, { "epoch": 0.23151004556914453, "grad_norm": 0.9154507392469868, "learning_rate": 8.972343495696452e-06, "loss": 0.4399, "step": 3696 }, { "epoch": 0.23157268356848681, "grad_norm": 0.8650069351910815, "learning_rate": 8.97172736446779e-06, "loss": 0.4275, "step": 3697 }, { "epoch": 0.23163532156782912, "grad_norm": 0.8639368026876769, "learning_rate": 8.971111069762096e-06, "loss": 0.4555, "step": 3698 }, { "epoch": 0.23169795956717143, "grad_norm": 0.9458452056303981, "learning_rate": 8.970494611604729e-06, "loss": 0.4635, "step": 3699 }, { "epoch": 0.23176059756651374, "grad_norm": 0.8597297933808521, "learning_rate": 8.96987799002107e-06, "loss": 0.4123, "step": 3700 }, { "epoch": 0.23182323556585602, "grad_norm": 0.8148703502192166, "learning_rate": 8.969261205036494e-06, "loss": 0.4405, "step": 3701 }, { "epoch": 0.23188587356519832, "grad_norm": 0.8320595041150318, "learning_rate": 8.968644256676392e-06, "loss": 0.435, "step": 3702 }, { "epoch": 0.23194851156454063, "grad_norm": 0.909770632935105, "learning_rate": 8.968027144966153e-06, "loss": 0.5001, "step": 3703 }, { "epoch": 0.23201114956388294, "grad_norm": 0.865786128481643, "learning_rate": 8.967409869931183e-06, "loss": 0.4559, "step": 3704 }, { "epoch": 0.23207378756322522, "grad_norm": 0.8464079994323607, "learning_rate": 8.966792431596885e-06, "loss": 0.4411, "step": 3705 }, { "epoch": 0.23213642556256753, "grad_norm": 0.8609449460268267, "learning_rate": 8.966174829988677e-06, "loss": 0.4156, "step": 3706 }, { "epoch": 0.23219906356190984, "grad_norm": 0.8324236758129632, "learning_rate": 8.965557065131973e-06, "loss": 0.451, "step": 3707 }, { "epoch": 0.23226170156125214, "grad_norm": 0.8642968861077522, "learning_rate": 8.964939137052207e-06, "loss": 0.4355, "step": 3708 }, { "epoch": 0.23232433956059442, "grad_norm": 0.7932557856384131, "learning_rate": 8.964321045774808e-06, "loss": 0.4069, "step": 3709 }, { "epoch": 0.23238697755993673, "grad_norm": 0.7958166263296355, "learning_rate": 8.963702791325219e-06, "loss": 0.4134, "step": 3710 }, { "epoch": 0.23244961555927904, "grad_norm": 0.8526060383000797, "learning_rate": 8.963084373728889e-06, "loss": 0.4312, "step": 3711 }, { "epoch": 0.23251225355862135, "grad_norm": 0.9234953090126835, "learning_rate": 8.96246579301127e-06, "loss": 0.4767, "step": 3712 }, { "epoch": 0.23257489155796363, "grad_norm": 0.792676111705942, "learning_rate": 8.961847049197823e-06, "loss": 0.3909, "step": 3713 }, { "epoch": 0.23263752955730593, "grad_norm": 0.8683274822151229, "learning_rate": 8.961228142314015e-06, "loss": 0.4179, "step": 3714 }, { "epoch": 0.23270016755664824, "grad_norm": 0.8852087875172702, "learning_rate": 8.960609072385325e-06, "loss": 0.4722, "step": 3715 }, { "epoch": 0.23276280555599055, "grad_norm": 0.8221505926088775, "learning_rate": 8.959989839437227e-06, "loss": 0.4394, "step": 3716 }, { "epoch": 0.23282544355533286, "grad_norm": 0.8164151022937488, "learning_rate": 8.959370443495215e-06, "loss": 0.4793, "step": 3717 }, { "epoch": 0.23288808155467514, "grad_norm": 0.802246347088223, "learning_rate": 8.958750884584777e-06, "loss": 0.3871, "step": 3718 }, { "epoch": 0.23295071955401744, "grad_norm": 0.812899693939796, "learning_rate": 8.95813116273142e-06, "loss": 0.4181, "step": 3719 }, { "epoch": 0.23301335755335975, "grad_norm": 0.8832434904783107, "learning_rate": 8.957511277960652e-06, "loss": 0.4333, "step": 3720 }, { "epoch": 0.23307599555270206, "grad_norm": 0.8593174286802718, "learning_rate": 8.956891230297981e-06, "loss": 0.4396, "step": 3721 }, { "epoch": 0.23313863355204434, "grad_norm": 0.8530785962661518, "learning_rate": 8.956271019768934e-06, "loss": 0.4384, "step": 3722 }, { "epoch": 0.23320127155138665, "grad_norm": 0.8855744902922944, "learning_rate": 8.95565064639904e-06, "loss": 0.3881, "step": 3723 }, { "epoch": 0.23326390955072895, "grad_norm": 0.8947122930005271, "learning_rate": 8.955030110213827e-06, "loss": 0.435, "step": 3724 }, { "epoch": 0.23332654755007126, "grad_norm": 0.8109033463622566, "learning_rate": 8.954409411238844e-06, "loss": 0.4194, "step": 3725 }, { "epoch": 0.23338918554941354, "grad_norm": 0.8572174454075353, "learning_rate": 8.953788549499634e-06, "loss": 0.4779, "step": 3726 }, { "epoch": 0.23345182354875585, "grad_norm": 0.8207598573752485, "learning_rate": 8.953167525021753e-06, "loss": 0.4206, "step": 3727 }, { "epoch": 0.23351446154809816, "grad_norm": 0.8544864944702866, "learning_rate": 8.952546337830763e-06, "loss": 0.422, "step": 3728 }, { "epoch": 0.23357709954744046, "grad_norm": 0.9060684144219673, "learning_rate": 8.951924987952233e-06, "loss": 0.4227, "step": 3729 }, { "epoch": 0.23363973754678274, "grad_norm": 0.8155004566814957, "learning_rate": 8.951303475411738e-06, "loss": 0.4825, "step": 3730 }, { "epoch": 0.23370237554612505, "grad_norm": 0.856861357625736, "learning_rate": 8.950681800234857e-06, "loss": 0.4208, "step": 3731 }, { "epoch": 0.23376501354546736, "grad_norm": 0.7795471092510527, "learning_rate": 8.95005996244718e-06, "loss": 0.377, "step": 3732 }, { "epoch": 0.23382765154480967, "grad_norm": 0.8548138407839065, "learning_rate": 8.949437962074301e-06, "loss": 0.4103, "step": 3733 }, { "epoch": 0.23389028954415195, "grad_norm": 0.7934149776548439, "learning_rate": 8.948815799141824e-06, "loss": 0.4173, "step": 3734 }, { "epoch": 0.23395292754349425, "grad_norm": 0.8797667176727282, "learning_rate": 8.948193473675355e-06, "loss": 0.4423, "step": 3735 }, { "epoch": 0.23401556554283656, "grad_norm": 0.8811033603951505, "learning_rate": 8.947570985700511e-06, "loss": 0.4356, "step": 3736 }, { "epoch": 0.23407820354217887, "grad_norm": 0.8268778424750751, "learning_rate": 8.946948335242912e-06, "loss": 0.3709, "step": 3737 }, { "epoch": 0.23414084154152118, "grad_norm": 0.8273624294044409, "learning_rate": 8.946325522328186e-06, "loss": 0.4196, "step": 3738 }, { "epoch": 0.23420347954086346, "grad_norm": 0.8477235710455265, "learning_rate": 8.94570254698197e-06, "loss": 0.4288, "step": 3739 }, { "epoch": 0.23426611754020576, "grad_norm": 0.8668471997529342, "learning_rate": 8.945079409229904e-06, "loss": 0.471, "step": 3740 }, { "epoch": 0.23432875553954807, "grad_norm": 0.8363740730863963, "learning_rate": 8.944456109097637e-06, "loss": 0.4131, "step": 3741 }, { "epoch": 0.23439139353889038, "grad_norm": 0.7833328468630085, "learning_rate": 8.943832646610823e-06, "loss": 0.3788, "step": 3742 }, { "epoch": 0.23445403153823266, "grad_norm": 0.8877817020225685, "learning_rate": 8.943209021795129e-06, "loss": 0.4666, "step": 3743 }, { "epoch": 0.23451666953757497, "grad_norm": 0.8471352249358615, "learning_rate": 8.942585234676218e-06, "loss": 0.4314, "step": 3744 }, { "epoch": 0.23457930753691728, "grad_norm": 0.8736702672410306, "learning_rate": 8.941961285279767e-06, "loss": 0.4325, "step": 3745 }, { "epoch": 0.23464194553625958, "grad_norm": 0.8030600213820606, "learning_rate": 8.941337173631457e-06, "loss": 0.4334, "step": 3746 }, { "epoch": 0.23470458353560186, "grad_norm": 0.8377536715516944, "learning_rate": 8.940712899756977e-06, "loss": 0.4187, "step": 3747 }, { "epoch": 0.23476722153494417, "grad_norm": 0.9225531878474668, "learning_rate": 8.940088463682027e-06, "loss": 0.4698, "step": 3748 }, { "epoch": 0.23482985953428648, "grad_norm": 0.8791047795436393, "learning_rate": 8.9394638654323e-06, "loss": 0.4077, "step": 3749 }, { "epoch": 0.23489249753362879, "grad_norm": 0.8326946890180807, "learning_rate": 8.93883910503351e-06, "loss": 0.4817, "step": 3750 }, { "epoch": 0.23495513553297107, "grad_norm": 0.8370256712328739, "learning_rate": 8.938214182511372e-06, "loss": 0.4085, "step": 3751 }, { "epoch": 0.23501777353231337, "grad_norm": 0.8681022986897976, "learning_rate": 8.937589097891607e-06, "loss": 0.412, "step": 3752 }, { "epoch": 0.23508041153165568, "grad_norm": 0.7871696422310682, "learning_rate": 8.936963851199944e-06, "loss": 0.4241, "step": 3753 }, { "epoch": 0.235143049530998, "grad_norm": 0.8508332562874982, "learning_rate": 8.936338442462117e-06, "loss": 0.4082, "step": 3754 }, { "epoch": 0.23520568753034027, "grad_norm": 0.737942693515263, "learning_rate": 8.93571287170387e-06, "loss": 0.483, "step": 3755 }, { "epoch": 0.23526832552968258, "grad_norm": 0.832644952096058, "learning_rate": 8.93508713895095e-06, "loss": 0.4628, "step": 3756 }, { "epoch": 0.23533096352902488, "grad_norm": 0.8521944499636712, "learning_rate": 8.934461244229115e-06, "loss": 0.3606, "step": 3757 }, { "epoch": 0.2353936015283672, "grad_norm": 0.8710977827642152, "learning_rate": 8.933835187564125e-06, "loss": 0.4274, "step": 3758 }, { "epoch": 0.2354562395277095, "grad_norm": 0.8231412767388027, "learning_rate": 8.933208968981746e-06, "loss": 0.4297, "step": 3759 }, { "epoch": 0.23551887752705178, "grad_norm": 0.8216214486741641, "learning_rate": 8.932582588507757e-06, "loss": 0.3866, "step": 3760 }, { "epoch": 0.2355815155263941, "grad_norm": 0.6981204660776404, "learning_rate": 8.93195604616794e-06, "loss": 0.4872, "step": 3761 }, { "epoch": 0.2356441535257364, "grad_norm": 0.8310533193036078, "learning_rate": 8.93132934198808e-06, "loss": 0.4489, "step": 3762 }, { "epoch": 0.2357067915250787, "grad_norm": 0.9030648349247024, "learning_rate": 8.930702475993976e-06, "loss": 0.4639, "step": 3763 }, { "epoch": 0.23576942952442098, "grad_norm": 0.7436556012972492, "learning_rate": 8.930075448211429e-06, "loss": 0.4759, "step": 3764 }, { "epoch": 0.2358320675237633, "grad_norm": 0.8902968841061338, "learning_rate": 8.929448258666246e-06, "loss": 0.4121, "step": 3765 }, { "epoch": 0.2358947055231056, "grad_norm": 0.8844055947360535, "learning_rate": 8.928820907384244e-06, "loss": 0.4538, "step": 3766 }, { "epoch": 0.2359573435224479, "grad_norm": 0.7669257056824479, "learning_rate": 8.928193394391242e-06, "loss": 0.3895, "step": 3767 }, { "epoch": 0.23601998152179018, "grad_norm": 0.7291211176838776, "learning_rate": 8.927565719713074e-06, "loss": 0.4787, "step": 3768 }, { "epoch": 0.2360826195211325, "grad_norm": 0.8533683796020851, "learning_rate": 8.92693788337557e-06, "loss": 0.4365, "step": 3769 }, { "epoch": 0.2361452575204748, "grad_norm": 0.8945645650563434, "learning_rate": 8.926309885404576e-06, "loss": 0.4735, "step": 3770 }, { "epoch": 0.2362078955198171, "grad_norm": 0.9050142450603613, "learning_rate": 8.925681725825937e-06, "loss": 0.4473, "step": 3771 }, { "epoch": 0.2362705335191594, "grad_norm": 0.7328997684334048, "learning_rate": 8.92505340466551e-06, "loss": 0.4825, "step": 3772 }, { "epoch": 0.2363331715185017, "grad_norm": 0.8168763718248608, "learning_rate": 8.924424921949153e-06, "loss": 0.4328, "step": 3773 }, { "epoch": 0.236395809517844, "grad_norm": 0.8679027765215588, "learning_rate": 8.923796277702742e-06, "loss": 0.4383, "step": 3774 }, { "epoch": 0.2364584475171863, "grad_norm": 0.8159089332478857, "learning_rate": 8.923167471952146e-06, "loss": 0.3984, "step": 3775 }, { "epoch": 0.2365210855165286, "grad_norm": 0.8649249714778, "learning_rate": 8.922538504723249e-06, "loss": 0.4182, "step": 3776 }, { "epoch": 0.2365837235158709, "grad_norm": 0.853995694656672, "learning_rate": 8.92190937604194e-06, "loss": 0.4189, "step": 3777 }, { "epoch": 0.2366463615152132, "grad_norm": 0.8728484594156509, "learning_rate": 8.921280085934112e-06, "loss": 0.4516, "step": 3778 }, { "epoch": 0.2367089995145555, "grad_norm": 0.9624284579625785, "learning_rate": 8.920650634425669e-06, "loss": 0.4549, "step": 3779 }, { "epoch": 0.2367716375138978, "grad_norm": 0.8319758140391752, "learning_rate": 8.920021021542516e-06, "loss": 0.4645, "step": 3780 }, { "epoch": 0.2368342755132401, "grad_norm": 0.7819526285350792, "learning_rate": 8.919391247310571e-06, "loss": 0.4165, "step": 3781 }, { "epoch": 0.2368969135125824, "grad_norm": 0.9375142597300046, "learning_rate": 8.918761311755757e-06, "loss": 0.4708, "step": 3782 }, { "epoch": 0.23695955151192472, "grad_norm": 0.7683988973098524, "learning_rate": 8.918131214903999e-06, "loss": 0.3845, "step": 3783 }, { "epoch": 0.23702218951126702, "grad_norm": 0.8412245374335875, "learning_rate": 8.917500956781231e-06, "loss": 0.3808, "step": 3784 }, { "epoch": 0.2370848275106093, "grad_norm": 0.7821946421706883, "learning_rate": 8.916870537413398e-06, "loss": 0.4097, "step": 3785 }, { "epoch": 0.2371474655099516, "grad_norm": 0.7907454386525287, "learning_rate": 8.916239956826447e-06, "loss": 0.4065, "step": 3786 }, { "epoch": 0.23721010350929392, "grad_norm": 0.7873113190848684, "learning_rate": 8.915609215046332e-06, "loss": 0.4217, "step": 3787 }, { "epoch": 0.23727274150863623, "grad_norm": 0.8339480222097977, "learning_rate": 8.914978312099013e-06, "loss": 0.3953, "step": 3788 }, { "epoch": 0.2373353795079785, "grad_norm": 0.9096204570061515, "learning_rate": 8.914347248010465e-06, "loss": 0.458, "step": 3789 }, { "epoch": 0.2373980175073208, "grad_norm": 0.8595155959549242, "learning_rate": 8.913716022806653e-06, "loss": 0.4139, "step": 3790 }, { "epoch": 0.23746065550666312, "grad_norm": 0.7743715643824469, "learning_rate": 8.913084636513565e-06, "loss": 0.4233, "step": 3791 }, { "epoch": 0.23752329350600543, "grad_norm": 0.823921818747712, "learning_rate": 8.912453089157187e-06, "loss": 0.4237, "step": 3792 }, { "epoch": 0.2375859315053477, "grad_norm": 0.8553433689346553, "learning_rate": 8.911821380763513e-06, "loss": 0.4077, "step": 3793 }, { "epoch": 0.23764856950469002, "grad_norm": 0.8852992877950974, "learning_rate": 8.911189511358546e-06, "loss": 0.432, "step": 3794 }, { "epoch": 0.23771120750403232, "grad_norm": 0.8907109895283777, "learning_rate": 8.910557480968292e-06, "loss": 0.4385, "step": 3795 }, { "epoch": 0.23777384550337463, "grad_norm": 0.6926662343486706, "learning_rate": 8.909925289618767e-06, "loss": 0.4865, "step": 3796 }, { "epoch": 0.2378364835027169, "grad_norm": 0.9065537541015602, "learning_rate": 8.90929293733599e-06, "loss": 0.4528, "step": 3797 }, { "epoch": 0.23789912150205922, "grad_norm": 0.8664291846221461, "learning_rate": 8.90866042414599e-06, "loss": 0.4335, "step": 3798 }, { "epoch": 0.23796175950140153, "grad_norm": 0.8199647244571044, "learning_rate": 8.908027750074804e-06, "loss": 0.4253, "step": 3799 }, { "epoch": 0.23802439750074383, "grad_norm": 0.8206976811217591, "learning_rate": 8.907394915148468e-06, "loss": 0.4395, "step": 3800 }, { "epoch": 0.2380870355000861, "grad_norm": 0.8265379624953524, "learning_rate": 8.906761919393031e-06, "loss": 0.4159, "step": 3801 }, { "epoch": 0.23814967349942842, "grad_norm": 1.0872287858124354, "learning_rate": 8.906128762834551e-06, "loss": 0.4224, "step": 3802 }, { "epoch": 0.23821231149877073, "grad_norm": 0.7885165105810398, "learning_rate": 8.905495445499085e-06, "loss": 0.4607, "step": 3803 }, { "epoch": 0.23827494949811304, "grad_norm": 0.7257883668437796, "learning_rate": 8.904861967412702e-06, "loss": 0.4605, "step": 3804 }, { "epoch": 0.23833758749745534, "grad_norm": 0.8237033861063113, "learning_rate": 8.904228328601477e-06, "loss": 0.4237, "step": 3805 }, { "epoch": 0.23840022549679762, "grad_norm": 0.8121962622981318, "learning_rate": 8.903594529091486e-06, "loss": 0.4056, "step": 3806 }, { "epoch": 0.23846286349613993, "grad_norm": 0.9395083858249613, "learning_rate": 8.902960568908823e-06, "loss": 0.4705, "step": 3807 }, { "epoch": 0.23852550149548224, "grad_norm": 0.8151922642998696, "learning_rate": 8.902326448079578e-06, "loss": 0.4093, "step": 3808 }, { "epoch": 0.23858813949482455, "grad_norm": 0.7809194869351681, "learning_rate": 8.901692166629849e-06, "loss": 0.4082, "step": 3809 }, { "epoch": 0.23865077749416683, "grad_norm": 0.8914021784464983, "learning_rate": 8.901057724585748e-06, "loss": 0.4605, "step": 3810 }, { "epoch": 0.23871341549350913, "grad_norm": 0.7898755068372498, "learning_rate": 8.900423121973386e-06, "loss": 0.3815, "step": 3811 }, { "epoch": 0.23877605349285144, "grad_norm": 0.9428074098595759, "learning_rate": 8.899788358818887e-06, "loss": 0.4546, "step": 3812 }, { "epoch": 0.23883869149219375, "grad_norm": 0.8165775005709776, "learning_rate": 8.899153435148375e-06, "loss": 0.3969, "step": 3813 }, { "epoch": 0.23890132949153603, "grad_norm": 0.8885229584028427, "learning_rate": 8.898518350987982e-06, "loss": 0.4378, "step": 3814 }, { "epoch": 0.23896396749087834, "grad_norm": 0.85390972658363, "learning_rate": 8.897883106363852e-06, "loss": 0.425, "step": 3815 }, { "epoch": 0.23902660549022064, "grad_norm": 0.8061128446868445, "learning_rate": 8.897247701302127e-06, "loss": 0.4429, "step": 3816 }, { "epoch": 0.23908924348956295, "grad_norm": 0.8760738655043303, "learning_rate": 8.896612135828965e-06, "loss": 0.4088, "step": 3817 }, { "epoch": 0.23915188148890523, "grad_norm": 0.7886372460823756, "learning_rate": 8.895976409970525e-06, "loss": 0.4334, "step": 3818 }, { "epoch": 0.23921451948824754, "grad_norm": 0.8381486278530715, "learning_rate": 8.895340523752972e-06, "loss": 0.4354, "step": 3819 }, { "epoch": 0.23927715748758985, "grad_norm": 0.8351802877039732, "learning_rate": 8.89470447720248e-06, "loss": 0.4413, "step": 3820 }, { "epoch": 0.23933979548693216, "grad_norm": 0.869732731065243, "learning_rate": 8.894068270345228e-06, "loss": 0.4114, "step": 3821 }, { "epoch": 0.23940243348627444, "grad_norm": 0.8355683772359691, "learning_rate": 8.893431903207405e-06, "loss": 0.3825, "step": 3822 }, { "epoch": 0.23946507148561674, "grad_norm": 0.879670938967612, "learning_rate": 8.892795375815202e-06, "loss": 0.4482, "step": 3823 }, { "epoch": 0.23952770948495905, "grad_norm": 0.7740745987605056, "learning_rate": 8.892158688194818e-06, "loss": 0.3892, "step": 3824 }, { "epoch": 0.23959034748430136, "grad_norm": 0.7737580584148029, "learning_rate": 8.89152184037246e-06, "loss": 0.466, "step": 3825 }, { "epoch": 0.23965298548364367, "grad_norm": 0.9213460248229102, "learning_rate": 8.89088483237434e-06, "loss": 0.4933, "step": 3826 }, { "epoch": 0.23971562348298595, "grad_norm": 0.8807606404555246, "learning_rate": 8.89024766422668e-06, "loss": 0.4417, "step": 3827 }, { "epoch": 0.23977826148232825, "grad_norm": 0.8528797614810344, "learning_rate": 8.889610335955702e-06, "loss": 0.4783, "step": 3828 }, { "epoch": 0.23984089948167056, "grad_norm": 0.9273056398406895, "learning_rate": 8.888972847587643e-06, "loss": 0.448, "step": 3829 }, { "epoch": 0.23990353748101287, "grad_norm": 0.850272709066075, "learning_rate": 8.888335199148739e-06, "loss": 0.4271, "step": 3830 }, { "epoch": 0.23996617548035515, "grad_norm": 0.8031878318490221, "learning_rate": 8.887697390665236e-06, "loss": 0.4322, "step": 3831 }, { "epoch": 0.24002881347969746, "grad_norm": 0.9413499598439845, "learning_rate": 8.887059422163387e-06, "loss": 0.4602, "step": 3832 }, { "epoch": 0.24009145147903976, "grad_norm": 0.841742805368772, "learning_rate": 8.88642129366945e-06, "loss": 0.391, "step": 3833 }, { "epoch": 0.24015408947838207, "grad_norm": 0.6896283012288239, "learning_rate": 8.885783005209692e-06, "loss": 0.4876, "step": 3834 }, { "epoch": 0.24021672747772435, "grad_norm": 0.8247979890008593, "learning_rate": 8.885144556810384e-06, "loss": 0.4328, "step": 3835 }, { "epoch": 0.24027936547706666, "grad_norm": 0.7665878760302159, "learning_rate": 8.884505948497804e-06, "loss": 0.4839, "step": 3836 }, { "epoch": 0.24034200347640897, "grad_norm": 0.8594458316445888, "learning_rate": 8.883867180298238e-06, "loss": 0.4014, "step": 3837 }, { "epoch": 0.24040464147575127, "grad_norm": 0.8970851412262189, "learning_rate": 8.88322825223798e-06, "loss": 0.4351, "step": 3838 }, { "epoch": 0.24046727947509355, "grad_norm": 0.8368585322432915, "learning_rate": 8.882589164343325e-06, "loss": 0.4373, "step": 3839 }, { "epoch": 0.24052991747443586, "grad_norm": 0.8317616985592973, "learning_rate": 8.88194991664058e-06, "loss": 0.4279, "step": 3840 }, { "epoch": 0.24059255547377817, "grad_norm": 0.8669492161647292, "learning_rate": 8.881310509156056e-06, "loss": 0.4196, "step": 3841 }, { "epoch": 0.24065519347312048, "grad_norm": 0.88448827343652, "learning_rate": 8.88067094191607e-06, "loss": 0.4603, "step": 3842 }, { "epoch": 0.24071783147246276, "grad_norm": 0.8525556784536993, "learning_rate": 8.88003121494695e-06, "loss": 0.4428, "step": 3843 }, { "epoch": 0.24078046947180506, "grad_norm": 0.8698667669843686, "learning_rate": 8.879391328275022e-06, "loss": 0.4688, "step": 3844 }, { "epoch": 0.24084310747114737, "grad_norm": 0.8424058990224111, "learning_rate": 8.87875128192663e-06, "loss": 0.4361, "step": 3845 }, { "epoch": 0.24090574547048968, "grad_norm": 0.8237963731744432, "learning_rate": 8.878111075928114e-06, "loss": 0.4445, "step": 3846 }, { "epoch": 0.240968383469832, "grad_norm": 0.7522373810650228, "learning_rate": 8.877470710305825e-06, "loss": 0.4293, "step": 3847 }, { "epoch": 0.24103102146917427, "grad_norm": 0.8622563120630704, "learning_rate": 8.876830185086123e-06, "loss": 0.4454, "step": 3848 }, { "epoch": 0.24109365946851657, "grad_norm": 0.8317776958427996, "learning_rate": 8.87618950029537e-06, "loss": 0.4431, "step": 3849 }, { "epoch": 0.24115629746785888, "grad_norm": 0.8385817934868515, "learning_rate": 8.875548655959937e-06, "loss": 0.451, "step": 3850 }, { "epoch": 0.2412189354672012, "grad_norm": 0.9147669227784786, "learning_rate": 8.874907652106205e-06, "loss": 0.4573, "step": 3851 }, { "epoch": 0.24128157346654347, "grad_norm": 0.9099132211902602, "learning_rate": 8.874266488760553e-06, "loss": 0.4221, "step": 3852 }, { "epoch": 0.24134421146588578, "grad_norm": 0.8312882693080789, "learning_rate": 8.873625165949373e-06, "loss": 0.4198, "step": 3853 }, { "epoch": 0.24140684946522808, "grad_norm": 0.8428534923811094, "learning_rate": 8.872983683699064e-06, "loss": 0.3977, "step": 3854 }, { "epoch": 0.2414694874645704, "grad_norm": 0.6567939349573766, "learning_rate": 8.872342042036026e-06, "loss": 0.4666, "step": 3855 }, { "epoch": 0.24153212546391267, "grad_norm": 0.8974547084029709, "learning_rate": 8.87170024098667e-06, "loss": 0.4225, "step": 3856 }, { "epoch": 0.24159476346325498, "grad_norm": 0.8408053409769061, "learning_rate": 8.871058280577416e-06, "loss": 0.3992, "step": 3857 }, { "epoch": 0.2416574014625973, "grad_norm": 0.9040891501169723, "learning_rate": 8.870416160834683e-06, "loss": 0.4589, "step": 3858 }, { "epoch": 0.2417200394619396, "grad_norm": 0.8918791005733381, "learning_rate": 8.869773881784904e-06, "loss": 0.4386, "step": 3859 }, { "epoch": 0.24178267746128188, "grad_norm": 0.8198138321764742, "learning_rate": 8.869131443454512e-06, "loss": 0.3887, "step": 3860 }, { "epoch": 0.24184531546062418, "grad_norm": 0.9172244978384675, "learning_rate": 8.868488845869953e-06, "loss": 0.4619, "step": 3861 }, { "epoch": 0.2419079534599665, "grad_norm": 0.8073360904063039, "learning_rate": 8.867846089057675e-06, "loss": 0.4096, "step": 3862 }, { "epoch": 0.2419705914593088, "grad_norm": 0.8294842765906776, "learning_rate": 8.867203173044132e-06, "loss": 0.4333, "step": 3863 }, { "epoch": 0.24203322945865108, "grad_norm": 0.9279090155557428, "learning_rate": 8.86656009785579e-06, "loss": 0.4464, "step": 3864 }, { "epoch": 0.24209586745799339, "grad_norm": 0.8453373608632624, "learning_rate": 8.865916863519117e-06, "loss": 0.4684, "step": 3865 }, { "epoch": 0.2421585054573357, "grad_norm": 0.8891045435212308, "learning_rate": 8.865273470060588e-06, "loss": 0.4097, "step": 3866 }, { "epoch": 0.242221143456678, "grad_norm": 0.7612119776152719, "learning_rate": 8.864629917506686e-06, "loss": 0.4008, "step": 3867 }, { "epoch": 0.2422837814560203, "grad_norm": 0.8425358429077083, "learning_rate": 8.8639862058839e-06, "loss": 0.4165, "step": 3868 }, { "epoch": 0.2423464194553626, "grad_norm": 0.7205146577315554, "learning_rate": 8.863342335218722e-06, "loss": 0.4869, "step": 3869 }, { "epoch": 0.2424090574547049, "grad_norm": 0.8306622293801256, "learning_rate": 8.862698305537658e-06, "loss": 0.4315, "step": 3870 }, { "epoch": 0.2424716954540472, "grad_norm": 0.8114015418503143, "learning_rate": 8.862054116867215e-06, "loss": 0.4212, "step": 3871 }, { "epoch": 0.2425343334533895, "grad_norm": 0.9478172237007644, "learning_rate": 8.861409769233907e-06, "loss": 0.4434, "step": 3872 }, { "epoch": 0.2425969714527318, "grad_norm": 0.8143818867955306, "learning_rate": 8.860765262664257e-06, "loss": 0.4067, "step": 3873 }, { "epoch": 0.2426596094520741, "grad_norm": 0.8337477646800545, "learning_rate": 8.860120597184792e-06, "loss": 0.3998, "step": 3874 }, { "epoch": 0.2427222474514164, "grad_norm": 0.8837882693424783, "learning_rate": 8.859475772822047e-06, "loss": 0.4227, "step": 3875 }, { "epoch": 0.24278488545075871, "grad_norm": 0.8287012545766702, "learning_rate": 8.858830789602561e-06, "loss": 0.4351, "step": 3876 }, { "epoch": 0.242847523450101, "grad_norm": 0.8200347357771085, "learning_rate": 8.858185647552887e-06, "loss": 0.4495, "step": 3877 }, { "epoch": 0.2429101614494433, "grad_norm": 0.828852250148796, "learning_rate": 8.857540346699571e-06, "loss": 0.407, "step": 3878 }, { "epoch": 0.2429727994487856, "grad_norm": 0.8610270834975878, "learning_rate": 8.856894887069181e-06, "loss": 0.4395, "step": 3879 }, { "epoch": 0.24303543744812792, "grad_norm": 0.8388962460597877, "learning_rate": 8.856249268688282e-06, "loss": 0.4005, "step": 3880 }, { "epoch": 0.2430980754474702, "grad_norm": 0.783078199444167, "learning_rate": 8.855603491583447e-06, "loss": 0.4422, "step": 3881 }, { "epoch": 0.2431607134468125, "grad_norm": 0.8738444351118478, "learning_rate": 8.854957555781256e-06, "loss": 0.4254, "step": 3882 }, { "epoch": 0.2432233514461548, "grad_norm": 0.819727910904429, "learning_rate": 8.854311461308296e-06, "loss": 0.4421, "step": 3883 }, { "epoch": 0.24328598944549712, "grad_norm": 0.8080467786789121, "learning_rate": 8.853665208191163e-06, "loss": 0.4265, "step": 3884 }, { "epoch": 0.2433486274448394, "grad_norm": 0.8822159975668089, "learning_rate": 8.853018796456455e-06, "loss": 0.4599, "step": 3885 }, { "epoch": 0.2434112654441817, "grad_norm": 0.8325085342319247, "learning_rate": 8.852372226130775e-06, "loss": 0.4223, "step": 3886 }, { "epoch": 0.24347390344352401, "grad_norm": 0.8384478443931428, "learning_rate": 8.851725497240741e-06, "loss": 0.4168, "step": 3887 }, { "epoch": 0.24353654144286632, "grad_norm": 0.7911631499563913, "learning_rate": 8.85107860981297e-06, "loss": 0.497, "step": 3888 }, { "epoch": 0.2435991794422086, "grad_norm": 0.9388830742867762, "learning_rate": 8.850431563874089e-06, "loss": 0.4761, "step": 3889 }, { "epoch": 0.2436618174415509, "grad_norm": 0.7989900350442909, "learning_rate": 8.849784359450731e-06, "loss": 0.4249, "step": 3890 }, { "epoch": 0.24372445544089322, "grad_norm": 0.7632928168690947, "learning_rate": 8.849136996569534e-06, "loss": 0.3702, "step": 3891 }, { "epoch": 0.24378709344023552, "grad_norm": 0.7916218327649255, "learning_rate": 8.848489475257142e-06, "loss": 0.4327, "step": 3892 }, { "epoch": 0.24384973143957783, "grad_norm": 0.8818518745072749, "learning_rate": 8.84784179554021e-06, "loss": 0.4202, "step": 3893 }, { "epoch": 0.2439123694389201, "grad_norm": 0.8078180508906292, "learning_rate": 8.847193957445398e-06, "loss": 0.3919, "step": 3894 }, { "epoch": 0.24397500743826242, "grad_norm": 1.0134739599609854, "learning_rate": 8.846545960999364e-06, "loss": 0.384, "step": 3895 }, { "epoch": 0.24403764543760473, "grad_norm": 0.8365548186179433, "learning_rate": 8.845897806228786e-06, "loss": 0.44, "step": 3896 }, { "epoch": 0.24410028343694704, "grad_norm": 0.854168979261859, "learning_rate": 8.845249493160341e-06, "loss": 0.4857, "step": 3897 }, { "epoch": 0.24416292143628932, "grad_norm": 0.8773137580608138, "learning_rate": 8.844601021820713e-06, "loss": 0.3798, "step": 3898 }, { "epoch": 0.24422555943563162, "grad_norm": 0.7477792885677021, "learning_rate": 8.843952392236595e-06, "loss": 0.4886, "step": 3899 }, { "epoch": 0.24428819743497393, "grad_norm": 0.8225780492494403, "learning_rate": 8.843303604434682e-06, "loss": 0.4226, "step": 3900 }, { "epoch": 0.24435083543431624, "grad_norm": 0.8697849968139006, "learning_rate": 8.842654658441678e-06, "loss": 0.4135, "step": 3901 }, { "epoch": 0.24441347343365852, "grad_norm": 0.7062216234162791, "learning_rate": 8.842005554284296e-06, "loss": 0.4873, "step": 3902 }, { "epoch": 0.24447611143300083, "grad_norm": 0.8876808929294948, "learning_rate": 8.841356291989251e-06, "loss": 0.4538, "step": 3903 }, { "epoch": 0.24453874943234313, "grad_norm": 0.8615366244216608, "learning_rate": 8.84070687158327e-06, "loss": 0.4292, "step": 3904 }, { "epoch": 0.24460138743168544, "grad_norm": 0.891025666469548, "learning_rate": 8.84005729309308e-06, "loss": 0.4181, "step": 3905 }, { "epoch": 0.24466402543102772, "grad_norm": 0.8541436825031111, "learning_rate": 8.83940755654542e-06, "loss": 0.4767, "step": 3906 }, { "epoch": 0.24472666343037003, "grad_norm": 0.7656786724037616, "learning_rate": 8.838757661967033e-06, "loss": 0.3934, "step": 3907 }, { "epoch": 0.24478930142971234, "grad_norm": 0.893968216946324, "learning_rate": 8.838107609384666e-06, "loss": 0.4271, "step": 3908 }, { "epoch": 0.24485193942905464, "grad_norm": 0.7682434705703433, "learning_rate": 8.837457398825079e-06, "loss": 0.4907, "step": 3909 }, { "epoch": 0.24491457742839692, "grad_norm": 0.8020858450020409, "learning_rate": 8.836807030315032e-06, "loss": 0.4196, "step": 3910 }, { "epoch": 0.24497721542773923, "grad_norm": 0.7952655751421835, "learning_rate": 8.836156503881298e-06, "loss": 0.4125, "step": 3911 }, { "epoch": 0.24503985342708154, "grad_norm": 0.6440790070375955, "learning_rate": 8.835505819550647e-06, "loss": 0.4695, "step": 3912 }, { "epoch": 0.24510249142642385, "grad_norm": 0.6698288711652673, "learning_rate": 8.834854977349866e-06, "loss": 0.4876, "step": 3913 }, { "epoch": 0.24516512942576615, "grad_norm": 0.8294154264524439, "learning_rate": 8.834203977305741e-06, "loss": 0.3937, "step": 3914 }, { "epoch": 0.24522776742510843, "grad_norm": 0.8036901287644359, "learning_rate": 8.83355281944507e-06, "loss": 0.3912, "step": 3915 }, { "epoch": 0.24529040542445074, "grad_norm": 0.8595900797391635, "learning_rate": 8.832901503794656e-06, "loss": 0.3929, "step": 3916 }, { "epoch": 0.24535304342379305, "grad_norm": 0.828693740980089, "learning_rate": 8.8322500303813e-06, "loss": 0.4286, "step": 3917 }, { "epoch": 0.24541568142313536, "grad_norm": 0.7838308412862481, "learning_rate": 8.831598399231824e-06, "loss": 0.4285, "step": 3918 }, { "epoch": 0.24547831942247764, "grad_norm": 0.8505136004189834, "learning_rate": 8.830946610373046e-06, "loss": 0.4275, "step": 3919 }, { "epoch": 0.24554095742181994, "grad_norm": 0.8824651557035378, "learning_rate": 8.830294663831795e-06, "loss": 0.4496, "step": 3920 }, { "epoch": 0.24560359542116225, "grad_norm": 0.8057230806582947, "learning_rate": 8.829642559634905e-06, "loss": 0.3987, "step": 3921 }, { "epoch": 0.24566623342050456, "grad_norm": 0.8337911384859699, "learning_rate": 8.828990297809214e-06, "loss": 0.4294, "step": 3922 }, { "epoch": 0.24572887141984684, "grad_norm": 0.7511799382063948, "learning_rate": 8.828337878381575e-06, "loss": 0.3503, "step": 3923 }, { "epoch": 0.24579150941918915, "grad_norm": 0.8392438260652049, "learning_rate": 8.827685301378836e-06, "loss": 0.4345, "step": 3924 }, { "epoch": 0.24585414741853145, "grad_norm": 0.8301309115251826, "learning_rate": 8.82703256682786e-06, "loss": 0.4045, "step": 3925 }, { "epoch": 0.24591678541787376, "grad_norm": 0.8811628109442372, "learning_rate": 8.826379674755514e-06, "loss": 0.4426, "step": 3926 }, { "epoch": 0.24597942341721604, "grad_norm": 0.7210272937916996, "learning_rate": 8.825726625188671e-06, "loss": 0.3701, "step": 3927 }, { "epoch": 0.24604206141655835, "grad_norm": 0.797446591222875, "learning_rate": 8.825073418154209e-06, "loss": 0.4889, "step": 3928 }, { "epoch": 0.24610469941590066, "grad_norm": 0.8004556543304957, "learning_rate": 8.824420053679015e-06, "loss": 0.4024, "step": 3929 }, { "epoch": 0.24616733741524296, "grad_norm": 0.9398015098761298, "learning_rate": 8.823766531789982e-06, "loss": 0.4515, "step": 3930 }, { "epoch": 0.24622997541458524, "grad_norm": 0.84893555273298, "learning_rate": 8.823112852514007e-06, "loss": 0.3969, "step": 3931 }, { "epoch": 0.24629261341392755, "grad_norm": 0.8082357090096378, "learning_rate": 8.822459015878e-06, "loss": 0.454, "step": 3932 }, { "epoch": 0.24635525141326986, "grad_norm": 0.8149649099216556, "learning_rate": 8.82180502190887e-06, "loss": 0.3864, "step": 3933 }, { "epoch": 0.24641788941261217, "grad_norm": 0.8414220148987954, "learning_rate": 8.821150870633534e-06, "loss": 0.4383, "step": 3934 }, { "epoch": 0.24648052741195448, "grad_norm": 0.8917717845363813, "learning_rate": 8.820496562078921e-06, "loss": 0.4485, "step": 3935 }, { "epoch": 0.24654316541129676, "grad_norm": 0.8557713182163477, "learning_rate": 8.81984209627196e-06, "loss": 0.4433, "step": 3936 }, { "epoch": 0.24660580341063906, "grad_norm": 0.8605832071320939, "learning_rate": 8.81918747323959e-06, "loss": 0.4047, "step": 3937 }, { "epoch": 0.24666844140998137, "grad_norm": 0.8918633731066301, "learning_rate": 8.818532693008752e-06, "loss": 0.4309, "step": 3938 }, { "epoch": 0.24673107940932368, "grad_norm": 0.9052288434213177, "learning_rate": 8.817877755606403e-06, "loss": 0.4405, "step": 3939 }, { "epoch": 0.24679371740866596, "grad_norm": 0.8425531645817728, "learning_rate": 8.817222661059494e-06, "loss": 0.3848, "step": 3940 }, { "epoch": 0.24685635540800827, "grad_norm": 0.88802522596488, "learning_rate": 8.816567409394994e-06, "loss": 0.4767, "step": 3941 }, { "epoch": 0.24691899340735057, "grad_norm": 0.746073172058348, "learning_rate": 8.81591200063987e-06, "loss": 0.4817, "step": 3942 }, { "epoch": 0.24698163140669288, "grad_norm": 0.8839649860605019, "learning_rate": 8.8152564348211e-06, "loss": 0.4098, "step": 3943 }, { "epoch": 0.24704426940603516, "grad_norm": 0.8263046082179349, "learning_rate": 8.814600711965667e-06, "loss": 0.4048, "step": 3944 }, { "epoch": 0.24710690740537747, "grad_norm": 0.8417180032994523, "learning_rate": 8.81394483210056e-06, "loss": 0.415, "step": 3945 }, { "epoch": 0.24716954540471978, "grad_norm": 0.7769645628597653, "learning_rate": 8.813288795252778e-06, "loss": 0.3928, "step": 3946 }, { "epoch": 0.24723218340406208, "grad_norm": 0.8325042292079057, "learning_rate": 8.812632601449317e-06, "loss": 0.417, "step": 3947 }, { "epoch": 0.24729482140340436, "grad_norm": 0.8132390544068415, "learning_rate": 8.811976250717194e-06, "loss": 0.4517, "step": 3948 }, { "epoch": 0.24735745940274667, "grad_norm": 0.7449082219852897, "learning_rate": 8.811319743083421e-06, "loss": 0.4922, "step": 3949 }, { "epoch": 0.24742009740208898, "grad_norm": 0.8699520611751945, "learning_rate": 8.810663078575019e-06, "loss": 0.4217, "step": 3950 }, { "epoch": 0.2474827354014313, "grad_norm": 0.9082335699619455, "learning_rate": 8.810006257219017e-06, "loss": 0.4465, "step": 3951 }, { "epoch": 0.24754537340077357, "grad_norm": 0.9134804603508642, "learning_rate": 8.809349279042451e-06, "loss": 0.4327, "step": 3952 }, { "epoch": 0.24760801140011587, "grad_norm": 0.8338625076085205, "learning_rate": 8.808692144072362e-06, "loss": 0.4322, "step": 3953 }, { "epoch": 0.24767064939945818, "grad_norm": 0.8707962130788458, "learning_rate": 8.808034852335798e-06, "loss": 0.426, "step": 3954 }, { "epoch": 0.2477332873988005, "grad_norm": 0.8923298399780081, "learning_rate": 8.807377403859812e-06, "loss": 0.4187, "step": 3955 }, { "epoch": 0.2477959253981428, "grad_norm": 0.8489196890551522, "learning_rate": 8.806719798671464e-06, "loss": 0.4145, "step": 3956 }, { "epoch": 0.24785856339748508, "grad_norm": 0.860791437030712, "learning_rate": 8.806062036797823e-06, "loss": 0.3981, "step": 3957 }, { "epoch": 0.24792120139682738, "grad_norm": 0.7977683992983085, "learning_rate": 8.805404118265964e-06, "loss": 0.4438, "step": 3958 }, { "epoch": 0.2479838393961697, "grad_norm": 0.8874977226503682, "learning_rate": 8.804746043102964e-06, "loss": 0.427, "step": 3959 }, { "epoch": 0.248046477395512, "grad_norm": 0.8477491964339541, "learning_rate": 8.804087811335908e-06, "loss": 0.3959, "step": 3960 }, { "epoch": 0.24810911539485428, "grad_norm": 0.8516045610718661, "learning_rate": 8.803429422991895e-06, "loss": 0.4602, "step": 3961 }, { "epoch": 0.2481717533941966, "grad_norm": 0.8278297097410464, "learning_rate": 8.80277087809802e-06, "loss": 0.4298, "step": 3962 }, { "epoch": 0.2482343913935389, "grad_norm": 0.8588203406574173, "learning_rate": 8.80211217668139e-06, "loss": 0.4355, "step": 3963 }, { "epoch": 0.2482970293928812, "grad_norm": 0.8643087175432878, "learning_rate": 8.801453318769118e-06, "loss": 0.4318, "step": 3964 }, { "epoch": 0.24835966739222348, "grad_norm": 0.7685317436768062, "learning_rate": 8.80079430438832e-06, "loss": 0.4113, "step": 3965 }, { "epoch": 0.2484223053915658, "grad_norm": 0.8722506696107984, "learning_rate": 8.800135133566123e-06, "loss": 0.4224, "step": 3966 }, { "epoch": 0.2484849433909081, "grad_norm": 0.7940496521156916, "learning_rate": 8.799475806329661e-06, "loss": 0.4126, "step": 3967 }, { "epoch": 0.2485475813902504, "grad_norm": 0.7626570419214805, "learning_rate": 8.798816322706068e-06, "loss": 0.394, "step": 3968 }, { "epoch": 0.24861021938959268, "grad_norm": 0.75770252884847, "learning_rate": 8.79815668272249e-06, "loss": 0.5147, "step": 3969 }, { "epoch": 0.248672857388935, "grad_norm": 0.911203549575555, "learning_rate": 8.797496886406077e-06, "loss": 0.4127, "step": 3970 }, { "epoch": 0.2487354953882773, "grad_norm": 0.8467451446730104, "learning_rate": 8.796836933783989e-06, "loss": 0.4543, "step": 3971 }, { "epoch": 0.2487981333876196, "grad_norm": 0.7886040714026854, "learning_rate": 8.796176824883386e-06, "loss": 0.4115, "step": 3972 }, { "epoch": 0.2488607713869619, "grad_norm": 0.7834720706981753, "learning_rate": 8.795516559731443e-06, "loss": 0.4885, "step": 3973 }, { "epoch": 0.2489234093863042, "grad_norm": 0.8541896988055808, "learning_rate": 8.794856138355331e-06, "loss": 0.4252, "step": 3974 }, { "epoch": 0.2489860473856465, "grad_norm": 0.8234770796537271, "learning_rate": 8.79419556078224e-06, "loss": 0.4145, "step": 3975 }, { "epoch": 0.2490486853849888, "grad_norm": 0.8732682649350209, "learning_rate": 8.793534827039352e-06, "loss": 0.4057, "step": 3976 }, { "epoch": 0.24911132338433112, "grad_norm": 0.7907405192563969, "learning_rate": 8.792873937153868e-06, "loss": 0.4188, "step": 3977 }, { "epoch": 0.2491739613836734, "grad_norm": 0.7661231525999435, "learning_rate": 8.792212891152987e-06, "loss": 0.3806, "step": 3978 }, { "epoch": 0.2492365993830157, "grad_norm": 0.8574193404852932, "learning_rate": 8.791551689063922e-06, "loss": 0.4554, "step": 3979 }, { "epoch": 0.249299237382358, "grad_norm": 0.9210277924247867, "learning_rate": 8.790890330913884e-06, "loss": 0.4313, "step": 3980 }, { "epoch": 0.24936187538170032, "grad_norm": 0.8716864221713031, "learning_rate": 8.790228816730097e-06, "loss": 0.4004, "step": 3981 }, { "epoch": 0.2494245133810426, "grad_norm": 0.8863501967325914, "learning_rate": 8.789567146539788e-06, "loss": 0.4321, "step": 3982 }, { "epoch": 0.2494871513803849, "grad_norm": 0.9192102743791001, "learning_rate": 8.788905320370192e-06, "loss": 0.4168, "step": 3983 }, { "epoch": 0.24954978937972722, "grad_norm": 0.7888293444240405, "learning_rate": 8.78824333824855e-06, "loss": 0.3546, "step": 3984 }, { "epoch": 0.24961242737906952, "grad_norm": 0.8127054007122644, "learning_rate": 8.787581200202109e-06, "loss": 0.4249, "step": 3985 }, { "epoch": 0.2496750653784118, "grad_norm": 0.9123532451116468, "learning_rate": 8.786918906258123e-06, "loss": 0.4368, "step": 3986 }, { "epoch": 0.2497377033777541, "grad_norm": 0.8069692707048782, "learning_rate": 8.786256456443852e-06, "loss": 0.4341, "step": 3987 }, { "epoch": 0.24980034137709642, "grad_norm": 0.8260091602599452, "learning_rate": 8.785593850786562e-06, "loss": 0.4237, "step": 3988 }, { "epoch": 0.24986297937643873, "grad_norm": 0.8275867910886306, "learning_rate": 8.784931089313527e-06, "loss": 0.4308, "step": 3989 }, { "epoch": 0.249925617375781, "grad_norm": 0.8280817081755709, "learning_rate": 8.784268172052025e-06, "loss": 0.4348, "step": 3990 }, { "epoch": 0.2499882553751233, "grad_norm": 0.8525714043223429, "learning_rate": 8.783605099029343e-06, "loss": 0.3965, "step": 3991 }, { "epoch": 0.2500508933744656, "grad_norm": 0.8257929322325309, "learning_rate": 8.782941870272772e-06, "loss": 0.4339, "step": 3992 }, { "epoch": 0.25011353137380793, "grad_norm": 0.835901479929909, "learning_rate": 8.782278485809613e-06, "loss": 0.4468, "step": 3993 }, { "epoch": 0.25017616937315024, "grad_norm": 0.8730464664081355, "learning_rate": 8.78161494566717e-06, "loss": 0.4609, "step": 3994 }, { "epoch": 0.25023880737249254, "grad_norm": 0.8773300857719045, "learning_rate": 8.780951249872753e-06, "loss": 0.4115, "step": 3995 }, { "epoch": 0.2503014453718348, "grad_norm": 0.6778757188281711, "learning_rate": 8.78028739845368e-06, "loss": 0.4992, "step": 3996 }, { "epoch": 0.2503640833711771, "grad_norm": 0.8826481398221894, "learning_rate": 8.779623391437277e-06, "loss": 0.4332, "step": 3997 }, { "epoch": 0.2504267213705194, "grad_norm": 0.8486151805303769, "learning_rate": 8.778959228850873e-06, "loss": 0.3737, "step": 3998 }, { "epoch": 0.2504893593698617, "grad_norm": 0.8068130842353114, "learning_rate": 8.778294910721806e-06, "loss": 0.4153, "step": 3999 }, { "epoch": 0.250551997369204, "grad_norm": 0.8643179050700646, "learning_rate": 8.77763043707742e-06, "loss": 0.4377, "step": 4000 }, { "epoch": 0.25061463536854633, "grad_norm": 0.8337989094718706, "learning_rate": 8.776965807945064e-06, "loss": 0.3862, "step": 4001 }, { "epoch": 0.25067727336788864, "grad_norm": 0.8346370459932116, "learning_rate": 8.776301023352092e-06, "loss": 0.3882, "step": 4002 }, { "epoch": 0.25073991136723095, "grad_norm": 0.8607500658153516, "learning_rate": 8.775636083325871e-06, "loss": 0.4289, "step": 4003 }, { "epoch": 0.25080254936657326, "grad_norm": 0.8527968999257378, "learning_rate": 8.774970987893767e-06, "loss": 0.4761, "step": 4004 }, { "epoch": 0.2508651873659155, "grad_norm": 0.8606083304861996, "learning_rate": 8.774305737083158e-06, "loss": 0.4274, "step": 4005 }, { "epoch": 0.2509278253652578, "grad_norm": 0.8517970371708324, "learning_rate": 8.773640330921423e-06, "loss": 0.4224, "step": 4006 }, { "epoch": 0.2509904633646001, "grad_norm": 0.7871580714097057, "learning_rate": 8.772974769435952e-06, "loss": 0.4306, "step": 4007 }, { "epoch": 0.25105310136394243, "grad_norm": 0.8335593979709541, "learning_rate": 8.77230905265414e-06, "loss": 0.4382, "step": 4008 }, { "epoch": 0.25111573936328474, "grad_norm": 0.8397877536480824, "learning_rate": 8.771643180603386e-06, "loss": 0.424, "step": 4009 }, { "epoch": 0.25117837736262705, "grad_norm": 0.8321607123241023, "learning_rate": 8.770977153311098e-06, "loss": 0.4277, "step": 4010 }, { "epoch": 0.25124101536196936, "grad_norm": 0.8257959228212339, "learning_rate": 8.770310970804692e-06, "loss": 0.4299, "step": 4011 }, { "epoch": 0.25130365336131166, "grad_norm": 0.7027646229933975, "learning_rate": 8.769644633111587e-06, "loss": 0.5053, "step": 4012 }, { "epoch": 0.2513662913606539, "grad_norm": 0.8962008297561905, "learning_rate": 8.768978140259208e-06, "loss": 0.4248, "step": 4013 }, { "epoch": 0.2514289293599962, "grad_norm": 0.8635876235673597, "learning_rate": 8.768311492274989e-06, "loss": 0.4661, "step": 4014 }, { "epoch": 0.25149156735933853, "grad_norm": 0.6984922616904659, "learning_rate": 8.76764468918637e-06, "loss": 0.4815, "step": 4015 }, { "epoch": 0.25155420535868084, "grad_norm": 0.8361043977631712, "learning_rate": 8.766977731020797e-06, "loss": 0.3846, "step": 4016 }, { "epoch": 0.25161684335802315, "grad_norm": 0.9063223319797613, "learning_rate": 8.76631061780572e-06, "loss": 0.4589, "step": 4017 }, { "epoch": 0.25167948135736545, "grad_norm": 0.8109453657166967, "learning_rate": 8.7656433495686e-06, "loss": 0.4498, "step": 4018 }, { "epoch": 0.25174211935670776, "grad_norm": 0.8954964535853436, "learning_rate": 8.7649759263369e-06, "loss": 0.4001, "step": 4019 }, { "epoch": 0.25180475735605007, "grad_norm": 0.8532660204909573, "learning_rate": 8.764308348138094e-06, "loss": 0.4439, "step": 4020 }, { "epoch": 0.2518673953553924, "grad_norm": 0.8250954616622888, "learning_rate": 8.763640614999655e-06, "loss": 0.4517, "step": 4021 }, { "epoch": 0.25193003335473463, "grad_norm": 0.7871148052626863, "learning_rate": 8.762972726949072e-06, "loss": 0.4039, "step": 4022 }, { "epoch": 0.25199267135407694, "grad_norm": 0.8766716692447776, "learning_rate": 8.762304684013832e-06, "loss": 0.4627, "step": 4023 }, { "epoch": 0.25205530935341924, "grad_norm": 0.8329470379563066, "learning_rate": 8.761636486221433e-06, "loss": 0.4298, "step": 4024 }, { "epoch": 0.25211794735276155, "grad_norm": 0.9304834953066388, "learning_rate": 8.760968133599377e-06, "loss": 0.4579, "step": 4025 }, { "epoch": 0.25218058535210386, "grad_norm": 0.8155942807351162, "learning_rate": 8.760299626175176e-06, "loss": 0.3996, "step": 4026 }, { "epoch": 0.25224322335144617, "grad_norm": 0.9068826138902492, "learning_rate": 8.759630963976344e-06, "loss": 0.4098, "step": 4027 }, { "epoch": 0.2523058613507885, "grad_norm": 0.8183351615919899, "learning_rate": 8.758962147030403e-06, "loss": 0.5059, "step": 4028 }, { "epoch": 0.2523684993501308, "grad_norm": 0.7434116897236949, "learning_rate": 8.758293175364883e-06, "loss": 0.4833, "step": 4029 }, { "epoch": 0.25243113734947303, "grad_norm": 0.7202063671348433, "learning_rate": 8.757624049007318e-06, "loss": 0.4749, "step": 4030 }, { "epoch": 0.25249377534881534, "grad_norm": 0.9249141937085156, "learning_rate": 8.756954767985251e-06, "loss": 0.4474, "step": 4031 }, { "epoch": 0.25255641334815765, "grad_norm": 0.9266192695867204, "learning_rate": 8.756285332326228e-06, "loss": 0.4409, "step": 4032 }, { "epoch": 0.25261905134749996, "grad_norm": 0.8239882531541107, "learning_rate": 8.755615742057803e-06, "loss": 0.4245, "step": 4033 }, { "epoch": 0.25268168934684226, "grad_norm": 0.8387740152787051, "learning_rate": 8.754945997207536e-06, "loss": 0.4273, "step": 4034 }, { "epoch": 0.25274432734618457, "grad_norm": 0.8363618110387796, "learning_rate": 8.754276097802996e-06, "loss": 0.4024, "step": 4035 }, { "epoch": 0.2528069653455269, "grad_norm": 0.9088609948881546, "learning_rate": 8.753606043871755e-06, "loss": 0.423, "step": 4036 }, { "epoch": 0.2528696033448692, "grad_norm": 0.8616980330135126, "learning_rate": 8.752935835441392e-06, "loss": 0.422, "step": 4037 }, { "epoch": 0.25293224134421144, "grad_norm": 0.8090802892109882, "learning_rate": 8.752265472539493e-06, "loss": 0.4214, "step": 4038 }, { "epoch": 0.25299487934355375, "grad_norm": 0.8153324212532123, "learning_rate": 8.751594955193654e-06, "loss": 0.3816, "step": 4039 }, { "epoch": 0.25305751734289605, "grad_norm": 0.8653924966583726, "learning_rate": 8.750924283431466e-06, "loss": 0.4371, "step": 4040 }, { "epoch": 0.25312015534223836, "grad_norm": 0.8468946572355669, "learning_rate": 8.75025345728054e-06, "loss": 0.4253, "step": 4041 }, { "epoch": 0.25318279334158067, "grad_norm": 0.80847526608545, "learning_rate": 8.749582476768485e-06, "loss": 0.4755, "step": 4042 }, { "epoch": 0.253245431340923, "grad_norm": 0.840014494452502, "learning_rate": 8.748911341922922e-06, "loss": 0.3531, "step": 4043 }, { "epoch": 0.2533080693402653, "grad_norm": 0.8175760762724443, "learning_rate": 8.748240052771469e-06, "loss": 0.4451, "step": 4044 }, { "epoch": 0.2533707073396076, "grad_norm": 0.8131004741833516, "learning_rate": 8.747568609341762e-06, "loss": 0.4079, "step": 4045 }, { "epoch": 0.2534333453389499, "grad_norm": 0.8363344982493179, "learning_rate": 8.746897011661436e-06, "loss": 0.4269, "step": 4046 }, { "epoch": 0.25349598333829215, "grad_norm": 0.8297624750483213, "learning_rate": 8.74622525975813e-06, "loss": 0.4243, "step": 4047 }, { "epoch": 0.25355862133763446, "grad_norm": 1.1296446521804464, "learning_rate": 8.7455533536595e-06, "loss": 0.5161, "step": 4048 }, { "epoch": 0.25362125933697677, "grad_norm": 0.885714755518661, "learning_rate": 8.744881293393197e-06, "loss": 0.4163, "step": 4049 }, { "epoch": 0.2536838973363191, "grad_norm": 0.9654579974089538, "learning_rate": 8.744209078986884e-06, "loss": 0.4398, "step": 4050 }, { "epoch": 0.2537465353356614, "grad_norm": 0.8834356055245031, "learning_rate": 8.743536710468233e-06, "loss": 0.4029, "step": 4051 }, { "epoch": 0.2538091733350037, "grad_norm": 0.8036281498217388, "learning_rate": 8.742864187864915e-06, "loss": 0.3972, "step": 4052 }, { "epoch": 0.253871811334346, "grad_norm": 0.8433060021172315, "learning_rate": 8.742191511204614e-06, "loss": 0.3942, "step": 4053 }, { "epoch": 0.2539344493336883, "grad_norm": 0.8503594381338249, "learning_rate": 8.741518680515013e-06, "loss": 0.4612, "step": 4054 }, { "epoch": 0.25399708733303056, "grad_norm": 0.87309049906947, "learning_rate": 8.74084569582381e-06, "loss": 0.4362, "step": 4055 }, { "epoch": 0.25405972533237287, "grad_norm": 0.8189840997711705, "learning_rate": 8.740172557158704e-06, "loss": 0.4173, "step": 4056 }, { "epoch": 0.2541223633317152, "grad_norm": 0.8805397433437789, "learning_rate": 8.7394992645474e-06, "loss": 0.399, "step": 4057 }, { "epoch": 0.2541850013310575, "grad_norm": 0.8258244002764825, "learning_rate": 8.738825818017615e-06, "loss": 0.4161, "step": 4058 }, { "epoch": 0.2542476393303998, "grad_norm": 0.8937130087385429, "learning_rate": 8.738152217597063e-06, "loss": 0.4017, "step": 4059 }, { "epoch": 0.2543102773297421, "grad_norm": 0.8783038488295667, "learning_rate": 8.737478463313472e-06, "loss": 0.4531, "step": 4060 }, { "epoch": 0.2543729153290844, "grad_norm": 0.9347821276731777, "learning_rate": 8.736804555194574e-06, "loss": 0.4315, "step": 4061 }, { "epoch": 0.2544355533284267, "grad_norm": 0.8367019681998954, "learning_rate": 8.736130493268108e-06, "loss": 0.397, "step": 4062 }, { "epoch": 0.25449819132776896, "grad_norm": 0.862641869813239, "learning_rate": 8.735456277561817e-06, "loss": 0.4265, "step": 4063 }, { "epoch": 0.25456082932711127, "grad_norm": 0.8167908684271009, "learning_rate": 8.734781908103451e-06, "loss": 0.4229, "step": 4064 }, { "epoch": 0.2546234673264536, "grad_norm": 0.852534272003623, "learning_rate": 8.734107384920771e-06, "loss": 0.4135, "step": 4065 }, { "epoch": 0.2546861053257959, "grad_norm": 0.8614369110999623, "learning_rate": 8.733432708041537e-06, "loss": 0.4216, "step": 4066 }, { "epoch": 0.2547487433251382, "grad_norm": 0.7902745276836605, "learning_rate": 8.732757877493519e-06, "loss": 0.4132, "step": 4067 }, { "epoch": 0.2548113813244805, "grad_norm": 0.7640101407326391, "learning_rate": 8.732082893304492e-06, "loss": 0.3884, "step": 4068 }, { "epoch": 0.2548740193238228, "grad_norm": 0.8751282280712543, "learning_rate": 8.731407755502243e-06, "loss": 0.4235, "step": 4069 }, { "epoch": 0.2549366573231651, "grad_norm": 0.8107143154453778, "learning_rate": 8.730732464114558e-06, "loss": 0.4183, "step": 4070 }, { "epoch": 0.2549992953225074, "grad_norm": 0.8034483967532228, "learning_rate": 8.730057019169232e-06, "loss": 0.386, "step": 4071 }, { "epoch": 0.2550619333218497, "grad_norm": 0.7752438439491688, "learning_rate": 8.729381420694066e-06, "loss": 0.3959, "step": 4072 }, { "epoch": 0.255124571321192, "grad_norm": 0.9525373461801837, "learning_rate": 8.728705668716871e-06, "loss": 0.4782, "step": 4073 }, { "epoch": 0.2551872093205343, "grad_norm": 0.9043844522297888, "learning_rate": 8.728029763265456e-06, "loss": 0.4127, "step": 4074 }, { "epoch": 0.2552498473198766, "grad_norm": 0.8540554133572085, "learning_rate": 8.727353704367646e-06, "loss": 0.4268, "step": 4075 }, { "epoch": 0.2553124853192189, "grad_norm": 0.7951614755917567, "learning_rate": 8.726677492051262e-06, "loss": 0.392, "step": 4076 }, { "epoch": 0.2553751233185612, "grad_norm": 0.8575671118462527, "learning_rate": 8.726001126344143e-06, "loss": 0.4676, "step": 4077 }, { "epoch": 0.2554377613179035, "grad_norm": 0.7534835449443222, "learning_rate": 8.725324607274129e-06, "loss": 0.4867, "step": 4078 }, { "epoch": 0.25550039931724583, "grad_norm": 0.8958933068182268, "learning_rate": 8.724647934869059e-06, "loss": 0.4287, "step": 4079 }, { "epoch": 0.2555630373165881, "grad_norm": 0.8921530702934388, "learning_rate": 8.723971109156789e-06, "loss": 0.4121, "step": 4080 }, { "epoch": 0.2556256753159304, "grad_norm": 0.8842527029377476, "learning_rate": 8.723294130165178e-06, "loss": 0.4424, "step": 4081 }, { "epoch": 0.2556883133152727, "grad_norm": 0.8672251271798975, "learning_rate": 8.72261699792209e-06, "loss": 0.4439, "step": 4082 }, { "epoch": 0.255750951314615, "grad_norm": 0.8174245536099849, "learning_rate": 8.721939712455394e-06, "loss": 0.3894, "step": 4083 }, { "epoch": 0.2558135893139573, "grad_norm": 0.7754546666573133, "learning_rate": 8.72126227379297e-06, "loss": 0.4306, "step": 4084 }, { "epoch": 0.2558762273132996, "grad_norm": 0.8044210960035963, "learning_rate": 8.720584681962698e-06, "loss": 0.3881, "step": 4085 }, { "epoch": 0.2559388653126419, "grad_norm": 0.7987131786574133, "learning_rate": 8.719906936992472e-06, "loss": 0.3635, "step": 4086 }, { "epoch": 0.25600150331198424, "grad_norm": 0.8844059442113197, "learning_rate": 8.719229038910183e-06, "loss": 0.415, "step": 4087 }, { "epoch": 0.25606414131132654, "grad_norm": 0.7987504473841601, "learning_rate": 8.71855098774374e-06, "loss": 0.436, "step": 4088 }, { "epoch": 0.2561267793106688, "grad_norm": 0.9030998077054236, "learning_rate": 8.717872783521048e-06, "loss": 0.4006, "step": 4089 }, { "epoch": 0.2561894173100111, "grad_norm": 0.8946249842989602, "learning_rate": 8.71719442627002e-06, "loss": 0.47, "step": 4090 }, { "epoch": 0.2562520553093534, "grad_norm": 0.8510250742048954, "learning_rate": 8.716515916018581e-06, "loss": 0.4336, "step": 4091 }, { "epoch": 0.2563146933086957, "grad_norm": 0.8498914150960122, "learning_rate": 8.715837252794657e-06, "loss": 0.4152, "step": 4092 }, { "epoch": 0.256377331308038, "grad_norm": 0.8627818081420423, "learning_rate": 8.715158436626181e-06, "loss": 0.397, "step": 4093 }, { "epoch": 0.25643996930738033, "grad_norm": 0.8400234555582621, "learning_rate": 8.714479467541094e-06, "loss": 0.39, "step": 4094 }, { "epoch": 0.25650260730672264, "grad_norm": 0.901751824913567, "learning_rate": 8.713800345567345e-06, "loss": 0.4521, "step": 4095 }, { "epoch": 0.25656524530606495, "grad_norm": 0.8410610868140957, "learning_rate": 8.713121070732881e-06, "loss": 0.4268, "step": 4096 }, { "epoch": 0.2566278833054072, "grad_norm": 0.9228303409803958, "learning_rate": 8.712441643065667e-06, "loss": 0.4501, "step": 4097 }, { "epoch": 0.2566905213047495, "grad_norm": 0.8906105603297703, "learning_rate": 8.711762062593664e-06, "loss": 0.4607, "step": 4098 }, { "epoch": 0.2567531593040918, "grad_norm": 0.8203275100898164, "learning_rate": 8.71108232934485e-06, "loss": 0.4099, "step": 4099 }, { "epoch": 0.2568157973034341, "grad_norm": 0.8296190108190206, "learning_rate": 8.710402443347195e-06, "loss": 0.4517, "step": 4100 }, { "epoch": 0.25687843530277643, "grad_norm": 0.8018344273346267, "learning_rate": 8.709722404628687e-06, "loss": 0.3913, "step": 4101 }, { "epoch": 0.25694107330211874, "grad_norm": 0.9259044836392691, "learning_rate": 8.709042213217316e-06, "loss": 0.4171, "step": 4102 }, { "epoch": 0.25700371130146105, "grad_norm": 0.8666464347723971, "learning_rate": 8.70836186914108e-06, "loss": 0.4175, "step": 4103 }, { "epoch": 0.25706634930080335, "grad_norm": 0.8607833134334429, "learning_rate": 8.707681372427979e-06, "loss": 0.4402, "step": 4104 }, { "epoch": 0.2571289873001456, "grad_norm": 0.7973261926806484, "learning_rate": 8.707000723106027e-06, "loss": 0.4139, "step": 4105 }, { "epoch": 0.2571916252994879, "grad_norm": 0.8448986245044621, "learning_rate": 8.706319921203235e-06, "loss": 0.4443, "step": 4106 }, { "epoch": 0.2572542632988302, "grad_norm": 0.7633207886701704, "learning_rate": 8.70563896674763e-06, "loss": 0.3917, "step": 4107 }, { "epoch": 0.25731690129817253, "grad_norm": 0.9034984782431605, "learning_rate": 8.704957859767235e-06, "loss": 0.4848, "step": 4108 }, { "epoch": 0.25737953929751484, "grad_norm": 0.8526934486911246, "learning_rate": 8.704276600290088e-06, "loss": 0.4485, "step": 4109 }, { "epoch": 0.25744217729685714, "grad_norm": 0.8818459648778565, "learning_rate": 8.703595188344229e-06, "loss": 0.4285, "step": 4110 }, { "epoch": 0.25750481529619945, "grad_norm": 0.8925638947081126, "learning_rate": 8.702913623957703e-06, "loss": 0.4528, "step": 4111 }, { "epoch": 0.25756745329554176, "grad_norm": 0.8201131107101752, "learning_rate": 8.702231907158566e-06, "loss": 0.4619, "step": 4112 }, { "epoch": 0.25763009129488407, "grad_norm": 0.8520130580982318, "learning_rate": 8.701550037974877e-06, "loss": 0.462, "step": 4113 }, { "epoch": 0.2576927292942263, "grad_norm": 0.7681098160989638, "learning_rate": 8.7008680164347e-06, "loss": 0.4055, "step": 4114 }, { "epoch": 0.2577553672935686, "grad_norm": 0.8645986946154711, "learning_rate": 8.70018584256611e-06, "loss": 0.4149, "step": 4115 }, { "epoch": 0.25781800529291093, "grad_norm": 0.8262245243114451, "learning_rate": 8.699503516397184e-06, "loss": 0.4056, "step": 4116 }, { "epoch": 0.25788064329225324, "grad_norm": 0.8466517891488097, "learning_rate": 8.698821037956005e-06, "loss": 0.4356, "step": 4117 }, { "epoch": 0.25794328129159555, "grad_norm": 0.8633767376268112, "learning_rate": 8.698138407270666e-06, "loss": 0.4784, "step": 4118 }, { "epoch": 0.25800591929093786, "grad_norm": 0.8297454861892111, "learning_rate": 8.697455624369262e-06, "loss": 0.4203, "step": 4119 }, { "epoch": 0.25806855729028016, "grad_norm": 0.8107553953232715, "learning_rate": 8.696772689279903e-06, "loss": 0.4183, "step": 4120 }, { "epoch": 0.2581311952896225, "grad_norm": 0.8562540706474615, "learning_rate": 8.696089602030691e-06, "loss": 0.4328, "step": 4121 }, { "epoch": 0.2581938332889647, "grad_norm": 0.8484250509469583, "learning_rate": 8.695406362649744e-06, "loss": 0.4586, "step": 4122 }, { "epoch": 0.25825647128830703, "grad_norm": 0.9030155153604227, "learning_rate": 8.694722971165187e-06, "loss": 0.45, "step": 4123 }, { "epoch": 0.25831910928764934, "grad_norm": 0.9004623687470886, "learning_rate": 8.694039427605146e-06, "loss": 0.4989, "step": 4124 }, { "epoch": 0.25838174728699165, "grad_norm": 0.8115540089432384, "learning_rate": 8.693355731997757e-06, "loss": 0.4403, "step": 4125 }, { "epoch": 0.25844438528633396, "grad_norm": 1.0268228430599038, "learning_rate": 8.69267188437116e-06, "loss": 0.4844, "step": 4126 }, { "epoch": 0.25850702328567626, "grad_norm": 0.7937489705501786, "learning_rate": 8.691987884753503e-06, "loss": 0.4266, "step": 4127 }, { "epoch": 0.25856966128501857, "grad_norm": 0.8950093623928707, "learning_rate": 8.69130373317294e-06, "loss": 0.444, "step": 4128 }, { "epoch": 0.2586322992843609, "grad_norm": 0.8037600332349116, "learning_rate": 8.69061942965763e-06, "loss": 0.3983, "step": 4129 }, { "epoch": 0.2586949372837032, "grad_norm": 0.7765868026213463, "learning_rate": 8.689934974235738e-06, "loss": 0.4318, "step": 4130 }, { "epoch": 0.25875757528304544, "grad_norm": 0.842859437513257, "learning_rate": 8.689250366935438e-06, "loss": 0.4502, "step": 4131 }, { "epoch": 0.25882021328238775, "grad_norm": 0.8427641235286387, "learning_rate": 8.68856560778491e-06, "loss": 0.4371, "step": 4132 }, { "epoch": 0.25888285128173005, "grad_norm": 1.2188537247414912, "learning_rate": 8.687880696812334e-06, "loss": 0.5379, "step": 4133 }, { "epoch": 0.25894548928107236, "grad_norm": 0.9177934193737616, "learning_rate": 8.687195634045906e-06, "loss": 0.4095, "step": 4134 }, { "epoch": 0.25900812728041467, "grad_norm": 0.8325167373275758, "learning_rate": 8.686510419513822e-06, "loss": 0.4277, "step": 4135 }, { "epoch": 0.259070765279757, "grad_norm": 0.8706844256043668, "learning_rate": 8.685825053244284e-06, "loss": 0.4707, "step": 4136 }, { "epoch": 0.2591334032790993, "grad_norm": 0.8675673277198752, "learning_rate": 8.685139535265502e-06, "loss": 0.438, "step": 4137 }, { "epoch": 0.2591960412784416, "grad_norm": 0.7496365438447595, "learning_rate": 8.684453865605692e-06, "loss": 0.4058, "step": 4138 }, { "epoch": 0.25925867927778384, "grad_norm": 0.7999650246923696, "learning_rate": 8.68376804429308e-06, "loss": 0.4188, "step": 4139 }, { "epoch": 0.25932131727712615, "grad_norm": 0.8226388059361087, "learning_rate": 8.68308207135589e-06, "loss": 0.4201, "step": 4140 }, { "epoch": 0.25938395527646846, "grad_norm": 0.8540728374532782, "learning_rate": 8.682395946822358e-06, "loss": 0.4376, "step": 4141 }, { "epoch": 0.25944659327581077, "grad_norm": 0.8687466528594343, "learning_rate": 8.681709670720725e-06, "loss": 0.4068, "step": 4142 }, { "epoch": 0.2595092312751531, "grad_norm": 0.9396313935528866, "learning_rate": 8.681023243079238e-06, "loss": 0.459, "step": 4143 }, { "epoch": 0.2595718692744954, "grad_norm": 0.8090497826409534, "learning_rate": 8.680336663926155e-06, "loss": 0.4176, "step": 4144 }, { "epoch": 0.2596345072738377, "grad_norm": 0.8927850723462247, "learning_rate": 8.679649933289727e-06, "loss": 0.462, "step": 4145 }, { "epoch": 0.25969714527318, "grad_norm": 0.7953404774031424, "learning_rate": 8.678963051198227e-06, "loss": 0.391, "step": 4146 }, { "epoch": 0.25975978327252225, "grad_norm": 0.8626296491290686, "learning_rate": 8.678276017679924e-06, "loss": 0.4224, "step": 4147 }, { "epoch": 0.25982242127186456, "grad_norm": 0.880460625055956, "learning_rate": 8.6775888327631e-06, "loss": 0.453, "step": 4148 }, { "epoch": 0.25988505927120686, "grad_norm": 0.8833298815845291, "learning_rate": 8.676901496476034e-06, "loss": 0.4149, "step": 4149 }, { "epoch": 0.25994769727054917, "grad_norm": 0.8164909467232022, "learning_rate": 8.676214008847022e-06, "loss": 0.43, "step": 4150 }, { "epoch": 0.2600103352698915, "grad_norm": 0.8793905356466585, "learning_rate": 8.675526369904356e-06, "loss": 0.4763, "step": 4151 }, { "epoch": 0.2600729732692338, "grad_norm": 0.8885702701468139, "learning_rate": 8.674838579676346e-06, "loss": 0.48, "step": 4152 }, { "epoch": 0.2601356112685761, "grad_norm": 0.7966827805968251, "learning_rate": 8.674150638191296e-06, "loss": 0.4047, "step": 4153 }, { "epoch": 0.2601982492679184, "grad_norm": 0.7951338182888199, "learning_rate": 8.673462545477524e-06, "loss": 0.4392, "step": 4154 }, { "epoch": 0.2602608872672607, "grad_norm": 0.772022276820005, "learning_rate": 8.672774301563353e-06, "loss": 0.4027, "step": 4155 }, { "epoch": 0.26032352526660296, "grad_norm": 0.8044297475841337, "learning_rate": 8.67208590647711e-06, "loss": 0.4822, "step": 4156 }, { "epoch": 0.26038616326594527, "grad_norm": 0.8545097982104368, "learning_rate": 8.671397360247127e-06, "loss": 0.4199, "step": 4157 }, { "epoch": 0.2604488012652876, "grad_norm": 0.8375375564498295, "learning_rate": 8.67070866290175e-06, "loss": 0.417, "step": 4158 }, { "epoch": 0.2605114392646299, "grad_norm": 0.8347396284269828, "learning_rate": 8.670019814469322e-06, "loss": 0.4223, "step": 4159 }, { "epoch": 0.2605740772639722, "grad_norm": 0.7664141545550234, "learning_rate": 8.669330814978197e-06, "loss": 0.4044, "step": 4160 }, { "epoch": 0.2606367152633145, "grad_norm": 0.8658851472236724, "learning_rate": 8.668641664456734e-06, "loss": 0.4424, "step": 4161 }, { "epoch": 0.2606993532626568, "grad_norm": 0.8073978696710217, "learning_rate": 8.6679523629333e-06, "loss": 0.3885, "step": 4162 }, { "epoch": 0.2607619912619991, "grad_norm": 0.8563867734924991, "learning_rate": 8.667262910436267e-06, "loss": 0.4602, "step": 4163 }, { "epoch": 0.26082462926134137, "grad_norm": 0.9503621934285358, "learning_rate": 8.66657330699401e-06, "loss": 0.4737, "step": 4164 }, { "epoch": 0.2608872672606837, "grad_norm": 0.8601179119840539, "learning_rate": 8.665883552634916e-06, "loss": 0.4309, "step": 4165 }, { "epoch": 0.260949905260026, "grad_norm": 0.89149382750126, "learning_rate": 8.665193647387375e-06, "loss": 0.3972, "step": 4166 }, { "epoch": 0.2610125432593683, "grad_norm": 0.8435038981463181, "learning_rate": 8.664503591279783e-06, "loss": 0.3994, "step": 4167 }, { "epoch": 0.2610751812587106, "grad_norm": 0.8064683501015125, "learning_rate": 8.663813384340543e-06, "loss": 0.4072, "step": 4168 }, { "epoch": 0.2611378192580529, "grad_norm": 0.8349379830736554, "learning_rate": 8.663123026598064e-06, "loss": 0.4175, "step": 4169 }, { "epoch": 0.2612004572573952, "grad_norm": 0.8837154569890563, "learning_rate": 8.662432518080761e-06, "loss": 0.408, "step": 4170 }, { "epoch": 0.2612630952567375, "grad_norm": 0.7687421454382941, "learning_rate": 8.661741858817057e-06, "loss": 0.4287, "step": 4171 }, { "epoch": 0.26132573325607983, "grad_norm": 0.8769753877108003, "learning_rate": 8.661051048835376e-06, "loss": 0.4313, "step": 4172 }, { "epoch": 0.2613883712554221, "grad_norm": 0.8729883544898286, "learning_rate": 8.660360088164158e-06, "loss": 0.4174, "step": 4173 }, { "epoch": 0.2614510092547644, "grad_norm": 0.8275486268212456, "learning_rate": 8.659668976831836e-06, "loss": 0.4216, "step": 4174 }, { "epoch": 0.2615136472541067, "grad_norm": 0.8128953738953694, "learning_rate": 8.658977714866861e-06, "loss": 0.4117, "step": 4175 }, { "epoch": 0.261576285253449, "grad_norm": 0.791090837724322, "learning_rate": 8.658286302297683e-06, "loss": 0.4, "step": 4176 }, { "epoch": 0.2616389232527913, "grad_norm": 0.839313556386332, "learning_rate": 8.657594739152763e-06, "loss": 0.4058, "step": 4177 }, { "epoch": 0.2617015612521336, "grad_norm": 0.9103315511234095, "learning_rate": 8.656903025460564e-06, "loss": 0.4394, "step": 4178 }, { "epoch": 0.2617641992514759, "grad_norm": 0.8085927290552466, "learning_rate": 8.656211161249557e-06, "loss": 0.4377, "step": 4179 }, { "epoch": 0.26182683725081823, "grad_norm": 0.9481105813770321, "learning_rate": 8.655519146548222e-06, "loss": 0.4532, "step": 4180 }, { "epoch": 0.2618894752501605, "grad_norm": 0.8643272623861211, "learning_rate": 8.65482698138504e-06, "loss": 0.481, "step": 4181 }, { "epoch": 0.2619521132495028, "grad_norm": 0.7685862516835523, "learning_rate": 8.6541346657885e-06, "loss": 0.3665, "step": 4182 }, { "epoch": 0.2620147512488451, "grad_norm": 0.8067714918795141, "learning_rate": 8.653442199787099e-06, "loss": 0.3908, "step": 4183 }, { "epoch": 0.2620773892481874, "grad_norm": 0.8596068286285956, "learning_rate": 8.65274958340934e-06, "loss": 0.4311, "step": 4184 }, { "epoch": 0.2621400272475297, "grad_norm": 0.77719757908897, "learning_rate": 8.652056816683729e-06, "loss": 0.3765, "step": 4185 }, { "epoch": 0.262202665246872, "grad_norm": 0.8870181598781006, "learning_rate": 8.651363899638783e-06, "loss": 0.4159, "step": 4186 }, { "epoch": 0.26226530324621433, "grad_norm": 0.7568236260405687, "learning_rate": 8.65067083230302e-06, "loss": 0.4215, "step": 4187 }, { "epoch": 0.26232794124555664, "grad_norm": 0.7905007806850509, "learning_rate": 8.649977614704968e-06, "loss": 0.419, "step": 4188 }, { "epoch": 0.2623905792448989, "grad_norm": 0.8755281481990128, "learning_rate": 8.64928424687316e-06, "loss": 0.407, "step": 4189 }, { "epoch": 0.2624532172442412, "grad_norm": 0.8233142455322221, "learning_rate": 8.648590728836135e-06, "loss": 0.3924, "step": 4190 }, { "epoch": 0.2625158552435835, "grad_norm": 0.8630513574663252, "learning_rate": 8.647897060622439e-06, "loss": 0.4308, "step": 4191 }, { "epoch": 0.2625784932429258, "grad_norm": 0.806159791636785, "learning_rate": 8.647203242260622e-06, "loss": 0.4044, "step": 4192 }, { "epoch": 0.2626411312422681, "grad_norm": 0.8783320090245615, "learning_rate": 8.646509273779243e-06, "loss": 0.4828, "step": 4193 }, { "epoch": 0.26270376924161043, "grad_norm": 0.8713936924556135, "learning_rate": 8.645815155206865e-06, "loss": 0.4394, "step": 4194 }, { "epoch": 0.26276640724095274, "grad_norm": 0.8036090596685788, "learning_rate": 8.64512088657206e-06, "loss": 0.4184, "step": 4195 }, { "epoch": 0.26282904524029505, "grad_norm": 0.8821880999133781, "learning_rate": 8.644426467903401e-06, "loss": 0.463, "step": 4196 }, { "epoch": 0.26289168323963735, "grad_norm": 0.8074322854801484, "learning_rate": 8.643731899229474e-06, "loss": 0.4544, "step": 4197 }, { "epoch": 0.2629543212389796, "grad_norm": 0.9064994349627313, "learning_rate": 8.643037180578864e-06, "loss": 0.4376, "step": 4198 }, { "epoch": 0.2630169592383219, "grad_norm": 0.844446282072924, "learning_rate": 8.642342311980168e-06, "loss": 0.4309, "step": 4199 }, { "epoch": 0.2630795972376642, "grad_norm": 0.8597869107530864, "learning_rate": 8.641647293461987e-06, "loss": 0.4518, "step": 4200 }, { "epoch": 0.2631422352370065, "grad_norm": 0.8379200434904158, "learning_rate": 8.640952125052927e-06, "loss": 0.4318, "step": 4201 }, { "epoch": 0.26320487323634884, "grad_norm": 0.7774764337895741, "learning_rate": 8.640256806781602e-06, "loss": 0.4987, "step": 4202 }, { "epoch": 0.26326751123569114, "grad_norm": 0.8612398170239322, "learning_rate": 8.639561338676632e-06, "loss": 0.4232, "step": 4203 }, { "epoch": 0.26333014923503345, "grad_norm": 0.796637768813782, "learning_rate": 8.638865720766639e-06, "loss": 0.4329, "step": 4204 }, { "epoch": 0.26339278723437576, "grad_norm": 0.8207795238135752, "learning_rate": 8.63816995308026e-06, "loss": 0.4626, "step": 4205 }, { "epoch": 0.263455425233718, "grad_norm": 0.7374919467034509, "learning_rate": 8.63747403564613e-06, "loss": 0.3942, "step": 4206 }, { "epoch": 0.2635180632330603, "grad_norm": 0.9012380822645152, "learning_rate": 8.636777968492892e-06, "loss": 0.4268, "step": 4207 }, { "epoch": 0.2635807012324026, "grad_norm": 0.871016094064237, "learning_rate": 8.636081751649199e-06, "loss": 0.4266, "step": 4208 }, { "epoch": 0.26364333923174493, "grad_norm": 0.8950501548613405, "learning_rate": 8.635385385143707e-06, "loss": 0.438, "step": 4209 }, { "epoch": 0.26370597723108724, "grad_norm": 0.8230179037999306, "learning_rate": 8.634688869005076e-06, "loss": 0.4495, "step": 4210 }, { "epoch": 0.26376861523042955, "grad_norm": 0.7870846204428023, "learning_rate": 8.633992203261978e-06, "loss": 0.4221, "step": 4211 }, { "epoch": 0.26383125322977186, "grad_norm": 0.8257170697418107, "learning_rate": 8.633295387943086e-06, "loss": 0.4046, "step": 4212 }, { "epoch": 0.26389389122911416, "grad_norm": 0.8367182946297216, "learning_rate": 8.632598423077081e-06, "loss": 0.4395, "step": 4213 }, { "epoch": 0.2639565292284564, "grad_norm": 0.7901231113966316, "learning_rate": 8.631901308692652e-06, "loss": 0.4109, "step": 4214 }, { "epoch": 0.2640191672277987, "grad_norm": 0.8198548155304398, "learning_rate": 8.631204044818489e-06, "loss": 0.4484, "step": 4215 }, { "epoch": 0.26408180522714103, "grad_norm": 0.8655978753786937, "learning_rate": 8.630506631483296e-06, "loss": 0.4166, "step": 4216 }, { "epoch": 0.26414444322648334, "grad_norm": 0.6905911960104792, "learning_rate": 8.629809068715773e-06, "loss": 0.4904, "step": 4217 }, { "epoch": 0.26420708122582565, "grad_norm": 0.8781621862808804, "learning_rate": 8.629111356544636e-06, "loss": 0.4309, "step": 4218 }, { "epoch": 0.26426971922516795, "grad_norm": 0.7917872476703761, "learning_rate": 8.628413494998605e-06, "loss": 0.3876, "step": 4219 }, { "epoch": 0.26433235722451026, "grad_norm": 0.8709370089137499, "learning_rate": 8.627715484106399e-06, "loss": 0.4253, "step": 4220 }, { "epoch": 0.26439499522385257, "grad_norm": 0.855308827970046, "learning_rate": 8.62701732389675e-06, "loss": 0.4043, "step": 4221 }, { "epoch": 0.2644576332231949, "grad_norm": 0.7942695789312221, "learning_rate": 8.626319014398394e-06, "loss": 0.4104, "step": 4222 }, { "epoch": 0.26452027122253713, "grad_norm": 0.8496632787991478, "learning_rate": 8.625620555640075e-06, "loss": 0.4115, "step": 4223 }, { "epoch": 0.26458290922187944, "grad_norm": 0.9215991927478038, "learning_rate": 8.624921947650541e-06, "loss": 0.4459, "step": 4224 }, { "epoch": 0.26464554722122174, "grad_norm": 0.7648230629585848, "learning_rate": 8.624223190458549e-06, "loss": 0.4072, "step": 4225 }, { "epoch": 0.26470818522056405, "grad_norm": 0.7834468080981228, "learning_rate": 8.623524284092855e-06, "loss": 0.3903, "step": 4226 }, { "epoch": 0.26477082321990636, "grad_norm": 0.8243846718639596, "learning_rate": 8.62282522858223e-06, "loss": 0.4199, "step": 4227 }, { "epoch": 0.26483346121924867, "grad_norm": 0.8460844779401967, "learning_rate": 8.622126023955446e-06, "loss": 0.4289, "step": 4228 }, { "epoch": 0.264896099218591, "grad_norm": 0.8484052593311894, "learning_rate": 8.621426670241282e-06, "loss": 0.4436, "step": 4229 }, { "epoch": 0.2649587372179333, "grad_norm": 0.8827965573847948, "learning_rate": 8.620727167468525e-06, "loss": 0.4335, "step": 4230 }, { "epoch": 0.26502137521727553, "grad_norm": 0.9043161607186704, "learning_rate": 8.620027515665965e-06, "loss": 0.4282, "step": 4231 }, { "epoch": 0.26508401321661784, "grad_norm": 0.8805284970426956, "learning_rate": 8.6193277148624e-06, "loss": 0.4611, "step": 4232 }, { "epoch": 0.26514665121596015, "grad_norm": 0.8989082094761628, "learning_rate": 8.618627765086636e-06, "loss": 0.4396, "step": 4233 }, { "epoch": 0.26520928921530246, "grad_norm": 0.8856512746845503, "learning_rate": 8.61792766636748e-06, "loss": 0.4401, "step": 4234 }, { "epoch": 0.26527192721464476, "grad_norm": 0.9045312913496782, "learning_rate": 8.617227418733751e-06, "loss": 0.3991, "step": 4235 }, { "epoch": 0.2653345652139871, "grad_norm": 0.8003614352053787, "learning_rate": 8.61652702221427e-06, "loss": 0.3708, "step": 4236 }, { "epoch": 0.2653972032133294, "grad_norm": 0.8587265904835804, "learning_rate": 8.615826476837866e-06, "loss": 0.4753, "step": 4237 }, { "epoch": 0.2654598412126717, "grad_norm": 0.894464540289796, "learning_rate": 8.615125782633371e-06, "loss": 0.4497, "step": 4238 }, { "epoch": 0.265522479212014, "grad_norm": 0.9098209072813513, "learning_rate": 8.61442493962963e-06, "loss": 0.4018, "step": 4239 }, { "epoch": 0.26558511721135625, "grad_norm": 0.8009483087615848, "learning_rate": 8.613723947855485e-06, "loss": 0.4229, "step": 4240 }, { "epoch": 0.26564775521069856, "grad_norm": 0.8200695031279328, "learning_rate": 8.613022807339795e-06, "loss": 0.456, "step": 4241 }, { "epoch": 0.26571039321004086, "grad_norm": 0.804942710569211, "learning_rate": 8.612321518111414e-06, "loss": 0.417, "step": 4242 }, { "epoch": 0.26577303120938317, "grad_norm": 0.7909933325599363, "learning_rate": 8.611620080199208e-06, "loss": 0.3819, "step": 4243 }, { "epoch": 0.2658356692087255, "grad_norm": 0.863703128707603, "learning_rate": 8.61091849363205e-06, "loss": 0.4467, "step": 4244 }, { "epoch": 0.2658983072080678, "grad_norm": 0.8939744527728983, "learning_rate": 8.610216758438817e-06, "loss": 0.4495, "step": 4245 }, { "epoch": 0.2659609452074101, "grad_norm": 0.7662234997899136, "learning_rate": 8.609514874648391e-06, "loss": 0.3715, "step": 4246 }, { "epoch": 0.2660235832067524, "grad_norm": 0.8898196124227771, "learning_rate": 8.608812842289662e-06, "loss": 0.4751, "step": 4247 }, { "epoch": 0.26608622120609465, "grad_norm": 0.7113900109575446, "learning_rate": 8.608110661391529e-06, "loss": 0.4961, "step": 4248 }, { "epoch": 0.26614885920543696, "grad_norm": 0.8822845453818352, "learning_rate": 8.60740833198289e-06, "loss": 0.4315, "step": 4249 }, { "epoch": 0.26621149720477927, "grad_norm": 0.8302210957043594, "learning_rate": 8.606705854092656e-06, "loss": 0.441, "step": 4250 }, { "epoch": 0.2662741352041216, "grad_norm": 0.83671895551264, "learning_rate": 8.606003227749738e-06, "loss": 0.4283, "step": 4251 }, { "epoch": 0.2663367732034639, "grad_norm": 0.8691187218310675, "learning_rate": 8.605300452983058e-06, "loss": 0.4379, "step": 4252 }, { "epoch": 0.2663994112028062, "grad_norm": 0.8745916959042912, "learning_rate": 8.604597529821544e-06, "loss": 0.4486, "step": 4253 }, { "epoch": 0.2664620492021485, "grad_norm": 0.8775110285079845, "learning_rate": 8.603894458294124e-06, "loss": 0.4804, "step": 4254 }, { "epoch": 0.2665246872014908, "grad_norm": 0.9363694253329028, "learning_rate": 8.60319123842974e-06, "loss": 0.4832, "step": 4255 }, { "epoch": 0.26658732520083306, "grad_norm": 0.8396760969808199, "learning_rate": 8.602487870257336e-06, "loss": 0.46, "step": 4256 }, { "epoch": 0.26664996320017537, "grad_norm": 0.8038662924681632, "learning_rate": 8.601784353805864e-06, "loss": 0.443, "step": 4257 }, { "epoch": 0.2667126011995177, "grad_norm": 0.7576364664962547, "learning_rate": 8.601080689104278e-06, "loss": 0.345, "step": 4258 }, { "epoch": 0.26677523919886, "grad_norm": 0.8736343975370163, "learning_rate": 8.600376876181542e-06, "loss": 0.4426, "step": 4259 }, { "epoch": 0.2668378771982023, "grad_norm": 0.83749708367297, "learning_rate": 8.599672915066625e-06, "loss": 0.4321, "step": 4260 }, { "epoch": 0.2669005151975446, "grad_norm": 0.9132267711212684, "learning_rate": 8.598968805788505e-06, "loss": 0.4442, "step": 4261 }, { "epoch": 0.2669631531968869, "grad_norm": 0.8380932871211902, "learning_rate": 8.59826454837616e-06, "loss": 0.4417, "step": 4262 }, { "epoch": 0.2670257911962292, "grad_norm": 0.8680800114554649, "learning_rate": 8.597560142858579e-06, "loss": 0.4364, "step": 4263 }, { "epoch": 0.2670884291955715, "grad_norm": 0.8126035059612606, "learning_rate": 8.596855589264752e-06, "loss": 0.4189, "step": 4264 }, { "epoch": 0.26715106719491377, "grad_norm": 0.9033153448015726, "learning_rate": 8.596150887623685e-06, "loss": 0.4449, "step": 4265 }, { "epoch": 0.2672137051942561, "grad_norm": 0.832674813516797, "learning_rate": 8.595446037964377e-06, "loss": 0.4269, "step": 4266 }, { "epoch": 0.2672763431935984, "grad_norm": 0.7965668088220613, "learning_rate": 8.594741040315844e-06, "loss": 0.4445, "step": 4267 }, { "epoch": 0.2673389811929407, "grad_norm": 0.8415780124540057, "learning_rate": 8.594035894707104e-06, "loss": 0.4387, "step": 4268 }, { "epoch": 0.267401619192283, "grad_norm": 0.8172702337232598, "learning_rate": 8.593330601167179e-06, "loss": 0.4232, "step": 4269 }, { "epoch": 0.2674642571916253, "grad_norm": 0.8529384404297077, "learning_rate": 8.592625159725097e-06, "loss": 0.4621, "step": 4270 }, { "epoch": 0.2675268951909676, "grad_norm": 0.9313700170662795, "learning_rate": 8.591919570409898e-06, "loss": 0.4305, "step": 4271 }, { "epoch": 0.2675895331903099, "grad_norm": 0.8371350272018241, "learning_rate": 8.591213833250624e-06, "loss": 0.3848, "step": 4272 }, { "epoch": 0.2676521711896522, "grad_norm": 0.8151455311265557, "learning_rate": 8.590507948276321e-06, "loss": 0.4804, "step": 4273 }, { "epoch": 0.2677148091889945, "grad_norm": 0.8828129850619955, "learning_rate": 8.589801915516044e-06, "loss": 0.4077, "step": 4274 }, { "epoch": 0.2677774471883368, "grad_norm": 0.817616360555858, "learning_rate": 8.589095734998854e-06, "loss": 0.4694, "step": 4275 }, { "epoch": 0.2678400851876791, "grad_norm": 0.8435096321049345, "learning_rate": 8.58838940675382e-06, "loss": 0.4186, "step": 4276 }, { "epoch": 0.2679027231870214, "grad_norm": 0.8387899240095171, "learning_rate": 8.587682930810008e-06, "loss": 0.4403, "step": 4277 }, { "epoch": 0.2679653611863637, "grad_norm": 0.782298409475879, "learning_rate": 8.586976307196502e-06, "loss": 0.4058, "step": 4278 }, { "epoch": 0.268027999185706, "grad_norm": 0.7934555779287984, "learning_rate": 8.586269535942386e-06, "loss": 0.3946, "step": 4279 }, { "epoch": 0.26809063718504833, "grad_norm": 0.8843046923786375, "learning_rate": 8.58556261707675e-06, "loss": 0.3939, "step": 4280 }, { "epoch": 0.26815327518439064, "grad_norm": 0.8594399124159386, "learning_rate": 8.58485555062869e-06, "loss": 0.4462, "step": 4281 }, { "epoch": 0.2682159131837329, "grad_norm": 0.7481909940123735, "learning_rate": 8.584148336627313e-06, "loss": 0.3935, "step": 4282 }, { "epoch": 0.2682785511830752, "grad_norm": 0.7818775035875271, "learning_rate": 8.583440975101723e-06, "loss": 0.4318, "step": 4283 }, { "epoch": 0.2683411891824175, "grad_norm": 0.8331356523878437, "learning_rate": 8.582733466081038e-06, "loss": 0.408, "step": 4284 }, { "epoch": 0.2684038271817598, "grad_norm": 0.6840882925234901, "learning_rate": 8.58202580959438e-06, "loss": 0.4671, "step": 4285 }, { "epoch": 0.2684664651811021, "grad_norm": 0.8146926945505253, "learning_rate": 8.581318005670872e-06, "loss": 0.4431, "step": 4286 }, { "epoch": 0.26852910318044443, "grad_norm": 0.7863312949891271, "learning_rate": 8.580610054339652e-06, "loss": 0.4048, "step": 4287 }, { "epoch": 0.26859174117978674, "grad_norm": 0.7661518735744125, "learning_rate": 8.579901955629856e-06, "loss": 0.4769, "step": 4288 }, { "epoch": 0.26865437917912904, "grad_norm": 0.8225534850209659, "learning_rate": 8.579193709570632e-06, "loss": 0.4427, "step": 4289 }, { "epoch": 0.2687170171784713, "grad_norm": 0.8510424079985967, "learning_rate": 8.57848531619113e-06, "loss": 0.4117, "step": 4290 }, { "epoch": 0.2687796551778136, "grad_norm": 0.840495128593006, "learning_rate": 8.57777677552051e-06, "loss": 0.4434, "step": 4291 }, { "epoch": 0.2688422931771559, "grad_norm": 0.7760964976634867, "learning_rate": 8.577068087587932e-06, "loss": 0.4063, "step": 4292 }, { "epoch": 0.2689049311764982, "grad_norm": 0.8255561554435197, "learning_rate": 8.576359252422568e-06, "loss": 0.4187, "step": 4293 }, { "epoch": 0.2689675691758405, "grad_norm": 0.7838289068980341, "learning_rate": 8.575650270053594e-06, "loss": 0.4164, "step": 4294 }, { "epoch": 0.26903020717518283, "grad_norm": 0.8206667475366541, "learning_rate": 8.574941140510191e-06, "loss": 0.4065, "step": 4295 }, { "epoch": 0.26909284517452514, "grad_norm": 0.9284700589917293, "learning_rate": 8.574231863821547e-06, "loss": 0.4552, "step": 4296 }, { "epoch": 0.26915548317386745, "grad_norm": 0.8128868940368372, "learning_rate": 8.573522440016855e-06, "loss": 0.42, "step": 4297 }, { "epoch": 0.2692181211732097, "grad_norm": 0.8764933889525925, "learning_rate": 8.572812869125318e-06, "loss": 0.4264, "step": 4298 }, { "epoch": 0.269280759172552, "grad_norm": 0.8034501200882022, "learning_rate": 8.572103151176139e-06, "loss": 0.4045, "step": 4299 }, { "epoch": 0.2693433971718943, "grad_norm": 0.8572400171978521, "learning_rate": 8.571393286198531e-06, "loss": 0.4002, "step": 4300 }, { "epoch": 0.2694060351712366, "grad_norm": 0.8322150493540873, "learning_rate": 8.570683274221713e-06, "loss": 0.4319, "step": 4301 }, { "epoch": 0.26946867317057893, "grad_norm": 0.813650769811573, "learning_rate": 8.56997311527491e-06, "loss": 0.4309, "step": 4302 }, { "epoch": 0.26953131116992124, "grad_norm": 0.9154974454057713, "learning_rate": 8.56926280938735e-06, "loss": 0.4635, "step": 4303 }, { "epoch": 0.26959394916926355, "grad_norm": 0.8585675072638985, "learning_rate": 8.568552356588269e-06, "loss": 0.4108, "step": 4304 }, { "epoch": 0.26965658716860585, "grad_norm": 0.8506526083784324, "learning_rate": 8.567841756906912e-06, "loss": 0.401, "step": 4305 }, { "epoch": 0.26971922516794816, "grad_norm": 0.821973636106998, "learning_rate": 8.567131010372524e-06, "loss": 0.3991, "step": 4306 }, { "epoch": 0.2697818631672904, "grad_norm": 0.777753626596208, "learning_rate": 8.566420117014364e-06, "loss": 0.4811, "step": 4307 }, { "epoch": 0.2698445011666327, "grad_norm": 0.8377560596676171, "learning_rate": 8.56570907686169e-06, "loss": 0.4387, "step": 4308 }, { "epoch": 0.26990713916597503, "grad_norm": 0.9084314075619007, "learning_rate": 8.564997889943767e-06, "loss": 0.424, "step": 4309 }, { "epoch": 0.26996977716531734, "grad_norm": 0.8682674473268072, "learning_rate": 8.564286556289871e-06, "loss": 0.4583, "step": 4310 }, { "epoch": 0.27003241516465964, "grad_norm": 0.9389185251170087, "learning_rate": 8.563575075929277e-06, "loss": 0.4679, "step": 4311 }, { "epoch": 0.27009505316400195, "grad_norm": 0.9077161294919951, "learning_rate": 8.562863448891273e-06, "loss": 0.4757, "step": 4312 }, { "epoch": 0.27015769116334426, "grad_norm": 0.8157951583746036, "learning_rate": 8.562151675205146e-06, "loss": 0.391, "step": 4313 }, { "epoch": 0.27022032916268657, "grad_norm": 0.7605615425093023, "learning_rate": 8.561439754900197e-06, "loss": 0.4895, "step": 4314 }, { "epoch": 0.2702829671620288, "grad_norm": 0.7306773831770704, "learning_rate": 8.560727688005725e-06, "loss": 0.3574, "step": 4315 }, { "epoch": 0.2703456051613711, "grad_norm": 1.0359759112559586, "learning_rate": 8.560015474551043e-06, "loss": 0.4458, "step": 4316 }, { "epoch": 0.27040824316071344, "grad_norm": 0.9350998051228273, "learning_rate": 8.559303114565461e-06, "loss": 0.4096, "step": 4317 }, { "epoch": 0.27047088116005574, "grad_norm": 0.8381379279154557, "learning_rate": 8.558590608078303e-06, "loss": 0.4263, "step": 4318 }, { "epoch": 0.27053351915939805, "grad_norm": 0.8667188977784114, "learning_rate": 8.557877955118893e-06, "loss": 0.4056, "step": 4319 }, { "epoch": 0.27059615715874036, "grad_norm": 0.8054907254637244, "learning_rate": 8.55716515571657e-06, "loss": 0.4194, "step": 4320 }, { "epoch": 0.27065879515808267, "grad_norm": 0.8354465839055676, "learning_rate": 8.556452209900665e-06, "loss": 0.4545, "step": 4321 }, { "epoch": 0.270721433157425, "grad_norm": 0.847299501263117, "learning_rate": 8.55573911770053e-06, "loss": 0.4044, "step": 4322 }, { "epoch": 0.2707840711567672, "grad_norm": 0.8634912908905146, "learning_rate": 8.555025879145512e-06, "loss": 0.4315, "step": 4323 }, { "epoch": 0.27084670915610953, "grad_norm": 0.8481564083296793, "learning_rate": 8.554312494264967e-06, "loss": 0.4421, "step": 4324 }, { "epoch": 0.27090934715545184, "grad_norm": 0.8676621392203895, "learning_rate": 8.553598963088264e-06, "loss": 0.4408, "step": 4325 }, { "epoch": 0.27097198515479415, "grad_norm": 0.6702704056733135, "learning_rate": 8.552885285644765e-06, "loss": 0.476, "step": 4326 }, { "epoch": 0.27103462315413646, "grad_norm": 0.8449099361385716, "learning_rate": 8.552171461963848e-06, "loss": 0.4208, "step": 4327 }, { "epoch": 0.27109726115347876, "grad_norm": 0.8405828921672676, "learning_rate": 8.551457492074895e-06, "loss": 0.4379, "step": 4328 }, { "epoch": 0.27115989915282107, "grad_norm": 0.8835216768387961, "learning_rate": 8.550743376007294e-06, "loss": 0.4273, "step": 4329 }, { "epoch": 0.2712225371521634, "grad_norm": 0.8425296747669313, "learning_rate": 8.550029113790434e-06, "loss": 0.4455, "step": 4330 }, { "epoch": 0.2712851751515057, "grad_norm": 0.7903709113873222, "learning_rate": 8.549314705453719e-06, "loss": 0.4259, "step": 4331 }, { "epoch": 0.27134781315084794, "grad_norm": 0.8408432014966198, "learning_rate": 8.54860015102655e-06, "loss": 0.4344, "step": 4332 }, { "epoch": 0.27141045115019025, "grad_norm": 0.8601942577594287, "learning_rate": 8.547885450538341e-06, "loss": 0.4253, "step": 4333 }, { "epoch": 0.27147308914953255, "grad_norm": 0.6542106540432312, "learning_rate": 8.547170604018508e-06, "loss": 0.5088, "step": 4334 }, { "epoch": 0.27153572714887486, "grad_norm": 0.7996202370130091, "learning_rate": 8.546455611496475e-06, "loss": 0.4372, "step": 4335 }, { "epoch": 0.27159836514821717, "grad_norm": 0.8113937931660986, "learning_rate": 8.545740473001671e-06, "loss": 0.4221, "step": 4336 }, { "epoch": 0.2716610031475595, "grad_norm": 0.8228119278925813, "learning_rate": 8.54502518856353e-06, "loss": 0.4541, "step": 4337 }, { "epoch": 0.2717236411469018, "grad_norm": 0.8749710613965043, "learning_rate": 8.544309758211497e-06, "loss": 0.4176, "step": 4338 }, { "epoch": 0.2717862791462441, "grad_norm": 0.8425931249036832, "learning_rate": 8.543594181975014e-06, "loss": 0.4193, "step": 4339 }, { "epoch": 0.27184891714558634, "grad_norm": 0.8604076669791659, "learning_rate": 8.542878459883537e-06, "loss": 0.4277, "step": 4340 }, { "epoch": 0.27191155514492865, "grad_norm": 0.9924355153120403, "learning_rate": 8.542162591966524e-06, "loss": 0.4363, "step": 4341 }, { "epoch": 0.27197419314427096, "grad_norm": 0.801698149586279, "learning_rate": 8.541446578253445e-06, "loss": 0.4489, "step": 4342 }, { "epoch": 0.27203683114361327, "grad_norm": 0.8470267582297076, "learning_rate": 8.540730418773763e-06, "loss": 0.4162, "step": 4343 }, { "epoch": 0.2720994691429556, "grad_norm": 0.8675551316763114, "learning_rate": 8.540014113556963e-06, "loss": 0.4165, "step": 4344 }, { "epoch": 0.2721621071422979, "grad_norm": 0.8523372026275139, "learning_rate": 8.539297662632524e-06, "loss": 0.4177, "step": 4345 }, { "epoch": 0.2722247451416402, "grad_norm": 0.8986272731705139, "learning_rate": 8.538581066029936e-06, "loss": 0.476, "step": 4346 }, { "epoch": 0.2722873831409825, "grad_norm": 0.845832859040655, "learning_rate": 8.537864323778695e-06, "loss": 0.4103, "step": 4347 }, { "epoch": 0.2723500211403248, "grad_norm": 0.8368511526608505, "learning_rate": 8.537147435908302e-06, "loss": 0.412, "step": 4348 }, { "epoch": 0.27241265913966706, "grad_norm": 0.8677445490529927, "learning_rate": 8.536430402448263e-06, "loss": 0.4421, "step": 4349 }, { "epoch": 0.27247529713900936, "grad_norm": 0.8712912399145514, "learning_rate": 8.535713223428092e-06, "loss": 0.4143, "step": 4350 }, { "epoch": 0.2725379351383517, "grad_norm": 0.8166410335301322, "learning_rate": 8.53499589887731e-06, "loss": 0.3838, "step": 4351 }, { "epoch": 0.272600573137694, "grad_norm": 0.8270923877800821, "learning_rate": 8.53427842882544e-06, "loss": 0.4377, "step": 4352 }, { "epoch": 0.2726632111370363, "grad_norm": 0.8756766567626832, "learning_rate": 8.533560813302013e-06, "loss": 0.4156, "step": 4353 }, { "epoch": 0.2727258491363786, "grad_norm": 0.7702458007390299, "learning_rate": 8.532843052336568e-06, "loss": 0.4042, "step": 4354 }, { "epoch": 0.2727884871357209, "grad_norm": 0.8588557747002017, "learning_rate": 8.532125145958647e-06, "loss": 0.4581, "step": 4355 }, { "epoch": 0.2728511251350632, "grad_norm": 0.8411401291868301, "learning_rate": 8.531407094197799e-06, "loss": 0.4287, "step": 4356 }, { "epoch": 0.27291376313440546, "grad_norm": 0.8887189886078217, "learning_rate": 8.53068889708358e-06, "loss": 0.4666, "step": 4357 }, { "epoch": 0.27297640113374777, "grad_norm": 0.8206871667059457, "learning_rate": 8.52997055464555e-06, "loss": 0.4376, "step": 4358 }, { "epoch": 0.2730390391330901, "grad_norm": 0.8604326012472059, "learning_rate": 8.529252066913279e-06, "loss": 0.4421, "step": 4359 }, { "epoch": 0.2731016771324324, "grad_norm": 0.9176247479407834, "learning_rate": 8.528533433916336e-06, "loss": 0.4308, "step": 4360 }, { "epoch": 0.2731643151317747, "grad_norm": 0.9098869624190429, "learning_rate": 8.527814655684303e-06, "loss": 0.4484, "step": 4361 }, { "epoch": 0.273226953131117, "grad_norm": 0.9032944138012245, "learning_rate": 8.527095732246762e-06, "loss": 0.3962, "step": 4362 }, { "epoch": 0.2732895911304593, "grad_norm": 0.9182842058897489, "learning_rate": 8.526376663633307e-06, "loss": 0.4063, "step": 4363 }, { "epoch": 0.2733522291298016, "grad_norm": 0.8554223863822619, "learning_rate": 8.525657449873535e-06, "loss": 0.4356, "step": 4364 }, { "epoch": 0.27341486712914387, "grad_norm": 0.8639391123872765, "learning_rate": 8.524938090997047e-06, "loss": 0.426, "step": 4365 }, { "epoch": 0.2734775051284862, "grad_norm": 0.814369456907431, "learning_rate": 8.524218587033453e-06, "loss": 0.4272, "step": 4366 }, { "epoch": 0.2735401431278285, "grad_norm": 0.8372925464149109, "learning_rate": 8.523498938012367e-06, "loss": 0.4798, "step": 4367 }, { "epoch": 0.2736027811271708, "grad_norm": 0.846547176526304, "learning_rate": 8.522779143963412e-06, "loss": 0.4699, "step": 4368 }, { "epoch": 0.2736654191265131, "grad_norm": 0.7965750575918236, "learning_rate": 8.522059204916213e-06, "loss": 0.3849, "step": 4369 }, { "epoch": 0.2737280571258554, "grad_norm": 0.8503504189279292, "learning_rate": 8.521339120900405e-06, "loss": 0.4246, "step": 4370 }, { "epoch": 0.2737906951251977, "grad_norm": 0.7685159005936921, "learning_rate": 8.520618891945623e-06, "loss": 0.3789, "step": 4371 }, { "epoch": 0.27385333312454, "grad_norm": 0.8665474206021262, "learning_rate": 8.519898518081515e-06, "loss": 0.4717, "step": 4372 }, { "epoch": 0.27391597112388233, "grad_norm": 0.8492742315468849, "learning_rate": 8.51917799933773e-06, "loss": 0.4167, "step": 4373 }, { "epoch": 0.2739786091232246, "grad_norm": 0.8522639101759041, "learning_rate": 8.518457335743927e-06, "loss": 0.3968, "step": 4374 }, { "epoch": 0.2740412471225669, "grad_norm": 0.8428183661574936, "learning_rate": 8.517736527329765e-06, "loss": 0.4221, "step": 4375 }, { "epoch": 0.2741038851219092, "grad_norm": 0.8575996627981611, "learning_rate": 8.517015574124914e-06, "loss": 0.4415, "step": 4376 }, { "epoch": 0.2741665231212515, "grad_norm": 0.7683905985288342, "learning_rate": 8.516294476159052e-06, "loss": 0.4326, "step": 4377 }, { "epoch": 0.2742291611205938, "grad_norm": 0.8645290344362941, "learning_rate": 8.515573233461855e-06, "loss": 0.4501, "step": 4378 }, { "epoch": 0.2742917991199361, "grad_norm": 0.8297651751621385, "learning_rate": 8.514851846063011e-06, "loss": 0.4125, "step": 4379 }, { "epoch": 0.2743544371192784, "grad_norm": 0.8044575819377482, "learning_rate": 8.514130313992214e-06, "loss": 0.4359, "step": 4380 }, { "epoch": 0.27441707511862073, "grad_norm": 0.8776688291327135, "learning_rate": 8.513408637279159e-06, "loss": 0.4309, "step": 4381 }, { "epoch": 0.274479713117963, "grad_norm": 0.7939127340336007, "learning_rate": 8.512686815953554e-06, "loss": 0.4255, "step": 4382 }, { "epoch": 0.2745423511173053, "grad_norm": 0.939656281118891, "learning_rate": 8.511964850045107e-06, "loss": 0.4201, "step": 4383 }, { "epoch": 0.2746049891166476, "grad_norm": 0.8023818948856254, "learning_rate": 8.511242739583535e-06, "loss": 0.4085, "step": 4384 }, { "epoch": 0.2746676271159899, "grad_norm": 0.8990318968605082, "learning_rate": 8.510520484598561e-06, "loss": 0.4486, "step": 4385 }, { "epoch": 0.2747302651153322, "grad_norm": 0.8533641732611068, "learning_rate": 8.509798085119912e-06, "loss": 0.4532, "step": 4386 }, { "epoch": 0.2747929031146745, "grad_norm": 0.8775336643389041, "learning_rate": 8.50907554117732e-06, "loss": 0.4341, "step": 4387 }, { "epoch": 0.27485554111401683, "grad_norm": 0.8702741772126047, "learning_rate": 8.50835285280053e-06, "loss": 0.3998, "step": 4388 }, { "epoch": 0.27491817911335914, "grad_norm": 0.8360028879842281, "learning_rate": 8.507630020019285e-06, "loss": 0.416, "step": 4389 }, { "epoch": 0.27498081711270145, "grad_norm": 0.815306707388127, "learning_rate": 8.506907042863337e-06, "loss": 0.4368, "step": 4390 }, { "epoch": 0.2750434551120437, "grad_norm": 0.8293627627805774, "learning_rate": 8.506183921362443e-06, "loss": 0.4391, "step": 4391 }, { "epoch": 0.275106093111386, "grad_norm": 0.8234033116082576, "learning_rate": 8.50546065554637e-06, "loss": 0.4212, "step": 4392 }, { "epoch": 0.2751687311107283, "grad_norm": 0.840378318076946, "learning_rate": 8.504737245444886e-06, "loss": 0.4355, "step": 4393 }, { "epoch": 0.2752313691100706, "grad_norm": 0.8737379073319701, "learning_rate": 8.504013691087765e-06, "loss": 0.441, "step": 4394 }, { "epoch": 0.27529400710941293, "grad_norm": 0.8293156595283807, "learning_rate": 8.503289992504791e-06, "loss": 0.4029, "step": 4395 }, { "epoch": 0.27535664510875524, "grad_norm": 0.977572279285155, "learning_rate": 8.50256614972575e-06, "loss": 0.4598, "step": 4396 }, { "epoch": 0.27541928310809755, "grad_norm": 0.8373837333840282, "learning_rate": 8.501842162780436e-06, "loss": 0.4053, "step": 4397 }, { "epoch": 0.27548192110743985, "grad_norm": 0.8679219281525978, "learning_rate": 8.50111803169865e-06, "loss": 0.457, "step": 4398 }, { "epoch": 0.2755445591067821, "grad_norm": 0.8205877433387483, "learning_rate": 8.500393756510194e-06, "loss": 0.4356, "step": 4399 }, { "epoch": 0.2756071971061244, "grad_norm": 0.8245302570332514, "learning_rate": 8.499669337244882e-06, "loss": 0.4537, "step": 4400 }, { "epoch": 0.2756698351054667, "grad_norm": 0.8188661102198597, "learning_rate": 8.49894477393253e-06, "loss": 0.429, "step": 4401 }, { "epoch": 0.27573247310480903, "grad_norm": 0.8684908132316613, "learning_rate": 8.498220066602964e-06, "loss": 0.4405, "step": 4402 }, { "epoch": 0.27579511110415134, "grad_norm": 0.8070790575384066, "learning_rate": 8.49749521528601e-06, "loss": 0.4092, "step": 4403 }, { "epoch": 0.27585774910349364, "grad_norm": 0.8895869294935559, "learning_rate": 8.496770220011502e-06, "loss": 0.4182, "step": 4404 }, { "epoch": 0.27592038710283595, "grad_norm": 0.7461359356799369, "learning_rate": 8.496045080809284e-06, "loss": 0.4896, "step": 4405 }, { "epoch": 0.27598302510217826, "grad_norm": 0.831048856449058, "learning_rate": 8.4953197977092e-06, "loss": 0.4504, "step": 4406 }, { "epoch": 0.2760456631015205, "grad_norm": 0.7022598468574721, "learning_rate": 8.494594370741107e-06, "loss": 0.4747, "step": 4407 }, { "epoch": 0.2761083011008628, "grad_norm": 0.8795832228884478, "learning_rate": 8.493868799934859e-06, "loss": 0.4408, "step": 4408 }, { "epoch": 0.2761709391002051, "grad_norm": 0.8548274695444132, "learning_rate": 8.493143085320324e-06, "loss": 0.4545, "step": 4409 }, { "epoch": 0.27623357709954743, "grad_norm": 0.8786641747589563, "learning_rate": 8.49241722692737e-06, "loss": 0.4535, "step": 4410 }, { "epoch": 0.27629621509888974, "grad_norm": 0.7831456918959225, "learning_rate": 8.491691224785874e-06, "loss": 0.3915, "step": 4411 }, { "epoch": 0.27635885309823205, "grad_norm": 0.9432531018223964, "learning_rate": 8.490965078925722e-06, "loss": 0.451, "step": 4412 }, { "epoch": 0.27642149109757436, "grad_norm": 0.920706662145824, "learning_rate": 8.490238789376796e-06, "loss": 0.4811, "step": 4413 }, { "epoch": 0.27648412909691666, "grad_norm": 0.8885043802990165, "learning_rate": 8.489512356168996e-06, "loss": 0.4594, "step": 4414 }, { "epoch": 0.27654676709625897, "grad_norm": 0.7731624041193329, "learning_rate": 8.488785779332221e-06, "loss": 0.4038, "step": 4415 }, { "epoch": 0.2766094050956012, "grad_norm": 0.8097818054552902, "learning_rate": 8.488059058896373e-06, "loss": 0.4642, "step": 4416 }, { "epoch": 0.27667204309494353, "grad_norm": 0.838561427476391, "learning_rate": 8.487332194891366e-06, "loss": 0.4519, "step": 4417 }, { "epoch": 0.27673468109428584, "grad_norm": 0.812200447370102, "learning_rate": 8.486605187347121e-06, "loss": 0.395, "step": 4418 }, { "epoch": 0.27679731909362815, "grad_norm": 0.8798548414582801, "learning_rate": 8.48587803629356e-06, "loss": 0.417, "step": 4419 }, { "epoch": 0.27685995709297045, "grad_norm": 0.8759399408055073, "learning_rate": 8.48515074176061e-06, "loss": 0.3728, "step": 4420 }, { "epoch": 0.27692259509231276, "grad_norm": 0.8766383812321952, "learning_rate": 8.48442330377821e-06, "loss": 0.4177, "step": 4421 }, { "epoch": 0.27698523309165507, "grad_norm": 0.8286653667948707, "learning_rate": 8.483695722376299e-06, "loss": 0.4066, "step": 4422 }, { "epoch": 0.2770478710909974, "grad_norm": 0.8775012057870281, "learning_rate": 8.482967997584825e-06, "loss": 0.4178, "step": 4423 }, { "epoch": 0.27711050909033963, "grad_norm": 0.9551931461691585, "learning_rate": 8.482240129433744e-06, "loss": 0.4298, "step": 4424 }, { "epoch": 0.27717314708968194, "grad_norm": 0.8899308677773982, "learning_rate": 8.48151211795301e-06, "loss": 0.4145, "step": 4425 }, { "epoch": 0.27723578508902424, "grad_norm": 0.8532260368167534, "learning_rate": 8.480783963172593e-06, "loss": 0.423, "step": 4426 }, { "epoch": 0.27729842308836655, "grad_norm": 0.7866993886935454, "learning_rate": 8.480055665122461e-06, "loss": 0.4527, "step": 4427 }, { "epoch": 0.27736106108770886, "grad_norm": 0.8353013083996946, "learning_rate": 8.479327223832592e-06, "loss": 0.4508, "step": 4428 }, { "epoch": 0.27742369908705117, "grad_norm": 0.7985567995624065, "learning_rate": 8.478598639332969e-06, "loss": 0.3955, "step": 4429 }, { "epoch": 0.2774863370863935, "grad_norm": 0.8137606287561502, "learning_rate": 8.477869911653581e-06, "loss": 0.4036, "step": 4430 }, { "epoch": 0.2775489750857358, "grad_norm": 0.810642709844054, "learning_rate": 8.477141040824421e-06, "loss": 0.4415, "step": 4431 }, { "epoch": 0.27761161308507804, "grad_norm": 0.8845453142279435, "learning_rate": 8.476412026875492e-06, "loss": 0.4392, "step": 4432 }, { "epoch": 0.27767425108442034, "grad_norm": 0.7887707939256526, "learning_rate": 8.475682869836797e-06, "loss": 0.4017, "step": 4433 }, { "epoch": 0.27773688908376265, "grad_norm": 0.7840639384427776, "learning_rate": 8.47495356973835e-06, "loss": 0.3929, "step": 4434 }, { "epoch": 0.27779952708310496, "grad_norm": 0.8629140596461519, "learning_rate": 8.474224126610168e-06, "loss": 0.4296, "step": 4435 }, { "epoch": 0.27786216508244727, "grad_norm": 0.888438469013795, "learning_rate": 8.473494540482279e-06, "loss": 0.4391, "step": 4436 }, { "epoch": 0.2779248030817896, "grad_norm": 0.8029567280590286, "learning_rate": 8.472764811384708e-06, "loss": 0.3788, "step": 4437 }, { "epoch": 0.2779874410811319, "grad_norm": 0.835363335414453, "learning_rate": 8.472034939347493e-06, "loss": 0.4338, "step": 4438 }, { "epoch": 0.2780500790804742, "grad_norm": 0.9116309719211518, "learning_rate": 8.471304924400676e-06, "loss": 0.4928, "step": 4439 }, { "epoch": 0.2781127170798165, "grad_norm": 0.8157635537044745, "learning_rate": 8.470574766574304e-06, "loss": 0.4119, "step": 4440 }, { "epoch": 0.27817535507915875, "grad_norm": 0.8377107975290504, "learning_rate": 8.469844465898431e-06, "loss": 0.424, "step": 4441 }, { "epoch": 0.27823799307850106, "grad_norm": 0.7777005926653328, "learning_rate": 8.469114022403115e-06, "loss": 0.4105, "step": 4442 }, { "epoch": 0.27830063107784336, "grad_norm": 0.9189570360144972, "learning_rate": 8.468383436118422e-06, "loss": 0.4672, "step": 4443 }, { "epoch": 0.27836326907718567, "grad_norm": 0.8280378017251758, "learning_rate": 8.467652707074423e-06, "loss": 0.3892, "step": 4444 }, { "epoch": 0.278425907076528, "grad_norm": 0.8464483908806905, "learning_rate": 8.466921835301195e-06, "loss": 0.4058, "step": 4445 }, { "epoch": 0.2784885450758703, "grad_norm": 0.8802470678553086, "learning_rate": 8.46619082082882e-06, "loss": 0.3936, "step": 4446 }, { "epoch": 0.2785511830752126, "grad_norm": 0.8611961069396535, "learning_rate": 8.465459663687387e-06, "loss": 0.4565, "step": 4447 }, { "epoch": 0.2786138210745549, "grad_norm": 0.8034794121229686, "learning_rate": 8.464728363906993e-06, "loss": 0.373, "step": 4448 }, { "epoch": 0.27867645907389715, "grad_norm": 0.8293760677112963, "learning_rate": 8.463996921517736e-06, "loss": 0.4453, "step": 4449 }, { "epoch": 0.27873909707323946, "grad_norm": 0.8012125694525203, "learning_rate": 8.463265336549722e-06, "loss": 0.3807, "step": 4450 }, { "epoch": 0.27880173507258177, "grad_norm": 0.7833819368043001, "learning_rate": 8.462533609033065e-06, "loss": 0.3992, "step": 4451 }, { "epoch": 0.2788643730719241, "grad_norm": 0.9147642316197814, "learning_rate": 8.461801738997881e-06, "loss": 0.4197, "step": 4452 }, { "epoch": 0.2789270110712664, "grad_norm": 0.8851205384973309, "learning_rate": 8.461069726474295e-06, "loss": 0.4524, "step": 4453 }, { "epoch": 0.2789896490706087, "grad_norm": 0.8281814661081852, "learning_rate": 8.460337571492435e-06, "loss": 0.4089, "step": 4454 }, { "epoch": 0.279052287069951, "grad_norm": 0.9247996872372588, "learning_rate": 8.459605274082441e-06, "loss": 0.428, "step": 4455 }, { "epoch": 0.2791149250692933, "grad_norm": 0.8323314471910815, "learning_rate": 8.458872834274452e-06, "loss": 0.4143, "step": 4456 }, { "epoch": 0.2791775630686356, "grad_norm": 0.8292869878201337, "learning_rate": 8.458140252098615e-06, "loss": 0.4518, "step": 4457 }, { "epoch": 0.27924020106797787, "grad_norm": 0.8882208715215865, "learning_rate": 8.457407527585082e-06, "loss": 0.4191, "step": 4458 }, { "epoch": 0.2793028390673202, "grad_norm": 0.8370285298639575, "learning_rate": 8.456674660764013e-06, "loss": 0.4582, "step": 4459 }, { "epoch": 0.2793654770666625, "grad_norm": 0.8500355667324807, "learning_rate": 8.455941651665574e-06, "loss": 0.4384, "step": 4460 }, { "epoch": 0.2794281150660048, "grad_norm": 0.8469359387901619, "learning_rate": 8.455208500319936e-06, "loss": 0.4254, "step": 4461 }, { "epoch": 0.2794907530653471, "grad_norm": 0.8761183610078126, "learning_rate": 8.454475206757275e-06, "loss": 0.4426, "step": 4462 }, { "epoch": 0.2795533910646894, "grad_norm": 0.8475887025562969, "learning_rate": 8.453741771007772e-06, "loss": 0.4318, "step": 4463 }, { "epoch": 0.2796160290640317, "grad_norm": 0.8232020975370907, "learning_rate": 8.453008193101617e-06, "loss": 0.4072, "step": 4464 }, { "epoch": 0.279678667063374, "grad_norm": 0.8349937227732535, "learning_rate": 8.452274473069005e-06, "loss": 0.4048, "step": 4465 }, { "epoch": 0.2797413050627163, "grad_norm": 0.7430580599265124, "learning_rate": 8.451540610940133e-06, "loss": 0.4696, "step": 4466 }, { "epoch": 0.2798039430620586, "grad_norm": 0.8266480115293178, "learning_rate": 8.45080660674521e-06, "loss": 0.4142, "step": 4467 }, { "epoch": 0.2798665810614009, "grad_norm": 0.7747035461360926, "learning_rate": 8.450072460514445e-06, "loss": 0.4085, "step": 4468 }, { "epoch": 0.2799292190607432, "grad_norm": 0.9183722291204025, "learning_rate": 8.44933817227806e-06, "loss": 0.4534, "step": 4469 }, { "epoch": 0.2799918570600855, "grad_norm": 0.8430629031982132, "learning_rate": 8.448603742066272e-06, "loss": 0.4276, "step": 4470 }, { "epoch": 0.2800544950594278, "grad_norm": 0.8485708248659706, "learning_rate": 8.447869169909318e-06, "loss": 0.4206, "step": 4471 }, { "epoch": 0.2801171330587701, "grad_norm": 0.6542292607688736, "learning_rate": 8.447134455837427e-06, "loss": 0.4639, "step": 4472 }, { "epoch": 0.2801797710581124, "grad_norm": 0.8006173075995354, "learning_rate": 8.44639959988084e-06, "loss": 0.4152, "step": 4473 }, { "epoch": 0.2802424090574547, "grad_norm": 0.8577385149241405, "learning_rate": 8.445664602069809e-06, "loss": 0.3876, "step": 4474 }, { "epoch": 0.280305047056797, "grad_norm": 0.7699279835162418, "learning_rate": 8.444929462434582e-06, "loss": 0.4081, "step": 4475 }, { "epoch": 0.2803676850561393, "grad_norm": 0.8632558988862935, "learning_rate": 8.44419418100542e-06, "loss": 0.4525, "step": 4476 }, { "epoch": 0.2804303230554816, "grad_norm": 0.8174327990788561, "learning_rate": 8.443458757812583e-06, "loss": 0.439, "step": 4477 }, { "epoch": 0.2804929610548239, "grad_norm": 0.8390206924749255, "learning_rate": 8.442723192886349e-06, "loss": 0.4287, "step": 4478 }, { "epoch": 0.2805555990541662, "grad_norm": 0.8895683908965598, "learning_rate": 8.441987486256986e-06, "loss": 0.4231, "step": 4479 }, { "epoch": 0.2806182370535085, "grad_norm": 0.8422907238365351, "learning_rate": 8.441251637954781e-06, "loss": 0.4119, "step": 4480 }, { "epoch": 0.28068087505285083, "grad_norm": 0.8798004012355268, "learning_rate": 8.44051564801002e-06, "loss": 0.4271, "step": 4481 }, { "epoch": 0.28074351305219314, "grad_norm": 0.9162956688051097, "learning_rate": 8.439779516452994e-06, "loss": 0.4244, "step": 4482 }, { "epoch": 0.2808061510515354, "grad_norm": 0.9387709303606743, "learning_rate": 8.439043243314006e-06, "loss": 0.4411, "step": 4483 }, { "epoch": 0.2808687890508777, "grad_norm": 0.7791824355802911, "learning_rate": 8.43830682862336e-06, "loss": 0.5019, "step": 4484 }, { "epoch": 0.28093142705022, "grad_norm": 0.9065703295401388, "learning_rate": 8.437570272411368e-06, "loss": 0.4135, "step": 4485 }, { "epoch": 0.2809940650495623, "grad_norm": 0.7438404973777876, "learning_rate": 8.436833574708343e-06, "loss": 0.3841, "step": 4486 }, { "epoch": 0.2810567030489046, "grad_norm": 0.6638437396309617, "learning_rate": 8.436096735544612e-06, "loss": 0.4812, "step": 4487 }, { "epoch": 0.28111934104824693, "grad_norm": 0.9019747460027647, "learning_rate": 8.435359754950501e-06, "loss": 0.5011, "step": 4488 }, { "epoch": 0.28118197904758924, "grad_norm": 0.780817237652749, "learning_rate": 8.434622632956345e-06, "loss": 0.3743, "step": 4489 }, { "epoch": 0.28124461704693154, "grad_norm": 0.8588540210354703, "learning_rate": 8.433885369592482e-06, "loss": 0.4133, "step": 4490 }, { "epoch": 0.2813072550462738, "grad_norm": 0.8877931370258269, "learning_rate": 8.433147964889263e-06, "loss": 0.4449, "step": 4491 }, { "epoch": 0.2813698930456161, "grad_norm": 0.8869661196646803, "learning_rate": 8.432410418877035e-06, "loss": 0.4656, "step": 4492 }, { "epoch": 0.2814325310449584, "grad_norm": 0.8431416608472331, "learning_rate": 8.431672731586157e-06, "loss": 0.445, "step": 4493 }, { "epoch": 0.2814951690443007, "grad_norm": 0.8928545977082609, "learning_rate": 8.430934903046993e-06, "loss": 0.4332, "step": 4494 }, { "epoch": 0.281557807043643, "grad_norm": 0.861192931468011, "learning_rate": 8.43019693328991e-06, "loss": 0.4555, "step": 4495 }, { "epoch": 0.28162044504298533, "grad_norm": 0.8007904994052102, "learning_rate": 8.429458822345287e-06, "loss": 0.4468, "step": 4496 }, { "epoch": 0.28168308304232764, "grad_norm": 0.8636905088543667, "learning_rate": 8.428720570243501e-06, "loss": 0.51, "step": 4497 }, { "epoch": 0.28174572104166995, "grad_norm": 0.7882641011984228, "learning_rate": 8.427982177014941e-06, "loss": 0.3521, "step": 4498 }, { "epoch": 0.28180835904101226, "grad_norm": 0.88031674780118, "learning_rate": 8.427243642689997e-06, "loss": 0.4414, "step": 4499 }, { "epoch": 0.2818709970403545, "grad_norm": 0.8746543742715582, "learning_rate": 8.426504967299067e-06, "loss": 0.4238, "step": 4500 }, { "epoch": 0.2819336350396968, "grad_norm": 0.8183299525883578, "learning_rate": 8.42576615087256e-06, "loss": 0.4311, "step": 4501 }, { "epoch": 0.2819962730390391, "grad_norm": 0.7774276500562789, "learning_rate": 8.42502719344088e-06, "loss": 0.4006, "step": 4502 }, { "epoch": 0.28205891103838143, "grad_norm": 0.8730530687122681, "learning_rate": 8.424288095034446e-06, "loss": 0.4367, "step": 4503 }, { "epoch": 0.28212154903772374, "grad_norm": 0.7688278470786755, "learning_rate": 8.423548855683679e-06, "loss": 0.3729, "step": 4504 }, { "epoch": 0.28218418703706605, "grad_norm": 0.8177250654066277, "learning_rate": 8.422809475419005e-06, "loss": 0.4334, "step": 4505 }, { "epoch": 0.28224682503640836, "grad_norm": 0.845049820319112, "learning_rate": 8.422069954270858e-06, "loss": 0.4407, "step": 4506 }, { "epoch": 0.28230946303575066, "grad_norm": 0.9153848550415568, "learning_rate": 8.421330292269676e-06, "loss": 0.4231, "step": 4507 }, { "epoch": 0.2823721010350929, "grad_norm": 0.7320751359689285, "learning_rate": 8.420590489445903e-06, "loss": 0.3572, "step": 4508 }, { "epoch": 0.2824347390344352, "grad_norm": 0.8486031290236098, "learning_rate": 8.419850545829993e-06, "loss": 0.4245, "step": 4509 }, { "epoch": 0.28249737703377753, "grad_norm": 0.8186833846273959, "learning_rate": 8.419110461452397e-06, "loss": 0.4127, "step": 4510 }, { "epoch": 0.28256001503311984, "grad_norm": 0.8925574140126645, "learning_rate": 8.418370236343583e-06, "loss": 0.4614, "step": 4511 }, { "epoch": 0.28262265303246215, "grad_norm": 0.8245211518809294, "learning_rate": 8.417629870534013e-06, "loss": 0.4349, "step": 4512 }, { "epoch": 0.28268529103180445, "grad_norm": 0.9031647050548933, "learning_rate": 8.416889364054167e-06, "loss": 0.4491, "step": 4513 }, { "epoch": 0.28274792903114676, "grad_norm": 0.8462950603204374, "learning_rate": 8.416148716934516e-06, "loss": 0.3845, "step": 4514 }, { "epoch": 0.28281056703048907, "grad_norm": 0.8965035297364966, "learning_rate": 8.415407929205551e-06, "loss": 0.4057, "step": 4515 }, { "epoch": 0.2828732050298313, "grad_norm": 0.8325674788136882, "learning_rate": 8.414667000897762e-06, "loss": 0.4179, "step": 4516 }, { "epoch": 0.28293584302917363, "grad_norm": 0.8535993659281004, "learning_rate": 8.413925932041647e-06, "loss": 0.4373, "step": 4517 }, { "epoch": 0.28299848102851594, "grad_norm": 0.8220744629681055, "learning_rate": 8.413184722667705e-06, "loss": 0.4234, "step": 4518 }, { "epoch": 0.28306111902785824, "grad_norm": 0.8491761830580135, "learning_rate": 8.412443372806448e-06, "loss": 0.4098, "step": 4519 }, { "epoch": 0.28312375702720055, "grad_norm": 0.8116566590517976, "learning_rate": 8.411701882488385e-06, "loss": 0.4253, "step": 4520 }, { "epoch": 0.28318639502654286, "grad_norm": 0.9024882625815372, "learning_rate": 8.410960251744042e-06, "loss": 0.3905, "step": 4521 }, { "epoch": 0.28324903302588517, "grad_norm": 0.7699582910260983, "learning_rate": 8.410218480603942e-06, "loss": 0.3922, "step": 4522 }, { "epoch": 0.2833116710252275, "grad_norm": 0.8499652700254379, "learning_rate": 8.409476569098616e-06, "loss": 0.4968, "step": 4523 }, { "epoch": 0.2833743090245698, "grad_norm": 0.8327557432156951, "learning_rate": 8.4087345172586e-06, "loss": 0.407, "step": 4524 }, { "epoch": 0.28343694702391203, "grad_norm": 0.7259577120830983, "learning_rate": 8.407992325114442e-06, "loss": 0.4808, "step": 4525 }, { "epoch": 0.28349958502325434, "grad_norm": 0.8609384529022338, "learning_rate": 8.407249992696685e-06, "loss": 0.4313, "step": 4526 }, { "epoch": 0.28356222302259665, "grad_norm": 0.8055980159047935, "learning_rate": 8.406507520035887e-06, "loss": 0.4247, "step": 4527 }, { "epoch": 0.28362486102193896, "grad_norm": 0.9501564570764383, "learning_rate": 8.405764907162607e-06, "loss": 0.4494, "step": 4528 }, { "epoch": 0.28368749902128126, "grad_norm": 0.8406642545027252, "learning_rate": 8.40502215410741e-06, "loss": 0.4623, "step": 4529 }, { "epoch": 0.28375013702062357, "grad_norm": 0.7823595518914992, "learning_rate": 8.40427926090087e-06, "loss": 0.3841, "step": 4530 }, { "epoch": 0.2838127750199659, "grad_norm": 0.8055630711773765, "learning_rate": 8.403536227573564e-06, "loss": 0.4318, "step": 4531 }, { "epoch": 0.2838754130193082, "grad_norm": 0.7538077845481955, "learning_rate": 8.402793054156076e-06, "loss": 0.4678, "step": 4532 }, { "epoch": 0.28393805101865044, "grad_norm": 0.8627177219962536, "learning_rate": 8.402049740678992e-06, "loss": 0.4606, "step": 4533 }, { "epoch": 0.28400068901799275, "grad_norm": 0.8812955485177362, "learning_rate": 8.40130628717291e-06, "loss": 0.4377, "step": 4534 }, { "epoch": 0.28406332701733505, "grad_norm": 0.8985200832206445, "learning_rate": 8.40056269366843e-06, "loss": 0.4412, "step": 4535 }, { "epoch": 0.28412596501667736, "grad_norm": 0.8046156077725234, "learning_rate": 8.399818960196159e-06, "loss": 0.4286, "step": 4536 }, { "epoch": 0.28418860301601967, "grad_norm": 0.8968933332815846, "learning_rate": 8.399075086786708e-06, "loss": 0.4302, "step": 4537 }, { "epoch": 0.284251241015362, "grad_norm": 0.8975745534516543, "learning_rate": 8.398331073470696e-06, "loss": 0.4355, "step": 4538 }, { "epoch": 0.2843138790147043, "grad_norm": 0.8825580612614147, "learning_rate": 8.397586920278747e-06, "loss": 0.4311, "step": 4539 }, { "epoch": 0.2843765170140466, "grad_norm": 0.7985635382766482, "learning_rate": 8.396842627241488e-06, "loss": 0.4364, "step": 4540 }, { "epoch": 0.28443915501338884, "grad_norm": 0.8404366891398226, "learning_rate": 8.396098194389556e-06, "loss": 0.4093, "step": 4541 }, { "epoch": 0.28450179301273115, "grad_norm": 0.8410694777437387, "learning_rate": 8.395353621753592e-06, "loss": 0.3949, "step": 4542 }, { "epoch": 0.28456443101207346, "grad_norm": 0.8213176287800736, "learning_rate": 8.394608909364243e-06, "loss": 0.4171, "step": 4543 }, { "epoch": 0.28462706901141577, "grad_norm": 0.8918335480504039, "learning_rate": 8.393864057252162e-06, "loss": 0.3983, "step": 4544 }, { "epoch": 0.2846897070107581, "grad_norm": 0.6608846592114112, "learning_rate": 8.393119065448008e-06, "loss": 0.4887, "step": 4545 }, { "epoch": 0.2847523450101004, "grad_norm": 0.7979826588457201, "learning_rate": 8.39237393398244e-06, "loss": 0.4014, "step": 4546 }, { "epoch": 0.2848149830094427, "grad_norm": 0.8366003094445484, "learning_rate": 8.391628662886134e-06, "loss": 0.4426, "step": 4547 }, { "epoch": 0.284877621008785, "grad_norm": 0.860255340205257, "learning_rate": 8.39088325218976e-06, "loss": 0.4476, "step": 4548 }, { "epoch": 0.2849402590081273, "grad_norm": 0.781542890893512, "learning_rate": 8.390137701924003e-06, "loss": 0.4114, "step": 4549 }, { "epoch": 0.28500289700746956, "grad_norm": 0.8309438586306174, "learning_rate": 8.38939201211955e-06, "loss": 0.4511, "step": 4550 }, { "epoch": 0.28506553500681187, "grad_norm": 0.9276841823885593, "learning_rate": 8.388646182807092e-06, "loss": 0.4488, "step": 4551 }, { "epoch": 0.2851281730061542, "grad_norm": 0.8469701553182943, "learning_rate": 8.387900214017328e-06, "loss": 0.4422, "step": 4552 }, { "epoch": 0.2851908110054965, "grad_norm": 0.8474472166244699, "learning_rate": 8.387154105780963e-06, "loss": 0.4242, "step": 4553 }, { "epoch": 0.2852534490048388, "grad_norm": 0.7861425166930543, "learning_rate": 8.386407858128707e-06, "loss": 0.4261, "step": 4554 }, { "epoch": 0.2853160870041811, "grad_norm": 0.8468881876423646, "learning_rate": 8.385661471091273e-06, "loss": 0.4328, "step": 4555 }, { "epoch": 0.2853787250035234, "grad_norm": 0.8775769448572558, "learning_rate": 8.384914944699385e-06, "loss": 0.4693, "step": 4556 }, { "epoch": 0.2854413630028657, "grad_norm": 0.7478369485962452, "learning_rate": 8.384168278983772e-06, "loss": 0.3636, "step": 4557 }, { "epoch": 0.28550400100220796, "grad_norm": 0.6479622556458028, "learning_rate": 8.383421473975162e-06, "loss": 0.4846, "step": 4558 }, { "epoch": 0.28556663900155027, "grad_norm": 0.6921687063237609, "learning_rate": 8.382674529704296e-06, "loss": 0.4908, "step": 4559 }, { "epoch": 0.2856292770008926, "grad_norm": 0.8045103021598321, "learning_rate": 8.381927446201919e-06, "loss": 0.45, "step": 4560 }, { "epoch": 0.2856919150002349, "grad_norm": 0.792991574508108, "learning_rate": 8.38118022349878e-06, "loss": 0.3953, "step": 4561 }, { "epoch": 0.2857545529995772, "grad_norm": 0.860182308584712, "learning_rate": 8.380432861625637e-06, "loss": 0.425, "step": 4562 }, { "epoch": 0.2858171909989195, "grad_norm": 0.8894312609468864, "learning_rate": 8.379685360613248e-06, "loss": 0.463, "step": 4563 }, { "epoch": 0.2858798289982618, "grad_norm": 0.7935107035960371, "learning_rate": 8.378937720492384e-06, "loss": 0.4184, "step": 4564 }, { "epoch": 0.2859424669976041, "grad_norm": 0.9280838626678545, "learning_rate": 8.378189941293815e-06, "loss": 0.4164, "step": 4565 }, { "epoch": 0.2860051049969464, "grad_norm": 0.686159480813328, "learning_rate": 8.377442023048323e-06, "loss": 0.4928, "step": 4566 }, { "epoch": 0.2860677429962887, "grad_norm": 0.8358448000674422, "learning_rate": 8.376693965786688e-06, "loss": 0.4346, "step": 4567 }, { "epoch": 0.286130380995631, "grad_norm": 0.8292946810615583, "learning_rate": 8.375945769539704e-06, "loss": 0.4203, "step": 4568 }, { "epoch": 0.2861930189949733, "grad_norm": 0.8528568944769392, "learning_rate": 8.375197434338164e-06, "loss": 0.4726, "step": 4569 }, { "epoch": 0.2862556569943156, "grad_norm": 0.8729948069040444, "learning_rate": 8.374448960212873e-06, "loss": 0.4689, "step": 4570 }, { "epoch": 0.2863182949936579, "grad_norm": 0.6992213001896993, "learning_rate": 8.373700347194637e-06, "loss": 0.4695, "step": 4571 }, { "epoch": 0.2863809329930002, "grad_norm": 0.8805033902749299, "learning_rate": 8.372951595314268e-06, "loss": 0.4218, "step": 4572 }, { "epoch": 0.2864435709923425, "grad_norm": 0.8194417206296722, "learning_rate": 8.372202704602585e-06, "loss": 0.3927, "step": 4573 }, { "epoch": 0.28650620899168483, "grad_norm": 0.8556483700079822, "learning_rate": 8.371453675090411e-06, "loss": 0.4244, "step": 4574 }, { "epoch": 0.2865688469910271, "grad_norm": 0.8108283602021576, "learning_rate": 8.37070450680858e-06, "loss": 0.4307, "step": 4575 }, { "epoch": 0.2866314849903694, "grad_norm": 0.8208834526299716, "learning_rate": 8.369955199787925e-06, "loss": 0.4067, "step": 4576 }, { "epoch": 0.2866941229897117, "grad_norm": 0.8082671786589473, "learning_rate": 8.369205754059289e-06, "loss": 0.4583, "step": 4577 }, { "epoch": 0.286756760989054, "grad_norm": 0.8377806156967093, "learning_rate": 8.368456169653519e-06, "loss": 0.4429, "step": 4578 }, { "epoch": 0.2868193989883963, "grad_norm": 0.7605655215949123, "learning_rate": 8.367706446601466e-06, "loss": 0.3964, "step": 4579 }, { "epoch": 0.2868820369877386, "grad_norm": 0.8666580154759712, "learning_rate": 8.366956584933992e-06, "loss": 0.4352, "step": 4580 }, { "epoch": 0.2869446749870809, "grad_norm": 0.8260359629848951, "learning_rate": 8.36620658468196e-06, "loss": 0.3861, "step": 4581 }, { "epoch": 0.28700731298642324, "grad_norm": 0.8292055298147408, "learning_rate": 8.36545644587624e-06, "loss": 0.3986, "step": 4582 }, { "epoch": 0.2870699509857655, "grad_norm": 0.7950863570016041, "learning_rate": 8.36470616854771e-06, "loss": 0.4094, "step": 4583 }, { "epoch": 0.2871325889851078, "grad_norm": 0.8432318095562448, "learning_rate": 8.363955752727247e-06, "loss": 0.4512, "step": 4584 }, { "epoch": 0.2871952269844501, "grad_norm": 0.8510153136061384, "learning_rate": 8.36320519844574e-06, "loss": 0.3953, "step": 4585 }, { "epoch": 0.2872578649837924, "grad_norm": 0.8923641748955161, "learning_rate": 8.362454505734085e-06, "loss": 0.4168, "step": 4586 }, { "epoch": 0.2873205029831347, "grad_norm": 0.9021367066478626, "learning_rate": 8.361703674623179e-06, "loss": 0.4198, "step": 4587 }, { "epoch": 0.287383140982477, "grad_norm": 0.8183372061811466, "learning_rate": 8.360952705143924e-06, "loss": 0.4085, "step": 4588 }, { "epoch": 0.28744577898181933, "grad_norm": 0.6517471790667465, "learning_rate": 8.360201597327234e-06, "loss": 0.4901, "step": 4589 }, { "epoch": 0.28750841698116164, "grad_norm": 0.8552492816403398, "learning_rate": 8.35945035120402e-06, "loss": 0.4677, "step": 4590 }, { "epoch": 0.28757105498050395, "grad_norm": 0.7936449470875007, "learning_rate": 8.358698966805208e-06, "loss": 0.4349, "step": 4591 }, { "epoch": 0.2876336929798462, "grad_norm": 0.721881400775441, "learning_rate": 8.35794744416172e-06, "loss": 0.4736, "step": 4592 }, { "epoch": 0.2876963309791885, "grad_norm": 0.9148032123242769, "learning_rate": 8.357195783304495e-06, "loss": 0.4317, "step": 4593 }, { "epoch": 0.2877589689785308, "grad_norm": 0.7878970544853151, "learning_rate": 8.356443984264468e-06, "loss": 0.4082, "step": 4594 }, { "epoch": 0.2878216069778731, "grad_norm": 0.7997682581998039, "learning_rate": 8.355692047072584e-06, "loss": 0.4329, "step": 4595 }, { "epoch": 0.28788424497721543, "grad_norm": 0.8090189333634814, "learning_rate": 8.35493997175979e-06, "loss": 0.4536, "step": 4596 }, { "epoch": 0.28794688297655774, "grad_norm": 0.8312142684419676, "learning_rate": 8.354187758357046e-06, "loss": 0.4694, "step": 4597 }, { "epoch": 0.28800952097590005, "grad_norm": 0.8739342088039931, "learning_rate": 8.353435406895312e-06, "loss": 0.4078, "step": 4598 }, { "epoch": 0.28807215897524235, "grad_norm": 0.8363434223357454, "learning_rate": 8.352682917405552e-06, "loss": 0.4718, "step": 4599 }, { "epoch": 0.2881347969745846, "grad_norm": 0.8176272164661651, "learning_rate": 8.351930289918743e-06, "loss": 0.396, "step": 4600 }, { "epoch": 0.2881974349739269, "grad_norm": 0.7513051720032988, "learning_rate": 8.35117752446586e-06, "loss": 0.3926, "step": 4601 }, { "epoch": 0.2882600729732692, "grad_norm": 0.8482313169861686, "learning_rate": 8.350424621077889e-06, "loss": 0.4136, "step": 4602 }, { "epoch": 0.28832271097261153, "grad_norm": 0.7760307598353499, "learning_rate": 8.349671579785817e-06, "loss": 0.3968, "step": 4603 }, { "epoch": 0.28838534897195384, "grad_norm": 0.8535403679621641, "learning_rate": 8.348918400620643e-06, "loss": 0.3908, "step": 4604 }, { "epoch": 0.28844798697129614, "grad_norm": 0.7603145667110754, "learning_rate": 8.348165083613365e-06, "loss": 0.3862, "step": 4605 }, { "epoch": 0.28851062497063845, "grad_norm": 0.7195484834137589, "learning_rate": 8.34741162879499e-06, "loss": 0.4711, "step": 4606 }, { "epoch": 0.28857326296998076, "grad_norm": 0.8102594974796218, "learning_rate": 8.346658036196533e-06, "loss": 0.3762, "step": 4607 }, { "epoch": 0.28863590096932307, "grad_norm": 0.7465374118885104, "learning_rate": 8.345904305849008e-06, "loss": 0.4322, "step": 4608 }, { "epoch": 0.2886985389686653, "grad_norm": 0.807764072812133, "learning_rate": 8.345150437783442e-06, "loss": 0.4202, "step": 4609 }, { "epoch": 0.2887611769680076, "grad_norm": 0.818110504961382, "learning_rate": 8.344396432030864e-06, "loss": 0.4221, "step": 4610 }, { "epoch": 0.28882381496734993, "grad_norm": 0.8487899842265424, "learning_rate": 8.343642288622305e-06, "loss": 0.4459, "step": 4611 }, { "epoch": 0.28888645296669224, "grad_norm": 0.8498942389786289, "learning_rate": 8.34288800758881e-06, "loss": 0.4093, "step": 4612 }, { "epoch": 0.28894909096603455, "grad_norm": 0.6768746653230164, "learning_rate": 8.342133588961425e-06, "loss": 0.4939, "step": 4613 }, { "epoch": 0.28901172896537686, "grad_norm": 0.7655078484042608, "learning_rate": 8.341379032771202e-06, "loss": 0.3852, "step": 4614 }, { "epoch": 0.28907436696471916, "grad_norm": 0.7983857594247828, "learning_rate": 8.340624339049196e-06, "loss": 0.3658, "step": 4615 }, { "epoch": 0.2891370049640615, "grad_norm": 0.8465241297815904, "learning_rate": 8.339869507826474e-06, "loss": 0.4305, "step": 4616 }, { "epoch": 0.2891996429634037, "grad_norm": 0.9291578381833316, "learning_rate": 8.3391145391341e-06, "loss": 0.4183, "step": 4617 }, { "epoch": 0.28926228096274603, "grad_norm": 0.8674285395494015, "learning_rate": 8.338359433003155e-06, "loss": 0.4394, "step": 4618 }, { "epoch": 0.28932491896208834, "grad_norm": 0.7872338508372421, "learning_rate": 8.337604189464716e-06, "loss": 0.3858, "step": 4619 }, { "epoch": 0.28938755696143065, "grad_norm": 0.8445375094790136, "learning_rate": 8.336848808549866e-06, "loss": 0.4395, "step": 4620 }, { "epoch": 0.28945019496077296, "grad_norm": 0.7471784282009074, "learning_rate": 8.336093290289703e-06, "loss": 0.3975, "step": 4621 }, { "epoch": 0.28951283296011526, "grad_norm": 0.8807929869245109, "learning_rate": 8.33533763471532e-06, "loss": 0.4162, "step": 4622 }, { "epoch": 0.28957547095945757, "grad_norm": 0.7415396553270062, "learning_rate": 8.334581841857821e-06, "loss": 0.4722, "step": 4623 }, { "epoch": 0.2896381089587999, "grad_norm": 0.6978168918434557, "learning_rate": 8.333825911748314e-06, "loss": 0.4656, "step": 4624 }, { "epoch": 0.28970074695814213, "grad_norm": 0.8031343390110353, "learning_rate": 8.333069844417914e-06, "loss": 0.3742, "step": 4625 }, { "epoch": 0.28976338495748444, "grad_norm": 0.8204884685893606, "learning_rate": 8.33231363989774e-06, "loss": 0.3975, "step": 4626 }, { "epoch": 0.28982602295682675, "grad_norm": 0.9092791041395636, "learning_rate": 8.331557298218917e-06, "loss": 0.4395, "step": 4627 }, { "epoch": 0.28988866095616905, "grad_norm": 0.8917595959460629, "learning_rate": 8.33080081941258e-06, "loss": 0.3937, "step": 4628 }, { "epoch": 0.28995129895551136, "grad_norm": 0.8469314222348321, "learning_rate": 8.330044203509862e-06, "loss": 0.4239, "step": 4629 }, { "epoch": 0.29001393695485367, "grad_norm": 0.866630860024013, "learning_rate": 8.329287450541905e-06, "loss": 0.4748, "step": 4630 }, { "epoch": 0.290076574954196, "grad_norm": 0.8391019617923429, "learning_rate": 8.32853056053986e-06, "loss": 0.3986, "step": 4631 }, { "epoch": 0.2901392129535383, "grad_norm": 0.9204742856913263, "learning_rate": 8.327773533534879e-06, "loss": 0.4072, "step": 4632 }, { "epoch": 0.2902018509528806, "grad_norm": 0.807241427691445, "learning_rate": 8.327016369558122e-06, "loss": 0.3906, "step": 4633 }, { "epoch": 0.29026448895222284, "grad_norm": 0.8805439644033244, "learning_rate": 8.326259068640755e-06, "loss": 0.4293, "step": 4634 }, { "epoch": 0.29032712695156515, "grad_norm": 0.9439031884767, "learning_rate": 8.325501630813945e-06, "loss": 0.4415, "step": 4635 }, { "epoch": 0.29038976495090746, "grad_norm": 0.8125735544480082, "learning_rate": 8.32474405610887e-06, "loss": 0.3933, "step": 4636 }, { "epoch": 0.29045240295024977, "grad_norm": 0.9493186220113874, "learning_rate": 8.323986344556716e-06, "loss": 0.4722, "step": 4637 }, { "epoch": 0.2905150409495921, "grad_norm": 0.8432451575107256, "learning_rate": 8.323228496188665e-06, "loss": 0.4272, "step": 4638 }, { "epoch": 0.2905776789489344, "grad_norm": 0.8508921751247125, "learning_rate": 8.322470511035912e-06, "loss": 0.4272, "step": 4639 }, { "epoch": 0.2906403169482767, "grad_norm": 0.8654976020577734, "learning_rate": 8.321712389129656e-06, "loss": 0.4622, "step": 4640 }, { "epoch": 0.290702954947619, "grad_norm": 0.8449308164063765, "learning_rate": 8.320954130501102e-06, "loss": 0.4431, "step": 4641 }, { "epoch": 0.29076559294696125, "grad_norm": 0.7752847781656992, "learning_rate": 8.32019573518146e-06, "loss": 0.4149, "step": 4642 }, { "epoch": 0.29082823094630356, "grad_norm": 0.807125065309348, "learning_rate": 8.319437203201944e-06, "loss": 0.4405, "step": 4643 }, { "epoch": 0.29089086894564586, "grad_norm": 0.849890624402011, "learning_rate": 8.318678534593778e-06, "loss": 0.4341, "step": 4644 }, { "epoch": 0.29095350694498817, "grad_norm": 0.7779799131768563, "learning_rate": 8.317919729388186e-06, "loss": 0.3962, "step": 4645 }, { "epoch": 0.2910161449443305, "grad_norm": 0.7505664193244607, "learning_rate": 8.317160787616404e-06, "loss": 0.3901, "step": 4646 }, { "epoch": 0.2910787829436728, "grad_norm": 0.8882407394526716, "learning_rate": 8.316401709309667e-06, "loss": 0.4194, "step": 4647 }, { "epoch": 0.2911414209430151, "grad_norm": 0.8305383810416257, "learning_rate": 8.31564249449922e-06, "loss": 0.4436, "step": 4648 }, { "epoch": 0.2912040589423574, "grad_norm": 0.7736816748939043, "learning_rate": 8.314883143216315e-06, "loss": 0.3901, "step": 4649 }, { "epoch": 0.2912666969416997, "grad_norm": 0.8459933326178389, "learning_rate": 8.314123655492201e-06, "loss": 0.4564, "step": 4650 }, { "epoch": 0.29132933494104196, "grad_norm": 0.8146871945820008, "learning_rate": 8.313364031358145e-06, "loss": 0.4009, "step": 4651 }, { "epoch": 0.29139197294038427, "grad_norm": 0.807431689432583, "learning_rate": 8.312604270845409e-06, "loss": 0.4097, "step": 4652 }, { "epoch": 0.2914546109397266, "grad_norm": 0.7491819353441986, "learning_rate": 8.311844373985266e-06, "loss": 0.4853, "step": 4653 }, { "epoch": 0.2915172489390689, "grad_norm": 0.7732419158434022, "learning_rate": 8.311084340808996e-06, "loss": 0.4007, "step": 4654 }, { "epoch": 0.2915798869384112, "grad_norm": 0.8060414413580401, "learning_rate": 8.310324171347877e-06, "loss": 0.4361, "step": 4655 }, { "epoch": 0.2916425249377535, "grad_norm": 0.8482817240676441, "learning_rate": 8.309563865633203e-06, "loss": 0.4002, "step": 4656 }, { "epoch": 0.2917051629370958, "grad_norm": 0.8658207788068841, "learning_rate": 8.308803423696264e-06, "loss": 0.4131, "step": 4657 }, { "epoch": 0.2917678009364381, "grad_norm": 0.8559303334419806, "learning_rate": 8.308042845568362e-06, "loss": 0.4423, "step": 4658 }, { "epoch": 0.29183043893578037, "grad_norm": 0.8064858303171292, "learning_rate": 8.307282131280805e-06, "loss": 0.4019, "step": 4659 }, { "epoch": 0.2918930769351227, "grad_norm": 0.8423262112215374, "learning_rate": 8.3065212808649e-06, "loss": 0.4121, "step": 4660 }, { "epoch": 0.291955714934465, "grad_norm": 0.8263361933712459, "learning_rate": 8.305760294351965e-06, "loss": 0.4341, "step": 4661 }, { "epoch": 0.2920183529338073, "grad_norm": 0.786307338474322, "learning_rate": 8.304999171773323e-06, "loss": 0.3891, "step": 4662 }, { "epoch": 0.2920809909331496, "grad_norm": 0.8363507872715031, "learning_rate": 8.304237913160302e-06, "loss": 0.4638, "step": 4663 }, { "epoch": 0.2921436289324919, "grad_norm": 0.7653592210398874, "learning_rate": 8.303476518544236e-06, "loss": 0.4136, "step": 4664 }, { "epoch": 0.2922062669318342, "grad_norm": 0.7227407131607244, "learning_rate": 8.302714987956462e-06, "loss": 0.4743, "step": 4665 }, { "epoch": 0.2922689049311765, "grad_norm": 0.8068256291290727, "learning_rate": 8.301953321428327e-06, "loss": 0.441, "step": 4666 }, { "epoch": 0.2923315429305188, "grad_norm": 0.7978319766235816, "learning_rate": 8.30119151899118e-06, "loss": 0.4235, "step": 4667 }, { "epoch": 0.2923941809298611, "grad_norm": 0.8558393692471259, "learning_rate": 8.300429580676376e-06, "loss": 0.4245, "step": 4668 }, { "epoch": 0.2924568189292034, "grad_norm": 0.840825423358506, "learning_rate": 8.299667506515281e-06, "loss": 0.4431, "step": 4669 }, { "epoch": 0.2925194569285457, "grad_norm": 0.8216079466630785, "learning_rate": 8.298905296539257e-06, "loss": 0.4201, "step": 4670 }, { "epoch": 0.292582094927888, "grad_norm": 0.8278957404850623, "learning_rate": 8.298142950779678e-06, "loss": 0.4061, "step": 4671 }, { "epoch": 0.2926447329272303, "grad_norm": 0.8750995530321125, "learning_rate": 8.297380469267923e-06, "loss": 0.4625, "step": 4672 }, { "epoch": 0.2927073709265726, "grad_norm": 0.8240331402054177, "learning_rate": 8.296617852035377e-06, "loss": 0.4369, "step": 4673 }, { "epoch": 0.2927700089259149, "grad_norm": 0.7134190594652738, "learning_rate": 8.295855099113427e-06, "loss": 0.4962, "step": 4674 }, { "epoch": 0.29283264692525723, "grad_norm": 0.8215323597152484, "learning_rate": 8.29509221053347e-06, "loss": 0.4402, "step": 4675 }, { "epoch": 0.2928952849245995, "grad_norm": 0.8279998448240088, "learning_rate": 8.294329186326908e-06, "loss": 0.4236, "step": 4676 }, { "epoch": 0.2929579229239418, "grad_norm": 0.8108895627034814, "learning_rate": 8.293566026525143e-06, "loss": 0.4094, "step": 4677 }, { "epoch": 0.2930205609232841, "grad_norm": 0.9618262861236937, "learning_rate": 8.29280273115959e-06, "loss": 0.4538, "step": 4678 }, { "epoch": 0.2930831989226264, "grad_norm": 0.726429719904748, "learning_rate": 8.292039300261667e-06, "loss": 0.4698, "step": 4679 }, { "epoch": 0.2931458369219687, "grad_norm": 0.9899008468987311, "learning_rate": 8.29127573386279e-06, "loss": 0.4352, "step": 4680 }, { "epoch": 0.293208474921311, "grad_norm": 0.8283517665505474, "learning_rate": 8.290512031994397e-06, "loss": 0.3612, "step": 4681 }, { "epoch": 0.29327111292065333, "grad_norm": 0.8898500891696635, "learning_rate": 8.289748194687918e-06, "loss": 0.444, "step": 4682 }, { "epoch": 0.29333375091999564, "grad_norm": 0.7188795302486999, "learning_rate": 8.288984221974793e-06, "loss": 0.4861, "step": 4683 }, { "epoch": 0.2933963889193379, "grad_norm": 0.8542828278885054, "learning_rate": 8.288220113886464e-06, "loss": 0.4173, "step": 4684 }, { "epoch": 0.2934590269186802, "grad_norm": 0.8840187199370168, "learning_rate": 8.28745587045439e-06, "loss": 0.4248, "step": 4685 }, { "epoch": 0.2935216649180225, "grad_norm": 0.8203772733907794, "learning_rate": 8.286691491710017e-06, "loss": 0.4017, "step": 4686 }, { "epoch": 0.2935843029173648, "grad_norm": 0.9187487962232189, "learning_rate": 8.285926977684816e-06, "loss": 0.4378, "step": 4687 }, { "epoch": 0.2936469409167071, "grad_norm": 0.839259553189397, "learning_rate": 8.285162328410248e-06, "loss": 0.3888, "step": 4688 }, { "epoch": 0.29370957891604943, "grad_norm": 0.759083196443782, "learning_rate": 8.284397543917789e-06, "loss": 0.3764, "step": 4689 }, { "epoch": 0.29377221691539174, "grad_norm": 0.8006321331309094, "learning_rate": 8.283632624238917e-06, "loss": 0.4146, "step": 4690 }, { "epoch": 0.29383485491473404, "grad_norm": 0.8198037996735498, "learning_rate": 8.28286756940512e-06, "loss": 0.46, "step": 4691 }, { "epoch": 0.2938974929140763, "grad_norm": 0.8080281907894534, "learning_rate": 8.282102379447882e-06, "loss": 0.3773, "step": 4692 }, { "epoch": 0.2939601309134186, "grad_norm": 0.8413652170957195, "learning_rate": 8.281337054398702e-06, "loss": 0.4121, "step": 4693 }, { "epoch": 0.2940227689127609, "grad_norm": 0.8164615798272002, "learning_rate": 8.280571594289077e-06, "loss": 0.4598, "step": 4694 }, { "epoch": 0.2940854069121032, "grad_norm": 0.8612456261550063, "learning_rate": 8.279805999150519e-06, "loss": 0.4074, "step": 4695 }, { "epoch": 0.2941480449114455, "grad_norm": 0.7967845340924328, "learning_rate": 8.279040269014538e-06, "loss": 0.4116, "step": 4696 }, { "epoch": 0.29421068291078784, "grad_norm": 0.8227985067808806, "learning_rate": 8.278274403912649e-06, "loss": 0.3976, "step": 4697 }, { "epoch": 0.29427332091013014, "grad_norm": 0.821493857872847, "learning_rate": 8.277508403876377e-06, "loss": 0.4118, "step": 4698 }, { "epoch": 0.29433595890947245, "grad_norm": 0.8898107008529212, "learning_rate": 8.276742268937252e-06, "loss": 0.4342, "step": 4699 }, { "epoch": 0.29439859690881476, "grad_norm": 0.7941023499816265, "learning_rate": 8.275975999126807e-06, "loss": 0.3992, "step": 4700 }, { "epoch": 0.294461234908157, "grad_norm": 0.8298192077333765, "learning_rate": 8.27520959447658e-06, "loss": 0.4223, "step": 4701 }, { "epoch": 0.2945238729074993, "grad_norm": 0.8885386678950024, "learning_rate": 8.27444305501812e-06, "loss": 0.4351, "step": 4702 }, { "epoch": 0.2945865109068416, "grad_norm": 0.8704685783743942, "learning_rate": 8.273676380782975e-06, "loss": 0.4113, "step": 4703 }, { "epoch": 0.29464914890618393, "grad_norm": 0.668126025116446, "learning_rate": 8.272909571802703e-06, "loss": 0.4876, "step": 4704 }, { "epoch": 0.29471178690552624, "grad_norm": 0.7900211101541844, "learning_rate": 8.272142628108866e-06, "loss": 0.4329, "step": 4705 }, { "epoch": 0.29477442490486855, "grad_norm": 0.8831766896045606, "learning_rate": 8.271375549733031e-06, "loss": 0.3973, "step": 4706 }, { "epoch": 0.29483706290421086, "grad_norm": 0.8546121525653743, "learning_rate": 8.27060833670677e-06, "loss": 0.4013, "step": 4707 }, { "epoch": 0.29489970090355316, "grad_norm": 0.7931836788042963, "learning_rate": 8.269840989061664e-06, "loss": 0.3612, "step": 4708 }, { "epoch": 0.2949623389028954, "grad_norm": 0.8195227160827858, "learning_rate": 8.269073506829296e-06, "loss": 0.4047, "step": 4709 }, { "epoch": 0.2950249769022377, "grad_norm": 0.8959139707075572, "learning_rate": 8.268305890041256e-06, "loss": 0.397, "step": 4710 }, { "epoch": 0.29508761490158003, "grad_norm": 0.8863124011573619, "learning_rate": 8.267538138729137e-06, "loss": 0.4244, "step": 4711 }, { "epoch": 0.29515025290092234, "grad_norm": 0.8350412832429028, "learning_rate": 8.266770252924543e-06, "loss": 0.4138, "step": 4712 }, { "epoch": 0.29521289090026465, "grad_norm": 0.8851264126354216, "learning_rate": 8.266002232659079e-06, "loss": 0.4634, "step": 4713 }, { "epoch": 0.29527552889960695, "grad_norm": 0.8060326177736885, "learning_rate": 8.265234077964356e-06, "loss": 0.4252, "step": 4714 }, { "epoch": 0.29533816689894926, "grad_norm": 0.7744147991697258, "learning_rate": 8.264465788871994e-06, "loss": 0.4155, "step": 4715 }, { "epoch": 0.29540080489829157, "grad_norm": 0.9061806952338898, "learning_rate": 8.263697365413614e-06, "loss": 0.4129, "step": 4716 }, { "epoch": 0.2954634428976339, "grad_norm": 0.8279778082601156, "learning_rate": 8.262928807620843e-06, "loss": 0.4345, "step": 4717 }, { "epoch": 0.29552608089697613, "grad_norm": 0.8431018264006284, "learning_rate": 8.262160115525319e-06, "loss": 0.4666, "step": 4718 }, { "epoch": 0.29558871889631844, "grad_norm": 0.8190873177356165, "learning_rate": 8.261391289158678e-06, "loss": 0.3965, "step": 4719 }, { "epoch": 0.29565135689566074, "grad_norm": 0.877895606878929, "learning_rate": 8.260622328552566e-06, "loss": 0.4329, "step": 4720 }, { "epoch": 0.29571399489500305, "grad_norm": 0.9211563845406315, "learning_rate": 8.259853233738633e-06, "loss": 0.4114, "step": 4721 }, { "epoch": 0.29577663289434536, "grad_norm": 0.8423267673283132, "learning_rate": 8.259084004748536e-06, "loss": 0.3923, "step": 4722 }, { "epoch": 0.29583927089368767, "grad_norm": 0.884865234915486, "learning_rate": 8.258314641613937e-06, "loss": 0.4281, "step": 4723 }, { "epoch": 0.29590190889303, "grad_norm": 1.317497980610835, "learning_rate": 8.257545144366505e-06, "loss": 0.4667, "step": 4724 }, { "epoch": 0.2959645468923723, "grad_norm": 0.8565417030561198, "learning_rate": 8.256775513037907e-06, "loss": 0.4398, "step": 4725 }, { "epoch": 0.29602718489171453, "grad_norm": 0.8277713792251994, "learning_rate": 8.256005747659825e-06, "loss": 0.4547, "step": 4726 }, { "epoch": 0.29608982289105684, "grad_norm": 0.7805897015784916, "learning_rate": 8.255235848263943e-06, "loss": 0.4276, "step": 4727 }, { "epoch": 0.29615246089039915, "grad_norm": 0.9434115309574751, "learning_rate": 8.25446581488195e-06, "loss": 0.4521, "step": 4728 }, { "epoch": 0.29621509888974146, "grad_norm": 0.9201587369374226, "learning_rate": 8.253695647545538e-06, "loss": 0.478, "step": 4729 }, { "epoch": 0.29627773688908376, "grad_norm": 0.9334825678237567, "learning_rate": 8.252925346286409e-06, "loss": 0.4319, "step": 4730 }, { "epoch": 0.2963403748884261, "grad_norm": 0.9295124154781305, "learning_rate": 8.25215491113627e-06, "loss": 0.4574, "step": 4731 }, { "epoch": 0.2964030128877684, "grad_norm": 0.8402146740866543, "learning_rate": 8.251384342126831e-06, "loss": 0.4723, "step": 4732 }, { "epoch": 0.2964656508871107, "grad_norm": 0.8348208506812645, "learning_rate": 8.25061363928981e-06, "loss": 0.3951, "step": 4733 }, { "epoch": 0.29652828888645294, "grad_norm": 0.960101951463965, "learning_rate": 8.249842802656927e-06, "loss": 0.4385, "step": 4734 }, { "epoch": 0.29659092688579525, "grad_norm": 0.8260345788532651, "learning_rate": 8.249071832259911e-06, "loss": 0.4377, "step": 4735 }, { "epoch": 0.29665356488513756, "grad_norm": 0.8922425512648903, "learning_rate": 8.248300728130496e-06, "loss": 0.4364, "step": 4736 }, { "epoch": 0.29671620288447986, "grad_norm": 0.8596019573375334, "learning_rate": 8.247529490300422e-06, "loss": 0.4085, "step": 4737 }, { "epoch": 0.29677884088382217, "grad_norm": 0.7952553555021092, "learning_rate": 8.246758118801428e-06, "loss": 0.3888, "step": 4738 }, { "epoch": 0.2968414788831645, "grad_norm": 0.8223329050084007, "learning_rate": 8.245986613665269e-06, "loss": 0.4044, "step": 4739 }, { "epoch": 0.2969041168825068, "grad_norm": 0.7966263161013798, "learning_rate": 8.245214974923696e-06, "loss": 0.4039, "step": 4740 }, { "epoch": 0.2969667548818491, "grad_norm": 0.7962596086901801, "learning_rate": 8.244443202608475e-06, "loss": 0.4089, "step": 4741 }, { "epoch": 0.2970293928811914, "grad_norm": 0.8564701481121144, "learning_rate": 8.24367129675137e-06, "loss": 0.4538, "step": 4742 }, { "epoch": 0.29709203088053365, "grad_norm": 0.8424297684063751, "learning_rate": 8.242899257384152e-06, "loss": 0.4305, "step": 4743 }, { "epoch": 0.29715466887987596, "grad_norm": 0.9158770874651943, "learning_rate": 8.242127084538597e-06, "loss": 0.4038, "step": 4744 }, { "epoch": 0.29721730687921827, "grad_norm": 0.9837219172759751, "learning_rate": 8.24135477824649e-06, "loss": 0.4259, "step": 4745 }, { "epoch": 0.2972799448785606, "grad_norm": 0.9038729849652434, "learning_rate": 8.24058233853962e-06, "loss": 0.4454, "step": 4746 }, { "epoch": 0.2973425828779029, "grad_norm": 0.861771243264598, "learning_rate": 8.239809765449778e-06, "loss": 0.4369, "step": 4747 }, { "epoch": 0.2974052208772452, "grad_norm": 0.836841888223182, "learning_rate": 8.239037059008765e-06, "loss": 0.3748, "step": 4748 }, { "epoch": 0.2974678588765875, "grad_norm": 0.8443352383434148, "learning_rate": 8.238264219248385e-06, "loss": 0.4183, "step": 4749 }, { "epoch": 0.2975304968759298, "grad_norm": 0.8075544259324587, "learning_rate": 8.23749124620045e-06, "loss": 0.4316, "step": 4750 }, { "epoch": 0.29759313487527206, "grad_norm": 0.8022350214931576, "learning_rate": 8.236718139896773e-06, "loss": 0.3974, "step": 4751 }, { "epoch": 0.29765577287461437, "grad_norm": 0.8857233494965696, "learning_rate": 8.23594490036918e-06, "loss": 0.4083, "step": 4752 }, { "epoch": 0.2977184108739567, "grad_norm": 0.8598938966633237, "learning_rate": 8.23517152764949e-06, "loss": 0.4288, "step": 4753 }, { "epoch": 0.297781048873299, "grad_norm": 0.7746531806251533, "learning_rate": 8.234398021769541e-06, "loss": 0.4111, "step": 4754 }, { "epoch": 0.2978436868726413, "grad_norm": 0.7866063498055118, "learning_rate": 8.23362438276117e-06, "loss": 0.4037, "step": 4755 }, { "epoch": 0.2979063248719836, "grad_norm": 0.8279048114811394, "learning_rate": 8.232850610656218e-06, "loss": 0.3838, "step": 4756 }, { "epoch": 0.2979689628713259, "grad_norm": 0.7011076429893535, "learning_rate": 8.232076705486537e-06, "loss": 0.4776, "step": 4757 }, { "epoch": 0.2980316008706682, "grad_norm": 0.8241154150989908, "learning_rate": 8.231302667283976e-06, "loss": 0.4036, "step": 4758 }, { "epoch": 0.2980942388700105, "grad_norm": 0.8011876045915408, "learning_rate": 8.230528496080397e-06, "loss": 0.4273, "step": 4759 }, { "epoch": 0.29815687686935277, "grad_norm": 0.8068255058063365, "learning_rate": 8.229754191907669e-06, "loss": 0.4532, "step": 4760 }, { "epoch": 0.2982195148686951, "grad_norm": 0.897957926655788, "learning_rate": 8.228979754797657e-06, "loss": 0.4553, "step": 4761 }, { "epoch": 0.2982821528680374, "grad_norm": 0.8878247705510075, "learning_rate": 8.228205184782239e-06, "loss": 0.4251, "step": 4762 }, { "epoch": 0.2983447908673797, "grad_norm": 0.8643003654771906, "learning_rate": 8.227430481893295e-06, "loss": 0.4378, "step": 4763 }, { "epoch": 0.298407428866722, "grad_norm": 0.7961872255463938, "learning_rate": 8.226655646162715e-06, "loss": 0.3989, "step": 4764 }, { "epoch": 0.2984700668660643, "grad_norm": 0.8926825493282843, "learning_rate": 8.225880677622391e-06, "loss": 0.4516, "step": 4765 }, { "epoch": 0.2985327048654066, "grad_norm": 0.8348825471494002, "learning_rate": 8.225105576304218e-06, "loss": 0.4176, "step": 4766 }, { "epoch": 0.2985953428647489, "grad_norm": 0.6783020631375238, "learning_rate": 8.224330342240099e-06, "loss": 0.4703, "step": 4767 }, { "epoch": 0.2986579808640912, "grad_norm": 0.85722243813747, "learning_rate": 8.223554975461947e-06, "loss": 0.4451, "step": 4768 }, { "epoch": 0.2987206188634335, "grad_norm": 0.8016467662536704, "learning_rate": 8.222779476001674e-06, "loss": 0.3919, "step": 4769 }, { "epoch": 0.2987832568627758, "grad_norm": 0.7157037419351648, "learning_rate": 8.2220038438912e-06, "loss": 0.4786, "step": 4770 }, { "epoch": 0.2988458948621181, "grad_norm": 0.8568678178649315, "learning_rate": 8.221228079162449e-06, "loss": 0.4236, "step": 4771 }, { "epoch": 0.2989085328614604, "grad_norm": 0.8316278926566025, "learning_rate": 8.220452181847351e-06, "loss": 0.3893, "step": 4772 }, { "epoch": 0.2989711708608027, "grad_norm": 0.9010324763356219, "learning_rate": 8.219676151977844e-06, "loss": 0.4624, "step": 4773 }, { "epoch": 0.299033808860145, "grad_norm": 0.7263168173523297, "learning_rate": 8.21889998958587e-06, "loss": 0.3271, "step": 4774 }, { "epoch": 0.29909644685948733, "grad_norm": 0.8348840949641461, "learning_rate": 8.218123694703375e-06, "loss": 0.3989, "step": 4775 }, { "epoch": 0.2991590848588296, "grad_norm": 0.7913390365302424, "learning_rate": 8.217347267362311e-06, "loss": 0.4196, "step": 4776 }, { "epoch": 0.2992217228581719, "grad_norm": 0.8941159651022715, "learning_rate": 8.216570707594636e-06, "loss": 0.391, "step": 4777 }, { "epoch": 0.2992843608575142, "grad_norm": 0.7520331704664364, "learning_rate": 8.215794015432314e-06, "loss": 0.379, "step": 4778 }, { "epoch": 0.2993469988568565, "grad_norm": 0.7977596792448313, "learning_rate": 8.215017190907315e-06, "loss": 0.4163, "step": 4779 }, { "epoch": 0.2994096368561988, "grad_norm": 0.8055332701795884, "learning_rate": 8.21424023405161e-06, "loss": 0.441, "step": 4780 }, { "epoch": 0.2994722748555411, "grad_norm": 0.84292347134879, "learning_rate": 8.213463144897179e-06, "loss": 0.4261, "step": 4781 }, { "epoch": 0.29953491285488343, "grad_norm": 0.8118064690916408, "learning_rate": 8.21268592347601e-06, "loss": 0.4395, "step": 4782 }, { "epoch": 0.29959755085422574, "grad_norm": 0.8349935270719717, "learning_rate": 8.211908569820092e-06, "loss": 0.4426, "step": 4783 }, { "epoch": 0.29966018885356804, "grad_norm": 0.7953420036288309, "learning_rate": 8.21113108396142e-06, "loss": 0.4039, "step": 4784 }, { "epoch": 0.2997228268529103, "grad_norm": 0.856848167880259, "learning_rate": 8.210353465931999e-06, "loss": 0.3954, "step": 4785 }, { "epoch": 0.2997854648522526, "grad_norm": 0.8479212700252556, "learning_rate": 8.209575715763832e-06, "loss": 0.4051, "step": 4786 }, { "epoch": 0.2998481028515949, "grad_norm": 0.7726235869591852, "learning_rate": 8.208797833488933e-06, "loss": 0.4346, "step": 4787 }, { "epoch": 0.2999107408509372, "grad_norm": 0.7721455658618833, "learning_rate": 8.20801981913932e-06, "loss": 0.4236, "step": 4788 }, { "epoch": 0.2999733788502795, "grad_norm": 0.87365066102569, "learning_rate": 8.207241672747015e-06, "loss": 0.4557, "step": 4789 }, { "epoch": 0.30003601684962183, "grad_norm": 0.8643389791368363, "learning_rate": 8.206463394344048e-06, "loss": 0.4295, "step": 4790 }, { "epoch": 0.30009865484896414, "grad_norm": 0.8401956980417803, "learning_rate": 8.205684983962453e-06, "loss": 0.4248, "step": 4791 }, { "epoch": 0.30016129284830645, "grad_norm": 0.8936212497634842, "learning_rate": 8.204906441634268e-06, "loss": 0.443, "step": 4792 }, { "epoch": 0.3002239308476487, "grad_norm": 0.7876317412949028, "learning_rate": 8.20412776739154e-06, "loss": 0.357, "step": 4793 }, { "epoch": 0.300286568846991, "grad_norm": 1.0216568571415405, "learning_rate": 8.203348961266317e-06, "loss": 0.4806, "step": 4794 }, { "epoch": 0.3003492068463333, "grad_norm": 0.8390659766020679, "learning_rate": 8.202570023290658e-06, "loss": 0.4167, "step": 4795 }, { "epoch": 0.3004118448456756, "grad_norm": 0.8518660639480424, "learning_rate": 8.201790953496621e-06, "loss": 0.4023, "step": 4796 }, { "epoch": 0.30047448284501793, "grad_norm": 0.8859856333132752, "learning_rate": 8.201011751916275e-06, "loss": 0.4453, "step": 4797 }, { "epoch": 0.30053712084436024, "grad_norm": 0.8927487391667446, "learning_rate": 8.200232418581691e-06, "loss": 0.4482, "step": 4798 }, { "epoch": 0.30059975884370255, "grad_norm": 0.7799457866327495, "learning_rate": 8.199452953524947e-06, "loss": 0.392, "step": 4799 }, { "epoch": 0.30066239684304485, "grad_norm": 0.8510166274100202, "learning_rate": 8.198673356778124e-06, "loss": 0.45, "step": 4800 }, { "epoch": 0.3007250348423871, "grad_norm": 0.7975167351032469, "learning_rate": 8.197893628373312e-06, "loss": 0.4562, "step": 4801 }, { "epoch": 0.3007876728417294, "grad_norm": 0.8314077210308375, "learning_rate": 8.197113768342606e-06, "loss": 0.3685, "step": 4802 }, { "epoch": 0.3008503108410717, "grad_norm": 0.9132547792182526, "learning_rate": 8.196333776718105e-06, "loss": 0.4343, "step": 4803 }, { "epoch": 0.30091294884041403, "grad_norm": 0.7858181038325729, "learning_rate": 8.195553653531909e-06, "loss": 0.383, "step": 4804 }, { "epoch": 0.30097558683975634, "grad_norm": 0.8534051271985902, "learning_rate": 8.194773398816133e-06, "loss": 0.4076, "step": 4805 }, { "epoch": 0.30103822483909864, "grad_norm": 0.825207466406068, "learning_rate": 8.193993012602894e-06, "loss": 0.4756, "step": 4806 }, { "epoch": 0.30110086283844095, "grad_norm": 0.7792227649305923, "learning_rate": 8.193212494924306e-06, "loss": 0.4089, "step": 4807 }, { "epoch": 0.30116350083778326, "grad_norm": 0.8937861787419573, "learning_rate": 8.192431845812501e-06, "loss": 0.4254, "step": 4808 }, { "epoch": 0.30122613883712557, "grad_norm": 0.8487317770610026, "learning_rate": 8.191651065299605e-06, "loss": 0.4562, "step": 4809 }, { "epoch": 0.3012887768364678, "grad_norm": 0.7856147574281219, "learning_rate": 8.190870153417765e-06, "loss": 0.3994, "step": 4810 }, { "epoch": 0.3013514148358101, "grad_norm": 0.7950153699465518, "learning_rate": 8.190089110199113e-06, "loss": 0.4002, "step": 4811 }, { "epoch": 0.30141405283515244, "grad_norm": 0.8441467012037421, "learning_rate": 8.189307935675801e-06, "loss": 0.4185, "step": 4812 }, { "epoch": 0.30147669083449474, "grad_norm": 0.8451710383126633, "learning_rate": 8.188526629879982e-06, "loss": 0.3996, "step": 4813 }, { "epoch": 0.30153932883383705, "grad_norm": 0.745399398799224, "learning_rate": 8.187745192843818e-06, "loss": 0.4961, "step": 4814 }, { "epoch": 0.30160196683317936, "grad_norm": 0.7753220686914618, "learning_rate": 8.186963624599468e-06, "loss": 0.3789, "step": 4815 }, { "epoch": 0.30166460483252167, "grad_norm": 0.8681661878056526, "learning_rate": 8.186181925179105e-06, "loss": 0.3959, "step": 4816 }, { "epoch": 0.301727242831864, "grad_norm": 0.7718721939163875, "learning_rate": 8.185400094614901e-06, "loss": 0.3996, "step": 4817 }, { "epoch": 0.3017898808312062, "grad_norm": 0.8194811569017296, "learning_rate": 8.18461813293904e-06, "loss": 0.4035, "step": 4818 }, { "epoch": 0.30185251883054853, "grad_norm": 0.8156483990774613, "learning_rate": 8.183836040183703e-06, "loss": 0.4286, "step": 4819 }, { "epoch": 0.30191515682989084, "grad_norm": 0.8469462115984194, "learning_rate": 8.183053816381086e-06, "loss": 0.4471, "step": 4820 }, { "epoch": 0.30197779482923315, "grad_norm": 0.8104533135128699, "learning_rate": 8.182271461563383e-06, "loss": 0.394, "step": 4821 }, { "epoch": 0.30204043282857546, "grad_norm": 0.8372552872857792, "learning_rate": 8.181488975762795e-06, "loss": 0.4344, "step": 4822 }, { "epoch": 0.30210307082791776, "grad_norm": 0.9129281988546251, "learning_rate": 8.180706359011529e-06, "loss": 0.4263, "step": 4823 }, { "epoch": 0.30216570882726007, "grad_norm": 0.828267372060393, "learning_rate": 8.1799236113418e-06, "loss": 0.398, "step": 4824 }, { "epoch": 0.3022283468266024, "grad_norm": 0.8948491511765189, "learning_rate": 8.179140732785826e-06, "loss": 0.4267, "step": 4825 }, { "epoch": 0.3022909848259447, "grad_norm": 0.8219026891906757, "learning_rate": 8.178357723375829e-06, "loss": 0.4536, "step": 4826 }, { "epoch": 0.30235362282528694, "grad_norm": 0.8223538629737535, "learning_rate": 8.177574583144037e-06, "loss": 0.4006, "step": 4827 }, { "epoch": 0.30241626082462925, "grad_norm": 0.8510620946183169, "learning_rate": 8.176791312122686e-06, "loss": 0.4455, "step": 4828 }, { "epoch": 0.30247889882397155, "grad_norm": 0.7100935691511454, "learning_rate": 8.176007910344015e-06, "loss": 0.4765, "step": 4829 }, { "epoch": 0.30254153682331386, "grad_norm": 0.8807525612488563, "learning_rate": 8.17522437784027e-06, "loss": 0.4079, "step": 4830 }, { "epoch": 0.30260417482265617, "grad_norm": 0.8245686686873493, "learning_rate": 8.174440714643699e-06, "loss": 0.4248, "step": 4831 }, { "epoch": 0.3026668128219985, "grad_norm": 0.8195109045239336, "learning_rate": 8.17365692078656e-06, "loss": 0.4321, "step": 4832 }, { "epoch": 0.3027294508213408, "grad_norm": 0.808952013876439, "learning_rate": 8.172872996301111e-06, "loss": 0.4241, "step": 4833 }, { "epoch": 0.3027920888206831, "grad_norm": 0.7878016177245367, "learning_rate": 8.172088941219621e-06, "loss": 0.4043, "step": 4834 }, { "epoch": 0.30285472682002534, "grad_norm": 0.8413070000413724, "learning_rate": 8.171304755574363e-06, "loss": 0.4397, "step": 4835 }, { "epoch": 0.30291736481936765, "grad_norm": 0.7574825886367877, "learning_rate": 8.170520439397612e-06, "loss": 0.3745, "step": 4836 }, { "epoch": 0.30298000281870996, "grad_norm": 0.8405075311521469, "learning_rate": 8.16973599272165e-06, "loss": 0.426, "step": 4837 }, { "epoch": 0.30304264081805227, "grad_norm": 0.803868364849622, "learning_rate": 8.168951415578767e-06, "loss": 0.4511, "step": 4838 }, { "epoch": 0.3031052788173946, "grad_norm": 0.8257762530645486, "learning_rate": 8.168166708001257e-06, "loss": 0.4017, "step": 4839 }, { "epoch": 0.3031679168167369, "grad_norm": 0.8657759361181306, "learning_rate": 8.167381870021415e-06, "loss": 0.4333, "step": 4840 }, { "epoch": 0.3032305548160792, "grad_norm": 0.8166982430632441, "learning_rate": 8.16659690167155e-06, "loss": 0.4048, "step": 4841 }, { "epoch": 0.3032931928154215, "grad_norm": 0.8316262975743812, "learning_rate": 8.165811802983967e-06, "loss": 0.4175, "step": 4842 }, { "epoch": 0.30335583081476375, "grad_norm": 0.8154739324475015, "learning_rate": 8.165026573990982e-06, "loss": 0.4193, "step": 4843 }, { "epoch": 0.30341846881410606, "grad_norm": 0.8355682516152629, "learning_rate": 8.164241214724917e-06, "loss": 0.4062, "step": 4844 }, { "epoch": 0.30348110681344836, "grad_norm": 0.8314498950611395, "learning_rate": 8.163455725218098e-06, "loss": 0.4199, "step": 4845 }, { "epoch": 0.3035437448127907, "grad_norm": 0.8835295189671241, "learning_rate": 8.16267010550285e-06, "loss": 0.4255, "step": 4846 }, { "epoch": 0.303606382812133, "grad_norm": 0.8282337397008477, "learning_rate": 8.161884355611516e-06, "loss": 0.4221, "step": 4847 }, { "epoch": 0.3036690208114753, "grad_norm": 0.8070132383959294, "learning_rate": 8.161098475576435e-06, "loss": 0.4179, "step": 4848 }, { "epoch": 0.3037316588108176, "grad_norm": 0.8647898691389194, "learning_rate": 8.160312465429952e-06, "loss": 0.4578, "step": 4849 }, { "epoch": 0.3037942968101599, "grad_norm": 0.8415725175066151, "learning_rate": 8.159526325204424e-06, "loss": 0.4458, "step": 4850 }, { "epoch": 0.3038569348095022, "grad_norm": 0.8020430368726048, "learning_rate": 8.158740054932204e-06, "loss": 0.4447, "step": 4851 }, { "epoch": 0.30391957280884446, "grad_norm": 0.6846838471925597, "learning_rate": 8.157953654645658e-06, "loss": 0.4892, "step": 4852 }, { "epoch": 0.30398221080818677, "grad_norm": 0.8128116596439973, "learning_rate": 8.157167124377153e-06, "loss": 0.4132, "step": 4853 }, { "epoch": 0.3040448488075291, "grad_norm": 0.8792337177361875, "learning_rate": 8.156380464159063e-06, "loss": 0.4349, "step": 4854 }, { "epoch": 0.3041074868068714, "grad_norm": 0.8579506875745047, "learning_rate": 8.155593674023769e-06, "loss": 0.419, "step": 4855 }, { "epoch": 0.3041701248062137, "grad_norm": 0.8438261302993842, "learning_rate": 8.154806754003652e-06, "loss": 0.4632, "step": 4856 }, { "epoch": 0.304232762805556, "grad_norm": 0.9034882829501002, "learning_rate": 8.154019704131103e-06, "loss": 0.3934, "step": 4857 }, { "epoch": 0.3042954008048983, "grad_norm": 0.8704857461514026, "learning_rate": 8.153232524438519e-06, "loss": 0.4822, "step": 4858 }, { "epoch": 0.3043580388042406, "grad_norm": 0.7644034748880972, "learning_rate": 8.152445214958298e-06, "loss": 0.4, "step": 4859 }, { "epoch": 0.30442067680358287, "grad_norm": 0.8025965186801952, "learning_rate": 8.151657775722847e-06, "loss": 0.4226, "step": 4860 }, { "epoch": 0.3044833148029252, "grad_norm": 0.8522059749343252, "learning_rate": 8.15087020676458e-06, "loss": 0.4036, "step": 4861 }, { "epoch": 0.3045459528022675, "grad_norm": 0.8282562800850097, "learning_rate": 8.150082508115906e-06, "loss": 0.4091, "step": 4862 }, { "epoch": 0.3046085908016098, "grad_norm": 0.8247350685404883, "learning_rate": 8.149294679809254e-06, "loss": 0.4167, "step": 4863 }, { "epoch": 0.3046712288009521, "grad_norm": 0.8465407921276551, "learning_rate": 8.148506721877048e-06, "loss": 0.4184, "step": 4864 }, { "epoch": 0.3047338668002944, "grad_norm": 0.8501785813551659, "learning_rate": 8.14771863435172e-06, "loss": 0.3791, "step": 4865 }, { "epoch": 0.3047965047996367, "grad_norm": 0.737686495391819, "learning_rate": 8.146930417265708e-06, "loss": 0.4845, "step": 4866 }, { "epoch": 0.304859142798979, "grad_norm": 0.811910324233938, "learning_rate": 8.146142070651458e-06, "loss": 0.3944, "step": 4867 }, { "epoch": 0.30492178079832133, "grad_norm": 0.8205297037800524, "learning_rate": 8.145353594541416e-06, "loss": 0.4239, "step": 4868 }, { "epoch": 0.3049844187976636, "grad_norm": 0.827749799655784, "learning_rate": 8.144564988968035e-06, "loss": 0.4057, "step": 4869 }, { "epoch": 0.3050470567970059, "grad_norm": 0.8208243295446684, "learning_rate": 8.143776253963777e-06, "loss": 0.4332, "step": 4870 }, { "epoch": 0.3051096947963482, "grad_norm": 0.8047862181221954, "learning_rate": 8.142987389561104e-06, "loss": 0.425, "step": 4871 }, { "epoch": 0.3051723327956905, "grad_norm": 0.8421713949520846, "learning_rate": 8.142198395792487e-06, "loss": 0.4429, "step": 4872 }, { "epoch": 0.3052349707950328, "grad_norm": 0.8641319904787239, "learning_rate": 8.1414092726904e-06, "loss": 0.4496, "step": 4873 }, { "epoch": 0.3052976087943751, "grad_norm": 0.8615982798893081, "learning_rate": 8.140620020287324e-06, "loss": 0.4401, "step": 4874 }, { "epoch": 0.3053602467937174, "grad_norm": 0.8720021815550819, "learning_rate": 8.139830638615747e-06, "loss": 0.4183, "step": 4875 }, { "epoch": 0.30542288479305973, "grad_norm": 0.8949589539902064, "learning_rate": 8.139041127708155e-06, "loss": 0.4192, "step": 4876 }, { "epoch": 0.305485522792402, "grad_norm": 0.8153068451611218, "learning_rate": 8.138251487597051e-06, "loss": 0.3912, "step": 4877 }, { "epoch": 0.3055481607917443, "grad_norm": 0.7699242223670766, "learning_rate": 8.137461718314932e-06, "loss": 0.3932, "step": 4878 }, { "epoch": 0.3056107987910866, "grad_norm": 0.8676352900325563, "learning_rate": 8.136671819894307e-06, "loss": 0.4376, "step": 4879 }, { "epoch": 0.3056734367904289, "grad_norm": 0.8499862727696043, "learning_rate": 8.135881792367686e-06, "loss": 0.4082, "step": 4880 }, { "epoch": 0.3057360747897712, "grad_norm": 0.8511750615593016, "learning_rate": 8.135091635767589e-06, "loss": 0.4266, "step": 4881 }, { "epoch": 0.3057987127891135, "grad_norm": 0.834779852432792, "learning_rate": 8.13430135012654e-06, "loss": 0.4102, "step": 4882 }, { "epoch": 0.30586135078845583, "grad_norm": 0.8437414074147557, "learning_rate": 8.133510935477063e-06, "loss": 0.4157, "step": 4883 }, { "epoch": 0.30592398878779814, "grad_norm": 0.7739026625781004, "learning_rate": 8.132720391851696e-06, "loss": 0.3809, "step": 4884 }, { "epoch": 0.3059866267871404, "grad_norm": 0.982987870814428, "learning_rate": 8.131929719282974e-06, "loss": 0.4, "step": 4885 }, { "epoch": 0.3060492647864827, "grad_norm": 0.8169195918062997, "learning_rate": 8.131138917803448e-06, "loss": 0.4141, "step": 4886 }, { "epoch": 0.306111902785825, "grad_norm": 0.8224316188866233, "learning_rate": 8.13034798744566e-06, "loss": 0.4304, "step": 4887 }, { "epoch": 0.3061745407851673, "grad_norm": 0.8563683679183645, "learning_rate": 8.129556928242166e-06, "loss": 0.4193, "step": 4888 }, { "epoch": 0.3062371787845096, "grad_norm": 0.8223657513399143, "learning_rate": 8.12876574022553e-06, "loss": 0.4299, "step": 4889 }, { "epoch": 0.30629981678385193, "grad_norm": 0.8559318147003453, "learning_rate": 8.127974423428316e-06, "loss": 0.4237, "step": 4890 }, { "epoch": 0.30636245478319424, "grad_norm": 0.9492392729698913, "learning_rate": 8.127182977883095e-06, "loss": 0.441, "step": 4891 }, { "epoch": 0.30642509278253655, "grad_norm": 0.7891187433988368, "learning_rate": 8.126391403622439e-06, "loss": 0.3831, "step": 4892 }, { "epoch": 0.30648773078187885, "grad_norm": 0.8613166224224634, "learning_rate": 8.125599700678935e-06, "loss": 0.4213, "step": 4893 }, { "epoch": 0.3065503687812211, "grad_norm": 0.8964463814273704, "learning_rate": 8.124807869085167e-06, "loss": 0.4507, "step": 4894 }, { "epoch": 0.3066130067805634, "grad_norm": 0.8586646557267611, "learning_rate": 8.124015908873727e-06, "loss": 0.4176, "step": 4895 }, { "epoch": 0.3066756447799057, "grad_norm": 0.9011308420791188, "learning_rate": 8.123223820077211e-06, "loss": 0.4625, "step": 4896 }, { "epoch": 0.30673828277924803, "grad_norm": 0.8624691662705563, "learning_rate": 8.122431602728222e-06, "loss": 0.4475, "step": 4897 }, { "epoch": 0.30680092077859034, "grad_norm": 0.8770930044909918, "learning_rate": 8.121639256859372e-06, "loss": 0.4576, "step": 4898 }, { "epoch": 0.30686355877793264, "grad_norm": 0.7349527806781032, "learning_rate": 8.12084678250327e-06, "loss": 0.4945, "step": 4899 }, { "epoch": 0.30692619677727495, "grad_norm": 0.8109728363454414, "learning_rate": 8.120054179692534e-06, "loss": 0.4286, "step": 4900 }, { "epoch": 0.30698883477661726, "grad_norm": 0.8341864809412843, "learning_rate": 8.119261448459788e-06, "loss": 0.4364, "step": 4901 }, { "epoch": 0.3070514727759595, "grad_norm": 0.812603069417161, "learning_rate": 8.118468588837663e-06, "loss": 0.4364, "step": 4902 }, { "epoch": 0.3071141107753018, "grad_norm": 0.8722771493661652, "learning_rate": 8.117675600858793e-06, "loss": 0.4474, "step": 4903 }, { "epoch": 0.3071767487746441, "grad_norm": 0.8086919310020935, "learning_rate": 8.116882484555815e-06, "loss": 0.445, "step": 4904 }, { "epoch": 0.30723938677398643, "grad_norm": 0.9045613745945024, "learning_rate": 8.116089239961376e-06, "loss": 0.441, "step": 4905 }, { "epoch": 0.30730202477332874, "grad_norm": 0.8209165952825479, "learning_rate": 8.115295867108126e-06, "loss": 0.4255, "step": 4906 }, { "epoch": 0.30736466277267105, "grad_norm": 0.8109696389609491, "learning_rate": 8.11450236602872e-06, "loss": 0.4515, "step": 4907 }, { "epoch": 0.30742730077201336, "grad_norm": 0.7825493640798826, "learning_rate": 8.113708736755819e-06, "loss": 0.4153, "step": 4908 }, { "epoch": 0.30748993877135566, "grad_norm": 0.7837181072822184, "learning_rate": 8.112914979322087e-06, "loss": 0.4137, "step": 4909 }, { "epoch": 0.3075525767706979, "grad_norm": 0.8446351270227476, "learning_rate": 8.112121093760197e-06, "loss": 0.3823, "step": 4910 }, { "epoch": 0.3076152147700402, "grad_norm": 0.8244494445234959, "learning_rate": 8.111327080102824e-06, "loss": 0.4163, "step": 4911 }, { "epoch": 0.30767785276938253, "grad_norm": 0.8248440900260504, "learning_rate": 8.110532938382654e-06, "loss": 0.3989, "step": 4912 }, { "epoch": 0.30774049076872484, "grad_norm": 0.8822043230474996, "learning_rate": 8.109738668632369e-06, "loss": 0.4179, "step": 4913 }, { "epoch": 0.30780312876806715, "grad_norm": 0.7754561758218487, "learning_rate": 8.108944270884665e-06, "loss": 0.4074, "step": 4914 }, { "epoch": 0.30786576676740945, "grad_norm": 0.8478004413051494, "learning_rate": 8.108149745172233e-06, "loss": 0.438, "step": 4915 }, { "epoch": 0.30792840476675176, "grad_norm": 0.882776108166062, "learning_rate": 8.107355091527785e-06, "loss": 0.4652, "step": 4916 }, { "epoch": 0.30799104276609407, "grad_norm": 0.8134648813030242, "learning_rate": 8.106560309984025e-06, "loss": 0.4189, "step": 4917 }, { "epoch": 0.3080536807654364, "grad_norm": 0.8286704318742771, "learning_rate": 8.105765400573663e-06, "loss": 0.4292, "step": 4918 }, { "epoch": 0.30811631876477863, "grad_norm": 0.8631943854325177, "learning_rate": 8.104970363329422e-06, "loss": 0.4409, "step": 4919 }, { "epoch": 0.30817895676412094, "grad_norm": 0.8065374803593921, "learning_rate": 8.104175198284024e-06, "loss": 0.4027, "step": 4920 }, { "epoch": 0.30824159476346324, "grad_norm": 0.9359900005169771, "learning_rate": 8.1033799054702e-06, "loss": 0.4657, "step": 4921 }, { "epoch": 0.30830423276280555, "grad_norm": 0.6495660463836165, "learning_rate": 8.102584484920683e-06, "loss": 0.468, "step": 4922 }, { "epoch": 0.30836687076214786, "grad_norm": 0.8296938629969983, "learning_rate": 8.101788936668213e-06, "loss": 0.4572, "step": 4923 }, { "epoch": 0.30842950876149017, "grad_norm": 0.87624273888366, "learning_rate": 8.100993260745532e-06, "loss": 0.3662, "step": 4924 }, { "epoch": 0.3084921467608325, "grad_norm": 0.9470618530481331, "learning_rate": 8.100197457185396e-06, "loss": 0.4159, "step": 4925 }, { "epoch": 0.3085547847601748, "grad_norm": 0.7171048436576927, "learning_rate": 8.099401526020556e-06, "loss": 0.4846, "step": 4926 }, { "epoch": 0.30861742275951703, "grad_norm": 0.7859598921002718, "learning_rate": 8.098605467283773e-06, "loss": 0.4232, "step": 4927 }, { "epoch": 0.30868006075885934, "grad_norm": 0.9226309554378108, "learning_rate": 8.097809281007814e-06, "loss": 0.4413, "step": 4928 }, { "epoch": 0.30874269875820165, "grad_norm": 0.8388959154788819, "learning_rate": 8.09701296722545e-06, "loss": 0.3902, "step": 4929 }, { "epoch": 0.30880533675754396, "grad_norm": 0.78888245830845, "learning_rate": 8.09621652596946e-06, "loss": 0.4237, "step": 4930 }, { "epoch": 0.30886797475688627, "grad_norm": 0.8536990399530642, "learning_rate": 8.095419957272619e-06, "loss": 0.4184, "step": 4931 }, { "epoch": 0.3089306127562286, "grad_norm": 0.8445106759290737, "learning_rate": 8.094623261167722e-06, "loss": 0.3845, "step": 4932 }, { "epoch": 0.3089932507555709, "grad_norm": 0.8771987619248268, "learning_rate": 8.093826437687555e-06, "loss": 0.4263, "step": 4933 }, { "epoch": 0.3090558887549132, "grad_norm": 0.835366935443226, "learning_rate": 8.093029486864917e-06, "loss": 0.4285, "step": 4934 }, { "epoch": 0.3091185267542555, "grad_norm": 0.8138177576936849, "learning_rate": 8.092232408732612e-06, "loss": 0.4434, "step": 4935 }, { "epoch": 0.30918116475359775, "grad_norm": 0.828039562562437, "learning_rate": 8.091435203323448e-06, "loss": 0.4138, "step": 4936 }, { "epoch": 0.30924380275294006, "grad_norm": 0.7974810682386841, "learning_rate": 8.090637870670236e-06, "loss": 0.4076, "step": 4937 }, { "epoch": 0.30930644075228236, "grad_norm": 0.8119097884084192, "learning_rate": 8.089840410805797e-06, "loss": 0.418, "step": 4938 }, { "epoch": 0.30936907875162467, "grad_norm": 0.7750906123057526, "learning_rate": 8.089042823762951e-06, "loss": 0.431, "step": 4939 }, { "epoch": 0.309431716750967, "grad_norm": 0.8509699959805482, "learning_rate": 8.088245109574531e-06, "loss": 0.3907, "step": 4940 }, { "epoch": 0.3094943547503093, "grad_norm": 0.8628915253878398, "learning_rate": 8.087447268273368e-06, "loss": 0.449, "step": 4941 }, { "epoch": 0.3095569927496516, "grad_norm": 0.813293333033416, "learning_rate": 8.086649299892303e-06, "loss": 0.4343, "step": 4942 }, { "epoch": 0.3096196307489939, "grad_norm": 0.8655645067881812, "learning_rate": 8.08585120446418e-06, "loss": 0.4539, "step": 4943 }, { "epoch": 0.30968226874833615, "grad_norm": 0.7128880550623127, "learning_rate": 8.085052982021849e-06, "loss": 0.4958, "step": 4944 }, { "epoch": 0.30974490674767846, "grad_norm": 0.8450090951052504, "learning_rate": 8.084254632598163e-06, "loss": 0.3973, "step": 4945 }, { "epoch": 0.30980754474702077, "grad_norm": 0.8859861618411089, "learning_rate": 8.083456156225985e-06, "loss": 0.4457, "step": 4946 }, { "epoch": 0.3098701827463631, "grad_norm": 0.7792504052742428, "learning_rate": 8.08265755293818e-06, "loss": 0.3705, "step": 4947 }, { "epoch": 0.3099328207457054, "grad_norm": 0.9210790691779724, "learning_rate": 8.081858822767616e-06, "loss": 0.4791, "step": 4948 }, { "epoch": 0.3099954587450477, "grad_norm": 0.8251513425269472, "learning_rate": 8.081059965747174e-06, "loss": 0.4256, "step": 4949 }, { "epoch": 0.31005809674439, "grad_norm": 0.866054015075676, "learning_rate": 8.080260981909729e-06, "loss": 0.429, "step": 4950 }, { "epoch": 0.3101207347437323, "grad_norm": 0.8185218214986945, "learning_rate": 8.07946187128817e-06, "loss": 0.4004, "step": 4951 }, { "epoch": 0.31018337274307456, "grad_norm": 0.7835128840009582, "learning_rate": 8.07866263391539e-06, "loss": 0.408, "step": 4952 }, { "epoch": 0.31024601074241687, "grad_norm": 0.8338749777146633, "learning_rate": 8.077863269824285e-06, "loss": 0.4437, "step": 4953 }, { "epoch": 0.3103086487417592, "grad_norm": 0.8574901148791424, "learning_rate": 8.077063779047755e-06, "loss": 0.4266, "step": 4954 }, { "epoch": 0.3103712867411015, "grad_norm": 1.1355841884685645, "learning_rate": 8.07626416161871e-06, "loss": 0.3942, "step": 4955 }, { "epoch": 0.3104339247404438, "grad_norm": 0.815336252835358, "learning_rate": 8.07546441757006e-06, "loss": 0.406, "step": 4956 }, { "epoch": 0.3104965627397861, "grad_norm": 0.8442167252326819, "learning_rate": 8.074664546934726e-06, "loss": 0.4014, "step": 4957 }, { "epoch": 0.3105592007391284, "grad_norm": 0.8388072079468631, "learning_rate": 8.073864549745627e-06, "loss": 0.3847, "step": 4958 }, { "epoch": 0.3106218387384707, "grad_norm": 0.8492360737290412, "learning_rate": 8.073064426035692e-06, "loss": 0.4605, "step": 4959 }, { "epoch": 0.310684476737813, "grad_norm": 0.9035413859872073, "learning_rate": 8.072264175837857e-06, "loss": 0.4385, "step": 4960 }, { "epoch": 0.31074711473715527, "grad_norm": 0.9270639923554235, "learning_rate": 8.071463799185057e-06, "loss": 0.5253, "step": 4961 }, { "epoch": 0.3108097527364976, "grad_norm": 0.8748521826830172, "learning_rate": 8.070663296110237e-06, "loss": 0.3878, "step": 4962 }, { "epoch": 0.3108723907358399, "grad_norm": 0.8541719602950977, "learning_rate": 8.069862666646346e-06, "loss": 0.4334, "step": 4963 }, { "epoch": 0.3109350287351822, "grad_norm": 0.7634609223676169, "learning_rate": 8.069061910826336e-06, "loss": 0.402, "step": 4964 }, { "epoch": 0.3109976667345245, "grad_norm": 0.763142426438047, "learning_rate": 8.06826102868317e-06, "loss": 0.4287, "step": 4965 }, { "epoch": 0.3110603047338668, "grad_norm": 0.8751859156125904, "learning_rate": 8.06746002024981e-06, "loss": 0.4403, "step": 4966 }, { "epoch": 0.3111229427332091, "grad_norm": 0.8965053476778267, "learning_rate": 8.066658885559226e-06, "loss": 0.4378, "step": 4967 }, { "epoch": 0.3111855807325514, "grad_norm": 0.8162322267952861, "learning_rate": 8.065857624644394e-06, "loss": 0.3905, "step": 4968 }, { "epoch": 0.3112482187318937, "grad_norm": 0.838199710988239, "learning_rate": 8.065056237538294e-06, "loss": 0.4063, "step": 4969 }, { "epoch": 0.311310856731236, "grad_norm": 0.9022551477948377, "learning_rate": 8.064254724273908e-06, "loss": 0.4566, "step": 4970 }, { "epoch": 0.3113734947305783, "grad_norm": 0.8344377487047416, "learning_rate": 8.06345308488423e-06, "loss": 0.3854, "step": 4971 }, { "epoch": 0.3114361327299206, "grad_norm": 0.8391274184291014, "learning_rate": 8.062651319402254e-06, "loss": 0.4257, "step": 4972 }, { "epoch": 0.3114987707292629, "grad_norm": 0.8354804147292167, "learning_rate": 8.061849427860982e-06, "loss": 0.4106, "step": 4973 }, { "epoch": 0.3115614087286052, "grad_norm": 0.8729725925010979, "learning_rate": 8.061047410293417e-06, "loss": 0.4466, "step": 4974 }, { "epoch": 0.3116240467279475, "grad_norm": 0.8533980613985834, "learning_rate": 8.060245266732573e-06, "loss": 0.415, "step": 4975 }, { "epoch": 0.31168668472728983, "grad_norm": 0.8723275814264664, "learning_rate": 8.059442997211468e-06, "loss": 0.4301, "step": 4976 }, { "epoch": 0.31174932272663214, "grad_norm": 0.8480071115350224, "learning_rate": 8.05864060176312e-06, "loss": 0.3996, "step": 4977 }, { "epoch": 0.3118119607259744, "grad_norm": 0.7488636693689433, "learning_rate": 8.057838080420559e-06, "loss": 0.3991, "step": 4978 }, { "epoch": 0.3118745987253167, "grad_norm": 0.8979794161322747, "learning_rate": 8.057035433216812e-06, "loss": 0.4712, "step": 4979 }, { "epoch": 0.311937236724659, "grad_norm": 0.7914175697472948, "learning_rate": 8.056232660184921e-06, "loss": 0.3923, "step": 4980 }, { "epoch": 0.3119998747240013, "grad_norm": 0.8801331297320059, "learning_rate": 8.055429761357926e-06, "loss": 0.4658, "step": 4981 }, { "epoch": 0.3120625127233436, "grad_norm": 0.7228825133718395, "learning_rate": 8.054626736768875e-06, "loss": 0.4714, "step": 4982 }, { "epoch": 0.31212515072268593, "grad_norm": 0.8957796904533588, "learning_rate": 8.05382358645082e-06, "loss": 0.4191, "step": 4983 }, { "epoch": 0.31218778872202824, "grad_norm": 0.9476719254737364, "learning_rate": 8.05302031043682e-06, "loss": 0.4366, "step": 4984 }, { "epoch": 0.31225042672137054, "grad_norm": 0.816339749790841, "learning_rate": 8.052216908759937e-06, "loss": 0.4103, "step": 4985 }, { "epoch": 0.3123130647207128, "grad_norm": 0.8127545411986151, "learning_rate": 8.05141338145324e-06, "loss": 0.4374, "step": 4986 }, { "epoch": 0.3123757027200551, "grad_norm": 0.8467434151950716, "learning_rate": 8.050609728549802e-06, "loss": 0.4326, "step": 4987 }, { "epoch": 0.3124383407193974, "grad_norm": 0.8782682546326628, "learning_rate": 8.0498059500827e-06, "loss": 0.3701, "step": 4988 }, { "epoch": 0.3125009787187397, "grad_norm": 0.7818101168476022, "learning_rate": 8.04900204608502e-06, "loss": 0.4166, "step": 4989 }, { "epoch": 0.312563616718082, "grad_norm": 0.8000255094968827, "learning_rate": 8.04819801658985e-06, "loss": 0.3931, "step": 4990 }, { "epoch": 0.31262625471742433, "grad_norm": 0.7929852991914422, "learning_rate": 8.047393861630284e-06, "loss": 0.3914, "step": 4991 }, { "epoch": 0.31268889271676664, "grad_norm": 0.8911560014367439, "learning_rate": 8.046589581239421e-06, "loss": 0.4407, "step": 4992 }, { "epoch": 0.31275153071610895, "grad_norm": 0.8703301936657539, "learning_rate": 8.045785175450365e-06, "loss": 0.4572, "step": 4993 }, { "epoch": 0.3128141687154512, "grad_norm": 0.7660051621870526, "learning_rate": 8.044980644296225e-06, "loss": 0.3642, "step": 4994 }, { "epoch": 0.3128768067147935, "grad_norm": 0.768686875156736, "learning_rate": 8.044175987810118e-06, "loss": 0.4281, "step": 4995 }, { "epoch": 0.3129394447141358, "grad_norm": 0.8045800405696867, "learning_rate": 8.043371206025161e-06, "loss": 0.4427, "step": 4996 }, { "epoch": 0.3130020827134781, "grad_norm": 0.784087317835657, "learning_rate": 8.042566298974482e-06, "loss": 0.3912, "step": 4997 }, { "epoch": 0.31306472071282043, "grad_norm": 0.7887204878921159, "learning_rate": 8.041761266691208e-06, "loss": 0.4062, "step": 4998 }, { "epoch": 0.31312735871216274, "grad_norm": 0.8566052689562, "learning_rate": 8.040956109208476e-06, "loss": 0.4128, "step": 4999 }, { "epoch": 0.31318999671150505, "grad_norm": 0.8476388561263404, "learning_rate": 8.040150826559427e-06, "loss": 0.3902, "step": 5000 }, { "epoch": 0.31325263471084736, "grad_norm": 0.8511144446920266, "learning_rate": 8.039345418777205e-06, "loss": 0.4283, "step": 5001 }, { "epoch": 0.31331527271018966, "grad_norm": 0.7976906055021273, "learning_rate": 8.03853988589496e-06, "loss": 0.4281, "step": 5002 }, { "epoch": 0.3133779107095319, "grad_norm": 0.7740770995445977, "learning_rate": 8.03773422794585e-06, "loss": 0.4219, "step": 5003 }, { "epoch": 0.3134405487088742, "grad_norm": 0.8435551376003861, "learning_rate": 8.036928444963037e-06, "loss": 0.4506, "step": 5004 }, { "epoch": 0.31350318670821653, "grad_norm": 0.7807971578976349, "learning_rate": 8.036122536979686e-06, "loss": 0.3669, "step": 5005 }, { "epoch": 0.31356582470755884, "grad_norm": 0.8243852545914679, "learning_rate": 8.035316504028966e-06, "loss": 0.4348, "step": 5006 }, { "epoch": 0.31362846270690115, "grad_norm": 0.7853002109598962, "learning_rate": 8.034510346144056e-06, "loss": 0.3925, "step": 5007 }, { "epoch": 0.31369110070624345, "grad_norm": 0.9128611347641988, "learning_rate": 8.033704063358136e-06, "loss": 0.4522, "step": 5008 }, { "epoch": 0.31375373870558576, "grad_norm": 0.846373018516144, "learning_rate": 8.032897655704394e-06, "loss": 0.4015, "step": 5009 }, { "epoch": 0.31381637670492807, "grad_norm": 0.8241957984544122, "learning_rate": 8.032091123216022e-06, "loss": 0.4437, "step": 5010 }, { "epoch": 0.3138790147042703, "grad_norm": 0.866024374581297, "learning_rate": 8.031284465926217e-06, "loss": 0.4325, "step": 5011 }, { "epoch": 0.31394165270361263, "grad_norm": 0.8228674787924728, "learning_rate": 8.030477683868178e-06, "loss": 0.4185, "step": 5012 }, { "epoch": 0.31400429070295494, "grad_norm": 0.8096838335560873, "learning_rate": 8.029670777075119e-06, "loss": 0.4358, "step": 5013 }, { "epoch": 0.31406692870229724, "grad_norm": 0.8341614990585298, "learning_rate": 8.028863745580246e-06, "loss": 0.4275, "step": 5014 }, { "epoch": 0.31412956670163955, "grad_norm": 0.8178754093532089, "learning_rate": 8.02805658941678e-06, "loss": 0.378, "step": 5015 }, { "epoch": 0.31419220470098186, "grad_norm": 0.8652222054014166, "learning_rate": 8.02724930861794e-06, "loss": 0.43, "step": 5016 }, { "epoch": 0.31425484270032417, "grad_norm": 0.8340277150523977, "learning_rate": 8.026441903216962e-06, "loss": 0.4271, "step": 5017 }, { "epoch": 0.3143174806996665, "grad_norm": 0.7893920333173438, "learning_rate": 8.02563437324707e-06, "loss": 0.4124, "step": 5018 }, { "epoch": 0.3143801186990087, "grad_norm": 0.915062720661596, "learning_rate": 8.024826718741507e-06, "loss": 0.4825, "step": 5019 }, { "epoch": 0.31444275669835103, "grad_norm": 0.8390900119629268, "learning_rate": 8.024018939733513e-06, "loss": 0.4412, "step": 5020 }, { "epoch": 0.31450539469769334, "grad_norm": 0.8671743173281535, "learning_rate": 8.02321103625634e-06, "loss": 0.4738, "step": 5021 }, { "epoch": 0.31456803269703565, "grad_norm": 0.8276453929865151, "learning_rate": 8.022403008343238e-06, "loss": 0.445, "step": 5022 }, { "epoch": 0.31463067069637796, "grad_norm": 0.8893083116293428, "learning_rate": 8.021594856027468e-06, "loss": 0.4457, "step": 5023 }, { "epoch": 0.31469330869572026, "grad_norm": 0.8091303699900442, "learning_rate": 8.020786579342295e-06, "loss": 0.3866, "step": 5024 }, { "epoch": 0.31475594669506257, "grad_norm": 0.7859800257243339, "learning_rate": 8.019978178320982e-06, "loss": 0.4135, "step": 5025 }, { "epoch": 0.3148185846944049, "grad_norm": 0.8794379582756335, "learning_rate": 8.01916965299681e-06, "loss": 0.4463, "step": 5026 }, { "epoch": 0.3148812226937472, "grad_norm": 0.8864412235489758, "learning_rate": 8.01836100340305e-06, "loss": 0.425, "step": 5027 }, { "epoch": 0.31494386069308944, "grad_norm": 0.8225971659117948, "learning_rate": 8.017552229572996e-06, "loss": 0.3955, "step": 5028 }, { "epoch": 0.31500649869243175, "grad_norm": 0.7928575098918874, "learning_rate": 8.016743331539929e-06, "loss": 0.3641, "step": 5029 }, { "epoch": 0.31506913669177405, "grad_norm": 0.872350969151839, "learning_rate": 8.015934309337148e-06, "loss": 0.4071, "step": 5030 }, { "epoch": 0.31513177469111636, "grad_norm": 0.7867699515649699, "learning_rate": 8.01512516299795e-06, "loss": 0.4158, "step": 5031 }, { "epoch": 0.31519441269045867, "grad_norm": 0.8529030243877322, "learning_rate": 8.014315892555642e-06, "loss": 0.4277, "step": 5032 }, { "epoch": 0.315257050689801, "grad_norm": 0.8255450829934592, "learning_rate": 8.01350649804353e-06, "loss": 0.4216, "step": 5033 }, { "epoch": 0.3153196886891433, "grad_norm": 0.799898929060876, "learning_rate": 8.012696979494934e-06, "loss": 0.4203, "step": 5034 }, { "epoch": 0.3153823266884856, "grad_norm": 0.7817476789962179, "learning_rate": 8.011887336943169e-06, "loss": 0.412, "step": 5035 }, { "epoch": 0.31544496468782784, "grad_norm": 0.7814810926697635, "learning_rate": 8.011077570421564e-06, "loss": 0.4144, "step": 5036 }, { "epoch": 0.31550760268717015, "grad_norm": 0.8589610132282892, "learning_rate": 8.010267679963445e-06, "loss": 0.4502, "step": 5037 }, { "epoch": 0.31557024068651246, "grad_norm": 0.8215749151957307, "learning_rate": 8.009457665602153e-06, "loss": 0.4183, "step": 5038 }, { "epoch": 0.31563287868585477, "grad_norm": 0.825091821072601, "learning_rate": 8.008647527371022e-06, "loss": 0.3643, "step": 5039 }, { "epoch": 0.3156955166851971, "grad_norm": 0.7860292259965894, "learning_rate": 8.007837265303402e-06, "loss": 0.4071, "step": 5040 }, { "epoch": 0.3157581546845394, "grad_norm": 0.7346543523363515, "learning_rate": 8.00702687943264e-06, "loss": 0.3598, "step": 5041 }, { "epoch": 0.3158207926838817, "grad_norm": 0.7951234030947529, "learning_rate": 8.006216369792096e-06, "loss": 0.4241, "step": 5042 }, { "epoch": 0.315883430683224, "grad_norm": 0.8986690494037133, "learning_rate": 8.005405736415127e-06, "loss": 0.4475, "step": 5043 }, { "epoch": 0.3159460686825663, "grad_norm": 0.8170577202694469, "learning_rate": 8.004594979335099e-06, "loss": 0.4424, "step": 5044 }, { "epoch": 0.31600870668190856, "grad_norm": 0.804111531451695, "learning_rate": 8.003784098585386e-06, "loss": 0.3828, "step": 5045 }, { "epoch": 0.31607134468125087, "grad_norm": 0.8617713450491025, "learning_rate": 8.00297309419936e-06, "loss": 0.4115, "step": 5046 }, { "epoch": 0.3161339826805932, "grad_norm": 0.8777830918735803, "learning_rate": 8.002161966210406e-06, "loss": 0.4659, "step": 5047 }, { "epoch": 0.3161966206799355, "grad_norm": 0.8452242205253307, "learning_rate": 8.001350714651905e-06, "loss": 0.4272, "step": 5048 }, { "epoch": 0.3162592586792778, "grad_norm": 0.7850319669913206, "learning_rate": 8.000539339557254e-06, "loss": 0.409, "step": 5049 }, { "epoch": 0.3163218966786201, "grad_norm": 0.834149838463763, "learning_rate": 7.999727840959848e-06, "loss": 0.435, "step": 5050 }, { "epoch": 0.3163845346779624, "grad_norm": 0.9453386659790933, "learning_rate": 7.998916218893084e-06, "loss": 0.4021, "step": 5051 }, { "epoch": 0.3164471726773047, "grad_norm": 0.841534784778502, "learning_rate": 7.998104473390376e-06, "loss": 0.4165, "step": 5052 }, { "epoch": 0.31650981067664696, "grad_norm": 0.8195936584125011, "learning_rate": 7.997292604485127e-06, "loss": 0.4112, "step": 5053 }, { "epoch": 0.31657244867598927, "grad_norm": 0.8555665142356441, "learning_rate": 7.996480612210761e-06, "loss": 0.4079, "step": 5054 }, { "epoch": 0.3166350866753316, "grad_norm": 0.8272620369619935, "learning_rate": 7.995668496600697e-06, "loss": 0.4236, "step": 5055 }, { "epoch": 0.3166977246746739, "grad_norm": 0.8401699108011883, "learning_rate": 7.99485625768836e-06, "loss": 0.4284, "step": 5056 }, { "epoch": 0.3167603626740162, "grad_norm": 0.7583817945665231, "learning_rate": 7.994043895507184e-06, "loss": 0.4261, "step": 5057 }, { "epoch": 0.3168230006733585, "grad_norm": 0.840327118381637, "learning_rate": 7.993231410090605e-06, "loss": 0.4375, "step": 5058 }, { "epoch": 0.3168856386727008, "grad_norm": 0.7857419956629302, "learning_rate": 7.992418801472066e-06, "loss": 0.4035, "step": 5059 }, { "epoch": 0.3169482766720431, "grad_norm": 0.7721464536042302, "learning_rate": 7.991606069685016e-06, "loss": 0.4295, "step": 5060 }, { "epoch": 0.31701091467138537, "grad_norm": 0.7813907753958246, "learning_rate": 7.990793214762902e-06, "loss": 0.3931, "step": 5061 }, { "epoch": 0.3170735526707277, "grad_norm": 0.8761601871025742, "learning_rate": 7.989980236739185e-06, "loss": 0.4666, "step": 5062 }, { "epoch": 0.31713619067007, "grad_norm": 0.8499312699852934, "learning_rate": 7.989167135647327e-06, "loss": 0.3971, "step": 5063 }, { "epoch": 0.3171988286694123, "grad_norm": 0.8853208479016473, "learning_rate": 7.988353911520794e-06, "loss": 0.428, "step": 5064 }, { "epoch": 0.3172614666687546, "grad_norm": 0.8109396967026704, "learning_rate": 7.98754056439306e-06, "loss": 0.4253, "step": 5065 }, { "epoch": 0.3173241046680969, "grad_norm": 0.7571396257356571, "learning_rate": 7.986727094297601e-06, "loss": 0.3928, "step": 5066 }, { "epoch": 0.3173867426674392, "grad_norm": 0.7258914342160417, "learning_rate": 7.985913501267901e-06, "loss": 0.4888, "step": 5067 }, { "epoch": 0.3174493806667815, "grad_norm": 0.7227400191419782, "learning_rate": 7.985099785337447e-06, "loss": 0.4823, "step": 5068 }, { "epoch": 0.31751201866612383, "grad_norm": 0.870311710580583, "learning_rate": 7.984285946539733e-06, "loss": 0.4319, "step": 5069 }, { "epoch": 0.3175746566654661, "grad_norm": 0.7870891849908813, "learning_rate": 7.983471984908254e-06, "loss": 0.4241, "step": 5070 }, { "epoch": 0.3176372946648084, "grad_norm": 0.7652804801750462, "learning_rate": 7.982657900476514e-06, "loss": 0.3623, "step": 5071 }, { "epoch": 0.3176999326641507, "grad_norm": 0.7454536378953547, "learning_rate": 7.981843693278022e-06, "loss": 0.3678, "step": 5072 }, { "epoch": 0.317762570663493, "grad_norm": 0.8222413615877726, "learning_rate": 7.98102936334629e-06, "loss": 0.4307, "step": 5073 }, { "epoch": 0.3178252086628353, "grad_norm": 0.8167878270057956, "learning_rate": 7.980214910714838e-06, "loss": 0.4125, "step": 5074 }, { "epoch": 0.3178878466621776, "grad_norm": 0.7921746290636332, "learning_rate": 7.979400335417185e-06, "loss": 0.4308, "step": 5075 }, { "epoch": 0.3179504846615199, "grad_norm": 0.9115981966697431, "learning_rate": 7.978585637486861e-06, "loss": 0.4456, "step": 5076 }, { "epoch": 0.31801312266086224, "grad_norm": 0.817251161767991, "learning_rate": 7.977770816957399e-06, "loss": 0.3928, "step": 5077 }, { "epoch": 0.3180757606602045, "grad_norm": 0.8752118185182527, "learning_rate": 7.97695587386234e-06, "loss": 0.4763, "step": 5078 }, { "epoch": 0.3181383986595468, "grad_norm": 0.8228199238801946, "learning_rate": 7.976140808235222e-06, "loss": 0.4236, "step": 5079 }, { "epoch": 0.3182010366588891, "grad_norm": 0.8354934210154288, "learning_rate": 7.975325620109597e-06, "loss": 0.3972, "step": 5080 }, { "epoch": 0.3182636746582314, "grad_norm": 0.764551719396803, "learning_rate": 7.974510309519017e-06, "loss": 0.3866, "step": 5081 }, { "epoch": 0.3183263126575737, "grad_norm": 0.8183191563884643, "learning_rate": 7.97369487649704e-06, "loss": 0.4049, "step": 5082 }, { "epoch": 0.318388950656916, "grad_norm": 0.8192069499086723, "learning_rate": 7.972879321077231e-06, "loss": 0.3928, "step": 5083 }, { "epoch": 0.31845158865625833, "grad_norm": 0.8749200938098107, "learning_rate": 7.972063643293158e-06, "loss": 0.4721, "step": 5084 }, { "epoch": 0.31851422665560064, "grad_norm": 0.8331134401759855, "learning_rate": 7.971247843178393e-06, "loss": 0.4593, "step": 5085 }, { "epoch": 0.31857686465494295, "grad_norm": 0.693272219480031, "learning_rate": 7.970431920766515e-06, "loss": 0.4641, "step": 5086 }, { "epoch": 0.3186395026542852, "grad_norm": 0.766556389996861, "learning_rate": 7.969615876091107e-06, "loss": 0.4108, "step": 5087 }, { "epoch": 0.3187021406536275, "grad_norm": 0.7688057877985655, "learning_rate": 7.96879970918576e-06, "loss": 0.4192, "step": 5088 }, { "epoch": 0.3187647786529698, "grad_norm": 0.8625380555648381, "learning_rate": 7.967983420084065e-06, "loss": 0.3959, "step": 5089 }, { "epoch": 0.3188274166523121, "grad_norm": 0.7795298325638965, "learning_rate": 7.967167008819622e-06, "loss": 0.3985, "step": 5090 }, { "epoch": 0.31889005465165443, "grad_norm": 0.8000182880707414, "learning_rate": 7.966350475426034e-06, "loss": 0.4073, "step": 5091 }, { "epoch": 0.31895269265099674, "grad_norm": 0.7639855399568938, "learning_rate": 7.965533819936911e-06, "loss": 0.4267, "step": 5092 }, { "epoch": 0.31901533065033905, "grad_norm": 0.8634515792513638, "learning_rate": 7.964717042385864e-06, "loss": 0.3667, "step": 5093 }, { "epoch": 0.31907796864968135, "grad_norm": 0.9153722687850432, "learning_rate": 7.963900142806513e-06, "loss": 0.4343, "step": 5094 }, { "epoch": 0.3191406066490236, "grad_norm": 0.806969290057027, "learning_rate": 7.963083121232482e-06, "loss": 0.4374, "step": 5095 }, { "epoch": 0.3192032446483659, "grad_norm": 0.8011560804805132, "learning_rate": 7.962265977697401e-06, "loss": 0.3964, "step": 5096 }, { "epoch": 0.3192658826477082, "grad_norm": 0.8194022514167234, "learning_rate": 7.961448712234902e-06, "loss": 0.4141, "step": 5097 }, { "epoch": 0.31932852064705053, "grad_norm": 0.7889112894621251, "learning_rate": 7.960631324878624e-06, "loss": 0.405, "step": 5098 }, { "epoch": 0.31939115864639284, "grad_norm": 0.8884315233226531, "learning_rate": 7.959813815662211e-06, "loss": 0.4279, "step": 5099 }, { "epoch": 0.31945379664573514, "grad_norm": 0.878282475662147, "learning_rate": 7.958996184619312e-06, "loss": 0.4255, "step": 5100 }, { "epoch": 0.31951643464507745, "grad_norm": 0.7843157168071532, "learning_rate": 7.95817843178358e-06, "loss": 0.3832, "step": 5101 }, { "epoch": 0.31957907264441976, "grad_norm": 0.8243283523146547, "learning_rate": 7.957360557188678e-06, "loss": 0.3883, "step": 5102 }, { "epoch": 0.319641710643762, "grad_norm": 0.8811077643788153, "learning_rate": 7.956542560868264e-06, "loss": 0.434, "step": 5103 }, { "epoch": 0.3197043486431043, "grad_norm": 0.8353962638517818, "learning_rate": 7.95572444285601e-06, "loss": 0.4046, "step": 5104 }, { "epoch": 0.3197669866424466, "grad_norm": 0.8912945681717526, "learning_rate": 7.95490620318559e-06, "loss": 0.4379, "step": 5105 }, { "epoch": 0.31982962464178893, "grad_norm": 0.8430068866310548, "learning_rate": 7.954087841890681e-06, "loss": 0.414, "step": 5106 }, { "epoch": 0.31989226264113124, "grad_norm": 0.8035373109116811, "learning_rate": 7.953269359004968e-06, "loss": 0.4228, "step": 5107 }, { "epoch": 0.31995490064047355, "grad_norm": 0.8471922514389083, "learning_rate": 7.95245075456214e-06, "loss": 0.4239, "step": 5108 }, { "epoch": 0.32001753863981586, "grad_norm": 0.9214329088778995, "learning_rate": 7.951632028595893e-06, "loss": 0.4291, "step": 5109 }, { "epoch": 0.32008017663915816, "grad_norm": 0.816719037346325, "learning_rate": 7.950813181139921e-06, "loss": 0.4168, "step": 5110 }, { "epoch": 0.3201428146385005, "grad_norm": 0.8806120189220769, "learning_rate": 7.949994212227932e-06, "loss": 0.4418, "step": 5111 }, { "epoch": 0.3202054526378427, "grad_norm": 0.8486901878545502, "learning_rate": 7.949175121893635e-06, "loss": 0.4064, "step": 5112 }, { "epoch": 0.32026809063718503, "grad_norm": 0.8593090948505816, "learning_rate": 7.94835591017074e-06, "loss": 0.4099, "step": 5113 }, { "epoch": 0.32033072863652734, "grad_norm": 0.8798046875153371, "learning_rate": 7.947536577092971e-06, "loss": 0.4434, "step": 5114 }, { "epoch": 0.32039336663586965, "grad_norm": 0.822305067817988, "learning_rate": 7.946717122694048e-06, "loss": 0.451, "step": 5115 }, { "epoch": 0.32045600463521196, "grad_norm": 0.766169081319551, "learning_rate": 7.945897547007702e-06, "loss": 0.3676, "step": 5116 }, { "epoch": 0.32051864263455426, "grad_norm": 0.7887326480840396, "learning_rate": 7.945077850067665e-06, "loss": 0.3708, "step": 5117 }, { "epoch": 0.32058128063389657, "grad_norm": 0.7612562744926782, "learning_rate": 7.944258031907678e-06, "loss": 0.3763, "step": 5118 }, { "epoch": 0.3206439186332389, "grad_norm": 0.8767722188366937, "learning_rate": 7.943438092561483e-06, "loss": 0.4076, "step": 5119 }, { "epoch": 0.32070655663258113, "grad_norm": 0.8558357678496453, "learning_rate": 7.942618032062831e-06, "loss": 0.411, "step": 5120 }, { "epoch": 0.32076919463192344, "grad_norm": 0.8976153762458131, "learning_rate": 7.941797850445474e-06, "loss": 0.4434, "step": 5121 }, { "epoch": 0.32083183263126575, "grad_norm": 0.8304389417641109, "learning_rate": 7.940977547743172e-06, "loss": 0.3989, "step": 5122 }, { "epoch": 0.32089447063060805, "grad_norm": 0.7633313427952682, "learning_rate": 7.940157123989687e-06, "loss": 0.3787, "step": 5123 }, { "epoch": 0.32095710862995036, "grad_norm": 0.8240260637543002, "learning_rate": 7.93933657921879e-06, "loss": 0.3975, "step": 5124 }, { "epoch": 0.32101974662929267, "grad_norm": 0.9156991106961266, "learning_rate": 7.938515913464255e-06, "loss": 0.4525, "step": 5125 }, { "epoch": 0.321082384628635, "grad_norm": 0.8423178679928036, "learning_rate": 7.937695126759856e-06, "loss": 0.4215, "step": 5126 }, { "epoch": 0.3211450226279773, "grad_norm": 0.7585947538839439, "learning_rate": 7.936874219139384e-06, "loss": 0.4144, "step": 5127 }, { "epoch": 0.3212076606273196, "grad_norm": 0.7542764520504074, "learning_rate": 7.936053190636623e-06, "loss": 0.4218, "step": 5128 }, { "epoch": 0.32127029862666184, "grad_norm": 0.8255419977216665, "learning_rate": 7.935232041285367e-06, "loss": 0.4554, "step": 5129 }, { "epoch": 0.32133293662600415, "grad_norm": 0.9513257202795389, "learning_rate": 7.934410771119416e-06, "loss": 0.4777, "step": 5130 }, { "epoch": 0.32139557462534646, "grad_norm": 0.8714961997552328, "learning_rate": 7.933589380172573e-06, "loss": 0.3996, "step": 5131 }, { "epoch": 0.32145821262468877, "grad_norm": 0.8387507151359753, "learning_rate": 7.932767868478645e-06, "loss": 0.4168, "step": 5132 }, { "epoch": 0.3215208506240311, "grad_norm": 0.9113427581211256, "learning_rate": 7.93194623607145e-06, "loss": 0.4449, "step": 5133 }, { "epoch": 0.3215834886233734, "grad_norm": 0.9384277128389048, "learning_rate": 7.931124482984802e-06, "loss": 0.4113, "step": 5134 }, { "epoch": 0.3216461266227157, "grad_norm": 0.8155859865800021, "learning_rate": 7.930302609252527e-06, "loss": 0.4019, "step": 5135 }, { "epoch": 0.321708764622058, "grad_norm": 0.8678883633706171, "learning_rate": 7.929480614908452e-06, "loss": 0.394, "step": 5136 }, { "epoch": 0.32177140262140025, "grad_norm": 0.838704309371698, "learning_rate": 7.928658499986413e-06, "loss": 0.388, "step": 5137 }, { "epoch": 0.32183404062074256, "grad_norm": 0.8041954657203737, "learning_rate": 7.927836264520243e-06, "loss": 0.4109, "step": 5138 }, { "epoch": 0.32189667862008486, "grad_norm": 0.8162250247171499, "learning_rate": 7.927013908543791e-06, "loss": 0.3978, "step": 5139 }, { "epoch": 0.32195931661942717, "grad_norm": 0.705940218522006, "learning_rate": 7.926191432090904e-06, "loss": 0.4794, "step": 5140 }, { "epoch": 0.3220219546187695, "grad_norm": 0.8106034938412581, "learning_rate": 7.925368835195433e-06, "loss": 0.437, "step": 5141 }, { "epoch": 0.3220845926181118, "grad_norm": 0.8220792280345631, "learning_rate": 7.92454611789124e-06, "loss": 0.3992, "step": 5142 }, { "epoch": 0.3221472306174541, "grad_norm": 0.9381709283294348, "learning_rate": 7.923723280212183e-06, "loss": 0.4565, "step": 5143 }, { "epoch": 0.3222098686167964, "grad_norm": 0.8004151985912675, "learning_rate": 7.922900322192136e-06, "loss": 0.4075, "step": 5144 }, { "epoch": 0.32227250661613865, "grad_norm": 0.8175780075222279, "learning_rate": 7.922077243864967e-06, "loss": 0.3918, "step": 5145 }, { "epoch": 0.32233514461548096, "grad_norm": 0.9086848845550608, "learning_rate": 7.921254045264557e-06, "loss": 0.4068, "step": 5146 }, { "epoch": 0.32239778261482327, "grad_norm": 0.8571611337233178, "learning_rate": 7.920430726424788e-06, "loss": 0.4283, "step": 5147 }, { "epoch": 0.3224604206141656, "grad_norm": 0.8099183880975428, "learning_rate": 7.91960728737955e-06, "loss": 0.4185, "step": 5148 }, { "epoch": 0.3225230586135079, "grad_norm": 0.8294622248588632, "learning_rate": 7.918783728162732e-06, "loss": 0.4279, "step": 5149 }, { "epoch": 0.3225856966128502, "grad_norm": 0.8426016627548362, "learning_rate": 7.917960048808234e-06, "loss": 0.4142, "step": 5150 }, { "epoch": 0.3226483346121925, "grad_norm": 0.8556031218288799, "learning_rate": 7.91713624934996e-06, "loss": 0.3956, "step": 5151 }, { "epoch": 0.3227109726115348, "grad_norm": 0.8273624424819198, "learning_rate": 7.916312329821816e-06, "loss": 0.3747, "step": 5152 }, { "epoch": 0.3227736106108771, "grad_norm": 0.8293687354486595, "learning_rate": 7.915488290257716e-06, "loss": 0.4028, "step": 5153 }, { "epoch": 0.32283624861021937, "grad_norm": 0.8477889067520357, "learning_rate": 7.914664130691574e-06, "loss": 0.4319, "step": 5154 }, { "epoch": 0.3228988866095617, "grad_norm": 0.9067019239208159, "learning_rate": 7.913839851157317e-06, "loss": 0.4516, "step": 5155 }, { "epoch": 0.322961524608904, "grad_norm": 0.840318193242577, "learning_rate": 7.913015451688872e-06, "loss": 0.4026, "step": 5156 }, { "epoch": 0.3230241626082463, "grad_norm": 0.8432644516422114, "learning_rate": 7.912190932320169e-06, "loss": 0.4483, "step": 5157 }, { "epoch": 0.3230868006075886, "grad_norm": 0.8088876450774706, "learning_rate": 7.91136629308515e-06, "loss": 0.4212, "step": 5158 }, { "epoch": 0.3231494386069309, "grad_norm": 0.8268646435789943, "learning_rate": 7.91054153401775e-06, "loss": 0.4075, "step": 5159 }, { "epoch": 0.3232120766062732, "grad_norm": 0.8145328858992478, "learning_rate": 7.909716655151923e-06, "loss": 0.4057, "step": 5160 }, { "epoch": 0.3232747146056155, "grad_norm": 0.8571044209264677, "learning_rate": 7.908891656521616e-06, "loss": 0.4159, "step": 5161 }, { "epoch": 0.3233373526049578, "grad_norm": 0.8438819477544031, "learning_rate": 7.90806653816079e-06, "loss": 0.4235, "step": 5162 }, { "epoch": 0.3233999906043001, "grad_norm": 0.823826467282001, "learning_rate": 7.907241300103404e-06, "loss": 0.4161, "step": 5163 }, { "epoch": 0.3234626286036424, "grad_norm": 0.826184893237902, "learning_rate": 7.906415942383428e-06, "loss": 0.4019, "step": 5164 }, { "epoch": 0.3235252666029847, "grad_norm": 0.79229232498212, "learning_rate": 7.90559046503483e-06, "loss": 0.4009, "step": 5165 }, { "epoch": 0.323587904602327, "grad_norm": 0.822214733564842, "learning_rate": 7.904764868091592e-06, "loss": 0.4148, "step": 5166 }, { "epoch": 0.3236505426016693, "grad_norm": 0.8352103384290857, "learning_rate": 7.903939151587694e-06, "loss": 0.437, "step": 5167 }, { "epoch": 0.3237131806010116, "grad_norm": 0.8570159767797313, "learning_rate": 7.903113315557118e-06, "loss": 0.4458, "step": 5168 }, { "epoch": 0.3237758186003539, "grad_norm": 0.8379369054574387, "learning_rate": 7.90228736003386e-06, "loss": 0.4388, "step": 5169 }, { "epoch": 0.3238384565996962, "grad_norm": 0.8474233460325179, "learning_rate": 7.901461285051917e-06, "loss": 0.4382, "step": 5170 }, { "epoch": 0.3239010945990385, "grad_norm": 0.8086763967693164, "learning_rate": 7.900635090645288e-06, "loss": 0.4122, "step": 5171 }, { "epoch": 0.3239637325983808, "grad_norm": 0.8574321962283564, "learning_rate": 7.899808776847979e-06, "loss": 0.4328, "step": 5172 }, { "epoch": 0.3240263705977231, "grad_norm": 0.8537750876897777, "learning_rate": 7.898982343694006e-06, "loss": 0.4013, "step": 5173 }, { "epoch": 0.3240890085970654, "grad_norm": 0.7537857920242367, "learning_rate": 7.89815579121738e-06, "loss": 0.4022, "step": 5174 }, { "epoch": 0.3241516465964077, "grad_norm": 0.8020932699553239, "learning_rate": 7.897329119452123e-06, "loss": 0.4133, "step": 5175 }, { "epoch": 0.32421428459575, "grad_norm": 0.8909573668487926, "learning_rate": 7.89650232843226e-06, "loss": 0.4251, "step": 5176 }, { "epoch": 0.32427692259509233, "grad_norm": 0.8252855755328361, "learning_rate": 7.895675418191826e-06, "loss": 0.4502, "step": 5177 }, { "epoch": 0.32433956059443464, "grad_norm": 0.7606499580307765, "learning_rate": 7.894848388764854e-06, "loss": 0.4147, "step": 5178 }, { "epoch": 0.3244021985937769, "grad_norm": 0.7608191523306237, "learning_rate": 7.894021240185384e-06, "loss": 0.4247, "step": 5179 }, { "epoch": 0.3244648365931192, "grad_norm": 0.8516529021772351, "learning_rate": 7.893193972487465e-06, "loss": 0.4602, "step": 5180 }, { "epoch": 0.3245274745924615, "grad_norm": 0.8437478543755268, "learning_rate": 7.892366585705142e-06, "loss": 0.4378, "step": 5181 }, { "epoch": 0.3245901125918038, "grad_norm": 0.7588707808856535, "learning_rate": 7.891539079872475e-06, "loss": 0.4104, "step": 5182 }, { "epoch": 0.3246527505911461, "grad_norm": 0.8433952312948759, "learning_rate": 7.890711455023523e-06, "loss": 0.4228, "step": 5183 }, { "epoch": 0.32471538859048843, "grad_norm": 0.6971345138150729, "learning_rate": 7.889883711192351e-06, "loss": 0.4725, "step": 5184 }, { "epoch": 0.32477802658983074, "grad_norm": 0.8971534143218326, "learning_rate": 7.88905584841303e-06, "loss": 0.4203, "step": 5185 }, { "epoch": 0.32484066458917304, "grad_norm": 0.8464316643830126, "learning_rate": 7.888227866719632e-06, "loss": 0.4246, "step": 5186 }, { "epoch": 0.3249033025885153, "grad_norm": 0.8053237142071493, "learning_rate": 7.887399766146238e-06, "loss": 0.4429, "step": 5187 }, { "epoch": 0.3249659405878576, "grad_norm": 0.8312932704903643, "learning_rate": 7.886571546726937e-06, "loss": 0.4555, "step": 5188 }, { "epoch": 0.3250285785871999, "grad_norm": 0.847991799994537, "learning_rate": 7.885743208495812e-06, "loss": 0.4215, "step": 5189 }, { "epoch": 0.3250912165865422, "grad_norm": 0.8395216531459325, "learning_rate": 7.884914751486965e-06, "loss": 0.383, "step": 5190 }, { "epoch": 0.3251538545858845, "grad_norm": 0.7954324970136867, "learning_rate": 7.884086175734488e-06, "loss": 0.3952, "step": 5191 }, { "epoch": 0.32521649258522684, "grad_norm": 0.7494853546601952, "learning_rate": 7.883257481272491e-06, "loss": 0.3926, "step": 5192 }, { "epoch": 0.32527913058456914, "grad_norm": 0.8766569986337366, "learning_rate": 7.882428668135078e-06, "loss": 0.4079, "step": 5193 }, { "epoch": 0.32534176858391145, "grad_norm": 0.8844268047526014, "learning_rate": 7.88159973635637e-06, "loss": 0.4368, "step": 5194 }, { "epoch": 0.32540440658325376, "grad_norm": 0.8427083573910293, "learning_rate": 7.880770685970478e-06, "loss": 0.4473, "step": 5195 }, { "epoch": 0.325467044582596, "grad_norm": 0.7743768442109207, "learning_rate": 7.879941517011531e-06, "loss": 0.4246, "step": 5196 }, { "epoch": 0.3255296825819383, "grad_norm": 0.8113787078393224, "learning_rate": 7.879112229513658e-06, "loss": 0.4347, "step": 5197 }, { "epoch": 0.3255923205812806, "grad_norm": 1.1686974398568224, "learning_rate": 7.87828282351099e-06, "loss": 0.4002, "step": 5198 }, { "epoch": 0.32565495858062293, "grad_norm": 0.7523577953243661, "learning_rate": 7.877453299037667e-06, "loss": 0.3869, "step": 5199 }, { "epoch": 0.32571759657996524, "grad_norm": 0.7965113550704139, "learning_rate": 7.876623656127833e-06, "loss": 0.4173, "step": 5200 }, { "epoch": 0.32578023457930755, "grad_norm": 0.8047413241699718, "learning_rate": 7.875793894815635e-06, "loss": 0.3914, "step": 5201 }, { "epoch": 0.32584287257864986, "grad_norm": 0.8711412063980055, "learning_rate": 7.874964015135225e-06, "loss": 0.4461, "step": 5202 }, { "epoch": 0.32590551057799216, "grad_norm": 0.8471799604052368, "learning_rate": 7.874134017120765e-06, "loss": 0.3971, "step": 5203 }, { "epoch": 0.3259681485773344, "grad_norm": 0.9086624330844046, "learning_rate": 7.873303900806415e-06, "loss": 0.409, "step": 5204 }, { "epoch": 0.3260307865766767, "grad_norm": 0.8610280774038157, "learning_rate": 7.872473666226342e-06, "loss": 0.4323, "step": 5205 }, { "epoch": 0.32609342457601903, "grad_norm": 0.7975886151189318, "learning_rate": 7.871643313414718e-06, "loss": 0.4098, "step": 5206 }, { "epoch": 0.32615606257536134, "grad_norm": 0.8221172437951643, "learning_rate": 7.870812842405727e-06, "loss": 0.4046, "step": 5207 }, { "epoch": 0.32621870057470365, "grad_norm": 0.8957067861082115, "learning_rate": 7.869982253233542e-06, "loss": 0.4347, "step": 5208 }, { "epoch": 0.32628133857404595, "grad_norm": 0.8137599892687507, "learning_rate": 7.869151545932357e-06, "loss": 0.4035, "step": 5209 }, { "epoch": 0.32634397657338826, "grad_norm": 0.8776500273764155, "learning_rate": 7.868320720536361e-06, "loss": 0.4224, "step": 5210 }, { "epoch": 0.32640661457273057, "grad_norm": 0.7441117667789882, "learning_rate": 7.867489777079753e-06, "loss": 0.4798, "step": 5211 }, { "epoch": 0.3264692525720728, "grad_norm": 0.8800383922663001, "learning_rate": 7.866658715596731e-06, "loss": 0.4608, "step": 5212 }, { "epoch": 0.32653189057141513, "grad_norm": 0.7949715905612069, "learning_rate": 7.865827536121507e-06, "loss": 0.4272, "step": 5213 }, { "epoch": 0.32659452857075744, "grad_norm": 0.8637128771942791, "learning_rate": 7.864996238688288e-06, "loss": 0.4559, "step": 5214 }, { "epoch": 0.32665716657009974, "grad_norm": 0.8031605191192978, "learning_rate": 7.864164823331293e-06, "loss": 0.4692, "step": 5215 }, { "epoch": 0.32671980456944205, "grad_norm": 0.882526402663248, "learning_rate": 7.863333290084742e-06, "loss": 0.3796, "step": 5216 }, { "epoch": 0.32678244256878436, "grad_norm": 0.8041391292570598, "learning_rate": 7.86250163898286e-06, "loss": 0.4459, "step": 5217 }, { "epoch": 0.32684508056812667, "grad_norm": 0.8174635772643305, "learning_rate": 7.86166987005988e-06, "loss": 0.4201, "step": 5218 }, { "epoch": 0.326907718567469, "grad_norm": 0.830859391813682, "learning_rate": 7.860837983350038e-06, "loss": 0.4025, "step": 5219 }, { "epoch": 0.3269703565668113, "grad_norm": 0.8377250084170831, "learning_rate": 7.860005978887574e-06, "loss": 0.4449, "step": 5220 }, { "epoch": 0.32703299456615353, "grad_norm": 0.836468019483843, "learning_rate": 7.859173856706731e-06, "loss": 0.4162, "step": 5221 }, { "epoch": 0.32709563256549584, "grad_norm": 0.8321822606085888, "learning_rate": 7.858341616841764e-06, "loss": 0.4282, "step": 5222 }, { "epoch": 0.32715827056483815, "grad_norm": 0.8462278141045863, "learning_rate": 7.857509259326923e-06, "loss": 0.4168, "step": 5223 }, { "epoch": 0.32722090856418046, "grad_norm": 0.7879583520291029, "learning_rate": 7.856676784196472e-06, "loss": 0.4216, "step": 5224 }, { "epoch": 0.32728354656352276, "grad_norm": 0.8171829645752693, "learning_rate": 7.855844191484673e-06, "loss": 0.3906, "step": 5225 }, { "epoch": 0.3273461845628651, "grad_norm": 0.8110149520865066, "learning_rate": 7.855011481225798e-06, "loss": 0.4349, "step": 5226 }, { "epoch": 0.3274088225622074, "grad_norm": 0.7403512867452526, "learning_rate": 7.854178653454121e-06, "loss": 0.39, "step": 5227 }, { "epoch": 0.3274714605615497, "grad_norm": 1.518850867008762, "learning_rate": 7.85334570820392e-06, "loss": 0.4213, "step": 5228 }, { "epoch": 0.32753409856089194, "grad_norm": 0.7465356385351459, "learning_rate": 7.85251264550948e-06, "loss": 0.4701, "step": 5229 }, { "epoch": 0.32759673656023425, "grad_norm": 0.7517685486742868, "learning_rate": 7.851679465405091e-06, "loss": 0.4616, "step": 5230 }, { "epoch": 0.32765937455957655, "grad_norm": 1.017334999609808, "learning_rate": 7.850846167925046e-06, "loss": 0.4295, "step": 5231 }, { "epoch": 0.32772201255891886, "grad_norm": 0.8136609292698928, "learning_rate": 7.850012753103641e-06, "loss": 0.4254, "step": 5232 }, { "epoch": 0.32778465055826117, "grad_norm": 0.8873884659514182, "learning_rate": 7.849179220975185e-06, "loss": 0.4352, "step": 5233 }, { "epoch": 0.3278472885576035, "grad_norm": 0.8842457946901302, "learning_rate": 7.848345571573983e-06, "loss": 0.4425, "step": 5234 }, { "epoch": 0.3279099265569458, "grad_norm": 0.9572974206899777, "learning_rate": 7.847511804934347e-06, "loss": 0.375, "step": 5235 }, { "epoch": 0.3279725645562881, "grad_norm": 0.796723952865629, "learning_rate": 7.846677921090597e-06, "loss": 0.3728, "step": 5236 }, { "epoch": 0.3280352025556304, "grad_norm": 1.0073057410781978, "learning_rate": 7.845843920077057e-06, "loss": 0.4122, "step": 5237 }, { "epoch": 0.32809784055497265, "grad_norm": 0.8222227857596126, "learning_rate": 7.845009801928051e-06, "loss": 0.417, "step": 5238 }, { "epoch": 0.32816047855431496, "grad_norm": 0.8809618416070407, "learning_rate": 7.844175566677915e-06, "loss": 0.4292, "step": 5239 }, { "epoch": 0.32822311655365727, "grad_norm": 0.8322989468295334, "learning_rate": 7.843341214360985e-06, "loss": 0.3764, "step": 5240 }, { "epoch": 0.3282857545529996, "grad_norm": 0.8262459146177296, "learning_rate": 7.842506745011601e-06, "loss": 0.3759, "step": 5241 }, { "epoch": 0.3283483925523419, "grad_norm": 0.8370993609297789, "learning_rate": 7.841672158664113e-06, "loss": 0.4547, "step": 5242 }, { "epoch": 0.3284110305516842, "grad_norm": 0.8259332394883399, "learning_rate": 7.840837455352872e-06, "loss": 0.4331, "step": 5243 }, { "epoch": 0.3284736685510265, "grad_norm": 0.9162984362838499, "learning_rate": 7.840002635112233e-06, "loss": 0.422, "step": 5244 }, { "epoch": 0.3285363065503688, "grad_norm": 0.8191400188471161, "learning_rate": 7.839167697976559e-06, "loss": 0.3916, "step": 5245 }, { "epoch": 0.32859894454971106, "grad_norm": 0.8870667257104597, "learning_rate": 7.838332643980215e-06, "loss": 0.3885, "step": 5246 }, { "epoch": 0.32866158254905337, "grad_norm": 0.8167830827298292, "learning_rate": 7.837497473157574e-06, "loss": 0.3816, "step": 5247 }, { "epoch": 0.3287242205483957, "grad_norm": 0.9421881461466497, "learning_rate": 7.83666218554301e-06, "loss": 0.424, "step": 5248 }, { "epoch": 0.328786858547738, "grad_norm": 0.8422240636836577, "learning_rate": 7.835826781170903e-06, "loss": 0.4399, "step": 5249 }, { "epoch": 0.3288494965470803, "grad_norm": 0.8394548976321369, "learning_rate": 7.834991260075638e-06, "loss": 0.3862, "step": 5250 }, { "epoch": 0.3289121345464226, "grad_norm": 0.8895964809934912, "learning_rate": 7.834155622291609e-06, "loss": 0.4098, "step": 5251 }, { "epoch": 0.3289747725457649, "grad_norm": 0.92602263488399, "learning_rate": 7.833319867853207e-06, "loss": 0.4195, "step": 5252 }, { "epoch": 0.3290374105451072, "grad_norm": 0.8736709189905953, "learning_rate": 7.832483996794834e-06, "loss": 0.4272, "step": 5253 }, { "epoch": 0.32910004854444946, "grad_norm": 0.8361452617077532, "learning_rate": 7.831648009150894e-06, "loss": 0.4092, "step": 5254 }, { "epoch": 0.32916268654379177, "grad_norm": 0.8525898991933369, "learning_rate": 7.830811904955796e-06, "loss": 0.4637, "step": 5255 }, { "epoch": 0.3292253245431341, "grad_norm": 0.8860817477998135, "learning_rate": 7.829975684243956e-06, "loss": 0.4122, "step": 5256 }, { "epoch": 0.3292879625424764, "grad_norm": 0.8616325227522224, "learning_rate": 7.82913934704979e-06, "loss": 0.4323, "step": 5257 }, { "epoch": 0.3293506005418187, "grad_norm": 0.8444483587443595, "learning_rate": 7.828302893407725e-06, "loss": 0.4067, "step": 5258 }, { "epoch": 0.329413238541161, "grad_norm": 0.8130613271146194, "learning_rate": 7.827466323352186e-06, "loss": 0.4111, "step": 5259 }, { "epoch": 0.3294758765405033, "grad_norm": 0.8074452274458871, "learning_rate": 7.826629636917609e-06, "loss": 0.4504, "step": 5260 }, { "epoch": 0.3295385145398456, "grad_norm": 0.8113267849793148, "learning_rate": 7.825792834138431e-06, "loss": 0.4276, "step": 5261 }, { "epoch": 0.3296011525391879, "grad_norm": 0.8555183193713496, "learning_rate": 7.824955915049096e-06, "loss": 0.4394, "step": 5262 }, { "epoch": 0.3296637905385302, "grad_norm": 0.927641215953857, "learning_rate": 7.824118879684053e-06, "loss": 0.4541, "step": 5263 }, { "epoch": 0.3297264285378725, "grad_norm": 0.8806460333548842, "learning_rate": 7.823281728077749e-06, "loss": 0.4341, "step": 5264 }, { "epoch": 0.3297890665372148, "grad_norm": 0.7794235266674235, "learning_rate": 7.822444460264648e-06, "loss": 0.3934, "step": 5265 }, { "epoch": 0.3298517045365571, "grad_norm": 0.8242736527052328, "learning_rate": 7.821607076279207e-06, "loss": 0.4231, "step": 5266 }, { "epoch": 0.3299143425358994, "grad_norm": 0.86387680885079, "learning_rate": 7.820769576155898e-06, "loss": 0.4233, "step": 5267 }, { "epoch": 0.3299769805352417, "grad_norm": 0.8016958874656288, "learning_rate": 7.819931959929187e-06, "loss": 0.3869, "step": 5268 }, { "epoch": 0.330039618534584, "grad_norm": 0.7776368769168822, "learning_rate": 7.819094227633553e-06, "loss": 0.4356, "step": 5269 }, { "epoch": 0.33010225653392633, "grad_norm": 0.8628280989468848, "learning_rate": 7.818256379303479e-06, "loss": 0.407, "step": 5270 }, { "epoch": 0.3301648945332686, "grad_norm": 0.8644638646345032, "learning_rate": 7.81741841497345e-06, "loss": 0.4283, "step": 5271 }, { "epoch": 0.3302275325326109, "grad_norm": 0.9055212450755559, "learning_rate": 7.816580334677955e-06, "loss": 0.4747, "step": 5272 }, { "epoch": 0.3302901705319532, "grad_norm": 0.8391024644217355, "learning_rate": 7.81574213845149e-06, "loss": 0.3875, "step": 5273 }, { "epoch": 0.3303528085312955, "grad_norm": 0.8461718638131563, "learning_rate": 7.814903826328555e-06, "loss": 0.4207, "step": 5274 }, { "epoch": 0.3304154465306378, "grad_norm": 0.763033826979776, "learning_rate": 7.814065398343657e-06, "loss": 0.4142, "step": 5275 }, { "epoch": 0.3304780845299801, "grad_norm": 0.8865711837599028, "learning_rate": 7.813226854531306e-06, "loss": 0.4338, "step": 5276 }, { "epoch": 0.33054072252932243, "grad_norm": 0.7621239729471987, "learning_rate": 7.812388194926015e-06, "loss": 0.3697, "step": 5277 }, { "epoch": 0.33060336052866474, "grad_norm": 0.8324375980494948, "learning_rate": 7.811549419562304e-06, "loss": 0.4201, "step": 5278 }, { "epoch": 0.330665998528007, "grad_norm": 0.8962010197349815, "learning_rate": 7.810710528474696e-06, "loss": 0.4563, "step": 5279 }, { "epoch": 0.3307286365273493, "grad_norm": 0.8871361168008738, "learning_rate": 7.809871521697723e-06, "loss": 0.4587, "step": 5280 }, { "epoch": 0.3307912745266916, "grad_norm": 0.8585204397023767, "learning_rate": 7.809032399265914e-06, "loss": 0.4238, "step": 5281 }, { "epoch": 0.3308539125260339, "grad_norm": 0.7616702948915002, "learning_rate": 7.808193161213812e-06, "loss": 0.3786, "step": 5282 }, { "epoch": 0.3309165505253762, "grad_norm": 0.8495316515687915, "learning_rate": 7.807353807575958e-06, "loss": 0.402, "step": 5283 }, { "epoch": 0.3309791885247185, "grad_norm": 0.8527649062176175, "learning_rate": 7.8065143383869e-06, "loss": 0.4639, "step": 5284 }, { "epoch": 0.33104182652406083, "grad_norm": 0.769312448297072, "learning_rate": 7.805674753681191e-06, "loss": 0.4084, "step": 5285 }, { "epoch": 0.33110446452340314, "grad_norm": 0.7538009481780182, "learning_rate": 7.804835053493388e-06, "loss": 0.4117, "step": 5286 }, { "epoch": 0.33116710252274545, "grad_norm": 0.8988491710738418, "learning_rate": 7.803995237858053e-06, "loss": 0.4394, "step": 5287 }, { "epoch": 0.3312297405220877, "grad_norm": 0.8289098134535405, "learning_rate": 7.803155306809755e-06, "loss": 0.4003, "step": 5288 }, { "epoch": 0.33129237852143, "grad_norm": 0.9571522899742656, "learning_rate": 7.802315260383064e-06, "loss": 0.4295, "step": 5289 }, { "epoch": 0.3313550165207723, "grad_norm": 0.8463563591999669, "learning_rate": 7.801475098612556e-06, "loss": 0.4797, "step": 5290 }, { "epoch": 0.3314176545201146, "grad_norm": 0.8792863509379935, "learning_rate": 7.800634821532813e-06, "loss": 0.4396, "step": 5291 }, { "epoch": 0.33148029251945693, "grad_norm": 0.8055661368951407, "learning_rate": 7.799794429178423e-06, "loss": 0.3837, "step": 5292 }, { "epoch": 0.33154293051879924, "grad_norm": 0.8739990114371848, "learning_rate": 7.798953921583972e-06, "loss": 0.4312, "step": 5293 }, { "epoch": 0.33160556851814155, "grad_norm": 0.8191310282840503, "learning_rate": 7.798113298784058e-06, "loss": 0.4054, "step": 5294 }, { "epoch": 0.33166820651748385, "grad_norm": 0.861654123807044, "learning_rate": 7.797272560813283e-06, "loss": 0.4137, "step": 5295 }, { "epoch": 0.3317308445168261, "grad_norm": 0.8580202362167622, "learning_rate": 7.79643170770625e-06, "loss": 0.4059, "step": 5296 }, { "epoch": 0.3317934825161684, "grad_norm": 0.8281764288626364, "learning_rate": 7.795590739497567e-06, "loss": 0.4031, "step": 5297 }, { "epoch": 0.3318561205155107, "grad_norm": 0.7595449868474237, "learning_rate": 7.794749656221851e-06, "loss": 0.4959, "step": 5298 }, { "epoch": 0.33191875851485303, "grad_norm": 0.7826860116540257, "learning_rate": 7.793908457913723e-06, "loss": 0.3928, "step": 5299 }, { "epoch": 0.33198139651419534, "grad_norm": 0.8187036041967871, "learning_rate": 7.793067144607802e-06, "loss": 0.4572, "step": 5300 }, { "epoch": 0.33204403451353764, "grad_norm": 0.8150982500977352, "learning_rate": 7.792225716338718e-06, "loss": 0.4371, "step": 5301 }, { "epoch": 0.33210667251287995, "grad_norm": 0.893897737829077, "learning_rate": 7.791384173141108e-06, "loss": 0.4272, "step": 5302 }, { "epoch": 0.33216931051222226, "grad_norm": 0.8327933101313686, "learning_rate": 7.790542515049604e-06, "loss": 0.4, "step": 5303 }, { "epoch": 0.33223194851156457, "grad_norm": 0.8605401178206988, "learning_rate": 7.789700742098856e-06, "loss": 0.4006, "step": 5304 }, { "epoch": 0.3322945865109068, "grad_norm": 0.8665384179168405, "learning_rate": 7.788858854323505e-06, "loss": 0.4005, "step": 5305 }, { "epoch": 0.3323572245102491, "grad_norm": 0.8936708633527918, "learning_rate": 7.788016851758206e-06, "loss": 0.4212, "step": 5306 }, { "epoch": 0.33241986250959144, "grad_norm": 0.7739345529507033, "learning_rate": 7.787174734437618e-06, "loss": 0.4078, "step": 5307 }, { "epoch": 0.33248250050893374, "grad_norm": 0.8070583889376994, "learning_rate": 7.786332502396398e-06, "loss": 0.4216, "step": 5308 }, { "epoch": 0.33254513850827605, "grad_norm": 0.8543673134055818, "learning_rate": 7.785490155669216e-06, "loss": 0.4422, "step": 5309 }, { "epoch": 0.33260777650761836, "grad_norm": 0.838114741121054, "learning_rate": 7.784647694290742e-06, "loss": 0.3874, "step": 5310 }, { "epoch": 0.33267041450696067, "grad_norm": 0.8628592033848987, "learning_rate": 7.783805118295653e-06, "loss": 0.3802, "step": 5311 }, { "epoch": 0.332733052506303, "grad_norm": 0.7910875789324188, "learning_rate": 7.782962427718627e-06, "loss": 0.4235, "step": 5312 }, { "epoch": 0.3327956905056452, "grad_norm": 0.8430032327645138, "learning_rate": 7.782119622594353e-06, "loss": 0.4078, "step": 5313 }, { "epoch": 0.33285832850498753, "grad_norm": 0.8884438862316457, "learning_rate": 7.781276702957517e-06, "loss": 0.4314, "step": 5314 }, { "epoch": 0.33292096650432984, "grad_norm": 0.7863190680077679, "learning_rate": 7.780433668842815e-06, "loss": 0.4239, "step": 5315 }, { "epoch": 0.33298360450367215, "grad_norm": 0.8426529313027998, "learning_rate": 7.77959052028495e-06, "loss": 0.4264, "step": 5316 }, { "epoch": 0.33304624250301446, "grad_norm": 0.8353103629380714, "learning_rate": 7.778747257318622e-06, "loss": 0.4389, "step": 5317 }, { "epoch": 0.33310888050235676, "grad_norm": 0.7508009760810682, "learning_rate": 7.777903879978541e-06, "loss": 0.416, "step": 5318 }, { "epoch": 0.33317151850169907, "grad_norm": 0.812327811966331, "learning_rate": 7.77706038829942e-06, "loss": 0.4009, "step": 5319 }, { "epoch": 0.3332341565010414, "grad_norm": 0.8216174104043901, "learning_rate": 7.77621678231598e-06, "loss": 0.433, "step": 5320 }, { "epoch": 0.33329679450038363, "grad_norm": 0.8725715884364085, "learning_rate": 7.775373062062939e-06, "loss": 0.4421, "step": 5321 }, { "epoch": 0.33335943249972594, "grad_norm": 0.7209670584364476, "learning_rate": 7.77452922757503e-06, "loss": 0.4663, "step": 5322 }, { "epoch": 0.33342207049906825, "grad_norm": 0.863631851725819, "learning_rate": 7.773685278886982e-06, "loss": 0.476, "step": 5323 }, { "epoch": 0.33348470849841055, "grad_norm": 0.8111508226288096, "learning_rate": 7.772841216033534e-06, "loss": 0.4366, "step": 5324 }, { "epoch": 0.33354734649775286, "grad_norm": 0.7873849714248582, "learning_rate": 7.771997039049426e-06, "loss": 0.432, "step": 5325 }, { "epoch": 0.33360998449709517, "grad_norm": 0.8528204610578393, "learning_rate": 7.771152747969406e-06, "loss": 0.4264, "step": 5326 }, { "epoch": 0.3336726224964375, "grad_norm": 0.8318885558307701, "learning_rate": 7.770308342828223e-06, "loss": 0.3989, "step": 5327 }, { "epoch": 0.3337352604957798, "grad_norm": 0.7386677513580496, "learning_rate": 7.769463823660636e-06, "loss": 0.4821, "step": 5328 }, { "epoch": 0.3337978984951221, "grad_norm": 0.8039080529307877, "learning_rate": 7.768619190501404e-06, "loss": 0.4294, "step": 5329 }, { "epoch": 0.33386053649446434, "grad_norm": 0.8546931182843677, "learning_rate": 7.767774443385291e-06, "loss": 0.4457, "step": 5330 }, { "epoch": 0.33392317449380665, "grad_norm": 0.8295138059319225, "learning_rate": 7.766929582347068e-06, "loss": 0.4368, "step": 5331 }, { "epoch": 0.33398581249314896, "grad_norm": 0.996781871412655, "learning_rate": 7.766084607421512e-06, "loss": 0.4609, "step": 5332 }, { "epoch": 0.33404845049249127, "grad_norm": 0.8058142325110019, "learning_rate": 7.7652395186434e-06, "loss": 0.3921, "step": 5333 }, { "epoch": 0.3341110884918336, "grad_norm": 0.8366858900547761, "learning_rate": 7.764394316047515e-06, "loss": 0.4339, "step": 5334 }, { "epoch": 0.3341737264911759, "grad_norm": 0.8666057801764222, "learning_rate": 7.763548999668646e-06, "loss": 0.429, "step": 5335 }, { "epoch": 0.3342363644905182, "grad_norm": 0.7599205422262141, "learning_rate": 7.762703569541587e-06, "loss": 0.4052, "step": 5336 }, { "epoch": 0.3342990024898605, "grad_norm": 0.8589746486096054, "learning_rate": 7.761858025701139e-06, "loss": 0.4132, "step": 5337 }, { "epoch": 0.33436164048920275, "grad_norm": 0.9204255307534548, "learning_rate": 7.7610123681821e-06, "loss": 0.4253, "step": 5338 }, { "epoch": 0.33442427848854506, "grad_norm": 0.8357141245211003, "learning_rate": 7.760166597019282e-06, "loss": 0.4169, "step": 5339 }, { "epoch": 0.33448691648788736, "grad_norm": 0.8701085531681148, "learning_rate": 7.759320712247491e-06, "loss": 0.4594, "step": 5340 }, { "epoch": 0.3345495544872297, "grad_norm": 0.8013979997969015, "learning_rate": 7.75847471390155e-06, "loss": 0.3932, "step": 5341 }, { "epoch": 0.334612192486572, "grad_norm": 0.9185204954221268, "learning_rate": 7.757628602016277e-06, "loss": 0.4233, "step": 5342 }, { "epoch": 0.3346748304859143, "grad_norm": 0.8422496253555444, "learning_rate": 7.756782376626497e-06, "loss": 0.4598, "step": 5343 }, { "epoch": 0.3347374684852566, "grad_norm": 0.9269775437795906, "learning_rate": 7.755936037767044e-06, "loss": 0.4497, "step": 5344 }, { "epoch": 0.3348001064845989, "grad_norm": 0.8311804589628298, "learning_rate": 7.755089585472752e-06, "loss": 0.383, "step": 5345 }, { "epoch": 0.3348627444839412, "grad_norm": 0.8215625405449356, "learning_rate": 7.754243019778462e-06, "loss": 0.4247, "step": 5346 }, { "epoch": 0.33492538248328346, "grad_norm": 0.873192303209613, "learning_rate": 7.753396340719017e-06, "loss": 0.4476, "step": 5347 }, { "epoch": 0.33498802048262577, "grad_norm": 0.7820572346237162, "learning_rate": 7.752549548329268e-06, "loss": 0.4112, "step": 5348 }, { "epoch": 0.3350506584819681, "grad_norm": 0.8298898234825853, "learning_rate": 7.75170264264407e-06, "loss": 0.4263, "step": 5349 }, { "epoch": 0.3351132964813104, "grad_norm": 0.8272622648040023, "learning_rate": 7.75085562369828e-06, "loss": 0.4274, "step": 5350 }, { "epoch": 0.3351759344806527, "grad_norm": 0.7912289446521692, "learning_rate": 7.750008491526762e-06, "loss": 0.4194, "step": 5351 }, { "epoch": 0.335238572479995, "grad_norm": 0.8439861596822192, "learning_rate": 7.749161246164383e-06, "loss": 0.4234, "step": 5352 }, { "epoch": 0.3353012104793373, "grad_norm": 0.8272855743884115, "learning_rate": 7.748313887646018e-06, "loss": 0.3932, "step": 5353 }, { "epoch": 0.3353638484786796, "grad_norm": 0.750605646629579, "learning_rate": 7.747466416006541e-06, "loss": 0.3673, "step": 5354 }, { "epoch": 0.33542648647802187, "grad_norm": 0.840677996618983, "learning_rate": 7.746618831280839e-06, "loss": 0.4162, "step": 5355 }, { "epoch": 0.3354891244773642, "grad_norm": 0.7693757115495204, "learning_rate": 7.745771133503796e-06, "loss": 0.3932, "step": 5356 }, { "epoch": 0.3355517624767065, "grad_norm": 0.8413944710892881, "learning_rate": 7.744923322710302e-06, "loss": 0.4168, "step": 5357 }, { "epoch": 0.3356144004760488, "grad_norm": 0.8703654428222514, "learning_rate": 7.744075398935257e-06, "loss": 0.3986, "step": 5358 }, { "epoch": 0.3356770384753911, "grad_norm": 0.7763988385180106, "learning_rate": 7.743227362213556e-06, "loss": 0.3932, "step": 5359 }, { "epoch": 0.3357396764747334, "grad_norm": 0.8713063939739846, "learning_rate": 7.74237921258011e-06, "loss": 0.4052, "step": 5360 }, { "epoch": 0.3358023144740757, "grad_norm": 0.8838498897382357, "learning_rate": 7.741530950069824e-06, "loss": 0.4132, "step": 5361 }, { "epoch": 0.335864952473418, "grad_norm": 0.7566030573779345, "learning_rate": 7.740682574717617e-06, "loss": 0.4933, "step": 5362 }, { "epoch": 0.3359275904727603, "grad_norm": 0.8252001977795974, "learning_rate": 7.739834086558407e-06, "loss": 0.4376, "step": 5363 }, { "epoch": 0.3359902284721026, "grad_norm": 0.8012953990561389, "learning_rate": 7.738985485627117e-06, "loss": 0.4341, "step": 5364 }, { "epoch": 0.3360528664714449, "grad_norm": 0.7942360756966773, "learning_rate": 7.738136771958675e-06, "loss": 0.3668, "step": 5365 }, { "epoch": 0.3361155044707872, "grad_norm": 0.7839479083702037, "learning_rate": 7.737287945588016e-06, "loss": 0.3984, "step": 5366 }, { "epoch": 0.3361781424701295, "grad_norm": 0.7310883005015003, "learning_rate": 7.736439006550078e-06, "loss": 0.392, "step": 5367 }, { "epoch": 0.3362407804694718, "grad_norm": 0.7557786072236349, "learning_rate": 7.7355899548798e-06, "loss": 0.3795, "step": 5368 }, { "epoch": 0.3363034184688141, "grad_norm": 0.8895310876558375, "learning_rate": 7.734740790612137e-06, "loss": 0.4288, "step": 5369 }, { "epoch": 0.3363660564681564, "grad_norm": 0.7722326562272243, "learning_rate": 7.73389151378203e-06, "loss": 0.4155, "step": 5370 }, { "epoch": 0.33642869446749873, "grad_norm": 0.7178149559722218, "learning_rate": 7.733042124424444e-06, "loss": 0.4822, "step": 5371 }, { "epoch": 0.336491332466841, "grad_norm": 0.7726871430457412, "learning_rate": 7.732192622574334e-06, "loss": 0.4196, "step": 5372 }, { "epoch": 0.3365539704661833, "grad_norm": 0.8835476673888274, "learning_rate": 7.731343008266672e-06, "loss": 0.4339, "step": 5373 }, { "epoch": 0.3366166084655256, "grad_norm": 0.7721786957780604, "learning_rate": 7.730493281536422e-06, "loss": 0.4383, "step": 5374 }, { "epoch": 0.3366792464648679, "grad_norm": 0.8351599877347983, "learning_rate": 7.729643442418563e-06, "loss": 0.4104, "step": 5375 }, { "epoch": 0.3367418844642102, "grad_norm": 0.7936414691642906, "learning_rate": 7.728793490948074e-06, "loss": 0.4062, "step": 5376 }, { "epoch": 0.3368045224635525, "grad_norm": 0.6347564420599564, "learning_rate": 7.727943427159937e-06, "loss": 0.4488, "step": 5377 }, { "epoch": 0.33686716046289483, "grad_norm": 0.8101896293222082, "learning_rate": 7.727093251089143e-06, "loss": 0.4191, "step": 5378 }, { "epoch": 0.33692979846223714, "grad_norm": 0.8137598780747437, "learning_rate": 7.726242962770684e-06, "loss": 0.401, "step": 5379 }, { "epoch": 0.3369924364615794, "grad_norm": 0.8627567676550569, "learning_rate": 7.72539256223956e-06, "loss": 0.4271, "step": 5380 }, { "epoch": 0.3370550744609217, "grad_norm": 0.7277141961963569, "learning_rate": 7.72454204953077e-06, "loss": 0.3989, "step": 5381 }, { "epoch": 0.337117712460264, "grad_norm": 0.8962053192182641, "learning_rate": 7.723691424679325e-06, "loss": 0.4263, "step": 5382 }, { "epoch": 0.3371803504596063, "grad_norm": 0.7953172417322293, "learning_rate": 7.722840687720234e-06, "loss": 0.44, "step": 5383 }, { "epoch": 0.3372429884589486, "grad_norm": 0.8559091772292255, "learning_rate": 7.721989838688517e-06, "loss": 0.398, "step": 5384 }, { "epoch": 0.33730562645829093, "grad_norm": 0.80560444089184, "learning_rate": 7.721138877619192e-06, "loss": 0.4378, "step": 5385 }, { "epoch": 0.33736826445763324, "grad_norm": 0.8509758370680516, "learning_rate": 7.720287804547286e-06, "loss": 0.4252, "step": 5386 }, { "epoch": 0.33743090245697555, "grad_norm": 0.8031309697222552, "learning_rate": 7.71943661950783e-06, "loss": 0.4325, "step": 5387 }, { "epoch": 0.3374935404563178, "grad_norm": 0.8071405914211807, "learning_rate": 7.718585322535857e-06, "loss": 0.3971, "step": 5388 }, { "epoch": 0.3375561784556601, "grad_norm": 0.8274765846928236, "learning_rate": 7.717733913666406e-06, "loss": 0.4197, "step": 5389 }, { "epoch": 0.3376188164550024, "grad_norm": 0.8473581624347716, "learning_rate": 7.716882392934523e-06, "loss": 0.4348, "step": 5390 }, { "epoch": 0.3376814544543447, "grad_norm": 0.866765378536873, "learning_rate": 7.716030760375259e-06, "loss": 0.4377, "step": 5391 }, { "epoch": 0.33774409245368703, "grad_norm": 0.7396856945315529, "learning_rate": 7.715179016023664e-06, "loss": 0.4831, "step": 5392 }, { "epoch": 0.33780673045302934, "grad_norm": 0.7530163294256795, "learning_rate": 7.714327159914796e-06, "loss": 0.3678, "step": 5393 }, { "epoch": 0.33786936845237164, "grad_norm": 0.8093598563265546, "learning_rate": 7.713475192083716e-06, "loss": 0.3971, "step": 5394 }, { "epoch": 0.33793200645171395, "grad_norm": 0.8664218204340495, "learning_rate": 7.712623112565497e-06, "loss": 0.4229, "step": 5395 }, { "epoch": 0.33799464445105626, "grad_norm": 0.8849959730857683, "learning_rate": 7.711770921395207e-06, "loss": 0.4299, "step": 5396 }, { "epoch": 0.3380572824503985, "grad_norm": 0.7701481615794841, "learning_rate": 7.710918618607923e-06, "loss": 0.3774, "step": 5397 }, { "epoch": 0.3381199204497408, "grad_norm": 0.8253163100283495, "learning_rate": 7.710066204238722e-06, "loss": 0.4171, "step": 5398 }, { "epoch": 0.3381825584490831, "grad_norm": 0.6296538860665624, "learning_rate": 7.709213678322696e-06, "loss": 0.4837, "step": 5399 }, { "epoch": 0.33824519644842543, "grad_norm": 0.8881630262771957, "learning_rate": 7.70836104089493e-06, "loss": 0.4115, "step": 5400 }, { "epoch": 0.33830783444776774, "grad_norm": 0.8368362996902028, "learning_rate": 7.707508291990523e-06, "loss": 0.4092, "step": 5401 }, { "epoch": 0.33837047244711005, "grad_norm": 0.8110209648497023, "learning_rate": 7.706655431644572e-06, "loss": 0.4492, "step": 5402 }, { "epoch": 0.33843311044645236, "grad_norm": 0.7931749371267746, "learning_rate": 7.70580245989218e-06, "loss": 0.408, "step": 5403 }, { "epoch": 0.33849574844579466, "grad_norm": 0.7510733242373532, "learning_rate": 7.704949376768456e-06, "loss": 0.4143, "step": 5404 }, { "epoch": 0.3385583864451369, "grad_norm": 0.8361386694068532, "learning_rate": 7.704096182308516e-06, "loss": 0.4078, "step": 5405 }, { "epoch": 0.3386210244444792, "grad_norm": 0.704240115610989, "learning_rate": 7.703242876547472e-06, "loss": 0.4738, "step": 5406 }, { "epoch": 0.33868366244382153, "grad_norm": 0.8864449156342458, "learning_rate": 7.702389459520452e-06, "loss": 0.4431, "step": 5407 }, { "epoch": 0.33874630044316384, "grad_norm": 0.8734114142868725, "learning_rate": 7.70153593126258e-06, "loss": 0.4199, "step": 5408 }, { "epoch": 0.33880893844250615, "grad_norm": 0.6741652651562087, "learning_rate": 7.700682291808986e-06, "loss": 0.466, "step": 5409 }, { "epoch": 0.33887157644184845, "grad_norm": 0.8064459421097978, "learning_rate": 7.699828541194809e-06, "loss": 0.3904, "step": 5410 }, { "epoch": 0.33893421444119076, "grad_norm": 0.8033575621292249, "learning_rate": 7.698974679455188e-06, "loss": 0.3993, "step": 5411 }, { "epoch": 0.33899685244053307, "grad_norm": 0.856593553559791, "learning_rate": 7.698120706625267e-06, "loss": 0.4253, "step": 5412 }, { "epoch": 0.3390594904398754, "grad_norm": 0.8513432646374332, "learning_rate": 7.697266622740199e-06, "loss": 0.415, "step": 5413 }, { "epoch": 0.33912212843921763, "grad_norm": 0.7541088790086903, "learning_rate": 7.696412427835133e-06, "loss": 0.3906, "step": 5414 }, { "epoch": 0.33918476643855994, "grad_norm": 0.8699719283201349, "learning_rate": 7.695558121945234e-06, "loss": 0.4218, "step": 5415 }, { "epoch": 0.33924740443790224, "grad_norm": 0.8054283491627323, "learning_rate": 7.694703705105661e-06, "loss": 0.4202, "step": 5416 }, { "epoch": 0.33931004243724455, "grad_norm": 0.8427659711672542, "learning_rate": 7.693849177351584e-06, "loss": 0.4572, "step": 5417 }, { "epoch": 0.33937268043658686, "grad_norm": 0.9074793068072399, "learning_rate": 7.692994538718175e-06, "loss": 0.4323, "step": 5418 }, { "epoch": 0.33943531843592917, "grad_norm": 0.7980270586412737, "learning_rate": 7.692139789240611e-06, "loss": 0.4056, "step": 5419 }, { "epoch": 0.3394979564352715, "grad_norm": 0.7642496170362, "learning_rate": 7.691284928954074e-06, "loss": 0.4188, "step": 5420 }, { "epoch": 0.3395605944346138, "grad_norm": 0.7274164848148186, "learning_rate": 7.690429957893749e-06, "loss": 0.3303, "step": 5421 }, { "epoch": 0.33962323243395603, "grad_norm": 0.8278760259188338, "learning_rate": 7.689574876094828e-06, "loss": 0.4175, "step": 5422 }, { "epoch": 0.33968587043329834, "grad_norm": 0.817262839122317, "learning_rate": 7.688719683592506e-06, "loss": 0.4198, "step": 5423 }, { "epoch": 0.33974850843264065, "grad_norm": 0.8606501412547473, "learning_rate": 7.687864380421981e-06, "loss": 0.3911, "step": 5424 }, { "epoch": 0.33981114643198296, "grad_norm": 0.8032556799355705, "learning_rate": 7.687008966618461e-06, "loss": 0.3852, "step": 5425 }, { "epoch": 0.33987378443132527, "grad_norm": 0.8501740259165823, "learning_rate": 7.686153442217153e-06, "loss": 0.4372, "step": 5426 }, { "epoch": 0.3399364224306676, "grad_norm": 0.7837413619232704, "learning_rate": 7.685297807253269e-06, "loss": 0.4112, "step": 5427 }, { "epoch": 0.3399990604300099, "grad_norm": 0.8368342940126086, "learning_rate": 7.68444206176203e-06, "loss": 0.3876, "step": 5428 }, { "epoch": 0.3400616984293522, "grad_norm": 0.888831560856548, "learning_rate": 7.683586205778657e-06, "loss": 0.446, "step": 5429 }, { "epoch": 0.34012433642869444, "grad_norm": 0.8278191884546064, "learning_rate": 7.682730239338379e-06, "loss": 0.4515, "step": 5430 }, { "epoch": 0.34018697442803675, "grad_norm": 0.8134627629120762, "learning_rate": 7.681874162476426e-06, "loss": 0.3939, "step": 5431 }, { "epoch": 0.34024961242737906, "grad_norm": 0.8858747371000765, "learning_rate": 7.681017975228034e-06, "loss": 0.3922, "step": 5432 }, { "epoch": 0.34031225042672136, "grad_norm": 0.8122411039539352, "learning_rate": 7.680161677628446e-06, "loss": 0.4149, "step": 5433 }, { "epoch": 0.34037488842606367, "grad_norm": 0.7932695164963309, "learning_rate": 7.679305269712905e-06, "loss": 0.4349, "step": 5434 }, { "epoch": 0.340437526425406, "grad_norm": 0.8717485829481567, "learning_rate": 7.67844875151666e-06, "loss": 0.3959, "step": 5435 }, { "epoch": 0.3405001644247483, "grad_norm": 0.9437319489876956, "learning_rate": 7.677592123074969e-06, "loss": 0.4356, "step": 5436 }, { "epoch": 0.3405628024240906, "grad_norm": 0.8455462713178707, "learning_rate": 7.67673538442309e-06, "loss": 0.4246, "step": 5437 }, { "epoch": 0.3406254404234329, "grad_norm": 0.7489378434680304, "learning_rate": 7.675878535596287e-06, "loss": 0.4359, "step": 5438 }, { "epoch": 0.34068807842277515, "grad_norm": 0.8183158579017404, "learning_rate": 7.675021576629823e-06, "loss": 0.426, "step": 5439 }, { "epoch": 0.34075071642211746, "grad_norm": 0.6704290690162501, "learning_rate": 7.674164507558976e-06, "loss": 0.4696, "step": 5440 }, { "epoch": 0.34081335442145977, "grad_norm": 0.8754645600382787, "learning_rate": 7.673307328419021e-06, "loss": 0.4373, "step": 5441 }, { "epoch": 0.3408759924208021, "grad_norm": 0.8766035617980605, "learning_rate": 7.672450039245242e-06, "loss": 0.4073, "step": 5442 }, { "epoch": 0.3409386304201444, "grad_norm": 0.8289728860485438, "learning_rate": 7.671592640072923e-06, "loss": 0.4359, "step": 5443 }, { "epoch": 0.3410012684194867, "grad_norm": 0.8507956644419364, "learning_rate": 7.670735130937353e-06, "loss": 0.3969, "step": 5444 }, { "epoch": 0.341063906418829, "grad_norm": 0.8307449293419378, "learning_rate": 7.66987751187383e-06, "loss": 0.4493, "step": 5445 }, { "epoch": 0.3411265444181713, "grad_norm": 0.7920721300451884, "learning_rate": 7.669019782917653e-06, "loss": 0.4179, "step": 5446 }, { "epoch": 0.34118918241751356, "grad_norm": 0.8817467291396222, "learning_rate": 7.668161944104128e-06, "loss": 0.4613, "step": 5447 }, { "epoch": 0.34125182041685587, "grad_norm": 0.7999269089128082, "learning_rate": 7.66730399546856e-06, "loss": 0.4002, "step": 5448 }, { "epoch": 0.3413144584161982, "grad_norm": 0.877492997403083, "learning_rate": 7.666445937046266e-06, "loss": 0.4302, "step": 5449 }, { "epoch": 0.3413770964155405, "grad_norm": 0.7830490076781468, "learning_rate": 7.665587768872563e-06, "loss": 0.3752, "step": 5450 }, { "epoch": 0.3414397344148828, "grad_norm": 0.8360849064660894, "learning_rate": 7.664729490982772e-06, "loss": 0.3696, "step": 5451 }, { "epoch": 0.3415023724142251, "grad_norm": 0.8296759111212884, "learning_rate": 7.66387110341222e-06, "loss": 0.3901, "step": 5452 }, { "epoch": 0.3415650104135674, "grad_norm": 0.8250101567956314, "learning_rate": 7.663012606196238e-06, "loss": 0.3882, "step": 5453 }, { "epoch": 0.3416276484129097, "grad_norm": 0.8391093624110277, "learning_rate": 7.662153999370163e-06, "loss": 0.3828, "step": 5454 }, { "epoch": 0.341690286412252, "grad_norm": 0.7554355232828602, "learning_rate": 7.661295282969337e-06, "loss": 0.3696, "step": 5455 }, { "epoch": 0.34175292441159427, "grad_norm": 0.7504994101078319, "learning_rate": 7.660436457029104e-06, "loss": 0.4056, "step": 5456 }, { "epoch": 0.3418155624109366, "grad_norm": 0.8432218153482814, "learning_rate": 7.659577521584809e-06, "loss": 0.4044, "step": 5457 }, { "epoch": 0.3418782004102789, "grad_norm": 0.741342001393949, "learning_rate": 7.658718476671812e-06, "loss": 0.37, "step": 5458 }, { "epoch": 0.3419408384096212, "grad_norm": 0.8164167662648025, "learning_rate": 7.657859322325469e-06, "loss": 0.4229, "step": 5459 }, { "epoch": 0.3420034764089635, "grad_norm": 0.810661074778895, "learning_rate": 7.657000058581142e-06, "loss": 0.357, "step": 5460 }, { "epoch": 0.3420661144083058, "grad_norm": 0.7905288263073328, "learning_rate": 7.6561406854742e-06, "loss": 0.4325, "step": 5461 }, { "epoch": 0.3421287524076481, "grad_norm": 0.8879638993479201, "learning_rate": 7.655281203040014e-06, "loss": 0.441, "step": 5462 }, { "epoch": 0.3421913904069904, "grad_norm": 0.8779210262969016, "learning_rate": 7.654421611313961e-06, "loss": 0.4251, "step": 5463 }, { "epoch": 0.3422540284063327, "grad_norm": 0.7372878745437778, "learning_rate": 7.653561910331422e-06, "loss": 0.3955, "step": 5464 }, { "epoch": 0.342316666405675, "grad_norm": 0.8486530425873063, "learning_rate": 7.652702100127783e-06, "loss": 0.4734, "step": 5465 }, { "epoch": 0.3423793044050173, "grad_norm": 0.7433722755031043, "learning_rate": 7.651842180738432e-06, "loss": 0.4787, "step": 5466 }, { "epoch": 0.3424419424043596, "grad_norm": 0.7934739196559898, "learning_rate": 7.650982152198765e-06, "loss": 0.3961, "step": 5467 }, { "epoch": 0.3425045804037019, "grad_norm": 0.6529942974274798, "learning_rate": 7.65012201454418e-06, "loss": 0.4826, "step": 5468 }, { "epoch": 0.3425672184030442, "grad_norm": 0.7889865969837013, "learning_rate": 7.649261767810082e-06, "loss": 0.394, "step": 5469 }, { "epoch": 0.3426298564023865, "grad_norm": 0.8521095070186899, "learning_rate": 7.648401412031878e-06, "loss": 0.4318, "step": 5470 }, { "epoch": 0.34269249440172883, "grad_norm": 0.7814160347159455, "learning_rate": 7.64754094724498e-06, "loss": 0.3888, "step": 5471 }, { "epoch": 0.3427551324010711, "grad_norm": 0.7709433633248635, "learning_rate": 7.646680373484805e-06, "loss": 0.3777, "step": 5472 }, { "epoch": 0.3428177704004134, "grad_norm": 0.8181645822122546, "learning_rate": 7.645819690786776e-06, "loss": 0.4002, "step": 5473 }, { "epoch": 0.3428804083997557, "grad_norm": 0.8829273762355285, "learning_rate": 7.644958899186315e-06, "loss": 0.4109, "step": 5474 }, { "epoch": 0.342943046399098, "grad_norm": 0.8087624966146467, "learning_rate": 7.644097998718856e-06, "loss": 0.3688, "step": 5475 }, { "epoch": 0.3430056843984403, "grad_norm": 0.8595896729376733, "learning_rate": 7.643236989419831e-06, "loss": 0.3825, "step": 5476 }, { "epoch": 0.3430683223977826, "grad_norm": 0.8238663874645211, "learning_rate": 7.642375871324683e-06, "loss": 0.434, "step": 5477 }, { "epoch": 0.34313096039712493, "grad_norm": 0.8534709575425896, "learning_rate": 7.641514644468853e-06, "loss": 0.4189, "step": 5478 }, { "epoch": 0.34319359839646724, "grad_norm": 0.8338664564570073, "learning_rate": 7.64065330888779e-06, "loss": 0.3679, "step": 5479 }, { "epoch": 0.34325623639580954, "grad_norm": 0.8636972684060491, "learning_rate": 7.639791864616948e-06, "loss": 0.4296, "step": 5480 }, { "epoch": 0.3433188743951518, "grad_norm": 0.8642460887203769, "learning_rate": 7.63893031169178e-06, "loss": 0.442, "step": 5481 }, { "epoch": 0.3433815123944941, "grad_norm": 0.9313440476611863, "learning_rate": 7.638068650147753e-06, "loss": 0.4355, "step": 5482 }, { "epoch": 0.3434441503938364, "grad_norm": 0.7500768060306824, "learning_rate": 7.637206880020329e-06, "loss": 0.4802, "step": 5483 }, { "epoch": 0.3435067883931787, "grad_norm": 0.7346250244470804, "learning_rate": 7.636345001344983e-06, "loss": 0.4839, "step": 5484 }, { "epoch": 0.343569426392521, "grad_norm": 0.832091825352677, "learning_rate": 7.635483014157183e-06, "loss": 0.3802, "step": 5485 }, { "epoch": 0.34363206439186333, "grad_norm": 0.8237562289061159, "learning_rate": 7.634620918492417e-06, "loss": 0.395, "step": 5486 }, { "epoch": 0.34369470239120564, "grad_norm": 0.9132640293540571, "learning_rate": 7.633758714386163e-06, "loss": 0.4379, "step": 5487 }, { "epoch": 0.34375734039054795, "grad_norm": 0.8427968935415014, "learning_rate": 7.632896401873913e-06, "loss": 0.399, "step": 5488 }, { "epoch": 0.3438199783898902, "grad_norm": 0.8849637883127536, "learning_rate": 7.632033980991156e-06, "loss": 0.3929, "step": 5489 }, { "epoch": 0.3438826163892325, "grad_norm": 0.836918971346629, "learning_rate": 7.631171451773394e-06, "loss": 0.43, "step": 5490 }, { "epoch": 0.3439452543885748, "grad_norm": 0.8059752468926502, "learning_rate": 7.630308814256124e-06, "loss": 0.4176, "step": 5491 }, { "epoch": 0.3440078923879171, "grad_norm": 0.8060587833367376, "learning_rate": 7.629446068474858e-06, "loss": 0.4018, "step": 5492 }, { "epoch": 0.34407053038725943, "grad_norm": 0.7875433574251917, "learning_rate": 7.628583214465104e-06, "loss": 0.426, "step": 5493 }, { "epoch": 0.34413316838660174, "grad_norm": 0.8424979401897935, "learning_rate": 7.627720252262376e-06, "loss": 0.4303, "step": 5494 }, { "epoch": 0.34419580638594405, "grad_norm": 0.8572692889835675, "learning_rate": 7.6268571819021945e-06, "loss": 0.4377, "step": 5495 }, { "epoch": 0.34425844438528636, "grad_norm": 0.7774409467404066, "learning_rate": 7.625994003420082e-06, "loss": 0.472, "step": 5496 }, { "epoch": 0.3443210823846286, "grad_norm": 0.8159318500200293, "learning_rate": 7.625130716851571e-06, "loss": 0.4089, "step": 5497 }, { "epoch": 0.3443837203839709, "grad_norm": 0.8218675176111843, "learning_rate": 7.624267322232193e-06, "loss": 0.4261, "step": 5498 }, { "epoch": 0.3444463583833132, "grad_norm": 0.7964246981519245, "learning_rate": 7.623403819597484e-06, "loss": 0.4234, "step": 5499 }, { "epoch": 0.34450899638265553, "grad_norm": 0.8335868230029775, "learning_rate": 7.622540208982985e-06, "loss": 0.4315, "step": 5500 }, { "epoch": 0.34457163438199784, "grad_norm": 0.7487537720787436, "learning_rate": 7.621676490424247e-06, "loss": 0.4299, "step": 5501 }, { "epoch": 0.34463427238134015, "grad_norm": 0.8487150716253518, "learning_rate": 7.620812663956818e-06, "loss": 0.4021, "step": 5502 }, { "epoch": 0.34469691038068245, "grad_norm": 0.8126540608163572, "learning_rate": 7.619948729616251e-06, "loss": 0.4117, "step": 5503 }, { "epoch": 0.34475954838002476, "grad_norm": 0.8074328919256535, "learning_rate": 7.61908468743811e-06, "loss": 0.4185, "step": 5504 }, { "epoch": 0.34482218637936707, "grad_norm": 0.7883477836178467, "learning_rate": 7.6182205374579564e-06, "loss": 0.4328, "step": 5505 }, { "epoch": 0.3448848243787093, "grad_norm": 0.7637949209650231, "learning_rate": 7.61735627971136e-06, "loss": 0.4759, "step": 5506 }, { "epoch": 0.34494746237805163, "grad_norm": 0.7967077855185555, "learning_rate": 7.616491914233894e-06, "loss": 0.4106, "step": 5507 }, { "epoch": 0.34501010037739394, "grad_norm": 0.8509370565585398, "learning_rate": 7.615627441061133e-06, "loss": 0.3912, "step": 5508 }, { "epoch": 0.34507273837673624, "grad_norm": 0.8615819670423556, "learning_rate": 7.6147628602286615e-06, "loss": 0.439, "step": 5509 }, { "epoch": 0.34513537637607855, "grad_norm": 0.9090902146333607, "learning_rate": 7.6138981717720675e-06, "loss": 0.4214, "step": 5510 }, { "epoch": 0.34519801437542086, "grad_norm": 0.8671818291851687, "learning_rate": 7.613033375726937e-06, "loss": 0.438, "step": 5511 }, { "epoch": 0.34526065237476317, "grad_norm": 0.8717363724689163, "learning_rate": 7.612168472128869e-06, "loss": 0.4582, "step": 5512 }, { "epoch": 0.3453232903741055, "grad_norm": 0.7773691544372587, "learning_rate": 7.611303461013462e-06, "loss": 0.378, "step": 5513 }, { "epoch": 0.3453859283734477, "grad_norm": 0.8129926999202555, "learning_rate": 7.61043834241632e-06, "loss": 0.4438, "step": 5514 }, { "epoch": 0.34544856637279003, "grad_norm": 0.8546393835651444, "learning_rate": 7.6095731163730505e-06, "loss": 0.4098, "step": 5515 }, { "epoch": 0.34551120437213234, "grad_norm": 0.8753131067019376, "learning_rate": 7.608707782919267e-06, "loss": 0.4161, "step": 5516 }, { "epoch": 0.34557384237147465, "grad_norm": 0.7659399975431069, "learning_rate": 7.6078423420905875e-06, "loss": 0.394, "step": 5517 }, { "epoch": 0.34563648037081696, "grad_norm": 0.8661957423644814, "learning_rate": 7.606976793922633e-06, "loss": 0.4372, "step": 5518 }, { "epoch": 0.34569911837015926, "grad_norm": 0.7526656768656645, "learning_rate": 7.606111138451031e-06, "loss": 0.3603, "step": 5519 }, { "epoch": 0.34576175636950157, "grad_norm": 0.7952322095822639, "learning_rate": 7.60524537571141e-06, "loss": 0.3883, "step": 5520 }, { "epoch": 0.3458243943688439, "grad_norm": 1.1050798752956899, "learning_rate": 7.604379505739406e-06, "loss": 0.4626, "step": 5521 }, { "epoch": 0.3458870323681862, "grad_norm": 0.8322182895846107, "learning_rate": 7.603513528570656e-06, "loss": 0.4376, "step": 5522 }, { "epoch": 0.34594967036752844, "grad_norm": 0.8031252205047525, "learning_rate": 7.6026474442408085e-06, "loss": 0.3639, "step": 5523 }, { "epoch": 0.34601230836687075, "grad_norm": 0.8177811524143744, "learning_rate": 7.6017812527855095e-06, "loss": 0.4026, "step": 5524 }, { "epoch": 0.34607494636621305, "grad_norm": 0.7481666920352648, "learning_rate": 7.6009149542404085e-06, "loss": 0.3613, "step": 5525 }, { "epoch": 0.34613758436555536, "grad_norm": 0.7952339091045558, "learning_rate": 7.600048548641168e-06, "loss": 0.3979, "step": 5526 }, { "epoch": 0.34620022236489767, "grad_norm": 0.7850935903715607, "learning_rate": 7.599182036023445e-06, "loss": 0.5111, "step": 5527 }, { "epoch": 0.34626286036424, "grad_norm": 0.8435654078486758, "learning_rate": 7.598315416422908e-06, "loss": 0.4122, "step": 5528 }, { "epoch": 0.3463254983635823, "grad_norm": 0.862163617978835, "learning_rate": 7.597448689875224e-06, "loss": 0.4376, "step": 5529 }, { "epoch": 0.3463881363629246, "grad_norm": 0.6971891570090111, "learning_rate": 7.596581856416072e-06, "loss": 0.4793, "step": 5530 }, { "epoch": 0.34645077436226684, "grad_norm": 0.8395506449904372, "learning_rate": 7.595714916081127e-06, "loss": 0.4261, "step": 5531 }, { "epoch": 0.34651341236160915, "grad_norm": 0.9071732292041191, "learning_rate": 7.594847868906076e-06, "loss": 0.4643, "step": 5532 }, { "epoch": 0.34657605036095146, "grad_norm": 0.8492994793191098, "learning_rate": 7.5939807149266055e-06, "loss": 0.4739, "step": 5533 }, { "epoch": 0.34663868836029377, "grad_norm": 0.811555054853143, "learning_rate": 7.5931134541784056e-06, "loss": 0.4019, "step": 5534 }, { "epoch": 0.3467013263596361, "grad_norm": 0.9050161587848232, "learning_rate": 7.5922460866971755e-06, "loss": 0.401, "step": 5535 }, { "epoch": 0.3467639643589784, "grad_norm": 0.8062735018562666, "learning_rate": 7.591378612518615e-06, "loss": 0.3738, "step": 5536 }, { "epoch": 0.3468266023583207, "grad_norm": 0.7984969672813276, "learning_rate": 7.590511031678431e-06, "loss": 0.4118, "step": 5537 }, { "epoch": 0.346889240357663, "grad_norm": 0.7739638528685552, "learning_rate": 7.58964334421233e-06, "loss": 0.4317, "step": 5538 }, { "epoch": 0.34695187835700525, "grad_norm": 0.903617359463641, "learning_rate": 7.58877555015603e-06, "loss": 0.4538, "step": 5539 }, { "epoch": 0.34701451635634756, "grad_norm": 0.8502971114961537, "learning_rate": 7.587907649545248e-06, "loss": 0.4589, "step": 5540 }, { "epoch": 0.34707715435568987, "grad_norm": 0.8437101547561329, "learning_rate": 7.587039642415706e-06, "loss": 0.4156, "step": 5541 }, { "epoch": 0.3471397923550322, "grad_norm": 0.8559737713855234, "learning_rate": 7.586171528803132e-06, "loss": 0.3998, "step": 5542 }, { "epoch": 0.3472024303543745, "grad_norm": 0.7502583505525121, "learning_rate": 7.585303308743257e-06, "loss": 0.3811, "step": 5543 }, { "epoch": 0.3472650683537168, "grad_norm": 0.8418066727286625, "learning_rate": 7.58443498227182e-06, "loss": 0.4848, "step": 5544 }, { "epoch": 0.3473277063530591, "grad_norm": 0.8288971487446554, "learning_rate": 7.583566549424558e-06, "loss": 0.412, "step": 5545 }, { "epoch": 0.3473903443524014, "grad_norm": 0.8264610786064398, "learning_rate": 7.582698010237218e-06, "loss": 0.3719, "step": 5546 }, { "epoch": 0.3474529823517437, "grad_norm": 0.8040868304917782, "learning_rate": 7.581829364745548e-06, "loss": 0.4261, "step": 5547 }, { "epoch": 0.34751562035108596, "grad_norm": 0.802610396326057, "learning_rate": 7.580960612985303e-06, "loss": 0.3734, "step": 5548 }, { "epoch": 0.34757825835042827, "grad_norm": 0.8226891108978885, "learning_rate": 7.580091754992241e-06, "loss": 0.4154, "step": 5549 }, { "epoch": 0.3476408963497706, "grad_norm": 0.857761133500959, "learning_rate": 7.579222790802121e-06, "loss": 0.4103, "step": 5550 }, { "epoch": 0.3477035343491129, "grad_norm": 0.8586351840568583, "learning_rate": 7.578353720450713e-06, "loss": 0.3948, "step": 5551 }, { "epoch": 0.3477661723484552, "grad_norm": 0.812796863684969, "learning_rate": 7.5774845439737874e-06, "loss": 0.41, "step": 5552 }, { "epoch": 0.3478288103477975, "grad_norm": 0.8282526589701269, "learning_rate": 7.576615261407121e-06, "loss": 0.4355, "step": 5553 }, { "epoch": 0.3478914483471398, "grad_norm": 0.8279330877254666, "learning_rate": 7.575745872786489e-06, "loss": 0.3905, "step": 5554 }, { "epoch": 0.3479540863464821, "grad_norm": 0.8118292702858652, "learning_rate": 7.574876378147681e-06, "loss": 0.4299, "step": 5555 }, { "epoch": 0.34801672434582437, "grad_norm": 0.777081593538002, "learning_rate": 7.574006777526484e-06, "loss": 0.356, "step": 5556 }, { "epoch": 0.3480793623451667, "grad_norm": 0.8536432205532797, "learning_rate": 7.573137070958689e-06, "loss": 0.4373, "step": 5557 }, { "epoch": 0.348142000344509, "grad_norm": 0.6613734257030194, "learning_rate": 7.572267258480095e-06, "loss": 0.4559, "step": 5558 }, { "epoch": 0.3482046383438513, "grad_norm": 0.7931299232524233, "learning_rate": 7.571397340126503e-06, "loss": 0.4244, "step": 5559 }, { "epoch": 0.3482672763431936, "grad_norm": 0.842028987366599, "learning_rate": 7.570527315933719e-06, "loss": 0.4362, "step": 5560 }, { "epoch": 0.3483299143425359, "grad_norm": 0.8409704659980595, "learning_rate": 7.569657185937553e-06, "loss": 0.4025, "step": 5561 }, { "epoch": 0.3483925523418782, "grad_norm": 0.7112862795221685, "learning_rate": 7.568786950173823e-06, "loss": 0.5065, "step": 5562 }, { "epoch": 0.3484551903412205, "grad_norm": 0.8445363810804172, "learning_rate": 7.567916608678343e-06, "loss": 0.435, "step": 5563 }, { "epoch": 0.34851782834056283, "grad_norm": 0.7999924819754477, "learning_rate": 7.567046161486939e-06, "loss": 0.4154, "step": 5564 }, { "epoch": 0.3485804663399051, "grad_norm": 0.8940172183681098, "learning_rate": 7.566175608635439e-06, "loss": 0.392, "step": 5565 }, { "epoch": 0.3486431043392474, "grad_norm": 0.7921573453373009, "learning_rate": 7.565304950159676e-06, "loss": 0.4001, "step": 5566 }, { "epoch": 0.3487057423385897, "grad_norm": 0.7786946995227614, "learning_rate": 7.564434186095483e-06, "loss": 0.437, "step": 5567 }, { "epoch": 0.348768380337932, "grad_norm": 0.7693615224349294, "learning_rate": 7.563563316478702e-06, "loss": 0.4071, "step": 5568 }, { "epoch": 0.3488310183372743, "grad_norm": 0.78837305582, "learning_rate": 7.562692341345182e-06, "loss": 0.3798, "step": 5569 }, { "epoch": 0.3488936563366166, "grad_norm": 0.7679854906393965, "learning_rate": 7.561821260730767e-06, "loss": 0.3902, "step": 5570 }, { "epoch": 0.3489562943359589, "grad_norm": 0.8621496717285919, "learning_rate": 7.560950074671315e-06, "loss": 0.3949, "step": 5571 }, { "epoch": 0.34901893233530124, "grad_norm": 0.8948271664712341, "learning_rate": 7.560078783202682e-06, "loss": 0.4361, "step": 5572 }, { "epoch": 0.3490815703346435, "grad_norm": 0.8453287770033234, "learning_rate": 7.559207386360731e-06, "loss": 0.4173, "step": 5573 }, { "epoch": 0.3491442083339858, "grad_norm": 0.8029652339472014, "learning_rate": 7.558335884181329e-06, "loss": 0.4101, "step": 5574 }, { "epoch": 0.3492068463333281, "grad_norm": 0.8140901847156597, "learning_rate": 7.5574642767003485e-06, "loss": 0.4196, "step": 5575 }, { "epoch": 0.3492694843326704, "grad_norm": 0.8240548321595843, "learning_rate": 7.556592563953662e-06, "loss": 0.3771, "step": 5576 }, { "epoch": 0.3493321223320127, "grad_norm": 0.811731753829463, "learning_rate": 7.555720745977151e-06, "loss": 0.4363, "step": 5577 }, { "epoch": 0.349394760331355, "grad_norm": 0.8324323434713622, "learning_rate": 7.554848822806699e-06, "loss": 0.4215, "step": 5578 }, { "epoch": 0.34945739833069733, "grad_norm": 0.7844045750978696, "learning_rate": 7.553976794478199e-06, "loss": 0.3931, "step": 5579 }, { "epoch": 0.34952003633003964, "grad_norm": 0.7876374460826636, "learning_rate": 7.5531046610275375e-06, "loss": 0.3676, "step": 5580 }, { "epoch": 0.3495826743293819, "grad_norm": 0.8462052002736564, "learning_rate": 7.552232422490613e-06, "loss": 0.3872, "step": 5581 }, { "epoch": 0.3496453123287242, "grad_norm": 0.7911929997198875, "learning_rate": 7.551360078903329e-06, "loss": 0.4269, "step": 5582 }, { "epoch": 0.3497079503280665, "grad_norm": 0.8394500995462816, "learning_rate": 7.550487630301593e-06, "loss": 0.44, "step": 5583 }, { "epoch": 0.3497705883274088, "grad_norm": 0.8030892286091592, "learning_rate": 7.549615076721311e-06, "loss": 0.3973, "step": 5584 }, { "epoch": 0.3498332263267511, "grad_norm": 0.8303077812694794, "learning_rate": 7.548742418198398e-06, "loss": 0.4482, "step": 5585 }, { "epoch": 0.34989586432609343, "grad_norm": 0.8710020386102354, "learning_rate": 7.547869654768776e-06, "loss": 0.4452, "step": 5586 }, { "epoch": 0.34995850232543574, "grad_norm": 0.8353248065898808, "learning_rate": 7.546996786468366e-06, "loss": 0.4041, "step": 5587 }, { "epoch": 0.35002114032477805, "grad_norm": 0.8811168581531634, "learning_rate": 7.546123813333095e-06, "loss": 0.4477, "step": 5588 }, { "epoch": 0.35008377832412035, "grad_norm": 0.6645374909981564, "learning_rate": 7.545250735398895e-06, "loss": 0.4866, "step": 5589 }, { "epoch": 0.3501464163234626, "grad_norm": 0.8385627043196684, "learning_rate": 7.5443775527017024e-06, "loss": 0.4207, "step": 5590 }, { "epoch": 0.3502090543228049, "grad_norm": 0.8773141947492662, "learning_rate": 7.543504265277457e-06, "loss": 0.4247, "step": 5591 }, { "epoch": 0.3502716923221472, "grad_norm": 0.8407072946225689, "learning_rate": 7.542630873162104e-06, "loss": 0.3585, "step": 5592 }, { "epoch": 0.35033433032148953, "grad_norm": 0.8263017230096831, "learning_rate": 7.541757376391593e-06, "loss": 0.3903, "step": 5593 }, { "epoch": 0.35039696832083184, "grad_norm": 0.7718370536570259, "learning_rate": 7.540883775001877e-06, "loss": 0.3896, "step": 5594 }, { "epoch": 0.35045960632017414, "grad_norm": 0.8077262503521131, "learning_rate": 7.540010069028913e-06, "loss": 0.4002, "step": 5595 }, { "epoch": 0.35052224431951645, "grad_norm": 0.8460759356861391, "learning_rate": 7.5391362585086615e-06, "loss": 0.4168, "step": 5596 }, { "epoch": 0.35058488231885876, "grad_norm": 0.849168072376639, "learning_rate": 7.538262343477092e-06, "loss": 0.4326, "step": 5597 }, { "epoch": 0.350647520318201, "grad_norm": 0.86876935518197, "learning_rate": 7.537388323970173e-06, "loss": 0.4163, "step": 5598 }, { "epoch": 0.3507101583175433, "grad_norm": 0.8461947274251624, "learning_rate": 7.53651420002388e-06, "loss": 0.433, "step": 5599 }, { "epoch": 0.3507727963168856, "grad_norm": 0.858015964817803, "learning_rate": 7.535639971674191e-06, "loss": 0.4232, "step": 5600 }, { "epoch": 0.35083543431622793, "grad_norm": 0.8032187892183488, "learning_rate": 7.534765638957091e-06, "loss": 0.3728, "step": 5601 }, { "epoch": 0.35089807231557024, "grad_norm": 0.8295230927820374, "learning_rate": 7.533891201908565e-06, "loss": 0.436, "step": 5602 }, { "epoch": 0.35096071031491255, "grad_norm": 0.8479115206381318, "learning_rate": 7.533016660564608e-06, "loss": 0.4191, "step": 5603 }, { "epoch": 0.35102334831425486, "grad_norm": 0.8661556091864963, "learning_rate": 7.532142014961216e-06, "loss": 0.4245, "step": 5604 }, { "epoch": 0.35108598631359716, "grad_norm": 0.8283708295938161, "learning_rate": 7.531267265134388e-06, "loss": 0.4424, "step": 5605 }, { "epoch": 0.3511486243129395, "grad_norm": 0.8568452094749072, "learning_rate": 7.530392411120128e-06, "loss": 0.3774, "step": 5606 }, { "epoch": 0.3512112623122817, "grad_norm": 0.8942284379404295, "learning_rate": 7.529517452954449e-06, "loss": 0.4545, "step": 5607 }, { "epoch": 0.35127390031162403, "grad_norm": 0.7845076006537623, "learning_rate": 7.528642390673361e-06, "loss": 0.4016, "step": 5608 }, { "epoch": 0.35133653831096634, "grad_norm": 0.8749380829608026, "learning_rate": 7.527767224312883e-06, "loss": 0.4327, "step": 5609 }, { "epoch": 0.35139917631030865, "grad_norm": 0.747695654837569, "learning_rate": 7.526891953909036e-06, "loss": 0.3831, "step": 5610 }, { "epoch": 0.35146181430965096, "grad_norm": 0.8817307271344651, "learning_rate": 7.5260165794978476e-06, "loss": 0.4271, "step": 5611 }, { "epoch": 0.35152445230899326, "grad_norm": 0.8136011941471594, "learning_rate": 7.52514110111535e-06, "loss": 0.3894, "step": 5612 }, { "epoch": 0.35158709030833557, "grad_norm": 0.8305791730217351, "learning_rate": 7.5242655187975744e-06, "loss": 0.4021, "step": 5613 }, { "epoch": 0.3516497283076779, "grad_norm": 0.8998248885618982, "learning_rate": 7.523389832580561e-06, "loss": 0.4499, "step": 5614 }, { "epoch": 0.35171236630702013, "grad_norm": 0.8320974372541124, "learning_rate": 7.522514042500353e-06, "loss": 0.4177, "step": 5615 }, { "epoch": 0.35177500430636244, "grad_norm": 0.8405822323805016, "learning_rate": 7.521638148593001e-06, "loss": 0.4271, "step": 5616 }, { "epoch": 0.35183764230570475, "grad_norm": 0.8159909787176922, "learning_rate": 7.520762150894554e-06, "loss": 0.3982, "step": 5617 }, { "epoch": 0.35190028030504705, "grad_norm": 0.8971951339679737, "learning_rate": 7.51988604944107e-06, "loss": 0.433, "step": 5618 }, { "epoch": 0.35196291830438936, "grad_norm": 0.7547752632123363, "learning_rate": 7.519009844268605e-06, "loss": 0.3716, "step": 5619 }, { "epoch": 0.35202555630373167, "grad_norm": 0.7784585799297568, "learning_rate": 7.51813353541323e-06, "loss": 0.4288, "step": 5620 }, { "epoch": 0.352088194303074, "grad_norm": 0.7995662766151819, "learning_rate": 7.517257122911011e-06, "loss": 0.4287, "step": 5621 }, { "epoch": 0.3521508323024163, "grad_norm": 0.8433200671178743, "learning_rate": 7.516380606798022e-06, "loss": 0.4154, "step": 5622 }, { "epoch": 0.35221347030175854, "grad_norm": 0.8559890033354324, "learning_rate": 7.51550398711034e-06, "loss": 0.4474, "step": 5623 }, { "epoch": 0.35227610830110084, "grad_norm": 0.8225210237691848, "learning_rate": 7.514627263884045e-06, "loss": 0.4098, "step": 5624 }, { "epoch": 0.35233874630044315, "grad_norm": 0.873414002293103, "learning_rate": 7.513750437155227e-06, "loss": 0.4913, "step": 5625 }, { "epoch": 0.35240138429978546, "grad_norm": 0.8151223164219492, "learning_rate": 7.5128735069599745e-06, "loss": 0.417, "step": 5626 }, { "epoch": 0.35246402229912777, "grad_norm": 0.8438878799398686, "learning_rate": 7.511996473334382e-06, "loss": 0.4004, "step": 5627 }, { "epoch": 0.3525266602984701, "grad_norm": 0.841482960438345, "learning_rate": 7.511119336314547e-06, "loss": 0.4232, "step": 5628 }, { "epoch": 0.3525892982978124, "grad_norm": 0.8175113425223685, "learning_rate": 7.510242095936575e-06, "loss": 0.425, "step": 5629 }, { "epoch": 0.3526519362971547, "grad_norm": 0.816079634966281, "learning_rate": 7.509364752236574e-06, "loss": 0.3859, "step": 5630 }, { "epoch": 0.352714574296497, "grad_norm": 0.7798798870629529, "learning_rate": 7.5084873052506525e-06, "loss": 0.4059, "step": 5631 }, { "epoch": 0.35277721229583925, "grad_norm": 0.8116179168423437, "learning_rate": 7.507609755014927e-06, "loss": 0.4154, "step": 5632 }, { "epoch": 0.35283985029518156, "grad_norm": 0.7765970698205323, "learning_rate": 7.506732101565519e-06, "loss": 0.3732, "step": 5633 }, { "epoch": 0.35290248829452386, "grad_norm": 0.7448512634996391, "learning_rate": 7.505854344938555e-06, "loss": 0.3734, "step": 5634 }, { "epoch": 0.35296512629386617, "grad_norm": 0.8372485608918793, "learning_rate": 7.504976485170159e-06, "loss": 0.414, "step": 5635 }, { "epoch": 0.3530277642932085, "grad_norm": 0.8787520500902823, "learning_rate": 7.504098522296464e-06, "loss": 0.4323, "step": 5636 }, { "epoch": 0.3530904022925508, "grad_norm": 0.8525024702997288, "learning_rate": 7.50322045635361e-06, "loss": 0.3899, "step": 5637 }, { "epoch": 0.3531530402918931, "grad_norm": 0.8154736150681071, "learning_rate": 7.502342287377739e-06, "loss": 0.405, "step": 5638 }, { "epoch": 0.3532156782912354, "grad_norm": 0.8943359449059987, "learning_rate": 7.501464015404996e-06, "loss": 0.4334, "step": 5639 }, { "epoch": 0.35327831629057765, "grad_norm": 0.8264293292897433, "learning_rate": 7.500585640471527e-06, "loss": 0.4271, "step": 5640 }, { "epoch": 0.35334095428991996, "grad_norm": 0.7939491936617681, "learning_rate": 7.499707162613491e-06, "loss": 0.4118, "step": 5641 }, { "epoch": 0.35340359228926227, "grad_norm": 0.8334957626123541, "learning_rate": 7.498828581867043e-06, "loss": 0.3864, "step": 5642 }, { "epoch": 0.3534662302886046, "grad_norm": 0.7698527499914908, "learning_rate": 7.497949898268346e-06, "loss": 0.3867, "step": 5643 }, { "epoch": 0.3535288682879469, "grad_norm": 0.8234914676080041, "learning_rate": 7.49707111185357e-06, "loss": 0.367, "step": 5644 }, { "epoch": 0.3535915062872892, "grad_norm": 0.8781733733795352, "learning_rate": 7.496192222658881e-06, "loss": 0.4181, "step": 5645 }, { "epoch": 0.3536541442866315, "grad_norm": 0.8971065383898187, "learning_rate": 7.495313230720459e-06, "loss": 0.4313, "step": 5646 }, { "epoch": 0.3537167822859738, "grad_norm": 0.8712249646655686, "learning_rate": 7.4944341360744795e-06, "loss": 0.4314, "step": 5647 }, { "epoch": 0.35377942028531606, "grad_norm": 0.6618227819140626, "learning_rate": 7.493554938757129e-06, "loss": 0.4614, "step": 5648 }, { "epoch": 0.35384205828465837, "grad_norm": 0.8131328700778558, "learning_rate": 7.492675638804594e-06, "loss": 0.3961, "step": 5649 }, { "epoch": 0.3539046962840007, "grad_norm": 0.8183217696455036, "learning_rate": 7.4917962362530675e-06, "loss": 0.4005, "step": 5650 }, { "epoch": 0.353967334283343, "grad_norm": 0.8816309342445946, "learning_rate": 7.490916731138744e-06, "loss": 0.4043, "step": 5651 }, { "epoch": 0.3540299722826853, "grad_norm": 0.8824507920353514, "learning_rate": 7.490037123497828e-06, "loss": 0.415, "step": 5652 }, { "epoch": 0.3540926102820276, "grad_norm": 0.9390184248528646, "learning_rate": 7.489157413366519e-06, "loss": 0.4466, "step": 5653 }, { "epoch": 0.3541552482813699, "grad_norm": 0.6738931191183793, "learning_rate": 7.488277600781031e-06, "loss": 0.4638, "step": 5654 }, { "epoch": 0.3542178862807122, "grad_norm": 0.9302153164071008, "learning_rate": 7.487397685777575e-06, "loss": 0.4364, "step": 5655 }, { "epoch": 0.3542805242800545, "grad_norm": 0.8282397676004909, "learning_rate": 7.486517668392367e-06, "loss": 0.423, "step": 5656 }, { "epoch": 0.3543431622793968, "grad_norm": 0.8757809555930274, "learning_rate": 7.485637548661632e-06, "loss": 0.4765, "step": 5657 }, { "epoch": 0.3544058002787391, "grad_norm": 0.8233331660676295, "learning_rate": 7.484757326621592e-06, "loss": 0.4446, "step": 5658 }, { "epoch": 0.3544684382780814, "grad_norm": 0.8347564605381815, "learning_rate": 7.483877002308482e-06, "loss": 0.4409, "step": 5659 }, { "epoch": 0.3545310762774237, "grad_norm": 0.8700257196005199, "learning_rate": 7.482996575758531e-06, "loss": 0.4507, "step": 5660 }, { "epoch": 0.354593714276766, "grad_norm": 0.9339781562913889, "learning_rate": 7.482116047007981e-06, "loss": 0.453, "step": 5661 }, { "epoch": 0.3546563522761083, "grad_norm": 0.8535346428277744, "learning_rate": 7.481235416093075e-06, "loss": 0.4298, "step": 5662 }, { "epoch": 0.3547189902754506, "grad_norm": 0.7872395600135368, "learning_rate": 7.4803546830500576e-06, "loss": 0.3974, "step": 5663 }, { "epoch": 0.3547816282747929, "grad_norm": 0.8491596400887164, "learning_rate": 7.479473847915182e-06, "loss": 0.4187, "step": 5664 }, { "epoch": 0.3548442662741352, "grad_norm": 0.8687019991627913, "learning_rate": 7.4785929107247e-06, "loss": 0.4305, "step": 5665 }, { "epoch": 0.3549069042734775, "grad_norm": 0.9590327054854777, "learning_rate": 7.477711871514876e-06, "loss": 0.4087, "step": 5666 }, { "epoch": 0.3549695422728198, "grad_norm": 0.8737071612185303, "learning_rate": 7.476830730321971e-06, "loss": 0.4134, "step": 5667 }, { "epoch": 0.3550321802721621, "grad_norm": 0.81178147717957, "learning_rate": 7.475949487182255e-06, "loss": 0.4287, "step": 5668 }, { "epoch": 0.3550948182715044, "grad_norm": 0.7848668177486939, "learning_rate": 7.475068142131996e-06, "loss": 0.416, "step": 5669 }, { "epoch": 0.3551574562708467, "grad_norm": 0.8931840081050557, "learning_rate": 7.474186695207474e-06, "loss": 0.4236, "step": 5670 }, { "epoch": 0.355220094270189, "grad_norm": 0.8167871413651073, "learning_rate": 7.473305146444968e-06, "loss": 0.4209, "step": 5671 }, { "epoch": 0.35528273226953133, "grad_norm": 0.8081293034695419, "learning_rate": 7.472423495880763e-06, "loss": 0.3912, "step": 5672 }, { "epoch": 0.35534537026887364, "grad_norm": 0.8035962345311985, "learning_rate": 7.47154174355115e-06, "loss": 0.3759, "step": 5673 }, { "epoch": 0.3554080082682159, "grad_norm": 0.8454381722528947, "learning_rate": 7.4706598894924175e-06, "loss": 0.4367, "step": 5674 }, { "epoch": 0.3554706462675582, "grad_norm": 0.8183284628454623, "learning_rate": 7.4697779337408645e-06, "loss": 0.3867, "step": 5675 }, { "epoch": 0.3555332842669005, "grad_norm": 0.8599125359393487, "learning_rate": 7.468895876332797e-06, "loss": 0.4036, "step": 5676 }, { "epoch": 0.3555959222662428, "grad_norm": 0.8579432156876312, "learning_rate": 7.468013717304515e-06, "loss": 0.44, "step": 5677 }, { "epoch": 0.3556585602655851, "grad_norm": 0.8864954610873553, "learning_rate": 7.467131456692331e-06, "loss": 0.4237, "step": 5678 }, { "epoch": 0.35572119826492743, "grad_norm": 0.78408753325875, "learning_rate": 7.466249094532556e-06, "loss": 0.4588, "step": 5679 }, { "epoch": 0.35578383626426974, "grad_norm": 0.9346666032856303, "learning_rate": 7.465366630861514e-06, "loss": 0.4562, "step": 5680 }, { "epoch": 0.35584647426361204, "grad_norm": 0.8518179851460292, "learning_rate": 7.464484065715521e-06, "loss": 0.3991, "step": 5681 }, { "epoch": 0.3559091122629543, "grad_norm": 0.7935288603941473, "learning_rate": 7.463601399130909e-06, "loss": 0.3996, "step": 5682 }, { "epoch": 0.3559717502622966, "grad_norm": 0.8351900851829116, "learning_rate": 7.462718631144003e-06, "loss": 0.4366, "step": 5683 }, { "epoch": 0.3560343882616389, "grad_norm": 0.8772243938733965, "learning_rate": 7.461835761791143e-06, "loss": 0.3965, "step": 5684 }, { "epoch": 0.3560970262609812, "grad_norm": 0.8228178071466743, "learning_rate": 7.4609527911086665e-06, "loss": 0.4378, "step": 5685 }, { "epoch": 0.3561596642603235, "grad_norm": 0.7724116761191941, "learning_rate": 7.460069719132916e-06, "loss": 0.3961, "step": 5686 }, { "epoch": 0.35622230225966584, "grad_norm": 0.7930201869313552, "learning_rate": 7.459186545900239e-06, "loss": 0.4117, "step": 5687 }, { "epoch": 0.35628494025900814, "grad_norm": 1.0305273081208268, "learning_rate": 7.458303271446987e-06, "loss": 0.4179, "step": 5688 }, { "epoch": 0.35634757825835045, "grad_norm": 0.8440484601827306, "learning_rate": 7.457419895809517e-06, "loss": 0.3939, "step": 5689 }, { "epoch": 0.3564102162576927, "grad_norm": 0.7794336123178631, "learning_rate": 7.456536419024188e-06, "loss": 0.4388, "step": 5690 }, { "epoch": 0.356472854257035, "grad_norm": 0.8636884559473316, "learning_rate": 7.455652841127362e-06, "loss": 0.4594, "step": 5691 }, { "epoch": 0.3565354922563773, "grad_norm": 0.843679139954803, "learning_rate": 7.454769162155411e-06, "loss": 0.3952, "step": 5692 }, { "epoch": 0.3565981302557196, "grad_norm": 0.8368292402802597, "learning_rate": 7.453885382144706e-06, "loss": 0.413, "step": 5693 }, { "epoch": 0.35666076825506193, "grad_norm": 0.7827048928531934, "learning_rate": 7.453001501131623e-06, "loss": 0.4011, "step": 5694 }, { "epoch": 0.35672340625440424, "grad_norm": 0.7639747942948917, "learning_rate": 7.452117519152542e-06, "loss": 0.3701, "step": 5695 }, { "epoch": 0.35678604425374655, "grad_norm": 0.9163166981380584, "learning_rate": 7.451233436243849e-06, "loss": 0.4619, "step": 5696 }, { "epoch": 0.35684868225308886, "grad_norm": 0.8657455955173209, "learning_rate": 7.450349252441932e-06, "loss": 0.4436, "step": 5697 }, { "epoch": 0.35691132025243116, "grad_norm": 0.7863725616471414, "learning_rate": 7.449464967783186e-06, "loss": 0.4117, "step": 5698 }, { "epoch": 0.3569739582517734, "grad_norm": 0.8049466803812365, "learning_rate": 7.448580582304008e-06, "loss": 0.3855, "step": 5699 }, { "epoch": 0.3570365962511157, "grad_norm": 0.7777560198582516, "learning_rate": 7.4476960960407974e-06, "loss": 0.409, "step": 5700 }, { "epoch": 0.35709923425045803, "grad_norm": 0.8125195935279501, "learning_rate": 7.446811509029963e-06, "loss": 0.4304, "step": 5701 }, { "epoch": 0.35716187224980034, "grad_norm": 0.8108497185016142, "learning_rate": 7.445926821307911e-06, "loss": 0.3943, "step": 5702 }, { "epoch": 0.35722451024914265, "grad_norm": 0.7999966598207707, "learning_rate": 7.4450420329110575e-06, "loss": 0.4171, "step": 5703 }, { "epoch": 0.35728714824848495, "grad_norm": 0.8395916445272443, "learning_rate": 7.44415714387582e-06, "loss": 0.3696, "step": 5704 }, { "epoch": 0.35734978624782726, "grad_norm": 0.8627990500658659, "learning_rate": 7.443272154238621e-06, "loss": 0.4416, "step": 5705 }, { "epoch": 0.35741242424716957, "grad_norm": 0.8487857354390858, "learning_rate": 7.442387064035886e-06, "loss": 0.4462, "step": 5706 }, { "epoch": 0.3574750622465118, "grad_norm": 0.8080522183707828, "learning_rate": 7.441501873304048e-06, "loss": 0.4375, "step": 5707 }, { "epoch": 0.35753770024585413, "grad_norm": 0.8677391298633034, "learning_rate": 7.440616582079539e-06, "loss": 0.4402, "step": 5708 }, { "epoch": 0.35760033824519644, "grad_norm": 0.8248975271312655, "learning_rate": 7.439731190398797e-06, "loss": 0.4088, "step": 5709 }, { "epoch": 0.35766297624453874, "grad_norm": 0.8524577032277924, "learning_rate": 7.438845698298267e-06, "loss": 0.4304, "step": 5710 }, { "epoch": 0.35772561424388105, "grad_norm": 0.8435182009389761, "learning_rate": 7.437960105814396e-06, "loss": 0.4366, "step": 5711 }, { "epoch": 0.35778825224322336, "grad_norm": 0.6799912318851833, "learning_rate": 7.437074412983635e-06, "loss": 0.4972, "step": 5712 }, { "epoch": 0.35785089024256567, "grad_norm": 0.8539132900650681, "learning_rate": 7.436188619842437e-06, "loss": 0.3955, "step": 5713 }, { "epoch": 0.357913528241908, "grad_norm": 0.8762537849851417, "learning_rate": 7.435302726427265e-06, "loss": 0.43, "step": 5714 }, { "epoch": 0.3579761662412503, "grad_norm": 0.8862405907040904, "learning_rate": 7.434416732774581e-06, "loss": 0.4612, "step": 5715 }, { "epoch": 0.35803880424059253, "grad_norm": 0.789525085673593, "learning_rate": 7.433530638920851e-06, "loss": 0.3925, "step": 5716 }, { "epoch": 0.35810144223993484, "grad_norm": 0.812194702053464, "learning_rate": 7.432644444902548e-06, "loss": 0.4524, "step": 5717 }, { "epoch": 0.35816408023927715, "grad_norm": 0.7836515573708729, "learning_rate": 7.431758150756149e-06, "loss": 0.4069, "step": 5718 }, { "epoch": 0.35822671823861946, "grad_norm": 0.8524090117850027, "learning_rate": 7.430871756518133e-06, "loss": 0.4497, "step": 5719 }, { "epoch": 0.35828935623796176, "grad_norm": 0.8103653881257393, "learning_rate": 7.429985262224983e-06, "loss": 0.4427, "step": 5720 }, { "epoch": 0.3583519942373041, "grad_norm": 0.8646339161747995, "learning_rate": 7.429098667913191e-06, "loss": 0.4324, "step": 5721 }, { "epoch": 0.3584146322366464, "grad_norm": 0.8078994230005493, "learning_rate": 7.428211973619246e-06, "loss": 0.4095, "step": 5722 }, { "epoch": 0.3584772702359887, "grad_norm": 0.8232795640546924, "learning_rate": 7.427325179379645e-06, "loss": 0.4322, "step": 5723 }, { "epoch": 0.35853990823533094, "grad_norm": 0.8090124094175934, "learning_rate": 7.426438285230891e-06, "loss": 0.4235, "step": 5724 }, { "epoch": 0.35860254623467325, "grad_norm": 0.8567743103280401, "learning_rate": 7.425551291209484e-06, "loss": 0.4253, "step": 5725 }, { "epoch": 0.35866518423401555, "grad_norm": 0.7609921519897238, "learning_rate": 7.424664197351936e-06, "loss": 0.3594, "step": 5726 }, { "epoch": 0.35872782223335786, "grad_norm": 0.8327894303290452, "learning_rate": 7.423777003694761e-06, "loss": 0.3901, "step": 5727 }, { "epoch": 0.35879046023270017, "grad_norm": 0.7697749276805892, "learning_rate": 7.422889710274475e-06, "loss": 0.4004, "step": 5728 }, { "epoch": 0.3588530982320425, "grad_norm": 0.8266805062329827, "learning_rate": 7.422002317127597e-06, "loss": 0.4253, "step": 5729 }, { "epoch": 0.3589157362313848, "grad_norm": 0.7945306242140134, "learning_rate": 7.421114824290654e-06, "loss": 0.3752, "step": 5730 }, { "epoch": 0.3589783742307271, "grad_norm": 0.8635246588338701, "learning_rate": 7.4202272318001775e-06, "loss": 0.4476, "step": 5731 }, { "epoch": 0.35904101223006935, "grad_norm": 0.8400818996093828, "learning_rate": 7.419339539692698e-06, "loss": 0.4357, "step": 5732 }, { "epoch": 0.35910365022941165, "grad_norm": 0.823570624101091, "learning_rate": 7.4184517480047545e-06, "loss": 0.4384, "step": 5733 }, { "epoch": 0.35916628822875396, "grad_norm": 0.8464233916310302, "learning_rate": 7.417563856772887e-06, "loss": 0.4259, "step": 5734 }, { "epoch": 0.35922892622809627, "grad_norm": 0.8253391130942974, "learning_rate": 7.4166758660336424e-06, "loss": 0.4119, "step": 5735 }, { "epoch": 0.3592915642274386, "grad_norm": 0.8024268054426582, "learning_rate": 7.415787775823572e-06, "loss": 0.4036, "step": 5736 }, { "epoch": 0.3593542022267809, "grad_norm": 0.8190574895790289, "learning_rate": 7.414899586179227e-06, "loss": 0.3859, "step": 5737 }, { "epoch": 0.3594168402261232, "grad_norm": 0.7522354962365168, "learning_rate": 7.414011297137168e-06, "loss": 0.3999, "step": 5738 }, { "epoch": 0.3594794782254655, "grad_norm": 0.8595328706299682, "learning_rate": 7.413122908733954e-06, "loss": 0.4303, "step": 5739 }, { "epoch": 0.3595421162248078, "grad_norm": 0.8177265344739019, "learning_rate": 7.412234421006155e-06, "loss": 0.42, "step": 5740 }, { "epoch": 0.35960475422415006, "grad_norm": 0.7922046285495647, "learning_rate": 7.41134583399034e-06, "loss": 0.4487, "step": 5741 }, { "epoch": 0.35966739222349237, "grad_norm": 0.8374800154602203, "learning_rate": 7.4104571477230825e-06, "loss": 0.4579, "step": 5742 }, { "epoch": 0.3597300302228347, "grad_norm": 0.8592953838868962, "learning_rate": 7.409568362240958e-06, "loss": 0.4321, "step": 5743 }, { "epoch": 0.359792668222177, "grad_norm": 0.8585822730825504, "learning_rate": 7.408679477580556e-06, "loss": 0.4349, "step": 5744 }, { "epoch": 0.3598553062215193, "grad_norm": 0.7856225873720631, "learning_rate": 7.4077904937784595e-06, "loss": 0.3912, "step": 5745 }, { "epoch": 0.3599179442208616, "grad_norm": 0.7931840113389638, "learning_rate": 7.40690141087126e-06, "loss": 0.3829, "step": 5746 }, { "epoch": 0.3599805822202039, "grad_norm": 0.8895156337722715, "learning_rate": 7.406012228895549e-06, "loss": 0.4116, "step": 5747 }, { "epoch": 0.3600432202195462, "grad_norm": 0.8006874178243293, "learning_rate": 7.405122947887929e-06, "loss": 0.4911, "step": 5748 }, { "epoch": 0.36010585821888846, "grad_norm": 0.8200963105638199, "learning_rate": 7.404233567885004e-06, "loss": 0.3982, "step": 5749 }, { "epoch": 0.36016849621823077, "grad_norm": 0.8108287638415568, "learning_rate": 7.4033440889233775e-06, "loss": 0.3946, "step": 5750 }, { "epoch": 0.3602311342175731, "grad_norm": 0.8635074998438425, "learning_rate": 7.402454511039662e-06, "loss": 0.4076, "step": 5751 }, { "epoch": 0.3602937722169154, "grad_norm": 0.8199622735463522, "learning_rate": 7.4015648342704726e-06, "loss": 0.4217, "step": 5752 }, { "epoch": 0.3603564102162577, "grad_norm": 0.8452473617028118, "learning_rate": 7.40067505865243e-06, "loss": 0.4493, "step": 5753 }, { "epoch": 0.3604190482156, "grad_norm": 0.8278559957704651, "learning_rate": 7.399785184222157e-06, "loss": 0.42, "step": 5754 }, { "epoch": 0.3604816862149423, "grad_norm": 0.8523512383397397, "learning_rate": 7.398895211016279e-06, "loss": 0.4695, "step": 5755 }, { "epoch": 0.3605443242142846, "grad_norm": 0.8302486373842175, "learning_rate": 7.398005139071429e-06, "loss": 0.4062, "step": 5756 }, { "epoch": 0.36060696221362687, "grad_norm": 0.7831676388384917, "learning_rate": 7.3971149684242415e-06, "loss": 0.4016, "step": 5757 }, { "epoch": 0.3606696002129692, "grad_norm": 0.88673683421489, "learning_rate": 7.39622469911136e-06, "loss": 0.4089, "step": 5758 }, { "epoch": 0.3607322382123115, "grad_norm": 0.8267418976912764, "learning_rate": 7.395334331169423e-06, "loss": 0.4126, "step": 5759 }, { "epoch": 0.3607948762116538, "grad_norm": 0.8069706735346918, "learning_rate": 7.3944438646350805e-06, "loss": 0.3946, "step": 5760 }, { "epoch": 0.3608575142109961, "grad_norm": 0.8971818136509114, "learning_rate": 7.393553299544984e-06, "loss": 0.4293, "step": 5761 }, { "epoch": 0.3609201522103384, "grad_norm": 0.8096515303167039, "learning_rate": 7.392662635935789e-06, "loss": 0.4, "step": 5762 }, { "epoch": 0.3609827902096807, "grad_norm": 0.9163410836689029, "learning_rate": 7.3917718738441565e-06, "loss": 0.4118, "step": 5763 }, { "epoch": 0.361045428209023, "grad_norm": 0.7978167514907651, "learning_rate": 7.39088101330675e-06, "loss": 0.3784, "step": 5764 }, { "epoch": 0.36110806620836533, "grad_norm": 0.82454137011966, "learning_rate": 7.3899900543602364e-06, "loss": 0.4083, "step": 5765 }, { "epoch": 0.3611707042077076, "grad_norm": 0.876557109486338, "learning_rate": 7.389098997041288e-06, "loss": 0.4435, "step": 5766 }, { "epoch": 0.3612333422070499, "grad_norm": 0.9111743810309809, "learning_rate": 7.388207841386583e-06, "loss": 0.4047, "step": 5767 }, { "epoch": 0.3612959802063922, "grad_norm": 0.7849318530182237, "learning_rate": 7.3873165874328005e-06, "loss": 0.4138, "step": 5768 }, { "epoch": 0.3613586182057345, "grad_norm": 0.8380888978147147, "learning_rate": 7.386425235216623e-06, "loss": 0.4336, "step": 5769 }, { "epoch": 0.3614212562050768, "grad_norm": 0.8143981906393711, "learning_rate": 7.38553378477474e-06, "loss": 0.4096, "step": 5770 }, { "epoch": 0.3614838942044191, "grad_norm": 0.85350584569826, "learning_rate": 7.384642236143843e-06, "loss": 0.3721, "step": 5771 }, { "epoch": 0.36154653220376143, "grad_norm": 0.8640320708221502, "learning_rate": 7.38375058936063e-06, "loss": 0.4119, "step": 5772 }, { "epoch": 0.36160917020310374, "grad_norm": 0.8292969052620064, "learning_rate": 7.3828588444618e-06, "loss": 0.4307, "step": 5773 }, { "epoch": 0.361671808202446, "grad_norm": 0.7202020549364183, "learning_rate": 7.381967001484057e-06, "loss": 0.3531, "step": 5774 }, { "epoch": 0.3617344462017883, "grad_norm": 0.7819569622450845, "learning_rate": 7.38107506046411e-06, "loss": 0.3779, "step": 5775 }, { "epoch": 0.3617970842011306, "grad_norm": 0.8017148472522285, "learning_rate": 7.380183021438673e-06, "loss": 0.4432, "step": 5776 }, { "epoch": 0.3618597222004729, "grad_norm": 0.7642819400070583, "learning_rate": 7.3792908844444596e-06, "loss": 0.3954, "step": 5777 }, { "epoch": 0.3619223601998152, "grad_norm": 0.7034636008226939, "learning_rate": 7.378398649518193e-06, "loss": 0.4711, "step": 5778 }, { "epoch": 0.3619849981991575, "grad_norm": 0.7988594195660303, "learning_rate": 7.377506316696596e-06, "loss": 0.4008, "step": 5779 }, { "epoch": 0.36204763619849983, "grad_norm": 0.77879272726771, "learning_rate": 7.376613886016397e-06, "loss": 0.381, "step": 5780 }, { "epoch": 0.36211027419784214, "grad_norm": 0.8783896651420762, "learning_rate": 7.375721357514332e-06, "loss": 0.4084, "step": 5781 }, { "epoch": 0.36217291219718445, "grad_norm": 0.9010149762858556, "learning_rate": 7.374828731227131e-06, "loss": 0.4026, "step": 5782 }, { "epoch": 0.3622355501965267, "grad_norm": 0.8820445097272199, "learning_rate": 7.373936007191543e-06, "loss": 0.4107, "step": 5783 }, { "epoch": 0.362298188195869, "grad_norm": 0.8215495338436027, "learning_rate": 7.373043185444305e-06, "loss": 0.4483, "step": 5784 }, { "epoch": 0.3623608261952113, "grad_norm": 0.8083781516946685, "learning_rate": 7.372150266022169e-06, "loss": 0.3943, "step": 5785 }, { "epoch": 0.3624234641945536, "grad_norm": 0.8315231412923928, "learning_rate": 7.371257248961888e-06, "loss": 0.4507, "step": 5786 }, { "epoch": 0.36248610219389593, "grad_norm": 0.8216093323594205, "learning_rate": 7.370364134300221e-06, "loss": 0.3836, "step": 5787 }, { "epoch": 0.36254874019323824, "grad_norm": 0.79723920736812, "learning_rate": 7.3694709220739245e-06, "loss": 0.4223, "step": 5788 }, { "epoch": 0.36261137819258055, "grad_norm": 0.7385850929622145, "learning_rate": 7.368577612319764e-06, "loss": 0.4645, "step": 5789 }, { "epoch": 0.36267401619192285, "grad_norm": 0.8470352507252109, "learning_rate": 7.36768420507451e-06, "loss": 0.4516, "step": 5790 }, { "epoch": 0.3627366541912651, "grad_norm": 0.8076105137359078, "learning_rate": 7.366790700374935e-06, "loss": 0.4187, "step": 5791 }, { "epoch": 0.3627992921906074, "grad_norm": 0.763478732323477, "learning_rate": 7.365897098257817e-06, "loss": 0.3935, "step": 5792 }, { "epoch": 0.3628619301899497, "grad_norm": 0.831956689166088, "learning_rate": 7.365003398759933e-06, "loss": 0.4334, "step": 5793 }, { "epoch": 0.36292456818929203, "grad_norm": 0.8556300177484369, "learning_rate": 7.364109601918071e-06, "loss": 0.4291, "step": 5794 }, { "epoch": 0.36298720618863434, "grad_norm": 0.9864569970439634, "learning_rate": 7.363215707769018e-06, "loss": 0.4349, "step": 5795 }, { "epoch": 0.36304984418797664, "grad_norm": 1.021058655069945, "learning_rate": 7.36232171634957e-06, "loss": 0.4409, "step": 5796 }, { "epoch": 0.36311248218731895, "grad_norm": 0.7708827836524246, "learning_rate": 7.36142762769652e-06, "loss": 0.4094, "step": 5797 }, { "epoch": 0.36317512018666126, "grad_norm": 0.7267692896778561, "learning_rate": 7.360533441846671e-06, "loss": 0.4666, "step": 5798 }, { "epoch": 0.3632377581860035, "grad_norm": 0.8135731451056707, "learning_rate": 7.359639158836828e-06, "loss": 0.4531, "step": 5799 }, { "epoch": 0.3633003961853458, "grad_norm": 0.8943867404493698, "learning_rate": 7.358744778703798e-06, "loss": 0.4025, "step": 5800 }, { "epoch": 0.3633630341846881, "grad_norm": 0.8251800709100707, "learning_rate": 7.357850301484397e-06, "loss": 0.4348, "step": 5801 }, { "epoch": 0.36342567218403043, "grad_norm": 0.8573312744480504, "learning_rate": 7.356955727215438e-06, "loss": 0.4072, "step": 5802 }, { "epoch": 0.36348831018337274, "grad_norm": 0.7668219700790058, "learning_rate": 7.356061055933744e-06, "loss": 0.3813, "step": 5803 }, { "epoch": 0.36355094818271505, "grad_norm": 0.8623986841295415, "learning_rate": 7.355166287676141e-06, "loss": 0.396, "step": 5804 }, { "epoch": 0.36361358618205736, "grad_norm": 0.8301702038869436, "learning_rate": 7.354271422479455e-06, "loss": 0.4193, "step": 5805 }, { "epoch": 0.36367622418139967, "grad_norm": 0.6609038000162436, "learning_rate": 7.3533764603805214e-06, "loss": 0.5061, "step": 5806 }, { "epoch": 0.363738862180742, "grad_norm": 0.8570145812874964, "learning_rate": 7.352481401416175e-06, "loss": 0.4609, "step": 5807 }, { "epoch": 0.3638015001800842, "grad_norm": 0.8586510401414492, "learning_rate": 7.351586245623258e-06, "loss": 0.4187, "step": 5808 }, { "epoch": 0.36386413817942653, "grad_norm": 0.8767083788001556, "learning_rate": 7.350690993038615e-06, "loss": 0.4186, "step": 5809 }, { "epoch": 0.36392677617876884, "grad_norm": 0.8020881982498563, "learning_rate": 7.3497956436990934e-06, "loss": 0.3956, "step": 5810 }, { "epoch": 0.36398941417811115, "grad_norm": 0.8586912471994703, "learning_rate": 7.348900197641548e-06, "loss": 0.4584, "step": 5811 }, { "epoch": 0.36405205217745346, "grad_norm": 0.8312683529608528, "learning_rate": 7.348004654902833e-06, "loss": 0.4097, "step": 5812 }, { "epoch": 0.36411469017679576, "grad_norm": 0.8087418026138633, "learning_rate": 7.347109015519812e-06, "loss": 0.4343, "step": 5813 }, { "epoch": 0.36417732817613807, "grad_norm": 0.815462457550286, "learning_rate": 7.346213279529349e-06, "loss": 0.4157, "step": 5814 }, { "epoch": 0.3642399661754804, "grad_norm": 0.7722242127854525, "learning_rate": 7.34531744696831e-06, "loss": 0.3775, "step": 5815 }, { "epoch": 0.36430260417482263, "grad_norm": 0.8759691293868902, "learning_rate": 7.34442151787357e-06, "loss": 0.4209, "step": 5816 }, { "epoch": 0.36436524217416494, "grad_norm": 0.6853713436639753, "learning_rate": 7.343525492282005e-06, "loss": 0.4864, "step": 5817 }, { "epoch": 0.36442788017350725, "grad_norm": 0.6757809452374269, "learning_rate": 7.342629370230497e-06, "loss": 0.4838, "step": 5818 }, { "epoch": 0.36449051817284955, "grad_norm": 0.8125929688384032, "learning_rate": 7.3417331517559295e-06, "loss": 0.4067, "step": 5819 }, { "epoch": 0.36455315617219186, "grad_norm": 0.8666562889749656, "learning_rate": 7.340836836895189e-06, "loss": 0.4492, "step": 5820 }, { "epoch": 0.36461579417153417, "grad_norm": 0.8527590338337774, "learning_rate": 7.33994042568517e-06, "loss": 0.4157, "step": 5821 }, { "epoch": 0.3646784321708765, "grad_norm": 0.7937788529369343, "learning_rate": 7.339043918162771e-06, "loss": 0.4265, "step": 5822 }, { "epoch": 0.3647410701702188, "grad_norm": 0.8164547089102786, "learning_rate": 7.338147314364889e-06, "loss": 0.4455, "step": 5823 }, { "epoch": 0.3648037081695611, "grad_norm": 0.8094705184169673, "learning_rate": 7.337250614328428e-06, "loss": 0.4293, "step": 5824 }, { "epoch": 0.36486634616890334, "grad_norm": 0.7716458269561888, "learning_rate": 7.3363538180903e-06, "loss": 0.3712, "step": 5825 }, { "epoch": 0.36492898416824565, "grad_norm": 0.7473933574553272, "learning_rate": 7.3354569256874145e-06, "loss": 0.4631, "step": 5826 }, { "epoch": 0.36499162216758796, "grad_norm": 0.8393860766530554, "learning_rate": 7.334559937156687e-06, "loss": 0.4059, "step": 5827 }, { "epoch": 0.36505426016693027, "grad_norm": 0.9043140282885366, "learning_rate": 7.33366285253504e-06, "loss": 0.4373, "step": 5828 }, { "epoch": 0.3651168981662726, "grad_norm": 0.8934888126198045, "learning_rate": 7.332765671859399e-06, "loss": 0.3904, "step": 5829 }, { "epoch": 0.3651795361656149, "grad_norm": 0.8015520732754772, "learning_rate": 7.331868395166688e-06, "loss": 0.3928, "step": 5830 }, { "epoch": 0.3652421741649572, "grad_norm": 0.9596202268888693, "learning_rate": 7.330971022493841e-06, "loss": 0.4156, "step": 5831 }, { "epoch": 0.3653048121642995, "grad_norm": 0.8584335353056964, "learning_rate": 7.330073553877794e-06, "loss": 0.4199, "step": 5832 }, { "epoch": 0.36536745016364175, "grad_norm": 0.8966873572523307, "learning_rate": 7.329175989355488e-06, "loss": 0.4283, "step": 5833 }, { "epoch": 0.36543008816298406, "grad_norm": 0.9279153280143996, "learning_rate": 7.328278328963866e-06, "loss": 0.4225, "step": 5834 }, { "epoch": 0.36549272616232636, "grad_norm": 0.7696098666303545, "learning_rate": 7.327380572739875e-06, "loss": 0.4127, "step": 5835 }, { "epoch": 0.36555536416166867, "grad_norm": 0.8567700847572169, "learning_rate": 7.326482720720468e-06, "loss": 0.4045, "step": 5836 }, { "epoch": 0.365618002161011, "grad_norm": 0.9717105661405655, "learning_rate": 7.3255847729426e-06, "loss": 0.4284, "step": 5837 }, { "epoch": 0.3656806401603533, "grad_norm": 0.8694473437236772, "learning_rate": 7.324686729443231e-06, "loss": 0.4238, "step": 5838 }, { "epoch": 0.3657432781596956, "grad_norm": 0.665677355192219, "learning_rate": 7.323788590259326e-06, "loss": 0.5047, "step": 5839 }, { "epoch": 0.3658059161590379, "grad_norm": 0.8686653147360286, "learning_rate": 7.3228903554278495e-06, "loss": 0.4308, "step": 5840 }, { "epoch": 0.36586855415838015, "grad_norm": 0.6488696459559853, "learning_rate": 7.3219920249857755e-06, "loss": 0.4815, "step": 5841 }, { "epoch": 0.36593119215772246, "grad_norm": 0.8093000889942316, "learning_rate": 7.32109359897008e-06, "loss": 0.4046, "step": 5842 }, { "epoch": 0.36599383015706477, "grad_norm": 0.75684909936627, "learning_rate": 7.32019507741774e-06, "loss": 0.3895, "step": 5843 }, { "epoch": 0.3660564681564071, "grad_norm": 0.6603426259183253, "learning_rate": 7.31929646036574e-06, "loss": 0.4647, "step": 5844 }, { "epoch": 0.3661191061557494, "grad_norm": 0.8647438027883455, "learning_rate": 7.318397747851067e-06, "loss": 0.4715, "step": 5845 }, { "epoch": 0.3661817441550917, "grad_norm": 0.8464675668910929, "learning_rate": 7.317498939910712e-06, "loss": 0.4269, "step": 5846 }, { "epoch": 0.366244382154434, "grad_norm": 0.8466271226624354, "learning_rate": 7.316600036581673e-06, "loss": 0.4122, "step": 5847 }, { "epoch": 0.3663070201537763, "grad_norm": 0.8141653428358245, "learning_rate": 7.3157010379009444e-06, "loss": 0.4294, "step": 5848 }, { "epoch": 0.3663696581531186, "grad_norm": 0.8576372888742911, "learning_rate": 7.314801943905531e-06, "loss": 0.4527, "step": 5849 }, { "epoch": 0.36643229615246087, "grad_norm": 0.7730206480443678, "learning_rate": 7.3139027546324405e-06, "loss": 0.3646, "step": 5850 }, { "epoch": 0.3664949341518032, "grad_norm": 0.8474971628587161, "learning_rate": 7.313003470118684e-06, "loss": 0.4418, "step": 5851 }, { "epoch": 0.3665575721511455, "grad_norm": 0.8183426060446829, "learning_rate": 7.312104090401274e-06, "loss": 0.4112, "step": 5852 }, { "epoch": 0.3666202101504878, "grad_norm": 0.8808331895823226, "learning_rate": 7.311204615517231e-06, "loss": 0.4113, "step": 5853 }, { "epoch": 0.3666828481498301, "grad_norm": 0.7669357855449815, "learning_rate": 7.310305045503576e-06, "loss": 0.4134, "step": 5854 }, { "epoch": 0.3667454861491724, "grad_norm": 0.8250859737907706, "learning_rate": 7.309405380397338e-06, "loss": 0.4152, "step": 5855 }, { "epoch": 0.3668081241485147, "grad_norm": 0.8346846183548109, "learning_rate": 7.308505620235546e-06, "loss": 0.4188, "step": 5856 }, { "epoch": 0.366870762147857, "grad_norm": 0.8212067233711811, "learning_rate": 7.307605765055235e-06, "loss": 0.4562, "step": 5857 }, { "epoch": 0.3669334001471993, "grad_norm": 0.7128974662224422, "learning_rate": 7.30670581489344e-06, "loss": 0.3592, "step": 5858 }, { "epoch": 0.3669960381465416, "grad_norm": 0.691902260068284, "learning_rate": 7.305805769787207e-06, "loss": 0.4879, "step": 5859 }, { "epoch": 0.3670586761458839, "grad_norm": 0.8323101708959559, "learning_rate": 7.30490562977358e-06, "loss": 0.4147, "step": 5860 }, { "epoch": 0.3671213141452262, "grad_norm": 0.8076837671059329, "learning_rate": 7.30400539488961e-06, "loss": 0.4192, "step": 5861 }, { "epoch": 0.3671839521445685, "grad_norm": 0.6537604821161068, "learning_rate": 7.303105065172349e-06, "loss": 0.4846, "step": 5862 }, { "epoch": 0.3672465901439108, "grad_norm": 0.908680488952784, "learning_rate": 7.302204640658856e-06, "loss": 0.4429, "step": 5863 }, { "epoch": 0.3673092281432531, "grad_norm": 0.759758158721983, "learning_rate": 7.301304121386194e-06, "loss": 0.4068, "step": 5864 }, { "epoch": 0.3673718661425954, "grad_norm": 0.7964326359627816, "learning_rate": 7.300403507391427e-06, "loss": 0.3914, "step": 5865 }, { "epoch": 0.3674345041419377, "grad_norm": 0.9341805349739256, "learning_rate": 7.299502798711625e-06, "loss": 0.4387, "step": 5866 }, { "epoch": 0.36749714214128, "grad_norm": 0.7930358232193784, "learning_rate": 7.29860199538386e-06, "loss": 0.3616, "step": 5867 }, { "epoch": 0.3675597801406223, "grad_norm": 0.8141205904485523, "learning_rate": 7.297701097445211e-06, "loss": 0.4209, "step": 5868 }, { "epoch": 0.3676224181399646, "grad_norm": 0.7310754969714806, "learning_rate": 7.2968001049327575e-06, "loss": 0.4701, "step": 5869 }, { "epoch": 0.3676850561393069, "grad_norm": 0.8375548393329895, "learning_rate": 7.2958990178835875e-06, "loss": 0.4315, "step": 5870 }, { "epoch": 0.3677476941386492, "grad_norm": 0.8826472485521139, "learning_rate": 7.294997836334785e-06, "loss": 0.4139, "step": 5871 }, { "epoch": 0.3678103321379915, "grad_norm": 0.8821608638098751, "learning_rate": 7.294096560323447e-06, "loss": 0.4424, "step": 5872 }, { "epoch": 0.36787297013733383, "grad_norm": 0.9092539790642856, "learning_rate": 7.293195189886668e-06, "loss": 0.4543, "step": 5873 }, { "epoch": 0.36793560813667614, "grad_norm": 0.8422887332268084, "learning_rate": 7.292293725061551e-06, "loss": 0.4809, "step": 5874 }, { "epoch": 0.3679982461360184, "grad_norm": 0.8002230735990625, "learning_rate": 7.291392165885197e-06, "loss": 0.4089, "step": 5875 }, { "epoch": 0.3680608841353607, "grad_norm": 0.8117942020032257, "learning_rate": 7.2904905123947176e-06, "loss": 0.4005, "step": 5876 }, { "epoch": 0.368123522134703, "grad_norm": 0.8098001324248731, "learning_rate": 7.289588764627222e-06, "loss": 0.3741, "step": 5877 }, { "epoch": 0.3681861601340453, "grad_norm": 0.8016710952990651, "learning_rate": 7.28868692261983e-06, "loss": 0.4286, "step": 5878 }, { "epoch": 0.3682487981333876, "grad_norm": 0.8269977001371119, "learning_rate": 7.2877849864096585e-06, "loss": 0.3982, "step": 5879 }, { "epoch": 0.36831143613272993, "grad_norm": 0.7979685358693286, "learning_rate": 7.286882956033831e-06, "loss": 0.4013, "step": 5880 }, { "epoch": 0.36837407413207224, "grad_norm": 0.8062268842875226, "learning_rate": 7.2859808315294776e-06, "loss": 0.4014, "step": 5881 }, { "epoch": 0.36843671213141455, "grad_norm": 0.7519757497505836, "learning_rate": 7.285078612933729e-06, "loss": 0.3816, "step": 5882 }, { "epoch": 0.3684993501307568, "grad_norm": 0.8184588524919215, "learning_rate": 7.284176300283721e-06, "loss": 0.4338, "step": 5883 }, { "epoch": 0.3685619881300991, "grad_norm": 0.8488849599847593, "learning_rate": 7.283273893616591e-06, "loss": 0.4166, "step": 5884 }, { "epoch": 0.3686246261294414, "grad_norm": 0.8449615099653043, "learning_rate": 7.282371392969485e-06, "loss": 0.4289, "step": 5885 }, { "epoch": 0.3686872641287837, "grad_norm": 0.8107458560945113, "learning_rate": 7.281468798379549e-06, "loss": 0.4326, "step": 5886 }, { "epoch": 0.36874990212812603, "grad_norm": 0.6948261731518378, "learning_rate": 7.280566109883933e-06, "loss": 0.4679, "step": 5887 }, { "epoch": 0.36881254012746834, "grad_norm": 0.775914721634007, "learning_rate": 7.279663327519792e-06, "loss": 0.4071, "step": 5888 }, { "epoch": 0.36887517812681064, "grad_norm": 0.8737911301108886, "learning_rate": 7.278760451324287e-06, "loss": 0.4199, "step": 5889 }, { "epoch": 0.36893781612615295, "grad_norm": 0.8115215156419473, "learning_rate": 7.2778574813345785e-06, "loss": 0.3871, "step": 5890 }, { "epoch": 0.36900045412549526, "grad_norm": 0.8152902092139018, "learning_rate": 7.2769544175878325e-06, "loss": 0.3965, "step": 5891 }, { "epoch": 0.3690630921248375, "grad_norm": 0.798762199245858, "learning_rate": 7.27605126012122e-06, "loss": 0.3654, "step": 5892 }, { "epoch": 0.3691257301241798, "grad_norm": 0.6598247756317287, "learning_rate": 7.275148008971916e-06, "loss": 0.4628, "step": 5893 }, { "epoch": 0.3691883681235221, "grad_norm": 0.8391945367941989, "learning_rate": 7.2742446641770985e-06, "loss": 0.4211, "step": 5894 }, { "epoch": 0.36925100612286443, "grad_norm": 0.7959421676677423, "learning_rate": 7.273341225773947e-06, "loss": 0.4259, "step": 5895 }, { "epoch": 0.36931364412220674, "grad_norm": 0.9527469021722614, "learning_rate": 7.2724376937996485e-06, "loss": 0.4002, "step": 5896 }, { "epoch": 0.36937628212154905, "grad_norm": 0.8926921970620123, "learning_rate": 7.271534068291393e-06, "loss": 0.4206, "step": 5897 }, { "epoch": 0.36943892012089136, "grad_norm": 0.8218255582659042, "learning_rate": 7.2706303492863736e-06, "loss": 0.4506, "step": 5898 }, { "epoch": 0.36950155812023366, "grad_norm": 0.811171425744123, "learning_rate": 7.2697265368217886e-06, "loss": 0.3765, "step": 5899 }, { "epoch": 0.3695641961195759, "grad_norm": 0.8379330333341313, "learning_rate": 7.268822630934837e-06, "loss": 0.423, "step": 5900 }, { "epoch": 0.3696268341189182, "grad_norm": 0.801316722967744, "learning_rate": 7.267918631662725e-06, "loss": 0.4043, "step": 5901 }, { "epoch": 0.36968947211826053, "grad_norm": 0.8289039724573347, "learning_rate": 7.267014539042662e-06, "loss": 0.4292, "step": 5902 }, { "epoch": 0.36975211011760284, "grad_norm": 0.783131399507439, "learning_rate": 7.2661103531118595e-06, "loss": 0.4341, "step": 5903 }, { "epoch": 0.36981474811694515, "grad_norm": 0.7862667731355101, "learning_rate": 7.265206073907533e-06, "loss": 0.4064, "step": 5904 }, { "epoch": 0.36987738611628745, "grad_norm": 0.8595762475799927, "learning_rate": 7.264301701466903e-06, "loss": 0.4327, "step": 5905 }, { "epoch": 0.36994002411562976, "grad_norm": 0.8638465984249799, "learning_rate": 7.263397235827199e-06, "loss": 0.4193, "step": 5906 }, { "epoch": 0.37000266211497207, "grad_norm": 0.6134742201482536, "learning_rate": 7.262492677025642e-06, "loss": 0.4861, "step": 5907 }, { "epoch": 0.3700653001143143, "grad_norm": 0.857971705967142, "learning_rate": 7.261588025099466e-06, "loss": 0.4223, "step": 5908 }, { "epoch": 0.37012793811365663, "grad_norm": 0.8779013468859642, "learning_rate": 7.260683280085909e-06, "loss": 0.4056, "step": 5909 }, { "epoch": 0.37019057611299894, "grad_norm": 0.8281734347793294, "learning_rate": 7.2597784420222065e-06, "loss": 0.4237, "step": 5910 }, { "epoch": 0.37025321411234124, "grad_norm": 0.7663966940390077, "learning_rate": 7.2588735109456055e-06, "loss": 0.4015, "step": 5911 }, { "epoch": 0.37031585211168355, "grad_norm": 0.8399477330063523, "learning_rate": 7.257968486893351e-06, "loss": 0.3749, "step": 5912 }, { "epoch": 0.37037849011102586, "grad_norm": 0.8327990658869963, "learning_rate": 7.257063369902694e-06, "loss": 0.403, "step": 5913 }, { "epoch": 0.37044112811036817, "grad_norm": 0.737305480290575, "learning_rate": 7.2561581600108885e-06, "loss": 0.3846, "step": 5914 }, { "epoch": 0.3705037661097105, "grad_norm": 0.8631055803683375, "learning_rate": 7.255252857255197e-06, "loss": 0.461, "step": 5915 }, { "epoch": 0.3705664041090528, "grad_norm": 0.8078871194487709, "learning_rate": 7.254347461672879e-06, "loss": 0.4235, "step": 5916 }, { "epoch": 0.37062904210839503, "grad_norm": 0.8453306979348787, "learning_rate": 7.253441973301201e-06, "loss": 0.4152, "step": 5917 }, { "epoch": 0.37069168010773734, "grad_norm": 0.7988285169889959, "learning_rate": 7.252536392177431e-06, "loss": 0.3926, "step": 5918 }, { "epoch": 0.37075431810707965, "grad_norm": 0.8695886070928325, "learning_rate": 7.251630718338847e-06, "loss": 0.4405, "step": 5919 }, { "epoch": 0.37081695610642196, "grad_norm": 0.8587568407842109, "learning_rate": 7.250724951822724e-06, "loss": 0.407, "step": 5920 }, { "epoch": 0.37087959410576427, "grad_norm": 0.8804560514303142, "learning_rate": 7.2498190926663446e-06, "loss": 0.4198, "step": 5921 }, { "epoch": 0.3709422321051066, "grad_norm": 0.911632468108049, "learning_rate": 7.2489131409069935e-06, "loss": 0.4298, "step": 5922 }, { "epoch": 0.3710048701044489, "grad_norm": 0.8092471983223791, "learning_rate": 7.248007096581959e-06, "loss": 0.3866, "step": 5923 }, { "epoch": 0.3710675081037912, "grad_norm": 0.8006646524489005, "learning_rate": 7.247100959728537e-06, "loss": 0.4586, "step": 5924 }, { "epoch": 0.37113014610313344, "grad_norm": 0.793886067128531, "learning_rate": 7.246194730384022e-06, "loss": 0.3867, "step": 5925 }, { "epoch": 0.37119278410247575, "grad_norm": 0.849931268035709, "learning_rate": 7.245288408585714e-06, "loss": 0.4385, "step": 5926 }, { "epoch": 0.37125542210181806, "grad_norm": 0.7998762718192635, "learning_rate": 7.244381994370919e-06, "loss": 0.4262, "step": 5927 }, { "epoch": 0.37131806010116036, "grad_norm": 0.7938811117898124, "learning_rate": 7.243475487776945e-06, "loss": 0.4126, "step": 5928 }, { "epoch": 0.37138069810050267, "grad_norm": 0.7727012484048016, "learning_rate": 7.242568888841104e-06, "loss": 0.3942, "step": 5929 }, { "epoch": 0.371443336099845, "grad_norm": 0.7435566419437892, "learning_rate": 7.24166219760071e-06, "loss": 0.394, "step": 5930 }, { "epoch": 0.3715059740991873, "grad_norm": 0.8146411481506421, "learning_rate": 7.2407554140930834e-06, "loss": 0.4013, "step": 5931 }, { "epoch": 0.3715686120985296, "grad_norm": 0.7518512796368698, "learning_rate": 7.239848538355549e-06, "loss": 0.4827, "step": 5932 }, { "epoch": 0.3716312500978719, "grad_norm": 0.66645832751465, "learning_rate": 7.2389415704254325e-06, "loss": 0.4838, "step": 5933 }, { "epoch": 0.37169388809721415, "grad_norm": 0.8231231242399363, "learning_rate": 7.238034510340068e-06, "loss": 0.4136, "step": 5934 }, { "epoch": 0.37175652609655646, "grad_norm": 0.7837267627297182, "learning_rate": 7.237127358136784e-06, "loss": 0.393, "step": 5935 }, { "epoch": 0.37181916409589877, "grad_norm": 0.8284244008144561, "learning_rate": 7.236220113852926e-06, "loss": 0.4107, "step": 5936 }, { "epoch": 0.3718818020952411, "grad_norm": 0.8960754602901276, "learning_rate": 7.235312777525829e-06, "loss": 0.4461, "step": 5937 }, { "epoch": 0.3719444400945834, "grad_norm": 0.7196738019444244, "learning_rate": 7.234405349192847e-06, "loss": 0.4832, "step": 5938 }, { "epoch": 0.3720070780939257, "grad_norm": 0.947720121256737, "learning_rate": 7.233497828891324e-06, "loss": 0.4372, "step": 5939 }, { "epoch": 0.372069716093268, "grad_norm": 0.8272155161589095, "learning_rate": 7.232590216658617e-06, "loss": 0.3737, "step": 5940 }, { "epoch": 0.3721323540926103, "grad_norm": 0.7948729943203034, "learning_rate": 7.231682512532081e-06, "loss": 0.389, "step": 5941 }, { "epoch": 0.37219499209195256, "grad_norm": 0.7219801482895765, "learning_rate": 7.230774716549079e-06, "loss": 0.3745, "step": 5942 }, { "epoch": 0.37225763009129487, "grad_norm": 0.8597338524172178, "learning_rate": 7.229866828746977e-06, "loss": 0.4547, "step": 5943 }, { "epoch": 0.3723202680906372, "grad_norm": 0.8713738015157348, "learning_rate": 7.22895884916314e-06, "loss": 0.4272, "step": 5944 }, { "epoch": 0.3723829060899795, "grad_norm": 0.7753103696600225, "learning_rate": 7.228050777834945e-06, "loss": 0.3684, "step": 5945 }, { "epoch": 0.3724455440893218, "grad_norm": 0.8176179768446045, "learning_rate": 7.227142614799765e-06, "loss": 0.4281, "step": 5946 }, { "epoch": 0.3725081820886641, "grad_norm": 0.7762795415319533, "learning_rate": 7.226234360094983e-06, "loss": 0.4073, "step": 5947 }, { "epoch": 0.3725708200880064, "grad_norm": 0.853264080194063, "learning_rate": 7.2253260137579805e-06, "loss": 0.4269, "step": 5948 }, { "epoch": 0.3726334580873487, "grad_norm": 0.8328935710135116, "learning_rate": 7.224417575826147e-06, "loss": 0.4074, "step": 5949 }, { "epoch": 0.37269609608669096, "grad_norm": 0.8589495442026321, "learning_rate": 7.223509046336872e-06, "loss": 0.4854, "step": 5950 }, { "epoch": 0.37275873408603327, "grad_norm": 0.7819843938545763, "learning_rate": 7.222600425327553e-06, "loss": 0.4095, "step": 5951 }, { "epoch": 0.3728213720853756, "grad_norm": 0.8609183123759093, "learning_rate": 7.221691712835588e-06, "loss": 0.409, "step": 5952 }, { "epoch": 0.3728840100847179, "grad_norm": 0.8704140615612768, "learning_rate": 7.220782908898379e-06, "loss": 0.4503, "step": 5953 }, { "epoch": 0.3729466480840602, "grad_norm": 0.8156248217896908, "learning_rate": 7.219874013553335e-06, "loss": 0.3938, "step": 5954 }, { "epoch": 0.3730092860834025, "grad_norm": 0.8840268382419364, "learning_rate": 7.218965026837863e-06, "loss": 0.4638, "step": 5955 }, { "epoch": 0.3730719240827448, "grad_norm": 0.7638120258682345, "learning_rate": 7.218055948789379e-06, "loss": 0.4215, "step": 5956 }, { "epoch": 0.3731345620820871, "grad_norm": 0.810301367076806, "learning_rate": 7.2171467794453e-06, "loss": 0.3892, "step": 5957 }, { "epoch": 0.3731972000814294, "grad_norm": 0.8050189611253956, "learning_rate": 7.216237518843049e-06, "loss": 0.4424, "step": 5958 }, { "epoch": 0.3732598380807717, "grad_norm": 0.7672124704992737, "learning_rate": 7.2153281670200505e-06, "loss": 0.4002, "step": 5959 }, { "epoch": 0.373322476080114, "grad_norm": 0.9374977004603784, "learning_rate": 7.214418724013732e-06, "loss": 0.4358, "step": 5960 }, { "epoch": 0.3733851140794563, "grad_norm": 0.7422521219893502, "learning_rate": 7.213509189861527e-06, "loss": 0.389, "step": 5961 }, { "epoch": 0.3734477520787986, "grad_norm": 0.8584309409135739, "learning_rate": 7.212599564600877e-06, "loss": 0.4312, "step": 5962 }, { "epoch": 0.3735103900781409, "grad_norm": 0.8567057792489023, "learning_rate": 7.2116898482692145e-06, "loss": 0.4265, "step": 5963 }, { "epoch": 0.3735730280774832, "grad_norm": 0.7766606978563286, "learning_rate": 7.210780040903988e-06, "loss": 0.3859, "step": 5964 }, { "epoch": 0.3736356660768255, "grad_norm": 0.8246403208046869, "learning_rate": 7.209870142542645e-06, "loss": 0.4215, "step": 5965 }, { "epoch": 0.37369830407616783, "grad_norm": 0.8653312184058856, "learning_rate": 7.208960153222637e-06, "loss": 0.4078, "step": 5966 }, { "epoch": 0.3737609420755101, "grad_norm": 0.8072252660618803, "learning_rate": 7.2080500729814205e-06, "loss": 0.4296, "step": 5967 }, { "epoch": 0.3738235800748524, "grad_norm": 0.790967828347879, "learning_rate": 7.207139901856453e-06, "loss": 0.416, "step": 5968 }, { "epoch": 0.3738862180741947, "grad_norm": 0.7770474160195014, "learning_rate": 7.206229639885195e-06, "loss": 0.3468, "step": 5969 }, { "epoch": 0.373948856073537, "grad_norm": 0.8783666762491366, "learning_rate": 7.2053192871051156e-06, "loss": 0.421, "step": 5970 }, { "epoch": 0.3740114940728793, "grad_norm": 0.7500110549377933, "learning_rate": 7.204408843553688e-06, "loss": 0.3776, "step": 5971 }, { "epoch": 0.3740741320722216, "grad_norm": 0.8362908942539149, "learning_rate": 7.203498309268381e-06, "loss": 0.401, "step": 5972 }, { "epoch": 0.37413677007156393, "grad_norm": 0.7720378421086996, "learning_rate": 7.202587684286675e-06, "loss": 0.378, "step": 5973 }, { "epoch": 0.37419940807090624, "grad_norm": 0.7795369900634718, "learning_rate": 7.201676968646049e-06, "loss": 0.3943, "step": 5974 }, { "epoch": 0.3742620460702485, "grad_norm": 0.840661895920009, "learning_rate": 7.200766162383994e-06, "loss": 0.4833, "step": 5975 }, { "epoch": 0.3743246840695908, "grad_norm": 0.8623420522385441, "learning_rate": 7.199855265537994e-06, "loss": 0.4229, "step": 5976 }, { "epoch": 0.3743873220689331, "grad_norm": 0.7619395228810419, "learning_rate": 7.198944278145542e-06, "loss": 0.3791, "step": 5977 }, { "epoch": 0.3744499600682754, "grad_norm": 0.8291581368871479, "learning_rate": 7.198033200244136e-06, "loss": 0.425, "step": 5978 }, { "epoch": 0.3745125980676177, "grad_norm": 0.8407653629188556, "learning_rate": 7.197122031871275e-06, "loss": 0.4389, "step": 5979 }, { "epoch": 0.37457523606696, "grad_norm": 0.8003554860253593, "learning_rate": 7.196210773064465e-06, "loss": 0.3946, "step": 5980 }, { "epoch": 0.37463787406630233, "grad_norm": 0.7716708086916421, "learning_rate": 7.195299423861212e-06, "loss": 0.3947, "step": 5981 }, { "epoch": 0.37470051206564464, "grad_norm": 0.8014232892123095, "learning_rate": 7.194387984299025e-06, "loss": 0.4068, "step": 5982 }, { "epoch": 0.37476315006498695, "grad_norm": 0.7953109508165411, "learning_rate": 7.193476454415422e-06, "loss": 0.4037, "step": 5983 }, { "epoch": 0.3748257880643292, "grad_norm": 0.8450707997875546, "learning_rate": 7.192564834247921e-06, "loss": 0.4103, "step": 5984 }, { "epoch": 0.3748884260636715, "grad_norm": 0.7667314863367545, "learning_rate": 7.191653123834046e-06, "loss": 0.4196, "step": 5985 }, { "epoch": 0.3749510640630138, "grad_norm": 0.827551687652118, "learning_rate": 7.19074132321132e-06, "loss": 0.4371, "step": 5986 }, { "epoch": 0.3750137020623561, "grad_norm": 0.8600924769392306, "learning_rate": 7.189829432417275e-06, "loss": 0.4342, "step": 5987 }, { "epoch": 0.37507634006169843, "grad_norm": 0.8405123451978119, "learning_rate": 7.188917451489443e-06, "loss": 0.411, "step": 5988 }, { "epoch": 0.37513897806104074, "grad_norm": 0.8024844057152233, "learning_rate": 7.188005380465365e-06, "loss": 0.3909, "step": 5989 }, { "epoch": 0.37520161606038305, "grad_norm": 0.8132854521487614, "learning_rate": 7.187093219382577e-06, "loss": 0.3606, "step": 5990 }, { "epoch": 0.37526425405972536, "grad_norm": 0.8163970649569208, "learning_rate": 7.186180968278628e-06, "loss": 0.4015, "step": 5991 }, { "epoch": 0.3753268920590676, "grad_norm": 0.8238552855729523, "learning_rate": 7.185268627191062e-06, "loss": 0.3767, "step": 5992 }, { "epoch": 0.3753895300584099, "grad_norm": 0.9019066648495111, "learning_rate": 7.184356196157436e-06, "loss": 0.4374, "step": 5993 }, { "epoch": 0.3754521680577522, "grad_norm": 0.8273080552809318, "learning_rate": 7.183443675215304e-06, "loss": 0.4264, "step": 5994 }, { "epoch": 0.37551480605709453, "grad_norm": 0.8205476478094733, "learning_rate": 7.182531064402223e-06, "loss": 0.4394, "step": 5995 }, { "epoch": 0.37557744405643684, "grad_norm": 0.8162142829424549, "learning_rate": 7.181618363755759e-06, "loss": 0.4364, "step": 5996 }, { "epoch": 0.37564008205577915, "grad_norm": 0.8572402509933548, "learning_rate": 7.180705573313479e-06, "loss": 0.421, "step": 5997 }, { "epoch": 0.37570272005512145, "grad_norm": 0.8480598814961163, "learning_rate": 7.179792693112953e-06, "loss": 0.4183, "step": 5998 }, { "epoch": 0.37576535805446376, "grad_norm": 0.6516216564562132, "learning_rate": 7.178879723191754e-06, "loss": 0.4646, "step": 5999 }, { "epoch": 0.37582799605380607, "grad_norm": 0.8533128125278401, "learning_rate": 7.177966663587461e-06, "loss": 0.4271, "step": 6000 }, { "epoch": 0.3758906340531483, "grad_norm": 0.7500389432066854, "learning_rate": 7.177053514337657e-06, "loss": 0.3945, "step": 6001 }, { "epoch": 0.37595327205249063, "grad_norm": 0.6790989836247983, "learning_rate": 7.176140275479926e-06, "loss": 0.4564, "step": 6002 }, { "epoch": 0.37601591005183294, "grad_norm": 0.7913516350851612, "learning_rate": 7.175226947051856e-06, "loss": 0.395, "step": 6003 }, { "epoch": 0.37607854805117524, "grad_norm": 0.6901631625392178, "learning_rate": 7.174313529091044e-06, "loss": 0.4873, "step": 6004 }, { "epoch": 0.37614118605051755, "grad_norm": 0.8415366647191069, "learning_rate": 7.173400021635082e-06, "loss": 0.4249, "step": 6005 }, { "epoch": 0.37620382404985986, "grad_norm": 0.7691009992300232, "learning_rate": 7.172486424721571e-06, "loss": 0.434, "step": 6006 }, { "epoch": 0.37626646204920217, "grad_norm": 0.7665853876412939, "learning_rate": 7.1715727383881165e-06, "loss": 0.3958, "step": 6007 }, { "epoch": 0.3763291000485445, "grad_norm": 0.79566184586161, "learning_rate": 7.170658962672323e-06, "loss": 0.4003, "step": 6008 }, { "epoch": 0.3763917380478867, "grad_norm": 0.621799697887271, "learning_rate": 7.1697450976118065e-06, "loss": 0.5002, "step": 6009 }, { "epoch": 0.37645437604722903, "grad_norm": 0.8005438642345978, "learning_rate": 7.1688311432441774e-06, "loss": 0.3872, "step": 6010 }, { "epoch": 0.37651701404657134, "grad_norm": 0.8353808793679727, "learning_rate": 7.1679170996070565e-06, "loss": 0.387, "step": 6011 }, { "epoch": 0.37657965204591365, "grad_norm": 0.8165017290074329, "learning_rate": 7.167002966738065e-06, "loss": 0.4217, "step": 6012 }, { "epoch": 0.37664229004525596, "grad_norm": 0.824631185079661, "learning_rate": 7.166088744674829e-06, "loss": 0.4276, "step": 6013 }, { "epoch": 0.37670492804459826, "grad_norm": 0.7953273581169849, "learning_rate": 7.16517443345498e-06, "loss": 0.3902, "step": 6014 }, { "epoch": 0.37676756604394057, "grad_norm": 0.7513046869493039, "learning_rate": 7.164260033116147e-06, "loss": 0.3664, "step": 6015 }, { "epoch": 0.3768302040432829, "grad_norm": 0.7248581425946624, "learning_rate": 7.16334554369597e-06, "loss": 0.3975, "step": 6016 }, { "epoch": 0.37689284204262513, "grad_norm": 0.662733540418089, "learning_rate": 7.16243096523209e-06, "loss": 0.4917, "step": 6017 }, { "epoch": 0.37695548004196744, "grad_norm": 0.7407243690856311, "learning_rate": 7.16151629776215e-06, "loss": 0.4148, "step": 6018 }, { "epoch": 0.37701811804130975, "grad_norm": 0.8894366151147665, "learning_rate": 7.160601541323797e-06, "loss": 0.471, "step": 6019 }, { "epoch": 0.37708075604065205, "grad_norm": 0.783531186829769, "learning_rate": 7.159686695954684e-06, "loss": 0.3892, "step": 6020 }, { "epoch": 0.37714339403999436, "grad_norm": 0.7920167042021498, "learning_rate": 7.158771761692464e-06, "loss": 0.3526, "step": 6021 }, { "epoch": 0.37720603203933667, "grad_norm": 0.804753911667462, "learning_rate": 7.157856738574801e-06, "loss": 0.427, "step": 6022 }, { "epoch": 0.377268670038679, "grad_norm": 0.8439319425716394, "learning_rate": 7.156941626639353e-06, "loss": 0.4189, "step": 6023 }, { "epoch": 0.3773313080380213, "grad_norm": 0.8362083394373203, "learning_rate": 7.156026425923785e-06, "loss": 0.4245, "step": 6024 }, { "epoch": 0.3773939460373636, "grad_norm": 0.8507503982336692, "learning_rate": 7.155111136465771e-06, "loss": 0.4199, "step": 6025 }, { "epoch": 0.37745658403670584, "grad_norm": 0.8983512662788217, "learning_rate": 7.154195758302983e-06, "loss": 0.4212, "step": 6026 }, { "epoch": 0.37751922203604815, "grad_norm": 0.8549694407339172, "learning_rate": 7.153280291473097e-06, "loss": 0.4204, "step": 6027 }, { "epoch": 0.37758186003539046, "grad_norm": 0.7679684503858496, "learning_rate": 7.152364736013796e-06, "loss": 0.392, "step": 6028 }, { "epoch": 0.37764449803473277, "grad_norm": 0.8124547118744072, "learning_rate": 7.15144909196276e-06, "loss": 0.3691, "step": 6029 }, { "epoch": 0.3777071360340751, "grad_norm": 0.8069992411561813, "learning_rate": 7.150533359357682e-06, "loss": 0.3921, "step": 6030 }, { "epoch": 0.3777697740334174, "grad_norm": 0.8439876763190461, "learning_rate": 7.149617538236252e-06, "loss": 0.3977, "step": 6031 }, { "epoch": 0.3778324120327597, "grad_norm": 0.7975277540331157, "learning_rate": 7.148701628636165e-06, "loss": 0.4157, "step": 6032 }, { "epoch": 0.377895050032102, "grad_norm": 0.8495659059748366, "learning_rate": 7.14778563059512e-06, "loss": 0.4446, "step": 6033 }, { "epoch": 0.37795768803144425, "grad_norm": 0.7858396658114315, "learning_rate": 7.146869544150818e-06, "loss": 0.4277, "step": 6034 }, { "epoch": 0.37802032603078656, "grad_norm": 0.863810610363887, "learning_rate": 7.14595336934097e-06, "loss": 0.4377, "step": 6035 }, { "epoch": 0.37808296403012887, "grad_norm": 0.7975924003095732, "learning_rate": 7.145037106203282e-06, "loss": 0.4343, "step": 6036 }, { "epoch": 0.3781456020294712, "grad_norm": 0.8913888278656966, "learning_rate": 7.144120754775468e-06, "loss": 0.4134, "step": 6037 }, { "epoch": 0.3782082400288135, "grad_norm": 0.8011774048866088, "learning_rate": 7.143204315095245e-06, "loss": 0.3431, "step": 6038 }, { "epoch": 0.3782708780281558, "grad_norm": 0.8149423804321617, "learning_rate": 7.142287787200336e-06, "loss": 0.4339, "step": 6039 }, { "epoch": 0.3783335160274981, "grad_norm": 0.8450040590287069, "learning_rate": 7.141371171128464e-06, "loss": 0.4449, "step": 6040 }, { "epoch": 0.3783961540268404, "grad_norm": 0.8524737030850843, "learning_rate": 7.140454466917357e-06, "loss": 0.4409, "step": 6041 }, { "epoch": 0.3784587920261827, "grad_norm": 0.8800384138308949, "learning_rate": 7.139537674604746e-06, "loss": 0.4431, "step": 6042 }, { "epoch": 0.37852143002552496, "grad_norm": 0.8240992346393788, "learning_rate": 7.138620794228368e-06, "loss": 0.386, "step": 6043 }, { "epoch": 0.37858406802486727, "grad_norm": 0.7650402635696368, "learning_rate": 7.137703825825961e-06, "loss": 0.3689, "step": 6044 }, { "epoch": 0.3786467060242096, "grad_norm": 0.8390782982950227, "learning_rate": 7.136786769435269e-06, "loss": 0.3998, "step": 6045 }, { "epoch": 0.3787093440235519, "grad_norm": 0.826284393384444, "learning_rate": 7.135869625094035e-06, "loss": 0.4163, "step": 6046 }, { "epoch": 0.3787719820228942, "grad_norm": 0.8056540492792158, "learning_rate": 7.1349523928400114e-06, "loss": 0.4263, "step": 6047 }, { "epoch": 0.3788346200222365, "grad_norm": 0.9206344473842335, "learning_rate": 7.134035072710951e-06, "loss": 0.4056, "step": 6048 }, { "epoch": 0.3788972580215788, "grad_norm": 0.7090988845078441, "learning_rate": 7.133117664744611e-06, "loss": 0.4744, "step": 6049 }, { "epoch": 0.3789598960209211, "grad_norm": 0.8466200679897938, "learning_rate": 7.132200168978752e-06, "loss": 0.4266, "step": 6050 }, { "epoch": 0.37902253402026337, "grad_norm": 0.9011767089468742, "learning_rate": 7.1312825854511395e-06, "loss": 0.4004, "step": 6051 }, { "epoch": 0.3790851720196057, "grad_norm": 0.8678158051692165, "learning_rate": 7.130364914199538e-06, "loss": 0.4356, "step": 6052 }, { "epoch": 0.379147810018948, "grad_norm": 0.8956379109553363, "learning_rate": 7.129447155261723e-06, "loss": 0.4001, "step": 6053 }, { "epoch": 0.3792104480182903, "grad_norm": 0.8202795961050825, "learning_rate": 7.128529308675467e-06, "loss": 0.4318, "step": 6054 }, { "epoch": 0.3792730860176326, "grad_norm": 0.8880094846561293, "learning_rate": 7.127611374478549e-06, "loss": 0.4193, "step": 6055 }, { "epoch": 0.3793357240169749, "grad_norm": 0.810752501470387, "learning_rate": 7.126693352708753e-06, "loss": 0.3901, "step": 6056 }, { "epoch": 0.3793983620163172, "grad_norm": 0.6655308593998541, "learning_rate": 7.125775243403863e-06, "loss": 0.4981, "step": 6057 }, { "epoch": 0.3794610000156595, "grad_norm": 0.7972077241264358, "learning_rate": 7.124857046601671e-06, "loss": 0.3944, "step": 6058 }, { "epoch": 0.3795236380150018, "grad_norm": 0.8059590489260231, "learning_rate": 7.123938762339967e-06, "loss": 0.3682, "step": 6059 }, { "epoch": 0.3795862760143441, "grad_norm": 0.816379415383212, "learning_rate": 7.1230203906565495e-06, "loss": 0.4445, "step": 6060 }, { "epoch": 0.3796489140136864, "grad_norm": 0.9455543711074325, "learning_rate": 7.122101931589219e-06, "loss": 0.377, "step": 6061 }, { "epoch": 0.3797115520130287, "grad_norm": 0.7964918327990895, "learning_rate": 7.1211833851757795e-06, "loss": 0.4088, "step": 6062 }, { "epoch": 0.379774190012371, "grad_norm": 0.8948309114763255, "learning_rate": 7.120264751454037e-06, "loss": 0.4487, "step": 6063 }, { "epoch": 0.3798368280117133, "grad_norm": 0.8628859167454173, "learning_rate": 7.119346030461806e-06, "loss": 0.4241, "step": 6064 }, { "epoch": 0.3798994660110556, "grad_norm": 0.8692288096954564, "learning_rate": 7.118427222236897e-06, "loss": 0.3919, "step": 6065 }, { "epoch": 0.3799621040103979, "grad_norm": 0.8324405352079225, "learning_rate": 7.117508326817131e-06, "loss": 0.409, "step": 6066 }, { "epoch": 0.38002474200974024, "grad_norm": 0.867942230473971, "learning_rate": 7.116589344240329e-06, "loss": 0.3967, "step": 6067 }, { "epoch": 0.3800873800090825, "grad_norm": 0.8234615891186724, "learning_rate": 7.115670274544316e-06, "loss": 0.3778, "step": 6068 }, { "epoch": 0.3801500180084248, "grad_norm": 0.7341679970163366, "learning_rate": 7.114751117766924e-06, "loss": 0.4168, "step": 6069 }, { "epoch": 0.3802126560077671, "grad_norm": 0.7862873491237222, "learning_rate": 7.113831873945981e-06, "loss": 0.4018, "step": 6070 }, { "epoch": 0.3802752940071094, "grad_norm": 0.903022434882879, "learning_rate": 7.112912543119328e-06, "loss": 0.4129, "step": 6071 }, { "epoch": 0.3803379320064517, "grad_norm": 0.8840631762578425, "learning_rate": 7.111993125324801e-06, "loss": 0.3941, "step": 6072 }, { "epoch": 0.380400570005794, "grad_norm": 0.8234099102447939, "learning_rate": 7.111073620600245e-06, "loss": 0.3821, "step": 6073 }, { "epoch": 0.38046320800513633, "grad_norm": 0.8519210462753598, "learning_rate": 7.110154028983508e-06, "loss": 0.4047, "step": 6074 }, { "epoch": 0.38052584600447864, "grad_norm": 0.7818051967010954, "learning_rate": 7.109234350512439e-06, "loss": 0.3973, "step": 6075 }, { "epoch": 0.3805884840038209, "grad_norm": 0.7817464632214571, "learning_rate": 7.1083145852248915e-06, "loss": 0.4937, "step": 6076 }, { "epoch": 0.3806511220031632, "grad_norm": 0.8907566173857909, "learning_rate": 7.107394733158726e-06, "loss": 0.4422, "step": 6077 }, { "epoch": 0.3807137600025055, "grad_norm": 0.6889556536234162, "learning_rate": 7.106474794351801e-06, "loss": 0.464, "step": 6078 }, { "epoch": 0.3807763980018478, "grad_norm": 0.8877523582237647, "learning_rate": 7.1055547688419825e-06, "loss": 0.4647, "step": 6079 }, { "epoch": 0.3808390360011901, "grad_norm": 0.8343597020353921, "learning_rate": 7.10463465666714e-06, "loss": 0.4223, "step": 6080 }, { "epoch": 0.38090167400053243, "grad_norm": 0.6776265749064624, "learning_rate": 7.103714457865143e-06, "loss": 0.4922, "step": 6081 }, { "epoch": 0.38096431199987474, "grad_norm": 0.8087224227186447, "learning_rate": 7.1027941724738705e-06, "loss": 0.3582, "step": 6082 }, { "epoch": 0.38102694999921705, "grad_norm": 0.8662593380042622, "learning_rate": 7.1018738005311985e-06, "loss": 0.4606, "step": 6083 }, { "epoch": 0.38108958799855935, "grad_norm": 0.843764945010773, "learning_rate": 7.10095334207501e-06, "loss": 0.4279, "step": 6084 }, { "epoch": 0.3811522259979016, "grad_norm": 0.7516773743128801, "learning_rate": 7.100032797143192e-06, "loss": 0.3889, "step": 6085 }, { "epoch": 0.3812148639972439, "grad_norm": 0.7823528932997977, "learning_rate": 7.099112165773635e-06, "loss": 0.3795, "step": 6086 }, { "epoch": 0.3812775019965862, "grad_norm": 0.8065662231496112, "learning_rate": 7.098191448004231e-06, "loss": 0.3735, "step": 6087 }, { "epoch": 0.38134013999592853, "grad_norm": 0.8633143535134019, "learning_rate": 7.097270643872878e-06, "loss": 0.4215, "step": 6088 }, { "epoch": 0.38140277799527084, "grad_norm": 0.7991308122247683, "learning_rate": 7.096349753417475e-06, "loss": 0.4102, "step": 6089 }, { "epoch": 0.38146541599461314, "grad_norm": 0.8322083999885583, "learning_rate": 7.095428776675929e-06, "loss": 0.4435, "step": 6090 }, { "epoch": 0.38152805399395545, "grad_norm": 0.7962328746914612, "learning_rate": 7.094507713686145e-06, "loss": 0.3982, "step": 6091 }, { "epoch": 0.38159069199329776, "grad_norm": 0.8267405626242065, "learning_rate": 7.093586564486034e-06, "loss": 0.4371, "step": 6092 }, { "epoch": 0.38165332999264, "grad_norm": 0.8415291011420623, "learning_rate": 7.0926653291135115e-06, "loss": 0.4557, "step": 6093 }, { "epoch": 0.3817159679919823, "grad_norm": 0.8074275585494274, "learning_rate": 7.091744007606496e-06, "loss": 0.4136, "step": 6094 }, { "epoch": 0.3817786059913246, "grad_norm": 0.8892885651399276, "learning_rate": 7.090822600002909e-06, "loss": 0.4131, "step": 6095 }, { "epoch": 0.38184124399066693, "grad_norm": 0.7711199105216994, "learning_rate": 7.089901106340677e-06, "loss": 0.3948, "step": 6096 }, { "epoch": 0.38190388199000924, "grad_norm": 0.8471987244294158, "learning_rate": 7.088979526657725e-06, "loss": 0.4503, "step": 6097 }, { "epoch": 0.38196651998935155, "grad_norm": 0.7956049177348885, "learning_rate": 7.088057860991988e-06, "loss": 0.4792, "step": 6098 }, { "epoch": 0.38202915798869386, "grad_norm": 0.8563233654901828, "learning_rate": 7.0871361093814036e-06, "loss": 0.4261, "step": 6099 }, { "epoch": 0.38209179598803616, "grad_norm": 0.8225772620302215, "learning_rate": 7.086214271863909e-06, "loss": 0.3863, "step": 6100 }, { "epoch": 0.3821544339873784, "grad_norm": 0.9061651649292842, "learning_rate": 7.085292348477448e-06, "loss": 0.4553, "step": 6101 }, { "epoch": 0.3822170719867207, "grad_norm": 0.8549339113020955, "learning_rate": 7.084370339259966e-06, "loss": 0.4255, "step": 6102 }, { "epoch": 0.38227970998606303, "grad_norm": 0.8329869533002395, "learning_rate": 7.0834482442494155e-06, "loss": 0.4051, "step": 6103 }, { "epoch": 0.38234234798540534, "grad_norm": 0.8139988035058909, "learning_rate": 7.082526063483748e-06, "loss": 0.4111, "step": 6104 }, { "epoch": 0.38240498598474765, "grad_norm": 0.8958073937193602, "learning_rate": 7.081603797000921e-06, "loss": 0.4393, "step": 6105 }, { "epoch": 0.38246762398408995, "grad_norm": 0.8617039177681631, "learning_rate": 7.080681444838895e-06, "loss": 0.4247, "step": 6106 }, { "epoch": 0.38253026198343226, "grad_norm": 0.8565327629378452, "learning_rate": 7.0797590070356334e-06, "loss": 0.415, "step": 6107 }, { "epoch": 0.38259289998277457, "grad_norm": 0.8206057971242239, "learning_rate": 7.078836483629107e-06, "loss": 0.3918, "step": 6108 }, { "epoch": 0.3826555379821169, "grad_norm": 0.8036698706371129, "learning_rate": 7.0779138746572855e-06, "loss": 0.4209, "step": 6109 }, { "epoch": 0.38271817598145913, "grad_norm": 0.8307803580587131, "learning_rate": 7.07699118015814e-06, "loss": 0.4382, "step": 6110 }, { "epoch": 0.38278081398080144, "grad_norm": 0.786656307109097, "learning_rate": 7.076068400169654e-06, "loss": 0.4331, "step": 6111 }, { "epoch": 0.38284345198014375, "grad_norm": 0.7957678689175984, "learning_rate": 7.075145534729808e-06, "loss": 0.4284, "step": 6112 }, { "epoch": 0.38290608997948605, "grad_norm": 0.823625052383805, "learning_rate": 7.074222583876586e-06, "loss": 0.3997, "step": 6113 }, { "epoch": 0.38296872797882836, "grad_norm": 0.8279795507655386, "learning_rate": 7.073299547647978e-06, "loss": 0.4213, "step": 6114 }, { "epoch": 0.38303136597817067, "grad_norm": 0.8696556029233287, "learning_rate": 7.072376426081975e-06, "loss": 0.4284, "step": 6115 }, { "epoch": 0.383094003977513, "grad_norm": 0.756378195930642, "learning_rate": 7.071453219216574e-06, "loss": 0.408, "step": 6116 }, { "epoch": 0.3831566419768553, "grad_norm": 0.8820947095354198, "learning_rate": 7.070529927089775e-06, "loss": 0.4269, "step": 6117 }, { "epoch": 0.38321927997619754, "grad_norm": 0.8795352633507372, "learning_rate": 7.069606549739578e-06, "loss": 0.4027, "step": 6118 }, { "epoch": 0.38328191797553984, "grad_norm": 0.8040210790714822, "learning_rate": 7.0686830872039926e-06, "loss": 0.4287, "step": 6119 }, { "epoch": 0.38334455597488215, "grad_norm": 0.8083023530909176, "learning_rate": 7.067759539521027e-06, "loss": 0.3863, "step": 6120 }, { "epoch": 0.38340719397422446, "grad_norm": 0.8146731471027779, "learning_rate": 7.066835906728694e-06, "loss": 0.4008, "step": 6121 }, { "epoch": 0.38346983197356677, "grad_norm": 0.7680538179510763, "learning_rate": 7.065912188865013e-06, "loss": 0.4203, "step": 6122 }, { "epoch": 0.3835324699729091, "grad_norm": 0.8828789169504319, "learning_rate": 7.064988385968001e-06, "loss": 0.4185, "step": 6123 }, { "epoch": 0.3835951079722514, "grad_norm": 0.6712466349729105, "learning_rate": 7.064064498075687e-06, "loss": 0.4772, "step": 6124 }, { "epoch": 0.3836577459715937, "grad_norm": 0.68733228881146, "learning_rate": 7.063140525226092e-06, "loss": 0.4507, "step": 6125 }, { "epoch": 0.38372038397093594, "grad_norm": 0.8094313406382148, "learning_rate": 7.062216467457252e-06, "loss": 0.4382, "step": 6126 }, { "epoch": 0.38378302197027825, "grad_norm": 0.8275936428364348, "learning_rate": 7.061292324807197e-06, "loss": 0.4166, "step": 6127 }, { "epoch": 0.38384565996962056, "grad_norm": 0.8204922246018035, "learning_rate": 7.06036809731397e-06, "loss": 0.4102, "step": 6128 }, { "epoch": 0.38390829796896286, "grad_norm": 0.914871574862147, "learning_rate": 7.0594437850156096e-06, "loss": 0.4324, "step": 6129 }, { "epoch": 0.38397093596830517, "grad_norm": 0.7886774891560075, "learning_rate": 7.058519387950158e-06, "loss": 0.3982, "step": 6130 }, { "epoch": 0.3840335739676475, "grad_norm": 0.6666009870063716, "learning_rate": 7.057594906155669e-06, "loss": 0.4912, "step": 6131 }, { "epoch": 0.3840962119669898, "grad_norm": 0.8047449025901291, "learning_rate": 7.056670339670191e-06, "loss": 0.4174, "step": 6132 }, { "epoch": 0.3841588499663321, "grad_norm": 0.8370873642364914, "learning_rate": 7.05574568853178e-06, "loss": 0.4138, "step": 6133 }, { "epoch": 0.3842214879656744, "grad_norm": 0.7738710054180674, "learning_rate": 7.054820952778494e-06, "loss": 0.4259, "step": 6134 }, { "epoch": 0.38428412596501665, "grad_norm": 0.9058332050643645, "learning_rate": 7.053896132448397e-06, "loss": 0.4107, "step": 6135 }, { "epoch": 0.38434676396435896, "grad_norm": 0.8203265181207906, "learning_rate": 7.0529712275795535e-06, "loss": 0.4287, "step": 6136 }, { "epoch": 0.38440940196370127, "grad_norm": 0.8220741627375615, "learning_rate": 7.052046238210034e-06, "loss": 0.3761, "step": 6137 }, { "epoch": 0.3844720399630436, "grad_norm": 0.818124139525692, "learning_rate": 7.051121164377911e-06, "loss": 0.4291, "step": 6138 }, { "epoch": 0.3845346779623859, "grad_norm": 0.8385456722420321, "learning_rate": 7.0501960061212595e-06, "loss": 0.3834, "step": 6139 }, { "epoch": 0.3845973159617282, "grad_norm": 0.8398561034804021, "learning_rate": 7.049270763478157e-06, "loss": 0.3944, "step": 6140 }, { "epoch": 0.3846599539610705, "grad_norm": 0.7402479612206025, "learning_rate": 7.048345436486694e-06, "loss": 0.5004, "step": 6141 }, { "epoch": 0.3847225919604128, "grad_norm": 0.81743265418052, "learning_rate": 7.047420025184951e-06, "loss": 0.445, "step": 6142 }, { "epoch": 0.38478522995975506, "grad_norm": 0.744576977889003, "learning_rate": 7.04649452961102e-06, "loss": 0.3947, "step": 6143 }, { "epoch": 0.38484786795909737, "grad_norm": 0.697630556748134, "learning_rate": 7.045568949802993e-06, "loss": 0.4513, "step": 6144 }, { "epoch": 0.3849105059584397, "grad_norm": 0.7706973240211059, "learning_rate": 7.0446432857989686e-06, "loss": 0.3779, "step": 6145 }, { "epoch": 0.384973143957782, "grad_norm": 0.7822950018232786, "learning_rate": 7.04371753763705e-06, "loss": 0.4409, "step": 6146 }, { "epoch": 0.3850357819571243, "grad_norm": 0.8052279125208087, "learning_rate": 7.042791705355335e-06, "loss": 0.4029, "step": 6147 }, { "epoch": 0.3850984199564666, "grad_norm": 0.8453720757953296, "learning_rate": 7.041865788991935e-06, "loss": 0.4252, "step": 6148 }, { "epoch": 0.3851610579558089, "grad_norm": 0.8980409516993794, "learning_rate": 7.04093978858496e-06, "loss": 0.4426, "step": 6149 }, { "epoch": 0.3852236959551512, "grad_norm": 0.8548432268392293, "learning_rate": 7.040013704172525e-06, "loss": 0.4703, "step": 6150 }, { "epoch": 0.3852863339544935, "grad_norm": 0.8346243577711209, "learning_rate": 7.039087535792747e-06, "loss": 0.3969, "step": 6151 }, { "epoch": 0.3853489719538358, "grad_norm": 0.8860690897156427, "learning_rate": 7.038161283483749e-06, "loss": 0.4143, "step": 6152 }, { "epoch": 0.3854116099531781, "grad_norm": 0.8022423626587407, "learning_rate": 7.037234947283651e-06, "loss": 0.4129, "step": 6153 }, { "epoch": 0.3854742479525204, "grad_norm": 0.8228900317146465, "learning_rate": 7.036308527230586e-06, "loss": 0.3714, "step": 6154 }, { "epoch": 0.3855368859518627, "grad_norm": 0.7681466136332357, "learning_rate": 7.035382023362684e-06, "loss": 0.3902, "step": 6155 }, { "epoch": 0.385599523951205, "grad_norm": 0.8460435553822424, "learning_rate": 7.034455435718082e-06, "loss": 0.393, "step": 6156 }, { "epoch": 0.3856621619505473, "grad_norm": 0.6687627688391062, "learning_rate": 7.0335287643349145e-06, "loss": 0.4969, "step": 6157 }, { "epoch": 0.3857247999498896, "grad_norm": 0.9608722976147317, "learning_rate": 7.032602009251326e-06, "loss": 0.4204, "step": 6158 }, { "epoch": 0.3857874379492319, "grad_norm": 0.7767507687843853, "learning_rate": 7.031675170505462e-06, "loss": 0.403, "step": 6159 }, { "epoch": 0.3858500759485742, "grad_norm": 0.8993542552838526, "learning_rate": 7.030748248135472e-06, "loss": 0.3935, "step": 6160 }, { "epoch": 0.3859127139479165, "grad_norm": 0.8353725635021992, "learning_rate": 7.029821242179504e-06, "loss": 0.3914, "step": 6161 }, { "epoch": 0.3859753519472588, "grad_norm": 0.8280702780361766, "learning_rate": 7.028894152675719e-06, "loss": 0.4084, "step": 6162 }, { "epoch": 0.3860379899466011, "grad_norm": 0.80787186896438, "learning_rate": 7.027966979662274e-06, "loss": 0.4401, "step": 6163 }, { "epoch": 0.3861006279459434, "grad_norm": 0.8651213322129924, "learning_rate": 7.027039723177332e-06, "loss": 0.4276, "step": 6164 }, { "epoch": 0.3861632659452857, "grad_norm": 0.7981814999362484, "learning_rate": 7.026112383259059e-06, "loss": 0.4328, "step": 6165 }, { "epoch": 0.386225903944628, "grad_norm": 0.8177546363711925, "learning_rate": 7.025184959945624e-06, "loss": 0.4158, "step": 6166 }, { "epoch": 0.38628854194397033, "grad_norm": 0.8050742772884729, "learning_rate": 7.024257453275198e-06, "loss": 0.4173, "step": 6167 }, { "epoch": 0.3863511799433126, "grad_norm": 0.7958635651178614, "learning_rate": 7.023329863285962e-06, "loss": 0.367, "step": 6168 }, { "epoch": 0.3864138179426549, "grad_norm": 0.9280203434439677, "learning_rate": 7.022402190016093e-06, "loss": 0.4824, "step": 6169 }, { "epoch": 0.3864764559419972, "grad_norm": 0.9563501837714657, "learning_rate": 7.021474433503774e-06, "loss": 0.477, "step": 6170 }, { "epoch": 0.3865390939413395, "grad_norm": 0.8391430497915747, "learning_rate": 7.020546593787192e-06, "loss": 0.4223, "step": 6171 }, { "epoch": 0.3866017319406818, "grad_norm": 0.800374053585278, "learning_rate": 7.019618670904538e-06, "loss": 0.4049, "step": 6172 }, { "epoch": 0.3866643699400241, "grad_norm": 0.7518359052188687, "learning_rate": 7.018690664894004e-06, "loss": 0.3728, "step": 6173 }, { "epoch": 0.38672700793936643, "grad_norm": 0.8295153872007655, "learning_rate": 7.017762575793786e-06, "loss": 0.4266, "step": 6174 }, { "epoch": 0.38678964593870874, "grad_norm": 0.8359729522811751, "learning_rate": 7.016834403642088e-06, "loss": 0.4233, "step": 6175 }, { "epoch": 0.38685228393805104, "grad_norm": 0.829842998837805, "learning_rate": 7.015906148477111e-06, "loss": 0.4415, "step": 6176 }, { "epoch": 0.3869149219373933, "grad_norm": 0.728342774037764, "learning_rate": 7.014977810337063e-06, "loss": 0.4031, "step": 6177 }, { "epoch": 0.3869775599367356, "grad_norm": 0.7660761987400988, "learning_rate": 7.014049389260154e-06, "loss": 0.3888, "step": 6178 }, { "epoch": 0.3870401979360779, "grad_norm": 0.8505024623864532, "learning_rate": 7.013120885284599e-06, "loss": 0.4607, "step": 6179 }, { "epoch": 0.3871028359354202, "grad_norm": 0.9060968345122337, "learning_rate": 7.012192298448613e-06, "loss": 0.444, "step": 6180 }, { "epoch": 0.3871654739347625, "grad_norm": 0.6741519834689613, "learning_rate": 7.01126362879042e-06, "loss": 0.4889, "step": 6181 }, { "epoch": 0.38722811193410483, "grad_norm": 0.8847204493527717, "learning_rate": 7.010334876348243e-06, "loss": 0.4154, "step": 6182 }, { "epoch": 0.38729074993344714, "grad_norm": 0.8201690555468174, "learning_rate": 7.009406041160308e-06, "loss": 0.3938, "step": 6183 }, { "epoch": 0.38735338793278945, "grad_norm": 0.8446472067726634, "learning_rate": 7.008477123264849e-06, "loss": 0.433, "step": 6184 }, { "epoch": 0.3874160259321317, "grad_norm": 0.8280300181115833, "learning_rate": 7.0075481227000966e-06, "loss": 0.4472, "step": 6185 }, { "epoch": 0.387478663931474, "grad_norm": 0.7929782981711644, "learning_rate": 7.006619039504292e-06, "loss": 0.4077, "step": 6186 }, { "epoch": 0.3875413019308163, "grad_norm": 0.7805953791143646, "learning_rate": 7.005689873715676e-06, "loss": 0.4157, "step": 6187 }, { "epoch": 0.3876039399301586, "grad_norm": 0.8598275831396661, "learning_rate": 7.004760625372492e-06, "loss": 0.4377, "step": 6188 }, { "epoch": 0.38766657792950093, "grad_norm": 0.8062958300132573, "learning_rate": 7.0038312945129885e-06, "loss": 0.4018, "step": 6189 }, { "epoch": 0.38772921592884324, "grad_norm": 0.8258440223864086, "learning_rate": 7.002901881175417e-06, "loss": 0.4357, "step": 6190 }, { "epoch": 0.38779185392818555, "grad_norm": 0.8109436589767919, "learning_rate": 7.001972385398031e-06, "loss": 0.395, "step": 6191 }, { "epoch": 0.38785449192752786, "grad_norm": 0.7751756437660384, "learning_rate": 7.001042807219093e-06, "loss": 0.3738, "step": 6192 }, { "epoch": 0.38791712992687016, "grad_norm": 0.7911603467940191, "learning_rate": 7.00011314667686e-06, "loss": 0.39, "step": 6193 }, { "epoch": 0.3879797679262124, "grad_norm": 0.8242666196084655, "learning_rate": 6.999183403809597e-06, "loss": 0.3786, "step": 6194 }, { "epoch": 0.3880424059255547, "grad_norm": 0.8202294883203314, "learning_rate": 6.998253578655578e-06, "loss": 0.4363, "step": 6195 }, { "epoch": 0.38810504392489703, "grad_norm": 0.68041982945756, "learning_rate": 6.997323671253067e-06, "loss": 0.4731, "step": 6196 }, { "epoch": 0.38816768192423934, "grad_norm": 0.9025962885443595, "learning_rate": 6.996393681640346e-06, "loss": 0.4345, "step": 6197 }, { "epoch": 0.38823031992358165, "grad_norm": 0.7026575801946802, "learning_rate": 6.9954636098556886e-06, "loss": 0.3614, "step": 6198 }, { "epoch": 0.38829295792292395, "grad_norm": 0.7795281538896568, "learning_rate": 6.994533455937378e-06, "loss": 0.4028, "step": 6199 }, { "epoch": 0.38835559592226626, "grad_norm": 0.8195641870178829, "learning_rate": 6.9936032199237015e-06, "loss": 0.4145, "step": 6200 }, { "epoch": 0.38841823392160857, "grad_norm": 0.8209605295960006, "learning_rate": 6.992672901852946e-06, "loss": 0.4588, "step": 6201 }, { "epoch": 0.3884808719209508, "grad_norm": 0.7683160910929471, "learning_rate": 6.991742501763405e-06, "loss": 0.3766, "step": 6202 }, { "epoch": 0.38854350992029313, "grad_norm": 0.8454600185663477, "learning_rate": 6.9908120196933716e-06, "loss": 0.4008, "step": 6203 }, { "epoch": 0.38860614791963544, "grad_norm": 0.8319243568497579, "learning_rate": 6.989881455681145e-06, "loss": 0.4146, "step": 6204 }, { "epoch": 0.38866878591897774, "grad_norm": 0.7966691451218918, "learning_rate": 6.98895080976503e-06, "loss": 0.3518, "step": 6205 }, { "epoch": 0.38873142391832005, "grad_norm": 0.8163031467449161, "learning_rate": 6.988020081983331e-06, "loss": 0.3931, "step": 6206 }, { "epoch": 0.38879406191766236, "grad_norm": 0.793206962290555, "learning_rate": 6.987089272374357e-06, "loss": 0.4141, "step": 6207 }, { "epoch": 0.38885669991700467, "grad_norm": 0.7967672359928603, "learning_rate": 6.986158380976417e-06, "loss": 0.4023, "step": 6208 }, { "epoch": 0.388919337916347, "grad_norm": 0.7691215855398174, "learning_rate": 6.98522740782783e-06, "loss": 0.3843, "step": 6209 }, { "epoch": 0.3889819759156892, "grad_norm": 0.6748149288994163, "learning_rate": 6.984296352966916e-06, "loss": 0.4726, "step": 6210 }, { "epoch": 0.38904461391503153, "grad_norm": 0.917656407363133, "learning_rate": 6.983365216431995e-06, "loss": 0.4209, "step": 6211 }, { "epoch": 0.38910725191437384, "grad_norm": 0.8761516832216083, "learning_rate": 6.982433998261393e-06, "loss": 0.4492, "step": 6212 }, { "epoch": 0.38916988991371615, "grad_norm": 0.7841174853666925, "learning_rate": 6.98150269849344e-06, "loss": 0.4276, "step": 6213 }, { "epoch": 0.38923252791305846, "grad_norm": 0.792445421600834, "learning_rate": 6.980571317166469e-06, "loss": 0.395, "step": 6214 }, { "epoch": 0.38929516591240076, "grad_norm": 0.8129982319459191, "learning_rate": 6.979639854318815e-06, "loss": 0.4079, "step": 6215 }, { "epoch": 0.3893578039117431, "grad_norm": 0.7760939403744748, "learning_rate": 6.978708309988816e-06, "loss": 0.4412, "step": 6216 }, { "epoch": 0.3894204419110854, "grad_norm": 0.685485237757321, "learning_rate": 6.977776684214815e-06, "loss": 0.4921, "step": 6217 }, { "epoch": 0.3894830799104277, "grad_norm": 0.8040299540441401, "learning_rate": 6.976844977035161e-06, "loss": 0.4142, "step": 6218 }, { "epoch": 0.38954571790976994, "grad_norm": 0.8028324955885181, "learning_rate": 6.975913188488199e-06, "loss": 0.4007, "step": 6219 }, { "epoch": 0.38960835590911225, "grad_norm": 0.6733709496030699, "learning_rate": 6.974981318612285e-06, "loss": 0.465, "step": 6220 }, { "epoch": 0.38967099390845455, "grad_norm": 0.6725166604511087, "learning_rate": 6.974049367445772e-06, "loss": 0.4846, "step": 6221 }, { "epoch": 0.38973363190779686, "grad_norm": 0.8409732686886441, "learning_rate": 6.973117335027021e-06, "loss": 0.3663, "step": 6222 }, { "epoch": 0.38979626990713917, "grad_norm": 0.8978737774289759, "learning_rate": 6.972185221394394e-06, "loss": 0.4158, "step": 6223 }, { "epoch": 0.3898589079064815, "grad_norm": 0.8544033433022894, "learning_rate": 6.971253026586259e-06, "loss": 0.369, "step": 6224 }, { "epoch": 0.3899215459058238, "grad_norm": 0.7624093092995738, "learning_rate": 6.970320750640981e-06, "loss": 0.3749, "step": 6225 }, { "epoch": 0.3899841839051661, "grad_norm": 0.8216447555378344, "learning_rate": 6.969388393596937e-06, "loss": 0.4212, "step": 6226 }, { "epoch": 0.39004682190450835, "grad_norm": 0.8840790601311261, "learning_rate": 6.968455955492499e-06, "loss": 0.3849, "step": 6227 }, { "epoch": 0.39010945990385065, "grad_norm": 0.8239479727059336, "learning_rate": 6.96752343636605e-06, "loss": 0.4169, "step": 6228 }, { "epoch": 0.39017209790319296, "grad_norm": 0.8185490885071799, "learning_rate": 6.966590836255973e-06, "loss": 0.379, "step": 6229 }, { "epoch": 0.39023473590253527, "grad_norm": 0.8351929139637599, "learning_rate": 6.965658155200649e-06, "loss": 0.4419, "step": 6230 }, { "epoch": 0.3902973739018776, "grad_norm": 0.8302529713926861, "learning_rate": 6.964725393238472e-06, "loss": 0.4377, "step": 6231 }, { "epoch": 0.3903600119012199, "grad_norm": 0.8255675963778419, "learning_rate": 6.963792550407833e-06, "loss": 0.441, "step": 6232 }, { "epoch": 0.3904226499005622, "grad_norm": 0.8516348009485282, "learning_rate": 6.9628596267471285e-06, "loss": 0.4087, "step": 6233 }, { "epoch": 0.3904852878999045, "grad_norm": 0.8385894303553048, "learning_rate": 6.961926622294757e-06, "loss": 0.4507, "step": 6234 }, { "epoch": 0.39054792589924675, "grad_norm": 0.8190229689448724, "learning_rate": 6.960993537089121e-06, "loss": 0.3801, "step": 6235 }, { "epoch": 0.39061056389858906, "grad_norm": 0.8330272632321322, "learning_rate": 6.960060371168627e-06, "loss": 0.3941, "step": 6236 }, { "epoch": 0.39067320189793137, "grad_norm": 0.7062158096420558, "learning_rate": 6.959127124571686e-06, "loss": 0.3807, "step": 6237 }, { "epoch": 0.3907358398972737, "grad_norm": 0.7894032928933101, "learning_rate": 6.9581937973367074e-06, "loss": 0.4101, "step": 6238 }, { "epoch": 0.390798477896616, "grad_norm": 0.8459236580768467, "learning_rate": 6.95726038950211e-06, "loss": 0.427, "step": 6239 }, { "epoch": 0.3908611158959583, "grad_norm": 0.8634530879690346, "learning_rate": 6.956326901106309e-06, "loss": 0.3853, "step": 6240 }, { "epoch": 0.3909237538953006, "grad_norm": 0.8107272114882706, "learning_rate": 6.9553933321877325e-06, "loss": 0.4073, "step": 6241 }, { "epoch": 0.3909863918946429, "grad_norm": 0.7701886107564521, "learning_rate": 6.954459682784803e-06, "loss": 0.4079, "step": 6242 }, { "epoch": 0.3910490298939852, "grad_norm": 0.9043468302895145, "learning_rate": 6.953525952935949e-06, "loss": 0.4474, "step": 6243 }, { "epoch": 0.39111166789332746, "grad_norm": 0.8834605689517924, "learning_rate": 6.952592142679605e-06, "loss": 0.454, "step": 6244 }, { "epoch": 0.39117430589266977, "grad_norm": 0.8802085865646595, "learning_rate": 6.9516582520542055e-06, "loss": 0.468, "step": 6245 }, { "epoch": 0.3912369438920121, "grad_norm": 0.828391494054391, "learning_rate": 6.950724281098191e-06, "loss": 0.4212, "step": 6246 }, { "epoch": 0.3912995818913544, "grad_norm": 0.7784739758908243, "learning_rate": 6.949790229850002e-06, "loss": 0.4002, "step": 6247 }, { "epoch": 0.3913622198906967, "grad_norm": 0.8360607604258662, "learning_rate": 6.948856098348086e-06, "loss": 0.4441, "step": 6248 }, { "epoch": 0.391424857890039, "grad_norm": 0.7063344554148981, "learning_rate": 6.9479218866308905e-06, "loss": 0.4705, "step": 6249 }, { "epoch": 0.3914874958893813, "grad_norm": 0.8351601665884644, "learning_rate": 6.946987594736869e-06, "loss": 0.408, "step": 6250 }, { "epoch": 0.3915501338887236, "grad_norm": 0.8225845214498992, "learning_rate": 6.946053222704477e-06, "loss": 0.4434, "step": 6251 }, { "epoch": 0.39161277188806587, "grad_norm": 0.8375422099805985, "learning_rate": 6.945118770572172e-06, "loss": 0.4132, "step": 6252 }, { "epoch": 0.3916754098874082, "grad_norm": 0.8165865022341944, "learning_rate": 6.94418423837842e-06, "loss": 0.3886, "step": 6253 }, { "epoch": 0.3917380478867505, "grad_norm": 0.8229880719036343, "learning_rate": 6.943249626161681e-06, "loss": 0.4298, "step": 6254 }, { "epoch": 0.3918006858860928, "grad_norm": 0.8193245164742149, "learning_rate": 6.942314933960428e-06, "loss": 0.4382, "step": 6255 }, { "epoch": 0.3918633238854351, "grad_norm": 0.899105039205608, "learning_rate": 6.941380161813132e-06, "loss": 0.4344, "step": 6256 }, { "epoch": 0.3919259618847774, "grad_norm": 0.8561719388550229, "learning_rate": 6.940445309758268e-06, "loss": 0.4188, "step": 6257 }, { "epoch": 0.3919885998841197, "grad_norm": 0.8085016924731511, "learning_rate": 6.939510377834316e-06, "loss": 0.3997, "step": 6258 }, { "epoch": 0.392051237883462, "grad_norm": 0.8239753640132296, "learning_rate": 6.938575366079754e-06, "loss": 0.406, "step": 6259 }, { "epoch": 0.39211387588280433, "grad_norm": 0.835055642731441, "learning_rate": 6.9376402745330705e-06, "loss": 0.43, "step": 6260 }, { "epoch": 0.3921765138821466, "grad_norm": 0.8137353876346224, "learning_rate": 6.936705103232756e-06, "loss": 0.4296, "step": 6261 }, { "epoch": 0.3922391518814889, "grad_norm": 0.8364752332479508, "learning_rate": 6.9357698522173004e-06, "loss": 0.4289, "step": 6262 }, { "epoch": 0.3923017898808312, "grad_norm": 0.7719259676208826, "learning_rate": 6.934834521525195e-06, "loss": 0.3645, "step": 6263 }, { "epoch": 0.3923644278801735, "grad_norm": 0.831436714382028, "learning_rate": 6.933899111194943e-06, "loss": 0.4495, "step": 6264 }, { "epoch": 0.3924270658795158, "grad_norm": 0.8103971554551445, "learning_rate": 6.9329636212650455e-06, "loss": 0.3996, "step": 6265 }, { "epoch": 0.3924897038788581, "grad_norm": 0.8007845627653513, "learning_rate": 6.932028051774007e-06, "loss": 0.4151, "step": 6266 }, { "epoch": 0.39255234187820043, "grad_norm": 0.8013647174239807, "learning_rate": 6.931092402760335e-06, "loss": 0.4106, "step": 6267 }, { "epoch": 0.39261497987754274, "grad_norm": 0.8425587594328153, "learning_rate": 6.930156674262541e-06, "loss": 0.4205, "step": 6268 }, { "epoch": 0.392677617876885, "grad_norm": 0.7642327964599537, "learning_rate": 6.929220866319137e-06, "loss": 0.3906, "step": 6269 }, { "epoch": 0.3927402558762273, "grad_norm": 0.766845201065648, "learning_rate": 6.928284978968648e-06, "loss": 0.4014, "step": 6270 }, { "epoch": 0.3928028938755696, "grad_norm": 0.7728481022408388, "learning_rate": 6.927349012249591e-06, "loss": 0.3864, "step": 6271 }, { "epoch": 0.3928655318749119, "grad_norm": 0.7197859669037694, "learning_rate": 6.9264129662004886e-06, "loss": 0.4625, "step": 6272 }, { "epoch": 0.3929281698742542, "grad_norm": 0.8326686001844251, "learning_rate": 6.925476840859873e-06, "loss": 0.3997, "step": 6273 }, { "epoch": 0.3929908078735965, "grad_norm": 0.858597000997595, "learning_rate": 6.924540636266272e-06, "loss": 0.4112, "step": 6274 }, { "epoch": 0.39305344587293883, "grad_norm": 0.8692843268267555, "learning_rate": 6.9236043524582234e-06, "loss": 0.4278, "step": 6275 }, { "epoch": 0.39311608387228114, "grad_norm": 0.8359767232663443, "learning_rate": 6.92266798947426e-06, "loss": 0.4237, "step": 6276 }, { "epoch": 0.3931787218716234, "grad_norm": 0.8749015613486474, "learning_rate": 6.921731547352926e-06, "loss": 0.438, "step": 6277 }, { "epoch": 0.3932413598709657, "grad_norm": 0.8190468036565202, "learning_rate": 6.920795026132765e-06, "loss": 0.3987, "step": 6278 }, { "epoch": 0.393303997870308, "grad_norm": 0.849794921119129, "learning_rate": 6.919858425852326e-06, "loss": 0.4249, "step": 6279 }, { "epoch": 0.3933666358696503, "grad_norm": 0.8518208309307036, "learning_rate": 6.918921746550155e-06, "loss": 0.3985, "step": 6280 }, { "epoch": 0.3934292738689926, "grad_norm": 0.7468173729992125, "learning_rate": 6.917984988264811e-06, "loss": 0.3846, "step": 6281 }, { "epoch": 0.39349191186833493, "grad_norm": 0.85795921420482, "learning_rate": 6.917048151034847e-06, "loss": 0.4471, "step": 6282 }, { "epoch": 0.39355454986767724, "grad_norm": 0.8258276094844708, "learning_rate": 6.916111234898827e-06, "loss": 0.392, "step": 6283 }, { "epoch": 0.39361718786701955, "grad_norm": 0.7910217169238906, "learning_rate": 6.915174239895313e-06, "loss": 0.4017, "step": 6284 }, { "epoch": 0.39367982586636185, "grad_norm": 0.8378723488869755, "learning_rate": 6.9142371660628695e-06, "loss": 0.3805, "step": 6285 }, { "epoch": 0.3937424638657041, "grad_norm": 0.855887676250797, "learning_rate": 6.9133000134400695e-06, "loss": 0.4011, "step": 6286 }, { "epoch": 0.3938051018650464, "grad_norm": 0.8118153918444826, "learning_rate": 6.9123627820654875e-06, "loss": 0.4043, "step": 6287 }, { "epoch": 0.3938677398643887, "grad_norm": 0.8148850499156327, "learning_rate": 6.911425471977697e-06, "loss": 0.3866, "step": 6288 }, { "epoch": 0.39393037786373103, "grad_norm": 0.8442215005396841, "learning_rate": 6.910488083215281e-06, "loss": 0.4324, "step": 6289 }, { "epoch": 0.39399301586307334, "grad_norm": 0.7971181812518628, "learning_rate": 6.90955061581682e-06, "loss": 0.4229, "step": 6290 }, { "epoch": 0.39405565386241564, "grad_norm": 0.7811350998662991, "learning_rate": 6.9086130698208995e-06, "loss": 0.3772, "step": 6291 }, { "epoch": 0.39411829186175795, "grad_norm": 0.7512863780681799, "learning_rate": 6.907675445266113e-06, "loss": 0.421, "step": 6292 }, { "epoch": 0.39418092986110026, "grad_norm": 0.7254876580819134, "learning_rate": 6.906737742191051e-06, "loss": 0.4671, "step": 6293 }, { "epoch": 0.3942435678604425, "grad_norm": 0.780558196411235, "learning_rate": 6.905799960634308e-06, "loss": 0.4385, "step": 6294 }, { "epoch": 0.3943062058597848, "grad_norm": 0.823899157434386, "learning_rate": 6.904862100634488e-06, "loss": 0.4388, "step": 6295 }, { "epoch": 0.3943688438591271, "grad_norm": 0.876835322333112, "learning_rate": 6.9039241622301866e-06, "loss": 0.4177, "step": 6296 }, { "epoch": 0.39443148185846943, "grad_norm": 0.8766837319300567, "learning_rate": 6.902986145460016e-06, "loss": 0.427, "step": 6297 }, { "epoch": 0.39449411985781174, "grad_norm": 0.8664674932316826, "learning_rate": 6.902048050362581e-06, "loss": 0.4061, "step": 6298 }, { "epoch": 0.39455675785715405, "grad_norm": 0.7094547444779266, "learning_rate": 6.901109876976495e-06, "loss": 0.4669, "step": 6299 }, { "epoch": 0.39461939585649636, "grad_norm": 0.8189903832470341, "learning_rate": 6.900171625340374e-06, "loss": 0.4012, "step": 6300 }, { "epoch": 0.39468203385583867, "grad_norm": 0.796573092479714, "learning_rate": 6.899233295492838e-06, "loss": 0.3648, "step": 6301 }, { "epoch": 0.394744671855181, "grad_norm": 0.8224362481539954, "learning_rate": 6.898294887472504e-06, "loss": 0.4208, "step": 6302 }, { "epoch": 0.3948073098545232, "grad_norm": 0.8203043657480685, "learning_rate": 6.897356401318002e-06, "loss": 0.4169, "step": 6303 }, { "epoch": 0.39486994785386553, "grad_norm": 0.8313681627637585, "learning_rate": 6.896417837067959e-06, "loss": 0.4218, "step": 6304 }, { "epoch": 0.39493258585320784, "grad_norm": 0.836634535414638, "learning_rate": 6.895479194761003e-06, "loss": 0.4584, "step": 6305 }, { "epoch": 0.39499522385255015, "grad_norm": 0.6872763201262445, "learning_rate": 6.894540474435775e-06, "loss": 0.4537, "step": 6306 }, { "epoch": 0.39505786185189246, "grad_norm": 0.8303498340580105, "learning_rate": 6.8936016761309065e-06, "loss": 0.452, "step": 6307 }, { "epoch": 0.39512049985123476, "grad_norm": 0.8143878840544966, "learning_rate": 6.892662799885044e-06, "loss": 0.4183, "step": 6308 }, { "epoch": 0.39518313785057707, "grad_norm": 0.8717294475794997, "learning_rate": 6.891723845736828e-06, "loss": 0.391, "step": 6309 }, { "epoch": 0.3952457758499194, "grad_norm": 0.7124160135916298, "learning_rate": 6.890784813724907e-06, "loss": 0.4898, "step": 6310 }, { "epoch": 0.39530841384926163, "grad_norm": 0.7876945181929432, "learning_rate": 6.889845703887932e-06, "loss": 0.4113, "step": 6311 }, { "epoch": 0.39537105184860394, "grad_norm": 0.8536426052503389, "learning_rate": 6.888906516264559e-06, "loss": 0.4176, "step": 6312 }, { "epoch": 0.39543368984794625, "grad_norm": 0.6708749857189626, "learning_rate": 6.887967250893441e-06, "loss": 0.481, "step": 6313 }, { "epoch": 0.39549632784728855, "grad_norm": 0.7869677222775309, "learning_rate": 6.887027907813241e-06, "loss": 0.4269, "step": 6314 }, { "epoch": 0.39555896584663086, "grad_norm": 0.8168080957712988, "learning_rate": 6.886088487062622e-06, "loss": 0.4285, "step": 6315 }, { "epoch": 0.39562160384597317, "grad_norm": 2.0450401033313454, "learning_rate": 6.885148988680253e-06, "loss": 0.3944, "step": 6316 }, { "epoch": 0.3956842418453155, "grad_norm": 0.7015108381608076, "learning_rate": 6.8842094127048e-06, "loss": 0.4533, "step": 6317 }, { "epoch": 0.3957468798446578, "grad_norm": 0.7718621416442655, "learning_rate": 6.883269759174938e-06, "loss": 0.377, "step": 6318 }, { "epoch": 0.39580951784400004, "grad_norm": 0.8312395770818326, "learning_rate": 6.882330028129342e-06, "loss": 0.4129, "step": 6319 }, { "epoch": 0.39587215584334234, "grad_norm": 0.9328840886304656, "learning_rate": 6.881390219606694e-06, "loss": 0.4133, "step": 6320 }, { "epoch": 0.39593479384268465, "grad_norm": 0.8082285266961456, "learning_rate": 6.880450333645675e-06, "loss": 0.439, "step": 6321 }, { "epoch": 0.39599743184202696, "grad_norm": 0.7930596472839121, "learning_rate": 6.879510370284972e-06, "loss": 0.3932, "step": 6322 }, { "epoch": 0.39606006984136927, "grad_norm": 0.8644958153208895, "learning_rate": 6.878570329563273e-06, "loss": 0.4717, "step": 6323 }, { "epoch": 0.3961227078407116, "grad_norm": 0.819033472091712, "learning_rate": 6.877630211519268e-06, "loss": 0.4146, "step": 6324 }, { "epoch": 0.3961853458400539, "grad_norm": 1.0252521711277973, "learning_rate": 6.876690016191658e-06, "loss": 0.4358, "step": 6325 }, { "epoch": 0.3962479838393962, "grad_norm": 0.6733161201884185, "learning_rate": 6.875749743619138e-06, "loss": 0.4572, "step": 6326 }, { "epoch": 0.3963106218387385, "grad_norm": 0.9421641541324192, "learning_rate": 6.874809393840411e-06, "loss": 0.4327, "step": 6327 }, { "epoch": 0.39637325983808075, "grad_norm": 0.8977655712839151, "learning_rate": 6.8738689668941795e-06, "loss": 0.426, "step": 6328 }, { "epoch": 0.39643589783742306, "grad_norm": 0.8745066982873205, "learning_rate": 6.872928462819155e-06, "loss": 0.4383, "step": 6329 }, { "epoch": 0.39649853583676536, "grad_norm": 0.8836080852204947, "learning_rate": 6.871987881654047e-06, "loss": 0.4352, "step": 6330 }, { "epoch": 0.39656117383610767, "grad_norm": 0.8115570183919039, "learning_rate": 6.87104722343757e-06, "loss": 0.3998, "step": 6331 }, { "epoch": 0.39662381183545, "grad_norm": 0.7793007264625997, "learning_rate": 6.8701064882084424e-06, "loss": 0.4016, "step": 6332 }, { "epoch": 0.3966864498347923, "grad_norm": 0.8144537473697347, "learning_rate": 6.8691656760053836e-06, "loss": 0.4802, "step": 6333 }, { "epoch": 0.3967490878341346, "grad_norm": 0.865266738859267, "learning_rate": 6.868224786867119e-06, "loss": 0.449, "step": 6334 }, { "epoch": 0.3968117258334769, "grad_norm": 0.8224541094070261, "learning_rate": 6.867283820832378e-06, "loss": 0.3769, "step": 6335 }, { "epoch": 0.39687436383281915, "grad_norm": 0.7729915032625186, "learning_rate": 6.866342777939885e-06, "loss": 0.3832, "step": 6336 }, { "epoch": 0.39693700183216146, "grad_norm": 0.9838188515397915, "learning_rate": 6.865401658228378e-06, "loss": 0.4348, "step": 6337 }, { "epoch": 0.39699963983150377, "grad_norm": 0.8252540253493298, "learning_rate": 6.864460461736593e-06, "loss": 0.4427, "step": 6338 }, { "epoch": 0.3970622778308461, "grad_norm": 0.7817851674045069, "learning_rate": 6.863519188503269e-06, "loss": 0.4076, "step": 6339 }, { "epoch": 0.3971249158301884, "grad_norm": 0.9121907855689086, "learning_rate": 6.862577838567149e-06, "loss": 0.4148, "step": 6340 }, { "epoch": 0.3971875538295307, "grad_norm": 0.8390963161940983, "learning_rate": 6.86163641196698e-06, "loss": 0.4472, "step": 6341 }, { "epoch": 0.397250191828873, "grad_norm": 0.8281172416523275, "learning_rate": 6.8606949087415096e-06, "loss": 0.3981, "step": 6342 }, { "epoch": 0.3973128298282153, "grad_norm": 0.8601168269074425, "learning_rate": 6.8597533289294925e-06, "loss": 0.4203, "step": 6343 }, { "epoch": 0.39737546782755756, "grad_norm": 0.883680905301307, "learning_rate": 6.858811672569683e-06, "loss": 0.4325, "step": 6344 }, { "epoch": 0.39743810582689987, "grad_norm": 0.7522058421283124, "learning_rate": 6.85786993970084e-06, "loss": 0.3927, "step": 6345 }, { "epoch": 0.3975007438262422, "grad_norm": 0.8896618639639021, "learning_rate": 6.856928130361725e-06, "loss": 0.4612, "step": 6346 }, { "epoch": 0.3975633818255845, "grad_norm": 0.9247982838803341, "learning_rate": 6.855986244591104e-06, "loss": 0.396, "step": 6347 }, { "epoch": 0.3976260198249268, "grad_norm": 0.8222457926744456, "learning_rate": 6.855044282427746e-06, "loss": 0.4449, "step": 6348 }, { "epoch": 0.3976886578242691, "grad_norm": 0.829784899019049, "learning_rate": 6.854102243910419e-06, "loss": 0.4397, "step": 6349 }, { "epoch": 0.3977512958236114, "grad_norm": 0.8159934804588591, "learning_rate": 6.8531601290779e-06, "loss": 0.4316, "step": 6350 }, { "epoch": 0.3978139338229537, "grad_norm": 0.6331360625085544, "learning_rate": 6.852217937968965e-06, "loss": 0.5045, "step": 6351 }, { "epoch": 0.397876571822296, "grad_norm": 0.851968204356374, "learning_rate": 6.851275670622398e-06, "loss": 0.4185, "step": 6352 }, { "epoch": 0.3979392098216383, "grad_norm": 0.8402547032055605, "learning_rate": 6.8503333270769815e-06, "loss": 0.4351, "step": 6353 }, { "epoch": 0.3980018478209806, "grad_norm": 0.7530173020010008, "learning_rate": 6.8493909073714995e-06, "loss": 0.372, "step": 6354 }, { "epoch": 0.3980644858203229, "grad_norm": 0.8036174384252391, "learning_rate": 6.848448411544744e-06, "loss": 0.4032, "step": 6355 }, { "epoch": 0.3981271238196652, "grad_norm": 0.8072328639563744, "learning_rate": 6.847505839635511e-06, "loss": 0.406, "step": 6356 }, { "epoch": 0.3981897618190075, "grad_norm": 0.7726447413631254, "learning_rate": 6.846563191682594e-06, "loss": 0.3928, "step": 6357 }, { "epoch": 0.3982523998183498, "grad_norm": 0.8000059171658257, "learning_rate": 6.845620467724792e-06, "loss": 0.3639, "step": 6358 }, { "epoch": 0.3983150378176921, "grad_norm": 0.7798490423731174, "learning_rate": 6.844677667800911e-06, "loss": 0.4406, "step": 6359 }, { "epoch": 0.3983776758170344, "grad_norm": 0.8245188177443304, "learning_rate": 6.843734791949753e-06, "loss": 0.4148, "step": 6360 }, { "epoch": 0.3984403138163767, "grad_norm": 0.9889205777896256, "learning_rate": 6.84279184021013e-06, "loss": 0.4303, "step": 6361 }, { "epoch": 0.398502951815719, "grad_norm": 0.8244174618300955, "learning_rate": 6.841848812620853e-06, "loss": 0.4074, "step": 6362 }, { "epoch": 0.3985655898150613, "grad_norm": 0.6447318529432328, "learning_rate": 6.840905709220735e-06, "loss": 0.4728, "step": 6363 }, { "epoch": 0.3986282278144036, "grad_norm": 0.8080995609170601, "learning_rate": 6.839962530048599e-06, "loss": 0.3815, "step": 6364 }, { "epoch": 0.3986908658137459, "grad_norm": 0.8685975735584517, "learning_rate": 6.839019275143262e-06, "loss": 0.4148, "step": 6365 }, { "epoch": 0.3987535038130882, "grad_norm": 0.8404582728704616, "learning_rate": 6.838075944543552e-06, "loss": 0.4362, "step": 6366 }, { "epoch": 0.3988161418124305, "grad_norm": 0.852284694581187, "learning_rate": 6.837132538288292e-06, "loss": 0.4287, "step": 6367 }, { "epoch": 0.39887877981177283, "grad_norm": 0.7459673375411182, "learning_rate": 6.836189056416318e-06, "loss": 0.3802, "step": 6368 }, { "epoch": 0.39894141781111514, "grad_norm": 0.8104111726539314, "learning_rate": 6.835245498966461e-06, "loss": 0.3858, "step": 6369 }, { "epoch": 0.3990040558104574, "grad_norm": 0.7918591931661588, "learning_rate": 6.834301865977559e-06, "loss": 0.4001, "step": 6370 }, { "epoch": 0.3990666938097997, "grad_norm": 0.8704700925348323, "learning_rate": 6.833358157488451e-06, "loss": 0.4245, "step": 6371 }, { "epoch": 0.399129331809142, "grad_norm": 0.7935038404837981, "learning_rate": 6.832414373537982e-06, "loss": 0.4033, "step": 6372 }, { "epoch": 0.3991919698084843, "grad_norm": 0.8078686625683242, "learning_rate": 6.831470514164997e-06, "loss": 0.411, "step": 6373 }, { "epoch": 0.3992546078078266, "grad_norm": 0.8690375375337074, "learning_rate": 6.830526579408345e-06, "loss": 0.4354, "step": 6374 }, { "epoch": 0.39931724580716893, "grad_norm": 0.824421277618791, "learning_rate": 6.829582569306879e-06, "loss": 0.3927, "step": 6375 }, { "epoch": 0.39937988380651124, "grad_norm": 0.7790198260744073, "learning_rate": 6.828638483899456e-06, "loss": 0.3681, "step": 6376 }, { "epoch": 0.39944252180585355, "grad_norm": 0.838261918804857, "learning_rate": 6.8276943232249335e-06, "loss": 0.3826, "step": 6377 }, { "epoch": 0.3995051598051958, "grad_norm": 0.8904527253412109, "learning_rate": 6.826750087322173e-06, "loss": 0.4654, "step": 6378 }, { "epoch": 0.3995677978045381, "grad_norm": 0.8174222323786923, "learning_rate": 6.82580577623004e-06, "loss": 0.3929, "step": 6379 }, { "epoch": 0.3996304358038804, "grad_norm": 0.7922601476359623, "learning_rate": 6.824861389987402e-06, "loss": 0.4069, "step": 6380 }, { "epoch": 0.3996930738032227, "grad_norm": 0.8385094529216812, "learning_rate": 6.8239169286331304e-06, "loss": 0.4407, "step": 6381 }, { "epoch": 0.39975571180256503, "grad_norm": 0.7690404316400153, "learning_rate": 6.8229723922061e-06, "loss": 0.4762, "step": 6382 }, { "epoch": 0.39981834980190734, "grad_norm": 0.9757148709114507, "learning_rate": 6.822027780745187e-06, "loss": 0.4652, "step": 6383 }, { "epoch": 0.39988098780124964, "grad_norm": 0.8971417909099884, "learning_rate": 6.8210830942892724e-06, "loss": 0.4509, "step": 6384 }, { "epoch": 0.39994362580059195, "grad_norm": 0.8847282215961554, "learning_rate": 6.82013833287724e-06, "loss": 0.425, "step": 6385 }, { "epoch": 0.4000062637999342, "grad_norm": 0.7733840348360167, "learning_rate": 6.819193496547977e-06, "loss": 0.3928, "step": 6386 }, { "epoch": 0.4000689017992765, "grad_norm": 0.8303050832903448, "learning_rate": 6.81824858534037e-06, "loss": 0.4033, "step": 6387 }, { "epoch": 0.4001315397986188, "grad_norm": 0.8247018973521184, "learning_rate": 6.817303599293315e-06, "loss": 0.376, "step": 6388 }, { "epoch": 0.4001941777979611, "grad_norm": 0.8112047173034855, "learning_rate": 6.816358538445707e-06, "loss": 0.3948, "step": 6389 }, { "epoch": 0.40025681579730343, "grad_norm": 0.8008192238190006, "learning_rate": 6.815413402836443e-06, "loss": 0.4044, "step": 6390 }, { "epoch": 0.40031945379664574, "grad_norm": 0.9737781646931999, "learning_rate": 6.814468192504428e-06, "loss": 0.4173, "step": 6391 }, { "epoch": 0.40038209179598805, "grad_norm": 0.7787507184677895, "learning_rate": 6.813522907488562e-06, "loss": 0.3483, "step": 6392 }, { "epoch": 0.40044472979533036, "grad_norm": 0.8311774836184437, "learning_rate": 6.81257754782776e-06, "loss": 0.392, "step": 6393 }, { "epoch": 0.40050736779467266, "grad_norm": 0.8445187807085428, "learning_rate": 6.811632113560929e-06, "loss": 0.3932, "step": 6394 }, { "epoch": 0.4005700057940149, "grad_norm": 0.8921768850642366, "learning_rate": 6.810686604726984e-06, "loss": 0.4139, "step": 6395 }, { "epoch": 0.4006326437933572, "grad_norm": 0.8357872183427232, "learning_rate": 6.8097410213648415e-06, "loss": 0.437, "step": 6396 }, { "epoch": 0.40069528179269953, "grad_norm": 0.7858654424164156, "learning_rate": 6.808795363513423e-06, "loss": 0.4145, "step": 6397 }, { "epoch": 0.40075791979204184, "grad_norm": 0.7790712710951821, "learning_rate": 6.807849631211652e-06, "loss": 0.3932, "step": 6398 }, { "epoch": 0.40082055779138415, "grad_norm": 0.7940580314134307, "learning_rate": 6.806903824498454e-06, "loss": 0.4094, "step": 6399 }, { "epoch": 0.40088319579072645, "grad_norm": 0.8003574585258142, "learning_rate": 6.80595794341276e-06, "loss": 0.4281, "step": 6400 }, { "epoch": 0.40094583379006876, "grad_norm": 0.8401906852941367, "learning_rate": 6.8050119879935e-06, "loss": 0.4066, "step": 6401 }, { "epoch": 0.40100847178941107, "grad_norm": 0.8492971307112245, "learning_rate": 6.804065958279613e-06, "loss": 0.4518, "step": 6402 }, { "epoch": 0.4010711097887533, "grad_norm": 0.8778791219653796, "learning_rate": 6.803119854310037e-06, "loss": 0.4024, "step": 6403 }, { "epoch": 0.40113374778809563, "grad_norm": 0.896036892459301, "learning_rate": 6.802173676123713e-06, "loss": 0.4568, "step": 6404 }, { "epoch": 0.40119638578743794, "grad_norm": 0.8539718201555554, "learning_rate": 6.801227423759585e-06, "loss": 0.4686, "step": 6405 }, { "epoch": 0.40125902378678024, "grad_norm": 0.7864898407547245, "learning_rate": 6.800281097256601e-06, "loss": 0.4007, "step": 6406 }, { "epoch": 0.40132166178612255, "grad_norm": 0.7938843761586867, "learning_rate": 6.7993346966537145e-06, "loss": 0.4012, "step": 6407 }, { "epoch": 0.40138429978546486, "grad_norm": 0.8166189816253119, "learning_rate": 6.798388221989878e-06, "loss": 0.3999, "step": 6408 }, { "epoch": 0.40144693778480717, "grad_norm": 0.8081833928359938, "learning_rate": 6.797441673304049e-06, "loss": 0.3947, "step": 6409 }, { "epoch": 0.4015095757841495, "grad_norm": 0.794825330484546, "learning_rate": 6.7964950506351855e-06, "loss": 0.4549, "step": 6410 }, { "epoch": 0.4015722137834918, "grad_norm": 0.7811106266302471, "learning_rate": 6.795548354022252e-06, "loss": 0.375, "step": 6411 }, { "epoch": 0.40163485178283403, "grad_norm": 0.9687537930667443, "learning_rate": 6.794601583504216e-06, "loss": 0.4148, "step": 6412 }, { "epoch": 0.40169748978217634, "grad_norm": 0.8783834591513017, "learning_rate": 6.793654739120046e-06, "loss": 0.4626, "step": 6413 }, { "epoch": 0.40176012778151865, "grad_norm": 0.8431566021766386, "learning_rate": 6.792707820908714e-06, "loss": 0.4449, "step": 6414 }, { "epoch": 0.40182276578086096, "grad_norm": 0.8082148928689391, "learning_rate": 6.791760828909194e-06, "loss": 0.39, "step": 6415 }, { "epoch": 0.40188540378020327, "grad_norm": 0.8694915797763153, "learning_rate": 6.790813763160467e-06, "loss": 0.4202, "step": 6416 }, { "epoch": 0.4019480417795456, "grad_norm": 0.8815217959516644, "learning_rate": 6.789866623701513e-06, "loss": 0.4415, "step": 6417 }, { "epoch": 0.4020106797788879, "grad_norm": 0.8231988178860445, "learning_rate": 6.788919410571316e-06, "loss": 0.4052, "step": 6418 }, { "epoch": 0.4020733177782302, "grad_norm": 0.829999176794089, "learning_rate": 6.7879721238088655e-06, "loss": 0.3942, "step": 6419 }, { "epoch": 0.40213595577757244, "grad_norm": 0.9383274127110577, "learning_rate": 6.787024763453148e-06, "loss": 0.4134, "step": 6420 }, { "epoch": 0.40219859377691475, "grad_norm": 0.8350866181518469, "learning_rate": 6.786077329543161e-06, "loss": 0.4825, "step": 6421 }, { "epoch": 0.40226123177625706, "grad_norm": 0.8022422791862784, "learning_rate": 6.7851298221179e-06, "loss": 0.4058, "step": 6422 }, { "epoch": 0.40232386977559936, "grad_norm": 0.7874835225581366, "learning_rate": 6.784182241216364e-06, "loss": 0.3961, "step": 6423 }, { "epoch": 0.40238650777494167, "grad_norm": 0.7965507608398726, "learning_rate": 6.783234586877555e-06, "loss": 0.414, "step": 6424 }, { "epoch": 0.402449145774284, "grad_norm": 0.7694903575833083, "learning_rate": 6.782286859140481e-06, "loss": 0.3722, "step": 6425 }, { "epoch": 0.4025117837736263, "grad_norm": 0.8458940848879797, "learning_rate": 6.781339058044147e-06, "loss": 0.4017, "step": 6426 }, { "epoch": 0.4025744217729686, "grad_norm": 0.8120391312376575, "learning_rate": 6.780391183627569e-06, "loss": 0.3873, "step": 6427 }, { "epoch": 0.40263705977231085, "grad_norm": 0.6911448806797302, "learning_rate": 6.779443235929758e-06, "loss": 0.4441, "step": 6428 }, { "epoch": 0.40269969777165315, "grad_norm": 0.8098440036459258, "learning_rate": 6.778495214989733e-06, "loss": 0.4171, "step": 6429 }, { "epoch": 0.40276233577099546, "grad_norm": 0.8248336706536313, "learning_rate": 6.777547120846515e-06, "loss": 0.4377, "step": 6430 }, { "epoch": 0.40282497377033777, "grad_norm": 0.8545665351836161, "learning_rate": 6.7765989535391275e-06, "loss": 0.403, "step": 6431 }, { "epoch": 0.4028876117696801, "grad_norm": 0.9103471707414984, "learning_rate": 6.775650713106599e-06, "loss": 0.4291, "step": 6432 }, { "epoch": 0.4029502497690224, "grad_norm": 0.8841170228959101, "learning_rate": 6.774702399587956e-06, "loss": 0.4356, "step": 6433 }, { "epoch": 0.4030128877683647, "grad_norm": 0.7951623744810526, "learning_rate": 6.7737540130222314e-06, "loss": 0.4116, "step": 6434 }, { "epoch": 0.403075525767707, "grad_norm": 0.7995212955776113, "learning_rate": 6.772805553448464e-06, "loss": 0.393, "step": 6435 }, { "epoch": 0.4031381637670493, "grad_norm": 0.879075126326359, "learning_rate": 6.771857020905691e-06, "loss": 0.4477, "step": 6436 }, { "epoch": 0.40320080176639156, "grad_norm": 0.8307329455374457, "learning_rate": 6.770908415432955e-06, "loss": 0.4023, "step": 6437 }, { "epoch": 0.40326343976573387, "grad_norm": 0.8774697822640689, "learning_rate": 6.769959737069298e-06, "loss": 0.447, "step": 6438 }, { "epoch": 0.4033260777650762, "grad_norm": 0.8361553613867597, "learning_rate": 6.76901098585377e-06, "loss": 0.4003, "step": 6439 }, { "epoch": 0.4033887157644185, "grad_norm": 0.8562617240694547, "learning_rate": 6.7680621618254225e-06, "loss": 0.4165, "step": 6440 }, { "epoch": 0.4034513537637608, "grad_norm": 0.6604923094595945, "learning_rate": 6.767113265023309e-06, "loss": 0.4835, "step": 6441 }, { "epoch": 0.4035139917631031, "grad_norm": 0.8902875747607314, "learning_rate": 6.766164295486485e-06, "loss": 0.4128, "step": 6442 }, { "epoch": 0.4035766297624454, "grad_norm": 0.9104285177946264, "learning_rate": 6.765215253254009e-06, "loss": 0.4363, "step": 6443 }, { "epoch": 0.4036392677617877, "grad_norm": 0.8395230047552482, "learning_rate": 6.764266138364946e-06, "loss": 0.4209, "step": 6444 }, { "epoch": 0.40370190576112996, "grad_norm": 0.8158571737855719, "learning_rate": 6.763316950858363e-06, "loss": 0.3957, "step": 6445 }, { "epoch": 0.40376454376047227, "grad_norm": 0.8182496797048098, "learning_rate": 6.762367690773327e-06, "loss": 0.4035, "step": 6446 }, { "epoch": 0.4038271817598146, "grad_norm": 0.8140933028455337, "learning_rate": 6.761418358148908e-06, "loss": 0.4121, "step": 6447 }, { "epoch": 0.4038898197591569, "grad_norm": 0.8696609413981886, "learning_rate": 6.760468953024183e-06, "loss": 0.4126, "step": 6448 }, { "epoch": 0.4039524577584992, "grad_norm": 0.788989456378461, "learning_rate": 6.75951947543823e-06, "loss": 0.4111, "step": 6449 }, { "epoch": 0.4040150957578415, "grad_norm": 0.8372852165467597, "learning_rate": 6.7585699254301284e-06, "loss": 0.4142, "step": 6450 }, { "epoch": 0.4040777337571838, "grad_norm": 0.777927684910304, "learning_rate": 6.7576203030389635e-06, "loss": 0.4292, "step": 6451 }, { "epoch": 0.4041403717565261, "grad_norm": 0.8960890467942997, "learning_rate": 6.756670608303817e-06, "loss": 0.4329, "step": 6452 }, { "epoch": 0.40420300975586837, "grad_norm": 0.8299166689532159, "learning_rate": 6.755720841263784e-06, "loss": 0.3834, "step": 6453 }, { "epoch": 0.4042656477552107, "grad_norm": 0.7727586882732359, "learning_rate": 6.754771001957957e-06, "loss": 0.4025, "step": 6454 }, { "epoch": 0.404328285754553, "grad_norm": 0.8868280546085816, "learning_rate": 6.753821090425429e-06, "loss": 0.4381, "step": 6455 }, { "epoch": 0.4043909237538953, "grad_norm": 0.7168776164307178, "learning_rate": 6.752871106705299e-06, "loss": 0.4749, "step": 6456 }, { "epoch": 0.4044535617532376, "grad_norm": 0.8306994237463987, "learning_rate": 6.751921050836667e-06, "loss": 0.4016, "step": 6457 }, { "epoch": 0.4045161997525799, "grad_norm": 0.7986697266878768, "learning_rate": 6.750970922858643e-06, "loss": 0.4063, "step": 6458 }, { "epoch": 0.4045788377519222, "grad_norm": 0.8580199833799009, "learning_rate": 6.750020722810329e-06, "loss": 0.4052, "step": 6459 }, { "epoch": 0.4046414757512645, "grad_norm": 0.8432514360439013, "learning_rate": 6.749070450730837e-06, "loss": 0.4038, "step": 6460 }, { "epoch": 0.40470411375060683, "grad_norm": 0.8403423414266447, "learning_rate": 6.74812010665928e-06, "loss": 0.3854, "step": 6461 }, { "epoch": 0.4047667517499491, "grad_norm": 0.8262315958736968, "learning_rate": 6.747169690634776e-06, "loss": 0.4023, "step": 6462 }, { "epoch": 0.4048293897492914, "grad_norm": 0.8116316817898241, "learning_rate": 6.746219202696445e-06, "loss": 0.3882, "step": 6463 }, { "epoch": 0.4048920277486337, "grad_norm": 0.7883304304774177, "learning_rate": 6.7452686428834045e-06, "loss": 0.4263, "step": 6464 }, { "epoch": 0.404954665747976, "grad_norm": 0.6685872898162585, "learning_rate": 6.744318011234784e-06, "loss": 0.4491, "step": 6465 }, { "epoch": 0.4050173037473183, "grad_norm": 0.8336302612932646, "learning_rate": 6.74336730778971e-06, "loss": 0.4182, "step": 6466 }, { "epoch": 0.4050799417466606, "grad_norm": 0.834169317335812, "learning_rate": 6.742416532587315e-06, "loss": 0.4048, "step": 6467 }, { "epoch": 0.40514257974600293, "grad_norm": 0.858668071947745, "learning_rate": 6.7414656856667315e-06, "loss": 0.3951, "step": 6468 }, { "epoch": 0.40520521774534524, "grad_norm": 0.8323844977583631, "learning_rate": 6.740514767067097e-06, "loss": 0.3964, "step": 6469 }, { "epoch": 0.4052678557446875, "grad_norm": 0.7680100515973821, "learning_rate": 6.739563776827551e-06, "loss": 0.3818, "step": 6470 }, { "epoch": 0.4053304937440298, "grad_norm": 0.8793156584712862, "learning_rate": 6.738612714987238e-06, "loss": 0.4152, "step": 6471 }, { "epoch": 0.4053931317433721, "grad_norm": 0.8340961931989205, "learning_rate": 6.737661581585303e-06, "loss": 0.4319, "step": 6472 }, { "epoch": 0.4054557697427144, "grad_norm": 0.9287751577177514, "learning_rate": 6.736710376660893e-06, "loss": 0.4456, "step": 6473 }, { "epoch": 0.4055184077420567, "grad_norm": 0.788501455504818, "learning_rate": 6.735759100253164e-06, "loss": 0.42, "step": 6474 }, { "epoch": 0.405581045741399, "grad_norm": 0.8482168144720785, "learning_rate": 6.734807752401267e-06, "loss": 0.4245, "step": 6475 }, { "epoch": 0.40564368374074133, "grad_norm": 0.8102073336263935, "learning_rate": 6.7338563331443614e-06, "loss": 0.3972, "step": 6476 }, { "epoch": 0.40570632174008364, "grad_norm": 0.7990280007342405, "learning_rate": 6.732904842521607e-06, "loss": 0.4304, "step": 6477 }, { "epoch": 0.40576895973942595, "grad_norm": 0.8393782752493066, "learning_rate": 6.731953280572167e-06, "loss": 0.4563, "step": 6478 }, { "epoch": 0.4058315977387682, "grad_norm": 0.7966267463914509, "learning_rate": 6.731001647335211e-06, "loss": 0.3599, "step": 6479 }, { "epoch": 0.4058942357381105, "grad_norm": 0.8181905628843751, "learning_rate": 6.730049942849904e-06, "loss": 0.4261, "step": 6480 }, { "epoch": 0.4059568737374528, "grad_norm": 0.8420849004739979, "learning_rate": 6.72909816715542e-06, "loss": 0.4276, "step": 6481 }, { "epoch": 0.4060195117367951, "grad_norm": 0.9165346310224523, "learning_rate": 6.728146320290935e-06, "loss": 0.4224, "step": 6482 }, { "epoch": 0.40608214973613743, "grad_norm": 0.846173492308066, "learning_rate": 6.727194402295627e-06, "loss": 0.3841, "step": 6483 }, { "epoch": 0.40614478773547974, "grad_norm": 0.6477116128654088, "learning_rate": 6.7262424132086755e-06, "loss": 0.4799, "step": 6484 }, { "epoch": 0.40620742573482205, "grad_norm": 0.888375444727439, "learning_rate": 6.725290353069267e-06, "loss": 0.4431, "step": 6485 }, { "epoch": 0.40627006373416436, "grad_norm": 0.7548253212440292, "learning_rate": 6.724338221916587e-06, "loss": 0.4218, "step": 6486 }, { "epoch": 0.4063327017335066, "grad_norm": 0.8253157010729322, "learning_rate": 6.723386019789826e-06, "loss": 0.4275, "step": 6487 }, { "epoch": 0.4063953397328489, "grad_norm": 0.6825078230641264, "learning_rate": 6.722433746728176e-06, "loss": 0.4848, "step": 6488 }, { "epoch": 0.4064579777321912, "grad_norm": 0.855495453500215, "learning_rate": 6.721481402770834e-06, "loss": 0.4507, "step": 6489 }, { "epoch": 0.40652061573153353, "grad_norm": 0.8142081111072991, "learning_rate": 6.720528987956997e-06, "loss": 0.4127, "step": 6490 }, { "epoch": 0.40658325373087584, "grad_norm": 0.8063838244299051, "learning_rate": 6.7195765023258685e-06, "loss": 0.4235, "step": 6491 }, { "epoch": 0.40664589173021815, "grad_norm": 0.8379358320188955, "learning_rate": 6.7186239459166526e-06, "loss": 0.4103, "step": 6492 }, { "epoch": 0.40670852972956045, "grad_norm": 0.8513855355205763, "learning_rate": 6.717671318768555e-06, "loss": 0.4289, "step": 6493 }, { "epoch": 0.40677116772890276, "grad_norm": 0.8694472052693493, "learning_rate": 6.716718620920787e-06, "loss": 0.3998, "step": 6494 }, { "epoch": 0.406833805728245, "grad_norm": 0.8287992511284403, "learning_rate": 6.715765852412561e-06, "loss": 0.4094, "step": 6495 }, { "epoch": 0.4068964437275873, "grad_norm": 0.8600466718236827, "learning_rate": 6.714813013283097e-06, "loss": 0.3803, "step": 6496 }, { "epoch": 0.40695908172692963, "grad_norm": 0.8038489644159436, "learning_rate": 6.713860103571609e-06, "loss": 0.4247, "step": 6497 }, { "epoch": 0.40702171972627194, "grad_norm": 0.7978741698156248, "learning_rate": 6.712907123317321e-06, "loss": 0.3987, "step": 6498 }, { "epoch": 0.40708435772561424, "grad_norm": 0.8053436792766513, "learning_rate": 6.7119540725594565e-06, "loss": 0.3946, "step": 6499 }, { "epoch": 0.40714699572495655, "grad_norm": 0.8004156710339524, "learning_rate": 6.711000951337247e-06, "loss": 0.3436, "step": 6500 }, { "epoch": 0.40720963372429886, "grad_norm": 0.754523065565171, "learning_rate": 6.710047759689921e-06, "loss": 0.4119, "step": 6501 }, { "epoch": 0.40727227172364117, "grad_norm": 0.82064697910159, "learning_rate": 6.709094497656712e-06, "loss": 0.389, "step": 6502 }, { "epoch": 0.4073349097229835, "grad_norm": 0.8331982240785295, "learning_rate": 6.708141165276854e-06, "loss": 0.403, "step": 6503 }, { "epoch": 0.4073975477223257, "grad_norm": 0.8273622332740864, "learning_rate": 6.7071877625895885e-06, "loss": 0.3933, "step": 6504 }, { "epoch": 0.40746018572166803, "grad_norm": 0.7858888327913272, "learning_rate": 6.7062342896341605e-06, "loss": 0.4525, "step": 6505 }, { "epoch": 0.40752282372101034, "grad_norm": 0.8350637004003006, "learning_rate": 6.705280746449811e-06, "loss": 0.4385, "step": 6506 }, { "epoch": 0.40758546172035265, "grad_norm": 0.8736135168633334, "learning_rate": 6.7043271330757895e-06, "loss": 0.4594, "step": 6507 }, { "epoch": 0.40764809971969496, "grad_norm": 0.8159432814417733, "learning_rate": 6.703373449551346e-06, "loss": 0.4005, "step": 6508 }, { "epoch": 0.40771073771903726, "grad_norm": 0.7744294809482581, "learning_rate": 6.702419695915736e-06, "loss": 0.4119, "step": 6509 }, { "epoch": 0.40777337571837957, "grad_norm": 0.8433246008723598, "learning_rate": 6.701465872208216e-06, "loss": 0.4055, "step": 6510 }, { "epoch": 0.4078360137177219, "grad_norm": 0.907640237135514, "learning_rate": 6.700511978468044e-06, "loss": 0.3756, "step": 6511 }, { "epoch": 0.40789865171706413, "grad_norm": 0.7750072545646994, "learning_rate": 6.699558014734483e-06, "loss": 0.4205, "step": 6512 }, { "epoch": 0.40796128971640644, "grad_norm": 0.7942581500932295, "learning_rate": 6.6986039810468e-06, "loss": 0.3977, "step": 6513 }, { "epoch": 0.40802392771574875, "grad_norm": 0.8628353391304465, "learning_rate": 6.697649877444261e-06, "loss": 0.421, "step": 6514 }, { "epoch": 0.40808656571509105, "grad_norm": 0.7155335496295095, "learning_rate": 6.6966957039661384e-06, "loss": 0.4859, "step": 6515 }, { "epoch": 0.40814920371443336, "grad_norm": 0.9231343802475772, "learning_rate": 6.695741460651706e-06, "loss": 0.3946, "step": 6516 }, { "epoch": 0.40821184171377567, "grad_norm": 0.8177761488103796, "learning_rate": 6.694787147540238e-06, "loss": 0.4439, "step": 6517 }, { "epoch": 0.408274479713118, "grad_norm": 0.8165192141400391, "learning_rate": 6.693832764671019e-06, "loss": 0.3654, "step": 6518 }, { "epoch": 0.4083371177124603, "grad_norm": 0.7225769038457118, "learning_rate": 6.6928783120833286e-06, "loss": 0.3816, "step": 6519 }, { "epoch": 0.4083997557118026, "grad_norm": 0.8036173414941689, "learning_rate": 6.691923789816452e-06, "loss": 0.3847, "step": 6520 }, { "epoch": 0.40846239371114484, "grad_norm": 0.8671991437094351, "learning_rate": 6.690969197909677e-06, "loss": 0.4078, "step": 6521 }, { "epoch": 0.40852503171048715, "grad_norm": 0.7601463062320596, "learning_rate": 6.690014536402298e-06, "loss": 0.3876, "step": 6522 }, { "epoch": 0.40858766970982946, "grad_norm": 0.8271639625497467, "learning_rate": 6.689059805333606e-06, "loss": 0.3759, "step": 6523 }, { "epoch": 0.40865030770917177, "grad_norm": 0.8604450912080258, "learning_rate": 6.6881050047429e-06, "loss": 0.4116, "step": 6524 }, { "epoch": 0.4087129457085141, "grad_norm": 0.7993659306088696, "learning_rate": 6.687150134669478e-06, "loss": 0.407, "step": 6525 }, { "epoch": 0.4087755837078564, "grad_norm": 0.6892330424977308, "learning_rate": 6.686195195152643e-06, "loss": 0.4697, "step": 6526 }, { "epoch": 0.4088382217071987, "grad_norm": 0.7372722391467658, "learning_rate": 6.685240186231701e-06, "loss": 0.3854, "step": 6527 }, { "epoch": 0.408900859706541, "grad_norm": 0.7496992387511618, "learning_rate": 6.6842851079459604e-06, "loss": 0.3625, "step": 6528 }, { "epoch": 0.40896349770588325, "grad_norm": 0.8862423188982573, "learning_rate": 6.683329960334732e-06, "loss": 0.4274, "step": 6529 }, { "epoch": 0.40902613570522556, "grad_norm": 0.8457200925256549, "learning_rate": 6.682374743437328e-06, "loss": 0.4056, "step": 6530 }, { "epoch": 0.40908877370456787, "grad_norm": 0.7959832088146642, "learning_rate": 6.68141945729307e-06, "loss": 0.4009, "step": 6531 }, { "epoch": 0.4091514117039102, "grad_norm": 0.6353650696993817, "learning_rate": 6.6804641019412755e-06, "loss": 0.4614, "step": 6532 }, { "epoch": 0.4092140497032525, "grad_norm": 0.7186540548719682, "learning_rate": 6.679508677421264e-06, "loss": 0.3885, "step": 6533 }, { "epoch": 0.4092766877025948, "grad_norm": 0.8689173895572502, "learning_rate": 6.678553183772366e-06, "loss": 0.4135, "step": 6534 }, { "epoch": 0.4093393257019371, "grad_norm": 0.8363153840409839, "learning_rate": 6.677597621033906e-06, "loss": 0.4065, "step": 6535 }, { "epoch": 0.4094019637012794, "grad_norm": 0.7651480431347999, "learning_rate": 6.676641989245217e-06, "loss": 0.3998, "step": 6536 }, { "epoch": 0.40946460170062166, "grad_norm": 0.6126424582162525, "learning_rate": 6.675686288445632e-06, "loss": 0.44, "step": 6537 }, { "epoch": 0.40952723969996396, "grad_norm": 0.8297168550054975, "learning_rate": 6.6747305186744896e-06, "loss": 0.4028, "step": 6538 }, { "epoch": 0.40958987769930627, "grad_norm": 0.8766981925146067, "learning_rate": 6.673774679971128e-06, "loss": 0.4027, "step": 6539 }, { "epoch": 0.4096525156986486, "grad_norm": 0.8353807021173819, "learning_rate": 6.672818772374888e-06, "loss": 0.378, "step": 6540 }, { "epoch": 0.4097151536979909, "grad_norm": 0.8740407373086413, "learning_rate": 6.6718627959251195e-06, "loss": 0.4152, "step": 6541 }, { "epoch": 0.4097777916973332, "grad_norm": 0.8159934498026851, "learning_rate": 6.670906750661166e-06, "loss": 0.4316, "step": 6542 }, { "epoch": 0.4098404296966755, "grad_norm": 0.8111562976774983, "learning_rate": 6.669950636622381e-06, "loss": 0.3875, "step": 6543 }, { "epoch": 0.4099030676960178, "grad_norm": 0.7587563017696515, "learning_rate": 6.668994453848118e-06, "loss": 0.4043, "step": 6544 }, { "epoch": 0.4099657056953601, "grad_norm": 0.8250032098228864, "learning_rate": 6.668038202377733e-06, "loss": 0.3881, "step": 6545 }, { "epoch": 0.41002834369470237, "grad_norm": 0.6359517337537562, "learning_rate": 6.6670818822505864e-06, "loss": 0.4895, "step": 6546 }, { "epoch": 0.4100909816940447, "grad_norm": 0.6799599763492861, "learning_rate": 6.666125493506041e-06, "loss": 0.4704, "step": 6547 }, { "epoch": 0.410153619693387, "grad_norm": 0.8079914208967549, "learning_rate": 6.665169036183461e-06, "loss": 0.3497, "step": 6548 }, { "epoch": 0.4102162576927293, "grad_norm": 0.859060835217165, "learning_rate": 6.6642125103222125e-06, "loss": 0.3953, "step": 6549 }, { "epoch": 0.4102788956920716, "grad_norm": 0.7486008025098083, "learning_rate": 6.663255915961668e-06, "loss": 0.3923, "step": 6550 }, { "epoch": 0.4103415336914139, "grad_norm": 0.8418166017876257, "learning_rate": 6.662299253141203e-06, "loss": 0.3888, "step": 6551 }, { "epoch": 0.4104041716907562, "grad_norm": 0.8351753415256618, "learning_rate": 6.661342521900194e-06, "loss": 0.4211, "step": 6552 }, { "epoch": 0.4104668096900985, "grad_norm": 0.8833736404925802, "learning_rate": 6.660385722278014e-06, "loss": 0.4349, "step": 6553 }, { "epoch": 0.4105294476894408, "grad_norm": 0.8103596269068147, "learning_rate": 6.659428854314053e-06, "loss": 0.4273, "step": 6554 }, { "epoch": 0.4105920856887831, "grad_norm": 0.856474848652611, "learning_rate": 6.658471918047691e-06, "loss": 0.3873, "step": 6555 }, { "epoch": 0.4106547236881254, "grad_norm": 0.945832469880253, "learning_rate": 6.657514913518317e-06, "loss": 0.416, "step": 6556 }, { "epoch": 0.4107173616874677, "grad_norm": 0.8865511537513051, "learning_rate": 6.656557840765322e-06, "loss": 0.4466, "step": 6557 }, { "epoch": 0.41077999968681, "grad_norm": 0.7673041215773336, "learning_rate": 6.655600699828098e-06, "loss": 0.3981, "step": 6558 }, { "epoch": 0.4108426376861523, "grad_norm": 0.841618258339795, "learning_rate": 6.654643490746042e-06, "loss": 0.4225, "step": 6559 }, { "epoch": 0.4109052756854946, "grad_norm": 0.8082093875197354, "learning_rate": 6.653686213558553e-06, "loss": 0.404, "step": 6560 }, { "epoch": 0.4109679136848369, "grad_norm": 0.6878154222265779, "learning_rate": 6.652728868305034e-06, "loss": 0.4572, "step": 6561 }, { "epoch": 0.41103055168417924, "grad_norm": 0.7524293461092177, "learning_rate": 6.651771455024886e-06, "loss": 0.4613, "step": 6562 }, { "epoch": 0.4110931896835215, "grad_norm": 0.7006205098606554, "learning_rate": 6.650813973757517e-06, "loss": 0.4609, "step": 6563 }, { "epoch": 0.4111558276828638, "grad_norm": 0.8860707979626495, "learning_rate": 6.6498564245423425e-06, "loss": 0.4366, "step": 6564 }, { "epoch": 0.4112184656822061, "grad_norm": 0.6461656234672829, "learning_rate": 6.6488988074187684e-06, "loss": 0.4577, "step": 6565 }, { "epoch": 0.4112811036815484, "grad_norm": 0.8552384671709481, "learning_rate": 6.647941122426216e-06, "loss": 0.4104, "step": 6566 }, { "epoch": 0.4113437416808907, "grad_norm": 0.8788801506170293, "learning_rate": 6.646983369604099e-06, "loss": 0.3995, "step": 6567 }, { "epoch": 0.411406379680233, "grad_norm": 0.9223796023301072, "learning_rate": 6.646025548991839e-06, "loss": 0.4364, "step": 6568 }, { "epoch": 0.41146901767957533, "grad_norm": 0.8369504461531386, "learning_rate": 6.645067660628865e-06, "loss": 0.4004, "step": 6569 }, { "epoch": 0.41153165567891764, "grad_norm": 0.8617851286705729, "learning_rate": 6.6441097045546e-06, "loss": 0.4354, "step": 6570 }, { "epoch": 0.4115942936782599, "grad_norm": 0.8119213004063407, "learning_rate": 6.643151680808474e-06, "loss": 0.4233, "step": 6571 }, { "epoch": 0.4116569316776022, "grad_norm": 0.8031366574703883, "learning_rate": 6.642193589429919e-06, "loss": 0.4111, "step": 6572 }, { "epoch": 0.4117195696769445, "grad_norm": 0.8547198672799732, "learning_rate": 6.641235430458373e-06, "loss": 0.402, "step": 6573 }, { "epoch": 0.4117822076762868, "grad_norm": 0.8935350626092392, "learning_rate": 6.640277203933272e-06, "loss": 0.4544, "step": 6574 }, { "epoch": 0.4118448456756291, "grad_norm": 0.7880444270586193, "learning_rate": 6.639318909894054e-06, "loss": 0.4272, "step": 6575 }, { "epoch": 0.41190748367497143, "grad_norm": 0.7631015315762734, "learning_rate": 6.638360548380167e-06, "loss": 0.4104, "step": 6576 }, { "epoch": 0.41197012167431374, "grad_norm": 0.8970685460809124, "learning_rate": 6.6374021194310575e-06, "loss": 0.4193, "step": 6577 }, { "epoch": 0.41203275967365605, "grad_norm": 0.8457912436198679, "learning_rate": 6.636443623086172e-06, "loss": 0.4248, "step": 6578 }, { "epoch": 0.4120953976729983, "grad_norm": 0.8244665575029495, "learning_rate": 6.635485059384963e-06, "loss": 0.3987, "step": 6579 }, { "epoch": 0.4121580356723406, "grad_norm": 0.8133529424386704, "learning_rate": 6.634526428366884e-06, "loss": 0.4577, "step": 6580 }, { "epoch": 0.4122206736716829, "grad_norm": 0.8150247700192297, "learning_rate": 6.6335677300713955e-06, "loss": 0.3931, "step": 6581 }, { "epoch": 0.4122833116710252, "grad_norm": 0.7625002990195262, "learning_rate": 6.632608964537955e-06, "loss": 0.3901, "step": 6582 }, { "epoch": 0.41234594967036753, "grad_norm": 0.7962853625770262, "learning_rate": 6.631650131806026e-06, "loss": 0.389, "step": 6583 }, { "epoch": 0.41240858766970984, "grad_norm": 0.8685615183792181, "learning_rate": 6.630691231915076e-06, "loss": 0.4218, "step": 6584 }, { "epoch": 0.41247122566905214, "grad_norm": 0.7952718952433356, "learning_rate": 6.629732264904572e-06, "loss": 0.4418, "step": 6585 }, { "epoch": 0.41253386366839445, "grad_norm": 0.8933527350049224, "learning_rate": 6.628773230813986e-06, "loss": 0.4034, "step": 6586 }, { "epoch": 0.41259650166773676, "grad_norm": 0.8924796657719081, "learning_rate": 6.62781412968279e-06, "loss": 0.4407, "step": 6587 }, { "epoch": 0.412659139667079, "grad_norm": 0.8191553353397825, "learning_rate": 6.626854961550464e-06, "loss": 0.4196, "step": 6588 }, { "epoch": 0.4127217776664213, "grad_norm": 0.8264893117506691, "learning_rate": 6.625895726456485e-06, "loss": 0.4125, "step": 6589 }, { "epoch": 0.4127844156657636, "grad_norm": 0.794308814266276, "learning_rate": 6.624936424440336e-06, "loss": 0.3859, "step": 6590 }, { "epoch": 0.41284705366510593, "grad_norm": 0.8292881482783633, "learning_rate": 6.623977055541502e-06, "loss": 0.3947, "step": 6591 }, { "epoch": 0.41290969166444824, "grad_norm": 0.7490474373147915, "learning_rate": 6.6230176197994725e-06, "loss": 0.3901, "step": 6592 }, { "epoch": 0.41297232966379055, "grad_norm": 0.8424190442301781, "learning_rate": 6.622058117253734e-06, "loss": 0.4047, "step": 6593 }, { "epoch": 0.41303496766313286, "grad_norm": 0.8640903314025081, "learning_rate": 6.6210985479437835e-06, "loss": 0.4845, "step": 6594 }, { "epoch": 0.41309760566247516, "grad_norm": 0.90897581308804, "learning_rate": 6.620138911909115e-06, "loss": 0.4509, "step": 6595 }, { "epoch": 0.4131602436618174, "grad_norm": 0.8123616582345513, "learning_rate": 6.61917920918923e-06, "loss": 0.3726, "step": 6596 }, { "epoch": 0.4132228816611597, "grad_norm": 0.8050288663668661, "learning_rate": 6.618219439823628e-06, "loss": 0.3942, "step": 6597 }, { "epoch": 0.41328551966050203, "grad_norm": 0.7779574738659958, "learning_rate": 6.617259603851812e-06, "loss": 0.4384, "step": 6598 }, { "epoch": 0.41334815765984434, "grad_norm": 0.7653938812955126, "learning_rate": 6.616299701313291e-06, "loss": 0.366, "step": 6599 }, { "epoch": 0.41341079565918665, "grad_norm": 0.7681726046323181, "learning_rate": 6.615339732247576e-06, "loss": 0.3819, "step": 6600 }, { "epoch": 0.41347343365852895, "grad_norm": 0.7969905879855728, "learning_rate": 6.6143796966941755e-06, "loss": 0.4159, "step": 6601 }, { "epoch": 0.41353607165787126, "grad_norm": 0.8250250927143103, "learning_rate": 6.6134195946926085e-06, "loss": 0.3927, "step": 6602 }, { "epoch": 0.41359870965721357, "grad_norm": 0.8559945620348838, "learning_rate": 6.612459426282391e-06, "loss": 0.3737, "step": 6603 }, { "epoch": 0.4136613476565558, "grad_norm": 0.7645042730108389, "learning_rate": 6.611499191503044e-06, "loss": 0.4729, "step": 6604 }, { "epoch": 0.41372398565589813, "grad_norm": 0.8492755387730233, "learning_rate": 6.610538890394091e-06, "loss": 0.4218, "step": 6605 }, { "epoch": 0.41378662365524044, "grad_norm": 0.8386782541071327, "learning_rate": 6.60957852299506e-06, "loss": 0.415, "step": 6606 }, { "epoch": 0.41384926165458275, "grad_norm": 0.8249576256152265, "learning_rate": 6.608618089345476e-06, "loss": 0.3622, "step": 6607 }, { "epoch": 0.41391189965392505, "grad_norm": 0.6697942124009073, "learning_rate": 6.607657589484875e-06, "loss": 0.4636, "step": 6608 }, { "epoch": 0.41397453765326736, "grad_norm": 0.8533191325035213, "learning_rate": 6.606697023452787e-06, "loss": 0.4113, "step": 6609 }, { "epoch": 0.41403717565260967, "grad_norm": 0.8020486649152421, "learning_rate": 6.6057363912887515e-06, "loss": 0.4141, "step": 6610 }, { "epoch": 0.414099813651952, "grad_norm": 0.8425326108118011, "learning_rate": 6.604775693032311e-06, "loss": 0.4112, "step": 6611 }, { "epoch": 0.4141624516512943, "grad_norm": 0.8898310145788394, "learning_rate": 6.603814928723002e-06, "loss": 0.4106, "step": 6612 }, { "epoch": 0.41422508965063654, "grad_norm": 0.8163350569023868, "learning_rate": 6.602854098400374e-06, "loss": 0.3982, "step": 6613 }, { "epoch": 0.41428772764997884, "grad_norm": 0.8041169083321699, "learning_rate": 6.601893202103972e-06, "loss": 0.3853, "step": 6614 }, { "epoch": 0.41435036564932115, "grad_norm": 0.8460207807637927, "learning_rate": 6.600932239873352e-06, "loss": 0.3807, "step": 6615 }, { "epoch": 0.41441300364866346, "grad_norm": 0.7547539334577168, "learning_rate": 6.599971211748062e-06, "loss": 0.3987, "step": 6616 }, { "epoch": 0.41447564164800577, "grad_norm": 0.7909763860448319, "learning_rate": 6.599010117767662e-06, "loss": 0.3942, "step": 6617 }, { "epoch": 0.4145382796473481, "grad_norm": 0.841642276368199, "learning_rate": 6.598048957971706e-06, "loss": 0.4276, "step": 6618 }, { "epoch": 0.4146009176466904, "grad_norm": 0.8112070290745198, "learning_rate": 6.597087732399757e-06, "loss": 0.4063, "step": 6619 }, { "epoch": 0.4146635556460327, "grad_norm": 0.7967074351557722, "learning_rate": 6.596126441091384e-06, "loss": 0.4174, "step": 6620 }, { "epoch": 0.41472619364537494, "grad_norm": 0.8073909480459928, "learning_rate": 6.595165084086149e-06, "loss": 0.3933, "step": 6621 }, { "epoch": 0.41478883164471725, "grad_norm": 0.8377080170116291, "learning_rate": 6.594203661423622e-06, "loss": 0.4, "step": 6622 }, { "epoch": 0.41485146964405956, "grad_norm": 0.8319657288467167, "learning_rate": 6.5932421731433775e-06, "loss": 0.4004, "step": 6623 }, { "epoch": 0.41491410764340186, "grad_norm": 0.7925344407713694, "learning_rate": 6.59228061928499e-06, "loss": 0.3886, "step": 6624 }, { "epoch": 0.41497674564274417, "grad_norm": 0.8289898349898772, "learning_rate": 6.591318999888037e-06, "loss": 0.4161, "step": 6625 }, { "epoch": 0.4150393836420865, "grad_norm": 0.831438737378566, "learning_rate": 6.590357314992097e-06, "loss": 0.3819, "step": 6626 }, { "epoch": 0.4151020216414288, "grad_norm": 0.8169475167688682, "learning_rate": 6.589395564636755e-06, "loss": 0.4124, "step": 6627 }, { "epoch": 0.4151646596407711, "grad_norm": 0.7846008027523949, "learning_rate": 6.588433748861597e-06, "loss": 0.3947, "step": 6628 }, { "epoch": 0.4152272976401134, "grad_norm": 0.7694977384117211, "learning_rate": 6.587471867706211e-06, "loss": 0.4098, "step": 6629 }, { "epoch": 0.41528993563945565, "grad_norm": 0.7789298920914114, "learning_rate": 6.586509921210189e-06, "loss": 0.3768, "step": 6630 }, { "epoch": 0.41535257363879796, "grad_norm": 0.8277993725929383, "learning_rate": 6.5855479094131225e-06, "loss": 0.3829, "step": 6631 }, { "epoch": 0.41541521163814027, "grad_norm": 0.786786777251904, "learning_rate": 6.584585832354611e-06, "loss": 0.4408, "step": 6632 }, { "epoch": 0.4154778496374826, "grad_norm": 0.8199623921950224, "learning_rate": 6.583623690074252e-06, "loss": 0.4443, "step": 6633 }, { "epoch": 0.4155404876368249, "grad_norm": 0.8535936684993918, "learning_rate": 6.582661482611649e-06, "loss": 0.3911, "step": 6634 }, { "epoch": 0.4156031256361672, "grad_norm": 0.75481011012588, "learning_rate": 6.581699210006405e-06, "loss": 0.3767, "step": 6635 }, { "epoch": 0.4156657636355095, "grad_norm": 0.903726756453755, "learning_rate": 6.580736872298127e-06, "loss": 0.4703, "step": 6636 }, { "epoch": 0.4157284016348518, "grad_norm": 0.8331850422746885, "learning_rate": 6.5797744695264264e-06, "loss": 0.3937, "step": 6637 }, { "epoch": 0.41579103963419406, "grad_norm": 0.8769583700053202, "learning_rate": 6.578812001730917e-06, "loss": 0.4115, "step": 6638 }, { "epoch": 0.41585367763353637, "grad_norm": 0.7409948385985751, "learning_rate": 6.5778494689512115e-06, "loss": 0.3942, "step": 6639 }, { "epoch": 0.4159163156328787, "grad_norm": 0.8128914529760798, "learning_rate": 6.576886871226929e-06, "loss": 0.3895, "step": 6640 }, { "epoch": 0.415978953632221, "grad_norm": 0.7872484146478291, "learning_rate": 6.575924208597688e-06, "loss": 0.4065, "step": 6641 }, { "epoch": 0.4160415916315633, "grad_norm": 0.8004994832461592, "learning_rate": 6.574961481103117e-06, "loss": 0.4065, "step": 6642 }, { "epoch": 0.4161042296309056, "grad_norm": 0.9802288315602005, "learning_rate": 6.573998688782839e-06, "loss": 0.4645, "step": 6643 }, { "epoch": 0.4161668676302479, "grad_norm": 0.7961281292657971, "learning_rate": 6.573035831676481e-06, "loss": 0.373, "step": 6644 }, { "epoch": 0.4162295056295902, "grad_norm": 0.8034502965200779, "learning_rate": 6.572072909823677e-06, "loss": 0.3983, "step": 6645 }, { "epoch": 0.41629214362893247, "grad_norm": 0.812063468063844, "learning_rate": 6.571109923264061e-06, "loss": 0.4102, "step": 6646 }, { "epoch": 0.4163547816282748, "grad_norm": 0.8686097070004032, "learning_rate": 6.570146872037268e-06, "loss": 0.387, "step": 6647 }, { "epoch": 0.4164174196276171, "grad_norm": 0.8465460884104319, "learning_rate": 6.5691837561829384e-06, "loss": 0.4213, "step": 6648 }, { "epoch": 0.4164800576269594, "grad_norm": 0.7565245396169756, "learning_rate": 6.568220575740716e-06, "loss": 0.3589, "step": 6649 }, { "epoch": 0.4165426956263017, "grad_norm": 0.8298868127230542, "learning_rate": 6.567257330750241e-06, "loss": 0.4108, "step": 6650 }, { "epoch": 0.416605333625644, "grad_norm": 0.7670218058891808, "learning_rate": 6.566294021251165e-06, "loss": 0.3801, "step": 6651 }, { "epoch": 0.4166679716249863, "grad_norm": 0.8209806823025345, "learning_rate": 6.5653306472831355e-06, "loss": 0.4394, "step": 6652 }, { "epoch": 0.4167306096243286, "grad_norm": 0.9484431125121513, "learning_rate": 6.5643672088858066e-06, "loss": 0.4203, "step": 6653 }, { "epoch": 0.4167932476236709, "grad_norm": 0.7934747192691989, "learning_rate": 6.563403706098833e-06, "loss": 0.3927, "step": 6654 }, { "epoch": 0.4168558856230132, "grad_norm": 0.8035398229323963, "learning_rate": 6.562440138961872e-06, "loss": 0.4231, "step": 6655 }, { "epoch": 0.4169185236223555, "grad_norm": 0.7212033977016938, "learning_rate": 6.561476507514586e-06, "loss": 0.3819, "step": 6656 }, { "epoch": 0.4169811616216978, "grad_norm": 0.7642487038631165, "learning_rate": 6.5605128117966354e-06, "loss": 0.4855, "step": 6657 }, { "epoch": 0.4170437996210401, "grad_norm": 0.776782263392631, "learning_rate": 6.559549051847689e-06, "loss": 0.3791, "step": 6658 }, { "epoch": 0.4171064376203824, "grad_norm": 0.8473543617498092, "learning_rate": 6.558585227707413e-06, "loss": 0.4763, "step": 6659 }, { "epoch": 0.4171690756197247, "grad_norm": 0.7822853693412366, "learning_rate": 6.557621339415482e-06, "loss": 0.3751, "step": 6660 }, { "epoch": 0.417231713619067, "grad_norm": 0.7685460032527632, "learning_rate": 6.556657387011565e-06, "loss": 0.3718, "step": 6661 }, { "epoch": 0.41729435161840933, "grad_norm": 0.8436628933018139, "learning_rate": 6.5556933705353435e-06, "loss": 0.4282, "step": 6662 }, { "epoch": 0.4173569896177516, "grad_norm": 0.8680033598140477, "learning_rate": 6.554729290026494e-06, "loss": 0.3808, "step": 6663 }, { "epoch": 0.4174196276170939, "grad_norm": 0.8531717126101542, "learning_rate": 6.553765145524696e-06, "loss": 0.4343, "step": 6664 }, { "epoch": 0.4174822656164362, "grad_norm": 0.8470400880007825, "learning_rate": 6.5528009370696385e-06, "loss": 0.3718, "step": 6665 }, { "epoch": 0.4175449036157785, "grad_norm": 0.8169941893059457, "learning_rate": 6.551836664701004e-06, "loss": 0.4082, "step": 6666 }, { "epoch": 0.4176075416151208, "grad_norm": 0.8205418571643569, "learning_rate": 6.550872328458486e-06, "loss": 0.4268, "step": 6667 }, { "epoch": 0.4176701796144631, "grad_norm": 0.842641354955678, "learning_rate": 6.549907928381774e-06, "loss": 0.4168, "step": 6668 }, { "epoch": 0.41773281761380543, "grad_norm": 0.7417743896707018, "learning_rate": 6.548943464510566e-06, "loss": 0.3666, "step": 6669 }, { "epoch": 0.41779545561314774, "grad_norm": 0.7857016178387277, "learning_rate": 6.547978936884556e-06, "loss": 0.3913, "step": 6670 }, { "epoch": 0.41785809361249004, "grad_norm": 0.8807542299642095, "learning_rate": 6.547014345543445e-06, "loss": 0.3978, "step": 6671 }, { "epoch": 0.4179207316118323, "grad_norm": 0.8494029855658913, "learning_rate": 6.5460496905269385e-06, "loss": 0.4159, "step": 6672 }, { "epoch": 0.4179833696111746, "grad_norm": 0.7846088215018209, "learning_rate": 6.545084971874738e-06, "loss": 0.4163, "step": 6673 }, { "epoch": 0.4180460076105169, "grad_norm": 0.8151891317545882, "learning_rate": 6.544120189626553e-06, "loss": 0.4131, "step": 6674 }, { "epoch": 0.4181086456098592, "grad_norm": 0.7829723554522454, "learning_rate": 6.543155343822096e-06, "loss": 0.3807, "step": 6675 }, { "epoch": 0.4181712836092015, "grad_norm": 0.6678582702122686, "learning_rate": 6.542190434501078e-06, "loss": 0.4524, "step": 6676 }, { "epoch": 0.41823392160854383, "grad_norm": 0.8607596534393027, "learning_rate": 6.541225461703215e-06, "loss": 0.4164, "step": 6677 }, { "epoch": 0.41829655960788614, "grad_norm": 0.8646741176309967, "learning_rate": 6.540260425468225e-06, "loss": 0.4263, "step": 6678 }, { "epoch": 0.41835919760722845, "grad_norm": 0.8423268437141658, "learning_rate": 6.539295325835833e-06, "loss": 0.4114, "step": 6679 }, { "epoch": 0.4184218356065707, "grad_norm": 0.8285069021239342, "learning_rate": 6.538330162845757e-06, "loss": 0.4032, "step": 6680 }, { "epoch": 0.418484473605913, "grad_norm": 0.8313041535102895, "learning_rate": 6.537364936537728e-06, "loss": 0.4342, "step": 6681 }, { "epoch": 0.4185471116052553, "grad_norm": 0.8577709441235413, "learning_rate": 6.536399646951471e-06, "loss": 0.4571, "step": 6682 }, { "epoch": 0.4186097496045976, "grad_norm": 0.86394076135085, "learning_rate": 6.535434294126719e-06, "loss": 0.4614, "step": 6683 }, { "epoch": 0.41867238760393993, "grad_norm": 0.8329853212961016, "learning_rate": 6.534468878103208e-06, "loss": 0.4441, "step": 6684 }, { "epoch": 0.41873502560328224, "grad_norm": 0.8950727022668018, "learning_rate": 6.533503398920673e-06, "loss": 0.3714, "step": 6685 }, { "epoch": 0.41879766360262455, "grad_norm": 0.7961478039852888, "learning_rate": 6.532537856618854e-06, "loss": 0.3722, "step": 6686 }, { "epoch": 0.41886030160196686, "grad_norm": 0.8242524157728592, "learning_rate": 6.53157225123749e-06, "loss": 0.4439, "step": 6687 }, { "epoch": 0.4189229396013091, "grad_norm": 0.8104490481211577, "learning_rate": 6.53060658281633e-06, "loss": 0.4134, "step": 6688 }, { "epoch": 0.4189855776006514, "grad_norm": 0.7752568004721767, "learning_rate": 6.529640851395119e-06, "loss": 0.4404, "step": 6689 }, { "epoch": 0.4190482155999937, "grad_norm": 0.7615444382149605, "learning_rate": 6.528675057013607e-06, "loss": 0.4309, "step": 6690 }, { "epoch": 0.41911085359933603, "grad_norm": 0.8250653129253606, "learning_rate": 6.527709199711543e-06, "loss": 0.3957, "step": 6691 }, { "epoch": 0.41917349159867834, "grad_norm": 0.7962681638429224, "learning_rate": 6.526743279528686e-06, "loss": 0.4268, "step": 6692 }, { "epoch": 0.41923612959802065, "grad_norm": 0.8499405400908971, "learning_rate": 6.525777296504794e-06, "loss": 0.4094, "step": 6693 }, { "epoch": 0.41929876759736295, "grad_norm": 0.6319741717404288, "learning_rate": 6.524811250679626e-06, "loss": 0.478, "step": 6694 }, { "epoch": 0.41936140559670526, "grad_norm": 0.8705243752106595, "learning_rate": 6.5238451420929404e-06, "loss": 0.3978, "step": 6695 }, { "epoch": 0.41942404359604757, "grad_norm": 0.832136802771255, "learning_rate": 6.522878970784507e-06, "loss": 0.4085, "step": 6696 }, { "epoch": 0.4194866815953898, "grad_norm": 0.8287207582792625, "learning_rate": 6.521912736794094e-06, "loss": 0.412, "step": 6697 }, { "epoch": 0.41954931959473213, "grad_norm": 0.8468918388100548, "learning_rate": 6.5209464401614695e-06, "loss": 0.4, "step": 6698 }, { "epoch": 0.41961195759407444, "grad_norm": 0.7313308166560301, "learning_rate": 6.519980080926407e-06, "loss": 0.376, "step": 6699 }, { "epoch": 0.41967459559341674, "grad_norm": 0.8381248287072699, "learning_rate": 6.519013659128683e-06, "loss": 0.4312, "step": 6700 }, { "epoch": 0.41973723359275905, "grad_norm": 0.805536316994159, "learning_rate": 6.518047174808073e-06, "loss": 0.3854, "step": 6701 }, { "epoch": 0.41979987159210136, "grad_norm": 0.8935739019856328, "learning_rate": 6.517080628004362e-06, "loss": 0.4558, "step": 6702 }, { "epoch": 0.41986250959144367, "grad_norm": 0.7694798298949629, "learning_rate": 6.5161140187573305e-06, "loss": 0.3621, "step": 6703 }, { "epoch": 0.419925147590786, "grad_norm": 0.8378758362306895, "learning_rate": 6.515147347106763e-06, "loss": 0.3926, "step": 6704 }, { "epoch": 0.4199877855901282, "grad_norm": 0.7983512634660449, "learning_rate": 6.514180613092449e-06, "loss": 0.4289, "step": 6705 }, { "epoch": 0.42005042358947053, "grad_norm": 0.7806819271285, "learning_rate": 6.513213816754181e-06, "loss": 0.4008, "step": 6706 }, { "epoch": 0.42011306158881284, "grad_norm": 0.7791492805029906, "learning_rate": 6.512246958131753e-06, "loss": 0.3929, "step": 6707 }, { "epoch": 0.42017569958815515, "grad_norm": 0.8503834433618165, "learning_rate": 6.511280037264956e-06, "loss": 0.416, "step": 6708 }, { "epoch": 0.42023833758749746, "grad_norm": 0.8048085688481647, "learning_rate": 6.510313054193596e-06, "loss": 0.3941, "step": 6709 }, { "epoch": 0.42030097558683976, "grad_norm": 0.8509911466094854, "learning_rate": 6.509346008957467e-06, "loss": 0.4275, "step": 6710 }, { "epoch": 0.42036361358618207, "grad_norm": 0.9019502745418838, "learning_rate": 6.508378901596378e-06, "loss": 0.4276, "step": 6711 }, { "epoch": 0.4204262515855244, "grad_norm": 0.7857641658867366, "learning_rate": 6.507411732150133e-06, "loss": 0.3514, "step": 6712 }, { "epoch": 0.42048888958486663, "grad_norm": 0.8605203153340836, "learning_rate": 6.506444500658542e-06, "loss": 0.4152, "step": 6713 }, { "epoch": 0.42055152758420894, "grad_norm": 0.7646712339608045, "learning_rate": 6.505477207161415e-06, "loss": 0.434, "step": 6714 }, { "epoch": 0.42061416558355125, "grad_norm": 0.7732738074539308, "learning_rate": 6.504509851698566e-06, "loss": 0.4161, "step": 6715 }, { "epoch": 0.42067680358289355, "grad_norm": 0.798487593092166, "learning_rate": 6.503542434309814e-06, "loss": 0.3762, "step": 6716 }, { "epoch": 0.42073944158223586, "grad_norm": 0.834619161787785, "learning_rate": 6.502574955034975e-06, "loss": 0.4253, "step": 6717 }, { "epoch": 0.42080207958157817, "grad_norm": 0.796233418700879, "learning_rate": 6.5016074139138726e-06, "loss": 0.3945, "step": 6718 }, { "epoch": 0.4208647175809205, "grad_norm": 0.8437648229532165, "learning_rate": 6.50063981098633e-06, "loss": 0.397, "step": 6719 }, { "epoch": 0.4209273555802628, "grad_norm": 0.807248195735066, "learning_rate": 6.499672146292174e-06, "loss": 0.4235, "step": 6720 }, { "epoch": 0.4209899935796051, "grad_norm": 0.7815773740912738, "learning_rate": 6.4987044198712346e-06, "loss": 0.378, "step": 6721 }, { "epoch": 0.42105263157894735, "grad_norm": 0.8754172754144552, "learning_rate": 6.497736631763343e-06, "loss": 0.4169, "step": 6722 }, { "epoch": 0.42111526957828965, "grad_norm": 0.8766831604721429, "learning_rate": 6.496768782008334e-06, "loss": 0.4111, "step": 6723 }, { "epoch": 0.42117790757763196, "grad_norm": 0.9266575135224986, "learning_rate": 6.495800870646043e-06, "loss": 0.4308, "step": 6724 }, { "epoch": 0.42124054557697427, "grad_norm": 0.7853433883646528, "learning_rate": 6.4948328977163115e-06, "loss": 0.4612, "step": 6725 }, { "epoch": 0.4213031835763166, "grad_norm": 0.7837661867196688, "learning_rate": 6.493864863258981e-06, "loss": 0.4023, "step": 6726 }, { "epoch": 0.4213658215756589, "grad_norm": 0.8405947761523033, "learning_rate": 6.492896767313894e-06, "loss": 0.4093, "step": 6727 }, { "epoch": 0.4214284595750012, "grad_norm": 0.8606963924303807, "learning_rate": 6.491928609920899e-06, "loss": 0.4308, "step": 6728 }, { "epoch": 0.4214910975743435, "grad_norm": 0.7973283467571539, "learning_rate": 6.490960391119847e-06, "loss": 0.421, "step": 6729 }, { "epoch": 0.42155373557368575, "grad_norm": 0.8603609378824779, "learning_rate": 6.489992110950587e-06, "loss": 0.3904, "step": 6730 }, { "epoch": 0.42161637357302806, "grad_norm": 0.8617448009170202, "learning_rate": 6.4890237694529765e-06, "loss": 0.4309, "step": 6731 }, { "epoch": 0.42167901157237037, "grad_norm": 0.8172823598643119, "learning_rate": 6.4880553666668714e-06, "loss": 0.4211, "step": 6732 }, { "epoch": 0.4217416495717127, "grad_norm": 0.8603401171511394, "learning_rate": 6.487086902632129e-06, "loss": 0.4012, "step": 6733 }, { "epoch": 0.421804287571055, "grad_norm": 0.7830637213457762, "learning_rate": 6.486118377388615e-06, "loss": 0.3513, "step": 6734 }, { "epoch": 0.4218669255703973, "grad_norm": 0.7795247617920754, "learning_rate": 6.485149790976194e-06, "loss": 0.3536, "step": 6735 }, { "epoch": 0.4219295635697396, "grad_norm": 0.763065264628062, "learning_rate": 6.484181143434732e-06, "loss": 0.367, "step": 6736 }, { "epoch": 0.4219922015690819, "grad_norm": 0.6296739864944059, "learning_rate": 6.483212434804096e-06, "loss": 0.4631, "step": 6737 }, { "epoch": 0.4220548395684242, "grad_norm": 0.8466829214514708, "learning_rate": 6.482243665124161e-06, "loss": 0.4318, "step": 6738 }, { "epoch": 0.42211747756776646, "grad_norm": 0.800798314651756, "learning_rate": 6.481274834434804e-06, "loss": 0.3696, "step": 6739 }, { "epoch": 0.42218011556710877, "grad_norm": 0.8060366512057189, "learning_rate": 6.480305942775899e-06, "loss": 0.3947, "step": 6740 }, { "epoch": 0.4222427535664511, "grad_norm": 0.8680048424354749, "learning_rate": 6.479336990187329e-06, "loss": 0.4436, "step": 6741 }, { "epoch": 0.4223053915657934, "grad_norm": 0.8558391772047974, "learning_rate": 6.4783679767089695e-06, "loss": 0.428, "step": 6742 }, { "epoch": 0.4223680295651357, "grad_norm": 0.9385171035296522, "learning_rate": 6.477398902380711e-06, "loss": 0.4058, "step": 6743 }, { "epoch": 0.422430667564478, "grad_norm": 0.8254797332392496, "learning_rate": 6.47642976724244e-06, "loss": 0.3722, "step": 6744 }, { "epoch": 0.4224933055638203, "grad_norm": 0.8731516323984473, "learning_rate": 6.475460571334047e-06, "loss": 0.3687, "step": 6745 }, { "epoch": 0.4225559435631626, "grad_norm": 0.8107747696149792, "learning_rate": 6.474491314695422e-06, "loss": 0.4059, "step": 6746 }, { "epoch": 0.42261858156250487, "grad_norm": 0.7231021466430687, "learning_rate": 6.473521997366459e-06, "loss": 0.4869, "step": 6747 }, { "epoch": 0.4226812195618472, "grad_norm": 0.8297975642858885, "learning_rate": 6.472552619387059e-06, "loss": 0.4022, "step": 6748 }, { "epoch": 0.4227438575611895, "grad_norm": 0.7835069251809557, "learning_rate": 6.471583180797121e-06, "loss": 0.3678, "step": 6749 }, { "epoch": 0.4228064955605318, "grad_norm": 0.8374668119651049, "learning_rate": 6.470613681636544e-06, "loss": 0.3818, "step": 6750 }, { "epoch": 0.4228691335598741, "grad_norm": 0.8982962035171322, "learning_rate": 6.469644121945235e-06, "loss": 0.4542, "step": 6751 }, { "epoch": 0.4229317715592164, "grad_norm": 0.8522534909264561, "learning_rate": 6.4686745017631014e-06, "loss": 0.4203, "step": 6752 }, { "epoch": 0.4229944095585587, "grad_norm": 0.8347608017265021, "learning_rate": 6.467704821130054e-06, "loss": 0.4105, "step": 6753 }, { "epoch": 0.423057047557901, "grad_norm": 0.8457171473115924, "learning_rate": 6.466735080086004e-06, "loss": 0.42, "step": 6754 }, { "epoch": 0.4231196855572433, "grad_norm": 0.8106574648533246, "learning_rate": 6.465765278670863e-06, "loss": 0.3758, "step": 6755 }, { "epoch": 0.4231823235565856, "grad_norm": 0.8757338958232477, "learning_rate": 6.464795416924553e-06, "loss": 0.4504, "step": 6756 }, { "epoch": 0.4232449615559279, "grad_norm": 0.843722748386492, "learning_rate": 6.463825494886992e-06, "loss": 0.4287, "step": 6757 }, { "epoch": 0.4233075995552702, "grad_norm": 0.838555189905878, "learning_rate": 6.462855512598102e-06, "loss": 0.4453, "step": 6758 }, { "epoch": 0.4233702375546125, "grad_norm": 0.8078690938975001, "learning_rate": 6.461885470097806e-06, "loss": 0.3805, "step": 6759 }, { "epoch": 0.4234328755539548, "grad_norm": 0.7912267406909008, "learning_rate": 6.460915367426034e-06, "loss": 0.3726, "step": 6760 }, { "epoch": 0.4234955135532971, "grad_norm": 0.8163401788937114, "learning_rate": 6.459945204622714e-06, "loss": 0.4325, "step": 6761 }, { "epoch": 0.42355815155263943, "grad_norm": 0.8017982749291754, "learning_rate": 6.458974981727779e-06, "loss": 0.4052, "step": 6762 }, { "epoch": 0.42362078955198174, "grad_norm": 0.8487116970709371, "learning_rate": 6.4580046987811615e-06, "loss": 0.4058, "step": 6763 }, { "epoch": 0.423683427551324, "grad_norm": 0.8231096543686165, "learning_rate": 6.457034355822802e-06, "loss": 0.4066, "step": 6764 }, { "epoch": 0.4237460655506663, "grad_norm": 0.7688286095399681, "learning_rate": 6.456063952892636e-06, "loss": 0.3838, "step": 6765 }, { "epoch": 0.4238087035500086, "grad_norm": 0.7890743721591518, "learning_rate": 6.455093490030609e-06, "loss": 0.4213, "step": 6766 }, { "epoch": 0.4238713415493509, "grad_norm": 0.7836332087538952, "learning_rate": 6.4541229672766636e-06, "loss": 0.3874, "step": 6767 }, { "epoch": 0.4239339795486932, "grad_norm": 0.7878090818476671, "learning_rate": 6.453152384670746e-06, "loss": 0.3887, "step": 6768 }, { "epoch": 0.4239966175480355, "grad_norm": 0.8115361757151384, "learning_rate": 6.452181742252808e-06, "loss": 0.4091, "step": 6769 }, { "epoch": 0.42405925554737783, "grad_norm": 0.8550250842418406, "learning_rate": 6.451211040062798e-06, "loss": 0.4143, "step": 6770 }, { "epoch": 0.42412189354672014, "grad_norm": 0.7435806177576935, "learning_rate": 6.450240278140674e-06, "loss": 0.3618, "step": 6771 }, { "epoch": 0.4241845315460624, "grad_norm": 0.8613752524881251, "learning_rate": 6.44926945652639e-06, "loss": 0.4216, "step": 6772 }, { "epoch": 0.4242471695454047, "grad_norm": 0.7985722185085116, "learning_rate": 6.448298575259907e-06, "loss": 0.4121, "step": 6773 }, { "epoch": 0.424309807544747, "grad_norm": 0.7390829512933778, "learning_rate": 6.4473276343811845e-06, "loss": 0.3495, "step": 6774 }, { "epoch": 0.4243724455440893, "grad_norm": 0.8317539299913498, "learning_rate": 6.446356633930188e-06, "loss": 0.4296, "step": 6775 }, { "epoch": 0.4244350835434316, "grad_norm": 0.8599628233863941, "learning_rate": 6.445385573946885e-06, "loss": 0.4168, "step": 6776 }, { "epoch": 0.42449772154277393, "grad_norm": 0.8455315724551722, "learning_rate": 6.444414454471244e-06, "loss": 0.4025, "step": 6777 }, { "epoch": 0.42456035954211624, "grad_norm": 0.8169096136367194, "learning_rate": 6.443443275543236e-06, "loss": 0.4052, "step": 6778 }, { "epoch": 0.42462299754145855, "grad_norm": 0.855111293577577, "learning_rate": 6.442472037202833e-06, "loss": 0.3614, "step": 6779 }, { "epoch": 0.42468563554080085, "grad_norm": 0.7575838337641814, "learning_rate": 6.441500739490014e-06, "loss": 0.3723, "step": 6780 }, { "epoch": 0.4247482735401431, "grad_norm": 0.879471996012059, "learning_rate": 6.440529382444758e-06, "loss": 0.4198, "step": 6781 }, { "epoch": 0.4248109115394854, "grad_norm": 0.789957786864866, "learning_rate": 6.439557966107044e-06, "loss": 0.4231, "step": 6782 }, { "epoch": 0.4248735495388277, "grad_norm": 0.7662323265558875, "learning_rate": 6.438586490516858e-06, "loss": 0.3978, "step": 6783 }, { "epoch": 0.42493618753817003, "grad_norm": 0.7935263431252171, "learning_rate": 6.4376149557141844e-06, "loss": 0.393, "step": 6784 }, { "epoch": 0.42499882553751234, "grad_norm": 0.7464332107523234, "learning_rate": 6.436643361739011e-06, "loss": 0.3976, "step": 6785 }, { "epoch": 0.42506146353685464, "grad_norm": 0.8219186916643447, "learning_rate": 6.435671708631332e-06, "loss": 0.4607, "step": 6786 }, { "epoch": 0.42512410153619695, "grad_norm": 0.8144169119790213, "learning_rate": 6.43469999643114e-06, "loss": 0.4062, "step": 6787 }, { "epoch": 0.42518673953553926, "grad_norm": 0.7349647213071364, "learning_rate": 6.433728225178428e-06, "loss": 0.489, "step": 6788 }, { "epoch": 0.4252493775348815, "grad_norm": 0.8527304023171687, "learning_rate": 6.432756394913195e-06, "loss": 0.4042, "step": 6789 }, { "epoch": 0.4253120155342238, "grad_norm": 0.8085449331150052, "learning_rate": 6.431784505675446e-06, "loss": 0.4047, "step": 6790 }, { "epoch": 0.4253746535335661, "grad_norm": 0.7527586308085201, "learning_rate": 6.43081255750518e-06, "loss": 0.3959, "step": 6791 }, { "epoch": 0.42543729153290843, "grad_norm": 0.8160378935602867, "learning_rate": 6.429840550442403e-06, "loss": 0.3798, "step": 6792 }, { "epoch": 0.42549992953225074, "grad_norm": 0.7718486788861412, "learning_rate": 6.428868484527123e-06, "loss": 0.3906, "step": 6793 }, { "epoch": 0.42556256753159305, "grad_norm": 0.8382602636118647, "learning_rate": 6.427896359799351e-06, "loss": 0.4299, "step": 6794 }, { "epoch": 0.42562520553093536, "grad_norm": 0.8044154313444061, "learning_rate": 6.426924176299101e-06, "loss": 0.3979, "step": 6795 }, { "epoch": 0.42568784353027767, "grad_norm": 0.7854621520653662, "learning_rate": 6.4259519340663865e-06, "loss": 0.4127, "step": 6796 }, { "epoch": 0.4257504815296199, "grad_norm": 0.869275173847781, "learning_rate": 6.424979633141225e-06, "loss": 0.394, "step": 6797 }, { "epoch": 0.4258131195289622, "grad_norm": 0.8183653395313533, "learning_rate": 6.424007273563637e-06, "loss": 0.4151, "step": 6798 }, { "epoch": 0.42587575752830453, "grad_norm": 0.7972980278288899, "learning_rate": 6.423034855373647e-06, "loss": 0.3739, "step": 6799 }, { "epoch": 0.42593839552764684, "grad_norm": 0.8039696908948583, "learning_rate": 6.422062378611278e-06, "loss": 0.4406, "step": 6800 }, { "epoch": 0.42600103352698915, "grad_norm": 0.7010323482420423, "learning_rate": 6.421089843316557e-06, "loss": 0.4665, "step": 6801 }, { "epoch": 0.42606367152633146, "grad_norm": 0.8207522844579148, "learning_rate": 6.420117249529514e-06, "loss": 0.4146, "step": 6802 }, { "epoch": 0.42612630952567376, "grad_norm": 0.8641371564308263, "learning_rate": 6.419144597290181e-06, "loss": 0.4091, "step": 6803 }, { "epoch": 0.42618894752501607, "grad_norm": 0.8134636550514323, "learning_rate": 6.418171886638595e-06, "loss": 0.3875, "step": 6804 }, { "epoch": 0.4262515855243584, "grad_norm": 0.8251512145729096, "learning_rate": 6.4171991176147895e-06, "loss": 0.4301, "step": 6805 }, { "epoch": 0.42631422352370063, "grad_norm": 0.815080124374866, "learning_rate": 6.416226290258805e-06, "loss": 0.3795, "step": 6806 }, { "epoch": 0.42637686152304294, "grad_norm": 0.8388019076304123, "learning_rate": 6.415253404610683e-06, "loss": 0.4487, "step": 6807 }, { "epoch": 0.42643949952238525, "grad_norm": 0.872520275656021, "learning_rate": 6.414280460710469e-06, "loss": 0.4093, "step": 6808 }, { "epoch": 0.42650213752172755, "grad_norm": 0.6604785758176509, "learning_rate": 6.41330745859821e-06, "loss": 0.4764, "step": 6809 }, { "epoch": 0.42656477552106986, "grad_norm": 0.8160738137890386, "learning_rate": 6.412334398313952e-06, "loss": 0.4083, "step": 6810 }, { "epoch": 0.42662741352041217, "grad_norm": 0.8353375350483991, "learning_rate": 6.411361279897747e-06, "loss": 0.4181, "step": 6811 }, { "epoch": 0.4266900515197545, "grad_norm": 0.8996932460682153, "learning_rate": 6.410388103389653e-06, "loss": 0.4194, "step": 6812 }, { "epoch": 0.4267526895190968, "grad_norm": 0.8203789263701763, "learning_rate": 6.409414868829722e-06, "loss": 0.3765, "step": 6813 }, { "epoch": 0.42681532751843904, "grad_norm": 0.816674296693075, "learning_rate": 6.408441576258012e-06, "loss": 0.3906, "step": 6814 }, { "epoch": 0.42687796551778134, "grad_norm": 0.8523894563760478, "learning_rate": 6.407468225714586e-06, "loss": 0.4227, "step": 6815 }, { "epoch": 0.42694060351712365, "grad_norm": 0.8175845818865566, "learning_rate": 6.406494817239506e-06, "loss": 0.3639, "step": 6816 }, { "epoch": 0.42700324151646596, "grad_norm": 0.8427684462395966, "learning_rate": 6.405521350872838e-06, "loss": 0.4144, "step": 6817 }, { "epoch": 0.42706587951580827, "grad_norm": 0.6699215677434645, "learning_rate": 6.404547826654652e-06, "loss": 0.4933, "step": 6818 }, { "epoch": 0.4271285175151506, "grad_norm": 0.7420171122088257, "learning_rate": 6.403574244625016e-06, "loss": 0.3638, "step": 6819 }, { "epoch": 0.4271911555144929, "grad_norm": 0.7990998201458179, "learning_rate": 6.402600604824003e-06, "loss": 0.3936, "step": 6820 }, { "epoch": 0.4272537935138352, "grad_norm": 0.7781053167019569, "learning_rate": 6.4016269072916895e-06, "loss": 0.3797, "step": 6821 }, { "epoch": 0.42731643151317744, "grad_norm": 0.8778116251007051, "learning_rate": 6.400653152068152e-06, "loss": 0.4591, "step": 6822 }, { "epoch": 0.42737906951251975, "grad_norm": 0.8429921511546111, "learning_rate": 6.3996793391934695e-06, "loss": 0.3936, "step": 6823 }, { "epoch": 0.42744170751186206, "grad_norm": 0.8686375561381724, "learning_rate": 6.398705468707728e-06, "loss": 0.4717, "step": 6824 }, { "epoch": 0.42750434551120436, "grad_norm": 0.8203556898543629, "learning_rate": 6.397731540651009e-06, "loss": 0.4059, "step": 6825 }, { "epoch": 0.42756698351054667, "grad_norm": 0.6893725244594692, "learning_rate": 6.3967575550634e-06, "loss": 0.4782, "step": 6826 }, { "epoch": 0.427629621509889, "grad_norm": 0.8617134697540916, "learning_rate": 6.395783511984991e-06, "loss": 0.4253, "step": 6827 }, { "epoch": 0.4276922595092313, "grad_norm": 0.7042699991482769, "learning_rate": 6.394809411455874e-06, "loss": 0.4741, "step": 6828 }, { "epoch": 0.4277548975085736, "grad_norm": 0.8124720210867789, "learning_rate": 6.393835253516142e-06, "loss": 0.4072, "step": 6829 }, { "epoch": 0.4278175355079159, "grad_norm": 0.8010614481459831, "learning_rate": 6.3928610382058906e-06, "loss": 0.3837, "step": 6830 }, { "epoch": 0.42788017350725815, "grad_norm": 0.7996460790754579, "learning_rate": 6.3918867655652224e-06, "loss": 0.4096, "step": 6831 }, { "epoch": 0.42794281150660046, "grad_norm": 0.7510575404543337, "learning_rate": 6.390912435634237e-06, "loss": 0.3774, "step": 6832 }, { "epoch": 0.42800544950594277, "grad_norm": 0.839891465272156, "learning_rate": 6.389938048453037e-06, "loss": 0.4052, "step": 6833 }, { "epoch": 0.4280680875052851, "grad_norm": 0.8043414372359128, "learning_rate": 6.388963604061729e-06, "loss": 0.3911, "step": 6834 }, { "epoch": 0.4281307255046274, "grad_norm": 0.8552553068650348, "learning_rate": 6.3879891025004204e-06, "loss": 0.4441, "step": 6835 }, { "epoch": 0.4281933635039697, "grad_norm": 0.7725017109750117, "learning_rate": 6.387014543809224e-06, "loss": 0.367, "step": 6836 }, { "epoch": 0.428256001503312, "grad_norm": 0.7396060360829241, "learning_rate": 6.386039928028251e-06, "loss": 0.4051, "step": 6837 }, { "epoch": 0.4283186395026543, "grad_norm": 0.8063519202546503, "learning_rate": 6.385065255197618e-06, "loss": 0.4174, "step": 6838 }, { "epoch": 0.42838127750199656, "grad_norm": 0.8094947426616946, "learning_rate": 6.38409052535744e-06, "loss": 0.398, "step": 6839 }, { "epoch": 0.42844391550133887, "grad_norm": 0.7984855414116593, "learning_rate": 6.383115738547841e-06, "loss": 0.3854, "step": 6840 }, { "epoch": 0.4285065535006812, "grad_norm": 0.8372834641642993, "learning_rate": 6.38214089480894e-06, "loss": 0.3832, "step": 6841 }, { "epoch": 0.4285691915000235, "grad_norm": 0.6452798753199825, "learning_rate": 6.3811659941808645e-06, "loss": 0.488, "step": 6842 }, { "epoch": 0.4286318294993658, "grad_norm": 0.821579383853831, "learning_rate": 6.38019103670374e-06, "loss": 0.4088, "step": 6843 }, { "epoch": 0.4286944674987081, "grad_norm": 0.821364010979275, "learning_rate": 6.379216022417695e-06, "loss": 0.3914, "step": 6844 }, { "epoch": 0.4287571054980504, "grad_norm": 0.8131595881035587, "learning_rate": 6.378240951362864e-06, "loss": 0.3853, "step": 6845 }, { "epoch": 0.4288197434973927, "grad_norm": 0.8261036295654737, "learning_rate": 6.377265823579379e-06, "loss": 0.471, "step": 6846 }, { "epoch": 0.428882381496735, "grad_norm": 0.8619126360178772, "learning_rate": 6.376290639107379e-06, "loss": 0.4221, "step": 6847 }, { "epoch": 0.4289450194960773, "grad_norm": 0.8220690470384581, "learning_rate": 6.375315397986999e-06, "loss": 0.402, "step": 6848 }, { "epoch": 0.4290076574954196, "grad_norm": 0.7934688647799508, "learning_rate": 6.37434010025838e-06, "loss": 0.3998, "step": 6849 }, { "epoch": 0.4290702954947619, "grad_norm": 0.6111558884699579, "learning_rate": 6.373364745961671e-06, "loss": 0.4647, "step": 6850 }, { "epoch": 0.4291329334941042, "grad_norm": 0.8017783054392705, "learning_rate": 6.372389335137013e-06, "loss": 0.4106, "step": 6851 }, { "epoch": 0.4291955714934465, "grad_norm": 0.8100912981134643, "learning_rate": 6.371413867824555e-06, "loss": 0.4496, "step": 6852 }, { "epoch": 0.4292582094927888, "grad_norm": 0.8136127842754445, "learning_rate": 6.3704383440644476e-06, "loss": 0.4267, "step": 6853 }, { "epoch": 0.4293208474921311, "grad_norm": 0.7930948597540477, "learning_rate": 6.369462763896843e-06, "loss": 0.3993, "step": 6854 }, { "epoch": 0.4293834854914734, "grad_norm": 0.7986654288650543, "learning_rate": 6.368487127361898e-06, "loss": 0.3865, "step": 6855 }, { "epoch": 0.4294461234908157, "grad_norm": 0.7725959414791697, "learning_rate": 6.367511434499769e-06, "loss": 0.3977, "step": 6856 }, { "epoch": 0.429508761490158, "grad_norm": 0.8091137140392908, "learning_rate": 6.366535685350615e-06, "loss": 0.4037, "step": 6857 }, { "epoch": 0.4295713994895003, "grad_norm": 1.0006894929353856, "learning_rate": 6.365559879954597e-06, "loss": 0.4427, "step": 6858 }, { "epoch": 0.4296340374888426, "grad_norm": 0.8034323109692486, "learning_rate": 6.364584018351882e-06, "loss": 0.4457, "step": 6859 }, { "epoch": 0.4296966754881849, "grad_norm": 0.8365934054471423, "learning_rate": 6.363608100582636e-06, "loss": 0.4023, "step": 6860 }, { "epoch": 0.4297593134875272, "grad_norm": 0.7970076080302468, "learning_rate": 6.362632126687027e-06, "loss": 0.3708, "step": 6861 }, { "epoch": 0.4298219514868695, "grad_norm": 0.8502454814481574, "learning_rate": 6.3616560967052265e-06, "loss": 0.4471, "step": 6862 }, { "epoch": 0.42988458948621183, "grad_norm": 0.6164643886013668, "learning_rate": 6.360680010677409e-06, "loss": 0.486, "step": 6863 }, { "epoch": 0.4299472274855541, "grad_norm": 0.8867837564842136, "learning_rate": 6.35970386864375e-06, "loss": 0.4629, "step": 6864 }, { "epoch": 0.4300098654848964, "grad_norm": 0.8061588079498566, "learning_rate": 6.358727670644428e-06, "loss": 0.4263, "step": 6865 }, { "epoch": 0.4300725034842387, "grad_norm": 0.8620675415569141, "learning_rate": 6.357751416719621e-06, "loss": 0.4385, "step": 6866 }, { "epoch": 0.430135141483581, "grad_norm": 0.9180294247158842, "learning_rate": 6.356775106909514e-06, "loss": 0.3896, "step": 6867 }, { "epoch": 0.4301977794829233, "grad_norm": 0.79562525910238, "learning_rate": 6.355798741254292e-06, "loss": 0.3909, "step": 6868 }, { "epoch": 0.4302604174822656, "grad_norm": 0.8242046816411843, "learning_rate": 6.354822319794142e-06, "loss": 0.3845, "step": 6869 }, { "epoch": 0.43032305548160793, "grad_norm": 0.8787025807221915, "learning_rate": 6.353845842569255e-06, "loss": 0.4509, "step": 6870 }, { "epoch": 0.43038569348095024, "grad_norm": 0.8770793165508883, "learning_rate": 6.35286930961982e-06, "loss": 0.4058, "step": 6871 }, { "epoch": 0.43044833148029255, "grad_norm": 0.6222329904513053, "learning_rate": 6.351892720986034e-06, "loss": 0.4626, "step": 6872 }, { "epoch": 0.4305109694796348, "grad_norm": 0.9067285289361099, "learning_rate": 6.350916076708095e-06, "loss": 0.4383, "step": 6873 }, { "epoch": 0.4305736074789771, "grad_norm": 0.823867226843081, "learning_rate": 6.3499393768261964e-06, "loss": 0.3873, "step": 6874 }, { "epoch": 0.4306362454783194, "grad_norm": 0.826824670249191, "learning_rate": 6.3489626213805435e-06, "loss": 0.4464, "step": 6875 }, { "epoch": 0.4306988834776617, "grad_norm": 0.7843957546580942, "learning_rate": 6.347985810411339e-06, "loss": 0.3806, "step": 6876 }, { "epoch": 0.43076152147700403, "grad_norm": 0.7983227142607546, "learning_rate": 6.347008943958788e-06, "loss": 0.4008, "step": 6877 }, { "epoch": 0.43082415947634634, "grad_norm": 0.8079203713460428, "learning_rate": 6.3460320220631e-06, "loss": 0.3831, "step": 6878 }, { "epoch": 0.43088679747568864, "grad_norm": 0.7894108824694991, "learning_rate": 6.345055044764482e-06, "loss": 0.3788, "step": 6879 }, { "epoch": 0.43094943547503095, "grad_norm": 0.807843020615278, "learning_rate": 6.344078012103149e-06, "loss": 0.412, "step": 6880 }, { "epoch": 0.4310120734743732, "grad_norm": 0.8154127489444869, "learning_rate": 6.343100924119316e-06, "loss": 0.4271, "step": 6881 }, { "epoch": 0.4310747114737155, "grad_norm": 0.8191563304214262, "learning_rate": 6.342123780853201e-06, "loss": 0.3946, "step": 6882 }, { "epoch": 0.4311373494730578, "grad_norm": 0.7899228225966077, "learning_rate": 6.341146582345021e-06, "loss": 0.4151, "step": 6883 }, { "epoch": 0.4311999874724001, "grad_norm": 0.807639521576959, "learning_rate": 6.340169328635e-06, "loss": 0.3852, "step": 6884 }, { "epoch": 0.43126262547174243, "grad_norm": 0.7762864068279623, "learning_rate": 6.339192019763358e-06, "loss": 0.3726, "step": 6885 }, { "epoch": 0.43132526347108474, "grad_norm": 0.8092873938417777, "learning_rate": 6.3382146557703265e-06, "loss": 0.4005, "step": 6886 }, { "epoch": 0.43138790147042705, "grad_norm": 0.7730750882073855, "learning_rate": 6.337237236696129e-06, "loss": 0.3919, "step": 6887 }, { "epoch": 0.43145053946976936, "grad_norm": 0.7996718953222396, "learning_rate": 6.336259762581e-06, "loss": 0.4222, "step": 6888 }, { "epoch": 0.43151317746911166, "grad_norm": 0.8210281093750281, "learning_rate": 6.3352822334651706e-06, "loss": 0.3862, "step": 6889 }, { "epoch": 0.4315758154684539, "grad_norm": 0.8488993816574733, "learning_rate": 6.334304649388879e-06, "loss": 0.4025, "step": 6890 }, { "epoch": 0.4316384534677962, "grad_norm": 0.807758424143934, "learning_rate": 6.333327010392359e-06, "loss": 0.4327, "step": 6891 }, { "epoch": 0.43170109146713853, "grad_norm": 0.8309698540736472, "learning_rate": 6.332349316515849e-06, "loss": 0.4215, "step": 6892 }, { "epoch": 0.43176372946648084, "grad_norm": 0.8897186307960483, "learning_rate": 6.3313715677995965e-06, "loss": 0.4214, "step": 6893 }, { "epoch": 0.43182636746582315, "grad_norm": 0.7766204125373786, "learning_rate": 6.330393764283842e-06, "loss": 0.402, "step": 6894 }, { "epoch": 0.43188900546516545, "grad_norm": 0.8859295441233831, "learning_rate": 6.329415906008835e-06, "loss": 0.4148, "step": 6895 }, { "epoch": 0.43195164346450776, "grad_norm": 0.8153603185752898, "learning_rate": 6.328437993014821e-06, "loss": 0.4015, "step": 6896 }, { "epoch": 0.43201428146385007, "grad_norm": 0.8862162777959827, "learning_rate": 6.327460025342054e-06, "loss": 0.4163, "step": 6897 }, { "epoch": 0.4320769194631923, "grad_norm": 0.8074215575482968, "learning_rate": 6.326482003030785e-06, "loss": 0.4448, "step": 6898 }, { "epoch": 0.43213955746253463, "grad_norm": 0.8125179713342575, "learning_rate": 6.3255039261212704e-06, "loss": 0.3622, "step": 6899 }, { "epoch": 0.43220219546187694, "grad_norm": 0.8645627364817535, "learning_rate": 6.3245257946537675e-06, "loss": 0.4287, "step": 6900 }, { "epoch": 0.43226483346121924, "grad_norm": 0.7895499991826834, "learning_rate": 6.323547608668539e-06, "loss": 0.3676, "step": 6901 }, { "epoch": 0.43232747146056155, "grad_norm": 0.8114698551545295, "learning_rate": 6.322569368205844e-06, "loss": 0.3978, "step": 6902 }, { "epoch": 0.43239010945990386, "grad_norm": 0.8612498104368931, "learning_rate": 6.321591073305948e-06, "loss": 0.4417, "step": 6903 }, { "epoch": 0.43245274745924617, "grad_norm": 0.8199571313294881, "learning_rate": 6.32061272400912e-06, "loss": 0.4072, "step": 6904 }, { "epoch": 0.4325153854585885, "grad_norm": 0.8329089235025339, "learning_rate": 6.3196343203556255e-06, "loss": 0.4355, "step": 6905 }, { "epoch": 0.4325780234579307, "grad_norm": 0.8350092123973486, "learning_rate": 6.3186558623857375e-06, "loss": 0.4214, "step": 6906 }, { "epoch": 0.43264066145727303, "grad_norm": 0.8884874175540013, "learning_rate": 6.317677350139731e-06, "loss": 0.4297, "step": 6907 }, { "epoch": 0.43270329945661534, "grad_norm": 0.8664476039054911, "learning_rate": 6.316698783657877e-06, "loss": 0.4313, "step": 6908 }, { "epoch": 0.43276593745595765, "grad_norm": 0.8171078770190862, "learning_rate": 6.315720162980459e-06, "loss": 0.4174, "step": 6909 }, { "epoch": 0.43282857545529996, "grad_norm": 0.8284317232936551, "learning_rate": 6.314741488147755e-06, "loss": 0.4311, "step": 6910 }, { "epoch": 0.43289121345464227, "grad_norm": 0.6883341999411612, "learning_rate": 6.313762759200047e-06, "loss": 0.4734, "step": 6911 }, { "epoch": 0.4329538514539846, "grad_norm": 0.6394260903554199, "learning_rate": 6.312783976177619e-06, "loss": 0.4676, "step": 6912 }, { "epoch": 0.4330164894533269, "grad_norm": 0.8467493908634026, "learning_rate": 6.311805139120759e-06, "loss": 0.409, "step": 6913 }, { "epoch": 0.4330791274526692, "grad_norm": 0.918911263881806, "learning_rate": 6.310826248069757e-06, "loss": 0.4173, "step": 6914 }, { "epoch": 0.43314176545201144, "grad_norm": 0.6608569512664294, "learning_rate": 6.309847303064904e-06, "loss": 0.4875, "step": 6915 }, { "epoch": 0.43320440345135375, "grad_norm": 0.8249450264759965, "learning_rate": 6.308868304146493e-06, "loss": 0.4188, "step": 6916 }, { "epoch": 0.43326704145069606, "grad_norm": 0.6296622598407157, "learning_rate": 6.307889251354817e-06, "loss": 0.4466, "step": 6917 }, { "epoch": 0.43332967945003836, "grad_norm": 0.7843353322775819, "learning_rate": 6.306910144730178e-06, "loss": 0.4363, "step": 6918 }, { "epoch": 0.43339231744938067, "grad_norm": 0.8732512478339876, "learning_rate": 6.305930984312876e-06, "loss": 0.4344, "step": 6919 }, { "epoch": 0.433454955448723, "grad_norm": 0.8299659476816825, "learning_rate": 6.304951770143212e-06, "loss": 0.4511, "step": 6920 }, { "epoch": 0.4335175934480653, "grad_norm": 0.8547374292388198, "learning_rate": 6.30397250226149e-06, "loss": 0.4332, "step": 6921 }, { "epoch": 0.4335802314474076, "grad_norm": 0.8832655479612412, "learning_rate": 6.302993180708017e-06, "loss": 0.4182, "step": 6922 }, { "epoch": 0.43364286944674985, "grad_norm": 0.8426994845835769, "learning_rate": 6.302013805523104e-06, "loss": 0.3841, "step": 6923 }, { "epoch": 0.43370550744609215, "grad_norm": 0.6691197317074452, "learning_rate": 6.3010343767470615e-06, "loss": 0.4734, "step": 6924 }, { "epoch": 0.43376814544543446, "grad_norm": 0.7561611679208823, "learning_rate": 6.300054894420202e-06, "loss": 0.3934, "step": 6925 }, { "epoch": 0.43383078344477677, "grad_norm": 0.8432805015653163, "learning_rate": 6.29907535858284e-06, "loss": 0.4428, "step": 6926 }, { "epoch": 0.4338934214441191, "grad_norm": 0.7952441438238136, "learning_rate": 6.2980957692752985e-06, "loss": 0.3848, "step": 6927 }, { "epoch": 0.4339560594434614, "grad_norm": 0.7716769868661375, "learning_rate": 6.297116126537892e-06, "loss": 0.4253, "step": 6928 }, { "epoch": 0.4340186974428037, "grad_norm": 0.7986796509894714, "learning_rate": 6.2961364304109465e-06, "loss": 0.4442, "step": 6929 }, { "epoch": 0.434081335442146, "grad_norm": 0.8778382384223775, "learning_rate": 6.295156680934785e-06, "loss": 0.4374, "step": 6930 }, { "epoch": 0.43414397344148825, "grad_norm": 0.8519406376329371, "learning_rate": 6.294176878149732e-06, "loss": 0.4117, "step": 6931 }, { "epoch": 0.43420661144083056, "grad_norm": 0.7824898858398567, "learning_rate": 6.29319702209612e-06, "loss": 0.4269, "step": 6932 }, { "epoch": 0.43426924944017287, "grad_norm": 0.8432887428184046, "learning_rate": 6.292217112814278e-06, "loss": 0.4567, "step": 6933 }, { "epoch": 0.4343318874395152, "grad_norm": 0.8684506327662309, "learning_rate": 6.29123715034454e-06, "loss": 0.4006, "step": 6934 }, { "epoch": 0.4343945254388575, "grad_norm": 0.7846403166625968, "learning_rate": 6.29025713472724e-06, "loss": 0.3989, "step": 6935 }, { "epoch": 0.4344571634381998, "grad_norm": 0.8284227794559181, "learning_rate": 6.2892770660027195e-06, "loss": 0.433, "step": 6936 }, { "epoch": 0.4345198014375421, "grad_norm": 0.803561402667832, "learning_rate": 6.288296944211314e-06, "loss": 0.3867, "step": 6937 }, { "epoch": 0.4345824394368844, "grad_norm": 0.8260260787516739, "learning_rate": 6.287316769393368e-06, "loss": 0.3993, "step": 6938 }, { "epoch": 0.4346450774362267, "grad_norm": 0.7917586376613079, "learning_rate": 6.286336541589224e-06, "loss": 0.3898, "step": 6939 }, { "epoch": 0.43470771543556896, "grad_norm": 0.7938304398882511, "learning_rate": 6.2853562608392285e-06, "loss": 0.3877, "step": 6940 }, { "epoch": 0.43477035343491127, "grad_norm": 0.8058927845736918, "learning_rate": 6.284375927183732e-06, "loss": 0.4014, "step": 6941 }, { "epoch": 0.4348329914342536, "grad_norm": 0.9042809960182784, "learning_rate": 6.283395540663084e-06, "loss": 0.4275, "step": 6942 }, { "epoch": 0.4348956294335959, "grad_norm": 0.8120417589560422, "learning_rate": 6.282415101317637e-06, "loss": 0.3809, "step": 6943 }, { "epoch": 0.4349582674329382, "grad_norm": 0.8340324645112046, "learning_rate": 6.281434609187747e-06, "loss": 0.4297, "step": 6944 }, { "epoch": 0.4350209054322805, "grad_norm": 0.8020535525952452, "learning_rate": 6.2804540643137695e-06, "loss": 0.4264, "step": 6945 }, { "epoch": 0.4350835434316228, "grad_norm": 0.8501913336186879, "learning_rate": 6.2794734667360656e-06, "loss": 0.4201, "step": 6946 }, { "epoch": 0.4351461814309651, "grad_norm": 0.846692004774439, "learning_rate": 6.278492816494995e-06, "loss": 0.4368, "step": 6947 }, { "epoch": 0.43520881943030737, "grad_norm": 0.8112919494609505, "learning_rate": 6.277512113630926e-06, "loss": 0.3915, "step": 6948 }, { "epoch": 0.4352714574296497, "grad_norm": 0.7959256653546355, "learning_rate": 6.276531358184218e-06, "loss": 0.3964, "step": 6949 }, { "epoch": 0.435334095428992, "grad_norm": 0.8202473052270659, "learning_rate": 6.275550550195244e-06, "loss": 0.4123, "step": 6950 }, { "epoch": 0.4353967334283343, "grad_norm": 0.8980415437413534, "learning_rate": 6.274569689704373e-06, "loss": 0.4471, "step": 6951 }, { "epoch": 0.4354593714276766, "grad_norm": 0.8302792049004983, "learning_rate": 6.273588776751975e-06, "loss": 0.4345, "step": 6952 }, { "epoch": 0.4355220094270189, "grad_norm": 0.8365463436189223, "learning_rate": 6.2726078113784284e-06, "loss": 0.3983, "step": 6953 }, { "epoch": 0.4355846474263612, "grad_norm": 0.7340888757020818, "learning_rate": 6.271626793624109e-06, "loss": 0.3648, "step": 6954 }, { "epoch": 0.4356472854257035, "grad_norm": 0.8283953968955111, "learning_rate": 6.270645723529392e-06, "loss": 0.44, "step": 6955 }, { "epoch": 0.43570992342504583, "grad_norm": 0.8713009131873362, "learning_rate": 6.269664601134661e-06, "loss": 0.4305, "step": 6956 }, { "epoch": 0.4357725614243881, "grad_norm": 0.832871528609332, "learning_rate": 6.268683426480302e-06, "loss": 0.4386, "step": 6957 }, { "epoch": 0.4358351994237304, "grad_norm": 0.7994330509045782, "learning_rate": 6.267702199606697e-06, "loss": 0.4523, "step": 6958 }, { "epoch": 0.4358978374230727, "grad_norm": 0.7877657331228953, "learning_rate": 6.266720920554233e-06, "loss": 0.4139, "step": 6959 }, { "epoch": 0.435960475422415, "grad_norm": 0.799533192999623, "learning_rate": 6.265739589363301e-06, "loss": 0.3951, "step": 6960 }, { "epoch": 0.4360231134217573, "grad_norm": 0.825096108890505, "learning_rate": 6.264758206074294e-06, "loss": 0.4302, "step": 6961 }, { "epoch": 0.4360857514210996, "grad_norm": 0.8473080845821608, "learning_rate": 6.263776770727604e-06, "loss": 0.4238, "step": 6962 }, { "epoch": 0.43614838942044193, "grad_norm": 0.8782002921307371, "learning_rate": 6.2627952833636265e-06, "loss": 0.4138, "step": 6963 }, { "epoch": 0.43621102741978424, "grad_norm": 0.6465523935638365, "learning_rate": 6.2618137440227596e-06, "loss": 0.4434, "step": 6964 }, { "epoch": 0.4362736654191265, "grad_norm": 0.7642540240802848, "learning_rate": 6.2608321527454085e-06, "loss": 0.3892, "step": 6965 }, { "epoch": 0.4363363034184688, "grad_norm": 0.8070645748189339, "learning_rate": 6.25985050957197e-06, "loss": 0.4008, "step": 6966 }, { "epoch": 0.4363989414178111, "grad_norm": 0.8553204884324167, "learning_rate": 6.258868814542851e-06, "loss": 0.4088, "step": 6967 }, { "epoch": 0.4364615794171534, "grad_norm": 0.7567228773731739, "learning_rate": 6.257887067698457e-06, "loss": 0.3953, "step": 6968 }, { "epoch": 0.4365242174164957, "grad_norm": 0.7369655284802369, "learning_rate": 6.256905269079197e-06, "loss": 0.4071, "step": 6969 }, { "epoch": 0.436586855415838, "grad_norm": 0.8592335998322596, "learning_rate": 6.255923418725486e-06, "loss": 0.4127, "step": 6970 }, { "epoch": 0.43664949341518033, "grad_norm": 0.8159754637055912, "learning_rate": 6.2549415166777325e-06, "loss": 0.3888, "step": 6971 }, { "epoch": 0.43671213141452264, "grad_norm": 0.7041956269694497, "learning_rate": 6.253959562976353e-06, "loss": 0.4492, "step": 6972 }, { "epoch": 0.4367747694138649, "grad_norm": 0.7959781547224999, "learning_rate": 6.252977557661764e-06, "loss": 0.384, "step": 6973 }, { "epoch": 0.4368374074132072, "grad_norm": 0.8178178305160714, "learning_rate": 6.251995500774388e-06, "loss": 0.4035, "step": 6974 }, { "epoch": 0.4369000454125495, "grad_norm": 0.8002605694834637, "learning_rate": 6.2510133923546445e-06, "loss": 0.4113, "step": 6975 }, { "epoch": 0.4369626834118918, "grad_norm": 0.7840288642498756, "learning_rate": 6.250031232442957e-06, "loss": 0.4024, "step": 6976 }, { "epoch": 0.4370253214112341, "grad_norm": 0.8269678301238208, "learning_rate": 6.24904902107975e-06, "loss": 0.4068, "step": 6977 }, { "epoch": 0.43708795941057643, "grad_norm": 0.845783556342557, "learning_rate": 6.2480667583054564e-06, "loss": 0.4549, "step": 6978 }, { "epoch": 0.43715059740991874, "grad_norm": 0.7815625329079047, "learning_rate": 6.247084444160504e-06, "loss": 0.4035, "step": 6979 }, { "epoch": 0.43721323540926105, "grad_norm": 0.8086138415722025, "learning_rate": 6.246102078685323e-06, "loss": 0.4537, "step": 6980 }, { "epoch": 0.43727587340860335, "grad_norm": 0.8125912838685965, "learning_rate": 6.245119661920348e-06, "loss": 0.3804, "step": 6981 }, { "epoch": 0.4373385114079456, "grad_norm": 0.8865659442286508, "learning_rate": 6.244137193906018e-06, "loss": 0.4462, "step": 6982 }, { "epoch": 0.4374011494072879, "grad_norm": 0.8597116700804651, "learning_rate": 6.24315467468277e-06, "loss": 0.4343, "step": 6983 }, { "epoch": 0.4374637874066302, "grad_norm": 0.6679193920616371, "learning_rate": 6.242172104291045e-06, "loss": 0.4629, "step": 6984 }, { "epoch": 0.43752642540597253, "grad_norm": 0.8377059242562396, "learning_rate": 6.241189482771285e-06, "loss": 0.4053, "step": 6985 }, { "epoch": 0.43758906340531484, "grad_norm": 0.9798094281639665, "learning_rate": 6.2402068101639355e-06, "loss": 0.4088, "step": 6986 }, { "epoch": 0.43765170140465715, "grad_norm": 0.7054656418975634, "learning_rate": 6.239224086509444e-06, "loss": 0.4601, "step": 6987 }, { "epoch": 0.43771433940399945, "grad_norm": 0.6445122958381644, "learning_rate": 6.23824131184826e-06, "loss": 0.4937, "step": 6988 }, { "epoch": 0.43777697740334176, "grad_norm": 0.8420880631988714, "learning_rate": 6.237258486220833e-06, "loss": 0.4096, "step": 6989 }, { "epoch": 0.437839615402684, "grad_norm": 0.8468437778884553, "learning_rate": 6.236275609667616e-06, "loss": 0.4371, "step": 6990 }, { "epoch": 0.4379022534020263, "grad_norm": 0.8069586661467938, "learning_rate": 6.235292682229066e-06, "loss": 0.369, "step": 6991 }, { "epoch": 0.43796489140136863, "grad_norm": 0.8533864506029613, "learning_rate": 6.23430970394564e-06, "loss": 0.4356, "step": 6992 }, { "epoch": 0.43802752940071094, "grad_norm": 0.8947234670955009, "learning_rate": 6.233326674857798e-06, "loss": 0.4401, "step": 6993 }, { "epoch": 0.43809016740005324, "grad_norm": 0.8503118105477868, "learning_rate": 6.232343595006001e-06, "loss": 0.4328, "step": 6994 }, { "epoch": 0.43815280539939555, "grad_norm": 0.7796105071986964, "learning_rate": 6.231360464430713e-06, "loss": 0.3828, "step": 6995 }, { "epoch": 0.43821544339873786, "grad_norm": 0.8247537944653608, "learning_rate": 6.230377283172399e-06, "loss": 0.4237, "step": 6996 }, { "epoch": 0.43827808139808017, "grad_norm": 0.8204477576902593, "learning_rate": 6.229394051271529e-06, "loss": 0.4059, "step": 6997 }, { "epoch": 0.4383407193974225, "grad_norm": 0.8392581248420808, "learning_rate": 6.228410768768569e-06, "loss": 0.4088, "step": 6998 }, { "epoch": 0.4384033573967647, "grad_norm": 0.7814993533212238, "learning_rate": 6.227427435703997e-06, "loss": 0.4789, "step": 6999 }, { "epoch": 0.43846599539610703, "grad_norm": 0.8482480648779913, "learning_rate": 6.226444052118281e-06, "loss": 0.4342, "step": 7000 }, { "epoch": 0.43852863339544934, "grad_norm": 0.8354755741080019, "learning_rate": 6.225460618051903e-06, "loss": 0.4437, "step": 7001 }, { "epoch": 0.43859127139479165, "grad_norm": 0.845584704059052, "learning_rate": 6.224477133545337e-06, "loss": 0.4231, "step": 7002 }, { "epoch": 0.43865390939413396, "grad_norm": 0.7836456583199372, "learning_rate": 6.223493598639064e-06, "loss": 0.4167, "step": 7003 }, { "epoch": 0.43871654739347626, "grad_norm": 0.8231558048763771, "learning_rate": 6.2225100133735686e-06, "loss": 0.433, "step": 7004 }, { "epoch": 0.43877918539281857, "grad_norm": 0.7500087693226566, "learning_rate": 6.221526377789334e-06, "loss": 0.3985, "step": 7005 }, { "epoch": 0.4388418233921609, "grad_norm": 0.9385883543602057, "learning_rate": 6.220542691926847e-06, "loss": 0.4102, "step": 7006 }, { "epoch": 0.43890446139150313, "grad_norm": 0.7618135721650444, "learning_rate": 6.219558955826597e-06, "loss": 0.4008, "step": 7007 }, { "epoch": 0.43896709939084544, "grad_norm": 0.8179436682968664, "learning_rate": 6.218575169529073e-06, "loss": 0.4555, "step": 7008 }, { "epoch": 0.43902973739018775, "grad_norm": 0.9234184974136109, "learning_rate": 6.217591333074769e-06, "loss": 0.3786, "step": 7009 }, { "epoch": 0.43909237538953005, "grad_norm": 0.8426266268075493, "learning_rate": 6.2166074465041794e-06, "loss": 0.3973, "step": 7010 }, { "epoch": 0.43915501338887236, "grad_norm": 0.8772702981467944, "learning_rate": 6.215623509857802e-06, "loss": 0.424, "step": 7011 }, { "epoch": 0.43921765138821467, "grad_norm": 0.8816344303042379, "learning_rate": 6.2146395231761355e-06, "loss": 0.4163, "step": 7012 }, { "epoch": 0.439280289387557, "grad_norm": 0.8712330180288441, "learning_rate": 6.213655486499681e-06, "loss": 0.401, "step": 7013 }, { "epoch": 0.4393429273868993, "grad_norm": 0.7876667822546431, "learning_rate": 6.2126713998689406e-06, "loss": 0.3925, "step": 7014 }, { "epoch": 0.43940556538624154, "grad_norm": 0.8539965078256253, "learning_rate": 6.211687263324421e-06, "loss": 0.412, "step": 7015 }, { "epoch": 0.43946820338558384, "grad_norm": 0.791486253456644, "learning_rate": 6.210703076906627e-06, "loss": 0.3823, "step": 7016 }, { "epoch": 0.43953084138492615, "grad_norm": 0.7939741575661771, "learning_rate": 6.209718840656072e-06, "loss": 0.4042, "step": 7017 }, { "epoch": 0.43959347938426846, "grad_norm": 0.8339649302648373, "learning_rate": 6.208734554613263e-06, "loss": 0.4122, "step": 7018 }, { "epoch": 0.43965611738361077, "grad_norm": 0.8729986003978393, "learning_rate": 6.207750218818716e-06, "loss": 0.4016, "step": 7019 }, { "epoch": 0.4397187553829531, "grad_norm": 0.8269813697846571, "learning_rate": 6.206765833312947e-06, "loss": 0.4151, "step": 7020 }, { "epoch": 0.4397813933822954, "grad_norm": 0.8717408650300136, "learning_rate": 6.205781398136471e-06, "loss": 0.4381, "step": 7021 }, { "epoch": 0.4398440313816377, "grad_norm": 0.7943589701578141, "learning_rate": 6.2047969133298105e-06, "loss": 0.3937, "step": 7022 }, { "epoch": 0.43990666938098, "grad_norm": 0.8297281900664509, "learning_rate": 6.203812378933482e-06, "loss": 0.4109, "step": 7023 }, { "epoch": 0.43996930738032225, "grad_norm": 0.793808672655192, "learning_rate": 6.202827794988016e-06, "loss": 0.3723, "step": 7024 }, { "epoch": 0.44003194537966456, "grad_norm": 0.8111247260112464, "learning_rate": 6.201843161533934e-06, "loss": 0.3945, "step": 7025 }, { "epoch": 0.44009458337900687, "grad_norm": 0.8476646595251701, "learning_rate": 6.200858478611763e-06, "loss": 0.4218, "step": 7026 }, { "epoch": 0.4401572213783492, "grad_norm": 0.823637498665479, "learning_rate": 6.199873746262035e-06, "loss": 0.4175, "step": 7027 }, { "epoch": 0.4402198593776915, "grad_norm": 0.8241892241828769, "learning_rate": 6.198888964525282e-06, "loss": 0.4042, "step": 7028 }, { "epoch": 0.4402824973770338, "grad_norm": 0.8950372845408145, "learning_rate": 6.197904133442034e-06, "loss": 0.4703, "step": 7029 }, { "epoch": 0.4403451353763761, "grad_norm": 0.8075587280883637, "learning_rate": 6.196919253052832e-06, "loss": 0.4392, "step": 7030 }, { "epoch": 0.4404077733757184, "grad_norm": 0.8372637489930024, "learning_rate": 6.195934323398211e-06, "loss": 0.4404, "step": 7031 }, { "epoch": 0.44047041137506066, "grad_norm": 0.7844948901612582, "learning_rate": 6.19494934451871e-06, "loss": 0.3717, "step": 7032 }, { "epoch": 0.44053304937440296, "grad_norm": 0.7661259958228197, "learning_rate": 6.193964316454872e-06, "loss": 0.3873, "step": 7033 }, { "epoch": 0.44059568737374527, "grad_norm": 0.6654014298246277, "learning_rate": 6.192979239247243e-06, "loss": 0.4508, "step": 7034 }, { "epoch": 0.4406583253730876, "grad_norm": 0.8284603324915125, "learning_rate": 6.191994112936367e-06, "loss": 0.4442, "step": 7035 }, { "epoch": 0.4407209633724299, "grad_norm": 0.8100722072631561, "learning_rate": 6.191008937562791e-06, "loss": 0.3742, "step": 7036 }, { "epoch": 0.4407836013717722, "grad_norm": 0.9382126723477926, "learning_rate": 6.1900237131670645e-06, "loss": 0.4305, "step": 7037 }, { "epoch": 0.4408462393711145, "grad_norm": 0.7966167220074114, "learning_rate": 6.1890384397897445e-06, "loss": 0.375, "step": 7038 }, { "epoch": 0.4409088773704568, "grad_norm": 0.8044826134925255, "learning_rate": 6.18805311747138e-06, "loss": 0.4008, "step": 7039 }, { "epoch": 0.4409715153697991, "grad_norm": 0.8346667219780086, "learning_rate": 6.187067746252529e-06, "loss": 0.4731, "step": 7040 }, { "epoch": 0.44103415336914137, "grad_norm": 0.9486943430285524, "learning_rate": 6.186082326173748e-06, "loss": 0.4495, "step": 7041 }, { "epoch": 0.4410967913684837, "grad_norm": 0.9002160113070622, "learning_rate": 6.1850968572755985e-06, "loss": 0.4449, "step": 7042 }, { "epoch": 0.441159429367826, "grad_norm": 0.8917996670726691, "learning_rate": 6.184111339598644e-06, "loss": 0.4243, "step": 7043 }, { "epoch": 0.4412220673671683, "grad_norm": 0.7876695825568715, "learning_rate": 6.183125773183447e-06, "loss": 0.3852, "step": 7044 }, { "epoch": 0.4412847053665106, "grad_norm": 0.8209011410864507, "learning_rate": 6.182140158070572e-06, "loss": 0.4558, "step": 7045 }, { "epoch": 0.4413473433658529, "grad_norm": 0.8055283829149532, "learning_rate": 6.181154494300589e-06, "loss": 0.4122, "step": 7046 }, { "epoch": 0.4414099813651952, "grad_norm": 0.8386754764367852, "learning_rate": 6.180168781914069e-06, "loss": 0.3956, "step": 7047 }, { "epoch": 0.4414726193645375, "grad_norm": 0.7829288385910596, "learning_rate": 6.179183020951581e-06, "loss": 0.3571, "step": 7048 }, { "epoch": 0.4415352573638798, "grad_norm": 0.7903657141482003, "learning_rate": 6.178197211453703e-06, "loss": 0.3876, "step": 7049 }, { "epoch": 0.4415978953632221, "grad_norm": 0.8510330803816678, "learning_rate": 6.177211353461007e-06, "loss": 0.4695, "step": 7050 }, { "epoch": 0.4416605333625644, "grad_norm": 0.7514687411352243, "learning_rate": 6.176225447014075e-06, "loss": 0.3567, "step": 7051 }, { "epoch": 0.4417231713619067, "grad_norm": 0.8361808760500966, "learning_rate": 6.175239492153484e-06, "loss": 0.4143, "step": 7052 }, { "epoch": 0.441785809361249, "grad_norm": 0.8312595994143652, "learning_rate": 6.1742534889198195e-06, "loss": 0.4252, "step": 7053 }, { "epoch": 0.4418484473605913, "grad_norm": 0.8273272297968065, "learning_rate": 6.17326743735366e-06, "loss": 0.3879, "step": 7054 }, { "epoch": 0.4419110853599336, "grad_norm": 0.8957439987003355, "learning_rate": 6.172281337495597e-06, "loss": 0.427, "step": 7055 }, { "epoch": 0.4419737233592759, "grad_norm": 0.8378695546485152, "learning_rate": 6.171295189386217e-06, "loss": 0.4504, "step": 7056 }, { "epoch": 0.4420363613586182, "grad_norm": 0.8142648776410771, "learning_rate": 6.17030899306611e-06, "loss": 0.44, "step": 7057 }, { "epoch": 0.4420989993579605, "grad_norm": 0.8888622046876056, "learning_rate": 6.169322748575866e-06, "loss": 0.4228, "step": 7058 }, { "epoch": 0.4421616373573028, "grad_norm": 0.8924114498524893, "learning_rate": 6.168336455956081e-06, "loss": 0.4487, "step": 7059 }, { "epoch": 0.4422242753566451, "grad_norm": 0.7947379330436075, "learning_rate": 6.1673501152473506e-06, "loss": 0.4063, "step": 7060 }, { "epoch": 0.4422869133559874, "grad_norm": 0.8750140277668756, "learning_rate": 6.166363726490274e-06, "loss": 0.4125, "step": 7061 }, { "epoch": 0.4423495513553297, "grad_norm": 0.8652046370207572, "learning_rate": 6.1653772897254485e-06, "loss": 0.4216, "step": 7062 }, { "epoch": 0.442412189354672, "grad_norm": 0.7786271341003169, "learning_rate": 6.164390804993479e-06, "loss": 0.342, "step": 7063 }, { "epoch": 0.44247482735401433, "grad_norm": 0.8524555914423714, "learning_rate": 6.1634042723349655e-06, "loss": 0.4145, "step": 7064 }, { "epoch": 0.44253746535335664, "grad_norm": 0.6989181806113666, "learning_rate": 6.162417691790518e-06, "loss": 0.4717, "step": 7065 }, { "epoch": 0.4426001033526989, "grad_norm": 0.8671545098027121, "learning_rate": 6.161431063400742e-06, "loss": 0.4458, "step": 7066 }, { "epoch": 0.4426627413520412, "grad_norm": 0.8118286067204178, "learning_rate": 6.160444387206248e-06, "loss": 0.4067, "step": 7067 }, { "epoch": 0.4427253793513835, "grad_norm": 0.8381321645901203, "learning_rate": 6.159457663247647e-06, "loss": 0.4161, "step": 7068 }, { "epoch": 0.4427880173507258, "grad_norm": 0.8464990252102608, "learning_rate": 6.1584708915655545e-06, "loss": 0.4378, "step": 7069 }, { "epoch": 0.4428506553500681, "grad_norm": 0.7690046109671237, "learning_rate": 6.157484072200585e-06, "loss": 0.3996, "step": 7070 }, { "epoch": 0.44291329334941043, "grad_norm": 0.6721179732010419, "learning_rate": 6.156497205193356e-06, "loss": 0.4888, "step": 7071 }, { "epoch": 0.44297593134875274, "grad_norm": 0.7692889966518451, "learning_rate": 6.155510290584486e-06, "loss": 0.369, "step": 7072 }, { "epoch": 0.44303856934809505, "grad_norm": 0.8221161859308744, "learning_rate": 6.1545233284146015e-06, "loss": 0.3673, "step": 7073 }, { "epoch": 0.4431012073474373, "grad_norm": 0.7663887119948747, "learning_rate": 6.153536318724319e-06, "loss": 0.3636, "step": 7074 }, { "epoch": 0.4431638453467796, "grad_norm": 0.8082237934145556, "learning_rate": 6.1525492615542685e-06, "loss": 0.4197, "step": 7075 }, { "epoch": 0.4432264833461219, "grad_norm": 0.8311838647353378, "learning_rate": 6.151562156945077e-06, "loss": 0.3979, "step": 7076 }, { "epoch": 0.4432891213454642, "grad_norm": 0.8899660595164237, "learning_rate": 6.150575004937374e-06, "loss": 0.3904, "step": 7077 }, { "epoch": 0.44335175934480653, "grad_norm": 0.8831380240058878, "learning_rate": 6.149587805571787e-06, "loss": 0.397, "step": 7078 }, { "epoch": 0.44341439734414884, "grad_norm": 0.8375994784202399, "learning_rate": 6.148600558888956e-06, "loss": 0.3914, "step": 7079 }, { "epoch": 0.44347703534349114, "grad_norm": 0.7003251154014396, "learning_rate": 6.14761326492951e-06, "loss": 0.4788, "step": 7080 }, { "epoch": 0.44353967334283345, "grad_norm": 0.922495136605924, "learning_rate": 6.146625923734092e-06, "loss": 0.388, "step": 7081 }, { "epoch": 0.4436023113421757, "grad_norm": 0.8111315278702687, "learning_rate": 6.145638535343338e-06, "loss": 0.4237, "step": 7082 }, { "epoch": 0.443664949341518, "grad_norm": 0.8628504554053189, "learning_rate": 6.144651099797887e-06, "loss": 0.4651, "step": 7083 }, { "epoch": 0.4437275873408603, "grad_norm": 0.8315956467854672, "learning_rate": 6.143663617138385e-06, "loss": 0.3949, "step": 7084 }, { "epoch": 0.4437902253402026, "grad_norm": 0.8728054750601754, "learning_rate": 6.142676087405477e-06, "loss": 0.4237, "step": 7085 }, { "epoch": 0.44385286333954493, "grad_norm": 0.7861948733351488, "learning_rate": 6.14168851063981e-06, "loss": 0.3875, "step": 7086 }, { "epoch": 0.44391550133888724, "grad_norm": 0.8732427302054222, "learning_rate": 6.140700886882029e-06, "loss": 0.4207, "step": 7087 }, { "epoch": 0.44397813933822955, "grad_norm": 0.8371482910077233, "learning_rate": 6.139713216172789e-06, "loss": 0.4046, "step": 7088 }, { "epoch": 0.44404077733757186, "grad_norm": 0.8616797855785567, "learning_rate": 6.1387254985527425e-06, "loss": 0.4082, "step": 7089 }, { "epoch": 0.44410341533691416, "grad_norm": 0.8244726489077445, "learning_rate": 6.137737734062544e-06, "loss": 0.3983, "step": 7090 }, { "epoch": 0.4441660533362564, "grad_norm": 0.9133433565493906, "learning_rate": 6.1367499227428476e-06, "loss": 0.4181, "step": 7091 }, { "epoch": 0.4442286913355987, "grad_norm": 0.8051598304697611, "learning_rate": 6.135762064634313e-06, "loss": 0.4149, "step": 7092 }, { "epoch": 0.44429132933494103, "grad_norm": 0.6305433656807986, "learning_rate": 6.134774159777601e-06, "loss": 0.4569, "step": 7093 }, { "epoch": 0.44435396733428334, "grad_norm": 0.8353451585065521, "learning_rate": 6.133786208213375e-06, "loss": 0.3886, "step": 7094 }, { "epoch": 0.44441660533362565, "grad_norm": 0.7931094713157375, "learning_rate": 6.132798209982298e-06, "loss": 0.4214, "step": 7095 }, { "epoch": 0.44447924333296795, "grad_norm": 0.8407195364194993, "learning_rate": 6.1318101651250354e-06, "loss": 0.3608, "step": 7096 }, { "epoch": 0.44454188133231026, "grad_norm": 0.7355747914886198, "learning_rate": 6.130822073682257e-06, "loss": 0.3761, "step": 7097 }, { "epoch": 0.44460451933165257, "grad_norm": 0.8022334593950333, "learning_rate": 6.129833935694632e-06, "loss": 0.3849, "step": 7098 }, { "epoch": 0.4446671573309948, "grad_norm": 0.7807187889242819, "learning_rate": 6.128845751202833e-06, "loss": 0.3871, "step": 7099 }, { "epoch": 0.44472979533033713, "grad_norm": 0.8199217000145429, "learning_rate": 6.127857520247535e-06, "loss": 0.4195, "step": 7100 }, { "epoch": 0.44479243332967944, "grad_norm": 0.786232408065461, "learning_rate": 6.126869242869408e-06, "loss": 0.4033, "step": 7101 }, { "epoch": 0.44485507132902175, "grad_norm": 0.7573738818427667, "learning_rate": 6.125880919109136e-06, "loss": 0.387, "step": 7102 }, { "epoch": 0.44491770932836405, "grad_norm": 0.8872590705638186, "learning_rate": 6.1248925490073975e-06, "loss": 0.4173, "step": 7103 }, { "epoch": 0.44498034732770636, "grad_norm": 0.8557675893489266, "learning_rate": 6.123904132604872e-06, "loss": 0.422, "step": 7104 }, { "epoch": 0.44504298532704867, "grad_norm": 0.8935823086683431, "learning_rate": 6.122915669942243e-06, "loss": 0.416, "step": 7105 }, { "epoch": 0.445105623326391, "grad_norm": 0.8226319645255328, "learning_rate": 6.121927161060197e-06, "loss": 0.4042, "step": 7106 }, { "epoch": 0.4451682613257333, "grad_norm": 0.8455922543170097, "learning_rate": 6.120938605999421e-06, "loss": 0.3987, "step": 7107 }, { "epoch": 0.44523089932507554, "grad_norm": 0.8314268159770023, "learning_rate": 6.1199500048006056e-06, "loss": 0.4077, "step": 7108 }, { "epoch": 0.44529353732441784, "grad_norm": 0.8563341188633548, "learning_rate": 6.1189613575044374e-06, "loss": 0.3843, "step": 7109 }, { "epoch": 0.44535617532376015, "grad_norm": 0.8796905952891079, "learning_rate": 6.117972664151613e-06, "loss": 0.4186, "step": 7110 }, { "epoch": 0.44541881332310246, "grad_norm": 0.6207557589327003, "learning_rate": 6.116983924782827e-06, "loss": 0.4859, "step": 7111 }, { "epoch": 0.44548145132244477, "grad_norm": 0.8090496328075467, "learning_rate": 6.115995139438774e-06, "loss": 0.3916, "step": 7112 }, { "epoch": 0.4455440893217871, "grad_norm": 0.7784797093815878, "learning_rate": 6.115006308160155e-06, "loss": 0.3775, "step": 7113 }, { "epoch": 0.4456067273211294, "grad_norm": 0.8316654879296358, "learning_rate": 6.1140174309876685e-06, "loss": 0.4329, "step": 7114 }, { "epoch": 0.4456693653204717, "grad_norm": 0.7831812235167461, "learning_rate": 6.113028507962018e-06, "loss": 0.3742, "step": 7115 }, { "epoch": 0.44573200331981394, "grad_norm": 0.8472809942638903, "learning_rate": 6.112039539123909e-06, "loss": 0.3854, "step": 7116 }, { "epoch": 0.44579464131915625, "grad_norm": 0.8551334578736515, "learning_rate": 6.111050524514045e-06, "loss": 0.4333, "step": 7117 }, { "epoch": 0.44585727931849856, "grad_norm": 0.8285727018897218, "learning_rate": 6.1100614641731345e-06, "loss": 0.407, "step": 7118 }, { "epoch": 0.44591991731784086, "grad_norm": 0.8767133011964767, "learning_rate": 6.1090723581418894e-06, "loss": 0.4078, "step": 7119 }, { "epoch": 0.44598255531718317, "grad_norm": 0.8925871251992905, "learning_rate": 6.10808320646102e-06, "loss": 0.4299, "step": 7120 }, { "epoch": 0.4460451933165255, "grad_norm": 0.8630030257422803, "learning_rate": 6.10709400917124e-06, "loss": 0.4011, "step": 7121 }, { "epoch": 0.4461078313158678, "grad_norm": 0.8661217069914453, "learning_rate": 6.106104766313264e-06, "loss": 0.3895, "step": 7122 }, { "epoch": 0.4461704693152101, "grad_norm": 0.8219584955515756, "learning_rate": 6.105115477927811e-06, "loss": 0.3723, "step": 7123 }, { "epoch": 0.44623310731455235, "grad_norm": 0.810400339214137, "learning_rate": 6.1041261440556e-06, "loss": 0.3856, "step": 7124 }, { "epoch": 0.44629574531389465, "grad_norm": 0.7332494836448255, "learning_rate": 6.103136764737353e-06, "loss": 0.4886, "step": 7125 }, { "epoch": 0.44635838331323696, "grad_norm": 0.8229785782585796, "learning_rate": 6.102147340013791e-06, "loss": 0.3981, "step": 7126 }, { "epoch": 0.44642102131257927, "grad_norm": 0.7974142041689148, "learning_rate": 6.10115786992564e-06, "loss": 0.3944, "step": 7127 }, { "epoch": 0.4464836593119216, "grad_norm": 0.851538263913061, "learning_rate": 6.100168354513627e-06, "loss": 0.4355, "step": 7128 }, { "epoch": 0.4465462973112639, "grad_norm": 0.9040107065489676, "learning_rate": 6.099178793818479e-06, "loss": 0.4036, "step": 7129 }, { "epoch": 0.4466089353106062, "grad_norm": 0.7778193373558036, "learning_rate": 6.09818918788093e-06, "loss": 0.4137, "step": 7130 }, { "epoch": 0.4466715733099485, "grad_norm": 0.8540810300086795, "learning_rate": 6.0971995367417085e-06, "loss": 0.4098, "step": 7131 }, { "epoch": 0.4467342113092908, "grad_norm": 0.6395549141442667, "learning_rate": 6.096209840441552e-06, "loss": 0.4579, "step": 7132 }, { "epoch": 0.44679684930863306, "grad_norm": 0.8063951118329427, "learning_rate": 6.095220099021193e-06, "loss": 0.4037, "step": 7133 }, { "epoch": 0.44685948730797537, "grad_norm": 0.8346446461995014, "learning_rate": 6.094230312521372e-06, "loss": 0.4596, "step": 7134 }, { "epoch": 0.4469221253073177, "grad_norm": 0.792131783463099, "learning_rate": 6.093240480982828e-06, "loss": 0.4256, "step": 7135 }, { "epoch": 0.44698476330666, "grad_norm": 0.6466205172707459, "learning_rate": 6.092250604446305e-06, "loss": 0.4516, "step": 7136 }, { "epoch": 0.4470474013060023, "grad_norm": 0.8295242508407908, "learning_rate": 6.0912606829525435e-06, "loss": 0.4278, "step": 7137 }, { "epoch": 0.4471100393053446, "grad_norm": 0.856339321192567, "learning_rate": 6.090270716542288e-06, "loss": 0.4303, "step": 7138 }, { "epoch": 0.4471726773046869, "grad_norm": 0.7833189263819842, "learning_rate": 6.089280705256289e-06, "loss": 0.4051, "step": 7139 }, { "epoch": 0.4472353153040292, "grad_norm": 0.8268614721305938, "learning_rate": 6.088290649135293e-06, "loss": 0.3989, "step": 7140 }, { "epoch": 0.44729795330337146, "grad_norm": 0.8238451488559723, "learning_rate": 6.087300548220053e-06, "loss": 0.3845, "step": 7141 }, { "epoch": 0.4473605913027138, "grad_norm": 0.8090677457386504, "learning_rate": 6.086310402551321e-06, "loss": 0.4101, "step": 7142 }, { "epoch": 0.4474232293020561, "grad_norm": 0.9024095421453966, "learning_rate": 6.08532021216985e-06, "loss": 0.4349, "step": 7143 }, { "epoch": 0.4474858673013984, "grad_norm": 0.8508369587450704, "learning_rate": 6.084329977116398e-06, "loss": 0.4242, "step": 7144 }, { "epoch": 0.4475485053007407, "grad_norm": 0.8735107205836279, "learning_rate": 6.083339697431723e-06, "loss": 0.4154, "step": 7145 }, { "epoch": 0.447611143300083, "grad_norm": 0.8050134215608332, "learning_rate": 6.0823493731565855e-06, "loss": 0.414, "step": 7146 }, { "epoch": 0.4476737812994253, "grad_norm": 0.8322143378368125, "learning_rate": 6.081359004331745e-06, "loss": 0.392, "step": 7147 }, { "epoch": 0.4477364192987676, "grad_norm": 0.6591606008588861, "learning_rate": 6.080368590997968e-06, "loss": 0.4666, "step": 7148 }, { "epoch": 0.4477990572981099, "grad_norm": 0.8705855803952741, "learning_rate": 6.07937813319602e-06, "loss": 0.3927, "step": 7149 }, { "epoch": 0.4478616952974522, "grad_norm": 0.8191001997004829, "learning_rate": 6.078387630966668e-06, "loss": 0.4403, "step": 7150 }, { "epoch": 0.4479243332967945, "grad_norm": 0.8244242339645655, "learning_rate": 6.077397084350681e-06, "loss": 0.4073, "step": 7151 }, { "epoch": 0.4479869712961368, "grad_norm": 0.6145455597775608, "learning_rate": 6.076406493388829e-06, "loss": 0.4494, "step": 7152 }, { "epoch": 0.4480496092954791, "grad_norm": 0.8049483822742195, "learning_rate": 6.075415858121885e-06, "loss": 0.4682, "step": 7153 }, { "epoch": 0.4481122472948214, "grad_norm": 0.8521312582594768, "learning_rate": 6.074425178590627e-06, "loss": 0.4282, "step": 7154 }, { "epoch": 0.4481748852941637, "grad_norm": 0.7687532391203126, "learning_rate": 6.073434454835828e-06, "loss": 0.381, "step": 7155 }, { "epoch": 0.448237523293506, "grad_norm": 0.8563825254080027, "learning_rate": 6.072443686898267e-06, "loss": 0.3799, "step": 7156 }, { "epoch": 0.44830016129284833, "grad_norm": 0.8552363984042972, "learning_rate": 6.071452874818726e-06, "loss": 0.4109, "step": 7157 }, { "epoch": 0.4483627992921906, "grad_norm": 0.8192543934537149, "learning_rate": 6.070462018637986e-06, "loss": 0.406, "step": 7158 }, { "epoch": 0.4484254372915329, "grad_norm": 0.8203607587516769, "learning_rate": 6.06947111839683e-06, "loss": 0.3981, "step": 7159 }, { "epoch": 0.4484880752908752, "grad_norm": 0.8686874810985228, "learning_rate": 6.068480174136044e-06, "loss": 0.4245, "step": 7160 }, { "epoch": 0.4485507132902175, "grad_norm": 0.7974934551535245, "learning_rate": 6.067489185896416e-06, "loss": 0.4183, "step": 7161 }, { "epoch": 0.4486133512895598, "grad_norm": 0.7921863895147582, "learning_rate": 6.066498153718735e-06, "loss": 0.4163, "step": 7162 }, { "epoch": 0.4486759892889021, "grad_norm": 0.8706224699889564, "learning_rate": 6.0655070776437916e-06, "loss": 0.4514, "step": 7163 }, { "epoch": 0.44873862728824443, "grad_norm": 0.8429639184155217, "learning_rate": 6.0645159577123804e-06, "loss": 0.4128, "step": 7164 }, { "epoch": 0.44880126528758674, "grad_norm": 0.8816285450705315, "learning_rate": 6.063524793965294e-06, "loss": 0.4111, "step": 7165 }, { "epoch": 0.448863903286929, "grad_norm": 0.8980590371946763, "learning_rate": 6.062533586443329e-06, "loss": 0.4181, "step": 7166 }, { "epoch": 0.4489265412862713, "grad_norm": 0.8209123636597258, "learning_rate": 6.061542335187285e-06, "loss": 0.4322, "step": 7167 }, { "epoch": 0.4489891792856136, "grad_norm": 0.8402773135175436, "learning_rate": 6.0605510402379615e-06, "loss": 0.4161, "step": 7168 }, { "epoch": 0.4490518172849559, "grad_norm": 0.8017186527333146, "learning_rate": 6.05955970163616e-06, "loss": 0.3887, "step": 7169 }, { "epoch": 0.4491144552842982, "grad_norm": 0.8375699949573485, "learning_rate": 6.058568319422684e-06, "loss": 0.4274, "step": 7170 }, { "epoch": 0.4491770932836405, "grad_norm": 0.7981348982712422, "learning_rate": 6.057576893638342e-06, "loss": 0.4001, "step": 7171 }, { "epoch": 0.44923973128298283, "grad_norm": 0.7989963827478735, "learning_rate": 6.056585424323936e-06, "loss": 0.3984, "step": 7172 }, { "epoch": 0.44930236928232514, "grad_norm": 0.8511002512830053, "learning_rate": 6.055593911520279e-06, "loss": 0.4442, "step": 7173 }, { "epoch": 0.44936500728166745, "grad_norm": 0.8159146752787921, "learning_rate": 6.0546023552681806e-06, "loss": 0.3898, "step": 7174 }, { "epoch": 0.4494276452810097, "grad_norm": 0.7580487086638622, "learning_rate": 6.0536107556084535e-06, "loss": 0.3819, "step": 7175 }, { "epoch": 0.449490283280352, "grad_norm": 0.8597433479142107, "learning_rate": 6.052619112581913e-06, "loss": 0.4044, "step": 7176 }, { "epoch": 0.4495529212796943, "grad_norm": 0.8132200409486989, "learning_rate": 6.051627426229373e-06, "loss": 0.3988, "step": 7177 }, { "epoch": 0.4496155592790366, "grad_norm": 0.6936749197215721, "learning_rate": 6.050635696591653e-06, "loss": 0.4547, "step": 7178 }, { "epoch": 0.44967819727837893, "grad_norm": 0.8579318161513325, "learning_rate": 6.049643923709573e-06, "loss": 0.4027, "step": 7179 }, { "epoch": 0.44974083527772124, "grad_norm": 0.8332800318729066, "learning_rate": 6.048652107623954e-06, "loss": 0.3898, "step": 7180 }, { "epoch": 0.44980347327706355, "grad_norm": 0.7877560336325864, "learning_rate": 6.047660248375622e-06, "loss": 0.4124, "step": 7181 }, { "epoch": 0.44986611127640586, "grad_norm": 0.8476343596987411, "learning_rate": 6.046668346005397e-06, "loss": 0.4649, "step": 7182 }, { "epoch": 0.4499287492757481, "grad_norm": 0.8428252615744056, "learning_rate": 6.045676400554109e-06, "loss": 0.3902, "step": 7183 }, { "epoch": 0.4499913872750904, "grad_norm": 0.8611401604948618, "learning_rate": 6.044684412062587e-06, "loss": 0.4463, "step": 7184 }, { "epoch": 0.4500540252744327, "grad_norm": 0.8674530762534604, "learning_rate": 6.043692380571661e-06, "loss": 0.4269, "step": 7185 }, { "epoch": 0.45011666327377503, "grad_norm": 0.9301775577807422, "learning_rate": 6.0427003061221614e-06, "loss": 0.4893, "step": 7186 }, { "epoch": 0.45017930127311734, "grad_norm": 0.8653250068656798, "learning_rate": 6.041708188754925e-06, "loss": 0.4006, "step": 7187 }, { "epoch": 0.45024193927245965, "grad_norm": 0.8540147433529238, "learning_rate": 6.040716028510786e-06, "loss": 0.4451, "step": 7188 }, { "epoch": 0.45030457727180195, "grad_norm": 0.8153822526764654, "learning_rate": 6.039723825430581e-06, "loss": 0.4232, "step": 7189 }, { "epoch": 0.45036721527114426, "grad_norm": 0.8171232445376646, "learning_rate": 6.038731579555152e-06, "loss": 0.4042, "step": 7190 }, { "epoch": 0.4504298532704865, "grad_norm": 0.8573441351092969, "learning_rate": 6.0377392909253376e-06, "loss": 0.4411, "step": 7191 }, { "epoch": 0.4504924912698288, "grad_norm": 0.7627894332241419, "learning_rate": 6.036746959581982e-06, "loss": 0.3629, "step": 7192 }, { "epoch": 0.45055512926917113, "grad_norm": 0.8176446668467777, "learning_rate": 6.035754585565927e-06, "loss": 0.4233, "step": 7193 }, { "epoch": 0.45061776726851344, "grad_norm": 0.8084247839805621, "learning_rate": 6.034762168918024e-06, "loss": 0.3828, "step": 7194 }, { "epoch": 0.45068040526785574, "grad_norm": 0.8800578579238509, "learning_rate": 6.0337697096791144e-06, "loss": 0.4221, "step": 7195 }, { "epoch": 0.45074304326719805, "grad_norm": 0.9009094330588684, "learning_rate": 6.032777207890055e-06, "loss": 0.3879, "step": 7196 }, { "epoch": 0.45080568126654036, "grad_norm": 0.896902597065488, "learning_rate": 6.0317846635916935e-06, "loss": 0.4126, "step": 7197 }, { "epoch": 0.45086831926588267, "grad_norm": 0.656208554985264, "learning_rate": 6.030792076824883e-06, "loss": 0.438, "step": 7198 }, { "epoch": 0.450930957265225, "grad_norm": 0.796579891830537, "learning_rate": 6.029799447630478e-06, "loss": 0.4314, "step": 7199 }, { "epoch": 0.4509935952645672, "grad_norm": 0.8437462553481901, "learning_rate": 6.028806776049339e-06, "loss": 0.4143, "step": 7200 }, { "epoch": 0.45105623326390953, "grad_norm": 0.8018665365021894, "learning_rate": 6.027814062122321e-06, "loss": 0.4217, "step": 7201 }, { "epoch": 0.45111887126325184, "grad_norm": 0.7744697237486667, "learning_rate": 6.026821305890286e-06, "loss": 0.4085, "step": 7202 }, { "epoch": 0.45118150926259415, "grad_norm": 0.8077280285591566, "learning_rate": 6.025828507394095e-06, "loss": 0.42, "step": 7203 }, { "epoch": 0.45124414726193646, "grad_norm": 0.8247573917541463, "learning_rate": 6.024835666674611e-06, "loss": 0.4363, "step": 7204 }, { "epoch": 0.45130678526127876, "grad_norm": 0.8275986413561498, "learning_rate": 6.023842783772703e-06, "loss": 0.4266, "step": 7205 }, { "epoch": 0.45136942326062107, "grad_norm": 0.8757032081907496, "learning_rate": 6.0228498587292365e-06, "loss": 0.4479, "step": 7206 }, { "epoch": 0.4514320612599634, "grad_norm": 0.7984752285686231, "learning_rate": 6.021856891585077e-06, "loss": 0.4153, "step": 7207 }, { "epoch": 0.45149469925930563, "grad_norm": 0.9135778520380424, "learning_rate": 6.020863882381098e-06, "loss": 0.4214, "step": 7208 }, { "epoch": 0.45155733725864794, "grad_norm": 0.6417643242734742, "learning_rate": 6.019870831158175e-06, "loss": 0.4696, "step": 7209 }, { "epoch": 0.45161997525799025, "grad_norm": 0.8794297851419889, "learning_rate": 6.018877737957179e-06, "loss": 0.4395, "step": 7210 }, { "epoch": 0.45168261325733255, "grad_norm": 0.7742964076250707, "learning_rate": 6.017884602818985e-06, "loss": 0.4077, "step": 7211 }, { "epoch": 0.45174525125667486, "grad_norm": 0.915118538286134, "learning_rate": 6.016891425784472e-06, "loss": 0.4422, "step": 7212 }, { "epoch": 0.45180788925601717, "grad_norm": 0.8448738438341328, "learning_rate": 6.015898206894518e-06, "loss": 0.4185, "step": 7213 }, { "epoch": 0.4518705272553595, "grad_norm": 0.6016199551742631, "learning_rate": 6.014904946190008e-06, "loss": 0.4659, "step": 7214 }, { "epoch": 0.4519331652547018, "grad_norm": 0.8553399796461781, "learning_rate": 6.01391164371182e-06, "loss": 0.3957, "step": 7215 }, { "epoch": 0.4519958032540441, "grad_norm": 0.7266611524468352, "learning_rate": 6.0129182995008405e-06, "loss": 0.3903, "step": 7216 }, { "epoch": 0.45205844125338634, "grad_norm": 0.8740985103964959, "learning_rate": 6.011924913597956e-06, "loss": 0.4304, "step": 7217 }, { "epoch": 0.45212107925272865, "grad_norm": 0.8088478312169057, "learning_rate": 6.010931486044056e-06, "loss": 0.3928, "step": 7218 }, { "epoch": 0.45218371725207096, "grad_norm": 0.8901720030938991, "learning_rate": 6.009938016880027e-06, "loss": 0.4272, "step": 7219 }, { "epoch": 0.45224635525141327, "grad_norm": 0.9100925074105825, "learning_rate": 6.008944506146761e-06, "loss": 0.4347, "step": 7220 }, { "epoch": 0.4523089932507556, "grad_norm": 0.9586398208814862, "learning_rate": 6.007950953885152e-06, "loss": 0.4001, "step": 7221 }, { "epoch": 0.4523716312500979, "grad_norm": 0.8262855164295421, "learning_rate": 6.006957360136097e-06, "loss": 0.4282, "step": 7222 }, { "epoch": 0.4524342692494402, "grad_norm": 0.8477116858061433, "learning_rate": 6.005963724940488e-06, "loss": 0.4301, "step": 7223 }, { "epoch": 0.4524969072487825, "grad_norm": 0.8548030898981446, "learning_rate": 6.0049700483392256e-06, "loss": 0.4397, "step": 7224 }, { "epoch": 0.45255954524812475, "grad_norm": 0.8346272474980441, "learning_rate": 6.003976330373209e-06, "loss": 0.4013, "step": 7225 }, { "epoch": 0.45262218324746706, "grad_norm": 0.8489884076278897, "learning_rate": 6.002982571083341e-06, "loss": 0.4238, "step": 7226 }, { "epoch": 0.45268482124680937, "grad_norm": 0.8158716853757892, "learning_rate": 6.001988770510525e-06, "loss": 0.3594, "step": 7227 }, { "epoch": 0.4527474592461517, "grad_norm": 0.7944858110094658, "learning_rate": 6.000994928695665e-06, "loss": 0.4066, "step": 7228 }, { "epoch": 0.452810097245494, "grad_norm": 0.8653958505107483, "learning_rate": 6.0000010456796665e-06, "loss": 0.4251, "step": 7229 }, { "epoch": 0.4528727352448363, "grad_norm": 0.7772557692617131, "learning_rate": 5.9990071215034395e-06, "loss": 0.4029, "step": 7230 }, { "epoch": 0.4529353732441786, "grad_norm": 0.8123052725847562, "learning_rate": 5.998013156207895e-06, "loss": 0.3994, "step": 7231 }, { "epoch": 0.4529980112435209, "grad_norm": 0.8980272345833492, "learning_rate": 5.997019149833944e-06, "loss": 0.4497, "step": 7232 }, { "epoch": 0.45306064924286316, "grad_norm": 0.8997153538505126, "learning_rate": 5.996025102422498e-06, "loss": 0.4584, "step": 7233 }, { "epoch": 0.45312328724220546, "grad_norm": 0.7957474076371804, "learning_rate": 5.995031014014476e-06, "loss": 0.3767, "step": 7234 }, { "epoch": 0.45318592524154777, "grad_norm": 0.6991705267232449, "learning_rate": 5.994036884650792e-06, "loss": 0.4598, "step": 7235 }, { "epoch": 0.4532485632408901, "grad_norm": 0.7972065800131347, "learning_rate": 5.993042714372365e-06, "loss": 0.4249, "step": 7236 }, { "epoch": 0.4533112012402324, "grad_norm": 0.7997565009618652, "learning_rate": 5.992048503220116e-06, "loss": 0.4099, "step": 7237 }, { "epoch": 0.4533738392395747, "grad_norm": 0.9269149754130059, "learning_rate": 5.9910542512349655e-06, "loss": 0.451, "step": 7238 }, { "epoch": 0.453436477238917, "grad_norm": 0.7412081967403893, "learning_rate": 5.990059958457839e-06, "loss": 0.3982, "step": 7239 }, { "epoch": 0.4534991152382593, "grad_norm": 0.7413833629069678, "learning_rate": 5.989065624929661e-06, "loss": 0.3528, "step": 7240 }, { "epoch": 0.4535617532376016, "grad_norm": 0.7678300664366698, "learning_rate": 5.988071250691358e-06, "loss": 0.4105, "step": 7241 }, { "epoch": 0.45362439123694387, "grad_norm": 0.7832415636166005, "learning_rate": 5.9870768357838584e-06, "loss": 0.4305, "step": 7242 }, { "epoch": 0.4536870292362862, "grad_norm": 0.8231020377789262, "learning_rate": 5.986082380248094e-06, "loss": 0.4161, "step": 7243 }, { "epoch": 0.4537496672356285, "grad_norm": 0.8424602295240952, "learning_rate": 5.9850878841249935e-06, "loss": 0.4172, "step": 7244 }, { "epoch": 0.4538123052349708, "grad_norm": 0.8478914826706868, "learning_rate": 5.984093347455496e-06, "loss": 0.377, "step": 7245 }, { "epoch": 0.4538749432343131, "grad_norm": 0.8825148847709534, "learning_rate": 5.983098770280531e-06, "loss": 0.4654, "step": 7246 }, { "epoch": 0.4539375812336554, "grad_norm": 0.8256113327984209, "learning_rate": 5.982104152641041e-06, "loss": 0.3589, "step": 7247 }, { "epoch": 0.4540002192329977, "grad_norm": 0.6541957784372626, "learning_rate": 5.981109494577959e-06, "loss": 0.493, "step": 7248 }, { "epoch": 0.45406285723234, "grad_norm": 0.8049634569905252, "learning_rate": 5.98011479613223e-06, "loss": 0.4322, "step": 7249 }, { "epoch": 0.4541254952316823, "grad_norm": 0.8177960484829336, "learning_rate": 5.979120057344793e-06, "loss": 0.3814, "step": 7250 }, { "epoch": 0.4541881332310246, "grad_norm": 0.8255318684891345, "learning_rate": 5.978125278256593e-06, "loss": 0.4249, "step": 7251 }, { "epoch": 0.4542507712303669, "grad_norm": 0.8364076351267591, "learning_rate": 5.977130458908575e-06, "loss": 0.4361, "step": 7252 }, { "epoch": 0.4543134092297092, "grad_norm": 0.8503698441021725, "learning_rate": 5.976135599341686e-06, "loss": 0.3993, "step": 7253 }, { "epoch": 0.4543760472290515, "grad_norm": 0.8597839893711302, "learning_rate": 5.9751406995968754e-06, "loss": 0.3426, "step": 7254 }, { "epoch": 0.4544386852283938, "grad_norm": 0.8829433054587587, "learning_rate": 5.974145759715091e-06, "loss": 0.4138, "step": 7255 }, { "epoch": 0.4545013232277361, "grad_norm": 0.791114094366797, "learning_rate": 5.973150779737288e-06, "loss": 0.3855, "step": 7256 }, { "epoch": 0.45456396122707843, "grad_norm": 0.8037950307773267, "learning_rate": 5.9721557597044175e-06, "loss": 0.4186, "step": 7257 }, { "epoch": 0.45462659922642074, "grad_norm": 0.8445348635700054, "learning_rate": 5.971160699657434e-06, "loss": 0.4194, "step": 7258 }, { "epoch": 0.454689237225763, "grad_norm": 0.7492297485444325, "learning_rate": 5.970165599637298e-06, "loss": 0.3762, "step": 7259 }, { "epoch": 0.4547518752251053, "grad_norm": 0.8048854752931841, "learning_rate": 5.969170459684965e-06, "loss": 0.4397, "step": 7260 }, { "epoch": 0.4548145132244476, "grad_norm": 0.8344182587756327, "learning_rate": 5.968175279841397e-06, "loss": 0.4235, "step": 7261 }, { "epoch": 0.4548771512237899, "grad_norm": 0.8335477473486507, "learning_rate": 5.967180060147553e-06, "loss": 0.3836, "step": 7262 }, { "epoch": 0.4549397892231322, "grad_norm": 0.6622232972913409, "learning_rate": 5.966184800644399e-06, "loss": 0.4807, "step": 7263 }, { "epoch": 0.4550024272224745, "grad_norm": 0.7826662355061684, "learning_rate": 5.965189501372899e-06, "loss": 0.396, "step": 7264 }, { "epoch": 0.45506506522181683, "grad_norm": 0.8527757338849207, "learning_rate": 5.964194162374022e-06, "loss": 0.4214, "step": 7265 }, { "epoch": 0.45512770322115914, "grad_norm": 0.7874642856658781, "learning_rate": 5.963198783688733e-06, "loss": 0.3843, "step": 7266 }, { "epoch": 0.4551903412205014, "grad_norm": 0.8432308947115811, "learning_rate": 5.9622033653580015e-06, "loss": 0.3926, "step": 7267 }, { "epoch": 0.4552529792198437, "grad_norm": 0.676961002980568, "learning_rate": 5.961207907422801e-06, "loss": 0.4971, "step": 7268 }, { "epoch": 0.455315617219186, "grad_norm": 0.802238308978865, "learning_rate": 5.960212409924108e-06, "loss": 0.3621, "step": 7269 }, { "epoch": 0.4553782552185283, "grad_norm": 0.9103414622585043, "learning_rate": 5.959216872902892e-06, "loss": 0.4069, "step": 7270 }, { "epoch": 0.4554408932178706, "grad_norm": 0.8369955642609506, "learning_rate": 5.958221296400131e-06, "loss": 0.4036, "step": 7271 }, { "epoch": 0.45550353121721293, "grad_norm": 0.8307780593611529, "learning_rate": 5.957225680456803e-06, "loss": 0.3882, "step": 7272 }, { "epoch": 0.45556616921655524, "grad_norm": 0.7559519333460364, "learning_rate": 5.956230025113889e-06, "loss": 0.4078, "step": 7273 }, { "epoch": 0.45562880721589755, "grad_norm": 0.8005507454004357, "learning_rate": 5.955234330412372e-06, "loss": 0.3792, "step": 7274 }, { "epoch": 0.4556914452152398, "grad_norm": 0.7714666743046654, "learning_rate": 5.95423859639323e-06, "loss": 0.4011, "step": 7275 }, { "epoch": 0.4557540832145821, "grad_norm": 0.8602224262267443, "learning_rate": 5.9532428230974495e-06, "loss": 0.406, "step": 7276 }, { "epoch": 0.4558167212139244, "grad_norm": 0.7821113578712118, "learning_rate": 5.952247010566019e-06, "loss": 0.3993, "step": 7277 }, { "epoch": 0.4558793592132667, "grad_norm": 0.7968523230689563, "learning_rate": 5.951251158839926e-06, "loss": 0.3658, "step": 7278 }, { "epoch": 0.45594199721260903, "grad_norm": 0.7929512932386589, "learning_rate": 5.950255267960158e-06, "loss": 0.4239, "step": 7279 }, { "epoch": 0.45600463521195134, "grad_norm": 0.7518411673128701, "learning_rate": 5.949259337967706e-06, "loss": 0.3939, "step": 7280 }, { "epoch": 0.45606727321129364, "grad_norm": 0.8124532081326706, "learning_rate": 5.948263368903562e-06, "loss": 0.3988, "step": 7281 }, { "epoch": 0.45612991121063595, "grad_norm": 0.6312193288914961, "learning_rate": 5.947267360808724e-06, "loss": 0.4827, "step": 7282 }, { "epoch": 0.45619254920997826, "grad_norm": 1.0272610775242232, "learning_rate": 5.946271313724186e-06, "loss": 0.4219, "step": 7283 }, { "epoch": 0.4562551872093205, "grad_norm": 0.8321960873131456, "learning_rate": 5.945275227690943e-06, "loss": 0.4247, "step": 7284 }, { "epoch": 0.4563178252086628, "grad_norm": 0.8363029815161974, "learning_rate": 5.944279102749997e-06, "loss": 0.4179, "step": 7285 }, { "epoch": 0.4563804632080051, "grad_norm": 0.7978496855467927, "learning_rate": 5.943282938942349e-06, "loss": 0.3737, "step": 7286 }, { "epoch": 0.45644310120734743, "grad_norm": 0.828444359698197, "learning_rate": 5.9422867363089985e-06, "loss": 0.3995, "step": 7287 }, { "epoch": 0.45650573920668974, "grad_norm": 0.8108228439742083, "learning_rate": 5.941290494890952e-06, "loss": 0.4049, "step": 7288 }, { "epoch": 0.45656837720603205, "grad_norm": 0.8735607121477229, "learning_rate": 5.940294214729214e-06, "loss": 0.4624, "step": 7289 }, { "epoch": 0.45663101520537436, "grad_norm": 0.7471955054343452, "learning_rate": 5.939297895864791e-06, "loss": 0.3401, "step": 7290 }, { "epoch": 0.45669365320471667, "grad_norm": 0.845189451689567, "learning_rate": 5.9383015383386936e-06, "loss": 0.4352, "step": 7291 }, { "epoch": 0.4567562912040589, "grad_norm": 0.7915203261108215, "learning_rate": 5.937305142191929e-06, "loss": 0.3552, "step": 7292 }, { "epoch": 0.4568189292034012, "grad_norm": 0.8451649834964304, "learning_rate": 5.936308707465511e-06, "loss": 0.4199, "step": 7293 }, { "epoch": 0.45688156720274353, "grad_norm": 0.7848438405783399, "learning_rate": 5.935312234200454e-06, "loss": 0.3699, "step": 7294 }, { "epoch": 0.45694420520208584, "grad_norm": 0.8329741914552646, "learning_rate": 5.934315722437771e-06, "loss": 0.4172, "step": 7295 }, { "epoch": 0.45700684320142815, "grad_norm": 0.8010992069826075, "learning_rate": 5.93331917221848e-06, "loss": 0.4181, "step": 7296 }, { "epoch": 0.45706948120077046, "grad_norm": 0.838393993158111, "learning_rate": 5.932322583583598e-06, "loss": 0.4118, "step": 7297 }, { "epoch": 0.45713211920011276, "grad_norm": 0.7880807078156197, "learning_rate": 5.931325956574147e-06, "loss": 0.3923, "step": 7298 }, { "epoch": 0.45719475719945507, "grad_norm": 0.8383268761573646, "learning_rate": 5.930329291231144e-06, "loss": 0.4407, "step": 7299 }, { "epoch": 0.4572573951987973, "grad_norm": 0.8691589342035542, "learning_rate": 5.9293325875956165e-06, "loss": 0.4119, "step": 7300 }, { "epoch": 0.45732003319813963, "grad_norm": 0.8681791299524413, "learning_rate": 5.928335845708588e-06, "loss": 0.4372, "step": 7301 }, { "epoch": 0.45738267119748194, "grad_norm": 0.8399079125852288, "learning_rate": 5.9273390656110815e-06, "loss": 0.4151, "step": 7302 }, { "epoch": 0.45744530919682425, "grad_norm": 0.7838295480120805, "learning_rate": 5.926342247344129e-06, "loss": 0.3677, "step": 7303 }, { "epoch": 0.45750794719616655, "grad_norm": 0.8145316661231898, "learning_rate": 5.925345390948756e-06, "loss": 0.4128, "step": 7304 }, { "epoch": 0.45757058519550886, "grad_norm": 0.8381544553506137, "learning_rate": 5.924348496465997e-06, "loss": 0.4141, "step": 7305 }, { "epoch": 0.45763322319485117, "grad_norm": 0.8214593687780598, "learning_rate": 5.92335156393688e-06, "loss": 0.4115, "step": 7306 }, { "epoch": 0.4576958611941935, "grad_norm": 0.9767153709675364, "learning_rate": 5.922354593402443e-06, "loss": 0.3943, "step": 7307 }, { "epoch": 0.4577584991935358, "grad_norm": 0.8340497119915431, "learning_rate": 5.921357584903719e-06, "loss": 0.4285, "step": 7308 }, { "epoch": 0.45782113719287804, "grad_norm": 0.8284387745098624, "learning_rate": 5.920360538481746e-06, "loss": 0.4243, "step": 7309 }, { "epoch": 0.45788377519222034, "grad_norm": 0.7634700150856443, "learning_rate": 5.919363454177561e-06, "loss": 0.3724, "step": 7310 }, { "epoch": 0.45794641319156265, "grad_norm": 0.7980359832052474, "learning_rate": 5.918366332032207e-06, "loss": 0.3673, "step": 7311 }, { "epoch": 0.45800905119090496, "grad_norm": 0.8498059581182648, "learning_rate": 5.917369172086726e-06, "loss": 0.4229, "step": 7312 }, { "epoch": 0.45807168919024727, "grad_norm": 0.822376149540633, "learning_rate": 5.916371974382156e-06, "loss": 0.36, "step": 7313 }, { "epoch": 0.4581343271895896, "grad_norm": 0.8494242501379571, "learning_rate": 5.915374738959548e-06, "loss": 0.4503, "step": 7314 }, { "epoch": 0.4581969651889319, "grad_norm": 0.7673803978908711, "learning_rate": 5.914377465859944e-06, "loss": 0.3521, "step": 7315 }, { "epoch": 0.4582596031882742, "grad_norm": 0.8133205129927373, "learning_rate": 5.913380155124395e-06, "loss": 0.3966, "step": 7316 }, { "epoch": 0.45832224118761644, "grad_norm": 0.8335715603952599, "learning_rate": 5.912382806793949e-06, "loss": 0.4052, "step": 7317 }, { "epoch": 0.45838487918695875, "grad_norm": 0.8948647948463924, "learning_rate": 5.911385420909657e-06, "loss": 0.4605, "step": 7318 }, { "epoch": 0.45844751718630106, "grad_norm": 0.8230782313989555, "learning_rate": 5.910387997512573e-06, "loss": 0.4343, "step": 7319 }, { "epoch": 0.45851015518564336, "grad_norm": 0.8428510706525638, "learning_rate": 5.90939053664375e-06, "loss": 0.4141, "step": 7320 }, { "epoch": 0.45857279318498567, "grad_norm": 0.8557103743815296, "learning_rate": 5.908393038344243e-06, "loss": 0.4453, "step": 7321 }, { "epoch": 0.458635431184328, "grad_norm": 0.7942404996490774, "learning_rate": 5.90739550265511e-06, "loss": 0.3788, "step": 7322 }, { "epoch": 0.4586980691836703, "grad_norm": 0.8349473458142908, "learning_rate": 5.9063979296174086e-06, "loss": 0.4041, "step": 7323 }, { "epoch": 0.4587607071830126, "grad_norm": 0.7755314593346991, "learning_rate": 5.905400319272202e-06, "loss": 0.3924, "step": 7324 }, { "epoch": 0.4588233451823549, "grad_norm": 0.8210139456554161, "learning_rate": 5.904402671660551e-06, "loss": 0.3858, "step": 7325 }, { "epoch": 0.45888598318169715, "grad_norm": 0.8128185216527007, "learning_rate": 5.903404986823517e-06, "loss": 0.4171, "step": 7326 }, { "epoch": 0.45894862118103946, "grad_norm": 0.7999496764809298, "learning_rate": 5.9024072648021645e-06, "loss": 0.392, "step": 7327 }, { "epoch": 0.45901125918038177, "grad_norm": 0.8523537473306387, "learning_rate": 5.901409505637562e-06, "loss": 0.4117, "step": 7328 }, { "epoch": 0.4590738971797241, "grad_norm": 0.8620185305477255, "learning_rate": 5.900411709370779e-06, "loss": 0.4233, "step": 7329 }, { "epoch": 0.4591365351790664, "grad_norm": 0.8405955274170902, "learning_rate": 5.8994138760428825e-06, "loss": 0.4042, "step": 7330 }, { "epoch": 0.4591991731784087, "grad_norm": 0.6238118298975999, "learning_rate": 5.898416005694941e-06, "loss": 0.4447, "step": 7331 }, { "epoch": 0.459261811177751, "grad_norm": 0.7858707112381, "learning_rate": 5.8974180983680314e-06, "loss": 0.4067, "step": 7332 }, { "epoch": 0.4593244491770933, "grad_norm": 0.7398816421154825, "learning_rate": 5.896420154103229e-06, "loss": 0.3647, "step": 7333 }, { "epoch": 0.45938708717643556, "grad_norm": 0.8118826689322494, "learning_rate": 5.895422172941604e-06, "loss": 0.4133, "step": 7334 }, { "epoch": 0.45944972517577787, "grad_norm": 0.8736544930781404, "learning_rate": 5.894424154924237e-06, "loss": 0.4147, "step": 7335 }, { "epoch": 0.4595123631751202, "grad_norm": 0.8581565161244351, "learning_rate": 5.893426100092204e-06, "loss": 0.3878, "step": 7336 }, { "epoch": 0.4595750011744625, "grad_norm": 0.8585870259703227, "learning_rate": 5.89242800848659e-06, "loss": 0.4181, "step": 7337 }, { "epoch": 0.4596376391738048, "grad_norm": 0.817670804559115, "learning_rate": 5.891429880148471e-06, "loss": 0.4279, "step": 7338 }, { "epoch": 0.4597002771731471, "grad_norm": 0.7498579162682844, "learning_rate": 5.890431715118935e-06, "loss": 0.4087, "step": 7339 }, { "epoch": 0.4597629151724894, "grad_norm": 0.8598189668173543, "learning_rate": 5.889433513439063e-06, "loss": 0.4069, "step": 7340 }, { "epoch": 0.4598255531718317, "grad_norm": 0.8704018355318833, "learning_rate": 5.8884352751499405e-06, "loss": 0.4489, "step": 7341 }, { "epoch": 0.45988819117117397, "grad_norm": 0.914280208018377, "learning_rate": 5.887437000292659e-06, "loss": 0.4333, "step": 7342 }, { "epoch": 0.4599508291705163, "grad_norm": 0.8926674931602008, "learning_rate": 5.886438688908307e-06, "loss": 0.4229, "step": 7343 }, { "epoch": 0.4600134671698586, "grad_norm": 0.7667393727255714, "learning_rate": 5.885440341037973e-06, "loss": 0.3797, "step": 7344 }, { "epoch": 0.4600761051692009, "grad_norm": 0.8748169027843923, "learning_rate": 5.88444195672275e-06, "loss": 0.4194, "step": 7345 }, { "epoch": 0.4601387431685432, "grad_norm": 0.8723679585137472, "learning_rate": 5.883443536003733e-06, "loss": 0.4403, "step": 7346 }, { "epoch": 0.4602013811678855, "grad_norm": 0.8809060515290255, "learning_rate": 5.882445078922014e-06, "loss": 0.4357, "step": 7347 }, { "epoch": 0.4602640191672278, "grad_norm": 0.8825258016798011, "learning_rate": 5.881446585518693e-06, "loss": 0.4298, "step": 7348 }, { "epoch": 0.4603266571665701, "grad_norm": 0.809090792080625, "learning_rate": 5.880448055834868e-06, "loss": 0.4268, "step": 7349 }, { "epoch": 0.4603892951659124, "grad_norm": 0.831922604302968, "learning_rate": 5.879449489911635e-06, "loss": 0.4075, "step": 7350 }, { "epoch": 0.4604519331652547, "grad_norm": 0.7050610930835589, "learning_rate": 5.8784508877901004e-06, "loss": 0.4539, "step": 7351 }, { "epoch": 0.460514571164597, "grad_norm": 0.8645349657925256, "learning_rate": 5.877452249511363e-06, "loss": 0.4544, "step": 7352 }, { "epoch": 0.4605772091639393, "grad_norm": 0.8715842727312607, "learning_rate": 5.876453575116527e-06, "loss": 0.4347, "step": 7353 }, { "epoch": 0.4606398471632816, "grad_norm": 0.8965626852176053, "learning_rate": 5.8754548646467e-06, "loss": 0.4271, "step": 7354 }, { "epoch": 0.4607024851626239, "grad_norm": 0.8624845947153723, "learning_rate": 5.874456118142989e-06, "loss": 0.3979, "step": 7355 }, { "epoch": 0.4607651231619662, "grad_norm": 0.8113106068186875, "learning_rate": 5.873457335646503e-06, "loss": 0.3752, "step": 7356 }, { "epoch": 0.4608277611613085, "grad_norm": 0.8325246820847738, "learning_rate": 5.872458517198348e-06, "loss": 0.3766, "step": 7357 }, { "epoch": 0.46089039916065083, "grad_norm": 0.8410104805790336, "learning_rate": 5.871459662839642e-06, "loss": 0.4272, "step": 7358 }, { "epoch": 0.4609530371599931, "grad_norm": 0.7144333823217909, "learning_rate": 5.87046077261149e-06, "loss": 0.4655, "step": 7359 }, { "epoch": 0.4610156751593354, "grad_norm": 0.8045494538602319, "learning_rate": 5.869461846555014e-06, "loss": 0.3815, "step": 7360 }, { "epoch": 0.4610783131586777, "grad_norm": 0.8429837272819095, "learning_rate": 5.868462884711327e-06, "loss": 0.4134, "step": 7361 }, { "epoch": 0.46114095115802, "grad_norm": 0.8541785848323837, "learning_rate": 5.867463887121546e-06, "loss": 0.4192, "step": 7362 }, { "epoch": 0.4612035891573623, "grad_norm": 0.9035022451821358, "learning_rate": 5.866464853826791e-06, "loss": 0.4115, "step": 7363 }, { "epoch": 0.4612662271567046, "grad_norm": 0.87572486866157, "learning_rate": 5.86546578486818e-06, "loss": 0.4563, "step": 7364 }, { "epoch": 0.46132886515604693, "grad_norm": 0.8055881119790603, "learning_rate": 5.864466680286839e-06, "loss": 0.364, "step": 7365 }, { "epoch": 0.46139150315538924, "grad_norm": 0.7787034659412275, "learning_rate": 5.863467540123887e-06, "loss": 0.3662, "step": 7366 }, { "epoch": 0.46145414115473155, "grad_norm": 0.8243900788371756, "learning_rate": 5.862468364420453e-06, "loss": 0.4152, "step": 7367 }, { "epoch": 0.4615167791540738, "grad_norm": 0.7266384640143674, "learning_rate": 5.861469153217659e-06, "loss": 0.3883, "step": 7368 }, { "epoch": 0.4615794171534161, "grad_norm": 0.8996614565751281, "learning_rate": 5.860469906556636e-06, "loss": 0.466, "step": 7369 }, { "epoch": 0.4616420551527584, "grad_norm": 0.8237977942780469, "learning_rate": 5.859470624478511e-06, "loss": 0.4055, "step": 7370 }, { "epoch": 0.4617046931521007, "grad_norm": 0.7625333606902456, "learning_rate": 5.858471307024416e-06, "loss": 0.4246, "step": 7371 }, { "epoch": 0.46176733115144303, "grad_norm": 0.8195220619684618, "learning_rate": 5.857471954235483e-06, "loss": 0.4041, "step": 7372 }, { "epoch": 0.46182996915078534, "grad_norm": 0.8566103027716838, "learning_rate": 5.856472566152844e-06, "loss": 0.399, "step": 7373 }, { "epoch": 0.46189260715012764, "grad_norm": 0.8663553332767043, "learning_rate": 5.855473142817636e-06, "loss": 0.3985, "step": 7374 }, { "epoch": 0.46195524514946995, "grad_norm": 0.8160725943159235, "learning_rate": 5.854473684270995e-06, "loss": 0.4142, "step": 7375 }, { "epoch": 0.4620178831488122, "grad_norm": 0.8421100503058886, "learning_rate": 5.853474190554058e-06, "loss": 0.4265, "step": 7376 }, { "epoch": 0.4620805211481545, "grad_norm": 0.8287256247778623, "learning_rate": 5.852474661707965e-06, "loss": 0.3988, "step": 7377 }, { "epoch": 0.4621431591474968, "grad_norm": 0.9119647416530688, "learning_rate": 5.851475097773857e-06, "loss": 0.4331, "step": 7378 }, { "epoch": 0.4622057971468391, "grad_norm": 0.8720433269500734, "learning_rate": 5.850475498792876e-06, "loss": 0.4142, "step": 7379 }, { "epoch": 0.46226843514618143, "grad_norm": 0.808103969005756, "learning_rate": 5.8494758648061655e-06, "loss": 0.4056, "step": 7380 }, { "epoch": 0.46233107314552374, "grad_norm": 0.8971023745264679, "learning_rate": 5.848476195854871e-06, "loss": 0.3987, "step": 7381 }, { "epoch": 0.46239371114486605, "grad_norm": 0.8263179995861649, "learning_rate": 5.847476491980137e-06, "loss": 0.3832, "step": 7382 }, { "epoch": 0.46245634914420836, "grad_norm": 0.819024684154698, "learning_rate": 5.846476753223115e-06, "loss": 0.3591, "step": 7383 }, { "epoch": 0.4625189871435506, "grad_norm": 0.8008599077062348, "learning_rate": 5.845476979624953e-06, "loss": 0.3837, "step": 7384 }, { "epoch": 0.4625816251428929, "grad_norm": 0.798239931713754, "learning_rate": 5.8444771712268015e-06, "loss": 0.407, "step": 7385 }, { "epoch": 0.4626442631422352, "grad_norm": 0.8513655300037652, "learning_rate": 5.843477328069813e-06, "loss": 0.433, "step": 7386 }, { "epoch": 0.46270690114157753, "grad_norm": 0.8202401661802331, "learning_rate": 5.842477450195138e-06, "loss": 0.4248, "step": 7387 }, { "epoch": 0.46276953914091984, "grad_norm": 0.7499279211909682, "learning_rate": 5.841477537643938e-06, "loss": 0.3831, "step": 7388 }, { "epoch": 0.46283217714026215, "grad_norm": 0.8925546698337153, "learning_rate": 5.840477590457366e-06, "loss": 0.45, "step": 7389 }, { "epoch": 0.46289481513960445, "grad_norm": 0.8366760896348576, "learning_rate": 5.839477608676582e-06, "loss": 0.4387, "step": 7390 }, { "epoch": 0.46295745313894676, "grad_norm": 0.7929495638272848, "learning_rate": 5.838477592342741e-06, "loss": 0.384, "step": 7391 }, { "epoch": 0.46302009113828907, "grad_norm": 0.7907330638753441, "learning_rate": 5.837477541497006e-06, "loss": 0.4092, "step": 7392 }, { "epoch": 0.4630827291376313, "grad_norm": 0.8718516747683523, "learning_rate": 5.836477456180543e-06, "loss": 0.4153, "step": 7393 }, { "epoch": 0.46314536713697363, "grad_norm": 0.8634919499168434, "learning_rate": 5.83547733643451e-06, "loss": 0.423, "step": 7394 }, { "epoch": 0.46320800513631594, "grad_norm": 0.8195498171432036, "learning_rate": 5.834477182300077e-06, "loss": 0.4129, "step": 7395 }, { "epoch": 0.46327064313565824, "grad_norm": 0.8283967146489453, "learning_rate": 5.833476993818406e-06, "loss": 0.4193, "step": 7396 }, { "epoch": 0.46333328113500055, "grad_norm": 0.7214546980554266, "learning_rate": 5.832476771030669e-06, "loss": 0.3438, "step": 7397 }, { "epoch": 0.46339591913434286, "grad_norm": 0.7009911276719792, "learning_rate": 5.831476513978034e-06, "loss": 0.4436, "step": 7398 }, { "epoch": 0.46345855713368517, "grad_norm": 0.7777192251161787, "learning_rate": 5.83047622270167e-06, "loss": 0.3697, "step": 7399 }, { "epoch": 0.4635211951330275, "grad_norm": 0.8274355710571631, "learning_rate": 5.829475897242749e-06, "loss": 0.3899, "step": 7400 }, { "epoch": 0.4635838331323697, "grad_norm": 0.7897544376367838, "learning_rate": 5.828475537642448e-06, "loss": 0.3673, "step": 7401 }, { "epoch": 0.46364647113171203, "grad_norm": 0.8201423286933427, "learning_rate": 5.827475143941941e-06, "loss": 0.4165, "step": 7402 }, { "epoch": 0.46370910913105434, "grad_norm": 0.8619591948710202, "learning_rate": 5.826474716182403e-06, "loss": 0.4279, "step": 7403 }, { "epoch": 0.46377174713039665, "grad_norm": 0.9020847820697517, "learning_rate": 5.825474254405012e-06, "loss": 0.4429, "step": 7404 }, { "epoch": 0.46383438512973896, "grad_norm": 0.7991288882428081, "learning_rate": 5.824473758650947e-06, "loss": 0.3661, "step": 7405 }, { "epoch": 0.46389702312908127, "grad_norm": 0.8242451018719648, "learning_rate": 5.823473228961389e-06, "loss": 0.3866, "step": 7406 }, { "epoch": 0.4639596611284236, "grad_norm": 0.8346464712699988, "learning_rate": 5.822472665377521e-06, "loss": 0.4427, "step": 7407 }, { "epoch": 0.4640222991277659, "grad_norm": 0.8502275552881667, "learning_rate": 5.821472067940525e-06, "loss": 0.4459, "step": 7408 }, { "epoch": 0.46408493712710813, "grad_norm": 0.8275483928234003, "learning_rate": 5.820471436691586e-06, "loss": 0.3907, "step": 7409 }, { "epoch": 0.46414757512645044, "grad_norm": 0.7389800279344092, "learning_rate": 5.81947077167189e-06, "loss": 0.3677, "step": 7410 }, { "epoch": 0.46421021312579275, "grad_norm": 0.8771556716891337, "learning_rate": 5.8184700729226265e-06, "loss": 0.3971, "step": 7411 }, { "epoch": 0.46427285112513506, "grad_norm": 0.6664626269209791, "learning_rate": 5.817469340484982e-06, "loss": 0.4651, "step": 7412 }, { "epoch": 0.46433548912447736, "grad_norm": 0.7455795770548862, "learning_rate": 5.816468574400147e-06, "loss": 0.3814, "step": 7413 }, { "epoch": 0.46439812712381967, "grad_norm": 0.8215362728953497, "learning_rate": 5.815467774709314e-06, "loss": 0.4305, "step": 7414 }, { "epoch": 0.464460765123162, "grad_norm": 0.8271448778303871, "learning_rate": 5.814466941453677e-06, "loss": 0.4136, "step": 7415 }, { "epoch": 0.4645234031225043, "grad_norm": 0.676706790181029, "learning_rate": 5.813466074674429e-06, "loss": 0.4531, "step": 7416 }, { "epoch": 0.4645860411218466, "grad_norm": 0.8477768968248793, "learning_rate": 5.812465174412766e-06, "loss": 0.4134, "step": 7417 }, { "epoch": 0.46464867912118885, "grad_norm": 0.8263711211638451, "learning_rate": 5.8114642407098865e-06, "loss": 0.3843, "step": 7418 }, { "epoch": 0.46471131712053115, "grad_norm": 0.749129566199124, "learning_rate": 5.810463273606987e-06, "loss": 0.3697, "step": 7419 }, { "epoch": 0.46477395511987346, "grad_norm": 0.8507969964086752, "learning_rate": 5.80946227314527e-06, "loss": 0.4263, "step": 7420 }, { "epoch": 0.46483659311921577, "grad_norm": 0.7927445044684227, "learning_rate": 5.808461239365934e-06, "loss": 0.4221, "step": 7421 }, { "epoch": 0.4648992311185581, "grad_norm": 0.824096088848119, "learning_rate": 5.807460172310185e-06, "loss": 0.4188, "step": 7422 }, { "epoch": 0.4649618691179004, "grad_norm": 0.7537717968147264, "learning_rate": 5.806459072019225e-06, "loss": 0.3663, "step": 7423 }, { "epoch": 0.4650245071172427, "grad_norm": 0.853620318677933, "learning_rate": 5.80545793853426e-06, "loss": 0.3957, "step": 7424 }, { "epoch": 0.465087145116585, "grad_norm": 0.8717049055654306, "learning_rate": 5.804456771896497e-06, "loss": 0.4441, "step": 7425 }, { "epoch": 0.46514978311592725, "grad_norm": 0.8002957321497987, "learning_rate": 5.803455572147143e-06, "loss": 0.3911, "step": 7426 }, { "epoch": 0.46521242111526956, "grad_norm": 0.8380948528397048, "learning_rate": 5.802454339327409e-06, "loss": 0.4012, "step": 7427 }, { "epoch": 0.46527505911461187, "grad_norm": 0.8440693745078609, "learning_rate": 5.801453073478505e-06, "loss": 0.3898, "step": 7428 }, { "epoch": 0.4653376971139542, "grad_norm": 0.7830974693343077, "learning_rate": 5.800451774641645e-06, "loss": 0.3982, "step": 7429 }, { "epoch": 0.4654003351132965, "grad_norm": 0.8235463420977145, "learning_rate": 5.79945044285804e-06, "loss": 0.3942, "step": 7430 }, { "epoch": 0.4654629731126388, "grad_norm": 0.84601901837183, "learning_rate": 5.7984490781689075e-06, "loss": 0.4509, "step": 7431 }, { "epoch": 0.4655256111119811, "grad_norm": 0.780489882091647, "learning_rate": 5.7974476806154625e-06, "loss": 0.4245, "step": 7432 }, { "epoch": 0.4655882491113234, "grad_norm": 0.840866180882839, "learning_rate": 5.796446250238922e-06, "loss": 0.396, "step": 7433 }, { "epoch": 0.4656508871106657, "grad_norm": 0.8238289446905294, "learning_rate": 5.795444787080505e-06, "loss": 0.3944, "step": 7434 }, { "epoch": 0.46571352511000796, "grad_norm": 0.8654746891910412, "learning_rate": 5.794443291181435e-06, "loss": 0.3917, "step": 7435 }, { "epoch": 0.46577616310935027, "grad_norm": 0.834900624760926, "learning_rate": 5.793441762582932e-06, "loss": 0.3934, "step": 7436 }, { "epoch": 0.4658388011086926, "grad_norm": 0.8551059388070777, "learning_rate": 5.792440201326218e-06, "loss": 0.4625, "step": 7437 }, { "epoch": 0.4659014391080349, "grad_norm": 0.7737114942014963, "learning_rate": 5.791438607452519e-06, "loss": 0.4327, "step": 7438 }, { "epoch": 0.4659640771073772, "grad_norm": 0.8101937617228511, "learning_rate": 5.790436981003058e-06, "loss": 0.3799, "step": 7439 }, { "epoch": 0.4660267151067195, "grad_norm": 0.8747019259419067, "learning_rate": 5.789435322019066e-06, "loss": 0.3939, "step": 7440 }, { "epoch": 0.4660893531060618, "grad_norm": 0.8154530784203402, "learning_rate": 5.78843363054177e-06, "loss": 0.4049, "step": 7441 }, { "epoch": 0.4661519911054041, "grad_norm": 0.8047054682992045, "learning_rate": 5.787431906612397e-06, "loss": 0.3656, "step": 7442 }, { "epoch": 0.46621462910474637, "grad_norm": 0.8312431507581111, "learning_rate": 5.786430150272182e-06, "loss": 0.4327, "step": 7443 }, { "epoch": 0.4662772671040887, "grad_norm": 0.8050909273940338, "learning_rate": 5.785428361562357e-06, "loss": 0.417, "step": 7444 }, { "epoch": 0.466339905103431, "grad_norm": 0.765909715989697, "learning_rate": 5.784426540524154e-06, "loss": 0.3868, "step": 7445 }, { "epoch": 0.4664025431027733, "grad_norm": 0.7817451956711788, "learning_rate": 5.783424687198809e-06, "loss": 0.4034, "step": 7446 }, { "epoch": 0.4664651811021156, "grad_norm": 0.8187632305779504, "learning_rate": 5.782422801627557e-06, "loss": 0.4274, "step": 7447 }, { "epoch": 0.4665278191014579, "grad_norm": 0.786024040176266, "learning_rate": 5.78142088385164e-06, "loss": 0.383, "step": 7448 }, { "epoch": 0.4665904571008002, "grad_norm": 0.8232906135260617, "learning_rate": 5.780418933912293e-06, "loss": 0.3958, "step": 7449 }, { "epoch": 0.4666530951001425, "grad_norm": 0.8396062708107969, "learning_rate": 5.7794169518507585e-06, "loss": 0.3945, "step": 7450 }, { "epoch": 0.4667157330994848, "grad_norm": 0.825691840445722, "learning_rate": 5.778414937708276e-06, "loss": 0.4045, "step": 7451 }, { "epoch": 0.4667783710988271, "grad_norm": 0.908447591958243, "learning_rate": 5.777412891526091e-06, "loss": 0.4314, "step": 7452 }, { "epoch": 0.4668410090981694, "grad_norm": 0.7800017760278237, "learning_rate": 5.776410813345448e-06, "loss": 0.4122, "step": 7453 }, { "epoch": 0.4669036470975117, "grad_norm": 0.8213402626346648, "learning_rate": 5.775408703207591e-06, "loss": 0.393, "step": 7454 }, { "epoch": 0.466966285096854, "grad_norm": 0.832634675947275, "learning_rate": 5.774406561153767e-06, "loss": 0.3917, "step": 7455 }, { "epoch": 0.4670289230961963, "grad_norm": 0.7674389860916158, "learning_rate": 5.773404387225226e-06, "loss": 0.445, "step": 7456 }, { "epoch": 0.4670915610955386, "grad_norm": 0.8739051655898448, "learning_rate": 5.772402181463217e-06, "loss": 0.461, "step": 7457 }, { "epoch": 0.46715419909488093, "grad_norm": 0.8059657556138964, "learning_rate": 5.7713999439089915e-06, "loss": 0.4002, "step": 7458 }, { "epoch": 0.46721683709422324, "grad_norm": 0.8139310226828742, "learning_rate": 5.7703976746038e-06, "loss": 0.381, "step": 7459 }, { "epoch": 0.4672794750935655, "grad_norm": 0.960453991554637, "learning_rate": 5.769395373588897e-06, "loss": 0.4756, "step": 7460 }, { "epoch": 0.4673421130929078, "grad_norm": 0.8475941611735102, "learning_rate": 5.768393040905539e-06, "loss": 0.4265, "step": 7461 }, { "epoch": 0.4674047510922501, "grad_norm": 0.6253514780400943, "learning_rate": 5.76739067659498e-06, "loss": 0.4809, "step": 7462 }, { "epoch": 0.4674673890915924, "grad_norm": 0.8063981044182457, "learning_rate": 5.76638828069848e-06, "loss": 0.351, "step": 7463 }, { "epoch": 0.4675300270909347, "grad_norm": 0.8157475873116644, "learning_rate": 5.765385853257294e-06, "loss": 0.4035, "step": 7464 }, { "epoch": 0.467592665090277, "grad_norm": 0.840665134848864, "learning_rate": 5.764383394312684e-06, "loss": 0.3796, "step": 7465 }, { "epoch": 0.46765530308961933, "grad_norm": 0.8279326664516277, "learning_rate": 5.763380903905913e-06, "loss": 0.3995, "step": 7466 }, { "epoch": 0.46771794108896164, "grad_norm": 0.86820643607514, "learning_rate": 5.762378382078242e-06, "loss": 0.4488, "step": 7467 }, { "epoch": 0.4677805790883039, "grad_norm": 0.8415419885074327, "learning_rate": 5.761375828870934e-06, "loss": 0.4281, "step": 7468 }, { "epoch": 0.4678432170876462, "grad_norm": 0.8453737575053423, "learning_rate": 5.7603732443252554e-06, "loss": 0.4012, "step": 7469 }, { "epoch": 0.4679058550869885, "grad_norm": 0.8499282191070417, "learning_rate": 5.759370628482474e-06, "loss": 0.3729, "step": 7470 }, { "epoch": 0.4679684930863308, "grad_norm": 0.7560869044350427, "learning_rate": 5.758367981383858e-06, "loss": 0.3794, "step": 7471 }, { "epoch": 0.4680311310856731, "grad_norm": 0.8043343460175949, "learning_rate": 5.757365303070673e-06, "loss": 0.3829, "step": 7472 }, { "epoch": 0.46809376908501543, "grad_norm": 0.787883070083223, "learning_rate": 5.7563625935841925e-06, "loss": 0.4186, "step": 7473 }, { "epoch": 0.46815640708435774, "grad_norm": 0.9240505124751506, "learning_rate": 5.7553598529656875e-06, "loss": 0.3923, "step": 7474 }, { "epoch": 0.46821904508370005, "grad_norm": 0.8941126645497888, "learning_rate": 5.754357081256431e-06, "loss": 0.4202, "step": 7475 }, { "epoch": 0.46828168308304235, "grad_norm": 0.8501711034412889, "learning_rate": 5.753354278497696e-06, "loss": 0.4211, "step": 7476 }, { "epoch": 0.4683443210823846, "grad_norm": 0.905971888222946, "learning_rate": 5.75235144473076e-06, "loss": 0.4467, "step": 7477 }, { "epoch": 0.4684069590817269, "grad_norm": 0.8740673006880181, "learning_rate": 5.751348579996898e-06, "loss": 0.408, "step": 7478 }, { "epoch": 0.4684695970810692, "grad_norm": 0.8422784295749376, "learning_rate": 5.750345684337388e-06, "loss": 0.3992, "step": 7479 }, { "epoch": 0.46853223508041153, "grad_norm": 0.8525338732967783, "learning_rate": 5.749342757793513e-06, "loss": 0.407, "step": 7480 }, { "epoch": 0.46859487307975384, "grad_norm": 0.8766151161124832, "learning_rate": 5.74833980040655e-06, "loss": 0.4463, "step": 7481 }, { "epoch": 0.46865751107909615, "grad_norm": 1.011638690852574, "learning_rate": 5.747336812217781e-06, "loss": 0.4496, "step": 7482 }, { "epoch": 0.46872014907843845, "grad_norm": 0.8382143424759507, "learning_rate": 5.74633379326849e-06, "loss": 0.4055, "step": 7483 }, { "epoch": 0.46878278707778076, "grad_norm": 0.8161131203502818, "learning_rate": 5.745330743599963e-06, "loss": 0.4134, "step": 7484 }, { "epoch": 0.468845425077123, "grad_norm": 0.8886880008660026, "learning_rate": 5.7443276632534825e-06, "loss": 0.3754, "step": 7485 }, { "epoch": 0.4689080630764653, "grad_norm": 0.8601911572220059, "learning_rate": 5.74332455227034e-06, "loss": 0.406, "step": 7486 }, { "epoch": 0.46897070107580763, "grad_norm": 0.8559423270916431, "learning_rate": 5.742321410691819e-06, "loss": 0.4047, "step": 7487 }, { "epoch": 0.46903333907514994, "grad_norm": 0.9160334935675234, "learning_rate": 5.74131823855921e-06, "loss": 0.4113, "step": 7488 }, { "epoch": 0.46909597707449224, "grad_norm": 0.9119441198637808, "learning_rate": 5.7403150359138074e-06, "loss": 0.4003, "step": 7489 }, { "epoch": 0.46915861507383455, "grad_norm": 0.9377558804830229, "learning_rate": 5.7393118027968976e-06, "loss": 0.4155, "step": 7490 }, { "epoch": 0.46922125307317686, "grad_norm": 0.8355307534838727, "learning_rate": 5.7383085392497785e-06, "loss": 0.451, "step": 7491 }, { "epoch": 0.46928389107251917, "grad_norm": 0.8364585945702798, "learning_rate": 5.737305245313742e-06, "loss": 0.4018, "step": 7492 }, { "epoch": 0.4693465290718614, "grad_norm": 0.819384983642569, "learning_rate": 5.736301921030087e-06, "loss": 0.3758, "step": 7493 }, { "epoch": 0.4694091670712037, "grad_norm": 0.8448169932378978, "learning_rate": 5.735298566440105e-06, "loss": 0.4124, "step": 7494 }, { "epoch": 0.46947180507054603, "grad_norm": 0.7775313727372252, "learning_rate": 5.734295181585099e-06, "loss": 0.3773, "step": 7495 }, { "epoch": 0.46953444306988834, "grad_norm": 0.7880911846453217, "learning_rate": 5.733291766506368e-06, "loss": 0.3852, "step": 7496 }, { "epoch": 0.46959708106923065, "grad_norm": 0.8512338765585014, "learning_rate": 5.7322883212452106e-06, "loss": 0.3955, "step": 7497 }, { "epoch": 0.46965971906857296, "grad_norm": 0.9125343420732516, "learning_rate": 5.7312848458429296e-06, "loss": 0.3955, "step": 7498 }, { "epoch": 0.46972235706791526, "grad_norm": 0.6327422328500356, "learning_rate": 5.73028134034083e-06, "loss": 0.4724, "step": 7499 }, { "epoch": 0.46978499506725757, "grad_norm": 0.906407976798345, "learning_rate": 5.729277804780215e-06, "loss": 0.378, "step": 7500 }, { "epoch": 0.4698476330665999, "grad_norm": 0.8241198188226416, "learning_rate": 5.728274239202389e-06, "loss": 0.4372, "step": 7501 }, { "epoch": 0.46991027106594213, "grad_norm": 0.811804059219454, "learning_rate": 5.72727064364866e-06, "loss": 0.4073, "step": 7502 }, { "epoch": 0.46997290906528444, "grad_norm": 0.8897934545918266, "learning_rate": 5.726267018160337e-06, "loss": 0.4208, "step": 7503 }, { "epoch": 0.47003554706462675, "grad_norm": 0.899159149927829, "learning_rate": 5.725263362778728e-06, "loss": 0.4318, "step": 7504 }, { "epoch": 0.47009818506396905, "grad_norm": 0.8394961449799604, "learning_rate": 5.724259677545145e-06, "loss": 0.4229, "step": 7505 }, { "epoch": 0.47016082306331136, "grad_norm": 0.8981149463316586, "learning_rate": 5.723255962500899e-06, "loss": 0.3948, "step": 7506 }, { "epoch": 0.47022346106265367, "grad_norm": 0.898192789149829, "learning_rate": 5.722252217687303e-06, "loss": 0.3744, "step": 7507 }, { "epoch": 0.470286099061996, "grad_norm": 0.8155954022467082, "learning_rate": 5.721248443145673e-06, "loss": 0.437, "step": 7508 }, { "epoch": 0.4703487370613383, "grad_norm": 0.8230678734743162, "learning_rate": 5.7202446389173225e-06, "loss": 0.3768, "step": 7509 }, { "epoch": 0.47041137506068054, "grad_norm": 0.8072209393126867, "learning_rate": 5.719240805043569e-06, "loss": 0.4374, "step": 7510 }, { "epoch": 0.47047401306002284, "grad_norm": 0.8146648189854523, "learning_rate": 5.718236941565729e-06, "loss": 0.3973, "step": 7511 }, { "epoch": 0.47053665105936515, "grad_norm": 0.8122896378850457, "learning_rate": 5.7172330485251245e-06, "loss": 0.388, "step": 7512 }, { "epoch": 0.47059928905870746, "grad_norm": 0.8950950075091879, "learning_rate": 5.716229125963076e-06, "loss": 0.4308, "step": 7513 }, { "epoch": 0.47066192705804977, "grad_norm": 0.7935412054329256, "learning_rate": 5.7152251739209015e-06, "loss": 0.3916, "step": 7514 }, { "epoch": 0.4707245650573921, "grad_norm": 0.8525931586764125, "learning_rate": 5.714221192439927e-06, "loss": 0.4076, "step": 7515 }, { "epoch": 0.4707872030567344, "grad_norm": 0.75862719682992, "learning_rate": 5.7132171815614754e-06, "loss": 0.3347, "step": 7516 }, { "epoch": 0.4708498410560767, "grad_norm": 0.7835661373353561, "learning_rate": 5.712213141326872e-06, "loss": 0.3805, "step": 7517 }, { "epoch": 0.470912479055419, "grad_norm": 0.8160548633562025, "learning_rate": 5.711209071777445e-06, "loss": 0.3463, "step": 7518 }, { "epoch": 0.47097511705476125, "grad_norm": 0.7801286092513658, "learning_rate": 5.7102049729545195e-06, "loss": 0.3889, "step": 7519 }, { "epoch": 0.47103775505410356, "grad_norm": 0.7895926758548764, "learning_rate": 5.709200844899424e-06, "loss": 0.3574, "step": 7520 }, { "epoch": 0.47110039305344587, "grad_norm": 0.7479031594847513, "learning_rate": 5.708196687653492e-06, "loss": 0.3804, "step": 7521 }, { "epoch": 0.4711630310527882, "grad_norm": 0.8071072065774366, "learning_rate": 5.707192501258053e-06, "loss": 0.4144, "step": 7522 }, { "epoch": 0.4712256690521305, "grad_norm": 0.91543141006196, "learning_rate": 5.706188285754439e-06, "loss": 0.4232, "step": 7523 }, { "epoch": 0.4712883070514728, "grad_norm": 0.6849949104183701, "learning_rate": 5.705184041183984e-06, "loss": 0.4593, "step": 7524 }, { "epoch": 0.4713509450508151, "grad_norm": 0.875696117931053, "learning_rate": 5.7041797675880225e-06, "loss": 0.4297, "step": 7525 }, { "epoch": 0.4714135830501574, "grad_norm": 0.8862247606051495, "learning_rate": 5.703175465007892e-06, "loss": 0.4248, "step": 7526 }, { "epoch": 0.47147622104949966, "grad_norm": 0.8562235147691412, "learning_rate": 5.7021711334849285e-06, "loss": 0.4397, "step": 7527 }, { "epoch": 0.47153885904884196, "grad_norm": 0.8410575973278718, "learning_rate": 5.70116677306047e-06, "loss": 0.407, "step": 7528 }, { "epoch": 0.47160149704818427, "grad_norm": 0.8679054297821136, "learning_rate": 5.700162383775858e-06, "loss": 0.4018, "step": 7529 }, { "epoch": 0.4716641350475266, "grad_norm": 0.8696733435426353, "learning_rate": 5.699157965672432e-06, "loss": 0.4731, "step": 7530 }, { "epoch": 0.4717267730468689, "grad_norm": 0.8766392023596524, "learning_rate": 5.698153518791535e-06, "loss": 0.4305, "step": 7531 }, { "epoch": 0.4717894110462112, "grad_norm": 0.8483176937732925, "learning_rate": 5.697149043174508e-06, "loss": 0.4493, "step": 7532 }, { "epoch": 0.4718520490455535, "grad_norm": 0.8564556818093039, "learning_rate": 5.696144538862698e-06, "loss": 0.3984, "step": 7533 }, { "epoch": 0.4719146870448958, "grad_norm": 0.7983366355274739, "learning_rate": 5.695140005897449e-06, "loss": 0.4258, "step": 7534 }, { "epoch": 0.47197732504423806, "grad_norm": 0.9172386835881903, "learning_rate": 5.6941354443201095e-06, "loss": 0.398, "step": 7535 }, { "epoch": 0.47203996304358037, "grad_norm": 0.8151958987314794, "learning_rate": 5.6931308541720265e-06, "loss": 0.391, "step": 7536 }, { "epoch": 0.4721026010429227, "grad_norm": 0.8257668977189373, "learning_rate": 5.6921262354945475e-06, "loss": 0.387, "step": 7537 }, { "epoch": 0.472165239042265, "grad_norm": 0.7684606049281606, "learning_rate": 5.691121588329024e-06, "loss": 0.3636, "step": 7538 }, { "epoch": 0.4722278770416073, "grad_norm": 0.791690016480046, "learning_rate": 5.690116912716809e-06, "loss": 0.372, "step": 7539 }, { "epoch": 0.4722905150409496, "grad_norm": 0.9609689786479673, "learning_rate": 5.6891122086992525e-06, "loss": 0.4489, "step": 7540 }, { "epoch": 0.4723531530402919, "grad_norm": 0.8556145462687915, "learning_rate": 5.6881074763177104e-06, "loss": 0.4297, "step": 7541 }, { "epoch": 0.4724157910396342, "grad_norm": 0.8822301109666584, "learning_rate": 5.687102715613537e-06, "loss": 0.4157, "step": 7542 }, { "epoch": 0.4724784290389765, "grad_norm": 0.8012563768166705, "learning_rate": 5.686097926628087e-06, "loss": 0.4115, "step": 7543 }, { "epoch": 0.4725410670383188, "grad_norm": 0.8924389646451273, "learning_rate": 5.6850931094027204e-06, "loss": 0.3941, "step": 7544 }, { "epoch": 0.4726037050376611, "grad_norm": 0.7918542178952299, "learning_rate": 5.684088263978793e-06, "loss": 0.3849, "step": 7545 }, { "epoch": 0.4726663430370034, "grad_norm": 0.8173045897413244, "learning_rate": 5.683083390397667e-06, "loss": 0.4183, "step": 7546 }, { "epoch": 0.4727289810363457, "grad_norm": 0.7950459945187979, "learning_rate": 5.682078488700702e-06, "loss": 0.3688, "step": 7547 }, { "epoch": 0.472791619035688, "grad_norm": 0.8142634949909937, "learning_rate": 5.681073558929258e-06, "loss": 0.3933, "step": 7548 }, { "epoch": 0.4728542570350303, "grad_norm": 0.8449422010570827, "learning_rate": 5.680068601124701e-06, "loss": 0.4119, "step": 7549 }, { "epoch": 0.4729168950343726, "grad_norm": 0.7934776257721305, "learning_rate": 5.679063615328394e-06, "loss": 0.3866, "step": 7550 }, { "epoch": 0.4729795330337149, "grad_norm": 0.7686498442301233, "learning_rate": 5.678058601581704e-06, "loss": 0.3815, "step": 7551 }, { "epoch": 0.4730421710330572, "grad_norm": 0.9110333104074891, "learning_rate": 5.677053559925993e-06, "loss": 0.3907, "step": 7552 }, { "epoch": 0.4731048090323995, "grad_norm": 0.8094773296327528, "learning_rate": 5.6760484904026344e-06, "loss": 0.3812, "step": 7553 }, { "epoch": 0.4731674470317418, "grad_norm": 0.8052825477627191, "learning_rate": 5.675043393052994e-06, "loss": 0.3922, "step": 7554 }, { "epoch": 0.4732300850310841, "grad_norm": 0.8216854815607809, "learning_rate": 5.674038267918442e-06, "loss": 0.396, "step": 7555 }, { "epoch": 0.4732927230304264, "grad_norm": 0.8188911662781712, "learning_rate": 5.673033115040351e-06, "loss": 0.4064, "step": 7556 }, { "epoch": 0.4733553610297687, "grad_norm": 0.8872547960112821, "learning_rate": 5.67202793446009e-06, "loss": 0.4235, "step": 7557 }, { "epoch": 0.473417999029111, "grad_norm": 0.8772598761928995, "learning_rate": 5.671022726219034e-06, "loss": 0.4657, "step": 7558 }, { "epoch": 0.47348063702845333, "grad_norm": 0.8534457384386614, "learning_rate": 5.6700174903585605e-06, "loss": 0.402, "step": 7559 }, { "epoch": 0.4735432750277956, "grad_norm": 0.8839603751858535, "learning_rate": 5.669012226920041e-06, "loss": 0.4328, "step": 7560 }, { "epoch": 0.4736059130271379, "grad_norm": 0.8545964677893084, "learning_rate": 5.668006935944854e-06, "loss": 0.3814, "step": 7561 }, { "epoch": 0.4736685510264802, "grad_norm": 0.6195314373629338, "learning_rate": 5.667001617474378e-06, "loss": 0.473, "step": 7562 }, { "epoch": 0.4737311890258225, "grad_norm": 0.7966906338193845, "learning_rate": 5.665996271549991e-06, "loss": 0.4179, "step": 7563 }, { "epoch": 0.4737938270251648, "grad_norm": 0.8056807611323871, "learning_rate": 5.664990898213076e-06, "loss": 0.385, "step": 7564 }, { "epoch": 0.4738564650245071, "grad_norm": 0.8310330384940774, "learning_rate": 5.663985497505011e-06, "loss": 0.411, "step": 7565 }, { "epoch": 0.47391910302384943, "grad_norm": 0.8909645971261946, "learning_rate": 5.66298006946718e-06, "loss": 0.4009, "step": 7566 }, { "epoch": 0.47398174102319174, "grad_norm": 0.8633020684638129, "learning_rate": 5.661974614140965e-06, "loss": 0.4062, "step": 7567 }, { "epoch": 0.47404437902253405, "grad_norm": 0.8225520794666752, "learning_rate": 5.660969131567753e-06, "loss": 0.427, "step": 7568 }, { "epoch": 0.4741070170218763, "grad_norm": 0.8741019606697472, "learning_rate": 5.659963621788928e-06, "loss": 0.373, "step": 7569 }, { "epoch": 0.4741696550212186, "grad_norm": 0.8318253372718167, "learning_rate": 5.658958084845879e-06, "loss": 0.3885, "step": 7570 }, { "epoch": 0.4742322930205609, "grad_norm": 0.8097280715198402, "learning_rate": 5.657952520779992e-06, "loss": 0.4008, "step": 7571 }, { "epoch": 0.4742949310199032, "grad_norm": 0.7737064245607224, "learning_rate": 5.656946929632658e-06, "loss": 0.3795, "step": 7572 }, { "epoch": 0.47435756901924553, "grad_norm": 0.7882066462921429, "learning_rate": 5.6559413114452665e-06, "loss": 0.3554, "step": 7573 }, { "epoch": 0.47442020701858784, "grad_norm": 0.8517283428448745, "learning_rate": 5.654935666259209e-06, "loss": 0.3547, "step": 7574 }, { "epoch": 0.47448284501793014, "grad_norm": 0.8158843311108529, "learning_rate": 5.653929994115875e-06, "loss": 0.3878, "step": 7575 }, { "epoch": 0.47454548301727245, "grad_norm": 0.7894752226970219, "learning_rate": 5.652924295056664e-06, "loss": 0.4098, "step": 7576 }, { "epoch": 0.4746081210166147, "grad_norm": 0.827304195595095, "learning_rate": 5.651918569122967e-06, "loss": 0.4061, "step": 7577 }, { "epoch": 0.474670759015957, "grad_norm": 0.7659275162248118, "learning_rate": 5.650912816356182e-06, "loss": 0.3759, "step": 7578 }, { "epoch": 0.4747333970152993, "grad_norm": 0.8697249878426011, "learning_rate": 5.649907036797703e-06, "loss": 0.4255, "step": 7579 }, { "epoch": 0.4747960350146416, "grad_norm": 0.9329661310398205, "learning_rate": 5.648901230488929e-06, "loss": 0.4258, "step": 7580 }, { "epoch": 0.47485867301398393, "grad_norm": 0.9028582736129028, "learning_rate": 5.647895397471263e-06, "loss": 0.4095, "step": 7581 }, { "epoch": 0.47492131101332624, "grad_norm": 0.9338415967227579, "learning_rate": 5.6468895377861e-06, "loss": 0.3811, "step": 7582 }, { "epoch": 0.47498394901266855, "grad_norm": 0.7341528663687866, "learning_rate": 5.645883651474844e-06, "loss": 0.3691, "step": 7583 }, { "epoch": 0.47504658701201086, "grad_norm": 0.7851634215338611, "learning_rate": 5.644877738578896e-06, "loss": 0.3787, "step": 7584 }, { "epoch": 0.47510922501135316, "grad_norm": 0.8287646498025428, "learning_rate": 5.6438717991396615e-06, "loss": 0.3841, "step": 7585 }, { "epoch": 0.4751718630106954, "grad_norm": 0.8303330026498296, "learning_rate": 5.642865833198545e-06, "loss": 0.3987, "step": 7586 }, { "epoch": 0.4752345010100377, "grad_norm": 0.8264376203526039, "learning_rate": 5.641859840796951e-06, "loss": 0.3959, "step": 7587 }, { "epoch": 0.47529713900938003, "grad_norm": 0.820658604007337, "learning_rate": 5.6408538219762865e-06, "loss": 0.4103, "step": 7588 }, { "epoch": 0.47535977700872234, "grad_norm": 0.8246053308880228, "learning_rate": 5.639847776777958e-06, "loss": 0.416, "step": 7589 }, { "epoch": 0.47542241500806465, "grad_norm": 0.9077343726790641, "learning_rate": 5.638841705243379e-06, "loss": 0.4341, "step": 7590 }, { "epoch": 0.47548505300740695, "grad_norm": 0.9027597957524827, "learning_rate": 5.637835607413957e-06, "loss": 0.4344, "step": 7591 }, { "epoch": 0.47554769100674926, "grad_norm": 0.7971165239489943, "learning_rate": 5.636829483331101e-06, "loss": 0.4118, "step": 7592 }, { "epoch": 0.47561032900609157, "grad_norm": 0.843950122759513, "learning_rate": 5.635823333036227e-06, "loss": 0.4015, "step": 7593 }, { "epoch": 0.4756729670054338, "grad_norm": 0.8106151324164289, "learning_rate": 5.634817156570744e-06, "loss": 0.4323, "step": 7594 }, { "epoch": 0.47573560500477613, "grad_norm": 0.8121566911009126, "learning_rate": 5.633810953976072e-06, "loss": 0.448, "step": 7595 }, { "epoch": 0.47579824300411844, "grad_norm": 0.7802051915915843, "learning_rate": 5.632804725293621e-06, "loss": 0.4131, "step": 7596 }, { "epoch": 0.47586088100346075, "grad_norm": 0.7803232632276558, "learning_rate": 5.631798470564812e-06, "loss": 0.3679, "step": 7597 }, { "epoch": 0.47592351900280305, "grad_norm": 0.662027205524899, "learning_rate": 5.6307921898310595e-06, "loss": 0.4489, "step": 7598 }, { "epoch": 0.47598615700214536, "grad_norm": 0.803487267323282, "learning_rate": 5.629785883133785e-06, "loss": 0.4448, "step": 7599 }, { "epoch": 0.47604879500148767, "grad_norm": 0.8346573053176894, "learning_rate": 5.628779550514405e-06, "loss": 0.4034, "step": 7600 }, { "epoch": 0.47611143300083, "grad_norm": 0.8397650014527799, "learning_rate": 5.627773192014342e-06, "loss": 0.43, "step": 7601 }, { "epoch": 0.4761740710001722, "grad_norm": 0.8917604172752833, "learning_rate": 5.62676680767502e-06, "loss": 0.4379, "step": 7602 }, { "epoch": 0.47623670899951454, "grad_norm": 0.8582677359309394, "learning_rate": 5.625760397537857e-06, "loss": 0.4362, "step": 7603 }, { "epoch": 0.47629934699885684, "grad_norm": 0.8195025045401498, "learning_rate": 5.624753961644281e-06, "loss": 0.4306, "step": 7604 }, { "epoch": 0.47636198499819915, "grad_norm": 0.8505395670403835, "learning_rate": 5.623747500035717e-06, "loss": 0.4041, "step": 7605 }, { "epoch": 0.47642462299754146, "grad_norm": 0.8809195864917726, "learning_rate": 5.622741012753589e-06, "loss": 0.441, "step": 7606 }, { "epoch": 0.47648726099688377, "grad_norm": 0.7837980425967587, "learning_rate": 5.621734499839326e-06, "loss": 0.3687, "step": 7607 }, { "epoch": 0.4765498989962261, "grad_norm": 0.8453981632239935, "learning_rate": 5.620727961334355e-06, "loss": 0.4298, "step": 7608 }, { "epoch": 0.4766125369955684, "grad_norm": 0.9135642107234784, "learning_rate": 5.619721397280105e-06, "loss": 0.3978, "step": 7609 }, { "epoch": 0.4766751749949107, "grad_norm": 0.8405069345576077, "learning_rate": 5.618714807718009e-06, "loss": 0.4375, "step": 7610 }, { "epoch": 0.47673781299425294, "grad_norm": 0.8470291899186385, "learning_rate": 5.617708192689496e-06, "loss": 0.4042, "step": 7611 }, { "epoch": 0.47680045099359525, "grad_norm": 0.8029504471322804, "learning_rate": 5.616701552235999e-06, "loss": 0.4184, "step": 7612 }, { "epoch": 0.47686308899293756, "grad_norm": 0.7986567242127149, "learning_rate": 5.615694886398952e-06, "loss": 0.4182, "step": 7613 }, { "epoch": 0.47692572699227986, "grad_norm": 0.8107209264230264, "learning_rate": 5.614688195219788e-06, "loss": 0.3712, "step": 7614 }, { "epoch": 0.47698836499162217, "grad_norm": 0.8407405825590355, "learning_rate": 5.613681478739946e-06, "loss": 0.4401, "step": 7615 }, { "epoch": 0.4770510029909645, "grad_norm": 0.8835744396932966, "learning_rate": 5.6126747370008595e-06, "loss": 0.4396, "step": 7616 }, { "epoch": 0.4771136409903068, "grad_norm": 0.7991894387459769, "learning_rate": 5.6116679700439666e-06, "loss": 0.3913, "step": 7617 }, { "epoch": 0.4771762789896491, "grad_norm": 0.8631808060806841, "learning_rate": 5.610661177910706e-06, "loss": 0.3904, "step": 7618 }, { "epoch": 0.47723891698899135, "grad_norm": 0.8245094001659242, "learning_rate": 5.609654360642519e-06, "loss": 0.3914, "step": 7619 }, { "epoch": 0.47730155498833365, "grad_norm": 0.8890889612164937, "learning_rate": 5.608647518280845e-06, "loss": 0.4112, "step": 7620 }, { "epoch": 0.47736419298767596, "grad_norm": 0.8342983983923028, "learning_rate": 5.6076406508671264e-06, "loss": 0.4351, "step": 7621 }, { "epoch": 0.47742683098701827, "grad_norm": 0.7801816803049687, "learning_rate": 5.606633758442806e-06, "loss": 0.35, "step": 7622 }, { "epoch": 0.4774894689863606, "grad_norm": 0.8357731452072638, "learning_rate": 5.6056268410493266e-06, "loss": 0.4387, "step": 7623 }, { "epoch": 0.4775521069857029, "grad_norm": 0.8030100277080913, "learning_rate": 5.604619898728135e-06, "loss": 0.3992, "step": 7624 }, { "epoch": 0.4776147449850452, "grad_norm": 0.8469189887353809, "learning_rate": 5.603612931520677e-06, "loss": 0.4347, "step": 7625 }, { "epoch": 0.4776773829843875, "grad_norm": 0.8067143753730599, "learning_rate": 5.6026059394683965e-06, "loss": 0.3873, "step": 7626 }, { "epoch": 0.4777400209837298, "grad_norm": 0.8098281192212632, "learning_rate": 5.6015989226127445e-06, "loss": 0.4084, "step": 7627 }, { "epoch": 0.47780265898307206, "grad_norm": 0.8378749366191879, "learning_rate": 5.60059188099517e-06, "loss": 0.3934, "step": 7628 }, { "epoch": 0.47786529698241437, "grad_norm": 0.642917724338331, "learning_rate": 5.599584814657122e-06, "loss": 0.4817, "step": 7629 }, { "epoch": 0.4779279349817567, "grad_norm": 0.8116202767657149, "learning_rate": 5.598577723640049e-06, "loss": 0.4205, "step": 7630 }, { "epoch": 0.477990572981099, "grad_norm": 0.8951455399036353, "learning_rate": 5.597570607985409e-06, "loss": 0.3894, "step": 7631 }, { "epoch": 0.4780532109804413, "grad_norm": 0.83164507103376, "learning_rate": 5.59656346773465e-06, "loss": 0.404, "step": 7632 }, { "epoch": 0.4781158489797836, "grad_norm": 0.8185849349621243, "learning_rate": 5.59555630292923e-06, "loss": 0.396, "step": 7633 }, { "epoch": 0.4781784869791259, "grad_norm": 0.794426227250999, "learning_rate": 5.5945491136106e-06, "loss": 0.4148, "step": 7634 }, { "epoch": 0.4782411249784682, "grad_norm": 0.8938009813760087, "learning_rate": 5.593541899820219e-06, "loss": 0.421, "step": 7635 }, { "epoch": 0.47830376297781046, "grad_norm": 0.837935971043074, "learning_rate": 5.592534661599543e-06, "loss": 0.4207, "step": 7636 }, { "epoch": 0.4783664009771528, "grad_norm": 0.8638912020747327, "learning_rate": 5.591527398990032e-06, "loss": 0.4657, "step": 7637 }, { "epoch": 0.4784290389764951, "grad_norm": 0.7923710054943357, "learning_rate": 5.590520112033142e-06, "loss": 0.3879, "step": 7638 }, { "epoch": 0.4784916769758374, "grad_norm": 0.8916034751061275, "learning_rate": 5.589512800770334e-06, "loss": 0.4631, "step": 7639 }, { "epoch": 0.4785543149751797, "grad_norm": 0.8012527663750874, "learning_rate": 5.58850546524307e-06, "loss": 0.3733, "step": 7640 }, { "epoch": 0.478616952974522, "grad_norm": 0.8182043797910192, "learning_rate": 5.587498105492813e-06, "loss": 0.3911, "step": 7641 }, { "epoch": 0.4786795909738643, "grad_norm": 0.8119061202460586, "learning_rate": 5.586490721561024e-06, "loss": 0.4035, "step": 7642 }, { "epoch": 0.4787422289732066, "grad_norm": 0.7685132372805785, "learning_rate": 5.585483313489168e-06, "loss": 0.3705, "step": 7643 }, { "epoch": 0.47880486697254887, "grad_norm": 0.7803341894600284, "learning_rate": 5.584475881318709e-06, "loss": 0.3803, "step": 7644 }, { "epoch": 0.4788675049718912, "grad_norm": 0.7899273171362987, "learning_rate": 5.583468425091116e-06, "loss": 0.3944, "step": 7645 }, { "epoch": 0.4789301429712335, "grad_norm": 0.805471654194722, "learning_rate": 5.582460944847855e-06, "loss": 0.4034, "step": 7646 }, { "epoch": 0.4789927809705758, "grad_norm": 0.8766828160956219, "learning_rate": 5.581453440630392e-06, "loss": 0.4074, "step": 7647 }, { "epoch": 0.4790554189699181, "grad_norm": 0.7866047499689405, "learning_rate": 5.580445912480198e-06, "loss": 0.3717, "step": 7648 }, { "epoch": 0.4791180569692604, "grad_norm": 0.8433331624788399, "learning_rate": 5.579438360438742e-06, "loss": 0.405, "step": 7649 }, { "epoch": 0.4791806949686027, "grad_norm": 0.7931284466553121, "learning_rate": 5.578430784547496e-06, "loss": 0.3847, "step": 7650 }, { "epoch": 0.479243332967945, "grad_norm": 0.7784468269157583, "learning_rate": 5.577423184847932e-06, "loss": 0.4053, "step": 7651 }, { "epoch": 0.47930597096728733, "grad_norm": 0.8703266302868496, "learning_rate": 5.576415561381521e-06, "loss": 0.4004, "step": 7652 }, { "epoch": 0.4793686089666296, "grad_norm": 0.8173945588450079, "learning_rate": 5.57540791418974e-06, "loss": 0.3775, "step": 7653 }, { "epoch": 0.4794312469659719, "grad_norm": 0.9354349636145959, "learning_rate": 5.574400243314061e-06, "loss": 0.3936, "step": 7654 }, { "epoch": 0.4794938849653142, "grad_norm": 0.8234484683406883, "learning_rate": 5.573392548795963e-06, "loss": 0.4031, "step": 7655 }, { "epoch": 0.4795565229646565, "grad_norm": 0.8264852295127677, "learning_rate": 5.572384830676919e-06, "loss": 0.4072, "step": 7656 }, { "epoch": 0.4796191609639988, "grad_norm": 0.8801452114068201, "learning_rate": 5.571377088998412e-06, "loss": 0.4512, "step": 7657 }, { "epoch": 0.4796817989633411, "grad_norm": 0.8609611121744891, "learning_rate": 5.570369323801916e-06, "loss": 0.4317, "step": 7658 }, { "epoch": 0.47974443696268343, "grad_norm": 0.7926132259919715, "learning_rate": 5.569361535128913e-06, "loss": 0.3932, "step": 7659 }, { "epoch": 0.47980707496202574, "grad_norm": 0.7600893785937515, "learning_rate": 5.568353723020884e-06, "loss": 0.3454, "step": 7660 }, { "epoch": 0.479869712961368, "grad_norm": 0.8108845619341065, "learning_rate": 5.56734588751931e-06, "loss": 0.4065, "step": 7661 }, { "epoch": 0.4799323509607103, "grad_norm": 0.8608520300404868, "learning_rate": 5.5663380286656746e-06, "loss": 0.4613, "step": 7662 }, { "epoch": 0.4799949889600526, "grad_norm": 0.6601608866053466, "learning_rate": 5.565330146501459e-06, "loss": 0.4891, "step": 7663 }, { "epoch": 0.4800576269593949, "grad_norm": 0.7878088645701828, "learning_rate": 5.564322241068152e-06, "loss": 0.4144, "step": 7664 }, { "epoch": 0.4801202649587372, "grad_norm": 0.8105628721679198, "learning_rate": 5.563314312407236e-06, "loss": 0.4196, "step": 7665 }, { "epoch": 0.4801829029580795, "grad_norm": 0.848317468588712, "learning_rate": 5.562306360560198e-06, "loss": 0.3931, "step": 7666 }, { "epoch": 0.48024554095742183, "grad_norm": 0.8420553341128426, "learning_rate": 5.561298385568525e-06, "loss": 0.3957, "step": 7667 }, { "epoch": 0.48030817895676414, "grad_norm": 0.8198588701302505, "learning_rate": 5.560290387473708e-06, "loss": 0.3694, "step": 7668 }, { "epoch": 0.4803708169561064, "grad_norm": 0.7551902715437862, "learning_rate": 5.559282366317233e-06, "loss": 0.3527, "step": 7669 }, { "epoch": 0.4804334549554487, "grad_norm": 0.9122172897805222, "learning_rate": 5.5582743221405935e-06, "loss": 0.3966, "step": 7670 }, { "epoch": 0.480496092954791, "grad_norm": 0.88421890411236, "learning_rate": 5.55726625498528e-06, "loss": 0.4781, "step": 7671 }, { "epoch": 0.4805587309541333, "grad_norm": 0.9186521590835878, "learning_rate": 5.556258164892782e-06, "loss": 0.4359, "step": 7672 }, { "epoch": 0.4806213689534756, "grad_norm": 0.8618144556184048, "learning_rate": 5.555250051904594e-06, "loss": 0.387, "step": 7673 }, { "epoch": 0.48068400695281793, "grad_norm": 0.8073749255586788, "learning_rate": 5.554241916062213e-06, "loss": 0.3922, "step": 7674 }, { "epoch": 0.48074664495216024, "grad_norm": 0.8540921389679489, "learning_rate": 5.5532337574071315e-06, "loss": 0.4064, "step": 7675 }, { "epoch": 0.48080928295150255, "grad_norm": 0.7812973092311535, "learning_rate": 5.552225575980846e-06, "loss": 0.3763, "step": 7676 }, { "epoch": 0.48087192095084486, "grad_norm": 0.8425771607194925, "learning_rate": 5.551217371824852e-06, "loss": 0.4055, "step": 7677 }, { "epoch": 0.4809345589501871, "grad_norm": 0.8051373973061652, "learning_rate": 5.550209144980648e-06, "loss": 0.3755, "step": 7678 }, { "epoch": 0.4809971969495294, "grad_norm": 0.8709336728191635, "learning_rate": 5.5492008954897355e-06, "loss": 0.4167, "step": 7679 }, { "epoch": 0.4810598349488717, "grad_norm": 0.8765766789929856, "learning_rate": 5.5481926233936125e-06, "loss": 0.413, "step": 7680 }, { "epoch": 0.48112247294821403, "grad_norm": 0.8581232946522142, "learning_rate": 5.547184328733777e-06, "loss": 0.4002, "step": 7681 }, { "epoch": 0.48118511094755634, "grad_norm": 0.8719221086716762, "learning_rate": 5.546176011551733e-06, "loss": 0.4029, "step": 7682 }, { "epoch": 0.48124774894689865, "grad_norm": 0.7793072006804277, "learning_rate": 5.545167671888986e-06, "loss": 0.4353, "step": 7683 }, { "epoch": 0.48131038694624095, "grad_norm": 0.799857668761409, "learning_rate": 5.544159309787035e-06, "loss": 0.409, "step": 7684 }, { "epoch": 0.48137302494558326, "grad_norm": 0.845873913573082, "learning_rate": 5.543150925287386e-06, "loss": 0.4064, "step": 7685 }, { "epoch": 0.4814356629449255, "grad_norm": 0.7557915546130286, "learning_rate": 5.542142518431542e-06, "loss": 0.4091, "step": 7686 }, { "epoch": 0.4814983009442678, "grad_norm": 0.9184223916243539, "learning_rate": 5.541134089261013e-06, "loss": 0.4273, "step": 7687 }, { "epoch": 0.48156093894361013, "grad_norm": 0.8513861884386417, "learning_rate": 5.540125637817305e-06, "loss": 0.403, "step": 7688 }, { "epoch": 0.48162357694295244, "grad_norm": 0.6880930740303207, "learning_rate": 5.539117164141926e-06, "loss": 0.4452, "step": 7689 }, { "epoch": 0.48168621494229474, "grad_norm": 0.8254573122516012, "learning_rate": 5.538108668276383e-06, "loss": 0.4151, "step": 7690 }, { "epoch": 0.48174885294163705, "grad_norm": 0.7797556118799718, "learning_rate": 5.537100150262187e-06, "loss": 0.3848, "step": 7691 }, { "epoch": 0.48181149094097936, "grad_norm": 0.8967925255214368, "learning_rate": 5.53609161014085e-06, "loss": 0.3948, "step": 7692 }, { "epoch": 0.48187412894032167, "grad_norm": 0.8640583671431938, "learning_rate": 5.535083047953883e-06, "loss": 0.4092, "step": 7693 }, { "epoch": 0.481936766939664, "grad_norm": 0.7967631063589129, "learning_rate": 5.534074463742798e-06, "loss": 0.3866, "step": 7694 }, { "epoch": 0.4819994049390062, "grad_norm": 0.7641956087474686, "learning_rate": 5.533065857549108e-06, "loss": 0.367, "step": 7695 }, { "epoch": 0.48206204293834853, "grad_norm": 0.8963485574446745, "learning_rate": 5.53205722941433e-06, "loss": 0.3857, "step": 7696 }, { "epoch": 0.48212468093769084, "grad_norm": 0.944580011173828, "learning_rate": 5.5310485793799775e-06, "loss": 0.4253, "step": 7697 }, { "epoch": 0.48218731893703315, "grad_norm": 0.8510550187697165, "learning_rate": 5.530039907487568e-06, "loss": 0.475, "step": 7698 }, { "epoch": 0.48224995693637546, "grad_norm": 0.8616697217759945, "learning_rate": 5.529031213778615e-06, "loss": 0.4292, "step": 7699 }, { "epoch": 0.48231259493571776, "grad_norm": 0.7800443297089573, "learning_rate": 5.528022498294638e-06, "loss": 0.3888, "step": 7700 }, { "epoch": 0.48237523293506007, "grad_norm": 0.925188246454495, "learning_rate": 5.5270137610771605e-06, "loss": 0.4037, "step": 7701 }, { "epoch": 0.4824378709344024, "grad_norm": 0.8493890534670453, "learning_rate": 5.526005002167698e-06, "loss": 0.4391, "step": 7702 }, { "epoch": 0.48250050893374463, "grad_norm": 0.8728487454107166, "learning_rate": 5.52499622160777e-06, "loss": 0.4425, "step": 7703 }, { "epoch": 0.48256314693308694, "grad_norm": 0.8977319763805593, "learning_rate": 5.523987419438901e-06, "loss": 0.4259, "step": 7704 }, { "epoch": 0.48262578493242925, "grad_norm": 0.8801264628662789, "learning_rate": 5.522978595702614e-06, "loss": 0.43, "step": 7705 }, { "epoch": 0.48268842293177155, "grad_norm": 0.8819300031497571, "learning_rate": 5.5219697504404304e-06, "loss": 0.4026, "step": 7706 }, { "epoch": 0.48275106093111386, "grad_norm": 0.8044110553904317, "learning_rate": 5.520960883693873e-06, "loss": 0.3628, "step": 7707 }, { "epoch": 0.48281369893045617, "grad_norm": 0.8203354906400597, "learning_rate": 5.519951995504471e-06, "loss": 0.3608, "step": 7708 }, { "epoch": 0.4828763369297985, "grad_norm": 0.8516882801639779, "learning_rate": 5.518943085913747e-06, "loss": 0.4562, "step": 7709 }, { "epoch": 0.4829389749291408, "grad_norm": 0.929736840169864, "learning_rate": 5.517934154963232e-06, "loss": 0.4246, "step": 7710 }, { "epoch": 0.48300161292848304, "grad_norm": 0.7410728098883129, "learning_rate": 5.516925202694449e-06, "loss": 0.4502, "step": 7711 }, { "epoch": 0.48306425092782534, "grad_norm": 0.8116612019132464, "learning_rate": 5.515916229148927e-06, "loss": 0.4158, "step": 7712 }, { "epoch": 0.48312688892716765, "grad_norm": 0.8226922346445023, "learning_rate": 5.5149072343681986e-06, "loss": 0.3239, "step": 7713 }, { "epoch": 0.48318952692650996, "grad_norm": 0.7233908623822075, "learning_rate": 5.513898218393794e-06, "loss": 0.459, "step": 7714 }, { "epoch": 0.48325216492585227, "grad_norm": 0.7572135732354839, "learning_rate": 5.512889181267244e-06, "loss": 0.3749, "step": 7715 }, { "epoch": 0.4833148029251946, "grad_norm": 0.7930765173249704, "learning_rate": 5.511880123030078e-06, "loss": 0.3975, "step": 7716 }, { "epoch": 0.4833774409245369, "grad_norm": 0.8575700323284117, "learning_rate": 5.510871043723832e-06, "loss": 0.3986, "step": 7717 }, { "epoch": 0.4834400789238792, "grad_norm": 0.8368980199072655, "learning_rate": 5.509861943390038e-06, "loss": 0.4311, "step": 7718 }, { "epoch": 0.4835027169232215, "grad_norm": 0.8645323577754473, "learning_rate": 5.5088528220702335e-06, "loss": 0.4227, "step": 7719 }, { "epoch": 0.48356535492256375, "grad_norm": 0.7938727948552088, "learning_rate": 5.507843679805951e-06, "loss": 0.4053, "step": 7720 }, { "epoch": 0.48362799292190606, "grad_norm": 0.7922668101238655, "learning_rate": 5.5068345166387295e-06, "loss": 0.3725, "step": 7721 }, { "epoch": 0.48369063092124837, "grad_norm": 0.8000415176334377, "learning_rate": 5.505825332610104e-06, "loss": 0.3171, "step": 7722 }, { "epoch": 0.4837532689205907, "grad_norm": 0.7884556034731384, "learning_rate": 5.504816127761614e-06, "loss": 0.415, "step": 7723 }, { "epoch": 0.483815906919933, "grad_norm": 0.826169202592857, "learning_rate": 5.5038069021348e-06, "loss": 0.4258, "step": 7724 }, { "epoch": 0.4838785449192753, "grad_norm": 0.7995974515618545, "learning_rate": 5.5027976557711995e-06, "loss": 0.4181, "step": 7725 }, { "epoch": 0.4839411829186176, "grad_norm": 0.9505464077250152, "learning_rate": 5.501788388712353e-06, "loss": 0.4336, "step": 7726 }, { "epoch": 0.4840038209179599, "grad_norm": 0.8111076149969676, "learning_rate": 5.5007791009998046e-06, "loss": 0.386, "step": 7727 }, { "epoch": 0.48406645891730216, "grad_norm": 0.6387611720516625, "learning_rate": 5.499769792675096e-06, "loss": 0.4629, "step": 7728 }, { "epoch": 0.48412909691664446, "grad_norm": 0.9155316611749938, "learning_rate": 5.498760463779768e-06, "loss": 0.4209, "step": 7729 }, { "epoch": 0.48419173491598677, "grad_norm": 0.9669332673877128, "learning_rate": 5.497751114355369e-06, "loss": 0.4337, "step": 7730 }, { "epoch": 0.4842543729153291, "grad_norm": 0.9002353614593587, "learning_rate": 5.496741744443441e-06, "loss": 0.4395, "step": 7731 }, { "epoch": 0.4843170109146714, "grad_norm": 0.848119200754335, "learning_rate": 5.49573235408553e-06, "loss": 0.4471, "step": 7732 }, { "epoch": 0.4843796489140137, "grad_norm": 0.7767910106313825, "learning_rate": 5.494722943323183e-06, "loss": 0.3874, "step": 7733 }, { "epoch": 0.484442286913356, "grad_norm": 0.8015401777566884, "learning_rate": 5.493713512197949e-06, "loss": 0.3932, "step": 7734 }, { "epoch": 0.4845049249126983, "grad_norm": 0.7985747478342451, "learning_rate": 5.492704060751375e-06, "loss": 0.3811, "step": 7735 }, { "epoch": 0.4845675629120406, "grad_norm": 0.8282192876274803, "learning_rate": 5.4916945890250095e-06, "loss": 0.4375, "step": 7736 }, { "epoch": 0.48463020091138287, "grad_norm": 0.8635752072157362, "learning_rate": 5.490685097060404e-06, "loss": 0.3957, "step": 7737 }, { "epoch": 0.4846928389107252, "grad_norm": 0.9171520712613376, "learning_rate": 5.489675584899108e-06, "loss": 0.4138, "step": 7738 }, { "epoch": 0.4847554769100675, "grad_norm": 0.806206787776343, "learning_rate": 5.488666052582675e-06, "loss": 0.3979, "step": 7739 }, { "epoch": 0.4848181149094098, "grad_norm": 0.7829792049090019, "learning_rate": 5.4876565001526575e-06, "loss": 0.388, "step": 7740 }, { "epoch": 0.4848807529087521, "grad_norm": 0.7901590470444992, "learning_rate": 5.486646927650607e-06, "loss": 0.3725, "step": 7741 }, { "epoch": 0.4849433909080944, "grad_norm": 0.8124632410744235, "learning_rate": 5.485637335118077e-06, "loss": 0.4118, "step": 7742 }, { "epoch": 0.4850060289074367, "grad_norm": 0.9216434067110852, "learning_rate": 5.484627722596626e-06, "loss": 0.4141, "step": 7743 }, { "epoch": 0.485068666906779, "grad_norm": 0.8210086533155874, "learning_rate": 5.483618090127808e-06, "loss": 0.4338, "step": 7744 }, { "epoch": 0.4851313049061213, "grad_norm": 0.8900510932681304, "learning_rate": 5.48260843775318e-06, "loss": 0.4374, "step": 7745 }, { "epoch": 0.4851939429054636, "grad_norm": 0.8188941972595953, "learning_rate": 5.481598765514296e-06, "loss": 0.391, "step": 7746 }, { "epoch": 0.4852565809048059, "grad_norm": 0.819450124414884, "learning_rate": 5.4805890734527215e-06, "loss": 0.3967, "step": 7747 }, { "epoch": 0.4853192189041482, "grad_norm": 0.761281374153849, "learning_rate": 5.47957936161001e-06, "loss": 0.3873, "step": 7748 }, { "epoch": 0.4853818569034905, "grad_norm": 0.8207052763321432, "learning_rate": 5.478569630027724e-06, "loss": 0.4063, "step": 7749 }, { "epoch": 0.4854444949028328, "grad_norm": 0.8332657963462927, "learning_rate": 5.4775598787474196e-06, "loss": 0.411, "step": 7750 }, { "epoch": 0.4855071329021751, "grad_norm": 0.8215794893129449, "learning_rate": 5.476550107810665e-06, "loss": 0.3675, "step": 7751 }, { "epoch": 0.48556977090151743, "grad_norm": 0.8948148320049125, "learning_rate": 5.475540317259019e-06, "loss": 0.4162, "step": 7752 }, { "epoch": 0.4856324089008597, "grad_norm": 0.852292355224699, "learning_rate": 5.474530507134046e-06, "loss": 0.4315, "step": 7753 }, { "epoch": 0.485695046900202, "grad_norm": 0.858031268275807, "learning_rate": 5.473520677477308e-06, "loss": 0.396, "step": 7754 }, { "epoch": 0.4857576848995443, "grad_norm": 0.832032964246917, "learning_rate": 5.47251082833037e-06, "loss": 0.4229, "step": 7755 }, { "epoch": 0.4858203228988866, "grad_norm": 0.8797715616465962, "learning_rate": 5.471500959734801e-06, "loss": 0.3944, "step": 7756 }, { "epoch": 0.4858829608982289, "grad_norm": 0.8386398442424283, "learning_rate": 5.470491071732165e-06, "loss": 0.3854, "step": 7757 }, { "epoch": 0.4859455988975712, "grad_norm": 0.8751952253108982, "learning_rate": 5.469481164364027e-06, "loss": 0.3875, "step": 7758 }, { "epoch": 0.4860082368969135, "grad_norm": 0.8391006895200483, "learning_rate": 5.468471237671958e-06, "loss": 0.3737, "step": 7759 }, { "epoch": 0.48607087489625583, "grad_norm": 0.809911668904557, "learning_rate": 5.467461291697527e-06, "loss": 0.4191, "step": 7760 }, { "epoch": 0.48613351289559814, "grad_norm": 0.8073691518885828, "learning_rate": 5.466451326482301e-06, "loss": 0.3667, "step": 7761 }, { "epoch": 0.4861961508949404, "grad_norm": 0.9416004402721146, "learning_rate": 5.465441342067855e-06, "loss": 0.3958, "step": 7762 }, { "epoch": 0.4862587888942827, "grad_norm": 0.8245721100778752, "learning_rate": 5.464431338495753e-06, "loss": 0.4061, "step": 7763 }, { "epoch": 0.486321426893625, "grad_norm": 0.822583196873881, "learning_rate": 5.463421315807571e-06, "loss": 0.4426, "step": 7764 }, { "epoch": 0.4863840648929673, "grad_norm": 0.8181857902093336, "learning_rate": 5.462411274044884e-06, "loss": 0.4029, "step": 7765 }, { "epoch": 0.4864467028923096, "grad_norm": 0.8275071897583343, "learning_rate": 5.461401213249262e-06, "loss": 0.3932, "step": 7766 }, { "epoch": 0.48650934089165193, "grad_norm": 0.7581498959237205, "learning_rate": 5.46039113346228e-06, "loss": 0.4143, "step": 7767 }, { "epoch": 0.48657197889099424, "grad_norm": 0.9057280425237672, "learning_rate": 5.459381034725515e-06, "loss": 0.4615, "step": 7768 }, { "epoch": 0.48663461689033655, "grad_norm": 0.810523483213158, "learning_rate": 5.45837091708054e-06, "loss": 0.4044, "step": 7769 }, { "epoch": 0.4866972548896788, "grad_norm": 0.8346812716837084, "learning_rate": 5.457360780568934e-06, "loss": 0.4225, "step": 7770 }, { "epoch": 0.4867598928890211, "grad_norm": 0.8716370954635201, "learning_rate": 5.456350625232272e-06, "loss": 0.4125, "step": 7771 }, { "epoch": 0.4868225308883634, "grad_norm": 0.8953397982350769, "learning_rate": 5.455340451112134e-06, "loss": 0.4444, "step": 7772 }, { "epoch": 0.4868851688877057, "grad_norm": 0.8464681562568621, "learning_rate": 5.454330258250099e-06, "loss": 0.4378, "step": 7773 }, { "epoch": 0.48694780688704803, "grad_norm": 0.7974384959435178, "learning_rate": 5.453320046687747e-06, "loss": 0.3609, "step": 7774 }, { "epoch": 0.48701044488639034, "grad_norm": 0.7497932199572223, "learning_rate": 5.452309816466656e-06, "loss": 0.3558, "step": 7775 }, { "epoch": 0.48707308288573264, "grad_norm": 0.8303597583268895, "learning_rate": 5.45129956762841e-06, "loss": 0.4092, "step": 7776 }, { "epoch": 0.48713572088507495, "grad_norm": 0.8162001423251961, "learning_rate": 5.450289300214591e-06, "loss": 0.4268, "step": 7777 }, { "epoch": 0.4871983588844172, "grad_norm": 0.8735815911790556, "learning_rate": 5.449279014266778e-06, "loss": 0.386, "step": 7778 }, { "epoch": 0.4872609968837595, "grad_norm": 0.868322446134831, "learning_rate": 5.448268709826561e-06, "loss": 0.4476, "step": 7779 }, { "epoch": 0.4873236348831018, "grad_norm": 0.7916883636962564, "learning_rate": 5.447258386935518e-06, "loss": 0.3809, "step": 7780 }, { "epoch": 0.4873862728824441, "grad_norm": 0.8203766829141648, "learning_rate": 5.446248045635238e-06, "loss": 0.4153, "step": 7781 }, { "epoch": 0.48744891088178643, "grad_norm": 0.9093110519028507, "learning_rate": 5.445237685967304e-06, "loss": 0.4606, "step": 7782 }, { "epoch": 0.48751154888112874, "grad_norm": 0.833587871521891, "learning_rate": 5.444227307973306e-06, "loss": 0.3857, "step": 7783 }, { "epoch": 0.48757418688047105, "grad_norm": 0.8448530531149201, "learning_rate": 5.443216911694829e-06, "loss": 0.4112, "step": 7784 }, { "epoch": 0.48763682487981336, "grad_norm": 0.8793244908239211, "learning_rate": 5.442206497173461e-06, "loss": 0.463, "step": 7785 }, { "epoch": 0.48769946287915567, "grad_norm": 0.8464728666410072, "learning_rate": 5.441196064450793e-06, "loss": 0.3815, "step": 7786 }, { "epoch": 0.4877621008784979, "grad_norm": 0.7776044268184341, "learning_rate": 5.440185613568412e-06, "loss": 0.377, "step": 7787 }, { "epoch": 0.4878247388778402, "grad_norm": 0.8183689268710741, "learning_rate": 5.439175144567909e-06, "loss": 0.4212, "step": 7788 }, { "epoch": 0.48788737687718253, "grad_norm": 0.8160233446564648, "learning_rate": 5.438164657490877e-06, "loss": 0.3949, "step": 7789 }, { "epoch": 0.48795001487652484, "grad_norm": 0.869160858263596, "learning_rate": 5.437154152378905e-06, "loss": 0.4287, "step": 7790 }, { "epoch": 0.48801265287586715, "grad_norm": 0.8800247310376946, "learning_rate": 5.436143629273588e-06, "loss": 0.4354, "step": 7791 }, { "epoch": 0.48807529087520946, "grad_norm": 0.8507683926483326, "learning_rate": 5.435133088216517e-06, "loss": 0.3789, "step": 7792 }, { "epoch": 0.48813792887455176, "grad_norm": 0.8143354900272304, "learning_rate": 5.434122529249287e-06, "loss": 0.3891, "step": 7793 }, { "epoch": 0.48820056687389407, "grad_norm": 0.8595117462281867, "learning_rate": 5.433111952413496e-06, "loss": 0.4012, "step": 7794 }, { "epoch": 0.4882632048732363, "grad_norm": 0.8720254475095656, "learning_rate": 5.432101357750734e-06, "loss": 0.435, "step": 7795 }, { "epoch": 0.48832584287257863, "grad_norm": 0.8291313221331921, "learning_rate": 5.431090745302601e-06, "loss": 0.4101, "step": 7796 }, { "epoch": 0.48838848087192094, "grad_norm": 0.8016292701947417, "learning_rate": 5.430080115110692e-06, "loss": 0.3657, "step": 7797 }, { "epoch": 0.48845111887126325, "grad_norm": 0.839904107659988, "learning_rate": 5.429069467216606e-06, "loss": 0.3815, "step": 7798 }, { "epoch": 0.48851375687060555, "grad_norm": 0.8539687843241052, "learning_rate": 5.4280588016619415e-06, "loss": 0.4132, "step": 7799 }, { "epoch": 0.48857639486994786, "grad_norm": 0.7967949788469144, "learning_rate": 5.4270481184882985e-06, "loss": 0.4254, "step": 7800 }, { "epoch": 0.48863903286929017, "grad_norm": 0.8480529532204412, "learning_rate": 5.4260374177372735e-06, "loss": 0.4064, "step": 7801 }, { "epoch": 0.4887016708686325, "grad_norm": 0.7962109860627781, "learning_rate": 5.42502669945047e-06, "loss": 0.445, "step": 7802 }, { "epoch": 0.4887643088679748, "grad_norm": 0.6114569004983532, "learning_rate": 5.4240159636694886e-06, "loss": 0.4677, "step": 7803 }, { "epoch": 0.48882694686731704, "grad_norm": 0.7863609040109345, "learning_rate": 5.423005210435932e-06, "loss": 0.4233, "step": 7804 }, { "epoch": 0.48888958486665934, "grad_norm": 0.8592966359066769, "learning_rate": 5.421994439791401e-06, "loss": 0.4444, "step": 7805 }, { "epoch": 0.48895222286600165, "grad_norm": 0.7787910218400447, "learning_rate": 5.420983651777501e-06, "loss": 0.3976, "step": 7806 }, { "epoch": 0.48901486086534396, "grad_norm": 0.8596040888314856, "learning_rate": 5.419972846435838e-06, "loss": 0.4009, "step": 7807 }, { "epoch": 0.48907749886468627, "grad_norm": 0.8181128216708677, "learning_rate": 5.418962023808013e-06, "loss": 0.3904, "step": 7808 }, { "epoch": 0.4891401368640286, "grad_norm": 0.871837638984344, "learning_rate": 5.417951183935634e-06, "loss": 0.3967, "step": 7809 }, { "epoch": 0.4892027748633709, "grad_norm": 0.7983377523408712, "learning_rate": 5.416940326860305e-06, "loss": 0.4202, "step": 7810 }, { "epoch": 0.4892654128627132, "grad_norm": 0.8806497239697385, "learning_rate": 5.415929452623636e-06, "loss": 0.4004, "step": 7811 }, { "epoch": 0.48932805086205544, "grad_norm": 0.9073317767383656, "learning_rate": 5.414918561267234e-06, "loss": 0.4517, "step": 7812 }, { "epoch": 0.48939068886139775, "grad_norm": 0.7808474111107123, "learning_rate": 5.413907652832706e-06, "loss": 0.3627, "step": 7813 }, { "epoch": 0.48945332686074006, "grad_norm": 0.8431000876837843, "learning_rate": 5.412896727361663e-06, "loss": 0.4238, "step": 7814 }, { "epoch": 0.48951596486008236, "grad_norm": 0.6622793784676336, "learning_rate": 5.411885784895713e-06, "loss": 0.4635, "step": 7815 }, { "epoch": 0.48957860285942467, "grad_norm": 0.7699498279969255, "learning_rate": 5.4108748254764685e-06, "loss": 0.4347, "step": 7816 }, { "epoch": 0.489641240858767, "grad_norm": 0.7959289560929675, "learning_rate": 5.40986384914554e-06, "loss": 0.3887, "step": 7817 }, { "epoch": 0.4897038788581093, "grad_norm": 0.8665892572790617, "learning_rate": 5.408852855944539e-06, "loss": 0.3556, "step": 7818 }, { "epoch": 0.4897665168574516, "grad_norm": 0.8359095099015971, "learning_rate": 5.407841845915077e-06, "loss": 0.4116, "step": 7819 }, { "epoch": 0.48982915485679385, "grad_norm": 0.8660351534738039, "learning_rate": 5.406830819098771e-06, "loss": 0.4201, "step": 7820 }, { "epoch": 0.48989179285613615, "grad_norm": 0.8247001923524315, "learning_rate": 5.4058197755372334e-06, "loss": 0.4078, "step": 7821 }, { "epoch": 0.48995443085547846, "grad_norm": 0.8799007744994145, "learning_rate": 5.404808715272079e-06, "loss": 0.3797, "step": 7822 }, { "epoch": 0.49001706885482077, "grad_norm": 0.8506934694515301, "learning_rate": 5.403797638344919e-06, "loss": 0.3795, "step": 7823 }, { "epoch": 0.4900797068541631, "grad_norm": 0.8184728155438548, "learning_rate": 5.402786544797376e-06, "loss": 0.3699, "step": 7824 }, { "epoch": 0.4901423448535054, "grad_norm": 0.7681290265944334, "learning_rate": 5.401775434671064e-06, "loss": 0.349, "step": 7825 }, { "epoch": 0.4902049828528477, "grad_norm": 0.8441266430696093, "learning_rate": 5.400764308007601e-06, "loss": 0.4283, "step": 7826 }, { "epoch": 0.49026762085219, "grad_norm": 0.7981436740186872, "learning_rate": 5.399753164848604e-06, "loss": 0.392, "step": 7827 }, { "epoch": 0.4903302588515323, "grad_norm": 0.7990938495785406, "learning_rate": 5.398742005235691e-06, "loss": 0.4194, "step": 7828 }, { "epoch": 0.49039289685087456, "grad_norm": 0.887476411284897, "learning_rate": 5.397730829210484e-06, "loss": 0.4062, "step": 7829 }, { "epoch": 0.49045553485021687, "grad_norm": 0.842788637349625, "learning_rate": 5.396719636814604e-06, "loss": 0.3864, "step": 7830 }, { "epoch": 0.4905181728495592, "grad_norm": 0.8406999911505058, "learning_rate": 5.395708428089669e-06, "loss": 0.3869, "step": 7831 }, { "epoch": 0.4905808108489015, "grad_norm": 0.8557224845774545, "learning_rate": 5.394697203077301e-06, "loss": 0.4229, "step": 7832 }, { "epoch": 0.4906434488482438, "grad_norm": 0.7731646841132203, "learning_rate": 5.393685961819123e-06, "loss": 0.4141, "step": 7833 }, { "epoch": 0.4907060868475861, "grad_norm": 0.8114024183385804, "learning_rate": 5.39267470435676e-06, "loss": 0.3988, "step": 7834 }, { "epoch": 0.4907687248469284, "grad_norm": 0.8523156208942252, "learning_rate": 5.39166343073183e-06, "loss": 0.4244, "step": 7835 }, { "epoch": 0.4908313628462707, "grad_norm": 0.8470661426757569, "learning_rate": 5.390652140985963e-06, "loss": 0.4148, "step": 7836 }, { "epoch": 0.49089400084561297, "grad_norm": 0.8284440078502625, "learning_rate": 5.38964083516078e-06, "loss": 0.3933, "step": 7837 }, { "epoch": 0.4909566388449553, "grad_norm": 0.8739395705425842, "learning_rate": 5.388629513297908e-06, "loss": 0.3703, "step": 7838 }, { "epoch": 0.4910192768442976, "grad_norm": 1.0242507026505416, "learning_rate": 5.387618175438973e-06, "loss": 0.3997, "step": 7839 }, { "epoch": 0.4910819148436399, "grad_norm": 0.6648881210719019, "learning_rate": 5.386606821625602e-06, "loss": 0.4533, "step": 7840 }, { "epoch": 0.4911445528429822, "grad_norm": 0.8129163002880583, "learning_rate": 5.385595451899424e-06, "loss": 0.4157, "step": 7841 }, { "epoch": 0.4912071908423245, "grad_norm": 0.8402643074034954, "learning_rate": 5.3845840663020635e-06, "loss": 0.3892, "step": 7842 }, { "epoch": 0.4912698288416668, "grad_norm": 0.8498948508182753, "learning_rate": 5.3835726648751514e-06, "loss": 0.4158, "step": 7843 }, { "epoch": 0.4913324668410091, "grad_norm": 0.8046864053487216, "learning_rate": 5.382561247660319e-06, "loss": 0.3919, "step": 7844 }, { "epoch": 0.4913951048403514, "grad_norm": 0.8102035442754907, "learning_rate": 5.381549814699194e-06, "loss": 0.3658, "step": 7845 }, { "epoch": 0.4914577428396937, "grad_norm": 0.835587213368401, "learning_rate": 5.380538366033408e-06, "loss": 0.4012, "step": 7846 }, { "epoch": 0.491520380839036, "grad_norm": 0.926219588846347, "learning_rate": 5.37952690170459e-06, "loss": 0.4104, "step": 7847 }, { "epoch": 0.4915830188383783, "grad_norm": 0.8194331266439853, "learning_rate": 5.378515421754377e-06, "loss": 0.3866, "step": 7848 }, { "epoch": 0.4916456568377206, "grad_norm": 0.6835926797526378, "learning_rate": 5.3775039262243975e-06, "loss": 0.4826, "step": 7849 }, { "epoch": 0.4917082948370629, "grad_norm": 0.8571309467996837, "learning_rate": 5.376492415156288e-06, "loss": 0.4293, "step": 7850 }, { "epoch": 0.4917709328364052, "grad_norm": 0.8596236155413312, "learning_rate": 5.3754808885916785e-06, "loss": 0.4357, "step": 7851 }, { "epoch": 0.4918335708357475, "grad_norm": 0.8455601125330918, "learning_rate": 5.3744693465722075e-06, "loss": 0.4046, "step": 7852 }, { "epoch": 0.49189620883508983, "grad_norm": 0.7693286592471997, "learning_rate": 5.3734577891395076e-06, "loss": 0.4149, "step": 7853 }, { "epoch": 0.4919588468344321, "grad_norm": 0.8622862297643794, "learning_rate": 5.372446216335218e-06, "loss": 0.387, "step": 7854 }, { "epoch": 0.4920214848337744, "grad_norm": 0.758349678677896, "learning_rate": 5.371434628200971e-06, "loss": 0.401, "step": 7855 }, { "epoch": 0.4920841228331167, "grad_norm": 0.8849485600509556, "learning_rate": 5.370423024778406e-06, "loss": 0.4083, "step": 7856 }, { "epoch": 0.492146760832459, "grad_norm": 0.8043598312548669, "learning_rate": 5.36941140610916e-06, "loss": 0.3774, "step": 7857 }, { "epoch": 0.4922093988318013, "grad_norm": 0.8220235380744366, "learning_rate": 5.3683997722348735e-06, "loss": 0.3856, "step": 7858 }, { "epoch": 0.4922720368311436, "grad_norm": 0.7969904963749725, "learning_rate": 5.367388123197183e-06, "loss": 0.4149, "step": 7859 }, { "epoch": 0.49233467483048593, "grad_norm": 0.7920737713759081, "learning_rate": 5.366376459037731e-06, "loss": 0.397, "step": 7860 }, { "epoch": 0.49239731282982824, "grad_norm": 0.8114401972699259, "learning_rate": 5.365364779798152e-06, "loss": 0.394, "step": 7861 }, { "epoch": 0.4924599508291705, "grad_norm": 0.761833287214267, "learning_rate": 5.364353085520094e-06, "loss": 0.4012, "step": 7862 }, { "epoch": 0.4925225888285128, "grad_norm": 0.8901351018065041, "learning_rate": 5.363341376245195e-06, "loss": 0.4216, "step": 7863 }, { "epoch": 0.4925852268278551, "grad_norm": 0.7683836309605898, "learning_rate": 5.362329652015098e-06, "loss": 0.3382, "step": 7864 }, { "epoch": 0.4926478648271974, "grad_norm": 0.8053831542438471, "learning_rate": 5.361317912871443e-06, "loss": 0.4203, "step": 7865 }, { "epoch": 0.4927105028265397, "grad_norm": 0.863627552505772, "learning_rate": 5.360306158855877e-06, "loss": 0.3956, "step": 7866 }, { "epoch": 0.49277314082588203, "grad_norm": 0.8685823831818547, "learning_rate": 5.359294390010042e-06, "loss": 0.4121, "step": 7867 }, { "epoch": 0.49283577882522434, "grad_norm": 0.7673873367748734, "learning_rate": 5.358282606375585e-06, "loss": 0.388, "step": 7868 }, { "epoch": 0.49289841682456664, "grad_norm": 1.0311238123950504, "learning_rate": 5.357270807994148e-06, "loss": 0.4243, "step": 7869 }, { "epoch": 0.49296105482390895, "grad_norm": 0.5899100513273636, "learning_rate": 5.356258994907377e-06, "loss": 0.4528, "step": 7870 }, { "epoch": 0.4930236928232512, "grad_norm": 0.7986596359538951, "learning_rate": 5.355247167156923e-06, "loss": 0.3892, "step": 7871 }, { "epoch": 0.4930863308225935, "grad_norm": 0.8978114359982385, "learning_rate": 5.354235324784428e-06, "loss": 0.4201, "step": 7872 }, { "epoch": 0.4931489688219358, "grad_norm": 0.8870222247736032, "learning_rate": 5.353223467831541e-06, "loss": 0.4077, "step": 7873 }, { "epoch": 0.4932116068212781, "grad_norm": 0.835042582701501, "learning_rate": 5.352211596339909e-06, "loss": 0.392, "step": 7874 }, { "epoch": 0.49327424482062043, "grad_norm": 0.8521420935256944, "learning_rate": 5.351199710351185e-06, "loss": 0.4016, "step": 7875 }, { "epoch": 0.49333688281996274, "grad_norm": 0.790092956141432, "learning_rate": 5.350187809907015e-06, "loss": 0.4177, "step": 7876 }, { "epoch": 0.49339952081930505, "grad_norm": 0.8494269544932228, "learning_rate": 5.34917589504905e-06, "loss": 0.4231, "step": 7877 }, { "epoch": 0.49346215881864736, "grad_norm": 0.7821824369536213, "learning_rate": 5.348163965818939e-06, "loss": 0.4001, "step": 7878 }, { "epoch": 0.4935247968179896, "grad_norm": 0.7996456881929658, "learning_rate": 5.347152022258335e-06, "loss": 0.3693, "step": 7879 }, { "epoch": 0.4935874348173319, "grad_norm": 0.8805903743372407, "learning_rate": 5.34614006440889e-06, "loss": 0.4454, "step": 7880 }, { "epoch": 0.4936500728166742, "grad_norm": 0.5782300649844151, "learning_rate": 5.345128092312255e-06, "loss": 0.4661, "step": 7881 }, { "epoch": 0.49371271081601653, "grad_norm": 0.8711726444194026, "learning_rate": 5.344116106010085e-06, "loss": 0.3903, "step": 7882 }, { "epoch": 0.49377534881535884, "grad_norm": 0.8249134436194034, "learning_rate": 5.34310410554403e-06, "loss": 0.4417, "step": 7883 }, { "epoch": 0.49383798681470115, "grad_norm": 0.8117752207448842, "learning_rate": 5.342092090955747e-06, "loss": 0.4327, "step": 7884 }, { "epoch": 0.49390062481404345, "grad_norm": 0.8039528914119225, "learning_rate": 5.341080062286892e-06, "loss": 0.428, "step": 7885 }, { "epoch": 0.49396326281338576, "grad_norm": 0.8662733260885733, "learning_rate": 5.3400680195791165e-06, "loss": 0.4118, "step": 7886 }, { "epoch": 0.494025900812728, "grad_norm": 0.8270479419120977, "learning_rate": 5.339055962874078e-06, "loss": 0.3956, "step": 7887 }, { "epoch": 0.4940885388120703, "grad_norm": 0.799513815582835, "learning_rate": 5.338043892213434e-06, "loss": 0.3779, "step": 7888 }, { "epoch": 0.49415117681141263, "grad_norm": 0.7806505730769561, "learning_rate": 5.3370318076388405e-06, "loss": 0.3595, "step": 7889 }, { "epoch": 0.49421381481075494, "grad_norm": 0.9095921763509253, "learning_rate": 5.336019709191955e-06, "loss": 0.4271, "step": 7890 }, { "epoch": 0.49427645281009724, "grad_norm": 0.6893381859105598, "learning_rate": 5.335007596914435e-06, "loss": 0.4833, "step": 7891 }, { "epoch": 0.49433909080943955, "grad_norm": 0.8073637246194516, "learning_rate": 5.333995470847943e-06, "loss": 0.4166, "step": 7892 }, { "epoch": 0.49440172880878186, "grad_norm": 0.8322161369184835, "learning_rate": 5.332983331034133e-06, "loss": 0.4047, "step": 7893 }, { "epoch": 0.49446436680812417, "grad_norm": 0.8206220806994431, "learning_rate": 5.331971177514667e-06, "loss": 0.3869, "step": 7894 }, { "epoch": 0.4945270048074665, "grad_norm": 0.6488075499760424, "learning_rate": 5.330959010331207e-06, "loss": 0.4887, "step": 7895 }, { "epoch": 0.4945896428068087, "grad_norm": 0.8070859165589779, "learning_rate": 5.329946829525413e-06, "loss": 0.4046, "step": 7896 }, { "epoch": 0.49465228080615103, "grad_norm": 0.8743892645906777, "learning_rate": 5.328934635138945e-06, "loss": 0.4081, "step": 7897 }, { "epoch": 0.49471491880549334, "grad_norm": 0.8516912683525127, "learning_rate": 5.327922427213466e-06, "loss": 0.4212, "step": 7898 }, { "epoch": 0.49477755680483565, "grad_norm": 0.8319476676261982, "learning_rate": 5.326910205790642e-06, "loss": 0.424, "step": 7899 }, { "epoch": 0.49484019480417796, "grad_norm": 0.8555964868166106, "learning_rate": 5.325897970912131e-06, "loss": 0.4158, "step": 7900 }, { "epoch": 0.49490283280352027, "grad_norm": 0.864033225706373, "learning_rate": 5.3248857226196e-06, "loss": 0.4032, "step": 7901 }, { "epoch": 0.4949654708028626, "grad_norm": 0.7707699647158502, "learning_rate": 5.323873460954709e-06, "loss": 0.3582, "step": 7902 }, { "epoch": 0.4950281088022049, "grad_norm": 0.8530023822428766, "learning_rate": 5.3228611859591306e-06, "loss": 0.4086, "step": 7903 }, { "epoch": 0.49509074680154713, "grad_norm": 0.8298906351227376, "learning_rate": 5.321848897674523e-06, "loss": 0.4033, "step": 7904 }, { "epoch": 0.49515338480088944, "grad_norm": 0.8266420842412621, "learning_rate": 5.320836596142557e-06, "loss": 0.4005, "step": 7905 }, { "epoch": 0.49521602280023175, "grad_norm": 0.7812994653625422, "learning_rate": 5.319824281404896e-06, "loss": 0.4242, "step": 7906 }, { "epoch": 0.49527866079957406, "grad_norm": 0.8369635012752109, "learning_rate": 5.318811953503208e-06, "loss": 0.4095, "step": 7907 }, { "epoch": 0.49534129879891636, "grad_norm": 0.8657117774282258, "learning_rate": 5.317799612479161e-06, "loss": 0.4136, "step": 7908 }, { "epoch": 0.49540393679825867, "grad_norm": 0.8288579604049164, "learning_rate": 5.316787258374424e-06, "loss": 0.4363, "step": 7909 }, { "epoch": 0.495466574797601, "grad_norm": 0.8538797602504867, "learning_rate": 5.315774891230664e-06, "loss": 0.4208, "step": 7910 }, { "epoch": 0.4955292127969433, "grad_norm": 0.8318187547466278, "learning_rate": 5.31476251108955e-06, "loss": 0.4148, "step": 7911 }, { "epoch": 0.4955918507962856, "grad_norm": 0.7081102965052162, "learning_rate": 5.313750117992754e-06, "loss": 0.4722, "step": 7912 }, { "epoch": 0.49565448879562785, "grad_norm": 0.8283823256182651, "learning_rate": 5.312737711981943e-06, "loss": 0.3954, "step": 7913 }, { "epoch": 0.49571712679497015, "grad_norm": 0.8038997895633818, "learning_rate": 5.3117252930987925e-06, "loss": 0.3652, "step": 7914 }, { "epoch": 0.49577976479431246, "grad_norm": 0.7747548633190274, "learning_rate": 5.31071286138497e-06, "loss": 0.3998, "step": 7915 }, { "epoch": 0.49584240279365477, "grad_norm": 0.848885694076301, "learning_rate": 5.3097004168821475e-06, "loss": 0.4001, "step": 7916 }, { "epoch": 0.4959050407929971, "grad_norm": 0.9014473592385716, "learning_rate": 5.308687959631998e-06, "loss": 0.3995, "step": 7917 }, { "epoch": 0.4959676787923394, "grad_norm": 0.8442513227076458, "learning_rate": 5.307675489676196e-06, "loss": 0.4105, "step": 7918 }, { "epoch": 0.4960303167916817, "grad_norm": 0.8472701083013067, "learning_rate": 5.306663007056414e-06, "loss": 0.367, "step": 7919 }, { "epoch": 0.496092954791024, "grad_norm": 0.8038317430648945, "learning_rate": 5.305650511814325e-06, "loss": 0.3832, "step": 7920 }, { "epoch": 0.49615559279036625, "grad_norm": 0.8135051057217856, "learning_rate": 5.304638003991604e-06, "loss": 0.4016, "step": 7921 }, { "epoch": 0.49621823078970856, "grad_norm": 0.829587598439087, "learning_rate": 5.303625483629926e-06, "loss": 0.409, "step": 7922 }, { "epoch": 0.49628086878905087, "grad_norm": 0.8392312163579224, "learning_rate": 5.302612950770969e-06, "loss": 0.4207, "step": 7923 }, { "epoch": 0.4963435067883932, "grad_norm": 0.8826883802058997, "learning_rate": 5.301600405456405e-06, "loss": 0.4311, "step": 7924 }, { "epoch": 0.4964061447877355, "grad_norm": 0.7611269608598478, "learning_rate": 5.3005878477279115e-06, "loss": 0.39, "step": 7925 }, { "epoch": 0.4964687827870778, "grad_norm": 0.8507211383984858, "learning_rate": 5.299575277627167e-06, "loss": 0.4042, "step": 7926 }, { "epoch": 0.4965314207864201, "grad_norm": 0.7646915413553428, "learning_rate": 5.298562695195849e-06, "loss": 0.3777, "step": 7927 }, { "epoch": 0.4965940587857624, "grad_norm": 0.8321403105894943, "learning_rate": 5.297550100475636e-06, "loss": 0.3869, "step": 7928 }, { "epoch": 0.49665669678510466, "grad_norm": 0.9178049796167754, "learning_rate": 5.296537493508204e-06, "loss": 0.4434, "step": 7929 }, { "epoch": 0.49671933478444696, "grad_norm": 0.7988394349718981, "learning_rate": 5.2955248743352325e-06, "loss": 0.3926, "step": 7930 }, { "epoch": 0.49678197278378927, "grad_norm": 0.7514736174165068, "learning_rate": 5.294512242998405e-06, "loss": 0.3351, "step": 7931 }, { "epoch": 0.4968446107831316, "grad_norm": 0.782004623661919, "learning_rate": 5.293499599539399e-06, "loss": 0.4195, "step": 7932 }, { "epoch": 0.4969072487824739, "grad_norm": 0.8148770156390843, "learning_rate": 5.292486943999893e-06, "loss": 0.387, "step": 7933 }, { "epoch": 0.4969698867818162, "grad_norm": 0.8062887252967835, "learning_rate": 5.2914742764215695e-06, "loss": 0.3989, "step": 7934 }, { "epoch": 0.4970325247811585, "grad_norm": 0.7760871119834902, "learning_rate": 5.2904615968461135e-06, "loss": 0.3833, "step": 7935 }, { "epoch": 0.4970951627805008, "grad_norm": 0.8435145787087438, "learning_rate": 5.289448905315203e-06, "loss": 0.3965, "step": 7936 }, { "epoch": 0.4971578007798431, "grad_norm": 0.7704307295350524, "learning_rate": 5.288436201870522e-06, "loss": 0.3775, "step": 7937 }, { "epoch": 0.49722043877918537, "grad_norm": 0.8159246068451939, "learning_rate": 5.287423486553753e-06, "loss": 0.3849, "step": 7938 }, { "epoch": 0.4972830767785277, "grad_norm": 0.8544436003446759, "learning_rate": 5.28641075940658e-06, "loss": 0.4313, "step": 7939 }, { "epoch": 0.49734571477787, "grad_norm": 0.7892392216351488, "learning_rate": 5.285398020470688e-06, "loss": 0.3995, "step": 7940 }, { "epoch": 0.4974083527772123, "grad_norm": 0.8444807168901788, "learning_rate": 5.28438526978776e-06, "loss": 0.4257, "step": 7941 }, { "epoch": 0.4974709907765546, "grad_norm": 0.8263308388183, "learning_rate": 5.28337250739948e-06, "loss": 0.4291, "step": 7942 }, { "epoch": 0.4975336287758969, "grad_norm": 0.8658541577837781, "learning_rate": 5.282359733347538e-06, "loss": 0.388, "step": 7943 }, { "epoch": 0.4975962667752392, "grad_norm": 0.8114391300245896, "learning_rate": 5.281346947673614e-06, "loss": 0.3934, "step": 7944 }, { "epoch": 0.4976589047745815, "grad_norm": 0.8508272394641022, "learning_rate": 5.280334150419399e-06, "loss": 0.3635, "step": 7945 }, { "epoch": 0.4977215427739238, "grad_norm": 0.8053853064409539, "learning_rate": 5.279321341626576e-06, "loss": 0.3869, "step": 7946 }, { "epoch": 0.4977841807732661, "grad_norm": 0.8392272243803702, "learning_rate": 5.278308521336838e-06, "loss": 0.4042, "step": 7947 }, { "epoch": 0.4978468187726084, "grad_norm": 0.8203888529599408, "learning_rate": 5.277295689591867e-06, "loss": 0.3744, "step": 7948 }, { "epoch": 0.4979094567719507, "grad_norm": 0.6440666730972482, "learning_rate": 5.276282846433356e-06, "loss": 0.4639, "step": 7949 }, { "epoch": 0.497972094771293, "grad_norm": 0.7988735391111218, "learning_rate": 5.2752699919029916e-06, "loss": 0.3568, "step": 7950 }, { "epoch": 0.4980347327706353, "grad_norm": 0.8294870455593558, "learning_rate": 5.274257126042461e-06, "loss": 0.3705, "step": 7951 }, { "epoch": 0.4980973707699776, "grad_norm": 0.8754582511620503, "learning_rate": 5.273244248893459e-06, "loss": 0.3617, "step": 7952 }, { "epoch": 0.49816000876931993, "grad_norm": 0.8599521105047396, "learning_rate": 5.272231360497671e-06, "loss": 0.4236, "step": 7953 }, { "epoch": 0.49822264676866224, "grad_norm": 0.8208086732258043, "learning_rate": 5.271218460896791e-06, "loss": 0.4124, "step": 7954 }, { "epoch": 0.4982852847680045, "grad_norm": 0.8444877968827157, "learning_rate": 5.270205550132508e-06, "loss": 0.4116, "step": 7955 }, { "epoch": 0.4983479227673468, "grad_norm": 0.7994621899656763, "learning_rate": 5.269192628246515e-06, "loss": 0.393, "step": 7956 }, { "epoch": 0.4984105607666891, "grad_norm": 0.7453354700303731, "learning_rate": 5.268179695280502e-06, "loss": 0.4128, "step": 7957 }, { "epoch": 0.4984731987660314, "grad_norm": 0.6216481618109457, "learning_rate": 5.2671667512761644e-06, "loss": 0.4464, "step": 7958 }, { "epoch": 0.4985358367653737, "grad_norm": 0.8202839752861416, "learning_rate": 5.266153796275193e-06, "loss": 0.3953, "step": 7959 }, { "epoch": 0.498598474764716, "grad_norm": 0.7968070868647587, "learning_rate": 5.265140830319283e-06, "loss": 0.4577, "step": 7960 }, { "epoch": 0.49866111276405833, "grad_norm": 0.8147653442841004, "learning_rate": 5.264127853450127e-06, "loss": 0.4213, "step": 7961 }, { "epoch": 0.49872375076340064, "grad_norm": 0.81848027929116, "learning_rate": 5.2631148657094186e-06, "loss": 0.4019, "step": 7962 }, { "epoch": 0.4987863887627429, "grad_norm": 0.811596455275525, "learning_rate": 5.262101867138855e-06, "loss": 0.3965, "step": 7963 }, { "epoch": 0.4988490267620852, "grad_norm": 0.6115116410096689, "learning_rate": 5.2610888577801275e-06, "loss": 0.4718, "step": 7964 }, { "epoch": 0.4989116647614275, "grad_norm": 0.8078039146125968, "learning_rate": 5.260075837674936e-06, "loss": 0.4231, "step": 7965 }, { "epoch": 0.4989743027607698, "grad_norm": 0.7915967685217743, "learning_rate": 5.259062806864975e-06, "loss": 0.389, "step": 7966 }, { "epoch": 0.4990369407601121, "grad_norm": 0.8861880610229149, "learning_rate": 5.258049765391939e-06, "loss": 0.4373, "step": 7967 }, { "epoch": 0.49909957875945443, "grad_norm": 0.8399777850189752, "learning_rate": 5.257036713297527e-06, "loss": 0.3754, "step": 7968 }, { "epoch": 0.49916221675879674, "grad_norm": 0.8622998758205977, "learning_rate": 5.256023650623438e-06, "loss": 0.4377, "step": 7969 }, { "epoch": 0.49922485475813905, "grad_norm": 0.6751485501308481, "learning_rate": 5.255010577411367e-06, "loss": 0.4477, "step": 7970 }, { "epoch": 0.4992874927574813, "grad_norm": 0.8540396117937091, "learning_rate": 5.253997493703014e-06, "loss": 0.4349, "step": 7971 }, { "epoch": 0.4993501307568236, "grad_norm": 0.8281200389726247, "learning_rate": 5.252984399540075e-06, "loss": 0.3779, "step": 7972 }, { "epoch": 0.4994127687561659, "grad_norm": 0.8104148694047016, "learning_rate": 5.251971294964253e-06, "loss": 0.4024, "step": 7973 }, { "epoch": 0.4994754067555082, "grad_norm": 0.8357479158163168, "learning_rate": 5.250958180017245e-06, "loss": 0.3662, "step": 7974 }, { "epoch": 0.49953804475485053, "grad_norm": 0.8496535940790881, "learning_rate": 5.249945054740753e-06, "loss": 0.4898, "step": 7975 }, { "epoch": 0.49960068275419284, "grad_norm": 0.812400015028082, "learning_rate": 5.248931919176474e-06, "loss": 0.4026, "step": 7976 }, { "epoch": 0.49966332075353515, "grad_norm": 0.8226028563850549, "learning_rate": 5.247918773366112e-06, "loss": 0.3925, "step": 7977 }, { "epoch": 0.49972595875287745, "grad_norm": 0.815346627117892, "learning_rate": 5.246905617351368e-06, "loss": 0.3981, "step": 7978 }, { "epoch": 0.49978859675221976, "grad_norm": 0.7904925247117606, "learning_rate": 5.245892451173943e-06, "loss": 0.3678, "step": 7979 }, { "epoch": 0.499851234751562, "grad_norm": 0.7584586341950792, "learning_rate": 5.244879274875538e-06, "loss": 0.4021, "step": 7980 }, { "epoch": 0.4999138727509043, "grad_norm": 0.8300592137564236, "learning_rate": 5.243866088497855e-06, "loss": 0.4086, "step": 7981 }, { "epoch": 0.4999765107502466, "grad_norm": 0.8806050242063084, "learning_rate": 5.242852892082602e-06, "loss": 0.3989, "step": 7982 }, { "epoch": 0.500039148749589, "grad_norm": 0.8506439922381973, "learning_rate": 5.241839685671477e-06, "loss": 0.4501, "step": 7983 }, { "epoch": 0.5001017867489312, "grad_norm": 0.7935941142765665, "learning_rate": 5.240826469306187e-06, "loss": 0.4177, "step": 7984 }, { "epoch": 0.5001644247482735, "grad_norm": 0.7754983338674267, "learning_rate": 5.2398132430284324e-06, "loss": 0.3946, "step": 7985 }, { "epoch": 0.5002270627476159, "grad_norm": 0.8513153618286038, "learning_rate": 5.2388000068799215e-06, "loss": 0.4112, "step": 7986 }, { "epoch": 0.5002897007469581, "grad_norm": 0.8486055503098392, "learning_rate": 5.237786760902359e-06, "loss": 0.4156, "step": 7987 }, { "epoch": 0.5003523387463005, "grad_norm": 0.8049487741527228, "learning_rate": 5.236773505137449e-06, "loss": 0.4398, "step": 7988 }, { "epoch": 0.5004149767456427, "grad_norm": 0.8295204714015403, "learning_rate": 5.235760239626897e-06, "loss": 0.4072, "step": 7989 }, { "epoch": 0.5004776147449851, "grad_norm": 0.8974043095878751, "learning_rate": 5.234746964412409e-06, "loss": 0.3842, "step": 7990 }, { "epoch": 0.5005402527443273, "grad_norm": 0.8635897434161738, "learning_rate": 5.233733679535693e-06, "loss": 0.4028, "step": 7991 }, { "epoch": 0.5006028907436696, "grad_norm": 0.8727767633894371, "learning_rate": 5.232720385038457e-06, "loss": 0.427, "step": 7992 }, { "epoch": 0.500665528743012, "grad_norm": 0.8666107999003545, "learning_rate": 5.231707080962405e-06, "loss": 0.4009, "step": 7993 }, { "epoch": 0.5007281667423542, "grad_norm": 0.8749977635542127, "learning_rate": 5.230693767349245e-06, "loss": 0.3859, "step": 7994 }, { "epoch": 0.5007908047416966, "grad_norm": 0.8254774378947833, "learning_rate": 5.22968044424069e-06, "loss": 0.4366, "step": 7995 }, { "epoch": 0.5008534427410388, "grad_norm": 0.8361807198716493, "learning_rate": 5.228667111678444e-06, "loss": 0.4297, "step": 7996 }, { "epoch": 0.5009160807403812, "grad_norm": 0.8100662417850062, "learning_rate": 5.2276537697042175e-06, "loss": 0.3781, "step": 7997 }, { "epoch": 0.5009787187397234, "grad_norm": 0.8886558287824778, "learning_rate": 5.226640418359719e-06, "loss": 0.3917, "step": 7998 }, { "epoch": 0.5010413567390658, "grad_norm": 0.8162881982772808, "learning_rate": 5.225627057686659e-06, "loss": 0.381, "step": 7999 }, { "epoch": 0.501103994738408, "grad_norm": 0.8020472082690054, "learning_rate": 5.224613687726748e-06, "loss": 0.3849, "step": 8000 }, { "epoch": 0.5011666327377503, "grad_norm": 0.8306363253981661, "learning_rate": 5.2236003085216945e-06, "loss": 0.4042, "step": 8001 }, { "epoch": 0.5012292707370927, "grad_norm": 0.9002771537194435, "learning_rate": 5.222586920113212e-06, "loss": 0.4188, "step": 8002 }, { "epoch": 0.5012919087364349, "grad_norm": 0.9480919588354509, "learning_rate": 5.221573522543009e-06, "loss": 0.4185, "step": 8003 }, { "epoch": 0.5013545467357773, "grad_norm": 0.8493062797966797, "learning_rate": 5.220560115852801e-06, "loss": 0.4219, "step": 8004 }, { "epoch": 0.5014171847351195, "grad_norm": 0.8380257501779065, "learning_rate": 5.219546700084297e-06, "loss": 0.3956, "step": 8005 }, { "epoch": 0.5014798227344619, "grad_norm": 0.8379784466985123, "learning_rate": 5.218533275279208e-06, "loss": 0.3925, "step": 8006 }, { "epoch": 0.5015424607338042, "grad_norm": 0.8100619490455729, "learning_rate": 5.217519841479252e-06, "loss": 0.3812, "step": 8007 }, { "epoch": 0.5016050987331465, "grad_norm": 0.9269129404524993, "learning_rate": 5.2165063987261364e-06, "loss": 0.397, "step": 8008 }, { "epoch": 0.5016677367324888, "grad_norm": 0.8527603309041438, "learning_rate": 5.215492947061579e-06, "loss": 0.3825, "step": 8009 }, { "epoch": 0.501730374731831, "grad_norm": 0.8561274275261406, "learning_rate": 5.214479486527292e-06, "loss": 0.4132, "step": 8010 }, { "epoch": 0.5017930127311734, "grad_norm": 0.8779429906805598, "learning_rate": 5.213466017164988e-06, "loss": 0.4321, "step": 8011 }, { "epoch": 0.5018556507305156, "grad_norm": 0.8028248351682816, "learning_rate": 5.212452539016384e-06, "loss": 0.4156, "step": 8012 }, { "epoch": 0.501918288729858, "grad_norm": 0.8037583194441319, "learning_rate": 5.211439052123193e-06, "loss": 0.3672, "step": 8013 }, { "epoch": 0.5019809267292002, "grad_norm": 0.8611811905466134, "learning_rate": 5.210425556527133e-06, "loss": 0.4009, "step": 8014 }, { "epoch": 0.5020435647285426, "grad_norm": 0.857523677906572, "learning_rate": 5.209412052269917e-06, "loss": 0.3957, "step": 8015 }, { "epoch": 0.5021062027278849, "grad_norm": 0.8481795307747624, "learning_rate": 5.208398539393262e-06, "loss": 0.4023, "step": 8016 }, { "epoch": 0.5021688407272271, "grad_norm": 0.8107896379575389, "learning_rate": 5.207385017938884e-06, "loss": 0.3482, "step": 8017 }, { "epoch": 0.5022314787265695, "grad_norm": 0.8438547074022043, "learning_rate": 5.206371487948502e-06, "loss": 0.4063, "step": 8018 }, { "epoch": 0.5022941167259117, "grad_norm": 0.821161368259614, "learning_rate": 5.205357949463829e-06, "loss": 0.3826, "step": 8019 }, { "epoch": 0.5023567547252541, "grad_norm": 0.8744236883246292, "learning_rate": 5.204344402526586e-06, "loss": 0.4054, "step": 8020 }, { "epoch": 0.5024193927245963, "grad_norm": 0.8576207992013603, "learning_rate": 5.203330847178491e-06, "loss": 0.4163, "step": 8021 }, { "epoch": 0.5024820307239387, "grad_norm": 0.826617971725051, "learning_rate": 5.202317283461258e-06, "loss": 0.4063, "step": 8022 }, { "epoch": 0.502544668723281, "grad_norm": 0.8465921618698485, "learning_rate": 5.20130371141661e-06, "loss": 0.3797, "step": 8023 }, { "epoch": 0.5026073067226233, "grad_norm": 0.8187812185798657, "learning_rate": 5.200290131086262e-06, "loss": 0.4048, "step": 8024 }, { "epoch": 0.5026699447219656, "grad_norm": 0.8558601401323526, "learning_rate": 5.199276542511936e-06, "loss": 0.4512, "step": 8025 }, { "epoch": 0.5027325827213078, "grad_norm": 0.8249342012235099, "learning_rate": 5.19826294573535e-06, "loss": 0.4177, "step": 8026 }, { "epoch": 0.5027952207206502, "grad_norm": 0.7959262473278265, "learning_rate": 5.197249340798225e-06, "loss": 0.3903, "step": 8027 }, { "epoch": 0.5028578587199924, "grad_norm": 0.8393562640424916, "learning_rate": 5.19623572774228e-06, "loss": 0.3767, "step": 8028 }, { "epoch": 0.5029204967193348, "grad_norm": 0.9503802591640028, "learning_rate": 5.195222106609237e-06, "loss": 0.3921, "step": 8029 }, { "epoch": 0.5029831347186771, "grad_norm": 0.8823429411032977, "learning_rate": 5.194208477440815e-06, "loss": 0.4426, "step": 8030 }, { "epoch": 0.5030457727180194, "grad_norm": 0.8395143355612145, "learning_rate": 5.193194840278736e-06, "loss": 0.4259, "step": 8031 }, { "epoch": 0.5031084107173617, "grad_norm": 0.8882241876057857, "learning_rate": 5.192181195164721e-06, "loss": 0.4326, "step": 8032 }, { "epoch": 0.503171048716704, "grad_norm": 0.8159306140136473, "learning_rate": 5.191167542140494e-06, "loss": 0.4126, "step": 8033 }, { "epoch": 0.5032336867160463, "grad_norm": 0.664016546044879, "learning_rate": 5.190153881247775e-06, "loss": 0.4704, "step": 8034 }, { "epoch": 0.5032963247153885, "grad_norm": 0.8297950877206287, "learning_rate": 5.1891402125282874e-06, "loss": 0.3843, "step": 8035 }, { "epoch": 0.5033589627147309, "grad_norm": 0.8557950535592587, "learning_rate": 5.188126536023752e-06, "loss": 0.3668, "step": 8036 }, { "epoch": 0.5034216007140732, "grad_norm": 0.8725138012244125, "learning_rate": 5.187112851775893e-06, "loss": 0.4299, "step": 8037 }, { "epoch": 0.5034842387134155, "grad_norm": 0.7848800966670751, "learning_rate": 5.186099159826437e-06, "loss": 0.3772, "step": 8038 }, { "epoch": 0.5035468767127578, "grad_norm": 0.8771549081271235, "learning_rate": 5.185085460217103e-06, "loss": 0.4508, "step": 8039 }, { "epoch": 0.5036095147121001, "grad_norm": 0.8574122223061589, "learning_rate": 5.184071752989617e-06, "loss": 0.3817, "step": 8040 }, { "epoch": 0.5036721527114424, "grad_norm": 0.6820718466171694, "learning_rate": 5.1830580381857035e-06, "loss": 0.4745, "step": 8041 }, { "epoch": 0.5037347907107848, "grad_norm": 0.89903002884049, "learning_rate": 5.182044315847088e-06, "loss": 0.3913, "step": 8042 }, { "epoch": 0.503797428710127, "grad_norm": 0.7883772130849629, "learning_rate": 5.1810305860154954e-06, "loss": 0.3649, "step": 8043 }, { "epoch": 0.5038600667094693, "grad_norm": 0.8733375755069828, "learning_rate": 5.1800168487326484e-06, "loss": 0.3782, "step": 8044 }, { "epoch": 0.5039227047088116, "grad_norm": 0.8081392548035743, "learning_rate": 5.179003104040274e-06, "loss": 0.4168, "step": 8045 }, { "epoch": 0.5039853427081539, "grad_norm": 0.8570888128296615, "learning_rate": 5.1779893519801e-06, "loss": 0.4069, "step": 8046 }, { "epoch": 0.5040479807074962, "grad_norm": 0.8282308944934249, "learning_rate": 5.176975592593852e-06, "loss": 0.3991, "step": 8047 }, { "epoch": 0.5041106187068385, "grad_norm": 0.8454696445077061, "learning_rate": 5.175961825923256e-06, "loss": 0.4072, "step": 8048 }, { "epoch": 0.5041732567061809, "grad_norm": 0.8416495928378603, "learning_rate": 5.174948052010036e-06, "loss": 0.4255, "step": 8049 }, { "epoch": 0.5042358947055231, "grad_norm": 0.8035968177759322, "learning_rate": 5.173934270895924e-06, "loss": 0.3809, "step": 8050 }, { "epoch": 0.5042985327048654, "grad_norm": 0.7811145048950143, "learning_rate": 5.172920482622644e-06, "loss": 0.405, "step": 8051 }, { "epoch": 0.5043611707042077, "grad_norm": 0.7820981396056917, "learning_rate": 5.171906687231926e-06, "loss": 0.4173, "step": 8052 }, { "epoch": 0.50442380870355, "grad_norm": 0.9286241336374645, "learning_rate": 5.170892884765496e-06, "loss": 0.4505, "step": 8053 }, { "epoch": 0.5044864467028923, "grad_norm": 0.9315916847868658, "learning_rate": 5.169879075265082e-06, "loss": 0.4105, "step": 8054 }, { "epoch": 0.5045490847022346, "grad_norm": 0.8261978317948668, "learning_rate": 5.168865258772415e-06, "loss": 0.3585, "step": 8055 }, { "epoch": 0.504611722701577, "grad_norm": 0.8031583606995134, "learning_rate": 5.167851435329223e-06, "loss": 0.4132, "step": 8056 }, { "epoch": 0.5046743607009192, "grad_norm": 0.8174561561741606, "learning_rate": 5.166837604977234e-06, "loss": 0.4027, "step": 8057 }, { "epoch": 0.5047369987002616, "grad_norm": 0.6093826619876005, "learning_rate": 5.165823767758179e-06, "loss": 0.4428, "step": 8058 }, { "epoch": 0.5047996366996038, "grad_norm": 0.7888799784493576, "learning_rate": 5.164809923713786e-06, "loss": 0.4323, "step": 8059 }, { "epoch": 0.5048622746989461, "grad_norm": 0.8900370215194766, "learning_rate": 5.163796072885787e-06, "loss": 0.4152, "step": 8060 }, { "epoch": 0.5049249126982884, "grad_norm": 0.8364098438572981, "learning_rate": 5.1627822153159114e-06, "loss": 0.4003, "step": 8061 }, { "epoch": 0.5049875506976307, "grad_norm": 0.8749614036837053, "learning_rate": 5.161768351045888e-06, "loss": 0.4579, "step": 8062 }, { "epoch": 0.505050188696973, "grad_norm": 0.8120707595929619, "learning_rate": 5.160754480117449e-06, "loss": 0.3675, "step": 8063 }, { "epoch": 0.5051128266963153, "grad_norm": 0.8495317239529898, "learning_rate": 5.159740602572327e-06, "loss": 0.3944, "step": 8064 }, { "epoch": 0.5051754646956577, "grad_norm": 0.8364271787328391, "learning_rate": 5.158726718452254e-06, "loss": 0.4141, "step": 8065 }, { "epoch": 0.5052381026949999, "grad_norm": 0.8151853250910139, "learning_rate": 5.157712827798956e-06, "loss": 0.4367, "step": 8066 }, { "epoch": 0.5053007406943423, "grad_norm": 0.8125851261089694, "learning_rate": 5.156698930654171e-06, "loss": 0.3612, "step": 8067 }, { "epoch": 0.5053633786936845, "grad_norm": 0.836116566385592, "learning_rate": 5.155685027059628e-06, "loss": 0.3715, "step": 8068 }, { "epoch": 0.5054260166930268, "grad_norm": 0.8731375023853436, "learning_rate": 5.154671117057062e-06, "loss": 0.3901, "step": 8069 }, { "epoch": 0.5054886546923691, "grad_norm": 0.9355272388348169, "learning_rate": 5.1536572006882025e-06, "loss": 0.4074, "step": 8070 }, { "epoch": 0.5055512926917114, "grad_norm": 0.81317065166747, "learning_rate": 5.152643277994784e-06, "loss": 0.4168, "step": 8071 }, { "epoch": 0.5056139306910538, "grad_norm": 0.7794451784541079, "learning_rate": 5.151629349018541e-06, "loss": 0.409, "step": 8072 }, { "epoch": 0.505676568690396, "grad_norm": 0.8853980457621164, "learning_rate": 5.150615413801205e-06, "loss": 0.4377, "step": 8073 }, { "epoch": 0.5057392066897384, "grad_norm": 0.8364357103622432, "learning_rate": 5.1496014723845115e-06, "loss": 0.3847, "step": 8074 }, { "epoch": 0.5058018446890806, "grad_norm": 0.9111081856716149, "learning_rate": 5.148587524810192e-06, "loss": 0.4354, "step": 8075 }, { "epoch": 0.5058644826884229, "grad_norm": 0.8008319609454848, "learning_rate": 5.147573571119985e-06, "loss": 0.4053, "step": 8076 }, { "epoch": 0.5059271206877652, "grad_norm": 0.9004110357777888, "learning_rate": 5.146559611355619e-06, "loss": 0.4147, "step": 8077 }, { "epoch": 0.5059897586871075, "grad_norm": 0.87960291098056, "learning_rate": 5.145545645558835e-06, "loss": 0.4207, "step": 8078 }, { "epoch": 0.5060523966864499, "grad_norm": 0.7768895802117284, "learning_rate": 5.144531673771364e-06, "loss": 0.409, "step": 8079 }, { "epoch": 0.5061150346857921, "grad_norm": 1.2886105876281264, "learning_rate": 5.143517696034943e-06, "loss": 0.3849, "step": 8080 }, { "epoch": 0.5061776726851345, "grad_norm": 0.8154978771860472, "learning_rate": 5.142503712391307e-06, "loss": 0.424, "step": 8081 }, { "epoch": 0.5062403106844767, "grad_norm": 0.8038735797254247, "learning_rate": 5.141489722882191e-06, "loss": 0.367, "step": 8082 }, { "epoch": 0.5063029486838191, "grad_norm": 0.8357167331261031, "learning_rate": 5.140475727549332e-06, "loss": 0.3973, "step": 8083 }, { "epoch": 0.5063655866831613, "grad_norm": 0.8349711802998276, "learning_rate": 5.139461726434466e-06, "loss": 0.4304, "step": 8084 }, { "epoch": 0.5064282246825036, "grad_norm": 0.8548342644787248, "learning_rate": 5.1384477195793305e-06, "loss": 0.4479, "step": 8085 }, { "epoch": 0.506490862681846, "grad_norm": 0.83197188733113, "learning_rate": 5.13743370702566e-06, "loss": 0.4346, "step": 8086 }, { "epoch": 0.5065535006811882, "grad_norm": 0.8250549496756557, "learning_rate": 5.136419688815193e-06, "loss": 0.3859, "step": 8087 }, { "epoch": 0.5066161386805306, "grad_norm": 0.8116013772756776, "learning_rate": 5.135405664989666e-06, "loss": 0.3748, "step": 8088 }, { "epoch": 0.5066787766798728, "grad_norm": 0.84163557303631, "learning_rate": 5.134391635590817e-06, "loss": 0.4043, "step": 8089 }, { "epoch": 0.5067414146792152, "grad_norm": 0.7673559796250048, "learning_rate": 5.133377600660384e-06, "loss": 0.3762, "step": 8090 }, { "epoch": 0.5068040526785574, "grad_norm": 0.8257926747553206, "learning_rate": 5.132363560240103e-06, "loss": 0.3895, "step": 8091 }, { "epoch": 0.5068666906778998, "grad_norm": 0.7850492348659938, "learning_rate": 5.131349514371713e-06, "loss": 0.4238, "step": 8092 }, { "epoch": 0.506929328677242, "grad_norm": 0.7909102540572227, "learning_rate": 5.130335463096953e-06, "loss": 0.4208, "step": 8093 }, { "epoch": 0.5069919666765843, "grad_norm": 0.8867157074954916, "learning_rate": 5.129321406457562e-06, "loss": 0.4498, "step": 8094 }, { "epoch": 0.5070546046759267, "grad_norm": 0.8603970850642437, "learning_rate": 5.128307344495277e-06, "loss": 0.4471, "step": 8095 }, { "epoch": 0.5071172426752689, "grad_norm": 0.8225699522037913, "learning_rate": 5.127293277251838e-06, "loss": 0.3977, "step": 8096 }, { "epoch": 0.5071798806746113, "grad_norm": 0.8993782312048304, "learning_rate": 5.126279204768985e-06, "loss": 0.4245, "step": 8097 }, { "epoch": 0.5072425186739535, "grad_norm": 0.8807013207158939, "learning_rate": 5.125265127088457e-06, "loss": 0.4122, "step": 8098 }, { "epoch": 0.5073051566732959, "grad_norm": 0.8400532688221081, "learning_rate": 5.124251044251992e-06, "loss": 0.408, "step": 8099 }, { "epoch": 0.5073677946726382, "grad_norm": 0.8376484229720591, "learning_rate": 5.12323695630133e-06, "loss": 0.4198, "step": 8100 }, { "epoch": 0.5074304326719804, "grad_norm": 0.9398446725619658, "learning_rate": 5.1222228632782125e-06, "loss": 0.4253, "step": 8101 }, { "epoch": 0.5074930706713228, "grad_norm": 0.7814471523645199, "learning_rate": 5.12120876522438e-06, "loss": 0.396, "step": 8102 }, { "epoch": 0.507555708670665, "grad_norm": 0.8352154390525708, "learning_rate": 5.1201946621815726e-06, "loss": 0.4066, "step": 8103 }, { "epoch": 0.5076183466700074, "grad_norm": 0.8009129082129066, "learning_rate": 5.11918055419153e-06, "loss": 0.3641, "step": 8104 }, { "epoch": 0.5076809846693496, "grad_norm": 0.7927495789352963, "learning_rate": 5.1181664412959915e-06, "loss": 0.4111, "step": 8105 }, { "epoch": 0.507743622668692, "grad_norm": 0.8615003864246524, "learning_rate": 5.117152323536704e-06, "loss": 0.3976, "step": 8106 }, { "epoch": 0.5078062606680342, "grad_norm": 0.6234391516275531, "learning_rate": 5.116138200955404e-06, "loss": 0.4745, "step": 8107 }, { "epoch": 0.5078688986673766, "grad_norm": 0.8673996846538435, "learning_rate": 5.1151240735938335e-06, "loss": 0.4299, "step": 8108 }, { "epoch": 0.5079315366667189, "grad_norm": 0.8114425906826885, "learning_rate": 5.114109941493733e-06, "loss": 0.3912, "step": 8109 }, { "epoch": 0.5079941746660611, "grad_norm": 0.8511463312699007, "learning_rate": 5.11309580469685e-06, "loss": 0.4121, "step": 8110 }, { "epoch": 0.5080568126654035, "grad_norm": 0.9311957013962127, "learning_rate": 5.11208166324492e-06, "loss": 0.3836, "step": 8111 }, { "epoch": 0.5081194506647457, "grad_norm": 0.835675750815414, "learning_rate": 5.1110675171796894e-06, "loss": 0.397, "step": 8112 }, { "epoch": 0.5081820886640881, "grad_norm": 0.7747721306109212, "learning_rate": 5.110053366542897e-06, "loss": 0.3761, "step": 8113 }, { "epoch": 0.5082447266634303, "grad_norm": 0.8231672420442243, "learning_rate": 5.109039211376289e-06, "loss": 0.441, "step": 8114 }, { "epoch": 0.5083073646627727, "grad_norm": 0.8525045347629554, "learning_rate": 5.108025051721608e-06, "loss": 0.4036, "step": 8115 }, { "epoch": 0.508370002662115, "grad_norm": 0.8168524900142093, "learning_rate": 5.1070108876205945e-06, "loss": 0.42, "step": 8116 }, { "epoch": 0.5084326406614573, "grad_norm": 0.802805936511242, "learning_rate": 5.105996719114993e-06, "loss": 0.4165, "step": 8117 }, { "epoch": 0.5084952786607996, "grad_norm": 0.8682486002428388, "learning_rate": 5.104982546246546e-06, "loss": 0.4346, "step": 8118 }, { "epoch": 0.5085579166601418, "grad_norm": 0.8724789924733433, "learning_rate": 5.103968369057e-06, "loss": 0.4295, "step": 8119 }, { "epoch": 0.5086205546594842, "grad_norm": 0.8718102462129081, "learning_rate": 5.102954187588096e-06, "loss": 0.418, "step": 8120 }, { "epoch": 0.5086831926588264, "grad_norm": 0.8102590429913963, "learning_rate": 5.101940001881578e-06, "loss": 0.3984, "step": 8121 }, { "epoch": 0.5087458306581688, "grad_norm": 0.9130852680041658, "learning_rate": 5.100925811979191e-06, "loss": 0.4005, "step": 8122 }, { "epoch": 0.5088084686575111, "grad_norm": 0.8749224686347895, "learning_rate": 5.099911617922677e-06, "loss": 0.4371, "step": 8123 }, { "epoch": 0.5088711066568534, "grad_norm": 0.8844124138380799, "learning_rate": 5.0988974197537844e-06, "loss": 0.4015, "step": 8124 }, { "epoch": 0.5089337446561957, "grad_norm": 0.7804559421490065, "learning_rate": 5.097883217514255e-06, "loss": 0.4032, "step": 8125 }, { "epoch": 0.5089963826555379, "grad_norm": 0.8740727763685209, "learning_rate": 5.096869011245835e-06, "loss": 0.3849, "step": 8126 }, { "epoch": 0.5090590206548803, "grad_norm": 0.6334344695103602, "learning_rate": 5.095854800990267e-06, "loss": 0.4459, "step": 8127 }, { "epoch": 0.5091216586542225, "grad_norm": 0.8100561754916401, "learning_rate": 5.0948405867892996e-06, "loss": 0.398, "step": 8128 }, { "epoch": 0.5091842966535649, "grad_norm": 0.8910205252620531, "learning_rate": 5.093826368684675e-06, "loss": 0.3864, "step": 8129 }, { "epoch": 0.5092469346529072, "grad_norm": 0.850469688702369, "learning_rate": 5.092812146718138e-06, "loss": 0.4156, "step": 8130 }, { "epoch": 0.5093095726522495, "grad_norm": 0.8029490891357857, "learning_rate": 5.091797920931438e-06, "loss": 0.3941, "step": 8131 }, { "epoch": 0.5093722106515918, "grad_norm": 0.8123531563859379, "learning_rate": 5.090783691366318e-06, "loss": 0.3587, "step": 8132 }, { "epoch": 0.5094348486509341, "grad_norm": 0.860788174514449, "learning_rate": 5.089769458064524e-06, "loss": 0.3556, "step": 8133 }, { "epoch": 0.5094974866502764, "grad_norm": 0.8904094432488073, "learning_rate": 5.088755221067803e-06, "loss": 0.3506, "step": 8134 }, { "epoch": 0.5095601246496186, "grad_norm": 0.8668427724356307, "learning_rate": 5.0877409804179e-06, "loss": 0.4513, "step": 8135 }, { "epoch": 0.509622762648961, "grad_norm": 0.7714066750873813, "learning_rate": 5.086726736156563e-06, "loss": 0.3533, "step": 8136 }, { "epoch": 0.5096854006483033, "grad_norm": 0.809149306408102, "learning_rate": 5.085712488325536e-06, "loss": 0.4114, "step": 8137 }, { "epoch": 0.5097480386476456, "grad_norm": 0.820436395779765, "learning_rate": 5.0846982369665685e-06, "loss": 0.3845, "step": 8138 }, { "epoch": 0.5098106766469879, "grad_norm": 0.8558996049456354, "learning_rate": 5.083683982121404e-06, "loss": 0.4108, "step": 8139 }, { "epoch": 0.5098733146463302, "grad_norm": 0.8396233169620599, "learning_rate": 5.082669723831793e-06, "loss": 0.3787, "step": 8140 }, { "epoch": 0.5099359526456725, "grad_norm": 0.7975123818311078, "learning_rate": 5.08165546213948e-06, "loss": 0.3815, "step": 8141 }, { "epoch": 0.5099985906450148, "grad_norm": 0.7886711040506519, "learning_rate": 5.080641197086214e-06, "loss": 0.3845, "step": 8142 }, { "epoch": 0.5100612286443571, "grad_norm": 0.853168662054745, "learning_rate": 5.07962692871374e-06, "loss": 0.4329, "step": 8143 }, { "epoch": 0.5101238666436994, "grad_norm": 0.7981870658412051, "learning_rate": 5.0786126570638074e-06, "loss": 0.3701, "step": 8144 }, { "epoch": 0.5101865046430417, "grad_norm": 0.8891153115638999, "learning_rate": 5.077598382178164e-06, "loss": 0.3688, "step": 8145 }, { "epoch": 0.510249142642384, "grad_norm": 0.7963331447370106, "learning_rate": 5.076584104098555e-06, "loss": 0.3853, "step": 8146 }, { "epoch": 0.5103117806417263, "grad_norm": 0.8144980103500984, "learning_rate": 5.075569822866731e-06, "loss": 0.3645, "step": 8147 }, { "epoch": 0.5103744186410686, "grad_norm": 0.8504132906616959, "learning_rate": 5.0745555385244395e-06, "loss": 0.3928, "step": 8148 }, { "epoch": 0.510437056640411, "grad_norm": 0.8137162521695348, "learning_rate": 5.073541251113428e-06, "loss": 0.4101, "step": 8149 }, { "epoch": 0.5104996946397532, "grad_norm": 0.8632122197435231, "learning_rate": 5.072526960675446e-06, "loss": 0.3858, "step": 8150 }, { "epoch": 0.5105623326390956, "grad_norm": 0.8291133635605801, "learning_rate": 5.071512667252239e-06, "loss": 0.3919, "step": 8151 }, { "epoch": 0.5106249706384378, "grad_norm": 0.8211168111674864, "learning_rate": 5.0704983708855575e-06, "loss": 0.4086, "step": 8152 }, { "epoch": 0.5106876086377801, "grad_norm": 0.7772460819712467, "learning_rate": 5.0694840716171514e-06, "loss": 0.3867, "step": 8153 }, { "epoch": 0.5107502466371224, "grad_norm": 0.8242500236360439, "learning_rate": 5.068469769488767e-06, "loss": 0.4106, "step": 8154 }, { "epoch": 0.5108128846364647, "grad_norm": 0.8718000425989151, "learning_rate": 5.067455464542154e-06, "loss": 0.3872, "step": 8155 }, { "epoch": 0.510875522635807, "grad_norm": 0.8212499481755572, "learning_rate": 5.066441156819061e-06, "loss": 0.3712, "step": 8156 }, { "epoch": 0.5109381606351493, "grad_norm": 0.8123176945494615, "learning_rate": 5.065426846361239e-06, "loss": 0.394, "step": 8157 }, { "epoch": 0.5110007986344917, "grad_norm": 0.8155692014466943, "learning_rate": 5.064412533210436e-06, "loss": 0.3838, "step": 8158 }, { "epoch": 0.5110634366338339, "grad_norm": 0.7717409647903027, "learning_rate": 5.063398217408401e-06, "loss": 0.4015, "step": 8159 }, { "epoch": 0.5111260746331762, "grad_norm": 0.7916534049254282, "learning_rate": 5.062383898996881e-06, "loss": 0.4542, "step": 8160 }, { "epoch": 0.5111887126325185, "grad_norm": 0.8447602763841638, "learning_rate": 5.061369578017631e-06, "loss": 0.3978, "step": 8161 }, { "epoch": 0.5112513506318608, "grad_norm": 0.8442638416411274, "learning_rate": 5.060355254512398e-06, "loss": 0.3851, "step": 8162 }, { "epoch": 0.5113139886312031, "grad_norm": 0.8376393770252213, "learning_rate": 5.059340928522931e-06, "loss": 0.4026, "step": 8163 }, { "epoch": 0.5113766266305454, "grad_norm": 0.9284692312567296, "learning_rate": 5.058326600090979e-06, "loss": 0.4269, "step": 8164 }, { "epoch": 0.5114392646298878, "grad_norm": 0.759287628163956, "learning_rate": 5.0573122692582944e-06, "loss": 0.3937, "step": 8165 }, { "epoch": 0.51150190262923, "grad_norm": 0.7953324350140922, "learning_rate": 5.056297936066626e-06, "loss": 0.4346, "step": 8166 }, { "epoch": 0.5115645406285724, "grad_norm": 0.7853856801386242, "learning_rate": 5.055283600557724e-06, "loss": 0.419, "step": 8167 }, { "epoch": 0.5116271786279146, "grad_norm": 0.8257065895466549, "learning_rate": 5.054269262773339e-06, "loss": 0.3682, "step": 8168 }, { "epoch": 0.5116898166272569, "grad_norm": 0.8418582609388429, "learning_rate": 5.053254922755221e-06, "loss": 0.4184, "step": 8169 }, { "epoch": 0.5117524546265992, "grad_norm": 0.7940881567032659, "learning_rate": 5.05224058054512e-06, "loss": 0.4226, "step": 8170 }, { "epoch": 0.5118150926259415, "grad_norm": 0.9060713627497136, "learning_rate": 5.051226236184789e-06, "loss": 0.4397, "step": 8171 }, { "epoch": 0.5118777306252839, "grad_norm": 0.8187363233897705, "learning_rate": 5.050211889715975e-06, "loss": 0.3751, "step": 8172 }, { "epoch": 0.5119403686246261, "grad_norm": 0.7893762629117332, "learning_rate": 5.049197541180429e-06, "loss": 0.3599, "step": 8173 }, { "epoch": 0.5120030066239685, "grad_norm": 0.8780007790526597, "learning_rate": 5.048183190619904e-06, "loss": 0.4165, "step": 8174 }, { "epoch": 0.5120656446233107, "grad_norm": 0.6004435776961486, "learning_rate": 5.04716883807615e-06, "loss": 0.4752, "step": 8175 }, { "epoch": 0.5121282826226531, "grad_norm": 0.8325553783440437, "learning_rate": 5.04615448359092e-06, "loss": 0.4573, "step": 8176 }, { "epoch": 0.5121909206219953, "grad_norm": 0.8325072703192636, "learning_rate": 5.045140127205961e-06, "loss": 0.4183, "step": 8177 }, { "epoch": 0.5122535586213376, "grad_norm": 0.793268313924038, "learning_rate": 5.044125768963025e-06, "loss": 0.4082, "step": 8178 }, { "epoch": 0.51231619662068, "grad_norm": 0.830256471234238, "learning_rate": 5.043111408903865e-06, "loss": 0.4293, "step": 8179 }, { "epoch": 0.5123788346200222, "grad_norm": 0.7990623442745683, "learning_rate": 5.042097047070232e-06, "loss": 0.3673, "step": 8180 }, { "epoch": 0.5124414726193646, "grad_norm": 0.8125581975866385, "learning_rate": 5.041082683503876e-06, "loss": 0.4071, "step": 8181 }, { "epoch": 0.5125041106187068, "grad_norm": 0.7503788488776666, "learning_rate": 5.04006831824655e-06, "loss": 0.3525, "step": 8182 }, { "epoch": 0.5125667486180492, "grad_norm": 0.7972272407324117, "learning_rate": 5.039053951340004e-06, "loss": 0.4058, "step": 8183 }, { "epoch": 0.5126293866173914, "grad_norm": 0.8427175982523937, "learning_rate": 5.038039582825991e-06, "loss": 0.3419, "step": 8184 }, { "epoch": 0.5126920246167337, "grad_norm": 0.809310962822573, "learning_rate": 5.037025212746261e-06, "loss": 0.4036, "step": 8185 }, { "epoch": 0.512754662616076, "grad_norm": 0.7925747545242319, "learning_rate": 5.036010841142565e-06, "loss": 0.3735, "step": 8186 }, { "epoch": 0.5128173006154183, "grad_norm": 0.8506693738981296, "learning_rate": 5.034996468056658e-06, "loss": 0.4267, "step": 8187 }, { "epoch": 0.5128799386147607, "grad_norm": 0.8242075153267425, "learning_rate": 5.03398209353029e-06, "loss": 0.3769, "step": 8188 }, { "epoch": 0.5129425766141029, "grad_norm": 0.7851733855253988, "learning_rate": 5.032967717605211e-06, "loss": 0.3929, "step": 8189 }, { "epoch": 0.5130052146134453, "grad_norm": 0.593507373843471, "learning_rate": 5.031953340323176e-06, "loss": 0.4359, "step": 8190 }, { "epoch": 0.5130678526127875, "grad_norm": 0.8742871694148957, "learning_rate": 5.030938961725937e-06, "loss": 0.4573, "step": 8191 }, { "epoch": 0.5131304906121299, "grad_norm": 0.7764141209221231, "learning_rate": 5.029924581855241e-06, "loss": 0.3808, "step": 8192 }, { "epoch": 0.5131931286114721, "grad_norm": 0.8474582411315787, "learning_rate": 5.028910200752845e-06, "loss": 0.3876, "step": 8193 }, { "epoch": 0.5132557666108144, "grad_norm": 0.8155711101107981, "learning_rate": 5.027895818460501e-06, "loss": 0.4103, "step": 8194 }, { "epoch": 0.5133184046101568, "grad_norm": 0.7849134527702236, "learning_rate": 5.026881435019959e-06, "loss": 0.4048, "step": 8195 }, { "epoch": 0.513381042609499, "grad_norm": 1.147885338187495, "learning_rate": 5.025867050472973e-06, "loss": 0.37, "step": 8196 }, { "epoch": 0.5134436806088414, "grad_norm": 0.8158199460703229, "learning_rate": 5.024852664861293e-06, "loss": 0.3596, "step": 8197 }, { "epoch": 0.5135063186081836, "grad_norm": 0.8838762368246317, "learning_rate": 5.023838278226674e-06, "loss": 0.4684, "step": 8198 }, { "epoch": 0.513568956607526, "grad_norm": 0.8026550697076904, "learning_rate": 5.022823890610866e-06, "loss": 0.3802, "step": 8199 }, { "epoch": 0.5136315946068682, "grad_norm": 0.7817695767275606, "learning_rate": 5.021809502055624e-06, "loss": 0.3999, "step": 8200 }, { "epoch": 0.5136942326062106, "grad_norm": 0.7985235231883355, "learning_rate": 5.0207951126026965e-06, "loss": 0.3915, "step": 8201 }, { "epoch": 0.5137568706055529, "grad_norm": 0.8446644384406377, "learning_rate": 5.01978072229384e-06, "loss": 0.447, "step": 8202 }, { "epoch": 0.5138195086048951, "grad_norm": 0.7712975427400917, "learning_rate": 5.018766331170804e-06, "loss": 0.3877, "step": 8203 }, { "epoch": 0.5138821466042375, "grad_norm": 0.8704864603668108, "learning_rate": 5.017751939275344e-06, "loss": 0.4311, "step": 8204 }, { "epoch": 0.5139447846035797, "grad_norm": 0.876030647378853, "learning_rate": 5.016737546649211e-06, "loss": 0.4032, "step": 8205 }, { "epoch": 0.5140074226029221, "grad_norm": 0.8380114615861824, "learning_rate": 5.015723153334156e-06, "loss": 0.4364, "step": 8206 }, { "epoch": 0.5140700606022643, "grad_norm": 0.9084707108747004, "learning_rate": 5.014708759371933e-06, "loss": 0.4539, "step": 8207 }, { "epoch": 0.5141326986016067, "grad_norm": 0.687212550392713, "learning_rate": 5.0136943648042945e-06, "loss": 0.4527, "step": 8208 }, { "epoch": 0.514195336600949, "grad_norm": 0.837037220347326, "learning_rate": 5.012679969672996e-06, "loss": 0.3772, "step": 8209 }, { "epoch": 0.5142579746002912, "grad_norm": 0.8254889530425422, "learning_rate": 5.011665574019785e-06, "loss": 0.3781, "step": 8210 }, { "epoch": 0.5143206125996336, "grad_norm": 0.8519098050777713, "learning_rate": 5.010651177886418e-06, "loss": 0.4309, "step": 8211 }, { "epoch": 0.5143832505989758, "grad_norm": 0.8600762942264881, "learning_rate": 5.009636781314645e-06, "loss": 0.3941, "step": 8212 }, { "epoch": 0.5144458885983182, "grad_norm": 0.9065036575879774, "learning_rate": 5.008622384346223e-06, "loss": 0.4462, "step": 8213 }, { "epoch": 0.5145085265976604, "grad_norm": 0.8614397836200416, "learning_rate": 5.0076079870229e-06, "loss": 0.3942, "step": 8214 }, { "epoch": 0.5145711645970028, "grad_norm": 0.8170226229385308, "learning_rate": 5.006593589386432e-06, "loss": 0.4099, "step": 8215 }, { "epoch": 0.5146338025963451, "grad_norm": 0.9140444540926199, "learning_rate": 5.005579191478569e-06, "loss": 0.4493, "step": 8216 }, { "epoch": 0.5146964405956874, "grad_norm": 0.7996468026087599, "learning_rate": 5.004564793341067e-06, "loss": 0.3996, "step": 8217 }, { "epoch": 0.5147590785950297, "grad_norm": 0.8302413625404509, "learning_rate": 5.003550395015677e-06, "loss": 0.4069, "step": 8218 }, { "epoch": 0.5148217165943719, "grad_norm": 0.8799563870785911, "learning_rate": 5.002535996544151e-06, "loss": 0.4107, "step": 8219 }, { "epoch": 0.5148843545937143, "grad_norm": 0.6025268732464654, "learning_rate": 5.001521597968243e-06, "loss": 0.4449, "step": 8220 }, { "epoch": 0.5149469925930565, "grad_norm": 0.838228644515971, "learning_rate": 5.000507199329707e-06, "loss": 0.4103, "step": 8221 }, { "epoch": 0.5150096305923989, "grad_norm": 0.9011588226587794, "learning_rate": 4.999492800670295e-06, "loss": 0.3787, "step": 8222 }, { "epoch": 0.5150722685917412, "grad_norm": 0.9211265470495409, "learning_rate": 4.9984784020317585e-06, "loss": 0.3875, "step": 8223 }, { "epoch": 0.5151349065910835, "grad_norm": 0.8336764394111199, "learning_rate": 4.997464003455851e-06, "loss": 0.3745, "step": 8224 }, { "epoch": 0.5151975445904258, "grad_norm": 0.8568850954855144, "learning_rate": 4.996449604984326e-06, "loss": 0.4333, "step": 8225 }, { "epoch": 0.5152601825897681, "grad_norm": 0.8192524118220557, "learning_rate": 4.995435206658936e-06, "loss": 0.4029, "step": 8226 }, { "epoch": 0.5153228205891104, "grad_norm": 0.9186246423189697, "learning_rate": 4.994420808521432e-06, "loss": 0.4355, "step": 8227 }, { "epoch": 0.5153854585884526, "grad_norm": 0.842169111792994, "learning_rate": 4.993406410613571e-06, "loss": 0.4247, "step": 8228 }, { "epoch": 0.515448096587795, "grad_norm": 0.8151011012291627, "learning_rate": 4.992392012977101e-06, "loss": 0.433, "step": 8229 }, { "epoch": 0.5155107345871373, "grad_norm": 0.6835126937199044, "learning_rate": 4.991377615653779e-06, "loss": 0.4605, "step": 8230 }, { "epoch": 0.5155733725864796, "grad_norm": 0.8292155460650013, "learning_rate": 4.990363218685356e-06, "loss": 0.4249, "step": 8231 }, { "epoch": 0.5156360105858219, "grad_norm": 0.9072666740665797, "learning_rate": 4.989348822113584e-06, "loss": 0.3906, "step": 8232 }, { "epoch": 0.5156986485851642, "grad_norm": 0.8459255112664072, "learning_rate": 4.988334425980217e-06, "loss": 0.3923, "step": 8233 }, { "epoch": 0.5157612865845065, "grad_norm": 0.8106019197128435, "learning_rate": 4.987320030327007e-06, "loss": 0.3827, "step": 8234 }, { "epoch": 0.5158239245838488, "grad_norm": 0.8885156916622667, "learning_rate": 4.986305635195707e-06, "loss": 0.469, "step": 8235 }, { "epoch": 0.5158865625831911, "grad_norm": 0.9150318129800625, "learning_rate": 4.985291240628068e-06, "loss": 0.4355, "step": 8236 }, { "epoch": 0.5159492005825334, "grad_norm": 0.827923641617823, "learning_rate": 4.984276846665846e-06, "loss": 0.4019, "step": 8237 }, { "epoch": 0.5160118385818757, "grad_norm": 0.827646843631745, "learning_rate": 4.983262453350791e-06, "loss": 0.4291, "step": 8238 }, { "epoch": 0.516074476581218, "grad_norm": 0.7756214692605201, "learning_rate": 4.982248060724658e-06, "loss": 0.3976, "step": 8239 }, { "epoch": 0.5161371145805603, "grad_norm": 0.7763333087568015, "learning_rate": 4.981233668829197e-06, "loss": 0.4072, "step": 8240 }, { "epoch": 0.5161997525799026, "grad_norm": 0.8084699117763976, "learning_rate": 4.980219277706162e-06, "loss": 0.4768, "step": 8241 }, { "epoch": 0.516262390579245, "grad_norm": 0.8752254011324899, "learning_rate": 4.979204887397305e-06, "loss": 0.4099, "step": 8242 }, { "epoch": 0.5163250285785872, "grad_norm": 0.8828522677098437, "learning_rate": 4.978190497944379e-06, "loss": 0.4244, "step": 8243 }, { "epoch": 0.5163876665779294, "grad_norm": 0.7901172044364118, "learning_rate": 4.977176109389137e-06, "loss": 0.373, "step": 8244 }, { "epoch": 0.5164503045772718, "grad_norm": 0.6143202859095989, "learning_rate": 4.976161721773327e-06, "loss": 0.4346, "step": 8245 }, { "epoch": 0.5165129425766141, "grad_norm": 0.6291407398036827, "learning_rate": 4.975147335138707e-06, "loss": 0.4865, "step": 8246 }, { "epoch": 0.5165755805759564, "grad_norm": 0.8643768448732758, "learning_rate": 4.9741329495270285e-06, "loss": 0.3993, "step": 8247 }, { "epoch": 0.5166382185752987, "grad_norm": 0.8057230066816489, "learning_rate": 4.973118564980042e-06, "loss": 0.4196, "step": 8248 }, { "epoch": 0.516700856574641, "grad_norm": 0.81420651305386, "learning_rate": 4.972104181539501e-06, "loss": 0.4012, "step": 8249 }, { "epoch": 0.5167634945739833, "grad_norm": 0.7611102643474783, "learning_rate": 4.971089799247155e-06, "loss": 0.3851, "step": 8250 }, { "epoch": 0.5168261325733257, "grad_norm": 0.8042271439111526, "learning_rate": 4.97007541814476e-06, "loss": 0.3891, "step": 8251 }, { "epoch": 0.5168887705726679, "grad_norm": 0.9401912685682359, "learning_rate": 4.969061038274067e-06, "loss": 0.4212, "step": 8252 }, { "epoch": 0.5169514085720102, "grad_norm": 0.8215498967807255, "learning_rate": 4.968046659676825e-06, "loss": 0.42, "step": 8253 }, { "epoch": 0.5170140465713525, "grad_norm": 0.8352947932288015, "learning_rate": 4.967032282394789e-06, "loss": 0.4099, "step": 8254 }, { "epoch": 0.5170766845706948, "grad_norm": 0.7974978969511005, "learning_rate": 4.966017906469711e-06, "loss": 0.3793, "step": 8255 }, { "epoch": 0.5171393225700371, "grad_norm": 0.6616749435423456, "learning_rate": 4.965003531943342e-06, "loss": 0.4612, "step": 8256 }, { "epoch": 0.5172019605693794, "grad_norm": 0.832357808452164, "learning_rate": 4.9639891588574355e-06, "loss": 0.3959, "step": 8257 }, { "epoch": 0.5172645985687218, "grad_norm": 0.8091187360376992, "learning_rate": 4.96297478725374e-06, "loss": 0.3975, "step": 8258 }, { "epoch": 0.517327236568064, "grad_norm": 0.815399737765134, "learning_rate": 4.96196041717401e-06, "loss": 0.401, "step": 8259 }, { "epoch": 0.5173898745674064, "grad_norm": 0.79173461999385, "learning_rate": 4.9609460486599975e-06, "loss": 0.3727, "step": 8260 }, { "epoch": 0.5174525125667486, "grad_norm": 0.889254010581241, "learning_rate": 4.959931681753452e-06, "loss": 0.4452, "step": 8261 }, { "epoch": 0.5175151505660909, "grad_norm": 0.8605392512603198, "learning_rate": 4.958917316496125e-06, "loss": 0.384, "step": 8262 }, { "epoch": 0.5175777885654332, "grad_norm": 0.8456699344476227, "learning_rate": 4.957902952929767e-06, "loss": 0.4113, "step": 8263 }, { "epoch": 0.5176404265647755, "grad_norm": 0.8433476854099257, "learning_rate": 4.9568885910961346e-06, "loss": 0.4131, "step": 8264 }, { "epoch": 0.5177030645641179, "grad_norm": 0.8340628488471051, "learning_rate": 4.955874231036976e-06, "loss": 0.4, "step": 8265 }, { "epoch": 0.5177657025634601, "grad_norm": 0.6931030114538771, "learning_rate": 4.954859872794041e-06, "loss": 0.4578, "step": 8266 }, { "epoch": 0.5178283405628025, "grad_norm": 0.8011190885217403, "learning_rate": 4.953845516409082e-06, "loss": 0.4212, "step": 8267 }, { "epoch": 0.5178909785621447, "grad_norm": 0.8512431700461776, "learning_rate": 4.9528311619238505e-06, "loss": 0.4343, "step": 8268 }, { "epoch": 0.517953616561487, "grad_norm": 0.7682427802294428, "learning_rate": 4.951816809380098e-06, "loss": 0.3841, "step": 8269 }, { "epoch": 0.5180162545608293, "grad_norm": 0.6146662680002114, "learning_rate": 4.950802458819573e-06, "loss": 0.4445, "step": 8270 }, { "epoch": 0.5180788925601716, "grad_norm": 0.8509936467588098, "learning_rate": 4.949788110284028e-06, "loss": 0.4005, "step": 8271 }, { "epoch": 0.518141530559514, "grad_norm": 0.8364949678758272, "learning_rate": 4.948773763815215e-06, "loss": 0.4072, "step": 8272 }, { "epoch": 0.5182041685588562, "grad_norm": 0.831107904886415, "learning_rate": 4.94775941945488e-06, "loss": 0.4135, "step": 8273 }, { "epoch": 0.5182668065581986, "grad_norm": 0.8261837197946177, "learning_rate": 4.94674507724478e-06, "loss": 0.3757, "step": 8274 }, { "epoch": 0.5183294445575408, "grad_norm": 0.8143575860522001, "learning_rate": 4.9457307372266626e-06, "loss": 0.3793, "step": 8275 }, { "epoch": 0.5183920825568832, "grad_norm": 0.9110989653488255, "learning_rate": 4.944716399442277e-06, "loss": 0.4336, "step": 8276 }, { "epoch": 0.5184547205562254, "grad_norm": 0.8356966059248633, "learning_rate": 4.943702063933375e-06, "loss": 0.3502, "step": 8277 }, { "epoch": 0.5185173585555677, "grad_norm": 0.7350057760555081, "learning_rate": 4.942687730741707e-06, "loss": 0.4641, "step": 8278 }, { "epoch": 0.51857999655491, "grad_norm": 0.8851145211257363, "learning_rate": 4.9416733999090224e-06, "loss": 0.4187, "step": 8279 }, { "epoch": 0.5186426345542523, "grad_norm": 0.8371013148767406, "learning_rate": 4.940659071477071e-06, "loss": 0.3954, "step": 8280 }, { "epoch": 0.5187052725535947, "grad_norm": 0.7936648583949438, "learning_rate": 4.939644745487605e-06, "loss": 0.3954, "step": 8281 }, { "epoch": 0.5187679105529369, "grad_norm": 0.7015726917456452, "learning_rate": 4.938630421982369e-06, "loss": 0.4615, "step": 8282 }, { "epoch": 0.5188305485522793, "grad_norm": 0.8435321880558735, "learning_rate": 4.937616101003119e-06, "loss": 0.3821, "step": 8283 }, { "epoch": 0.5188931865516215, "grad_norm": 0.8485357330768095, "learning_rate": 4.9366017825916e-06, "loss": 0.4147, "step": 8284 }, { "epoch": 0.5189558245509639, "grad_norm": 0.8265796330554315, "learning_rate": 4.935587466789565e-06, "loss": 0.3552, "step": 8285 }, { "epoch": 0.5190184625503061, "grad_norm": 0.7675391020301982, "learning_rate": 4.9345731536387624e-06, "loss": 0.4196, "step": 8286 }, { "epoch": 0.5190811005496484, "grad_norm": 0.8926290526553952, "learning_rate": 4.93355884318094e-06, "loss": 0.4004, "step": 8287 }, { "epoch": 0.5191437385489908, "grad_norm": 0.804698253728434, "learning_rate": 4.932544535457848e-06, "loss": 0.4282, "step": 8288 }, { "epoch": 0.519206376548333, "grad_norm": 0.92177242013466, "learning_rate": 4.931530230511236e-06, "loss": 0.4409, "step": 8289 }, { "epoch": 0.5192690145476754, "grad_norm": 0.8215425522240162, "learning_rate": 4.930515928382852e-06, "loss": 0.3989, "step": 8290 }, { "epoch": 0.5193316525470176, "grad_norm": 0.8331119500621229, "learning_rate": 4.9295016291144425e-06, "loss": 0.4129, "step": 8291 }, { "epoch": 0.51939429054636, "grad_norm": 0.8321720908750989, "learning_rate": 4.928487332747762e-06, "loss": 0.3826, "step": 8292 }, { "epoch": 0.5194569285457022, "grad_norm": 0.9172825962983301, "learning_rate": 4.927473039324556e-06, "loss": 0.4167, "step": 8293 }, { "epoch": 0.5195195665450445, "grad_norm": 0.8981800452771517, "learning_rate": 4.9264587488865725e-06, "loss": 0.3965, "step": 8294 }, { "epoch": 0.5195822045443869, "grad_norm": 0.7825633850341278, "learning_rate": 4.925444461475562e-06, "loss": 0.423, "step": 8295 }, { "epoch": 0.5196448425437291, "grad_norm": 0.8813045588858088, "learning_rate": 4.92443017713327e-06, "loss": 0.428, "step": 8296 }, { "epoch": 0.5197074805430715, "grad_norm": 0.8532770034252974, "learning_rate": 4.923415895901446e-06, "loss": 0.4306, "step": 8297 }, { "epoch": 0.5197701185424137, "grad_norm": 0.86377767924659, "learning_rate": 4.922401617821839e-06, "loss": 0.4418, "step": 8298 }, { "epoch": 0.5198327565417561, "grad_norm": 0.8148973039925508, "learning_rate": 4.921387342936195e-06, "loss": 0.4157, "step": 8299 }, { "epoch": 0.5198953945410983, "grad_norm": 0.821509069593858, "learning_rate": 4.9203730712862605e-06, "loss": 0.3929, "step": 8300 }, { "epoch": 0.5199580325404407, "grad_norm": 0.6552489831018905, "learning_rate": 4.919358802913788e-06, "loss": 0.4901, "step": 8301 }, { "epoch": 0.520020670539783, "grad_norm": 0.8431769067183542, "learning_rate": 4.91834453786052e-06, "loss": 0.3825, "step": 8302 }, { "epoch": 0.5200833085391252, "grad_norm": 0.7598506183319799, "learning_rate": 4.917330276168208e-06, "loss": 0.345, "step": 8303 }, { "epoch": 0.5201459465384676, "grad_norm": 0.8376127005486502, "learning_rate": 4.9163160178785974e-06, "loss": 0.4103, "step": 8304 }, { "epoch": 0.5202085845378098, "grad_norm": 0.9141167011673258, "learning_rate": 4.915301763033433e-06, "loss": 0.4194, "step": 8305 }, { "epoch": 0.5202712225371522, "grad_norm": 0.9280675885667682, "learning_rate": 4.914287511674465e-06, "loss": 0.4336, "step": 8306 }, { "epoch": 0.5203338605364944, "grad_norm": 0.8239301464585392, "learning_rate": 4.913273263843439e-06, "loss": 0.3831, "step": 8307 }, { "epoch": 0.5203964985358368, "grad_norm": 0.8569234667200174, "learning_rate": 4.912259019582102e-06, "loss": 0.433, "step": 8308 }, { "epoch": 0.5204591365351791, "grad_norm": 0.8347503872170327, "learning_rate": 4.911244778932198e-06, "loss": 0.4203, "step": 8309 }, { "epoch": 0.5205217745345214, "grad_norm": 0.8991425219628698, "learning_rate": 4.9102305419354765e-06, "loss": 0.4311, "step": 8310 }, { "epoch": 0.5205844125338637, "grad_norm": 0.8520573512286624, "learning_rate": 4.909216308633683e-06, "loss": 0.3989, "step": 8311 }, { "epoch": 0.5206470505332059, "grad_norm": 0.8185562911718917, "learning_rate": 4.908202079068563e-06, "loss": 0.4184, "step": 8312 }, { "epoch": 0.5207096885325483, "grad_norm": 0.822184273964514, "learning_rate": 4.907187853281863e-06, "loss": 0.3981, "step": 8313 }, { "epoch": 0.5207723265318905, "grad_norm": 0.6846611508335989, "learning_rate": 4.906173631315327e-06, "loss": 0.4446, "step": 8314 }, { "epoch": 0.5208349645312329, "grad_norm": 0.8314560880297882, "learning_rate": 4.905159413210703e-06, "loss": 0.4268, "step": 8315 }, { "epoch": 0.5208976025305752, "grad_norm": 0.7675838948863013, "learning_rate": 4.9041451990097345e-06, "loss": 0.4026, "step": 8316 }, { "epoch": 0.5209602405299175, "grad_norm": 0.812425978302691, "learning_rate": 4.903130988754167e-06, "loss": 0.4223, "step": 8317 }, { "epoch": 0.5210228785292598, "grad_norm": 0.855245647063735, "learning_rate": 4.9021167824857464e-06, "loss": 0.4256, "step": 8318 }, { "epoch": 0.521085516528602, "grad_norm": 0.8599840599051471, "learning_rate": 4.9011025802462155e-06, "loss": 0.3878, "step": 8319 }, { "epoch": 0.5211481545279444, "grad_norm": 0.76629488607917, "learning_rate": 4.9000883820773226e-06, "loss": 0.3649, "step": 8320 }, { "epoch": 0.5212107925272866, "grad_norm": 0.867576202566668, "learning_rate": 4.899074188020811e-06, "loss": 0.3971, "step": 8321 }, { "epoch": 0.521273430526629, "grad_norm": 0.9380380379149199, "learning_rate": 4.898059998118424e-06, "loss": 0.3881, "step": 8322 }, { "epoch": 0.5213360685259713, "grad_norm": 0.7625159470075953, "learning_rate": 4.897045812411906e-06, "loss": 0.4065, "step": 8323 }, { "epoch": 0.5213987065253136, "grad_norm": 0.8074871144965239, "learning_rate": 4.896031630943002e-06, "loss": 0.3877, "step": 8324 }, { "epoch": 0.5214613445246559, "grad_norm": 0.8814120407625147, "learning_rate": 4.895017453753455e-06, "loss": 0.401, "step": 8325 }, { "epoch": 0.5215239825239982, "grad_norm": 0.8814988126299924, "learning_rate": 4.894003280885009e-06, "loss": 0.3974, "step": 8326 }, { "epoch": 0.5215866205233405, "grad_norm": 0.8479427046925739, "learning_rate": 4.892989112379408e-06, "loss": 0.4268, "step": 8327 }, { "epoch": 0.5216492585226827, "grad_norm": 0.776645153653262, "learning_rate": 4.891974948278393e-06, "loss": 0.3646, "step": 8328 }, { "epoch": 0.5217118965220251, "grad_norm": 0.8367775185278502, "learning_rate": 4.890960788623711e-06, "loss": 0.4411, "step": 8329 }, { "epoch": 0.5217745345213674, "grad_norm": 0.8313669719625093, "learning_rate": 4.8899466334571035e-06, "loss": 0.3983, "step": 8330 }, { "epoch": 0.5218371725207097, "grad_norm": 0.915134588890553, "learning_rate": 4.888932482820312e-06, "loss": 0.4074, "step": 8331 }, { "epoch": 0.521899810520052, "grad_norm": 0.8038796411409596, "learning_rate": 4.8879183367550814e-06, "loss": 0.378, "step": 8332 }, { "epoch": 0.5219624485193943, "grad_norm": 0.7570974113717254, "learning_rate": 4.886904195303153e-06, "loss": 0.3512, "step": 8333 }, { "epoch": 0.5220250865187366, "grad_norm": 0.6263168362328521, "learning_rate": 4.885890058506268e-06, "loss": 0.4615, "step": 8334 }, { "epoch": 0.5220877245180789, "grad_norm": 0.8440334048885821, "learning_rate": 4.884875926406168e-06, "loss": 0.3962, "step": 8335 }, { "epoch": 0.5221503625174212, "grad_norm": 0.8512598014101018, "learning_rate": 4.883861799044599e-06, "loss": 0.3968, "step": 8336 }, { "epoch": 0.5222130005167634, "grad_norm": 0.8203551914400148, "learning_rate": 4.882847676463296e-06, "loss": 0.3833, "step": 8337 }, { "epoch": 0.5222756385161058, "grad_norm": 0.8578814583859293, "learning_rate": 4.8818335587040084e-06, "loss": 0.3848, "step": 8338 }, { "epoch": 0.5223382765154481, "grad_norm": 0.8954215095449285, "learning_rate": 4.880819445808473e-06, "loss": 0.4081, "step": 8339 }, { "epoch": 0.5224009145147904, "grad_norm": 0.8462433468545525, "learning_rate": 4.879805337818429e-06, "loss": 0.4469, "step": 8340 }, { "epoch": 0.5224635525141327, "grad_norm": 0.8855930314455385, "learning_rate": 4.878791234775621e-06, "loss": 0.3918, "step": 8341 }, { "epoch": 0.522526190513475, "grad_norm": 0.637816447769661, "learning_rate": 4.877777136721789e-06, "loss": 0.4472, "step": 8342 }, { "epoch": 0.5225888285128173, "grad_norm": 0.8485499781466452, "learning_rate": 4.876763043698671e-06, "loss": 0.4197, "step": 8343 }, { "epoch": 0.5226514665121597, "grad_norm": 0.8103629571524302, "learning_rate": 4.875748955748011e-06, "loss": 0.3888, "step": 8344 }, { "epoch": 0.5227141045115019, "grad_norm": 0.8350342649271675, "learning_rate": 4.874734872911546e-06, "loss": 0.403, "step": 8345 }, { "epoch": 0.5227767425108442, "grad_norm": 0.8583710748960465, "learning_rate": 4.873720795231015e-06, "loss": 0.4177, "step": 8346 }, { "epoch": 0.5228393805101865, "grad_norm": 0.8991801394828847, "learning_rate": 4.872706722748163e-06, "loss": 0.4165, "step": 8347 }, { "epoch": 0.5229020185095288, "grad_norm": 0.9054270943299326, "learning_rate": 4.871692655504723e-06, "loss": 0.4151, "step": 8348 }, { "epoch": 0.5229646565088711, "grad_norm": 0.8285499819690326, "learning_rate": 4.870678593542439e-06, "loss": 0.363, "step": 8349 }, { "epoch": 0.5230272945082134, "grad_norm": 0.8202270380093121, "learning_rate": 4.8696645369030475e-06, "loss": 0.3918, "step": 8350 }, { "epoch": 0.5230899325075558, "grad_norm": 0.8239767160615856, "learning_rate": 4.868650485628289e-06, "loss": 0.3904, "step": 8351 }, { "epoch": 0.523152570506898, "grad_norm": 0.878423312327017, "learning_rate": 4.867636439759898e-06, "loss": 0.3678, "step": 8352 }, { "epoch": 0.5232152085062403, "grad_norm": 0.8106257963076834, "learning_rate": 4.866622399339619e-06, "loss": 0.3839, "step": 8353 }, { "epoch": 0.5232778465055826, "grad_norm": 0.7658372441309625, "learning_rate": 4.865608364409185e-06, "loss": 0.3588, "step": 8354 }, { "epoch": 0.5233404845049249, "grad_norm": 0.8693178757647078, "learning_rate": 4.864594335010335e-06, "loss": 0.3902, "step": 8355 }, { "epoch": 0.5234031225042672, "grad_norm": 0.8420626133106874, "learning_rate": 4.863580311184809e-06, "loss": 0.4018, "step": 8356 }, { "epoch": 0.5234657605036095, "grad_norm": 0.7665215866853986, "learning_rate": 4.86256629297434e-06, "loss": 0.393, "step": 8357 }, { "epoch": 0.5235283985029519, "grad_norm": 0.7988679749310039, "learning_rate": 4.861552280420671e-06, "loss": 0.4178, "step": 8358 }, { "epoch": 0.5235910365022941, "grad_norm": 0.8234980148189301, "learning_rate": 4.860538273565535e-06, "loss": 0.3868, "step": 8359 }, { "epoch": 0.5236536745016365, "grad_norm": 0.8298855336775456, "learning_rate": 4.8595242724506696e-06, "loss": 0.4128, "step": 8360 }, { "epoch": 0.5237163125009787, "grad_norm": 0.82806868008111, "learning_rate": 4.8585102771178105e-06, "loss": 0.4025, "step": 8361 }, { "epoch": 0.523778950500321, "grad_norm": 0.8574148350554026, "learning_rate": 4.857496287608695e-06, "loss": 0.4641, "step": 8362 }, { "epoch": 0.5238415884996633, "grad_norm": 0.6205821496586041, "learning_rate": 4.856482303965059e-06, "loss": 0.4504, "step": 8363 }, { "epoch": 0.5239042264990056, "grad_norm": 0.7758440027640646, "learning_rate": 4.855468326228638e-06, "loss": 0.3613, "step": 8364 }, { "epoch": 0.523966864498348, "grad_norm": 0.7930971167718887, "learning_rate": 4.854454354441166e-06, "loss": 0.3809, "step": 8365 }, { "epoch": 0.5240295024976902, "grad_norm": 0.8431568107795171, "learning_rate": 4.853440388644381e-06, "loss": 0.4421, "step": 8366 }, { "epoch": 0.5240921404970326, "grad_norm": 0.7635144192730712, "learning_rate": 4.852426428880017e-06, "loss": 0.3557, "step": 8367 }, { "epoch": 0.5241547784963748, "grad_norm": 0.8704897877294172, "learning_rate": 4.851412475189809e-06, "loss": 0.4198, "step": 8368 }, { "epoch": 0.5242174164957172, "grad_norm": 0.7997707379065948, "learning_rate": 4.850398527615489e-06, "loss": 0.4208, "step": 8369 }, { "epoch": 0.5242800544950594, "grad_norm": 0.8076608914069535, "learning_rate": 4.8493845861987956e-06, "loss": 0.3619, "step": 8370 }, { "epoch": 0.5243426924944017, "grad_norm": 0.7952971870742692, "learning_rate": 4.848370650981461e-06, "loss": 0.3845, "step": 8371 }, { "epoch": 0.524405330493744, "grad_norm": 0.7619663335362562, "learning_rate": 4.8473567220052184e-06, "loss": 0.3706, "step": 8372 }, { "epoch": 0.5244679684930863, "grad_norm": 0.7589487960416016, "learning_rate": 4.846342799311799e-06, "loss": 0.3397, "step": 8373 }, { "epoch": 0.5245306064924287, "grad_norm": 0.7976099123610207, "learning_rate": 4.845328882942939e-06, "loss": 0.3711, "step": 8374 }, { "epoch": 0.5245932444917709, "grad_norm": 0.8516831085533494, "learning_rate": 4.844314972940372e-06, "loss": 0.4008, "step": 8375 }, { "epoch": 0.5246558824911133, "grad_norm": 0.8498487127884362, "learning_rate": 4.84330106934583e-06, "loss": 0.4362, "step": 8376 }, { "epoch": 0.5247185204904555, "grad_norm": 0.7411643772093817, "learning_rate": 4.842287172201045e-06, "loss": 0.4563, "step": 8377 }, { "epoch": 0.5247811584897978, "grad_norm": 0.767939072266038, "learning_rate": 4.841273281547748e-06, "loss": 0.3418, "step": 8378 }, { "epoch": 0.5248437964891401, "grad_norm": 0.8784258580136556, "learning_rate": 4.840259397427674e-06, "loss": 0.4156, "step": 8379 }, { "epoch": 0.5249064344884824, "grad_norm": 0.8400903470707345, "learning_rate": 4.839245519882552e-06, "loss": 0.37, "step": 8380 }, { "epoch": 0.5249690724878248, "grad_norm": 0.8529783698934237, "learning_rate": 4.838231648954115e-06, "loss": 0.4434, "step": 8381 }, { "epoch": 0.525031710487167, "grad_norm": 0.8438851814813682, "learning_rate": 4.837217784684091e-06, "loss": 0.3965, "step": 8382 }, { "epoch": 0.5250943484865094, "grad_norm": 0.6585478982526103, "learning_rate": 4.836203927114214e-06, "loss": 0.4836, "step": 8383 }, { "epoch": 0.5251569864858516, "grad_norm": 0.8378215686517626, "learning_rate": 4.835190076286214e-06, "loss": 0.4103, "step": 8384 }, { "epoch": 0.525219624485194, "grad_norm": 0.6713805220985095, "learning_rate": 4.834176232241822e-06, "loss": 0.4676, "step": 8385 }, { "epoch": 0.5252822624845362, "grad_norm": 0.8483044385091858, "learning_rate": 4.833162395022767e-06, "loss": 0.4208, "step": 8386 }, { "epoch": 0.5253449004838785, "grad_norm": 0.8757765509062551, "learning_rate": 4.8321485646707775e-06, "loss": 0.4557, "step": 8387 }, { "epoch": 0.5254075384832209, "grad_norm": 0.8168022779286629, "learning_rate": 4.831134741227586e-06, "loss": 0.3649, "step": 8388 }, { "epoch": 0.5254701764825631, "grad_norm": 0.8520543878770334, "learning_rate": 4.8301209247349195e-06, "loss": 0.4038, "step": 8389 }, { "epoch": 0.5255328144819055, "grad_norm": 0.8088179102228152, "learning_rate": 4.829107115234506e-06, "loss": 0.4141, "step": 8390 }, { "epoch": 0.5255954524812477, "grad_norm": 0.824519619482851, "learning_rate": 4.828093312768077e-06, "loss": 0.4433, "step": 8391 }, { "epoch": 0.5256580904805901, "grad_norm": 0.769653672651507, "learning_rate": 4.827079517377356e-06, "loss": 0.4202, "step": 8392 }, { "epoch": 0.5257207284799323, "grad_norm": 0.613339692648587, "learning_rate": 4.8260657291040766e-06, "loss": 0.4209, "step": 8393 }, { "epoch": 0.5257833664792747, "grad_norm": 0.8058754390742849, "learning_rate": 4.825051947989965e-06, "loss": 0.3868, "step": 8394 }, { "epoch": 0.525846004478617, "grad_norm": 0.8610861032962648, "learning_rate": 4.824038174076746e-06, "loss": 0.3967, "step": 8395 }, { "epoch": 0.5259086424779592, "grad_norm": 0.8807194228280499, "learning_rate": 4.823024407406149e-06, "loss": 0.3864, "step": 8396 }, { "epoch": 0.5259712804773016, "grad_norm": 0.8154321281134747, "learning_rate": 4.8220106480199004e-06, "loss": 0.4204, "step": 8397 }, { "epoch": 0.5260339184766438, "grad_norm": 0.8595353287879933, "learning_rate": 4.820996895959727e-06, "loss": 0.4284, "step": 8398 }, { "epoch": 0.5260965564759862, "grad_norm": 0.8161517946080806, "learning_rate": 4.819983151267353e-06, "loss": 0.4042, "step": 8399 }, { "epoch": 0.5261591944753284, "grad_norm": 0.8782843650398903, "learning_rate": 4.818969413984508e-06, "loss": 0.4385, "step": 8400 }, { "epoch": 0.5262218324746708, "grad_norm": 0.7992976706980636, "learning_rate": 4.817955684152915e-06, "loss": 0.394, "step": 8401 }, { "epoch": 0.526284470474013, "grad_norm": 0.9025715185615782, "learning_rate": 4.816941961814297e-06, "loss": 0.3686, "step": 8402 }, { "epoch": 0.5263471084733553, "grad_norm": 0.8329998690616642, "learning_rate": 4.815928247010385e-06, "loss": 0.3483, "step": 8403 }, { "epoch": 0.5264097464726977, "grad_norm": 0.7593134202013102, "learning_rate": 4.814914539782899e-06, "loss": 0.3771, "step": 8404 }, { "epoch": 0.5264723844720399, "grad_norm": 0.7833125207390207, "learning_rate": 4.813900840173566e-06, "loss": 0.3697, "step": 8405 }, { "epoch": 0.5265350224713823, "grad_norm": 0.8700357118395365, "learning_rate": 4.8128871482241085e-06, "loss": 0.4255, "step": 8406 }, { "epoch": 0.5265976604707245, "grad_norm": 0.6562720523143896, "learning_rate": 4.81187346397625e-06, "loss": 0.4698, "step": 8407 }, { "epoch": 0.5266602984700669, "grad_norm": 0.8886634614829774, "learning_rate": 4.810859787471716e-06, "loss": 0.3977, "step": 8408 }, { "epoch": 0.5267229364694092, "grad_norm": 0.6232919283719757, "learning_rate": 4.809846118752228e-06, "loss": 0.4957, "step": 8409 }, { "epoch": 0.5267855744687515, "grad_norm": 0.8469086421446168, "learning_rate": 4.808832457859509e-06, "loss": 0.4632, "step": 8410 }, { "epoch": 0.5268482124680938, "grad_norm": 0.8539336772120959, "learning_rate": 4.8078188048352795e-06, "loss": 0.4137, "step": 8411 }, { "epoch": 0.526910850467436, "grad_norm": 0.8493410391721196, "learning_rate": 4.806805159721265e-06, "loss": 0.4114, "step": 8412 }, { "epoch": 0.5269734884667784, "grad_norm": 0.8020180782102472, "learning_rate": 4.805791522559186e-06, "loss": 0.4283, "step": 8413 }, { "epoch": 0.5270361264661206, "grad_norm": 0.80091785377821, "learning_rate": 4.8047778933907646e-06, "loss": 0.3736, "step": 8414 }, { "epoch": 0.527098764465463, "grad_norm": 0.8328861586184464, "learning_rate": 4.803764272257721e-06, "loss": 0.4252, "step": 8415 }, { "epoch": 0.5271614024648053, "grad_norm": 0.840321941462567, "learning_rate": 4.802750659201776e-06, "loss": 0.3816, "step": 8416 }, { "epoch": 0.5272240404641476, "grad_norm": 0.8306058943916537, "learning_rate": 4.801737054264651e-06, "loss": 0.4174, "step": 8417 }, { "epoch": 0.5272866784634899, "grad_norm": 0.8690476615057071, "learning_rate": 4.800723457488067e-06, "loss": 0.4211, "step": 8418 }, { "epoch": 0.5273493164628322, "grad_norm": 0.8715311147400883, "learning_rate": 4.799709868913741e-06, "loss": 0.4095, "step": 8419 }, { "epoch": 0.5274119544621745, "grad_norm": 1.0188360859693413, "learning_rate": 4.798696288583392e-06, "loss": 0.4267, "step": 8420 }, { "epoch": 0.5274745924615167, "grad_norm": 0.8830083148672181, "learning_rate": 4.797682716538742e-06, "loss": 0.4214, "step": 8421 }, { "epoch": 0.5275372304608591, "grad_norm": 0.7945811223409723, "learning_rate": 4.79666915282151e-06, "loss": 0.3927, "step": 8422 }, { "epoch": 0.5275998684602013, "grad_norm": 0.8166124943007212, "learning_rate": 4.795655597473415e-06, "loss": 0.4072, "step": 8423 }, { "epoch": 0.5276625064595437, "grad_norm": 0.8414101612901311, "learning_rate": 4.7946420505361715e-06, "loss": 0.3996, "step": 8424 }, { "epoch": 0.527725144458886, "grad_norm": 0.7988905908725723, "learning_rate": 4.793628512051499e-06, "loss": 0.4292, "step": 8425 }, { "epoch": 0.5277877824582283, "grad_norm": 0.8137057765568944, "learning_rate": 4.792614982061117e-06, "loss": 0.3568, "step": 8426 }, { "epoch": 0.5278504204575706, "grad_norm": 0.7594865046103919, "learning_rate": 4.7916014606067405e-06, "loss": 0.3481, "step": 8427 }, { "epoch": 0.5279130584569128, "grad_norm": 0.7931012516542959, "learning_rate": 4.790587947730085e-06, "loss": 0.4143, "step": 8428 }, { "epoch": 0.5279756964562552, "grad_norm": 0.817557305999515, "learning_rate": 4.789574443472868e-06, "loss": 0.4016, "step": 8429 }, { "epoch": 0.5280383344555974, "grad_norm": 0.830356892882505, "learning_rate": 4.788560947876807e-06, "loss": 0.3847, "step": 8430 }, { "epoch": 0.5281009724549398, "grad_norm": 0.8419739154870107, "learning_rate": 4.787547460983618e-06, "loss": 0.3853, "step": 8431 }, { "epoch": 0.5281636104542821, "grad_norm": 0.8124343556354402, "learning_rate": 4.786533982835014e-06, "loss": 0.4008, "step": 8432 }, { "epoch": 0.5282262484536244, "grad_norm": 0.8252887119894482, "learning_rate": 4.78552051347271e-06, "loss": 0.3993, "step": 8433 }, { "epoch": 0.5282888864529667, "grad_norm": 0.8287322864271958, "learning_rate": 4.784507052938422e-06, "loss": 0.3783, "step": 8434 }, { "epoch": 0.528351524452309, "grad_norm": 0.8220961313624732, "learning_rate": 4.783493601273865e-06, "loss": 0.3875, "step": 8435 }, { "epoch": 0.5284141624516513, "grad_norm": 0.8370707852547616, "learning_rate": 4.7824801585207505e-06, "loss": 0.4561, "step": 8436 }, { "epoch": 0.5284768004509935, "grad_norm": 0.7848331301212559, "learning_rate": 4.781466724720793e-06, "loss": 0.3643, "step": 8437 }, { "epoch": 0.5285394384503359, "grad_norm": 0.8391149280381097, "learning_rate": 4.780453299915705e-06, "loss": 0.4171, "step": 8438 }, { "epoch": 0.5286020764496782, "grad_norm": 0.8038354919374762, "learning_rate": 4.7794398841472e-06, "loss": 0.4385, "step": 8439 }, { "epoch": 0.5286647144490205, "grad_norm": 0.864626195795564, "learning_rate": 4.778426477456991e-06, "loss": 0.4631, "step": 8440 }, { "epoch": 0.5287273524483628, "grad_norm": 0.6790903458323172, "learning_rate": 4.77741307988679e-06, "loss": 0.4833, "step": 8441 }, { "epoch": 0.5287899904477051, "grad_norm": 0.7875130070270306, "learning_rate": 4.776399691478306e-06, "loss": 0.4072, "step": 8442 }, { "epoch": 0.5288526284470474, "grad_norm": 0.9486887186176891, "learning_rate": 4.775386312273255e-06, "loss": 0.398, "step": 8443 }, { "epoch": 0.5289152664463898, "grad_norm": 0.6207010556105994, "learning_rate": 4.774372942313344e-06, "loss": 0.4428, "step": 8444 }, { "epoch": 0.528977904445732, "grad_norm": 0.8197421655477164, "learning_rate": 4.773359581640284e-06, "loss": 0.3998, "step": 8445 }, { "epoch": 0.5290405424450743, "grad_norm": 0.8400927241396039, "learning_rate": 4.772346230295785e-06, "loss": 0.3696, "step": 8446 }, { "epoch": 0.5291031804444166, "grad_norm": 0.8796538692129006, "learning_rate": 4.7713328883215585e-06, "loss": 0.4643, "step": 8447 }, { "epoch": 0.5291658184437589, "grad_norm": 0.841138301736903, "learning_rate": 4.770319555759311e-06, "loss": 0.4141, "step": 8448 }, { "epoch": 0.5292284564431012, "grad_norm": 0.8296306323086969, "learning_rate": 4.769306232650755e-06, "loss": 0.4451, "step": 8449 }, { "epoch": 0.5292910944424435, "grad_norm": 0.8606735315280005, "learning_rate": 4.768292919037597e-06, "loss": 0.3994, "step": 8450 }, { "epoch": 0.5293537324417859, "grad_norm": 0.801435858625443, "learning_rate": 4.767279614961545e-06, "loss": 0.3903, "step": 8451 }, { "epoch": 0.5294163704411281, "grad_norm": 0.8658539509683838, "learning_rate": 4.766266320464308e-06, "loss": 0.4287, "step": 8452 }, { "epoch": 0.5294790084404705, "grad_norm": 0.9145566626841278, "learning_rate": 4.765253035587593e-06, "loss": 0.377, "step": 8453 }, { "epoch": 0.5295416464398127, "grad_norm": 0.8012393258632249, "learning_rate": 4.764239760373105e-06, "loss": 0.4118, "step": 8454 }, { "epoch": 0.529604284439155, "grad_norm": 0.7742160869930997, "learning_rate": 4.763226494862553e-06, "loss": 0.4201, "step": 8455 }, { "epoch": 0.5296669224384973, "grad_norm": 0.7999674862224556, "learning_rate": 4.762213239097644e-06, "loss": 0.3937, "step": 8456 }, { "epoch": 0.5297295604378396, "grad_norm": 0.7650003994916776, "learning_rate": 4.7611999931200785e-06, "loss": 0.3563, "step": 8457 }, { "epoch": 0.529792198437182, "grad_norm": 0.8057518028500243, "learning_rate": 4.760186756971568e-06, "loss": 0.3673, "step": 8458 }, { "epoch": 0.5298548364365242, "grad_norm": 0.8129428632307455, "learning_rate": 4.7591735306938144e-06, "loss": 0.3701, "step": 8459 }, { "epoch": 0.5299174744358666, "grad_norm": 0.7528753831259157, "learning_rate": 4.758160314328524e-06, "loss": 0.37, "step": 8460 }, { "epoch": 0.5299801124352088, "grad_norm": 0.6615284763400032, "learning_rate": 4.7571471079174e-06, "loss": 0.4589, "step": 8461 }, { "epoch": 0.5300427504345511, "grad_norm": 0.7614224348371016, "learning_rate": 4.756133911502146e-06, "loss": 0.4006, "step": 8462 }, { "epoch": 0.5301053884338934, "grad_norm": 0.6181027303970573, "learning_rate": 4.755120725124464e-06, "loss": 0.4719, "step": 8463 }, { "epoch": 0.5301680264332357, "grad_norm": 0.849906428039547, "learning_rate": 4.75410754882606e-06, "loss": 0.3694, "step": 8464 }, { "epoch": 0.530230664432578, "grad_norm": 0.8643811872880157, "learning_rate": 4.7530943826486346e-06, "loss": 0.3804, "step": 8465 }, { "epoch": 0.5302933024319203, "grad_norm": 0.8120786903020153, "learning_rate": 4.752081226633888e-06, "loss": 0.3458, "step": 8466 }, { "epoch": 0.5303559404312627, "grad_norm": 0.8465476308347545, "learning_rate": 4.751068080823527e-06, "loss": 0.3908, "step": 8467 }, { "epoch": 0.5304185784306049, "grad_norm": 0.8099323582478126, "learning_rate": 4.750054945259249e-06, "loss": 0.4, "step": 8468 }, { "epoch": 0.5304812164299473, "grad_norm": 0.8281894615413593, "learning_rate": 4.7490418199827555e-06, "loss": 0.4138, "step": 8469 }, { "epoch": 0.5305438544292895, "grad_norm": 0.9554040639268596, "learning_rate": 4.7480287050357485e-06, "loss": 0.435, "step": 8470 }, { "epoch": 0.5306064924286318, "grad_norm": 0.8673062663612796, "learning_rate": 4.7470156004599265e-06, "loss": 0.4083, "step": 8471 }, { "epoch": 0.5306691304279741, "grad_norm": 0.8297521844871348, "learning_rate": 4.746002506296989e-06, "loss": 0.3802, "step": 8472 }, { "epoch": 0.5307317684273164, "grad_norm": 0.8786653351890761, "learning_rate": 4.744989422588635e-06, "loss": 0.4429, "step": 8473 }, { "epoch": 0.5307944064266588, "grad_norm": 0.8881565578152587, "learning_rate": 4.743976349376565e-06, "loss": 0.3979, "step": 8474 }, { "epoch": 0.530857044426001, "grad_norm": 0.5796710474482555, "learning_rate": 4.7429632867024726e-06, "loss": 0.4474, "step": 8475 }, { "epoch": 0.5309196824253434, "grad_norm": 0.5745348610497721, "learning_rate": 4.741950234608062e-06, "loss": 0.4194, "step": 8476 }, { "epoch": 0.5309823204246856, "grad_norm": 0.6522677862473089, "learning_rate": 4.740937193135027e-06, "loss": 0.4738, "step": 8477 }, { "epoch": 0.531044958424028, "grad_norm": 0.7983832235709819, "learning_rate": 4.739924162325065e-06, "loss": 0.3913, "step": 8478 }, { "epoch": 0.5311075964233702, "grad_norm": 0.8652145885129, "learning_rate": 4.738911142219874e-06, "loss": 0.4402, "step": 8479 }, { "epoch": 0.5311702344227125, "grad_norm": 0.8504741645729315, "learning_rate": 4.7378981328611475e-06, "loss": 0.4218, "step": 8480 }, { "epoch": 0.5312328724220549, "grad_norm": 0.8223584744509833, "learning_rate": 4.736885134290583e-06, "loss": 0.4096, "step": 8481 }, { "epoch": 0.5312955104213971, "grad_norm": 0.8442517782000695, "learning_rate": 4.735872146549875e-06, "loss": 0.4036, "step": 8482 }, { "epoch": 0.5313581484207395, "grad_norm": 0.7685115511404648, "learning_rate": 4.7348591696807195e-06, "loss": 0.4152, "step": 8483 }, { "epoch": 0.5314207864200817, "grad_norm": 0.803529806272926, "learning_rate": 4.733846203724807e-06, "loss": 0.3706, "step": 8484 }, { "epoch": 0.5314834244194241, "grad_norm": 0.841338412328239, "learning_rate": 4.7328332487238355e-06, "loss": 0.4363, "step": 8485 }, { "epoch": 0.5315460624187663, "grad_norm": 0.890131064158318, "learning_rate": 4.731820304719498e-06, "loss": 0.3979, "step": 8486 }, { "epoch": 0.5316087004181086, "grad_norm": 0.8611482922366105, "learning_rate": 4.730807371753486e-06, "loss": 0.4074, "step": 8487 }, { "epoch": 0.531671338417451, "grad_norm": 0.8577886363225563, "learning_rate": 4.7297944498674936e-06, "loss": 0.3846, "step": 8488 }, { "epoch": 0.5317339764167932, "grad_norm": 0.7759674483181839, "learning_rate": 4.7287815391032105e-06, "loss": 0.3243, "step": 8489 }, { "epoch": 0.5317966144161356, "grad_norm": 0.8136479178717942, "learning_rate": 4.7277686395023305e-06, "loss": 0.3946, "step": 8490 }, { "epoch": 0.5318592524154778, "grad_norm": 0.8747044702606179, "learning_rate": 4.7267557511065435e-06, "loss": 0.3983, "step": 8491 }, { "epoch": 0.5319218904148202, "grad_norm": 0.8751716196325963, "learning_rate": 4.72574287395754e-06, "loss": 0.4328, "step": 8492 }, { "epoch": 0.5319845284141624, "grad_norm": 0.8759550027225119, "learning_rate": 4.724730008097011e-06, "loss": 0.4111, "step": 8493 }, { "epoch": 0.5320471664135048, "grad_norm": 0.8384794958249635, "learning_rate": 4.723717153566644e-06, "loss": 0.4291, "step": 8494 }, { "epoch": 0.532109804412847, "grad_norm": 0.8356331514591465, "learning_rate": 4.722704310408133e-06, "loss": 0.3656, "step": 8495 }, { "epoch": 0.5321724424121893, "grad_norm": 0.8606498983335911, "learning_rate": 4.721691478663163e-06, "loss": 0.4007, "step": 8496 }, { "epoch": 0.5322350804115317, "grad_norm": 0.8088110542643416, "learning_rate": 4.7206786583734245e-06, "loss": 0.3973, "step": 8497 }, { "epoch": 0.5322977184108739, "grad_norm": 0.8033528692020342, "learning_rate": 4.719665849580603e-06, "loss": 0.3459, "step": 8498 }, { "epoch": 0.5323603564102163, "grad_norm": 0.8347800701705566, "learning_rate": 4.718653052326388e-06, "loss": 0.4127, "step": 8499 }, { "epoch": 0.5324229944095585, "grad_norm": 0.9039422005179815, "learning_rate": 4.717640266652466e-06, "loss": 0.3958, "step": 8500 }, { "epoch": 0.5324856324089009, "grad_norm": 0.7663363530811628, "learning_rate": 4.716627492600521e-06, "loss": 0.3727, "step": 8501 }, { "epoch": 0.5325482704082432, "grad_norm": 0.809727258048179, "learning_rate": 4.715614730212243e-06, "loss": 0.3705, "step": 8502 }, { "epoch": 0.5326109084075855, "grad_norm": 0.8692739906803444, "learning_rate": 4.714601979529313e-06, "loss": 0.4113, "step": 8503 }, { "epoch": 0.5326735464069278, "grad_norm": 0.8675514476511111, "learning_rate": 4.71358924059342e-06, "loss": 0.3915, "step": 8504 }, { "epoch": 0.53273618440627, "grad_norm": 0.657091522970408, "learning_rate": 4.7125765134462485e-06, "loss": 0.4834, "step": 8505 }, { "epoch": 0.5327988224056124, "grad_norm": 0.7704490734692838, "learning_rate": 4.711563798129478e-06, "loss": 0.3626, "step": 8506 }, { "epoch": 0.5328614604049546, "grad_norm": 0.8007594500292604, "learning_rate": 4.7105510946847974e-06, "loss": 0.426, "step": 8507 }, { "epoch": 0.532924098404297, "grad_norm": 0.7650632316873688, "learning_rate": 4.709538403153887e-06, "loss": 0.3874, "step": 8508 }, { "epoch": 0.5329867364036392, "grad_norm": 0.8301906676764672, "learning_rate": 4.708525723578431e-06, "loss": 0.4306, "step": 8509 }, { "epoch": 0.5330493744029816, "grad_norm": 0.794793305462976, "learning_rate": 4.707513056000109e-06, "loss": 0.3669, "step": 8510 }, { "epoch": 0.5331120124023239, "grad_norm": 0.8134689306565951, "learning_rate": 4.706500400460604e-06, "loss": 0.3918, "step": 8511 }, { "epoch": 0.5331746504016661, "grad_norm": 0.9975015062398807, "learning_rate": 4.705487757001595e-06, "loss": 0.3928, "step": 8512 }, { "epoch": 0.5332372884010085, "grad_norm": 0.8972716492508751, "learning_rate": 4.704475125664767e-06, "loss": 0.4149, "step": 8513 }, { "epoch": 0.5332999264003507, "grad_norm": 0.8665096069662614, "learning_rate": 4.703462506491798e-06, "loss": 0.3856, "step": 8514 }, { "epoch": 0.5333625643996931, "grad_norm": 0.8345556206871755, "learning_rate": 4.702449899524366e-06, "loss": 0.4024, "step": 8515 }, { "epoch": 0.5334252023990353, "grad_norm": 0.8624409010333672, "learning_rate": 4.701437304804152e-06, "loss": 0.4372, "step": 8516 }, { "epoch": 0.5334878403983777, "grad_norm": 0.6652151144200709, "learning_rate": 4.700424722372835e-06, "loss": 0.47, "step": 8517 }, { "epoch": 0.53355047839772, "grad_norm": 0.8623183143235869, "learning_rate": 4.69941215227209e-06, "loss": 0.3895, "step": 8518 }, { "epoch": 0.5336131163970623, "grad_norm": 0.7974306567907623, "learning_rate": 4.6983995945435976e-06, "loss": 0.4115, "step": 8519 }, { "epoch": 0.5336757543964046, "grad_norm": 0.7886421710119246, "learning_rate": 4.697387049229034e-06, "loss": 0.3794, "step": 8520 }, { "epoch": 0.5337383923957468, "grad_norm": 0.6299261353891272, "learning_rate": 4.696374516370074e-06, "loss": 0.4495, "step": 8521 }, { "epoch": 0.5338010303950892, "grad_norm": 0.8313265786297619, "learning_rate": 4.695361996008397e-06, "loss": 0.4009, "step": 8522 }, { "epoch": 0.5338636683944314, "grad_norm": 0.8795218247214486, "learning_rate": 4.6943494881856764e-06, "loss": 0.4062, "step": 8523 }, { "epoch": 0.5339263063937738, "grad_norm": 0.7656828706028218, "learning_rate": 4.693336992943587e-06, "loss": 0.3787, "step": 8524 }, { "epoch": 0.5339889443931161, "grad_norm": 0.8047435289493533, "learning_rate": 4.692324510323806e-06, "loss": 0.4094, "step": 8525 }, { "epoch": 0.5340515823924584, "grad_norm": 0.8915335408823764, "learning_rate": 4.6913120403680036e-06, "loss": 0.4361, "step": 8526 }, { "epoch": 0.5341142203918007, "grad_norm": 0.7550587117335418, "learning_rate": 4.690299583117854e-06, "loss": 0.4126, "step": 8527 }, { "epoch": 0.534176858391143, "grad_norm": 0.7759318564331061, "learning_rate": 4.689287138615032e-06, "loss": 0.3902, "step": 8528 }, { "epoch": 0.5342394963904853, "grad_norm": 0.8334389834941682, "learning_rate": 4.68827470690121e-06, "loss": 0.4086, "step": 8529 }, { "epoch": 0.5343021343898275, "grad_norm": 0.8007653907531018, "learning_rate": 4.687262288018057e-06, "loss": 0.3612, "step": 8530 }, { "epoch": 0.5343647723891699, "grad_norm": 0.8615092163036691, "learning_rate": 4.686249882007247e-06, "loss": 0.4211, "step": 8531 }, { "epoch": 0.5344274103885122, "grad_norm": 0.7861744972181965, "learning_rate": 4.68523748891045e-06, "loss": 0.3958, "step": 8532 }, { "epoch": 0.5344900483878545, "grad_norm": 0.7377644576785397, "learning_rate": 4.684225108769337e-06, "loss": 0.3818, "step": 8533 }, { "epoch": 0.5345526863871968, "grad_norm": 0.7748792933025885, "learning_rate": 4.683212741625578e-06, "loss": 0.3854, "step": 8534 }, { "epoch": 0.5346153243865391, "grad_norm": 0.6239464191512842, "learning_rate": 4.68220038752084e-06, "loss": 0.4657, "step": 8535 }, { "epoch": 0.5346779623858814, "grad_norm": 0.8526356080617827, "learning_rate": 4.681188046496793e-06, "loss": 0.4076, "step": 8536 }, { "epoch": 0.5347406003852236, "grad_norm": 0.8976844267340167, "learning_rate": 4.680175718595107e-06, "loss": 0.4112, "step": 8537 }, { "epoch": 0.534803238384566, "grad_norm": 0.8030213829075548, "learning_rate": 4.679163403857446e-06, "loss": 0.4038, "step": 8538 }, { "epoch": 0.5348658763839083, "grad_norm": 0.7935074874497828, "learning_rate": 4.678151102325479e-06, "loss": 0.379, "step": 8539 }, { "epoch": 0.5349285143832506, "grad_norm": 0.917849386906571, "learning_rate": 4.677138814040871e-06, "loss": 0.4084, "step": 8540 }, { "epoch": 0.5349911523825929, "grad_norm": 0.857618743617297, "learning_rate": 4.676126539045291e-06, "loss": 0.4389, "step": 8541 }, { "epoch": 0.5350537903819352, "grad_norm": 0.7669309557540673, "learning_rate": 4.675114277380403e-06, "loss": 0.3754, "step": 8542 }, { "epoch": 0.5351164283812775, "grad_norm": 0.8718933104051194, "learning_rate": 4.6741020290878715e-06, "loss": 0.4003, "step": 8543 }, { "epoch": 0.5351790663806199, "grad_norm": 0.8354789370777383, "learning_rate": 4.67308979420936e-06, "loss": 0.3817, "step": 8544 }, { "epoch": 0.5352417043799621, "grad_norm": 0.8226377288165075, "learning_rate": 4.6720775727865346e-06, "loss": 0.3832, "step": 8545 }, { "epoch": 0.5353043423793044, "grad_norm": 0.8926365704161758, "learning_rate": 4.671065364861057e-06, "loss": 0.4471, "step": 8546 }, { "epoch": 0.5353669803786467, "grad_norm": 0.7509759440909816, "learning_rate": 4.67005317047459e-06, "loss": 0.3614, "step": 8547 }, { "epoch": 0.535429618377989, "grad_norm": 0.8579693102480402, "learning_rate": 4.669040989668794e-06, "loss": 0.4106, "step": 8548 }, { "epoch": 0.5354922563773313, "grad_norm": 0.7684435886907219, "learning_rate": 4.668028822485332e-06, "loss": 0.4237, "step": 8549 }, { "epoch": 0.5355548943766736, "grad_norm": 0.8139181867172096, "learning_rate": 4.667016668965868e-06, "loss": 0.3616, "step": 8550 }, { "epoch": 0.535617532376016, "grad_norm": 0.8166356933554751, "learning_rate": 4.666004529152059e-06, "loss": 0.3781, "step": 8551 }, { "epoch": 0.5356801703753582, "grad_norm": 0.8482998427779693, "learning_rate": 4.6649924030855655e-06, "loss": 0.4002, "step": 8552 }, { "epoch": 0.5357428083747006, "grad_norm": 0.8131137198362869, "learning_rate": 4.663980290808046e-06, "loss": 0.3703, "step": 8553 }, { "epoch": 0.5358054463740428, "grad_norm": 0.7765144377823517, "learning_rate": 4.662968192361161e-06, "loss": 0.3832, "step": 8554 }, { "epoch": 0.5358680843733851, "grad_norm": 0.8498136547141185, "learning_rate": 4.661956107786568e-06, "loss": 0.4226, "step": 8555 }, { "epoch": 0.5359307223727274, "grad_norm": 0.814803841522404, "learning_rate": 4.660944037125924e-06, "loss": 0.3919, "step": 8556 }, { "epoch": 0.5359933603720697, "grad_norm": 0.7763152395018666, "learning_rate": 4.659931980420885e-06, "loss": 0.3954, "step": 8557 }, { "epoch": 0.536055998371412, "grad_norm": 0.9032277520218136, "learning_rate": 4.658919937713109e-06, "loss": 0.4019, "step": 8558 }, { "epoch": 0.5361186363707543, "grad_norm": 0.9020945295785122, "learning_rate": 4.657907909044253e-06, "loss": 0.4094, "step": 8559 }, { "epoch": 0.5361812743700967, "grad_norm": 0.8292600981727662, "learning_rate": 4.6568958944559705e-06, "loss": 0.389, "step": 8560 }, { "epoch": 0.5362439123694389, "grad_norm": 0.8171194737431698, "learning_rate": 4.655883893989918e-06, "loss": 0.3727, "step": 8561 }, { "epoch": 0.5363065503687813, "grad_norm": 0.8181240508414599, "learning_rate": 4.654871907687746e-06, "loss": 0.3951, "step": 8562 }, { "epoch": 0.5363691883681235, "grad_norm": 0.8547815329356324, "learning_rate": 4.653859935591112e-06, "loss": 0.4356, "step": 8563 }, { "epoch": 0.5364318263674658, "grad_norm": 0.8780003507940628, "learning_rate": 4.652847977741667e-06, "loss": 0.3929, "step": 8564 }, { "epoch": 0.5364944643668081, "grad_norm": 0.8265740243799823, "learning_rate": 4.651836034181063e-06, "loss": 0.3996, "step": 8565 }, { "epoch": 0.5365571023661504, "grad_norm": 0.881561027878183, "learning_rate": 4.650824104950953e-06, "loss": 0.4226, "step": 8566 }, { "epoch": 0.5366197403654928, "grad_norm": 0.8365895144774037, "learning_rate": 4.649812190092985e-06, "loss": 0.4065, "step": 8567 }, { "epoch": 0.536682378364835, "grad_norm": 0.9169495859664819, "learning_rate": 4.648800289648815e-06, "loss": 0.4281, "step": 8568 }, { "epoch": 0.5367450163641774, "grad_norm": 0.8334993207763901, "learning_rate": 4.647788403660091e-06, "loss": 0.3852, "step": 8569 }, { "epoch": 0.5368076543635196, "grad_norm": 0.853030406626701, "learning_rate": 4.64677653216846e-06, "loss": 0.4066, "step": 8570 }, { "epoch": 0.5368702923628619, "grad_norm": 0.7452566029148869, "learning_rate": 4.645764675215574e-06, "loss": 0.3704, "step": 8571 }, { "epoch": 0.5369329303622042, "grad_norm": 0.8207728637478179, "learning_rate": 4.644752832843079e-06, "loss": 0.3742, "step": 8572 }, { "epoch": 0.5369955683615465, "grad_norm": 0.823992810637124, "learning_rate": 4.6437410050926236e-06, "loss": 0.4291, "step": 8573 }, { "epoch": 0.5370582063608889, "grad_norm": 0.9205656961402713, "learning_rate": 4.6427291920058535e-06, "loss": 0.3816, "step": 8574 }, { "epoch": 0.5371208443602311, "grad_norm": 0.7452103296375718, "learning_rate": 4.641717393624417e-06, "loss": 0.4049, "step": 8575 }, { "epoch": 0.5371834823595735, "grad_norm": 0.7358737639620359, "learning_rate": 4.640705609989957e-06, "loss": 0.4484, "step": 8576 }, { "epoch": 0.5372461203589157, "grad_norm": 0.8596648756980765, "learning_rate": 4.639693841144123e-06, "loss": 0.452, "step": 8577 }, { "epoch": 0.5373087583582581, "grad_norm": 0.8951858991581522, "learning_rate": 4.6386820871285585e-06, "loss": 0.4011, "step": 8578 }, { "epoch": 0.5373713963576003, "grad_norm": 0.7812270883013765, "learning_rate": 4.637670347984904e-06, "loss": 0.3585, "step": 8579 }, { "epoch": 0.5374340343569426, "grad_norm": 0.7892978790689239, "learning_rate": 4.636658623754806e-06, "loss": 0.3786, "step": 8580 }, { "epoch": 0.537496672356285, "grad_norm": 0.8490821482587102, "learning_rate": 4.6356469144799075e-06, "loss": 0.414, "step": 8581 }, { "epoch": 0.5375593103556272, "grad_norm": 0.7886319867892835, "learning_rate": 4.634635220201849e-06, "loss": 0.3619, "step": 8582 }, { "epoch": 0.5376219483549696, "grad_norm": 0.7866299135705869, "learning_rate": 4.633623540962272e-06, "loss": 0.4185, "step": 8583 }, { "epoch": 0.5376845863543118, "grad_norm": 0.6613530593009469, "learning_rate": 4.6326118768028185e-06, "loss": 0.479, "step": 8584 }, { "epoch": 0.5377472243536542, "grad_norm": 0.8724711337171289, "learning_rate": 4.631600227765129e-06, "loss": 0.4192, "step": 8585 }, { "epoch": 0.5378098623529964, "grad_norm": 0.8071383468556116, "learning_rate": 4.6305885938908404e-06, "loss": 0.3949, "step": 8586 }, { "epoch": 0.5378725003523388, "grad_norm": 0.8015165545687636, "learning_rate": 4.629576975221595e-06, "loss": 0.3861, "step": 8587 }, { "epoch": 0.537935138351681, "grad_norm": 0.8758734129316289, "learning_rate": 4.62856537179903e-06, "loss": 0.4287, "step": 8588 }, { "epoch": 0.5379977763510233, "grad_norm": 0.8081228425484251, "learning_rate": 4.627553783664785e-06, "loss": 0.4079, "step": 8589 }, { "epoch": 0.5380604143503657, "grad_norm": 0.7753723334877602, "learning_rate": 4.626542210860493e-06, "loss": 0.3559, "step": 8590 }, { "epoch": 0.5381230523497079, "grad_norm": 0.8332003906941802, "learning_rate": 4.625530653427794e-06, "loss": 0.4282, "step": 8591 }, { "epoch": 0.5381856903490503, "grad_norm": 0.77725966562478, "learning_rate": 4.624519111408322e-06, "loss": 0.3819, "step": 8592 }, { "epoch": 0.5382483283483925, "grad_norm": 0.8010264169383214, "learning_rate": 4.623507584843715e-06, "loss": 0.3947, "step": 8593 }, { "epoch": 0.5383109663477349, "grad_norm": 0.8052209415365797, "learning_rate": 4.622496073775605e-06, "loss": 0.3988, "step": 8594 }, { "epoch": 0.5383736043470772, "grad_norm": 0.8848100025075215, "learning_rate": 4.621484578245624e-06, "loss": 0.4131, "step": 8595 }, { "epoch": 0.5384362423464194, "grad_norm": 0.8253895407070881, "learning_rate": 4.620473098295409e-06, "loss": 0.385, "step": 8596 }, { "epoch": 0.5384988803457618, "grad_norm": 0.8045665862357558, "learning_rate": 4.619461633966594e-06, "loss": 0.3867, "step": 8597 }, { "epoch": 0.538561518345104, "grad_norm": 0.8315847778567144, "learning_rate": 4.618450185300808e-06, "loss": 0.3857, "step": 8598 }, { "epoch": 0.5386241563444464, "grad_norm": 0.8731585884412407, "learning_rate": 4.617438752339683e-06, "loss": 0.4493, "step": 8599 }, { "epoch": 0.5386867943437886, "grad_norm": 0.8514981147959592, "learning_rate": 4.616427335124849e-06, "loss": 0.4058, "step": 8600 }, { "epoch": 0.538749432343131, "grad_norm": 0.8863130777631777, "learning_rate": 4.615415933697939e-06, "loss": 0.399, "step": 8601 }, { "epoch": 0.5388120703424732, "grad_norm": 0.8769595733675947, "learning_rate": 4.6144045481005795e-06, "loss": 0.4263, "step": 8602 }, { "epoch": 0.5388747083418156, "grad_norm": 0.9247867768786829, "learning_rate": 4.6133931783744e-06, "loss": 0.4412, "step": 8603 }, { "epoch": 0.5389373463411579, "grad_norm": 0.8280747952883598, "learning_rate": 4.612381824561028e-06, "loss": 0.3959, "step": 8604 }, { "epoch": 0.5389999843405001, "grad_norm": 0.8047901820033639, "learning_rate": 4.611370486702092e-06, "loss": 0.3655, "step": 8605 }, { "epoch": 0.5390626223398425, "grad_norm": 0.8120344163829782, "learning_rate": 4.610359164839221e-06, "loss": 0.3861, "step": 8606 }, { "epoch": 0.5391252603391847, "grad_norm": 0.8302240187869443, "learning_rate": 4.60934785901404e-06, "loss": 0.3776, "step": 8607 }, { "epoch": 0.5391878983385271, "grad_norm": 0.8224426673396226, "learning_rate": 4.608336569268171e-06, "loss": 0.422, "step": 8608 }, { "epoch": 0.5392505363378693, "grad_norm": 0.7592416007342319, "learning_rate": 4.607325295643243e-06, "loss": 0.3774, "step": 8609 }, { "epoch": 0.5393131743372117, "grad_norm": 0.847112958027747, "learning_rate": 4.606314038180878e-06, "loss": 0.4368, "step": 8610 }, { "epoch": 0.539375812336554, "grad_norm": 0.8000871252012979, "learning_rate": 4.605302796922701e-06, "loss": 0.3833, "step": 8611 }, { "epoch": 0.5394384503358963, "grad_norm": 0.8318448451076517, "learning_rate": 4.6042915719103336e-06, "loss": 0.3964, "step": 8612 }, { "epoch": 0.5395010883352386, "grad_norm": 0.7600354999714688, "learning_rate": 4.603280363185397e-06, "loss": 0.3511, "step": 8613 }, { "epoch": 0.5395637263345808, "grad_norm": 0.8103888192305806, "learning_rate": 4.602269170789515e-06, "loss": 0.3629, "step": 8614 }, { "epoch": 0.5396263643339232, "grad_norm": 0.8245287900289039, "learning_rate": 4.601257994764309e-06, "loss": 0.3631, "step": 8615 }, { "epoch": 0.5396890023332654, "grad_norm": 0.8399591736297621, "learning_rate": 4.600246835151399e-06, "loss": 0.4082, "step": 8616 }, { "epoch": 0.5397516403326078, "grad_norm": 0.9099926198030557, "learning_rate": 4.599235691992401e-06, "loss": 0.452, "step": 8617 }, { "epoch": 0.5398142783319501, "grad_norm": 0.8838386037275572, "learning_rate": 4.598224565328938e-06, "loss": 0.4362, "step": 8618 }, { "epoch": 0.5398769163312924, "grad_norm": 0.8586504368342553, "learning_rate": 4.5972134552026255e-06, "loss": 0.4362, "step": 8619 }, { "epoch": 0.5399395543306347, "grad_norm": 0.8613257333022379, "learning_rate": 4.596202361655083e-06, "loss": 0.4086, "step": 8620 }, { "epoch": 0.5400021923299769, "grad_norm": 0.8578673873342019, "learning_rate": 4.595191284727925e-06, "loss": 0.3817, "step": 8621 }, { "epoch": 0.5400648303293193, "grad_norm": 0.8354815538930884, "learning_rate": 4.594180224462767e-06, "loss": 0.3736, "step": 8622 }, { "epoch": 0.5401274683286615, "grad_norm": 0.8251973649942925, "learning_rate": 4.5931691809012294e-06, "loss": 0.4023, "step": 8623 }, { "epoch": 0.5401901063280039, "grad_norm": 0.8183495101240186, "learning_rate": 4.592158154084923e-06, "loss": 0.3928, "step": 8624 }, { "epoch": 0.5402527443273462, "grad_norm": 0.7651899046276451, "learning_rate": 4.591147144055464e-06, "loss": 0.3778, "step": 8625 }, { "epoch": 0.5403153823266885, "grad_norm": 0.7885767017023846, "learning_rate": 4.590136150854462e-06, "loss": 0.3544, "step": 8626 }, { "epoch": 0.5403780203260308, "grad_norm": 0.8419296695730727, "learning_rate": 4.589125174523533e-06, "loss": 0.398, "step": 8627 }, { "epoch": 0.5404406583253731, "grad_norm": 0.8168406971301244, "learning_rate": 4.588114215104289e-06, "loss": 0.4088, "step": 8628 }, { "epoch": 0.5405032963247154, "grad_norm": 0.815804311826966, "learning_rate": 4.587103272638339e-06, "loss": 0.3895, "step": 8629 }, { "epoch": 0.5405659343240576, "grad_norm": 0.8018184675947149, "learning_rate": 4.586092347167296e-06, "loss": 0.3616, "step": 8630 }, { "epoch": 0.5406285723234, "grad_norm": 0.8628797355372565, "learning_rate": 4.585081438732769e-06, "loss": 0.4076, "step": 8631 }, { "epoch": 0.5406912103227423, "grad_norm": 0.7427170815601744, "learning_rate": 4.5840705473763645e-06, "loss": 0.3519, "step": 8632 }, { "epoch": 0.5407538483220846, "grad_norm": 0.7645466312216643, "learning_rate": 4.583059673139696e-06, "loss": 0.3796, "step": 8633 }, { "epoch": 0.5408164863214269, "grad_norm": 0.8360453518880107, "learning_rate": 4.582048816064367e-06, "loss": 0.3919, "step": 8634 }, { "epoch": 0.5408791243207692, "grad_norm": 0.8049227798919629, "learning_rate": 4.581037976191988e-06, "loss": 0.4014, "step": 8635 }, { "epoch": 0.5409417623201115, "grad_norm": 0.8514753489589875, "learning_rate": 4.580027153564164e-06, "loss": 0.4261, "step": 8636 }, { "epoch": 0.5410044003194538, "grad_norm": 0.6091840274831328, "learning_rate": 4.5790163482225e-06, "loss": 0.4512, "step": 8637 }, { "epoch": 0.5410670383187961, "grad_norm": 0.7716727883392713, "learning_rate": 4.5780055602085995e-06, "loss": 0.3965, "step": 8638 }, { "epoch": 0.5411296763181384, "grad_norm": 0.8743585227800618, "learning_rate": 4.57699478956407e-06, "loss": 0.4063, "step": 8639 }, { "epoch": 0.5411923143174807, "grad_norm": 0.8289444642992058, "learning_rate": 4.575984036330514e-06, "loss": 0.4463, "step": 8640 }, { "epoch": 0.541254952316823, "grad_norm": 0.8541505830537509, "learning_rate": 4.574973300549531e-06, "loss": 0.3802, "step": 8641 }, { "epoch": 0.5413175903161653, "grad_norm": 0.8139541388282158, "learning_rate": 4.573962582262727e-06, "loss": 0.3652, "step": 8642 }, { "epoch": 0.5413802283155076, "grad_norm": 0.8869633255862771, "learning_rate": 4.572951881511703e-06, "loss": 0.4124, "step": 8643 }, { "epoch": 0.54144286631485, "grad_norm": 1.0166344685242523, "learning_rate": 4.571941198338059e-06, "loss": 0.4098, "step": 8644 }, { "epoch": 0.5415055043141922, "grad_norm": 0.7647582203903737, "learning_rate": 4.570930532783395e-06, "loss": 0.422, "step": 8645 }, { "epoch": 0.5415681423135345, "grad_norm": 0.8116078983050721, "learning_rate": 4.56991988488931e-06, "loss": 0.4085, "step": 8646 }, { "epoch": 0.5416307803128768, "grad_norm": 0.8699580690698412, "learning_rate": 4.568909254697401e-06, "loss": 0.3593, "step": 8647 }, { "epoch": 0.5416934183122191, "grad_norm": 0.8468522791304863, "learning_rate": 4.567898642249268e-06, "loss": 0.4067, "step": 8648 }, { "epoch": 0.5417560563115614, "grad_norm": 0.8194267656329062, "learning_rate": 4.5668880475865074e-06, "loss": 0.3992, "step": 8649 }, { "epoch": 0.5418186943109037, "grad_norm": 0.8714582716115331, "learning_rate": 4.565877470750712e-06, "loss": 0.4208, "step": 8650 }, { "epoch": 0.541881332310246, "grad_norm": 0.8084668805087262, "learning_rate": 4.564866911783484e-06, "loss": 0.3719, "step": 8651 }, { "epoch": 0.5419439703095883, "grad_norm": 0.8234704420274026, "learning_rate": 4.563856370726413e-06, "loss": 0.3818, "step": 8652 }, { "epoch": 0.5420066083089307, "grad_norm": 0.8325303019058051, "learning_rate": 4.562845847621096e-06, "loss": 0.4147, "step": 8653 }, { "epoch": 0.5420692463082729, "grad_norm": 0.8659306993011628, "learning_rate": 4.561835342509126e-06, "loss": 0.4233, "step": 8654 }, { "epoch": 0.5421318843076152, "grad_norm": 0.8160510977057552, "learning_rate": 4.560824855432092e-06, "loss": 0.3946, "step": 8655 }, { "epoch": 0.5421945223069575, "grad_norm": 0.8209282441017852, "learning_rate": 4.559814386431591e-06, "loss": 0.369, "step": 8656 }, { "epoch": 0.5422571603062998, "grad_norm": 0.8180717869808087, "learning_rate": 4.55880393554921e-06, "loss": 0.3992, "step": 8657 }, { "epoch": 0.5423197983056421, "grad_norm": 0.8393114174532597, "learning_rate": 4.557793502826542e-06, "loss": 0.4002, "step": 8658 }, { "epoch": 0.5423824363049844, "grad_norm": 0.7892594922254912, "learning_rate": 4.556783088305172e-06, "loss": 0.3962, "step": 8659 }, { "epoch": 0.5424450743043268, "grad_norm": 0.9296505174570873, "learning_rate": 4.555772692026694e-06, "loss": 0.3744, "step": 8660 }, { "epoch": 0.542507712303669, "grad_norm": 0.8180178533451575, "learning_rate": 4.554762314032697e-06, "loss": 0.4235, "step": 8661 }, { "epoch": 0.5425703503030114, "grad_norm": 0.8773522202944793, "learning_rate": 4.553751954364764e-06, "loss": 0.4044, "step": 8662 }, { "epoch": 0.5426329883023536, "grad_norm": 0.8572873404528825, "learning_rate": 4.552741613064484e-06, "loss": 0.4066, "step": 8663 }, { "epoch": 0.5426956263016959, "grad_norm": 0.8183542124555219, "learning_rate": 4.551731290173441e-06, "loss": 0.4067, "step": 8664 }, { "epoch": 0.5427582643010382, "grad_norm": 0.8855322071903567, "learning_rate": 4.550720985733223e-06, "loss": 0.4221, "step": 8665 }, { "epoch": 0.5428209023003805, "grad_norm": 0.8563454968304164, "learning_rate": 4.549710699785412e-06, "loss": 0.3789, "step": 8666 }, { "epoch": 0.5428835402997229, "grad_norm": 0.8497230836807781, "learning_rate": 4.5487004323715926e-06, "loss": 0.4103, "step": 8667 }, { "epoch": 0.5429461782990651, "grad_norm": 0.9074384666512223, "learning_rate": 4.5476901835333455e-06, "loss": 0.3896, "step": 8668 }, { "epoch": 0.5430088162984075, "grad_norm": 0.9128556546408672, "learning_rate": 4.546679953312253e-06, "loss": 0.4159, "step": 8669 }, { "epoch": 0.5430714542977497, "grad_norm": 0.7722160476202012, "learning_rate": 4.545669741749901e-06, "loss": 0.3669, "step": 8670 }, { "epoch": 0.5431340922970921, "grad_norm": 0.8229033332554118, "learning_rate": 4.544659548887867e-06, "loss": 0.3931, "step": 8671 }, { "epoch": 0.5431967302964343, "grad_norm": 0.7861989102161862, "learning_rate": 4.543649374767729e-06, "loss": 0.3683, "step": 8672 }, { "epoch": 0.5432593682957766, "grad_norm": 0.8266903732747362, "learning_rate": 4.542639219431067e-06, "loss": 0.3567, "step": 8673 }, { "epoch": 0.543322006295119, "grad_norm": 0.8021200967062938, "learning_rate": 4.541629082919461e-06, "loss": 0.3705, "step": 8674 }, { "epoch": 0.5433846442944612, "grad_norm": 0.8551118188032625, "learning_rate": 4.540618965274487e-06, "loss": 0.4076, "step": 8675 }, { "epoch": 0.5434472822938036, "grad_norm": 0.6751752733764843, "learning_rate": 4.5396088665377205e-06, "loss": 0.476, "step": 8676 }, { "epoch": 0.5435099202931458, "grad_norm": 0.7984399198367409, "learning_rate": 4.538598786750739e-06, "loss": 0.4066, "step": 8677 }, { "epoch": 0.5435725582924882, "grad_norm": 0.8356154994630994, "learning_rate": 4.537588725955117e-06, "loss": 0.4255, "step": 8678 }, { "epoch": 0.5436351962918304, "grad_norm": 0.7608267430054005, "learning_rate": 4.536578684192429e-06, "loss": 0.3734, "step": 8679 }, { "epoch": 0.5436978342911727, "grad_norm": 0.8516350651978298, "learning_rate": 4.535568661504249e-06, "loss": 0.3968, "step": 8680 }, { "epoch": 0.543760472290515, "grad_norm": 0.8201126000276872, "learning_rate": 4.534558657932148e-06, "loss": 0.4031, "step": 8681 }, { "epoch": 0.5438231102898573, "grad_norm": 0.8843213812247163, "learning_rate": 4.5335486735176995e-06, "loss": 0.3961, "step": 8682 }, { "epoch": 0.5438857482891997, "grad_norm": 0.8395661200317318, "learning_rate": 4.532538708302475e-06, "loss": 0.4163, "step": 8683 }, { "epoch": 0.5439483862885419, "grad_norm": 0.8111918120104595, "learning_rate": 4.531528762328044e-06, "loss": 0.4038, "step": 8684 }, { "epoch": 0.5440110242878843, "grad_norm": 0.8891191016934314, "learning_rate": 4.530518835635974e-06, "loss": 0.3981, "step": 8685 }, { "epoch": 0.5440736622872265, "grad_norm": 0.8759219434550524, "learning_rate": 4.5295089282678375e-06, "loss": 0.3591, "step": 8686 }, { "epoch": 0.5441363002865689, "grad_norm": 0.826977552948902, "learning_rate": 4.528499040265199e-06, "loss": 0.4064, "step": 8687 }, { "epoch": 0.5441989382859111, "grad_norm": 0.8676448647771443, "learning_rate": 4.52748917166963e-06, "loss": 0.4398, "step": 8688 }, { "epoch": 0.5442615762852534, "grad_norm": 0.8796027814567974, "learning_rate": 4.526479322522694e-06, "loss": 0.4048, "step": 8689 }, { "epoch": 0.5443242142845958, "grad_norm": 0.714108980385441, "learning_rate": 4.525469492865956e-06, "loss": 0.4666, "step": 8690 }, { "epoch": 0.544386852283938, "grad_norm": 0.8524017786867305, "learning_rate": 4.5244596827409824e-06, "loss": 0.4013, "step": 8691 }, { "epoch": 0.5444494902832804, "grad_norm": 0.9007358271880064, "learning_rate": 4.523449892189336e-06, "loss": 0.4231, "step": 8692 }, { "epoch": 0.5445121282826226, "grad_norm": 0.8134751362843943, "learning_rate": 4.522440121252581e-06, "loss": 0.3707, "step": 8693 }, { "epoch": 0.544574766281965, "grad_norm": 0.7985911566517242, "learning_rate": 4.521430369972279e-06, "loss": 0.3876, "step": 8694 }, { "epoch": 0.5446374042813072, "grad_norm": 0.7915629605426515, "learning_rate": 4.520420638389992e-06, "loss": 0.4001, "step": 8695 }, { "epoch": 0.5447000422806496, "grad_norm": 0.8311040898597415, "learning_rate": 4.519410926547279e-06, "loss": 0.4003, "step": 8696 }, { "epoch": 0.5447626802799919, "grad_norm": 0.8002243994691046, "learning_rate": 4.518401234485703e-06, "loss": 0.3773, "step": 8697 }, { "epoch": 0.5448253182793341, "grad_norm": 0.8579145823377928, "learning_rate": 4.517391562246823e-06, "loss": 0.4315, "step": 8698 }, { "epoch": 0.5448879562786765, "grad_norm": 0.8001699805475285, "learning_rate": 4.516381909872193e-06, "loss": 0.3441, "step": 8699 }, { "epoch": 0.5449505942780187, "grad_norm": 0.8400720033006611, "learning_rate": 4.515372277403375e-06, "loss": 0.4262, "step": 8700 }, { "epoch": 0.5450132322773611, "grad_norm": 0.7657622810937764, "learning_rate": 4.514362664881924e-06, "loss": 0.3808, "step": 8701 }, { "epoch": 0.5450758702767033, "grad_norm": 0.8701869964318742, "learning_rate": 4.513353072349395e-06, "loss": 0.4247, "step": 8702 }, { "epoch": 0.5451385082760457, "grad_norm": 0.8422714699085098, "learning_rate": 4.512343499847345e-06, "loss": 0.4153, "step": 8703 }, { "epoch": 0.545201146275388, "grad_norm": 0.7949842710408792, "learning_rate": 4.511333947417327e-06, "loss": 0.3728, "step": 8704 }, { "epoch": 0.5452637842747302, "grad_norm": 0.8573995286008472, "learning_rate": 4.510324415100892e-06, "loss": 0.4522, "step": 8705 }, { "epoch": 0.5453264222740726, "grad_norm": 0.7838455481062667, "learning_rate": 4.509314902939597e-06, "loss": 0.4005, "step": 8706 }, { "epoch": 0.5453890602734148, "grad_norm": 0.8039399073888147, "learning_rate": 4.508305410974991e-06, "loss": 0.3748, "step": 8707 }, { "epoch": 0.5454516982727572, "grad_norm": 0.6359995722686133, "learning_rate": 4.507295939248627e-06, "loss": 0.4906, "step": 8708 }, { "epoch": 0.5455143362720994, "grad_norm": 0.7944841590378993, "learning_rate": 4.506286487802053e-06, "loss": 0.3773, "step": 8709 }, { "epoch": 0.5455769742714418, "grad_norm": 0.795086125180082, "learning_rate": 4.505277056676819e-06, "loss": 0.4048, "step": 8710 }, { "epoch": 0.5456396122707841, "grad_norm": 0.5886969868773282, "learning_rate": 4.504267645914472e-06, "loss": 0.4526, "step": 8711 }, { "epoch": 0.5457022502701264, "grad_norm": 0.8303443441273884, "learning_rate": 4.503258255556561e-06, "loss": 0.4021, "step": 8712 }, { "epoch": 0.5457648882694687, "grad_norm": 0.8200314616331927, "learning_rate": 4.502248885644634e-06, "loss": 0.3879, "step": 8713 }, { "epoch": 0.5458275262688109, "grad_norm": 0.8537446081599446, "learning_rate": 4.501239536220233e-06, "loss": 0.3904, "step": 8714 }, { "epoch": 0.5458901642681533, "grad_norm": 0.8205096783883978, "learning_rate": 4.5002302073249054e-06, "loss": 0.3742, "step": 8715 }, { "epoch": 0.5459528022674955, "grad_norm": 0.8376322835488214, "learning_rate": 4.499220899000195e-06, "loss": 0.3802, "step": 8716 }, { "epoch": 0.5460154402668379, "grad_norm": 0.8419011686963737, "learning_rate": 4.498211611287648e-06, "loss": 0.3904, "step": 8717 }, { "epoch": 0.5460780782661802, "grad_norm": 0.8342793856574692, "learning_rate": 4.497202344228803e-06, "loss": 0.4093, "step": 8718 }, { "epoch": 0.5461407162655225, "grad_norm": 0.6525500755173509, "learning_rate": 4.496193097865201e-06, "loss": 0.4583, "step": 8719 }, { "epoch": 0.5462033542648648, "grad_norm": 0.665222221007658, "learning_rate": 4.495183872238387e-06, "loss": 0.4668, "step": 8720 }, { "epoch": 0.5462659922642071, "grad_norm": 0.7392075911745356, "learning_rate": 4.4941746673898975e-06, "loss": 0.3919, "step": 8721 }, { "epoch": 0.5463286302635494, "grad_norm": 0.7795778012694874, "learning_rate": 4.493165483361274e-06, "loss": 0.3929, "step": 8722 }, { "epoch": 0.5463912682628916, "grad_norm": 0.7683705829475411, "learning_rate": 4.492156320194051e-06, "loss": 0.3888, "step": 8723 }, { "epoch": 0.546453906262234, "grad_norm": 0.8068147943991193, "learning_rate": 4.491147177929768e-06, "loss": 0.3937, "step": 8724 }, { "epoch": 0.5465165442615763, "grad_norm": 0.8205050749355604, "learning_rate": 4.490138056609962e-06, "loss": 0.3665, "step": 8725 }, { "epoch": 0.5465791822609186, "grad_norm": 0.7971698351116298, "learning_rate": 4.4891289562761695e-06, "loss": 0.3861, "step": 8726 }, { "epoch": 0.5466418202602609, "grad_norm": 0.8617131531755979, "learning_rate": 4.488119876969924e-06, "loss": 0.4268, "step": 8727 }, { "epoch": 0.5467044582596032, "grad_norm": 0.8649220392379688, "learning_rate": 4.487110818732759e-06, "loss": 0.4105, "step": 8728 }, { "epoch": 0.5467670962589455, "grad_norm": 0.8138600350671747, "learning_rate": 4.486101781606208e-06, "loss": 0.418, "step": 8729 }, { "epoch": 0.5468297342582877, "grad_norm": 0.7570525710096044, "learning_rate": 4.485092765631802e-06, "loss": 0.3878, "step": 8730 }, { "epoch": 0.5468923722576301, "grad_norm": 0.8990989684801048, "learning_rate": 4.484083770851075e-06, "loss": 0.4004, "step": 8731 }, { "epoch": 0.5469550102569724, "grad_norm": 0.7831740976983047, "learning_rate": 4.483074797305555e-06, "loss": 0.4157, "step": 8732 }, { "epoch": 0.5470176482563147, "grad_norm": 1.008093142520931, "learning_rate": 4.482065845036769e-06, "loss": 0.4394, "step": 8733 }, { "epoch": 0.547080286255657, "grad_norm": 0.6615527975185659, "learning_rate": 4.481056914086253e-06, "loss": 0.4787, "step": 8734 }, { "epoch": 0.5471429242549993, "grad_norm": 0.8499117728578817, "learning_rate": 4.48004800449553e-06, "loss": 0.3865, "step": 8735 }, { "epoch": 0.5472055622543416, "grad_norm": 0.9478365683252198, "learning_rate": 4.479039116306128e-06, "loss": 0.3941, "step": 8736 }, { "epoch": 0.547268200253684, "grad_norm": 0.8859089286349159, "learning_rate": 4.478030249559571e-06, "loss": 0.4668, "step": 8737 }, { "epoch": 0.5473308382530262, "grad_norm": 0.7594792551181221, "learning_rate": 4.477021404297388e-06, "loss": 0.3485, "step": 8738 }, { "epoch": 0.5473934762523684, "grad_norm": 0.8613535868814068, "learning_rate": 4.4760125805611e-06, "loss": 0.4086, "step": 8739 }, { "epoch": 0.5474561142517108, "grad_norm": 0.9092184394120338, "learning_rate": 4.475003778392231e-06, "loss": 0.4601, "step": 8740 }, { "epoch": 0.5475187522510531, "grad_norm": 0.9131311681127091, "learning_rate": 4.473994997832305e-06, "loss": 0.402, "step": 8741 }, { "epoch": 0.5475813902503954, "grad_norm": 0.8522904467865263, "learning_rate": 4.4729862389228395e-06, "loss": 0.3842, "step": 8742 }, { "epoch": 0.5476440282497377, "grad_norm": 0.83011499957043, "learning_rate": 4.471977501705361e-06, "loss": 0.3762, "step": 8743 }, { "epoch": 0.54770666624908, "grad_norm": 0.906469301906311, "learning_rate": 4.4709687862213866e-06, "loss": 0.3912, "step": 8744 }, { "epoch": 0.5477693042484223, "grad_norm": 0.842820391037041, "learning_rate": 4.469960092512434e-06, "loss": 0.4, "step": 8745 }, { "epoch": 0.5478319422477647, "grad_norm": 0.8599749344806793, "learning_rate": 4.468951420620023e-06, "loss": 0.4259, "step": 8746 }, { "epoch": 0.5478945802471069, "grad_norm": 0.8493718361881636, "learning_rate": 4.4679427705856716e-06, "loss": 0.3832, "step": 8747 }, { "epoch": 0.5479572182464492, "grad_norm": 0.7999333141781302, "learning_rate": 4.466934142450893e-06, "loss": 0.4297, "step": 8748 }, { "epoch": 0.5480198562457915, "grad_norm": 0.8718383415980234, "learning_rate": 4.4659255362572034e-06, "loss": 0.4225, "step": 8749 }, { "epoch": 0.5480824942451338, "grad_norm": 0.8085488540749443, "learning_rate": 4.464916952046119e-06, "loss": 0.4084, "step": 8750 }, { "epoch": 0.5481451322444761, "grad_norm": 0.8161412682035433, "learning_rate": 4.46390838985915e-06, "loss": 0.3929, "step": 8751 }, { "epoch": 0.5482077702438184, "grad_norm": 0.6189875968843953, "learning_rate": 4.462899849737814e-06, "loss": 0.4238, "step": 8752 }, { "epoch": 0.5482704082431608, "grad_norm": 0.8599309357599648, "learning_rate": 4.461891331723619e-06, "loss": 0.3957, "step": 8753 }, { "epoch": 0.548333046242503, "grad_norm": 0.8704519170320036, "learning_rate": 4.460882835858076e-06, "loss": 0.3666, "step": 8754 }, { "epoch": 0.5483956842418453, "grad_norm": 1.1039026884354772, "learning_rate": 4.459874362182696e-06, "loss": 0.4495, "step": 8755 }, { "epoch": 0.5484583222411876, "grad_norm": 0.9369652866182835, "learning_rate": 4.458865910738988e-06, "loss": 0.4188, "step": 8756 }, { "epoch": 0.5485209602405299, "grad_norm": 0.8634630691605202, "learning_rate": 4.4578574815684594e-06, "loss": 0.4308, "step": 8757 }, { "epoch": 0.5485835982398722, "grad_norm": 0.6180715008543457, "learning_rate": 4.456849074712617e-06, "loss": 0.454, "step": 8758 }, { "epoch": 0.5486462362392145, "grad_norm": 0.8462215663666632, "learning_rate": 4.4558406902129676e-06, "loss": 0.4028, "step": 8759 }, { "epoch": 0.5487088742385569, "grad_norm": 0.7990003863561067, "learning_rate": 4.454832328111017e-06, "loss": 0.4056, "step": 8760 }, { "epoch": 0.5487715122378991, "grad_norm": 0.8417851641851641, "learning_rate": 4.453823988448267e-06, "loss": 0.3871, "step": 8761 }, { "epoch": 0.5488341502372415, "grad_norm": 0.7965953607893494, "learning_rate": 4.452815671266224e-06, "loss": 0.4232, "step": 8762 }, { "epoch": 0.5488967882365837, "grad_norm": 0.8254643297115859, "learning_rate": 4.45180737660639e-06, "loss": 0.4092, "step": 8763 }, { "epoch": 0.548959426235926, "grad_norm": 0.8589993484463168, "learning_rate": 4.450799104510266e-06, "loss": 0.4521, "step": 8764 }, { "epoch": 0.5490220642352683, "grad_norm": 0.8171111604888377, "learning_rate": 4.449790855019353e-06, "loss": 0.3989, "step": 8765 }, { "epoch": 0.5490847022346106, "grad_norm": 0.7662102279502758, "learning_rate": 4.44878262817515e-06, "loss": 0.3804, "step": 8766 }, { "epoch": 0.549147340233953, "grad_norm": 0.8112324928141962, "learning_rate": 4.447774424019157e-06, "loss": 0.4252, "step": 8767 }, { "epoch": 0.5492099782332952, "grad_norm": 0.784753264871901, "learning_rate": 4.446766242592872e-06, "loss": 0.3772, "step": 8768 }, { "epoch": 0.5492726162326376, "grad_norm": 0.8477352082175696, "learning_rate": 4.44575808393779e-06, "loss": 0.4144, "step": 8769 }, { "epoch": 0.5493352542319798, "grad_norm": 0.82156319511374, "learning_rate": 4.444749948095407e-06, "loss": 0.4084, "step": 8770 }, { "epoch": 0.5493978922313222, "grad_norm": 0.7868513018952876, "learning_rate": 4.443741835107219e-06, "loss": 0.3804, "step": 8771 }, { "epoch": 0.5494605302306644, "grad_norm": 0.7609558785458117, "learning_rate": 4.442733745014722e-06, "loss": 0.3492, "step": 8772 }, { "epoch": 0.5495231682300067, "grad_norm": 0.8504765806639322, "learning_rate": 4.441725677859407e-06, "loss": 0.4286, "step": 8773 }, { "epoch": 0.549585806229349, "grad_norm": 0.8651755673633245, "learning_rate": 4.440717633682768e-06, "loss": 0.3964, "step": 8774 }, { "epoch": 0.5496484442286913, "grad_norm": 0.7729948871953441, "learning_rate": 4.4397096125262935e-06, "loss": 0.3818, "step": 8775 }, { "epoch": 0.5497110822280337, "grad_norm": 0.7730306680175655, "learning_rate": 4.438701614431476e-06, "loss": 0.3512, "step": 8776 }, { "epoch": 0.5497737202273759, "grad_norm": 0.8406537252115255, "learning_rate": 4.437693639439804e-06, "loss": 0.4084, "step": 8777 }, { "epoch": 0.5498363582267183, "grad_norm": 0.8434581406798797, "learning_rate": 4.436685687592766e-06, "loss": 0.3984, "step": 8778 }, { "epoch": 0.5498989962260605, "grad_norm": 0.8671994137802351, "learning_rate": 4.435677758931849e-06, "loss": 0.4175, "step": 8779 }, { "epoch": 0.5499616342254029, "grad_norm": 0.8286847190288255, "learning_rate": 4.434669853498541e-06, "loss": 0.3822, "step": 8780 }, { "epoch": 0.5500242722247451, "grad_norm": 0.8895303619791296, "learning_rate": 4.433661971334326e-06, "loss": 0.4056, "step": 8781 }, { "epoch": 0.5500869102240874, "grad_norm": 0.7174618594689168, "learning_rate": 4.432654112480691e-06, "loss": 0.3562, "step": 8782 }, { "epoch": 0.5501495482234298, "grad_norm": 0.8523831987463145, "learning_rate": 4.431646276979118e-06, "loss": 0.3855, "step": 8783 }, { "epoch": 0.550212186222772, "grad_norm": 0.8730891780325573, "learning_rate": 4.430638464871088e-06, "loss": 0.3982, "step": 8784 }, { "epoch": 0.5502748242221144, "grad_norm": 0.7642049438637409, "learning_rate": 4.429630676198085e-06, "loss": 0.409, "step": 8785 }, { "epoch": 0.5503374622214566, "grad_norm": 0.8611154384369291, "learning_rate": 4.428622911001591e-06, "loss": 0.4003, "step": 8786 }, { "epoch": 0.550400100220799, "grad_norm": 0.8101619530545775, "learning_rate": 4.427615169323082e-06, "loss": 0.4178, "step": 8787 }, { "epoch": 0.5504627382201412, "grad_norm": 0.858638762563344, "learning_rate": 4.4266074512040385e-06, "loss": 0.4239, "step": 8788 }, { "epoch": 0.5505253762194835, "grad_norm": 0.9115879557326895, "learning_rate": 4.425599756685939e-06, "loss": 0.44, "step": 8789 }, { "epoch": 0.5505880142188259, "grad_norm": 0.9257866497022995, "learning_rate": 4.4245920858102615e-06, "loss": 0.4175, "step": 8790 }, { "epoch": 0.5506506522181681, "grad_norm": 0.6778372789655936, "learning_rate": 4.4235844386184806e-06, "loss": 0.4594, "step": 8791 }, { "epoch": 0.5507132902175105, "grad_norm": 0.775576758657971, "learning_rate": 4.42257681515207e-06, "loss": 0.4469, "step": 8792 }, { "epoch": 0.5507759282168527, "grad_norm": 0.8009381239420793, "learning_rate": 4.421569215452506e-06, "loss": 0.3761, "step": 8793 }, { "epoch": 0.5508385662161951, "grad_norm": 0.8610251038604615, "learning_rate": 4.42056163956126e-06, "loss": 0.4102, "step": 8794 }, { "epoch": 0.5509012042155373, "grad_norm": 0.7581608704002927, "learning_rate": 4.419554087519805e-06, "loss": 0.3713, "step": 8795 }, { "epoch": 0.5509638422148797, "grad_norm": 0.8064117551188036, "learning_rate": 4.41854655936961e-06, "loss": 0.3927, "step": 8796 }, { "epoch": 0.551026480214222, "grad_norm": 0.8158863347466037, "learning_rate": 4.417539055152145e-06, "loss": 0.3602, "step": 8797 }, { "epoch": 0.5510891182135642, "grad_norm": 0.857207174921926, "learning_rate": 4.416531574908884e-06, "loss": 0.435, "step": 8798 }, { "epoch": 0.5511517562129066, "grad_norm": 0.8122475783704319, "learning_rate": 4.4155241186812906e-06, "loss": 0.3759, "step": 8799 }, { "epoch": 0.5512143942122488, "grad_norm": 0.806960575421135, "learning_rate": 4.414516686510833e-06, "loss": 0.395, "step": 8800 }, { "epoch": 0.5512770322115912, "grad_norm": 0.8842976469449171, "learning_rate": 4.4135092784389775e-06, "loss": 0.393, "step": 8801 }, { "epoch": 0.5513396702109334, "grad_norm": 0.8121133691128163, "learning_rate": 4.412501894507188e-06, "loss": 0.4093, "step": 8802 }, { "epoch": 0.5514023082102758, "grad_norm": 0.8671912793404551, "learning_rate": 4.411494534756931e-06, "loss": 0.4658, "step": 8803 }, { "epoch": 0.5514649462096181, "grad_norm": 0.846800354933488, "learning_rate": 4.410487199229667e-06, "loss": 0.4482, "step": 8804 }, { "epoch": 0.5515275842089604, "grad_norm": 0.84561616375093, "learning_rate": 4.40947988796686e-06, "loss": 0.3622, "step": 8805 }, { "epoch": 0.5515902222083027, "grad_norm": 0.6915392549651753, "learning_rate": 4.408472601009971e-06, "loss": 0.4559, "step": 8806 }, { "epoch": 0.5516528602076449, "grad_norm": 0.7923789968265642, "learning_rate": 4.407465338400457e-06, "loss": 0.3782, "step": 8807 }, { "epoch": 0.5517154982069873, "grad_norm": 0.863038959872379, "learning_rate": 4.406458100179781e-06, "loss": 0.4386, "step": 8808 }, { "epoch": 0.5517781362063295, "grad_norm": 0.8470063366982831, "learning_rate": 4.405450886389401e-06, "loss": 0.4342, "step": 8809 }, { "epoch": 0.5518407742056719, "grad_norm": 0.7858528930620371, "learning_rate": 4.4044436970707714e-06, "loss": 0.4226, "step": 8810 }, { "epoch": 0.5519034122050142, "grad_norm": 0.7548632596406363, "learning_rate": 4.403436532265351e-06, "loss": 0.3823, "step": 8811 }, { "epoch": 0.5519660502043565, "grad_norm": 0.614726727433017, "learning_rate": 4.402429392014593e-06, "loss": 0.4582, "step": 8812 }, { "epoch": 0.5520286882036988, "grad_norm": 0.8228313708530004, "learning_rate": 4.4014222763599516e-06, "loss": 0.3828, "step": 8813 }, { "epoch": 0.552091326203041, "grad_norm": 0.8159195601419343, "learning_rate": 4.400415185342881e-06, "loss": 0.3887, "step": 8814 }, { "epoch": 0.5521539642023834, "grad_norm": 0.8069859853286084, "learning_rate": 4.399408119004833e-06, "loss": 0.4061, "step": 8815 }, { "epoch": 0.5522166022017256, "grad_norm": 0.6667763572874912, "learning_rate": 4.398401077387256e-06, "loss": 0.4655, "step": 8816 }, { "epoch": 0.552279240201068, "grad_norm": 0.8482454626720464, "learning_rate": 4.397394060531605e-06, "loss": 0.4102, "step": 8817 }, { "epoch": 0.5523418782004103, "grad_norm": 0.7687748588907338, "learning_rate": 4.396387068479325e-06, "loss": 0.4094, "step": 8818 }, { "epoch": 0.5524045161997526, "grad_norm": 0.8258935600410918, "learning_rate": 4.395380101271866e-06, "loss": 0.4251, "step": 8819 }, { "epoch": 0.5524671541990949, "grad_norm": 0.8363685152289603, "learning_rate": 4.394373158950674e-06, "loss": 0.4312, "step": 8820 }, { "epoch": 0.5525297921984372, "grad_norm": 0.8509015673016521, "learning_rate": 4.393366241557197e-06, "loss": 0.4151, "step": 8821 }, { "epoch": 0.5525924301977795, "grad_norm": 0.8707405310547339, "learning_rate": 4.392359349132874e-06, "loss": 0.4011, "step": 8822 }, { "epoch": 0.5526550681971217, "grad_norm": 0.8265185552655989, "learning_rate": 4.391352481719156e-06, "loss": 0.4015, "step": 8823 }, { "epoch": 0.5527177061964641, "grad_norm": 0.8417618277481987, "learning_rate": 4.390345639357484e-06, "loss": 0.3801, "step": 8824 }, { "epoch": 0.5527803441958064, "grad_norm": 0.8461606397584447, "learning_rate": 4.389338822089294e-06, "loss": 0.377, "step": 8825 }, { "epoch": 0.5528429821951487, "grad_norm": 0.8363292649202372, "learning_rate": 4.388332029956035e-06, "loss": 0.4183, "step": 8826 }, { "epoch": 0.552905620194491, "grad_norm": 0.8378407044956516, "learning_rate": 4.387325262999141e-06, "loss": 0.3675, "step": 8827 }, { "epoch": 0.5529682581938333, "grad_norm": 0.8534021685930547, "learning_rate": 4.386318521260056e-06, "loss": 0.4481, "step": 8828 }, { "epoch": 0.5530308961931756, "grad_norm": 0.8560692195674516, "learning_rate": 4.385311804780213e-06, "loss": 0.4328, "step": 8829 }, { "epoch": 0.5530935341925179, "grad_norm": 0.8524958861893095, "learning_rate": 4.3843051136010486e-06, "loss": 0.4285, "step": 8830 }, { "epoch": 0.5531561721918602, "grad_norm": 0.8721633438256171, "learning_rate": 4.3832984477640025e-06, "loss": 0.4009, "step": 8831 }, { "epoch": 0.5532188101912024, "grad_norm": 0.8312988998727714, "learning_rate": 4.382291807310506e-06, "loss": 0.3877, "step": 8832 }, { "epoch": 0.5532814481905448, "grad_norm": 0.7987481141487346, "learning_rate": 4.381285192281994e-06, "loss": 0.3888, "step": 8833 }, { "epoch": 0.5533440861898871, "grad_norm": 0.8953325808712677, "learning_rate": 4.380278602719895e-06, "loss": 0.3842, "step": 8834 }, { "epoch": 0.5534067241892294, "grad_norm": 0.8204572795839753, "learning_rate": 4.379272038665646e-06, "loss": 0.3832, "step": 8835 }, { "epoch": 0.5534693621885717, "grad_norm": 0.8469574353111705, "learning_rate": 4.378265500160675e-06, "loss": 0.3732, "step": 8836 }, { "epoch": 0.553532000187914, "grad_norm": 0.8257813581919773, "learning_rate": 4.377258987246412e-06, "loss": 0.4066, "step": 8837 }, { "epoch": 0.5535946381872563, "grad_norm": 0.8797394670334177, "learning_rate": 4.376252499964285e-06, "loss": 0.421, "step": 8838 }, { "epoch": 0.5536572761865985, "grad_norm": 0.620202706154314, "learning_rate": 4.3752460383557195e-06, "loss": 0.441, "step": 8839 }, { "epoch": 0.5537199141859409, "grad_norm": 0.9287154011994591, "learning_rate": 4.374239602462144e-06, "loss": 0.4299, "step": 8840 }, { "epoch": 0.5537825521852832, "grad_norm": 0.8367555653620719, "learning_rate": 4.373233192324983e-06, "loss": 0.3846, "step": 8841 }, { "epoch": 0.5538451901846255, "grad_norm": 0.84926903706584, "learning_rate": 4.37222680798566e-06, "loss": 0.4077, "step": 8842 }, { "epoch": 0.5539078281839678, "grad_norm": 0.8237091325153442, "learning_rate": 4.371220449485596e-06, "loss": 0.4311, "step": 8843 }, { "epoch": 0.5539704661833101, "grad_norm": 0.7486922831732652, "learning_rate": 4.370214116866216e-06, "loss": 0.3897, "step": 8844 }, { "epoch": 0.5540331041826524, "grad_norm": 0.8474162529441699, "learning_rate": 4.3692078101689405e-06, "loss": 0.4032, "step": 8845 }, { "epoch": 0.5540957421819948, "grad_norm": 0.7991510967950041, "learning_rate": 4.368201529435189e-06, "loss": 0.3711, "step": 8846 }, { "epoch": 0.554158380181337, "grad_norm": 0.8121861409051884, "learning_rate": 4.3671952747063805e-06, "loss": 0.4227, "step": 8847 }, { "epoch": 0.5542210181806793, "grad_norm": 0.9007755424515778, "learning_rate": 4.36618904602393e-06, "loss": 0.4157, "step": 8848 }, { "epoch": 0.5542836561800216, "grad_norm": 0.8425458447368818, "learning_rate": 4.365182843429257e-06, "loss": 0.429, "step": 8849 }, { "epoch": 0.5543462941793639, "grad_norm": 0.8424287113443708, "learning_rate": 4.3641766669637765e-06, "loss": 0.3748, "step": 8850 }, { "epoch": 0.5544089321787062, "grad_norm": 0.6787925690033931, "learning_rate": 4.363170516668901e-06, "loss": 0.4604, "step": 8851 }, { "epoch": 0.5544715701780485, "grad_norm": 0.8056913629317355, "learning_rate": 4.362164392586046e-06, "loss": 0.4099, "step": 8852 }, { "epoch": 0.5545342081773909, "grad_norm": 0.9170905949242301, "learning_rate": 4.361158294756621e-06, "loss": 0.3978, "step": 8853 }, { "epoch": 0.5545968461767331, "grad_norm": 0.8803360549457399, "learning_rate": 4.3601522232220414e-06, "loss": 0.4232, "step": 8854 }, { "epoch": 0.5546594841760755, "grad_norm": 0.8894763821011104, "learning_rate": 4.359146178023715e-06, "loss": 0.4149, "step": 8855 }, { "epoch": 0.5547221221754177, "grad_norm": 0.8484053009108277, "learning_rate": 4.35814015920305e-06, "loss": 0.4223, "step": 8856 }, { "epoch": 0.55478476017476, "grad_norm": 0.7807841464093069, "learning_rate": 4.3571341668014565e-06, "loss": 0.3932, "step": 8857 }, { "epoch": 0.5548473981741023, "grad_norm": 0.8539629400610177, "learning_rate": 4.35612820086034e-06, "loss": 0.4263, "step": 8858 }, { "epoch": 0.5549100361734446, "grad_norm": 0.625455880717446, "learning_rate": 4.355122261421106e-06, "loss": 0.4527, "step": 8859 }, { "epoch": 0.554972674172787, "grad_norm": 0.8673560558867687, "learning_rate": 4.354116348525158e-06, "loss": 0.4293, "step": 8860 }, { "epoch": 0.5550353121721292, "grad_norm": 0.8450604396131685, "learning_rate": 4.353110462213903e-06, "loss": 0.416, "step": 8861 }, { "epoch": 0.5550979501714716, "grad_norm": 0.785718698890534, "learning_rate": 4.352104602528738e-06, "loss": 0.3662, "step": 8862 }, { "epoch": 0.5551605881708138, "grad_norm": 0.8004545942476894, "learning_rate": 4.351098769511071e-06, "loss": 0.4069, "step": 8863 }, { "epoch": 0.5552232261701561, "grad_norm": 0.83837965711179, "learning_rate": 4.350092963202299e-06, "loss": 0.3838, "step": 8864 }, { "epoch": 0.5552858641694984, "grad_norm": 0.8043836788642341, "learning_rate": 4.34908718364382e-06, "loss": 0.4006, "step": 8865 }, { "epoch": 0.5553485021688407, "grad_norm": 0.8401974257919825, "learning_rate": 4.348081430877034e-06, "loss": 0.3756, "step": 8866 }, { "epoch": 0.555411140168183, "grad_norm": 0.8030993033489529, "learning_rate": 4.3470757049433374e-06, "loss": 0.395, "step": 8867 }, { "epoch": 0.5554737781675253, "grad_norm": 0.8329897737397362, "learning_rate": 4.346070005884126e-06, "loss": 0.4035, "step": 8868 }, { "epoch": 0.5555364161668677, "grad_norm": 0.747957268107343, "learning_rate": 4.345064333740794e-06, "loss": 0.3626, "step": 8869 }, { "epoch": 0.5555990541662099, "grad_norm": 0.8587767417211878, "learning_rate": 4.344058688554737e-06, "loss": 0.383, "step": 8870 }, { "epoch": 0.5556616921655523, "grad_norm": 0.888416706393778, "learning_rate": 4.343053070367342e-06, "loss": 0.4435, "step": 8871 }, { "epoch": 0.5557243301648945, "grad_norm": 0.8337335876001202, "learning_rate": 4.342047479220008e-06, "loss": 0.4195, "step": 8872 }, { "epoch": 0.5557869681642368, "grad_norm": 0.8310951030616605, "learning_rate": 4.341041915154123e-06, "loss": 0.3925, "step": 8873 }, { "epoch": 0.5558496061635791, "grad_norm": 0.797903168227397, "learning_rate": 4.3400363782110726e-06, "loss": 0.3623, "step": 8874 }, { "epoch": 0.5559122441629214, "grad_norm": 0.8915419499591158, "learning_rate": 4.339030868432249e-06, "loss": 0.4086, "step": 8875 }, { "epoch": 0.5559748821622638, "grad_norm": 0.8592020431000599, "learning_rate": 4.338025385859036e-06, "loss": 0.424, "step": 8876 }, { "epoch": 0.556037520161606, "grad_norm": 0.8720707072291513, "learning_rate": 4.337019930532822e-06, "loss": 0.3964, "step": 8877 }, { "epoch": 0.5561001581609484, "grad_norm": 0.8524034376029885, "learning_rate": 4.336014502494991e-06, "loss": 0.3893, "step": 8878 }, { "epoch": 0.5561627961602906, "grad_norm": 1.01460841446399, "learning_rate": 4.3350091017869265e-06, "loss": 0.4146, "step": 8879 }, { "epoch": 0.556225434159633, "grad_norm": 0.8752960827196645, "learning_rate": 4.334003728450008e-06, "loss": 0.4031, "step": 8880 }, { "epoch": 0.5562880721589752, "grad_norm": 0.8220877241363093, "learning_rate": 4.332998382525622e-06, "loss": 0.3741, "step": 8881 }, { "epoch": 0.5563507101583175, "grad_norm": 0.8287098983566771, "learning_rate": 4.331993064055146e-06, "loss": 0.4025, "step": 8882 }, { "epoch": 0.5564133481576599, "grad_norm": 0.8566036948241738, "learning_rate": 4.33098777307996e-06, "loss": 0.3973, "step": 8883 }, { "epoch": 0.5564759861570021, "grad_norm": 0.9072245507454003, "learning_rate": 4.329982509641442e-06, "loss": 0.4405, "step": 8884 }, { "epoch": 0.5565386241563445, "grad_norm": 0.7878760315139149, "learning_rate": 4.328977273780967e-06, "loss": 0.377, "step": 8885 }, { "epoch": 0.5566012621556867, "grad_norm": 0.8418760407582627, "learning_rate": 4.327972065539912e-06, "loss": 0.3698, "step": 8886 }, { "epoch": 0.5566639001550291, "grad_norm": 0.8258474278302255, "learning_rate": 4.326966884959653e-06, "loss": 0.3573, "step": 8887 }, { "epoch": 0.5567265381543713, "grad_norm": 0.8081291380361365, "learning_rate": 4.32596173208156e-06, "loss": 0.3855, "step": 8888 }, { "epoch": 0.5567891761537137, "grad_norm": 0.8640484230613181, "learning_rate": 4.324956606947007e-06, "loss": 0.3983, "step": 8889 }, { "epoch": 0.556851814153056, "grad_norm": 0.8501599580320645, "learning_rate": 4.323951509597366e-06, "loss": 0.4128, "step": 8890 }, { "epoch": 0.5569144521523982, "grad_norm": 0.9396763308155353, "learning_rate": 4.322946440074007e-06, "loss": 0.423, "step": 8891 }, { "epoch": 0.5569770901517406, "grad_norm": 0.9068851056933501, "learning_rate": 4.321941398418298e-06, "loss": 0.3906, "step": 8892 }, { "epoch": 0.5570397281510828, "grad_norm": 0.8031898678527488, "learning_rate": 4.320936384671607e-06, "loss": 0.364, "step": 8893 }, { "epoch": 0.5571023661504252, "grad_norm": 0.8605307725117637, "learning_rate": 4.319931398875301e-06, "loss": 0.4021, "step": 8894 }, { "epoch": 0.5571650041497674, "grad_norm": 0.8297527264787296, "learning_rate": 4.318926441070743e-06, "loss": 0.4354, "step": 8895 }, { "epoch": 0.5572276421491098, "grad_norm": 0.8801912287912068, "learning_rate": 4.3179215112993e-06, "loss": 0.4266, "step": 8896 }, { "epoch": 0.557290280148452, "grad_norm": 0.7916290741802737, "learning_rate": 4.316916609602335e-06, "loss": 0.401, "step": 8897 }, { "epoch": 0.5573529181477943, "grad_norm": 0.846472520389119, "learning_rate": 4.315911736021208e-06, "loss": 0.4161, "step": 8898 }, { "epoch": 0.5574155561471367, "grad_norm": 0.847841319452227, "learning_rate": 4.314906890597281e-06, "loss": 0.3904, "step": 8899 }, { "epoch": 0.5574781941464789, "grad_norm": 0.850044801842975, "learning_rate": 4.313902073371913e-06, "loss": 0.4157, "step": 8900 }, { "epoch": 0.5575408321458213, "grad_norm": 0.8544290406696509, "learning_rate": 4.312897284386464e-06, "loss": 0.4227, "step": 8901 }, { "epoch": 0.5576034701451635, "grad_norm": 0.8633695750405825, "learning_rate": 4.311892523682292e-06, "loss": 0.3918, "step": 8902 }, { "epoch": 0.5576661081445059, "grad_norm": 0.7787183327341837, "learning_rate": 4.310887791300748e-06, "loss": 0.3771, "step": 8903 }, { "epoch": 0.5577287461438482, "grad_norm": 0.8298417415018248, "learning_rate": 4.309883087283193e-06, "loss": 0.3851, "step": 8904 }, { "epoch": 0.5577913841431905, "grad_norm": 0.8630839124505457, "learning_rate": 4.308878411670978e-06, "loss": 0.4375, "step": 8905 }, { "epoch": 0.5578540221425328, "grad_norm": 0.8228516126025475, "learning_rate": 4.307873764505456e-06, "loss": 0.399, "step": 8906 }, { "epoch": 0.557916660141875, "grad_norm": 0.7636333513305394, "learning_rate": 4.306869145827977e-06, "loss": 0.3549, "step": 8907 }, { "epoch": 0.5579792981412174, "grad_norm": 0.8376629621758838, "learning_rate": 4.3058645556798905e-06, "loss": 0.4137, "step": 8908 }, { "epoch": 0.5580419361405596, "grad_norm": 0.7346850812716104, "learning_rate": 4.304859994102551e-06, "loss": 0.345, "step": 8909 }, { "epoch": 0.558104574139902, "grad_norm": 0.811745904410283, "learning_rate": 4.303855461137303e-06, "loss": 0.3908, "step": 8910 }, { "epoch": 0.5581672121392443, "grad_norm": 0.8501930243715233, "learning_rate": 4.302850956825493e-06, "loss": 0.4041, "step": 8911 }, { "epoch": 0.5582298501385866, "grad_norm": 0.7745382756901013, "learning_rate": 4.301846481208466e-06, "loss": 0.4003, "step": 8912 }, { "epoch": 0.5582924881379289, "grad_norm": 0.8245631729582189, "learning_rate": 4.300842034327569e-06, "loss": 0.3518, "step": 8913 }, { "epoch": 0.5583551261372712, "grad_norm": 0.8290656252150235, "learning_rate": 4.299837616224144e-06, "loss": 0.4037, "step": 8914 }, { "epoch": 0.5584177641366135, "grad_norm": 0.7911768064451823, "learning_rate": 4.298833226939531e-06, "loss": 0.3622, "step": 8915 }, { "epoch": 0.5584804021359557, "grad_norm": 0.8056336827607878, "learning_rate": 4.297828866515073e-06, "loss": 0.4149, "step": 8916 }, { "epoch": 0.5585430401352981, "grad_norm": 0.8573964978222457, "learning_rate": 4.296824534992108e-06, "loss": 0.4077, "step": 8917 }, { "epoch": 0.5586056781346403, "grad_norm": 0.8403239127561999, "learning_rate": 4.295820232411978e-06, "loss": 0.4097, "step": 8918 }, { "epoch": 0.5586683161339827, "grad_norm": 0.8164003771207038, "learning_rate": 4.294815958816017e-06, "loss": 0.414, "step": 8919 }, { "epoch": 0.558730954133325, "grad_norm": 0.8296211319289342, "learning_rate": 4.293811714245562e-06, "loss": 0.4419, "step": 8920 }, { "epoch": 0.5587935921326673, "grad_norm": 0.8126781383546376, "learning_rate": 4.292807498741948e-06, "loss": 0.4017, "step": 8921 }, { "epoch": 0.5588562301320096, "grad_norm": 0.7901943852430922, "learning_rate": 4.291803312346509e-06, "loss": 0.4245, "step": 8922 }, { "epoch": 0.5589188681313518, "grad_norm": 0.7828384404203131, "learning_rate": 4.290799155100577e-06, "loss": 0.4264, "step": 8923 }, { "epoch": 0.5589815061306942, "grad_norm": 0.7898492068696185, "learning_rate": 4.289795027045482e-06, "loss": 0.38, "step": 8924 }, { "epoch": 0.5590441441300364, "grad_norm": 0.7774481174283694, "learning_rate": 4.288790928222557e-06, "loss": 0.367, "step": 8925 }, { "epoch": 0.5591067821293788, "grad_norm": 0.8609254414132054, "learning_rate": 4.287786858673127e-06, "loss": 0.442, "step": 8926 }, { "epoch": 0.5591694201287211, "grad_norm": 0.9310645388924375, "learning_rate": 4.286782818438525e-06, "loss": 0.4586, "step": 8927 }, { "epoch": 0.5592320581280634, "grad_norm": 0.7942532033157346, "learning_rate": 4.285778807560074e-06, "loss": 0.3785, "step": 8928 }, { "epoch": 0.5592946961274057, "grad_norm": 0.6626434236879376, "learning_rate": 4.284774826079099e-06, "loss": 0.4642, "step": 8929 }, { "epoch": 0.559357334126748, "grad_norm": 0.8061063490734179, "learning_rate": 4.283770874036925e-06, "loss": 0.4261, "step": 8930 }, { "epoch": 0.5594199721260903, "grad_norm": 0.8549865513228989, "learning_rate": 4.282766951474876e-06, "loss": 0.3729, "step": 8931 }, { "epoch": 0.5594826101254325, "grad_norm": 0.8119693671084341, "learning_rate": 4.281763058434272e-06, "loss": 0.3927, "step": 8932 }, { "epoch": 0.5595452481247749, "grad_norm": 0.7926083624532693, "learning_rate": 4.280759194956433e-06, "loss": 0.3873, "step": 8933 }, { "epoch": 0.5596078861241172, "grad_norm": 0.8580109084862262, "learning_rate": 4.27975536108268e-06, "loss": 0.3941, "step": 8934 }, { "epoch": 0.5596705241234595, "grad_norm": 0.9118540385135707, "learning_rate": 4.278751556854327e-06, "loss": 0.4185, "step": 8935 }, { "epoch": 0.5597331621228018, "grad_norm": 0.8838124795049036, "learning_rate": 4.277747782312697e-06, "loss": 0.4155, "step": 8936 }, { "epoch": 0.5597958001221441, "grad_norm": 0.8494957262935877, "learning_rate": 4.276744037499102e-06, "loss": 0.4135, "step": 8937 }, { "epoch": 0.5598584381214864, "grad_norm": 0.6493866667079219, "learning_rate": 4.275740322454856e-06, "loss": 0.4303, "step": 8938 }, { "epoch": 0.5599210761208288, "grad_norm": 0.8495557728171185, "learning_rate": 4.2747366372212735e-06, "loss": 0.4072, "step": 8939 }, { "epoch": 0.559983714120171, "grad_norm": 0.7905104268841443, "learning_rate": 4.273732981839666e-06, "loss": 0.3739, "step": 8940 }, { "epoch": 0.5600463521195133, "grad_norm": 0.7873868246250928, "learning_rate": 4.272729356351342e-06, "loss": 0.3773, "step": 8941 }, { "epoch": 0.5601089901188556, "grad_norm": 0.841889091546919, "learning_rate": 4.271725760797614e-06, "loss": 0.3765, "step": 8942 }, { "epoch": 0.5601716281181979, "grad_norm": 0.9225513394890702, "learning_rate": 4.270722195219789e-06, "loss": 0.3766, "step": 8943 }, { "epoch": 0.5602342661175402, "grad_norm": 0.812170579006237, "learning_rate": 4.269718659659174e-06, "loss": 0.3744, "step": 8944 }, { "epoch": 0.5602969041168825, "grad_norm": 0.8788981492227589, "learning_rate": 4.268715154157071e-06, "loss": 0.4068, "step": 8945 }, { "epoch": 0.5603595421162249, "grad_norm": 0.9122530202080029, "learning_rate": 4.26771167875479e-06, "loss": 0.4042, "step": 8946 }, { "epoch": 0.5604221801155671, "grad_norm": 0.8871327684502051, "learning_rate": 4.266708233493633e-06, "loss": 0.4023, "step": 8947 }, { "epoch": 0.5604848181149094, "grad_norm": 0.7926108400389431, "learning_rate": 4.265704818414902e-06, "loss": 0.3887, "step": 8948 }, { "epoch": 0.5605474561142517, "grad_norm": 0.8587792016791875, "learning_rate": 4.264701433559897e-06, "loss": 0.3927, "step": 8949 }, { "epoch": 0.560610094113594, "grad_norm": 0.8213471993670857, "learning_rate": 4.263698078969916e-06, "loss": 0.4079, "step": 8950 }, { "epoch": 0.5606727321129363, "grad_norm": 0.8346638117294238, "learning_rate": 4.262694754686259e-06, "loss": 0.403, "step": 8951 }, { "epoch": 0.5607353701122786, "grad_norm": 0.8079861238483157, "learning_rate": 4.261691460750223e-06, "loss": 0.3864, "step": 8952 }, { "epoch": 0.560798008111621, "grad_norm": 0.7619154083263758, "learning_rate": 4.260688197203105e-06, "loss": 0.363, "step": 8953 }, { "epoch": 0.5608606461109632, "grad_norm": 0.8414542425518661, "learning_rate": 4.259684964086195e-06, "loss": 0.3502, "step": 8954 }, { "epoch": 0.5609232841103056, "grad_norm": 0.8555485472104805, "learning_rate": 4.25868176144079e-06, "loss": 0.3787, "step": 8955 }, { "epoch": 0.5609859221096478, "grad_norm": 0.8837702150128155, "learning_rate": 4.257678589308183e-06, "loss": 0.3966, "step": 8956 }, { "epoch": 0.5610485601089901, "grad_norm": 0.8625706178132027, "learning_rate": 4.256675447729662e-06, "loss": 0.4097, "step": 8957 }, { "epoch": 0.5611111981083324, "grad_norm": 0.8222954908086152, "learning_rate": 4.255672336746518e-06, "loss": 0.391, "step": 8958 }, { "epoch": 0.5611738361076747, "grad_norm": 0.8254212553859952, "learning_rate": 4.254669256400039e-06, "loss": 0.4032, "step": 8959 }, { "epoch": 0.561236474107017, "grad_norm": 0.7719832568013405, "learning_rate": 4.253666206731511e-06, "loss": 0.3982, "step": 8960 }, { "epoch": 0.5612991121063593, "grad_norm": 0.8306324234249766, "learning_rate": 4.252663187782221e-06, "loss": 0.4145, "step": 8961 }, { "epoch": 0.5613617501057017, "grad_norm": 0.8021509220655104, "learning_rate": 4.251660199593453e-06, "loss": 0.4272, "step": 8962 }, { "epoch": 0.5614243881050439, "grad_norm": 0.6660566686626523, "learning_rate": 4.250657242206488e-06, "loss": 0.4592, "step": 8963 }, { "epoch": 0.5614870261043863, "grad_norm": 0.8240112126934138, "learning_rate": 4.2496543156626116e-06, "loss": 0.3915, "step": 8964 }, { "epoch": 0.5615496641037285, "grad_norm": 0.7484139308207889, "learning_rate": 4.2486514200031036e-06, "loss": 0.3551, "step": 8965 }, { "epoch": 0.5616123021030708, "grad_norm": 0.778466893847344, "learning_rate": 4.247648555269242e-06, "loss": 0.3711, "step": 8966 }, { "epoch": 0.5616749401024131, "grad_norm": 0.8997811158176471, "learning_rate": 4.246645721502305e-06, "loss": 0.4609, "step": 8967 }, { "epoch": 0.5617375781017554, "grad_norm": 0.7933645603526323, "learning_rate": 4.245642918743571e-06, "loss": 0.3768, "step": 8968 }, { "epoch": 0.5618002161010978, "grad_norm": 0.8322255082564521, "learning_rate": 4.244640147034314e-06, "loss": 0.3551, "step": 8969 }, { "epoch": 0.56186285410044, "grad_norm": 0.9180942519647391, "learning_rate": 4.243637406415809e-06, "loss": 0.3938, "step": 8970 }, { "epoch": 0.5619254920997824, "grad_norm": 0.8221396703547958, "learning_rate": 4.2426346969293285e-06, "loss": 0.4209, "step": 8971 }, { "epoch": 0.5619881300991246, "grad_norm": 0.832977165663261, "learning_rate": 4.241632018616142e-06, "loss": 0.3818, "step": 8972 }, { "epoch": 0.5620507680984669, "grad_norm": 0.8333768814030588, "learning_rate": 4.240629371517525e-06, "loss": 0.4273, "step": 8973 }, { "epoch": 0.5621134060978092, "grad_norm": 0.8886107956841365, "learning_rate": 4.2396267556747445e-06, "loss": 0.4073, "step": 8974 }, { "epoch": 0.5621760440971515, "grad_norm": 0.8950602280476848, "learning_rate": 4.2386241711290676e-06, "loss": 0.4647, "step": 8975 }, { "epoch": 0.5622386820964939, "grad_norm": 0.8607882373135369, "learning_rate": 4.23762161792176e-06, "loss": 0.3915, "step": 8976 }, { "epoch": 0.5623013200958361, "grad_norm": 0.8772104684856662, "learning_rate": 4.236619096094089e-06, "loss": 0.3539, "step": 8977 }, { "epoch": 0.5623639580951785, "grad_norm": 0.8806165296870622, "learning_rate": 4.235616605687318e-06, "loss": 0.4085, "step": 8978 }, { "epoch": 0.5624265960945207, "grad_norm": 0.8075084667479057, "learning_rate": 4.234614146742709e-06, "loss": 0.3936, "step": 8979 }, { "epoch": 0.5624892340938631, "grad_norm": 0.8771080909165959, "learning_rate": 4.233611719301523e-06, "loss": 0.4192, "step": 8980 }, { "epoch": 0.5625518720932053, "grad_norm": 0.6660268974239746, "learning_rate": 4.232609323405022e-06, "loss": 0.4463, "step": 8981 }, { "epoch": 0.5626145100925476, "grad_norm": 0.877757904616518, "learning_rate": 4.231606959094462e-06, "loss": 0.4488, "step": 8982 }, { "epoch": 0.56267714809189, "grad_norm": 0.8662825842123826, "learning_rate": 4.2306046264111035e-06, "loss": 0.3776, "step": 8983 }, { "epoch": 0.5627397860912322, "grad_norm": 0.850716440429454, "learning_rate": 4.229602325396201e-06, "loss": 0.3676, "step": 8984 }, { "epoch": 0.5628024240905746, "grad_norm": 0.851763877062701, "learning_rate": 4.22860005609101e-06, "loss": 0.4445, "step": 8985 }, { "epoch": 0.5628650620899168, "grad_norm": 0.8247387772461785, "learning_rate": 4.227597818536784e-06, "loss": 0.3949, "step": 8986 }, { "epoch": 0.5629277000892592, "grad_norm": 0.800712448025856, "learning_rate": 4.226595612774775e-06, "loss": 0.3963, "step": 8987 }, { "epoch": 0.5629903380886014, "grad_norm": 0.8677203342342891, "learning_rate": 4.225593438846234e-06, "loss": 0.3836, "step": 8988 }, { "epoch": 0.5630529760879438, "grad_norm": 0.841857837905872, "learning_rate": 4.224591296792411e-06, "loss": 0.3848, "step": 8989 }, { "epoch": 0.563115614087286, "grad_norm": 0.6001350412563271, "learning_rate": 4.223589186654555e-06, "loss": 0.444, "step": 8990 }, { "epoch": 0.5631782520866283, "grad_norm": 0.6469290450157653, "learning_rate": 4.222587108473909e-06, "loss": 0.4546, "step": 8991 }, { "epoch": 0.5632408900859707, "grad_norm": 0.821100092864716, "learning_rate": 4.2215850622917255e-06, "loss": 0.3858, "step": 8992 }, { "epoch": 0.5633035280853129, "grad_norm": 0.828132127755665, "learning_rate": 4.220583048149243e-06, "loss": 0.4099, "step": 8993 }, { "epoch": 0.5633661660846553, "grad_norm": 0.817283646307675, "learning_rate": 4.2195810660877085e-06, "loss": 0.3707, "step": 8994 }, { "epoch": 0.5634288040839975, "grad_norm": 0.6799324986259885, "learning_rate": 4.218579116148362e-06, "loss": 0.4599, "step": 8995 }, { "epoch": 0.5634914420833399, "grad_norm": 0.898000152829016, "learning_rate": 4.217577198372444e-06, "loss": 0.4441, "step": 8996 }, { "epoch": 0.5635540800826822, "grad_norm": 0.7956594657001702, "learning_rate": 4.216575312801193e-06, "loss": 0.3727, "step": 8997 }, { "epoch": 0.5636167180820245, "grad_norm": 0.5909556095479785, "learning_rate": 4.215573459475848e-06, "loss": 0.4384, "step": 8998 }, { "epoch": 0.5636793560813668, "grad_norm": 0.8292295452850293, "learning_rate": 4.214571638437646e-06, "loss": 0.3551, "step": 8999 }, { "epoch": 0.563741994080709, "grad_norm": 0.7967709276382278, "learning_rate": 4.213569849727818e-06, "loss": 0.3688, "step": 9000 }, { "epoch": 0.5638046320800514, "grad_norm": 0.8482496193766773, "learning_rate": 4.2125680933876034e-06, "loss": 0.3682, "step": 9001 }, { "epoch": 0.5638672700793936, "grad_norm": 0.7377651788576776, "learning_rate": 4.211566369458231e-06, "loss": 0.3694, "step": 9002 }, { "epoch": 0.563929908078736, "grad_norm": 0.8906190227863341, "learning_rate": 4.210564677980935e-06, "loss": 0.4394, "step": 9003 }, { "epoch": 0.5639925460780782, "grad_norm": 0.8775181552274232, "learning_rate": 4.209563018996943e-06, "loss": 0.4078, "step": 9004 }, { "epoch": 0.5640551840774206, "grad_norm": 0.8543795693568778, "learning_rate": 4.208561392547483e-06, "loss": 0.3709, "step": 9005 }, { "epoch": 0.5641178220767629, "grad_norm": 0.7725985391771746, "learning_rate": 4.207559798673784e-06, "loss": 0.4106, "step": 9006 }, { "epoch": 0.5641804600761051, "grad_norm": 0.8558869271120437, "learning_rate": 4.20655823741707e-06, "loss": 0.3926, "step": 9007 }, { "epoch": 0.5642430980754475, "grad_norm": 0.8076914515337426, "learning_rate": 4.205556708818567e-06, "loss": 0.3934, "step": 9008 }, { "epoch": 0.5643057360747897, "grad_norm": 0.8159237182253515, "learning_rate": 4.204555212919494e-06, "loss": 0.4248, "step": 9009 }, { "epoch": 0.5643683740741321, "grad_norm": 0.856631519307626, "learning_rate": 4.203553749761079e-06, "loss": 0.3734, "step": 9010 }, { "epoch": 0.5644310120734743, "grad_norm": 0.7821386615030638, "learning_rate": 4.202552319384539e-06, "loss": 0.4017, "step": 9011 }, { "epoch": 0.5644936500728167, "grad_norm": 0.854476566645593, "learning_rate": 4.201550921831094e-06, "loss": 0.384, "step": 9012 }, { "epoch": 0.564556288072159, "grad_norm": 0.7655070762257632, "learning_rate": 4.200549557141962e-06, "loss": 0.3777, "step": 9013 }, { "epoch": 0.5646189260715013, "grad_norm": 0.7721076374414685, "learning_rate": 4.199548225358357e-06, "loss": 0.3771, "step": 9014 }, { "epoch": 0.5646815640708436, "grad_norm": 0.8094753013057944, "learning_rate": 4.198546926521496e-06, "loss": 0.4074, "step": 9015 }, { "epoch": 0.5647442020701858, "grad_norm": 0.7940037027024056, "learning_rate": 4.197545660672593e-06, "loss": 0.3591, "step": 9016 }, { "epoch": 0.5648068400695282, "grad_norm": 0.8109560740933962, "learning_rate": 4.1965444278528596e-06, "loss": 0.4219, "step": 9017 }, { "epoch": 0.5648694780688704, "grad_norm": 0.8160078696197399, "learning_rate": 4.1955432281035045e-06, "loss": 0.4022, "step": 9018 }, { "epoch": 0.5649321160682128, "grad_norm": 0.7826220139099365, "learning_rate": 4.1945420614657405e-06, "loss": 0.3541, "step": 9019 }, { "epoch": 0.5649947540675551, "grad_norm": 0.8975085830502799, "learning_rate": 4.193540927980776e-06, "loss": 0.3481, "step": 9020 }, { "epoch": 0.5650573920668974, "grad_norm": 0.8975207007780642, "learning_rate": 4.192539827689816e-06, "loss": 0.4366, "step": 9021 }, { "epoch": 0.5651200300662397, "grad_norm": 0.7731238195339272, "learning_rate": 4.1915387606340664e-06, "loss": 0.3587, "step": 9022 }, { "epoch": 0.565182668065582, "grad_norm": 0.7835964017016582, "learning_rate": 4.190537726854731e-06, "loss": 0.3828, "step": 9023 }, { "epoch": 0.5652453060649243, "grad_norm": 0.7911796777236286, "learning_rate": 4.189536726393014e-06, "loss": 0.3733, "step": 9024 }, { "epoch": 0.5653079440642665, "grad_norm": 0.8859474615576641, "learning_rate": 4.188535759290116e-06, "loss": 0.3768, "step": 9025 }, { "epoch": 0.5653705820636089, "grad_norm": 0.6337596044941708, "learning_rate": 4.187534825587236e-06, "loss": 0.4516, "step": 9026 }, { "epoch": 0.5654332200629512, "grad_norm": 0.8181425502357973, "learning_rate": 4.186533925325573e-06, "loss": 0.3875, "step": 9027 }, { "epoch": 0.5654958580622935, "grad_norm": 0.8649233531374786, "learning_rate": 4.185533058546324e-06, "loss": 0.425, "step": 9028 }, { "epoch": 0.5655584960616358, "grad_norm": 0.866278382519741, "learning_rate": 4.184532225290687e-06, "loss": 0.4014, "step": 9029 }, { "epoch": 0.5656211340609781, "grad_norm": 0.6514685518883522, "learning_rate": 4.183531425599855e-06, "loss": 0.4545, "step": 9030 }, { "epoch": 0.5656837720603204, "grad_norm": 0.853948094248871, "learning_rate": 4.1825306595150195e-06, "loss": 0.4195, "step": 9031 }, { "epoch": 0.5657464100596626, "grad_norm": 0.7213422080767339, "learning_rate": 4.181529927077376e-06, "loss": 0.4705, "step": 9032 }, { "epoch": 0.565809048059005, "grad_norm": 0.880970898417182, "learning_rate": 4.1805292283281115e-06, "loss": 0.3881, "step": 9033 }, { "epoch": 0.5658716860583473, "grad_norm": 0.7685229676712284, "learning_rate": 4.179528563308416e-06, "loss": 0.3602, "step": 9034 }, { "epoch": 0.5659343240576896, "grad_norm": 0.8829839253910727, "learning_rate": 4.178527932059477e-06, "loss": 0.3786, "step": 9035 }, { "epoch": 0.5659969620570319, "grad_norm": 0.8326659347015134, "learning_rate": 4.177527334622481e-06, "loss": 0.4167, "step": 9036 }, { "epoch": 0.5660596000563742, "grad_norm": 0.8441953255129491, "learning_rate": 4.176526771038611e-06, "loss": 0.3838, "step": 9037 }, { "epoch": 0.5661222380557165, "grad_norm": 0.6124373263320664, "learning_rate": 4.175526241349054e-06, "loss": 0.4366, "step": 9038 }, { "epoch": 0.5661848760550588, "grad_norm": 0.8336924001943044, "learning_rate": 4.174525745594989e-06, "loss": 0.4292, "step": 9039 }, { "epoch": 0.5662475140544011, "grad_norm": 0.832521388875729, "learning_rate": 4.173525283817598e-06, "loss": 0.3882, "step": 9040 }, { "epoch": 0.5663101520537434, "grad_norm": 0.818232123544067, "learning_rate": 4.17252485605806e-06, "loss": 0.4007, "step": 9041 }, { "epoch": 0.5663727900530857, "grad_norm": 0.8706118572510553, "learning_rate": 4.171524462357553e-06, "loss": 0.4, "step": 9042 }, { "epoch": 0.566435428052428, "grad_norm": 0.8395603424772847, "learning_rate": 4.170524102757252e-06, "loss": 0.4259, "step": 9043 }, { "epoch": 0.5664980660517703, "grad_norm": 0.8894226498664206, "learning_rate": 4.1695237772983324e-06, "loss": 0.4091, "step": 9044 }, { "epoch": 0.5665607040511126, "grad_norm": 0.7729933431528359, "learning_rate": 4.168523486021969e-06, "loss": 0.3887, "step": 9045 }, { "epoch": 0.566623342050455, "grad_norm": 0.7735134998404896, "learning_rate": 4.167523228969331e-06, "loss": 0.4094, "step": 9046 }, { "epoch": 0.5666859800497972, "grad_norm": 0.8222126195372413, "learning_rate": 4.166523006181594e-06, "loss": 0.3815, "step": 9047 }, { "epoch": 0.5667486180491396, "grad_norm": 0.8349738730174896, "learning_rate": 4.165522817699925e-06, "loss": 0.397, "step": 9048 }, { "epoch": 0.5668112560484818, "grad_norm": 0.7237469638758185, "learning_rate": 4.1645226635654905e-06, "loss": 0.4951, "step": 9049 }, { "epoch": 0.5668738940478241, "grad_norm": 0.8571360589361725, "learning_rate": 4.16352254381946e-06, "loss": 0.4312, "step": 9050 }, { "epoch": 0.5669365320471664, "grad_norm": 0.8519413415472317, "learning_rate": 4.162522458502995e-06, "loss": 0.3568, "step": 9051 }, { "epoch": 0.5669991700465087, "grad_norm": 0.8272698106484035, "learning_rate": 4.161522407657261e-06, "loss": 0.4036, "step": 9052 }, { "epoch": 0.567061808045851, "grad_norm": 0.8947769148412213, "learning_rate": 4.1605223913234216e-06, "loss": 0.3979, "step": 9053 }, { "epoch": 0.5671244460451933, "grad_norm": 0.8236390446586193, "learning_rate": 4.159522409542636e-06, "loss": 0.414, "step": 9054 }, { "epoch": 0.5671870840445357, "grad_norm": 0.7952101978079202, "learning_rate": 4.158522462356062e-06, "loss": 0.4193, "step": 9055 }, { "epoch": 0.5672497220438779, "grad_norm": 0.8397563493239961, "learning_rate": 4.157522549804862e-06, "loss": 0.3819, "step": 9056 }, { "epoch": 0.5673123600432202, "grad_norm": 0.7948840343630229, "learning_rate": 4.156522671930188e-06, "loss": 0.3972, "step": 9057 }, { "epoch": 0.5673749980425625, "grad_norm": 0.8460380660428998, "learning_rate": 4.155522828773199e-06, "loss": 0.3651, "step": 9058 }, { "epoch": 0.5674376360419048, "grad_norm": 0.8023750216264258, "learning_rate": 4.154523020375048e-06, "loss": 0.4116, "step": 9059 }, { "epoch": 0.5675002740412471, "grad_norm": 0.8437878151502904, "learning_rate": 4.153523246776887e-06, "loss": 0.3898, "step": 9060 }, { "epoch": 0.5675629120405894, "grad_norm": 0.8579137805658927, "learning_rate": 4.1525235080198635e-06, "loss": 0.4303, "step": 9061 }, { "epoch": 0.5676255500399318, "grad_norm": 0.7832550765003957, "learning_rate": 4.151523804145131e-06, "loss": 0.3948, "step": 9062 }, { "epoch": 0.567688188039274, "grad_norm": 0.7600768275962893, "learning_rate": 4.150524135193837e-06, "loss": 0.3423, "step": 9063 }, { "epoch": 0.5677508260386164, "grad_norm": 0.8549728652754164, "learning_rate": 4.149524501207125e-06, "loss": 0.4116, "step": 9064 }, { "epoch": 0.5678134640379586, "grad_norm": 0.8036085812873484, "learning_rate": 4.148524902226144e-06, "loss": 0.3679, "step": 9065 }, { "epoch": 0.5678761020373009, "grad_norm": 0.8663898434516495, "learning_rate": 4.147525338292036e-06, "loss": 0.3962, "step": 9066 }, { "epoch": 0.5679387400366432, "grad_norm": 0.8380988591348376, "learning_rate": 4.1465258094459425e-06, "loss": 0.4088, "step": 9067 }, { "epoch": 0.5680013780359855, "grad_norm": 0.8245446411571812, "learning_rate": 4.145526315729007e-06, "loss": 0.4009, "step": 9068 }, { "epoch": 0.5680640160353279, "grad_norm": 0.8100490276999892, "learning_rate": 4.144526857182366e-06, "loss": 0.3983, "step": 9069 }, { "epoch": 0.5681266540346701, "grad_norm": 0.8542611820269308, "learning_rate": 4.143527433847157e-06, "loss": 0.3883, "step": 9070 }, { "epoch": 0.5681892920340125, "grad_norm": 0.7983340179748324, "learning_rate": 4.14252804576452e-06, "loss": 0.3754, "step": 9071 }, { "epoch": 0.5682519300333547, "grad_norm": 0.8356768794490791, "learning_rate": 4.141528692975587e-06, "loss": 0.3775, "step": 9072 }, { "epoch": 0.5683145680326971, "grad_norm": 0.8528837294151288, "learning_rate": 4.140529375521491e-06, "loss": 0.4212, "step": 9073 }, { "epoch": 0.5683772060320393, "grad_norm": 0.8363384425608786, "learning_rate": 4.1395300934433656e-06, "loss": 0.3796, "step": 9074 }, { "epoch": 0.5684398440313816, "grad_norm": 0.7433462227702758, "learning_rate": 4.138530846782341e-06, "loss": 0.3768, "step": 9075 }, { "epoch": 0.568502482030724, "grad_norm": 0.8391322834916494, "learning_rate": 4.137531635579549e-06, "loss": 0.3861, "step": 9076 }, { "epoch": 0.5685651200300662, "grad_norm": 0.8418169835785805, "learning_rate": 4.1365324598761135e-06, "loss": 0.3715, "step": 9077 }, { "epoch": 0.5686277580294086, "grad_norm": 0.7707820178781668, "learning_rate": 4.135533319713162e-06, "loss": 0.4013, "step": 9078 }, { "epoch": 0.5686903960287508, "grad_norm": 0.7760127053359359, "learning_rate": 4.1345342151318205e-06, "loss": 0.3205, "step": 9079 }, { "epoch": 0.5687530340280932, "grad_norm": 0.8387316036597797, "learning_rate": 4.133535146173211e-06, "loss": 0.3727, "step": 9080 }, { "epoch": 0.5688156720274354, "grad_norm": 0.8686479074979002, "learning_rate": 4.132536112878456e-06, "loss": 0.4625, "step": 9081 }, { "epoch": 0.5688783100267777, "grad_norm": 0.842810310745741, "learning_rate": 4.131537115288675e-06, "loss": 0.3698, "step": 9082 }, { "epoch": 0.56894094802612, "grad_norm": 0.7978947723640736, "learning_rate": 4.130538153444985e-06, "loss": 0.39, "step": 9083 }, { "epoch": 0.5690035860254623, "grad_norm": 0.8500707292062027, "learning_rate": 4.1295392273885096e-06, "loss": 0.4232, "step": 9084 }, { "epoch": 0.5690662240248047, "grad_norm": 0.7850457175377245, "learning_rate": 4.128540337160361e-06, "loss": 0.4191, "step": 9085 }, { "epoch": 0.5691288620241469, "grad_norm": 0.6320252014628114, "learning_rate": 4.127541482801653e-06, "loss": 0.4737, "step": 9086 }, { "epoch": 0.5691915000234893, "grad_norm": 0.7969737250381412, "learning_rate": 4.126542664353499e-06, "loss": 0.3799, "step": 9087 }, { "epoch": 0.5692541380228315, "grad_norm": 0.8447488851934101, "learning_rate": 4.125543881857012e-06, "loss": 0.4339, "step": 9088 }, { "epoch": 0.5693167760221739, "grad_norm": 0.7612103290100898, "learning_rate": 4.124545135353301e-06, "loss": 0.3856, "step": 9089 }, { "epoch": 0.5693794140215162, "grad_norm": 0.7677657292072165, "learning_rate": 4.123546424883474e-06, "loss": 0.3313, "step": 9090 }, { "epoch": 0.5694420520208584, "grad_norm": 0.7915922756938965, "learning_rate": 4.1225477504886395e-06, "loss": 0.3884, "step": 9091 }, { "epoch": 0.5695046900202008, "grad_norm": 0.8477467874011984, "learning_rate": 4.1215491122099e-06, "loss": 0.3963, "step": 9092 }, { "epoch": 0.569567328019543, "grad_norm": 0.8909494090389064, "learning_rate": 4.120550510088365e-06, "loss": 0.3983, "step": 9093 }, { "epoch": 0.5696299660188854, "grad_norm": 0.7506584312509169, "learning_rate": 4.119551944165135e-06, "loss": 0.3362, "step": 9094 }, { "epoch": 0.5696926040182276, "grad_norm": 0.8117717410206902, "learning_rate": 4.118553414481309e-06, "loss": 0.3733, "step": 9095 }, { "epoch": 0.56975524201757, "grad_norm": 0.8334762792058173, "learning_rate": 4.117554921077987e-06, "loss": 0.3807, "step": 9096 }, { "epoch": 0.5698178800169122, "grad_norm": 0.8691593095065586, "learning_rate": 4.11655646399627e-06, "loss": 0.3957, "step": 9097 }, { "epoch": 0.5698805180162546, "grad_norm": 0.8003169486579127, "learning_rate": 4.115558043277252e-06, "loss": 0.411, "step": 9098 }, { "epoch": 0.5699431560155969, "grad_norm": 0.8566646408629202, "learning_rate": 4.114559658962028e-06, "loss": 0.4136, "step": 9099 }, { "epoch": 0.5700057940149391, "grad_norm": 0.8552949510609552, "learning_rate": 4.113561311091695e-06, "loss": 0.3689, "step": 9100 }, { "epoch": 0.5700684320142815, "grad_norm": 0.8127725579498769, "learning_rate": 4.11256299970734e-06, "loss": 0.3745, "step": 9101 }, { "epoch": 0.5701310700136237, "grad_norm": 0.8694494527447885, "learning_rate": 4.1115647248500595e-06, "loss": 0.3912, "step": 9102 }, { "epoch": 0.5701937080129661, "grad_norm": 0.8432338307499968, "learning_rate": 4.11056648656094e-06, "loss": 0.3574, "step": 9103 }, { "epoch": 0.5702563460123083, "grad_norm": 0.8625076836253832, "learning_rate": 4.109568284881067e-06, "loss": 0.4126, "step": 9104 }, { "epoch": 0.5703189840116507, "grad_norm": 0.6011501683715904, "learning_rate": 4.1085701198515295e-06, "loss": 0.4702, "step": 9105 }, { "epoch": 0.570381622010993, "grad_norm": 0.9075888175278651, "learning_rate": 4.107571991513413e-06, "loss": 0.4207, "step": 9106 }, { "epoch": 0.5704442600103353, "grad_norm": 0.7703548262389723, "learning_rate": 4.1065738999077975e-06, "loss": 0.372, "step": 9107 }, { "epoch": 0.5705068980096776, "grad_norm": 0.8333869238160954, "learning_rate": 4.105575845075765e-06, "loss": 0.3927, "step": 9108 }, { "epoch": 0.5705695360090198, "grad_norm": 0.8489385747619631, "learning_rate": 4.1045778270583985e-06, "loss": 0.4103, "step": 9109 }, { "epoch": 0.5706321740083622, "grad_norm": 0.850541482650302, "learning_rate": 4.103579845896772e-06, "loss": 0.4389, "step": 9110 }, { "epoch": 0.5706948120077044, "grad_norm": 0.7811265950549899, "learning_rate": 4.1025819016319685e-06, "loss": 0.3325, "step": 9111 }, { "epoch": 0.5707574500070468, "grad_norm": 0.8899452736010368, "learning_rate": 4.10158399430506e-06, "loss": 0.3838, "step": 9112 }, { "epoch": 0.5708200880063891, "grad_norm": 0.8619038703622535, "learning_rate": 4.10058612395712e-06, "loss": 0.4105, "step": 9113 }, { "epoch": 0.5708827260057314, "grad_norm": 0.8849689200318144, "learning_rate": 4.099588290629223e-06, "loss": 0.3847, "step": 9114 }, { "epoch": 0.5709453640050737, "grad_norm": 0.8598218951672996, "learning_rate": 4.098590494362439e-06, "loss": 0.4169, "step": 9115 }, { "epoch": 0.5710080020044159, "grad_norm": 0.8036572851030126, "learning_rate": 4.097592735197836e-06, "loss": 0.4084, "step": 9116 }, { "epoch": 0.5710706400037583, "grad_norm": 0.7708364195021155, "learning_rate": 4.0965950131764856e-06, "loss": 0.3732, "step": 9117 }, { "epoch": 0.5711332780031005, "grad_norm": 0.8066837253536556, "learning_rate": 4.0955973283394525e-06, "loss": 0.4102, "step": 9118 }, { "epoch": 0.5711959160024429, "grad_norm": 0.8612498546816416, "learning_rate": 4.0945996807278e-06, "loss": 0.3965, "step": 9119 }, { "epoch": 0.5712585540017852, "grad_norm": 0.8166241814633945, "learning_rate": 4.093602070382591e-06, "loss": 0.4477, "step": 9120 }, { "epoch": 0.5713211920011275, "grad_norm": 0.8452090557351012, "learning_rate": 4.09260449734489e-06, "loss": 0.4117, "step": 9121 }, { "epoch": 0.5713838300004698, "grad_norm": 0.5964505608076732, "learning_rate": 4.091606961655758e-06, "loss": 0.4421, "step": 9122 }, { "epoch": 0.5714464679998121, "grad_norm": 0.8448504174931037, "learning_rate": 4.0906094633562514e-06, "loss": 0.4041, "step": 9123 }, { "epoch": 0.5715091059991544, "grad_norm": 0.6834294409940257, "learning_rate": 4.089612002487428e-06, "loss": 0.4484, "step": 9124 }, { "epoch": 0.5715717439984966, "grad_norm": 0.86043878808204, "learning_rate": 4.0886145790903434e-06, "loss": 0.3897, "step": 9125 }, { "epoch": 0.571634381997839, "grad_norm": 0.8723999140072135, "learning_rate": 4.087617193206052e-06, "loss": 0.396, "step": 9126 }, { "epoch": 0.5716970199971813, "grad_norm": 0.8757522407016729, "learning_rate": 4.086619844875606e-06, "loss": 0.4177, "step": 9127 }, { "epoch": 0.5717596579965236, "grad_norm": 0.7974246579156059, "learning_rate": 4.085622534140058e-06, "loss": 0.3682, "step": 9128 }, { "epoch": 0.5718222959958659, "grad_norm": 0.795497902845987, "learning_rate": 4.084625261040453e-06, "loss": 0.3971, "step": 9129 }, { "epoch": 0.5718849339952082, "grad_norm": 0.9050040920148305, "learning_rate": 4.083628025617844e-06, "loss": 0.389, "step": 9130 }, { "epoch": 0.5719475719945505, "grad_norm": 0.7773381721287426, "learning_rate": 4.082630827913277e-06, "loss": 0.3326, "step": 9131 }, { "epoch": 0.5720102099938928, "grad_norm": 0.7965304655290741, "learning_rate": 4.081633667967794e-06, "loss": 0.4, "step": 9132 }, { "epoch": 0.5720728479932351, "grad_norm": 0.8316061627240057, "learning_rate": 4.0806365458224405e-06, "loss": 0.4034, "step": 9133 }, { "epoch": 0.5721354859925774, "grad_norm": 0.9614309771006826, "learning_rate": 4.079639461518256e-06, "loss": 0.3859, "step": 9134 }, { "epoch": 0.5721981239919197, "grad_norm": 0.7476738969074131, "learning_rate": 4.0786424150962835e-06, "loss": 0.488, "step": 9135 }, { "epoch": 0.572260761991262, "grad_norm": 0.8113406132758085, "learning_rate": 4.07764540659756e-06, "loss": 0.4238, "step": 9136 }, { "epoch": 0.5723233999906043, "grad_norm": 0.8258008910533977, "learning_rate": 4.076648436063122e-06, "loss": 0.4049, "step": 9137 }, { "epoch": 0.5723860379899466, "grad_norm": 0.8118071566118852, "learning_rate": 4.075651503534005e-06, "loss": 0.3785, "step": 9138 }, { "epoch": 0.572448675989289, "grad_norm": 0.8485712404739856, "learning_rate": 4.074654609051244e-06, "loss": 0.362, "step": 9139 }, { "epoch": 0.5725113139886312, "grad_norm": 0.632136280150685, "learning_rate": 4.0736577526558726e-06, "loss": 0.4512, "step": 9140 }, { "epoch": 0.5725739519879735, "grad_norm": 0.8068292255794987, "learning_rate": 4.072660934388919e-06, "loss": 0.39, "step": 9141 }, { "epoch": 0.5726365899873158, "grad_norm": 0.7751875132791606, "learning_rate": 4.071664154291414e-06, "loss": 0.3906, "step": 9142 }, { "epoch": 0.5726992279866581, "grad_norm": 0.8109721128272229, "learning_rate": 4.070667412404384e-06, "loss": 0.4207, "step": 9143 }, { "epoch": 0.5727618659860004, "grad_norm": 0.858758950351563, "learning_rate": 4.069670708768857e-06, "loss": 0.3737, "step": 9144 }, { "epoch": 0.5728245039853427, "grad_norm": 0.8317566574833665, "learning_rate": 4.0686740434258565e-06, "loss": 0.417, "step": 9145 }, { "epoch": 0.572887141984685, "grad_norm": 0.7682195439278361, "learning_rate": 4.067677416416403e-06, "loss": 0.3365, "step": 9146 }, { "epoch": 0.5729497799840273, "grad_norm": 0.7684683145023714, "learning_rate": 4.0666808277815204e-06, "loss": 0.3616, "step": 9147 }, { "epoch": 0.5730124179833697, "grad_norm": 0.7825411226663413, "learning_rate": 4.0656842775622295e-06, "loss": 0.3607, "step": 9148 }, { "epoch": 0.5730750559827119, "grad_norm": 0.9153194023590385, "learning_rate": 4.064687765799547e-06, "loss": 0.4249, "step": 9149 }, { "epoch": 0.5731376939820542, "grad_norm": 0.8647722208545455, "learning_rate": 4.0636912925344896e-06, "loss": 0.3792, "step": 9150 }, { "epoch": 0.5732003319813965, "grad_norm": 0.9074863238402099, "learning_rate": 4.0626948578080715e-06, "loss": 0.3773, "step": 9151 }, { "epoch": 0.5732629699807388, "grad_norm": 0.8140366181148173, "learning_rate": 4.061698461661309e-06, "loss": 0.3744, "step": 9152 }, { "epoch": 0.5733256079800811, "grad_norm": 0.804121468648543, "learning_rate": 4.060702104135211e-06, "loss": 0.3654, "step": 9153 }, { "epoch": 0.5733882459794234, "grad_norm": 0.89817180940552, "learning_rate": 4.059705785270789e-06, "loss": 0.3828, "step": 9154 }, { "epoch": 0.5734508839787658, "grad_norm": 0.8238490122570967, "learning_rate": 4.058709505109049e-06, "loss": 0.3831, "step": 9155 }, { "epoch": 0.573513521978108, "grad_norm": 0.8420778198134564, "learning_rate": 4.0577132636910015e-06, "loss": 0.3884, "step": 9156 }, { "epoch": 0.5735761599774504, "grad_norm": 0.7665435335550538, "learning_rate": 4.056717061057652e-06, "loss": 0.3926, "step": 9157 }, { "epoch": 0.5736387979767926, "grad_norm": 0.8242961659018726, "learning_rate": 4.055720897250004e-06, "loss": 0.3542, "step": 9158 }, { "epoch": 0.5737014359761349, "grad_norm": 0.7517851855879413, "learning_rate": 4.054724772309059e-06, "loss": 0.3145, "step": 9159 }, { "epoch": 0.5737640739754772, "grad_norm": 0.8664357639243848, "learning_rate": 4.053728686275816e-06, "loss": 0.4256, "step": 9160 }, { "epoch": 0.5738267119748195, "grad_norm": 0.7850426801057693, "learning_rate": 4.0527326391912775e-06, "loss": 0.3678, "step": 9161 }, { "epoch": 0.5738893499741619, "grad_norm": 0.8109427116579194, "learning_rate": 4.05173663109644e-06, "loss": 0.3965, "step": 9162 }, { "epoch": 0.5739519879735041, "grad_norm": 0.800967103439767, "learning_rate": 4.050740662032296e-06, "loss": 0.392, "step": 9163 }, { "epoch": 0.5740146259728465, "grad_norm": 0.6681850145486236, "learning_rate": 4.049744732039846e-06, "loss": 0.4728, "step": 9164 }, { "epoch": 0.5740772639721887, "grad_norm": 0.852330474679865, "learning_rate": 4.048748841160077e-06, "loss": 0.4609, "step": 9165 }, { "epoch": 0.574139901971531, "grad_norm": 0.9490688758138467, "learning_rate": 4.047752989433982e-06, "loss": 0.4314, "step": 9166 }, { "epoch": 0.5742025399708733, "grad_norm": 0.7970848990394845, "learning_rate": 4.046757176902551e-06, "loss": 0.3785, "step": 9167 }, { "epoch": 0.5742651779702156, "grad_norm": 0.8085289193499289, "learning_rate": 4.045761403606771e-06, "loss": 0.4298, "step": 9168 }, { "epoch": 0.574327815969558, "grad_norm": 0.7786448481327419, "learning_rate": 4.0447656695876305e-06, "loss": 0.3546, "step": 9169 }, { "epoch": 0.5743904539689002, "grad_norm": 0.8649307575884547, "learning_rate": 4.043769974886112e-06, "loss": 0.3775, "step": 9170 }, { "epoch": 0.5744530919682426, "grad_norm": 0.8419033788865952, "learning_rate": 4.042774319543198e-06, "loss": 0.3825, "step": 9171 }, { "epoch": 0.5745157299675848, "grad_norm": 0.8753520295033514, "learning_rate": 4.041778703599872e-06, "loss": 0.4281, "step": 9172 }, { "epoch": 0.5745783679669272, "grad_norm": 0.8176706830461352, "learning_rate": 4.040783127097111e-06, "loss": 0.3752, "step": 9173 }, { "epoch": 0.5746410059662694, "grad_norm": 0.9387307620558478, "learning_rate": 4.039787590075896e-06, "loss": 0.4506, "step": 9174 }, { "epoch": 0.5747036439656117, "grad_norm": 0.8747175693828758, "learning_rate": 4.038792092577199e-06, "loss": 0.4019, "step": 9175 }, { "epoch": 0.574766281964954, "grad_norm": 0.7941789306273972, "learning_rate": 4.037796634641999e-06, "loss": 0.358, "step": 9176 }, { "epoch": 0.5748289199642963, "grad_norm": 0.8614534299011919, "learning_rate": 4.036801216311269e-06, "loss": 0.4354, "step": 9177 }, { "epoch": 0.5748915579636387, "grad_norm": 0.8091169575999202, "learning_rate": 4.0358058376259805e-06, "loss": 0.3955, "step": 9178 }, { "epoch": 0.5749541959629809, "grad_norm": 0.7948647450137211, "learning_rate": 4.034810498627102e-06, "loss": 0.3785, "step": 9179 }, { "epoch": 0.5750168339623233, "grad_norm": 0.8459991456719577, "learning_rate": 4.033815199355603e-06, "loss": 0.4345, "step": 9180 }, { "epoch": 0.5750794719616655, "grad_norm": 0.6854488938440206, "learning_rate": 4.032819939852448e-06, "loss": 0.4523, "step": 9181 }, { "epoch": 0.5751421099610079, "grad_norm": 0.9043972998223937, "learning_rate": 4.031824720158606e-06, "loss": 0.4093, "step": 9182 }, { "epoch": 0.5752047479603501, "grad_norm": 0.7853719161491862, "learning_rate": 4.030829540315037e-06, "loss": 0.3681, "step": 9183 }, { "epoch": 0.5752673859596924, "grad_norm": 0.8140304312546152, "learning_rate": 4.029834400362702e-06, "loss": 0.3647, "step": 9184 }, { "epoch": 0.5753300239590348, "grad_norm": 0.8097900857884539, "learning_rate": 4.0288393003425665e-06, "loss": 0.3508, "step": 9185 }, { "epoch": 0.575392661958377, "grad_norm": 0.8177217170752902, "learning_rate": 4.027844240295583e-06, "loss": 0.3772, "step": 9186 }, { "epoch": 0.5754552999577194, "grad_norm": 0.8278724041765154, "learning_rate": 4.026849220262714e-06, "loss": 0.3662, "step": 9187 }, { "epoch": 0.5755179379570616, "grad_norm": 0.8110314069523847, "learning_rate": 4.02585424028491e-06, "loss": 0.4001, "step": 9188 }, { "epoch": 0.575580575956404, "grad_norm": 0.8029426833593647, "learning_rate": 4.024859300403126e-06, "loss": 0.388, "step": 9189 }, { "epoch": 0.5756432139557462, "grad_norm": 0.8503303905464353, "learning_rate": 4.023864400658315e-06, "loss": 0.3886, "step": 9190 }, { "epoch": 0.5757058519550885, "grad_norm": 0.8336715075515339, "learning_rate": 4.022869541091427e-06, "loss": 0.3904, "step": 9191 }, { "epoch": 0.5757684899544309, "grad_norm": 0.8134556661121898, "learning_rate": 4.021874721743409e-06, "loss": 0.3996, "step": 9192 }, { "epoch": 0.5758311279537731, "grad_norm": 0.8252010604876214, "learning_rate": 4.020879942655207e-06, "loss": 0.4122, "step": 9193 }, { "epoch": 0.5758937659531155, "grad_norm": 0.8165824677710315, "learning_rate": 4.019885203867771e-06, "loss": 0.3848, "step": 9194 }, { "epoch": 0.5759564039524577, "grad_norm": 0.7798561297551468, "learning_rate": 4.018890505422041e-06, "loss": 0.3136, "step": 9195 }, { "epoch": 0.5760190419518001, "grad_norm": 0.893284577755628, "learning_rate": 4.017895847358961e-06, "loss": 0.4025, "step": 9196 }, { "epoch": 0.5760816799511423, "grad_norm": 0.8475868783520006, "learning_rate": 4.0169012297194695e-06, "loss": 0.3325, "step": 9197 }, { "epoch": 0.5761443179504847, "grad_norm": 0.8287359863432083, "learning_rate": 4.015906652544506e-06, "loss": 0.3863, "step": 9198 }, { "epoch": 0.576206955949827, "grad_norm": 0.8660351053074302, "learning_rate": 4.014912115875007e-06, "loss": 0.4206, "step": 9199 }, { "epoch": 0.5762695939491692, "grad_norm": 0.7780169127365633, "learning_rate": 4.013917619751909e-06, "loss": 0.3649, "step": 9200 }, { "epoch": 0.5763322319485116, "grad_norm": 0.5912279374491998, "learning_rate": 4.012923164216143e-06, "loss": 0.4963, "step": 9201 }, { "epoch": 0.5763948699478538, "grad_norm": 0.8177633594714069, "learning_rate": 4.011928749308643e-06, "loss": 0.3691, "step": 9202 }, { "epoch": 0.5764575079471962, "grad_norm": 0.8928041790797343, "learning_rate": 4.01093437507034e-06, "loss": 0.3839, "step": 9203 }, { "epoch": 0.5765201459465384, "grad_norm": 0.657605542993357, "learning_rate": 4.0099400415421615e-06, "loss": 0.4584, "step": 9204 }, { "epoch": 0.5765827839458808, "grad_norm": 0.8673239501699737, "learning_rate": 4.008945748765036e-06, "loss": 0.3903, "step": 9205 }, { "epoch": 0.5766454219452231, "grad_norm": 0.7991635542310801, "learning_rate": 4.007951496779885e-06, "loss": 0.4071, "step": 9206 }, { "epoch": 0.5767080599445654, "grad_norm": 0.8411673907542685, "learning_rate": 4.006957285627637e-06, "loss": 0.4198, "step": 9207 }, { "epoch": 0.5767706979439077, "grad_norm": 0.8699973667754853, "learning_rate": 4.00596311534921e-06, "loss": 0.4603, "step": 9208 }, { "epoch": 0.5768333359432499, "grad_norm": 0.851217733290639, "learning_rate": 4.0049689859855265e-06, "loss": 0.3907, "step": 9209 }, { "epoch": 0.5768959739425923, "grad_norm": 0.8644449031769051, "learning_rate": 4.003974897577503e-06, "loss": 0.4057, "step": 9210 }, { "epoch": 0.5769586119419345, "grad_norm": 0.8211445414996701, "learning_rate": 4.0029808501660585e-06, "loss": 0.4077, "step": 9211 }, { "epoch": 0.5770212499412769, "grad_norm": 0.9162899407744532, "learning_rate": 4.001986843792105e-06, "loss": 0.4309, "step": 9212 }, { "epoch": 0.5770838879406192, "grad_norm": 0.7675235184205325, "learning_rate": 4.000992878496561e-06, "loss": 0.3949, "step": 9213 }, { "epoch": 0.5771465259399615, "grad_norm": 0.8529829192305607, "learning_rate": 3.999998954320335e-06, "loss": 0.3905, "step": 9214 }, { "epoch": 0.5772091639393038, "grad_norm": 0.8438605563346278, "learning_rate": 3.999005071304337e-06, "loss": 0.4113, "step": 9215 }, { "epoch": 0.5772718019386461, "grad_norm": 0.7478300066971886, "learning_rate": 3.998011229489477e-06, "loss": 0.3415, "step": 9216 }, { "epoch": 0.5773344399379884, "grad_norm": 0.8353708886966609, "learning_rate": 3.997017428916661e-06, "loss": 0.3796, "step": 9217 }, { "epoch": 0.5773970779373306, "grad_norm": 0.8898195221793773, "learning_rate": 3.996023669626793e-06, "loss": 0.4033, "step": 9218 }, { "epoch": 0.577459715936673, "grad_norm": 0.6206744381760402, "learning_rate": 3.995029951660777e-06, "loss": 0.4822, "step": 9219 }, { "epoch": 0.5775223539360153, "grad_norm": 0.8952898542299293, "learning_rate": 3.994036275059515e-06, "loss": 0.398, "step": 9220 }, { "epoch": 0.5775849919353576, "grad_norm": 0.786520142072733, "learning_rate": 3.993042639863904e-06, "loss": 0.3635, "step": 9221 }, { "epoch": 0.5776476299346999, "grad_norm": 0.8544735172837111, "learning_rate": 3.992049046114848e-06, "loss": 0.4366, "step": 9222 }, { "epoch": 0.5777102679340422, "grad_norm": 0.7997455442068655, "learning_rate": 3.99105549385324e-06, "loss": 0.385, "step": 9223 }, { "epoch": 0.5777729059333845, "grad_norm": 0.6339778106361418, "learning_rate": 3.9900619831199746e-06, "loss": 0.4576, "step": 9224 }, { "epoch": 0.5778355439327267, "grad_norm": 0.9316430841104023, "learning_rate": 3.989068513955946e-06, "loss": 0.401, "step": 9225 }, { "epoch": 0.5778981819320691, "grad_norm": 0.8239781497651938, "learning_rate": 3.9880750864020455e-06, "loss": 0.3768, "step": 9226 }, { "epoch": 0.5779608199314114, "grad_norm": 0.8453066420838193, "learning_rate": 3.98708170049916e-06, "loss": 0.3806, "step": 9227 }, { "epoch": 0.5780234579307537, "grad_norm": 0.8432989797099439, "learning_rate": 3.986088356288182e-06, "loss": 0.3921, "step": 9228 }, { "epoch": 0.578086095930096, "grad_norm": 0.8546790695498535, "learning_rate": 3.9850950538099955e-06, "loss": 0.4177, "step": 9229 }, { "epoch": 0.5781487339294383, "grad_norm": 0.7548129751501416, "learning_rate": 3.984101793105482e-06, "loss": 0.3557, "step": 9230 }, { "epoch": 0.5782113719287806, "grad_norm": 0.8128059314363433, "learning_rate": 3.983108574215529e-06, "loss": 0.3824, "step": 9231 }, { "epoch": 0.578274009928123, "grad_norm": 0.8330050370678508, "learning_rate": 3.982115397181016e-06, "loss": 0.3779, "step": 9232 }, { "epoch": 0.5783366479274652, "grad_norm": 0.9030929860579232, "learning_rate": 3.981122262042822e-06, "loss": 0.3937, "step": 9233 }, { "epoch": 0.5783992859268074, "grad_norm": 0.8798571439047934, "learning_rate": 3.980129168841826e-06, "loss": 0.4539, "step": 9234 }, { "epoch": 0.5784619239261498, "grad_norm": 0.8144838660040709, "learning_rate": 3.979136117618903e-06, "loss": 0.336, "step": 9235 }, { "epoch": 0.5785245619254921, "grad_norm": 0.9789005180445544, "learning_rate": 3.978143108414924e-06, "loss": 0.4184, "step": 9236 }, { "epoch": 0.5785871999248344, "grad_norm": 0.8770774414513642, "learning_rate": 3.977150141270767e-06, "loss": 0.3718, "step": 9237 }, { "epoch": 0.5786498379241767, "grad_norm": 0.7991531317722628, "learning_rate": 3.9761572162272996e-06, "loss": 0.3997, "step": 9238 }, { "epoch": 0.578712475923519, "grad_norm": 0.7674747504133311, "learning_rate": 3.975164333325388e-06, "loss": 0.3676, "step": 9239 }, { "epoch": 0.5787751139228613, "grad_norm": 0.92333420387007, "learning_rate": 3.974171492605907e-06, "loss": 0.3914, "step": 9240 }, { "epoch": 0.5788377519222037, "grad_norm": 0.8341282815611117, "learning_rate": 3.973178694109715e-06, "loss": 0.3898, "step": 9241 }, { "epoch": 0.5789003899215459, "grad_norm": 0.8214588907912201, "learning_rate": 3.97218593787768e-06, "loss": 0.3704, "step": 9242 }, { "epoch": 0.5789630279208882, "grad_norm": 0.8077983870110159, "learning_rate": 3.971193223950663e-06, "loss": 0.4068, "step": 9243 }, { "epoch": 0.5790256659202305, "grad_norm": 0.9098281741350787, "learning_rate": 3.970200552369523e-06, "loss": 0.4849, "step": 9244 }, { "epoch": 0.5790883039195728, "grad_norm": 0.8239003356089123, "learning_rate": 3.969207923175119e-06, "loss": 0.3714, "step": 9245 }, { "epoch": 0.5791509419189151, "grad_norm": 0.8674315676105785, "learning_rate": 3.968215336408309e-06, "loss": 0.4281, "step": 9246 }, { "epoch": 0.5792135799182574, "grad_norm": 0.8675790462703468, "learning_rate": 3.967222792109948e-06, "loss": 0.4027, "step": 9247 }, { "epoch": 0.5792762179175998, "grad_norm": 0.8260809178675771, "learning_rate": 3.966230290320886e-06, "loss": 0.3745, "step": 9248 }, { "epoch": 0.579338855916942, "grad_norm": 0.9272543112883376, "learning_rate": 3.965237831081979e-06, "loss": 0.3968, "step": 9249 }, { "epoch": 0.5794014939162843, "grad_norm": 0.8177814410122711, "learning_rate": 3.964245414434073e-06, "loss": 0.3953, "step": 9250 }, { "epoch": 0.5794641319156266, "grad_norm": 0.6259555474605812, "learning_rate": 3.963253040418021e-06, "loss": 0.4508, "step": 9251 }, { "epoch": 0.5795267699149689, "grad_norm": 0.7856692861431003, "learning_rate": 3.962260709074665e-06, "loss": 0.3793, "step": 9252 }, { "epoch": 0.5795894079143112, "grad_norm": 0.80354142506726, "learning_rate": 3.961268420444849e-06, "loss": 0.3722, "step": 9253 }, { "epoch": 0.5796520459136535, "grad_norm": 0.8238630929051528, "learning_rate": 3.96027617456942e-06, "loss": 0.3693, "step": 9254 }, { "epoch": 0.5797146839129959, "grad_norm": 0.5586446140127471, "learning_rate": 3.9592839714892165e-06, "loss": 0.4433, "step": 9255 }, { "epoch": 0.5797773219123381, "grad_norm": 0.8267607426883095, "learning_rate": 3.958291811245077e-06, "loss": 0.3916, "step": 9256 }, { "epoch": 0.5798399599116805, "grad_norm": 0.8298631232595794, "learning_rate": 3.95729969387784e-06, "loss": 0.408, "step": 9257 }, { "epoch": 0.5799025979110227, "grad_norm": 0.8604404822413042, "learning_rate": 3.95630761942834e-06, "loss": 0.3885, "step": 9258 }, { "epoch": 0.579965235910365, "grad_norm": 0.8203183744763, "learning_rate": 3.955315587937414e-06, "loss": 0.3786, "step": 9259 }, { "epoch": 0.5800278739097073, "grad_norm": 0.7745435895231673, "learning_rate": 3.954323599445892e-06, "loss": 0.3629, "step": 9260 }, { "epoch": 0.5800905119090496, "grad_norm": 0.8373591706865864, "learning_rate": 3.953331653994605e-06, "loss": 0.3976, "step": 9261 }, { "epoch": 0.580153149908392, "grad_norm": 0.8591924190489527, "learning_rate": 3.95233975162438e-06, "loss": 0.4227, "step": 9262 }, { "epoch": 0.5802157879077342, "grad_norm": 0.8160342275817565, "learning_rate": 3.9513478923760465e-06, "loss": 0.4177, "step": 9263 }, { "epoch": 0.5802784259070766, "grad_norm": 0.7596658056017551, "learning_rate": 3.950356076290429e-06, "loss": 0.3768, "step": 9264 }, { "epoch": 0.5803410639064188, "grad_norm": 0.8097210265492545, "learning_rate": 3.949364303408349e-06, "loss": 0.4038, "step": 9265 }, { "epoch": 0.5804037019057612, "grad_norm": 0.92545007526652, "learning_rate": 3.948372573770629e-06, "loss": 0.4339, "step": 9266 }, { "epoch": 0.5804663399051034, "grad_norm": 0.8784464696031571, "learning_rate": 3.947380887418088e-06, "loss": 0.437, "step": 9267 }, { "epoch": 0.5805289779044457, "grad_norm": 0.6265999998234161, "learning_rate": 3.946389244391547e-06, "loss": 0.4442, "step": 9268 }, { "epoch": 0.580591615903788, "grad_norm": 0.802599968007027, "learning_rate": 3.94539764473182e-06, "loss": 0.3622, "step": 9269 }, { "epoch": 0.5806542539031303, "grad_norm": 0.8918104314527737, "learning_rate": 3.944406088479722e-06, "loss": 0.3994, "step": 9270 }, { "epoch": 0.5807168919024727, "grad_norm": 0.8156285249974992, "learning_rate": 3.943414575676065e-06, "loss": 0.4177, "step": 9271 }, { "epoch": 0.5807795299018149, "grad_norm": 0.8677083331491727, "learning_rate": 3.9424231063616605e-06, "loss": 0.4132, "step": 9272 }, { "epoch": 0.5808421679011573, "grad_norm": 0.8282407707027899, "learning_rate": 3.941431680577317e-06, "loss": 0.4359, "step": 9273 }, { "epoch": 0.5809048059004995, "grad_norm": 0.778541881147215, "learning_rate": 3.940440298363841e-06, "loss": 0.3799, "step": 9274 }, { "epoch": 0.5809674438998418, "grad_norm": 0.777723888531033, "learning_rate": 3.93944895976204e-06, "loss": 0.357, "step": 9275 }, { "epoch": 0.5810300818991841, "grad_norm": 0.8201687387315164, "learning_rate": 3.938457664812716e-06, "loss": 0.4294, "step": 9276 }, { "epoch": 0.5810927198985264, "grad_norm": 0.8350961237257022, "learning_rate": 3.937466413556672e-06, "loss": 0.4114, "step": 9277 }, { "epoch": 0.5811553578978688, "grad_norm": 0.9037376881131411, "learning_rate": 3.9364752060347076e-06, "loss": 0.3883, "step": 9278 }, { "epoch": 0.581217995897211, "grad_norm": 0.8853764770222549, "learning_rate": 3.93548404228762e-06, "loss": 0.4249, "step": 9279 }, { "epoch": 0.5812806338965534, "grad_norm": 0.8554272757271584, "learning_rate": 3.934492922356209e-06, "loss": 0.3849, "step": 9280 }, { "epoch": 0.5813432718958956, "grad_norm": 0.8276789267551391, "learning_rate": 3.9335018462812664e-06, "loss": 0.3491, "step": 9281 }, { "epoch": 0.581405909895238, "grad_norm": 0.8975645003371701, "learning_rate": 3.932510814103586e-06, "loss": 0.41, "step": 9282 }, { "epoch": 0.5814685478945802, "grad_norm": 0.8530568035616807, "learning_rate": 3.931519825863957e-06, "loss": 0.4173, "step": 9283 }, { "epoch": 0.5815311858939225, "grad_norm": 0.8716744091834964, "learning_rate": 3.930528881603172e-06, "loss": 0.4471, "step": 9284 }, { "epoch": 0.5815938238932649, "grad_norm": 0.7762953082333861, "learning_rate": 3.929537981362015e-06, "loss": 0.365, "step": 9285 }, { "epoch": 0.5816564618926071, "grad_norm": 0.839493384468355, "learning_rate": 3.928547125181275e-06, "loss": 0.4274, "step": 9286 }, { "epoch": 0.5817190998919495, "grad_norm": 0.8058894974040506, "learning_rate": 3.9275563131017334e-06, "loss": 0.3846, "step": 9287 }, { "epoch": 0.5817817378912917, "grad_norm": 0.8009062455816796, "learning_rate": 3.9265655451641724e-06, "loss": 0.3404, "step": 9288 }, { "epoch": 0.5818443758906341, "grad_norm": 0.8552428125558591, "learning_rate": 3.925574821409375e-06, "loss": 0.3933, "step": 9289 }, { "epoch": 0.5819070138899763, "grad_norm": 0.8279765235464153, "learning_rate": 3.924584141878116e-06, "loss": 0.4252, "step": 9290 }, { "epoch": 0.5819696518893187, "grad_norm": 0.7773568752483974, "learning_rate": 3.923593506611173e-06, "loss": 0.378, "step": 9291 }, { "epoch": 0.582032289888661, "grad_norm": 0.8801324223611535, "learning_rate": 3.922602915649322e-06, "loss": 0.4037, "step": 9292 }, { "epoch": 0.5820949278880032, "grad_norm": 0.9235182981754939, "learning_rate": 3.921612369033334e-06, "loss": 0.4245, "step": 9293 }, { "epoch": 0.5821575658873456, "grad_norm": 0.8521363865322845, "learning_rate": 3.920621866803982e-06, "loss": 0.4154, "step": 9294 }, { "epoch": 0.5822202038866878, "grad_norm": 0.8611008783283619, "learning_rate": 3.9196314090020325e-06, "loss": 0.4138, "step": 9295 }, { "epoch": 0.5822828418860302, "grad_norm": 0.8789688638349739, "learning_rate": 3.918640995668256e-06, "loss": 0.4207, "step": 9296 }, { "epoch": 0.5823454798853724, "grad_norm": 0.6282608950144507, "learning_rate": 3.917650626843417e-06, "loss": 0.4694, "step": 9297 }, { "epoch": 0.5824081178847148, "grad_norm": 0.6816585784911534, "learning_rate": 3.916660302568279e-06, "loss": 0.4788, "step": 9298 }, { "epoch": 0.5824707558840571, "grad_norm": 0.8015324597002215, "learning_rate": 3.915670022883604e-06, "loss": 0.361, "step": 9299 }, { "epoch": 0.5825333938833994, "grad_norm": 0.8691927060512609, "learning_rate": 3.914679787830151e-06, "loss": 0.4013, "step": 9300 }, { "epoch": 0.5825960318827417, "grad_norm": 0.7914541580831037, "learning_rate": 3.913689597448681e-06, "loss": 0.3733, "step": 9301 }, { "epoch": 0.5826586698820839, "grad_norm": 0.818992061393021, "learning_rate": 3.912699451779948e-06, "loss": 0.3621, "step": 9302 }, { "epoch": 0.5827213078814263, "grad_norm": 0.9075483649747602, "learning_rate": 3.911709350864709e-06, "loss": 0.415, "step": 9303 }, { "epoch": 0.5827839458807685, "grad_norm": 0.867859144184351, "learning_rate": 3.910719294743712e-06, "loss": 0.4325, "step": 9304 }, { "epoch": 0.5828465838801109, "grad_norm": 0.8586174585317268, "learning_rate": 3.9097292834577125e-06, "loss": 0.3913, "step": 9305 }, { "epoch": 0.5829092218794532, "grad_norm": 0.805833348667351, "learning_rate": 3.908739317047458e-06, "loss": 0.4179, "step": 9306 }, { "epoch": 0.5829718598787955, "grad_norm": 0.8813991176067705, "learning_rate": 3.9077493955536966e-06, "loss": 0.398, "step": 9307 }, { "epoch": 0.5830344978781378, "grad_norm": 0.8448686073032198, "learning_rate": 3.9067595190171726e-06, "loss": 0.4151, "step": 9308 }, { "epoch": 0.58309713587748, "grad_norm": 0.7510801614900123, "learning_rate": 3.9057696874786285e-06, "loss": 0.3482, "step": 9309 }, { "epoch": 0.5831597738768224, "grad_norm": 0.9134795153402595, "learning_rate": 3.904779900978809e-06, "loss": 0.4569, "step": 9310 }, { "epoch": 0.5832224118761646, "grad_norm": 0.7845623296342462, "learning_rate": 3.9037901595584516e-06, "loss": 0.3518, "step": 9311 }, { "epoch": 0.583285049875507, "grad_norm": 0.8495250437174282, "learning_rate": 3.902800463258293e-06, "loss": 0.4039, "step": 9312 }, { "epoch": 0.5833476878748493, "grad_norm": 0.787846399161558, "learning_rate": 3.901810812119072e-06, "loss": 0.3753, "step": 9313 }, { "epoch": 0.5834103258741916, "grad_norm": 0.6538709845382875, "learning_rate": 3.900821206181521e-06, "loss": 0.4629, "step": 9314 }, { "epoch": 0.5834729638735339, "grad_norm": 0.8719786226726617, "learning_rate": 3.899831645486375e-06, "loss": 0.4252, "step": 9315 }, { "epoch": 0.5835356018728762, "grad_norm": 0.8894246290680687, "learning_rate": 3.898842130074362e-06, "loss": 0.4171, "step": 9316 }, { "epoch": 0.5835982398722185, "grad_norm": 0.8402913199931554, "learning_rate": 3.8978526599862105e-06, "loss": 0.3594, "step": 9317 }, { "epoch": 0.5836608778715607, "grad_norm": 0.8844876600616342, "learning_rate": 3.896863235262649e-06, "loss": 0.4341, "step": 9318 }, { "epoch": 0.5837235158709031, "grad_norm": 0.7419016657531234, "learning_rate": 3.895873855944402e-06, "loss": 0.3704, "step": 9319 }, { "epoch": 0.5837861538702454, "grad_norm": 0.9004846238439643, "learning_rate": 3.894884522072191e-06, "loss": 0.4255, "step": 9320 }, { "epoch": 0.5838487918695877, "grad_norm": 0.8684989580514672, "learning_rate": 3.893895233686738e-06, "loss": 0.3962, "step": 9321 }, { "epoch": 0.58391142986893, "grad_norm": 0.7791255525094425, "learning_rate": 3.892905990828762e-06, "loss": 0.3463, "step": 9322 }, { "epoch": 0.5839740678682723, "grad_norm": 0.8693009886339688, "learning_rate": 3.891916793538981e-06, "loss": 0.4023, "step": 9323 }, { "epoch": 0.5840367058676146, "grad_norm": 0.8408933278463772, "learning_rate": 3.890927641858112e-06, "loss": 0.3854, "step": 9324 }, { "epoch": 0.5840993438669569, "grad_norm": 0.8214476378487731, "learning_rate": 3.889938535826866e-06, "loss": 0.3979, "step": 9325 }, { "epoch": 0.5841619818662992, "grad_norm": 0.8620911139859612, "learning_rate": 3.8889494754859565e-06, "loss": 0.4213, "step": 9326 }, { "epoch": 0.5842246198656414, "grad_norm": 0.8825802488102054, "learning_rate": 3.887960460876093e-06, "loss": 0.4068, "step": 9327 }, { "epoch": 0.5842872578649838, "grad_norm": 0.8021179045535233, "learning_rate": 3.886971492037983e-06, "loss": 0.3736, "step": 9328 }, { "epoch": 0.5843498958643261, "grad_norm": 0.784759849622292, "learning_rate": 3.885982569012333e-06, "loss": 0.4105, "step": 9329 }, { "epoch": 0.5844125338636684, "grad_norm": 0.8259056792577519, "learning_rate": 3.884993691839847e-06, "loss": 0.3972, "step": 9330 }, { "epoch": 0.5844751718630107, "grad_norm": 0.7830608463860602, "learning_rate": 3.884004860561226e-06, "loss": 0.3604, "step": 9331 }, { "epoch": 0.584537809862353, "grad_norm": 0.7290004166956301, "learning_rate": 3.883016075217174e-06, "loss": 0.4608, "step": 9332 }, { "epoch": 0.5846004478616953, "grad_norm": 0.7531086859005183, "learning_rate": 3.882027335848388e-06, "loss": 0.3763, "step": 9333 }, { "epoch": 0.5846630858610375, "grad_norm": 0.8486345869770495, "learning_rate": 3.881038642495564e-06, "loss": 0.372, "step": 9334 }, { "epoch": 0.5847257238603799, "grad_norm": 0.845436544218654, "learning_rate": 3.880049995199396e-06, "loss": 0.4167, "step": 9335 }, { "epoch": 0.5847883618597222, "grad_norm": 0.8324889171093961, "learning_rate": 3.87906139400058e-06, "loss": 0.4048, "step": 9336 }, { "epoch": 0.5848509998590645, "grad_norm": 0.9372624365438007, "learning_rate": 3.878072838939805e-06, "loss": 0.4491, "step": 9337 }, { "epoch": 0.5849136378584068, "grad_norm": 0.8336248746849739, "learning_rate": 3.8770843300577586e-06, "loss": 0.3827, "step": 9338 }, { "epoch": 0.5849762758577491, "grad_norm": 0.8107530571196853, "learning_rate": 3.87609586739513e-06, "loss": 0.3933, "step": 9339 }, { "epoch": 0.5850389138570914, "grad_norm": 0.9297394636485009, "learning_rate": 3.875107450992605e-06, "loss": 0.3948, "step": 9340 }, { "epoch": 0.5851015518564338, "grad_norm": 0.8255109450370091, "learning_rate": 3.874119080890864e-06, "loss": 0.4229, "step": 9341 }, { "epoch": 0.585164189855776, "grad_norm": 0.6476857689668699, "learning_rate": 3.873130757130593e-06, "loss": 0.4636, "step": 9342 }, { "epoch": 0.5852268278551183, "grad_norm": 0.906715683396556, "learning_rate": 3.872142479752467e-06, "loss": 0.3915, "step": 9343 }, { "epoch": 0.5852894658544606, "grad_norm": 0.807713895444904, "learning_rate": 3.871154248797168e-06, "loss": 0.4197, "step": 9344 }, { "epoch": 0.5853521038538029, "grad_norm": 0.7569420926502558, "learning_rate": 3.870166064305369e-06, "loss": 0.385, "step": 9345 }, { "epoch": 0.5854147418531452, "grad_norm": 0.7878914944621, "learning_rate": 3.869177926317745e-06, "loss": 0.3751, "step": 9346 }, { "epoch": 0.5854773798524875, "grad_norm": 0.7262646916769039, "learning_rate": 3.868189834874965e-06, "loss": 0.3495, "step": 9347 }, { "epoch": 0.5855400178518299, "grad_norm": 0.8586260584178836, "learning_rate": 3.867201790017705e-06, "loss": 0.415, "step": 9348 }, { "epoch": 0.5856026558511721, "grad_norm": 0.8658001800215857, "learning_rate": 3.866213791786628e-06, "loss": 0.4182, "step": 9349 }, { "epoch": 0.5856652938505145, "grad_norm": 0.6438488097369989, "learning_rate": 3.8652258402224e-06, "loss": 0.4558, "step": 9350 }, { "epoch": 0.5857279318498567, "grad_norm": 0.8122270185601821, "learning_rate": 3.864237935365689e-06, "loss": 0.3788, "step": 9351 }, { "epoch": 0.585790569849199, "grad_norm": 0.8282963676854748, "learning_rate": 3.863250077257154e-06, "loss": 0.4012, "step": 9352 }, { "epoch": 0.5858532078485413, "grad_norm": 0.8018151977044892, "learning_rate": 3.862262265937459e-06, "loss": 0.4195, "step": 9353 }, { "epoch": 0.5859158458478836, "grad_norm": 0.8134350297005, "learning_rate": 3.861274501447259e-06, "loss": 0.3919, "step": 9354 }, { "epoch": 0.585978483847226, "grad_norm": 0.8015340198991988, "learning_rate": 3.8602867838272125e-06, "loss": 0.4006, "step": 9355 }, { "epoch": 0.5860411218465682, "grad_norm": 0.8063261016013364, "learning_rate": 3.859299113117972e-06, "loss": 0.3692, "step": 9356 }, { "epoch": 0.5861037598459106, "grad_norm": 0.8604890553832818, "learning_rate": 3.858311489360194e-06, "loss": 0.3806, "step": 9357 }, { "epoch": 0.5861663978452528, "grad_norm": 0.8276882461566658, "learning_rate": 3.857323912594526e-06, "loss": 0.42, "step": 9358 }, { "epoch": 0.5862290358445951, "grad_norm": 0.8875958914920968, "learning_rate": 3.856336382861615e-06, "loss": 0.4251, "step": 9359 }, { "epoch": 0.5862916738439374, "grad_norm": 0.8187553642230909, "learning_rate": 3.855348900202114e-06, "loss": 0.3945, "step": 9360 }, { "epoch": 0.5863543118432797, "grad_norm": 0.8392129085295704, "learning_rate": 3.854361464656664e-06, "loss": 0.4225, "step": 9361 }, { "epoch": 0.586416949842622, "grad_norm": 0.8542464415506619, "learning_rate": 3.853374076265909e-06, "loss": 0.44, "step": 9362 }, { "epoch": 0.5864795878419643, "grad_norm": 0.862512467438548, "learning_rate": 3.8523867350704905e-06, "loss": 0.4136, "step": 9363 }, { "epoch": 0.5865422258413067, "grad_norm": 0.8653755132308424, "learning_rate": 3.851399441111045e-06, "loss": 0.4185, "step": 9364 }, { "epoch": 0.5866048638406489, "grad_norm": 0.7698889389169881, "learning_rate": 3.850412194428214e-06, "loss": 0.377, "step": 9365 }, { "epoch": 0.5866675018399913, "grad_norm": 0.8326812059072358, "learning_rate": 3.84942499506263e-06, "loss": 0.4419, "step": 9366 }, { "epoch": 0.5867301398393335, "grad_norm": 0.8072115248736348, "learning_rate": 3.848437843054926e-06, "loss": 0.3882, "step": 9367 }, { "epoch": 0.5867927778386758, "grad_norm": 0.8380537409945225, "learning_rate": 3.847450738445732e-06, "loss": 0.4258, "step": 9368 }, { "epoch": 0.5868554158380181, "grad_norm": 0.8785212089093404, "learning_rate": 3.8464636812756815e-06, "loss": 0.4029, "step": 9369 }, { "epoch": 0.5869180538373604, "grad_norm": 0.8813111288095716, "learning_rate": 3.845476671585401e-06, "loss": 0.3884, "step": 9370 }, { "epoch": 0.5869806918367028, "grad_norm": 0.8711885279961111, "learning_rate": 3.844489709415515e-06, "loss": 0.4115, "step": 9371 }, { "epoch": 0.587043329836045, "grad_norm": 0.7809072747849007, "learning_rate": 3.843502794806646e-06, "loss": 0.3756, "step": 9372 }, { "epoch": 0.5871059678353874, "grad_norm": 0.8684762498169585, "learning_rate": 3.842515927799416e-06, "loss": 0.403, "step": 9373 }, { "epoch": 0.5871686058347296, "grad_norm": 0.6343339872513408, "learning_rate": 3.841529108434447e-06, "loss": 0.4399, "step": 9374 }, { "epoch": 0.587231243834072, "grad_norm": 0.6630056334176522, "learning_rate": 3.840542336752354e-06, "loss": 0.4582, "step": 9375 }, { "epoch": 0.5872938818334142, "grad_norm": 0.8451919223223922, "learning_rate": 3.839555612793754e-06, "loss": 0.4124, "step": 9376 }, { "epoch": 0.5873565198327565, "grad_norm": 0.85522463291276, "learning_rate": 3.838568936599259e-06, "loss": 0.3966, "step": 9377 }, { "epoch": 0.5874191578320989, "grad_norm": 0.8517464918918133, "learning_rate": 3.837582308209482e-06, "loss": 0.3822, "step": 9378 }, { "epoch": 0.5874817958314411, "grad_norm": 0.8286514661347697, "learning_rate": 3.8365957276650345e-06, "loss": 0.3894, "step": 9379 }, { "epoch": 0.5875444338307835, "grad_norm": 0.8702069601791402, "learning_rate": 3.835609195006524e-06, "loss": 0.4083, "step": 9380 }, { "epoch": 0.5876070718301257, "grad_norm": 0.8915064556995311, "learning_rate": 3.834622710274553e-06, "loss": 0.41, "step": 9381 }, { "epoch": 0.5876697098294681, "grad_norm": 0.8599247448170824, "learning_rate": 3.833636273509727e-06, "loss": 0.3705, "step": 9382 }, { "epoch": 0.5877323478288103, "grad_norm": 0.8991745462157105, "learning_rate": 3.83264988475265e-06, "loss": 0.4234, "step": 9383 }, { "epoch": 0.5877949858281526, "grad_norm": 0.8961705048164459, "learning_rate": 3.831663544043921e-06, "loss": 0.4227, "step": 9384 }, { "epoch": 0.587857623827495, "grad_norm": 0.8119049060895035, "learning_rate": 3.830677251424136e-06, "loss": 0.4317, "step": 9385 }, { "epoch": 0.5879202618268372, "grad_norm": 0.8527311111500927, "learning_rate": 3.829691006933893e-06, "loss": 0.4064, "step": 9386 }, { "epoch": 0.5879828998261796, "grad_norm": 0.8418570182052006, "learning_rate": 3.8287048106137835e-06, "loss": 0.3654, "step": 9387 }, { "epoch": 0.5880455378255218, "grad_norm": 0.8018741776908334, "learning_rate": 3.827718662504403e-06, "loss": 0.4092, "step": 9388 }, { "epoch": 0.5881081758248642, "grad_norm": 0.8335119329178756, "learning_rate": 3.826732562646341e-06, "loss": 0.3922, "step": 9389 }, { "epoch": 0.5881708138242064, "grad_norm": 0.9528785943954262, "learning_rate": 3.825746511080183e-06, "loss": 0.3827, "step": 9390 }, { "epoch": 0.5882334518235488, "grad_norm": 0.7955921925813335, "learning_rate": 3.8247605078465165e-06, "loss": 0.3779, "step": 9391 }, { "epoch": 0.588296089822891, "grad_norm": 0.8329017083586222, "learning_rate": 3.8237745529859274e-06, "loss": 0.4212, "step": 9392 }, { "epoch": 0.5883587278222333, "grad_norm": 0.8454336257652281, "learning_rate": 3.822788646538995e-06, "loss": 0.3435, "step": 9393 }, { "epoch": 0.5884213658215757, "grad_norm": 0.8954181779617465, "learning_rate": 3.8218027885463e-06, "loss": 0.4095, "step": 9394 }, { "epoch": 0.5884840038209179, "grad_norm": 0.810641968222367, "learning_rate": 3.82081697904842e-06, "loss": 0.4359, "step": 9395 }, { "epoch": 0.5885466418202603, "grad_norm": 0.8479033286735888, "learning_rate": 3.819831218085932e-06, "loss": 0.3793, "step": 9396 }, { "epoch": 0.5886092798196025, "grad_norm": 0.803098092205834, "learning_rate": 3.818845505699412e-06, "loss": 0.4045, "step": 9397 }, { "epoch": 0.5886719178189449, "grad_norm": 0.8440707514322338, "learning_rate": 3.81785984192943e-06, "loss": 0.4003, "step": 9398 }, { "epoch": 0.5887345558182872, "grad_norm": 0.8574758527256114, "learning_rate": 3.816874226816555e-06, "loss": 0.4031, "step": 9399 }, { "epoch": 0.5887971938176295, "grad_norm": 0.8389969002974337, "learning_rate": 3.815888660401357e-06, "loss": 0.4007, "step": 9400 }, { "epoch": 0.5888598318169718, "grad_norm": 0.8656389501753936, "learning_rate": 3.814903142724402e-06, "loss": 0.4043, "step": 9401 }, { "epoch": 0.588922469816314, "grad_norm": 0.615461160666068, "learning_rate": 3.8139176738262528e-06, "loss": 0.4292, "step": 9402 }, { "epoch": 0.5889851078156564, "grad_norm": 0.8570907382598272, "learning_rate": 3.8129322537474735e-06, "loss": 0.4203, "step": 9403 }, { "epoch": 0.5890477458149986, "grad_norm": 0.8335337512055533, "learning_rate": 3.8119468825286226e-06, "loss": 0.3955, "step": 9404 }, { "epoch": 0.589110383814341, "grad_norm": 0.8660857342628715, "learning_rate": 3.8109615602102568e-06, "loss": 0.427, "step": 9405 }, { "epoch": 0.5891730218136833, "grad_norm": 0.927337389938096, "learning_rate": 3.8099762868329355e-06, "loss": 0.4314, "step": 9406 }, { "epoch": 0.5892356598130256, "grad_norm": 0.7684532403403537, "learning_rate": 3.80899106243721e-06, "loss": 0.4046, "step": 9407 }, { "epoch": 0.5892982978123679, "grad_norm": 0.8568659791856101, "learning_rate": 3.808005887063635e-06, "loss": 0.4249, "step": 9408 }, { "epoch": 0.5893609358117102, "grad_norm": 0.773109381483739, "learning_rate": 3.8070207607527587e-06, "loss": 0.3687, "step": 9409 }, { "epoch": 0.5894235738110525, "grad_norm": 0.8388772470365053, "learning_rate": 3.806035683545129e-06, "loss": 0.4274, "step": 9410 }, { "epoch": 0.5894862118103947, "grad_norm": 0.7978302254720402, "learning_rate": 3.8050506554812918e-06, "loss": 0.3555, "step": 9411 }, { "epoch": 0.5895488498097371, "grad_norm": 0.8068977918272396, "learning_rate": 3.8040656766017912e-06, "loss": 0.3888, "step": 9412 }, { "epoch": 0.5896114878090793, "grad_norm": 0.8442984057532069, "learning_rate": 3.8030807469471708e-06, "loss": 0.399, "step": 9413 }, { "epoch": 0.5896741258084217, "grad_norm": 0.8727898303265742, "learning_rate": 3.802095866557966e-06, "loss": 0.3885, "step": 9414 }, { "epoch": 0.589736763807764, "grad_norm": 0.8542466752485053, "learning_rate": 3.80111103547472e-06, "loss": 0.3975, "step": 9415 }, { "epoch": 0.5897994018071063, "grad_norm": 0.8036711571473193, "learning_rate": 3.8001262537379657e-06, "loss": 0.3964, "step": 9416 }, { "epoch": 0.5898620398064486, "grad_norm": 0.8759750572261364, "learning_rate": 3.7991415213882375e-06, "loss": 0.3932, "step": 9417 }, { "epoch": 0.5899246778057908, "grad_norm": 0.8490069813335078, "learning_rate": 3.7981568384660683e-06, "loss": 0.4113, "step": 9418 }, { "epoch": 0.5899873158051332, "grad_norm": 0.7782619269059683, "learning_rate": 3.7971722050119865e-06, "loss": 0.4068, "step": 9419 }, { "epoch": 0.5900499538044754, "grad_norm": 0.7937355369682404, "learning_rate": 3.7961876210665184e-06, "loss": 0.4144, "step": 9420 }, { "epoch": 0.5901125918038178, "grad_norm": 0.8231844523835266, "learning_rate": 3.795203086670193e-06, "loss": 0.4028, "step": 9421 }, { "epoch": 0.5901752298031601, "grad_norm": 0.8670031125717438, "learning_rate": 3.7942186018635315e-06, "loss": 0.4506, "step": 9422 }, { "epoch": 0.5902378678025024, "grad_norm": 0.8146831374836344, "learning_rate": 3.7932341666870543e-06, "loss": 0.3922, "step": 9423 }, { "epoch": 0.5903005058018447, "grad_norm": 0.878460543920206, "learning_rate": 3.792249781181285e-06, "loss": 0.4155, "step": 9424 }, { "epoch": 0.590363143801187, "grad_norm": 0.8845606164384965, "learning_rate": 3.7912654453867374e-06, "loss": 0.4409, "step": 9425 }, { "epoch": 0.5904257818005293, "grad_norm": 0.8121926677040058, "learning_rate": 3.79028115934393e-06, "loss": 0.3983, "step": 9426 }, { "epoch": 0.5904884197998715, "grad_norm": 0.8383762772025947, "learning_rate": 3.789296923093374e-06, "loss": 0.4131, "step": 9427 }, { "epoch": 0.5905510577992139, "grad_norm": 0.8057120647070957, "learning_rate": 3.788312736675581e-06, "loss": 0.426, "step": 9428 }, { "epoch": 0.5906136957985562, "grad_norm": 0.88289965673992, "learning_rate": 3.7873286001310615e-06, "loss": 0.4285, "step": 9429 }, { "epoch": 0.5906763337978985, "grad_norm": 0.7946113602197927, "learning_rate": 3.7863445135003217e-06, "loss": 0.392, "step": 9430 }, { "epoch": 0.5907389717972408, "grad_norm": 0.8315627646564079, "learning_rate": 3.785360476823867e-06, "loss": 0.3492, "step": 9431 }, { "epoch": 0.5908016097965831, "grad_norm": 0.8648266102349383, "learning_rate": 3.7843764901421997e-06, "loss": 0.3857, "step": 9432 }, { "epoch": 0.5908642477959254, "grad_norm": 0.8826094877142557, "learning_rate": 3.7833925534958205e-06, "loss": 0.3947, "step": 9433 }, { "epoch": 0.5909268857952678, "grad_norm": 0.846168718539482, "learning_rate": 3.7824086669252317e-06, "loss": 0.4024, "step": 9434 }, { "epoch": 0.59098952379461, "grad_norm": 0.846798840516969, "learning_rate": 3.7814248304709283e-06, "loss": 0.3825, "step": 9435 }, { "epoch": 0.5910521617939523, "grad_norm": 0.8312879662819238, "learning_rate": 3.780441044173405e-06, "loss": 0.4246, "step": 9436 }, { "epoch": 0.5911147997932946, "grad_norm": 0.6405909542743241, "learning_rate": 3.7794573080731533e-06, "loss": 0.473, "step": 9437 }, { "epoch": 0.5911774377926369, "grad_norm": 0.9000794865656011, "learning_rate": 3.778473622210667e-06, "loss": 0.3813, "step": 9438 }, { "epoch": 0.5912400757919792, "grad_norm": 0.8117413430873354, "learning_rate": 3.777489986626433e-06, "loss": 0.3857, "step": 9439 }, { "epoch": 0.5913027137913215, "grad_norm": 0.8206051051630182, "learning_rate": 3.7765064013609377e-06, "loss": 0.3896, "step": 9440 }, { "epoch": 0.5913653517906639, "grad_norm": 0.8119922727633184, "learning_rate": 3.775522866454665e-06, "loss": 0.3758, "step": 9441 }, { "epoch": 0.5914279897900061, "grad_norm": 0.8342288915893228, "learning_rate": 3.774539381948098e-06, "loss": 0.4037, "step": 9442 }, { "epoch": 0.5914906277893484, "grad_norm": 0.8183122179919767, "learning_rate": 3.7735559478817187e-06, "loss": 0.3542, "step": 9443 }, { "epoch": 0.5915532657886907, "grad_norm": 0.8807031281507498, "learning_rate": 3.7725725642960047e-06, "loss": 0.3898, "step": 9444 }, { "epoch": 0.591615903788033, "grad_norm": 0.8890342034589305, "learning_rate": 3.771589231231432e-06, "loss": 0.4258, "step": 9445 }, { "epoch": 0.5916785417873753, "grad_norm": 0.627356124502783, "learning_rate": 3.770605948728473e-06, "loss": 0.4454, "step": 9446 }, { "epoch": 0.5917411797867176, "grad_norm": 0.7950358842777119, "learning_rate": 3.769622716827602e-06, "loss": 0.3671, "step": 9447 }, { "epoch": 0.59180381778606, "grad_norm": 0.8939115243339973, "learning_rate": 3.7686395355692893e-06, "loss": 0.3932, "step": 9448 }, { "epoch": 0.5918664557854022, "grad_norm": 0.848168790136014, "learning_rate": 3.7676564049940006e-06, "loss": 0.4312, "step": 9449 }, { "epoch": 0.5919290937847446, "grad_norm": 0.8731012605389756, "learning_rate": 3.7666733251422037e-06, "loss": 0.4041, "step": 9450 }, { "epoch": 0.5919917317840868, "grad_norm": 0.8399871538934409, "learning_rate": 3.7656902960543594e-06, "loss": 0.3793, "step": 9451 }, { "epoch": 0.5920543697834291, "grad_norm": 0.7756633826687063, "learning_rate": 3.764707317770935e-06, "loss": 0.3746, "step": 9452 }, { "epoch": 0.5921170077827714, "grad_norm": 0.8503247446723915, "learning_rate": 3.763724390332385e-06, "loss": 0.3713, "step": 9453 }, { "epoch": 0.5921796457821137, "grad_norm": 0.7750107331050015, "learning_rate": 3.7627415137791684e-06, "loss": 0.3659, "step": 9454 }, { "epoch": 0.592242283781456, "grad_norm": 0.8757175808585556, "learning_rate": 3.761758688151742e-06, "loss": 0.4447, "step": 9455 }, { "epoch": 0.5923049217807983, "grad_norm": 0.8050960732478746, "learning_rate": 3.7607759134905577e-06, "loss": 0.3914, "step": 9456 }, { "epoch": 0.5923675597801407, "grad_norm": 0.8849930608928874, "learning_rate": 3.759793189836066e-06, "loss": 0.3915, "step": 9457 }, { "epoch": 0.5924301977794829, "grad_norm": 0.7605465464536162, "learning_rate": 3.7588105172287165e-06, "loss": 0.3573, "step": 9458 }, { "epoch": 0.5924928357788253, "grad_norm": 0.6268822566713679, "learning_rate": 3.7578278957089576e-06, "loss": 0.4567, "step": 9459 }, { "epoch": 0.5925554737781675, "grad_norm": 0.8704499683119844, "learning_rate": 3.7568453253172305e-06, "loss": 0.4375, "step": 9460 }, { "epoch": 0.5926181117775098, "grad_norm": 0.8160474499310038, "learning_rate": 3.755862806093983e-06, "loss": 0.3982, "step": 9461 }, { "epoch": 0.5926807497768521, "grad_norm": 0.8267396002405445, "learning_rate": 3.7548803380796527e-06, "loss": 0.4075, "step": 9462 }, { "epoch": 0.5927433877761944, "grad_norm": 0.8397192672001312, "learning_rate": 3.753897921314679e-06, "loss": 0.4105, "step": 9463 }, { "epoch": 0.5928060257755368, "grad_norm": 0.760634142549383, "learning_rate": 3.752915555839498e-06, "loss": 0.3645, "step": 9464 }, { "epoch": 0.592868663774879, "grad_norm": 0.8868627640653154, "learning_rate": 3.7519332416945444e-06, "loss": 0.4089, "step": 9465 }, { "epoch": 0.5929313017742214, "grad_norm": 0.84923344775496, "learning_rate": 3.7509509789202505e-06, "loss": 0.3669, "step": 9466 }, { "epoch": 0.5929939397735636, "grad_norm": 0.6040476325027667, "learning_rate": 3.7499687675570453e-06, "loss": 0.4483, "step": 9467 }, { "epoch": 0.5930565777729059, "grad_norm": 0.8468092969315549, "learning_rate": 3.7489866076453585e-06, "loss": 0.4021, "step": 9468 }, { "epoch": 0.5931192157722482, "grad_norm": 0.8008068149367267, "learning_rate": 3.7480044992256126e-06, "loss": 0.3632, "step": 9469 }, { "epoch": 0.5931818537715905, "grad_norm": 0.8332358433011937, "learning_rate": 3.747022442338236e-06, "loss": 0.431, "step": 9470 }, { "epoch": 0.5932444917709329, "grad_norm": 0.7669645437007613, "learning_rate": 3.746040437023649e-06, "loss": 0.3879, "step": 9471 }, { "epoch": 0.5933071297702751, "grad_norm": 0.8515004784850266, "learning_rate": 3.745058483322269e-06, "loss": 0.4092, "step": 9472 }, { "epoch": 0.5933697677696175, "grad_norm": 0.9022660484934328, "learning_rate": 3.7440765812745157e-06, "loss": 0.4117, "step": 9473 }, { "epoch": 0.5934324057689597, "grad_norm": 0.8388213361983982, "learning_rate": 3.743094730920803e-06, "loss": 0.3953, "step": 9474 }, { "epoch": 0.5934950437683021, "grad_norm": 0.9402203610280374, "learning_rate": 3.742112932301544e-06, "loss": 0.4668, "step": 9475 }, { "epoch": 0.5935576817676443, "grad_norm": 0.8576448348935242, "learning_rate": 3.7411311854571513e-06, "loss": 0.4222, "step": 9476 }, { "epoch": 0.5936203197669866, "grad_norm": 0.8281108180985431, "learning_rate": 3.740149490428032e-06, "loss": 0.3769, "step": 9477 }, { "epoch": 0.593682957766329, "grad_norm": 0.7889098866517851, "learning_rate": 3.7391678472545945e-06, "loss": 0.3916, "step": 9478 }, { "epoch": 0.5937455957656712, "grad_norm": 0.8502482253384477, "learning_rate": 3.7381862559772396e-06, "loss": 0.3606, "step": 9479 }, { "epoch": 0.5938082337650136, "grad_norm": 0.772656109352512, "learning_rate": 3.737204716636374e-06, "loss": 0.3448, "step": 9480 }, { "epoch": 0.5938708717643558, "grad_norm": 0.9104187566525882, "learning_rate": 3.7362232292723975e-06, "loss": 0.4373, "step": 9481 }, { "epoch": 0.5939335097636982, "grad_norm": 0.8634733526289926, "learning_rate": 3.735241793925707e-06, "loss": 0.4477, "step": 9482 }, { "epoch": 0.5939961477630404, "grad_norm": 0.7584551463849788, "learning_rate": 3.7342604106366996e-06, "loss": 0.3401, "step": 9483 }, { "epoch": 0.5940587857623828, "grad_norm": 0.8303498776980013, "learning_rate": 3.733279079445768e-06, "loss": 0.3808, "step": 9484 }, { "epoch": 0.594121423761725, "grad_norm": 0.6337941723253169, "learning_rate": 3.7322978003933052e-06, "loss": 0.4644, "step": 9485 }, { "epoch": 0.5941840617610673, "grad_norm": 0.8258789887628346, "learning_rate": 3.7313165735197e-06, "loss": 0.4049, "step": 9486 }, { "epoch": 0.5942466997604097, "grad_norm": 0.8139026906025005, "learning_rate": 3.730335398865339e-06, "loss": 0.3783, "step": 9487 }, { "epoch": 0.5943093377597519, "grad_norm": 1.0233225130584365, "learning_rate": 3.7293542764706094e-06, "loss": 0.4068, "step": 9488 }, { "epoch": 0.5943719757590943, "grad_norm": 0.7989597572381435, "learning_rate": 3.7283732063758926e-06, "loss": 0.3809, "step": 9489 }, { "epoch": 0.5944346137584365, "grad_norm": 0.8275141213857731, "learning_rate": 3.7273921886215724e-06, "loss": 0.3988, "step": 9490 }, { "epoch": 0.5944972517577789, "grad_norm": 0.8285886262625258, "learning_rate": 3.7264112232480254e-06, "loss": 0.3736, "step": 9491 }, { "epoch": 0.5945598897571212, "grad_norm": 0.8390167389170006, "learning_rate": 3.7254303102956285e-06, "loss": 0.398, "step": 9492 }, { "epoch": 0.5946225277564634, "grad_norm": 0.7933910312881017, "learning_rate": 3.7244494498047575e-06, "loss": 0.3674, "step": 9493 }, { "epoch": 0.5946851657558058, "grad_norm": 0.8436520187118405, "learning_rate": 3.7234686418157834e-06, "loss": 0.3821, "step": 9494 }, { "epoch": 0.594747803755148, "grad_norm": 0.8230577918998445, "learning_rate": 3.722487886369077e-06, "loss": 0.3656, "step": 9495 }, { "epoch": 0.5948104417544904, "grad_norm": 0.7794557369419104, "learning_rate": 3.7215071835050055e-06, "loss": 0.3815, "step": 9496 }, { "epoch": 0.5948730797538326, "grad_norm": 0.8890008307668446, "learning_rate": 3.720526533263936e-06, "loss": 0.4152, "step": 9497 }, { "epoch": 0.594935717753175, "grad_norm": 0.7717513542816439, "learning_rate": 3.7195459356862314e-06, "loss": 0.3647, "step": 9498 }, { "epoch": 0.5949983557525172, "grad_norm": 0.8575249711820822, "learning_rate": 3.718565390812255e-06, "loss": 0.408, "step": 9499 }, { "epoch": 0.5950609937518596, "grad_norm": 0.8193440222927083, "learning_rate": 3.717584898682365e-06, "loss": 0.3657, "step": 9500 }, { "epoch": 0.5951236317512019, "grad_norm": 0.8833132776292795, "learning_rate": 3.7166044593369166e-06, "loss": 0.396, "step": 9501 }, { "epoch": 0.5951862697505441, "grad_norm": 0.8370452784796697, "learning_rate": 3.715624072816269e-06, "loss": 0.4113, "step": 9502 }, { "epoch": 0.5952489077498865, "grad_norm": 0.8340777373402584, "learning_rate": 3.7146437391607727e-06, "loss": 0.3977, "step": 9503 }, { "epoch": 0.5953115457492287, "grad_norm": 0.8870956818192562, "learning_rate": 3.7136634584107787e-06, "loss": 0.4298, "step": 9504 }, { "epoch": 0.5953741837485711, "grad_norm": 0.7956984755005726, "learning_rate": 3.712683230606634e-06, "loss": 0.3898, "step": 9505 }, { "epoch": 0.5954368217479133, "grad_norm": 0.828677090603777, "learning_rate": 3.7117030557886857e-06, "loss": 0.4182, "step": 9506 }, { "epoch": 0.5954994597472557, "grad_norm": 0.7821305625638973, "learning_rate": 3.7107229339972817e-06, "loss": 0.379, "step": 9507 }, { "epoch": 0.595562097746598, "grad_norm": 0.8581747992376428, "learning_rate": 3.7097428652727596e-06, "loss": 0.4136, "step": 9508 }, { "epoch": 0.5956247357459403, "grad_norm": 0.8144620069815539, "learning_rate": 3.708762849655462e-06, "loss": 0.4002, "step": 9509 }, { "epoch": 0.5956873737452826, "grad_norm": 0.786103301405273, "learning_rate": 3.707782887185723e-06, "loss": 0.4003, "step": 9510 }, { "epoch": 0.5957500117446248, "grad_norm": 0.6403882062705168, "learning_rate": 3.7068029779038815e-06, "loss": 0.4308, "step": 9511 }, { "epoch": 0.5958126497439672, "grad_norm": 0.8425261952745727, "learning_rate": 3.70582312185027e-06, "loss": 0.3928, "step": 9512 }, { "epoch": 0.5958752877433094, "grad_norm": 0.8424423674101679, "learning_rate": 3.704843319065218e-06, "loss": 0.4174, "step": 9513 }, { "epoch": 0.5959379257426518, "grad_norm": 0.8693277191337955, "learning_rate": 3.7038635695890556e-06, "loss": 0.4516, "step": 9514 }, { "epoch": 0.5960005637419941, "grad_norm": 0.7891615282858704, "learning_rate": 3.7028838734621077e-06, "loss": 0.3558, "step": 9515 }, { "epoch": 0.5960632017413364, "grad_norm": 0.8972776921305003, "learning_rate": 3.7019042307247023e-06, "loss": 0.4088, "step": 9516 }, { "epoch": 0.5961258397406787, "grad_norm": 0.8228242525462338, "learning_rate": 3.7009246414171594e-06, "loss": 0.3926, "step": 9517 }, { "epoch": 0.596188477740021, "grad_norm": 0.9367546038982905, "learning_rate": 3.6999451055797987e-06, "loss": 0.4157, "step": 9518 }, { "epoch": 0.5962511157393633, "grad_norm": 0.8423935749449137, "learning_rate": 3.69896562325294e-06, "loss": 0.4024, "step": 9519 }, { "epoch": 0.5963137537387055, "grad_norm": 0.861903403552497, "learning_rate": 3.6979861944768974e-06, "loss": 0.377, "step": 9520 }, { "epoch": 0.5963763917380479, "grad_norm": 0.7902364947374011, "learning_rate": 3.697006819291985e-06, "loss": 0.3707, "step": 9521 }, { "epoch": 0.5964390297373902, "grad_norm": 0.793768571930897, "learning_rate": 3.6960274977385123e-06, "loss": 0.378, "step": 9522 }, { "epoch": 0.5965016677367325, "grad_norm": 0.8371608646844813, "learning_rate": 3.6950482298567913e-06, "loss": 0.403, "step": 9523 }, { "epoch": 0.5965643057360748, "grad_norm": 0.7931060162781939, "learning_rate": 3.6940690156871274e-06, "loss": 0.4132, "step": 9524 }, { "epoch": 0.5966269437354171, "grad_norm": 0.8701771980986978, "learning_rate": 3.6930898552698223e-06, "loss": 0.4017, "step": 9525 }, { "epoch": 0.5966895817347594, "grad_norm": 0.8403652495239872, "learning_rate": 3.692110748645184e-06, "loss": 0.4108, "step": 9526 }, { "epoch": 0.5967522197341016, "grad_norm": 0.8319355590419495, "learning_rate": 3.6911316958535094e-06, "loss": 0.4117, "step": 9527 }, { "epoch": 0.596814857733444, "grad_norm": 0.8175857936130638, "learning_rate": 3.6901526969350975e-06, "loss": 0.4001, "step": 9528 }, { "epoch": 0.5968774957327863, "grad_norm": 0.6005760820867894, "learning_rate": 3.689173751930244e-06, "loss": 0.4772, "step": 9529 }, { "epoch": 0.5969401337321286, "grad_norm": 0.8715194893183594, "learning_rate": 3.688194860879242e-06, "loss": 0.4, "step": 9530 }, { "epoch": 0.5970027717314709, "grad_norm": 0.6588809133179446, "learning_rate": 3.687216023822382e-06, "loss": 0.4438, "step": 9531 }, { "epoch": 0.5970654097308132, "grad_norm": 0.6240061007302489, "learning_rate": 3.6862372407999556e-06, "loss": 0.4496, "step": 9532 }, { "epoch": 0.5971280477301555, "grad_norm": 0.8883927051904333, "learning_rate": 3.6852585118522478e-06, "loss": 0.4145, "step": 9533 }, { "epoch": 0.5971906857294978, "grad_norm": 0.6704239600954294, "learning_rate": 3.6842798370195415e-06, "loss": 0.4575, "step": 9534 }, { "epoch": 0.5972533237288401, "grad_norm": 0.8218919228067874, "learning_rate": 3.6833012163421235e-06, "loss": 0.3719, "step": 9535 }, { "epoch": 0.5973159617281824, "grad_norm": 0.8017756892051985, "learning_rate": 3.682322649860271e-06, "loss": 0.3934, "step": 9536 }, { "epoch": 0.5973785997275247, "grad_norm": 0.8862374378723851, "learning_rate": 3.6813441376142638e-06, "loss": 0.4384, "step": 9537 }, { "epoch": 0.597441237726867, "grad_norm": 0.921630330863919, "learning_rate": 3.6803656796443766e-06, "loss": 0.4205, "step": 9538 }, { "epoch": 0.5975038757262093, "grad_norm": 0.8866821112381152, "learning_rate": 3.679387275990882e-06, "loss": 0.4035, "step": 9539 }, { "epoch": 0.5975665137255516, "grad_norm": 0.7721333598036609, "learning_rate": 3.678408926694053e-06, "loss": 0.3682, "step": 9540 }, { "epoch": 0.597629151724894, "grad_norm": 0.8077549466257694, "learning_rate": 3.6774306317941577e-06, "loss": 0.358, "step": 9541 }, { "epoch": 0.5976917897242362, "grad_norm": 0.8577152519304525, "learning_rate": 3.6764523913314635e-06, "loss": 0.3646, "step": 9542 }, { "epoch": 0.5977544277235786, "grad_norm": 0.8318066595261245, "learning_rate": 3.675474205346233e-06, "loss": 0.3751, "step": 9543 }, { "epoch": 0.5978170657229208, "grad_norm": 0.615956177021101, "learning_rate": 3.67449607387873e-06, "loss": 0.4506, "step": 9544 }, { "epoch": 0.5978797037222631, "grad_norm": 0.6298886932727799, "learning_rate": 3.673517996969216e-06, "loss": 0.4698, "step": 9545 }, { "epoch": 0.5979423417216054, "grad_norm": 0.8663668682324432, "learning_rate": 3.672539974657948e-06, "loss": 0.4367, "step": 9546 }, { "epoch": 0.5980049797209477, "grad_norm": 0.8916734013180277, "learning_rate": 3.6715620069851804e-06, "loss": 0.4007, "step": 9547 }, { "epoch": 0.59806761772029, "grad_norm": 0.78975835548435, "learning_rate": 3.6705840939911665e-06, "loss": 0.3841, "step": 9548 }, { "epoch": 0.5981302557196323, "grad_norm": 0.8155362605955118, "learning_rate": 3.669606235716159e-06, "loss": 0.4167, "step": 9549 }, { "epoch": 0.5981928937189747, "grad_norm": 0.8787309027916234, "learning_rate": 3.6686284322004056e-06, "loss": 0.4213, "step": 9550 }, { "epoch": 0.5982555317183169, "grad_norm": 0.8230223901511561, "learning_rate": 3.667650683484153e-06, "loss": 0.4094, "step": 9551 }, { "epoch": 0.5983181697176592, "grad_norm": 0.7967650696436859, "learning_rate": 3.666672989607644e-06, "loss": 0.3965, "step": 9552 }, { "epoch": 0.5983808077170015, "grad_norm": 0.8456189579865719, "learning_rate": 3.6656953506111226e-06, "loss": 0.3986, "step": 9553 }, { "epoch": 0.5984434457163438, "grad_norm": 0.833354623484281, "learning_rate": 3.6647177665348294e-06, "loss": 0.406, "step": 9554 }, { "epoch": 0.5985060837156861, "grad_norm": 0.8513875893534262, "learning_rate": 3.6637402374190013e-06, "loss": 0.4485, "step": 9555 }, { "epoch": 0.5985687217150284, "grad_norm": 0.7705486947057313, "learning_rate": 3.662762763303872e-06, "loss": 0.3886, "step": 9556 }, { "epoch": 0.5986313597143708, "grad_norm": 0.8835269357940436, "learning_rate": 3.661785344229676e-06, "loss": 0.3766, "step": 9557 }, { "epoch": 0.598693997713713, "grad_norm": 0.8692560382718213, "learning_rate": 3.6608079802366435e-06, "loss": 0.4248, "step": 9558 }, { "epoch": 0.5987566357130554, "grad_norm": 0.891332880557128, "learning_rate": 3.6598306713650035e-06, "loss": 0.4002, "step": 9559 }, { "epoch": 0.5988192737123976, "grad_norm": 0.8886426178934678, "learning_rate": 3.6588534176549806e-06, "loss": 0.412, "step": 9560 }, { "epoch": 0.5988819117117399, "grad_norm": 0.8217234745649474, "learning_rate": 3.657876219146801e-06, "loss": 0.3961, "step": 9561 }, { "epoch": 0.5989445497110822, "grad_norm": 0.8619632633168278, "learning_rate": 3.6568990758806833e-06, "loss": 0.3946, "step": 9562 }, { "epoch": 0.5990071877104245, "grad_norm": 0.854135534415068, "learning_rate": 3.655921987896851e-06, "loss": 0.3925, "step": 9563 }, { "epoch": 0.5990698257097669, "grad_norm": 0.8486975708022418, "learning_rate": 3.6549449552355197e-06, "loss": 0.3989, "step": 9564 }, { "epoch": 0.5991324637091091, "grad_norm": 0.8211524165616707, "learning_rate": 3.653967977936902e-06, "loss": 0.3778, "step": 9565 }, { "epoch": 0.5991951017084515, "grad_norm": 0.8136029636581908, "learning_rate": 3.652991056041213e-06, "loss": 0.3586, "step": 9566 }, { "epoch": 0.5992577397077937, "grad_norm": 0.8283909139487015, "learning_rate": 3.652014189588663e-06, "loss": 0.4039, "step": 9567 }, { "epoch": 0.5993203777071361, "grad_norm": 0.8568707250421048, "learning_rate": 3.6510373786194586e-06, "loss": 0.4078, "step": 9568 }, { "epoch": 0.5993830157064783, "grad_norm": 0.8455894458273573, "learning_rate": 3.6500606231738052e-06, "loss": 0.4011, "step": 9569 }, { "epoch": 0.5994456537058206, "grad_norm": 0.8834199370739313, "learning_rate": 3.6490839232919085e-06, "loss": 0.4195, "step": 9570 }, { "epoch": 0.599508291705163, "grad_norm": 0.8852088892837442, "learning_rate": 3.6481072790139647e-06, "loss": 0.3729, "step": 9571 }, { "epoch": 0.5995709297045052, "grad_norm": 0.8068068019733186, "learning_rate": 3.6471306903801797e-06, "loss": 0.3814, "step": 9572 }, { "epoch": 0.5996335677038476, "grad_norm": 0.8852697432682156, "learning_rate": 3.6461541574307467e-06, "loss": 0.4125, "step": 9573 }, { "epoch": 0.5996962057031898, "grad_norm": 0.8064310450742124, "learning_rate": 3.645177680205858e-06, "loss": 0.4157, "step": 9574 }, { "epoch": 0.5997588437025322, "grad_norm": 0.8147988973628107, "learning_rate": 3.6442012587457086e-06, "loss": 0.3481, "step": 9575 }, { "epoch": 0.5998214817018744, "grad_norm": 0.9183682649586963, "learning_rate": 3.6432248930904878e-06, "loss": 0.3835, "step": 9576 }, { "epoch": 0.5998841197012167, "grad_norm": 0.7870756721571569, "learning_rate": 3.6422485832803804e-06, "loss": 0.3744, "step": 9577 }, { "epoch": 0.599946757700559, "grad_norm": 0.6304989667842099, "learning_rate": 3.641272329355575e-06, "loss": 0.4614, "step": 9578 }, { "epoch": 0.6000093956999013, "grad_norm": 0.8638603811887173, "learning_rate": 3.6402961313562525e-06, "loss": 0.3796, "step": 9579 }, { "epoch": 0.6000720336992437, "grad_norm": 0.8886887615702299, "learning_rate": 3.6393199893225912e-06, "loss": 0.366, "step": 9580 }, { "epoch": 0.6001346716985859, "grad_norm": 0.8042618091287869, "learning_rate": 3.6383439032947743e-06, "loss": 0.4051, "step": 9581 }, { "epoch": 0.6001973096979283, "grad_norm": 0.8516525896948317, "learning_rate": 3.637367873312975e-06, "loss": 0.3609, "step": 9582 }, { "epoch": 0.6002599476972705, "grad_norm": 0.8043546614597497, "learning_rate": 3.6363918994173653e-06, "loss": 0.384, "step": 9583 }, { "epoch": 0.6003225856966129, "grad_norm": 0.7979289646080456, "learning_rate": 3.6354159816481193e-06, "loss": 0.3604, "step": 9584 }, { "epoch": 0.6003852236959552, "grad_norm": 0.8580939208378414, "learning_rate": 3.634440120045405e-06, "loss": 0.4003, "step": 9585 }, { "epoch": 0.6004478616952974, "grad_norm": 0.7846405047787695, "learning_rate": 3.6334643146493875e-06, "loss": 0.3723, "step": 9586 }, { "epoch": 0.6005104996946398, "grad_norm": 0.7960274327288812, "learning_rate": 3.632488565500234e-06, "loss": 0.3935, "step": 9587 }, { "epoch": 0.600573137693982, "grad_norm": 0.8665987951593725, "learning_rate": 3.631512872638104e-06, "loss": 0.3979, "step": 9588 }, { "epoch": 0.6006357756933244, "grad_norm": 0.8217486749044349, "learning_rate": 3.630537236103158e-06, "loss": 0.3887, "step": 9589 }, { "epoch": 0.6006984136926666, "grad_norm": 0.8543184415357866, "learning_rate": 3.629561655935554e-06, "loss": 0.3914, "step": 9590 }, { "epoch": 0.600761051692009, "grad_norm": 0.8223504747090821, "learning_rate": 3.6285861321754455e-06, "loss": 0.3727, "step": 9591 }, { "epoch": 0.6008236896913512, "grad_norm": 0.7973731334375687, "learning_rate": 3.6276106648629884e-06, "loss": 0.3505, "step": 9592 }, { "epoch": 0.6008863276906936, "grad_norm": 0.8238963511537256, "learning_rate": 3.6266352540383303e-06, "loss": 0.3854, "step": 9593 }, { "epoch": 0.6009489656900359, "grad_norm": 0.8468260436768466, "learning_rate": 3.6256598997416203e-06, "loss": 0.4066, "step": 9594 }, { "epoch": 0.6010116036893781, "grad_norm": 0.8554000358730725, "learning_rate": 3.6246846020130033e-06, "loss": 0.3912, "step": 9595 }, { "epoch": 0.6010742416887205, "grad_norm": 0.8107039907994752, "learning_rate": 3.6237093608926234e-06, "loss": 0.416, "step": 9596 }, { "epoch": 0.6011368796880627, "grad_norm": 0.7814688809840568, "learning_rate": 3.6227341764206225e-06, "loss": 0.3757, "step": 9597 }, { "epoch": 0.6011995176874051, "grad_norm": 0.8732152938828355, "learning_rate": 3.6217590486371364e-06, "loss": 0.3822, "step": 9598 }, { "epoch": 0.6012621556867473, "grad_norm": 0.834926256365086, "learning_rate": 3.620783977582305e-06, "loss": 0.365, "step": 9599 }, { "epoch": 0.6013247936860897, "grad_norm": 0.9084849503960782, "learning_rate": 3.619808963296261e-06, "loss": 0.4147, "step": 9600 }, { "epoch": 0.601387431685432, "grad_norm": 0.7114398305689339, "learning_rate": 3.6188340058191367e-06, "loss": 0.3706, "step": 9601 }, { "epoch": 0.6014500696847742, "grad_norm": 0.8453936973535966, "learning_rate": 3.617859105191061e-06, "loss": 0.3918, "step": 9602 }, { "epoch": 0.6015127076841166, "grad_norm": 0.8162154463793851, "learning_rate": 3.6168842614521606e-06, "loss": 0.3697, "step": 9603 }, { "epoch": 0.6015753456834588, "grad_norm": 0.6506604899101996, "learning_rate": 3.6159094746425616e-06, "loss": 0.4512, "step": 9604 }, { "epoch": 0.6016379836828012, "grad_norm": 0.7982046872804865, "learning_rate": 3.614934744802385e-06, "loss": 0.3936, "step": 9605 }, { "epoch": 0.6017006216821434, "grad_norm": 0.7179636280204259, "learning_rate": 3.6139600719717515e-06, "loss": 0.4594, "step": 9606 }, { "epoch": 0.6017632596814858, "grad_norm": 0.8679618139448028, "learning_rate": 3.6129854561907786e-06, "loss": 0.4035, "step": 9607 }, { "epoch": 0.6018258976808281, "grad_norm": 0.8924998507843402, "learning_rate": 3.6120108974995804e-06, "loss": 0.3991, "step": 9608 }, { "epoch": 0.6018885356801704, "grad_norm": 0.8347072823109402, "learning_rate": 3.611036395938272e-06, "loss": 0.425, "step": 9609 }, { "epoch": 0.6019511736795127, "grad_norm": 0.9209942459325431, "learning_rate": 3.6100619515469644e-06, "loss": 0.4199, "step": 9610 }, { "epoch": 0.6020138116788549, "grad_norm": 0.8166827203686606, "learning_rate": 3.6090875643657646e-06, "loss": 0.3668, "step": 9611 }, { "epoch": 0.6020764496781973, "grad_norm": 0.8336717128636475, "learning_rate": 3.608113234434778e-06, "loss": 0.3979, "step": 9612 }, { "epoch": 0.6021390876775395, "grad_norm": 0.8618165610151592, "learning_rate": 3.60713896179411e-06, "loss": 0.3838, "step": 9613 }, { "epoch": 0.6022017256768819, "grad_norm": 0.7921176816242823, "learning_rate": 3.6061647464838613e-06, "loss": 0.3595, "step": 9614 }, { "epoch": 0.6022643636762242, "grad_norm": 0.842328117078862, "learning_rate": 3.6051905885441297e-06, "loss": 0.3892, "step": 9615 }, { "epoch": 0.6023270016755665, "grad_norm": 0.8438838737828993, "learning_rate": 3.6042164880150114e-06, "loss": 0.469, "step": 9616 }, { "epoch": 0.6023896396749088, "grad_norm": 0.8167333640402633, "learning_rate": 3.6032424449366006e-06, "loss": 0.3813, "step": 9617 }, { "epoch": 0.6024522776742511, "grad_norm": 0.8352752535267886, "learning_rate": 3.6022684593489922e-06, "loss": 0.364, "step": 9618 }, { "epoch": 0.6025149156735934, "grad_norm": 0.7169465918563471, "learning_rate": 3.6012945312922733e-06, "loss": 0.4594, "step": 9619 }, { "epoch": 0.6025775536729356, "grad_norm": 0.8154730263712073, "learning_rate": 3.6003206608065313e-06, "loss": 0.4266, "step": 9620 }, { "epoch": 0.602640191672278, "grad_norm": 0.8764435463371122, "learning_rate": 3.5993468479318495e-06, "loss": 0.3729, "step": 9621 }, { "epoch": 0.6027028296716203, "grad_norm": 0.7756398272231071, "learning_rate": 3.5983730927083126e-06, "loss": 0.3618, "step": 9622 }, { "epoch": 0.6027654676709626, "grad_norm": 0.863466457340737, "learning_rate": 3.5973993951759987e-06, "loss": 0.3977, "step": 9623 }, { "epoch": 0.6028281056703049, "grad_norm": 0.8073139402399383, "learning_rate": 3.596425755374986e-06, "loss": 0.3562, "step": 9624 }, { "epoch": 0.6028907436696472, "grad_norm": 0.8104859733718505, "learning_rate": 3.5954521733453503e-06, "loss": 0.3919, "step": 9625 }, { "epoch": 0.6029533816689895, "grad_norm": 0.7762998907569248, "learning_rate": 3.5944786491271617e-06, "loss": 0.3843, "step": 9626 }, { "epoch": 0.6030160196683318, "grad_norm": 0.8544914970410309, "learning_rate": 3.593505182760495e-06, "loss": 0.3956, "step": 9627 }, { "epoch": 0.6030786576676741, "grad_norm": 0.8208390876517021, "learning_rate": 3.592531774285416e-06, "loss": 0.3355, "step": 9628 }, { "epoch": 0.6031412956670164, "grad_norm": 0.7844438831544356, "learning_rate": 3.5915584237419887e-06, "loss": 0.3923, "step": 9629 }, { "epoch": 0.6032039336663587, "grad_norm": 0.8347378194490674, "learning_rate": 3.59058513117028e-06, "loss": 0.3614, "step": 9630 }, { "epoch": 0.603266571665701, "grad_norm": 0.8543432083166389, "learning_rate": 3.589611896610349e-06, "loss": 0.4087, "step": 9631 }, { "epoch": 0.6033292096650433, "grad_norm": 0.9123922950967838, "learning_rate": 3.588638720102253e-06, "loss": 0.4152, "step": 9632 }, { "epoch": 0.6033918476643856, "grad_norm": 0.7922271917990966, "learning_rate": 3.587665601686049e-06, "loss": 0.3913, "step": 9633 }, { "epoch": 0.603454485663728, "grad_norm": 0.8407933360886621, "learning_rate": 3.586692541401792e-06, "loss": 0.3838, "step": 9634 }, { "epoch": 0.6035171236630702, "grad_norm": 0.8209278792319589, "learning_rate": 3.5857195392895303e-06, "loss": 0.4568, "step": 9635 }, { "epoch": 0.6035797616624125, "grad_norm": 0.8622343159893129, "learning_rate": 3.584746595389317e-06, "loss": 0.4463, "step": 9636 }, { "epoch": 0.6036423996617548, "grad_norm": 0.8120149555028301, "learning_rate": 3.5837737097411963e-06, "loss": 0.4269, "step": 9637 }, { "epoch": 0.6037050376610971, "grad_norm": 0.8840096458509763, "learning_rate": 3.5828008823852118e-06, "loss": 0.3872, "step": 9638 }, { "epoch": 0.6037676756604394, "grad_norm": 0.8224846539805668, "learning_rate": 3.581828113361407e-06, "loss": 0.3854, "step": 9639 }, { "epoch": 0.6038303136597817, "grad_norm": 0.9157380847166438, "learning_rate": 3.5808554027098196e-06, "loss": 0.4212, "step": 9640 }, { "epoch": 0.603892951659124, "grad_norm": 0.5842337134352109, "learning_rate": 3.579882750470488e-06, "loss": 0.4696, "step": 9641 }, { "epoch": 0.6039555896584663, "grad_norm": 0.8123910131847504, "learning_rate": 3.578910156683445e-06, "loss": 0.3557, "step": 9642 }, { "epoch": 0.6040182276578087, "grad_norm": 0.8072377170760772, "learning_rate": 3.577937621388724e-06, "loss": 0.3874, "step": 9643 }, { "epoch": 0.6040808656571509, "grad_norm": 0.8252856511342065, "learning_rate": 3.5769651446263533e-06, "loss": 0.3974, "step": 9644 }, { "epoch": 0.6041435036564932, "grad_norm": 0.9085029999267213, "learning_rate": 3.5759927264363624e-06, "loss": 0.4052, "step": 9645 }, { "epoch": 0.6042061416558355, "grad_norm": 0.8151278442976209, "learning_rate": 3.5750203668587763e-06, "loss": 0.3736, "step": 9646 }, { "epoch": 0.6042687796551778, "grad_norm": 0.8446415651455651, "learning_rate": 3.574048065933615e-06, "loss": 0.423, "step": 9647 }, { "epoch": 0.6043314176545201, "grad_norm": 0.8439053864206462, "learning_rate": 3.573075823700901e-06, "loss": 0.3885, "step": 9648 }, { "epoch": 0.6043940556538624, "grad_norm": 0.7770111906514254, "learning_rate": 3.5721036402006504e-06, "loss": 0.3725, "step": 9649 }, { "epoch": 0.6044566936532048, "grad_norm": 0.8115748031712515, "learning_rate": 3.5711315154728787e-06, "loss": 0.3957, "step": 9650 }, { "epoch": 0.604519331652547, "grad_norm": 0.9089311651471786, "learning_rate": 3.5701594495575996e-06, "loss": 0.3869, "step": 9651 }, { "epoch": 0.6045819696518894, "grad_norm": 0.8725336598712466, "learning_rate": 3.569187442494823e-06, "loss": 0.3534, "step": 9652 }, { "epoch": 0.6046446076512316, "grad_norm": 0.8574064844448988, "learning_rate": 3.5682154943245573e-06, "loss": 0.3853, "step": 9653 }, { "epoch": 0.6047072456505739, "grad_norm": 0.865206634551699, "learning_rate": 3.567243605086805e-06, "loss": 0.4021, "step": 9654 }, { "epoch": 0.6047698836499162, "grad_norm": 0.8094246797801979, "learning_rate": 3.5662717748215724e-06, "loss": 0.3956, "step": 9655 }, { "epoch": 0.6048325216492585, "grad_norm": 1.0485827203990103, "learning_rate": 3.565300003568862e-06, "loss": 0.4187, "step": 9656 }, { "epoch": 0.6048951596486009, "grad_norm": 0.8166640186620533, "learning_rate": 3.5643282913686687e-06, "loss": 0.3742, "step": 9657 }, { "epoch": 0.6049577976479431, "grad_norm": 0.81365600960788, "learning_rate": 3.5633566382609896e-06, "loss": 0.4164, "step": 9658 }, { "epoch": 0.6050204356472855, "grad_norm": 0.826857984067531, "learning_rate": 3.5623850442858176e-06, "loss": 0.4417, "step": 9659 }, { "epoch": 0.6050830736466277, "grad_norm": 0.794450685166045, "learning_rate": 3.5614135094831443e-06, "loss": 0.3676, "step": 9660 }, { "epoch": 0.60514571164597, "grad_norm": 0.6659454984540912, "learning_rate": 3.560442033892958e-06, "loss": 0.4595, "step": 9661 }, { "epoch": 0.6052083496453123, "grad_norm": 0.6705442107669021, "learning_rate": 3.559470617555244e-06, "loss": 0.4439, "step": 9662 }, { "epoch": 0.6052709876446546, "grad_norm": 0.8225150113328279, "learning_rate": 3.5584992605099864e-06, "loss": 0.3872, "step": 9663 }, { "epoch": 0.605333625643997, "grad_norm": 0.8666271635703626, "learning_rate": 3.557527962797167e-06, "loss": 0.3891, "step": 9664 }, { "epoch": 0.6053962636433392, "grad_norm": 0.8608107915849893, "learning_rate": 3.5565567244567656e-06, "loss": 0.3947, "step": 9665 }, { "epoch": 0.6054589016426816, "grad_norm": 0.7958299310308541, "learning_rate": 3.5555855455287576e-06, "loss": 0.3673, "step": 9666 }, { "epoch": 0.6055215396420238, "grad_norm": 0.8091326280492803, "learning_rate": 3.5546144260531164e-06, "loss": 0.4318, "step": 9667 }, { "epoch": 0.6055841776413662, "grad_norm": 0.7996416482459321, "learning_rate": 3.553643366069812e-06, "loss": 0.4065, "step": 9668 }, { "epoch": 0.6056468156407084, "grad_norm": 0.8275159691540213, "learning_rate": 3.5526723656188168e-06, "loss": 0.4621, "step": 9669 }, { "epoch": 0.6057094536400507, "grad_norm": 0.8608102874422148, "learning_rate": 3.551701424740096e-06, "loss": 0.4262, "step": 9670 }, { "epoch": 0.605772091639393, "grad_norm": 0.8071048780718468, "learning_rate": 3.550730543473612e-06, "loss": 0.392, "step": 9671 }, { "epoch": 0.6058347296387353, "grad_norm": 0.9783096324787565, "learning_rate": 3.549759721859327e-06, "loss": 0.4378, "step": 9672 }, { "epoch": 0.6058973676380777, "grad_norm": 0.8873433301721682, "learning_rate": 3.5487889599372022e-06, "loss": 0.3781, "step": 9673 }, { "epoch": 0.6059600056374199, "grad_norm": 0.9146620930280432, "learning_rate": 3.547818257747194e-06, "loss": 0.4011, "step": 9674 }, { "epoch": 0.6060226436367623, "grad_norm": 0.8611045308575306, "learning_rate": 3.5468476153292554e-06, "loss": 0.4009, "step": 9675 }, { "epoch": 0.6060852816361045, "grad_norm": 0.8390794536313446, "learning_rate": 3.5458770327233377e-06, "loss": 0.3869, "step": 9676 }, { "epoch": 0.6061479196354469, "grad_norm": 0.8065591163643271, "learning_rate": 3.5449065099693923e-06, "loss": 0.3869, "step": 9677 }, { "epoch": 0.6062105576347891, "grad_norm": 0.8088138973754808, "learning_rate": 3.5439360471073653e-06, "loss": 0.3627, "step": 9678 }, { "epoch": 0.6062731956341314, "grad_norm": 0.8489917489202126, "learning_rate": 3.542965644177201e-06, "loss": 0.3984, "step": 9679 }, { "epoch": 0.6063358336334738, "grad_norm": 0.8439624694958294, "learning_rate": 3.541995301218839e-06, "loss": 0.4102, "step": 9680 }, { "epoch": 0.606398471632816, "grad_norm": 0.7483815627521515, "learning_rate": 3.541025018272221e-06, "loss": 0.3472, "step": 9681 }, { "epoch": 0.6064611096321584, "grad_norm": 0.8514469763952743, "learning_rate": 3.540054795377286e-06, "loss": 0.3899, "step": 9682 }, { "epoch": 0.6065237476315006, "grad_norm": 0.8301024584142481, "learning_rate": 3.5390846325739668e-06, "loss": 0.4417, "step": 9683 }, { "epoch": 0.606586385630843, "grad_norm": 0.821005871923224, "learning_rate": 3.5381145299021944e-06, "loss": 0.4291, "step": 9684 }, { "epoch": 0.6066490236301852, "grad_norm": 0.84178640778587, "learning_rate": 3.5371444874018995e-06, "loss": 0.4027, "step": 9685 }, { "epoch": 0.6067116616295275, "grad_norm": 0.741312662342523, "learning_rate": 3.5361745051130093e-06, "loss": 0.3744, "step": 9686 }, { "epoch": 0.6067742996288699, "grad_norm": 0.8310305992765353, "learning_rate": 3.535204583075449e-06, "loss": 0.4324, "step": 9687 }, { "epoch": 0.6068369376282121, "grad_norm": 0.8775238484418909, "learning_rate": 3.534234721329138e-06, "loss": 0.3909, "step": 9688 }, { "epoch": 0.6068995756275545, "grad_norm": 0.8926100381242794, "learning_rate": 3.533264919913999e-06, "loss": 0.4157, "step": 9689 }, { "epoch": 0.6069622136268967, "grad_norm": 0.8708117373108819, "learning_rate": 3.532295178869946e-06, "loss": 0.4008, "step": 9690 }, { "epoch": 0.6070248516262391, "grad_norm": 0.783375585200903, "learning_rate": 3.531325498236898e-06, "loss": 0.3504, "step": 9691 }, { "epoch": 0.6070874896255813, "grad_norm": 0.8302752484203094, "learning_rate": 3.5303558780547653e-06, "loss": 0.398, "step": 9692 }, { "epoch": 0.6071501276249237, "grad_norm": 0.8277044068845071, "learning_rate": 3.529386318363458e-06, "loss": 0.3979, "step": 9693 }, { "epoch": 0.607212765624266, "grad_norm": 0.8066586863539971, "learning_rate": 3.528416819202881e-06, "loss": 0.3977, "step": 9694 }, { "epoch": 0.6072754036236082, "grad_norm": 0.8712630731101312, "learning_rate": 3.5274473806129417e-06, "loss": 0.4479, "step": 9695 }, { "epoch": 0.6073380416229506, "grad_norm": 0.8449047043372487, "learning_rate": 3.526478002633542e-06, "loss": 0.3775, "step": 9696 }, { "epoch": 0.6074006796222928, "grad_norm": 0.8321718492149762, "learning_rate": 3.5255086853045804e-06, "loss": 0.4157, "step": 9697 }, { "epoch": 0.6074633176216352, "grad_norm": 0.8289154611929592, "learning_rate": 3.5245394286659557e-06, "loss": 0.4036, "step": 9698 }, { "epoch": 0.6075259556209774, "grad_norm": 0.8126482990241076, "learning_rate": 3.5235702327575617e-06, "loss": 0.3979, "step": 9699 }, { "epoch": 0.6075885936203198, "grad_norm": 0.901266378226136, "learning_rate": 3.522601097619289e-06, "loss": 0.4001, "step": 9700 }, { "epoch": 0.6076512316196621, "grad_norm": 0.8097399479566938, "learning_rate": 3.5216320232910317e-06, "loss": 0.3812, "step": 9701 }, { "epoch": 0.6077138696190044, "grad_norm": 0.8471664499750191, "learning_rate": 3.520663009812674e-06, "loss": 0.4252, "step": 9702 }, { "epoch": 0.6077765076183467, "grad_norm": 0.779155122475151, "learning_rate": 3.519694057224102e-06, "loss": 0.3852, "step": 9703 }, { "epoch": 0.6078391456176889, "grad_norm": 0.7889769241469368, "learning_rate": 3.518725165565197e-06, "loss": 0.3958, "step": 9704 }, { "epoch": 0.6079017836170313, "grad_norm": 0.8423295741698041, "learning_rate": 3.517756334875839e-06, "loss": 0.3957, "step": 9705 }, { "epoch": 0.6079644216163735, "grad_norm": 0.813358978200895, "learning_rate": 3.5167875651959056e-06, "loss": 0.3738, "step": 9706 }, { "epoch": 0.6080270596157159, "grad_norm": 0.7812316642271944, "learning_rate": 3.5158188565652718e-06, "loss": 0.3311, "step": 9707 }, { "epoch": 0.6080896976150582, "grad_norm": 0.8900615245155519, "learning_rate": 3.514850209023809e-06, "loss": 0.4282, "step": 9708 }, { "epoch": 0.6081523356144005, "grad_norm": 0.9395384790120399, "learning_rate": 3.513881622611385e-06, "loss": 0.3772, "step": 9709 }, { "epoch": 0.6082149736137428, "grad_norm": 0.8054092091032726, "learning_rate": 3.5129130973678717e-06, "loss": 0.4279, "step": 9710 }, { "epoch": 0.608277611613085, "grad_norm": 0.7878079690640529, "learning_rate": 3.51194463333313e-06, "loss": 0.3728, "step": 9711 }, { "epoch": 0.6083402496124274, "grad_norm": 0.9903094437931644, "learning_rate": 3.5109762305470248e-06, "loss": 0.4343, "step": 9712 }, { "epoch": 0.6084028876117696, "grad_norm": 0.6949756107440873, "learning_rate": 3.5100078890494138e-06, "loss": 0.4598, "step": 9713 }, { "epoch": 0.608465525611112, "grad_norm": 0.8115056853472925, "learning_rate": 3.5090396088801544e-06, "loss": 0.3642, "step": 9714 }, { "epoch": 0.6085281636104543, "grad_norm": 0.8034507825936942, "learning_rate": 3.508071390079102e-06, "loss": 0.4007, "step": 9715 }, { "epoch": 0.6085908016097966, "grad_norm": 0.8394936564828512, "learning_rate": 3.507103232686108e-06, "loss": 0.3964, "step": 9716 }, { "epoch": 0.6086534396091389, "grad_norm": 0.8416980489230802, "learning_rate": 3.5061351367410223e-06, "loss": 0.3744, "step": 9717 }, { "epoch": 0.6087160776084812, "grad_norm": 0.8060512105231278, "learning_rate": 3.5051671022836893e-06, "loss": 0.3544, "step": 9718 }, { "epoch": 0.6087787156078235, "grad_norm": 0.793205456931171, "learning_rate": 3.504199129353957e-06, "loss": 0.3778, "step": 9719 }, { "epoch": 0.6088413536071657, "grad_norm": 0.9596707205192185, "learning_rate": 3.503231217991667e-06, "loss": 0.4431, "step": 9720 }, { "epoch": 0.6089039916065081, "grad_norm": 0.7997533123595434, "learning_rate": 3.502263368236658e-06, "loss": 0.3877, "step": 9721 }, { "epoch": 0.6089666296058504, "grad_norm": 0.7958846859366928, "learning_rate": 3.5012955801287675e-06, "loss": 0.3412, "step": 9722 }, { "epoch": 0.6090292676051927, "grad_norm": 0.8387820941012577, "learning_rate": 3.5003278537078268e-06, "loss": 0.3649, "step": 9723 }, { "epoch": 0.609091905604535, "grad_norm": 0.8814341978136442, "learning_rate": 3.4993601890136718e-06, "loss": 0.397, "step": 9724 }, { "epoch": 0.6091545436038773, "grad_norm": 0.7882195860513179, "learning_rate": 3.49839258608613e-06, "loss": 0.3774, "step": 9725 }, { "epoch": 0.6092171816032196, "grad_norm": 0.8018303317534629, "learning_rate": 3.4974250449650276e-06, "loss": 0.4038, "step": 9726 }, { "epoch": 0.609279819602562, "grad_norm": 0.7836940251136513, "learning_rate": 3.496457565690188e-06, "loss": 0.3601, "step": 9727 }, { "epoch": 0.6093424576019042, "grad_norm": 0.8911560696852069, "learning_rate": 3.4954901483014337e-06, "loss": 0.4104, "step": 9728 }, { "epoch": 0.6094050956012464, "grad_norm": 0.796598012138569, "learning_rate": 3.494522792838586e-06, "loss": 0.334, "step": 9729 }, { "epoch": 0.6094677336005888, "grad_norm": 0.827075507452092, "learning_rate": 3.49355549934146e-06, "loss": 0.3617, "step": 9730 }, { "epoch": 0.6095303715999311, "grad_norm": 0.7846067967712754, "learning_rate": 3.492588267849869e-06, "loss": 0.3688, "step": 9731 }, { "epoch": 0.6095930095992734, "grad_norm": 0.8239232324498017, "learning_rate": 3.491621098403623e-06, "loss": 0.4023, "step": 9732 }, { "epoch": 0.6096556475986157, "grad_norm": 0.7741889871765422, "learning_rate": 3.490653991042534e-06, "loss": 0.3865, "step": 9733 }, { "epoch": 0.609718285597958, "grad_norm": 0.8413030309448518, "learning_rate": 3.4896869458064073e-06, "loss": 0.3691, "step": 9734 }, { "epoch": 0.6097809235973003, "grad_norm": 0.8381235624660623, "learning_rate": 3.488719962735045e-06, "loss": 0.3937, "step": 9735 }, { "epoch": 0.6098435615966427, "grad_norm": 0.8722923140820434, "learning_rate": 3.487753041868249e-06, "loss": 0.3834, "step": 9736 }, { "epoch": 0.6099061995959849, "grad_norm": 0.75835529682703, "learning_rate": 3.4867861832458193e-06, "loss": 0.3982, "step": 9737 }, { "epoch": 0.6099688375953272, "grad_norm": 0.8436492909756026, "learning_rate": 3.4858193869075517e-06, "loss": 0.3969, "step": 9738 }, { "epoch": 0.6100314755946695, "grad_norm": 0.83079520168407, "learning_rate": 3.484852652893239e-06, "loss": 0.3997, "step": 9739 }, { "epoch": 0.6100941135940118, "grad_norm": 0.8939683514050346, "learning_rate": 3.4838859812426716e-06, "loss": 0.4672, "step": 9740 }, { "epoch": 0.6101567515933541, "grad_norm": 0.8964803948579952, "learning_rate": 3.48291937199564e-06, "loss": 0.3674, "step": 9741 }, { "epoch": 0.6102193895926964, "grad_norm": 0.8051854408046961, "learning_rate": 3.4819528251919277e-06, "loss": 0.4205, "step": 9742 }, { "epoch": 0.6102820275920388, "grad_norm": 0.8953496634927135, "learning_rate": 3.4809863408713197e-06, "loss": 0.4459, "step": 9743 }, { "epoch": 0.610344665591381, "grad_norm": 0.8976732073609286, "learning_rate": 3.4800199190735945e-06, "loss": 0.413, "step": 9744 }, { "epoch": 0.6104073035907233, "grad_norm": 0.9099482947022447, "learning_rate": 3.479053559838532e-06, "loss": 0.4087, "step": 9745 }, { "epoch": 0.6104699415900656, "grad_norm": 0.8701921808527087, "learning_rate": 3.478087263205906e-06, "loss": 0.3799, "step": 9746 }, { "epoch": 0.6105325795894079, "grad_norm": 0.6090944986162163, "learning_rate": 3.4771210292154933e-06, "loss": 0.4405, "step": 9747 }, { "epoch": 0.6105952175887502, "grad_norm": 0.8407902343623279, "learning_rate": 3.4761548579070604e-06, "loss": 0.3877, "step": 9748 }, { "epoch": 0.6106578555880925, "grad_norm": 0.7980849310418586, "learning_rate": 3.475188749320376e-06, "loss": 0.4005, "step": 9749 }, { "epoch": 0.6107204935874349, "grad_norm": 0.8463628861542035, "learning_rate": 3.474222703495207e-06, "loss": 0.4081, "step": 9750 }, { "epoch": 0.6107831315867771, "grad_norm": 0.809815687031824, "learning_rate": 3.473256720471314e-06, "loss": 0.3634, "step": 9751 }, { "epoch": 0.6108457695861195, "grad_norm": 0.8405927264955733, "learning_rate": 3.4722908002884586e-06, "loss": 0.4007, "step": 9752 }, { "epoch": 0.6109084075854617, "grad_norm": 0.9073940825472115, "learning_rate": 3.471324942986396e-06, "loss": 0.4463, "step": 9753 }, { "epoch": 0.610971045584804, "grad_norm": 0.7741343752390233, "learning_rate": 3.470359148604884e-06, "loss": 0.383, "step": 9754 }, { "epoch": 0.6110336835841463, "grad_norm": 0.9289339404935111, "learning_rate": 3.46939341718367e-06, "loss": 0.4107, "step": 9755 }, { "epoch": 0.6110963215834886, "grad_norm": 0.7692906162614732, "learning_rate": 3.46842774876251e-06, "loss": 0.3656, "step": 9756 }, { "epoch": 0.611158959582831, "grad_norm": 0.7994154726786582, "learning_rate": 3.4674621433811486e-06, "loss": 0.352, "step": 9757 }, { "epoch": 0.6112215975821732, "grad_norm": 0.8514407688802049, "learning_rate": 3.4664966010793276e-06, "loss": 0.3551, "step": 9758 }, { "epoch": 0.6112842355815156, "grad_norm": 0.6383338215697982, "learning_rate": 3.465531121896793e-06, "loss": 0.4681, "step": 9759 }, { "epoch": 0.6113468735808578, "grad_norm": 0.9173296376843815, "learning_rate": 3.464565705873282e-06, "loss": 0.4176, "step": 9760 }, { "epoch": 0.6114095115802002, "grad_norm": 0.772579411865578, "learning_rate": 3.463600353048531e-06, "loss": 0.377, "step": 9761 }, { "epoch": 0.6114721495795424, "grad_norm": 0.8112864470059851, "learning_rate": 3.4626350634622745e-06, "loss": 0.4, "step": 9762 }, { "epoch": 0.6115347875788847, "grad_norm": 0.8270597228309834, "learning_rate": 3.461669837154245e-06, "loss": 0.3935, "step": 9763 }, { "epoch": 0.611597425578227, "grad_norm": 0.8346252141836049, "learning_rate": 3.460704674164168e-06, "loss": 0.3841, "step": 9764 }, { "epoch": 0.6116600635775693, "grad_norm": 0.861275895043666, "learning_rate": 3.459739574531775e-06, "loss": 0.3823, "step": 9765 }, { "epoch": 0.6117227015769117, "grad_norm": 0.6054865180271968, "learning_rate": 3.458774538296785e-06, "loss": 0.4659, "step": 9766 }, { "epoch": 0.6117853395762539, "grad_norm": 0.9211374929087534, "learning_rate": 3.457809565498923e-06, "loss": 0.4127, "step": 9767 }, { "epoch": 0.6118479775755963, "grad_norm": 0.8506401059648963, "learning_rate": 3.4568446561779056e-06, "loss": 0.4438, "step": 9768 }, { "epoch": 0.6119106155749385, "grad_norm": 0.7798453847900142, "learning_rate": 3.455879810373448e-06, "loss": 0.3523, "step": 9769 }, { "epoch": 0.6119732535742808, "grad_norm": 0.823302625321148, "learning_rate": 3.4549150281252635e-06, "loss": 0.4141, "step": 9770 }, { "epoch": 0.6120358915736231, "grad_norm": 0.8569296759908261, "learning_rate": 3.453950309473064e-06, "loss": 0.4123, "step": 9771 }, { "epoch": 0.6120985295729654, "grad_norm": 0.8316559765606508, "learning_rate": 3.4529856544565563e-06, "loss": 0.3956, "step": 9772 }, { "epoch": 0.6121611675723078, "grad_norm": 0.7800614913423736, "learning_rate": 3.452021063115445e-06, "loss": 0.3948, "step": 9773 }, { "epoch": 0.61222380557165, "grad_norm": 0.768378489870306, "learning_rate": 3.4510565354894354e-06, "loss": 0.3969, "step": 9774 }, { "epoch": 0.6122864435709924, "grad_norm": 0.8153828900095746, "learning_rate": 3.4500920716182256e-06, "loss": 0.4374, "step": 9775 }, { "epoch": 0.6123490815703346, "grad_norm": 0.818873972060771, "learning_rate": 3.4491276715415143e-06, "loss": 0.4021, "step": 9776 }, { "epoch": 0.612411719569677, "grad_norm": 0.8403618231351219, "learning_rate": 3.4481633352989967e-06, "loss": 0.405, "step": 9777 }, { "epoch": 0.6124743575690192, "grad_norm": 0.7867535802352226, "learning_rate": 3.447199062930363e-06, "loss": 0.3726, "step": 9778 }, { "epoch": 0.6125369955683615, "grad_norm": 0.8195034074801414, "learning_rate": 3.4462348544753055e-06, "loss": 0.3938, "step": 9779 }, { "epoch": 0.6125996335677039, "grad_norm": 0.8191474642654354, "learning_rate": 3.4452707099735087e-06, "loss": 0.3793, "step": 9780 }, { "epoch": 0.6126622715670461, "grad_norm": 0.8160393267360601, "learning_rate": 3.4443066294646586e-06, "loss": 0.3821, "step": 9781 }, { "epoch": 0.6127249095663885, "grad_norm": 0.8263534860563171, "learning_rate": 3.4433426129884346e-06, "loss": 0.3805, "step": 9782 }, { "epoch": 0.6127875475657307, "grad_norm": 0.7435804736681602, "learning_rate": 3.4423786605845196e-06, "loss": 0.3274, "step": 9783 }, { "epoch": 0.6128501855650731, "grad_norm": 0.8565792561244718, "learning_rate": 3.4414147722925868e-06, "loss": 0.3921, "step": 9784 }, { "epoch": 0.6129128235644153, "grad_norm": 0.7861873231027915, "learning_rate": 3.4404509481523116e-06, "loss": 0.3331, "step": 9785 }, { "epoch": 0.6129754615637577, "grad_norm": 0.9022709359352509, "learning_rate": 3.439487188203366e-06, "loss": 0.4186, "step": 9786 }, { "epoch": 0.6130380995631, "grad_norm": 0.8605052890970081, "learning_rate": 3.4385234924854162e-06, "loss": 0.35, "step": 9787 }, { "epoch": 0.6131007375624422, "grad_norm": 0.8799264511403584, "learning_rate": 3.43755986103813e-06, "loss": 0.3995, "step": 9788 }, { "epoch": 0.6131633755617846, "grad_norm": 0.8117983528180653, "learning_rate": 3.43659629390117e-06, "loss": 0.3752, "step": 9789 }, { "epoch": 0.6132260135611268, "grad_norm": 0.8338358434617744, "learning_rate": 3.4356327911141964e-06, "loss": 0.3622, "step": 9790 }, { "epoch": 0.6132886515604692, "grad_norm": 0.8434852293544977, "learning_rate": 3.434669352716866e-06, "loss": 0.3838, "step": 9791 }, { "epoch": 0.6133512895598114, "grad_norm": 0.8421216084795262, "learning_rate": 3.4337059787488357e-06, "loss": 0.3999, "step": 9792 }, { "epoch": 0.6134139275591538, "grad_norm": 0.748796404298811, "learning_rate": 3.4327426692497594e-06, "loss": 0.3896, "step": 9793 }, { "epoch": 0.6134765655584961, "grad_norm": 0.9601961324964248, "learning_rate": 3.431779424259286e-06, "loss": 0.4153, "step": 9794 }, { "epoch": 0.6135392035578383, "grad_norm": 0.8540534781725708, "learning_rate": 3.430816243817063e-06, "loss": 0.3949, "step": 9795 }, { "epoch": 0.6136018415571807, "grad_norm": 0.8832530690395711, "learning_rate": 3.429853127962733e-06, "loss": 0.4061, "step": 9796 }, { "epoch": 0.6136644795565229, "grad_norm": 0.885884383925136, "learning_rate": 3.4288900767359407e-06, "loss": 0.4101, "step": 9797 }, { "epoch": 0.6137271175558653, "grad_norm": 0.8450733179760356, "learning_rate": 3.4279270901763244e-06, "loss": 0.3763, "step": 9798 }, { "epoch": 0.6137897555552075, "grad_norm": 0.8476878656920313, "learning_rate": 3.42696416832352e-06, "loss": 0.4205, "step": 9799 }, { "epoch": 0.6138523935545499, "grad_norm": 0.6192656166685508, "learning_rate": 3.4260013112171637e-06, "loss": 0.4575, "step": 9800 }, { "epoch": 0.6139150315538922, "grad_norm": 0.813232081996365, "learning_rate": 3.4250385188968834e-06, "loss": 0.3875, "step": 9801 }, { "epoch": 0.6139776695532345, "grad_norm": 0.8972722056870552, "learning_rate": 3.4240757914023113e-06, "loss": 0.4396, "step": 9802 }, { "epoch": 0.6140403075525768, "grad_norm": 0.8330601475518499, "learning_rate": 3.4231131287730735e-06, "loss": 0.3695, "step": 9803 }, { "epoch": 0.614102945551919, "grad_norm": 0.9572819233708229, "learning_rate": 3.42215053104879e-06, "loss": 0.4265, "step": 9804 }, { "epoch": 0.6141655835512614, "grad_norm": 0.8455706801053025, "learning_rate": 3.421187998269085e-06, "loss": 0.3605, "step": 9805 }, { "epoch": 0.6142282215506036, "grad_norm": 0.850183752106454, "learning_rate": 3.4202255304735744e-06, "loss": 0.3847, "step": 9806 }, { "epoch": 0.614290859549946, "grad_norm": 0.839822619250771, "learning_rate": 3.419263127701875e-06, "loss": 0.3652, "step": 9807 }, { "epoch": 0.6143534975492883, "grad_norm": 0.8893791385669508, "learning_rate": 3.4183007899935973e-06, "loss": 0.373, "step": 9808 }, { "epoch": 0.6144161355486306, "grad_norm": 0.7949443255504632, "learning_rate": 3.4173385173883533e-06, "loss": 0.3932, "step": 9809 }, { "epoch": 0.6144787735479729, "grad_norm": 0.8698944418121313, "learning_rate": 3.416376309925748e-06, "loss": 0.3688, "step": 9810 }, { "epoch": 0.6145414115473152, "grad_norm": 0.8660315789379274, "learning_rate": 3.41541416764539e-06, "loss": 0.3976, "step": 9811 }, { "epoch": 0.6146040495466575, "grad_norm": 0.928227206441588, "learning_rate": 3.4144520905868784e-06, "loss": 0.4751, "step": 9812 }, { "epoch": 0.6146666875459997, "grad_norm": 0.897221648515523, "learning_rate": 3.4134900787898123e-06, "loss": 0.3865, "step": 9813 }, { "epoch": 0.6147293255453421, "grad_norm": 0.7725722913758315, "learning_rate": 3.4125281322937903e-06, "loss": 0.3469, "step": 9814 }, { "epoch": 0.6147919635446844, "grad_norm": 0.8474026198136682, "learning_rate": 3.411566251138405e-06, "loss": 0.3785, "step": 9815 }, { "epoch": 0.6148546015440267, "grad_norm": 0.8273386137884132, "learning_rate": 3.4106044353632472e-06, "loss": 0.3655, "step": 9816 }, { "epoch": 0.614917239543369, "grad_norm": 0.8414422434669443, "learning_rate": 3.409642685007905e-06, "loss": 0.4147, "step": 9817 }, { "epoch": 0.6149798775427113, "grad_norm": 0.8101820259974948, "learning_rate": 3.4086810001119665e-06, "loss": 0.385, "step": 9818 }, { "epoch": 0.6150425155420536, "grad_norm": 0.6749869991724371, "learning_rate": 3.4077193807150106e-06, "loss": 0.4567, "step": 9819 }, { "epoch": 0.6151051535413958, "grad_norm": 0.8689103163520406, "learning_rate": 3.4067578268566225e-06, "loss": 0.3922, "step": 9820 }, { "epoch": 0.6151677915407382, "grad_norm": 0.9591017886200244, "learning_rate": 3.4057963385763794e-06, "loss": 0.4403, "step": 9821 }, { "epoch": 0.6152304295400804, "grad_norm": 0.8271424137490421, "learning_rate": 3.4048349159138525e-06, "loss": 0.391, "step": 9822 }, { "epoch": 0.6152930675394228, "grad_norm": 0.8195170470142639, "learning_rate": 3.403873558908618e-06, "loss": 0.3856, "step": 9823 }, { "epoch": 0.6153557055387651, "grad_norm": 0.8742728750940196, "learning_rate": 3.4029122676002433e-06, "loss": 0.424, "step": 9824 }, { "epoch": 0.6154183435381074, "grad_norm": 0.8263266881430765, "learning_rate": 3.4019510420282963e-06, "loss": 0.3658, "step": 9825 }, { "epoch": 0.6154809815374497, "grad_norm": 0.8543237134524705, "learning_rate": 3.4009898822323416e-06, "loss": 0.3842, "step": 9826 }, { "epoch": 0.615543619536792, "grad_norm": 0.8503644826461033, "learning_rate": 3.40002878825194e-06, "loss": 0.4107, "step": 9827 }, { "epoch": 0.6156062575361343, "grad_norm": 0.8186109639502599, "learning_rate": 3.3990677601266485e-06, "loss": 0.387, "step": 9828 }, { "epoch": 0.6156688955354765, "grad_norm": 0.8431883508435931, "learning_rate": 3.398106797896027e-06, "loss": 0.4021, "step": 9829 }, { "epoch": 0.6157315335348189, "grad_norm": 0.7914854700200612, "learning_rate": 3.3971459015996265e-06, "loss": 0.3888, "step": 9830 }, { "epoch": 0.6157941715341612, "grad_norm": 0.8638886214791687, "learning_rate": 3.3961850712769988e-06, "loss": 0.3768, "step": 9831 }, { "epoch": 0.6158568095335035, "grad_norm": 0.8657727411900771, "learning_rate": 3.3952243069676915e-06, "loss": 0.3773, "step": 9832 }, { "epoch": 0.6159194475328458, "grad_norm": 0.8291463856658744, "learning_rate": 3.394263608711249e-06, "loss": 0.3811, "step": 9833 }, { "epoch": 0.6159820855321881, "grad_norm": 0.7933157196735786, "learning_rate": 3.393302976547214e-06, "loss": 0.3631, "step": 9834 }, { "epoch": 0.6160447235315304, "grad_norm": 0.8632717677115246, "learning_rate": 3.392342410515128e-06, "loss": 0.4322, "step": 9835 }, { "epoch": 0.6161073615308728, "grad_norm": 0.9330233290942278, "learning_rate": 3.3913819106545266e-06, "loss": 0.3922, "step": 9836 }, { "epoch": 0.616169999530215, "grad_norm": 0.8143436198484681, "learning_rate": 3.3904214770049438e-06, "loss": 0.3739, "step": 9837 }, { "epoch": 0.6162326375295573, "grad_norm": 0.8653744572845068, "learning_rate": 3.3894611096059095e-06, "loss": 0.3887, "step": 9838 }, { "epoch": 0.6162952755288996, "grad_norm": 0.8883004829777138, "learning_rate": 3.388500808496956e-06, "loss": 0.3915, "step": 9839 }, { "epoch": 0.6163579135282419, "grad_norm": 0.8001032084514366, "learning_rate": 3.3875405737176104e-06, "loss": 0.3631, "step": 9840 }, { "epoch": 0.6164205515275842, "grad_norm": 0.8573803294308272, "learning_rate": 3.3865804053073936e-06, "loss": 0.4069, "step": 9841 }, { "epoch": 0.6164831895269265, "grad_norm": 0.7910096375913158, "learning_rate": 3.3856203033058266e-06, "loss": 0.4124, "step": 9842 }, { "epoch": 0.6165458275262689, "grad_norm": 0.8369616395904045, "learning_rate": 3.384660267752426e-06, "loss": 0.4107, "step": 9843 }, { "epoch": 0.6166084655256111, "grad_norm": 0.8370660730477724, "learning_rate": 3.3837002986867103e-06, "loss": 0.3986, "step": 9844 }, { "epoch": 0.6166711035249535, "grad_norm": 0.8574289878464181, "learning_rate": 3.38274039614819e-06, "loss": 0.4094, "step": 9845 }, { "epoch": 0.6167337415242957, "grad_norm": 0.7461478505018856, "learning_rate": 3.381780560176374e-06, "loss": 0.3653, "step": 9846 }, { "epoch": 0.616796379523638, "grad_norm": 0.8200672930525423, "learning_rate": 3.380820790810771e-06, "loss": 0.3734, "step": 9847 }, { "epoch": 0.6168590175229803, "grad_norm": 0.8265763979899611, "learning_rate": 3.3798610880908844e-06, "loss": 0.3842, "step": 9848 }, { "epoch": 0.6169216555223226, "grad_norm": 0.8405813155367117, "learning_rate": 3.3789014520562173e-06, "loss": 0.4053, "step": 9849 }, { "epoch": 0.616984293521665, "grad_norm": 0.8772164237560036, "learning_rate": 3.3779418827462673e-06, "loss": 0.3836, "step": 9850 }, { "epoch": 0.6170469315210072, "grad_norm": 0.8184853950065766, "learning_rate": 3.3769823802005296e-06, "loss": 0.3788, "step": 9851 }, { "epoch": 0.6171095695203496, "grad_norm": 0.8256907054701123, "learning_rate": 3.376022944458499e-06, "loss": 0.3726, "step": 9852 }, { "epoch": 0.6171722075196918, "grad_norm": 0.8819572439533047, "learning_rate": 3.3750635755596662e-06, "loss": 0.4013, "step": 9853 }, { "epoch": 0.6172348455190341, "grad_norm": 0.8161390232724338, "learning_rate": 3.3741042735435175e-06, "loss": 0.4034, "step": 9854 }, { "epoch": 0.6172974835183764, "grad_norm": 0.8990926843818193, "learning_rate": 3.373145038449538e-06, "loss": 0.4158, "step": 9855 }, { "epoch": 0.6173601215177187, "grad_norm": 0.845699986176942, "learning_rate": 3.3721858703172095e-06, "loss": 0.3974, "step": 9856 }, { "epoch": 0.617422759517061, "grad_norm": 0.8967685593296006, "learning_rate": 3.3712267691860144e-06, "loss": 0.4273, "step": 9857 }, { "epoch": 0.6174853975164033, "grad_norm": 0.858282957107384, "learning_rate": 3.370267735095428e-06, "loss": 0.41, "step": 9858 }, { "epoch": 0.6175480355157457, "grad_norm": 0.7982285223854865, "learning_rate": 3.3693087680849245e-06, "loss": 0.3713, "step": 9859 }, { "epoch": 0.6176106735150879, "grad_norm": 0.8446681725164991, "learning_rate": 3.368349868193974e-06, "loss": 0.3636, "step": 9860 }, { "epoch": 0.6176733115144303, "grad_norm": 0.8486723909698393, "learning_rate": 3.3673910354620467e-06, "loss": 0.3955, "step": 9861 }, { "epoch": 0.6177359495137725, "grad_norm": 0.8192960363856093, "learning_rate": 3.3664322699286066e-06, "loss": 0.3668, "step": 9862 }, { "epoch": 0.6177985875131148, "grad_norm": 0.7973238609798827, "learning_rate": 3.365473571633117e-06, "loss": 0.3725, "step": 9863 }, { "epoch": 0.6178612255124571, "grad_norm": 0.8399492613833552, "learning_rate": 3.3645149406150396e-06, "loss": 0.4073, "step": 9864 }, { "epoch": 0.6179238635117994, "grad_norm": 0.8213249134769567, "learning_rate": 3.363556376913829e-06, "loss": 0.3762, "step": 9865 }, { "epoch": 0.6179865015111418, "grad_norm": 0.8162755310062095, "learning_rate": 3.362597880568943e-06, "loss": 0.385, "step": 9866 }, { "epoch": 0.618049139510484, "grad_norm": 0.7960218024942821, "learning_rate": 3.3616394516198327e-06, "loss": 0.3862, "step": 9867 }, { "epoch": 0.6181117775098264, "grad_norm": 0.8536182542037762, "learning_rate": 3.360681090105946e-06, "loss": 0.3762, "step": 9868 }, { "epoch": 0.6181744155091686, "grad_norm": 0.6395996224597477, "learning_rate": 3.35972279606673e-06, "loss": 0.4411, "step": 9869 }, { "epoch": 0.618237053508511, "grad_norm": 0.6226802049427427, "learning_rate": 3.358764569541628e-06, "loss": 0.4721, "step": 9870 }, { "epoch": 0.6182996915078532, "grad_norm": 0.9139862678085126, "learning_rate": 3.357806410570082e-06, "loss": 0.4363, "step": 9871 }, { "epoch": 0.6183623295071955, "grad_norm": 0.9574181443844422, "learning_rate": 3.3568483191915275e-06, "loss": 0.4384, "step": 9872 }, { "epoch": 0.6184249675065379, "grad_norm": 0.8833154305570581, "learning_rate": 3.355890295445402e-06, "loss": 0.3622, "step": 9873 }, { "epoch": 0.6184876055058801, "grad_norm": 0.9316008924005139, "learning_rate": 3.3549323393711376e-06, "loss": 0.4401, "step": 9874 }, { "epoch": 0.6185502435052225, "grad_norm": 0.8546987274602134, "learning_rate": 3.3539744510081605e-06, "loss": 0.3805, "step": 9875 }, { "epoch": 0.6186128815045647, "grad_norm": 0.8425493027908019, "learning_rate": 3.3530166303959035e-06, "loss": 0.4087, "step": 9876 }, { "epoch": 0.6186755195039071, "grad_norm": 0.8738511495886717, "learning_rate": 3.3520588775737863e-06, "loss": 0.3778, "step": 9877 }, { "epoch": 0.6187381575032493, "grad_norm": 0.7576205690570722, "learning_rate": 3.351101192581232e-06, "loss": 0.3642, "step": 9878 }, { "epoch": 0.6188007955025916, "grad_norm": 0.8685669070624663, "learning_rate": 3.3501435754576596e-06, "loss": 0.4387, "step": 9879 }, { "epoch": 0.618863433501934, "grad_norm": 0.9078648775024661, "learning_rate": 3.3491860262424835e-06, "loss": 0.4138, "step": 9880 }, { "epoch": 0.6189260715012762, "grad_norm": 0.8404686276862937, "learning_rate": 3.3482285449751164e-06, "loss": 0.3849, "step": 9881 }, { "epoch": 0.6189887095006186, "grad_norm": 0.8435397974269538, "learning_rate": 3.347271131694969e-06, "loss": 0.4142, "step": 9882 }, { "epoch": 0.6190513474999608, "grad_norm": 0.8141369389217802, "learning_rate": 3.3463137864414493e-06, "loss": 0.4115, "step": 9883 }, { "epoch": 0.6191139854993032, "grad_norm": 0.7876462654840778, "learning_rate": 3.3453565092539586e-06, "loss": 0.3589, "step": 9884 }, { "epoch": 0.6191766234986454, "grad_norm": 0.8612849686777218, "learning_rate": 3.3443993001719033e-06, "loss": 0.3755, "step": 9885 }, { "epoch": 0.6192392614979878, "grad_norm": 0.8848200634263593, "learning_rate": 3.3434421592346787e-06, "loss": 0.4262, "step": 9886 }, { "epoch": 0.61930189949733, "grad_norm": 0.8326777078869274, "learning_rate": 3.3424850864816844e-06, "loss": 0.396, "step": 9887 }, { "epoch": 0.6193645374966723, "grad_norm": 0.8525793608778848, "learning_rate": 3.341528081952311e-06, "loss": 0.3877, "step": 9888 }, { "epoch": 0.6194271754960147, "grad_norm": 0.9076697227494595, "learning_rate": 3.3405711456859485e-06, "loss": 0.4054, "step": 9889 }, { "epoch": 0.6194898134953569, "grad_norm": 0.8361560390713397, "learning_rate": 3.3396142777219865e-06, "loss": 0.3594, "step": 9890 }, { "epoch": 0.6195524514946993, "grad_norm": 0.8121466919961073, "learning_rate": 3.3386574780998093e-06, "loss": 0.3324, "step": 9891 }, { "epoch": 0.6196150894940415, "grad_norm": 0.6002024281240804, "learning_rate": 3.3377007468587984e-06, "loss": 0.4407, "step": 9892 }, { "epoch": 0.6196777274933839, "grad_norm": 1.0469240291032438, "learning_rate": 3.336744084038331e-06, "loss": 0.3911, "step": 9893 }, { "epoch": 0.6197403654927262, "grad_norm": 0.8298508992020505, "learning_rate": 3.3357874896777887e-06, "loss": 0.4004, "step": 9894 }, { "epoch": 0.6198030034920685, "grad_norm": 0.867944761988687, "learning_rate": 3.334830963816541e-06, "loss": 0.3982, "step": 9895 }, { "epoch": 0.6198656414914108, "grad_norm": 0.6739956677249181, "learning_rate": 3.3338745064939604e-06, "loss": 0.4718, "step": 9896 }, { "epoch": 0.619928279490753, "grad_norm": 0.9056997535597704, "learning_rate": 3.3329181177494152e-06, "loss": 0.4035, "step": 9897 }, { "epoch": 0.6199909174900954, "grad_norm": 0.8248899974567109, "learning_rate": 3.331961797622268e-06, "loss": 0.3883, "step": 9898 }, { "epoch": 0.6200535554894376, "grad_norm": 0.8507858081009424, "learning_rate": 3.3310055461518844e-06, "loss": 0.3995, "step": 9899 }, { "epoch": 0.62011619348878, "grad_norm": 0.7582421280198478, "learning_rate": 3.330049363377621e-06, "loss": 0.3758, "step": 9900 }, { "epoch": 0.6201788314881223, "grad_norm": 0.8200717536133159, "learning_rate": 3.3290932493388373e-06, "loss": 0.3568, "step": 9901 }, { "epoch": 0.6202414694874646, "grad_norm": 0.8375968243119839, "learning_rate": 3.3281372040748826e-06, "loss": 0.4157, "step": 9902 }, { "epoch": 0.6203041074868069, "grad_norm": 0.8299064769994459, "learning_rate": 3.327181227625112e-06, "loss": 0.4091, "step": 9903 }, { "epoch": 0.6203667454861491, "grad_norm": 0.7782152287015951, "learning_rate": 3.326225320028874e-06, "loss": 0.4155, "step": 9904 }, { "epoch": 0.6204293834854915, "grad_norm": 0.8648267533317593, "learning_rate": 3.325269481325512e-06, "loss": 0.3739, "step": 9905 }, { "epoch": 0.6204920214848337, "grad_norm": 0.8009629785552789, "learning_rate": 3.3243137115543695e-06, "loss": 0.3819, "step": 9906 }, { "epoch": 0.6205546594841761, "grad_norm": 0.8224993986157951, "learning_rate": 3.323358010754784e-06, "loss": 0.4102, "step": 9907 }, { "epoch": 0.6206172974835183, "grad_norm": 0.8990799109399378, "learning_rate": 3.3224023789660954e-06, "loss": 0.3962, "step": 9908 }, { "epoch": 0.6206799354828607, "grad_norm": 0.9317754570969504, "learning_rate": 3.3214468162276364e-06, "loss": 0.4368, "step": 9909 }, { "epoch": 0.620742573482203, "grad_norm": 0.8845314067880615, "learning_rate": 3.3204913225787373e-06, "loss": 0.4181, "step": 9910 }, { "epoch": 0.6208052114815453, "grad_norm": 0.8509462147307928, "learning_rate": 3.3195358980587266e-06, "loss": 0.4081, "step": 9911 }, { "epoch": 0.6208678494808876, "grad_norm": 0.8647013022615758, "learning_rate": 3.3185805427069297e-06, "loss": 0.4085, "step": 9912 }, { "epoch": 0.6209304874802298, "grad_norm": 0.9032703720767141, "learning_rate": 3.3176252565626716e-06, "loss": 0.3753, "step": 9913 }, { "epoch": 0.6209931254795722, "grad_norm": 0.7801637228323545, "learning_rate": 3.3166700396652696e-06, "loss": 0.3826, "step": 9914 }, { "epoch": 0.6210557634789144, "grad_norm": 0.80946870756844, "learning_rate": 3.3157148920540404e-06, "loss": 0.4132, "step": 9915 }, { "epoch": 0.6211184014782568, "grad_norm": 0.819089550133005, "learning_rate": 3.3147598137683e-06, "loss": 0.4148, "step": 9916 }, { "epoch": 0.6211810394775991, "grad_norm": 0.9011478000663169, "learning_rate": 3.313804804847359e-06, "loss": 0.4302, "step": 9917 }, { "epoch": 0.6212436774769414, "grad_norm": 0.7973767786695912, "learning_rate": 3.3128498653305248e-06, "loss": 0.3596, "step": 9918 }, { "epoch": 0.6213063154762837, "grad_norm": 0.8391138271437536, "learning_rate": 3.311894995257102e-06, "loss": 0.3737, "step": 9919 }, { "epoch": 0.621368953475626, "grad_norm": 0.835202713029005, "learning_rate": 3.3109401946663954e-06, "loss": 0.4093, "step": 9920 }, { "epoch": 0.6214315914749683, "grad_norm": 0.8575829583422815, "learning_rate": 3.3099854635977024e-06, "loss": 0.4477, "step": 9921 }, { "epoch": 0.6214942294743105, "grad_norm": 0.8803012010216477, "learning_rate": 3.3090308020903226e-06, "loss": 0.3879, "step": 9922 }, { "epoch": 0.6215568674736529, "grad_norm": 0.7033472755183396, "learning_rate": 3.30807621018355e-06, "loss": 0.4683, "step": 9923 }, { "epoch": 0.6216195054729952, "grad_norm": 0.8030275886443429, "learning_rate": 3.3071216879166727e-06, "loss": 0.3959, "step": 9924 }, { "epoch": 0.6216821434723375, "grad_norm": 0.8719853820186455, "learning_rate": 3.3061672353289826e-06, "loss": 0.4067, "step": 9925 }, { "epoch": 0.6217447814716798, "grad_norm": 0.849615826305834, "learning_rate": 3.3052128524597633e-06, "loss": 0.3997, "step": 9926 }, { "epoch": 0.6218074194710221, "grad_norm": 0.8684141413183983, "learning_rate": 3.3042585393482973e-06, "loss": 0.4033, "step": 9927 }, { "epoch": 0.6218700574703644, "grad_norm": 0.8313589052574375, "learning_rate": 3.3033042960338636e-06, "loss": 0.4102, "step": 9928 }, { "epoch": 0.6219326954697066, "grad_norm": 0.8157447087843034, "learning_rate": 3.302350122555741e-06, "loss": 0.4171, "step": 9929 }, { "epoch": 0.621995333469049, "grad_norm": 0.7554451495436163, "learning_rate": 3.301396018953201e-06, "loss": 0.3575, "step": 9930 }, { "epoch": 0.6220579714683913, "grad_norm": 0.8746985056419716, "learning_rate": 3.3004419852655172e-06, "loss": 0.3866, "step": 9931 }, { "epoch": 0.6221206094677336, "grad_norm": 0.7733269984661096, "learning_rate": 3.2994880215319574e-06, "loss": 0.3675, "step": 9932 }, { "epoch": 0.6221832474670759, "grad_norm": 0.8529501986640472, "learning_rate": 3.298534127791785e-06, "loss": 0.3632, "step": 9933 }, { "epoch": 0.6222458854664182, "grad_norm": 0.7816473527738388, "learning_rate": 3.2975803040842646e-06, "loss": 0.3918, "step": 9934 }, { "epoch": 0.6223085234657605, "grad_norm": 0.8253066268017523, "learning_rate": 3.296626550448655e-06, "loss": 0.3884, "step": 9935 }, { "epoch": 0.6223711614651029, "grad_norm": 0.8751479878499975, "learning_rate": 3.295672866924212e-06, "loss": 0.4094, "step": 9936 }, { "epoch": 0.6224337994644451, "grad_norm": 0.8264341610135324, "learning_rate": 3.294719253550191e-06, "loss": 0.3846, "step": 9937 }, { "epoch": 0.6224964374637874, "grad_norm": 0.7979877710012544, "learning_rate": 3.293765710365842e-06, "loss": 0.3728, "step": 9938 }, { "epoch": 0.6225590754631297, "grad_norm": 0.9268623079455228, "learning_rate": 3.292812237410411e-06, "loss": 0.4059, "step": 9939 }, { "epoch": 0.622621713462472, "grad_norm": 0.8048249847738604, "learning_rate": 3.2918588347231473e-06, "loss": 0.3863, "step": 9940 }, { "epoch": 0.6226843514618143, "grad_norm": 0.8258769914528391, "learning_rate": 3.29090550234329e-06, "loss": 0.3632, "step": 9941 }, { "epoch": 0.6227469894611566, "grad_norm": 0.8740371750251013, "learning_rate": 3.2899522403100804e-06, "loss": 0.4008, "step": 9942 }, { "epoch": 0.622809627460499, "grad_norm": 0.8947994085635457, "learning_rate": 3.288999048662754e-06, "loss": 0.4434, "step": 9943 }, { "epoch": 0.6228722654598412, "grad_norm": 0.7433949303631712, "learning_rate": 3.288045927440544e-06, "loss": 0.3944, "step": 9944 }, { "epoch": 0.6229349034591836, "grad_norm": 0.8262809351320275, "learning_rate": 3.2870928766826804e-06, "loss": 0.3995, "step": 9945 }, { "epoch": 0.6229975414585258, "grad_norm": 0.8328271139496463, "learning_rate": 3.286139896428393e-06, "loss": 0.3929, "step": 9946 }, { "epoch": 0.6230601794578681, "grad_norm": 0.7783285505600054, "learning_rate": 3.285186986716906e-06, "loss": 0.3586, "step": 9947 }, { "epoch": 0.6231228174572104, "grad_norm": 0.8237485749632816, "learning_rate": 3.2842341475874382e-06, "loss": 0.3935, "step": 9948 }, { "epoch": 0.6231854554565527, "grad_norm": 0.8613241241485031, "learning_rate": 3.2832813790792144e-06, "loss": 0.3897, "step": 9949 }, { "epoch": 0.623248093455895, "grad_norm": 0.8624373955203666, "learning_rate": 3.282328681231446e-06, "loss": 0.4365, "step": 9950 }, { "epoch": 0.6233107314552373, "grad_norm": 0.8351256034788757, "learning_rate": 3.2813760540833495e-06, "loss": 0.4025, "step": 9951 }, { "epoch": 0.6233733694545797, "grad_norm": 0.753724186534271, "learning_rate": 3.2804234976741327e-06, "loss": 0.3739, "step": 9952 }, { "epoch": 0.6234360074539219, "grad_norm": 0.8051227012942032, "learning_rate": 3.279471012043004e-06, "loss": 0.4116, "step": 9953 }, { "epoch": 0.6234986454532643, "grad_norm": 0.8971359002699296, "learning_rate": 3.2785185972291678e-06, "loss": 0.3906, "step": 9954 }, { "epoch": 0.6235612834526065, "grad_norm": 0.7702972195537033, "learning_rate": 3.2775662532718256e-06, "loss": 0.3729, "step": 9955 }, { "epoch": 0.6236239214519488, "grad_norm": 0.798035564520532, "learning_rate": 3.2766139802101764e-06, "loss": 0.3586, "step": 9956 }, { "epoch": 0.6236865594512911, "grad_norm": 0.7802297347969629, "learning_rate": 3.275661778083414e-06, "loss": 0.4036, "step": 9957 }, { "epoch": 0.6237491974506334, "grad_norm": 0.8361457181555709, "learning_rate": 3.2747096469307344e-06, "loss": 0.3755, "step": 9958 }, { "epoch": 0.6238118354499758, "grad_norm": 0.8481412740949169, "learning_rate": 3.2737575867913253e-06, "loss": 0.3785, "step": 9959 }, { "epoch": 0.623874473449318, "grad_norm": 0.8037302300298711, "learning_rate": 3.272805597704375e-06, "loss": 0.39, "step": 9960 }, { "epoch": 0.6239371114486604, "grad_norm": 0.8369197784693977, "learning_rate": 3.271853679709067e-06, "loss": 0.4209, "step": 9961 }, { "epoch": 0.6239997494480026, "grad_norm": 0.8856967295384879, "learning_rate": 3.2709018328445814e-06, "loss": 0.4099, "step": 9962 }, { "epoch": 0.6240623874473449, "grad_norm": 0.8538743687395097, "learning_rate": 3.269950057150098e-06, "loss": 0.3985, "step": 9963 }, { "epoch": 0.6241250254466872, "grad_norm": 0.8324557625424475, "learning_rate": 3.2689983526647916e-06, "loss": 0.3868, "step": 9964 }, { "epoch": 0.6241876634460295, "grad_norm": 0.803612900933842, "learning_rate": 3.2680467194278343e-06, "loss": 0.3674, "step": 9965 }, { "epoch": 0.6242503014453719, "grad_norm": 0.8883773999271743, "learning_rate": 3.267095157478395e-06, "loss": 0.428, "step": 9966 }, { "epoch": 0.6243129394447141, "grad_norm": 0.840831869906111, "learning_rate": 3.266143666855639e-06, "loss": 0.3561, "step": 9967 }, { "epoch": 0.6243755774440565, "grad_norm": 0.8397482963592986, "learning_rate": 3.2651922475987337e-06, "loss": 0.386, "step": 9968 }, { "epoch": 0.6244382154433987, "grad_norm": 0.8073222685632787, "learning_rate": 3.2642408997468373e-06, "loss": 0.3773, "step": 9969 }, { "epoch": 0.6245008534427411, "grad_norm": 0.8678350896789233, "learning_rate": 3.2632896233391074e-06, "loss": 0.4219, "step": 9970 }, { "epoch": 0.6245634914420833, "grad_norm": 0.8474451205517592, "learning_rate": 3.2623384184146988e-06, "loss": 0.3983, "step": 9971 }, { "epoch": 0.6246261294414256, "grad_norm": 0.8153013217827841, "learning_rate": 3.2613872850127637e-06, "loss": 0.4125, "step": 9972 }, { "epoch": 0.624688767440768, "grad_norm": 0.8348835465659808, "learning_rate": 3.2604362231724508e-06, "loss": 0.365, "step": 9973 }, { "epoch": 0.6247514054401102, "grad_norm": 0.8419164836010276, "learning_rate": 3.2594852329329052e-06, "loss": 0.3999, "step": 9974 }, { "epoch": 0.6248140434394526, "grad_norm": 0.795945215411622, "learning_rate": 3.258534314333271e-06, "loss": 0.4089, "step": 9975 }, { "epoch": 0.6248766814387948, "grad_norm": 0.8393596581538777, "learning_rate": 3.257583467412686e-06, "loss": 0.3952, "step": 9976 }, { "epoch": 0.6249393194381372, "grad_norm": 0.8670695985278849, "learning_rate": 3.256632692210291e-06, "loss": 0.4019, "step": 9977 }, { "epoch": 0.6250019574374794, "grad_norm": 0.818870970429117, "learning_rate": 3.255681988765217e-06, "loss": 0.3867, "step": 9978 }, { "epoch": 0.6250645954368218, "grad_norm": 0.805176230963566, "learning_rate": 3.254731357116597e-06, "loss": 0.3882, "step": 9979 }, { "epoch": 0.625127233436164, "grad_norm": 0.8186074330642693, "learning_rate": 3.2537807973035573e-06, "loss": 0.3864, "step": 9980 }, { "epoch": 0.6251898714355063, "grad_norm": 0.8432707306664072, "learning_rate": 3.2528303093652247e-06, "loss": 0.4313, "step": 9981 }, { "epoch": 0.6252525094348487, "grad_norm": 0.819881900904387, "learning_rate": 3.2518798933407215e-06, "loss": 0.4169, "step": 9982 }, { "epoch": 0.6253151474341909, "grad_norm": 0.8586405351498242, "learning_rate": 3.2509295492691646e-06, "loss": 0.418, "step": 9983 }, { "epoch": 0.6253777854335333, "grad_norm": 0.8395543139350511, "learning_rate": 3.249979277189673e-06, "loss": 0.3661, "step": 9984 }, { "epoch": 0.6254404234328755, "grad_norm": 0.8194059574922934, "learning_rate": 3.249029077141358e-06, "loss": 0.3547, "step": 9985 }, { "epoch": 0.6255030614322179, "grad_norm": 0.8159147476690534, "learning_rate": 3.2480789491633326e-06, "loss": 0.3928, "step": 9986 }, { "epoch": 0.6255656994315602, "grad_norm": 0.8173003611855921, "learning_rate": 3.247128893294703e-06, "loss": 0.4084, "step": 9987 }, { "epoch": 0.6256283374309024, "grad_norm": 0.8098595470619774, "learning_rate": 3.246178909574572e-06, "loss": 0.4028, "step": 9988 }, { "epoch": 0.6256909754302448, "grad_norm": 1.3909637587397286, "learning_rate": 3.245228998042045e-06, "loss": 0.3851, "step": 9989 }, { "epoch": 0.625753613429587, "grad_norm": 0.8695215772656758, "learning_rate": 3.2442791587362165e-06, "loss": 0.4241, "step": 9990 }, { "epoch": 0.6258162514289294, "grad_norm": 0.8365707649620259, "learning_rate": 3.243329391696185e-06, "loss": 0.374, "step": 9991 }, { "epoch": 0.6258788894282716, "grad_norm": 0.8367945720431932, "learning_rate": 3.24237969696104e-06, "loss": 0.3668, "step": 9992 }, { "epoch": 0.625941527427614, "grad_norm": 0.663890711764318, "learning_rate": 3.241430074569874e-06, "loss": 0.4553, "step": 9993 }, { "epoch": 0.6260041654269562, "grad_norm": 0.8827493724834891, "learning_rate": 3.240480524561771e-06, "loss": 0.3752, "step": 9994 }, { "epoch": 0.6260668034262986, "grad_norm": 0.7983413724237716, "learning_rate": 3.2395310469758178e-06, "loss": 0.3962, "step": 9995 }, { "epoch": 0.6261294414256409, "grad_norm": 0.8025552525628159, "learning_rate": 3.2385816418510928e-06, "loss": 0.3733, "step": 9996 }, { "epoch": 0.6261920794249831, "grad_norm": 0.7848572104930112, "learning_rate": 3.237632309226675e-06, "loss": 0.3872, "step": 9997 }, { "epoch": 0.6262547174243255, "grad_norm": 0.7931330575632385, "learning_rate": 3.236683049141638e-06, "loss": 0.3684, "step": 9998 }, { "epoch": 0.6263173554236677, "grad_norm": 0.8403972693133809, "learning_rate": 3.235733861635054e-06, "loss": 0.4314, "step": 9999 }, { "epoch": 0.6263799934230101, "grad_norm": 0.8131115553381607, "learning_rate": 3.2347847467459915e-06, "loss": 0.3653, "step": 10000 }, { "epoch": 0.6264426314223523, "grad_norm": 0.8708853283747959, "learning_rate": 3.2338357045135173e-06, "loss": 0.3996, "step": 10001 }, { "epoch": 0.6265052694216947, "grad_norm": 0.897739341697837, "learning_rate": 3.2328867349766933e-06, "loss": 0.4167, "step": 10002 }, { "epoch": 0.626567907421037, "grad_norm": 0.781492272360584, "learning_rate": 3.231937838174577e-06, "loss": 0.3458, "step": 10003 }, { "epoch": 0.6266305454203793, "grad_norm": 0.8726647341175183, "learning_rate": 3.2309890141462295e-06, "loss": 0.4042, "step": 10004 }, { "epoch": 0.6266931834197216, "grad_norm": 0.8448343330142105, "learning_rate": 3.230040262930702e-06, "loss": 0.4064, "step": 10005 }, { "epoch": 0.6267558214190638, "grad_norm": 0.8273460188594023, "learning_rate": 3.2290915845670467e-06, "loss": 0.3857, "step": 10006 }, { "epoch": 0.6268184594184062, "grad_norm": 0.8608878504480427, "learning_rate": 3.2281429790943097e-06, "loss": 0.3707, "step": 10007 }, { "epoch": 0.6268810974177484, "grad_norm": 0.8569418949830816, "learning_rate": 3.2271944465515376e-06, "loss": 0.4024, "step": 10008 }, { "epoch": 0.6269437354170908, "grad_norm": 0.7836666432738681, "learning_rate": 3.2262459869777694e-06, "loss": 0.3708, "step": 10009 }, { "epoch": 0.6270063734164331, "grad_norm": 0.8057241369532389, "learning_rate": 3.2252976004120473e-06, "loss": 0.3971, "step": 10010 }, { "epoch": 0.6270690114157754, "grad_norm": 0.8395015288530249, "learning_rate": 3.2243492868934047e-06, "loss": 0.4105, "step": 10011 }, { "epoch": 0.6271316494151177, "grad_norm": 0.8888444614752293, "learning_rate": 3.223401046460875e-06, "loss": 0.3659, "step": 10012 }, { "epoch": 0.6271942874144599, "grad_norm": 0.8207625910131067, "learning_rate": 3.222452879153486e-06, "loss": 0.3601, "step": 10013 }, { "epoch": 0.6272569254138023, "grad_norm": 0.818162612087328, "learning_rate": 3.2215047850102676e-06, "loss": 0.4276, "step": 10014 }, { "epoch": 0.6273195634131445, "grad_norm": 0.8330615292213903, "learning_rate": 3.220556764070244e-06, "loss": 0.3593, "step": 10015 }, { "epoch": 0.6273822014124869, "grad_norm": 0.8137811358748187, "learning_rate": 3.219608816372433e-06, "loss": 0.3997, "step": 10016 }, { "epoch": 0.6274448394118292, "grad_norm": 0.8934690105316516, "learning_rate": 3.2186609419558545e-06, "loss": 0.4001, "step": 10017 }, { "epoch": 0.6275074774111715, "grad_norm": 0.6002135827085645, "learning_rate": 3.2177131408595207e-06, "loss": 0.4601, "step": 10018 }, { "epoch": 0.6275701154105138, "grad_norm": 0.8680181193387471, "learning_rate": 3.2167654131224463e-06, "loss": 0.4079, "step": 10019 }, { "epoch": 0.6276327534098561, "grad_norm": 0.8320900122617565, "learning_rate": 3.215817758783638e-06, "loss": 0.3847, "step": 10020 }, { "epoch": 0.6276953914091984, "grad_norm": 0.8263119520112634, "learning_rate": 3.214870177882101e-06, "loss": 0.3809, "step": 10021 }, { "epoch": 0.6277580294085406, "grad_norm": 0.868465694041472, "learning_rate": 3.2139226704568393e-06, "loss": 0.4073, "step": 10022 }, { "epoch": 0.627820667407883, "grad_norm": 0.8666924864844658, "learning_rate": 3.212975236546852e-06, "loss": 0.3856, "step": 10023 }, { "epoch": 0.6278833054072253, "grad_norm": 0.81066449646964, "learning_rate": 3.212027876191136e-06, "loss": 0.377, "step": 10024 }, { "epoch": 0.6279459434065676, "grad_norm": 0.806559401387833, "learning_rate": 3.211080589428685e-06, "loss": 0.3913, "step": 10025 }, { "epoch": 0.6280085814059099, "grad_norm": 0.8122360006004732, "learning_rate": 3.2101333762984876e-06, "loss": 0.3442, "step": 10026 }, { "epoch": 0.6280712194052522, "grad_norm": 0.8875487110092369, "learning_rate": 3.2091862368395343e-06, "loss": 0.4329, "step": 10027 }, { "epoch": 0.6281338574045945, "grad_norm": 0.8913116916447883, "learning_rate": 3.2082391710908074e-06, "loss": 0.4294, "step": 10028 }, { "epoch": 0.6281964954039368, "grad_norm": 0.8102169239167812, "learning_rate": 3.207292179091289e-06, "loss": 0.3819, "step": 10029 }, { "epoch": 0.6282591334032791, "grad_norm": 0.8577155098845842, "learning_rate": 3.2063452608799562e-06, "loss": 0.4338, "step": 10030 }, { "epoch": 0.6283217714026214, "grad_norm": 0.861169274382156, "learning_rate": 3.205398416495784e-06, "loss": 0.399, "step": 10031 }, { "epoch": 0.6283844094019637, "grad_norm": 0.7993526768507209, "learning_rate": 3.204451645977748e-06, "loss": 0.415, "step": 10032 }, { "epoch": 0.628447047401306, "grad_norm": 0.7988604988768119, "learning_rate": 3.203504949364816e-06, "loss": 0.3637, "step": 10033 }, { "epoch": 0.6285096854006483, "grad_norm": 0.854117918534581, "learning_rate": 3.2025583266959535e-06, "loss": 0.385, "step": 10034 }, { "epoch": 0.6285723233999906, "grad_norm": 0.8578574271345212, "learning_rate": 3.201611778010123e-06, "loss": 0.4029, "step": 10035 }, { "epoch": 0.628634961399333, "grad_norm": 0.8186812791387317, "learning_rate": 3.2006653033462863e-06, "loss": 0.396, "step": 10036 }, { "epoch": 0.6286975993986752, "grad_norm": 0.7895222773560193, "learning_rate": 3.1997189027434e-06, "loss": 0.3557, "step": 10037 }, { "epoch": 0.6287602373980175, "grad_norm": 0.7701570939647804, "learning_rate": 3.1987725762404174e-06, "loss": 0.3419, "step": 10038 }, { "epoch": 0.6288228753973598, "grad_norm": 0.8111331728361865, "learning_rate": 3.1978263238762895e-06, "loss": 0.3742, "step": 10039 }, { "epoch": 0.6288855133967021, "grad_norm": 0.8100016275470417, "learning_rate": 3.1968801456899635e-06, "loss": 0.3983, "step": 10040 }, { "epoch": 0.6289481513960444, "grad_norm": 0.8169217699514902, "learning_rate": 3.1959340417203866e-06, "loss": 0.3827, "step": 10041 }, { "epoch": 0.6290107893953867, "grad_norm": 0.9048469396308211, "learning_rate": 3.1949880120065e-06, "loss": 0.4274, "step": 10042 }, { "epoch": 0.629073427394729, "grad_norm": 0.863317576187418, "learning_rate": 3.194042056587242e-06, "loss": 0.4067, "step": 10043 }, { "epoch": 0.6291360653940713, "grad_norm": 0.5966553504590368, "learning_rate": 3.193096175501547e-06, "loss": 0.4519, "step": 10044 }, { "epoch": 0.6291987033934137, "grad_norm": 0.8777923251154279, "learning_rate": 3.1921503687883497e-06, "loss": 0.4158, "step": 10045 }, { "epoch": 0.6292613413927559, "grad_norm": 0.7965466372500477, "learning_rate": 3.191204636486579e-06, "loss": 0.3738, "step": 10046 }, { "epoch": 0.6293239793920982, "grad_norm": 0.7817715989415831, "learning_rate": 3.19025897863516e-06, "loss": 0.3524, "step": 10047 }, { "epoch": 0.6293866173914405, "grad_norm": 0.9221802956226562, "learning_rate": 3.1893133952730183e-06, "loss": 0.3983, "step": 10048 }, { "epoch": 0.6294492553907828, "grad_norm": 0.8407534235818618, "learning_rate": 3.188367886439071e-06, "loss": 0.3804, "step": 10049 }, { "epoch": 0.6295118933901251, "grad_norm": 0.8685526043731063, "learning_rate": 3.1874224521722406e-06, "loss": 0.4135, "step": 10050 }, { "epoch": 0.6295745313894674, "grad_norm": 0.8564520792219998, "learning_rate": 3.186477092511438e-06, "loss": 0.3979, "step": 10051 }, { "epoch": 0.6296371693888098, "grad_norm": 0.8346828906033581, "learning_rate": 3.1855318074955737e-06, "loss": 0.4196, "step": 10052 }, { "epoch": 0.629699807388152, "grad_norm": 0.8476365833644792, "learning_rate": 3.184586597163558e-06, "loss": 0.3836, "step": 10053 }, { "epoch": 0.6297624453874944, "grad_norm": 0.8202879309028138, "learning_rate": 3.1836414615542952e-06, "loss": 0.3999, "step": 10054 }, { "epoch": 0.6298250833868366, "grad_norm": 0.8828361525409305, "learning_rate": 3.1826964007066873e-06, "loss": 0.4437, "step": 10055 }, { "epoch": 0.6298877213861789, "grad_norm": 0.936863695710271, "learning_rate": 3.181751414659631e-06, "loss": 0.4236, "step": 10056 }, { "epoch": 0.6299503593855212, "grad_norm": 0.8829332067685569, "learning_rate": 3.180806503452026e-06, "loss": 0.3902, "step": 10057 }, { "epoch": 0.6300129973848635, "grad_norm": 0.8243841156625323, "learning_rate": 3.1798616671227624e-06, "loss": 0.3932, "step": 10058 }, { "epoch": 0.6300756353842059, "grad_norm": 0.7578670124463665, "learning_rate": 3.1789169057107284e-06, "loss": 0.3645, "step": 10059 }, { "epoch": 0.6301382733835481, "grad_norm": 0.8594334902761426, "learning_rate": 3.1779722192548146e-06, "loss": 0.3998, "step": 10060 }, { "epoch": 0.6302009113828905, "grad_norm": 0.9004578201014023, "learning_rate": 3.1770276077939012e-06, "loss": 0.3666, "step": 10061 }, { "epoch": 0.6302635493822327, "grad_norm": 0.8557628958278738, "learning_rate": 3.1760830713668712e-06, "loss": 0.3809, "step": 10062 }, { "epoch": 0.6303261873815751, "grad_norm": 0.8878073186010379, "learning_rate": 3.1751386100126004e-06, "loss": 0.3969, "step": 10063 }, { "epoch": 0.6303888253809173, "grad_norm": 0.8327486960546945, "learning_rate": 3.1741942237699618e-06, "loss": 0.3928, "step": 10064 }, { "epoch": 0.6304514633802596, "grad_norm": 0.7826557034735965, "learning_rate": 3.173249912677829e-06, "loss": 0.3995, "step": 10065 }, { "epoch": 0.630514101379602, "grad_norm": 0.7820268381859058, "learning_rate": 3.1723056767750685e-06, "loss": 0.3918, "step": 10066 }, { "epoch": 0.6305767393789442, "grad_norm": 0.8159875262850627, "learning_rate": 3.171361516100546e-06, "loss": 0.4158, "step": 10067 }, { "epoch": 0.6306393773782866, "grad_norm": 0.9185687458384924, "learning_rate": 3.1704174306931212e-06, "loss": 0.4285, "step": 10068 }, { "epoch": 0.6307020153776288, "grad_norm": 0.6496418723581682, "learning_rate": 3.1694734205916568e-06, "loss": 0.4429, "step": 10069 }, { "epoch": 0.6307646533769712, "grad_norm": 0.8977202138072483, "learning_rate": 3.168529485835004e-06, "loss": 0.4256, "step": 10070 }, { "epoch": 0.6308272913763134, "grad_norm": 0.8608825050125894, "learning_rate": 3.1675856264620198e-06, "loss": 0.4011, "step": 10071 }, { "epoch": 0.6308899293756557, "grad_norm": 0.8918371587631131, "learning_rate": 3.16664184251155e-06, "loss": 0.429, "step": 10072 }, { "epoch": 0.630952567374998, "grad_norm": 0.8332667662644055, "learning_rate": 3.1656981340224424e-06, "loss": 0.4085, "step": 10073 }, { "epoch": 0.6310152053743403, "grad_norm": 0.8966365026181222, "learning_rate": 3.16475450103354e-06, "loss": 0.4058, "step": 10074 }, { "epoch": 0.6310778433736827, "grad_norm": 0.5949851228967236, "learning_rate": 3.1638109435836837e-06, "loss": 0.4409, "step": 10075 }, { "epoch": 0.6311404813730249, "grad_norm": 0.7873986043444872, "learning_rate": 3.16286746171171e-06, "loss": 0.3719, "step": 10076 }, { "epoch": 0.6312031193723673, "grad_norm": 0.8117930487127004, "learning_rate": 3.1619240554564503e-06, "loss": 0.3863, "step": 10077 }, { "epoch": 0.6312657573717095, "grad_norm": 0.7963511245868238, "learning_rate": 3.160980724856738e-06, "loss": 0.3616, "step": 10078 }, { "epoch": 0.6313283953710519, "grad_norm": 0.8499713468180391, "learning_rate": 3.1600374699514023e-06, "loss": 0.3864, "step": 10079 }, { "epoch": 0.6313910333703942, "grad_norm": 0.8643275210855316, "learning_rate": 3.1590942907792654e-06, "loss": 0.3909, "step": 10080 }, { "epoch": 0.6314536713697364, "grad_norm": 0.8172178229074062, "learning_rate": 3.1581511873791495e-06, "loss": 0.4068, "step": 10081 }, { "epoch": 0.6315163093690788, "grad_norm": 0.8742128222748927, "learning_rate": 3.1572081597898714e-06, "loss": 0.4245, "step": 10082 }, { "epoch": 0.631578947368421, "grad_norm": 0.812115779353254, "learning_rate": 3.1562652080502483e-06, "loss": 0.3762, "step": 10083 }, { "epoch": 0.6316415853677634, "grad_norm": 0.6562375820679582, "learning_rate": 3.1553223321990915e-06, "loss": 0.4693, "step": 10084 }, { "epoch": 0.6317042233671056, "grad_norm": 0.817189336971027, "learning_rate": 3.154379532275209e-06, "loss": 0.384, "step": 10085 }, { "epoch": 0.631766861366448, "grad_norm": 0.65270300915403, "learning_rate": 3.1534368083174072e-06, "loss": 0.4532, "step": 10086 }, { "epoch": 0.6318294993657902, "grad_norm": 0.8687342304284852, "learning_rate": 3.15249416036449e-06, "loss": 0.411, "step": 10087 }, { "epoch": 0.6318921373651326, "grad_norm": 0.8594235047181206, "learning_rate": 3.1515515884552562e-06, "loss": 0.3836, "step": 10088 }, { "epoch": 0.6319547753644749, "grad_norm": 0.918977647967224, "learning_rate": 3.1506090926285026e-06, "loss": 0.4192, "step": 10089 }, { "epoch": 0.6320174133638171, "grad_norm": 0.8250795527758451, "learning_rate": 3.1496666729230206e-06, "loss": 0.4103, "step": 10090 }, { "epoch": 0.6320800513631595, "grad_norm": 0.8146944769177754, "learning_rate": 3.1487243293776027e-06, "loss": 0.3801, "step": 10091 }, { "epoch": 0.6321426893625017, "grad_norm": 0.8511479036631459, "learning_rate": 3.147782062031036e-06, "loss": 0.3718, "step": 10092 }, { "epoch": 0.6322053273618441, "grad_norm": 0.8653882607749145, "learning_rate": 3.1468398709221026e-06, "loss": 0.3946, "step": 10093 }, { "epoch": 0.6322679653611863, "grad_norm": 0.8293914299611647, "learning_rate": 3.145897756089583e-06, "loss": 0.4142, "step": 10094 }, { "epoch": 0.6323306033605287, "grad_norm": 0.7714493445151747, "learning_rate": 3.1449557175722556e-06, "loss": 0.3742, "step": 10095 }, { "epoch": 0.632393241359871, "grad_norm": 0.8210482695314857, "learning_rate": 3.1440137554088957e-06, "loss": 0.3978, "step": 10096 }, { "epoch": 0.6324558793592132, "grad_norm": 0.6312033095845699, "learning_rate": 3.1430718696382757e-06, "loss": 0.4646, "step": 10097 }, { "epoch": 0.6325185173585556, "grad_norm": 0.8176760248075898, "learning_rate": 3.142130060299161e-06, "loss": 0.344, "step": 10098 }, { "epoch": 0.6325811553578978, "grad_norm": 0.8545090759730612, "learning_rate": 3.141188327430318e-06, "loss": 0.4063, "step": 10099 }, { "epoch": 0.6326437933572402, "grad_norm": 0.8094477209303108, "learning_rate": 3.1402466710705087e-06, "loss": 0.3943, "step": 10100 }, { "epoch": 0.6327064313565824, "grad_norm": 0.8393830809184062, "learning_rate": 3.139305091258492e-06, "loss": 0.3951, "step": 10101 }, { "epoch": 0.6327690693559248, "grad_norm": 0.8299325756579354, "learning_rate": 3.138363588033023e-06, "loss": 0.3507, "step": 10102 }, { "epoch": 0.6328317073552671, "grad_norm": 0.6482010844011756, "learning_rate": 3.1374221614328525e-06, "loss": 0.4719, "step": 10103 }, { "epoch": 0.6328943453546094, "grad_norm": 0.7452811658556635, "learning_rate": 3.136480811496734e-06, "loss": 0.3768, "step": 10104 }, { "epoch": 0.6329569833539517, "grad_norm": 0.822923133000225, "learning_rate": 3.135539538263408e-06, "loss": 0.3324, "step": 10105 }, { "epoch": 0.6330196213532939, "grad_norm": 0.8699950973202858, "learning_rate": 3.134598341771623e-06, "loss": 0.4102, "step": 10106 }, { "epoch": 0.6330822593526363, "grad_norm": 0.8627909319810553, "learning_rate": 3.133657222060117e-06, "loss": 0.4233, "step": 10107 }, { "epoch": 0.6331448973519785, "grad_norm": 0.8408188348786505, "learning_rate": 3.132716179167624e-06, "loss": 0.3719, "step": 10108 }, { "epoch": 0.6332075353513209, "grad_norm": 0.8513108010489995, "learning_rate": 3.1317752131328817e-06, "loss": 0.426, "step": 10109 }, { "epoch": 0.6332701733506632, "grad_norm": 0.9282812919096408, "learning_rate": 3.130834323994618e-06, "loss": 0.4292, "step": 10110 }, { "epoch": 0.6333328113500055, "grad_norm": 0.8570684450355165, "learning_rate": 3.1298935117915597e-06, "loss": 0.3869, "step": 10111 }, { "epoch": 0.6333954493493478, "grad_norm": 0.8573332670252863, "learning_rate": 3.128952776562432e-06, "loss": 0.3744, "step": 10112 }, { "epoch": 0.6334580873486901, "grad_norm": 0.8857345117561461, "learning_rate": 3.1280121183459554e-06, "loss": 0.4103, "step": 10113 }, { "epoch": 0.6335207253480324, "grad_norm": 0.8606946999288179, "learning_rate": 3.127071537180846e-06, "loss": 0.3927, "step": 10114 }, { "epoch": 0.6335833633473746, "grad_norm": 0.8538680217288657, "learning_rate": 3.1261310331058214e-06, "loss": 0.3878, "step": 10115 }, { "epoch": 0.633646001346717, "grad_norm": 0.867028664126616, "learning_rate": 3.1251906061595905e-06, "loss": 0.3753, "step": 10116 }, { "epoch": 0.6337086393460593, "grad_norm": 0.8692211755909209, "learning_rate": 3.124250256380863e-06, "loss": 0.3851, "step": 10117 }, { "epoch": 0.6337712773454016, "grad_norm": 0.776317513181448, "learning_rate": 3.1233099838083436e-06, "loss": 0.3796, "step": 10118 }, { "epoch": 0.6338339153447439, "grad_norm": 0.9916166809884424, "learning_rate": 3.1223697884807323e-06, "loss": 0.3967, "step": 10119 }, { "epoch": 0.6338965533440862, "grad_norm": 0.8293383293314004, "learning_rate": 3.12142967043673e-06, "loss": 0.3976, "step": 10120 }, { "epoch": 0.6339591913434285, "grad_norm": 0.8736564337442472, "learning_rate": 3.1204896297150305e-06, "loss": 0.3743, "step": 10121 }, { "epoch": 0.6340218293427707, "grad_norm": 0.8572098473673238, "learning_rate": 3.1195496663543273e-06, "loss": 0.3737, "step": 10122 }, { "epoch": 0.6340844673421131, "grad_norm": 0.7968741248763062, "learning_rate": 3.118609780393307e-06, "loss": 0.3797, "step": 10123 }, { "epoch": 0.6341471053414554, "grad_norm": 0.7880267100178604, "learning_rate": 3.1176699718706586e-06, "loss": 0.3454, "step": 10124 }, { "epoch": 0.6342097433407977, "grad_norm": 0.8344006795597237, "learning_rate": 3.116730240825063e-06, "loss": 0.3708, "step": 10125 }, { "epoch": 0.63427238134014, "grad_norm": 0.8829717368556941, "learning_rate": 3.1157905872952017e-06, "loss": 0.4022, "step": 10126 }, { "epoch": 0.6343350193394823, "grad_norm": 0.8151762369248666, "learning_rate": 3.1148510113197493e-06, "loss": 0.3806, "step": 10127 }, { "epoch": 0.6343976573388246, "grad_norm": 0.8292154264055333, "learning_rate": 3.1139115129373785e-06, "loss": 0.3722, "step": 10128 }, { "epoch": 0.634460295338167, "grad_norm": 0.8563956080613868, "learning_rate": 3.1129720921867603e-06, "loss": 0.397, "step": 10129 }, { "epoch": 0.6345229333375092, "grad_norm": 0.8887833010728619, "learning_rate": 3.1120327491065607e-06, "loss": 0.4221, "step": 10130 }, { "epoch": 0.6345855713368515, "grad_norm": 0.9160778547876105, "learning_rate": 3.111093483735444e-06, "loss": 0.3987, "step": 10131 }, { "epoch": 0.6346482093361938, "grad_norm": 0.8161620613434719, "learning_rate": 3.110154296112068e-06, "loss": 0.3929, "step": 10132 }, { "epoch": 0.6347108473355361, "grad_norm": 0.8563702515480907, "learning_rate": 3.109215186275094e-06, "loss": 0.363, "step": 10133 }, { "epoch": 0.6347734853348784, "grad_norm": 0.8527330524004936, "learning_rate": 3.1082761542631736e-06, "loss": 0.3842, "step": 10134 }, { "epoch": 0.6348361233342207, "grad_norm": 0.9355372399768268, "learning_rate": 3.107337200114958e-06, "loss": 0.4189, "step": 10135 }, { "epoch": 0.634898761333563, "grad_norm": 0.8610878906906931, "learning_rate": 3.1063983238690943e-06, "loss": 0.3921, "step": 10136 }, { "epoch": 0.6349613993329053, "grad_norm": 0.9186969064026246, "learning_rate": 3.1054595255642266e-06, "loss": 0.425, "step": 10137 }, { "epoch": 0.6350240373322477, "grad_norm": 0.8258968837406847, "learning_rate": 3.1045208052389976e-06, "loss": 0.4187, "step": 10138 }, { "epoch": 0.6350866753315899, "grad_norm": 0.8750973125729861, "learning_rate": 3.103582162932044e-06, "loss": 0.4175, "step": 10139 }, { "epoch": 0.6351493133309322, "grad_norm": 0.8935285350108169, "learning_rate": 3.102643598682e-06, "loss": 0.3978, "step": 10140 }, { "epoch": 0.6352119513302745, "grad_norm": 0.863520941664485, "learning_rate": 3.101705112527497e-06, "loss": 0.3948, "step": 10141 }, { "epoch": 0.6352745893296168, "grad_norm": 0.7915669552393123, "learning_rate": 3.100766704507163e-06, "loss": 0.3874, "step": 10142 }, { "epoch": 0.6353372273289591, "grad_norm": 0.8687714014165596, "learning_rate": 3.099828374659626e-06, "loss": 0.4323, "step": 10143 }, { "epoch": 0.6353998653283014, "grad_norm": 0.8153647665392386, "learning_rate": 3.098890123023506e-06, "loss": 0.3739, "step": 10144 }, { "epoch": 0.6354625033276438, "grad_norm": 0.8228468909055324, "learning_rate": 3.097951949637421e-06, "loss": 0.3951, "step": 10145 }, { "epoch": 0.635525141326986, "grad_norm": 0.8206275120264467, "learning_rate": 3.097013854539986e-06, "loss": 0.3824, "step": 10146 }, { "epoch": 0.6355877793263283, "grad_norm": 0.8385255876178358, "learning_rate": 3.0960758377698147e-06, "loss": 0.4036, "step": 10147 }, { "epoch": 0.6356504173256706, "grad_norm": 0.8120078358477439, "learning_rate": 3.0951378993655156e-06, "loss": 0.3904, "step": 10148 }, { "epoch": 0.6357130553250129, "grad_norm": 0.9634750338404504, "learning_rate": 3.0942000393656923e-06, "loss": 0.3886, "step": 10149 }, { "epoch": 0.6357756933243552, "grad_norm": 0.6075010455549793, "learning_rate": 3.093262257808951e-06, "loss": 0.4395, "step": 10150 }, { "epoch": 0.6358383313236975, "grad_norm": 0.8728127375422928, "learning_rate": 3.092324554733887e-06, "loss": 0.4108, "step": 10151 }, { "epoch": 0.6359009693230399, "grad_norm": 0.8329886809715121, "learning_rate": 3.0913869301791e-06, "loss": 0.3705, "step": 10152 }, { "epoch": 0.6359636073223821, "grad_norm": 0.8047235107840978, "learning_rate": 3.0904493841831815e-06, "loss": 0.3931, "step": 10153 }, { "epoch": 0.6360262453217245, "grad_norm": 0.83202698516249, "learning_rate": 3.089511916784721e-06, "loss": 0.401, "step": 10154 }, { "epoch": 0.6360888833210667, "grad_norm": 0.8537680927700864, "learning_rate": 3.0885745280223035e-06, "loss": 0.4454, "step": 10155 }, { "epoch": 0.636151521320409, "grad_norm": 0.8434581589099183, "learning_rate": 3.0876372179345138e-06, "loss": 0.3623, "step": 10156 }, { "epoch": 0.6362141593197513, "grad_norm": 0.8085295402698722, "learning_rate": 3.0866999865599317e-06, "loss": 0.3892, "step": 10157 }, { "epoch": 0.6362767973190936, "grad_norm": 0.8288799077801371, "learning_rate": 3.0857628339371313e-06, "loss": 0.4035, "step": 10158 }, { "epoch": 0.636339435318436, "grad_norm": 0.8667876815121858, "learning_rate": 3.0848257601046907e-06, "loss": 0.3631, "step": 10159 }, { "epoch": 0.6364020733177782, "grad_norm": 0.7466847392964676, "learning_rate": 3.083888765101174e-06, "loss": 0.3354, "step": 10160 }, { "epoch": 0.6364647113171206, "grad_norm": 0.91162342921726, "learning_rate": 3.082951848965153e-06, "loss": 0.4476, "step": 10161 }, { "epoch": 0.6365273493164628, "grad_norm": 0.8319374867333426, "learning_rate": 3.082015011735191e-06, "loss": 0.366, "step": 10162 }, { "epoch": 0.6365899873158052, "grad_norm": 0.8556993880960331, "learning_rate": 3.081078253449845e-06, "loss": 0.4229, "step": 10163 }, { "epoch": 0.6366526253151474, "grad_norm": 0.8827044386602175, "learning_rate": 3.0801415741476757e-06, "loss": 0.412, "step": 10164 }, { "epoch": 0.6367152633144897, "grad_norm": 0.8404787404792975, "learning_rate": 3.0792049738672358e-06, "loss": 0.3965, "step": 10165 }, { "epoch": 0.636777901313832, "grad_norm": 0.6059610141413021, "learning_rate": 3.0782684526470753e-06, "loss": 0.456, "step": 10166 }, { "epoch": 0.6368405393131743, "grad_norm": 0.7947946918997177, "learning_rate": 3.077332010525741e-06, "loss": 0.3598, "step": 10167 }, { "epoch": 0.6369031773125167, "grad_norm": 0.8604210119028428, "learning_rate": 3.0763956475417795e-06, "loss": 0.42, "step": 10168 }, { "epoch": 0.6369658153118589, "grad_norm": 0.8456807787072534, "learning_rate": 3.0754593637337276e-06, "loss": 0.3829, "step": 10169 }, { "epoch": 0.6370284533112013, "grad_norm": 0.8276554712237506, "learning_rate": 3.0745231591401283e-06, "loss": 0.3749, "step": 10170 }, { "epoch": 0.6370910913105435, "grad_norm": 0.8130443046245603, "learning_rate": 3.0735870337995122e-06, "loss": 0.3869, "step": 10171 }, { "epoch": 0.6371537293098859, "grad_norm": 0.8193633808357939, "learning_rate": 3.0726509877504106e-06, "loss": 0.3811, "step": 10172 }, { "epoch": 0.6372163673092281, "grad_norm": 0.8970015467150805, "learning_rate": 3.071715021031353e-06, "loss": 0.3901, "step": 10173 }, { "epoch": 0.6372790053085704, "grad_norm": 0.8503761006171168, "learning_rate": 3.0707791336808636e-06, "loss": 0.4044, "step": 10174 }, { "epoch": 0.6373416433079128, "grad_norm": 0.8480174247491218, "learning_rate": 3.0698433257374618e-06, "loss": 0.4097, "step": 10175 }, { "epoch": 0.637404281307255, "grad_norm": 0.7551723417216355, "learning_rate": 3.0689075972396674e-06, "loss": 0.381, "step": 10176 }, { "epoch": 0.6374669193065974, "grad_norm": 0.8717668954014587, "learning_rate": 3.0679719482259956e-06, "loss": 0.4062, "step": 10177 }, { "epoch": 0.6375295573059396, "grad_norm": 0.8093850164310228, "learning_rate": 3.0670363787349545e-06, "loss": 0.3632, "step": 10178 }, { "epoch": 0.637592195305282, "grad_norm": 0.7809645827650716, "learning_rate": 3.066100888805057e-06, "loss": 0.3794, "step": 10179 }, { "epoch": 0.6376548333046242, "grad_norm": 0.8592304401112142, "learning_rate": 3.0651654784748062e-06, "loss": 0.3938, "step": 10180 }, { "epoch": 0.6377174713039665, "grad_norm": 0.8806990160534268, "learning_rate": 3.064230147782702e-06, "loss": 0.4228, "step": 10181 }, { "epoch": 0.6377801093033089, "grad_norm": 0.7601102239837214, "learning_rate": 3.0632948967672455e-06, "loss": 0.3453, "step": 10182 }, { "epoch": 0.6378427473026511, "grad_norm": 0.8740076252137299, "learning_rate": 3.0623597254669303e-06, "loss": 0.4086, "step": 10183 }, { "epoch": 0.6379053853019935, "grad_norm": 0.9390072404268757, "learning_rate": 3.0614246339202474e-06, "loss": 0.4041, "step": 10184 }, { "epoch": 0.6379680233013357, "grad_norm": 0.9376314916272918, "learning_rate": 3.0604896221656877e-06, "loss": 0.4499, "step": 10185 }, { "epoch": 0.6380306613006781, "grad_norm": 0.8305228041417172, "learning_rate": 3.0595546902417352e-06, "loss": 0.3809, "step": 10186 }, { "epoch": 0.6380932993000203, "grad_norm": 0.6875189329131757, "learning_rate": 3.0586198381868707e-06, "loss": 0.4483, "step": 10187 }, { "epoch": 0.6381559372993627, "grad_norm": 0.8852400692734131, "learning_rate": 3.0576850660395727e-06, "loss": 0.4251, "step": 10188 }, { "epoch": 0.638218575298705, "grad_norm": 0.833732456921471, "learning_rate": 3.05675037383832e-06, "loss": 0.3991, "step": 10189 }, { "epoch": 0.6382812132980472, "grad_norm": 0.8290947633853082, "learning_rate": 3.0558157616215823e-06, "loss": 0.3686, "step": 10190 }, { "epoch": 0.6383438512973896, "grad_norm": 0.8259933468603977, "learning_rate": 3.0548812294278285e-06, "loss": 0.3974, "step": 10191 }, { "epoch": 0.6384064892967318, "grad_norm": 0.8497706151403738, "learning_rate": 3.053946777295525e-06, "loss": 0.423, "step": 10192 }, { "epoch": 0.6384691272960742, "grad_norm": 0.6514798825360523, "learning_rate": 3.0530124052631323e-06, "loss": 0.4402, "step": 10193 }, { "epoch": 0.6385317652954164, "grad_norm": 0.8700521855968011, "learning_rate": 3.052078113369111e-06, "loss": 0.3955, "step": 10194 }, { "epoch": 0.6385944032947588, "grad_norm": 0.5571768141074077, "learning_rate": 3.051143901651916e-06, "loss": 0.4506, "step": 10195 }, { "epoch": 0.6386570412941011, "grad_norm": 0.8202136874803999, "learning_rate": 3.05020977015e-06, "loss": 0.3804, "step": 10196 }, { "epoch": 0.6387196792934434, "grad_norm": 0.618696206763964, "learning_rate": 3.0492757189018105e-06, "loss": 0.4186, "step": 10197 }, { "epoch": 0.6387823172927857, "grad_norm": 0.8012566614477595, "learning_rate": 3.048341747945795e-06, "loss": 0.3768, "step": 10198 }, { "epoch": 0.6388449552921279, "grad_norm": 0.8735894933965009, "learning_rate": 3.047407857320396e-06, "loss": 0.4151, "step": 10199 }, { "epoch": 0.6389075932914703, "grad_norm": 0.8492221903800766, "learning_rate": 3.0464740470640526e-06, "loss": 0.4049, "step": 10200 }, { "epoch": 0.6389702312908125, "grad_norm": 0.8974610351503725, "learning_rate": 3.045540317215199e-06, "loss": 0.4275, "step": 10201 }, { "epoch": 0.6390328692901549, "grad_norm": 0.7706889370358103, "learning_rate": 3.0446066678122688e-06, "loss": 0.3693, "step": 10202 }, { "epoch": 0.6390955072894972, "grad_norm": 0.9060423603262582, "learning_rate": 3.043673098893691e-06, "loss": 0.4184, "step": 10203 }, { "epoch": 0.6391581452888395, "grad_norm": 0.8288596014263737, "learning_rate": 3.0427396104978928e-06, "loss": 0.3634, "step": 10204 }, { "epoch": 0.6392207832881818, "grad_norm": 0.823938608798319, "learning_rate": 3.0418062026632942e-06, "loss": 0.3861, "step": 10205 }, { "epoch": 0.639283421287524, "grad_norm": 0.8659463115225638, "learning_rate": 3.0408728754283144e-06, "loss": 0.4026, "step": 10206 }, { "epoch": 0.6393460592868664, "grad_norm": 0.8111319010151798, "learning_rate": 3.039939628831373e-06, "loss": 0.3781, "step": 10207 }, { "epoch": 0.6394086972862086, "grad_norm": 0.8875240495465351, "learning_rate": 3.0390064629108797e-06, "loss": 0.4019, "step": 10208 }, { "epoch": 0.639471335285551, "grad_norm": 0.8373148879419275, "learning_rate": 3.038073377705245e-06, "loss": 0.3671, "step": 10209 }, { "epoch": 0.6395339732848933, "grad_norm": 0.8697757604930156, "learning_rate": 3.0371403732528727e-06, "loss": 0.4189, "step": 10210 }, { "epoch": 0.6395966112842356, "grad_norm": 0.8198972035096292, "learning_rate": 3.0362074495921684e-06, "loss": 0.3698, "step": 10211 }, { "epoch": 0.6396592492835779, "grad_norm": 0.7943369967877525, "learning_rate": 3.0352746067615296e-06, "loss": 0.4118, "step": 10212 }, { "epoch": 0.6397218872829202, "grad_norm": 0.9104902390300931, "learning_rate": 3.0343418447993535e-06, "loss": 0.4146, "step": 10213 }, { "epoch": 0.6397845252822625, "grad_norm": 0.8507042517698316, "learning_rate": 3.03340916374403e-06, "loss": 0.4159, "step": 10214 }, { "epoch": 0.6398471632816047, "grad_norm": 0.8684223392559914, "learning_rate": 3.032476563633949e-06, "loss": 0.433, "step": 10215 }, { "epoch": 0.6399098012809471, "grad_norm": 0.7748748302389251, "learning_rate": 3.0315440445075004e-06, "loss": 0.376, "step": 10216 }, { "epoch": 0.6399724392802894, "grad_norm": 0.8287748683338101, "learning_rate": 3.0306116064030642e-06, "loss": 0.3435, "step": 10217 }, { "epoch": 0.6400350772796317, "grad_norm": 0.8170381355859425, "learning_rate": 3.0296792493590204e-06, "loss": 0.3804, "step": 10218 }, { "epoch": 0.640097715278974, "grad_norm": 0.7872027261340864, "learning_rate": 3.028746973413743e-06, "loss": 0.3816, "step": 10219 }, { "epoch": 0.6401603532783163, "grad_norm": 0.8509654855802199, "learning_rate": 3.0278147786056066e-06, "loss": 0.4352, "step": 10220 }, { "epoch": 0.6402229912776586, "grad_norm": 0.8614318987503798, "learning_rate": 3.026882664972981e-06, "loss": 0.3841, "step": 10221 }, { "epoch": 0.640285629277001, "grad_norm": 0.6106593986566886, "learning_rate": 3.0259506325542293e-06, "loss": 0.4534, "step": 10222 }, { "epoch": 0.6403482672763432, "grad_norm": 0.8502064654406232, "learning_rate": 3.0250186813877168e-06, "loss": 0.4053, "step": 10223 }, { "epoch": 0.6404109052756854, "grad_norm": 0.8478353167976594, "learning_rate": 3.0240868115118007e-06, "loss": 0.4068, "step": 10224 }, { "epoch": 0.6404735432750278, "grad_norm": 0.8054464306230693, "learning_rate": 3.02315502296484e-06, "loss": 0.3839, "step": 10225 }, { "epoch": 0.6405361812743701, "grad_norm": 0.8122833801971849, "learning_rate": 3.0222233157851847e-06, "loss": 0.3682, "step": 10226 }, { "epoch": 0.6405988192737124, "grad_norm": 0.8743379113091581, "learning_rate": 3.0212916900111846e-06, "loss": 0.3979, "step": 10227 }, { "epoch": 0.6406614572730547, "grad_norm": 0.852205265903732, "learning_rate": 3.0203601456811865e-06, "loss": 0.3866, "step": 10228 }, { "epoch": 0.640724095272397, "grad_norm": 0.6411043661920018, "learning_rate": 3.0194286828335324e-06, "loss": 0.4538, "step": 10229 }, { "epoch": 0.6407867332717393, "grad_norm": 0.781394022072226, "learning_rate": 3.018497301506561e-06, "loss": 0.3522, "step": 10230 }, { "epoch": 0.6408493712710815, "grad_norm": 0.8756165132589925, "learning_rate": 3.0175660017386076e-06, "loss": 0.4311, "step": 10231 }, { "epoch": 0.6409120092704239, "grad_norm": 0.856223373565613, "learning_rate": 3.0166347835680073e-06, "loss": 0.3902, "step": 10232 }, { "epoch": 0.6409746472697662, "grad_norm": 0.8845656275464507, "learning_rate": 3.0157036470330864e-06, "loss": 0.4271, "step": 10233 }, { "epoch": 0.6410372852691085, "grad_norm": 0.8616938703306906, "learning_rate": 3.0147725921721705e-06, "loss": 0.3971, "step": 10234 }, { "epoch": 0.6410999232684508, "grad_norm": 0.8096566021778046, "learning_rate": 3.013841619023584e-06, "loss": 0.3778, "step": 10235 }, { "epoch": 0.6411625612677931, "grad_norm": 0.6474775949506898, "learning_rate": 3.0129107276256453e-06, "loss": 0.4489, "step": 10236 }, { "epoch": 0.6412251992671354, "grad_norm": 0.8065594888975145, "learning_rate": 3.0119799180166704e-06, "loss": 0.4193, "step": 10237 }, { "epoch": 0.6412878372664778, "grad_norm": 0.9121186361982992, "learning_rate": 3.0110491902349716e-06, "loss": 0.3722, "step": 10238 }, { "epoch": 0.64135047526582, "grad_norm": 0.8370006868985697, "learning_rate": 3.0101185443188564e-06, "loss": 0.3574, "step": 10239 }, { "epoch": 0.6414131132651623, "grad_norm": 0.8291413182967841, "learning_rate": 3.0091879803066305e-06, "loss": 0.3824, "step": 10240 }, { "epoch": 0.6414757512645046, "grad_norm": 0.8137008719024821, "learning_rate": 3.008257498236598e-06, "loss": 0.36, "step": 10241 }, { "epoch": 0.6415383892638469, "grad_norm": 0.848797315942417, "learning_rate": 3.0073270981470566e-06, "loss": 0.4721, "step": 10242 }, { "epoch": 0.6416010272631892, "grad_norm": 0.9386103922984361, "learning_rate": 3.0063967800762993e-06, "loss": 0.4341, "step": 10243 }, { "epoch": 0.6416636652625315, "grad_norm": 0.8071919842227091, "learning_rate": 3.005466544062623e-06, "loss": 0.4243, "step": 10244 }, { "epoch": 0.6417263032618739, "grad_norm": 0.8393665216047513, "learning_rate": 3.004536390144313e-06, "loss": 0.3961, "step": 10245 }, { "epoch": 0.6417889412612161, "grad_norm": 0.8497316644048435, "learning_rate": 3.0036063183596563e-06, "loss": 0.3877, "step": 10246 }, { "epoch": 0.6418515792605585, "grad_norm": 0.781170171249849, "learning_rate": 3.002676328746934e-06, "loss": 0.3647, "step": 10247 }, { "epoch": 0.6419142172599007, "grad_norm": 0.8494703007875176, "learning_rate": 3.001746421344424e-06, "loss": 0.384, "step": 10248 }, { "epoch": 0.641976855259243, "grad_norm": 0.8927099262988704, "learning_rate": 3.0008165961904035e-06, "loss": 0.4076, "step": 10249 }, { "epoch": 0.6420394932585853, "grad_norm": 0.8574776770335985, "learning_rate": 2.999886853323143e-06, "loss": 0.4291, "step": 10250 }, { "epoch": 0.6421021312579276, "grad_norm": 0.7970548661309891, "learning_rate": 2.9989571927809103e-06, "loss": 0.4134, "step": 10251 }, { "epoch": 0.64216476925727, "grad_norm": 0.7576989250739656, "learning_rate": 2.998027614601969e-06, "loss": 0.3404, "step": 10252 }, { "epoch": 0.6422274072566122, "grad_norm": 0.8543811159948124, "learning_rate": 2.9970981188245834e-06, "loss": 0.3718, "step": 10253 }, { "epoch": 0.6422900452559546, "grad_norm": 0.8522154150798915, "learning_rate": 2.9961687054870127e-06, "loss": 0.3484, "step": 10254 }, { "epoch": 0.6423526832552968, "grad_norm": 0.8780909610561823, "learning_rate": 2.9952393746275096e-06, "loss": 0.3666, "step": 10255 }, { "epoch": 0.6424153212546392, "grad_norm": 0.8252485115581059, "learning_rate": 2.9943101262843256e-06, "loss": 0.3792, "step": 10256 }, { "epoch": 0.6424779592539814, "grad_norm": 0.8496886147729128, "learning_rate": 2.9933809604957086e-06, "loss": 0.4048, "step": 10257 }, { "epoch": 0.6425405972533237, "grad_norm": 0.8393120334782985, "learning_rate": 2.9924518772999047e-06, "loss": 0.4129, "step": 10258 }, { "epoch": 0.642603235252666, "grad_norm": 0.8393416019209339, "learning_rate": 2.991522876735154e-06, "loss": 0.3729, "step": 10259 }, { "epoch": 0.6426658732520083, "grad_norm": 0.8225849753187521, "learning_rate": 2.9905939588396935e-06, "loss": 0.3523, "step": 10260 }, { "epoch": 0.6427285112513507, "grad_norm": 0.774592759946924, "learning_rate": 2.9896651236517586e-06, "loss": 0.3716, "step": 10261 }, { "epoch": 0.6427911492506929, "grad_norm": 0.9099907599973424, "learning_rate": 2.9887363712095807e-06, "loss": 0.4174, "step": 10262 }, { "epoch": 0.6428537872500353, "grad_norm": 0.8464504575992209, "learning_rate": 2.9878077015513874e-06, "loss": 0.4146, "step": 10263 }, { "epoch": 0.6429164252493775, "grad_norm": 0.8723521932922328, "learning_rate": 2.986879114715403e-06, "loss": 0.3962, "step": 10264 }, { "epoch": 0.6429790632487198, "grad_norm": 0.8489252520227913, "learning_rate": 2.9859506107398478e-06, "loss": 0.46, "step": 10265 }, { "epoch": 0.6430417012480621, "grad_norm": 0.9120070548478395, "learning_rate": 2.9850221896629385e-06, "loss": 0.4255, "step": 10266 }, { "epoch": 0.6431043392474044, "grad_norm": 0.9055479845802242, "learning_rate": 2.9840938515228906e-06, "loss": 0.4014, "step": 10267 }, { "epoch": 0.6431669772467468, "grad_norm": 0.8102025509213342, "learning_rate": 2.9831655963579144e-06, "loss": 0.4171, "step": 10268 }, { "epoch": 0.643229615246089, "grad_norm": 0.8557794590223856, "learning_rate": 2.9822374242062146e-06, "loss": 0.3735, "step": 10269 }, { "epoch": 0.6432922532454314, "grad_norm": 0.891417691823911, "learning_rate": 2.981309335105998e-06, "loss": 0.4229, "step": 10270 }, { "epoch": 0.6433548912447736, "grad_norm": 0.8168693356465079, "learning_rate": 2.980381329095463e-06, "loss": 0.3733, "step": 10271 }, { "epoch": 0.643417529244116, "grad_norm": 0.8949759401918692, "learning_rate": 2.9794534062128088e-06, "loss": 0.4081, "step": 10272 }, { "epoch": 0.6434801672434582, "grad_norm": 0.8656555314200102, "learning_rate": 2.9785255664962276e-06, "loss": 0.373, "step": 10273 }, { "epoch": 0.6435428052428005, "grad_norm": 0.8631045384608343, "learning_rate": 2.977597809983908e-06, "loss": 0.3808, "step": 10274 }, { "epoch": 0.6436054432421429, "grad_norm": 0.7972156564728105, "learning_rate": 2.9766701367140394e-06, "loss": 0.3979, "step": 10275 }, { "epoch": 0.6436680812414851, "grad_norm": 0.7969857469671194, "learning_rate": 2.9757425467248024e-06, "loss": 0.3898, "step": 10276 }, { "epoch": 0.6437307192408275, "grad_norm": 0.7993441197206472, "learning_rate": 2.974815040054379e-06, "loss": 0.4062, "step": 10277 }, { "epoch": 0.6437933572401697, "grad_norm": 0.7304738659173694, "learning_rate": 2.973887616740943e-06, "loss": 0.3451, "step": 10278 }, { "epoch": 0.6438559952395121, "grad_norm": 0.9049949035000983, "learning_rate": 2.9729602768226702e-06, "loss": 0.4217, "step": 10279 }, { "epoch": 0.6439186332388543, "grad_norm": 0.8726994574626912, "learning_rate": 2.9720330203377256e-06, "loss": 0.345, "step": 10280 }, { "epoch": 0.6439812712381967, "grad_norm": 1.1129882886445541, "learning_rate": 2.9711058473242814e-06, "loss": 0.4143, "step": 10281 }, { "epoch": 0.644043909237539, "grad_norm": 0.81683238055874, "learning_rate": 2.9701787578204965e-06, "loss": 0.3503, "step": 10282 }, { "epoch": 0.6441065472368812, "grad_norm": 0.7424718657324446, "learning_rate": 2.96925175186453e-06, "loss": 0.3668, "step": 10283 }, { "epoch": 0.6441691852362236, "grad_norm": 0.8393890094020249, "learning_rate": 2.9683248294945387e-06, "loss": 0.3722, "step": 10284 }, { "epoch": 0.6442318232355658, "grad_norm": 0.8023857287996329, "learning_rate": 2.9673979907486753e-06, "loss": 0.3517, "step": 10285 }, { "epoch": 0.6442944612349082, "grad_norm": 0.7749960276712824, "learning_rate": 2.9664712356650867e-06, "loss": 0.3603, "step": 10286 }, { "epoch": 0.6443570992342504, "grad_norm": 0.8248568614110336, "learning_rate": 2.96554456428192e-06, "loss": 0.4021, "step": 10287 }, { "epoch": 0.6444197372335928, "grad_norm": 0.9134601352400841, "learning_rate": 2.9646179766373173e-06, "loss": 0.4385, "step": 10288 }, { "epoch": 0.6444823752329351, "grad_norm": 0.6827649474643953, "learning_rate": 2.9636914727694143e-06, "loss": 0.456, "step": 10289 }, { "epoch": 0.6445450132322773, "grad_norm": 0.8693053195639877, "learning_rate": 2.96276505271635e-06, "loss": 0.402, "step": 10290 }, { "epoch": 0.6446076512316197, "grad_norm": 0.6781345810487694, "learning_rate": 2.961838716516253e-06, "loss": 0.4312, "step": 10291 }, { "epoch": 0.6446702892309619, "grad_norm": 0.962944011488053, "learning_rate": 2.960912464207254e-06, "loss": 0.4521, "step": 10292 }, { "epoch": 0.6447329272303043, "grad_norm": 0.9026021508867224, "learning_rate": 2.9599862958274766e-06, "loss": 0.3917, "step": 10293 }, { "epoch": 0.6447955652296465, "grad_norm": 0.8855419089219417, "learning_rate": 2.9590602114150423e-06, "loss": 0.4005, "step": 10294 }, { "epoch": 0.6448582032289889, "grad_norm": 0.8110215222370898, "learning_rate": 2.958134211008067e-06, "loss": 0.4069, "step": 10295 }, { "epoch": 0.6449208412283312, "grad_norm": 0.8611409868941502, "learning_rate": 2.957208294644667e-06, "loss": 0.4168, "step": 10296 }, { "epoch": 0.6449834792276735, "grad_norm": 0.8315556443618934, "learning_rate": 2.9562824623629537e-06, "loss": 0.4167, "step": 10297 }, { "epoch": 0.6450461172270158, "grad_norm": 0.8429850727787845, "learning_rate": 2.955356714201031e-06, "loss": 0.3842, "step": 10298 }, { "epoch": 0.645108755226358, "grad_norm": 0.8080620988163791, "learning_rate": 2.9544310501970074e-06, "loss": 0.3842, "step": 10299 }, { "epoch": 0.6451713932257004, "grad_norm": 0.8733440553156026, "learning_rate": 2.953505470388981e-06, "loss": 0.4157, "step": 10300 }, { "epoch": 0.6452340312250426, "grad_norm": 0.8806759358769563, "learning_rate": 2.95257997481505e-06, "loss": 0.3879, "step": 10301 }, { "epoch": 0.645296669224385, "grad_norm": 0.8258119587047822, "learning_rate": 2.9516545635133076e-06, "loss": 0.4152, "step": 10302 }, { "epoch": 0.6453593072237273, "grad_norm": 0.7848702807496213, "learning_rate": 2.950729236521843e-06, "loss": 0.4331, "step": 10303 }, { "epoch": 0.6454219452230696, "grad_norm": 0.7956511194903353, "learning_rate": 2.9498039938787426e-06, "loss": 0.3808, "step": 10304 }, { "epoch": 0.6454845832224119, "grad_norm": 0.879989382318073, "learning_rate": 2.9488788356220917e-06, "loss": 0.4216, "step": 10305 }, { "epoch": 0.6455472212217542, "grad_norm": 0.8857677088891731, "learning_rate": 2.9479537617899678e-06, "loss": 0.4363, "step": 10306 }, { "epoch": 0.6456098592210965, "grad_norm": 0.7971509826507288, "learning_rate": 2.9470287724204465e-06, "loss": 0.4065, "step": 10307 }, { "epoch": 0.6456724972204387, "grad_norm": 0.7968974309148572, "learning_rate": 2.946103867551604e-06, "loss": 0.3939, "step": 10308 }, { "epoch": 0.6457351352197811, "grad_norm": 0.6619685586891969, "learning_rate": 2.9451790472215064e-06, "loss": 0.4598, "step": 10309 }, { "epoch": 0.6457977732191233, "grad_norm": 0.8044010442450343, "learning_rate": 2.944254311468222e-06, "loss": 0.4132, "step": 10310 }, { "epoch": 0.6458604112184657, "grad_norm": 0.909642161256398, "learning_rate": 2.943329660329811e-06, "loss": 0.3797, "step": 10311 }, { "epoch": 0.645923049217808, "grad_norm": 0.8221438122054281, "learning_rate": 2.9424050938443327e-06, "loss": 0.3669, "step": 10312 }, { "epoch": 0.6459856872171503, "grad_norm": 0.8488889132447285, "learning_rate": 2.941480612049843e-06, "loss": 0.3947, "step": 10313 }, { "epoch": 0.6460483252164926, "grad_norm": 0.8524910115824746, "learning_rate": 2.9405562149843938e-06, "loss": 0.4013, "step": 10314 }, { "epoch": 0.6461109632158348, "grad_norm": 0.8844814306399492, "learning_rate": 2.9396319026860324e-06, "loss": 0.3921, "step": 10315 }, { "epoch": 0.6461736012151772, "grad_norm": 0.7854301659094172, "learning_rate": 2.938707675192803e-06, "loss": 0.3591, "step": 10316 }, { "epoch": 0.6462362392145194, "grad_norm": 0.8504965802041341, "learning_rate": 2.937783532542749e-06, "loss": 0.3955, "step": 10317 }, { "epoch": 0.6462988772138618, "grad_norm": 0.8086124692608985, "learning_rate": 2.936859474773909e-06, "loss": 0.3891, "step": 10318 }, { "epoch": 0.6463615152132041, "grad_norm": 0.794207150205077, "learning_rate": 2.935935501924315e-06, "loss": 0.373, "step": 10319 }, { "epoch": 0.6464241532125464, "grad_norm": 0.8875678962695084, "learning_rate": 2.9350116140319995e-06, "loss": 0.4041, "step": 10320 }, { "epoch": 0.6464867912118887, "grad_norm": 0.9006211715410856, "learning_rate": 2.934087811134988e-06, "loss": 0.3771, "step": 10321 }, { "epoch": 0.646549429211231, "grad_norm": 0.8415219035225022, "learning_rate": 2.933164093271307e-06, "loss": 0.4228, "step": 10322 }, { "epoch": 0.6466120672105733, "grad_norm": 0.8285426493375389, "learning_rate": 2.9322404604789754e-06, "loss": 0.3666, "step": 10323 }, { "epoch": 0.6466747052099155, "grad_norm": 0.8290085358984696, "learning_rate": 2.93131691279601e-06, "loss": 0.3676, "step": 10324 }, { "epoch": 0.6467373432092579, "grad_norm": 0.8481468063310242, "learning_rate": 2.9303934502604238e-06, "loss": 0.4214, "step": 10325 }, { "epoch": 0.6467999812086002, "grad_norm": 0.8448333199855925, "learning_rate": 2.929470072910226e-06, "loss": 0.3844, "step": 10326 }, { "epoch": 0.6468626192079425, "grad_norm": 0.8320634043379745, "learning_rate": 2.9285467807834266e-06, "loss": 0.4009, "step": 10327 }, { "epoch": 0.6469252572072848, "grad_norm": 0.8274107996728112, "learning_rate": 2.9276235739180257e-06, "loss": 0.3857, "step": 10328 }, { "epoch": 0.6469878952066271, "grad_norm": 0.9178922706531921, "learning_rate": 2.9267004523520237e-06, "loss": 0.4052, "step": 10329 }, { "epoch": 0.6470505332059694, "grad_norm": 0.8405711063514392, "learning_rate": 2.9257774161234146e-06, "loss": 0.4043, "step": 10330 }, { "epoch": 0.6471131712053118, "grad_norm": 0.8040466906975114, "learning_rate": 2.9248544652701934e-06, "loss": 0.3949, "step": 10331 }, { "epoch": 0.647175809204654, "grad_norm": 0.8752371576569937, "learning_rate": 2.9239315998303473e-06, "loss": 0.4544, "step": 10332 }, { "epoch": 0.6472384472039963, "grad_norm": 0.8908212459330261, "learning_rate": 2.9230088198418607e-06, "loss": 0.4076, "step": 10333 }, { "epoch": 0.6473010852033386, "grad_norm": 0.8178052810762054, "learning_rate": 2.922086125342718e-06, "loss": 0.3991, "step": 10334 }, { "epoch": 0.6473637232026809, "grad_norm": 0.9519684117357654, "learning_rate": 2.921163516370894e-06, "loss": 0.4422, "step": 10335 }, { "epoch": 0.6474263612020232, "grad_norm": 0.803030541899218, "learning_rate": 2.920240992964367e-06, "loss": 0.3689, "step": 10336 }, { "epoch": 0.6474889992013655, "grad_norm": 0.8716630519113805, "learning_rate": 2.919318555161107e-06, "loss": 0.3886, "step": 10337 }, { "epoch": 0.6475516372007079, "grad_norm": 0.8907214819312874, "learning_rate": 2.918396202999082e-06, "loss": 0.4277, "step": 10338 }, { "epoch": 0.6476142752000501, "grad_norm": 0.8768206537179998, "learning_rate": 2.917473936516255e-06, "loss": 0.4065, "step": 10339 }, { "epoch": 0.6476769131993924, "grad_norm": 0.867673632825933, "learning_rate": 2.9165517557505853e-06, "loss": 0.3917, "step": 10340 }, { "epoch": 0.6477395511987347, "grad_norm": 0.9264809550224065, "learning_rate": 2.9156296607400346e-06, "loss": 0.3989, "step": 10341 }, { "epoch": 0.647802189198077, "grad_norm": 0.8386319277767555, "learning_rate": 2.914707651522554e-06, "loss": 0.3914, "step": 10342 }, { "epoch": 0.6478648271974193, "grad_norm": 0.9494762896867374, "learning_rate": 2.9137857281360926e-06, "loss": 0.399, "step": 10343 }, { "epoch": 0.6479274651967616, "grad_norm": 0.8515455839039108, "learning_rate": 2.9128638906185964e-06, "loss": 0.4027, "step": 10344 }, { "epoch": 0.647990103196104, "grad_norm": 0.8992388241834988, "learning_rate": 2.9119421390080118e-06, "loss": 0.38, "step": 10345 }, { "epoch": 0.6480527411954462, "grad_norm": 0.8310640961699087, "learning_rate": 2.9110204733422765e-06, "loss": 0.4095, "step": 10346 }, { "epoch": 0.6481153791947886, "grad_norm": 0.8142870392487495, "learning_rate": 2.9100988936593254e-06, "loss": 0.3992, "step": 10347 }, { "epoch": 0.6481780171941308, "grad_norm": 0.8067688999593547, "learning_rate": 2.9091773999970928e-06, "loss": 0.3879, "step": 10348 }, { "epoch": 0.6482406551934731, "grad_norm": 0.8056179737525891, "learning_rate": 2.9082559923935038e-06, "loss": 0.3619, "step": 10349 }, { "epoch": 0.6483032931928154, "grad_norm": 0.8395649785630387, "learning_rate": 2.9073346708864893e-06, "loss": 0.4126, "step": 10350 }, { "epoch": 0.6483659311921577, "grad_norm": 0.8628716660130868, "learning_rate": 2.9064134355139673e-06, "loss": 0.4228, "step": 10351 }, { "epoch": 0.6484285691915, "grad_norm": 0.8260663583274955, "learning_rate": 2.9054922863138568e-06, "loss": 0.393, "step": 10352 }, { "epoch": 0.6484912071908423, "grad_norm": 0.8344115524617474, "learning_rate": 2.9045712233240714e-06, "loss": 0.4147, "step": 10353 }, { "epoch": 0.6485538451901847, "grad_norm": 0.8358296308118945, "learning_rate": 2.9036502465825244e-06, "loss": 0.4077, "step": 10354 }, { "epoch": 0.6486164831895269, "grad_norm": 0.8437963542236553, "learning_rate": 2.902729356127123e-06, "loss": 0.4231, "step": 10355 }, { "epoch": 0.6486791211888693, "grad_norm": 0.8912944601692091, "learning_rate": 2.9018085519957708e-06, "loss": 0.4127, "step": 10356 }, { "epoch": 0.6487417591882115, "grad_norm": 0.8699787871602025, "learning_rate": 2.900887834226366e-06, "loss": 0.4006, "step": 10357 }, { "epoch": 0.6488043971875538, "grad_norm": 0.8935710333830624, "learning_rate": 2.8999672028568086e-06, "loss": 0.3532, "step": 10358 }, { "epoch": 0.6488670351868961, "grad_norm": 0.6267816201418333, "learning_rate": 2.899046657924992e-06, "loss": 0.4515, "step": 10359 }, { "epoch": 0.6489296731862384, "grad_norm": 0.8567879348347045, "learning_rate": 2.898126199468804e-06, "loss": 0.4048, "step": 10360 }, { "epoch": 0.6489923111855808, "grad_norm": 0.859383708238034, "learning_rate": 2.897205827526132e-06, "loss": 0.3708, "step": 10361 }, { "epoch": 0.649054949184923, "grad_norm": 0.8013163292822556, "learning_rate": 2.896285542134857e-06, "loss": 0.3576, "step": 10362 }, { "epoch": 0.6491175871842654, "grad_norm": 0.8417205897410457, "learning_rate": 2.895365343332861e-06, "loss": 0.3396, "step": 10363 }, { "epoch": 0.6491802251836076, "grad_norm": 0.8073575119958157, "learning_rate": 2.8944452311580183e-06, "loss": 0.4036, "step": 10364 }, { "epoch": 0.64924286318295, "grad_norm": 0.8393851353054099, "learning_rate": 2.893525205648201e-06, "loss": 0.3954, "step": 10365 }, { "epoch": 0.6493055011822922, "grad_norm": 0.8291041905643626, "learning_rate": 2.8926052668412747e-06, "loss": 0.4253, "step": 10366 }, { "epoch": 0.6493681391816345, "grad_norm": 0.8698599590773702, "learning_rate": 2.8916854147751093e-06, "loss": 0.4225, "step": 10367 }, { "epoch": 0.6494307771809769, "grad_norm": 0.8228742987130873, "learning_rate": 2.890765649487563e-06, "loss": 0.3943, "step": 10368 }, { "epoch": 0.6494934151803191, "grad_norm": 0.8298622697754173, "learning_rate": 2.8898459710164937e-06, "loss": 0.3916, "step": 10369 }, { "epoch": 0.6495560531796615, "grad_norm": 0.8073854430803125, "learning_rate": 2.888926379399757e-06, "loss": 0.3638, "step": 10370 }, { "epoch": 0.6496186911790037, "grad_norm": 0.7897633186127557, "learning_rate": 2.888006874675202e-06, "loss": 0.3596, "step": 10371 }, { "epoch": 0.6496813291783461, "grad_norm": 0.8706973642402919, "learning_rate": 2.887087456880674e-06, "loss": 0.3939, "step": 10372 }, { "epoch": 0.6497439671776883, "grad_norm": 0.8344013214617296, "learning_rate": 2.8861681260540197e-06, "loss": 0.3496, "step": 10373 }, { "epoch": 0.6498066051770306, "grad_norm": 0.8819693736223284, "learning_rate": 2.885248882233079e-06, "loss": 0.418, "step": 10374 }, { "epoch": 0.649869243176373, "grad_norm": 0.7792810147012744, "learning_rate": 2.8843297254556836e-06, "loss": 0.3597, "step": 10375 }, { "epoch": 0.6499318811757152, "grad_norm": 0.8799978896618733, "learning_rate": 2.883410655759672e-06, "loss": 0.4493, "step": 10376 }, { "epoch": 0.6499945191750576, "grad_norm": 0.8033233341260583, "learning_rate": 2.882491673182871e-06, "loss": 0.3808, "step": 10377 }, { "epoch": 0.6500571571743998, "grad_norm": 0.8405588796915817, "learning_rate": 2.8815727777631054e-06, "loss": 0.3785, "step": 10378 }, { "epoch": 0.6501197951737422, "grad_norm": 0.7803805301132385, "learning_rate": 2.8806539695381975e-06, "loss": 0.3251, "step": 10379 }, { "epoch": 0.6501824331730844, "grad_norm": 0.8642376898156906, "learning_rate": 2.879735248545965e-06, "loss": 0.4144, "step": 10380 }, { "epoch": 0.6502450711724268, "grad_norm": 1.0126000152465713, "learning_rate": 2.878816614824222e-06, "loss": 0.3264, "step": 10381 }, { "epoch": 0.650307709171769, "grad_norm": 0.6358940282060264, "learning_rate": 2.877898068410783e-06, "loss": 0.4509, "step": 10382 }, { "epoch": 0.6503703471711113, "grad_norm": 0.7740573458112713, "learning_rate": 2.8769796093434505e-06, "loss": 0.3658, "step": 10383 }, { "epoch": 0.6504329851704537, "grad_norm": 0.9487106090636596, "learning_rate": 2.8760612376600337e-06, "loss": 0.4128, "step": 10384 }, { "epoch": 0.6504956231697959, "grad_norm": 0.6394488448321326, "learning_rate": 2.8751429533983304e-06, "loss": 0.4488, "step": 10385 }, { "epoch": 0.6505582611691383, "grad_norm": 0.8369254197177118, "learning_rate": 2.874224756596138e-06, "loss": 0.3536, "step": 10386 }, { "epoch": 0.6506208991684805, "grad_norm": 0.897369570145749, "learning_rate": 2.8733066472912486e-06, "loss": 0.4022, "step": 10387 }, { "epoch": 0.6506835371678229, "grad_norm": 0.8640380918730703, "learning_rate": 2.872388625521453e-06, "loss": 0.3967, "step": 10388 }, { "epoch": 0.6507461751671652, "grad_norm": 0.8331931280319146, "learning_rate": 2.871470691324536e-06, "loss": 0.369, "step": 10389 }, { "epoch": 0.6508088131665075, "grad_norm": 0.8364466715540126, "learning_rate": 2.870552844738279e-06, "loss": 0.357, "step": 10390 }, { "epoch": 0.6508714511658498, "grad_norm": 0.8133391605243996, "learning_rate": 2.8696350858004636e-06, "loss": 0.3882, "step": 10391 }, { "epoch": 0.650934089165192, "grad_norm": 0.6465287628990616, "learning_rate": 2.8687174145488617e-06, "loss": 0.4519, "step": 10392 }, { "epoch": 0.6509967271645344, "grad_norm": 0.8403809766936473, "learning_rate": 2.8677998310212485e-06, "loss": 0.4101, "step": 10393 }, { "epoch": 0.6510593651638766, "grad_norm": 0.7949436606825384, "learning_rate": 2.8668823352553898e-06, "loss": 0.4307, "step": 10394 }, { "epoch": 0.651122003163219, "grad_norm": 0.8883898961441539, "learning_rate": 2.8659649272890505e-06, "loss": 0.3878, "step": 10395 }, { "epoch": 0.6511846411625613, "grad_norm": 0.8819889338710134, "learning_rate": 2.8650476071599907e-06, "loss": 0.4286, "step": 10396 }, { "epoch": 0.6512472791619036, "grad_norm": 0.8291253881621348, "learning_rate": 2.864130374905968e-06, "loss": 0.3827, "step": 10397 }, { "epoch": 0.6513099171612459, "grad_norm": 0.8601341467449632, "learning_rate": 2.863213230564735e-06, "loss": 0.3998, "step": 10398 }, { "epoch": 0.6513725551605881, "grad_norm": 0.8063093706024058, "learning_rate": 2.8622961741740403e-06, "loss": 0.3818, "step": 10399 }, { "epoch": 0.6514351931599305, "grad_norm": 0.8476001235240168, "learning_rate": 2.861379205771634e-06, "loss": 0.4063, "step": 10400 }, { "epoch": 0.6514978311592727, "grad_norm": 0.896570138044474, "learning_rate": 2.8604623253952535e-06, "loss": 0.3905, "step": 10401 }, { "epoch": 0.6515604691586151, "grad_norm": 0.8462058990737387, "learning_rate": 2.859545533082644e-06, "loss": 0.3894, "step": 10402 }, { "epoch": 0.6516231071579573, "grad_norm": 0.8743977530242601, "learning_rate": 2.8586288288715368e-06, "loss": 0.3649, "step": 10403 }, { "epoch": 0.6516857451572997, "grad_norm": 0.8215147644248464, "learning_rate": 2.857712212799665e-06, "loss": 0.3713, "step": 10404 }, { "epoch": 0.651748383156642, "grad_norm": 0.8670843449997193, "learning_rate": 2.856795684904756e-06, "loss": 0.4197, "step": 10405 }, { "epoch": 0.6518110211559843, "grad_norm": 0.8431192775188515, "learning_rate": 2.855879245224535e-06, "loss": 0.3799, "step": 10406 }, { "epoch": 0.6518736591553266, "grad_norm": 0.8326989497353712, "learning_rate": 2.8549628937967215e-06, "loss": 0.3801, "step": 10407 }, { "epoch": 0.6519362971546688, "grad_norm": 0.8432863265575351, "learning_rate": 2.854046630659032e-06, "loss": 0.4044, "step": 10408 }, { "epoch": 0.6519989351540112, "grad_norm": 0.9192246783144232, "learning_rate": 2.8531304558491813e-06, "loss": 0.4233, "step": 10409 }, { "epoch": 0.6520615731533534, "grad_norm": 0.836515415001787, "learning_rate": 2.852214369404881e-06, "loss": 0.334, "step": 10410 }, { "epoch": 0.6521242111526958, "grad_norm": 0.9402295327671001, "learning_rate": 2.8512983713638365e-06, "loss": 0.3859, "step": 10411 }, { "epoch": 0.6521868491520381, "grad_norm": 0.8673302445144948, "learning_rate": 2.8503824617637494e-06, "loss": 0.4184, "step": 10412 }, { "epoch": 0.6522494871513804, "grad_norm": 0.8932502576670382, "learning_rate": 2.8494666406423187e-06, "loss": 0.4239, "step": 10413 }, { "epoch": 0.6523121251507227, "grad_norm": 0.8152511115895374, "learning_rate": 2.8485509080372414e-06, "loss": 0.3968, "step": 10414 }, { "epoch": 0.652374763150065, "grad_norm": 0.8068816374269449, "learning_rate": 2.8476352639862074e-06, "loss": 0.3852, "step": 10415 }, { "epoch": 0.6524374011494073, "grad_norm": 0.9048735704041232, "learning_rate": 2.8467197085269037e-06, "loss": 0.419, "step": 10416 }, { "epoch": 0.6525000391487495, "grad_norm": 0.6278350465927872, "learning_rate": 2.8458042416970183e-06, "loss": 0.4472, "step": 10417 }, { "epoch": 0.6525626771480919, "grad_norm": 0.8398318901052955, "learning_rate": 2.844888863534228e-06, "loss": 0.4051, "step": 10418 }, { "epoch": 0.6526253151474342, "grad_norm": 0.9012059278610793, "learning_rate": 2.843973574076214e-06, "loss": 0.4196, "step": 10419 }, { "epoch": 0.6526879531467765, "grad_norm": 0.8498325539587697, "learning_rate": 2.843058373360649e-06, "loss": 0.37, "step": 10420 }, { "epoch": 0.6527505911461188, "grad_norm": 0.8858974356403049, "learning_rate": 2.8421432614252004e-06, "loss": 0.4287, "step": 10421 }, { "epoch": 0.6528132291454611, "grad_norm": 0.82391750660526, "learning_rate": 2.8412282383075362e-06, "loss": 0.395, "step": 10422 }, { "epoch": 0.6528758671448034, "grad_norm": 0.8389989858640459, "learning_rate": 2.8403133040453185e-06, "loss": 0.3806, "step": 10423 }, { "epoch": 0.6529385051441456, "grad_norm": 0.8120858542936563, "learning_rate": 2.839398458676206e-06, "loss": 0.4255, "step": 10424 }, { "epoch": 0.653001143143488, "grad_norm": 0.8300560437668864, "learning_rate": 2.838483702237852e-06, "loss": 0.3713, "step": 10425 }, { "epoch": 0.6530637811428303, "grad_norm": 0.8726164344858178, "learning_rate": 2.8375690347679114e-06, "loss": 0.3903, "step": 10426 }, { "epoch": 0.6531264191421726, "grad_norm": 0.7860442326831735, "learning_rate": 2.8366544563040288e-06, "loss": 0.3769, "step": 10427 }, { "epoch": 0.6531890571415149, "grad_norm": 0.9018672309171418, "learning_rate": 2.835739966883853e-06, "loss": 0.388, "step": 10428 }, { "epoch": 0.6532516951408572, "grad_norm": 0.8599167739367177, "learning_rate": 2.834825566545022e-06, "loss": 0.3662, "step": 10429 }, { "epoch": 0.6533143331401995, "grad_norm": 0.9040832249469196, "learning_rate": 2.8339112553251712e-06, "loss": 0.4325, "step": 10430 }, { "epoch": 0.6533769711395419, "grad_norm": 0.789926999174696, "learning_rate": 2.832997033261937e-06, "loss": 0.3945, "step": 10431 }, { "epoch": 0.6534396091388841, "grad_norm": 0.763476073936599, "learning_rate": 2.8320829003929455e-06, "loss": 0.3583, "step": 10432 }, { "epoch": 0.6535022471382264, "grad_norm": 0.7939676141690398, "learning_rate": 2.831168856755825e-06, "loss": 0.419, "step": 10433 }, { "epoch": 0.6535648851375687, "grad_norm": 0.785675178988405, "learning_rate": 2.8302549023881955e-06, "loss": 0.3541, "step": 10434 }, { "epoch": 0.653627523136911, "grad_norm": 0.7706955621109115, "learning_rate": 2.8293410373276775e-06, "loss": 0.3672, "step": 10435 }, { "epoch": 0.6536901611362533, "grad_norm": 0.8580449312516804, "learning_rate": 2.8284272616118847e-06, "loss": 0.4169, "step": 10436 }, { "epoch": 0.6537527991355956, "grad_norm": 0.8366227083282205, "learning_rate": 2.82751357527843e-06, "loss": 0.3887, "step": 10437 }, { "epoch": 0.653815437134938, "grad_norm": 0.9447327512416109, "learning_rate": 2.82659997836492e-06, "loss": 0.3762, "step": 10438 }, { "epoch": 0.6538780751342802, "grad_norm": 0.9230915395198712, "learning_rate": 2.825686470908958e-06, "loss": 0.4167, "step": 10439 }, { "epoch": 0.6539407131336226, "grad_norm": 0.7953174303597588, "learning_rate": 2.8247730529481447e-06, "loss": 0.3828, "step": 10440 }, { "epoch": 0.6540033511329648, "grad_norm": 0.8492289788678702, "learning_rate": 2.823859724520076e-06, "loss": 0.3492, "step": 10441 }, { "epoch": 0.6540659891323071, "grad_norm": 0.8224368972877308, "learning_rate": 2.8229464856623434e-06, "loss": 0.4022, "step": 10442 }, { "epoch": 0.6541286271316494, "grad_norm": 0.8659729911840525, "learning_rate": 2.82203333641254e-06, "loss": 0.4202, "step": 10443 }, { "epoch": 0.6541912651309917, "grad_norm": 0.8972835251848739, "learning_rate": 2.821120276808248e-06, "loss": 0.4158, "step": 10444 }, { "epoch": 0.654253903130334, "grad_norm": 0.7540810571712666, "learning_rate": 2.8202073068870474e-06, "loss": 0.3406, "step": 10445 }, { "epoch": 0.6543165411296763, "grad_norm": 0.8975113017570621, "learning_rate": 2.8192944266865212e-06, "loss": 0.3906, "step": 10446 }, { "epoch": 0.6543791791290187, "grad_norm": 0.8675089979319727, "learning_rate": 2.8183816362442414e-06, "loss": 0.4214, "step": 10447 }, { "epoch": 0.6544418171283609, "grad_norm": 0.8181750002384726, "learning_rate": 2.817468935597778e-06, "loss": 0.4209, "step": 10448 }, { "epoch": 0.6545044551277032, "grad_norm": 0.8861150242341866, "learning_rate": 2.8165563247846983e-06, "loss": 0.4276, "step": 10449 }, { "epoch": 0.6545670931270455, "grad_norm": 0.8714412160240209, "learning_rate": 2.815643803842566e-06, "loss": 0.3685, "step": 10450 }, { "epoch": 0.6546297311263878, "grad_norm": 0.7414584897244966, "learning_rate": 2.8147313728089374e-06, "loss": 0.3594, "step": 10451 }, { "epoch": 0.6546923691257301, "grad_norm": 0.9315725875473181, "learning_rate": 2.8138190317213737e-06, "loss": 0.4223, "step": 10452 }, { "epoch": 0.6547550071250724, "grad_norm": 0.7882347550047597, "learning_rate": 2.812906780617424e-06, "loss": 0.378, "step": 10453 }, { "epoch": 0.6548176451244148, "grad_norm": 0.898681617417943, "learning_rate": 2.8119946195346375e-06, "loss": 0.4041, "step": 10454 }, { "epoch": 0.654880283123757, "grad_norm": 0.8693102352823863, "learning_rate": 2.8110825485105566e-06, "loss": 0.396, "step": 10455 }, { "epoch": 0.6549429211230994, "grad_norm": 0.7748172730440683, "learning_rate": 2.8101705675827263e-06, "loss": 0.377, "step": 10456 }, { "epoch": 0.6550055591224416, "grad_norm": 0.8336362379233505, "learning_rate": 2.8092586767886817e-06, "loss": 0.4149, "step": 10457 }, { "epoch": 0.6550681971217839, "grad_norm": 0.8618077682341168, "learning_rate": 2.8083468761659567e-06, "loss": 0.3698, "step": 10458 }, { "epoch": 0.6551308351211262, "grad_norm": 0.8211277854607744, "learning_rate": 2.8074351657520805e-06, "loss": 0.3693, "step": 10459 }, { "epoch": 0.6551934731204685, "grad_norm": 0.8146099831940025, "learning_rate": 2.8065235455845784e-06, "loss": 0.3597, "step": 10460 }, { "epoch": 0.6552561111198109, "grad_norm": 0.7824374980987565, "learning_rate": 2.8056120157009765e-06, "loss": 0.3841, "step": 10461 }, { "epoch": 0.6553187491191531, "grad_norm": 0.9037893403367803, "learning_rate": 2.8047005761387914e-06, "loss": 0.4394, "step": 10462 }, { "epoch": 0.6553813871184955, "grad_norm": 0.8873896728965475, "learning_rate": 2.803789226935537e-06, "loss": 0.4176, "step": 10463 }, { "epoch": 0.6554440251178377, "grad_norm": 0.8461357340046244, "learning_rate": 2.8028779681287244e-06, "loss": 0.3885, "step": 10464 }, { "epoch": 0.6555066631171801, "grad_norm": 0.822568675239481, "learning_rate": 2.8019667997558646e-06, "loss": 0.3742, "step": 10465 }, { "epoch": 0.6555693011165223, "grad_norm": 0.8854268255230161, "learning_rate": 2.8010557218544593e-06, "loss": 0.3959, "step": 10466 }, { "epoch": 0.6556319391158646, "grad_norm": 0.8115569344770054, "learning_rate": 2.800144734462008e-06, "loss": 0.3756, "step": 10467 }, { "epoch": 0.655694577115207, "grad_norm": 0.6235798428320724, "learning_rate": 2.7992338376160065e-06, "loss": 0.4683, "step": 10468 }, { "epoch": 0.6557572151145492, "grad_norm": 0.8249861650459941, "learning_rate": 2.7983230313539507e-06, "loss": 0.3856, "step": 10469 }, { "epoch": 0.6558198531138916, "grad_norm": 0.843061284957409, "learning_rate": 2.797412315713327e-06, "loss": 0.4036, "step": 10470 }, { "epoch": 0.6558824911132338, "grad_norm": 0.8697665540943362, "learning_rate": 2.796501690731621e-06, "loss": 0.3816, "step": 10471 }, { "epoch": 0.6559451291125762, "grad_norm": 0.8811405648175704, "learning_rate": 2.7955911564463157e-06, "loss": 0.3788, "step": 10472 }, { "epoch": 0.6560077671119184, "grad_norm": 0.8852399762296699, "learning_rate": 2.7946807128948844e-06, "loss": 0.3996, "step": 10473 }, { "epoch": 0.6560704051112608, "grad_norm": 0.8568141789280022, "learning_rate": 2.793770360114807e-06, "loss": 0.3817, "step": 10474 }, { "epoch": 0.656133043110603, "grad_norm": 0.8301428765710761, "learning_rate": 2.7928600981435503e-06, "loss": 0.3944, "step": 10475 }, { "epoch": 0.6561956811099453, "grad_norm": 0.8528294216043608, "learning_rate": 2.7919499270185825e-06, "loss": 0.3783, "step": 10476 }, { "epoch": 0.6562583191092877, "grad_norm": 0.8093223996443621, "learning_rate": 2.7910398467773623e-06, "loss": 0.4067, "step": 10477 }, { "epoch": 0.6563209571086299, "grad_norm": 0.8760548633035322, "learning_rate": 2.7901298574573554e-06, "loss": 0.4344, "step": 10478 }, { "epoch": 0.6563835951079723, "grad_norm": 0.8001644296913684, "learning_rate": 2.789219959096013e-06, "loss": 0.3699, "step": 10479 }, { "epoch": 0.6564462331073145, "grad_norm": 1.0290098066345696, "learning_rate": 2.7883101517307876e-06, "loss": 0.3872, "step": 10480 }, { "epoch": 0.6565088711066569, "grad_norm": 0.8729315527646886, "learning_rate": 2.7874004353991268e-06, "loss": 0.4006, "step": 10481 }, { "epoch": 0.6565715091059992, "grad_norm": 0.8973349695715365, "learning_rate": 2.7864908101384723e-06, "loss": 0.3986, "step": 10482 }, { "epoch": 0.6566341471053414, "grad_norm": 0.8731882037364518, "learning_rate": 2.7855812759862695e-06, "loss": 0.4167, "step": 10483 }, { "epoch": 0.6566967851046838, "grad_norm": 0.9195960588044376, "learning_rate": 2.784671832979952e-06, "loss": 0.3645, "step": 10484 }, { "epoch": 0.656759423104026, "grad_norm": 0.5794271468969137, "learning_rate": 2.7837624811569536e-06, "loss": 0.439, "step": 10485 }, { "epoch": 0.6568220611033684, "grad_norm": 0.8523861024068181, "learning_rate": 2.7828532205547005e-06, "loss": 0.4312, "step": 10486 }, { "epoch": 0.6568846991027106, "grad_norm": 0.8893370121647297, "learning_rate": 2.781944051210622e-06, "loss": 0.4264, "step": 10487 }, { "epoch": 0.656947337102053, "grad_norm": 0.8877196817427271, "learning_rate": 2.781034973162139e-06, "loss": 0.4119, "step": 10488 }, { "epoch": 0.6570099751013952, "grad_norm": 0.8859792750214247, "learning_rate": 2.780125986446668e-06, "loss": 0.3847, "step": 10489 }, { "epoch": 0.6570726131007376, "grad_norm": 0.8043505653036723, "learning_rate": 2.7792170911016226e-06, "loss": 0.3578, "step": 10490 }, { "epoch": 0.6571352511000799, "grad_norm": 0.8199554973030083, "learning_rate": 2.7783082871644127e-06, "loss": 0.4033, "step": 10491 }, { "epoch": 0.6571978890994221, "grad_norm": 0.6983037552330591, "learning_rate": 2.777399574672448e-06, "loss": 0.4637, "step": 10492 }, { "epoch": 0.6572605270987645, "grad_norm": 0.8010046337123049, "learning_rate": 2.776490953663129e-06, "loss": 0.364, "step": 10493 }, { "epoch": 0.6573231650981067, "grad_norm": 0.906547839826394, "learning_rate": 2.775582424173854e-06, "loss": 0.422, "step": 10494 }, { "epoch": 0.6573858030974491, "grad_norm": 0.6376351667438945, "learning_rate": 2.7746739862420204e-06, "loss": 0.4796, "step": 10495 }, { "epoch": 0.6574484410967913, "grad_norm": 0.8532615923187502, "learning_rate": 2.7737656399050183e-06, "loss": 0.3793, "step": 10496 }, { "epoch": 0.6575110790961337, "grad_norm": 0.875471421914361, "learning_rate": 2.772857385200236e-06, "loss": 0.377, "step": 10497 }, { "epoch": 0.657573717095476, "grad_norm": 0.9256858643508727, "learning_rate": 2.771949222165057e-06, "loss": 0.3769, "step": 10498 }, { "epoch": 0.6576363550948183, "grad_norm": 0.9125184672019782, "learning_rate": 2.7710411508368624e-06, "loss": 0.3773, "step": 10499 }, { "epoch": 0.6576989930941606, "grad_norm": 0.848082196410417, "learning_rate": 2.7701331712530266e-06, "loss": 0.3903, "step": 10500 }, { "epoch": 0.6577616310935028, "grad_norm": 0.7932083697750576, "learning_rate": 2.7692252834509227e-06, "loss": 0.3901, "step": 10501 }, { "epoch": 0.6578242690928452, "grad_norm": 0.6589029834508733, "learning_rate": 2.7683174874679207e-06, "loss": 0.4515, "step": 10502 }, { "epoch": 0.6578869070921874, "grad_norm": 0.8237624160155903, "learning_rate": 2.767409783341384e-06, "loss": 0.4176, "step": 10503 }, { "epoch": 0.6579495450915298, "grad_norm": 0.7896899257036095, "learning_rate": 2.766502171108677e-06, "loss": 0.355, "step": 10504 }, { "epoch": 0.6580121830908721, "grad_norm": 0.8695192033307263, "learning_rate": 2.765594650807155e-06, "loss": 0.388, "step": 10505 }, { "epoch": 0.6580748210902144, "grad_norm": 0.8557599087706516, "learning_rate": 2.7646872224741716e-06, "loss": 0.3996, "step": 10506 }, { "epoch": 0.6581374590895567, "grad_norm": 0.893409557056463, "learning_rate": 2.7637798861470777e-06, "loss": 0.4003, "step": 10507 }, { "epoch": 0.6582000970888989, "grad_norm": 0.9105818470024488, "learning_rate": 2.762872641863218e-06, "loss": 0.4352, "step": 10508 }, { "epoch": 0.6582627350882413, "grad_norm": 0.9250197017180433, "learning_rate": 2.7619654896599357e-06, "loss": 0.4033, "step": 10509 }, { "epoch": 0.6583253730875835, "grad_norm": 0.7174034241127208, "learning_rate": 2.761058429574568e-06, "loss": 0.3553, "step": 10510 }, { "epoch": 0.6583880110869259, "grad_norm": 0.802499997191021, "learning_rate": 2.7601514616444526e-06, "loss": 0.3653, "step": 10511 }, { "epoch": 0.6584506490862682, "grad_norm": 0.8694793004881942, "learning_rate": 2.759244585906916e-06, "loss": 0.3924, "step": 10512 }, { "epoch": 0.6585132870856105, "grad_norm": 0.8265276286817648, "learning_rate": 2.7583378023992914e-06, "loss": 0.3698, "step": 10513 }, { "epoch": 0.6585759250849528, "grad_norm": 0.8161750343480915, "learning_rate": 2.7574311111588982e-06, "loss": 0.3678, "step": 10514 }, { "epoch": 0.6586385630842951, "grad_norm": 0.8102956315409197, "learning_rate": 2.7565245122230566e-06, "loss": 0.3803, "step": 10515 }, { "epoch": 0.6587012010836374, "grad_norm": 0.9506141470957025, "learning_rate": 2.755618005629083e-06, "loss": 0.4194, "step": 10516 }, { "epoch": 0.6587638390829796, "grad_norm": 0.8131104540383892, "learning_rate": 2.754711591414288e-06, "loss": 0.3883, "step": 10517 }, { "epoch": 0.658826477082322, "grad_norm": 0.810085299833827, "learning_rate": 2.753805269615981e-06, "loss": 0.3535, "step": 10518 }, { "epoch": 0.6588891150816643, "grad_norm": 0.8061575767062655, "learning_rate": 2.7528990402714637e-06, "loss": 0.4028, "step": 10519 }, { "epoch": 0.6589517530810066, "grad_norm": 0.8448042912300225, "learning_rate": 2.7519929034180403e-06, "loss": 0.3732, "step": 10520 }, { "epoch": 0.6590143910803489, "grad_norm": 0.8670627012717862, "learning_rate": 2.7510868590930073e-06, "loss": 0.3614, "step": 10521 }, { "epoch": 0.6590770290796912, "grad_norm": 0.8590302245285453, "learning_rate": 2.7501809073336567e-06, "loss": 0.4143, "step": 10522 }, { "epoch": 0.6591396670790335, "grad_norm": 0.8188386568777282, "learning_rate": 2.7492750481772774e-06, "loss": 0.3777, "step": 10523 }, { "epoch": 0.6592023050783758, "grad_norm": 0.8943682546878242, "learning_rate": 2.748369281661155e-06, "loss": 0.393, "step": 10524 }, { "epoch": 0.6592649430777181, "grad_norm": 0.7384619430408738, "learning_rate": 2.747463607822571e-06, "loss": 0.3776, "step": 10525 }, { "epoch": 0.6593275810770604, "grad_norm": 0.8396966693354714, "learning_rate": 2.7465580266988027e-06, "loss": 0.4079, "step": 10526 }, { "epoch": 0.6593902190764027, "grad_norm": 0.7633615293457282, "learning_rate": 2.7456525383271227e-06, "loss": 0.3961, "step": 10527 }, { "epoch": 0.659452857075745, "grad_norm": 0.803665882773251, "learning_rate": 2.744747142744805e-06, "loss": 0.3931, "step": 10528 }, { "epoch": 0.6595154950750873, "grad_norm": 0.9018625869067244, "learning_rate": 2.74384183998911e-06, "loss": 0.4096, "step": 10529 }, { "epoch": 0.6595781330744296, "grad_norm": 0.8294406211138323, "learning_rate": 2.742936630097307e-06, "loss": 0.3866, "step": 10530 }, { "epoch": 0.659640771073772, "grad_norm": 0.8581959918850026, "learning_rate": 2.7420315131066504e-06, "loss": 0.3778, "step": 10531 }, { "epoch": 0.6597034090731142, "grad_norm": 0.8367833979549189, "learning_rate": 2.7411264890543966e-06, "loss": 0.3966, "step": 10532 }, { "epoch": 0.6597660470724565, "grad_norm": 0.8213657630150504, "learning_rate": 2.740221557977795e-06, "loss": 0.3866, "step": 10533 }, { "epoch": 0.6598286850717988, "grad_norm": 0.8337861443385182, "learning_rate": 2.739316719914094e-06, "loss": 0.4179, "step": 10534 }, { "epoch": 0.6598913230711411, "grad_norm": 0.8744381535718171, "learning_rate": 2.7384119749005366e-06, "loss": 0.4217, "step": 10535 }, { "epoch": 0.6599539610704834, "grad_norm": 0.8368926056603122, "learning_rate": 2.73750732297436e-06, "loss": 0.3692, "step": 10536 }, { "epoch": 0.6600165990698257, "grad_norm": 0.9177925624499972, "learning_rate": 2.7366027641728032e-06, "loss": 0.4137, "step": 10537 }, { "epoch": 0.660079237069168, "grad_norm": 0.7933454033427582, "learning_rate": 2.7356982985330953e-06, "loss": 0.3656, "step": 10538 }, { "epoch": 0.6601418750685103, "grad_norm": 0.8107925197255438, "learning_rate": 2.734793926092468e-06, "loss": 0.3628, "step": 10539 }, { "epoch": 0.6602045130678527, "grad_norm": 0.834629669218541, "learning_rate": 2.7338896468881425e-06, "loss": 0.4011, "step": 10540 }, { "epoch": 0.6602671510671949, "grad_norm": 0.8180125868046948, "learning_rate": 2.7329854609573405e-06, "loss": 0.4464, "step": 10541 }, { "epoch": 0.6603297890665372, "grad_norm": 0.8366481658583663, "learning_rate": 2.732081368337277e-06, "loss": 0.3823, "step": 10542 }, { "epoch": 0.6603924270658795, "grad_norm": 0.8059377659570938, "learning_rate": 2.7311773690651654e-06, "loss": 0.3762, "step": 10543 }, { "epoch": 0.6604550650652218, "grad_norm": 0.8470430606423485, "learning_rate": 2.730273463178215e-06, "loss": 0.3783, "step": 10544 }, { "epoch": 0.6605177030645641, "grad_norm": 0.783093410140835, "learning_rate": 2.729369650713627e-06, "loss": 0.3693, "step": 10545 }, { "epoch": 0.6605803410639064, "grad_norm": 0.8520242260210891, "learning_rate": 2.7284659317086083e-06, "loss": 0.4111, "step": 10546 }, { "epoch": 0.6606429790632488, "grad_norm": 0.8264447457527572, "learning_rate": 2.727562306200352e-06, "loss": 0.3744, "step": 10547 }, { "epoch": 0.660705617062591, "grad_norm": 0.9328623063216254, "learning_rate": 2.7266587742260543e-06, "loss": 0.384, "step": 10548 }, { "epoch": 0.6607682550619334, "grad_norm": 0.8886507548437687, "learning_rate": 2.725755335822903e-06, "loss": 0.406, "step": 10549 }, { "epoch": 0.6608308930612756, "grad_norm": 0.8534968708877394, "learning_rate": 2.724851991028085e-06, "loss": 0.4321, "step": 10550 }, { "epoch": 0.6608935310606179, "grad_norm": 0.8404263686467499, "learning_rate": 2.723948739878781e-06, "loss": 0.3565, "step": 10551 }, { "epoch": 0.6609561690599602, "grad_norm": 0.8431324916171505, "learning_rate": 2.723045582412169e-06, "loss": 0.4067, "step": 10552 }, { "epoch": 0.6610188070593025, "grad_norm": 0.7710335206681765, "learning_rate": 2.7221425186654228e-06, "loss": 0.3689, "step": 10553 }, { "epoch": 0.6610814450586449, "grad_norm": 0.8847690098468955, "learning_rate": 2.7212395486757144e-06, "loss": 0.4072, "step": 10554 }, { "epoch": 0.6611440830579871, "grad_norm": 0.8619659069107781, "learning_rate": 2.720336672480209e-06, "loss": 0.3864, "step": 10555 }, { "epoch": 0.6612067210573295, "grad_norm": 0.6766013911322136, "learning_rate": 2.7194338901160673e-06, "loss": 0.4462, "step": 10556 }, { "epoch": 0.6612693590566717, "grad_norm": 0.8175288777127528, "learning_rate": 2.7185312016204524e-06, "loss": 0.4087, "step": 10557 }, { "epoch": 0.661331997056014, "grad_norm": 0.8295655114850395, "learning_rate": 2.7176286070305158e-06, "loss": 0.3938, "step": 10558 }, { "epoch": 0.6613946350553563, "grad_norm": 0.8171957499995495, "learning_rate": 2.7167261063834105e-06, "loss": 0.3776, "step": 10559 }, { "epoch": 0.6614572730546986, "grad_norm": 0.8515607205138644, "learning_rate": 2.715823699716281e-06, "loss": 0.3985, "step": 10560 }, { "epoch": 0.661519911054041, "grad_norm": 0.7758217919607128, "learning_rate": 2.7149213870662738e-06, "loss": 0.3748, "step": 10561 }, { "epoch": 0.6615825490533832, "grad_norm": 0.9619279418412371, "learning_rate": 2.7140191684705233e-06, "loss": 0.432, "step": 10562 }, { "epoch": 0.6616451870527256, "grad_norm": 0.8445117755896003, "learning_rate": 2.7131170439661703e-06, "loss": 0.4059, "step": 10563 }, { "epoch": 0.6617078250520678, "grad_norm": 0.8594997113126612, "learning_rate": 2.7122150135903445e-06, "loss": 0.4076, "step": 10564 }, { "epoch": 0.6617704630514102, "grad_norm": 0.8884557954442183, "learning_rate": 2.711313077380171e-06, "loss": 0.4046, "step": 10565 }, { "epoch": 0.6618331010507524, "grad_norm": 0.8489197568254601, "learning_rate": 2.7104112353727785e-06, "loss": 0.405, "step": 10566 }, { "epoch": 0.6618957390500947, "grad_norm": 0.8468240103970458, "learning_rate": 2.709509487605284e-06, "loss": 0.4011, "step": 10567 }, { "epoch": 0.661958377049437, "grad_norm": 0.6342086440247348, "learning_rate": 2.7086078341148043e-06, "loss": 0.4308, "step": 10568 }, { "epoch": 0.6620210150487793, "grad_norm": 1.0097317932022372, "learning_rate": 2.7077062749384515e-06, "loss": 0.4149, "step": 10569 }, { "epoch": 0.6620836530481217, "grad_norm": 0.8475632724966082, "learning_rate": 2.706804810113334e-06, "loss": 0.4072, "step": 10570 }, { "epoch": 0.6621462910474639, "grad_norm": 0.8701569218210408, "learning_rate": 2.7059034396765537e-06, "loss": 0.3884, "step": 10571 }, { "epoch": 0.6622089290468063, "grad_norm": 0.8222236176108152, "learning_rate": 2.7050021636652163e-06, "loss": 0.3986, "step": 10572 }, { "epoch": 0.6622715670461485, "grad_norm": 0.8667029208814021, "learning_rate": 2.7041009821164154e-06, "loss": 0.4086, "step": 10573 }, { "epoch": 0.6623342050454909, "grad_norm": 0.6017389148564084, "learning_rate": 2.703199895067242e-06, "loss": 0.4598, "step": 10574 }, { "epoch": 0.6623968430448332, "grad_norm": 0.8126763791506332, "learning_rate": 2.7022989025547896e-06, "loss": 0.3667, "step": 10575 }, { "epoch": 0.6624594810441754, "grad_norm": 0.9530524813019465, "learning_rate": 2.7013980046161407e-06, "loss": 0.4037, "step": 10576 }, { "epoch": 0.6625221190435178, "grad_norm": 0.5799334926510245, "learning_rate": 2.7004972012883767e-06, "loss": 0.4675, "step": 10577 }, { "epoch": 0.66258475704286, "grad_norm": 0.8394396479903493, "learning_rate": 2.699596492608575e-06, "loss": 0.3629, "step": 10578 }, { "epoch": 0.6626473950422024, "grad_norm": 0.80381113652779, "learning_rate": 2.6986958786138064e-06, "loss": 0.3937, "step": 10579 }, { "epoch": 0.6627100330415446, "grad_norm": 0.7873035916427787, "learning_rate": 2.6977953593411442e-06, "loss": 0.3772, "step": 10580 }, { "epoch": 0.662772671040887, "grad_norm": 0.8752793353244589, "learning_rate": 2.6968949348276524e-06, "loss": 0.3781, "step": 10581 }, { "epoch": 0.6628353090402292, "grad_norm": 0.6464450114716772, "learning_rate": 2.695994605110393e-06, "loss": 0.4513, "step": 10582 }, { "epoch": 0.6628979470395716, "grad_norm": 0.834569172986903, "learning_rate": 2.6950943702264207e-06, "loss": 0.3521, "step": 10583 }, { "epoch": 0.6629605850389139, "grad_norm": 0.8416570715685402, "learning_rate": 2.694194230212794e-06, "loss": 0.3672, "step": 10584 }, { "epoch": 0.6630232230382561, "grad_norm": 0.8417448566458916, "learning_rate": 2.693294185106562e-06, "loss": 0.354, "step": 10585 }, { "epoch": 0.6630858610375985, "grad_norm": 0.8075919763134194, "learning_rate": 2.6923942349447677e-06, "loss": 0.3651, "step": 10586 }, { "epoch": 0.6631484990369407, "grad_norm": 0.8548873062861493, "learning_rate": 2.691494379764456e-06, "loss": 0.4434, "step": 10587 }, { "epoch": 0.6632111370362831, "grad_norm": 0.8950966787052406, "learning_rate": 2.690594619602662e-06, "loss": 0.3847, "step": 10588 }, { "epoch": 0.6632737750356253, "grad_norm": 0.8330852079889811, "learning_rate": 2.689694954496424e-06, "loss": 0.3932, "step": 10589 }, { "epoch": 0.6633364130349677, "grad_norm": 0.8413903491299921, "learning_rate": 2.688795384482771e-06, "loss": 0.4313, "step": 10590 }, { "epoch": 0.66339905103431, "grad_norm": 0.7999669852664408, "learning_rate": 2.687895909598728e-06, "loss": 0.3815, "step": 10591 }, { "epoch": 0.6634616890336522, "grad_norm": 0.8511676982858343, "learning_rate": 2.6869965298813194e-06, "loss": 0.4407, "step": 10592 }, { "epoch": 0.6635243270329946, "grad_norm": 0.7709921354444276, "learning_rate": 2.6860972453675603e-06, "loss": 0.3959, "step": 10593 }, { "epoch": 0.6635869650323368, "grad_norm": 0.8261454011823598, "learning_rate": 2.6851980560944703e-06, "loss": 0.3781, "step": 10594 }, { "epoch": 0.6636496030316792, "grad_norm": 0.8516701775117562, "learning_rate": 2.6842989620990577e-06, "loss": 0.4121, "step": 10595 }, { "epoch": 0.6637122410310214, "grad_norm": 0.9253871625103057, "learning_rate": 2.68339996341833e-06, "loss": 0.4024, "step": 10596 }, { "epoch": 0.6637748790303638, "grad_norm": 0.8112542013911264, "learning_rate": 2.6825010600892876e-06, "loss": 0.4093, "step": 10597 }, { "epoch": 0.6638375170297061, "grad_norm": 0.801155577692176, "learning_rate": 2.681602252148934e-06, "loss": 0.3685, "step": 10598 }, { "epoch": 0.6639001550290484, "grad_norm": 0.8200075984022431, "learning_rate": 2.680703539634262e-06, "loss": 0.4245, "step": 10599 }, { "epoch": 0.6639627930283907, "grad_norm": 0.9231068314324931, "learning_rate": 2.679804922582262e-06, "loss": 0.3967, "step": 10600 }, { "epoch": 0.6640254310277329, "grad_norm": 0.6062107412042328, "learning_rate": 2.6789064010299224e-06, "loss": 0.4481, "step": 10601 }, { "epoch": 0.6640880690270753, "grad_norm": 0.8402437300848702, "learning_rate": 2.6780079750142253e-06, "loss": 0.4136, "step": 10602 }, { "epoch": 0.6641507070264175, "grad_norm": 0.9418817337825358, "learning_rate": 2.6771096445721513e-06, "loss": 0.4273, "step": 10603 }, { "epoch": 0.6642133450257599, "grad_norm": 0.8556702093036436, "learning_rate": 2.6762114097406766e-06, "loss": 0.4175, "step": 10604 }, { "epoch": 0.6642759830251022, "grad_norm": 0.8388002181862593, "learning_rate": 2.6753132705567684e-06, "loss": 0.3858, "step": 10605 }, { "epoch": 0.6643386210244445, "grad_norm": 0.7772250655925604, "learning_rate": 2.674415227057401e-06, "loss": 0.3717, "step": 10606 }, { "epoch": 0.6644012590237868, "grad_norm": 0.8581132949980658, "learning_rate": 2.673517279279534e-06, "loss": 0.3886, "step": 10607 }, { "epoch": 0.6644638970231291, "grad_norm": 0.8401760053573407, "learning_rate": 2.672619427260127e-06, "loss": 0.4219, "step": 10608 }, { "epoch": 0.6645265350224714, "grad_norm": 0.795440395922846, "learning_rate": 2.6717216710361367e-06, "loss": 0.3762, "step": 10609 }, { "epoch": 0.6645891730218136, "grad_norm": 0.9114303403187887, "learning_rate": 2.6708240106445145e-06, "loss": 0.4309, "step": 10610 }, { "epoch": 0.664651811021156, "grad_norm": 0.9150278786085277, "learning_rate": 2.669926446122206e-06, "loss": 0.4261, "step": 10611 }, { "epoch": 0.6647144490204983, "grad_norm": 0.8825711360061459, "learning_rate": 2.66902897750616e-06, "loss": 0.39, "step": 10612 }, { "epoch": 0.6647770870198406, "grad_norm": 0.7849367288761202, "learning_rate": 2.6681316048333138e-06, "loss": 0.3798, "step": 10613 }, { "epoch": 0.6648397250191829, "grad_norm": 0.7837228912999019, "learning_rate": 2.6672343281406016e-06, "loss": 0.3844, "step": 10614 }, { "epoch": 0.6649023630185252, "grad_norm": 0.8858909605274421, "learning_rate": 2.6663371474649592e-06, "loss": 0.3987, "step": 10615 }, { "epoch": 0.6649650010178675, "grad_norm": 1.0132521876618958, "learning_rate": 2.665440062843313e-06, "loss": 0.4225, "step": 10616 }, { "epoch": 0.6650276390172097, "grad_norm": 0.8437265022293478, "learning_rate": 2.6645430743125876e-06, "loss": 0.3711, "step": 10617 }, { "epoch": 0.6650902770165521, "grad_norm": 0.8608168045761188, "learning_rate": 2.6636461819097027e-06, "loss": 0.4647, "step": 10618 }, { "epoch": 0.6651529150158944, "grad_norm": 0.8359356169005911, "learning_rate": 2.662749385671574e-06, "loss": 0.4302, "step": 10619 }, { "epoch": 0.6652155530152367, "grad_norm": 0.8540187691810339, "learning_rate": 2.661852685635113e-06, "loss": 0.3824, "step": 10620 }, { "epoch": 0.665278191014579, "grad_norm": 0.9036093374109347, "learning_rate": 2.660956081837231e-06, "loss": 0.4013, "step": 10621 }, { "epoch": 0.6653408290139213, "grad_norm": 0.8204087811028371, "learning_rate": 2.660059574314831e-06, "loss": 0.3938, "step": 10622 }, { "epoch": 0.6654034670132636, "grad_norm": 0.8862260146032699, "learning_rate": 2.6591631631048108e-06, "loss": 0.4154, "step": 10623 }, { "epoch": 0.665466105012606, "grad_norm": 0.8308021456024344, "learning_rate": 2.658266848244072e-06, "loss": 0.3871, "step": 10624 }, { "epoch": 0.6655287430119482, "grad_norm": 0.9382020684349474, "learning_rate": 2.6573706297695047e-06, "loss": 0.4072, "step": 10625 }, { "epoch": 0.6655913810112905, "grad_norm": 0.8727588030985728, "learning_rate": 2.656474507717996e-06, "loss": 0.4202, "step": 10626 }, { "epoch": 0.6656540190106328, "grad_norm": 0.9100666500676278, "learning_rate": 2.655578482126432e-06, "loss": 0.4265, "step": 10627 }, { "epoch": 0.6657166570099751, "grad_norm": 0.8015641110420767, "learning_rate": 2.654682553031693e-06, "loss": 0.3535, "step": 10628 }, { "epoch": 0.6657792950093174, "grad_norm": 0.7885341821397119, "learning_rate": 2.653786720470653e-06, "loss": 0.4117, "step": 10629 }, { "epoch": 0.6658419330086597, "grad_norm": 0.813900909932111, "learning_rate": 2.6528909844801893e-06, "loss": 0.4102, "step": 10630 }, { "epoch": 0.665904571008002, "grad_norm": 0.8075637000411765, "learning_rate": 2.6519953450971663e-06, "loss": 0.3781, "step": 10631 }, { "epoch": 0.6659672090073443, "grad_norm": 0.8840144108030318, "learning_rate": 2.6510998023584527e-06, "loss": 0.3958, "step": 10632 }, { "epoch": 0.6660298470066867, "grad_norm": 0.8167060442379384, "learning_rate": 2.6502043563009074e-06, "loss": 0.3854, "step": 10633 }, { "epoch": 0.6660924850060289, "grad_norm": 0.9230818945522841, "learning_rate": 2.649309006961387e-06, "loss": 0.3979, "step": 10634 }, { "epoch": 0.6661551230053712, "grad_norm": 0.957866674791101, "learning_rate": 2.648413754376744e-06, "loss": 0.471, "step": 10635 }, { "epoch": 0.6662177610047135, "grad_norm": 0.794939748809353, "learning_rate": 2.647518598583827e-06, "loss": 0.3882, "step": 10636 }, { "epoch": 0.6662803990040558, "grad_norm": 0.8935799626132236, "learning_rate": 2.6466235396194815e-06, "loss": 0.4043, "step": 10637 }, { "epoch": 0.6663430370033981, "grad_norm": 0.8448842978234916, "learning_rate": 2.6457285775205455e-06, "loss": 0.387, "step": 10638 }, { "epoch": 0.6664056750027404, "grad_norm": 0.8369656130004922, "learning_rate": 2.644833712323861e-06, "loss": 0.3814, "step": 10639 }, { "epoch": 0.6664683130020828, "grad_norm": 0.8021444601910567, "learning_rate": 2.6439389440662554e-06, "loss": 0.3843, "step": 10640 }, { "epoch": 0.666530951001425, "grad_norm": 0.8119608335996713, "learning_rate": 2.643044272784563e-06, "loss": 0.4072, "step": 10641 }, { "epoch": 0.6665935890007673, "grad_norm": 0.7825587706306486, "learning_rate": 2.6421496985156048e-06, "loss": 0.3631, "step": 10642 }, { "epoch": 0.6666562270001096, "grad_norm": 0.8158661365218994, "learning_rate": 2.641255221296203e-06, "loss": 0.3637, "step": 10643 }, { "epoch": 0.6667188649994519, "grad_norm": 0.8466405478533939, "learning_rate": 2.6403608411631744e-06, "loss": 0.4062, "step": 10644 }, { "epoch": 0.6667815029987942, "grad_norm": 0.890123554783813, "learning_rate": 2.6394665581533307e-06, "loss": 0.3749, "step": 10645 }, { "epoch": 0.6668441409981365, "grad_norm": 0.6248831007575749, "learning_rate": 2.6385723723034828e-06, "loss": 0.4402, "step": 10646 }, { "epoch": 0.6669067789974789, "grad_norm": 0.6835137522607952, "learning_rate": 2.637678283650431e-06, "loss": 0.4544, "step": 10647 }, { "epoch": 0.6669694169968211, "grad_norm": 0.9026128698519981, "learning_rate": 2.6367842922309805e-06, "loss": 0.3999, "step": 10648 }, { "epoch": 0.6670320549961635, "grad_norm": 0.7525337443421984, "learning_rate": 2.6358903980819294e-06, "loss": 0.3504, "step": 10649 }, { "epoch": 0.6670946929955057, "grad_norm": 0.7965820258277311, "learning_rate": 2.6349966012400674e-06, "loss": 0.3838, "step": 10650 }, { "epoch": 0.667157330994848, "grad_norm": 0.8882726506493914, "learning_rate": 2.634102901742185e-06, "loss": 0.3923, "step": 10651 }, { "epoch": 0.6672199689941903, "grad_norm": 0.899076796131552, "learning_rate": 2.633209299625066e-06, "loss": 0.4016, "step": 10652 }, { "epoch": 0.6672826069935326, "grad_norm": 0.8654493915775263, "learning_rate": 2.632315794925491e-06, "loss": 0.3556, "step": 10653 }, { "epoch": 0.667345244992875, "grad_norm": 0.8602233268870063, "learning_rate": 2.631422387680238e-06, "loss": 0.3616, "step": 10654 }, { "epoch": 0.6674078829922172, "grad_norm": 0.8535453381079506, "learning_rate": 2.6305290779260793e-06, "loss": 0.4174, "step": 10655 }, { "epoch": 0.6674705209915596, "grad_norm": 0.8682138679104213, "learning_rate": 2.629635865699781e-06, "loss": 0.4137, "step": 10656 }, { "epoch": 0.6675331589909018, "grad_norm": 0.7868585996292452, "learning_rate": 2.6287427510381107e-06, "loss": 0.3822, "step": 10657 }, { "epoch": 0.6675957969902442, "grad_norm": 0.9239727116970551, "learning_rate": 2.627849733977831e-06, "loss": 0.4059, "step": 10658 }, { "epoch": 0.6676584349895864, "grad_norm": 0.8159073722856902, "learning_rate": 2.6269568145556966e-06, "loss": 0.3691, "step": 10659 }, { "epoch": 0.6677210729889287, "grad_norm": 0.8299321012667853, "learning_rate": 2.62606399280846e-06, "loss": 0.3839, "step": 10660 }, { "epoch": 0.667783710988271, "grad_norm": 0.7769095789420725, "learning_rate": 2.62517126877287e-06, "loss": 0.3404, "step": 10661 }, { "epoch": 0.6678463489876133, "grad_norm": 0.8334908653802141, "learning_rate": 2.6242786424856716e-06, "loss": 0.3766, "step": 10662 }, { "epoch": 0.6679089869869557, "grad_norm": 0.8608658187109715, "learning_rate": 2.623386113983605e-06, "loss": 0.3982, "step": 10663 }, { "epoch": 0.6679716249862979, "grad_norm": 0.8331475853086834, "learning_rate": 2.622493683303405e-06, "loss": 0.3816, "step": 10664 }, { "epoch": 0.6680342629856403, "grad_norm": 0.755745772230954, "learning_rate": 2.6216013504818085e-06, "loss": 0.3647, "step": 10665 }, { "epoch": 0.6680969009849825, "grad_norm": 0.7931091988789316, "learning_rate": 2.6207091155555396e-06, "loss": 0.3543, "step": 10666 }, { "epoch": 0.6681595389843248, "grad_norm": 0.8408217688354258, "learning_rate": 2.619816978561327e-06, "loss": 0.3739, "step": 10667 }, { "epoch": 0.6682221769836671, "grad_norm": 0.8403639121717686, "learning_rate": 2.61892493953589e-06, "loss": 0.4043, "step": 10668 }, { "epoch": 0.6682848149830094, "grad_norm": 0.8802322674976492, "learning_rate": 2.618032998515944e-06, "loss": 0.3767, "step": 10669 }, { "epoch": 0.6683474529823518, "grad_norm": 0.8865497194012776, "learning_rate": 2.617141155538202e-06, "loss": 0.4031, "step": 10670 }, { "epoch": 0.668410090981694, "grad_norm": 0.8293657267713671, "learning_rate": 2.616249410639372e-06, "loss": 0.3767, "step": 10671 }, { "epoch": 0.6684727289810364, "grad_norm": 0.8644855345878866, "learning_rate": 2.615357763856159e-06, "loss": 0.4131, "step": 10672 }, { "epoch": 0.6685353669803786, "grad_norm": 0.9048535143508941, "learning_rate": 2.6144662152252613e-06, "loss": 0.4476, "step": 10673 }, { "epoch": 0.668598004979721, "grad_norm": 0.84324778568926, "learning_rate": 2.6135747647833786e-06, "loss": 0.4179, "step": 10674 }, { "epoch": 0.6686606429790632, "grad_norm": 0.8740625779797676, "learning_rate": 2.6126834125671995e-06, "loss": 0.3998, "step": 10675 }, { "epoch": 0.6687232809784055, "grad_norm": 0.8236544387418826, "learning_rate": 2.6117921586134165e-06, "loss": 0.3754, "step": 10676 }, { "epoch": 0.6687859189777479, "grad_norm": 0.7774866959183975, "learning_rate": 2.610901002958711e-06, "loss": 0.3472, "step": 10677 }, { "epoch": 0.6688485569770901, "grad_norm": 0.593816115673158, "learning_rate": 2.6100099456397644e-06, "loss": 0.4568, "step": 10678 }, { "epoch": 0.6689111949764325, "grad_norm": 0.8098778865193274, "learning_rate": 2.609118986693252e-06, "loss": 0.3971, "step": 10679 }, { "epoch": 0.6689738329757747, "grad_norm": 0.8969598311286378, "learning_rate": 2.608228126155845e-06, "loss": 0.3811, "step": 10680 }, { "epoch": 0.6690364709751171, "grad_norm": 0.8697771725772051, "learning_rate": 2.607337364064213e-06, "loss": 0.388, "step": 10681 }, { "epoch": 0.6690991089744593, "grad_norm": 0.583792182793882, "learning_rate": 2.6064467004550178e-06, "loss": 0.4567, "step": 10682 }, { "epoch": 0.6691617469738017, "grad_norm": 0.8518745815325568, "learning_rate": 2.6055561353649216e-06, "loss": 0.4115, "step": 10683 }, { "epoch": 0.669224384973144, "grad_norm": 0.8914765416263372, "learning_rate": 2.6046656688305795e-06, "loss": 0.4056, "step": 10684 }, { "epoch": 0.6692870229724862, "grad_norm": 0.8311113244619405, "learning_rate": 2.6037753008886415e-06, "loss": 0.3789, "step": 10685 }, { "epoch": 0.6693496609718286, "grad_norm": 0.8751924455313296, "learning_rate": 2.602885031575758e-06, "loss": 0.3941, "step": 10686 }, { "epoch": 0.6694122989711708, "grad_norm": 0.859956094224656, "learning_rate": 2.6019948609285724e-06, "loss": 0.3991, "step": 10687 }, { "epoch": 0.6694749369705132, "grad_norm": 0.8778073192113568, "learning_rate": 2.601104788983723e-06, "loss": 0.3997, "step": 10688 }, { "epoch": 0.6695375749698554, "grad_norm": 0.8531956432928838, "learning_rate": 2.6002148157778458e-06, "loss": 0.4262, "step": 10689 }, { "epoch": 0.6696002129691978, "grad_norm": 0.8254857281477964, "learning_rate": 2.5993249413475702e-06, "loss": 0.3946, "step": 10690 }, { "epoch": 0.6696628509685401, "grad_norm": 0.7745020143648099, "learning_rate": 2.5984351657295274e-06, "loss": 0.358, "step": 10691 }, { "epoch": 0.6697254889678824, "grad_norm": 0.8388475867440472, "learning_rate": 2.5975454889603392e-06, "loss": 0.3709, "step": 10692 }, { "epoch": 0.6697881269672247, "grad_norm": 0.8624695669324024, "learning_rate": 2.596655911076624e-06, "loss": 0.4255, "step": 10693 }, { "epoch": 0.6698507649665669, "grad_norm": 0.9245339500477394, "learning_rate": 2.5957664321149966e-06, "loss": 0.4135, "step": 10694 }, { "epoch": 0.6699134029659093, "grad_norm": 0.8138449234905999, "learning_rate": 2.594877052112071e-06, "loss": 0.3966, "step": 10695 }, { "epoch": 0.6699760409652515, "grad_norm": 0.8675268239885912, "learning_rate": 2.5939877711044513e-06, "loss": 0.3956, "step": 10696 }, { "epoch": 0.6700386789645939, "grad_norm": 0.8564459536150926, "learning_rate": 2.593098589128743e-06, "loss": 0.3927, "step": 10697 }, { "epoch": 0.6701013169639362, "grad_norm": 0.8247550633650648, "learning_rate": 2.5922095062215426e-06, "loss": 0.3498, "step": 10698 }, { "epoch": 0.6701639549632785, "grad_norm": 0.8811524930022312, "learning_rate": 2.5913205224194437e-06, "loss": 0.3929, "step": 10699 }, { "epoch": 0.6702265929626208, "grad_norm": 0.7734706816935365, "learning_rate": 2.5904316377590416e-06, "loss": 0.3351, "step": 10700 }, { "epoch": 0.670289230961963, "grad_norm": 0.8385084337880891, "learning_rate": 2.5895428522769204e-06, "loss": 0.3752, "step": 10701 }, { "epoch": 0.6703518689613054, "grad_norm": 0.8178245750000868, "learning_rate": 2.5886541660096624e-06, "loss": 0.3911, "step": 10702 }, { "epoch": 0.6704145069606476, "grad_norm": 0.8764153628977895, "learning_rate": 2.587765578993845e-06, "loss": 0.4101, "step": 10703 }, { "epoch": 0.67047714495999, "grad_norm": 0.8604546646200988, "learning_rate": 2.5868770912660456e-06, "loss": 0.3629, "step": 10704 }, { "epoch": 0.6705397829593323, "grad_norm": 0.8075245222800462, "learning_rate": 2.5859887028628337e-06, "loss": 0.3869, "step": 10705 }, { "epoch": 0.6706024209586746, "grad_norm": 0.9514977720281678, "learning_rate": 2.5851004138207747e-06, "loss": 0.3915, "step": 10706 }, { "epoch": 0.6706650589580169, "grad_norm": 0.8040394884027999, "learning_rate": 2.584212224176429e-06, "loss": 0.3602, "step": 10707 }, { "epoch": 0.6707276969573592, "grad_norm": 0.8652738387190789, "learning_rate": 2.583324133966358e-06, "loss": 0.3549, "step": 10708 }, { "epoch": 0.6707903349567015, "grad_norm": 0.6270474573896456, "learning_rate": 2.582436143227114e-06, "loss": 0.4411, "step": 10709 }, { "epoch": 0.6708529729560437, "grad_norm": 0.8446094654331299, "learning_rate": 2.581548251995247e-06, "loss": 0.4173, "step": 10710 }, { "epoch": 0.6709156109553861, "grad_norm": 0.8493896520558735, "learning_rate": 2.580660460307304e-06, "loss": 0.3487, "step": 10711 }, { "epoch": 0.6709782489547284, "grad_norm": 0.889849484861892, "learning_rate": 2.579772768199823e-06, "loss": 0.4253, "step": 10712 }, { "epoch": 0.6710408869540707, "grad_norm": 0.8064636139504762, "learning_rate": 2.5788851757093457e-06, "loss": 0.3883, "step": 10713 }, { "epoch": 0.671103524953413, "grad_norm": 0.8795587906476979, "learning_rate": 2.577997682872404e-06, "loss": 0.4085, "step": 10714 }, { "epoch": 0.6711661629527553, "grad_norm": 0.8645616763583072, "learning_rate": 2.577110289725527e-06, "loss": 0.3885, "step": 10715 }, { "epoch": 0.6712288009520976, "grad_norm": 0.8592897144151562, "learning_rate": 2.5762229963052393e-06, "loss": 0.3529, "step": 10716 }, { "epoch": 0.67129143895144, "grad_norm": 0.8162050781732761, "learning_rate": 2.575335802648064e-06, "loss": 0.3757, "step": 10717 }, { "epoch": 0.6713540769507822, "grad_norm": 0.7421994649708512, "learning_rate": 2.574448708790518e-06, "loss": 0.3678, "step": 10718 }, { "epoch": 0.6714167149501244, "grad_norm": 0.876534348910957, "learning_rate": 2.5735617147691123e-06, "loss": 0.4388, "step": 10719 }, { "epoch": 0.6714793529494668, "grad_norm": 0.886610446090802, "learning_rate": 2.572674820620357e-06, "loss": 0.4109, "step": 10720 }, { "epoch": 0.6715419909488091, "grad_norm": 0.8290731192768549, "learning_rate": 2.571788026380757e-06, "loss": 0.3895, "step": 10721 }, { "epoch": 0.6716046289481514, "grad_norm": 0.8364320159018006, "learning_rate": 2.5709013320868103e-06, "loss": 0.3946, "step": 10722 }, { "epoch": 0.6716672669474937, "grad_norm": 0.8432750164878831, "learning_rate": 2.5700147377750173e-06, "loss": 0.3907, "step": 10723 }, { "epoch": 0.671729904946836, "grad_norm": 0.8400358105219738, "learning_rate": 2.569128243481869e-06, "loss": 0.3776, "step": 10724 }, { "epoch": 0.6717925429461783, "grad_norm": 0.5428813801186494, "learning_rate": 2.5682418492438514e-06, "loss": 0.4256, "step": 10725 }, { "epoch": 0.6718551809455205, "grad_norm": 0.8030071865289986, "learning_rate": 2.567355555097453e-06, "loss": 0.3786, "step": 10726 }, { "epoch": 0.6719178189448629, "grad_norm": 0.7707166441989749, "learning_rate": 2.5664693610791515e-06, "loss": 0.3922, "step": 10727 }, { "epoch": 0.6719804569442052, "grad_norm": 0.892654029568887, "learning_rate": 2.565583267225422e-06, "loss": 0.4242, "step": 10728 }, { "epoch": 0.6720430949435475, "grad_norm": 0.8028247303219977, "learning_rate": 2.5646972735727378e-06, "loss": 0.3869, "step": 10729 }, { "epoch": 0.6721057329428898, "grad_norm": 0.8872107015294696, "learning_rate": 2.563811380157565e-06, "loss": 0.4042, "step": 10730 }, { "epoch": 0.6721683709422321, "grad_norm": 0.8971235263024111, "learning_rate": 2.562925587016367e-06, "loss": 0.4159, "step": 10731 }, { "epoch": 0.6722310089415744, "grad_norm": 0.7924067646747152, "learning_rate": 2.5620398941856055e-06, "loss": 0.3732, "step": 10732 }, { "epoch": 0.6722936469409168, "grad_norm": 0.841714177396116, "learning_rate": 2.5611543017017328e-06, "loss": 0.4158, "step": 10733 }, { "epoch": 0.672356284940259, "grad_norm": 0.7815945760751002, "learning_rate": 2.5602688096012033e-06, "loss": 0.3725, "step": 10734 }, { "epoch": 0.6724189229396013, "grad_norm": 0.8706718635735867, "learning_rate": 2.559383417920463e-06, "loss": 0.4056, "step": 10735 }, { "epoch": 0.6724815609389436, "grad_norm": 0.8205270255949842, "learning_rate": 2.558498126695954e-06, "loss": 0.3887, "step": 10736 }, { "epoch": 0.6725441989382859, "grad_norm": 0.8169579533569875, "learning_rate": 2.557612935964115e-06, "loss": 0.4028, "step": 10737 }, { "epoch": 0.6726068369376282, "grad_norm": 0.8685144825128706, "learning_rate": 2.556727845761381e-06, "loss": 0.4066, "step": 10738 }, { "epoch": 0.6726694749369705, "grad_norm": 0.877074533196129, "learning_rate": 2.555842856124182e-06, "loss": 0.404, "step": 10739 }, { "epoch": 0.6727321129363129, "grad_norm": 0.7958350558644522, "learning_rate": 2.5549579670889433e-06, "loss": 0.3936, "step": 10740 }, { "epoch": 0.6727947509356551, "grad_norm": 0.79808732601827, "learning_rate": 2.554073178692091e-06, "loss": 0.3735, "step": 10741 }, { "epoch": 0.6728573889349975, "grad_norm": 0.8400980980127353, "learning_rate": 2.5531884909700377e-06, "loss": 0.378, "step": 10742 }, { "epoch": 0.6729200269343397, "grad_norm": 0.7848487052065268, "learning_rate": 2.5523039039592025e-06, "loss": 0.3919, "step": 10743 }, { "epoch": 0.672982664933682, "grad_norm": 0.8574123954108184, "learning_rate": 2.551419417695993e-06, "loss": 0.4141, "step": 10744 }, { "epoch": 0.6730453029330243, "grad_norm": 0.8577679628855537, "learning_rate": 2.550535032216815e-06, "loss": 0.404, "step": 10745 }, { "epoch": 0.6731079409323666, "grad_norm": 0.7880892789891811, "learning_rate": 2.549650747558069e-06, "loss": 0.3913, "step": 10746 }, { "epoch": 0.673170578931709, "grad_norm": 0.8893753533833559, "learning_rate": 2.548766563756154e-06, "loss": 0.3875, "step": 10747 }, { "epoch": 0.6732332169310512, "grad_norm": 0.8780644920673786, "learning_rate": 2.5478824808474613e-06, "loss": 0.4392, "step": 10748 }, { "epoch": 0.6732958549303936, "grad_norm": 0.6127433578572419, "learning_rate": 2.5469984988683792e-06, "loss": 0.4428, "step": 10749 }, { "epoch": 0.6733584929297358, "grad_norm": 0.8303817458605341, "learning_rate": 2.5461146178552966e-06, "loss": 0.3885, "step": 10750 }, { "epoch": 0.6734211309290781, "grad_norm": 0.8206208944194407, "learning_rate": 2.545230837844589e-06, "loss": 0.3794, "step": 10751 }, { "epoch": 0.6734837689284204, "grad_norm": 0.7963244199442371, "learning_rate": 2.544347158872638e-06, "loss": 0.3825, "step": 10752 }, { "epoch": 0.6735464069277627, "grad_norm": 0.9492123392774926, "learning_rate": 2.543463580975814e-06, "loss": 0.3914, "step": 10753 }, { "epoch": 0.673609044927105, "grad_norm": 0.8574946142819554, "learning_rate": 2.5425801041904853e-06, "loss": 0.3828, "step": 10754 }, { "epoch": 0.6736716829264473, "grad_norm": 0.8428136066042107, "learning_rate": 2.541696728553015e-06, "loss": 0.3672, "step": 10755 }, { "epoch": 0.6737343209257897, "grad_norm": 0.8159117503969284, "learning_rate": 2.5408134540997633e-06, "loss": 0.3607, "step": 10756 }, { "epoch": 0.6737969589251319, "grad_norm": 0.9176953020683433, "learning_rate": 2.5399302808670866e-06, "loss": 0.4117, "step": 10757 }, { "epoch": 0.6738595969244743, "grad_norm": 0.8849556536773068, "learning_rate": 2.5390472088913344e-06, "loss": 0.4176, "step": 10758 }, { "epoch": 0.6739222349238165, "grad_norm": 0.8775490175786548, "learning_rate": 2.5381642382088556e-06, "loss": 0.3786, "step": 10759 }, { "epoch": 0.6739848729231588, "grad_norm": 0.7914848050719703, "learning_rate": 2.5372813688559963e-06, "loss": 0.375, "step": 10760 }, { "epoch": 0.6740475109225011, "grad_norm": 0.7769295840167323, "learning_rate": 2.536398600869092e-06, "loss": 0.3478, "step": 10761 }, { "epoch": 0.6741101489218434, "grad_norm": 0.8047067503875575, "learning_rate": 2.535515934284479e-06, "loss": 0.3795, "step": 10762 }, { "epoch": 0.6741727869211858, "grad_norm": 0.9588042738231642, "learning_rate": 2.5346333691384883e-06, "loss": 0.3872, "step": 10763 }, { "epoch": 0.674235424920528, "grad_norm": 0.8673552843192767, "learning_rate": 2.5337509054674448e-06, "loss": 0.4286, "step": 10764 }, { "epoch": 0.6742980629198704, "grad_norm": 0.8430265844531897, "learning_rate": 2.5328685433076723e-06, "loss": 0.3664, "step": 10765 }, { "epoch": 0.6743607009192126, "grad_norm": 0.7334282199021954, "learning_rate": 2.531986282695488e-06, "loss": 0.3401, "step": 10766 }, { "epoch": 0.674423338918555, "grad_norm": 0.9005148770257352, "learning_rate": 2.5311041236672045e-06, "loss": 0.3857, "step": 10767 }, { "epoch": 0.6744859769178972, "grad_norm": 0.8439616951596272, "learning_rate": 2.5302220662591338e-06, "loss": 0.3533, "step": 10768 }, { "epoch": 0.6745486149172395, "grad_norm": 0.8572661615093986, "learning_rate": 2.5293401105075833e-06, "loss": 0.3622, "step": 10769 }, { "epoch": 0.6746112529165819, "grad_norm": 0.8248808082167839, "learning_rate": 2.5284582564488523e-06, "loss": 0.3604, "step": 10770 }, { "epoch": 0.6746738909159241, "grad_norm": 0.5981757001779028, "learning_rate": 2.5275765041192377e-06, "loss": 0.4651, "step": 10771 }, { "epoch": 0.6747365289152665, "grad_norm": 0.8091282121284623, "learning_rate": 2.526694853555034e-06, "loss": 0.3522, "step": 10772 }, { "epoch": 0.6747991669146087, "grad_norm": 0.870267650601535, "learning_rate": 2.525813304792528e-06, "loss": 0.3944, "step": 10773 }, { "epoch": 0.6748618049139511, "grad_norm": 0.9130349549404958, "learning_rate": 2.524931857868007e-06, "loss": 0.3825, "step": 10774 }, { "epoch": 0.6749244429132933, "grad_norm": 0.8435989803405092, "learning_rate": 2.5240505128177474e-06, "loss": 0.4068, "step": 10775 }, { "epoch": 0.6749870809126356, "grad_norm": 0.8048443388948254, "learning_rate": 2.52316926967803e-06, "loss": 0.3393, "step": 10776 }, { "epoch": 0.675049718911978, "grad_norm": 0.7797309877631976, "learning_rate": 2.5222881284851234e-06, "loss": 0.3713, "step": 10777 }, { "epoch": 0.6751123569113202, "grad_norm": 0.8629000467717709, "learning_rate": 2.5214070892752996e-06, "loss": 0.3869, "step": 10778 }, { "epoch": 0.6751749949106626, "grad_norm": 0.8424331060292389, "learning_rate": 2.52052615208482e-06, "loss": 0.3672, "step": 10779 }, { "epoch": 0.6752376329100048, "grad_norm": 0.8390574938875655, "learning_rate": 2.519645316949944e-06, "loss": 0.3986, "step": 10780 }, { "epoch": 0.6753002709093472, "grad_norm": 0.8627121781396145, "learning_rate": 2.5187645839069274e-06, "loss": 0.4424, "step": 10781 }, { "epoch": 0.6753629089086894, "grad_norm": 0.6610000969192578, "learning_rate": 2.5178839529920207e-06, "loss": 0.4538, "step": 10782 }, { "epoch": 0.6754255469080318, "grad_norm": 0.8147243881532583, "learning_rate": 2.517003424241471e-06, "loss": 0.3987, "step": 10783 }, { "epoch": 0.6754881849073741, "grad_norm": 0.8152209209768609, "learning_rate": 2.5161229976915196e-06, "loss": 0.3735, "step": 10784 }, { "epoch": 0.6755508229067163, "grad_norm": 0.8078338142214772, "learning_rate": 2.5152426733784083e-06, "loss": 0.3767, "step": 10785 }, { "epoch": 0.6756134609060587, "grad_norm": 0.8663405914080429, "learning_rate": 2.5143624513383683e-06, "loss": 0.3908, "step": 10786 }, { "epoch": 0.6756760989054009, "grad_norm": 0.8301393516687318, "learning_rate": 2.5134823316076328e-06, "loss": 0.3772, "step": 10787 }, { "epoch": 0.6757387369047433, "grad_norm": 0.9383968413695656, "learning_rate": 2.5126023142224266e-06, "loss": 0.4172, "step": 10788 }, { "epoch": 0.6758013749040855, "grad_norm": 0.8856415051079414, "learning_rate": 2.5117223992189704e-06, "loss": 0.4266, "step": 10789 }, { "epoch": 0.6758640129034279, "grad_norm": 0.8806111794919843, "learning_rate": 2.510842586633482e-06, "loss": 0.4343, "step": 10790 }, { "epoch": 0.6759266509027702, "grad_norm": 0.9202710732792349, "learning_rate": 2.509962876502175e-06, "loss": 0.4064, "step": 10791 }, { "epoch": 0.6759892889021125, "grad_norm": 0.96947254629041, "learning_rate": 2.509083268861256e-06, "loss": 0.4069, "step": 10792 }, { "epoch": 0.6760519269014548, "grad_norm": 0.8396737872297189, "learning_rate": 2.5082037637469337e-06, "loss": 0.3651, "step": 10793 }, { "epoch": 0.676114564900797, "grad_norm": 0.8575889772477862, "learning_rate": 2.507324361195408e-06, "loss": 0.3818, "step": 10794 }, { "epoch": 0.6761772029001394, "grad_norm": 0.671050166426404, "learning_rate": 2.5064450612428716e-06, "loss": 0.4582, "step": 10795 }, { "epoch": 0.6762398408994816, "grad_norm": 0.6400454360017604, "learning_rate": 2.505565863925521e-06, "loss": 0.4665, "step": 10796 }, { "epoch": 0.676302478898824, "grad_norm": 0.8566058682532637, "learning_rate": 2.504686769279542e-06, "loss": 0.3877, "step": 10797 }, { "epoch": 0.6763651168981663, "grad_norm": 0.8889441340956707, "learning_rate": 2.5038077773411194e-06, "loss": 0.3894, "step": 10798 }, { "epoch": 0.6764277548975086, "grad_norm": 0.8138552263739688, "learning_rate": 2.502928888146432e-06, "loss": 0.3718, "step": 10799 }, { "epoch": 0.6764903928968509, "grad_norm": 0.8752402370097337, "learning_rate": 2.5020501017316555e-06, "loss": 0.401, "step": 10800 }, { "epoch": 0.6765530308961932, "grad_norm": 0.8183395673152927, "learning_rate": 2.501171418132958e-06, "loss": 0.36, "step": 10801 }, { "epoch": 0.6766156688955355, "grad_norm": 0.9086791897950403, "learning_rate": 2.500292837386511e-06, "loss": 0.3982, "step": 10802 }, { "epoch": 0.6766783068948777, "grad_norm": 0.8248196691656104, "learning_rate": 2.4994143595284743e-06, "loss": 0.3946, "step": 10803 }, { "epoch": 0.6767409448942201, "grad_norm": 0.8630610874549276, "learning_rate": 2.4985359845950046e-06, "loss": 0.4007, "step": 10804 }, { "epoch": 0.6768035828935623, "grad_norm": 0.8318710535256063, "learning_rate": 2.4976577126222606e-06, "loss": 0.3582, "step": 10805 }, { "epoch": 0.6768662208929047, "grad_norm": 0.8790474312651967, "learning_rate": 2.4967795436463895e-06, "loss": 0.342, "step": 10806 }, { "epoch": 0.676928858892247, "grad_norm": 0.9068569737661667, "learning_rate": 2.495901477703537e-06, "loss": 0.4143, "step": 10807 }, { "epoch": 0.6769914968915893, "grad_norm": 0.8125131248094245, "learning_rate": 2.495023514829844e-06, "loss": 0.3735, "step": 10808 }, { "epoch": 0.6770541348909316, "grad_norm": 0.8320985783334076, "learning_rate": 2.4941456550614485e-06, "loss": 0.3824, "step": 10809 }, { "epoch": 0.6771167728902738, "grad_norm": 0.8551305623488387, "learning_rate": 2.493267898434481e-06, "loss": 0.4052, "step": 10810 }, { "epoch": 0.6771794108896162, "grad_norm": 0.8725580248859187, "learning_rate": 2.492390244985074e-06, "loss": 0.4051, "step": 10811 }, { "epoch": 0.6772420488889584, "grad_norm": 0.8144793648981948, "learning_rate": 2.4915126947493496e-06, "loss": 0.3684, "step": 10812 }, { "epoch": 0.6773046868883008, "grad_norm": 0.7706573038020891, "learning_rate": 2.4906352477634287e-06, "loss": 0.3679, "step": 10813 }, { "epoch": 0.6773673248876431, "grad_norm": 0.8349434621904186, "learning_rate": 2.4897579040634246e-06, "loss": 0.3716, "step": 10814 }, { "epoch": 0.6774299628869854, "grad_norm": 0.8606512870200662, "learning_rate": 2.4888806636854536e-06, "loss": 0.3854, "step": 10815 }, { "epoch": 0.6774926008863277, "grad_norm": 0.7314160286778257, "learning_rate": 2.48800352666562e-06, "loss": 0.3462, "step": 10816 }, { "epoch": 0.67755523888567, "grad_norm": 0.7913200801156506, "learning_rate": 2.4871264930400275e-06, "loss": 0.36, "step": 10817 }, { "epoch": 0.6776178768850123, "grad_norm": 0.8882209560996701, "learning_rate": 2.486249562844773e-06, "loss": 0.4134, "step": 10818 }, { "epoch": 0.6776805148843545, "grad_norm": 0.8950935884998076, "learning_rate": 2.485372736115955e-06, "loss": 0.4362, "step": 10819 }, { "epoch": 0.6777431528836969, "grad_norm": 0.8719037498723706, "learning_rate": 2.4844960128896625e-06, "loss": 0.4018, "step": 10820 }, { "epoch": 0.6778057908830392, "grad_norm": 0.8903671988188038, "learning_rate": 2.48361939320198e-06, "loss": 0.4469, "step": 10821 }, { "epoch": 0.6778684288823815, "grad_norm": 0.7844147512964819, "learning_rate": 2.482742877088991e-06, "loss": 0.3627, "step": 10822 }, { "epoch": 0.6779310668817238, "grad_norm": 0.796158502845056, "learning_rate": 2.48186646458677e-06, "loss": 0.3602, "step": 10823 }, { "epoch": 0.6779937048810661, "grad_norm": 0.9258049921028516, "learning_rate": 2.480990155731395e-06, "loss": 0.4395, "step": 10824 }, { "epoch": 0.6780563428804084, "grad_norm": 0.8696161930803189, "learning_rate": 2.4801139505589327e-06, "loss": 0.3938, "step": 10825 }, { "epoch": 0.6781189808797508, "grad_norm": 0.8211082544609482, "learning_rate": 2.479237849105448e-06, "loss": 0.4098, "step": 10826 }, { "epoch": 0.678181618879093, "grad_norm": 0.8158446180695702, "learning_rate": 2.4783618514069994e-06, "loss": 0.403, "step": 10827 }, { "epoch": 0.6782442568784353, "grad_norm": 0.7608475771450403, "learning_rate": 2.4774859574996467e-06, "loss": 0.3655, "step": 10828 }, { "epoch": 0.6783068948777776, "grad_norm": 0.8947193760506698, "learning_rate": 2.4766101674194404e-06, "loss": 0.3666, "step": 10829 }, { "epoch": 0.6783695328771199, "grad_norm": 0.9871457967135265, "learning_rate": 2.475734481202428e-06, "loss": 0.4224, "step": 10830 }, { "epoch": 0.6784321708764622, "grad_norm": 0.9148316616069587, "learning_rate": 2.4748588988846526e-06, "loss": 0.4104, "step": 10831 }, { "epoch": 0.6784948088758045, "grad_norm": 0.7798319508892482, "learning_rate": 2.4739834205021524e-06, "loss": 0.3445, "step": 10832 }, { "epoch": 0.6785574468751469, "grad_norm": 0.8659819269190102, "learning_rate": 2.4731080460909644e-06, "loss": 0.3513, "step": 10833 }, { "epoch": 0.6786200848744891, "grad_norm": 0.8941020051077083, "learning_rate": 2.472232775687119e-06, "loss": 0.4044, "step": 10834 }, { "epoch": 0.6786827228738314, "grad_norm": 0.8591172334657698, "learning_rate": 2.4713576093266413e-06, "loss": 0.3901, "step": 10835 }, { "epoch": 0.6787453608731737, "grad_norm": 0.9017943400310083, "learning_rate": 2.470482547045552e-06, "loss": 0.3909, "step": 10836 }, { "epoch": 0.678807998872516, "grad_norm": 0.8630941692820906, "learning_rate": 2.4696075888798724e-06, "loss": 0.3931, "step": 10837 }, { "epoch": 0.6788706368718583, "grad_norm": 0.8422429667702894, "learning_rate": 2.4687327348656144e-06, "loss": 0.4135, "step": 10838 }, { "epoch": 0.6789332748712006, "grad_norm": 0.8462642784374209, "learning_rate": 2.467857985038786e-06, "loss": 0.3834, "step": 10839 }, { "epoch": 0.678995912870543, "grad_norm": 0.8190071754943492, "learning_rate": 2.4669833394353936e-06, "loss": 0.3628, "step": 10840 }, { "epoch": 0.6790585508698852, "grad_norm": 0.5774231239284378, "learning_rate": 2.4661087980914354e-06, "loss": 0.4617, "step": 10841 }, { "epoch": 0.6791211888692276, "grad_norm": 0.9779019887041442, "learning_rate": 2.465234361042911e-06, "loss": 0.4272, "step": 10842 }, { "epoch": 0.6791838268685698, "grad_norm": 0.8843630414697256, "learning_rate": 2.4643600283258107e-06, "loss": 0.4059, "step": 10843 }, { "epoch": 0.6792464648679121, "grad_norm": 0.9400910775252487, "learning_rate": 2.463485799976121e-06, "loss": 0.4232, "step": 10844 }, { "epoch": 0.6793091028672544, "grad_norm": 0.8244832459035792, "learning_rate": 2.4626116760298274e-06, "loss": 0.3984, "step": 10845 }, { "epoch": 0.6793717408665967, "grad_norm": 0.844896274080171, "learning_rate": 2.461737656522909e-06, "loss": 0.3834, "step": 10846 }, { "epoch": 0.679434378865939, "grad_norm": 0.794479970444011, "learning_rate": 2.4608637414913393e-06, "loss": 0.3443, "step": 10847 }, { "epoch": 0.6794970168652813, "grad_norm": 0.8283705363779441, "learning_rate": 2.4599899309710897e-06, "loss": 0.3426, "step": 10848 }, { "epoch": 0.6795596548646237, "grad_norm": 0.6400450598513979, "learning_rate": 2.4591162249981253e-06, "loss": 0.4392, "step": 10849 }, { "epoch": 0.6796222928639659, "grad_norm": 0.8554999911341441, "learning_rate": 2.4582426236084074e-06, "loss": 0.3818, "step": 10850 }, { "epoch": 0.6796849308633083, "grad_norm": 0.8504907948021365, "learning_rate": 2.457369126837897e-06, "loss": 0.3767, "step": 10851 }, { "epoch": 0.6797475688626505, "grad_norm": 0.6117339965921168, "learning_rate": 2.456495734722544e-06, "loss": 0.4376, "step": 10852 }, { "epoch": 0.6798102068619928, "grad_norm": 0.8204758889353534, "learning_rate": 2.455622447298298e-06, "loss": 0.3995, "step": 10853 }, { "epoch": 0.6798728448613351, "grad_norm": 0.9170243760988218, "learning_rate": 2.454749264601106e-06, "loss": 0.357, "step": 10854 }, { "epoch": 0.6799354828606774, "grad_norm": 0.9444422886923038, "learning_rate": 2.453876186666907e-06, "loss": 0.4238, "step": 10855 }, { "epoch": 0.6799981208600198, "grad_norm": 0.8520737912150894, "learning_rate": 2.453003213531636e-06, "loss": 0.3777, "step": 10856 }, { "epoch": 0.680060758859362, "grad_norm": 0.8762693730262922, "learning_rate": 2.4521303452312257e-06, "loss": 0.3775, "step": 10857 }, { "epoch": 0.6801233968587044, "grad_norm": 0.8363229217664236, "learning_rate": 2.451257581801603e-06, "loss": 0.3772, "step": 10858 }, { "epoch": 0.6801860348580466, "grad_norm": 0.8039009748714578, "learning_rate": 2.450384923278692e-06, "loss": 0.3766, "step": 10859 }, { "epoch": 0.6802486728573889, "grad_norm": 0.8401419288430545, "learning_rate": 2.4495123696984086e-06, "loss": 0.3933, "step": 10860 }, { "epoch": 0.6803113108567312, "grad_norm": 0.8166950716137398, "learning_rate": 2.4486399210966715e-06, "loss": 0.3843, "step": 10861 }, { "epoch": 0.6803739488560735, "grad_norm": 0.8597883653797992, "learning_rate": 2.4477675775093867e-06, "loss": 0.3241, "step": 10862 }, { "epoch": 0.6804365868554159, "grad_norm": 0.8681471218556395, "learning_rate": 2.446895338972464e-06, "loss": 0.3839, "step": 10863 }, { "epoch": 0.6804992248547581, "grad_norm": 0.8999951136156364, "learning_rate": 2.446023205521803e-06, "loss": 0.4037, "step": 10864 }, { "epoch": 0.6805618628541005, "grad_norm": 0.9325571580101071, "learning_rate": 2.445151177193301e-06, "loss": 0.4165, "step": 10865 }, { "epoch": 0.6806245008534427, "grad_norm": 0.9313463148239653, "learning_rate": 2.4442792540228512e-06, "loss": 0.4011, "step": 10866 }, { "epoch": 0.6806871388527851, "grad_norm": 0.6767057115057568, "learning_rate": 2.4434074360463412e-06, "loss": 0.4308, "step": 10867 }, { "epoch": 0.6807497768521273, "grad_norm": 0.8505091648460905, "learning_rate": 2.4425357232996557e-06, "loss": 0.412, "step": 10868 }, { "epoch": 0.6808124148514696, "grad_norm": 0.8658982624755879, "learning_rate": 2.441664115818672e-06, "loss": 0.4431, "step": 10869 }, { "epoch": 0.680875052850812, "grad_norm": 0.8766193338567385, "learning_rate": 2.440792613639269e-06, "loss": 0.3886, "step": 10870 }, { "epoch": 0.6809376908501542, "grad_norm": 0.8580090178387442, "learning_rate": 2.4399212167973185e-06, "loss": 0.3779, "step": 10871 }, { "epoch": 0.6810003288494966, "grad_norm": 0.816444161871824, "learning_rate": 2.4390499253286865e-06, "loss": 0.3798, "step": 10872 }, { "epoch": 0.6810629668488388, "grad_norm": 0.8367286941629094, "learning_rate": 2.438178739269234e-06, "loss": 0.3916, "step": 10873 }, { "epoch": 0.6811256048481812, "grad_norm": 0.8125316815359366, "learning_rate": 2.4373076586548204e-06, "loss": 0.4, "step": 10874 }, { "epoch": 0.6811882428475234, "grad_norm": 0.8667271008051443, "learning_rate": 2.4364366835213e-06, "loss": 0.4148, "step": 10875 }, { "epoch": 0.6812508808468658, "grad_norm": 0.8355740616027033, "learning_rate": 2.4355658139045206e-06, "loss": 0.3988, "step": 10876 }, { "epoch": 0.681313518846208, "grad_norm": 0.7958512095325985, "learning_rate": 2.4346950498403265e-06, "loss": 0.3424, "step": 10877 }, { "epoch": 0.6813761568455503, "grad_norm": 0.8313151440050057, "learning_rate": 2.4338243913645625e-06, "loss": 0.3698, "step": 10878 }, { "epoch": 0.6814387948448927, "grad_norm": 0.7684982647577663, "learning_rate": 2.432953838513061e-06, "loss": 0.3593, "step": 10879 }, { "epoch": 0.6815014328442349, "grad_norm": 0.6002941270374162, "learning_rate": 2.4320833913216573e-06, "loss": 0.4559, "step": 10880 }, { "epoch": 0.6815640708435773, "grad_norm": 0.7974719038087044, "learning_rate": 2.431213049826179e-06, "loss": 0.3677, "step": 10881 }, { "epoch": 0.6816267088429195, "grad_norm": 0.9365596903228881, "learning_rate": 2.430342814062447e-06, "loss": 0.3788, "step": 10882 }, { "epoch": 0.6816893468422619, "grad_norm": 0.8273257522461945, "learning_rate": 2.4294726840662822e-06, "loss": 0.3672, "step": 10883 }, { "epoch": 0.6817519848416042, "grad_norm": 0.8371836031723286, "learning_rate": 2.4286026598734993e-06, "loss": 0.3362, "step": 10884 }, { "epoch": 0.6818146228409464, "grad_norm": 0.8621486838958403, "learning_rate": 2.4277327415199077e-06, "loss": 0.4048, "step": 10885 }, { "epoch": 0.6818772608402888, "grad_norm": 0.7815086966656302, "learning_rate": 2.426862929041312e-06, "loss": 0.3651, "step": 10886 }, { "epoch": 0.681939898839631, "grad_norm": 0.7600070047822651, "learning_rate": 2.4259932224735176e-06, "loss": 0.3426, "step": 10887 }, { "epoch": 0.6820025368389734, "grad_norm": 0.8306613494673519, "learning_rate": 2.4251236218523188e-06, "loss": 0.4069, "step": 10888 }, { "epoch": 0.6820651748383156, "grad_norm": 0.9173665061290873, "learning_rate": 2.4242541272135107e-06, "loss": 0.4075, "step": 10889 }, { "epoch": 0.682127812837658, "grad_norm": 0.8868279840430403, "learning_rate": 2.423384738592881e-06, "loss": 0.4137, "step": 10890 }, { "epoch": 0.6821904508370003, "grad_norm": 0.8557437152259709, "learning_rate": 2.422515456026214e-06, "loss": 0.3806, "step": 10891 }, { "epoch": 0.6822530888363426, "grad_norm": 0.8085531915848756, "learning_rate": 2.421646279549289e-06, "loss": 0.3447, "step": 10892 }, { "epoch": 0.6823157268356849, "grad_norm": 0.6341929085572854, "learning_rate": 2.4207772091978817e-06, "loss": 0.4308, "step": 10893 }, { "epoch": 0.6823783648350271, "grad_norm": 0.8913641723112788, "learning_rate": 2.419908245007763e-06, "loss": 0.4185, "step": 10894 }, { "epoch": 0.6824410028343695, "grad_norm": 0.8625116138686819, "learning_rate": 2.4190393870146984e-06, "loss": 0.4129, "step": 10895 }, { "epoch": 0.6825036408337117, "grad_norm": 0.8250874293604501, "learning_rate": 2.418170635254451e-06, "loss": 0.3525, "step": 10896 }, { "epoch": 0.6825662788330541, "grad_norm": 0.8049518206516156, "learning_rate": 2.417301989762782e-06, "loss": 0.3797, "step": 10897 }, { "epoch": 0.6826289168323963, "grad_norm": 0.8632026887241918, "learning_rate": 2.4164334505754423e-06, "loss": 0.4043, "step": 10898 }, { "epoch": 0.6826915548317387, "grad_norm": 0.774107419518758, "learning_rate": 2.415565017728181e-06, "loss": 0.3365, "step": 10899 }, { "epoch": 0.682754192831081, "grad_norm": 0.8467056744801119, "learning_rate": 2.4146966912567436e-06, "loss": 0.3726, "step": 10900 }, { "epoch": 0.6828168308304233, "grad_norm": 0.7879059693643423, "learning_rate": 2.41382847119687e-06, "loss": 0.333, "step": 10901 }, { "epoch": 0.6828794688297656, "grad_norm": 0.8682313165177207, "learning_rate": 2.412960357584297e-06, "loss": 0.4118, "step": 10902 }, { "epoch": 0.6829421068291078, "grad_norm": 0.9095396544027348, "learning_rate": 2.4120923504547535e-06, "loss": 0.3919, "step": 10903 }, { "epoch": 0.6830047448284502, "grad_norm": 0.9143676354276622, "learning_rate": 2.4112244498439713e-06, "loss": 0.3917, "step": 10904 }, { "epoch": 0.6830673828277924, "grad_norm": 0.8689597703100855, "learning_rate": 2.410356655787671e-06, "loss": 0.4182, "step": 10905 }, { "epoch": 0.6831300208271348, "grad_norm": 0.8317110626421638, "learning_rate": 2.40948896832157e-06, "loss": 0.3846, "step": 10906 }, { "epoch": 0.6831926588264771, "grad_norm": 0.8326171040777027, "learning_rate": 2.4086213874813852e-06, "loss": 0.3718, "step": 10907 }, { "epoch": 0.6832552968258194, "grad_norm": 0.7911248554026503, "learning_rate": 2.4077539133028253e-06, "loss": 0.3634, "step": 10908 }, { "epoch": 0.6833179348251617, "grad_norm": 0.8586768667837953, "learning_rate": 2.4068865458215957e-06, "loss": 0.3853, "step": 10909 }, { "epoch": 0.683380572824504, "grad_norm": 0.8381952700508787, "learning_rate": 2.406019285073397e-06, "loss": 0.3816, "step": 10910 }, { "epoch": 0.6834432108238463, "grad_norm": 0.8591760273454863, "learning_rate": 2.4051521310939258e-06, "loss": 0.422, "step": 10911 }, { "epoch": 0.6835058488231885, "grad_norm": 0.914113362371902, "learning_rate": 2.404285083918873e-06, "loss": 0.3728, "step": 10912 }, { "epoch": 0.6835684868225309, "grad_norm": 0.8430349673151784, "learning_rate": 2.4034181435839293e-06, "loss": 0.3967, "step": 10913 }, { "epoch": 0.6836311248218732, "grad_norm": 0.8445450479863573, "learning_rate": 2.402551310124777e-06, "loss": 0.3842, "step": 10914 }, { "epoch": 0.6836937628212155, "grad_norm": 0.8606460894259514, "learning_rate": 2.4016845835770936e-06, "loss": 0.4054, "step": 10915 }, { "epoch": 0.6837564008205578, "grad_norm": 0.7938523104545677, "learning_rate": 2.400817963976556e-06, "loss": 0.377, "step": 10916 }, { "epoch": 0.6838190388199001, "grad_norm": 0.7120114299543655, "learning_rate": 2.399951451358834e-06, "loss": 0.4556, "step": 10917 }, { "epoch": 0.6838816768192424, "grad_norm": 0.896683127261559, "learning_rate": 2.3990850457595928e-06, "loss": 0.4213, "step": 10918 }, { "epoch": 0.6839443148185846, "grad_norm": 0.8510580954177077, "learning_rate": 2.3982187472144934e-06, "loss": 0.3519, "step": 10919 }, { "epoch": 0.684006952817927, "grad_norm": 0.8283578806545085, "learning_rate": 2.3973525557591936e-06, "loss": 0.3601, "step": 10920 }, { "epoch": 0.6840695908172693, "grad_norm": 0.8622372594835915, "learning_rate": 2.396486471429344e-06, "loss": 0.4037, "step": 10921 }, { "epoch": 0.6841322288166116, "grad_norm": 0.7887223009671219, "learning_rate": 2.395620494260596e-06, "loss": 0.3989, "step": 10922 }, { "epoch": 0.6841948668159539, "grad_norm": 0.8325368044031262, "learning_rate": 2.3947546242885922e-06, "loss": 0.3825, "step": 10923 }, { "epoch": 0.6842575048152962, "grad_norm": 0.8393098259654831, "learning_rate": 2.39388886154897e-06, "loss": 0.3671, "step": 10924 }, { "epoch": 0.6843201428146385, "grad_norm": 0.8772255459606986, "learning_rate": 2.3930232060773666e-06, "loss": 0.3894, "step": 10925 }, { "epoch": 0.6843827808139809, "grad_norm": 0.8558076556975458, "learning_rate": 2.392157657909413e-06, "loss": 0.3696, "step": 10926 }, { "epoch": 0.6844454188133231, "grad_norm": 0.8634261790727319, "learning_rate": 2.391292217080734e-06, "loss": 0.4069, "step": 10927 }, { "epoch": 0.6845080568126654, "grad_norm": 0.9354416949339597, "learning_rate": 2.3904268836269516e-06, "loss": 0.451, "step": 10928 }, { "epoch": 0.6845706948120077, "grad_norm": 0.808528148819946, "learning_rate": 2.389561657583681e-06, "loss": 0.3801, "step": 10929 }, { "epoch": 0.68463333281135, "grad_norm": 0.8335656507563161, "learning_rate": 2.388696538986539e-06, "loss": 0.3481, "step": 10930 }, { "epoch": 0.6846959708106923, "grad_norm": 0.8079714972906235, "learning_rate": 2.3878315278711322e-06, "loss": 0.3683, "step": 10931 }, { "epoch": 0.6847586088100346, "grad_norm": 0.8412707046101806, "learning_rate": 2.386966624273064e-06, "loss": 0.3971, "step": 10932 }, { "epoch": 0.684821246809377, "grad_norm": 0.8279513615258339, "learning_rate": 2.3861018282279334e-06, "loss": 0.399, "step": 10933 }, { "epoch": 0.6848838848087192, "grad_norm": 0.8298896284222986, "learning_rate": 2.3852371397713385e-06, "loss": 0.3649, "step": 10934 }, { "epoch": 0.6849465228080616, "grad_norm": 0.7910795588771463, "learning_rate": 2.3843725589388684e-06, "loss": 0.3896, "step": 10935 }, { "epoch": 0.6850091608074038, "grad_norm": 0.8612012385707444, "learning_rate": 2.3835080857661092e-06, "loss": 0.3914, "step": 10936 }, { "epoch": 0.6850717988067461, "grad_norm": 0.8026309954774878, "learning_rate": 2.382643720288642e-06, "loss": 0.372, "step": 10937 }, { "epoch": 0.6851344368060884, "grad_norm": 0.6101630988754337, "learning_rate": 2.3817794625420444e-06, "loss": 0.4564, "step": 10938 }, { "epoch": 0.6851970748054307, "grad_norm": 0.9180737557682271, "learning_rate": 2.380915312561891e-06, "loss": 0.4198, "step": 10939 }, { "epoch": 0.685259712804773, "grad_norm": 0.8774285364439603, "learning_rate": 2.3800512703837503e-06, "loss": 0.387, "step": 10940 }, { "epoch": 0.6853223508041153, "grad_norm": 0.7724177104163577, "learning_rate": 2.379187336043185e-06, "loss": 0.3548, "step": 10941 }, { "epoch": 0.6853849888034577, "grad_norm": 0.8375384802208372, "learning_rate": 2.3783235095757535e-06, "loss": 0.3764, "step": 10942 }, { "epoch": 0.6854476268027999, "grad_norm": 0.8504020523499036, "learning_rate": 2.377459791017015e-06, "loss": 0.3993, "step": 10943 }, { "epoch": 0.6855102648021422, "grad_norm": 0.7857221592774704, "learning_rate": 2.3765961804025185e-06, "loss": 0.3838, "step": 10944 }, { "epoch": 0.6855729028014845, "grad_norm": 0.8660723339848301, "learning_rate": 2.3757326777678098e-06, "loss": 0.3541, "step": 10945 }, { "epoch": 0.6856355408008268, "grad_norm": 0.8647533781869036, "learning_rate": 2.374869283148431e-06, "loss": 0.4109, "step": 10946 }, { "epoch": 0.6856981788001691, "grad_norm": 0.7807281295705972, "learning_rate": 2.3740059965799183e-06, "loss": 0.3137, "step": 10947 }, { "epoch": 0.6857608167995114, "grad_norm": 0.811227086410381, "learning_rate": 2.3731428180978076e-06, "loss": 0.3673, "step": 10948 }, { "epoch": 0.6858234547988538, "grad_norm": 0.8899824950503454, "learning_rate": 2.372279747737627e-06, "loss": 0.4076, "step": 10949 }, { "epoch": 0.685886092798196, "grad_norm": 0.8417453556460308, "learning_rate": 2.3714167855348984e-06, "loss": 0.3584, "step": 10950 }, { "epoch": 0.6859487307975384, "grad_norm": 0.8717860425678141, "learning_rate": 2.370553931525144e-06, "loss": 0.3894, "step": 10951 }, { "epoch": 0.6860113687968806, "grad_norm": 0.8478711619332481, "learning_rate": 2.3696911857438754e-06, "loss": 0.4047, "step": 10952 }, { "epoch": 0.6860740067962229, "grad_norm": 0.8226928544133286, "learning_rate": 2.368828548226608e-06, "loss": 0.3761, "step": 10953 }, { "epoch": 0.6861366447955652, "grad_norm": 0.928060669235318, "learning_rate": 2.3679660190088457e-06, "loss": 0.4022, "step": 10954 }, { "epoch": 0.6861992827949075, "grad_norm": 0.8418024213284435, "learning_rate": 2.367103598126088e-06, "loss": 0.3941, "step": 10955 }, { "epoch": 0.6862619207942499, "grad_norm": 0.8216380770439792, "learning_rate": 2.3662412856138376e-06, "loss": 0.3883, "step": 10956 }, { "epoch": 0.6863245587935921, "grad_norm": 0.8174896497644691, "learning_rate": 2.3653790815075846e-06, "loss": 0.4285, "step": 10957 }, { "epoch": 0.6863871967929345, "grad_norm": 0.8420694108565292, "learning_rate": 2.3645169858428173e-06, "loss": 0.401, "step": 10958 }, { "epoch": 0.6864498347922767, "grad_norm": 0.8666402907726944, "learning_rate": 2.3636549986550207e-06, "loss": 0.4389, "step": 10959 }, { "epoch": 0.6865124727916191, "grad_norm": 0.849199725006827, "learning_rate": 2.3627931199796734e-06, "loss": 0.4061, "step": 10960 }, { "epoch": 0.6865751107909613, "grad_norm": 0.8334490094286281, "learning_rate": 2.3619313498522484e-06, "loss": 0.3714, "step": 10961 }, { "epoch": 0.6866377487903036, "grad_norm": 0.7915763227286001, "learning_rate": 2.361069688308221e-06, "loss": 0.3812, "step": 10962 }, { "epoch": 0.686700386789646, "grad_norm": 0.8501176237201326, "learning_rate": 2.3602081353830552e-06, "loss": 0.374, "step": 10963 }, { "epoch": 0.6867630247889882, "grad_norm": 0.8603522638984965, "learning_rate": 2.3593466911122103e-06, "loss": 0.3915, "step": 10964 }, { "epoch": 0.6868256627883306, "grad_norm": 0.8129731779454826, "learning_rate": 2.3584853555311476e-06, "loss": 0.3665, "step": 10965 }, { "epoch": 0.6868883007876728, "grad_norm": 0.8320161629832601, "learning_rate": 2.357624128675318e-06, "loss": 0.3971, "step": 10966 }, { "epoch": 0.6869509387870152, "grad_norm": 0.8851242171575676, "learning_rate": 2.3567630105801696e-06, "loss": 0.4332, "step": 10967 }, { "epoch": 0.6870135767863574, "grad_norm": 0.8505100259692021, "learning_rate": 2.355902001281147e-06, "loss": 0.3993, "step": 10968 }, { "epoch": 0.6870762147856997, "grad_norm": 0.8055570838165919, "learning_rate": 2.355041100813688e-06, "loss": 0.3959, "step": 10969 }, { "epoch": 0.687138852785042, "grad_norm": 0.8024751614450225, "learning_rate": 2.3541803092132264e-06, "loss": 0.3298, "step": 10970 }, { "epoch": 0.6872014907843843, "grad_norm": 0.8342499946131037, "learning_rate": 2.353319626515197e-06, "loss": 0.3897, "step": 10971 }, { "epoch": 0.6872641287837267, "grad_norm": 0.7958401047160547, "learning_rate": 2.3524590527550223e-06, "loss": 0.3554, "step": 10972 }, { "epoch": 0.6873267667830689, "grad_norm": 0.8038969430516738, "learning_rate": 2.3515985879681224e-06, "loss": 0.3568, "step": 10973 }, { "epoch": 0.6873894047824113, "grad_norm": 0.8956107656186719, "learning_rate": 2.3507382321899187e-06, "loss": 0.4017, "step": 10974 }, { "epoch": 0.6874520427817535, "grad_norm": 0.774010598997273, "learning_rate": 2.349877985455821e-06, "loss": 0.3351, "step": 10975 }, { "epoch": 0.6875146807810959, "grad_norm": 0.8190531513263575, "learning_rate": 2.3490178478012363e-06, "loss": 0.4154, "step": 10976 }, { "epoch": 0.6875773187804382, "grad_norm": 0.8509588734177967, "learning_rate": 2.34815781926157e-06, "loss": 0.3836, "step": 10977 }, { "epoch": 0.6876399567797804, "grad_norm": 0.8476745445909953, "learning_rate": 2.34729789987222e-06, "loss": 0.4086, "step": 10978 }, { "epoch": 0.6877025947791228, "grad_norm": 0.8458482013757248, "learning_rate": 2.346438089668579e-06, "loss": 0.3837, "step": 10979 }, { "epoch": 0.687765232778465, "grad_norm": 0.8265940964054539, "learning_rate": 2.34557838868604e-06, "loss": 0.3597, "step": 10980 }, { "epoch": 0.6878278707778074, "grad_norm": 0.8816775204226751, "learning_rate": 2.3447187969599862e-06, "loss": 0.4082, "step": 10981 }, { "epoch": 0.6878905087771496, "grad_norm": 0.8223507024075196, "learning_rate": 2.3438593145258005e-06, "loss": 0.3631, "step": 10982 }, { "epoch": 0.687953146776492, "grad_norm": 0.8192186027573496, "learning_rate": 2.3429999414188594e-06, "loss": 0.3311, "step": 10983 }, { "epoch": 0.6880157847758342, "grad_norm": 0.8150717055075186, "learning_rate": 2.342140677674533e-06, "loss": 0.3803, "step": 10984 }, { "epoch": 0.6880784227751766, "grad_norm": 0.9725387241528289, "learning_rate": 2.34128152332819e-06, "loss": 0.4313, "step": 10985 }, { "epoch": 0.6881410607745189, "grad_norm": 0.8301772843999408, "learning_rate": 2.340422478415193e-06, "loss": 0.379, "step": 10986 }, { "epoch": 0.6882036987738611, "grad_norm": 0.6156358190841842, "learning_rate": 2.3395635429709e-06, "loss": 0.4595, "step": 10987 }, { "epoch": 0.6882663367732035, "grad_norm": 0.8383874526276811, "learning_rate": 2.338704717030664e-06, "loss": 0.4004, "step": 10988 }, { "epoch": 0.6883289747725457, "grad_norm": 0.9010148438619026, "learning_rate": 2.3378460006298375e-06, "loss": 0.4106, "step": 10989 }, { "epoch": 0.6883916127718881, "grad_norm": 0.6110054497416215, "learning_rate": 2.3369873938037624e-06, "loss": 0.4519, "step": 10990 }, { "epoch": 0.6884542507712303, "grad_norm": 0.8629039206918864, "learning_rate": 2.3361288965877813e-06, "loss": 0.3956, "step": 10991 }, { "epoch": 0.6885168887705727, "grad_norm": 0.8914890204061178, "learning_rate": 2.33527050901723e-06, "loss": 0.4118, "step": 10992 }, { "epoch": 0.688579526769915, "grad_norm": 0.800393275389066, "learning_rate": 2.334412231127439e-06, "loss": 0.356, "step": 10993 }, { "epoch": 0.6886421647692572, "grad_norm": 0.8729648500568838, "learning_rate": 2.333554062953736e-06, "loss": 0.3859, "step": 10994 }, { "epoch": 0.6887048027685996, "grad_norm": 0.8417406573407437, "learning_rate": 2.332696004531441e-06, "loss": 0.3683, "step": 10995 }, { "epoch": 0.6887674407679418, "grad_norm": 0.8726851083531093, "learning_rate": 2.331838055895875e-06, "loss": 0.4224, "step": 10996 }, { "epoch": 0.6888300787672842, "grad_norm": 0.8767331420843226, "learning_rate": 2.3309802170823476e-06, "loss": 0.3527, "step": 10997 }, { "epoch": 0.6888927167666264, "grad_norm": 0.8533154573157923, "learning_rate": 2.3301224881261713e-06, "loss": 0.3897, "step": 10998 }, { "epoch": 0.6889553547659688, "grad_norm": 0.8223270596963177, "learning_rate": 2.3292648690626472e-06, "loss": 0.3459, "step": 10999 }, { "epoch": 0.6890179927653111, "grad_norm": 0.8936607717957921, "learning_rate": 2.328407359927079e-06, "loss": 0.3892, "step": 11000 }, { "epoch": 0.6890806307646534, "grad_norm": 0.8429882327417167, "learning_rate": 2.32754996075476e-06, "loss": 0.4042, "step": 11001 }, { "epoch": 0.6891432687639957, "grad_norm": 0.8446514132020222, "learning_rate": 2.32669267158098e-06, "loss": 0.3594, "step": 11002 }, { "epoch": 0.6892059067633379, "grad_norm": 0.9426767383002366, "learning_rate": 2.3258354924410257e-06, "loss": 0.4199, "step": 11003 }, { "epoch": 0.6892685447626803, "grad_norm": 0.7735679629361464, "learning_rate": 2.3249784233701796e-06, "loss": 0.3738, "step": 11004 }, { "epoch": 0.6893311827620225, "grad_norm": 0.8735852107786174, "learning_rate": 2.3241214644037173e-06, "loss": 0.436, "step": 11005 }, { "epoch": 0.6893938207613649, "grad_norm": 0.88917471861967, "learning_rate": 2.323264615576911e-06, "loss": 0.435, "step": 11006 }, { "epoch": 0.6894564587607072, "grad_norm": 0.8517480995280866, "learning_rate": 2.3224078769250296e-06, "loss": 0.4401, "step": 11007 }, { "epoch": 0.6895190967600495, "grad_norm": 0.8381541068640533, "learning_rate": 2.321551248483339e-06, "loss": 0.3503, "step": 11008 }, { "epoch": 0.6895817347593918, "grad_norm": 0.925990884132742, "learning_rate": 2.3206947302870965e-06, "loss": 0.4003, "step": 11009 }, { "epoch": 0.6896443727587341, "grad_norm": 0.8530700804170771, "learning_rate": 2.3198383223715553e-06, "loss": 0.3943, "step": 11010 }, { "epoch": 0.6897070107580764, "grad_norm": 0.8814052144308594, "learning_rate": 2.318982024771967e-06, "loss": 0.3912, "step": 11011 }, { "epoch": 0.6897696487574186, "grad_norm": 0.8679435657897104, "learning_rate": 2.318125837523576e-06, "loss": 0.4015, "step": 11012 }, { "epoch": 0.689832286756761, "grad_norm": 0.8492903142160629, "learning_rate": 2.3172697606616228e-06, "loss": 0.3767, "step": 11013 }, { "epoch": 0.6898949247561033, "grad_norm": 0.7979644734470372, "learning_rate": 2.3164137942213427e-06, "loss": 0.3904, "step": 11014 }, { "epoch": 0.6899575627554456, "grad_norm": 0.8429615322809003, "learning_rate": 2.315557938237971e-06, "loss": 0.3938, "step": 11015 }, { "epoch": 0.6900202007547879, "grad_norm": 0.6060194548710566, "learning_rate": 2.314702192746731e-06, "loss": 0.4664, "step": 11016 }, { "epoch": 0.6900828387541302, "grad_norm": 0.851808386838099, "learning_rate": 2.3138465577828486e-06, "loss": 0.4007, "step": 11017 }, { "epoch": 0.6901454767534725, "grad_norm": 0.8477194898503372, "learning_rate": 2.312991033381541e-06, "loss": 0.389, "step": 11018 }, { "epoch": 0.6902081147528148, "grad_norm": 0.8912085941625645, "learning_rate": 2.3121356195780204e-06, "loss": 0.3646, "step": 11019 }, { "epoch": 0.6902707527521571, "grad_norm": 0.8142923816457505, "learning_rate": 2.311280316407497e-06, "loss": 0.3789, "step": 11020 }, { "epoch": 0.6903333907514994, "grad_norm": 0.8982098243136003, "learning_rate": 2.310425123905175e-06, "loss": 0.3745, "step": 11021 }, { "epoch": 0.6903960287508417, "grad_norm": 0.7774887884279847, "learning_rate": 2.3095700421062538e-06, "loss": 0.3711, "step": 11022 }, { "epoch": 0.690458666750184, "grad_norm": 0.8818645289955976, "learning_rate": 2.3087150710459276e-06, "loss": 0.4066, "step": 11023 }, { "epoch": 0.6905213047495263, "grad_norm": 0.8511022407719221, "learning_rate": 2.30786021075939e-06, "loss": 0.3955, "step": 11024 }, { "epoch": 0.6905839427488686, "grad_norm": 0.8130228348438207, "learning_rate": 2.3070054612818243e-06, "loss": 0.3677, "step": 11025 }, { "epoch": 0.690646580748211, "grad_norm": 0.9260498545587085, "learning_rate": 2.3061508226484153e-06, "loss": 0.4208, "step": 11026 }, { "epoch": 0.6907092187475532, "grad_norm": 0.812124126946776, "learning_rate": 2.3052962948943386e-06, "loss": 0.3836, "step": 11027 }, { "epoch": 0.6907718567468955, "grad_norm": 0.894822754396556, "learning_rate": 2.3044418780547666e-06, "loss": 0.4302, "step": 11028 }, { "epoch": 0.6908344947462378, "grad_norm": 0.8032161282032316, "learning_rate": 2.303587572164867e-06, "loss": 0.4267, "step": 11029 }, { "epoch": 0.6908971327455801, "grad_norm": 0.857876865746119, "learning_rate": 2.302733377259804e-06, "loss": 0.4003, "step": 11030 }, { "epoch": 0.6909597707449224, "grad_norm": 0.8543415645964715, "learning_rate": 2.3018792933747347e-06, "loss": 0.3858, "step": 11031 }, { "epoch": 0.6910224087442647, "grad_norm": 0.8101380989735327, "learning_rate": 2.3010253205448137e-06, "loss": 0.3628, "step": 11032 }, { "epoch": 0.691085046743607, "grad_norm": 0.8352317141866115, "learning_rate": 2.3001714588051925e-06, "loss": 0.3858, "step": 11033 }, { "epoch": 0.6911476847429493, "grad_norm": 0.8363833405746359, "learning_rate": 2.2993177081910157e-06, "loss": 0.4216, "step": 11034 }, { "epoch": 0.6912103227422917, "grad_norm": 0.819079981442048, "learning_rate": 2.2984640687374216e-06, "loss": 0.3467, "step": 11035 }, { "epoch": 0.6912729607416339, "grad_norm": 0.5998972794846542, "learning_rate": 2.297610540479549e-06, "loss": 0.4552, "step": 11036 }, { "epoch": 0.6913355987409762, "grad_norm": 0.7858368334826377, "learning_rate": 2.296757123452529e-06, "loss": 0.3809, "step": 11037 }, { "epoch": 0.6913982367403185, "grad_norm": 0.8604325766641525, "learning_rate": 2.2959038176914865e-06, "loss": 0.3898, "step": 11038 }, { "epoch": 0.6914608747396608, "grad_norm": 0.90145592691019, "learning_rate": 2.2950506232315458e-06, "loss": 0.4427, "step": 11039 }, { "epoch": 0.6915235127390031, "grad_norm": 0.859019164337857, "learning_rate": 2.294197540107821e-06, "loss": 0.3859, "step": 11040 }, { "epoch": 0.6915861507383454, "grad_norm": 0.8419277015705174, "learning_rate": 2.2933445683554294e-06, "loss": 0.3995, "step": 11041 }, { "epoch": 0.6916487887376878, "grad_norm": 0.804974293616934, "learning_rate": 2.2924917080094784e-06, "loss": 0.3897, "step": 11042 }, { "epoch": 0.69171142673703, "grad_norm": 0.8527175606308023, "learning_rate": 2.291638959105071e-06, "loss": 0.4153, "step": 11043 }, { "epoch": 0.6917740647363724, "grad_norm": 0.9061678499877946, "learning_rate": 2.2907863216773045e-06, "loss": 0.3926, "step": 11044 }, { "epoch": 0.6918367027357146, "grad_norm": 0.7830608952519899, "learning_rate": 2.2899337957612782e-06, "loss": 0.3556, "step": 11045 }, { "epoch": 0.6918993407350569, "grad_norm": 0.7679258470330422, "learning_rate": 2.2890813813920796e-06, "loss": 0.3812, "step": 11046 }, { "epoch": 0.6919619787343992, "grad_norm": 0.7925758199099068, "learning_rate": 2.288229078604795e-06, "loss": 0.3758, "step": 11047 }, { "epoch": 0.6920246167337415, "grad_norm": 0.8318171390008019, "learning_rate": 2.287376887434504e-06, "loss": 0.3881, "step": 11048 }, { "epoch": 0.6920872547330839, "grad_norm": 0.7540736660645286, "learning_rate": 2.286524807916283e-06, "loss": 0.3522, "step": 11049 }, { "epoch": 0.6921498927324261, "grad_norm": 0.6634093559587779, "learning_rate": 2.2856728400852053e-06, "loss": 0.437, "step": 11050 }, { "epoch": 0.6922125307317685, "grad_norm": 0.9125153322129462, "learning_rate": 2.284820983976338e-06, "loss": 0.4053, "step": 11051 }, { "epoch": 0.6922751687311107, "grad_norm": 0.7753164175174186, "learning_rate": 2.2839692396247427e-06, "loss": 0.3729, "step": 11052 }, { "epoch": 0.692337806730453, "grad_norm": 0.8611568591334047, "learning_rate": 2.283117607065476e-06, "loss": 0.3988, "step": 11053 }, { "epoch": 0.6924004447297953, "grad_norm": 0.8652800211463726, "learning_rate": 2.2822660863335943e-06, "loss": 0.4029, "step": 11054 }, { "epoch": 0.6924630827291376, "grad_norm": 0.8348509024824486, "learning_rate": 2.2814146774641453e-06, "loss": 0.3906, "step": 11055 }, { "epoch": 0.69252572072848, "grad_norm": 0.792262031423803, "learning_rate": 2.2805633804921727e-06, "loss": 0.3771, "step": 11056 }, { "epoch": 0.6925883587278222, "grad_norm": 0.8589512090713871, "learning_rate": 2.2797121954527156e-06, "loss": 0.3897, "step": 11057 }, { "epoch": 0.6926509967271646, "grad_norm": 0.8151181633707429, "learning_rate": 2.2788611223808083e-06, "loss": 0.4015, "step": 11058 }, { "epoch": 0.6927136347265068, "grad_norm": 0.8776101196256576, "learning_rate": 2.278010161311484e-06, "loss": 0.3539, "step": 11059 }, { "epoch": 0.6927762727258492, "grad_norm": 0.6065259850323337, "learning_rate": 2.2771593122797663e-06, "loss": 0.4625, "step": 11060 }, { "epoch": 0.6928389107251914, "grad_norm": 0.833600474815494, "learning_rate": 2.276308575320677e-06, "loss": 0.3763, "step": 11061 }, { "epoch": 0.6929015487245337, "grad_norm": 0.8160510825987164, "learning_rate": 2.27545795046923e-06, "loss": 0.4262, "step": 11062 }, { "epoch": 0.692964186723876, "grad_norm": 0.9002922757731833, "learning_rate": 2.274607437760442e-06, "loss": 0.4145, "step": 11063 }, { "epoch": 0.6930268247232183, "grad_norm": 0.7706870165440218, "learning_rate": 2.2737570372293177e-06, "loss": 0.3658, "step": 11064 }, { "epoch": 0.6930894627225607, "grad_norm": 0.9013489237331179, "learning_rate": 2.2729067489108596e-06, "loss": 0.3622, "step": 11065 }, { "epoch": 0.6931521007219029, "grad_norm": 0.8834019424976904, "learning_rate": 2.272056572840064e-06, "loss": 0.3786, "step": 11066 }, { "epoch": 0.6932147387212453, "grad_norm": 0.9186279934897519, "learning_rate": 2.2712065090519275e-06, "loss": 0.4289, "step": 11067 }, { "epoch": 0.6932773767205875, "grad_norm": 0.9644963221100933, "learning_rate": 2.2703565575814383e-06, "loss": 0.4121, "step": 11068 }, { "epoch": 0.6933400147199299, "grad_norm": 0.7588355034234243, "learning_rate": 2.2695067184635795e-06, "loss": 0.3575, "step": 11069 }, { "epoch": 0.6934026527192722, "grad_norm": 0.8361110526628049, "learning_rate": 2.2686569917333314e-06, "loss": 0.3663, "step": 11070 }, { "epoch": 0.6934652907186144, "grad_norm": 0.8320210462243596, "learning_rate": 2.267807377425666e-06, "loss": 0.417, "step": 11071 }, { "epoch": 0.6935279287179568, "grad_norm": 0.9148693281250789, "learning_rate": 2.266957875575558e-06, "loss": 0.4158, "step": 11072 }, { "epoch": 0.693590566717299, "grad_norm": 0.9174825537449839, "learning_rate": 2.266108486217971e-06, "loss": 0.4219, "step": 11073 }, { "epoch": 0.6936532047166414, "grad_norm": 0.7939171635957937, "learning_rate": 2.265259209387867e-06, "loss": 0.3907, "step": 11074 }, { "epoch": 0.6937158427159836, "grad_norm": 0.8213685582606708, "learning_rate": 2.264410045120199e-06, "loss": 0.3764, "step": 11075 }, { "epoch": 0.693778480715326, "grad_norm": 0.908822918817571, "learning_rate": 2.2635609934499232e-06, "loss": 0.3714, "step": 11076 }, { "epoch": 0.6938411187146682, "grad_norm": 0.92815322473849, "learning_rate": 2.262712054411985e-06, "loss": 0.4091, "step": 11077 }, { "epoch": 0.6939037567140105, "grad_norm": 0.8368122329735562, "learning_rate": 2.2618632280413266e-06, "loss": 0.3596, "step": 11078 }, { "epoch": 0.6939663947133529, "grad_norm": 0.9058652258141016, "learning_rate": 2.2610145143728855e-06, "loss": 0.4044, "step": 11079 }, { "epoch": 0.6940290327126951, "grad_norm": 0.9487699816600539, "learning_rate": 2.260165913441596e-06, "loss": 0.4182, "step": 11080 }, { "epoch": 0.6940916707120375, "grad_norm": 0.861721258363285, "learning_rate": 2.259317425282384e-06, "loss": 0.4088, "step": 11081 }, { "epoch": 0.6941543087113797, "grad_norm": 0.8836993220327286, "learning_rate": 2.2584690499301776e-06, "loss": 0.3882, "step": 11082 }, { "epoch": 0.6942169467107221, "grad_norm": 0.909573300983196, "learning_rate": 2.2576207874198936e-06, "loss": 0.4009, "step": 11083 }, { "epoch": 0.6942795847100643, "grad_norm": 0.8134664122188378, "learning_rate": 2.256772637786445e-06, "loss": 0.3791, "step": 11084 }, { "epoch": 0.6943422227094067, "grad_norm": 0.8440145318268542, "learning_rate": 2.2559246010647456e-06, "loss": 0.3817, "step": 11085 }, { "epoch": 0.694404860708749, "grad_norm": 0.6223246201308471, "learning_rate": 2.2550766772896995e-06, "loss": 0.4602, "step": 11086 }, { "epoch": 0.6944674987080912, "grad_norm": 0.9187502503094481, "learning_rate": 2.2542288664962066e-06, "loss": 0.3929, "step": 11087 }, { "epoch": 0.6945301367074336, "grad_norm": 0.8200537953447298, "learning_rate": 2.253381168719163e-06, "loss": 0.355, "step": 11088 }, { "epoch": 0.6945927747067758, "grad_norm": 0.8688936068976395, "learning_rate": 2.2525335839934604e-06, "loss": 0.3763, "step": 11089 }, { "epoch": 0.6946554127061182, "grad_norm": 0.883752136435789, "learning_rate": 2.2516861123539835e-06, "loss": 0.4357, "step": 11090 }, { "epoch": 0.6947180507054604, "grad_norm": 0.8867125185363357, "learning_rate": 2.2508387538356187e-06, "loss": 0.4251, "step": 11091 }, { "epoch": 0.6947806887048028, "grad_norm": 0.8928961519492388, "learning_rate": 2.2499915084732387e-06, "loss": 0.3751, "step": 11092 }, { "epoch": 0.6948433267041451, "grad_norm": 0.8144039005184792, "learning_rate": 2.2491443763017206e-06, "loss": 0.4015, "step": 11093 }, { "epoch": 0.6949059647034874, "grad_norm": 0.8305183618599087, "learning_rate": 2.2482973573559306e-06, "loss": 0.3728, "step": 11094 }, { "epoch": 0.6949686027028297, "grad_norm": 0.865530246221567, "learning_rate": 2.247450451670732e-06, "loss": 0.4042, "step": 11095 }, { "epoch": 0.6950312407021719, "grad_norm": 0.9035846887605126, "learning_rate": 2.2466036592809837e-06, "loss": 0.3577, "step": 11096 }, { "epoch": 0.6950938787015143, "grad_norm": 0.8464593323925345, "learning_rate": 2.24575698022154e-06, "loss": 0.3906, "step": 11097 }, { "epoch": 0.6951565167008565, "grad_norm": 0.5810091436138028, "learning_rate": 2.24491041452725e-06, "loss": 0.4217, "step": 11098 }, { "epoch": 0.6952191547001989, "grad_norm": 0.8449568975684872, "learning_rate": 2.2440639622329567e-06, "loss": 0.4211, "step": 11099 }, { "epoch": 0.6952817926995412, "grad_norm": 0.8600480645938265, "learning_rate": 2.2432176233735047e-06, "loss": 0.385, "step": 11100 }, { "epoch": 0.6953444306988835, "grad_norm": 0.9238241101846102, "learning_rate": 2.242371397983725e-06, "loss": 0.4347, "step": 11101 }, { "epoch": 0.6954070686982258, "grad_norm": 0.8251583520860104, "learning_rate": 2.241525286098452e-06, "loss": 0.3916, "step": 11102 }, { "epoch": 0.695469706697568, "grad_norm": 0.842975627129561, "learning_rate": 2.2406792877525103e-06, "loss": 0.3787, "step": 11103 }, { "epoch": 0.6955323446969104, "grad_norm": 0.9294759689711689, "learning_rate": 2.239833402980721e-06, "loss": 0.3757, "step": 11104 }, { "epoch": 0.6955949826962526, "grad_norm": 0.7792135803663062, "learning_rate": 2.2389876318179016e-06, "loss": 0.3882, "step": 11105 }, { "epoch": 0.695657620695595, "grad_norm": 0.8445777575739184, "learning_rate": 2.2381419742988635e-06, "loss": 0.384, "step": 11106 }, { "epoch": 0.6957202586949373, "grad_norm": 0.8254393257610524, "learning_rate": 2.2372964304584144e-06, "loss": 0.3743, "step": 11107 }, { "epoch": 0.6957828966942796, "grad_norm": 0.8893469262528247, "learning_rate": 2.236451000331355e-06, "loss": 0.4013, "step": 11108 }, { "epoch": 0.6958455346936219, "grad_norm": 0.8715675062306895, "learning_rate": 2.2356056839524875e-06, "loss": 0.3943, "step": 11109 }, { "epoch": 0.6959081726929642, "grad_norm": 0.8439063707850568, "learning_rate": 2.2347604813566016e-06, "loss": 0.353, "step": 11110 }, { "epoch": 0.6959708106923065, "grad_norm": 0.8687538393112991, "learning_rate": 2.2339153925784886e-06, "loss": 0.3516, "step": 11111 }, { "epoch": 0.6960334486916487, "grad_norm": 0.8801015427422296, "learning_rate": 2.2330704176529315e-06, "loss": 0.414, "step": 11112 }, { "epoch": 0.6960960866909911, "grad_norm": 0.8308464596105504, "learning_rate": 2.23222555661471e-06, "loss": 0.3978, "step": 11113 }, { "epoch": 0.6961587246903334, "grad_norm": 0.8034085056125703, "learning_rate": 2.2313808094985984e-06, "loss": 0.3727, "step": 11114 }, { "epoch": 0.6962213626896757, "grad_norm": 0.8153428435611542, "learning_rate": 2.2305361763393663e-06, "loss": 0.3669, "step": 11115 }, { "epoch": 0.696284000689018, "grad_norm": 0.8181322162575217, "learning_rate": 2.2296916571717793e-06, "loss": 0.3917, "step": 11116 }, { "epoch": 0.6963466386883603, "grad_norm": 0.858095404501356, "learning_rate": 2.228847252030596e-06, "loss": 0.3749, "step": 11117 }, { "epoch": 0.6964092766877026, "grad_norm": 0.8791929987590008, "learning_rate": 2.2280029609505745e-06, "loss": 0.4076, "step": 11118 }, { "epoch": 0.696471914687045, "grad_norm": 0.9075904257712961, "learning_rate": 2.2271587839664673e-06, "loss": 0.4036, "step": 11119 }, { "epoch": 0.6965345526863872, "grad_norm": 0.8187936460067378, "learning_rate": 2.226314721113019e-06, "loss": 0.3592, "step": 11120 }, { "epoch": 0.6965971906857295, "grad_norm": 0.8792383838391231, "learning_rate": 2.2254707724249712e-06, "loss": 0.4071, "step": 11121 }, { "epoch": 0.6966598286850718, "grad_norm": 0.8047387872623397, "learning_rate": 2.224626937937062e-06, "loss": 0.3689, "step": 11122 }, { "epoch": 0.6967224666844141, "grad_norm": 0.8256943903189772, "learning_rate": 2.223783217684023e-06, "loss": 0.3546, "step": 11123 }, { "epoch": 0.6967851046837564, "grad_norm": 0.901988200276119, "learning_rate": 2.222939611700582e-06, "loss": 0.3548, "step": 11124 }, { "epoch": 0.6968477426830987, "grad_norm": 0.8050447959780923, "learning_rate": 2.22209612002146e-06, "loss": 0.3329, "step": 11125 }, { "epoch": 0.696910380682441, "grad_norm": 0.7930698930359728, "learning_rate": 2.2212527426813797e-06, "loss": 0.3534, "step": 11126 }, { "epoch": 0.6969730186817833, "grad_norm": 0.8811597019844613, "learning_rate": 2.2204094797150505e-06, "loss": 0.3859, "step": 11127 }, { "epoch": 0.6970356566811257, "grad_norm": 0.8547682453597955, "learning_rate": 2.219566331157184e-06, "loss": 0.3883, "step": 11128 }, { "epoch": 0.6970982946804679, "grad_norm": 0.6725420534934455, "learning_rate": 2.2187232970424844e-06, "loss": 0.4402, "step": 11129 }, { "epoch": 0.6971609326798102, "grad_norm": 0.847417616601743, "learning_rate": 2.2178803774056494e-06, "loss": 0.3834, "step": 11130 }, { "epoch": 0.6972235706791525, "grad_norm": 0.8636948957654793, "learning_rate": 2.2170375722813746e-06, "loss": 0.3993, "step": 11131 }, { "epoch": 0.6972862086784948, "grad_norm": 0.9037856650557018, "learning_rate": 2.21619488170435e-06, "loss": 0.4242, "step": 11132 }, { "epoch": 0.6973488466778371, "grad_norm": 0.8520629643583332, "learning_rate": 2.2153523057092606e-06, "loss": 0.3988, "step": 11133 }, { "epoch": 0.6974114846771794, "grad_norm": 0.9114433672872067, "learning_rate": 2.2145098443307857e-06, "loss": 0.4634, "step": 11134 }, { "epoch": 0.6974741226765218, "grad_norm": 0.822998579049062, "learning_rate": 2.213667497603604e-06, "loss": 0.3823, "step": 11135 }, { "epoch": 0.697536760675864, "grad_norm": 0.9116973352588298, "learning_rate": 2.212825265562383e-06, "loss": 0.4035, "step": 11136 }, { "epoch": 0.6975993986752063, "grad_norm": 0.7771858065282452, "learning_rate": 2.211983148241794e-06, "loss": 0.3398, "step": 11137 }, { "epoch": 0.6976620366745486, "grad_norm": 0.9001861844547183, "learning_rate": 2.2111411456764964e-06, "loss": 0.383, "step": 11138 }, { "epoch": 0.6977246746738909, "grad_norm": 0.8480099451261408, "learning_rate": 2.210299257901146e-06, "loss": 0.3889, "step": 11139 }, { "epoch": 0.6977873126732332, "grad_norm": 0.956235380605185, "learning_rate": 2.209457484950397e-06, "loss": 0.4411, "step": 11140 }, { "epoch": 0.6978499506725755, "grad_norm": 0.8811955790220611, "learning_rate": 2.2086158268588947e-06, "loss": 0.4039, "step": 11141 }, { "epoch": 0.6979125886719179, "grad_norm": 0.5727675363632482, "learning_rate": 2.2077742836612835e-06, "loss": 0.4317, "step": 11142 }, { "epoch": 0.6979752266712601, "grad_norm": 0.8840698988263368, "learning_rate": 2.2069328553922e-06, "loss": 0.4045, "step": 11143 }, { "epoch": 0.6980378646706025, "grad_norm": 0.8088781891423894, "learning_rate": 2.2060915420862796e-06, "loss": 0.3835, "step": 11144 }, { "epoch": 0.6981005026699447, "grad_norm": 0.9217106903448247, "learning_rate": 2.2052503437781483e-06, "loss": 0.4321, "step": 11145 }, { "epoch": 0.698163140669287, "grad_norm": 0.8638596223400936, "learning_rate": 2.204409260502433e-06, "loss": 0.4495, "step": 11146 }, { "epoch": 0.6982257786686293, "grad_norm": 0.8093881776450633, "learning_rate": 2.203568292293752e-06, "loss": 0.3906, "step": 11147 }, { "epoch": 0.6982884166679716, "grad_norm": 0.833685381456447, "learning_rate": 2.202727439186718e-06, "loss": 0.3961, "step": 11148 }, { "epoch": 0.698351054667314, "grad_norm": 0.8250124321404212, "learning_rate": 2.201886701215943e-06, "loss": 0.3815, "step": 11149 }, { "epoch": 0.6984136926666562, "grad_norm": 0.859654153814677, "learning_rate": 2.2010460784160302e-06, "loss": 0.3461, "step": 11150 }, { "epoch": 0.6984763306659986, "grad_norm": 0.8083360462785651, "learning_rate": 2.200205570821579e-06, "loss": 0.335, "step": 11151 }, { "epoch": 0.6985389686653408, "grad_norm": 0.8725611586942609, "learning_rate": 2.1993651784671875e-06, "loss": 0.3608, "step": 11152 }, { "epoch": 0.6986016066646832, "grad_norm": 0.8343801538982706, "learning_rate": 2.1985249013874454e-06, "loss": 0.3894, "step": 11153 }, { "epoch": 0.6986642446640254, "grad_norm": 0.8225614342690245, "learning_rate": 2.1976847396169364e-06, "loss": 0.3749, "step": 11154 }, { "epoch": 0.6987268826633677, "grad_norm": 0.6187341911037807, "learning_rate": 2.1968446931902454e-06, "loss": 0.4697, "step": 11155 }, { "epoch": 0.69878952066271, "grad_norm": 0.8329521408015766, "learning_rate": 2.1960047621419474e-06, "loss": 0.3629, "step": 11156 }, { "epoch": 0.6988521586620523, "grad_norm": 0.939347847527218, "learning_rate": 2.1951649465066132e-06, "loss": 0.397, "step": 11157 }, { "epoch": 0.6989147966613947, "grad_norm": 0.80940969220344, "learning_rate": 2.1943252463188107e-06, "loss": 0.3579, "step": 11158 }, { "epoch": 0.6989774346607369, "grad_norm": 0.8311841972772418, "learning_rate": 2.193485661613102e-06, "loss": 0.402, "step": 11159 }, { "epoch": 0.6990400726600793, "grad_norm": 0.774714114757351, "learning_rate": 2.1926461924240427e-06, "loss": 0.3717, "step": 11160 }, { "epoch": 0.6991027106594215, "grad_norm": 0.8878541695685883, "learning_rate": 2.1918068387861892e-06, "loss": 0.399, "step": 11161 }, { "epoch": 0.6991653486587638, "grad_norm": 0.6410778620399384, "learning_rate": 2.1909676007340867e-06, "loss": 0.4738, "step": 11162 }, { "epoch": 0.6992279866581061, "grad_norm": 0.9049263490981012, "learning_rate": 2.1901284783022774e-06, "loss": 0.397, "step": 11163 }, { "epoch": 0.6992906246574484, "grad_norm": 0.8123411361748973, "learning_rate": 2.189289471525304e-06, "loss": 0.3901, "step": 11164 }, { "epoch": 0.6993532626567908, "grad_norm": 0.7838673322024821, "learning_rate": 2.1884505804376973e-06, "loss": 0.3519, "step": 11165 }, { "epoch": 0.699415900656133, "grad_norm": 0.8470410686658938, "learning_rate": 2.1876118050739858e-06, "loss": 0.3877, "step": 11166 }, { "epoch": 0.6994785386554754, "grad_norm": 0.8409569172273217, "learning_rate": 2.1867731454686954e-06, "loss": 0.3374, "step": 11167 }, { "epoch": 0.6995411766548176, "grad_norm": 0.803089655896206, "learning_rate": 2.1859346016563444e-06, "loss": 0.3836, "step": 11168 }, { "epoch": 0.69960381465416, "grad_norm": 0.8840835496775504, "learning_rate": 2.1850961736714448e-06, "loss": 0.3802, "step": 11169 }, { "epoch": 0.6996664526535022, "grad_norm": 0.8546493893130939, "learning_rate": 2.184257861548512e-06, "loss": 0.3817, "step": 11170 }, { "epoch": 0.6997290906528445, "grad_norm": 0.89599367753093, "learning_rate": 2.183419665322048e-06, "loss": 0.4064, "step": 11171 }, { "epoch": 0.6997917286521869, "grad_norm": 0.8701017313816476, "learning_rate": 2.182581585026553e-06, "loss": 0.3874, "step": 11172 }, { "epoch": 0.6998543666515291, "grad_norm": 0.8153342474025429, "learning_rate": 2.1817436206965216e-06, "loss": 0.3957, "step": 11173 }, { "epoch": 0.6999170046508715, "grad_norm": 0.8591981056374599, "learning_rate": 2.180905772366447e-06, "loss": 0.3554, "step": 11174 }, { "epoch": 0.6999796426502137, "grad_norm": 0.85063750619404, "learning_rate": 2.1800680400708147e-06, "loss": 0.3833, "step": 11175 }, { "epoch": 0.7000422806495561, "grad_norm": 0.9222948608478528, "learning_rate": 2.179230423844105e-06, "loss": 0.421, "step": 11176 }, { "epoch": 0.7001049186488983, "grad_norm": 0.8670301749539694, "learning_rate": 2.178392923720793e-06, "loss": 0.3785, "step": 11177 }, { "epoch": 0.7001675566482407, "grad_norm": 0.8044199887161716, "learning_rate": 2.1775555397353532e-06, "loss": 0.3824, "step": 11178 }, { "epoch": 0.700230194647583, "grad_norm": 0.8404863520643955, "learning_rate": 2.1767182719222517e-06, "loss": 0.3991, "step": 11179 }, { "epoch": 0.7002928326469252, "grad_norm": 0.8770356573643806, "learning_rate": 2.1758811203159504e-06, "loss": 0.4103, "step": 11180 }, { "epoch": 0.7003554706462676, "grad_norm": 0.8473830295732937, "learning_rate": 2.1750440849509056e-06, "loss": 0.3591, "step": 11181 }, { "epoch": 0.7004181086456098, "grad_norm": 0.8631144813484531, "learning_rate": 2.1742071658615692e-06, "loss": 0.3675, "step": 11182 }, { "epoch": 0.7004807466449522, "grad_norm": 0.8281868337373687, "learning_rate": 2.1733703630823926e-06, "loss": 0.3627, "step": 11183 }, { "epoch": 0.7005433846442944, "grad_norm": 0.9484344548532814, "learning_rate": 2.172533676647816e-06, "loss": 0.4361, "step": 11184 }, { "epoch": 0.7006060226436368, "grad_norm": 0.8496374505025228, "learning_rate": 2.1716971065922782e-06, "loss": 0.3921, "step": 11185 }, { "epoch": 0.7006686606429791, "grad_norm": 0.8921913451739357, "learning_rate": 2.1708606529502107e-06, "loss": 0.4093, "step": 11186 }, { "epoch": 0.7007312986423213, "grad_norm": 0.9233885952290137, "learning_rate": 2.1700243157560457e-06, "loss": 0.3939, "step": 11187 }, { "epoch": 0.7007939366416637, "grad_norm": 0.8609761144948589, "learning_rate": 2.169188095044205e-06, "loss": 0.3923, "step": 11188 }, { "epoch": 0.7008565746410059, "grad_norm": 0.818447972466462, "learning_rate": 2.1683519908491073e-06, "loss": 0.3757, "step": 11189 }, { "epoch": 0.7009192126403483, "grad_norm": 0.8822481617017308, "learning_rate": 2.167516003205168e-06, "loss": 0.4035, "step": 11190 }, { "epoch": 0.7009818506396905, "grad_norm": 0.8213100783877053, "learning_rate": 2.1666801321467932e-06, "loss": 0.387, "step": 11191 }, { "epoch": 0.7010444886390329, "grad_norm": 0.8184654509006938, "learning_rate": 2.1658443777083927e-06, "loss": 0.3718, "step": 11192 }, { "epoch": 0.7011071266383752, "grad_norm": 0.8930086363743978, "learning_rate": 2.165008739924363e-06, "loss": 0.3919, "step": 11193 }, { "epoch": 0.7011697646377175, "grad_norm": 0.8635783899207917, "learning_rate": 2.1641732188291004e-06, "loss": 0.3948, "step": 11194 }, { "epoch": 0.7012324026370598, "grad_norm": 0.8106898207138522, "learning_rate": 2.1633378144569923e-06, "loss": 0.4079, "step": 11195 }, { "epoch": 0.701295040636402, "grad_norm": 0.8793529848488367, "learning_rate": 2.1625025268424277e-06, "loss": 0.3836, "step": 11196 }, { "epoch": 0.7013576786357444, "grad_norm": 0.8696912067047893, "learning_rate": 2.1616673560197864e-06, "loss": 0.4127, "step": 11197 }, { "epoch": 0.7014203166350866, "grad_norm": 0.8949446513191467, "learning_rate": 2.1608323020234427e-06, "loss": 0.3952, "step": 11198 }, { "epoch": 0.701482954634429, "grad_norm": 0.831657416998631, "learning_rate": 2.159997364887769e-06, "loss": 0.4092, "step": 11199 }, { "epoch": 0.7015455926337713, "grad_norm": 0.8935253136991541, "learning_rate": 2.1591625446471294e-06, "loss": 0.4145, "step": 11200 }, { "epoch": 0.7016082306331136, "grad_norm": 0.8099616270182957, "learning_rate": 2.158327841335888e-06, "loss": 0.365, "step": 11201 }, { "epoch": 0.7016708686324559, "grad_norm": 0.9162688615657089, "learning_rate": 2.1574932549884e-06, "loss": 0.4085, "step": 11202 }, { "epoch": 0.7017335066317982, "grad_norm": 0.8608245281320759, "learning_rate": 2.156658785639016e-06, "loss": 0.373, "step": 11203 }, { "epoch": 0.7017961446311405, "grad_norm": 0.7394710799712974, "learning_rate": 2.1558244333220857e-06, "loss": 0.347, "step": 11204 }, { "epoch": 0.7018587826304827, "grad_norm": 0.8313269137566828, "learning_rate": 2.1549901980719494e-06, "loss": 0.3675, "step": 11205 }, { "epoch": 0.7019214206298251, "grad_norm": 0.8852399973488952, "learning_rate": 2.154156079922945e-06, "loss": 0.3725, "step": 11206 }, { "epoch": 0.7019840586291674, "grad_norm": 0.8312953980771192, "learning_rate": 2.1533220789094047e-06, "loss": 0.3986, "step": 11207 }, { "epoch": 0.7020466966285097, "grad_norm": 0.8896929888291809, "learning_rate": 2.152488195065655e-06, "loss": 0.3991, "step": 11208 }, { "epoch": 0.702109334627852, "grad_norm": 0.8810839807260443, "learning_rate": 2.151654428426019e-06, "loss": 0.4038, "step": 11209 }, { "epoch": 0.7021719726271943, "grad_norm": 0.8000464434997959, "learning_rate": 2.1508207790248163e-06, "loss": 0.3806, "step": 11210 }, { "epoch": 0.7022346106265366, "grad_norm": 0.9286961807453432, "learning_rate": 2.14998724689636e-06, "loss": 0.3765, "step": 11211 }, { "epoch": 0.702297248625879, "grad_norm": 0.8935934289366393, "learning_rate": 2.1491538320749556e-06, "loss": 0.4075, "step": 11212 }, { "epoch": 0.7023598866252212, "grad_norm": 0.8323863729076624, "learning_rate": 2.1483205345949098e-06, "loss": 0.3672, "step": 11213 }, { "epoch": 0.7024225246245634, "grad_norm": 0.8987267476027552, "learning_rate": 2.1474873544905204e-06, "loss": 0.4035, "step": 11214 }, { "epoch": 0.7024851626239058, "grad_norm": 0.8412790862419353, "learning_rate": 2.1466542917960814e-06, "loss": 0.4016, "step": 11215 }, { "epoch": 0.7025478006232481, "grad_norm": 0.7925281650928231, "learning_rate": 2.145821346545881e-06, "loss": 0.3728, "step": 11216 }, { "epoch": 0.7026104386225904, "grad_norm": 0.9412926753591689, "learning_rate": 2.1449885187742036e-06, "loss": 0.4062, "step": 11217 }, { "epoch": 0.7026730766219327, "grad_norm": 0.8473197783858984, "learning_rate": 2.1441558085153293e-06, "loss": 0.399, "step": 11218 }, { "epoch": 0.702735714621275, "grad_norm": 0.6222976167666503, "learning_rate": 2.1433232158035292e-06, "loss": 0.4445, "step": 11219 }, { "epoch": 0.7027983526206173, "grad_norm": 0.8428932608996511, "learning_rate": 2.1424907406730766e-06, "loss": 0.4204, "step": 11220 }, { "epoch": 0.7028609906199595, "grad_norm": 0.7776519587966538, "learning_rate": 2.1416583831582373e-06, "loss": 0.3556, "step": 11221 }, { "epoch": 0.7029236286193019, "grad_norm": 0.873568700436516, "learning_rate": 2.1408261432932686e-06, "loss": 0.4141, "step": 11222 }, { "epoch": 0.7029862666186442, "grad_norm": 0.8079589504651278, "learning_rate": 2.1399940211124276e-06, "loss": 0.3858, "step": 11223 }, { "epoch": 0.7030489046179865, "grad_norm": 0.8670379112671022, "learning_rate": 2.1391620166499627e-06, "loss": 0.4005, "step": 11224 }, { "epoch": 0.7031115426173288, "grad_norm": 0.8385485972032239, "learning_rate": 2.1383301299401204e-06, "loss": 0.4008, "step": 11225 }, { "epoch": 0.7031741806166711, "grad_norm": 0.8902450999216666, "learning_rate": 2.137498361017141e-06, "loss": 0.3999, "step": 11226 }, { "epoch": 0.7032368186160134, "grad_norm": 0.8539551405282296, "learning_rate": 2.136666709915261e-06, "loss": 0.3686, "step": 11227 }, { "epoch": 0.7032994566153558, "grad_norm": 0.7851100325044841, "learning_rate": 2.1358351766687084e-06, "loss": 0.402, "step": 11228 }, { "epoch": 0.703362094614698, "grad_norm": 0.8646289579108732, "learning_rate": 2.135003761311712e-06, "loss": 0.428, "step": 11229 }, { "epoch": 0.7034247326140403, "grad_norm": 0.8462134382596244, "learning_rate": 2.134172463878494e-06, "loss": 0.4021, "step": 11230 }, { "epoch": 0.7034873706133826, "grad_norm": 0.8952869651396211, "learning_rate": 2.1333412844032692e-06, "loss": 0.4218, "step": 11231 }, { "epoch": 0.7035500086127249, "grad_norm": 0.811593014489584, "learning_rate": 2.132510222920249e-06, "loss": 0.3943, "step": 11232 }, { "epoch": 0.7036126466120672, "grad_norm": 0.8287298186797898, "learning_rate": 2.1316792794636406e-06, "loss": 0.379, "step": 11233 }, { "epoch": 0.7036752846114095, "grad_norm": 0.8254141503379633, "learning_rate": 2.1308484540676455e-06, "loss": 0.4013, "step": 11234 }, { "epoch": 0.7037379226107519, "grad_norm": 0.7690112518963881, "learning_rate": 2.13001774676646e-06, "loss": 0.3907, "step": 11235 }, { "epoch": 0.7038005606100941, "grad_norm": 0.8794036180323845, "learning_rate": 2.1291871575942755e-06, "loss": 0.3946, "step": 11236 }, { "epoch": 0.7038631986094365, "grad_norm": 0.8393048207696658, "learning_rate": 2.1283566865852824e-06, "loss": 0.4069, "step": 11237 }, { "epoch": 0.7039258366087787, "grad_norm": 0.8169385849612377, "learning_rate": 2.1275263337736592e-06, "loss": 0.3927, "step": 11238 }, { "epoch": 0.703988474608121, "grad_norm": 0.8642617016825032, "learning_rate": 2.1266960991935862e-06, "loss": 0.4252, "step": 11239 }, { "epoch": 0.7040511126074633, "grad_norm": 0.838071293385041, "learning_rate": 2.125865982879236e-06, "loss": 0.4017, "step": 11240 }, { "epoch": 0.7041137506068056, "grad_norm": 0.8269968551898805, "learning_rate": 2.1250359848647754e-06, "loss": 0.3417, "step": 11241 }, { "epoch": 0.704176388606148, "grad_norm": 0.8317579101876242, "learning_rate": 2.1242061051843672e-06, "loss": 0.3704, "step": 11242 }, { "epoch": 0.7042390266054902, "grad_norm": 0.760305095795124, "learning_rate": 2.123376343872169e-06, "loss": 0.358, "step": 11243 }, { "epoch": 0.7043016646048326, "grad_norm": 0.8235965818330504, "learning_rate": 2.1225467009623353e-06, "loss": 0.3809, "step": 11244 }, { "epoch": 0.7043643026041748, "grad_norm": 0.8629297377911029, "learning_rate": 2.1217171764890103e-06, "loss": 0.4079, "step": 11245 }, { "epoch": 0.7044269406035171, "grad_norm": 0.8535950423220191, "learning_rate": 2.120887770486342e-06, "loss": 0.3943, "step": 11246 }, { "epoch": 0.7044895786028594, "grad_norm": 0.8761162598416384, "learning_rate": 2.1200584829884686e-06, "loss": 0.4018, "step": 11247 }, { "epoch": 0.7045522166022017, "grad_norm": 0.7635020225870693, "learning_rate": 2.1192293140295223e-06, "loss": 0.3558, "step": 11248 }, { "epoch": 0.704614854601544, "grad_norm": 0.7748434398786649, "learning_rate": 2.1184002636436325e-06, "loss": 0.361, "step": 11249 }, { "epoch": 0.7046774926008863, "grad_norm": 0.823498602955324, "learning_rate": 2.117571331864923e-06, "loss": 0.3498, "step": 11250 }, { "epoch": 0.7047401306002287, "grad_norm": 0.9157089307054879, "learning_rate": 2.1167425187275114e-06, "loss": 0.4195, "step": 11251 }, { "epoch": 0.7048027685995709, "grad_norm": 0.9015335273754236, "learning_rate": 2.115913824265514e-06, "loss": 0.4525, "step": 11252 }, { "epoch": 0.7048654065989133, "grad_norm": 0.810999382413001, "learning_rate": 2.1150852485130387e-06, "loss": 0.376, "step": 11253 }, { "epoch": 0.7049280445982555, "grad_norm": 0.8401319782246577, "learning_rate": 2.1142567915041878e-06, "loss": 0.3632, "step": 11254 }, { "epoch": 0.7049906825975978, "grad_norm": 0.917356807707226, "learning_rate": 2.1134284532730632e-06, "loss": 0.4002, "step": 11255 }, { "epoch": 0.7050533205969401, "grad_norm": 0.8226490302614652, "learning_rate": 2.112600233853761e-06, "loss": 0.3716, "step": 11256 }, { "epoch": 0.7051159585962824, "grad_norm": 0.8381860382213414, "learning_rate": 2.111772133280369e-06, "loss": 0.4121, "step": 11257 }, { "epoch": 0.7051785965956248, "grad_norm": 0.8375051062638361, "learning_rate": 2.1109441515869724e-06, "loss": 0.3768, "step": 11258 }, { "epoch": 0.705241234594967, "grad_norm": 0.8542009709310564, "learning_rate": 2.1101162888076503e-06, "loss": 0.3804, "step": 11259 }, { "epoch": 0.7053038725943094, "grad_norm": 0.8162771575260983, "learning_rate": 2.109288544976479e-06, "loss": 0.3665, "step": 11260 }, { "epoch": 0.7053665105936516, "grad_norm": 0.8735578153584213, "learning_rate": 2.108460920127527e-06, "loss": 0.3914, "step": 11261 }, { "epoch": 0.705429148592994, "grad_norm": 0.8477081492659556, "learning_rate": 2.1076334142948583e-06, "loss": 0.4099, "step": 11262 }, { "epoch": 0.7054917865923362, "grad_norm": 0.8834535446067193, "learning_rate": 2.1068060275125374e-06, "loss": 0.4032, "step": 11263 }, { "epoch": 0.7055544245916785, "grad_norm": 0.9277528069070972, "learning_rate": 2.105978759814617e-06, "loss": 0.3743, "step": 11264 }, { "epoch": 0.7056170625910209, "grad_norm": 0.8408884237594416, "learning_rate": 2.1051516112351466e-06, "loss": 0.3635, "step": 11265 }, { "epoch": 0.7056797005903631, "grad_norm": 0.832182659299251, "learning_rate": 2.104324581808175e-06, "loss": 0.3931, "step": 11266 }, { "epoch": 0.7057423385897055, "grad_norm": 0.6659339859365426, "learning_rate": 2.1034976715677408e-06, "loss": 0.4703, "step": 11267 }, { "epoch": 0.7058049765890477, "grad_norm": 0.9508029657170708, "learning_rate": 2.10267088054788e-06, "loss": 0.4154, "step": 11268 }, { "epoch": 0.7058676145883901, "grad_norm": 0.629202858008937, "learning_rate": 2.1018442087826235e-06, "loss": 0.4389, "step": 11269 }, { "epoch": 0.7059302525877323, "grad_norm": 0.8657515241915757, "learning_rate": 2.1010176563059974e-06, "loss": 0.3983, "step": 11270 }, { "epoch": 0.7059928905870746, "grad_norm": 0.8150318785237417, "learning_rate": 2.100191223152021e-06, "loss": 0.3801, "step": 11271 }, { "epoch": 0.706055528586417, "grad_norm": 0.927466784910936, "learning_rate": 2.0993649093547135e-06, "loss": 0.411, "step": 11272 }, { "epoch": 0.7061181665857592, "grad_norm": 0.826757752441679, "learning_rate": 2.098538714948085e-06, "loss": 0.385, "step": 11273 }, { "epoch": 0.7061808045851016, "grad_norm": 0.8984980652432659, "learning_rate": 2.0977126399661402e-06, "loss": 0.4227, "step": 11274 }, { "epoch": 0.7062434425844438, "grad_norm": 0.8753287546489347, "learning_rate": 2.0968866844428824e-06, "loss": 0.4168, "step": 11275 }, { "epoch": 0.7063060805837862, "grad_norm": 0.8564117152261849, "learning_rate": 2.0960608484123085e-06, "loss": 0.3818, "step": 11276 }, { "epoch": 0.7063687185831284, "grad_norm": 0.800491040270582, "learning_rate": 2.0952351319084093e-06, "loss": 0.3667, "step": 11277 }, { "epoch": 0.7064313565824708, "grad_norm": 0.8822058056649303, "learning_rate": 2.0944095349651706e-06, "loss": 0.3564, "step": 11278 }, { "epoch": 0.7064939945818131, "grad_norm": 0.8482883489583126, "learning_rate": 2.093584057616575e-06, "loss": 0.4427, "step": 11279 }, { "epoch": 0.7065566325811553, "grad_norm": 0.8287782160699172, "learning_rate": 2.092758699896597e-06, "loss": 0.3519, "step": 11280 }, { "epoch": 0.7066192705804977, "grad_norm": 0.9099358993584626, "learning_rate": 2.0919334618392123e-06, "loss": 0.3837, "step": 11281 }, { "epoch": 0.7066819085798399, "grad_norm": 0.8681566353188288, "learning_rate": 2.0911083434783865e-06, "loss": 0.4007, "step": 11282 }, { "epoch": 0.7067445465791823, "grad_norm": 0.8747676991661928, "learning_rate": 2.0902833448480793e-06, "loss": 0.3919, "step": 11283 }, { "epoch": 0.7068071845785245, "grad_norm": 0.8123615827535597, "learning_rate": 2.089458465982251e-06, "loss": 0.387, "step": 11284 }, { "epoch": 0.7068698225778669, "grad_norm": 0.7981125702843854, "learning_rate": 2.0886337069148526e-06, "loss": 0.3676, "step": 11285 }, { "epoch": 0.7069324605772092, "grad_norm": 0.5974331812997216, "learning_rate": 2.0878090676798313e-06, "loss": 0.4355, "step": 11286 }, { "epoch": 0.7069950985765515, "grad_norm": 0.8248930981138515, "learning_rate": 2.0869845483111295e-06, "loss": 0.3897, "step": 11287 }, { "epoch": 0.7070577365758938, "grad_norm": 0.8290001791127641, "learning_rate": 2.0861601488426825e-06, "loss": 0.3512, "step": 11288 }, { "epoch": 0.707120374575236, "grad_norm": 0.8317763731974456, "learning_rate": 2.0853358693084265e-06, "loss": 0.3703, "step": 11289 }, { "epoch": 0.7071830125745784, "grad_norm": 0.8587232976767644, "learning_rate": 2.084511709742287e-06, "loss": 0.4458, "step": 11290 }, { "epoch": 0.7072456505739206, "grad_norm": 0.906587670416025, "learning_rate": 2.0836876701781865e-06, "loss": 0.4131, "step": 11291 }, { "epoch": 0.707308288573263, "grad_norm": 0.9633039264171344, "learning_rate": 2.082863750650041e-06, "loss": 0.4425, "step": 11292 }, { "epoch": 0.7073709265726053, "grad_norm": 0.8608104947810815, "learning_rate": 2.0820399511917673e-06, "loss": 0.3917, "step": 11293 }, { "epoch": 0.7074335645719476, "grad_norm": 0.8251759297794218, "learning_rate": 2.0812162718372704e-06, "loss": 0.3675, "step": 11294 }, { "epoch": 0.7074962025712899, "grad_norm": 0.8792079644724808, "learning_rate": 2.080392712620453e-06, "loss": 0.3814, "step": 11295 }, { "epoch": 0.7075588405706321, "grad_norm": 0.8320557754339496, "learning_rate": 2.0795692735752143e-06, "loss": 0.3831, "step": 11296 }, { "epoch": 0.7076214785699745, "grad_norm": 0.7892240751802807, "learning_rate": 2.0787459547354446e-06, "loss": 0.3995, "step": 11297 }, { "epoch": 0.7076841165693167, "grad_norm": 0.9179898674890663, "learning_rate": 2.0779227561350344e-06, "loss": 0.3806, "step": 11298 }, { "epoch": 0.7077467545686591, "grad_norm": 0.8354564537322483, "learning_rate": 2.0770996778078665e-06, "loss": 0.3995, "step": 11299 }, { "epoch": 0.7078093925680013, "grad_norm": 0.9768568658244057, "learning_rate": 2.076276719787819e-06, "loss": 0.4233, "step": 11300 }, { "epoch": 0.7078720305673437, "grad_norm": 0.8091428292595455, "learning_rate": 2.0754538821087633e-06, "loss": 0.3513, "step": 11301 }, { "epoch": 0.707934668566686, "grad_norm": 0.8449412183105257, "learning_rate": 2.0746311648045674e-06, "loss": 0.3684, "step": 11302 }, { "epoch": 0.7079973065660283, "grad_norm": 0.8752520223710633, "learning_rate": 2.0738085679090974e-06, "loss": 0.4214, "step": 11303 }, { "epoch": 0.7080599445653706, "grad_norm": 0.8771268298872761, "learning_rate": 2.07298609145621e-06, "loss": 0.3996, "step": 11304 }, { "epoch": 0.7081225825647128, "grad_norm": 0.8592512577846504, "learning_rate": 2.072163735479757e-06, "loss": 0.4109, "step": 11305 }, { "epoch": 0.7081852205640552, "grad_norm": 0.9435681264652284, "learning_rate": 2.071341500013589e-06, "loss": 0.3995, "step": 11306 }, { "epoch": 0.7082478585633974, "grad_norm": 0.6034359619406897, "learning_rate": 2.070519385091549e-06, "loss": 0.4593, "step": 11307 }, { "epoch": 0.7083104965627398, "grad_norm": 0.8922939816694632, "learning_rate": 2.0696973907474743e-06, "loss": 0.3929, "step": 11308 }, { "epoch": 0.7083731345620821, "grad_norm": 0.9057636373010804, "learning_rate": 2.0688755170152e-06, "loss": 0.3751, "step": 11309 }, { "epoch": 0.7084357725614244, "grad_norm": 0.8923618797197569, "learning_rate": 2.068053763928552e-06, "loss": 0.441, "step": 11310 }, { "epoch": 0.7084984105607667, "grad_norm": 0.8394060352425476, "learning_rate": 2.0672321315213544e-06, "loss": 0.3909, "step": 11311 }, { "epoch": 0.708561048560109, "grad_norm": 0.8512927368063065, "learning_rate": 2.0664106198274283e-06, "loss": 0.3943, "step": 11312 }, { "epoch": 0.7086236865594513, "grad_norm": 0.7749541522426676, "learning_rate": 2.0655892288805857e-06, "loss": 0.4007, "step": 11313 }, { "epoch": 0.7086863245587935, "grad_norm": 0.8750049299237885, "learning_rate": 2.064767958714633e-06, "loss": 0.4168, "step": 11314 }, { "epoch": 0.7087489625581359, "grad_norm": 0.8400890860751221, "learning_rate": 2.0639468093633773e-06, "loss": 0.3651, "step": 11315 }, { "epoch": 0.7088116005574782, "grad_norm": 0.9512566844166928, "learning_rate": 2.063125780860617e-06, "loss": 0.3952, "step": 11316 }, { "epoch": 0.7088742385568205, "grad_norm": 0.6040070126476632, "learning_rate": 2.062304873240144e-06, "loss": 0.4397, "step": 11317 }, { "epoch": 0.7089368765561628, "grad_norm": 0.8342551302918562, "learning_rate": 2.0614840865357477e-06, "loss": 0.363, "step": 11318 }, { "epoch": 0.7089995145555051, "grad_norm": 0.8375650139294174, "learning_rate": 2.0606634207812116e-06, "loss": 0.3801, "step": 11319 }, { "epoch": 0.7090621525548474, "grad_norm": 0.8476474716080339, "learning_rate": 2.0598428760103133e-06, "loss": 0.4039, "step": 11320 }, { "epoch": 0.7091247905541898, "grad_norm": 0.7899398183168702, "learning_rate": 2.0590224522568295e-06, "loss": 0.3855, "step": 11321 }, { "epoch": 0.709187428553532, "grad_norm": 0.8360327664318987, "learning_rate": 2.058202149554527e-06, "loss": 0.3678, "step": 11322 }, { "epoch": 0.7092500665528743, "grad_norm": 0.787754803191229, "learning_rate": 2.0573819679371694e-06, "loss": 0.4266, "step": 11323 }, { "epoch": 0.7093127045522166, "grad_norm": 0.8728229197010352, "learning_rate": 2.0565619074385173e-06, "loss": 0.3813, "step": 11324 }, { "epoch": 0.7093753425515589, "grad_norm": 0.6287358556562918, "learning_rate": 2.0557419680923235e-06, "loss": 0.4412, "step": 11325 }, { "epoch": 0.7094379805509012, "grad_norm": 0.8350539565391961, "learning_rate": 2.054922149932336e-06, "loss": 0.3867, "step": 11326 }, { "epoch": 0.7095006185502435, "grad_norm": 0.6570261675446816, "learning_rate": 2.0541024529923007e-06, "loss": 0.463, "step": 11327 }, { "epoch": 0.7095632565495859, "grad_norm": 0.8484024575458463, "learning_rate": 2.053282877305955e-06, "loss": 0.4015, "step": 11328 }, { "epoch": 0.7096258945489281, "grad_norm": 0.8897962591783489, "learning_rate": 2.0524634229070307e-06, "loss": 0.3769, "step": 11329 }, { "epoch": 0.7096885325482704, "grad_norm": 0.8194808930631572, "learning_rate": 2.0516440898292606e-06, "loss": 0.3897, "step": 11330 }, { "epoch": 0.7097511705476127, "grad_norm": 0.7826308043930225, "learning_rate": 2.0508248781063655e-06, "loss": 0.3421, "step": 11331 }, { "epoch": 0.709813808546955, "grad_norm": 0.8916661398190401, "learning_rate": 2.050005787772068e-06, "loss": 0.3988, "step": 11332 }, { "epoch": 0.7098764465462973, "grad_norm": 0.8515957927041149, "learning_rate": 2.0491868188600794e-06, "loss": 0.367, "step": 11333 }, { "epoch": 0.7099390845456396, "grad_norm": 0.7851068540985393, "learning_rate": 2.0483679714041094e-06, "loss": 0.3484, "step": 11334 }, { "epoch": 0.710001722544982, "grad_norm": 0.7909834063779754, "learning_rate": 2.047549245437861e-06, "loss": 0.3612, "step": 11335 }, { "epoch": 0.7100643605443242, "grad_norm": 0.8789540841812137, "learning_rate": 2.046730640995034e-06, "loss": 0.3972, "step": 11336 }, { "epoch": 0.7101269985436666, "grad_norm": 0.6227259752655652, "learning_rate": 2.0459121581093223e-06, "loss": 0.4487, "step": 11337 }, { "epoch": 0.7101896365430088, "grad_norm": 0.8713899782621398, "learning_rate": 2.045093796814412e-06, "loss": 0.3908, "step": 11338 }, { "epoch": 0.7102522745423511, "grad_norm": 0.781541273947386, "learning_rate": 2.0442755571439916e-06, "loss": 0.3683, "step": 11339 }, { "epoch": 0.7103149125416934, "grad_norm": 0.8520209263787529, "learning_rate": 2.0434574391317364e-06, "loss": 0.4031, "step": 11340 }, { "epoch": 0.7103775505410357, "grad_norm": 0.8675088389058679, "learning_rate": 2.0426394428113227e-06, "loss": 0.4152, "step": 11341 }, { "epoch": 0.710440188540378, "grad_norm": 0.83877999559695, "learning_rate": 2.0418215682164194e-06, "loss": 0.3874, "step": 11342 }, { "epoch": 0.7105028265397203, "grad_norm": 0.8512010594708105, "learning_rate": 2.0410038153806888e-06, "loss": 0.3298, "step": 11343 }, { "epoch": 0.7105654645390627, "grad_norm": 0.827841088941387, "learning_rate": 2.0401861843377906e-06, "loss": 0.3797, "step": 11344 }, { "epoch": 0.7106281025384049, "grad_norm": 0.8887046351619825, "learning_rate": 2.039368675121378e-06, "loss": 0.4151, "step": 11345 }, { "epoch": 0.7106907405377473, "grad_norm": 0.9003479645316882, "learning_rate": 2.038551287765101e-06, "loss": 0.4065, "step": 11346 }, { "epoch": 0.7107533785370895, "grad_norm": 0.9008732284672085, "learning_rate": 2.0377340223026004e-06, "loss": 0.4062, "step": 11347 }, { "epoch": 0.7108160165364318, "grad_norm": 0.9045268248122715, "learning_rate": 2.0369168787675194e-06, "loss": 0.4122, "step": 11348 }, { "epoch": 0.7108786545357741, "grad_norm": 0.7772298118145087, "learning_rate": 2.0360998571934874e-06, "loss": 0.3766, "step": 11349 }, { "epoch": 0.7109412925351164, "grad_norm": 0.8383547994366461, "learning_rate": 2.0352829576141376e-06, "loss": 0.4197, "step": 11350 }, { "epoch": 0.7110039305344588, "grad_norm": 0.7770414809411255, "learning_rate": 2.034466180063091e-06, "loss": 0.3519, "step": 11351 }, { "epoch": 0.711066568533801, "grad_norm": 0.8140043784323261, "learning_rate": 2.0336495245739673e-06, "loss": 0.367, "step": 11352 }, { "epoch": 0.7111292065331434, "grad_norm": 0.7841087092913083, "learning_rate": 2.0328329911803797e-06, "loss": 0.4061, "step": 11353 }, { "epoch": 0.7111918445324856, "grad_norm": 0.8250293436352535, "learning_rate": 2.032016579915937e-06, "loss": 0.3673, "step": 11354 }, { "epoch": 0.7112544825318279, "grad_norm": 0.8036584764656387, "learning_rate": 2.0312002908142427e-06, "loss": 0.3386, "step": 11355 }, { "epoch": 0.7113171205311702, "grad_norm": 0.8456852955446321, "learning_rate": 2.0303841239088932e-06, "loss": 0.382, "step": 11356 }, { "epoch": 0.7113797585305125, "grad_norm": 0.6354467503602952, "learning_rate": 2.029568079233485e-06, "loss": 0.4599, "step": 11357 }, { "epoch": 0.7114423965298549, "grad_norm": 0.9101790154773329, "learning_rate": 2.028752156821607e-06, "loss": 0.4166, "step": 11358 }, { "epoch": 0.7115050345291971, "grad_norm": 0.9223044230165164, "learning_rate": 2.027936356706843e-06, "loss": 0.3724, "step": 11359 }, { "epoch": 0.7115676725285395, "grad_norm": 0.8721641952942556, "learning_rate": 2.0271206789227694e-06, "loss": 0.3624, "step": 11360 }, { "epoch": 0.7116303105278817, "grad_norm": 0.5965626401331241, "learning_rate": 2.02630512350296e-06, "loss": 0.4317, "step": 11361 }, { "epoch": 0.7116929485272241, "grad_norm": 0.7988319007120864, "learning_rate": 2.0254896904809847e-06, "loss": 0.3738, "step": 11362 }, { "epoch": 0.7117555865265663, "grad_norm": 0.8430856365890025, "learning_rate": 2.024674379890405e-06, "loss": 0.3673, "step": 11363 }, { "epoch": 0.7118182245259086, "grad_norm": 0.8436923341482927, "learning_rate": 2.023859191764779e-06, "loss": 0.3967, "step": 11364 }, { "epoch": 0.711880862525251, "grad_norm": 0.8234641259849307, "learning_rate": 2.023044126137662e-06, "loss": 0.3661, "step": 11365 }, { "epoch": 0.7119435005245932, "grad_norm": 0.8852014322107893, "learning_rate": 2.0222291830426e-06, "loss": 0.425, "step": 11366 }, { "epoch": 0.7120061385239356, "grad_norm": 0.8087581439919429, "learning_rate": 2.0214143625131394e-06, "loss": 0.4208, "step": 11367 }, { "epoch": 0.7120687765232778, "grad_norm": 0.8678508478563739, "learning_rate": 2.0205996645828165e-06, "loss": 0.3919, "step": 11368 }, { "epoch": 0.7121314145226202, "grad_norm": 0.8660205749195927, "learning_rate": 2.019785089285164e-06, "loss": 0.3756, "step": 11369 }, { "epoch": 0.7121940525219624, "grad_norm": 0.8442077528527961, "learning_rate": 2.0189706366537102e-06, "loss": 0.3964, "step": 11370 }, { "epoch": 0.7122566905213048, "grad_norm": 0.8317136206628734, "learning_rate": 2.018156306721979e-06, "loss": 0.4079, "step": 11371 }, { "epoch": 0.712319328520647, "grad_norm": 0.8995756035479973, "learning_rate": 2.0173420995234876e-06, "loss": 0.4067, "step": 11372 }, { "epoch": 0.7123819665199893, "grad_norm": 0.7335157945612467, "learning_rate": 2.016528015091747e-06, "loss": 0.3488, "step": 11373 }, { "epoch": 0.7124446045193317, "grad_norm": 0.8305618581127322, "learning_rate": 2.015714053460269e-06, "loss": 0.4028, "step": 11374 }, { "epoch": 0.7125072425186739, "grad_norm": 0.7800804426767388, "learning_rate": 2.0149002146625522e-06, "loss": 0.3635, "step": 11375 }, { "epoch": 0.7125698805180163, "grad_norm": 0.8309016373081658, "learning_rate": 2.014086498732099e-06, "loss": 0.3591, "step": 11376 }, { "epoch": 0.7126325185173585, "grad_norm": 0.8137263244591433, "learning_rate": 2.0132729057023997e-06, "loss": 0.3401, "step": 11377 }, { "epoch": 0.7126951565167009, "grad_norm": 0.8205254756842203, "learning_rate": 2.0124594356069416e-06, "loss": 0.3524, "step": 11378 }, { "epoch": 0.7127577945160432, "grad_norm": 0.7388824624703175, "learning_rate": 2.011646088479208e-06, "loss": 0.3729, "step": 11379 }, { "epoch": 0.7128204325153854, "grad_norm": 0.822748578540339, "learning_rate": 2.010832864352676e-06, "loss": 0.3921, "step": 11380 }, { "epoch": 0.7128830705147278, "grad_norm": 0.7847289048850306, "learning_rate": 2.0100197632608176e-06, "loss": 0.3761, "step": 11381 }, { "epoch": 0.71294570851407, "grad_norm": 0.8634929260395874, "learning_rate": 2.0092067852370995e-06, "loss": 0.3863, "step": 11382 }, { "epoch": 0.7130083465134124, "grad_norm": 0.812064566718495, "learning_rate": 2.008393930314987e-06, "loss": 0.3596, "step": 11383 }, { "epoch": 0.7130709845127546, "grad_norm": 0.9095251323220483, "learning_rate": 2.007581198527933e-06, "loss": 0.4052, "step": 11384 }, { "epoch": 0.713133622512097, "grad_norm": 0.823246514877354, "learning_rate": 2.0067685899093948e-06, "loss": 0.3938, "step": 11385 }, { "epoch": 0.7131962605114393, "grad_norm": 0.8867347672489106, "learning_rate": 2.0059561044928173e-06, "loss": 0.3798, "step": 11386 }, { "epoch": 0.7132588985107816, "grad_norm": 0.8724141528240843, "learning_rate": 2.0051437423116414e-06, "loss": 0.4056, "step": 11387 }, { "epoch": 0.7133215365101239, "grad_norm": 0.8425146138258806, "learning_rate": 2.0043315033993054e-06, "loss": 0.4113, "step": 11388 }, { "epoch": 0.7133841745094661, "grad_norm": 0.8691726137890807, "learning_rate": 2.0035193877892412e-06, "loss": 0.3561, "step": 11389 }, { "epoch": 0.7134468125088085, "grad_norm": 0.8632094493869509, "learning_rate": 2.002707395514873e-06, "loss": 0.374, "step": 11390 }, { "epoch": 0.7135094505081507, "grad_norm": 0.8376471053107977, "learning_rate": 2.0018955266096265e-06, "loss": 0.4039, "step": 11391 }, { "epoch": 0.7135720885074931, "grad_norm": 0.8229377533375254, "learning_rate": 2.001083781106916e-06, "loss": 0.3846, "step": 11392 }, { "epoch": 0.7136347265068353, "grad_norm": 0.8680614914972045, "learning_rate": 2.0002721590401545e-06, "loss": 0.3674, "step": 11393 }, { "epoch": 0.7136973645061777, "grad_norm": 0.9037313997391035, "learning_rate": 1.999460660442746e-06, "loss": 0.3719, "step": 11394 }, { "epoch": 0.71376000250552, "grad_norm": 0.8713324661661659, "learning_rate": 1.9986492853480947e-06, "loss": 0.4276, "step": 11395 }, { "epoch": 0.7138226405048623, "grad_norm": 0.8295871062037521, "learning_rate": 1.9978380337895965e-06, "loss": 0.3697, "step": 11396 }, { "epoch": 0.7138852785042046, "grad_norm": 0.8485417158905524, "learning_rate": 1.997026905800642e-06, "loss": 0.376, "step": 11397 }, { "epoch": 0.7139479165035468, "grad_norm": 0.8374409748955307, "learning_rate": 1.996215901414617e-06, "loss": 0.392, "step": 11398 }, { "epoch": 0.7140105545028892, "grad_norm": 0.7814842169424883, "learning_rate": 1.995405020664902e-06, "loss": 0.3762, "step": 11399 }, { "epoch": 0.7140731925022314, "grad_norm": 0.8209365291783581, "learning_rate": 1.9945942635848745e-06, "loss": 0.3959, "step": 11400 }, { "epoch": 0.7141358305015738, "grad_norm": 0.8368123582821918, "learning_rate": 1.993783630207906e-06, "loss": 0.3988, "step": 11401 }, { "epoch": 0.7141984685009161, "grad_norm": 0.7889326206644715, "learning_rate": 1.9929731205673614e-06, "loss": 0.3625, "step": 11402 }, { "epoch": 0.7142611065002584, "grad_norm": 0.9299807194711881, "learning_rate": 1.992162734696599e-06, "loss": 0.3822, "step": 11403 }, { "epoch": 0.7143237444996007, "grad_norm": 0.8662717575470088, "learning_rate": 1.9913524726289784e-06, "loss": 0.418, "step": 11404 }, { "epoch": 0.7143863824989429, "grad_norm": 0.8750184038211837, "learning_rate": 1.9905423343978487e-06, "loss": 0.4125, "step": 11405 }, { "epoch": 0.7144490204982853, "grad_norm": 0.9206728017322948, "learning_rate": 1.9897323200365555e-06, "loss": 0.3663, "step": 11406 }, { "epoch": 0.7145116584976275, "grad_norm": 0.8196138491961363, "learning_rate": 1.988922429578438e-06, "loss": 0.3909, "step": 11407 }, { "epoch": 0.7145742964969699, "grad_norm": 0.8564507902538055, "learning_rate": 1.9881126630568308e-06, "loss": 0.4106, "step": 11408 }, { "epoch": 0.7146369344963122, "grad_norm": 0.8213200429538576, "learning_rate": 1.987303020505067e-06, "loss": 0.3552, "step": 11409 }, { "epoch": 0.7146995724956545, "grad_norm": 0.8367285331229816, "learning_rate": 1.98649350195647e-06, "loss": 0.3488, "step": 11410 }, { "epoch": 0.7147622104949968, "grad_norm": 0.8200130256714342, "learning_rate": 1.9856841074443606e-06, "loss": 0.389, "step": 11411 }, { "epoch": 0.7148248484943391, "grad_norm": 0.8755859507822509, "learning_rate": 1.9848748370020506e-06, "loss": 0.3828, "step": 11412 }, { "epoch": 0.7148874864936814, "grad_norm": 0.7962207735404855, "learning_rate": 1.984065690662853e-06, "loss": 0.3644, "step": 11413 }, { "epoch": 0.7149501244930236, "grad_norm": 0.7629187156271467, "learning_rate": 1.9832566684600724e-06, "loss": 0.3745, "step": 11414 }, { "epoch": 0.715012762492366, "grad_norm": 0.8852944599910698, "learning_rate": 1.982447770427007e-06, "loss": 0.3774, "step": 11415 }, { "epoch": 0.7150754004917083, "grad_norm": 0.8248279312018882, "learning_rate": 1.9816389965969492e-06, "loss": 0.3802, "step": 11416 }, { "epoch": 0.7151380384910506, "grad_norm": 0.8246223368708766, "learning_rate": 1.9808303470031927e-06, "loss": 0.3413, "step": 11417 }, { "epoch": 0.7152006764903929, "grad_norm": 0.9145686447287099, "learning_rate": 1.980021821679019e-06, "loss": 0.3986, "step": 11418 }, { "epoch": 0.7152633144897352, "grad_norm": 0.8394278168814452, "learning_rate": 1.979213420657708e-06, "loss": 0.4014, "step": 11419 }, { "epoch": 0.7153259524890775, "grad_norm": 0.8449433278255679, "learning_rate": 1.9784051439725333e-06, "loss": 0.3813, "step": 11420 }, { "epoch": 0.7153885904884199, "grad_norm": 0.8236953899148257, "learning_rate": 1.9775969916567615e-06, "loss": 0.3596, "step": 11421 }, { "epoch": 0.7154512284877621, "grad_norm": 0.6523469352730332, "learning_rate": 1.976788963743661e-06, "loss": 0.4379, "step": 11422 }, { "epoch": 0.7155138664871044, "grad_norm": 0.8159985820017046, "learning_rate": 1.9759810602664877e-06, "loss": 0.3979, "step": 11423 }, { "epoch": 0.7155765044864467, "grad_norm": 0.8560013183099432, "learning_rate": 1.975173281258495e-06, "loss": 0.3773, "step": 11424 }, { "epoch": 0.715639142485789, "grad_norm": 0.900587848603846, "learning_rate": 1.97436562675293e-06, "loss": 0.4281, "step": 11425 }, { "epoch": 0.7157017804851313, "grad_norm": 0.8095659436209743, "learning_rate": 1.9735580967830393e-06, "loss": 0.3962, "step": 11426 }, { "epoch": 0.7157644184844736, "grad_norm": 0.8537050637648194, "learning_rate": 1.972750691382059e-06, "loss": 0.3555, "step": 11427 }, { "epoch": 0.715827056483816, "grad_norm": 0.7923542491763237, "learning_rate": 1.9719434105832214e-06, "loss": 0.3616, "step": 11428 }, { "epoch": 0.7158896944831582, "grad_norm": 1.0147843767205131, "learning_rate": 1.9711362544197553e-06, "loss": 0.3745, "step": 11429 }, { "epoch": 0.7159523324825006, "grad_norm": 0.9077943217600472, "learning_rate": 1.970329222924882e-06, "loss": 0.3937, "step": 11430 }, { "epoch": 0.7160149704818428, "grad_norm": 0.5763982071061108, "learning_rate": 1.9695223161318216e-06, "loss": 0.4339, "step": 11431 }, { "epoch": 0.7160776084811851, "grad_norm": 0.8852242307557473, "learning_rate": 1.968715534073785e-06, "loss": 0.3789, "step": 11432 }, { "epoch": 0.7161402464805274, "grad_norm": 0.8868032595008274, "learning_rate": 1.96790887678398e-06, "loss": 0.442, "step": 11433 }, { "epoch": 0.7162028844798697, "grad_norm": 0.8124366714623941, "learning_rate": 1.9671023442956067e-06, "loss": 0.3826, "step": 11434 }, { "epoch": 0.716265522479212, "grad_norm": 0.8534493452699528, "learning_rate": 1.9662959366418653e-06, "loss": 0.3972, "step": 11435 }, { "epoch": 0.7163281604785543, "grad_norm": 0.9146893109046131, "learning_rate": 1.9654896538559464e-06, "loss": 0.3881, "step": 11436 }, { "epoch": 0.7163907984778967, "grad_norm": 0.84844505246249, "learning_rate": 1.9646834959710364e-06, "loss": 0.3905, "step": 11437 }, { "epoch": 0.7164534364772389, "grad_norm": 0.8222295922995758, "learning_rate": 1.9638774630203174e-06, "loss": 0.337, "step": 11438 }, { "epoch": 0.7165160744765812, "grad_norm": 0.8170327419989263, "learning_rate": 1.963071555036965e-06, "loss": 0.3874, "step": 11439 }, { "epoch": 0.7165787124759235, "grad_norm": 0.861401181641719, "learning_rate": 1.9622657720541497e-06, "loss": 0.3845, "step": 11440 }, { "epoch": 0.7166413504752658, "grad_norm": 0.8605790329785992, "learning_rate": 1.961460114105041e-06, "loss": 0.4122, "step": 11441 }, { "epoch": 0.7167039884746081, "grad_norm": 0.9170504895538198, "learning_rate": 1.9606545812227955e-06, "loss": 0.3703, "step": 11442 }, { "epoch": 0.7167666264739504, "grad_norm": 0.8519931871582739, "learning_rate": 1.959849173440574e-06, "loss": 0.4035, "step": 11443 }, { "epoch": 0.7168292644732928, "grad_norm": 0.6076619340938435, "learning_rate": 1.9590438907915247e-06, "loss": 0.4395, "step": 11444 }, { "epoch": 0.716891902472635, "grad_norm": 0.8197019251777017, "learning_rate": 1.958238733308793e-06, "loss": 0.4225, "step": 11445 }, { "epoch": 0.7169545404719774, "grad_norm": 0.908701289584711, "learning_rate": 1.95743370102552e-06, "loss": 0.4163, "step": 11446 }, { "epoch": 0.7170171784713196, "grad_norm": 0.8431378748069619, "learning_rate": 1.9566287939748407e-06, "loss": 0.4083, "step": 11447 }, { "epoch": 0.7170798164706619, "grad_norm": 0.8431573361470804, "learning_rate": 1.9558240121898847e-06, "loss": 0.3955, "step": 11448 }, { "epoch": 0.7171424544700042, "grad_norm": 0.8383731369686466, "learning_rate": 1.9550193557037755e-06, "loss": 0.3577, "step": 11449 }, { "epoch": 0.7172050924693465, "grad_norm": 0.8046568060336687, "learning_rate": 1.9542148245496366e-06, "loss": 0.3868, "step": 11450 }, { "epoch": 0.7172677304686889, "grad_norm": 0.8404686434204288, "learning_rate": 1.9534104187605794e-06, "loss": 0.37, "step": 11451 }, { "epoch": 0.7173303684680311, "grad_norm": 0.596772441126336, "learning_rate": 1.952606138369717e-06, "loss": 0.4513, "step": 11452 }, { "epoch": 0.7173930064673735, "grad_norm": 1.0116702280408443, "learning_rate": 1.951801983410151e-06, "loss": 0.4137, "step": 11453 }, { "epoch": 0.7174556444667157, "grad_norm": 0.8973462809331181, "learning_rate": 1.950997953914981e-06, "loss": 0.4213, "step": 11454 }, { "epoch": 0.7175182824660581, "grad_norm": 0.8021907077943159, "learning_rate": 1.950194049917301e-06, "loss": 0.3615, "step": 11455 }, { "epoch": 0.7175809204654003, "grad_norm": 0.8391018606124357, "learning_rate": 1.9493902714502004e-06, "loss": 0.3767, "step": 11456 }, { "epoch": 0.7176435584647426, "grad_norm": 0.8146112610516942, "learning_rate": 1.948586618546763e-06, "loss": 0.4143, "step": 11457 }, { "epoch": 0.717706196464085, "grad_norm": 0.8784896385290132, "learning_rate": 1.9477830912400644e-06, "loss": 0.3983, "step": 11458 }, { "epoch": 0.7177688344634272, "grad_norm": 0.8102700709944469, "learning_rate": 1.9469796895631817e-06, "loss": 0.3194, "step": 11459 }, { "epoch": 0.7178314724627696, "grad_norm": 0.8835337676021063, "learning_rate": 1.94617641354918e-06, "loss": 0.3678, "step": 11460 }, { "epoch": 0.7178941104621118, "grad_norm": 0.8090161346502635, "learning_rate": 1.945373263231126e-06, "loss": 0.4185, "step": 11461 }, { "epoch": 0.7179567484614542, "grad_norm": 0.8634577151328899, "learning_rate": 1.944570238642075e-06, "loss": 0.4264, "step": 11462 }, { "epoch": 0.7180193864607964, "grad_norm": 0.8577963480573995, "learning_rate": 1.943767339815081e-06, "loss": 0.3773, "step": 11463 }, { "epoch": 0.7180820244601387, "grad_norm": 0.8436878411799618, "learning_rate": 1.94296456678319e-06, "loss": 0.3603, "step": 11464 }, { "epoch": 0.718144662459481, "grad_norm": 0.8088335814503315, "learning_rate": 1.9421619195794445e-06, "loss": 0.3896, "step": 11465 }, { "epoch": 0.7182073004588233, "grad_norm": 0.8095388734171065, "learning_rate": 1.941359398236882e-06, "loss": 0.386, "step": 11466 }, { "epoch": 0.7182699384581657, "grad_norm": 0.8592725972993882, "learning_rate": 1.940557002788533e-06, "loss": 0.3905, "step": 11467 }, { "epoch": 0.7183325764575079, "grad_norm": 0.8590766273484365, "learning_rate": 1.9397547332674253e-06, "loss": 0.391, "step": 11468 }, { "epoch": 0.7183952144568503, "grad_norm": 0.8832752572608855, "learning_rate": 1.938952589706583e-06, "loss": 0.3942, "step": 11469 }, { "epoch": 0.7184578524561925, "grad_norm": 0.8504510646667558, "learning_rate": 1.93815057213902e-06, "loss": 0.4085, "step": 11470 }, { "epoch": 0.7185204904555349, "grad_norm": 0.9308144827937689, "learning_rate": 1.937348680597748e-06, "loss": 0.4083, "step": 11471 }, { "epoch": 0.7185831284548772, "grad_norm": 0.589372422936036, "learning_rate": 1.9365469151157725e-06, "loss": 0.4437, "step": 11472 }, { "epoch": 0.7186457664542194, "grad_norm": 0.9424502972401706, "learning_rate": 1.935745275726094e-06, "loss": 0.408, "step": 11473 }, { "epoch": 0.7187084044535618, "grad_norm": 0.8502213609161442, "learning_rate": 1.9349437624617096e-06, "loss": 0.3946, "step": 11474 }, { "epoch": 0.718771042452904, "grad_norm": 0.9062266627931345, "learning_rate": 1.9341423753556072e-06, "loss": 0.3879, "step": 11475 }, { "epoch": 0.7188336804522464, "grad_norm": 0.9530254882306926, "learning_rate": 1.933341114440775e-06, "loss": 0.4006, "step": 11476 }, { "epoch": 0.7188963184515886, "grad_norm": 0.8448833072250396, "learning_rate": 1.93253997975019e-06, "loss": 0.424, "step": 11477 }, { "epoch": 0.718958956450931, "grad_norm": 0.8429112494751154, "learning_rate": 1.9317389713168306e-06, "loss": 0.3696, "step": 11478 }, { "epoch": 0.7190215944502732, "grad_norm": 0.7979377655864324, "learning_rate": 1.9309380891736638e-06, "loss": 0.3678, "step": 11479 }, { "epoch": 0.7190842324496156, "grad_norm": 0.8162333755302649, "learning_rate": 1.930137333353656e-06, "loss": 0.3422, "step": 11480 }, { "epoch": 0.7191468704489579, "grad_norm": 0.9425178070384164, "learning_rate": 1.9293367038897655e-06, "loss": 0.3782, "step": 11481 }, { "epoch": 0.7192095084483001, "grad_norm": 0.863308250257877, "learning_rate": 1.9285362008149455e-06, "loss": 0.4245, "step": 11482 }, { "epoch": 0.7192721464476425, "grad_norm": 0.8205419196714939, "learning_rate": 1.927735824162146e-06, "loss": 0.3574, "step": 11483 }, { "epoch": 0.7193347844469847, "grad_norm": 0.8853414769712296, "learning_rate": 1.926935573964308e-06, "loss": 0.4252, "step": 11484 }, { "epoch": 0.7193974224463271, "grad_norm": 0.6490218833634469, "learning_rate": 1.926135450254374e-06, "loss": 0.4325, "step": 11485 }, { "epoch": 0.7194600604456693, "grad_norm": 0.6593078903709182, "learning_rate": 1.925335453065274e-06, "loss": 0.43, "step": 11486 }, { "epoch": 0.7195226984450117, "grad_norm": 0.8160351024335553, "learning_rate": 1.9245355824299387e-06, "loss": 0.3645, "step": 11487 }, { "epoch": 0.719585336444354, "grad_norm": 0.9285099867632124, "learning_rate": 1.9237358383812903e-06, "loss": 0.3808, "step": 11488 }, { "epoch": 0.7196479744436962, "grad_norm": 0.8724501587133326, "learning_rate": 1.922936220952246e-06, "loss": 0.3628, "step": 11489 }, { "epoch": 0.7197106124430386, "grad_norm": 0.8724750128592061, "learning_rate": 1.9221367301757164e-06, "loss": 0.3826, "step": 11490 }, { "epoch": 0.7197732504423808, "grad_norm": 0.7917550571191599, "learning_rate": 1.9213373660846118e-06, "loss": 0.3899, "step": 11491 }, { "epoch": 0.7198358884417232, "grad_norm": 0.806249387084973, "learning_rate": 1.920538128711832e-06, "loss": 0.3948, "step": 11492 }, { "epoch": 0.7198985264410654, "grad_norm": 0.8356046478226828, "learning_rate": 1.9197390180902733e-06, "loss": 0.3837, "step": 11493 }, { "epoch": 0.7199611644404078, "grad_norm": 0.8013325687012477, "learning_rate": 1.9189400342528293e-06, "loss": 0.3649, "step": 11494 }, { "epoch": 0.7200238024397501, "grad_norm": 0.9317568483775337, "learning_rate": 1.918141177232384e-06, "loss": 0.4132, "step": 11495 }, { "epoch": 0.7200864404390924, "grad_norm": 0.8370349590181806, "learning_rate": 1.9173424470618213e-06, "loss": 0.3629, "step": 11496 }, { "epoch": 0.7201490784384347, "grad_norm": 0.8959812163300014, "learning_rate": 1.916543843774016e-06, "loss": 0.4067, "step": 11497 }, { "epoch": 0.7202117164377769, "grad_norm": 0.8306540050517824, "learning_rate": 1.915745367401838e-06, "loss": 0.3741, "step": 11498 }, { "epoch": 0.7202743544371193, "grad_norm": 0.8428474373467443, "learning_rate": 1.914947017978153e-06, "loss": 0.4153, "step": 11499 }, { "epoch": 0.7203369924364615, "grad_norm": 0.9127233676850921, "learning_rate": 1.914148795535822e-06, "loss": 0.361, "step": 11500 }, { "epoch": 0.7203996304358039, "grad_norm": 0.7955006020790325, "learning_rate": 1.9133507001076973e-06, "loss": 0.3705, "step": 11501 }, { "epoch": 0.7204622684351462, "grad_norm": 0.8815940496227525, "learning_rate": 1.912552731726632e-06, "loss": 0.425, "step": 11502 }, { "epoch": 0.7205249064344885, "grad_norm": 0.8061935557140069, "learning_rate": 1.9117548904254703e-06, "loss": 0.389, "step": 11503 }, { "epoch": 0.7205875444338308, "grad_norm": 0.8602831832247971, "learning_rate": 1.910957176237048e-06, "loss": 0.3989, "step": 11504 }, { "epoch": 0.7206501824331731, "grad_norm": 0.7978693577204943, "learning_rate": 1.9101595891942037e-06, "loss": 0.3998, "step": 11505 }, { "epoch": 0.7207128204325154, "grad_norm": 0.8154479049372412, "learning_rate": 1.9093621293297644e-06, "loss": 0.3667, "step": 11506 }, { "epoch": 0.7207754584318576, "grad_norm": 0.8288792296973001, "learning_rate": 1.9085647966765536e-06, "loss": 0.3767, "step": 11507 }, { "epoch": 0.7208380964312, "grad_norm": 0.8344054573646028, "learning_rate": 1.9077675912673893e-06, "loss": 0.4003, "step": 11508 }, { "epoch": 0.7209007344305423, "grad_norm": 0.8587497778040827, "learning_rate": 1.9069705131350846e-06, "loss": 0.3923, "step": 11509 }, { "epoch": 0.7209633724298846, "grad_norm": 0.6492900691994328, "learning_rate": 1.9061735623124462e-06, "loss": 0.4298, "step": 11510 }, { "epoch": 0.7210260104292269, "grad_norm": 0.8029524420103372, "learning_rate": 1.9053767388322796e-06, "loss": 0.3753, "step": 11511 }, { "epoch": 0.7210886484285692, "grad_norm": 0.9056190449502559, "learning_rate": 1.9045800427273815e-06, "loss": 0.4087, "step": 11512 }, { "epoch": 0.7211512864279115, "grad_norm": 0.8479757386959013, "learning_rate": 1.9037834740305411e-06, "loss": 0.398, "step": 11513 }, { "epoch": 0.7212139244272537, "grad_norm": 0.8949829315981095, "learning_rate": 1.9029870327745498e-06, "loss": 0.4261, "step": 11514 }, { "epoch": 0.7212765624265961, "grad_norm": 0.8534673105140321, "learning_rate": 1.9021907189921868e-06, "loss": 0.4219, "step": 11515 }, { "epoch": 0.7213392004259384, "grad_norm": 0.6495649735203132, "learning_rate": 1.9013945327162286e-06, "loss": 0.4545, "step": 11516 }, { "epoch": 0.7214018384252807, "grad_norm": 0.8795296657513703, "learning_rate": 1.900598473979447e-06, "loss": 0.4116, "step": 11517 }, { "epoch": 0.721464476424623, "grad_norm": 0.8140953137558902, "learning_rate": 1.8998025428146072e-06, "loss": 0.3678, "step": 11518 }, { "epoch": 0.7215271144239653, "grad_norm": 0.8696590713529976, "learning_rate": 1.8990067392544687e-06, "loss": 0.3655, "step": 11519 }, { "epoch": 0.7215897524233076, "grad_norm": 0.7650920670233144, "learning_rate": 1.8982110633317897e-06, "loss": 0.3755, "step": 11520 }, { "epoch": 0.72165239042265, "grad_norm": 0.8454765355302898, "learning_rate": 1.8974155150793188e-06, "loss": 0.3793, "step": 11521 }, { "epoch": 0.7217150284219922, "grad_norm": 0.8167713898088031, "learning_rate": 1.8966200945298003e-06, "loss": 0.359, "step": 11522 }, { "epoch": 0.7217776664213345, "grad_norm": 0.8284046954385099, "learning_rate": 1.8958248017159758e-06, "loss": 0.3626, "step": 11523 }, { "epoch": 0.7218403044206768, "grad_norm": 0.8175740832081264, "learning_rate": 1.8950296366705788e-06, "loss": 0.369, "step": 11524 }, { "epoch": 0.7219029424200191, "grad_norm": 0.8212561777149703, "learning_rate": 1.8942345994263384e-06, "loss": 0.3879, "step": 11525 }, { "epoch": 0.7219655804193614, "grad_norm": 0.8710844246068417, "learning_rate": 1.8934396900159786e-06, "loss": 0.3913, "step": 11526 }, { "epoch": 0.7220282184187037, "grad_norm": 0.8864178978123639, "learning_rate": 1.8926449084722153e-06, "loss": 0.3996, "step": 11527 }, { "epoch": 0.722090856418046, "grad_norm": 0.9041440653140244, "learning_rate": 1.8918502548277668e-06, "loss": 0.4181, "step": 11528 }, { "epoch": 0.7221534944173883, "grad_norm": 0.8131174006351102, "learning_rate": 1.8910557291153382e-06, "loss": 0.3741, "step": 11529 }, { "epoch": 0.7222161324167307, "grad_norm": 0.8108864519603487, "learning_rate": 1.8902613313676328e-06, "loss": 0.3477, "step": 11530 }, { "epoch": 0.7222787704160729, "grad_norm": 0.9743825205737506, "learning_rate": 1.8894670616173482e-06, "loss": 0.4233, "step": 11531 }, { "epoch": 0.7223414084154152, "grad_norm": 0.8985623285665918, "learning_rate": 1.8886729198971753e-06, "loss": 0.423, "step": 11532 }, { "epoch": 0.7224040464147575, "grad_norm": 0.905385921157256, "learning_rate": 1.887878906239804e-06, "loss": 0.4001, "step": 11533 }, { "epoch": 0.7224666844140998, "grad_norm": 0.8303658870123407, "learning_rate": 1.8870850206779145e-06, "loss": 0.3546, "step": 11534 }, { "epoch": 0.7225293224134421, "grad_norm": 0.8128278433037668, "learning_rate": 1.8862912632441837e-06, "loss": 0.3618, "step": 11535 }, { "epoch": 0.7225919604127844, "grad_norm": 0.7811717230957717, "learning_rate": 1.8854976339712806e-06, "loss": 0.3545, "step": 11536 }, { "epoch": 0.7226545984121268, "grad_norm": 0.8584645224820568, "learning_rate": 1.8847041328918748e-06, "loss": 0.4241, "step": 11537 }, { "epoch": 0.722717236411469, "grad_norm": 0.8489179918770334, "learning_rate": 1.8839107600386246e-06, "loss": 0.4229, "step": 11538 }, { "epoch": 0.7227798744108114, "grad_norm": 0.8232241524940306, "learning_rate": 1.8831175154441861e-06, "loss": 0.3883, "step": 11539 }, { "epoch": 0.7228425124101536, "grad_norm": 0.8922387852080396, "learning_rate": 1.8823243991412093e-06, "loss": 0.3975, "step": 11540 }, { "epoch": 0.7229051504094959, "grad_norm": 0.9374083386952053, "learning_rate": 1.8815314111623373e-06, "loss": 0.4343, "step": 11541 }, { "epoch": 0.7229677884088382, "grad_norm": 0.9274120169760124, "learning_rate": 1.8807385515402127e-06, "loss": 0.3969, "step": 11542 }, { "epoch": 0.7230304264081805, "grad_norm": 0.8293895193265317, "learning_rate": 1.8799458203074683e-06, "loss": 0.3671, "step": 11543 }, { "epoch": 0.7230930644075229, "grad_norm": 0.7934177586230541, "learning_rate": 1.8791532174967331e-06, "loss": 0.3831, "step": 11544 }, { "epoch": 0.7231557024068651, "grad_norm": 0.8419728600745029, "learning_rate": 1.8783607431406292e-06, "loss": 0.3811, "step": 11545 }, { "epoch": 0.7232183404062075, "grad_norm": 0.8359841043513674, "learning_rate": 1.8775683972717778e-06, "loss": 0.3513, "step": 11546 }, { "epoch": 0.7232809784055497, "grad_norm": 0.8194521060647252, "learning_rate": 1.8767761799227908e-06, "loss": 0.3905, "step": 11547 }, { "epoch": 0.723343616404892, "grad_norm": 0.8269666816331928, "learning_rate": 1.8759840911262762e-06, "loss": 0.3442, "step": 11548 }, { "epoch": 0.7234062544042343, "grad_norm": 0.9076790919495233, "learning_rate": 1.8751921309148363e-06, "loss": 0.3774, "step": 11549 }, { "epoch": 0.7234688924035766, "grad_norm": 0.8373402307061402, "learning_rate": 1.8744002993210663e-06, "loss": 0.3903, "step": 11550 }, { "epoch": 0.723531530402919, "grad_norm": 0.8589226078663483, "learning_rate": 1.873608596377562e-06, "loss": 0.4061, "step": 11551 }, { "epoch": 0.7235941684022612, "grad_norm": 0.8034849068718678, "learning_rate": 1.872817022116908e-06, "loss": 0.3894, "step": 11552 }, { "epoch": 0.7236568064016036, "grad_norm": 0.8295339811230732, "learning_rate": 1.8720255765716844e-06, "loss": 0.4074, "step": 11553 }, { "epoch": 0.7237194444009458, "grad_norm": 0.8830668590890735, "learning_rate": 1.8712342597744698e-06, "loss": 0.4187, "step": 11554 }, { "epoch": 0.7237820824002882, "grad_norm": 0.848756153091303, "learning_rate": 1.8704430717578342e-06, "loss": 0.3979, "step": 11555 }, { "epoch": 0.7238447203996304, "grad_norm": 0.8428489661620451, "learning_rate": 1.8696520125543427e-06, "loss": 0.3812, "step": 11556 }, { "epoch": 0.7239073583989727, "grad_norm": 0.9393338339662239, "learning_rate": 1.8688610821965553e-06, "loss": 0.411, "step": 11557 }, { "epoch": 0.723969996398315, "grad_norm": 0.8278678721629781, "learning_rate": 1.868070280717027e-06, "loss": 0.4, "step": 11558 }, { "epoch": 0.7240326343976573, "grad_norm": 0.6080548987287275, "learning_rate": 1.8672796081483051e-06, "loss": 0.4278, "step": 11559 }, { "epoch": 0.7240952723969997, "grad_norm": 0.8815366846492728, "learning_rate": 1.8664890645229384e-06, "loss": 0.3753, "step": 11560 }, { "epoch": 0.7241579103963419, "grad_norm": 0.8387375665737419, "learning_rate": 1.865698649873463e-06, "loss": 0.3885, "step": 11561 }, { "epoch": 0.7242205483956843, "grad_norm": 0.6370851530377826, "learning_rate": 1.8649083642324112e-06, "loss": 0.4237, "step": 11562 }, { "epoch": 0.7242831863950265, "grad_norm": 0.7789604858638645, "learning_rate": 1.864118207632315e-06, "loss": 0.3813, "step": 11563 }, { "epoch": 0.7243458243943689, "grad_norm": 0.8890011062275455, "learning_rate": 1.863328180105695e-06, "loss": 0.4317, "step": 11564 }, { "epoch": 0.7244084623937112, "grad_norm": 0.8438711548313749, "learning_rate": 1.8625382816850695e-06, "loss": 0.4141, "step": 11565 }, { "epoch": 0.7244711003930534, "grad_norm": 0.9479455441592307, "learning_rate": 1.861748512402951e-06, "loss": 0.4077, "step": 11566 }, { "epoch": 0.7245337383923958, "grad_norm": 0.8690370615909231, "learning_rate": 1.860958872291846e-06, "loss": 0.3911, "step": 11567 }, { "epoch": 0.724596376391738, "grad_norm": 0.8345970317508662, "learning_rate": 1.8601693613842547e-06, "loss": 0.4141, "step": 11568 }, { "epoch": 0.7246590143910804, "grad_norm": 0.8627231177886144, "learning_rate": 1.8593799797126766e-06, "loss": 0.3857, "step": 11569 }, { "epoch": 0.7247216523904226, "grad_norm": 0.8632526757463818, "learning_rate": 1.8585907273096022e-06, "loss": 0.4051, "step": 11570 }, { "epoch": 0.724784290389765, "grad_norm": 0.8485354343448515, "learning_rate": 1.8578016042075143e-06, "loss": 0.3856, "step": 11571 }, { "epoch": 0.7248469283891072, "grad_norm": 0.7685088392546271, "learning_rate": 1.8570126104388975e-06, "loss": 0.3797, "step": 11572 }, { "epoch": 0.7249095663884495, "grad_norm": 0.8581548290269924, "learning_rate": 1.8562237460362247e-06, "loss": 0.3972, "step": 11573 }, { "epoch": 0.7249722043877919, "grad_norm": 0.5841658173580331, "learning_rate": 1.8554350110319663e-06, "loss": 0.4397, "step": 11574 }, { "epoch": 0.7250348423871341, "grad_norm": 0.8421278587504324, "learning_rate": 1.8546464054585867e-06, "loss": 0.3831, "step": 11575 }, { "epoch": 0.7250974803864765, "grad_norm": 0.8093501374484341, "learning_rate": 1.8538579293485442e-06, "loss": 0.3598, "step": 11576 }, { "epoch": 0.7251601183858187, "grad_norm": 0.8056207762268858, "learning_rate": 1.8530695827342938e-06, "loss": 0.3551, "step": 11577 }, { "epoch": 0.7252227563851611, "grad_norm": 0.843324021396696, "learning_rate": 1.8522813656482818e-06, "loss": 0.399, "step": 11578 }, { "epoch": 0.7252853943845033, "grad_norm": 0.8261802030754893, "learning_rate": 1.8514932781229527e-06, "loss": 0.4048, "step": 11579 }, { "epoch": 0.7253480323838457, "grad_norm": 0.8197070457835945, "learning_rate": 1.8507053201907465e-06, "loss": 0.4424, "step": 11580 }, { "epoch": 0.725410670383188, "grad_norm": 0.6078657814880475, "learning_rate": 1.849917491884094e-06, "loss": 0.4416, "step": 11581 }, { "epoch": 0.7254733083825302, "grad_norm": 0.8401840294130575, "learning_rate": 1.8491297932354224e-06, "loss": 0.3572, "step": 11582 }, { "epoch": 0.7255359463818726, "grad_norm": 0.7936366815474412, "learning_rate": 1.8483422242771532e-06, "loss": 0.3742, "step": 11583 }, { "epoch": 0.7255985843812148, "grad_norm": 0.8576444537795179, "learning_rate": 1.8475547850417031e-06, "loss": 0.3695, "step": 11584 }, { "epoch": 0.7256612223805572, "grad_norm": 0.7858856078729853, "learning_rate": 1.8467674755614828e-06, "loss": 0.3208, "step": 11585 }, { "epoch": 0.7257238603798994, "grad_norm": 0.7628718822495226, "learning_rate": 1.8459802958688965e-06, "loss": 0.3942, "step": 11586 }, { "epoch": 0.7257864983792418, "grad_norm": 0.7983090309126889, "learning_rate": 1.845193245996349e-06, "loss": 0.3827, "step": 11587 }, { "epoch": 0.7258491363785841, "grad_norm": 0.9295360564916245, "learning_rate": 1.8444063259762314e-06, "loss": 0.4243, "step": 11588 }, { "epoch": 0.7259117743779264, "grad_norm": 0.845553789589911, "learning_rate": 1.8436195358409365e-06, "loss": 0.3897, "step": 11589 }, { "epoch": 0.7259744123772687, "grad_norm": 0.8862593768099601, "learning_rate": 1.8428328756228476e-06, "loss": 0.4607, "step": 11590 }, { "epoch": 0.7260370503766109, "grad_norm": 0.8371952714754041, "learning_rate": 1.842046345354343e-06, "loss": 0.3678, "step": 11591 }, { "epoch": 0.7260996883759533, "grad_norm": 0.7839763293349497, "learning_rate": 1.8412599450677975e-06, "loss": 0.4089, "step": 11592 }, { "epoch": 0.7261623263752955, "grad_norm": 0.5747511765715834, "learning_rate": 1.8404736747955787e-06, "loss": 0.4357, "step": 11593 }, { "epoch": 0.7262249643746379, "grad_norm": 0.8383959003780519, "learning_rate": 1.8396875345700498e-06, "loss": 0.3611, "step": 11594 }, { "epoch": 0.7262876023739802, "grad_norm": 0.7998382017531057, "learning_rate": 1.8389015244235674e-06, "loss": 0.3675, "step": 11595 }, { "epoch": 0.7263502403733225, "grad_norm": 0.8724650624187651, "learning_rate": 1.8381156443884862e-06, "loss": 0.4297, "step": 11596 }, { "epoch": 0.7264128783726648, "grad_norm": 0.9063330310198633, "learning_rate": 1.83732989449715e-06, "loss": 0.4164, "step": 11597 }, { "epoch": 0.726475516372007, "grad_norm": 0.8358069090342459, "learning_rate": 1.8365442747819046e-06, "loss": 0.3358, "step": 11598 }, { "epoch": 0.7265381543713494, "grad_norm": 0.9686100252110155, "learning_rate": 1.835758785275084e-06, "loss": 0.3972, "step": 11599 }, { "epoch": 0.7266007923706916, "grad_norm": 0.830922491620201, "learning_rate": 1.8349734260090192e-06, "loss": 0.3829, "step": 11600 }, { "epoch": 0.726663430370034, "grad_norm": 0.9001166877413397, "learning_rate": 1.8341881970160352e-06, "loss": 0.4118, "step": 11601 }, { "epoch": 0.7267260683693763, "grad_norm": 0.8855850651959551, "learning_rate": 1.8334030983284528e-06, "loss": 0.3489, "step": 11602 }, { "epoch": 0.7267887063687186, "grad_norm": 0.8893476084999701, "learning_rate": 1.8326181299785867e-06, "loss": 0.4143, "step": 11603 }, { "epoch": 0.7268513443680609, "grad_norm": 0.865938207760662, "learning_rate": 1.831833291998744e-06, "loss": 0.3619, "step": 11604 }, { "epoch": 0.7269139823674032, "grad_norm": 0.7965119324998581, "learning_rate": 1.8310485844212316e-06, "loss": 0.3907, "step": 11605 }, { "epoch": 0.7269766203667455, "grad_norm": 0.8119465902810905, "learning_rate": 1.8302640072783495e-06, "loss": 0.3522, "step": 11606 }, { "epoch": 0.7270392583660877, "grad_norm": 0.8916018163427769, "learning_rate": 1.829479560602389e-06, "loss": 0.4237, "step": 11607 }, { "epoch": 0.7271018963654301, "grad_norm": 0.5721348776962013, "learning_rate": 1.8286952444256384e-06, "loss": 0.4489, "step": 11608 }, { "epoch": 0.7271645343647724, "grad_norm": 0.8306188041985345, "learning_rate": 1.82791105878038e-06, "loss": 0.3899, "step": 11609 }, { "epoch": 0.7272271723641147, "grad_norm": 0.8882668730236837, "learning_rate": 1.8271270036988914e-06, "loss": 0.4008, "step": 11610 }, { "epoch": 0.727289810363457, "grad_norm": 0.8315356810320012, "learning_rate": 1.826343079213444e-06, "loss": 0.3857, "step": 11611 }, { "epoch": 0.7273524483627993, "grad_norm": 0.8754419425709976, "learning_rate": 1.8255592853563027e-06, "loss": 0.4046, "step": 11612 }, { "epoch": 0.7274150863621416, "grad_norm": 0.8369478450911685, "learning_rate": 1.8247756221597324e-06, "loss": 0.3567, "step": 11613 }, { "epoch": 0.727477724361484, "grad_norm": 0.8579502340172138, "learning_rate": 1.8239920896559866e-06, "loss": 0.4033, "step": 11614 }, { "epoch": 0.7275403623608262, "grad_norm": 0.8252353075110065, "learning_rate": 1.8232086878773142e-06, "loss": 0.3791, "step": 11615 }, { "epoch": 0.7276030003601685, "grad_norm": 0.8184518183336933, "learning_rate": 1.8224254168559636e-06, "loss": 0.4099, "step": 11616 }, { "epoch": 0.7276656383595108, "grad_norm": 0.8062776743201024, "learning_rate": 1.8216422766241725e-06, "loss": 0.4155, "step": 11617 }, { "epoch": 0.7277282763588531, "grad_norm": 0.8261265063956978, "learning_rate": 1.8208592672141756e-06, "loss": 0.3932, "step": 11618 }, { "epoch": 0.7277909143581954, "grad_norm": 0.8529836244555212, "learning_rate": 1.8200763886582012e-06, "loss": 0.3886, "step": 11619 }, { "epoch": 0.7278535523575377, "grad_norm": 0.7769068681847423, "learning_rate": 1.8192936409884725e-06, "loss": 0.3947, "step": 11620 }, { "epoch": 0.72791619035688, "grad_norm": 0.9150644665231674, "learning_rate": 1.8185110242372067e-06, "loss": 0.4327, "step": 11621 }, { "epoch": 0.7279788283562223, "grad_norm": 0.860247537465197, "learning_rate": 1.817728538436619e-06, "loss": 0.4297, "step": 11622 }, { "epoch": 0.7280414663555645, "grad_norm": 0.8694836392803998, "learning_rate": 1.8169461836189156e-06, "loss": 0.4128, "step": 11623 }, { "epoch": 0.7281041043549069, "grad_norm": 0.824258303173113, "learning_rate": 1.8161639598162961e-06, "loss": 0.4051, "step": 11624 }, { "epoch": 0.7281667423542492, "grad_norm": 0.801891482666869, "learning_rate": 1.8153818670609607e-06, "loss": 0.3575, "step": 11625 }, { "epoch": 0.7282293803535915, "grad_norm": 0.7740201755855333, "learning_rate": 1.8145999053850994e-06, "loss": 0.3384, "step": 11626 }, { "epoch": 0.7282920183529338, "grad_norm": 0.8238406314551925, "learning_rate": 1.813818074820896e-06, "loss": 0.3634, "step": 11627 }, { "epoch": 0.7283546563522761, "grad_norm": 0.8343582893013708, "learning_rate": 1.813036375400533e-06, "loss": 0.3585, "step": 11628 }, { "epoch": 0.7284172943516184, "grad_norm": 0.8749194598595453, "learning_rate": 1.812254807156184e-06, "loss": 0.3879, "step": 11629 }, { "epoch": 0.7284799323509608, "grad_norm": 0.8015800514399987, "learning_rate": 1.811473370120017e-06, "loss": 0.3739, "step": 11630 }, { "epoch": 0.728542570350303, "grad_norm": 0.8745388941274268, "learning_rate": 1.8106920643241998e-06, "loss": 0.4347, "step": 11631 }, { "epoch": 0.7286052083496453, "grad_norm": 0.9125083015359058, "learning_rate": 1.8099108898008894e-06, "loss": 0.4091, "step": 11632 }, { "epoch": 0.7286678463489876, "grad_norm": 0.8008233700778663, "learning_rate": 1.8091298465822365e-06, "loss": 0.3677, "step": 11633 }, { "epoch": 0.7287304843483299, "grad_norm": 0.8632263774546691, "learning_rate": 1.8083489347003941e-06, "loss": 0.3648, "step": 11634 }, { "epoch": 0.7287931223476722, "grad_norm": 0.8308987906476623, "learning_rate": 1.8075681541875013e-06, "loss": 0.3614, "step": 11635 }, { "epoch": 0.7288557603470145, "grad_norm": 0.8246014926186072, "learning_rate": 1.8067875050756956e-06, "loss": 0.3567, "step": 11636 }, { "epoch": 0.7289183983463569, "grad_norm": 0.9157858696674444, "learning_rate": 1.8060069873971091e-06, "loss": 0.4405, "step": 11637 }, { "epoch": 0.7289810363456991, "grad_norm": 0.8305103596732755, "learning_rate": 1.805226601183866e-06, "loss": 0.3684, "step": 11638 }, { "epoch": 0.7290436743450415, "grad_norm": 0.9221113468744381, "learning_rate": 1.8044463464680912e-06, "loss": 0.4423, "step": 11639 }, { "epoch": 0.7291063123443837, "grad_norm": 0.8687393640498731, "learning_rate": 1.8036662232818974e-06, "loss": 0.412, "step": 11640 }, { "epoch": 0.729168950343726, "grad_norm": 0.9221301952145349, "learning_rate": 1.8028862316573952e-06, "loss": 0.4102, "step": 11641 }, { "epoch": 0.7292315883430683, "grad_norm": 0.9233899532290715, "learning_rate": 1.8021063716266873e-06, "loss": 0.3935, "step": 11642 }, { "epoch": 0.7292942263424106, "grad_norm": 0.7840301522684845, "learning_rate": 1.8013266432218767e-06, "loss": 0.3858, "step": 11643 }, { "epoch": 0.729356864341753, "grad_norm": 0.6162146893584046, "learning_rate": 1.8005470464750552e-06, "loss": 0.4719, "step": 11644 }, { "epoch": 0.7294195023410952, "grad_norm": 0.8144806723881514, "learning_rate": 1.7997675814183108e-06, "loss": 0.3637, "step": 11645 }, { "epoch": 0.7294821403404376, "grad_norm": 0.8297578793050774, "learning_rate": 1.7989882480837267e-06, "loss": 0.3715, "step": 11646 }, { "epoch": 0.7295447783397798, "grad_norm": 0.8643221580632899, "learning_rate": 1.7982090465033792e-06, "loss": 0.3802, "step": 11647 }, { "epoch": 0.7296074163391222, "grad_norm": 0.8818049569771758, "learning_rate": 1.7974299767093433e-06, "loss": 0.3704, "step": 11648 }, { "epoch": 0.7296700543384644, "grad_norm": 0.8670598389880269, "learning_rate": 1.7966510387336834e-06, "loss": 0.4282, "step": 11649 }, { "epoch": 0.7297326923378067, "grad_norm": 0.8575107428799013, "learning_rate": 1.7958722326084616e-06, "loss": 0.3736, "step": 11650 }, { "epoch": 0.729795330337149, "grad_norm": 0.6390048142315615, "learning_rate": 1.795093558365732e-06, "loss": 0.4449, "step": 11651 }, { "epoch": 0.7298579683364913, "grad_norm": 0.9325939267719561, "learning_rate": 1.7943150160375477e-06, "loss": 0.4427, "step": 11652 }, { "epoch": 0.7299206063358337, "grad_norm": 0.920323926280183, "learning_rate": 1.7935366056559529e-06, "loss": 0.436, "step": 11653 }, { "epoch": 0.7299832443351759, "grad_norm": 0.8637694142047729, "learning_rate": 1.7927583272529858e-06, "loss": 0.3875, "step": 11654 }, { "epoch": 0.7300458823345183, "grad_norm": 0.9011027717760662, "learning_rate": 1.7919801808606819e-06, "loss": 0.3925, "step": 11655 }, { "epoch": 0.7301085203338605, "grad_norm": 0.8403223677900985, "learning_rate": 1.7912021665110674e-06, "loss": 0.4113, "step": 11656 }, { "epoch": 0.7301711583332028, "grad_norm": 0.7933267675159567, "learning_rate": 1.7904242842361685e-06, "loss": 0.3936, "step": 11657 }, { "epoch": 0.7302337963325451, "grad_norm": 0.8399792037835626, "learning_rate": 1.789646534068002e-06, "loss": 0.3928, "step": 11658 }, { "epoch": 0.7302964343318874, "grad_norm": 0.7805100728523086, "learning_rate": 1.7888689160385803e-06, "loss": 0.3655, "step": 11659 }, { "epoch": 0.7303590723312298, "grad_norm": 0.8274531309441931, "learning_rate": 1.7880914301799101e-06, "loss": 0.3584, "step": 11660 }, { "epoch": 0.730421710330572, "grad_norm": 0.9111621442329982, "learning_rate": 1.7873140765239905e-06, "loss": 0.3719, "step": 11661 }, { "epoch": 0.7304843483299144, "grad_norm": 0.8241040192802938, "learning_rate": 1.7865368551028223e-06, "loss": 0.3858, "step": 11662 }, { "epoch": 0.7305469863292566, "grad_norm": 0.8744389515366764, "learning_rate": 1.7857597659483927e-06, "loss": 0.3905, "step": 11663 }, { "epoch": 0.730609624328599, "grad_norm": 1.0228310489429735, "learning_rate": 1.7849828090926869e-06, "loss": 0.4211, "step": 11664 }, { "epoch": 0.7306722623279412, "grad_norm": 0.8206668391736842, "learning_rate": 1.7842059845676862e-06, "loss": 0.3764, "step": 11665 }, { "epoch": 0.7307349003272835, "grad_norm": 0.8293066981381717, "learning_rate": 1.7834292924053649e-06, "loss": 0.3916, "step": 11666 }, { "epoch": 0.7307975383266259, "grad_norm": 0.888041260474085, "learning_rate": 1.7826527326376908e-06, "loss": 0.396, "step": 11667 }, { "epoch": 0.7308601763259681, "grad_norm": 0.829244337900466, "learning_rate": 1.781876305296627e-06, "loss": 0.3707, "step": 11668 }, { "epoch": 0.7309228143253105, "grad_norm": 0.871302605157666, "learning_rate": 1.7811000104141318e-06, "loss": 0.386, "step": 11669 }, { "epoch": 0.7309854523246527, "grad_norm": 0.8750785471978336, "learning_rate": 1.7803238480221563e-06, "loss": 0.423, "step": 11670 }, { "epoch": 0.7310480903239951, "grad_norm": 0.9078770229447739, "learning_rate": 1.7795478181526505e-06, "loss": 0.3917, "step": 11671 }, { "epoch": 0.7311107283233373, "grad_norm": 0.811434402506633, "learning_rate": 1.7787719208375536e-06, "loss": 0.3747, "step": 11672 }, { "epoch": 0.7311733663226797, "grad_norm": 0.8396826879701155, "learning_rate": 1.7779961561088016e-06, "loss": 0.3836, "step": 11673 }, { "epoch": 0.731236004322022, "grad_norm": 0.823390689218042, "learning_rate": 1.777220523998327e-06, "loss": 0.3679, "step": 11674 }, { "epoch": 0.7312986423213642, "grad_norm": 0.8041324841678839, "learning_rate": 1.7764450245380539e-06, "loss": 0.4101, "step": 11675 }, { "epoch": 0.7313612803207066, "grad_norm": 0.8745814735725665, "learning_rate": 1.7756696577599014e-06, "loss": 0.4075, "step": 11676 }, { "epoch": 0.7314239183200488, "grad_norm": 0.919457117719914, "learning_rate": 1.774894423695785e-06, "loss": 0.3903, "step": 11677 }, { "epoch": 0.7314865563193912, "grad_norm": 0.8462444494655559, "learning_rate": 1.7741193223776127e-06, "loss": 0.3581, "step": 11678 }, { "epoch": 0.7315491943187334, "grad_norm": 0.9294772902472179, "learning_rate": 1.7733443538372858e-06, "loss": 0.419, "step": 11679 }, { "epoch": 0.7316118323180758, "grad_norm": 0.9390269390222808, "learning_rate": 1.772569518106706e-06, "loss": 0.389, "step": 11680 }, { "epoch": 0.7316744703174181, "grad_norm": 0.9223503354936649, "learning_rate": 1.7717948152177638e-06, "loss": 0.3679, "step": 11681 }, { "epoch": 0.7317371083167603, "grad_norm": 0.8653334652221994, "learning_rate": 1.7710202452023444e-06, "loss": 0.4118, "step": 11682 }, { "epoch": 0.7317997463161027, "grad_norm": 0.8663799730849773, "learning_rate": 1.7702458080923329e-06, "loss": 0.3897, "step": 11683 }, { "epoch": 0.7318623843154449, "grad_norm": 0.8475195702811681, "learning_rate": 1.7694715039196032e-06, "loss": 0.3889, "step": 11684 }, { "epoch": 0.7319250223147873, "grad_norm": 0.831466129459279, "learning_rate": 1.7686973327160261e-06, "loss": 0.3715, "step": 11685 }, { "epoch": 0.7319876603141295, "grad_norm": 0.9294735639590042, "learning_rate": 1.7679232945134668e-06, "loss": 0.419, "step": 11686 }, { "epoch": 0.7320502983134719, "grad_norm": 0.8787576199332603, "learning_rate": 1.7671493893437847e-06, "loss": 0.4062, "step": 11687 }, { "epoch": 0.7321129363128142, "grad_norm": 0.8452148666844048, "learning_rate": 1.7663756172388318e-06, "loss": 0.3434, "step": 11688 }, { "epoch": 0.7321755743121565, "grad_norm": 0.893393618877138, "learning_rate": 1.7656019782304602e-06, "loss": 0.3829, "step": 11689 }, { "epoch": 0.7322382123114988, "grad_norm": 0.8121257464320836, "learning_rate": 1.7648284723505098e-06, "loss": 0.3565, "step": 11690 }, { "epoch": 0.732300850310841, "grad_norm": 0.9249338303580062, "learning_rate": 1.764055099630822e-06, "loss": 0.3991, "step": 11691 }, { "epoch": 0.7323634883101834, "grad_norm": 0.8784266832630947, "learning_rate": 1.763281860103227e-06, "loss": 0.4187, "step": 11692 }, { "epoch": 0.7324261263095256, "grad_norm": 0.9071341372320457, "learning_rate": 1.7625087537995512e-06, "loss": 0.3964, "step": 11693 }, { "epoch": 0.732488764308868, "grad_norm": 0.8672218406664063, "learning_rate": 1.7617357807516162e-06, "loss": 0.3874, "step": 11694 }, { "epoch": 0.7325514023082103, "grad_norm": 0.9452802315216748, "learning_rate": 1.7609629409912371e-06, "loss": 0.3973, "step": 11695 }, { "epoch": 0.7326140403075526, "grad_norm": 0.5619930271544324, "learning_rate": 1.7601902345502248e-06, "loss": 0.4324, "step": 11696 }, { "epoch": 0.7326766783068949, "grad_norm": 0.8126362080806508, "learning_rate": 1.759417661460382e-06, "loss": 0.402, "step": 11697 }, { "epoch": 0.7327393163062372, "grad_norm": 0.8145201055443283, "learning_rate": 1.7586452217535116e-06, "loss": 0.3369, "step": 11698 }, { "epoch": 0.7328019543055795, "grad_norm": 0.9348413149317552, "learning_rate": 1.7578729154614032e-06, "loss": 0.3976, "step": 11699 }, { "epoch": 0.7328645923049217, "grad_norm": 0.8484552414071665, "learning_rate": 1.75710074261585e-06, "loss": 0.3583, "step": 11700 }, { "epoch": 0.7329272303042641, "grad_norm": 0.8172037039201933, "learning_rate": 1.7563287032486316e-06, "loss": 0.3953, "step": 11701 }, { "epoch": 0.7329898683036064, "grad_norm": 0.7902367837461853, "learning_rate": 1.7555567973915256e-06, "loss": 0.3927, "step": 11702 }, { "epoch": 0.7330525063029487, "grad_norm": 0.8505798816971754, "learning_rate": 1.7547850250763042e-06, "loss": 0.4144, "step": 11703 }, { "epoch": 0.733115144302291, "grad_norm": 0.8211427218286801, "learning_rate": 1.7540133863347342e-06, "loss": 0.3711, "step": 11704 }, { "epoch": 0.7331777823016333, "grad_norm": 0.8149989005455122, "learning_rate": 1.7532418811985746e-06, "loss": 0.3633, "step": 11705 }, { "epoch": 0.7332404203009756, "grad_norm": 0.8748539525669868, "learning_rate": 1.7524705096995808e-06, "loss": 0.4132, "step": 11706 }, { "epoch": 0.7333030583003178, "grad_norm": 0.8527397492362347, "learning_rate": 1.7516992718695036e-06, "loss": 0.3899, "step": 11707 }, { "epoch": 0.7333656962996602, "grad_norm": 0.8298736553534817, "learning_rate": 1.7509281677400885e-06, "loss": 0.3599, "step": 11708 }, { "epoch": 0.7334283342990024, "grad_norm": 0.8757712957992435, "learning_rate": 1.7501571973430736e-06, "loss": 0.355, "step": 11709 }, { "epoch": 0.7334909722983448, "grad_norm": 0.914713962961841, "learning_rate": 1.7493863607101914e-06, "loss": 0.4165, "step": 11710 }, { "epoch": 0.7335536102976871, "grad_norm": 0.8327027744282908, "learning_rate": 1.74861565787317e-06, "loss": 0.355, "step": 11711 }, { "epoch": 0.7336162482970294, "grad_norm": 0.8612865120281747, "learning_rate": 1.7478450888637317e-06, "loss": 0.3833, "step": 11712 }, { "epoch": 0.7336788862963717, "grad_norm": 0.8781669697156335, "learning_rate": 1.7470746537135929e-06, "loss": 0.3785, "step": 11713 }, { "epoch": 0.733741524295714, "grad_norm": 0.7952540971571515, "learning_rate": 1.7463043524544654e-06, "loss": 0.379, "step": 11714 }, { "epoch": 0.7338041622950563, "grad_norm": 0.8687650902623548, "learning_rate": 1.7455341851180523e-06, "loss": 0.3914, "step": 11715 }, { "epoch": 0.7338668002943985, "grad_norm": 0.8657720616492501, "learning_rate": 1.744764151736057e-06, "loss": 0.3876, "step": 11716 }, { "epoch": 0.7339294382937409, "grad_norm": 0.8794410807791561, "learning_rate": 1.7439942523401748e-06, "loss": 0.3979, "step": 11717 }, { "epoch": 0.7339920762930832, "grad_norm": 0.8889105831696783, "learning_rate": 1.7432244869620935e-06, "loss": 0.4168, "step": 11718 }, { "epoch": 0.7340547142924255, "grad_norm": 0.8863785040222056, "learning_rate": 1.7424548556334969e-06, "loss": 0.4003, "step": 11719 }, { "epoch": 0.7341173522917678, "grad_norm": 0.9021765394421237, "learning_rate": 1.7416853583860632e-06, "loss": 0.4143, "step": 11720 }, { "epoch": 0.7341799902911101, "grad_norm": 0.9119264958827329, "learning_rate": 1.7409159952514648e-06, "loss": 0.3842, "step": 11721 }, { "epoch": 0.7342426282904524, "grad_norm": 0.7892037216886343, "learning_rate": 1.7401467662613686e-06, "loss": 0.3779, "step": 11722 }, { "epoch": 0.7343052662897948, "grad_norm": 0.862071203126577, "learning_rate": 1.7393776714474352e-06, "loss": 0.383, "step": 11723 }, { "epoch": 0.734367904289137, "grad_norm": 0.9399202076051036, "learning_rate": 1.7386087108413242e-06, "loss": 0.4448, "step": 11724 }, { "epoch": 0.7344305422884793, "grad_norm": 0.8434181528658055, "learning_rate": 1.7378398844746812e-06, "loss": 0.3832, "step": 11725 }, { "epoch": 0.7344931802878216, "grad_norm": 0.7929734706936268, "learning_rate": 1.7370711923791567e-06, "loss": 0.3612, "step": 11726 }, { "epoch": 0.7345558182871639, "grad_norm": 0.8515851927591271, "learning_rate": 1.736302634586387e-06, "loss": 0.4149, "step": 11727 }, { "epoch": 0.7346184562865062, "grad_norm": 0.8552023633405632, "learning_rate": 1.735534211128007e-06, "loss": 0.3951, "step": 11728 }, { "epoch": 0.7346810942858485, "grad_norm": 0.8845055041587205, "learning_rate": 1.7347659220356444e-06, "loss": 0.3827, "step": 11729 }, { "epoch": 0.7347437322851909, "grad_norm": 0.9040422478711307, "learning_rate": 1.733997767340923e-06, "loss": 0.3768, "step": 11730 }, { "epoch": 0.7348063702845331, "grad_norm": 0.8294674605161346, "learning_rate": 1.7332297470754594e-06, "loss": 0.3924, "step": 11731 }, { "epoch": 0.7348690082838754, "grad_norm": 0.7717650179707483, "learning_rate": 1.7324618612708637e-06, "loss": 0.3525, "step": 11732 }, { "epoch": 0.7349316462832177, "grad_norm": 0.8419905180335207, "learning_rate": 1.7316941099587464e-06, "loss": 0.4261, "step": 11733 }, { "epoch": 0.73499428428256, "grad_norm": 0.8930532496794235, "learning_rate": 1.7309264931707048e-06, "loss": 0.4012, "step": 11734 }, { "epoch": 0.7350569222819023, "grad_norm": 0.8205045317301592, "learning_rate": 1.7301590109383365e-06, "loss": 0.3614, "step": 11735 }, { "epoch": 0.7351195602812446, "grad_norm": 0.8053875916176979, "learning_rate": 1.7293916632932306e-06, "loss": 0.3846, "step": 11736 }, { "epoch": 0.735182198280587, "grad_norm": 0.8769834213196138, "learning_rate": 1.7286244502669708e-06, "loss": 0.3757, "step": 11737 }, { "epoch": 0.7352448362799292, "grad_norm": 0.8931177216703691, "learning_rate": 1.7278573718911363e-06, "loss": 0.3762, "step": 11738 }, { "epoch": 0.7353074742792716, "grad_norm": 0.8098157623368545, "learning_rate": 1.727090428197299e-06, "loss": 0.3616, "step": 11739 }, { "epoch": 0.7353701122786138, "grad_norm": 0.8799611655749997, "learning_rate": 1.7263236192170273e-06, "loss": 0.3674, "step": 11740 }, { "epoch": 0.7354327502779561, "grad_norm": 0.8684804663788455, "learning_rate": 1.7255569449818816e-06, "loss": 0.388, "step": 11741 }, { "epoch": 0.7354953882772984, "grad_norm": 0.8451072723357946, "learning_rate": 1.7247904055234215e-06, "loss": 0.4045, "step": 11742 }, { "epoch": 0.7355580262766407, "grad_norm": 0.8416221343490853, "learning_rate": 1.724024000873194e-06, "loss": 0.376, "step": 11743 }, { "epoch": 0.735620664275983, "grad_norm": 0.864396114461225, "learning_rate": 1.723257731062749e-06, "loss": 0.3522, "step": 11744 }, { "epoch": 0.7356833022753253, "grad_norm": 0.8248973014900216, "learning_rate": 1.7224915961236239e-06, "loss": 0.3896, "step": 11745 }, { "epoch": 0.7357459402746677, "grad_norm": 0.8027073212810744, "learning_rate": 1.721725596087353e-06, "loss": 0.3356, "step": 11746 }, { "epoch": 0.7358085782740099, "grad_norm": 0.8485643816620712, "learning_rate": 1.7209597309854648e-06, "loss": 0.381, "step": 11747 }, { "epoch": 0.7358712162733523, "grad_norm": 0.8679015425080536, "learning_rate": 1.7201940008494827e-06, "loss": 0.4223, "step": 11748 }, { "epoch": 0.7359338542726945, "grad_norm": 0.8777241291406083, "learning_rate": 1.7194284057109228e-06, "loss": 0.3906, "step": 11749 }, { "epoch": 0.7359964922720368, "grad_norm": 0.8761097818535972, "learning_rate": 1.7186629456013003e-06, "loss": 0.405, "step": 11750 }, { "epoch": 0.7360591302713791, "grad_norm": 0.7729760752499669, "learning_rate": 1.7178976205521197e-06, "loss": 0.3635, "step": 11751 }, { "epoch": 0.7361217682707214, "grad_norm": 0.8133615243324831, "learning_rate": 1.7171324305948828e-06, "loss": 0.4, "step": 11752 }, { "epoch": 0.7361844062700638, "grad_norm": 0.8693737951340367, "learning_rate": 1.716367375761082e-06, "loss": 0.3793, "step": 11753 }, { "epoch": 0.736247044269406, "grad_norm": 0.8585713206228857, "learning_rate": 1.7156024560822115e-06, "loss": 0.4285, "step": 11754 }, { "epoch": 0.7363096822687484, "grad_norm": 0.8965390245695601, "learning_rate": 1.7148376715897535e-06, "loss": 0.3526, "step": 11755 }, { "epoch": 0.7363723202680906, "grad_norm": 0.8513703516343105, "learning_rate": 1.7140730223151869e-06, "loss": 0.4167, "step": 11756 }, { "epoch": 0.736434958267433, "grad_norm": 0.8179547627816905, "learning_rate": 1.7133085082899842e-06, "loss": 0.3795, "step": 11757 }, { "epoch": 0.7364975962667752, "grad_norm": 0.7592482360781884, "learning_rate": 1.7125441295456118e-06, "loss": 0.3203, "step": 11758 }, { "epoch": 0.7365602342661175, "grad_norm": 0.9076140923092615, "learning_rate": 1.7117798861135354e-06, "loss": 0.4222, "step": 11759 }, { "epoch": 0.7366228722654599, "grad_norm": 0.6155504468468321, "learning_rate": 1.7110157780252084e-06, "loss": 0.4357, "step": 11760 }, { "epoch": 0.7366855102648021, "grad_norm": 0.8572861612901348, "learning_rate": 1.710251805312083e-06, "loss": 0.3768, "step": 11761 }, { "epoch": 0.7367481482641445, "grad_norm": 0.8371013477167776, "learning_rate": 1.709487968005602e-06, "loss": 0.3348, "step": 11762 }, { "epoch": 0.7368107862634867, "grad_norm": 0.8300476022897114, "learning_rate": 1.708724266137209e-06, "loss": 0.3636, "step": 11763 }, { "epoch": 0.7368734242628291, "grad_norm": 0.8960396183080886, "learning_rate": 1.7079606997383358e-06, "loss": 0.4093, "step": 11764 }, { "epoch": 0.7369360622621713, "grad_norm": 0.8462346275125134, "learning_rate": 1.7071972688404115e-06, "loss": 0.3882, "step": 11765 }, { "epoch": 0.7369987002615136, "grad_norm": 0.9558666923004508, "learning_rate": 1.706433973474858e-06, "loss": 0.3874, "step": 11766 }, { "epoch": 0.737061338260856, "grad_norm": 0.839487041089926, "learning_rate": 1.7056708136730925e-06, "loss": 0.3792, "step": 11767 }, { "epoch": 0.7371239762601982, "grad_norm": 0.8857246607012824, "learning_rate": 1.7049077894665294e-06, "loss": 0.3798, "step": 11768 }, { "epoch": 0.7371866142595406, "grad_norm": 0.878869143740285, "learning_rate": 1.7041449008865728e-06, "loss": 0.3947, "step": 11769 }, { "epoch": 0.7372492522588828, "grad_norm": 0.861859092469052, "learning_rate": 1.7033821479646246e-06, "loss": 0.3906, "step": 11770 }, { "epoch": 0.7373118902582252, "grad_norm": 0.6113748382375664, "learning_rate": 1.702619530732077e-06, "loss": 0.4827, "step": 11771 }, { "epoch": 0.7373745282575674, "grad_norm": 0.8714957763553534, "learning_rate": 1.701857049220323e-06, "loss": 0.406, "step": 11772 }, { "epoch": 0.7374371662569098, "grad_norm": 0.8626818847770524, "learning_rate": 1.7010947034607451e-06, "loss": 0.3943, "step": 11773 }, { "epoch": 0.7374998042562521, "grad_norm": 0.8453622054621688, "learning_rate": 1.7003324934847215e-06, "loss": 0.3584, "step": 11774 }, { "epoch": 0.7375624422555943, "grad_norm": 0.8012928159323418, "learning_rate": 1.6995704193236235e-06, "loss": 0.3672, "step": 11775 }, { "epoch": 0.7376250802549367, "grad_norm": 0.9595840006514634, "learning_rate": 1.6988084810088213e-06, "loss": 0.4161, "step": 11776 }, { "epoch": 0.7376877182542789, "grad_norm": 0.8783492453751516, "learning_rate": 1.6980466785716742e-06, "loss": 0.4029, "step": 11777 }, { "epoch": 0.7377503562536213, "grad_norm": 0.8947081034486641, "learning_rate": 1.697285012043539e-06, "loss": 0.4392, "step": 11778 }, { "epoch": 0.7378129942529635, "grad_norm": 0.8930420610346381, "learning_rate": 1.6965234814557663e-06, "loss": 0.3881, "step": 11779 }, { "epoch": 0.7378756322523059, "grad_norm": 0.6061776176051027, "learning_rate": 1.6957620868396979e-06, "loss": 0.4517, "step": 11780 }, { "epoch": 0.7379382702516482, "grad_norm": 0.8263485725757382, "learning_rate": 1.6950008282266771e-06, "loss": 0.4095, "step": 11781 }, { "epoch": 0.7380009082509905, "grad_norm": 0.9496994825950164, "learning_rate": 1.6942397056480358e-06, "loss": 0.4474, "step": 11782 }, { "epoch": 0.7380635462503328, "grad_norm": 0.8340295000729387, "learning_rate": 1.6934787191351015e-06, "loss": 0.4192, "step": 11783 }, { "epoch": 0.738126184249675, "grad_norm": 0.8436066664129119, "learning_rate": 1.6927178687191953e-06, "loss": 0.3877, "step": 11784 }, { "epoch": 0.7381888222490174, "grad_norm": 0.9104914306471628, "learning_rate": 1.6919571544316377e-06, "loss": 0.3488, "step": 11785 }, { "epoch": 0.7382514602483596, "grad_norm": 0.8107973448712952, "learning_rate": 1.691196576303737e-06, "loss": 0.3607, "step": 11786 }, { "epoch": 0.738314098247702, "grad_norm": 0.8346553759293002, "learning_rate": 1.6904361343667997e-06, "loss": 0.3568, "step": 11787 }, { "epoch": 0.7383767362470443, "grad_norm": 0.8060257869551308, "learning_rate": 1.6896758286521248e-06, "loss": 0.3559, "step": 11788 }, { "epoch": 0.7384393742463866, "grad_norm": 0.8409859766560969, "learning_rate": 1.6889156591910061e-06, "loss": 0.3864, "step": 11789 }, { "epoch": 0.7385020122457289, "grad_norm": 0.8098985940701998, "learning_rate": 1.6881556260147347e-06, "loss": 0.393, "step": 11790 }, { "epoch": 0.7385646502450711, "grad_norm": 0.8109268615100332, "learning_rate": 1.6873957291545928e-06, "loss": 0.3764, "step": 11791 }, { "epoch": 0.7386272882444135, "grad_norm": 0.922538746099421, "learning_rate": 1.686635968641856e-06, "loss": 0.3706, "step": 11792 }, { "epoch": 0.7386899262437557, "grad_norm": 0.8985525576337892, "learning_rate": 1.6858763445077992e-06, "loss": 0.3717, "step": 11793 }, { "epoch": 0.7387525642430981, "grad_norm": 0.6460328563791166, "learning_rate": 1.6851168567836867e-06, "loss": 0.4593, "step": 11794 }, { "epoch": 0.7388152022424403, "grad_norm": 0.6006884188404923, "learning_rate": 1.6843575055007805e-06, "loss": 0.4418, "step": 11795 }, { "epoch": 0.7388778402417827, "grad_norm": 0.8755079575441096, "learning_rate": 1.6835982906903342e-06, "loss": 0.3847, "step": 11796 }, { "epoch": 0.738940478241125, "grad_norm": 0.8345966245105763, "learning_rate": 1.6828392123835979e-06, "loss": 0.3744, "step": 11797 }, { "epoch": 0.7390031162404673, "grad_norm": 0.8366498269220212, "learning_rate": 1.6820802706118155e-06, "loss": 0.3578, "step": 11798 }, { "epoch": 0.7390657542398096, "grad_norm": 0.8996955095513518, "learning_rate": 1.6813214654062232e-06, "loss": 0.403, "step": 11799 }, { "epoch": 0.7391283922391518, "grad_norm": 0.8587470154256355, "learning_rate": 1.680562796798057e-06, "loss": 0.3874, "step": 11800 }, { "epoch": 0.7391910302384942, "grad_norm": 0.7959109564537614, "learning_rate": 1.6798042648185409e-06, "loss": 0.3553, "step": 11801 }, { "epoch": 0.7392536682378364, "grad_norm": 0.8223113408855621, "learning_rate": 1.6790458694988988e-06, "loss": 0.3682, "step": 11802 }, { "epoch": 0.7393163062371788, "grad_norm": 0.9509787651666294, "learning_rate": 1.678287610870345e-06, "loss": 0.3717, "step": 11803 }, { "epoch": 0.7393789442365211, "grad_norm": 0.8721756672262173, "learning_rate": 1.6775294889640898e-06, "loss": 0.3774, "step": 11804 }, { "epoch": 0.7394415822358634, "grad_norm": 0.6454657074380385, "learning_rate": 1.6767715038113375e-06, "loss": 0.4562, "step": 11805 }, { "epoch": 0.7395042202352057, "grad_norm": 0.9285399210081571, "learning_rate": 1.676013655443287e-06, "loss": 0.4156, "step": 11806 }, { "epoch": 0.739566858234548, "grad_norm": 0.8608798220708764, "learning_rate": 1.6752559438911314e-06, "loss": 0.4018, "step": 11807 }, { "epoch": 0.7396294962338903, "grad_norm": 0.8744188962894086, "learning_rate": 1.6744983691860566e-06, "loss": 0.3891, "step": 11808 }, { "epoch": 0.7396921342332325, "grad_norm": 0.8903510524343295, "learning_rate": 1.673740931359248e-06, "loss": 0.3774, "step": 11809 }, { "epoch": 0.7397547722325749, "grad_norm": 0.8616632494368693, "learning_rate": 1.6729836304418778e-06, "loss": 0.3945, "step": 11810 }, { "epoch": 0.7398174102319172, "grad_norm": 0.8544518635148393, "learning_rate": 1.6722264664651212e-06, "loss": 0.3868, "step": 11811 }, { "epoch": 0.7398800482312595, "grad_norm": 0.8055752112590663, "learning_rate": 1.6714694394601405e-06, "loss": 0.3968, "step": 11812 }, { "epoch": 0.7399426862306018, "grad_norm": 0.9153565674718221, "learning_rate": 1.6707125494580956e-06, "loss": 0.4123, "step": 11813 }, { "epoch": 0.7400053242299441, "grad_norm": 0.8157185119202053, "learning_rate": 1.6699557964901403e-06, "loss": 0.3729, "step": 11814 }, { "epoch": 0.7400679622292864, "grad_norm": 0.7549464589845437, "learning_rate": 1.6691991805874225e-06, "loss": 0.3565, "step": 11815 }, { "epoch": 0.7401306002286286, "grad_norm": 0.8948399717919115, "learning_rate": 1.6684427017810844e-06, "loss": 0.3961, "step": 11816 }, { "epoch": 0.740193238227971, "grad_norm": 0.9155217304056809, "learning_rate": 1.6676863601022614e-06, "loss": 0.381, "step": 11817 }, { "epoch": 0.7402558762273133, "grad_norm": 0.7978925473027714, "learning_rate": 1.6669301555820866e-06, "loss": 0.3731, "step": 11818 }, { "epoch": 0.7403185142266556, "grad_norm": 0.8663129324655787, "learning_rate": 1.666174088251687e-06, "loss": 0.4009, "step": 11819 }, { "epoch": 0.7403811522259979, "grad_norm": 0.8342167436771286, "learning_rate": 1.6654181581421802e-06, "loss": 0.3659, "step": 11820 }, { "epoch": 0.7404437902253402, "grad_norm": 0.855629195411428, "learning_rate": 1.664662365284681e-06, "loss": 0.3685, "step": 11821 }, { "epoch": 0.7405064282246825, "grad_norm": 0.8438324843729923, "learning_rate": 1.6639067097102985e-06, "loss": 0.3585, "step": 11822 }, { "epoch": 0.7405690662240249, "grad_norm": 0.8661566496273404, "learning_rate": 1.6631511914501342e-06, "loss": 0.3885, "step": 11823 }, { "epoch": 0.7406317042233671, "grad_norm": 0.9050592415635598, "learning_rate": 1.6623958105352867e-06, "loss": 0.4229, "step": 11824 }, { "epoch": 0.7406943422227094, "grad_norm": 0.9313964609033556, "learning_rate": 1.6616405669968471e-06, "loss": 0.4261, "step": 11825 }, { "epoch": 0.7407569802220517, "grad_norm": 0.8503859002606946, "learning_rate": 1.6608854608658998e-06, "loss": 0.3707, "step": 11826 }, { "epoch": 0.740819618221394, "grad_norm": 0.8860057752050012, "learning_rate": 1.6601304921735266e-06, "loss": 0.3811, "step": 11827 }, { "epoch": 0.7408822562207363, "grad_norm": 0.8174177332173598, "learning_rate": 1.6593756609508038e-06, "loss": 0.3674, "step": 11828 }, { "epoch": 0.7409448942200786, "grad_norm": 0.8589202215499073, "learning_rate": 1.6586209672287989e-06, "loss": 0.3749, "step": 11829 }, { "epoch": 0.741007532219421, "grad_norm": 0.8154020151273754, "learning_rate": 1.657866411038575e-06, "loss": 0.3727, "step": 11830 }, { "epoch": 0.7410701702187632, "grad_norm": 0.8586097472686849, "learning_rate": 1.6571119924111905e-06, "loss": 0.3991, "step": 11831 }, { "epoch": 0.7411328082181056, "grad_norm": 0.8713803970626045, "learning_rate": 1.6563577113776964e-06, "loss": 0.3837, "step": 11832 }, { "epoch": 0.7411954462174478, "grad_norm": 0.7491324190424864, "learning_rate": 1.6556035679691395e-06, "loss": 0.3376, "step": 11833 }, { "epoch": 0.7412580842167901, "grad_norm": 0.8281375843416549, "learning_rate": 1.6548495622165595e-06, "loss": 0.371, "step": 11834 }, { "epoch": 0.7413207222161324, "grad_norm": 0.8380128621161567, "learning_rate": 1.6540956941509934e-06, "loss": 0.3613, "step": 11835 }, { "epoch": 0.7413833602154747, "grad_norm": 0.8303978969493737, "learning_rate": 1.6533419638034681e-06, "loss": 0.3521, "step": 11836 }, { "epoch": 0.741445998214817, "grad_norm": 0.7934103807202587, "learning_rate": 1.65258837120501e-06, "loss": 0.3624, "step": 11837 }, { "epoch": 0.7415086362141593, "grad_norm": 0.8772461756918352, "learning_rate": 1.6518349163866365e-06, "loss": 0.4329, "step": 11838 }, { "epoch": 0.7415712742135017, "grad_norm": 0.9185585895800769, "learning_rate": 1.6510815993793588e-06, "loss": 0.3873, "step": 11839 }, { "epoch": 0.7416339122128439, "grad_norm": 0.8328112545495217, "learning_rate": 1.6503284202141844e-06, "loss": 0.3823, "step": 11840 }, { "epoch": 0.7416965502121862, "grad_norm": 0.8999956214448852, "learning_rate": 1.6495753789221136e-06, "loss": 0.4153, "step": 11841 }, { "epoch": 0.7417591882115285, "grad_norm": 0.8707698009148278, "learning_rate": 1.6488224755341425e-06, "loss": 0.363, "step": 11842 }, { "epoch": 0.7418218262108708, "grad_norm": 0.9513175400368599, "learning_rate": 1.6480697100812582e-06, "loss": 0.3953, "step": 11843 }, { "epoch": 0.7418844642102131, "grad_norm": 0.8552955380710765, "learning_rate": 1.6473170825944484e-06, "loss": 0.3861, "step": 11844 }, { "epoch": 0.7419471022095554, "grad_norm": 0.8579190117080489, "learning_rate": 1.6465645931046886e-06, "loss": 0.3903, "step": 11845 }, { "epoch": 0.7420097402088978, "grad_norm": 0.8807389857162559, "learning_rate": 1.645812241642954e-06, "loss": 0.3709, "step": 11846 }, { "epoch": 0.74207237820824, "grad_norm": 0.8447124427572337, "learning_rate": 1.64506002824021e-06, "loss": 0.377, "step": 11847 }, { "epoch": 0.7421350162075824, "grad_norm": 0.9016128035041909, "learning_rate": 1.644307952927418e-06, "loss": 0.386, "step": 11848 }, { "epoch": 0.7421976542069246, "grad_norm": 0.8335771504814392, "learning_rate": 1.643556015735534e-06, "loss": 0.3803, "step": 11849 }, { "epoch": 0.7422602922062669, "grad_norm": 0.8371833752708828, "learning_rate": 1.6428042166955066e-06, "loss": 0.3749, "step": 11850 }, { "epoch": 0.7423229302056092, "grad_norm": 0.8757755887933715, "learning_rate": 1.642052555838281e-06, "loss": 0.4017, "step": 11851 }, { "epoch": 0.7423855682049515, "grad_norm": 0.9147134564510182, "learning_rate": 1.6413010331947942e-06, "loss": 0.4031, "step": 11852 }, { "epoch": 0.7424482062042939, "grad_norm": 0.7778454165207621, "learning_rate": 1.6405496487959815e-06, "loss": 0.3456, "step": 11853 }, { "epoch": 0.7425108442036361, "grad_norm": 0.8729887265002357, "learning_rate": 1.6397984026727677e-06, "loss": 0.3767, "step": 11854 }, { "epoch": 0.7425734822029785, "grad_norm": 0.9001793216399397, "learning_rate": 1.639047294856076e-06, "loss": 0.3411, "step": 11855 }, { "epoch": 0.7426361202023207, "grad_norm": 1.1026505822681958, "learning_rate": 1.6382963253768225e-06, "loss": 0.4014, "step": 11856 }, { "epoch": 0.7426987582016631, "grad_norm": 0.8825129663154513, "learning_rate": 1.6375454942659153e-06, "loss": 0.3706, "step": 11857 }, { "epoch": 0.7427613962010053, "grad_norm": 0.8626373132304608, "learning_rate": 1.63679480155426e-06, "loss": 0.3954, "step": 11858 }, { "epoch": 0.7428240342003476, "grad_norm": 0.7761317522481476, "learning_rate": 1.636044247272755e-06, "loss": 0.3847, "step": 11859 }, { "epoch": 0.74288667219969, "grad_norm": 0.8452930870960725, "learning_rate": 1.635293831452292e-06, "loss": 0.3898, "step": 11860 }, { "epoch": 0.7429493101990322, "grad_norm": 0.814954677412457, "learning_rate": 1.6345435541237604e-06, "loss": 0.3867, "step": 11861 }, { "epoch": 0.7430119481983746, "grad_norm": 0.90526297557367, "learning_rate": 1.6337934153180406e-06, "loss": 0.3872, "step": 11862 }, { "epoch": 0.7430745861977168, "grad_norm": 0.7852354276068977, "learning_rate": 1.633043415066008e-06, "loss": 0.3724, "step": 11863 }, { "epoch": 0.7431372241970592, "grad_norm": 0.836996312279598, "learning_rate": 1.632293553398534e-06, "loss": 0.3573, "step": 11864 }, { "epoch": 0.7431998621964014, "grad_norm": 0.8256371179420641, "learning_rate": 1.6315438303464826e-06, "loss": 0.3904, "step": 11865 }, { "epoch": 0.7432625001957438, "grad_norm": 0.7737962636039803, "learning_rate": 1.6307942459407128e-06, "loss": 0.3381, "step": 11866 }, { "epoch": 0.743325138195086, "grad_norm": 0.8796370808828068, "learning_rate": 1.6300448002120767e-06, "loss": 0.3679, "step": 11867 }, { "epoch": 0.7433877761944283, "grad_norm": 0.8589009828177969, "learning_rate": 1.6292954931914223e-06, "loss": 0.4023, "step": 11868 }, { "epoch": 0.7434504141937707, "grad_norm": 0.8413680639096403, "learning_rate": 1.6285463249095896e-06, "loss": 0.383, "step": 11869 }, { "epoch": 0.7435130521931129, "grad_norm": 0.8081772302151818, "learning_rate": 1.6277972953974174e-06, "loss": 0.3675, "step": 11870 }, { "epoch": 0.7435756901924553, "grad_norm": 0.9259100592922929, "learning_rate": 1.6270484046857342e-06, "loss": 0.4415, "step": 11871 }, { "epoch": 0.7436383281917975, "grad_norm": 0.9046595136047892, "learning_rate": 1.6262996528053638e-06, "loss": 0.4188, "step": 11872 }, { "epoch": 0.7437009661911399, "grad_norm": 0.5979113494552318, "learning_rate": 1.6255510397871267e-06, "loss": 0.4481, "step": 11873 }, { "epoch": 0.7437636041904822, "grad_norm": 0.9042197393744067, "learning_rate": 1.6248025656618355e-06, "loss": 0.3963, "step": 11874 }, { "epoch": 0.7438262421898244, "grad_norm": 0.93483188662867, "learning_rate": 1.6240542304602968e-06, "loss": 0.4175, "step": 11875 }, { "epoch": 0.7438888801891668, "grad_norm": 0.9142103451192279, "learning_rate": 1.6233060342133132e-06, "loss": 0.4276, "step": 11876 }, { "epoch": 0.743951518188509, "grad_norm": 0.822849293872721, "learning_rate": 1.6225579769516781e-06, "loss": 0.4122, "step": 11877 }, { "epoch": 0.7440141561878514, "grad_norm": 0.8698794743099043, "learning_rate": 1.6218100587061853e-06, "loss": 0.4258, "step": 11878 }, { "epoch": 0.7440767941871936, "grad_norm": 0.7817149532748595, "learning_rate": 1.621062279507617e-06, "loss": 0.3791, "step": 11879 }, { "epoch": 0.744139432186536, "grad_norm": 0.909483219890683, "learning_rate": 1.620314639386753e-06, "loss": 0.4084, "step": 11880 }, { "epoch": 0.7442020701858783, "grad_norm": 0.8520694354542131, "learning_rate": 1.6195671383743656e-06, "loss": 0.3717, "step": 11881 }, { "epoch": 0.7442647081852206, "grad_norm": 0.9485319452211051, "learning_rate": 1.6188197765012203e-06, "loss": 0.4232, "step": 11882 }, { "epoch": 0.7443273461845629, "grad_norm": 0.8513189199463589, "learning_rate": 1.6180725537980824e-06, "loss": 0.3637, "step": 11883 }, { "epoch": 0.7443899841839051, "grad_norm": 0.8098699914662619, "learning_rate": 1.6173254702957058e-06, "loss": 0.3562, "step": 11884 }, { "epoch": 0.7444526221832475, "grad_norm": 0.8464637476623581, "learning_rate": 1.6165785260248407e-06, "loss": 0.3475, "step": 11885 }, { "epoch": 0.7445152601825897, "grad_norm": 0.7377754149579766, "learning_rate": 1.61583172101623e-06, "loss": 0.3769, "step": 11886 }, { "epoch": 0.7445778981819321, "grad_norm": 0.8619488771945731, "learning_rate": 1.6150850553006153e-06, "loss": 0.3593, "step": 11887 }, { "epoch": 0.7446405361812743, "grad_norm": 0.8793871537236594, "learning_rate": 1.6143385289087282e-06, "loss": 0.4452, "step": 11888 }, { "epoch": 0.7447031741806167, "grad_norm": 0.8818230387213096, "learning_rate": 1.6135921418712959e-06, "loss": 0.3896, "step": 11889 }, { "epoch": 0.744765812179959, "grad_norm": 0.823266602215866, "learning_rate": 1.612845894219039e-06, "loss": 0.383, "step": 11890 }, { "epoch": 0.7448284501793013, "grad_norm": 0.8508000692902633, "learning_rate": 1.612099785982672e-06, "loss": 0.3995, "step": 11891 }, { "epoch": 0.7448910881786436, "grad_norm": 0.6491359922407461, "learning_rate": 1.6113538171929088e-06, "loss": 0.4546, "step": 11892 }, { "epoch": 0.7449537261779858, "grad_norm": 0.8483934890691623, "learning_rate": 1.6106079878804515e-06, "loss": 0.3943, "step": 11893 }, { "epoch": 0.7450163641773282, "grad_norm": 0.8020489722952857, "learning_rate": 1.6098622980759982e-06, "loss": 0.3664, "step": 11894 }, { "epoch": 0.7450790021766704, "grad_norm": 0.9182462462652727, "learning_rate": 1.6091167478102404e-06, "loss": 0.4054, "step": 11895 }, { "epoch": 0.7451416401760128, "grad_norm": 0.7998969976275621, "learning_rate": 1.6083713371138681e-06, "loss": 0.3892, "step": 11896 }, { "epoch": 0.7452042781753551, "grad_norm": 0.6339988269446581, "learning_rate": 1.6076260660175613e-06, "loss": 0.4755, "step": 11897 }, { "epoch": 0.7452669161746974, "grad_norm": 0.8167324000223108, "learning_rate": 1.6068809345519954e-06, "loss": 0.3739, "step": 11898 }, { "epoch": 0.7453295541740397, "grad_norm": 0.8758623272457485, "learning_rate": 1.6061359427478396e-06, "loss": 0.4217, "step": 11899 }, { "epoch": 0.7453921921733819, "grad_norm": 0.875641350547336, "learning_rate": 1.6053910906357567e-06, "loss": 0.3998, "step": 11900 }, { "epoch": 0.7454548301727243, "grad_norm": 0.8262546852758041, "learning_rate": 1.6046463782464088e-06, "loss": 0.4046, "step": 11901 }, { "epoch": 0.7455174681720665, "grad_norm": 0.8552060657183658, "learning_rate": 1.6039018056104455e-06, "loss": 0.3753, "step": 11902 }, { "epoch": 0.7455801061714089, "grad_norm": 0.7877645230873452, "learning_rate": 1.603157372758513e-06, "loss": 0.3363, "step": 11903 }, { "epoch": 0.7456427441707512, "grad_norm": 0.8149858404974144, "learning_rate": 1.6024130797212551e-06, "loss": 0.3798, "step": 11904 }, { "epoch": 0.7457053821700935, "grad_norm": 0.8373782972441624, "learning_rate": 1.6016689265293056e-06, "loss": 0.396, "step": 11905 }, { "epoch": 0.7457680201694358, "grad_norm": 0.6492516250448825, "learning_rate": 1.6009249132132937e-06, "loss": 0.4578, "step": 11906 }, { "epoch": 0.7458306581687781, "grad_norm": 0.885191727095637, "learning_rate": 1.600181039803843e-06, "loss": 0.3384, "step": 11907 }, { "epoch": 0.7458932961681204, "grad_norm": 0.9180938211960823, "learning_rate": 1.5994373063315722e-06, "loss": 0.4148, "step": 11908 }, { "epoch": 0.7459559341674626, "grad_norm": 0.8250442724729949, "learning_rate": 1.5986937128270914e-06, "loss": 0.4194, "step": 11909 }, { "epoch": 0.746018572166805, "grad_norm": 0.9185782233348584, "learning_rate": 1.59795025932101e-06, "loss": 0.3725, "step": 11910 }, { "epoch": 0.7460812101661473, "grad_norm": 0.7821795665146143, "learning_rate": 1.5972069458439278e-06, "loss": 0.4073, "step": 11911 }, { "epoch": 0.7461438481654896, "grad_norm": 0.8606406127854005, "learning_rate": 1.596463772426437e-06, "loss": 0.3521, "step": 11912 }, { "epoch": 0.7462064861648319, "grad_norm": 0.8180239298324905, "learning_rate": 1.5957207390991308e-06, "loss": 0.3759, "step": 11913 }, { "epoch": 0.7462691241641742, "grad_norm": 0.9280067382055457, "learning_rate": 1.5949778458925912e-06, "loss": 0.4307, "step": 11914 }, { "epoch": 0.7463317621635165, "grad_norm": 0.8232469428191261, "learning_rate": 1.5942350928373956e-06, "loss": 0.3895, "step": 11915 }, { "epoch": 0.7463944001628589, "grad_norm": 0.8494001127210588, "learning_rate": 1.5934924799641156e-06, "loss": 0.3764, "step": 11916 }, { "epoch": 0.7464570381622011, "grad_norm": 0.8233464942588791, "learning_rate": 1.592750007303317e-06, "loss": 0.3748, "step": 11917 }, { "epoch": 0.7465196761615434, "grad_norm": 0.8493570296353078, "learning_rate": 1.5920076748855595e-06, "loss": 0.3878, "step": 11918 }, { "epoch": 0.7465823141608857, "grad_norm": 0.8961262182960409, "learning_rate": 1.5912654827413997e-06, "loss": 0.3835, "step": 11919 }, { "epoch": 0.746644952160228, "grad_norm": 0.8342343667216483, "learning_rate": 1.5905234309013857e-06, "loss": 0.3704, "step": 11920 }, { "epoch": 0.7467075901595703, "grad_norm": 0.836408151192671, "learning_rate": 1.589781519396058e-06, "loss": 0.3854, "step": 11921 }, { "epoch": 0.7467702281589126, "grad_norm": 0.8076876836010475, "learning_rate": 1.5890397482559582e-06, "loss": 0.389, "step": 11922 }, { "epoch": 0.746832866158255, "grad_norm": 0.8688345555762208, "learning_rate": 1.5882981175116153e-06, "loss": 0.4059, "step": 11923 }, { "epoch": 0.7468955041575972, "grad_norm": 0.8962333221819213, "learning_rate": 1.587556627193555e-06, "loss": 0.4167, "step": 11924 }, { "epoch": 0.7469581421569395, "grad_norm": 0.6718107623112778, "learning_rate": 1.586815277332297e-06, "loss": 0.4679, "step": 11925 }, { "epoch": 0.7470207801562818, "grad_norm": 0.841996753974758, "learning_rate": 1.5860740679583564e-06, "loss": 0.3707, "step": 11926 }, { "epoch": 0.7470834181556241, "grad_norm": 0.9142455019380605, "learning_rate": 1.5853329991022403e-06, "loss": 0.4218, "step": 11927 }, { "epoch": 0.7471460561549664, "grad_norm": 0.7105677436666755, "learning_rate": 1.58459207079445e-06, "loss": 0.2911, "step": 11928 }, { "epoch": 0.7472086941543087, "grad_norm": 0.8507402184743743, "learning_rate": 1.5838512830654845e-06, "loss": 0.3796, "step": 11929 }, { "epoch": 0.747271332153651, "grad_norm": 0.7867281045043849, "learning_rate": 1.5831106359458354e-06, "loss": 0.3608, "step": 11930 }, { "epoch": 0.7473339701529933, "grad_norm": 0.8799705391134328, "learning_rate": 1.5823701294659872e-06, "loss": 0.4116, "step": 11931 }, { "epoch": 0.7473966081523357, "grad_norm": 0.8962421779456973, "learning_rate": 1.5816297636564187e-06, "loss": 0.417, "step": 11932 }, { "epoch": 0.7474592461516779, "grad_norm": 0.8698199846131511, "learning_rate": 1.5808895385476031e-06, "loss": 0.3847, "step": 11933 }, { "epoch": 0.7475218841510202, "grad_norm": 0.8530753045810614, "learning_rate": 1.580149454170009e-06, "loss": 0.3755, "step": 11934 }, { "epoch": 0.7475845221503625, "grad_norm": 0.8146859065457619, "learning_rate": 1.5794095105540985e-06, "loss": 0.3579, "step": 11935 }, { "epoch": 0.7476471601497048, "grad_norm": 0.588894589288442, "learning_rate": 1.578669707730327e-06, "loss": 0.439, "step": 11936 }, { "epoch": 0.7477097981490471, "grad_norm": 0.9079302217927938, "learning_rate": 1.5779300457291442e-06, "loss": 0.3863, "step": 11937 }, { "epoch": 0.7477724361483894, "grad_norm": 0.9000014853025456, "learning_rate": 1.5771905245809954e-06, "loss": 0.3816, "step": 11938 }, { "epoch": 0.7478350741477318, "grad_norm": 0.8333665745594241, "learning_rate": 1.5764511443163216e-06, "loss": 0.3835, "step": 11939 }, { "epoch": 0.747897712147074, "grad_norm": 0.9100275271301139, "learning_rate": 1.5757119049655544e-06, "loss": 0.4076, "step": 11940 }, { "epoch": 0.7479603501464164, "grad_norm": 0.8404770674447912, "learning_rate": 1.5749728065591207e-06, "loss": 0.3906, "step": 11941 }, { "epoch": 0.7480229881457586, "grad_norm": 0.8571678464949887, "learning_rate": 1.5742338491274418e-06, "loss": 0.394, "step": 11942 }, { "epoch": 0.7480856261451009, "grad_norm": 0.9104724858576014, "learning_rate": 1.5734950327009336e-06, "loss": 0.401, "step": 11943 }, { "epoch": 0.7481482641444432, "grad_norm": 0.8407428282688308, "learning_rate": 1.5727563573100058e-06, "loss": 0.3635, "step": 11944 }, { "epoch": 0.7482109021437855, "grad_norm": 0.8015801455113569, "learning_rate": 1.5720178229850608e-06, "loss": 0.3499, "step": 11945 }, { "epoch": 0.7482735401431279, "grad_norm": 0.5999472346039169, "learning_rate": 1.5712794297565003e-06, "loss": 0.463, "step": 11946 }, { "epoch": 0.7483361781424701, "grad_norm": 0.8488274326514242, "learning_rate": 1.570541177654713e-06, "loss": 0.3951, "step": 11947 }, { "epoch": 0.7483988161418125, "grad_norm": 0.7824717886174579, "learning_rate": 1.5698030667100894e-06, "loss": 0.3876, "step": 11948 }, { "epoch": 0.7484614541411547, "grad_norm": 0.8538961805603468, "learning_rate": 1.569065096953008e-06, "loss": 0.4132, "step": 11949 }, { "epoch": 0.748524092140497, "grad_norm": 0.8857909659072213, "learning_rate": 1.5683272684138439e-06, "loss": 0.3733, "step": 11950 }, { "epoch": 0.7485867301398393, "grad_norm": 0.8812493843687469, "learning_rate": 1.5675895811229664e-06, "loss": 0.3726, "step": 11951 }, { "epoch": 0.7486493681391816, "grad_norm": 0.8303935681873068, "learning_rate": 1.5668520351107391e-06, "loss": 0.409, "step": 11952 }, { "epoch": 0.748712006138524, "grad_norm": 0.8604884561967024, "learning_rate": 1.5661146304075193e-06, "loss": 0.4333, "step": 11953 }, { "epoch": 0.7487746441378662, "grad_norm": 0.9401877892710939, "learning_rate": 1.5653773670436567e-06, "loss": 0.3862, "step": 11954 }, { "epoch": 0.7488372821372086, "grad_norm": 0.8575874688932112, "learning_rate": 1.5646402450494991e-06, "loss": 0.3843, "step": 11955 }, { "epoch": 0.7488999201365508, "grad_norm": 0.8390913791869685, "learning_rate": 1.5639032644553886e-06, "loss": 0.3885, "step": 11956 }, { "epoch": 0.7489625581358932, "grad_norm": 0.8021998301199078, "learning_rate": 1.5631664252916574e-06, "loss": 0.3875, "step": 11957 }, { "epoch": 0.7490251961352354, "grad_norm": 0.6643164170151098, "learning_rate": 1.5624297275886341e-06, "loss": 0.4293, "step": 11958 }, { "epoch": 0.7490878341345777, "grad_norm": 0.870196420178917, "learning_rate": 1.561693171376641e-06, "loss": 0.3709, "step": 11959 }, { "epoch": 0.74915047213392, "grad_norm": 0.8479236465329602, "learning_rate": 1.560956756685995e-06, "loss": 0.4012, "step": 11960 }, { "epoch": 0.7492131101332623, "grad_norm": 0.885587748282231, "learning_rate": 1.5602204835470075e-06, "loss": 0.3893, "step": 11961 }, { "epoch": 0.7492757481326047, "grad_norm": 0.9195862575620303, "learning_rate": 1.5594843519899822e-06, "loss": 0.3898, "step": 11962 }, { "epoch": 0.7493383861319469, "grad_norm": 0.9261706819854788, "learning_rate": 1.5587483620452204e-06, "loss": 0.3719, "step": 11963 }, { "epoch": 0.7494010241312893, "grad_norm": 0.7765288124994855, "learning_rate": 1.5580125137430136e-06, "loss": 0.3668, "step": 11964 }, { "epoch": 0.7494636621306315, "grad_norm": 1.0009474452278564, "learning_rate": 1.5572768071136519e-06, "loss": 0.3951, "step": 11965 }, { "epoch": 0.7495263001299739, "grad_norm": 0.8603722519775233, "learning_rate": 1.556541242187416e-06, "loss": 0.361, "step": 11966 }, { "epoch": 0.7495889381293162, "grad_norm": 0.8268644822439459, "learning_rate": 1.5558058189945818e-06, "loss": 0.358, "step": 11967 }, { "epoch": 0.7496515761286584, "grad_norm": 0.8817672739278148, "learning_rate": 1.5550705375654196e-06, "loss": 0.4261, "step": 11968 }, { "epoch": 0.7497142141280008, "grad_norm": 0.7689901967077553, "learning_rate": 1.5543353979301928e-06, "loss": 0.369, "step": 11969 }, { "epoch": 0.749776852127343, "grad_norm": 0.8233937523660382, "learning_rate": 1.5536004001191612e-06, "loss": 0.3942, "step": 11970 }, { "epoch": 0.7498394901266854, "grad_norm": 0.8858165351981097, "learning_rate": 1.5528655441625746e-06, "loss": 0.3675, "step": 11971 }, { "epoch": 0.7499021281260276, "grad_norm": 0.6066396927793752, "learning_rate": 1.552130830090684e-06, "loss": 0.4611, "step": 11972 }, { "epoch": 0.74996476612537, "grad_norm": 0.8704888308346053, "learning_rate": 1.5513962579337282e-06, "loss": 0.3717, "step": 11973 }, { "epoch": 0.7500274041247122, "grad_norm": 0.7992132261822809, "learning_rate": 1.550661827721941e-06, "loss": 0.3777, "step": 11974 }, { "epoch": 0.7500900421240546, "grad_norm": 0.8263031706815099, "learning_rate": 1.549927539485555e-06, "loss": 0.3695, "step": 11975 }, { "epoch": 0.7501526801233969, "grad_norm": 0.8586928902613998, "learning_rate": 1.549193393254792e-06, "loss": 0.3522, "step": 11976 }, { "epoch": 0.7502153181227391, "grad_norm": 0.8548493109313451, "learning_rate": 1.5484593890598688e-06, "loss": 0.3783, "step": 11977 }, { "epoch": 0.7502779561220815, "grad_norm": 0.8523678329575161, "learning_rate": 1.5477255269309981e-06, "loss": 0.406, "step": 11978 }, { "epoch": 0.7503405941214237, "grad_norm": 0.808569482304882, "learning_rate": 1.5469918068983857e-06, "loss": 0.3322, "step": 11979 }, { "epoch": 0.7504032321207661, "grad_norm": 0.8491672799350333, "learning_rate": 1.5462582289922296e-06, "loss": 0.4096, "step": 11980 }, { "epoch": 0.7504658701201083, "grad_norm": 0.7971833584303613, "learning_rate": 1.5455247932427276e-06, "loss": 0.3509, "step": 11981 }, { "epoch": 0.7505285081194507, "grad_norm": 0.8367430127230451, "learning_rate": 1.5447914996800656e-06, "loss": 0.337, "step": 11982 }, { "epoch": 0.750591146118793, "grad_norm": 0.8563949224477432, "learning_rate": 1.5440583483344262e-06, "loss": 0.4055, "step": 11983 }, { "epoch": 0.7506537841181352, "grad_norm": 0.8491785452346478, "learning_rate": 1.5433253392359876e-06, "loss": 0.401, "step": 11984 }, { "epoch": 0.7507164221174776, "grad_norm": 0.8144619223160948, "learning_rate": 1.54259247241492e-06, "loss": 0.3905, "step": 11985 }, { "epoch": 0.7507790601168198, "grad_norm": 0.8641185109118955, "learning_rate": 1.5418597479013875e-06, "loss": 0.3715, "step": 11986 }, { "epoch": 0.7508416981161622, "grad_norm": 0.8968295887984475, "learning_rate": 1.54112716572555e-06, "loss": 0.4065, "step": 11987 }, { "epoch": 0.7509043361155044, "grad_norm": 0.5407236994527924, "learning_rate": 1.5403947259175583e-06, "loss": 0.4461, "step": 11988 }, { "epoch": 0.7509669741148468, "grad_norm": 0.8263792241123655, "learning_rate": 1.5396624285075639e-06, "loss": 0.3766, "step": 11989 }, { "epoch": 0.7510296121141891, "grad_norm": 0.8298491755745055, "learning_rate": 1.5389302735257061e-06, "loss": 0.3258, "step": 11990 }, { "epoch": 0.7510922501135314, "grad_norm": 0.8854934861887589, "learning_rate": 1.5381982610021206e-06, "loss": 0.3935, "step": 11991 }, { "epoch": 0.7511548881128737, "grad_norm": 0.889846768425515, "learning_rate": 1.5374663909669357e-06, "loss": 0.3971, "step": 11992 }, { "epoch": 0.7512175261122159, "grad_norm": 0.9406427409206626, "learning_rate": 1.5367346634502784e-06, "loss": 0.4021, "step": 11993 }, { "epoch": 0.7512801641115583, "grad_norm": 0.898920003606628, "learning_rate": 1.536003078482265e-06, "loss": 0.3792, "step": 11994 }, { "epoch": 0.7513428021109005, "grad_norm": 0.8818439404352337, "learning_rate": 1.5352716360930082e-06, "loss": 0.3882, "step": 11995 }, { "epoch": 0.7514054401102429, "grad_norm": 0.9551594206118696, "learning_rate": 1.5345403363126137e-06, "loss": 0.4314, "step": 11996 }, { "epoch": 0.7514680781095852, "grad_norm": 0.8986252417932213, "learning_rate": 1.5338091791711807e-06, "loss": 0.3826, "step": 11997 }, { "epoch": 0.7515307161089275, "grad_norm": 0.8354050909664956, "learning_rate": 1.5330781646988068e-06, "loss": 0.3674, "step": 11998 }, { "epoch": 0.7515933541082698, "grad_norm": 0.8437892890199851, "learning_rate": 1.532347292925579e-06, "loss": 0.3948, "step": 11999 }, { "epoch": 0.7516559921076121, "grad_norm": 0.887651998178469, "learning_rate": 1.53161656388158e-06, "loss": 0.4104, "step": 12000 }, { "epoch": 0.7517186301069544, "grad_norm": 0.8937004304914231, "learning_rate": 1.530885977596886e-06, "loss": 0.4185, "step": 12001 }, { "epoch": 0.7517812681062966, "grad_norm": 0.8356557978887268, "learning_rate": 1.53015553410157e-06, "loss": 0.3716, "step": 12002 }, { "epoch": 0.751843906105639, "grad_norm": 0.8719938508386742, "learning_rate": 1.529425233425697e-06, "loss": 0.3894, "step": 12003 }, { "epoch": 0.7519065441049813, "grad_norm": 0.8478615647617207, "learning_rate": 1.5286950755993253e-06, "loss": 0.3931, "step": 12004 }, { "epoch": 0.7519691821043236, "grad_norm": 0.9102791357400324, "learning_rate": 1.5279650606525081e-06, "loss": 0.4166, "step": 12005 }, { "epoch": 0.7520318201036659, "grad_norm": 0.882587472441255, "learning_rate": 1.5272351886152925e-06, "loss": 0.3677, "step": 12006 }, { "epoch": 0.7520944581030082, "grad_norm": 0.7897609497778899, "learning_rate": 1.526505459517722e-06, "loss": 0.3459, "step": 12007 }, { "epoch": 0.7521570961023505, "grad_norm": 0.9021480915944998, "learning_rate": 1.525775873389832e-06, "loss": 0.3621, "step": 12008 }, { "epoch": 0.7522197341016927, "grad_norm": 0.8866114496363339, "learning_rate": 1.525046430261652e-06, "loss": 0.3603, "step": 12009 }, { "epoch": 0.7522823721010351, "grad_norm": 0.6963022737378745, "learning_rate": 1.5243171301632042e-06, "loss": 0.4492, "step": 12010 }, { "epoch": 0.7523450101003774, "grad_norm": 0.8872131396940486, "learning_rate": 1.52358797312451e-06, "loss": 0.3938, "step": 12011 }, { "epoch": 0.7524076480997197, "grad_norm": 0.8283124681072626, "learning_rate": 1.52285895917558e-06, "loss": 0.38, "step": 12012 }, { "epoch": 0.752470286099062, "grad_norm": 0.8320077962929506, "learning_rate": 1.5221300883464207e-06, "loss": 0.3673, "step": 12013 }, { "epoch": 0.7525329240984043, "grad_norm": 0.8728298781511284, "learning_rate": 1.5214013606670308e-06, "loss": 0.4094, "step": 12014 }, { "epoch": 0.7525955620977466, "grad_norm": 0.8845317647705367, "learning_rate": 1.5206727761674085e-06, "loss": 0.3966, "step": 12015 }, { "epoch": 0.752658200097089, "grad_norm": 0.9663469692414564, "learning_rate": 1.51994433487754e-06, "loss": 0.3993, "step": 12016 }, { "epoch": 0.7527208380964312, "grad_norm": 0.9490561453142573, "learning_rate": 1.5192160368274088e-06, "loss": 0.3868, "step": 12017 }, { "epoch": 0.7527834760957735, "grad_norm": 0.8081491161280109, "learning_rate": 1.5184878820469917e-06, "loss": 0.3618, "step": 12018 }, { "epoch": 0.7528461140951158, "grad_norm": 0.8049908500896578, "learning_rate": 1.5177598705662593e-06, "loss": 0.3657, "step": 12019 }, { "epoch": 0.7529087520944581, "grad_norm": 0.7696115867496006, "learning_rate": 1.5170320024151758e-06, "loss": 0.2997, "step": 12020 }, { "epoch": 0.7529713900938004, "grad_norm": 0.8513884436413107, "learning_rate": 1.5163042776237031e-06, "loss": 0.3654, "step": 12021 }, { "epoch": 0.7530340280931427, "grad_norm": 0.8606073857279313, "learning_rate": 1.5155766962217927e-06, "loss": 0.4103, "step": 12022 }, { "epoch": 0.753096666092485, "grad_norm": 0.8340292149849713, "learning_rate": 1.5148492582393904e-06, "loss": 0.3663, "step": 12023 }, { "epoch": 0.7531593040918273, "grad_norm": 0.8334613612007321, "learning_rate": 1.5141219637064413e-06, "loss": 0.3486, "step": 12024 }, { "epoch": 0.7532219420911697, "grad_norm": 0.6307241710339819, "learning_rate": 1.5133948126528797e-06, "loss": 0.4648, "step": 12025 }, { "epoch": 0.7532845800905119, "grad_norm": 0.9541416966637157, "learning_rate": 1.512667805108634e-06, "loss": 0.4088, "step": 12026 }, { "epoch": 0.7533472180898542, "grad_norm": 0.8423412597779133, "learning_rate": 1.511940941103629e-06, "loss": 0.3912, "step": 12027 }, { "epoch": 0.7534098560891965, "grad_norm": 0.8747448159343094, "learning_rate": 1.5112142206677827e-06, "loss": 0.3969, "step": 12028 }, { "epoch": 0.7534724940885388, "grad_norm": 0.7693822848165849, "learning_rate": 1.5104876438310045e-06, "loss": 0.3603, "step": 12029 }, { "epoch": 0.7535351320878811, "grad_norm": 0.9333795522570462, "learning_rate": 1.509761210623204e-06, "loss": 0.4269, "step": 12030 }, { "epoch": 0.7535977700872234, "grad_norm": 0.8405048802783457, "learning_rate": 1.5090349210742806e-06, "loss": 0.3669, "step": 12031 }, { "epoch": 0.7536604080865658, "grad_norm": 0.8810852948344641, "learning_rate": 1.5083087752141257e-06, "loss": 0.3846, "step": 12032 }, { "epoch": 0.753723046085908, "grad_norm": 0.8631990981701155, "learning_rate": 1.5075827730726312e-06, "loss": 0.3743, "step": 12033 }, { "epoch": 0.7537856840852503, "grad_norm": 0.8939501503273081, "learning_rate": 1.5068569146796786e-06, "loss": 0.4198, "step": 12034 }, { "epoch": 0.7538483220845926, "grad_norm": 0.9062960752381407, "learning_rate": 1.506131200065143e-06, "loss": 0.4364, "step": 12035 }, { "epoch": 0.7539109600839349, "grad_norm": 0.8302509525518216, "learning_rate": 1.5054056292588965e-06, "loss": 0.3824, "step": 12036 }, { "epoch": 0.7539735980832772, "grad_norm": 0.6444690179366316, "learning_rate": 1.504680202290802e-06, "loss": 0.4401, "step": 12037 }, { "epoch": 0.7540362360826195, "grad_norm": 0.8288550252239916, "learning_rate": 1.5039549191907183e-06, "loss": 0.4137, "step": 12038 }, { "epoch": 0.7540988740819619, "grad_norm": 0.887074236445502, "learning_rate": 1.5032297799885004e-06, "loss": 0.4127, "step": 12039 }, { "epoch": 0.7541615120813041, "grad_norm": 0.8741853461113877, "learning_rate": 1.5025047847139918e-06, "loss": 0.3864, "step": 12040 }, { "epoch": 0.7542241500806465, "grad_norm": 1.0003584149816809, "learning_rate": 1.5017799333970373e-06, "loss": 0.3809, "step": 12041 }, { "epoch": 0.7542867880799887, "grad_norm": 0.9075992258664127, "learning_rate": 1.5010552260674705e-06, "loss": 0.3754, "step": 12042 }, { "epoch": 0.754349426079331, "grad_norm": 0.822777704233257, "learning_rate": 1.5003306627551196e-06, "loss": 0.3891, "step": 12043 }, { "epoch": 0.7544120640786733, "grad_norm": 0.7804710526194045, "learning_rate": 1.499606243489808e-06, "loss": 0.423, "step": 12044 }, { "epoch": 0.7544747020780156, "grad_norm": 0.7914756448036254, "learning_rate": 1.4988819683013533e-06, "loss": 0.3731, "step": 12045 }, { "epoch": 0.754537340077358, "grad_norm": 0.8919338372953055, "learning_rate": 1.4981578372195666e-06, "loss": 0.3563, "step": 12046 }, { "epoch": 0.7545999780767002, "grad_norm": 0.8859717989464533, "learning_rate": 1.497433850274252e-06, "loss": 0.4231, "step": 12047 }, { "epoch": 0.7546626160760426, "grad_norm": 0.8315980553782404, "learning_rate": 1.4967100074952112e-06, "loss": 0.3744, "step": 12048 }, { "epoch": 0.7547252540753848, "grad_norm": 0.8654507408627964, "learning_rate": 1.4959863089122357e-06, "loss": 0.4119, "step": 12049 }, { "epoch": 0.7547878920747272, "grad_norm": 0.8754364771840297, "learning_rate": 1.4952627545551156e-06, "loss": 0.4102, "step": 12050 }, { "epoch": 0.7548505300740694, "grad_norm": 0.852395707280758, "learning_rate": 1.494539344453631e-06, "loss": 0.385, "step": 12051 }, { "epoch": 0.7549131680734117, "grad_norm": 0.8622502522919606, "learning_rate": 1.4938160786375571e-06, "loss": 0.4335, "step": 12052 }, { "epoch": 0.754975806072754, "grad_norm": 0.8363698520867168, "learning_rate": 1.4930929571366649e-06, "loss": 0.3592, "step": 12053 }, { "epoch": 0.7550384440720963, "grad_norm": 0.8255128263141399, "learning_rate": 1.4923699799807173e-06, "loss": 0.3555, "step": 12054 }, { "epoch": 0.7551010820714387, "grad_norm": 0.8847000275755614, "learning_rate": 1.491647147199472e-06, "loss": 0.3839, "step": 12055 }, { "epoch": 0.7551637200707809, "grad_norm": 0.8782690476810847, "learning_rate": 1.49092445882268e-06, "loss": 0.3851, "step": 12056 }, { "epoch": 0.7552263580701233, "grad_norm": 0.8114538366080732, "learning_rate": 1.4902019148800906e-06, "loss": 0.3434, "step": 12057 }, { "epoch": 0.7552889960694655, "grad_norm": 0.8908265666607006, "learning_rate": 1.4894795154014396e-06, "loss": 0.4367, "step": 12058 }, { "epoch": 0.7553516340688078, "grad_norm": 0.9022545634382629, "learning_rate": 1.4887572604164651e-06, "loss": 0.3218, "step": 12059 }, { "epoch": 0.7554142720681501, "grad_norm": 0.8082514697085837, "learning_rate": 1.488035149954894e-06, "loss": 0.3735, "step": 12060 }, { "epoch": 0.7554769100674924, "grad_norm": 0.8190592068141946, "learning_rate": 1.4873131840464472e-06, "loss": 0.3449, "step": 12061 }, { "epoch": 0.7555395480668348, "grad_norm": 0.8412084901721537, "learning_rate": 1.4865913627208423e-06, "loss": 0.4023, "step": 12062 }, { "epoch": 0.755602186066177, "grad_norm": 0.8508167281017351, "learning_rate": 1.4858696860077887e-06, "loss": 0.3708, "step": 12063 }, { "epoch": 0.7556648240655194, "grad_norm": 0.8758496322682837, "learning_rate": 1.485148153936991e-06, "loss": 0.3916, "step": 12064 }, { "epoch": 0.7557274620648616, "grad_norm": 0.8789108499442083, "learning_rate": 1.4844267665381461e-06, "loss": 0.4069, "step": 12065 }, { "epoch": 0.755790100064204, "grad_norm": 0.889033140297008, "learning_rate": 1.4837055238409482e-06, "loss": 0.3924, "step": 12066 }, { "epoch": 0.7558527380635462, "grad_norm": 0.869399837784918, "learning_rate": 1.4829844258750854e-06, "loss": 0.4203, "step": 12067 }, { "epoch": 0.7559153760628885, "grad_norm": 0.8178829653922268, "learning_rate": 1.4822634726702362e-06, "loss": 0.3932, "step": 12068 }, { "epoch": 0.7559780140622309, "grad_norm": 0.8326968119522478, "learning_rate": 1.4815426642560753e-06, "loss": 0.3462, "step": 12069 }, { "epoch": 0.7560406520615731, "grad_norm": 0.868141065309054, "learning_rate": 1.4808220006622714e-06, "loss": 0.4209, "step": 12070 }, { "epoch": 0.7561032900609155, "grad_norm": 0.5779643001989403, "learning_rate": 1.4801014819184872e-06, "loss": 0.4322, "step": 12071 }, { "epoch": 0.7561659280602577, "grad_norm": 0.8493150641320725, "learning_rate": 1.4793811080543791e-06, "loss": 0.3707, "step": 12072 }, { "epoch": 0.7562285660596001, "grad_norm": 0.9353432798726812, "learning_rate": 1.4786608790995965e-06, "loss": 0.412, "step": 12073 }, { "epoch": 0.7562912040589423, "grad_norm": 0.8845801508952047, "learning_rate": 1.4779407950837876e-06, "loss": 0.4216, "step": 12074 }, { "epoch": 0.7563538420582847, "grad_norm": 0.8135034036090062, "learning_rate": 1.4772208560365875e-06, "loss": 0.3649, "step": 12075 }, { "epoch": 0.756416480057627, "grad_norm": 0.8224854715111544, "learning_rate": 1.4765010619876325e-06, "loss": 0.4017, "step": 12076 }, { "epoch": 0.7564791180569692, "grad_norm": 0.8370650229538394, "learning_rate": 1.4757814129665483e-06, "loss": 0.3722, "step": 12077 }, { "epoch": 0.7565417560563116, "grad_norm": 0.8777461323114917, "learning_rate": 1.4750619090029545e-06, "loss": 0.4136, "step": 12078 }, { "epoch": 0.7566043940556538, "grad_norm": 0.8055311314149846, "learning_rate": 1.4743425501264669e-06, "loss": 0.3439, "step": 12079 }, { "epoch": 0.7566670320549962, "grad_norm": 0.8627102798447374, "learning_rate": 1.4736233363666947e-06, "loss": 0.3963, "step": 12080 }, { "epoch": 0.7567296700543384, "grad_norm": 0.9237897223725997, "learning_rate": 1.4729042677532402e-06, "loss": 0.3651, "step": 12081 }, { "epoch": 0.7567923080536808, "grad_norm": 0.8751048493186416, "learning_rate": 1.4721853443156992e-06, "loss": 0.3874, "step": 12082 }, { "epoch": 0.7568549460530231, "grad_norm": 0.8078939659599695, "learning_rate": 1.4714665660836657e-06, "loss": 0.3817, "step": 12083 }, { "epoch": 0.7569175840523654, "grad_norm": 0.8859802167491553, "learning_rate": 1.4707479330867214e-06, "loss": 0.3764, "step": 12084 }, { "epoch": 0.7569802220517077, "grad_norm": 0.8357608618559447, "learning_rate": 1.4700294453544495e-06, "loss": 0.3817, "step": 12085 }, { "epoch": 0.7570428600510499, "grad_norm": 0.9007892305382708, "learning_rate": 1.4693111029164203e-06, "loss": 0.3989, "step": 12086 }, { "epoch": 0.7571054980503923, "grad_norm": 0.8613858842284355, "learning_rate": 1.468592905802202e-06, "loss": 0.405, "step": 12087 }, { "epoch": 0.7571681360497345, "grad_norm": 0.5787630543574862, "learning_rate": 1.4678748540413545e-06, "loss": 0.4498, "step": 12088 }, { "epoch": 0.7572307740490769, "grad_norm": 0.8614577077308977, "learning_rate": 1.467156947663434e-06, "loss": 0.3962, "step": 12089 }, { "epoch": 0.7572934120484192, "grad_norm": 0.8149783363975587, "learning_rate": 1.4664391866979887e-06, "loss": 0.3926, "step": 12090 }, { "epoch": 0.7573560500477615, "grad_norm": 0.837917429529752, "learning_rate": 1.4657215711745614e-06, "loss": 0.3995, "step": 12091 }, { "epoch": 0.7574186880471038, "grad_norm": 0.8489410439696408, "learning_rate": 1.4650041011226918e-06, "loss": 0.3635, "step": 12092 }, { "epoch": 0.757481326046446, "grad_norm": 0.7804052952517737, "learning_rate": 1.4642867765719077e-06, "loss": 0.3516, "step": 12093 }, { "epoch": 0.7575439640457884, "grad_norm": 0.6308275215487699, "learning_rate": 1.4635695975517378e-06, "loss": 0.477, "step": 12094 }, { "epoch": 0.7576066020451306, "grad_norm": 0.8448207262879098, "learning_rate": 1.4628525640916997e-06, "loss": 0.4399, "step": 12095 }, { "epoch": 0.757669240044473, "grad_norm": 0.8022785374886535, "learning_rate": 1.4621356762213062e-06, "loss": 0.3923, "step": 12096 }, { "epoch": 0.7577318780438153, "grad_norm": 0.5982452442792109, "learning_rate": 1.4614189339700652e-06, "loss": 0.4482, "step": 12097 }, { "epoch": 0.7577945160431576, "grad_norm": 0.9614340097255135, "learning_rate": 1.4607023373674778e-06, "loss": 0.4606, "step": 12098 }, { "epoch": 0.7578571540424999, "grad_norm": 0.8908489262647957, "learning_rate": 1.4599858864430377e-06, "loss": 0.4232, "step": 12099 }, { "epoch": 0.7579197920418422, "grad_norm": 0.87538951886884, "learning_rate": 1.459269581226237e-06, "loss": 0.4207, "step": 12100 }, { "epoch": 0.7579824300411845, "grad_norm": 0.8294292724135188, "learning_rate": 1.4585534217465575e-06, "loss": 0.3906, "step": 12101 }, { "epoch": 0.7580450680405267, "grad_norm": 0.8972783137529105, "learning_rate": 1.4578374080334745e-06, "loss": 0.4211, "step": 12102 }, { "epoch": 0.7581077060398691, "grad_norm": 0.8161628837569789, "learning_rate": 1.4571215401164634e-06, "loss": 0.3617, "step": 12103 }, { "epoch": 0.7581703440392114, "grad_norm": 0.8296136213492064, "learning_rate": 1.456405818024987e-06, "loss": 0.3888, "step": 12104 }, { "epoch": 0.7582329820385537, "grad_norm": 0.8266694684585637, "learning_rate": 1.4556902417885048e-06, "loss": 0.375, "step": 12105 }, { "epoch": 0.758295620037896, "grad_norm": 0.90250392254311, "learning_rate": 1.4549748114364704e-06, "loss": 0.386, "step": 12106 }, { "epoch": 0.7583582580372383, "grad_norm": 0.8414386087688799, "learning_rate": 1.4542595269983301e-06, "loss": 0.3414, "step": 12107 }, { "epoch": 0.7584208960365806, "grad_norm": 0.8698788675521039, "learning_rate": 1.4535443885035249e-06, "loss": 0.4012, "step": 12108 }, { "epoch": 0.758483534035923, "grad_norm": 0.9039688369997464, "learning_rate": 1.4528293959814921e-06, "loss": 0.4205, "step": 12109 }, { "epoch": 0.7585461720352652, "grad_norm": 0.6096599535270937, "learning_rate": 1.45211454946166e-06, "loss": 0.47, "step": 12110 }, { "epoch": 0.7586088100346075, "grad_norm": 0.800281967472866, "learning_rate": 1.4513998489734516e-06, "loss": 0.3973, "step": 12111 }, { "epoch": 0.7586714480339498, "grad_norm": 0.797479587430063, "learning_rate": 1.4506852945462824e-06, "loss": 0.3845, "step": 12112 }, { "epoch": 0.7587340860332921, "grad_norm": 0.9216217989610656, "learning_rate": 1.449970886209567e-06, "loss": 0.444, "step": 12113 }, { "epoch": 0.7587967240326344, "grad_norm": 0.8284214014644163, "learning_rate": 1.4492566239927081e-06, "loss": 0.3688, "step": 12114 }, { "epoch": 0.7588593620319767, "grad_norm": 0.8263567545125491, "learning_rate": 1.4485425079251064e-06, "loss": 0.3415, "step": 12115 }, { "epoch": 0.758922000031319, "grad_norm": 0.8561944383866715, "learning_rate": 1.447828538036154e-06, "loss": 0.3535, "step": 12116 }, { "epoch": 0.7589846380306613, "grad_norm": 0.8692455544846025, "learning_rate": 1.4471147143552366e-06, "loss": 0.3821, "step": 12117 }, { "epoch": 0.7590472760300035, "grad_norm": 0.8284878131614669, "learning_rate": 1.4464010369117386e-06, "loss": 0.3747, "step": 12118 }, { "epoch": 0.7591099140293459, "grad_norm": 0.8726638697485299, "learning_rate": 1.4456875057350339e-06, "loss": 0.3881, "step": 12119 }, { "epoch": 0.7591725520286882, "grad_norm": 0.8197134021885127, "learning_rate": 1.444974120854491e-06, "loss": 0.4079, "step": 12120 }, { "epoch": 0.7592351900280305, "grad_norm": 0.8764897981348678, "learning_rate": 1.4442608822994713e-06, "loss": 0.3801, "step": 12121 }, { "epoch": 0.7592978280273728, "grad_norm": 0.8514554315397287, "learning_rate": 1.443547790099335e-06, "loss": 0.3572, "step": 12122 }, { "epoch": 0.7593604660267151, "grad_norm": 0.8839789439295451, "learning_rate": 1.4428348442834322e-06, "loss": 0.3685, "step": 12123 }, { "epoch": 0.7594231040260574, "grad_norm": 0.8545258875998649, "learning_rate": 1.4421220448811074e-06, "loss": 0.3935, "step": 12124 }, { "epoch": 0.7594857420253998, "grad_norm": 0.848692878557991, "learning_rate": 1.4414093919216982e-06, "loss": 0.4061, "step": 12125 }, { "epoch": 0.759548380024742, "grad_norm": 0.8887646655648441, "learning_rate": 1.4406968854345405e-06, "loss": 0.3791, "step": 12126 }, { "epoch": 0.7596110180240843, "grad_norm": 0.8666986232780239, "learning_rate": 1.439984525448959e-06, "loss": 0.3744, "step": 12127 }, { "epoch": 0.7596736560234266, "grad_norm": 0.8458068465182685, "learning_rate": 1.4392723119942758e-06, "loss": 0.3759, "step": 12128 }, { "epoch": 0.7597362940227689, "grad_norm": 0.8959274460596769, "learning_rate": 1.4385602450998048e-06, "loss": 0.4413, "step": 12129 }, { "epoch": 0.7597989320221112, "grad_norm": 0.9182813027365249, "learning_rate": 1.4378483247948538e-06, "loss": 0.3805, "step": 12130 }, { "epoch": 0.7598615700214535, "grad_norm": 0.8185410664869485, "learning_rate": 1.4371365511087286e-06, "loss": 0.3612, "step": 12131 }, { "epoch": 0.7599242080207959, "grad_norm": 0.8273158217254062, "learning_rate": 1.436424924070724e-06, "loss": 0.4093, "step": 12132 }, { "epoch": 0.7599868460201381, "grad_norm": 0.8802481823697501, "learning_rate": 1.435713443710131e-06, "loss": 0.4003, "step": 12133 }, { "epoch": 0.7600494840194805, "grad_norm": 0.8612892029211697, "learning_rate": 1.4350021100562329e-06, "loss": 0.3752, "step": 12134 }, { "epoch": 0.7601121220188227, "grad_norm": 0.8521674629684327, "learning_rate": 1.4342909231383111e-06, "loss": 0.4209, "step": 12135 }, { "epoch": 0.760174760018165, "grad_norm": 0.8365471735770125, "learning_rate": 1.4335798829856368e-06, "loss": 0.3497, "step": 12136 }, { "epoch": 0.7602373980175073, "grad_norm": 0.8947058259618716, "learning_rate": 1.4328689896274762e-06, "loss": 0.416, "step": 12137 }, { "epoch": 0.7603000360168496, "grad_norm": 0.9100640442852441, "learning_rate": 1.4321582430930903e-06, "loss": 0.38, "step": 12138 }, { "epoch": 0.760362674016192, "grad_norm": 0.8346595032643489, "learning_rate": 1.4314476434117314e-06, "loss": 0.3886, "step": 12139 }, { "epoch": 0.7604253120155342, "grad_norm": 0.8900171016250094, "learning_rate": 1.4307371906126517e-06, "loss": 0.4049, "step": 12140 }, { "epoch": 0.7604879500148766, "grad_norm": 0.8748385984273385, "learning_rate": 1.430026884725092e-06, "loss": 0.3778, "step": 12141 }, { "epoch": 0.7605505880142188, "grad_norm": 0.9193700281573849, "learning_rate": 1.429316725778288e-06, "loss": 0.4085, "step": 12142 }, { "epoch": 0.7606132260135611, "grad_norm": 0.8517764403034263, "learning_rate": 1.4286067138014686e-06, "loss": 0.3902, "step": 12143 }, { "epoch": 0.7606758640129034, "grad_norm": 0.9105667386684505, "learning_rate": 1.4278968488238614e-06, "loss": 0.36, "step": 12144 }, { "epoch": 0.7607385020122457, "grad_norm": 0.8605863534803899, "learning_rate": 1.4271871308746833e-06, "loss": 0.4158, "step": 12145 }, { "epoch": 0.760801140011588, "grad_norm": 0.581404724954555, "learning_rate": 1.4264775599831459e-06, "loss": 0.4295, "step": 12146 }, { "epoch": 0.7608637780109303, "grad_norm": 0.8392758306452512, "learning_rate": 1.4257681361784552e-06, "loss": 0.3513, "step": 12147 }, { "epoch": 0.7609264160102727, "grad_norm": 0.8411815384199427, "learning_rate": 1.4250588594898102e-06, "loss": 0.3654, "step": 12148 }, { "epoch": 0.7609890540096149, "grad_norm": 0.8299617079723282, "learning_rate": 1.424349729946407e-06, "loss": 0.3789, "step": 12149 }, { "epoch": 0.7610516920089573, "grad_norm": 0.8959514571918932, "learning_rate": 1.4236407475774333e-06, "loss": 0.3829, "step": 12150 }, { "epoch": 0.7611143300082995, "grad_norm": 0.6026465301981816, "learning_rate": 1.4229319124120683e-06, "loss": 0.4287, "step": 12151 }, { "epoch": 0.7611769680076418, "grad_norm": 0.8065029805443577, "learning_rate": 1.4222232244794915e-06, "loss": 0.3316, "step": 12152 }, { "epoch": 0.7612396060069841, "grad_norm": 0.7968459535160386, "learning_rate": 1.4215146838088706e-06, "loss": 0.3224, "step": 12153 }, { "epoch": 0.7613022440063264, "grad_norm": 0.8475643951001758, "learning_rate": 1.4208062904293696e-06, "loss": 0.3802, "step": 12154 }, { "epoch": 0.7613648820056688, "grad_norm": 0.8340806372446653, "learning_rate": 1.4200980443701456e-06, "loss": 0.3836, "step": 12155 }, { "epoch": 0.761427520005011, "grad_norm": 0.9024225163701779, "learning_rate": 1.419389945660351e-06, "loss": 0.3734, "step": 12156 }, { "epoch": 0.7614901580043534, "grad_norm": 0.8612187242989288, "learning_rate": 1.4186819943291307e-06, "loss": 0.3831, "step": 12157 }, { "epoch": 0.7615527960036956, "grad_norm": 0.8436102091523723, "learning_rate": 1.4179741904056233e-06, "loss": 0.4005, "step": 12158 }, { "epoch": 0.761615434003038, "grad_norm": 0.8405025323603112, "learning_rate": 1.417266533918964e-06, "loss": 0.3593, "step": 12159 }, { "epoch": 0.7616780720023802, "grad_norm": 0.9150130272063589, "learning_rate": 1.4165590248982775e-06, "loss": 0.3976, "step": 12160 }, { "epoch": 0.7617407100017225, "grad_norm": 0.5399217039827935, "learning_rate": 1.415851663372688e-06, "loss": 0.4353, "step": 12161 }, { "epoch": 0.7618033480010649, "grad_norm": 0.8759214536211869, "learning_rate": 1.4151444493713102e-06, "loss": 0.3237, "step": 12162 }, { "epoch": 0.7618659860004071, "grad_norm": 0.806527025232558, "learning_rate": 1.4144373829232516e-06, "loss": 0.3832, "step": 12163 }, { "epoch": 0.7619286239997495, "grad_norm": 0.8935201645899035, "learning_rate": 1.4137304640576161e-06, "loss": 0.4146, "step": 12164 }, { "epoch": 0.7619912619990917, "grad_norm": 0.9051083301153748, "learning_rate": 1.4130236928035001e-06, "loss": 0.4014, "step": 12165 }, { "epoch": 0.7620538999984341, "grad_norm": 0.8365884918426034, "learning_rate": 1.4123170691899952e-06, "loss": 0.3938, "step": 12166 }, { "epoch": 0.7621165379977763, "grad_norm": 0.8612325268446295, "learning_rate": 1.4116105932461833e-06, "loss": 0.3732, "step": 12167 }, { "epoch": 0.7621791759971187, "grad_norm": 0.8560047304272803, "learning_rate": 1.4109042650011473e-06, "loss": 0.3833, "step": 12168 }, { "epoch": 0.762241813996461, "grad_norm": 0.9225428810652551, "learning_rate": 1.4101980844839564e-06, "loss": 0.3805, "step": 12169 }, { "epoch": 0.7623044519958032, "grad_norm": 0.8367052780872859, "learning_rate": 1.4094920517236805e-06, "loss": 0.3817, "step": 12170 }, { "epoch": 0.7623670899951456, "grad_norm": 0.942516333000648, "learning_rate": 1.408786166749378e-06, "loss": 0.4151, "step": 12171 }, { "epoch": 0.7624297279944878, "grad_norm": 0.7940558823169905, "learning_rate": 1.408080429590103e-06, "loss": 0.3862, "step": 12172 }, { "epoch": 0.7624923659938302, "grad_norm": 0.8858264866321723, "learning_rate": 1.4073748402749044e-06, "loss": 0.3825, "step": 12173 }, { "epoch": 0.7625550039931724, "grad_norm": 0.8182237648717382, "learning_rate": 1.4066693988328246e-06, "loss": 0.3641, "step": 12174 }, { "epoch": 0.7626176419925148, "grad_norm": 0.8833151484153386, "learning_rate": 1.405964105292899e-06, "loss": 0.4084, "step": 12175 }, { "epoch": 0.7626802799918571, "grad_norm": 0.7922951216724363, "learning_rate": 1.4052589596841566e-06, "loss": 0.3888, "step": 12176 }, { "epoch": 0.7627429179911993, "grad_norm": 0.8973038108809323, "learning_rate": 1.4045539620356225e-06, "loss": 0.3763, "step": 12177 }, { "epoch": 0.7628055559905417, "grad_norm": 0.6552503798120737, "learning_rate": 1.4038491123763158e-06, "loss": 0.466, "step": 12178 }, { "epoch": 0.7628681939898839, "grad_norm": 0.8997362292877513, "learning_rate": 1.4031444107352476e-06, "loss": 0.3925, "step": 12179 }, { "epoch": 0.7629308319892263, "grad_norm": 0.8322086031144545, "learning_rate": 1.4024398571414227e-06, "loss": 0.3595, "step": 12180 }, { "epoch": 0.7629934699885685, "grad_norm": 0.7974591151758381, "learning_rate": 1.4017354516238413e-06, "loss": 0.3475, "step": 12181 }, { "epoch": 0.7630561079879109, "grad_norm": 0.8321760075293948, "learning_rate": 1.4010311942114968e-06, "loss": 0.3787, "step": 12182 }, { "epoch": 0.7631187459872532, "grad_norm": 0.8438390755992784, "learning_rate": 1.4003270849333756e-06, "loss": 0.3841, "step": 12183 }, { "epoch": 0.7631813839865955, "grad_norm": 0.8823872514427169, "learning_rate": 1.3996231238184587e-06, "loss": 0.404, "step": 12184 }, { "epoch": 0.7632440219859378, "grad_norm": 0.9496868439398807, "learning_rate": 1.3989193108957238e-06, "loss": 0.4235, "step": 12185 }, { "epoch": 0.76330665998528, "grad_norm": 0.859109847342525, "learning_rate": 1.398215646194137e-06, "loss": 0.3743, "step": 12186 }, { "epoch": 0.7633692979846224, "grad_norm": 0.9607859801888406, "learning_rate": 1.3975121297426641e-06, "loss": 0.4061, "step": 12187 }, { "epoch": 0.7634319359839646, "grad_norm": 0.8304989218471871, "learning_rate": 1.3968087615702607e-06, "loss": 0.3572, "step": 12188 }, { "epoch": 0.763494573983307, "grad_norm": 0.804747260847467, "learning_rate": 1.3961055417058772e-06, "loss": 0.3372, "step": 12189 }, { "epoch": 0.7635572119826493, "grad_norm": 0.8799194405001639, "learning_rate": 1.3954024701784591e-06, "loss": 0.3996, "step": 12190 }, { "epoch": 0.7636198499819916, "grad_norm": 0.8661435233794036, "learning_rate": 1.394699547016944e-06, "loss": 0.4019, "step": 12191 }, { "epoch": 0.7636824879813339, "grad_norm": 0.8703947555515046, "learning_rate": 1.3939967722502646e-06, "loss": 0.4061, "step": 12192 }, { "epoch": 0.7637451259806762, "grad_norm": 0.8344715705171691, "learning_rate": 1.3932941459073456e-06, "loss": 0.3951, "step": 12193 }, { "epoch": 0.7638077639800185, "grad_norm": 0.8667857435777439, "learning_rate": 1.3925916680171108e-06, "loss": 0.3646, "step": 12194 }, { "epoch": 0.7638704019793607, "grad_norm": 0.9135956982314181, "learning_rate": 1.3918893386084715e-06, "loss": 0.3663, "step": 12195 }, { "epoch": 0.7639330399787031, "grad_norm": 0.5942318700825758, "learning_rate": 1.3911871577103375e-06, "loss": 0.4524, "step": 12196 }, { "epoch": 0.7639956779780454, "grad_norm": 0.8329658567539894, "learning_rate": 1.3904851253516106e-06, "loss": 0.4239, "step": 12197 }, { "epoch": 0.7640583159773877, "grad_norm": 1.1739444533116838, "learning_rate": 1.389783241561185e-06, "loss": 0.4172, "step": 12198 }, { "epoch": 0.76412095397673, "grad_norm": 0.9153387588657722, "learning_rate": 1.389081506367952e-06, "loss": 0.3718, "step": 12199 }, { "epoch": 0.7641835919760723, "grad_norm": 0.8249830806502529, "learning_rate": 1.388379919800794e-06, "loss": 0.3421, "step": 12200 }, { "epoch": 0.7642462299754146, "grad_norm": 0.8288664151670275, "learning_rate": 1.387678481888589e-06, "loss": 0.342, "step": 12201 }, { "epoch": 0.7643088679747568, "grad_norm": 0.875040188020738, "learning_rate": 1.3869771926602066e-06, "loss": 0.3958, "step": 12202 }, { "epoch": 0.7643715059740992, "grad_norm": 0.8289110277979953, "learning_rate": 1.3862760521445152e-06, "loss": 0.3686, "step": 12203 }, { "epoch": 0.7644341439734414, "grad_norm": 0.8285509174668271, "learning_rate": 1.385575060370371e-06, "loss": 0.3814, "step": 12204 }, { "epoch": 0.7644967819727838, "grad_norm": 0.8130016442810921, "learning_rate": 1.3848742173666291e-06, "loss": 0.3788, "step": 12205 }, { "epoch": 0.7645594199721261, "grad_norm": 0.8072134104851697, "learning_rate": 1.3841735231621357e-06, "loss": 0.3895, "step": 12206 }, { "epoch": 0.7646220579714684, "grad_norm": 0.9122776678779502, "learning_rate": 1.3834729777857314e-06, "loss": 0.3982, "step": 12207 }, { "epoch": 0.7646846959708107, "grad_norm": 0.8591400599056112, "learning_rate": 1.38277258126625e-06, "loss": 0.3508, "step": 12208 }, { "epoch": 0.764747333970153, "grad_norm": 0.8757206343523144, "learning_rate": 1.382072333632521e-06, "loss": 0.3682, "step": 12209 }, { "epoch": 0.7648099719694953, "grad_norm": 0.9000216742400504, "learning_rate": 1.3813722349133646e-06, "loss": 0.3923, "step": 12210 }, { "epoch": 0.7648726099688375, "grad_norm": 0.8862747282644081, "learning_rate": 1.3806722851376004e-06, "loss": 0.4425, "step": 12211 }, { "epoch": 0.7649352479681799, "grad_norm": 0.8459340531240942, "learning_rate": 1.3799724843340368e-06, "loss": 0.3822, "step": 12212 }, { "epoch": 0.7649978859675222, "grad_norm": 0.8543327393280203, "learning_rate": 1.379272832531476e-06, "loss": 0.4108, "step": 12213 }, { "epoch": 0.7650605239668645, "grad_norm": 0.8764674788870692, "learning_rate": 1.378573329758719e-06, "loss": 0.4206, "step": 12214 }, { "epoch": 0.7651231619662068, "grad_norm": 0.79571163161263, "learning_rate": 1.3778739760445552e-06, "loss": 0.3399, "step": 12215 }, { "epoch": 0.7651857999655491, "grad_norm": 0.8924452190904014, "learning_rate": 1.3771747714177719e-06, "loss": 0.3663, "step": 12216 }, { "epoch": 0.7652484379648914, "grad_norm": 0.8883699019742137, "learning_rate": 1.3764757159071468e-06, "loss": 0.4024, "step": 12217 }, { "epoch": 0.7653110759642338, "grad_norm": 0.8378904128958053, "learning_rate": 1.3757768095414542e-06, "loss": 0.4029, "step": 12218 }, { "epoch": 0.765373713963576, "grad_norm": 0.8380432347798005, "learning_rate": 1.3750780523494589e-06, "loss": 0.3751, "step": 12219 }, { "epoch": 0.7654363519629183, "grad_norm": 0.5871769837485464, "learning_rate": 1.3743794443599256e-06, "loss": 0.474, "step": 12220 }, { "epoch": 0.7654989899622606, "grad_norm": 0.8752632841571062, "learning_rate": 1.373680985601607e-06, "loss": 0.3971, "step": 12221 }, { "epoch": 0.7655616279616029, "grad_norm": 0.8945133089204861, "learning_rate": 1.3729826761032506e-06, "loss": 0.392, "step": 12222 }, { "epoch": 0.7656242659609452, "grad_norm": 0.9052446885149752, "learning_rate": 1.3722845158936022e-06, "loss": 0.3942, "step": 12223 }, { "epoch": 0.7656869039602875, "grad_norm": 1.0010257552679802, "learning_rate": 1.3715865050013965e-06, "loss": 0.4183, "step": 12224 }, { "epoch": 0.7657495419596299, "grad_norm": 0.8651119353342405, "learning_rate": 1.3708886434553637e-06, "loss": 0.3872, "step": 12225 }, { "epoch": 0.7658121799589721, "grad_norm": 0.8474481649727038, "learning_rate": 1.3701909312842277e-06, "loss": 0.3476, "step": 12226 }, { "epoch": 0.7658748179583144, "grad_norm": 0.8956496979022419, "learning_rate": 1.3694933685167073e-06, "loss": 0.4008, "step": 12227 }, { "epoch": 0.7659374559576567, "grad_norm": 0.8120681337206862, "learning_rate": 1.3687959551815117e-06, "loss": 0.3751, "step": 12228 }, { "epoch": 0.766000093956999, "grad_norm": 0.8261852463441699, "learning_rate": 1.3680986913073496e-06, "loss": 0.361, "step": 12229 }, { "epoch": 0.7660627319563413, "grad_norm": 0.9130933963427353, "learning_rate": 1.3674015769229204e-06, "loss": 0.3882, "step": 12230 }, { "epoch": 0.7661253699556836, "grad_norm": 0.8191546884633607, "learning_rate": 1.3667046120569145e-06, "loss": 0.4183, "step": 12231 }, { "epoch": 0.766188007955026, "grad_norm": 0.618787594078647, "learning_rate": 1.3660077967380226e-06, "loss": 0.4425, "step": 12232 }, { "epoch": 0.7662506459543682, "grad_norm": 0.8657862546819474, "learning_rate": 1.3653111309949247e-06, "loss": 0.3563, "step": 12233 }, { "epoch": 0.7663132839537106, "grad_norm": 0.850080431081994, "learning_rate": 1.364614614856295e-06, "loss": 0.3904, "step": 12234 }, { "epoch": 0.7663759219530528, "grad_norm": 0.8449672500295595, "learning_rate": 1.3639182483508024e-06, "loss": 0.3583, "step": 12235 }, { "epoch": 0.7664385599523951, "grad_norm": 0.8022654823269632, "learning_rate": 1.3632220315071082e-06, "loss": 0.3486, "step": 12236 }, { "epoch": 0.7665011979517374, "grad_norm": 0.8739947890307009, "learning_rate": 1.3625259643538718e-06, "loss": 0.4033, "step": 12237 }, { "epoch": 0.7665638359510797, "grad_norm": 0.8599039321160153, "learning_rate": 1.361830046919742e-06, "loss": 0.4449, "step": 12238 }, { "epoch": 0.766626473950422, "grad_norm": 0.6286663936904477, "learning_rate": 1.3611342792333626e-06, "loss": 0.4488, "step": 12239 }, { "epoch": 0.7666891119497643, "grad_norm": 0.8598966412011767, "learning_rate": 1.3604386613233716e-06, "loss": 0.3792, "step": 12240 }, { "epoch": 0.7667517499491067, "grad_norm": 0.6222692885101674, "learning_rate": 1.3597431932183991e-06, "loss": 0.4435, "step": 12241 }, { "epoch": 0.7668143879484489, "grad_norm": 0.9060732377064754, "learning_rate": 1.3590478749470742e-06, "loss": 0.404, "step": 12242 }, { "epoch": 0.7668770259477913, "grad_norm": 0.8669299732562432, "learning_rate": 1.3583527065380147e-06, "loss": 0.3622, "step": 12243 }, { "epoch": 0.7669396639471335, "grad_norm": 0.7910703676855856, "learning_rate": 1.3576576880198332e-06, "loss": 0.3617, "step": 12244 }, { "epoch": 0.7670023019464758, "grad_norm": 0.7716745320012269, "learning_rate": 1.3569628194211365e-06, "loss": 0.404, "step": 12245 }, { "epoch": 0.7670649399458181, "grad_norm": 0.8754808038602627, "learning_rate": 1.3562681007705276e-06, "loss": 0.3856, "step": 12246 }, { "epoch": 0.7671275779451604, "grad_norm": 0.923508622234522, "learning_rate": 1.3555735320965996e-06, "loss": 0.3957, "step": 12247 }, { "epoch": 0.7671902159445028, "grad_norm": 0.8581669218832356, "learning_rate": 1.3548791134279415e-06, "loss": 0.3782, "step": 12248 }, { "epoch": 0.767252853943845, "grad_norm": 0.9003232766851769, "learning_rate": 1.354184844793136e-06, "loss": 0.4163, "step": 12249 }, { "epoch": 0.7673154919431874, "grad_norm": 0.8725274676133806, "learning_rate": 1.3534907262207574e-06, "loss": 0.3907, "step": 12250 }, { "epoch": 0.7673781299425296, "grad_norm": 0.8997347758293992, "learning_rate": 1.352796757739379e-06, "loss": 0.4311, "step": 12251 }, { "epoch": 0.7674407679418719, "grad_norm": 0.822014412230924, "learning_rate": 1.3521029393775625e-06, "loss": 0.3961, "step": 12252 }, { "epoch": 0.7675034059412142, "grad_norm": 0.8272594107552561, "learning_rate": 1.3514092711638665e-06, "loss": 0.3716, "step": 12253 }, { "epoch": 0.7675660439405565, "grad_norm": 0.8906456033923704, "learning_rate": 1.3507157531268406e-06, "loss": 0.3791, "step": 12254 }, { "epoch": 0.7676286819398989, "grad_norm": 0.9387038573497993, "learning_rate": 1.3500223852950334e-06, "loss": 0.3866, "step": 12255 }, { "epoch": 0.7676913199392411, "grad_norm": 0.7884871208395204, "learning_rate": 1.3493291676969817e-06, "loss": 0.348, "step": 12256 }, { "epoch": 0.7677539579385835, "grad_norm": 0.9070487413923388, "learning_rate": 1.348636100361219e-06, "loss": 0.3967, "step": 12257 }, { "epoch": 0.7678165959379257, "grad_norm": 0.8103851404919518, "learning_rate": 1.3479431833162727e-06, "loss": 0.4112, "step": 12258 }, { "epoch": 0.7678792339372681, "grad_norm": 0.6163145405401721, "learning_rate": 1.3472504165906614e-06, "loss": 0.4359, "step": 12259 }, { "epoch": 0.7679418719366103, "grad_norm": 0.8677157888759892, "learning_rate": 1.3465578002129021e-06, "loss": 0.3793, "step": 12260 }, { "epoch": 0.7680045099359526, "grad_norm": 0.9681653462652177, "learning_rate": 1.3458653342115013e-06, "loss": 0.3968, "step": 12261 }, { "epoch": 0.768067147935295, "grad_norm": 0.841812904657243, "learning_rate": 1.3451730186149608e-06, "loss": 0.3964, "step": 12262 }, { "epoch": 0.7681297859346372, "grad_norm": 0.8733077293418391, "learning_rate": 1.344480853451779e-06, "loss": 0.3822, "step": 12263 }, { "epoch": 0.7681924239339796, "grad_norm": 0.8266265659234332, "learning_rate": 1.3437888387504427e-06, "loss": 0.3912, "step": 12264 }, { "epoch": 0.7682550619333218, "grad_norm": 0.8766775646978666, "learning_rate": 1.3430969745394373e-06, "loss": 0.4276, "step": 12265 }, { "epoch": 0.7683176999326642, "grad_norm": 0.832345945638818, "learning_rate": 1.3424052608472389e-06, "loss": 0.3911, "step": 12266 }, { "epoch": 0.7683803379320064, "grad_norm": 0.9029772326378117, "learning_rate": 1.3417136977023187e-06, "loss": 0.3985, "step": 12267 }, { "epoch": 0.7684429759313488, "grad_norm": 0.7963639359669342, "learning_rate": 1.3410222851331407e-06, "loss": 0.3755, "step": 12268 }, { "epoch": 0.7685056139306911, "grad_norm": 0.6263038791494209, "learning_rate": 1.3403310231681654e-06, "loss": 0.4595, "step": 12269 }, { "epoch": 0.7685682519300333, "grad_norm": 0.8256420051082473, "learning_rate": 1.339639911835845e-06, "loss": 0.3829, "step": 12270 }, { "epoch": 0.7686308899293757, "grad_norm": 0.852379488506237, "learning_rate": 1.3389489511646236e-06, "loss": 0.3347, "step": 12271 }, { "epoch": 0.7686935279287179, "grad_norm": 0.8585707141728599, "learning_rate": 1.3382581411829443e-06, "loss": 0.401, "step": 12272 }, { "epoch": 0.7687561659280603, "grad_norm": 0.9324896700705212, "learning_rate": 1.3375674819192398e-06, "loss": 0.385, "step": 12273 }, { "epoch": 0.7688188039274025, "grad_norm": 0.8501849052077861, "learning_rate": 1.3368769734019371e-06, "loss": 0.3334, "step": 12274 }, { "epoch": 0.7688814419267449, "grad_norm": 0.8096883810030475, "learning_rate": 1.3361866156594582e-06, "loss": 0.3975, "step": 12275 }, { "epoch": 0.7689440799260872, "grad_norm": 0.846333219715112, "learning_rate": 1.3354964087202188e-06, "loss": 0.3691, "step": 12276 }, { "epoch": 0.7690067179254295, "grad_norm": 0.8612027673259276, "learning_rate": 1.3348063526126253e-06, "loss": 0.3966, "step": 12277 }, { "epoch": 0.7690693559247718, "grad_norm": 0.8796977199554558, "learning_rate": 1.3341164473650842e-06, "loss": 0.3898, "step": 12278 }, { "epoch": 0.769131993924114, "grad_norm": 0.8721480628550387, "learning_rate": 1.3334266930059908e-06, "loss": 0.416, "step": 12279 }, { "epoch": 0.7691946319234564, "grad_norm": 0.8348490366687206, "learning_rate": 1.3327370895637332e-06, "loss": 0.3577, "step": 12280 }, { "epoch": 0.7692572699227986, "grad_norm": 0.8757652763913041, "learning_rate": 1.3320476370666996e-06, "loss": 0.3714, "step": 12281 }, { "epoch": 0.769319907922141, "grad_norm": 0.8513967452411835, "learning_rate": 1.3313583355432658e-06, "loss": 0.4232, "step": 12282 }, { "epoch": 0.7693825459214833, "grad_norm": 0.8239432119636488, "learning_rate": 1.330669185021804e-06, "loss": 0.3591, "step": 12283 }, { "epoch": 0.7694451839208256, "grad_norm": 0.8915006557559466, "learning_rate": 1.3299801855306799e-06, "loss": 0.3757, "step": 12284 }, { "epoch": 0.7695078219201679, "grad_norm": 0.9178594081581041, "learning_rate": 1.3292913370982524e-06, "loss": 0.4218, "step": 12285 }, { "epoch": 0.7695704599195101, "grad_norm": 0.928447367181959, "learning_rate": 1.3286026397528745e-06, "loss": 0.3583, "step": 12286 }, { "epoch": 0.7696330979188525, "grad_norm": 0.7783729181841863, "learning_rate": 1.3279140935228919e-06, "loss": 0.3603, "step": 12287 }, { "epoch": 0.7696957359181947, "grad_norm": 0.8401157053524564, "learning_rate": 1.3272256984366472e-06, "loss": 0.3871, "step": 12288 }, { "epoch": 0.7697583739175371, "grad_norm": 0.9408392144149074, "learning_rate": 1.3265374545224758e-06, "loss": 0.4105, "step": 12289 }, { "epoch": 0.7698210119168793, "grad_norm": 0.7957814527438397, "learning_rate": 1.3258493618087044e-06, "loss": 0.3846, "step": 12290 }, { "epoch": 0.7698836499162217, "grad_norm": 0.8623089413119133, "learning_rate": 1.3251614203236552e-06, "loss": 0.3857, "step": 12291 }, { "epoch": 0.769946287915564, "grad_norm": 0.8903758053659436, "learning_rate": 1.324473630095644e-06, "loss": 0.3831, "step": 12292 }, { "epoch": 0.7700089259149063, "grad_norm": 0.8506720586775667, "learning_rate": 1.3237859911529805e-06, "loss": 0.3791, "step": 12293 }, { "epoch": 0.7700715639142486, "grad_norm": 0.837274298096631, "learning_rate": 1.3230985035239679e-06, "loss": 0.3633, "step": 12294 }, { "epoch": 0.7701342019135908, "grad_norm": 0.87591629870011, "learning_rate": 1.3224111672369022e-06, "loss": 0.3593, "step": 12295 }, { "epoch": 0.7701968399129332, "grad_norm": 0.5899016869889776, "learning_rate": 1.3217239823200768e-06, "loss": 0.4531, "step": 12296 }, { "epoch": 0.7702594779122754, "grad_norm": 0.8636842496720666, "learning_rate": 1.3210369488017731e-06, "loss": 0.3889, "step": 12297 }, { "epoch": 0.7703221159116178, "grad_norm": 0.9140629392528077, "learning_rate": 1.3203500667102731e-06, "loss": 0.3943, "step": 12298 }, { "epoch": 0.7703847539109601, "grad_norm": 0.8680987128716712, "learning_rate": 1.3196633360738475e-06, "loss": 0.399, "step": 12299 }, { "epoch": 0.7704473919103024, "grad_norm": 0.8754438066877356, "learning_rate": 1.3189767569207622e-06, "loss": 0.3639, "step": 12300 }, { "epoch": 0.7705100299096447, "grad_norm": 0.9101517533593068, "learning_rate": 1.3182903292792764e-06, "loss": 0.4255, "step": 12301 }, { "epoch": 0.770572667908987, "grad_norm": 0.864852217140999, "learning_rate": 1.3176040531776447e-06, "loss": 0.3706, "step": 12302 }, { "epoch": 0.7706353059083293, "grad_norm": 1.001305764890246, "learning_rate": 1.3169179286441131e-06, "loss": 0.3807, "step": 12303 }, { "epoch": 0.7706979439076715, "grad_norm": 0.8725198174029146, "learning_rate": 1.316231955706922e-06, "loss": 0.3951, "step": 12304 }, { "epoch": 0.7707605819070139, "grad_norm": 0.832433413801481, "learning_rate": 1.3155461343943065e-06, "loss": 0.3573, "step": 12305 }, { "epoch": 0.7708232199063562, "grad_norm": 0.8701916708162051, "learning_rate": 1.3148604647344987e-06, "loss": 0.4212, "step": 12306 }, { "epoch": 0.7708858579056985, "grad_norm": 0.9087456117006217, "learning_rate": 1.3141749467557175e-06, "loss": 0.3535, "step": 12307 }, { "epoch": 0.7709484959050408, "grad_norm": 0.9161933143618876, "learning_rate": 1.3134895804861797e-06, "loss": 0.4136, "step": 12308 }, { "epoch": 0.7710111339043831, "grad_norm": 0.848254235899191, "learning_rate": 1.3128043659540945e-06, "loss": 0.3583, "step": 12309 }, { "epoch": 0.7710737719037254, "grad_norm": 0.9801474998198765, "learning_rate": 1.3121193031876666e-06, "loss": 0.4052, "step": 12310 }, { "epoch": 0.7711364099030676, "grad_norm": 0.8843363150764962, "learning_rate": 1.3114343922150929e-06, "loss": 0.4011, "step": 12311 }, { "epoch": 0.77119904790241, "grad_norm": 0.7566068904697433, "learning_rate": 1.3107496330645636e-06, "loss": 0.3538, "step": 12312 }, { "epoch": 0.7712616859017523, "grad_norm": 0.8735747913101004, "learning_rate": 1.3100650257642633e-06, "loss": 0.4076, "step": 12313 }, { "epoch": 0.7713243239010946, "grad_norm": 0.8381689522462522, "learning_rate": 1.3093805703423707e-06, "loss": 0.3641, "step": 12314 }, { "epoch": 0.7713869619004369, "grad_norm": 0.8875881338787945, "learning_rate": 1.3086962668270608e-06, "loss": 0.391, "step": 12315 }, { "epoch": 0.7714495998997792, "grad_norm": 0.8689477692038108, "learning_rate": 1.3080121152464976e-06, "loss": 0.3983, "step": 12316 }, { "epoch": 0.7715122378991215, "grad_norm": 0.805034511545749, "learning_rate": 1.3073281156288408e-06, "loss": 0.3687, "step": 12317 }, { "epoch": 0.7715748758984639, "grad_norm": 0.8496713211669047, "learning_rate": 1.3066442680022446e-06, "loss": 0.3606, "step": 12318 }, { "epoch": 0.7716375138978061, "grad_norm": 0.8219621642146782, "learning_rate": 1.3059605723948554e-06, "loss": 0.4005, "step": 12319 }, { "epoch": 0.7717001518971484, "grad_norm": 0.8160582202967481, "learning_rate": 1.3052770288348155e-06, "loss": 0.4053, "step": 12320 }, { "epoch": 0.7717627898964907, "grad_norm": 0.8838637168407966, "learning_rate": 1.3045936373502567e-06, "loss": 0.3667, "step": 12321 }, { "epoch": 0.771825427895833, "grad_norm": 0.8842355824779993, "learning_rate": 1.3039103979693107e-06, "loss": 0.3854, "step": 12322 }, { "epoch": 0.7718880658951753, "grad_norm": 0.7892414844447273, "learning_rate": 1.3032273107200983e-06, "loss": 0.3733, "step": 12323 }, { "epoch": 0.7719507038945176, "grad_norm": 0.760181252062533, "learning_rate": 1.3025443756307365e-06, "loss": 0.3492, "step": 12324 }, { "epoch": 0.77201334189386, "grad_norm": 0.8329584399273702, "learning_rate": 1.3018615927293354e-06, "loss": 0.3807, "step": 12325 }, { "epoch": 0.7720759798932022, "grad_norm": 0.842647905698077, "learning_rate": 1.3011789620439969e-06, "loss": 0.3462, "step": 12326 }, { "epoch": 0.7721386178925446, "grad_norm": 0.9079725623156318, "learning_rate": 1.3004964836028189e-06, "loss": 0.4182, "step": 12327 }, { "epoch": 0.7722012558918868, "grad_norm": 0.9048109028644575, "learning_rate": 1.2998141574338925e-06, "loss": 0.3853, "step": 12328 }, { "epoch": 0.7722638938912291, "grad_norm": 0.8214420143148367, "learning_rate": 1.2991319835653022e-06, "loss": 0.3993, "step": 12329 }, { "epoch": 0.7723265318905714, "grad_norm": 0.6246362478516573, "learning_rate": 1.2984499620251246e-06, "loss": 0.4571, "step": 12330 }, { "epoch": 0.7723891698899137, "grad_norm": 0.8192057052510013, "learning_rate": 1.2977680928414355e-06, "loss": 0.352, "step": 12331 }, { "epoch": 0.772451807889256, "grad_norm": 0.9625354291327105, "learning_rate": 1.2970863760422987e-06, "loss": 0.4036, "step": 12332 }, { "epoch": 0.7725144458885983, "grad_norm": 0.9337835973231406, "learning_rate": 1.296404811655772e-06, "loss": 0.4218, "step": 12333 }, { "epoch": 0.7725770838879407, "grad_norm": 0.88297485657945, "learning_rate": 1.2957233997099127e-06, "loss": 0.3974, "step": 12334 }, { "epoch": 0.7726397218872829, "grad_norm": 0.91373687410806, "learning_rate": 1.2950421402327656e-06, "loss": 0.3633, "step": 12335 }, { "epoch": 0.7727023598866252, "grad_norm": 0.8160823646348158, "learning_rate": 1.294361033252372e-06, "loss": 0.3998, "step": 12336 }, { "epoch": 0.7727649978859675, "grad_norm": 0.8190134024867194, "learning_rate": 1.2936800787967657e-06, "loss": 0.3736, "step": 12337 }, { "epoch": 0.7728276358853098, "grad_norm": 0.8278311124249005, "learning_rate": 1.2929992768939754e-06, "loss": 0.3874, "step": 12338 }, { "epoch": 0.7728902738846521, "grad_norm": 0.8257325599841278, "learning_rate": 1.2923186275720213e-06, "loss": 0.3699, "step": 12339 }, { "epoch": 0.7729529118839944, "grad_norm": 0.883432868743919, "learning_rate": 1.2916381308589226e-06, "loss": 0.3919, "step": 12340 }, { "epoch": 0.7730155498833368, "grad_norm": 0.5302492083898949, "learning_rate": 1.2909577867826862e-06, "loss": 0.4561, "step": 12341 }, { "epoch": 0.773078187882679, "grad_norm": 0.9380692862493498, "learning_rate": 1.2902775953713141e-06, "loss": 0.3854, "step": 12342 }, { "epoch": 0.7731408258820214, "grad_norm": 0.8466118783088102, "learning_rate": 1.2895975566528067e-06, "loss": 0.3928, "step": 12343 }, { "epoch": 0.7732034638813636, "grad_norm": 0.936118839948978, "learning_rate": 1.2889176706551527e-06, "loss": 0.3655, "step": 12344 }, { "epoch": 0.7732661018807059, "grad_norm": 0.8726356793394074, "learning_rate": 1.2882379374063358e-06, "loss": 0.4271, "step": 12345 }, { "epoch": 0.7733287398800482, "grad_norm": 0.6011967985217059, "learning_rate": 1.2875583569343348e-06, "loss": 0.4423, "step": 12346 }, { "epoch": 0.7733913778793905, "grad_norm": 0.8632061578173503, "learning_rate": 1.2868789292671186e-06, "loss": 0.3684, "step": 12347 }, { "epoch": 0.7734540158787329, "grad_norm": 0.8372987043481332, "learning_rate": 1.286199654432657e-06, "loss": 0.3761, "step": 12348 }, { "epoch": 0.7735166538780751, "grad_norm": 0.8443332059364841, "learning_rate": 1.2855205324589065e-06, "loss": 0.388, "step": 12349 }, { "epoch": 0.7735792918774175, "grad_norm": 0.7884892637451854, "learning_rate": 1.284841563373821e-06, "loss": 0.3554, "step": 12350 }, { "epoch": 0.7736419298767597, "grad_norm": 0.8161245481337203, "learning_rate": 1.284162747205344e-06, "loss": 0.391, "step": 12351 }, { "epoch": 0.7737045678761021, "grad_norm": 0.8653773025894754, "learning_rate": 1.28348408398142e-06, "loss": 0.3647, "step": 12352 }, { "epoch": 0.7737672058754443, "grad_norm": 0.9334148401456415, "learning_rate": 1.282805573729981e-06, "loss": 0.4278, "step": 12353 }, { "epoch": 0.7738298438747866, "grad_norm": 0.9388038489254796, "learning_rate": 1.2821272164789544e-06, "loss": 0.407, "step": 12354 }, { "epoch": 0.773892481874129, "grad_norm": 0.8202349285183792, "learning_rate": 1.2814490122562618e-06, "loss": 0.3634, "step": 12355 }, { "epoch": 0.7739551198734712, "grad_norm": 0.813603308603816, "learning_rate": 1.280770961089816e-06, "loss": 0.3852, "step": 12356 }, { "epoch": 0.7740177578728136, "grad_norm": 0.8696831403845436, "learning_rate": 1.2800930630075299e-06, "loss": 0.385, "step": 12357 }, { "epoch": 0.7740803958721558, "grad_norm": 0.9441015566193206, "learning_rate": 1.2794153180373032e-06, "loss": 0.4151, "step": 12358 }, { "epoch": 0.7741430338714982, "grad_norm": 0.8103937954557542, "learning_rate": 1.2787377262070328e-06, "loss": 0.3493, "step": 12359 }, { "epoch": 0.7742056718708404, "grad_norm": 0.6638519928599349, "learning_rate": 1.278060287544607e-06, "loss": 0.4831, "step": 12360 }, { "epoch": 0.7742683098701827, "grad_norm": 0.8879343144643036, "learning_rate": 1.2773830020779116e-06, "loss": 0.3763, "step": 12361 }, { "epoch": 0.774330947869525, "grad_norm": 0.8464242547894821, "learning_rate": 1.2767058698348234e-06, "loss": 0.3961, "step": 12362 }, { "epoch": 0.7743935858688673, "grad_norm": 0.8761276392227342, "learning_rate": 1.2760288908432123e-06, "loss": 0.3978, "step": 12363 }, { "epoch": 0.7744562238682097, "grad_norm": 0.8401883586864802, "learning_rate": 1.275352065130942e-06, "loss": 0.3803, "step": 12364 }, { "epoch": 0.7745188618675519, "grad_norm": 0.7933253600489356, "learning_rate": 1.274675392725873e-06, "loss": 0.3436, "step": 12365 }, { "epoch": 0.7745814998668943, "grad_norm": 0.6859216186287934, "learning_rate": 1.2739988736558568e-06, "loss": 0.4552, "step": 12366 }, { "epoch": 0.7746441378662365, "grad_norm": 0.8819591333049047, "learning_rate": 1.2733225079487387e-06, "loss": 0.4201, "step": 12367 }, { "epoch": 0.7747067758655789, "grad_norm": 0.8434452877895916, "learning_rate": 1.2726462956323575e-06, "loss": 0.3752, "step": 12368 }, { "epoch": 0.7747694138649212, "grad_norm": 0.8660507402347892, "learning_rate": 1.271970236734545e-06, "loss": 0.4358, "step": 12369 }, { "epoch": 0.7748320518642634, "grad_norm": 0.8849578083307087, "learning_rate": 1.271294331283131e-06, "loss": 0.37, "step": 12370 }, { "epoch": 0.7748946898636058, "grad_norm": 0.8741280966858911, "learning_rate": 1.270618579305935e-06, "loss": 0.401, "step": 12371 }, { "epoch": 0.774957327862948, "grad_norm": 0.8543666373500239, "learning_rate": 1.26994298083077e-06, "loss": 0.3993, "step": 12372 }, { "epoch": 0.7750199658622904, "grad_norm": 0.8854738056838369, "learning_rate": 1.2692675358854429e-06, "loss": 0.414, "step": 12373 }, { "epoch": 0.7750826038616326, "grad_norm": 0.9524606046843588, "learning_rate": 1.2685922444977577e-06, "loss": 0.411, "step": 12374 }, { "epoch": 0.775145241860975, "grad_norm": 0.9005337605623157, "learning_rate": 1.2679171066955087e-06, "loss": 0.4087, "step": 12375 }, { "epoch": 0.7752078798603173, "grad_norm": 0.8109933223322892, "learning_rate": 1.267242122506484e-06, "loss": 0.3442, "step": 12376 }, { "epoch": 0.7752705178596596, "grad_norm": 0.8745178836179539, "learning_rate": 1.2665672919584666e-06, "loss": 0.4255, "step": 12377 }, { "epoch": 0.7753331558590019, "grad_norm": 0.8869850763163398, "learning_rate": 1.2658926150792321e-06, "loss": 0.3737, "step": 12378 }, { "epoch": 0.7753957938583441, "grad_norm": 0.7613141643678695, "learning_rate": 1.2652180918965495e-06, "loss": 0.3462, "step": 12379 }, { "epoch": 0.7754584318576865, "grad_norm": 0.6272199561690032, "learning_rate": 1.2645437224381847e-06, "loss": 0.4403, "step": 12380 }, { "epoch": 0.7755210698570287, "grad_norm": 0.8515664859748265, "learning_rate": 1.2638695067318934e-06, "loss": 0.3928, "step": 12381 }, { "epoch": 0.7755837078563711, "grad_norm": 0.869694533946577, "learning_rate": 1.2631954448054257e-06, "loss": 0.3871, "step": 12382 }, { "epoch": 0.7756463458557133, "grad_norm": 0.8268472115828417, "learning_rate": 1.2625215366865284e-06, "loss": 0.3648, "step": 12383 }, { "epoch": 0.7757089838550557, "grad_norm": 0.8512667264589964, "learning_rate": 1.2618477824029378e-06, "loss": 0.3827, "step": 12384 }, { "epoch": 0.775771621854398, "grad_norm": 0.9044054261520558, "learning_rate": 1.261174181982387e-06, "loss": 0.4135, "step": 12385 }, { "epoch": 0.7758342598537403, "grad_norm": 0.7980711235028576, "learning_rate": 1.2605007354526005e-06, "loss": 0.387, "step": 12386 }, { "epoch": 0.7758968978530826, "grad_norm": 0.8118561406199543, "learning_rate": 1.2598274428412981e-06, "loss": 0.3578, "step": 12387 }, { "epoch": 0.7759595358524248, "grad_norm": 0.8638026019064967, "learning_rate": 1.259154304176191e-06, "loss": 0.405, "step": 12388 }, { "epoch": 0.7760221738517672, "grad_norm": 0.8023203714566688, "learning_rate": 1.2584813194849882e-06, "loss": 0.3172, "step": 12389 }, { "epoch": 0.7760848118511094, "grad_norm": 0.8719130229989496, "learning_rate": 1.2578084887953874e-06, "loss": 0.356, "step": 12390 }, { "epoch": 0.7761474498504518, "grad_norm": 0.859367762281227, "learning_rate": 1.2571358121350851e-06, "loss": 0.3633, "step": 12391 }, { "epoch": 0.7762100878497941, "grad_norm": 0.8214370841659522, "learning_rate": 1.2564632895317674e-06, "loss": 0.3865, "step": 12392 }, { "epoch": 0.7762727258491364, "grad_norm": 0.8133400859355885, "learning_rate": 1.2557909210131158e-06, "loss": 0.403, "step": 12393 }, { "epoch": 0.7763353638484787, "grad_norm": 0.8392181928546578, "learning_rate": 1.2551187066068048e-06, "loss": 0.362, "step": 12394 }, { "epoch": 0.7763980018478209, "grad_norm": 0.8447082933478081, "learning_rate": 1.254446646340503e-06, "loss": 0.4402, "step": 12395 }, { "epoch": 0.7764606398471633, "grad_norm": 0.8895647690088395, "learning_rate": 1.253774740241872e-06, "loss": 0.3888, "step": 12396 }, { "epoch": 0.7765232778465055, "grad_norm": 0.864060825967737, "learning_rate": 1.253102988338567e-06, "loss": 0.396, "step": 12397 }, { "epoch": 0.7765859158458479, "grad_norm": 0.8901097485877715, "learning_rate": 1.2524313906582397e-06, "loss": 0.3797, "step": 12398 }, { "epoch": 0.7766485538451902, "grad_norm": 0.8396445035340923, "learning_rate": 1.2517599472285309e-06, "loss": 0.375, "step": 12399 }, { "epoch": 0.7767111918445325, "grad_norm": 0.9169467201390308, "learning_rate": 1.2510886580770793e-06, "loss": 0.4272, "step": 12400 }, { "epoch": 0.7767738298438748, "grad_norm": 0.8380507412385916, "learning_rate": 1.2504175232315147e-06, "loss": 0.4052, "step": 12401 }, { "epoch": 0.7768364678432171, "grad_norm": 0.7714042704407105, "learning_rate": 1.2497465427194604e-06, "loss": 0.3751, "step": 12402 }, { "epoch": 0.7768991058425594, "grad_norm": 0.8346391511360877, "learning_rate": 1.2490757165685351e-06, "loss": 0.3652, "step": 12403 }, { "epoch": 0.7769617438419016, "grad_norm": 0.8648889488417811, "learning_rate": 1.248405044806349e-06, "loss": 0.3788, "step": 12404 }, { "epoch": 0.777024381841244, "grad_norm": 0.8859567727468758, "learning_rate": 1.247734527460508e-06, "loss": 0.4079, "step": 12405 }, { "epoch": 0.7770870198405863, "grad_norm": 0.7905909282709578, "learning_rate": 1.2470641645586084e-06, "loss": 0.3639, "step": 12406 }, { "epoch": 0.7771496578399286, "grad_norm": 0.8504930693465917, "learning_rate": 1.2463939561282462e-06, "loss": 0.3982, "step": 12407 }, { "epoch": 0.7772122958392709, "grad_norm": 0.8632289838401725, "learning_rate": 1.245723902197004e-06, "loss": 0.3791, "step": 12408 }, { "epoch": 0.7772749338386132, "grad_norm": 0.8621762644423718, "learning_rate": 1.245054002792464e-06, "loss": 0.3916, "step": 12409 }, { "epoch": 0.7773375718379555, "grad_norm": 0.8551682890961082, "learning_rate": 1.2443842579421982e-06, "loss": 0.3734, "step": 12410 }, { "epoch": 0.7774002098372979, "grad_norm": 0.9116861798887316, "learning_rate": 1.2437146676737738e-06, "loss": 0.4102, "step": 12411 }, { "epoch": 0.7774628478366401, "grad_norm": 0.9668186691818756, "learning_rate": 1.2430452320147506e-06, "loss": 0.3806, "step": 12412 }, { "epoch": 0.7775254858359824, "grad_norm": 0.5886965180339016, "learning_rate": 1.242375950992683e-06, "loss": 0.4268, "step": 12413 }, { "epoch": 0.7775881238353247, "grad_norm": 0.8589425557745741, "learning_rate": 1.2417068246351183e-06, "loss": 0.3914, "step": 12414 }, { "epoch": 0.777650761834667, "grad_norm": 0.8808942094376502, "learning_rate": 1.2410378529695977e-06, "loss": 0.4046, "step": 12415 }, { "epoch": 0.7777133998340093, "grad_norm": 0.8732217344504519, "learning_rate": 1.2403690360236559e-06, "loss": 0.4025, "step": 12416 }, { "epoch": 0.7777760378333516, "grad_norm": 0.8320248099709006, "learning_rate": 1.2397003738248243e-06, "loss": 0.4015, "step": 12417 }, { "epoch": 0.777838675832694, "grad_norm": 0.8829633354406548, "learning_rate": 1.239031866400623e-06, "loss": 0.4172, "step": 12418 }, { "epoch": 0.7779013138320362, "grad_norm": 0.8521015985364613, "learning_rate": 1.2383635137785683e-06, "loss": 0.376, "step": 12419 }, { "epoch": 0.7779639518313785, "grad_norm": 1.0258487545038506, "learning_rate": 1.2376953159861698e-06, "loss": 0.3582, "step": 12420 }, { "epoch": 0.7780265898307208, "grad_norm": 0.8721869437818351, "learning_rate": 1.2370272730509303e-06, "loss": 0.4317, "step": 12421 }, { "epoch": 0.7780892278300631, "grad_norm": 0.8742034589601159, "learning_rate": 1.2363593850003463e-06, "loss": 0.4017, "step": 12422 }, { "epoch": 0.7781518658294054, "grad_norm": 0.838142439696835, "learning_rate": 1.235691651861909e-06, "loss": 0.383, "step": 12423 }, { "epoch": 0.7782145038287477, "grad_norm": 0.8735763494739729, "learning_rate": 1.2350240736631004e-06, "loss": 0.3851, "step": 12424 }, { "epoch": 0.77827714182809, "grad_norm": 0.8220990049237745, "learning_rate": 1.2343566504313997e-06, "loss": 0.3647, "step": 12425 }, { "epoch": 0.7783397798274323, "grad_norm": 0.8552996038970379, "learning_rate": 1.2336893821942797e-06, "loss": 0.3561, "step": 12426 }, { "epoch": 0.7784024178267747, "grad_norm": 0.8783479788257794, "learning_rate": 1.2330222689792037e-06, "loss": 0.4179, "step": 12427 }, { "epoch": 0.7784650558261169, "grad_norm": 0.6509871397321537, "learning_rate": 1.2323553108136306e-06, "loss": 0.4343, "step": 12428 }, { "epoch": 0.7785276938254592, "grad_norm": 0.9433165155813781, "learning_rate": 1.2316885077250118e-06, "loss": 0.4542, "step": 12429 }, { "epoch": 0.7785903318248015, "grad_norm": 0.88755951854897, "learning_rate": 1.2310218597407936e-06, "loss": 0.4183, "step": 12430 }, { "epoch": 0.7786529698241438, "grad_norm": 0.8509520942625268, "learning_rate": 1.2303553668884156e-06, "loss": 0.4038, "step": 12431 }, { "epoch": 0.7787156078234861, "grad_norm": 0.8033388484924359, "learning_rate": 1.2296890291953084e-06, "loss": 0.3926, "step": 12432 }, { "epoch": 0.7787782458228284, "grad_norm": 0.8638064312592364, "learning_rate": 1.2290228466889021e-06, "loss": 0.3816, "step": 12433 }, { "epoch": 0.7788408838221708, "grad_norm": 0.8424050368209411, "learning_rate": 1.2283568193966144e-06, "loss": 0.3801, "step": 12434 }, { "epoch": 0.778903521821513, "grad_norm": 0.8666862643615055, "learning_rate": 1.227690947345861e-06, "loss": 0.3857, "step": 12435 }, { "epoch": 0.7789661598208554, "grad_norm": 0.8686736923936453, "learning_rate": 1.2270252305640485e-06, "loss": 0.4062, "step": 12436 }, { "epoch": 0.7790287978201976, "grad_norm": 0.8320222240225894, "learning_rate": 1.226359669078578e-06, "loss": 0.4106, "step": 12437 }, { "epoch": 0.7790914358195399, "grad_norm": 0.8165734461065778, "learning_rate": 1.2256942629168434e-06, "loss": 0.3627, "step": 12438 }, { "epoch": 0.7791540738188822, "grad_norm": 0.8550602469740148, "learning_rate": 1.2250290121062342e-06, "loss": 0.3549, "step": 12439 }, { "epoch": 0.7792167118182245, "grad_norm": 0.8752959533651217, "learning_rate": 1.2243639166741311e-06, "loss": 0.3931, "step": 12440 }, { "epoch": 0.7792793498175669, "grad_norm": 0.8553201404728615, "learning_rate": 1.2236989766479085e-06, "loss": 0.4212, "step": 12441 }, { "epoch": 0.7793419878169091, "grad_norm": 0.8068529722051702, "learning_rate": 1.2230341920549383e-06, "loss": 0.3362, "step": 12442 }, { "epoch": 0.7794046258162515, "grad_norm": 0.8213480335348555, "learning_rate": 1.2223695629225807e-06, "loss": 0.3575, "step": 12443 }, { "epoch": 0.7794672638155937, "grad_norm": 0.6427167741126153, "learning_rate": 1.221705089278194e-06, "loss": 0.4595, "step": 12444 }, { "epoch": 0.779529901814936, "grad_norm": 0.8536325412943101, "learning_rate": 1.2210407711491278e-06, "loss": 0.3648, "step": 12445 }, { "epoch": 0.7795925398142783, "grad_norm": 0.8688561616237563, "learning_rate": 1.2203766085627245e-06, "loss": 0.4071, "step": 12446 }, { "epoch": 0.7796551778136206, "grad_norm": 0.7919945309040384, "learning_rate": 1.2197126015463212e-06, "loss": 0.3483, "step": 12447 }, { "epoch": 0.779717815812963, "grad_norm": 0.8889753382056303, "learning_rate": 1.2190487501272497e-06, "loss": 0.3962, "step": 12448 }, { "epoch": 0.7797804538123052, "grad_norm": 0.8686685973920277, "learning_rate": 1.2183850543328313e-06, "loss": 0.3653, "step": 12449 }, { "epoch": 0.7798430918116476, "grad_norm": 0.9291756812324542, "learning_rate": 1.2177215141903875e-06, "loss": 0.4342, "step": 12450 }, { "epoch": 0.7799057298109898, "grad_norm": 0.8472692897593083, "learning_rate": 1.2170581297272282e-06, "loss": 0.3879, "step": 12451 }, { "epoch": 0.7799683678103322, "grad_norm": 0.9220927086234658, "learning_rate": 1.2163949009706572e-06, "loss": 0.4131, "step": 12452 }, { "epoch": 0.7800310058096744, "grad_norm": 0.7646970546406672, "learning_rate": 1.2157318279479758e-06, "loss": 0.3551, "step": 12453 }, { "epoch": 0.7800936438090167, "grad_norm": 0.8575350989105152, "learning_rate": 1.2150689106864744e-06, "loss": 0.3873, "step": 12454 }, { "epoch": 0.780156281808359, "grad_norm": 0.8089179409442001, "learning_rate": 1.2144061492134395e-06, "loss": 0.4001, "step": 12455 }, { "epoch": 0.7802189198077013, "grad_norm": 0.8945672937818295, "learning_rate": 1.2137435435561502e-06, "loss": 0.3955, "step": 12456 }, { "epoch": 0.7802815578070437, "grad_norm": 0.8832264716450391, "learning_rate": 1.213081093741879e-06, "loss": 0.3741, "step": 12457 }, { "epoch": 0.7803441958063859, "grad_norm": 0.8712700433687108, "learning_rate": 1.2124187997978914e-06, "loss": 0.3928, "step": 12458 }, { "epoch": 0.7804068338057283, "grad_norm": 0.8635938283574096, "learning_rate": 1.211756661751451e-06, "loss": 0.38, "step": 12459 }, { "epoch": 0.7804694718050705, "grad_norm": 0.8597971984703143, "learning_rate": 1.211094679629809e-06, "loss": 0.3861, "step": 12460 }, { "epoch": 0.7805321098044129, "grad_norm": 0.8709805743478284, "learning_rate": 1.2104328534602133e-06, "loss": 0.3987, "step": 12461 }, { "epoch": 0.7805947478037552, "grad_norm": 0.8923179153279734, "learning_rate": 1.2097711832699039e-06, "loss": 0.4191, "step": 12462 }, { "epoch": 0.7806573858030974, "grad_norm": 0.8841407174795513, "learning_rate": 1.209109669086117e-06, "loss": 0.4055, "step": 12463 }, { "epoch": 0.7807200238024398, "grad_norm": 0.649097438478201, "learning_rate": 1.2084483109360795e-06, "loss": 0.4617, "step": 12464 }, { "epoch": 0.780782661801782, "grad_norm": 0.8203171901401434, "learning_rate": 1.2077871088470138e-06, "loss": 0.3729, "step": 12465 }, { "epoch": 0.7808452998011244, "grad_norm": 0.8754471972946111, "learning_rate": 1.2071260628461346e-06, "loss": 0.3971, "step": 12466 }, { "epoch": 0.7809079378004666, "grad_norm": 0.6035617523868719, "learning_rate": 1.2064651729606485e-06, "loss": 0.4664, "step": 12467 }, { "epoch": 0.780970575799809, "grad_norm": 0.8819394083496126, "learning_rate": 1.205804439217762e-06, "loss": 0.4063, "step": 12468 }, { "epoch": 0.7810332137991512, "grad_norm": 0.8239023986824715, "learning_rate": 1.2051438616446692e-06, "loss": 0.3851, "step": 12469 }, { "epoch": 0.7810958517984935, "grad_norm": 0.8408721745393352, "learning_rate": 1.204483440268559e-06, "loss": 0.3728, "step": 12470 }, { "epoch": 0.7811584897978359, "grad_norm": 0.819197871647985, "learning_rate": 1.2038231751166135e-06, "loss": 0.3958, "step": 12471 }, { "epoch": 0.7812211277971781, "grad_norm": 0.8699531864584115, "learning_rate": 1.203163066216012e-06, "loss": 0.4039, "step": 12472 }, { "epoch": 0.7812837657965205, "grad_norm": 0.8477735105218411, "learning_rate": 1.2025031135939241e-06, "loss": 0.3853, "step": 12473 }, { "epoch": 0.7813464037958627, "grad_norm": 0.8137540656951348, "learning_rate": 1.2018433172775123e-06, "loss": 0.3847, "step": 12474 }, { "epoch": 0.7814090417952051, "grad_norm": 0.8340649380492705, "learning_rate": 1.2011836772939334e-06, "loss": 0.3709, "step": 12475 }, { "epoch": 0.7814716797945473, "grad_norm": 0.8766719568733403, "learning_rate": 1.2005241936703404e-06, "loss": 0.3875, "step": 12476 }, { "epoch": 0.7815343177938897, "grad_norm": 0.950701853655318, "learning_rate": 1.1998648664338773e-06, "loss": 0.4164, "step": 12477 }, { "epoch": 0.781596955793232, "grad_norm": 0.834688152608824, "learning_rate": 1.1992056956116816e-06, "loss": 0.3837, "step": 12478 }, { "epoch": 0.7816595937925742, "grad_norm": 0.8387132246295391, "learning_rate": 1.1985466812308843e-06, "loss": 0.3852, "step": 12479 }, { "epoch": 0.7817222317919166, "grad_norm": 0.836600491456288, "learning_rate": 1.1978878233186104e-06, "loss": 0.3844, "step": 12480 }, { "epoch": 0.7817848697912588, "grad_norm": 0.9224073533991326, "learning_rate": 1.1972291219019804e-06, "loss": 0.3952, "step": 12481 }, { "epoch": 0.7818475077906012, "grad_norm": 0.8800461935576587, "learning_rate": 1.196570577008106e-06, "loss": 0.4028, "step": 12482 }, { "epoch": 0.7819101457899434, "grad_norm": 0.8824934662934697, "learning_rate": 1.195912188664093e-06, "loss": 0.4268, "step": 12483 }, { "epoch": 0.7819727837892858, "grad_norm": 0.8439125834754762, "learning_rate": 1.1952539568970378e-06, "loss": 0.4106, "step": 12484 }, { "epoch": 0.7820354217886281, "grad_norm": 0.821755625928481, "learning_rate": 1.194595881734038e-06, "loss": 0.3832, "step": 12485 }, { "epoch": 0.7820980597879704, "grad_norm": 0.8623035330065902, "learning_rate": 1.193937963202178e-06, "loss": 0.3385, "step": 12486 }, { "epoch": 0.7821606977873127, "grad_norm": 0.8529232638827211, "learning_rate": 1.1932802013285378e-06, "loss": 0.4026, "step": 12487 }, { "epoch": 0.7822233357866549, "grad_norm": 0.8541063082519852, "learning_rate": 1.192622596140191e-06, "loss": 0.394, "step": 12488 }, { "epoch": 0.7822859737859973, "grad_norm": 0.8172320237653747, "learning_rate": 1.1919651476642035e-06, "loss": 0.3528, "step": 12489 }, { "epoch": 0.7823486117853395, "grad_norm": 0.8793798024649238, "learning_rate": 1.1913078559276393e-06, "loss": 0.3789, "step": 12490 }, { "epoch": 0.7824112497846819, "grad_norm": 0.891468978010751, "learning_rate": 1.1906507209575496e-06, "loss": 0.3956, "step": 12491 }, { "epoch": 0.7824738877840242, "grad_norm": 0.8910957456063036, "learning_rate": 1.1899937427809843e-06, "loss": 0.3871, "step": 12492 }, { "epoch": 0.7825365257833665, "grad_norm": 0.6155558995388807, "learning_rate": 1.1893369214249816e-06, "loss": 0.4307, "step": 12493 }, { "epoch": 0.7825991637827088, "grad_norm": 0.8593005927652628, "learning_rate": 1.18868025691658e-06, "loss": 0.3757, "step": 12494 }, { "epoch": 0.7826618017820511, "grad_norm": 0.9084884415743852, "learning_rate": 1.1880237492828067e-06, "loss": 0.4122, "step": 12495 }, { "epoch": 0.7827244397813934, "grad_norm": 0.7915471775014099, "learning_rate": 1.1873673985506834e-06, "loss": 0.3788, "step": 12496 }, { "epoch": 0.7827870777807356, "grad_norm": 0.8134793068386456, "learning_rate": 1.1867112047472258e-06, "loss": 0.3744, "step": 12497 }, { "epoch": 0.782849715780078, "grad_norm": 0.9382485776280854, "learning_rate": 1.1860551678994408e-06, "loss": 0.3813, "step": 12498 }, { "epoch": 0.7829123537794203, "grad_norm": 0.85964889984531, "learning_rate": 1.1853992880343346e-06, "loss": 0.4223, "step": 12499 }, { "epoch": 0.7829749917787626, "grad_norm": 0.8607324581181559, "learning_rate": 1.1847435651789018e-06, "loss": 0.3756, "step": 12500 }, { "epoch": 0.7830376297781049, "grad_norm": 0.8621726120912049, "learning_rate": 1.1840879993601307e-06, "loss": 0.3838, "step": 12501 }, { "epoch": 0.7831002677774472, "grad_norm": 0.8507022633390143, "learning_rate": 1.183432590605007e-06, "loss": 0.3419, "step": 12502 }, { "epoch": 0.7831629057767895, "grad_norm": 0.8391498001211138, "learning_rate": 1.1827773389405067e-06, "loss": 0.3771, "step": 12503 }, { "epoch": 0.7832255437761317, "grad_norm": 0.8534570559758352, "learning_rate": 1.1821222443935992e-06, "loss": 0.3479, "step": 12504 }, { "epoch": 0.7832881817754741, "grad_norm": 0.836627575682535, "learning_rate": 1.181467306991249e-06, "loss": 0.3563, "step": 12505 }, { "epoch": 0.7833508197748164, "grad_norm": 0.850158505494499, "learning_rate": 1.1808125267604131e-06, "loss": 0.3563, "step": 12506 }, { "epoch": 0.7834134577741587, "grad_norm": 0.9102770451913557, "learning_rate": 1.1801579037280424e-06, "loss": 0.3952, "step": 12507 }, { "epoch": 0.783476095773501, "grad_norm": 0.9587042696770496, "learning_rate": 1.1795034379210801e-06, "loss": 0.4219, "step": 12508 }, { "epoch": 0.7835387337728433, "grad_norm": 0.8754830031479091, "learning_rate": 1.1788491293664668e-06, "loss": 0.3696, "step": 12509 }, { "epoch": 0.7836013717721856, "grad_norm": 0.8697379608864397, "learning_rate": 1.1781949780911312e-06, "loss": 0.3955, "step": 12510 }, { "epoch": 0.783664009771528, "grad_norm": 0.8332060867778333, "learning_rate": 1.1775409841220009e-06, "loss": 0.3801, "step": 12511 }, { "epoch": 0.7837266477708702, "grad_norm": 0.8471572585858689, "learning_rate": 1.1768871474859934e-06, "loss": 0.4051, "step": 12512 }, { "epoch": 0.7837892857702125, "grad_norm": 0.9334339178257178, "learning_rate": 1.1762334682100203e-06, "loss": 0.4019, "step": 12513 }, { "epoch": 0.7838519237695548, "grad_norm": 0.8653171281415826, "learning_rate": 1.1755799463209877e-06, "loss": 0.3583, "step": 12514 }, { "epoch": 0.7839145617688971, "grad_norm": 0.7601389332828199, "learning_rate": 1.1749265818457939e-06, "loss": 0.3852, "step": 12515 }, { "epoch": 0.7839771997682394, "grad_norm": 0.789613967091774, "learning_rate": 1.174273374811332e-06, "loss": 0.3471, "step": 12516 }, { "epoch": 0.7840398377675817, "grad_norm": 0.8938031222906299, "learning_rate": 1.1736203252444872e-06, "loss": 0.4242, "step": 12517 }, { "epoch": 0.784102475766924, "grad_norm": 0.8723272644781694, "learning_rate": 1.1729674331721408e-06, "loss": 0.3922, "step": 12518 }, { "epoch": 0.7841651137662663, "grad_norm": 0.8427473589109223, "learning_rate": 1.1723146986211641e-06, "loss": 0.3832, "step": 12519 }, { "epoch": 0.7842277517656087, "grad_norm": 0.8477525904731877, "learning_rate": 1.171662121618426e-06, "loss": 0.3801, "step": 12520 }, { "epoch": 0.7842903897649509, "grad_norm": 0.8007579109190751, "learning_rate": 1.1710097021907863e-06, "loss": 0.326, "step": 12521 }, { "epoch": 0.7843530277642932, "grad_norm": 0.8141666616828654, "learning_rate": 1.1703574403650969e-06, "loss": 0.3609, "step": 12522 }, { "epoch": 0.7844156657636355, "grad_norm": 0.8110314859729555, "learning_rate": 1.169705336168207e-06, "loss": 0.3434, "step": 12523 }, { "epoch": 0.7844783037629778, "grad_norm": 0.6035203315559744, "learning_rate": 1.1690533896269557e-06, "loss": 0.4563, "step": 12524 }, { "epoch": 0.7845409417623201, "grad_norm": 0.8764179827479062, "learning_rate": 1.1684016007681782e-06, "loss": 0.3841, "step": 12525 }, { "epoch": 0.7846035797616624, "grad_norm": 0.8540981002897962, "learning_rate": 1.1677499696187006e-06, "loss": 0.3854, "step": 12526 }, { "epoch": 0.7846662177610048, "grad_norm": 0.8093702740982959, "learning_rate": 1.1670984962053456e-06, "loss": 0.3298, "step": 12527 }, { "epoch": 0.784728855760347, "grad_norm": 0.8763508667046342, "learning_rate": 1.1664471805549294e-06, "loss": 0.3659, "step": 12528 }, { "epoch": 0.7847914937596893, "grad_norm": 0.9142118776960689, "learning_rate": 1.1657960226942588e-06, "loss": 0.4067, "step": 12529 }, { "epoch": 0.7848541317590316, "grad_norm": 0.9178206139921472, "learning_rate": 1.1651450226501354e-06, "loss": 0.4157, "step": 12530 }, { "epoch": 0.7849167697583739, "grad_norm": 0.8511087880073451, "learning_rate": 1.1644941804493549e-06, "loss": 0.4155, "step": 12531 }, { "epoch": 0.7849794077577162, "grad_norm": 0.8791294288468114, "learning_rate": 1.163843496118705e-06, "loss": 0.4157, "step": 12532 }, { "epoch": 0.7850420457570585, "grad_norm": 0.7652534448291759, "learning_rate": 1.1631929696849697e-06, "loss": 0.3476, "step": 12533 }, { "epoch": 0.7851046837564009, "grad_norm": 0.8484990863994181, "learning_rate": 1.1625426011749224e-06, "loss": 0.394, "step": 12534 }, { "epoch": 0.7851673217557431, "grad_norm": 0.8330295214398087, "learning_rate": 1.161892390615335e-06, "loss": 0.4016, "step": 12535 }, { "epoch": 0.7852299597550855, "grad_norm": 0.9068729175260264, "learning_rate": 1.1612423380329678e-06, "loss": 0.4325, "step": 12536 }, { "epoch": 0.7852925977544277, "grad_norm": 0.908136726302521, "learning_rate": 1.16059244345458e-06, "loss": 0.3567, "step": 12537 }, { "epoch": 0.78535523575377, "grad_norm": 0.8699261599807185, "learning_rate": 1.1599427069069203e-06, "loss": 0.4127, "step": 12538 }, { "epoch": 0.7854178737531123, "grad_norm": 0.9623097158692404, "learning_rate": 1.1592931284167308e-06, "loss": 0.3789, "step": 12539 }, { "epoch": 0.7854805117524546, "grad_norm": 0.8626034928005188, "learning_rate": 1.1586437080107492e-06, "loss": 0.3802, "step": 12540 }, { "epoch": 0.785543149751797, "grad_norm": 0.8915173803094317, "learning_rate": 1.157994445715706e-06, "loss": 0.4068, "step": 12541 }, { "epoch": 0.7856057877511392, "grad_norm": 0.9344316638622888, "learning_rate": 1.157345341558324e-06, "loss": 0.3734, "step": 12542 }, { "epoch": 0.7856684257504816, "grad_norm": 0.7999672358641646, "learning_rate": 1.1566963955653198e-06, "loss": 0.3819, "step": 12543 }, { "epoch": 0.7857310637498238, "grad_norm": 0.8675701705065052, "learning_rate": 1.156047607763407e-06, "loss": 0.3668, "step": 12544 }, { "epoch": 0.7857937017491662, "grad_norm": 0.9980498018666933, "learning_rate": 1.1553989781792863e-06, "loss": 0.4194, "step": 12545 }, { "epoch": 0.7858563397485084, "grad_norm": 0.8503627040517849, "learning_rate": 1.1547505068396586e-06, "loss": 0.3792, "step": 12546 }, { "epoch": 0.7859189777478507, "grad_norm": 0.8941108668815011, "learning_rate": 1.1541021937712139e-06, "loss": 0.378, "step": 12547 }, { "epoch": 0.785981615747193, "grad_norm": 0.8288627666395484, "learning_rate": 1.153454039000637e-06, "loss": 0.3863, "step": 12548 }, { "epoch": 0.7860442537465353, "grad_norm": 0.8930496665814402, "learning_rate": 1.1528060425546055e-06, "loss": 0.3555, "step": 12549 }, { "epoch": 0.7861068917458777, "grad_norm": 0.9586383250394457, "learning_rate": 1.1521582044597913e-06, "loss": 0.3987, "step": 12550 }, { "epoch": 0.7861695297452199, "grad_norm": 0.9251726740107891, "learning_rate": 1.15151052474286e-06, "loss": 0.3975, "step": 12551 }, { "epoch": 0.7862321677445623, "grad_norm": 0.7760825067012649, "learning_rate": 1.1508630034304679e-06, "loss": 0.3253, "step": 12552 }, { "epoch": 0.7862948057439045, "grad_norm": 0.9326463251964879, "learning_rate": 1.150215640549271e-06, "loss": 0.4276, "step": 12553 }, { "epoch": 0.7863574437432468, "grad_norm": 0.8467911816758141, "learning_rate": 1.1495684361259106e-06, "loss": 0.3598, "step": 12554 }, { "epoch": 0.7864200817425891, "grad_norm": 0.8850768907093821, "learning_rate": 1.1489213901870306e-06, "loss": 0.3702, "step": 12555 }, { "epoch": 0.7864827197419314, "grad_norm": 0.8394291562691212, "learning_rate": 1.1482745027592601e-06, "loss": 0.3661, "step": 12556 }, { "epoch": 0.7865453577412738, "grad_norm": 0.8974553824856579, "learning_rate": 1.147627773869226e-06, "loss": 0.4004, "step": 12557 }, { "epoch": 0.786607995740616, "grad_norm": 0.9747020881095688, "learning_rate": 1.146981203543548e-06, "loss": 0.3862, "step": 12558 }, { "epoch": 0.7866706337399584, "grad_norm": 0.7948360944864055, "learning_rate": 1.1463347918088392e-06, "loss": 0.3311, "step": 12559 }, { "epoch": 0.7867332717393006, "grad_norm": 0.8013871823986165, "learning_rate": 1.1456885386917039e-06, "loss": 0.342, "step": 12560 }, { "epoch": 0.786795909738643, "grad_norm": 0.7904569508281576, "learning_rate": 1.1450424442187446e-06, "loss": 0.3585, "step": 12561 }, { "epoch": 0.7868585477379852, "grad_norm": 0.9295286746258085, "learning_rate": 1.1443965084165548e-06, "loss": 0.3997, "step": 12562 }, { "epoch": 0.7869211857373275, "grad_norm": 0.7859921129280641, "learning_rate": 1.143750731311718e-06, "loss": 0.3437, "step": 12563 }, { "epoch": 0.7869838237366699, "grad_norm": 0.9220150079920033, "learning_rate": 1.1431051129308191e-06, "loss": 0.4354, "step": 12564 }, { "epoch": 0.7870464617360121, "grad_norm": 0.8265061932800637, "learning_rate": 1.142459653300429e-06, "loss": 0.385, "step": 12565 }, { "epoch": 0.7871090997353545, "grad_norm": 0.8259562613414685, "learning_rate": 1.1418143524471159e-06, "loss": 0.395, "step": 12566 }, { "epoch": 0.7871717377346967, "grad_norm": 0.8922418850977815, "learning_rate": 1.1411692103974398e-06, "loss": 0.4167, "step": 12567 }, { "epoch": 0.7872343757340391, "grad_norm": 0.9220441100046687, "learning_rate": 1.140524227177956e-06, "loss": 0.4177, "step": 12568 }, { "epoch": 0.7872970137333813, "grad_norm": 0.7980269045755313, "learning_rate": 1.1398794028152094e-06, "loss": 0.3976, "step": 12569 }, { "epoch": 0.7873596517327237, "grad_norm": 1.0131259592108715, "learning_rate": 1.1392347373357443e-06, "loss": 0.4288, "step": 12570 }, { "epoch": 0.787422289732066, "grad_norm": 0.8067432752867904, "learning_rate": 1.138590230766094e-06, "loss": 0.358, "step": 12571 }, { "epoch": 0.7874849277314082, "grad_norm": 0.839617901168239, "learning_rate": 1.1379458831327856e-06, "loss": 0.3895, "step": 12572 }, { "epoch": 0.7875475657307506, "grad_norm": 0.8219542803675538, "learning_rate": 1.1373016944623427e-06, "loss": 0.3365, "step": 12573 }, { "epoch": 0.7876102037300928, "grad_norm": 0.8806636853603678, "learning_rate": 1.1366576647812788e-06, "loss": 0.3668, "step": 12574 }, { "epoch": 0.7876728417294352, "grad_norm": 0.8226454022683106, "learning_rate": 1.1360137941161027e-06, "loss": 0.3973, "step": 12575 }, { "epoch": 0.7877354797287774, "grad_norm": 0.959395050021133, "learning_rate": 1.1353700824933157e-06, "loss": 0.4064, "step": 12576 }, { "epoch": 0.7877981177281198, "grad_norm": 0.9166673214754757, "learning_rate": 1.1347265299394139e-06, "loss": 0.4014, "step": 12577 }, { "epoch": 0.7878607557274621, "grad_norm": 0.9729915466882186, "learning_rate": 1.1340831364808836e-06, "loss": 0.4254, "step": 12578 }, { "epoch": 0.7879233937268043, "grad_norm": 0.8089382619341868, "learning_rate": 1.133439902144211e-06, "loss": 0.3267, "step": 12579 }, { "epoch": 0.7879860317261467, "grad_norm": 0.8313768137378706, "learning_rate": 1.1327968269558692e-06, "loss": 0.3885, "step": 12580 }, { "epoch": 0.7880486697254889, "grad_norm": 0.9242032108179415, "learning_rate": 1.1321539109423263e-06, "loss": 0.4278, "step": 12581 }, { "epoch": 0.7881113077248313, "grad_norm": 0.8736085919046747, "learning_rate": 1.1315111541300483e-06, "loss": 0.4018, "step": 12582 }, { "epoch": 0.7881739457241735, "grad_norm": 0.879216381610442, "learning_rate": 1.1308685565454885e-06, "loss": 0.3933, "step": 12583 }, { "epoch": 0.7882365837235159, "grad_norm": 0.8693243379463957, "learning_rate": 1.1302261182150975e-06, "loss": 0.4112, "step": 12584 }, { "epoch": 0.7882992217228582, "grad_norm": 0.9245479163924568, "learning_rate": 1.129583839165318e-06, "loss": 0.4047, "step": 12585 }, { "epoch": 0.7883618597222005, "grad_norm": 0.9313087788020649, "learning_rate": 1.1289417194225842e-06, "loss": 0.3981, "step": 12586 }, { "epoch": 0.7884244977215428, "grad_norm": 0.8578031651168362, "learning_rate": 1.1282997590133293e-06, "loss": 0.3761, "step": 12587 }, { "epoch": 0.788487135720885, "grad_norm": 0.8770181245907679, "learning_rate": 1.1276579579639752e-06, "loss": 0.3792, "step": 12588 }, { "epoch": 0.7885497737202274, "grad_norm": 0.8640261512468043, "learning_rate": 1.127016316300938e-06, "loss": 0.3571, "step": 12589 }, { "epoch": 0.7886124117195696, "grad_norm": 0.8450787518959207, "learning_rate": 1.1263748340506264e-06, "loss": 0.3984, "step": 12590 }, { "epoch": 0.788675049718912, "grad_norm": 0.8863293540293675, "learning_rate": 1.1257335112394473e-06, "loss": 0.3868, "step": 12591 }, { "epoch": 0.7887376877182543, "grad_norm": 0.8399792939544477, "learning_rate": 1.1250923478937959e-06, "loss": 0.4424, "step": 12592 }, { "epoch": 0.7888003257175966, "grad_norm": 0.8192364635506901, "learning_rate": 1.1244513440400629e-06, "loss": 0.3765, "step": 12593 }, { "epoch": 0.7888629637169389, "grad_norm": 0.6182353984142571, "learning_rate": 1.1238104997046317e-06, "loss": 0.4126, "step": 12594 }, { "epoch": 0.7889256017162812, "grad_norm": 0.8809183708342481, "learning_rate": 1.1231698149138788e-06, "loss": 0.3773, "step": 12595 }, { "epoch": 0.7889882397156235, "grad_norm": 0.8520117725858163, "learning_rate": 1.1225292896941764e-06, "loss": 0.3609, "step": 12596 }, { "epoch": 0.7890508777149657, "grad_norm": 0.9269253258679584, "learning_rate": 1.1218889240718888e-06, "loss": 0.4004, "step": 12597 }, { "epoch": 0.7891135157143081, "grad_norm": 0.8451853310401818, "learning_rate": 1.1212487180733728e-06, "loss": 0.343, "step": 12598 }, { "epoch": 0.7891761537136504, "grad_norm": 0.8698372599410954, "learning_rate": 1.1206086717249799e-06, "loss": 0.4042, "step": 12599 }, { "epoch": 0.7892387917129927, "grad_norm": 0.8735373740461891, "learning_rate": 1.1199687850530522e-06, "loss": 0.3984, "step": 12600 }, { "epoch": 0.789301429712335, "grad_norm": 0.8398494756134819, "learning_rate": 1.1193290580839306e-06, "loss": 0.4091, "step": 12601 }, { "epoch": 0.7893640677116773, "grad_norm": 0.597634898509433, "learning_rate": 1.1186894908439455e-06, "loss": 0.4678, "step": 12602 }, { "epoch": 0.7894267057110196, "grad_norm": 0.840905532596803, "learning_rate": 1.1180500833594215e-06, "loss": 0.3912, "step": 12603 }, { "epoch": 0.789489343710362, "grad_norm": 0.8710162699311828, "learning_rate": 1.1174108356566753e-06, "loss": 0.3717, "step": 12604 }, { "epoch": 0.7895519817097042, "grad_norm": 0.8385308820675974, "learning_rate": 1.116771747762021e-06, "loss": 0.3737, "step": 12605 }, { "epoch": 0.7896146197090465, "grad_norm": 0.8777767513121948, "learning_rate": 1.1161328197017623e-06, "loss": 0.4109, "step": 12606 }, { "epoch": 0.7896772577083888, "grad_norm": 0.8927841442415565, "learning_rate": 1.1154940515021979e-06, "loss": 0.3905, "step": 12607 }, { "epoch": 0.7897398957077311, "grad_norm": 0.8942886696750962, "learning_rate": 1.114855443189619e-06, "loss": 0.3459, "step": 12608 }, { "epoch": 0.7898025337070734, "grad_norm": 0.8052167519777793, "learning_rate": 1.1142169947903098e-06, "loss": 0.3408, "step": 12609 }, { "epoch": 0.7898651717064157, "grad_norm": 0.859302406979004, "learning_rate": 1.113578706330552e-06, "loss": 0.3741, "step": 12610 }, { "epoch": 0.789927809705758, "grad_norm": 0.8917987913435566, "learning_rate": 1.1129405778366158e-06, "loss": 0.4072, "step": 12611 }, { "epoch": 0.7899904477051003, "grad_norm": 0.8463703405600349, "learning_rate": 1.1123026093347656e-06, "loss": 0.3638, "step": 12612 }, { "epoch": 0.7900530857044425, "grad_norm": 0.8317197832203322, "learning_rate": 1.1116648008512627e-06, "loss": 0.394, "step": 12613 }, { "epoch": 0.7901157237037849, "grad_norm": 0.5599908033273108, "learning_rate": 1.1110271524123584e-06, "loss": 0.4118, "step": 12614 }, { "epoch": 0.7901783617031272, "grad_norm": 0.6207584858572737, "learning_rate": 1.1103896640442985e-06, "loss": 0.449, "step": 12615 }, { "epoch": 0.7902409997024695, "grad_norm": 0.822705127396335, "learning_rate": 1.109752335773322e-06, "loss": 0.3815, "step": 12616 }, { "epoch": 0.7903036377018118, "grad_norm": 0.9334963578160161, "learning_rate": 1.1091151676256613e-06, "loss": 0.3956, "step": 12617 }, { "epoch": 0.7903662757011541, "grad_norm": 0.8770112302657027, "learning_rate": 1.1084781596275412e-06, "loss": 0.3676, "step": 12618 }, { "epoch": 0.7904289137004964, "grad_norm": 0.8490243165927922, "learning_rate": 1.1078413118051834e-06, "loss": 0.3391, "step": 12619 }, { "epoch": 0.7904915516998388, "grad_norm": 0.9090975941819758, "learning_rate": 1.1072046241848e-06, "loss": 0.4107, "step": 12620 }, { "epoch": 0.790554189699181, "grad_norm": 0.9059335009976701, "learning_rate": 1.1065680967925952e-06, "loss": 0.4311, "step": 12621 }, { "epoch": 0.7906168276985233, "grad_norm": 0.8679959157044181, "learning_rate": 1.1059317296547716e-06, "loss": 0.3827, "step": 12622 }, { "epoch": 0.7906794656978656, "grad_norm": 0.790533073789331, "learning_rate": 1.105295522797521e-06, "loss": 0.3434, "step": 12623 }, { "epoch": 0.7907421036972079, "grad_norm": 0.7876080744686105, "learning_rate": 1.1046594762470293e-06, "loss": 0.4153, "step": 12624 }, { "epoch": 0.7908047416965502, "grad_norm": 0.5997415484500698, "learning_rate": 1.1040235900294765e-06, "loss": 0.452, "step": 12625 }, { "epoch": 0.7908673796958925, "grad_norm": 0.7956269416036669, "learning_rate": 1.103387864171036e-06, "loss": 0.3866, "step": 12626 }, { "epoch": 0.7909300176952349, "grad_norm": 0.8538456122174175, "learning_rate": 1.1027522986978734e-06, "loss": 0.3825, "step": 12627 }, { "epoch": 0.7909926556945771, "grad_norm": 0.867656428991646, "learning_rate": 1.1021168936361503e-06, "loss": 0.3735, "step": 12628 }, { "epoch": 0.7910552936939195, "grad_norm": 0.8558457468742733, "learning_rate": 1.1014816490120194e-06, "loss": 0.3853, "step": 12629 }, { "epoch": 0.7911179316932617, "grad_norm": 0.8921446580853288, "learning_rate": 1.1008465648516265e-06, "loss": 0.3763, "step": 12630 }, { "epoch": 0.791180569692604, "grad_norm": 0.825027631348653, "learning_rate": 1.1002116411811137e-06, "loss": 0.3547, "step": 12631 }, { "epoch": 0.7912432076919463, "grad_norm": 0.8900634685150479, "learning_rate": 1.0995768780266136e-06, "loss": 0.4053, "step": 12632 }, { "epoch": 0.7913058456912886, "grad_norm": 0.8096631384442109, "learning_rate": 1.0989422754142532e-06, "loss": 0.3611, "step": 12633 }, { "epoch": 0.791368483690631, "grad_norm": 0.8979519719819447, "learning_rate": 1.0983078333701524e-06, "loss": 0.4074, "step": 12634 }, { "epoch": 0.7914311216899732, "grad_norm": 0.8752208134860319, "learning_rate": 1.0976735519204258e-06, "loss": 0.4095, "step": 12635 }, { "epoch": 0.7914937596893156, "grad_norm": 0.8851557733847465, "learning_rate": 1.0970394310911787e-06, "loss": 0.3781, "step": 12636 }, { "epoch": 0.7915563976886578, "grad_norm": 0.8385253708011943, "learning_rate": 1.0964054709085141e-06, "loss": 0.4098, "step": 12637 }, { "epoch": 0.7916190356880001, "grad_norm": 0.8410445452215256, "learning_rate": 1.095771671398524e-06, "loss": 0.3716, "step": 12638 }, { "epoch": 0.7916816736873424, "grad_norm": 0.8717988631173932, "learning_rate": 1.095138032587298e-06, "loss": 0.3856, "step": 12639 }, { "epoch": 0.7917443116866847, "grad_norm": 0.5863511275229438, "learning_rate": 1.0945045545009148e-06, "loss": 0.4428, "step": 12640 }, { "epoch": 0.791806949686027, "grad_norm": 0.8118897900505496, "learning_rate": 1.0938712371654496e-06, "loss": 0.3813, "step": 12641 }, { "epoch": 0.7918695876853693, "grad_norm": 0.8636006428571997, "learning_rate": 1.0932380806069686e-06, "loss": 0.3738, "step": 12642 }, { "epoch": 0.7919322256847117, "grad_norm": 0.7750494134827396, "learning_rate": 1.092605084851534e-06, "loss": 0.3581, "step": 12643 }, { "epoch": 0.7919948636840539, "grad_norm": 0.785496638956157, "learning_rate": 1.091972249925199e-06, "loss": 0.3479, "step": 12644 }, { "epoch": 0.7920575016833963, "grad_norm": 0.8270713459774287, "learning_rate": 1.09133957585401e-06, "loss": 0.3946, "step": 12645 }, { "epoch": 0.7921201396827385, "grad_norm": 0.9321326471054769, "learning_rate": 1.0907070626640116e-06, "loss": 0.4163, "step": 12646 }, { "epoch": 0.7921827776820808, "grad_norm": 0.8786108051564724, "learning_rate": 1.0900747103812342e-06, "loss": 0.3923, "step": 12647 }, { "epoch": 0.7922454156814231, "grad_norm": 0.806008713832629, "learning_rate": 1.0894425190317088e-06, "loss": 0.3673, "step": 12648 }, { "epoch": 0.7923080536807654, "grad_norm": 0.8976113807181694, "learning_rate": 1.0888104886414553e-06, "loss": 0.4349, "step": 12649 }, { "epoch": 0.7923706916801078, "grad_norm": 0.9270367645834846, "learning_rate": 1.0881786192364879e-06, "loss": 0.4159, "step": 12650 }, { "epoch": 0.79243332967945, "grad_norm": 0.8997108289121477, "learning_rate": 1.087546910842815e-06, "loss": 0.4275, "step": 12651 }, { "epoch": 0.7924959676787924, "grad_norm": 0.5780532063275952, "learning_rate": 1.0869153634864371e-06, "loss": 0.4639, "step": 12652 }, { "epoch": 0.7925586056781346, "grad_norm": 0.8575323407626309, "learning_rate": 1.0862839771933491e-06, "loss": 0.3554, "step": 12653 }, { "epoch": 0.792621243677477, "grad_norm": 0.8311403360871389, "learning_rate": 1.0856527519895376e-06, "loss": 0.3785, "step": 12654 }, { "epoch": 0.7926838816768192, "grad_norm": 0.8189344389762331, "learning_rate": 1.0850216879009872e-06, "loss": 0.3703, "step": 12655 }, { "epoch": 0.7927465196761615, "grad_norm": 0.9087566323591416, "learning_rate": 1.0843907849536689e-06, "loss": 0.4116, "step": 12656 }, { "epoch": 0.7928091576755039, "grad_norm": 0.6130651917232283, "learning_rate": 1.083760043173554e-06, "loss": 0.4546, "step": 12657 }, { "epoch": 0.7928717956748461, "grad_norm": 0.8775239804346403, "learning_rate": 1.083129462586603e-06, "loss": 0.3936, "step": 12658 }, { "epoch": 0.7929344336741885, "grad_norm": 0.8763540023287031, "learning_rate": 1.0824990432187704e-06, "loss": 0.3861, "step": 12659 }, { "epoch": 0.7929970716735307, "grad_norm": 0.9093882743295284, "learning_rate": 1.0818687850960035e-06, "loss": 0.4083, "step": 12660 }, { "epoch": 0.7930597096728731, "grad_norm": 0.836526001878492, "learning_rate": 1.0812386882442454e-06, "loss": 0.3766, "step": 12661 }, { "epoch": 0.7931223476722153, "grad_norm": 0.911648770826269, "learning_rate": 1.0806087526894305e-06, "loss": 0.3861, "step": 12662 }, { "epoch": 0.7931849856715576, "grad_norm": 0.8992772451276052, "learning_rate": 1.0799789784574848e-06, "loss": 0.4083, "step": 12663 }, { "epoch": 0.7932476236709, "grad_norm": 1.0113996450159366, "learning_rate": 1.079349365574332e-06, "loss": 0.3847, "step": 12664 }, { "epoch": 0.7933102616702422, "grad_norm": 0.8475065005162706, "learning_rate": 1.0787199140658883e-06, "loss": 0.3937, "step": 12665 }, { "epoch": 0.7933728996695846, "grad_norm": 0.8205202923840063, "learning_rate": 1.0780906239580613e-06, "loss": 0.384, "step": 12666 }, { "epoch": 0.7934355376689268, "grad_norm": 0.8926234557100142, "learning_rate": 1.0774614952767515e-06, "loss": 0.4043, "step": 12667 }, { "epoch": 0.7934981756682692, "grad_norm": 0.8552732714881277, "learning_rate": 1.0768325280478553e-06, "loss": 0.381, "step": 12668 }, { "epoch": 0.7935608136676114, "grad_norm": 0.7981590099004809, "learning_rate": 1.07620372229726e-06, "loss": 0.3509, "step": 12669 }, { "epoch": 0.7936234516669538, "grad_norm": 0.8442353233515729, "learning_rate": 1.0755750780508477e-06, "loss": 0.3527, "step": 12670 }, { "epoch": 0.7936860896662961, "grad_norm": 0.9265095986092947, "learning_rate": 1.0749465953344923e-06, "loss": 0.3712, "step": 12671 }, { "epoch": 0.7937487276656383, "grad_norm": 0.8188055747797444, "learning_rate": 1.0743182741740649e-06, "loss": 0.4183, "step": 12672 }, { "epoch": 0.7938113656649807, "grad_norm": 0.9183149116240987, "learning_rate": 1.073690114595425e-06, "loss": 0.4055, "step": 12673 }, { "epoch": 0.7938740036643229, "grad_norm": 0.9342348303657986, "learning_rate": 1.0730621166244299e-06, "loss": 0.4466, "step": 12674 }, { "epoch": 0.7939366416636653, "grad_norm": 0.9022680807451213, "learning_rate": 1.0724342802869264e-06, "loss": 0.4047, "step": 12675 }, { "epoch": 0.7939992796630075, "grad_norm": 0.7807491409273818, "learning_rate": 1.071806605608758e-06, "loss": 0.3186, "step": 12676 }, { "epoch": 0.7940619176623499, "grad_norm": 0.8274318545264474, "learning_rate": 1.0711790926157578e-06, "loss": 0.3814, "step": 12677 }, { "epoch": 0.7941245556616922, "grad_norm": 0.9652753259474526, "learning_rate": 1.0705517413337557e-06, "loss": 0.3958, "step": 12678 }, { "epoch": 0.7941871936610345, "grad_norm": 0.8408497737362595, "learning_rate": 1.0699245517885736e-06, "loss": 0.3942, "step": 12679 }, { "epoch": 0.7942498316603768, "grad_norm": 0.8293830315126343, "learning_rate": 1.069297524006025e-06, "loss": 0.3778, "step": 12680 }, { "epoch": 0.794312469659719, "grad_norm": 0.8788901701448908, "learning_rate": 1.0686706580119204e-06, "loss": 0.426, "step": 12681 }, { "epoch": 0.7943751076590614, "grad_norm": 0.8435570552448152, "learning_rate": 1.0680439538320609e-06, "loss": 0.3606, "step": 12682 }, { "epoch": 0.7944377456584036, "grad_norm": 0.8403595460808565, "learning_rate": 1.0674174114922431e-06, "loss": 0.3648, "step": 12683 }, { "epoch": 0.794500383657746, "grad_norm": 0.855074282066039, "learning_rate": 1.0667910310182545e-06, "loss": 0.3904, "step": 12684 }, { "epoch": 0.7945630216570883, "grad_norm": 0.8392856796740557, "learning_rate": 1.066164812435877e-06, "loss": 0.3768, "step": 12685 }, { "epoch": 0.7946256596564306, "grad_norm": 0.8575444780456306, "learning_rate": 1.0655387557708856e-06, "loss": 0.409, "step": 12686 }, { "epoch": 0.7946882976557729, "grad_norm": 0.8846081940541537, "learning_rate": 1.06491286104905e-06, "loss": 0.3603, "step": 12687 }, { "epoch": 0.7947509356551151, "grad_norm": 0.8530025768969287, "learning_rate": 1.064287128296131e-06, "loss": 0.3767, "step": 12688 }, { "epoch": 0.7948135736544575, "grad_norm": 0.7644961199816421, "learning_rate": 1.0636615575378834e-06, "loss": 0.3359, "step": 12689 }, { "epoch": 0.7948762116537997, "grad_norm": 0.8357518956747534, "learning_rate": 1.0630361488000574e-06, "loss": 0.4086, "step": 12690 }, { "epoch": 0.7949388496531421, "grad_norm": 0.8397837417332052, "learning_rate": 1.062410902108395e-06, "loss": 0.3876, "step": 12691 }, { "epoch": 0.7950014876524844, "grad_norm": 0.8433067173441143, "learning_rate": 1.0617858174886286e-06, "loss": 0.3859, "step": 12692 }, { "epoch": 0.7950641256518267, "grad_norm": 1.3014160315739554, "learning_rate": 1.061160894966491e-06, "loss": 0.379, "step": 12693 }, { "epoch": 0.795126763651169, "grad_norm": 0.9073540256244387, "learning_rate": 1.0605361345677019e-06, "loss": 0.42, "step": 12694 }, { "epoch": 0.7951894016505113, "grad_norm": 0.8865999213143042, "learning_rate": 1.0599115363179763e-06, "loss": 0.3702, "step": 12695 }, { "epoch": 0.7952520396498536, "grad_norm": 0.8889367291465299, "learning_rate": 1.0592871002430232e-06, "loss": 0.4249, "step": 12696 }, { "epoch": 0.7953146776491958, "grad_norm": 0.908479775739523, "learning_rate": 1.0586628263685434e-06, "loss": 0.4182, "step": 12697 }, { "epoch": 0.7953773156485382, "grad_norm": 0.9078028969389731, "learning_rate": 1.0580387147202341e-06, "loss": 0.3626, "step": 12698 }, { "epoch": 0.7954399536478804, "grad_norm": 0.8241063587829836, "learning_rate": 1.0574147653237838e-06, "loss": 0.3557, "step": 12699 }, { "epoch": 0.7955025916472228, "grad_norm": 0.8994043923209735, "learning_rate": 1.056790978204873e-06, "loss": 0.3577, "step": 12700 }, { "epoch": 0.7955652296465651, "grad_norm": 0.8892350287015249, "learning_rate": 1.0561673533891758e-06, "loss": 0.373, "step": 12701 }, { "epoch": 0.7956278676459074, "grad_norm": 0.8268516833893215, "learning_rate": 1.0555438909023642e-06, "loss": 0.3805, "step": 12702 }, { "epoch": 0.7956905056452497, "grad_norm": 0.889779483812979, "learning_rate": 1.0549205907700982e-06, "loss": 0.4234, "step": 12703 }, { "epoch": 0.795753143644592, "grad_norm": 0.9099650677779556, "learning_rate": 1.0542974530180327e-06, "loss": 0.3768, "step": 12704 }, { "epoch": 0.7958157816439343, "grad_norm": 0.9334918800545099, "learning_rate": 1.0536744776718166e-06, "loss": 0.4278, "step": 12705 }, { "epoch": 0.7958784196432765, "grad_norm": 0.8521032388026354, "learning_rate": 1.0530516647570898e-06, "loss": 0.3968, "step": 12706 }, { "epoch": 0.7959410576426189, "grad_norm": 0.8068091740613124, "learning_rate": 1.0524290142994904e-06, "loss": 0.3907, "step": 12707 }, { "epoch": 0.7960036956419612, "grad_norm": 0.8942302235430846, "learning_rate": 1.0518065263246457e-06, "loss": 0.369, "step": 12708 }, { "epoch": 0.7960663336413035, "grad_norm": 0.8851710822145202, "learning_rate": 1.0511842008581774e-06, "loss": 0.3923, "step": 12709 }, { "epoch": 0.7961289716406458, "grad_norm": 0.8275365678752492, "learning_rate": 1.0505620379256986e-06, "loss": 0.3608, "step": 12710 }, { "epoch": 0.7961916096399881, "grad_norm": 0.8853637199950685, "learning_rate": 1.0499400375528207e-06, "loss": 0.3485, "step": 12711 }, { "epoch": 0.7962542476393304, "grad_norm": 0.8697200931378051, "learning_rate": 1.0493181997651442e-06, "loss": 0.4293, "step": 12712 }, { "epoch": 0.7963168856386728, "grad_norm": 0.8649209403877667, "learning_rate": 1.048696524588264e-06, "loss": 0.3971, "step": 12713 }, { "epoch": 0.796379523638015, "grad_norm": 0.8757302029157517, "learning_rate": 1.0480750120477677e-06, "loss": 0.4115, "step": 12714 }, { "epoch": 0.7964421616373573, "grad_norm": 0.8769369671085957, "learning_rate": 1.0474536621692365e-06, "loss": 0.3802, "step": 12715 }, { "epoch": 0.7965047996366996, "grad_norm": 0.8409766682000331, "learning_rate": 1.0468324749782476e-06, "loss": 0.3472, "step": 12716 }, { "epoch": 0.7965674376360419, "grad_norm": 0.8421783974634877, "learning_rate": 1.0462114505003678e-06, "loss": 0.3628, "step": 12717 }, { "epoch": 0.7966300756353842, "grad_norm": 0.910293249464771, "learning_rate": 1.0455905887611584e-06, "loss": 0.4533, "step": 12718 }, { "epoch": 0.7966927136347265, "grad_norm": 0.8654381407813934, "learning_rate": 1.0449698897861733e-06, "loss": 0.39, "step": 12719 }, { "epoch": 0.7967553516340689, "grad_norm": 0.8529069523681924, "learning_rate": 1.0443493536009626e-06, "loss": 0.3851, "step": 12720 }, { "epoch": 0.7968179896334111, "grad_norm": 0.9004478516463202, "learning_rate": 1.043728980231067e-06, "loss": 0.3921, "step": 12721 }, { "epoch": 0.7968806276327534, "grad_norm": 0.8334458110882214, "learning_rate": 1.0431087697020204e-06, "loss": 0.3558, "step": 12722 }, { "epoch": 0.7969432656320957, "grad_norm": 0.8197447330094375, "learning_rate": 1.0424887220393503e-06, "loss": 0.3839, "step": 12723 }, { "epoch": 0.797005903631438, "grad_norm": 0.9066605462386369, "learning_rate": 1.0418688372685805e-06, "loss": 0.4233, "step": 12724 }, { "epoch": 0.7970685416307803, "grad_norm": 0.8836439588711488, "learning_rate": 1.0412491154152232e-06, "loss": 0.3832, "step": 12725 }, { "epoch": 0.7971311796301226, "grad_norm": 0.8688585782928075, "learning_rate": 1.0406295565047881e-06, "loss": 0.3823, "step": 12726 }, { "epoch": 0.797193817629465, "grad_norm": 0.9412892232969037, "learning_rate": 1.0400101605627744e-06, "loss": 0.4504, "step": 12727 }, { "epoch": 0.7972564556288072, "grad_norm": 0.9523238928349955, "learning_rate": 1.039390927614677e-06, "loss": 0.3978, "step": 12728 }, { "epoch": 0.7973190936281496, "grad_norm": 0.6234207997305287, "learning_rate": 1.038771857685985e-06, "loss": 0.4517, "step": 12729 }, { "epoch": 0.7973817316274918, "grad_norm": 0.8550890230783986, "learning_rate": 1.0381529508021786e-06, "loss": 0.3949, "step": 12730 }, { "epoch": 0.7974443696268341, "grad_norm": 1.0150004999949873, "learning_rate": 1.0375342069887323e-06, "loss": 0.3905, "step": 12731 }, { "epoch": 0.7975070076261764, "grad_norm": 0.8228910849121629, "learning_rate": 1.0369156262711123e-06, "loss": 0.3705, "step": 12732 }, { "epoch": 0.7975696456255187, "grad_norm": 0.826588708835123, "learning_rate": 1.0362972086747814e-06, "loss": 0.3893, "step": 12733 }, { "epoch": 0.797632283624861, "grad_norm": 0.5837220473426723, "learning_rate": 1.0356789542251939e-06, "loss": 0.4488, "step": 12734 }, { "epoch": 0.7976949216242033, "grad_norm": 0.8319897749135794, "learning_rate": 1.035060862947796e-06, "loss": 0.369, "step": 12735 }, { "epoch": 0.7977575596235457, "grad_norm": 0.9629464093938124, "learning_rate": 1.0344429348680285e-06, "loss": 0.4063, "step": 12736 }, { "epoch": 0.7978201976228879, "grad_norm": 0.8243629955768544, "learning_rate": 1.0338251700113267e-06, "loss": 0.3619, "step": 12737 }, { "epoch": 0.7978828356222303, "grad_norm": 0.8902547639837998, "learning_rate": 1.033207568403115e-06, "loss": 0.3954, "step": 12738 }, { "epoch": 0.7979454736215725, "grad_norm": 0.8090071329703602, "learning_rate": 1.0325901300688179e-06, "loss": 0.3676, "step": 12739 }, { "epoch": 0.7980081116209148, "grad_norm": 0.8107354720379732, "learning_rate": 1.031972855033847e-06, "loss": 0.3591, "step": 12740 }, { "epoch": 0.7980707496202571, "grad_norm": 0.9124359651876515, "learning_rate": 1.0313557433236088e-06, "loss": 0.4036, "step": 12741 }, { "epoch": 0.7981333876195994, "grad_norm": 0.6406916041776353, "learning_rate": 1.0307387949635062e-06, "loss": 0.4403, "step": 12742 }, { "epoch": 0.7981960256189418, "grad_norm": 0.895879415937103, "learning_rate": 1.0301220099789316e-06, "loss": 0.4038, "step": 12743 }, { "epoch": 0.798258663618284, "grad_norm": 0.8252980061952763, "learning_rate": 1.0295053883952716e-06, "loss": 0.377, "step": 12744 }, { "epoch": 0.7983213016176264, "grad_norm": 0.8373430441609334, "learning_rate": 1.0288889302379073e-06, "loss": 0.3597, "step": 12745 }, { "epoch": 0.7983839396169686, "grad_norm": 0.8576944298803667, "learning_rate": 1.0282726355322115e-06, "loss": 0.3589, "step": 12746 }, { "epoch": 0.7984465776163109, "grad_norm": 0.8251431411103242, "learning_rate": 1.0276565043035496e-06, "loss": 0.4242, "step": 12747 }, { "epoch": 0.7985092156156532, "grad_norm": 0.8098705020101363, "learning_rate": 1.0270405365772845e-06, "loss": 0.3744, "step": 12748 }, { "epoch": 0.7985718536149955, "grad_norm": 0.9200200111702185, "learning_rate": 1.0264247323787675e-06, "loss": 0.3793, "step": 12749 }, { "epoch": 0.7986344916143379, "grad_norm": 0.9160126380653592, "learning_rate": 1.0258090917333468e-06, "loss": 0.3865, "step": 12750 }, { "epoch": 0.7986971296136801, "grad_norm": 0.6238652305668035, "learning_rate": 1.0251936146663622e-06, "loss": 0.4377, "step": 12751 }, { "epoch": 0.7987597676130225, "grad_norm": 0.8806983672380236, "learning_rate": 1.0245783012031457e-06, "loss": 0.4016, "step": 12752 }, { "epoch": 0.7988224056123647, "grad_norm": 0.8146353044799716, "learning_rate": 1.0239631513690246e-06, "loss": 0.3962, "step": 12753 }, { "epoch": 0.7988850436117071, "grad_norm": 0.7989375459199375, "learning_rate": 1.023348165189318e-06, "loss": 0.3659, "step": 12754 }, { "epoch": 0.7989476816110493, "grad_norm": 0.8768730158784354, "learning_rate": 1.0227333426893387e-06, "loss": 0.3442, "step": 12755 }, { "epoch": 0.7990103196103916, "grad_norm": 0.8969705227901348, "learning_rate": 1.0221186838943919e-06, "loss": 0.3934, "step": 12756 }, { "epoch": 0.799072957609734, "grad_norm": 0.8749941063121978, "learning_rate": 1.0215041888297806e-06, "loss": 0.3713, "step": 12757 }, { "epoch": 0.7991355956090762, "grad_norm": 0.8323831011066374, "learning_rate": 1.0208898575207931e-06, "loss": 0.3809, "step": 12758 }, { "epoch": 0.7991982336084186, "grad_norm": 0.8166441867958962, "learning_rate": 1.020275689992719e-06, "loss": 0.3766, "step": 12759 }, { "epoch": 0.7992608716077608, "grad_norm": 0.6322463681543669, "learning_rate": 1.0196616862708363e-06, "loss": 0.4563, "step": 12760 }, { "epoch": 0.7993235096071032, "grad_norm": 0.811145547024655, "learning_rate": 1.0190478463804176e-06, "loss": 0.3927, "step": 12761 }, { "epoch": 0.7993861476064454, "grad_norm": 0.8812190291149108, "learning_rate": 1.0184341703467282e-06, "loss": 0.3801, "step": 12762 }, { "epoch": 0.7994487856057878, "grad_norm": 0.8104931363870541, "learning_rate": 1.0178206581950278e-06, "loss": 0.3936, "step": 12763 }, { "epoch": 0.7995114236051301, "grad_norm": 0.8817648541815524, "learning_rate": 1.017207309950568e-06, "loss": 0.3886, "step": 12764 }, { "epoch": 0.7995740616044723, "grad_norm": 0.8932591190663326, "learning_rate": 1.016594125638593e-06, "loss": 0.3588, "step": 12765 }, { "epoch": 0.7996366996038147, "grad_norm": 0.8181280125721213, "learning_rate": 1.015981105284345e-06, "loss": 0.3468, "step": 12766 }, { "epoch": 0.7996993376031569, "grad_norm": 0.8023332211238079, "learning_rate": 1.0153682489130522e-06, "loss": 0.4009, "step": 12767 }, { "epoch": 0.7997619756024993, "grad_norm": 0.6291875284984174, "learning_rate": 1.0147555565499435e-06, "loss": 0.4359, "step": 12768 }, { "epoch": 0.7998246136018415, "grad_norm": 0.8699751380931837, "learning_rate": 1.0141430282202357e-06, "loss": 0.3818, "step": 12769 }, { "epoch": 0.7998872516011839, "grad_norm": 0.9104581479778557, "learning_rate": 1.0135306639491405e-06, "loss": 0.4103, "step": 12770 }, { "epoch": 0.7999498896005262, "grad_norm": 0.908940886991088, "learning_rate": 1.0129184637618634e-06, "loss": 0.3894, "step": 12771 }, { "epoch": 0.8000125275998684, "grad_norm": 0.8809352419310713, "learning_rate": 1.0123064276836025e-06, "loss": 0.3683, "step": 12772 }, { "epoch": 0.8000751655992108, "grad_norm": 0.9143245140270528, "learning_rate": 1.0116945557395485e-06, "loss": 0.3597, "step": 12773 }, { "epoch": 0.800137803598553, "grad_norm": 0.8416454777915793, "learning_rate": 1.0110828479548862e-06, "loss": 0.3908, "step": 12774 }, { "epoch": 0.8002004415978954, "grad_norm": 0.9293757608770757, "learning_rate": 1.0104713043547941e-06, "loss": 0.3687, "step": 12775 }, { "epoch": 0.8002630795972376, "grad_norm": 0.8790950498208995, "learning_rate": 1.0098599249644448e-06, "loss": 0.3608, "step": 12776 }, { "epoch": 0.80032571759658, "grad_norm": 0.8408118383859268, "learning_rate": 1.009248709809002e-06, "loss": 0.3836, "step": 12777 }, { "epoch": 0.8003883555959223, "grad_norm": 0.6410163225015052, "learning_rate": 1.0086376589136226e-06, "loss": 0.4579, "step": 12778 }, { "epoch": 0.8004509935952646, "grad_norm": 0.909596841085858, "learning_rate": 1.008026772303458e-06, "loss": 0.3982, "step": 12779 }, { "epoch": 0.8005136315946069, "grad_norm": 0.8559137533650333, "learning_rate": 1.0074160500036523e-06, "loss": 0.358, "step": 12780 }, { "epoch": 0.8005762695939491, "grad_norm": 0.8395044377500965, "learning_rate": 1.0068054920393433e-06, "loss": 0.4067, "step": 12781 }, { "epoch": 0.8006389075932915, "grad_norm": 0.9019519942526985, "learning_rate": 1.00619509843566e-06, "loss": 0.3923, "step": 12782 }, { "epoch": 0.8007015455926337, "grad_norm": 0.8395612349543735, "learning_rate": 1.0055848692177294e-06, "loss": 0.3493, "step": 12783 }, { "epoch": 0.8007641835919761, "grad_norm": 0.8704261566038534, "learning_rate": 1.004974804410665e-06, "loss": 0.3797, "step": 12784 }, { "epoch": 0.8008268215913183, "grad_norm": 0.8719058061036661, "learning_rate": 1.0043649040395808e-06, "loss": 0.3813, "step": 12785 }, { "epoch": 0.8008894595906607, "grad_norm": 0.6286815766624659, "learning_rate": 1.0037551681295787e-06, "loss": 0.4481, "step": 12786 }, { "epoch": 0.800952097590003, "grad_norm": 0.8372900398816635, "learning_rate": 1.0031455967057557e-06, "loss": 0.396, "step": 12787 }, { "epoch": 0.8010147355893453, "grad_norm": 0.9182042044386384, "learning_rate": 1.0025361897932023e-06, "loss": 0.3938, "step": 12788 }, { "epoch": 0.8010773735886876, "grad_norm": 0.8912344247676331, "learning_rate": 1.0019269474170008e-06, "loss": 0.4026, "step": 12789 }, { "epoch": 0.8011400115880298, "grad_norm": 0.847432420884411, "learning_rate": 1.001317869602228e-06, "loss": 0.3856, "step": 12790 }, { "epoch": 0.8012026495873722, "grad_norm": 0.8267842610950057, "learning_rate": 1.0007089563739524e-06, "loss": 0.3698, "step": 12791 }, { "epoch": 0.8012652875867144, "grad_norm": 0.8176579818231531, "learning_rate": 1.0001002077572402e-06, "loss": 0.3635, "step": 12792 }, { "epoch": 0.8013279255860568, "grad_norm": 0.8482996674314407, "learning_rate": 9.994916237771446e-07, "loss": 0.3558, "step": 12793 }, { "epoch": 0.8013905635853991, "grad_norm": 0.8743124484314951, "learning_rate": 9.988832044587176e-07, "loss": 0.3818, "step": 12794 }, { "epoch": 0.8014532015847414, "grad_norm": 0.8426504385442005, "learning_rate": 9.982749498270005e-07, "loss": 0.3847, "step": 12795 }, { "epoch": 0.8015158395840837, "grad_norm": 0.8920079601237093, "learning_rate": 9.976668599070293e-07, "loss": 0.3569, "step": 12796 }, { "epoch": 0.8015784775834259, "grad_norm": 0.8586877706643378, "learning_rate": 9.97058934723833e-07, "loss": 0.3603, "step": 12797 }, { "epoch": 0.8016411155827683, "grad_norm": 0.9193613497723079, "learning_rate": 9.964511743024336e-07, "loss": 0.3619, "step": 12798 }, { "epoch": 0.8017037535821105, "grad_norm": 0.9078536990740554, "learning_rate": 9.958435786678471e-07, "loss": 0.366, "step": 12799 }, { "epoch": 0.8017663915814529, "grad_norm": 0.8976592768899251, "learning_rate": 9.952361478450811e-07, "loss": 0.3962, "step": 12800 }, { "epoch": 0.8018290295807952, "grad_norm": 0.8568266450960941, "learning_rate": 9.946288818591394e-07, "loss": 0.4357, "step": 12801 }, { "epoch": 0.8018916675801375, "grad_norm": 0.8254684837321501, "learning_rate": 9.940217807350155e-07, "loss": 0.3878, "step": 12802 }, { "epoch": 0.8019543055794798, "grad_norm": 0.5923909814402181, "learning_rate": 9.934148444976998e-07, "loss": 0.4342, "step": 12803 }, { "epoch": 0.8020169435788221, "grad_norm": 0.9101111274386255, "learning_rate": 9.928080731721728e-07, "loss": 0.3868, "step": 12804 }, { "epoch": 0.8020795815781644, "grad_norm": 0.8481401923001857, "learning_rate": 9.92201466783409e-07, "loss": 0.3615, "step": 12805 }, { "epoch": 0.8021422195775066, "grad_norm": 0.8958889485629838, "learning_rate": 9.915950253563773e-07, "loss": 0.3824, "step": 12806 }, { "epoch": 0.802204857576849, "grad_norm": 0.8425238404196935, "learning_rate": 9.909887489160375e-07, "loss": 0.379, "step": 12807 }, { "epoch": 0.8022674955761913, "grad_norm": 0.8204576315647109, "learning_rate": 9.903826374873443e-07, "loss": 0.3672, "step": 12808 }, { "epoch": 0.8023301335755336, "grad_norm": 0.8101129123952396, "learning_rate": 9.897766910952466e-07, "loss": 0.3634, "step": 12809 }, { "epoch": 0.8023927715748759, "grad_norm": 0.8561169179575965, "learning_rate": 9.89170909764685e-07, "loss": 0.4145, "step": 12810 }, { "epoch": 0.8024554095742182, "grad_norm": 0.7388103706316789, "learning_rate": 9.885652935205925e-07, "loss": 0.3176, "step": 12811 }, { "epoch": 0.8025180475735605, "grad_norm": 0.8581419659063821, "learning_rate": 9.879598423878978e-07, "loss": 0.385, "step": 12812 }, { "epoch": 0.8025806855729029, "grad_norm": 0.8500103532059387, "learning_rate": 9.873545563915204e-07, "loss": 0.3754, "step": 12813 }, { "epoch": 0.8026433235722451, "grad_norm": 0.8451410512583917, "learning_rate": 9.867494355563745e-07, "loss": 0.3799, "step": 12814 }, { "epoch": 0.8027059615715874, "grad_norm": 0.8626968242993068, "learning_rate": 9.861444799073672e-07, "loss": 0.3727, "step": 12815 }, { "epoch": 0.8027685995709297, "grad_norm": 0.9346952236277822, "learning_rate": 9.855396894693975e-07, "loss": 0.4043, "step": 12816 }, { "epoch": 0.802831237570272, "grad_norm": 0.868642189612378, "learning_rate": 9.849350642673577e-07, "loss": 0.3923, "step": 12817 }, { "epoch": 0.8028938755696143, "grad_norm": 0.877353201987724, "learning_rate": 9.84330604326138e-07, "loss": 0.3839, "step": 12818 }, { "epoch": 0.8029565135689566, "grad_norm": 0.9509274488424367, "learning_rate": 9.837263096706157e-07, "loss": 0.3625, "step": 12819 }, { "epoch": 0.803019151568299, "grad_norm": 0.8258503939802673, "learning_rate": 9.831221803256636e-07, "loss": 0.396, "step": 12820 }, { "epoch": 0.8030817895676412, "grad_norm": 0.6489747557357645, "learning_rate": 9.825182163161473e-07, "loss": 0.4576, "step": 12821 }, { "epoch": 0.8031444275669836, "grad_norm": 0.7801972984679978, "learning_rate": 9.81914417666928e-07, "loss": 0.3541, "step": 12822 }, { "epoch": 0.8032070655663258, "grad_norm": 0.8041037534290553, "learning_rate": 9.813107844028569e-07, "loss": 0.3836, "step": 12823 }, { "epoch": 0.8032697035656681, "grad_norm": 0.885235459032196, "learning_rate": 9.807073165487802e-07, "loss": 0.3753, "step": 12824 }, { "epoch": 0.8033323415650104, "grad_norm": 0.9288207007627259, "learning_rate": 9.80104014129536e-07, "loss": 0.4009, "step": 12825 }, { "epoch": 0.8033949795643527, "grad_norm": 0.766164780110576, "learning_rate": 9.795008771699555e-07, "loss": 0.3117, "step": 12826 }, { "epoch": 0.803457617563695, "grad_norm": 0.8328871247186147, "learning_rate": 9.788979056948666e-07, "loss": 0.3902, "step": 12827 }, { "epoch": 0.8035202555630373, "grad_norm": 0.8582101539724573, "learning_rate": 9.782950997290863e-07, "loss": 0.3644, "step": 12828 }, { "epoch": 0.8035828935623797, "grad_norm": 0.7953328053355502, "learning_rate": 9.776924592974257e-07, "loss": 0.3642, "step": 12829 }, { "epoch": 0.8036455315617219, "grad_norm": 0.9271910691767893, "learning_rate": 9.77089984424689e-07, "loss": 0.3699, "step": 12830 }, { "epoch": 0.8037081695610642, "grad_norm": 0.9021628687259613, "learning_rate": 9.764876751356766e-07, "loss": 0.3678, "step": 12831 }, { "epoch": 0.8037708075604065, "grad_norm": 0.8958910202965139, "learning_rate": 9.758855314551785e-07, "loss": 0.4111, "step": 12832 }, { "epoch": 0.8038334455597488, "grad_norm": 0.8241273085709235, "learning_rate": 9.752835534079785e-07, "loss": 0.4143, "step": 12833 }, { "epoch": 0.8038960835590911, "grad_norm": 0.7910616384280587, "learning_rate": 9.74681741018853e-07, "loss": 0.37, "step": 12834 }, { "epoch": 0.8039587215584334, "grad_norm": 0.8947476498174336, "learning_rate": 9.74080094312576e-07, "loss": 0.3776, "step": 12835 }, { "epoch": 0.8040213595577758, "grad_norm": 0.7723311771095349, "learning_rate": 9.73478613313909e-07, "loss": 0.3466, "step": 12836 }, { "epoch": 0.804083997557118, "grad_norm": 0.9356287081422243, "learning_rate": 9.728772980476103e-07, "loss": 0.4369, "step": 12837 }, { "epoch": 0.8041466355564604, "grad_norm": 0.9140105026026196, "learning_rate": 9.72276148538429e-07, "loss": 0.4121, "step": 12838 }, { "epoch": 0.8042092735558026, "grad_norm": 0.9010778975101709, "learning_rate": 9.716751648111078e-07, "loss": 0.3646, "step": 12839 }, { "epoch": 0.8042719115551449, "grad_norm": 0.8803071973654701, "learning_rate": 9.71074346890386e-07, "loss": 0.3668, "step": 12840 }, { "epoch": 0.8043345495544872, "grad_norm": 0.8969215026378871, "learning_rate": 9.704736948009913e-07, "loss": 0.3686, "step": 12841 }, { "epoch": 0.8043971875538295, "grad_norm": 0.7906330755980667, "learning_rate": 9.698732085676476e-07, "loss": 0.3678, "step": 12842 }, { "epoch": 0.8044598255531719, "grad_norm": 0.8116089248108201, "learning_rate": 9.692728882150698e-07, "loss": 0.3939, "step": 12843 }, { "epoch": 0.8045224635525141, "grad_norm": 0.8726857511713607, "learning_rate": 9.68672733767969e-07, "loss": 0.3633, "step": 12844 }, { "epoch": 0.8045851015518565, "grad_norm": 0.9251163401962641, "learning_rate": 9.680727452510464e-07, "loss": 0.4069, "step": 12845 }, { "epoch": 0.8046477395511987, "grad_norm": 0.840708246196672, "learning_rate": 9.674729226889985e-07, "loss": 0.3599, "step": 12846 }, { "epoch": 0.8047103775505411, "grad_norm": 0.9021700234049816, "learning_rate": 9.668732661065134e-07, "loss": 0.3682, "step": 12847 }, { "epoch": 0.8047730155498833, "grad_norm": 0.9323583892331443, "learning_rate": 9.66273775528272e-07, "loss": 0.3983, "step": 12848 }, { "epoch": 0.8048356535492256, "grad_norm": 0.7956312713271197, "learning_rate": 9.656744509789518e-07, "loss": 0.3895, "step": 12849 }, { "epoch": 0.804898291548568, "grad_norm": 0.6344044705913452, "learning_rate": 9.650752924832203e-07, "loss": 0.4498, "step": 12850 }, { "epoch": 0.8049609295479102, "grad_norm": 0.8960084881751633, "learning_rate": 9.64476300065738e-07, "loss": 0.3533, "step": 12851 }, { "epoch": 0.8050235675472526, "grad_norm": 0.8836789791494617, "learning_rate": 9.638774737511597e-07, "loss": 0.4006, "step": 12852 }, { "epoch": 0.8050862055465948, "grad_norm": 0.8192522723139921, "learning_rate": 9.632788135641346e-07, "loss": 0.3582, "step": 12853 }, { "epoch": 0.8051488435459372, "grad_norm": 0.9475612424945725, "learning_rate": 9.626803195293028e-07, "loss": 0.4064, "step": 12854 }, { "epoch": 0.8052114815452794, "grad_norm": 0.8375316649526489, "learning_rate": 9.620819916712987e-07, "loss": 0.3641, "step": 12855 }, { "epoch": 0.8052741195446217, "grad_norm": 0.851724752770884, "learning_rate": 9.614838300147488e-07, "loss": 0.3974, "step": 12856 }, { "epoch": 0.805336757543964, "grad_norm": 0.8277049049547257, "learning_rate": 9.60885834584273e-07, "loss": 0.3584, "step": 12857 }, { "epoch": 0.8053993955433063, "grad_norm": 0.9033420649611713, "learning_rate": 9.602880054044872e-07, "loss": 0.4009, "step": 12858 }, { "epoch": 0.8054620335426487, "grad_norm": 0.8347618528186865, "learning_rate": 9.59690342499997e-07, "loss": 0.3633, "step": 12859 }, { "epoch": 0.8055246715419909, "grad_norm": 0.9037703890364572, "learning_rate": 9.590928458954013e-07, "loss": 0.4135, "step": 12860 }, { "epoch": 0.8055873095413333, "grad_norm": 0.8381574829503854, "learning_rate": 9.584955156152948e-07, "loss": 0.3848, "step": 12861 }, { "epoch": 0.8056499475406755, "grad_norm": 0.872442185984471, "learning_rate": 9.578983516842633e-07, "loss": 0.381, "step": 12862 }, { "epoch": 0.8057125855400179, "grad_norm": 0.8474001027096487, "learning_rate": 9.573013541268855e-07, "loss": 0.3903, "step": 12863 }, { "epoch": 0.8057752235393602, "grad_norm": 0.8556060928824081, "learning_rate": 9.567045229677347e-07, "loss": 0.4079, "step": 12864 }, { "epoch": 0.8058378615387024, "grad_norm": 0.6576701826615069, "learning_rate": 9.561078582313765e-07, "loss": 0.4538, "step": 12865 }, { "epoch": 0.8059004995380448, "grad_norm": 0.9137245862722655, "learning_rate": 9.555113599423689e-07, "loss": 0.3733, "step": 12866 }, { "epoch": 0.805963137537387, "grad_norm": 0.829615576579356, "learning_rate": 9.549150281252633e-07, "loss": 0.3565, "step": 12867 }, { "epoch": 0.8060257755367294, "grad_norm": 0.8193802721249345, "learning_rate": 9.543188628046073e-07, "loss": 0.3341, "step": 12868 }, { "epoch": 0.8060884135360716, "grad_norm": 0.7727124387381722, "learning_rate": 9.53722864004936e-07, "loss": 0.4068, "step": 12869 }, { "epoch": 0.806151051535414, "grad_norm": 0.6452517628776667, "learning_rate": 9.531270317507845e-07, "loss": 0.4671, "step": 12870 }, { "epoch": 0.8062136895347563, "grad_norm": 0.8926866156266241, "learning_rate": 9.525313660666757e-07, "loss": 0.4045, "step": 12871 }, { "epoch": 0.8062763275340986, "grad_norm": 0.9438340031045891, "learning_rate": 9.519358669771268e-07, "loss": 0.3971, "step": 12872 }, { "epoch": 0.8063389655334409, "grad_norm": 0.8906661114353768, "learning_rate": 9.51340534506649e-07, "loss": 0.3891, "step": 12873 }, { "epoch": 0.8064016035327831, "grad_norm": 0.8096986407542536, "learning_rate": 9.507453686797464e-07, "loss": 0.3761, "step": 12874 }, { "epoch": 0.8064642415321255, "grad_norm": 0.874515299229083, "learning_rate": 9.501503695209158e-07, "loss": 0.3636, "step": 12875 }, { "epoch": 0.8065268795314677, "grad_norm": 0.7568328912506019, "learning_rate": 9.495555370546461e-07, "loss": 0.3415, "step": 12876 }, { "epoch": 0.8065895175308101, "grad_norm": 0.834327710105622, "learning_rate": 9.489608713054227e-07, "loss": 0.4108, "step": 12877 }, { "epoch": 0.8066521555301523, "grad_norm": 0.8775326005412718, "learning_rate": 9.483663722977232e-07, "loss": 0.4007, "step": 12878 }, { "epoch": 0.8067147935294947, "grad_norm": 0.8906382257824385, "learning_rate": 9.477720400560153e-07, "loss": 0.4052, "step": 12879 }, { "epoch": 0.806777431528837, "grad_norm": 0.8465224075721086, "learning_rate": 9.471778746047628e-07, "loss": 0.376, "step": 12880 }, { "epoch": 0.8068400695281792, "grad_norm": 0.8092226201435163, "learning_rate": 9.465838759684215e-07, "loss": 0.3659, "step": 12881 }, { "epoch": 0.8069027075275216, "grad_norm": 0.8002777380178409, "learning_rate": 9.459900441714398e-07, "loss": 0.3442, "step": 12882 }, { "epoch": 0.8069653455268638, "grad_norm": 0.8891825091529926, "learning_rate": 9.453963792382603e-07, "loss": 0.376, "step": 12883 }, { "epoch": 0.8070279835262062, "grad_norm": 0.89111220981614, "learning_rate": 9.44802881193318e-07, "loss": 0.3719, "step": 12884 }, { "epoch": 0.8070906215255484, "grad_norm": 0.8527838213238146, "learning_rate": 9.44209550061041e-07, "loss": 0.3452, "step": 12885 }, { "epoch": 0.8071532595248908, "grad_norm": 0.922870883811091, "learning_rate": 9.43616385865852e-07, "loss": 0.4218, "step": 12886 }, { "epoch": 0.8072158975242331, "grad_norm": 0.9554497405571056, "learning_rate": 9.43023388632166e-07, "loss": 0.3997, "step": 12887 }, { "epoch": 0.8072785355235754, "grad_norm": 0.8614465500820658, "learning_rate": 9.424305583843912e-07, "loss": 0.37, "step": 12888 }, { "epoch": 0.8073411735229177, "grad_norm": 0.9262887788508539, "learning_rate": 9.41837895146927e-07, "loss": 0.3782, "step": 12889 }, { "epoch": 0.8074038115222599, "grad_norm": 0.8383933223440881, "learning_rate": 9.412453989441689e-07, "loss": 0.3951, "step": 12890 }, { "epoch": 0.8074664495216023, "grad_norm": 0.916035220684202, "learning_rate": 9.406530698005028e-07, "loss": 0.3847, "step": 12891 }, { "epoch": 0.8075290875209445, "grad_norm": 0.8902875474015475, "learning_rate": 9.4006090774031e-07, "loss": 0.376, "step": 12892 }, { "epoch": 0.8075917255202869, "grad_norm": 0.8892764718828912, "learning_rate": 9.39468912787963e-07, "loss": 0.4314, "step": 12893 }, { "epoch": 0.8076543635196292, "grad_norm": 0.7988160252765756, "learning_rate": 9.388770849678302e-07, "loss": 0.389, "step": 12894 }, { "epoch": 0.8077170015189715, "grad_norm": 0.8026958293499501, "learning_rate": 9.382854243042688e-07, "loss": 0.3973, "step": 12895 }, { "epoch": 0.8077796395183138, "grad_norm": 0.8536486867263463, "learning_rate": 9.376939308216348e-07, "loss": 0.3293, "step": 12896 }, { "epoch": 0.8078422775176561, "grad_norm": 0.8518938308264704, "learning_rate": 9.371026045442727e-07, "loss": 0.3473, "step": 12897 }, { "epoch": 0.8079049155169984, "grad_norm": 0.9211880634595958, "learning_rate": 9.365114454965213e-07, "loss": 0.3813, "step": 12898 }, { "epoch": 0.8079675535163406, "grad_norm": 0.9461818438113997, "learning_rate": 9.359204537027133e-07, "loss": 0.3769, "step": 12899 }, { "epoch": 0.808030191515683, "grad_norm": 0.9037243501855758, "learning_rate": 9.353296291871733e-07, "loss": 0.3737, "step": 12900 }, { "epoch": 0.8080928295150253, "grad_norm": 0.8791719248194189, "learning_rate": 9.347389719742206e-07, "loss": 0.3582, "step": 12901 }, { "epoch": 0.8081554675143676, "grad_norm": 0.8938671335228917, "learning_rate": 9.341484820881652e-07, "loss": 0.3567, "step": 12902 }, { "epoch": 0.8082181055137099, "grad_norm": 0.8668627672938216, "learning_rate": 9.335581595533122e-07, "loss": 0.4259, "step": 12903 }, { "epoch": 0.8082807435130522, "grad_norm": 0.6094740470665155, "learning_rate": 9.329680043939621e-07, "loss": 0.4634, "step": 12904 }, { "epoch": 0.8083433815123945, "grad_norm": 0.87270982220204, "learning_rate": 9.323780166344038e-07, "loss": 0.403, "step": 12905 }, { "epoch": 0.8084060195117367, "grad_norm": 0.9110552291862154, "learning_rate": 9.317881962989212e-07, "loss": 0.399, "step": 12906 }, { "epoch": 0.8084686575110791, "grad_norm": 0.5910205924107854, "learning_rate": 9.31198543411791e-07, "loss": 0.4424, "step": 12907 }, { "epoch": 0.8085312955104214, "grad_norm": 0.84937104917728, "learning_rate": 9.306090579972848e-07, "loss": 0.3981, "step": 12908 }, { "epoch": 0.8085939335097637, "grad_norm": 0.867629231789529, "learning_rate": 9.300197400796645e-07, "loss": 0.4398, "step": 12909 }, { "epoch": 0.808656571509106, "grad_norm": 0.8610022399213138, "learning_rate": 9.294305896831873e-07, "loss": 0.3693, "step": 12910 }, { "epoch": 0.8087192095084483, "grad_norm": 0.8330345483105566, "learning_rate": 9.28841606832101e-07, "loss": 0.357, "step": 12911 }, { "epoch": 0.8087818475077906, "grad_norm": 0.8822588718529194, "learning_rate": 9.282527915506517e-07, "loss": 0.4087, "step": 12912 }, { "epoch": 0.808844485507133, "grad_norm": 0.8497959777936306, "learning_rate": 9.276641438630718e-07, "loss": 0.3475, "step": 12913 }, { "epoch": 0.8089071235064752, "grad_norm": 0.8601408326731518, "learning_rate": 9.270756637935924e-07, "loss": 0.3781, "step": 12914 }, { "epoch": 0.8089697615058175, "grad_norm": 0.8623987352718419, "learning_rate": 9.264873513664352e-07, "loss": 0.4525, "step": 12915 }, { "epoch": 0.8090323995051598, "grad_norm": 0.8342381896379482, "learning_rate": 9.258992066058142e-07, "loss": 0.3545, "step": 12916 }, { "epoch": 0.8090950375045021, "grad_norm": 0.865487672690156, "learning_rate": 9.253112295359384e-07, "loss": 0.3402, "step": 12917 }, { "epoch": 0.8091576755038444, "grad_norm": 0.8122800894346076, "learning_rate": 9.24723420181009e-07, "loss": 0.3786, "step": 12918 }, { "epoch": 0.8092203135031867, "grad_norm": 0.8905809933037251, "learning_rate": 9.241357785652183e-07, "loss": 0.4172, "step": 12919 }, { "epoch": 0.809282951502529, "grad_norm": 0.9252974092540868, "learning_rate": 9.23548304712757e-07, "loss": 0.3939, "step": 12920 }, { "epoch": 0.8093455895018713, "grad_norm": 0.8348872033638368, "learning_rate": 9.229609986478034e-07, "loss": 0.3482, "step": 12921 }, { "epoch": 0.8094082275012137, "grad_norm": 0.9076264191581424, "learning_rate": 9.223738603945314e-07, "loss": 0.4021, "step": 12922 }, { "epoch": 0.8094708655005559, "grad_norm": 0.8823123898733107, "learning_rate": 9.217868899771087e-07, "loss": 0.381, "step": 12923 }, { "epoch": 0.8095335034998982, "grad_norm": 0.8731444485690933, "learning_rate": 9.212000874196953e-07, "loss": 0.3836, "step": 12924 }, { "epoch": 0.8095961414992405, "grad_norm": 0.881320208747182, "learning_rate": 9.206134527464427e-07, "loss": 0.3884, "step": 12925 }, { "epoch": 0.8096587794985828, "grad_norm": 0.9413843668376268, "learning_rate": 9.200269859814981e-07, "loss": 0.4466, "step": 12926 }, { "epoch": 0.8097214174979251, "grad_norm": 0.8787637867923428, "learning_rate": 9.194406871489997e-07, "loss": 0.3891, "step": 12927 }, { "epoch": 0.8097840554972674, "grad_norm": 0.9129766533807006, "learning_rate": 9.188545562730789e-07, "loss": 0.3945, "step": 12928 }, { "epoch": 0.8098466934966098, "grad_norm": 0.9357137017248737, "learning_rate": 9.182685933778635e-07, "loss": 0.3515, "step": 12929 }, { "epoch": 0.809909331495952, "grad_norm": 0.9283030100770991, "learning_rate": 9.176827984874698e-07, "loss": 0.4165, "step": 12930 }, { "epoch": 0.8099719694952944, "grad_norm": 0.8362630069055331, "learning_rate": 9.170971716260091e-07, "loss": 0.4054, "step": 12931 }, { "epoch": 0.8100346074946366, "grad_norm": 0.8086539452858257, "learning_rate": 9.165117128175877e-07, "loss": 0.3566, "step": 12932 }, { "epoch": 0.8100972454939789, "grad_norm": 0.9214849044427641, "learning_rate": 9.159264220863023e-07, "loss": 0.3995, "step": 12933 }, { "epoch": 0.8101598834933212, "grad_norm": 0.8563095295681787, "learning_rate": 9.153412994562433e-07, "loss": 0.385, "step": 12934 }, { "epoch": 0.8102225214926635, "grad_norm": 0.8389797218914681, "learning_rate": 9.147563449514946e-07, "loss": 0.4074, "step": 12935 }, { "epoch": 0.8102851594920059, "grad_norm": 0.8159072161650869, "learning_rate": 9.14171558596133e-07, "loss": 0.3512, "step": 12936 }, { "epoch": 0.8103477974913481, "grad_norm": 0.8788292487972149, "learning_rate": 9.135869404142267e-07, "loss": 0.3844, "step": 12937 }, { "epoch": 0.8104104354906905, "grad_norm": 0.8659126735444529, "learning_rate": 9.130024904298423e-07, "loss": 0.3464, "step": 12938 }, { "epoch": 0.8104730734900327, "grad_norm": 0.8127916034008158, "learning_rate": 9.124182086670336e-07, "loss": 0.4037, "step": 12939 }, { "epoch": 0.810535711489375, "grad_norm": 0.9155656850947594, "learning_rate": 9.118340951498489e-07, "loss": 0.4058, "step": 12940 }, { "epoch": 0.8105983494887173, "grad_norm": 0.9446557459044882, "learning_rate": 9.112501499023335e-07, "loss": 0.4159, "step": 12941 }, { "epoch": 0.8106609874880596, "grad_norm": 0.8383463908690378, "learning_rate": 9.106663729485199e-07, "loss": 0.3326, "step": 12942 }, { "epoch": 0.810723625487402, "grad_norm": 0.8888246165632249, "learning_rate": 9.100827643124383e-07, "loss": 0.3649, "step": 12943 }, { "epoch": 0.8107862634867442, "grad_norm": 0.8869985112919054, "learning_rate": 9.094993240181088e-07, "loss": 0.3787, "step": 12944 }, { "epoch": 0.8108489014860866, "grad_norm": 0.8513674208279276, "learning_rate": 9.089160520895446e-07, "loss": 0.4063, "step": 12945 }, { "epoch": 0.8109115394854288, "grad_norm": 0.8673126024726577, "learning_rate": 9.08332948550757e-07, "loss": 0.4177, "step": 12946 }, { "epoch": 0.8109741774847712, "grad_norm": 0.9457709577323047, "learning_rate": 9.077500134257444e-07, "loss": 0.4186, "step": 12947 }, { "epoch": 0.8110368154841134, "grad_norm": 0.8885496926350526, "learning_rate": 9.071672467385012e-07, "loss": 0.4063, "step": 12948 }, { "epoch": 0.8110994534834557, "grad_norm": 0.814911139634187, "learning_rate": 9.06584648513012e-07, "loss": 0.4152, "step": 12949 }, { "epoch": 0.811162091482798, "grad_norm": 0.6007478104764822, "learning_rate": 9.060022187732598e-07, "loss": 0.4667, "step": 12950 }, { "epoch": 0.8112247294821403, "grad_norm": 0.9445928213602939, "learning_rate": 9.054199575432165e-07, "loss": 0.4106, "step": 12951 }, { "epoch": 0.8112873674814827, "grad_norm": 0.6297271641409337, "learning_rate": 9.048378648468476e-07, "loss": 0.4381, "step": 12952 }, { "epoch": 0.8113500054808249, "grad_norm": 0.6489716645772066, "learning_rate": 9.042559407081119e-07, "loss": 0.438, "step": 12953 }, { "epoch": 0.8114126434801673, "grad_norm": 0.8613575070506942, "learning_rate": 9.036741851509612e-07, "loss": 0.3945, "step": 12954 }, { "epoch": 0.8114752814795095, "grad_norm": 0.8487914637902619, "learning_rate": 9.030925981993421e-07, "loss": 0.3535, "step": 12955 }, { "epoch": 0.8115379194788519, "grad_norm": 0.9402340016248002, "learning_rate": 9.025111798771923e-07, "loss": 0.4175, "step": 12956 }, { "epoch": 0.8116005574781942, "grad_norm": 0.9387759525475106, "learning_rate": 9.019299302084428e-07, "loss": 0.4107, "step": 12957 }, { "epoch": 0.8116631954775364, "grad_norm": 0.9358561092019984, "learning_rate": 9.01348849217018e-07, "loss": 0.3889, "step": 12958 }, { "epoch": 0.8117258334768788, "grad_norm": 0.8533177934823342, "learning_rate": 9.007679369268341e-07, "loss": 0.3992, "step": 12959 }, { "epoch": 0.811788471476221, "grad_norm": 0.9164299933438841, "learning_rate": 9.001871933618045e-07, "loss": 0.4268, "step": 12960 }, { "epoch": 0.8118511094755634, "grad_norm": 0.8370472417372697, "learning_rate": 8.996066185458302e-07, "loss": 0.3809, "step": 12961 }, { "epoch": 0.8119137474749056, "grad_norm": 0.9142579451477072, "learning_rate": 8.990262125028076e-07, "loss": 0.4319, "step": 12962 }, { "epoch": 0.811976385474248, "grad_norm": 0.8618813783112794, "learning_rate": 8.984459752566288e-07, "loss": 0.3616, "step": 12963 }, { "epoch": 0.8120390234735902, "grad_norm": 0.886918562005584, "learning_rate": 8.978659068311746e-07, "loss": 0.3573, "step": 12964 }, { "epoch": 0.8121016614729325, "grad_norm": 0.8048287911430033, "learning_rate": 8.972860072503209e-07, "loss": 0.4161, "step": 12965 }, { "epoch": 0.8121642994722749, "grad_norm": 0.7943374825429685, "learning_rate": 8.96706276537937e-07, "loss": 0.3594, "step": 12966 }, { "epoch": 0.8122269374716171, "grad_norm": 0.8176823820249098, "learning_rate": 8.961267147178843e-07, "loss": 0.3721, "step": 12967 }, { "epoch": 0.8122895754709595, "grad_norm": 0.8131346708168399, "learning_rate": 8.955473218140165e-07, "loss": 0.4034, "step": 12968 }, { "epoch": 0.8123522134703017, "grad_norm": 0.871678366545842, "learning_rate": 8.94968097850184e-07, "loss": 0.4058, "step": 12969 }, { "epoch": 0.8124148514696441, "grad_norm": 0.9046927122286955, "learning_rate": 8.943890428502266e-07, "loss": 0.3839, "step": 12970 }, { "epoch": 0.8124774894689863, "grad_norm": 0.8482382798036103, "learning_rate": 8.93810156837977e-07, "loss": 0.4179, "step": 12971 }, { "epoch": 0.8125401274683287, "grad_norm": 0.8620510895178052, "learning_rate": 8.932314398372643e-07, "loss": 0.4045, "step": 12972 }, { "epoch": 0.812602765467671, "grad_norm": 0.8804157947360456, "learning_rate": 8.926528918719086e-07, "loss": 0.3987, "step": 12973 }, { "epoch": 0.8126654034670132, "grad_norm": 0.8524085990577561, "learning_rate": 8.920745129657215e-07, "loss": 0.3791, "step": 12974 }, { "epoch": 0.8127280414663556, "grad_norm": 0.8369324178194784, "learning_rate": 8.9149630314251e-07, "loss": 0.398, "step": 12975 }, { "epoch": 0.8127906794656978, "grad_norm": 0.8256010156346785, "learning_rate": 8.909182624260731e-07, "loss": 0.3688, "step": 12976 }, { "epoch": 0.8128533174650402, "grad_norm": 0.8690968421085195, "learning_rate": 8.903403908402025e-07, "loss": 0.3594, "step": 12977 }, { "epoch": 0.8129159554643824, "grad_norm": 0.9349021316275747, "learning_rate": 8.897626884086851e-07, "loss": 0.4341, "step": 12978 }, { "epoch": 0.8129785934637248, "grad_norm": 0.8687867001559869, "learning_rate": 8.891851551552988e-07, "loss": 0.3937, "step": 12979 }, { "epoch": 0.8130412314630671, "grad_norm": 0.8369727738710956, "learning_rate": 8.886077911038127e-07, "loss": 0.3829, "step": 12980 }, { "epoch": 0.8131038694624094, "grad_norm": 0.8685859503886456, "learning_rate": 8.880305962779944e-07, "loss": 0.4086, "step": 12981 }, { "epoch": 0.8131665074617517, "grad_norm": 0.9022687103660793, "learning_rate": 8.874535707016003e-07, "loss": 0.3903, "step": 12982 }, { "epoch": 0.8132291454610939, "grad_norm": 0.8366285791546489, "learning_rate": 8.868767143983803e-07, "loss": 0.3996, "step": 12983 }, { "epoch": 0.8132917834604363, "grad_norm": 0.8462312130928076, "learning_rate": 8.86300027392078e-07, "loss": 0.366, "step": 12984 }, { "epoch": 0.8133544214597785, "grad_norm": 0.9055323710076557, "learning_rate": 8.857235097064304e-07, "loss": 0.3837, "step": 12985 }, { "epoch": 0.8134170594591209, "grad_norm": 0.7939101884482986, "learning_rate": 8.851471613651652e-07, "loss": 0.3625, "step": 12986 }, { "epoch": 0.8134796974584632, "grad_norm": 0.8378535101090849, "learning_rate": 8.845709823920079e-07, "loss": 0.3532, "step": 12987 }, { "epoch": 0.8135423354578055, "grad_norm": 0.829625878647554, "learning_rate": 8.839949728106717e-07, "loss": 0.3998, "step": 12988 }, { "epoch": 0.8136049734571478, "grad_norm": 0.9242495314117991, "learning_rate": 8.834191326448677e-07, "loss": 0.4236, "step": 12989 }, { "epoch": 0.81366761145649, "grad_norm": 0.8896964905095741, "learning_rate": 8.828434619182963e-07, "loss": 0.4195, "step": 12990 }, { "epoch": 0.8137302494558324, "grad_norm": 0.9071299038983545, "learning_rate": 8.822679606546524e-07, "loss": 0.3869, "step": 12991 }, { "epoch": 0.8137928874551746, "grad_norm": 0.8534689194666545, "learning_rate": 8.816926288776228e-07, "loss": 0.3862, "step": 12992 }, { "epoch": 0.813855525454517, "grad_norm": 0.8744524604632617, "learning_rate": 8.811174666108895e-07, "loss": 0.3654, "step": 12993 }, { "epoch": 0.8139181634538593, "grad_norm": 0.8132148686338835, "learning_rate": 8.805424738781259e-07, "loss": 0.3802, "step": 12994 }, { "epoch": 0.8139808014532016, "grad_norm": 0.8280293985357411, "learning_rate": 8.799676507029975e-07, "loss": 0.3927, "step": 12995 }, { "epoch": 0.8140434394525439, "grad_norm": 0.8653953009115111, "learning_rate": 8.793929971091664e-07, "loss": 0.3805, "step": 12996 }, { "epoch": 0.8141060774518862, "grad_norm": 0.8836067704471523, "learning_rate": 8.788185131202826e-07, "loss": 0.421, "step": 12997 }, { "epoch": 0.8141687154512285, "grad_norm": 0.8302525894708621, "learning_rate": 8.782441987599955e-07, "loss": 0.3583, "step": 12998 }, { "epoch": 0.8142313534505707, "grad_norm": 0.8051489303066962, "learning_rate": 8.776700540519423e-07, "loss": 0.3701, "step": 12999 }, { "epoch": 0.8142939914499131, "grad_norm": 0.8529031124168767, "learning_rate": 8.770960790197547e-07, "loss": 0.3468, "step": 13000 }, { "epoch": 0.8143566294492554, "grad_norm": 0.8812199467817505, "learning_rate": 8.765222736870576e-07, "loss": 0.3723, "step": 13001 }, { "epoch": 0.8144192674485977, "grad_norm": 0.8707640236921274, "learning_rate": 8.75948638077469e-07, "loss": 0.399, "step": 13002 }, { "epoch": 0.81448190544794, "grad_norm": 0.8002282972107165, "learning_rate": 8.753751722146003e-07, "loss": 0.3572, "step": 13003 }, { "epoch": 0.8145445434472823, "grad_norm": 0.8681858876936007, "learning_rate": 8.748018761220533e-07, "loss": 0.4231, "step": 13004 }, { "epoch": 0.8146071814466246, "grad_norm": 0.859180422076744, "learning_rate": 8.742287498234281e-07, "loss": 0.3782, "step": 13005 }, { "epoch": 0.814669819445967, "grad_norm": 0.8075166290307283, "learning_rate": 8.736557933423123e-07, "loss": 0.3386, "step": 13006 }, { "epoch": 0.8147324574453092, "grad_norm": 0.8339716696746414, "learning_rate": 8.73083006702291e-07, "loss": 0.386, "step": 13007 }, { "epoch": 0.8147950954446515, "grad_norm": 0.8781486209708311, "learning_rate": 8.725103899269394e-07, "loss": 0.409, "step": 13008 }, { "epoch": 0.8148577334439938, "grad_norm": 0.9607109205638528, "learning_rate": 8.719379430398262e-07, "loss": 0.4147, "step": 13009 }, { "epoch": 0.8149203714433361, "grad_norm": 0.909489882973935, "learning_rate": 8.713656660645136e-07, "loss": 0.3803, "step": 13010 }, { "epoch": 0.8149830094426784, "grad_norm": 0.9204919274251563, "learning_rate": 8.707935590245565e-07, "loss": 0.3785, "step": 13011 }, { "epoch": 0.8150456474420207, "grad_norm": 0.8719444439198919, "learning_rate": 8.702216219435022e-07, "loss": 0.3936, "step": 13012 }, { "epoch": 0.815108285441363, "grad_norm": 0.8484879036203571, "learning_rate": 8.696498548448923e-07, "loss": 0.3808, "step": 13013 }, { "epoch": 0.8151709234407053, "grad_norm": 0.9584494769010727, "learning_rate": 8.6907825775226e-07, "loss": 0.3831, "step": 13014 }, { "epoch": 0.8152335614400475, "grad_norm": 0.9174475969180309, "learning_rate": 8.685068306891353e-07, "loss": 0.3691, "step": 13015 }, { "epoch": 0.8152961994393899, "grad_norm": 0.8509452532348487, "learning_rate": 8.679355736790357e-07, "loss": 0.3896, "step": 13016 }, { "epoch": 0.8153588374387322, "grad_norm": 0.8253547032205383, "learning_rate": 8.673644867454756e-07, "loss": 0.3927, "step": 13017 }, { "epoch": 0.8154214754380745, "grad_norm": 0.8475849366085396, "learning_rate": 8.667935699119595e-07, "loss": 0.3966, "step": 13018 }, { "epoch": 0.8154841134374168, "grad_norm": 0.8593024455172785, "learning_rate": 8.662228232019876e-07, "loss": 0.3948, "step": 13019 }, { "epoch": 0.8155467514367591, "grad_norm": 0.8142144015353295, "learning_rate": 8.656522466390515e-07, "loss": 0.323, "step": 13020 }, { "epoch": 0.8156093894361014, "grad_norm": 0.8953642284635266, "learning_rate": 8.650818402466354e-07, "loss": 0.3836, "step": 13021 }, { "epoch": 0.8156720274354438, "grad_norm": 0.9342555388192858, "learning_rate": 8.645116040482187e-07, "loss": 0.4308, "step": 13022 }, { "epoch": 0.815734665434786, "grad_norm": 0.8769833725961786, "learning_rate": 8.639415380672717e-07, "loss": 0.3887, "step": 13023 }, { "epoch": 0.8157973034341283, "grad_norm": 0.8669357214936964, "learning_rate": 8.633716423272592e-07, "loss": 0.3842, "step": 13024 }, { "epoch": 0.8158599414334706, "grad_norm": 0.8726161005343261, "learning_rate": 8.628019168516377e-07, "loss": 0.4271, "step": 13025 }, { "epoch": 0.8159225794328129, "grad_norm": 0.8447672495214898, "learning_rate": 8.622323616638573e-07, "loss": 0.4032, "step": 13026 }, { "epoch": 0.8159852174321552, "grad_norm": 0.8844973686622366, "learning_rate": 8.616629767873614e-07, "loss": 0.4185, "step": 13027 }, { "epoch": 0.8160478554314975, "grad_norm": 0.8473088285872525, "learning_rate": 8.610937622455851e-07, "loss": 0.3693, "step": 13028 }, { "epoch": 0.8161104934308399, "grad_norm": 0.8278814662939127, "learning_rate": 8.605247180619574e-07, "loss": 0.3814, "step": 13029 }, { "epoch": 0.8161731314301821, "grad_norm": 0.8116829504077905, "learning_rate": 8.599558442598998e-07, "loss": 0.3234, "step": 13030 }, { "epoch": 0.8162357694295245, "grad_norm": 0.9129802938675701, "learning_rate": 8.593871408628291e-07, "loss": 0.3843, "step": 13031 }, { "epoch": 0.8162984074288667, "grad_norm": 0.8544176450289587, "learning_rate": 8.588186078941513e-07, "loss": 0.3745, "step": 13032 }, { "epoch": 0.816361045428209, "grad_norm": 0.8138029251783616, "learning_rate": 8.582502453772696e-07, "loss": 0.3919, "step": 13033 }, { "epoch": 0.8164236834275513, "grad_norm": 0.8781899454611052, "learning_rate": 8.576820533355762e-07, "loss": 0.4149, "step": 13034 }, { "epoch": 0.8164863214268936, "grad_norm": 0.8725069465021005, "learning_rate": 8.571140317924581e-07, "loss": 0.4371, "step": 13035 }, { "epoch": 0.816548959426236, "grad_norm": 0.6895969097562928, "learning_rate": 8.56546180771296e-07, "loss": 0.4647, "step": 13036 }, { "epoch": 0.8166115974255782, "grad_norm": 0.8791878132397207, "learning_rate": 8.559785002954618e-07, "loss": 0.3939, "step": 13037 }, { "epoch": 0.8166742354249206, "grad_norm": 0.8221224220044754, "learning_rate": 8.554109903883218e-07, "loss": 0.3808, "step": 13038 }, { "epoch": 0.8167368734242628, "grad_norm": 0.7752315434431156, "learning_rate": 8.548436510732333e-07, "loss": 0.3698, "step": 13039 }, { "epoch": 0.8167995114236052, "grad_norm": 0.8551750474260927, "learning_rate": 8.542764823735506e-07, "loss": 0.3797, "step": 13040 }, { "epoch": 0.8168621494229474, "grad_norm": 0.8630305677354914, "learning_rate": 8.537094843126164e-07, "loss": 0.3602, "step": 13041 }, { "epoch": 0.8169247874222897, "grad_norm": 0.6825962876188603, "learning_rate": 8.531426569137708e-07, "loss": 0.4673, "step": 13042 }, { "epoch": 0.816987425421632, "grad_norm": 0.6471080253358855, "learning_rate": 8.525760002003431e-07, "loss": 0.4578, "step": 13043 }, { "epoch": 0.8170500634209743, "grad_norm": 0.8626598204923709, "learning_rate": 8.520095141956569e-07, "loss": 0.3862, "step": 13044 }, { "epoch": 0.8171127014203167, "grad_norm": 0.8359818157556665, "learning_rate": 8.514431989230288e-07, "loss": 0.3765, "step": 13045 }, { "epoch": 0.8171753394196589, "grad_norm": 0.8718828496370461, "learning_rate": 8.508770544057688e-07, "loss": 0.3867, "step": 13046 }, { "epoch": 0.8172379774190013, "grad_norm": 0.8368725586075016, "learning_rate": 8.503110806671783e-07, "loss": 0.3543, "step": 13047 }, { "epoch": 0.8173006154183435, "grad_norm": 0.8607228984242866, "learning_rate": 8.497452777305548e-07, "loss": 0.3772, "step": 13048 }, { "epoch": 0.8173632534176858, "grad_norm": 0.8702121397624598, "learning_rate": 8.491796456191864e-07, "loss": 0.3864, "step": 13049 }, { "epoch": 0.8174258914170281, "grad_norm": 0.9741379598000708, "learning_rate": 8.486141843563539e-07, "loss": 0.4123, "step": 13050 }, { "epoch": 0.8174885294163704, "grad_norm": 0.8736546144219929, "learning_rate": 8.480488939653315e-07, "loss": 0.4249, "step": 13051 }, { "epoch": 0.8175511674157128, "grad_norm": 0.8685691778423474, "learning_rate": 8.474837744693876e-07, "loss": 0.3806, "step": 13052 }, { "epoch": 0.817613805415055, "grad_norm": 0.8481046022557316, "learning_rate": 8.469188258917826e-07, "loss": 0.3839, "step": 13053 }, { "epoch": 0.8176764434143974, "grad_norm": 0.8330477268709156, "learning_rate": 8.463540482557697e-07, "loss": 0.4029, "step": 13054 }, { "epoch": 0.8177390814137396, "grad_norm": 0.941296090713639, "learning_rate": 8.457894415845952e-07, "loss": 0.3828, "step": 13055 }, { "epoch": 0.817801719413082, "grad_norm": 0.8092580384735099, "learning_rate": 8.452250059014971e-07, "loss": 0.3809, "step": 13056 }, { "epoch": 0.8178643574124242, "grad_norm": 0.7748784072113423, "learning_rate": 8.446607412297098e-07, "loss": 0.359, "step": 13057 }, { "epoch": 0.8179269954117665, "grad_norm": 0.9387474317607185, "learning_rate": 8.440966475924584e-07, "loss": 0.4039, "step": 13058 }, { "epoch": 0.8179896334111089, "grad_norm": 0.8927948494553825, "learning_rate": 8.435327250129594e-07, "loss": 0.3853, "step": 13059 }, { "epoch": 0.8180522714104511, "grad_norm": 0.8469859926860324, "learning_rate": 8.429689735144242e-07, "loss": 0.3666, "step": 13060 }, { "epoch": 0.8181149094097935, "grad_norm": 0.8534501882548563, "learning_rate": 8.424053931200587e-07, "loss": 0.366, "step": 13061 }, { "epoch": 0.8181775474091357, "grad_norm": 0.8785465136345493, "learning_rate": 8.418419838530594e-07, "loss": 0.3961, "step": 13062 }, { "epoch": 0.8182401854084781, "grad_norm": 0.9285946598123678, "learning_rate": 8.412787457366156e-07, "loss": 0.3965, "step": 13063 }, { "epoch": 0.8183028234078203, "grad_norm": 0.8513372287710865, "learning_rate": 8.407156787939103e-07, "loss": 0.3964, "step": 13064 }, { "epoch": 0.8183654614071627, "grad_norm": 0.836086568922767, "learning_rate": 8.401527830481182e-07, "loss": 0.3809, "step": 13065 }, { "epoch": 0.818428099406505, "grad_norm": 0.860784805732971, "learning_rate": 8.395900585224115e-07, "loss": 0.366, "step": 13066 }, { "epoch": 0.8184907374058472, "grad_norm": 0.7761201825960998, "learning_rate": 8.390275052399499e-07, "loss": 0.3445, "step": 13067 }, { "epoch": 0.8185533754051896, "grad_norm": 0.8631881607369682, "learning_rate": 8.384651232238883e-07, "loss": 0.3524, "step": 13068 }, { "epoch": 0.8186160134045318, "grad_norm": 0.7585279087601859, "learning_rate": 8.379029124973737e-07, "loss": 0.3286, "step": 13069 }, { "epoch": 0.8186786514038742, "grad_norm": 0.8246997580057457, "learning_rate": 8.373408730835486e-07, "loss": 0.3853, "step": 13070 }, { "epoch": 0.8187412894032164, "grad_norm": 0.8616695512410256, "learning_rate": 8.367790050055463e-07, "loss": 0.3525, "step": 13071 }, { "epoch": 0.8188039274025588, "grad_norm": 0.892024220731985, "learning_rate": 8.362173082864927e-07, "loss": 0.4017, "step": 13072 }, { "epoch": 0.8188665654019011, "grad_norm": 0.8762184839644747, "learning_rate": 8.35655782949506e-07, "loss": 0.4041, "step": 13073 }, { "epoch": 0.8189292034012433, "grad_norm": 0.9387349867677839, "learning_rate": 8.350944290177021e-07, "loss": 0.3842, "step": 13074 }, { "epoch": 0.8189918414005857, "grad_norm": 0.8105985634419185, "learning_rate": 8.345332465141836e-07, "loss": 0.3087, "step": 13075 }, { "epoch": 0.8190544793999279, "grad_norm": 0.9043141586795492, "learning_rate": 8.339722354620505e-07, "loss": 0.415, "step": 13076 }, { "epoch": 0.8191171173992703, "grad_norm": 0.8352443954471811, "learning_rate": 8.334113958843936e-07, "loss": 0.3592, "step": 13077 }, { "epoch": 0.8191797553986125, "grad_norm": 0.8501396926386581, "learning_rate": 8.328507278042953e-07, "loss": 0.4051, "step": 13078 }, { "epoch": 0.8192423933979549, "grad_norm": 0.8517194053105127, "learning_rate": 8.322902312448361e-07, "loss": 0.3831, "step": 13079 }, { "epoch": 0.8193050313972972, "grad_norm": 0.8775932993479559, "learning_rate": 8.317299062290841e-07, "loss": 0.3924, "step": 13080 }, { "epoch": 0.8193676693966395, "grad_norm": 0.8745244536393091, "learning_rate": 8.311697527801032e-07, "loss": 0.3639, "step": 13081 }, { "epoch": 0.8194303073959818, "grad_norm": 0.9448847126435325, "learning_rate": 8.306097709209476e-07, "loss": 0.391, "step": 13082 }, { "epoch": 0.819492945395324, "grad_norm": 0.7801958126407247, "learning_rate": 8.300499606746692e-07, "loss": 0.3777, "step": 13083 }, { "epoch": 0.8195555833946664, "grad_norm": 0.8732064577806804, "learning_rate": 8.29490322064308e-07, "loss": 0.3843, "step": 13084 }, { "epoch": 0.8196182213940086, "grad_norm": 0.8835495990390112, "learning_rate": 8.289308551128994e-07, "loss": 0.3735, "step": 13085 }, { "epoch": 0.819680859393351, "grad_norm": 0.8324790307457738, "learning_rate": 8.283715598434711e-07, "loss": 0.3854, "step": 13086 }, { "epoch": 0.8197434973926933, "grad_norm": 0.8619570721005607, "learning_rate": 8.278124362790435e-07, "loss": 0.3665, "step": 13087 }, { "epoch": 0.8198061353920356, "grad_norm": 0.8572878669101907, "learning_rate": 8.272534844426294e-07, "loss": 0.3908, "step": 13088 }, { "epoch": 0.8198687733913779, "grad_norm": 0.9267951352257306, "learning_rate": 8.266947043572376e-07, "loss": 0.3754, "step": 13089 }, { "epoch": 0.8199314113907202, "grad_norm": 0.8450548034860031, "learning_rate": 8.261360960458664e-07, "loss": 0.3927, "step": 13090 }, { "epoch": 0.8199940493900625, "grad_norm": 0.9032227655414511, "learning_rate": 8.255776595315069e-07, "loss": 0.3834, "step": 13091 }, { "epoch": 0.8200566873894047, "grad_norm": 0.8050974352946285, "learning_rate": 8.250193948371466e-07, "loss": 0.3865, "step": 13092 }, { "epoch": 0.8201193253887471, "grad_norm": 0.8002521095460114, "learning_rate": 8.244613019857633e-07, "loss": 0.3524, "step": 13093 }, { "epoch": 0.8201819633880894, "grad_norm": 0.8998709247600494, "learning_rate": 8.239033810003277e-07, "loss": 0.4168, "step": 13094 }, { "epoch": 0.8202446013874317, "grad_norm": 0.8991488241024856, "learning_rate": 8.233456319038041e-07, "loss": 0.4156, "step": 13095 }, { "epoch": 0.820307239386774, "grad_norm": 0.87922315830797, "learning_rate": 8.227880547191497e-07, "loss": 0.3858, "step": 13096 }, { "epoch": 0.8203698773861163, "grad_norm": 0.8319372529344741, "learning_rate": 8.222306494693133e-07, "loss": 0.3724, "step": 13097 }, { "epoch": 0.8204325153854586, "grad_norm": 0.9950605073822612, "learning_rate": 8.216734161772393e-07, "loss": 0.3935, "step": 13098 }, { "epoch": 0.8204951533848008, "grad_norm": 0.8920368067795043, "learning_rate": 8.211163548658624e-07, "loss": 0.4141, "step": 13099 }, { "epoch": 0.8205577913841432, "grad_norm": 0.882866160259302, "learning_rate": 8.205594655581128e-07, "loss": 0.3976, "step": 13100 }, { "epoch": 0.8206204293834855, "grad_norm": 0.8221515622031429, "learning_rate": 8.200027482769113e-07, "loss": 0.4062, "step": 13101 }, { "epoch": 0.8206830673828278, "grad_norm": 0.7777902244721443, "learning_rate": 8.194462030451727e-07, "loss": 0.3491, "step": 13102 }, { "epoch": 0.8207457053821701, "grad_norm": 0.8405730944075652, "learning_rate": 8.188898298858045e-07, "loss": 0.3734, "step": 13103 }, { "epoch": 0.8208083433815124, "grad_norm": 0.9258188396999897, "learning_rate": 8.18333628821707e-07, "loss": 0.4358, "step": 13104 }, { "epoch": 0.8208709813808547, "grad_norm": 0.6899050317564238, "learning_rate": 8.177775998757731e-07, "loss": 0.4108, "step": 13105 }, { "epoch": 0.820933619380197, "grad_norm": 0.8965673109588665, "learning_rate": 8.172217430708884e-07, "loss": 0.3873, "step": 13106 }, { "epoch": 0.8209962573795393, "grad_norm": 0.8769454127754752, "learning_rate": 8.166660584299341e-07, "loss": 0.3739, "step": 13107 }, { "epoch": 0.8210588953788815, "grad_norm": 0.9268388789878439, "learning_rate": 8.161105459757806e-07, "loss": 0.423, "step": 13108 }, { "epoch": 0.8211215333782239, "grad_norm": 0.9237364096771132, "learning_rate": 8.155552057312944e-07, "loss": 0.4114, "step": 13109 }, { "epoch": 0.8211841713775662, "grad_norm": 0.9103525515739535, "learning_rate": 8.150000377193329e-07, "loss": 0.3539, "step": 13110 }, { "epoch": 0.8212468093769085, "grad_norm": 0.6825789826274761, "learning_rate": 8.144450419627464e-07, "loss": 0.465, "step": 13111 }, { "epoch": 0.8213094473762508, "grad_norm": 0.8914062900169679, "learning_rate": 8.138902184843789e-07, "loss": 0.4488, "step": 13112 }, { "epoch": 0.8213720853755931, "grad_norm": 0.8431198425919294, "learning_rate": 8.13335567307067e-07, "loss": 0.3739, "step": 13113 }, { "epoch": 0.8214347233749354, "grad_norm": 0.8997368229848637, "learning_rate": 8.127810884536402e-07, "loss": 0.3804, "step": 13114 }, { "epoch": 0.8214973613742778, "grad_norm": 0.8950402072620695, "learning_rate": 8.122267819469198e-07, "loss": 0.3675, "step": 13115 }, { "epoch": 0.82155999937362, "grad_norm": 0.8378369095912671, "learning_rate": 8.116726478097237e-07, "loss": 0.3977, "step": 13116 }, { "epoch": 0.8216226373729623, "grad_norm": 0.8746848961521297, "learning_rate": 8.111186860648573e-07, "loss": 0.3914, "step": 13117 }, { "epoch": 0.8216852753723046, "grad_norm": 0.932568195564416, "learning_rate": 8.105648967351243e-07, "loss": 0.3883, "step": 13118 }, { "epoch": 0.8217479133716469, "grad_norm": 0.8716784104243331, "learning_rate": 8.10011279843318e-07, "loss": 0.4063, "step": 13119 }, { "epoch": 0.8218105513709892, "grad_norm": 0.6597891452564006, "learning_rate": 8.094578354122251e-07, "loss": 0.445, "step": 13120 }, { "epoch": 0.8218731893703315, "grad_norm": 0.8469447502588512, "learning_rate": 8.089045634646259e-07, "loss": 0.3891, "step": 13121 }, { "epoch": 0.8219358273696739, "grad_norm": 0.8577807419785699, "learning_rate": 8.083514640232925e-07, "loss": 0.3977, "step": 13122 }, { "epoch": 0.8219984653690161, "grad_norm": 0.851359595290133, "learning_rate": 8.077985371109909e-07, "loss": 0.3818, "step": 13123 }, { "epoch": 0.8220611033683585, "grad_norm": 0.8369662160427659, "learning_rate": 8.072457827504782e-07, "loss": 0.3634, "step": 13124 }, { "epoch": 0.8221237413677007, "grad_norm": 0.9516127528790349, "learning_rate": 8.066932009645074e-07, "loss": 0.3625, "step": 13125 }, { "epoch": 0.822186379367043, "grad_norm": 0.9058016238958901, "learning_rate": 8.06140791775824e-07, "loss": 0.4261, "step": 13126 }, { "epoch": 0.8222490173663853, "grad_norm": 0.9485916218880598, "learning_rate": 8.055885552071641e-07, "loss": 0.3872, "step": 13127 }, { "epoch": 0.8223116553657276, "grad_norm": 0.8431792083167041, "learning_rate": 8.050364912812575e-07, "loss": 0.3974, "step": 13128 }, { "epoch": 0.82237429336507, "grad_norm": 0.8604738623304615, "learning_rate": 8.044846000208278e-07, "loss": 0.391, "step": 13129 }, { "epoch": 0.8224369313644122, "grad_norm": 0.9313073623202568, "learning_rate": 8.039328814485909e-07, "loss": 0.3943, "step": 13130 }, { "epoch": 0.8224995693637546, "grad_norm": 0.8681803076568452, "learning_rate": 8.03381335587255e-07, "loss": 0.3647, "step": 13131 }, { "epoch": 0.8225622073630968, "grad_norm": 0.88235744307142, "learning_rate": 8.028299624595209e-07, "loss": 0.3793, "step": 13132 }, { "epoch": 0.8226248453624391, "grad_norm": 0.8381350321110403, "learning_rate": 8.022787620880862e-07, "loss": 0.3845, "step": 13133 }, { "epoch": 0.8226874833617814, "grad_norm": 0.9528040594647559, "learning_rate": 8.017277344956353e-07, "loss": 0.414, "step": 13134 }, { "epoch": 0.8227501213611237, "grad_norm": 0.8032097423339682, "learning_rate": 8.011768797048514e-07, "loss": 0.383, "step": 13135 }, { "epoch": 0.822812759360466, "grad_norm": 0.936236713476028, "learning_rate": 8.006261977384066e-07, "loss": 0.4298, "step": 13136 }, { "epoch": 0.8228753973598083, "grad_norm": 0.8158751899364992, "learning_rate": 8.00075688618967e-07, "loss": 0.393, "step": 13137 }, { "epoch": 0.8229380353591507, "grad_norm": 0.8498957959911019, "learning_rate": 7.995253523691915e-07, "loss": 0.4173, "step": 13138 }, { "epoch": 0.8230006733584929, "grad_norm": 0.7990877695888677, "learning_rate": 7.98975189011732e-07, "loss": 0.373, "step": 13139 }, { "epoch": 0.8230633113578353, "grad_norm": 0.839319123263176, "learning_rate": 7.984251985692338e-07, "loss": 0.3811, "step": 13140 }, { "epoch": 0.8231259493571775, "grad_norm": 0.8388042676561289, "learning_rate": 7.978753810643324e-07, "loss": 0.3581, "step": 13141 }, { "epoch": 0.8231885873565198, "grad_norm": 0.8178405690427871, "learning_rate": 7.973257365196624e-07, "loss": 0.3684, "step": 13142 }, { "epoch": 0.8232512253558621, "grad_norm": 0.6008436620959752, "learning_rate": 7.967762649578431e-07, "loss": 0.4606, "step": 13143 }, { "epoch": 0.8233138633552044, "grad_norm": 0.883367704503284, "learning_rate": 7.962269664014943e-07, "loss": 0.4216, "step": 13144 }, { "epoch": 0.8233765013545468, "grad_norm": 0.8433278827473459, "learning_rate": 7.956778408732241e-07, "loss": 0.4281, "step": 13145 }, { "epoch": 0.823439139353889, "grad_norm": 0.8185004593870768, "learning_rate": 7.951288883956343e-07, "loss": 0.3251, "step": 13146 }, { "epoch": 0.8235017773532314, "grad_norm": 0.8687165649548859, "learning_rate": 7.945801089913196e-07, "loss": 0.3591, "step": 13147 }, { "epoch": 0.8235644153525736, "grad_norm": 0.8471989818323972, "learning_rate": 7.940315026828688e-07, "loss": 0.3592, "step": 13148 }, { "epoch": 0.823627053351916, "grad_norm": 0.8148501051170058, "learning_rate": 7.934830694928614e-07, "loss": 0.3972, "step": 13149 }, { "epoch": 0.8236896913512582, "grad_norm": 0.8448270608968893, "learning_rate": 7.929348094438705e-07, "loss": 0.3823, "step": 13150 }, { "epoch": 0.8237523293506005, "grad_norm": 0.826289132195028, "learning_rate": 7.923867225584653e-07, "loss": 0.3383, "step": 13151 }, { "epoch": 0.8238149673499429, "grad_norm": 0.8880542565405274, "learning_rate": 7.918388088592022e-07, "loss": 0.3745, "step": 13152 }, { "epoch": 0.8238776053492851, "grad_norm": 0.8328302664288244, "learning_rate": 7.912910683686359e-07, "loss": 0.3226, "step": 13153 }, { "epoch": 0.8239402433486275, "grad_norm": 0.8620386355582756, "learning_rate": 7.907435011093101e-07, "loss": 0.3726, "step": 13154 }, { "epoch": 0.8240028813479697, "grad_norm": 0.8519944767881104, "learning_rate": 7.901961071037634e-07, "loss": 0.3545, "step": 13155 }, { "epoch": 0.8240655193473121, "grad_norm": 0.8619833448771369, "learning_rate": 7.896488863745261e-07, "loss": 0.3849, "step": 13156 }, { "epoch": 0.8241281573466543, "grad_norm": 0.8145696141604697, "learning_rate": 7.891018389441224e-07, "loss": 0.3738, "step": 13157 }, { "epoch": 0.8241907953459966, "grad_norm": 0.664891925344058, "learning_rate": 7.885549648350671e-07, "loss": 0.4309, "step": 13158 }, { "epoch": 0.824253433345339, "grad_norm": 0.8553198487108292, "learning_rate": 7.880082640698727e-07, "loss": 0.3791, "step": 13159 }, { "epoch": 0.8243160713446812, "grad_norm": 0.8585940006994185, "learning_rate": 7.874617366710396e-07, "loss": 0.3697, "step": 13160 }, { "epoch": 0.8243787093440236, "grad_norm": 0.8180329040630634, "learning_rate": 7.869153826610626e-07, "loss": 0.3803, "step": 13161 }, { "epoch": 0.8244413473433658, "grad_norm": 0.8445235036235914, "learning_rate": 7.863692020624314e-07, "loss": 0.3658, "step": 13162 }, { "epoch": 0.8245039853427082, "grad_norm": 0.8704978176682914, "learning_rate": 7.858231948976253e-07, "loss": 0.4284, "step": 13163 }, { "epoch": 0.8245666233420504, "grad_norm": 0.8976888620528521, "learning_rate": 7.852773611891195e-07, "loss": 0.3972, "step": 13164 }, { "epoch": 0.8246292613413928, "grad_norm": 0.8420933143750846, "learning_rate": 7.847317009593797e-07, "loss": 0.3698, "step": 13165 }, { "epoch": 0.8246918993407351, "grad_norm": 0.8323338141721551, "learning_rate": 7.841862142308649e-07, "loss": 0.4169, "step": 13166 }, { "epoch": 0.8247545373400773, "grad_norm": 0.8511665463092252, "learning_rate": 7.83640901026027e-07, "loss": 0.3641, "step": 13167 }, { "epoch": 0.8248171753394197, "grad_norm": 0.7992765163325808, "learning_rate": 7.83095761367314e-07, "loss": 0.3398, "step": 13168 }, { "epoch": 0.8248798133387619, "grad_norm": 0.8382096250779467, "learning_rate": 7.825507952771616e-07, "loss": 0.3594, "step": 13169 }, { "epoch": 0.8249424513381043, "grad_norm": 0.8472874633854303, "learning_rate": 7.820060027780002e-07, "loss": 0.3778, "step": 13170 }, { "epoch": 0.8250050893374465, "grad_norm": 0.7999275202145562, "learning_rate": 7.814613838922558e-07, "loss": 0.356, "step": 13171 }, { "epoch": 0.8250677273367889, "grad_norm": 0.806795477241581, "learning_rate": 7.809169386423438e-07, "loss": 0.3668, "step": 13172 }, { "epoch": 0.8251303653361312, "grad_norm": 0.6367946291426326, "learning_rate": 7.80372667050674e-07, "loss": 0.4516, "step": 13173 }, { "epoch": 0.8251930033354735, "grad_norm": 0.862820268196397, "learning_rate": 7.798285691396479e-07, "loss": 0.4078, "step": 13174 }, { "epoch": 0.8252556413348158, "grad_norm": 0.7982510496828896, "learning_rate": 7.792846449316616e-07, "loss": 0.3377, "step": 13175 }, { "epoch": 0.825318279334158, "grad_norm": 0.7768398048558595, "learning_rate": 7.787408944491015e-07, "loss": 0.3432, "step": 13176 }, { "epoch": 0.8253809173335004, "grad_norm": 0.8389682867343532, "learning_rate": 7.781973177143509e-07, "loss": 0.3698, "step": 13177 }, { "epoch": 0.8254435553328426, "grad_norm": 0.8759669279566097, "learning_rate": 7.776539147497819e-07, "loss": 0.3979, "step": 13178 }, { "epoch": 0.825506193332185, "grad_norm": 0.8842873634214855, "learning_rate": 7.771106855777622e-07, "loss": 0.4037, "step": 13179 }, { "epoch": 0.8255688313315273, "grad_norm": 0.9415770871728483, "learning_rate": 7.765676302206487e-07, "loss": 0.4212, "step": 13180 }, { "epoch": 0.8256314693308696, "grad_norm": 0.879031288427798, "learning_rate": 7.760247487007966e-07, "loss": 0.415, "step": 13181 }, { "epoch": 0.8256941073302119, "grad_norm": 0.8206321756542123, "learning_rate": 7.7548204104055e-07, "loss": 0.3881, "step": 13182 }, { "epoch": 0.8257567453295541, "grad_norm": 0.6218567897908684, "learning_rate": 7.749395072622473e-07, "loss": 0.4541, "step": 13183 }, { "epoch": 0.8258193833288965, "grad_norm": 0.8274335813678547, "learning_rate": 7.743971473882167e-07, "loss": 0.3938, "step": 13184 }, { "epoch": 0.8258820213282387, "grad_norm": 0.9147732804462686, "learning_rate": 7.738549614407854e-07, "loss": 0.4008, "step": 13185 }, { "epoch": 0.8259446593275811, "grad_norm": 0.8068885048117015, "learning_rate": 7.733129494422681e-07, "loss": 0.3613, "step": 13186 }, { "epoch": 0.8260072973269234, "grad_norm": 0.815596144631582, "learning_rate": 7.727711114149744e-07, "loss": 0.3989, "step": 13187 }, { "epoch": 0.8260699353262657, "grad_norm": 0.8379221322810751, "learning_rate": 7.722294473812064e-07, "loss": 0.3647, "step": 13188 }, { "epoch": 0.826132573325608, "grad_norm": 0.876074519774937, "learning_rate": 7.716879573632574e-07, "loss": 0.4358, "step": 13189 }, { "epoch": 0.8261952113249503, "grad_norm": 0.9485913007185944, "learning_rate": 7.71146641383419e-07, "loss": 0.4451, "step": 13190 }, { "epoch": 0.8262578493242926, "grad_norm": 0.8454628887359736, "learning_rate": 7.706054994639689e-07, "loss": 0.388, "step": 13191 }, { "epoch": 0.8263204873236348, "grad_norm": 0.844603812185292, "learning_rate": 7.700645316271821e-07, "loss": 0.3844, "step": 13192 }, { "epoch": 0.8263831253229772, "grad_norm": 0.8460861193420489, "learning_rate": 7.695237378953224e-07, "loss": 0.3804, "step": 13193 }, { "epoch": 0.8264457633223194, "grad_norm": 0.824921602534395, "learning_rate": 7.689831182906526e-07, "loss": 0.3823, "step": 13194 }, { "epoch": 0.8265084013216618, "grad_norm": 0.8048289666268725, "learning_rate": 7.684426728354227e-07, "loss": 0.3499, "step": 13195 }, { "epoch": 0.8265710393210041, "grad_norm": 0.8443033858114539, "learning_rate": 7.679024015518782e-07, "loss": 0.3998, "step": 13196 }, { "epoch": 0.8266336773203464, "grad_norm": 0.8847341969936166, "learning_rate": 7.673623044622557e-07, "loss": 0.3707, "step": 13197 }, { "epoch": 0.8266963153196887, "grad_norm": 0.8355106234958525, "learning_rate": 7.668223815887859e-07, "loss": 0.361, "step": 13198 }, { "epoch": 0.826758953319031, "grad_norm": 0.8177233358807602, "learning_rate": 7.662826329536932e-07, "loss": 0.3692, "step": 13199 }, { "epoch": 0.8268215913183733, "grad_norm": 0.9051109084391217, "learning_rate": 7.657430585791937e-07, "loss": 0.3644, "step": 13200 }, { "epoch": 0.8268842293177155, "grad_norm": 0.8601182256911316, "learning_rate": 7.652036584874955e-07, "loss": 0.4066, "step": 13201 }, { "epoch": 0.8269468673170579, "grad_norm": 0.8027772901708257, "learning_rate": 7.646644327007996e-07, "loss": 0.397, "step": 13202 }, { "epoch": 0.8270095053164002, "grad_norm": 0.880512156222641, "learning_rate": 7.641253812413024e-07, "loss": 0.3912, "step": 13203 }, { "epoch": 0.8270721433157425, "grad_norm": 0.8805756972507103, "learning_rate": 7.635865041311913e-07, "loss": 0.3687, "step": 13204 }, { "epoch": 0.8271347813150848, "grad_norm": 0.8696713327644341, "learning_rate": 7.630478013926463e-07, "loss": 0.3628, "step": 13205 }, { "epoch": 0.8271974193144271, "grad_norm": 0.9128632509441876, "learning_rate": 7.625092730478395e-07, "loss": 0.4157, "step": 13206 }, { "epoch": 0.8272600573137694, "grad_norm": 0.8438632333421012, "learning_rate": 7.619709191189367e-07, "loss": 0.342, "step": 13207 }, { "epoch": 0.8273226953131116, "grad_norm": 0.8126838871171056, "learning_rate": 7.614327396280985e-07, "loss": 0.3701, "step": 13208 }, { "epoch": 0.827385333312454, "grad_norm": 0.8173501560580816, "learning_rate": 7.60894734597476e-07, "loss": 0.3857, "step": 13209 }, { "epoch": 0.8274479713117963, "grad_norm": 0.8953442793857092, "learning_rate": 7.603569040492115e-07, "loss": 0.358, "step": 13210 }, { "epoch": 0.8275106093111386, "grad_norm": 0.8140999543379897, "learning_rate": 7.598192480054445e-07, "loss": 0.3774, "step": 13211 }, { "epoch": 0.8275732473104809, "grad_norm": 0.8039510727530088, "learning_rate": 7.592817664883051e-07, "loss": 0.3815, "step": 13212 }, { "epoch": 0.8276358853098232, "grad_norm": 0.8412355606585095, "learning_rate": 7.587444595199145e-07, "loss": 0.3918, "step": 13213 }, { "epoch": 0.8276985233091655, "grad_norm": 0.884781098714875, "learning_rate": 7.582073271223894e-07, "loss": 0.4101, "step": 13214 }, { "epoch": 0.8277611613085079, "grad_norm": 0.88734828936073, "learning_rate": 7.576703693178384e-07, "loss": 0.3607, "step": 13215 }, { "epoch": 0.8278237993078501, "grad_norm": 0.8136848849869747, "learning_rate": 7.571335861283607e-07, "loss": 0.3504, "step": 13216 }, { "epoch": 0.8278864373071924, "grad_norm": 0.7790699769368101, "learning_rate": 7.56596977576054e-07, "loss": 0.3707, "step": 13217 }, { "epoch": 0.8279490753065347, "grad_norm": 0.8850769364457022, "learning_rate": 7.560605436830027e-07, "loss": 0.3915, "step": 13218 }, { "epoch": 0.828011713305877, "grad_norm": 0.9162774612411548, "learning_rate": 7.555242844712863e-07, "loss": 0.3966, "step": 13219 }, { "epoch": 0.8280743513052193, "grad_norm": 0.8596282775454123, "learning_rate": 7.549881999629793e-07, "loss": 0.3805, "step": 13220 }, { "epoch": 0.8281369893045616, "grad_norm": 0.8592437041604798, "learning_rate": 7.544522901801455e-07, "loss": 0.3498, "step": 13221 }, { "epoch": 0.828199627303904, "grad_norm": 0.8531870542934844, "learning_rate": 7.539165551448435e-07, "loss": 0.3734, "step": 13222 }, { "epoch": 0.8282622653032462, "grad_norm": 0.8725951813155489, "learning_rate": 7.533809948791248e-07, "loss": 0.3569, "step": 13223 }, { "epoch": 0.8283249033025886, "grad_norm": 0.6126662214299605, "learning_rate": 7.528456094050324e-07, "loss": 0.4486, "step": 13224 }, { "epoch": 0.8283875413019308, "grad_norm": 0.8901972345030736, "learning_rate": 7.523103987446023e-07, "loss": 0.4077, "step": 13225 }, { "epoch": 0.8284501793012731, "grad_norm": 0.9408893576676104, "learning_rate": 7.517753629198643e-07, "loss": 0.4018, "step": 13226 }, { "epoch": 0.8285128173006154, "grad_norm": 0.8334120872237485, "learning_rate": 7.512405019528418e-07, "loss": 0.3622, "step": 13227 }, { "epoch": 0.8285754552999577, "grad_norm": 0.8682237923862406, "learning_rate": 7.507058158655473e-07, "loss": 0.3907, "step": 13228 }, { "epoch": 0.8286380932993, "grad_norm": 0.8426318036294099, "learning_rate": 7.501713046799913e-07, "loss": 0.3065, "step": 13229 }, { "epoch": 0.8287007312986423, "grad_norm": 0.8506202745508297, "learning_rate": 7.496369684181736e-07, "loss": 0.3879, "step": 13230 }, { "epoch": 0.8287633692979847, "grad_norm": 0.8553513039751964, "learning_rate": 7.491028071020872e-07, "loss": 0.3658, "step": 13231 }, { "epoch": 0.8288260072973269, "grad_norm": 0.8626253651035126, "learning_rate": 7.485688207537179e-07, "loss": 0.3735, "step": 13232 }, { "epoch": 0.8288886452966693, "grad_norm": 0.8919008659363387, "learning_rate": 7.480350093950456e-07, "loss": 0.3302, "step": 13233 }, { "epoch": 0.8289512832960115, "grad_norm": 0.9819615370382709, "learning_rate": 7.475013730480407e-07, "loss": 0.3907, "step": 13234 }, { "epoch": 0.8290139212953538, "grad_norm": 0.8750407524621948, "learning_rate": 7.469679117346684e-07, "loss": 0.3614, "step": 13235 }, { "epoch": 0.8290765592946961, "grad_norm": 0.7750421422978954, "learning_rate": 7.464346254768856e-07, "loss": 0.3275, "step": 13236 }, { "epoch": 0.8291391972940384, "grad_norm": 0.6495754404939887, "learning_rate": 7.459015142966442e-07, "loss": 0.473, "step": 13237 }, { "epoch": 0.8292018352933808, "grad_norm": 0.8337157855834809, "learning_rate": 7.453685782158865e-07, "loss": 0.3733, "step": 13238 }, { "epoch": 0.829264473292723, "grad_norm": 0.8330997724867111, "learning_rate": 7.448358172565473e-07, "loss": 0.3625, "step": 13239 }, { "epoch": 0.8293271112920654, "grad_norm": 0.8379852396904802, "learning_rate": 7.443032314405563e-07, "loss": 0.372, "step": 13240 }, { "epoch": 0.8293897492914076, "grad_norm": 0.8536318717395994, "learning_rate": 7.437708207898337e-07, "loss": 0.3869, "step": 13241 }, { "epoch": 0.8294523872907499, "grad_norm": 0.8637310357674601, "learning_rate": 7.432385853262941e-07, "loss": 0.3994, "step": 13242 }, { "epoch": 0.8295150252900922, "grad_norm": 0.8108281762152755, "learning_rate": 7.427065250718435e-07, "loss": 0.3514, "step": 13243 }, { "epoch": 0.8295776632894345, "grad_norm": 0.9363124412411521, "learning_rate": 7.421746400483837e-07, "loss": 0.3881, "step": 13244 }, { "epoch": 0.8296403012887769, "grad_norm": 0.8440469545562534, "learning_rate": 7.41642930277805e-07, "loss": 0.3781, "step": 13245 }, { "epoch": 0.8297029392881191, "grad_norm": 0.9446064759510692, "learning_rate": 7.411113957819943e-07, "loss": 0.4009, "step": 13246 }, { "epoch": 0.8297655772874615, "grad_norm": 0.8352025110765492, "learning_rate": 7.405800365828291e-07, "loss": 0.3861, "step": 13247 }, { "epoch": 0.8298282152868037, "grad_norm": 0.8394929643441329, "learning_rate": 7.400488527021804e-07, "loss": 0.352, "step": 13248 }, { "epoch": 0.8298908532861461, "grad_norm": 0.7967884701903237, "learning_rate": 7.395178441619116e-07, "loss": 0.3219, "step": 13249 }, { "epoch": 0.8299534912854883, "grad_norm": 0.7816225001627685, "learning_rate": 7.389870109838793e-07, "loss": 0.351, "step": 13250 }, { "epoch": 0.8300161292848306, "grad_norm": 0.9122976982523106, "learning_rate": 7.384563531899325e-07, "loss": 0.3891, "step": 13251 }, { "epoch": 0.830078767284173, "grad_norm": 0.8417115899125148, "learning_rate": 7.379258708019116e-07, "loss": 0.3872, "step": 13252 }, { "epoch": 0.8301414052835152, "grad_norm": 0.905047744583743, "learning_rate": 7.373955638416546e-07, "loss": 0.4108, "step": 13253 }, { "epoch": 0.8302040432828576, "grad_norm": 0.8938652907997369, "learning_rate": 7.368654323309859e-07, "loss": 0.4417, "step": 13254 }, { "epoch": 0.8302666812821998, "grad_norm": 0.8405740177766212, "learning_rate": 7.363354762917285e-07, "loss": 0.3719, "step": 13255 }, { "epoch": 0.8303293192815422, "grad_norm": 0.8952436531434531, "learning_rate": 7.358056957456944e-07, "loss": 0.4146, "step": 13256 }, { "epoch": 0.8303919572808844, "grad_norm": 0.8142069948382233, "learning_rate": 7.352760907146889e-07, "loss": 0.3994, "step": 13257 }, { "epoch": 0.8304545952802268, "grad_norm": 0.7926373023722658, "learning_rate": 7.347466612205112e-07, "loss": 0.3706, "step": 13258 }, { "epoch": 0.8305172332795691, "grad_norm": 0.8824087193882049, "learning_rate": 7.342174072849528e-07, "loss": 0.4041, "step": 13259 }, { "epoch": 0.8305798712789113, "grad_norm": 0.8552266974933367, "learning_rate": 7.33688328929798e-07, "loss": 0.3984, "step": 13260 }, { "epoch": 0.8306425092782537, "grad_norm": 0.8412234809241439, "learning_rate": 7.331594261768215e-07, "loss": 0.3934, "step": 13261 }, { "epoch": 0.8307051472775959, "grad_norm": 0.6217991515962579, "learning_rate": 7.326306990477955e-07, "loss": 0.4524, "step": 13262 }, { "epoch": 0.8307677852769383, "grad_norm": 0.8310482555993631, "learning_rate": 7.321021475644835e-07, "loss": 0.4104, "step": 13263 }, { "epoch": 0.8308304232762805, "grad_norm": 0.9625646662162506, "learning_rate": 7.315737717486388e-07, "loss": 0.4423, "step": 13264 }, { "epoch": 0.8308930612756229, "grad_norm": 0.914483735237155, "learning_rate": 7.3104557162201e-07, "loss": 0.3729, "step": 13265 }, { "epoch": 0.8309556992749652, "grad_norm": 0.7991791642826039, "learning_rate": 7.305175472063375e-07, "loss": 0.3343, "step": 13266 }, { "epoch": 0.8310183372743074, "grad_norm": 0.9726334787712739, "learning_rate": 7.299896985233556e-07, "loss": 0.401, "step": 13267 }, { "epoch": 0.8310809752736498, "grad_norm": 0.903143532941678, "learning_rate": 7.294620255947899e-07, "loss": 0.4112, "step": 13268 }, { "epoch": 0.831143613272992, "grad_norm": 0.9163218777526667, "learning_rate": 7.289345284423594e-07, "loss": 0.3997, "step": 13269 }, { "epoch": 0.8312062512723344, "grad_norm": 0.6304682545907689, "learning_rate": 7.284072070877768e-07, "loss": 0.4392, "step": 13270 }, { "epoch": 0.8312688892716766, "grad_norm": 0.8201485857204753, "learning_rate": 7.278800615527471e-07, "loss": 0.3664, "step": 13271 }, { "epoch": 0.831331527271019, "grad_norm": 0.8522246839030108, "learning_rate": 7.273530918589655e-07, "loss": 0.3604, "step": 13272 }, { "epoch": 0.8313941652703613, "grad_norm": 0.914467420047328, "learning_rate": 7.268262980281249e-07, "loss": 0.4191, "step": 13273 }, { "epoch": 0.8314568032697036, "grad_norm": 0.773818004349468, "learning_rate": 7.262996800819066e-07, "loss": 0.354, "step": 13274 }, { "epoch": 0.8315194412690459, "grad_norm": 0.8456549105164357, "learning_rate": 7.257732380419874e-07, "loss": 0.3646, "step": 13275 }, { "epoch": 0.8315820792683881, "grad_norm": 0.927235587245696, "learning_rate": 7.252469719300347e-07, "loss": 0.4051, "step": 13276 }, { "epoch": 0.8316447172677305, "grad_norm": 0.6006713549325716, "learning_rate": 7.247208817677099e-07, "loss": 0.4524, "step": 13277 }, { "epoch": 0.8317073552670727, "grad_norm": 0.8979575404213022, "learning_rate": 7.241949675766663e-07, "loss": 0.3573, "step": 13278 }, { "epoch": 0.8317699932664151, "grad_norm": 0.8207427759705662, "learning_rate": 7.236692293785524e-07, "loss": 0.378, "step": 13279 }, { "epoch": 0.8318326312657573, "grad_norm": 0.6328326725708041, "learning_rate": 7.23143667195007e-07, "loss": 0.4186, "step": 13280 }, { "epoch": 0.8318952692650997, "grad_norm": 0.8331961214040625, "learning_rate": 7.226182810476606e-07, "loss": 0.3903, "step": 13281 }, { "epoch": 0.831957907264442, "grad_norm": 0.8411052066265421, "learning_rate": 7.220930709581409e-07, "loss": 0.4178, "step": 13282 }, { "epoch": 0.8320205452637843, "grad_norm": 0.9357320480441172, "learning_rate": 7.215680369480649e-07, "loss": 0.3939, "step": 13283 }, { "epoch": 0.8320831832631266, "grad_norm": 0.8776458164979177, "learning_rate": 7.210431790390421e-07, "loss": 0.4113, "step": 13284 }, { "epoch": 0.8321458212624688, "grad_norm": 0.8435053486163226, "learning_rate": 7.205184972526763e-07, "loss": 0.365, "step": 13285 }, { "epoch": 0.8322084592618112, "grad_norm": 0.8669467002681015, "learning_rate": 7.199939916105636e-07, "loss": 0.3709, "step": 13286 }, { "epoch": 0.8322710972611534, "grad_norm": 0.8798944715901688, "learning_rate": 7.194696621342912e-07, "loss": 0.3737, "step": 13287 }, { "epoch": 0.8323337352604958, "grad_norm": 0.8348874326692152, "learning_rate": 7.189455088454433e-07, "loss": 0.3481, "step": 13288 }, { "epoch": 0.8323963732598381, "grad_norm": 0.8959455594270543, "learning_rate": 7.184215317655929e-07, "loss": 0.3947, "step": 13289 }, { "epoch": 0.8324590112591804, "grad_norm": 0.844639525896671, "learning_rate": 7.17897730916306e-07, "loss": 0.3305, "step": 13290 }, { "epoch": 0.8325216492585227, "grad_norm": 0.8329633970386181, "learning_rate": 7.173741063191442e-07, "loss": 0.3945, "step": 13291 }, { "epoch": 0.8325842872578649, "grad_norm": 0.9173274835862989, "learning_rate": 7.168506579956591e-07, "loss": 0.3914, "step": 13292 }, { "epoch": 0.8326469252572073, "grad_norm": 0.6069555127876489, "learning_rate": 7.163273859673964e-07, "loss": 0.4205, "step": 13293 }, { "epoch": 0.8327095632565495, "grad_norm": 0.8192407557565851, "learning_rate": 7.158042902558932e-07, "loss": 0.3756, "step": 13294 }, { "epoch": 0.8327722012558919, "grad_norm": 0.9339642252931983, "learning_rate": 7.152813708826789e-07, "loss": 0.4306, "step": 13295 }, { "epoch": 0.8328348392552342, "grad_norm": 0.872604078145448, "learning_rate": 7.147586278692803e-07, "loss": 0.3974, "step": 13296 }, { "epoch": 0.8328974772545765, "grad_norm": 0.591653727060034, "learning_rate": 7.142360612372123e-07, "loss": 0.43, "step": 13297 }, { "epoch": 0.8329601152539188, "grad_norm": 0.865684680978511, "learning_rate": 7.137136710079828e-07, "loss": 0.3634, "step": 13298 }, { "epoch": 0.8330227532532611, "grad_norm": 0.9203161282543497, "learning_rate": 7.131914572030934e-07, "loss": 0.3656, "step": 13299 }, { "epoch": 0.8330853912526034, "grad_norm": 0.7955463189648971, "learning_rate": 7.126694198440404e-07, "loss": 0.3649, "step": 13300 }, { "epoch": 0.8331480292519456, "grad_norm": 0.807346399625569, "learning_rate": 7.121475589523102e-07, "loss": 0.3531, "step": 13301 }, { "epoch": 0.833210667251288, "grad_norm": 0.9652389817031489, "learning_rate": 7.116258745493815e-07, "loss": 0.4578, "step": 13302 }, { "epoch": 0.8332733052506303, "grad_norm": 0.8057869898046381, "learning_rate": 7.111043666567286e-07, "loss": 0.3392, "step": 13303 }, { "epoch": 0.8333359432499726, "grad_norm": 0.8550233218378096, "learning_rate": 7.105830352958143e-07, "loss": 0.3665, "step": 13304 }, { "epoch": 0.8333985812493149, "grad_norm": 0.8632052569976382, "learning_rate": 7.100618804880999e-07, "loss": 0.4304, "step": 13305 }, { "epoch": 0.8334612192486572, "grad_norm": 0.8992261008642877, "learning_rate": 7.095409022550348e-07, "loss": 0.363, "step": 13306 }, { "epoch": 0.8335238572479995, "grad_norm": 0.8434924019831872, "learning_rate": 7.090201006180624e-07, "loss": 0.3468, "step": 13307 }, { "epoch": 0.8335864952473419, "grad_norm": 0.9127263616731589, "learning_rate": 7.08499475598618e-07, "loss": 0.3747, "step": 13308 }, { "epoch": 0.8336491332466841, "grad_norm": 0.868388675116896, "learning_rate": 7.079790272181325e-07, "loss": 0.4136, "step": 13309 }, { "epoch": 0.8337117712460264, "grad_norm": 0.8749421293618557, "learning_rate": 7.074587554980272e-07, "loss": 0.3767, "step": 13310 }, { "epoch": 0.8337744092453687, "grad_norm": 0.8451485412895492, "learning_rate": 7.069386604597161e-07, "loss": 0.4145, "step": 13311 }, { "epoch": 0.833837047244711, "grad_norm": 0.6754659171889088, "learning_rate": 7.064187421246066e-07, "loss": 0.4503, "step": 13312 }, { "epoch": 0.8338996852440533, "grad_norm": 0.6341610233523386, "learning_rate": 7.058990005140975e-07, "loss": 0.4581, "step": 13313 }, { "epoch": 0.8339623232433956, "grad_norm": 0.8518130438805624, "learning_rate": 7.053794356495841e-07, "loss": 0.3636, "step": 13314 }, { "epoch": 0.834024961242738, "grad_norm": 0.8655682656311624, "learning_rate": 7.048600475524497e-07, "loss": 0.3661, "step": 13315 }, { "epoch": 0.8340875992420802, "grad_norm": 0.8127274876041458, "learning_rate": 7.04340836244073e-07, "loss": 0.3683, "step": 13316 }, { "epoch": 0.8341502372414225, "grad_norm": 0.8674867213136642, "learning_rate": 7.038218017458248e-07, "loss": 0.3917, "step": 13317 }, { "epoch": 0.8342128752407648, "grad_norm": 0.80390267647694, "learning_rate": 7.033029440790679e-07, "loss": 0.3289, "step": 13318 }, { "epoch": 0.8342755132401071, "grad_norm": 0.8042772938268499, "learning_rate": 7.027842632651604e-07, "loss": 0.3439, "step": 13319 }, { "epoch": 0.8343381512394494, "grad_norm": 0.8392862258908023, "learning_rate": 7.022657593254501e-07, "loss": 0.3565, "step": 13320 }, { "epoch": 0.8344007892387917, "grad_norm": 0.6239198753538151, "learning_rate": 7.017474322812773e-07, "loss": 0.4431, "step": 13321 }, { "epoch": 0.834463427238134, "grad_norm": 0.7867323776931664, "learning_rate": 7.012292821539796e-07, "loss": 0.4038, "step": 13322 }, { "epoch": 0.8345260652374763, "grad_norm": 0.8356350368533002, "learning_rate": 7.007113089648826e-07, "loss": 0.3801, "step": 13323 }, { "epoch": 0.8345887032368187, "grad_norm": 0.8936255588857506, "learning_rate": 7.001935127353054e-07, "loss": 0.4081, "step": 13324 }, { "epoch": 0.8346513412361609, "grad_norm": 0.8141195350490259, "learning_rate": 6.996758934865621e-07, "loss": 0.3573, "step": 13325 }, { "epoch": 0.8347139792355032, "grad_norm": 0.8856845166252009, "learning_rate": 6.991584512399569e-07, "loss": 0.3885, "step": 13326 }, { "epoch": 0.8347766172348455, "grad_norm": 0.8504351873696487, "learning_rate": 6.986411860167869e-07, "loss": 0.3691, "step": 13327 }, { "epoch": 0.8348392552341878, "grad_norm": 0.8461589051060646, "learning_rate": 6.981240978383452e-07, "loss": 0.4166, "step": 13328 }, { "epoch": 0.8349018932335301, "grad_norm": 0.7925263045307597, "learning_rate": 6.976071867259144e-07, "loss": 0.3683, "step": 13329 }, { "epoch": 0.8349645312328724, "grad_norm": 0.8206527307366399, "learning_rate": 6.970904527007688e-07, "loss": 0.3626, "step": 13330 }, { "epoch": 0.8350271692322148, "grad_norm": 0.8625524410959958, "learning_rate": 6.9657389578418e-07, "loss": 0.3493, "step": 13331 }, { "epoch": 0.835089807231557, "grad_norm": 0.9317167244829467, "learning_rate": 6.960575159974087e-07, "loss": 0.4004, "step": 13332 }, { "epoch": 0.8351524452308994, "grad_norm": 0.7839222482375139, "learning_rate": 6.955413133617089e-07, "loss": 0.3862, "step": 13333 }, { "epoch": 0.8352150832302416, "grad_norm": 0.86047183790989, "learning_rate": 6.950252878983271e-07, "loss": 0.4036, "step": 13334 }, { "epoch": 0.8352777212295839, "grad_norm": 0.8289127945788617, "learning_rate": 6.945094396285041e-07, "loss": 0.3636, "step": 13335 }, { "epoch": 0.8353403592289262, "grad_norm": 0.851164857339236, "learning_rate": 6.939937685734704e-07, "loss": 0.3858, "step": 13336 }, { "epoch": 0.8354029972282685, "grad_norm": 0.8398507166420847, "learning_rate": 6.934782747544533e-07, "loss": 0.3717, "step": 13337 }, { "epoch": 0.8354656352276109, "grad_norm": 0.8575418762566672, "learning_rate": 6.9296295819267e-07, "loss": 0.3874, "step": 13338 }, { "epoch": 0.8355282732269531, "grad_norm": 0.8423344282108245, "learning_rate": 6.924478189093292e-07, "loss": 0.3522, "step": 13339 }, { "epoch": 0.8355909112262955, "grad_norm": 0.8393884090182125, "learning_rate": 6.919328569256372e-07, "loss": 0.3993, "step": 13340 }, { "epoch": 0.8356535492256377, "grad_norm": 0.7989117771673035, "learning_rate": 6.914180722627883e-07, "loss": 0.3444, "step": 13341 }, { "epoch": 0.8357161872249801, "grad_norm": 0.8131763607681222, "learning_rate": 6.909034649419716e-07, "loss": 0.3927, "step": 13342 }, { "epoch": 0.8357788252243223, "grad_norm": 0.8826774137728727, "learning_rate": 6.903890349843678e-07, "loss": 0.3636, "step": 13343 }, { "epoch": 0.8358414632236646, "grad_norm": 0.8711432128694586, "learning_rate": 6.898747824111512e-07, "loss": 0.382, "step": 13344 }, { "epoch": 0.835904101223007, "grad_norm": 0.8251002896982228, "learning_rate": 6.893607072434877e-07, "loss": 0.391, "step": 13345 }, { "epoch": 0.8359667392223492, "grad_norm": 0.8544252666802568, "learning_rate": 6.88846809502538e-07, "loss": 0.4016, "step": 13346 }, { "epoch": 0.8360293772216916, "grad_norm": 0.8737318130745472, "learning_rate": 6.883330892094537e-07, "loss": 0.4057, "step": 13347 }, { "epoch": 0.8360920152210338, "grad_norm": 0.8569779576611987, "learning_rate": 6.878195463853798e-07, "loss": 0.3886, "step": 13348 }, { "epoch": 0.8361546532203762, "grad_norm": 0.8805777031562242, "learning_rate": 6.873061810514548e-07, "loss": 0.3561, "step": 13349 }, { "epoch": 0.8362172912197184, "grad_norm": 0.8514561512600024, "learning_rate": 6.86792993228807e-07, "loss": 0.3563, "step": 13350 }, { "epoch": 0.8362799292190607, "grad_norm": 0.8930537479545364, "learning_rate": 6.862799829385603e-07, "loss": 0.3568, "step": 13351 }, { "epoch": 0.836342567218403, "grad_norm": 0.8069125638326715, "learning_rate": 6.857671502018304e-07, "loss": 0.3796, "step": 13352 }, { "epoch": 0.8364052052177453, "grad_norm": 0.895469951336712, "learning_rate": 6.852544950397256e-07, "loss": 0.4167, "step": 13353 }, { "epoch": 0.8364678432170877, "grad_norm": 0.6273540279974923, "learning_rate": 6.847420174733449e-07, "loss": 0.4365, "step": 13354 }, { "epoch": 0.8365304812164299, "grad_norm": 0.8266919184750026, "learning_rate": 6.842297175237855e-07, "loss": 0.4082, "step": 13355 }, { "epoch": 0.8365931192157723, "grad_norm": 0.878479599568586, "learning_rate": 6.837175952121305e-07, "loss": 0.3985, "step": 13356 }, { "epoch": 0.8366557572151145, "grad_norm": 0.7659752196379502, "learning_rate": 6.832056505594614e-07, "loss": 0.3777, "step": 13357 }, { "epoch": 0.8367183952144569, "grad_norm": 0.8847051606578954, "learning_rate": 6.826938835868491e-07, "loss": 0.3531, "step": 13358 }, { "epoch": 0.8367810332137992, "grad_norm": 0.899375404251128, "learning_rate": 6.821822943153577e-07, "loss": 0.3549, "step": 13359 }, { "epoch": 0.8368436712131414, "grad_norm": 0.9197454371111351, "learning_rate": 6.816708827660451e-07, "loss": 0.3784, "step": 13360 }, { "epoch": 0.8369063092124838, "grad_norm": 0.8878260031557852, "learning_rate": 6.811596489599598e-07, "loss": 0.3994, "step": 13361 }, { "epoch": 0.836968947211826, "grad_norm": 0.8366915630196498, "learning_rate": 6.80648592918145e-07, "loss": 0.4154, "step": 13362 }, { "epoch": 0.8370315852111684, "grad_norm": 0.8058486626034892, "learning_rate": 6.80137714661635e-07, "loss": 0.3531, "step": 13363 }, { "epoch": 0.8370942232105106, "grad_norm": 0.9815596964757765, "learning_rate": 6.796270142114586e-07, "loss": 0.3769, "step": 13364 }, { "epoch": 0.837156861209853, "grad_norm": 0.8187409920325248, "learning_rate": 6.791164915886373e-07, "loss": 0.3604, "step": 13365 }, { "epoch": 0.8372194992091953, "grad_norm": 0.8463553123287934, "learning_rate": 6.78606146814183e-07, "loss": 0.3646, "step": 13366 }, { "epoch": 0.8372821372085376, "grad_norm": 0.8715499072204559, "learning_rate": 6.780959799091013e-07, "loss": 0.4205, "step": 13367 }, { "epoch": 0.8373447752078799, "grad_norm": 0.9205973691453876, "learning_rate": 6.775859908943921e-07, "loss": 0.3814, "step": 13368 }, { "epoch": 0.8374074132072221, "grad_norm": 0.8038204871238435, "learning_rate": 6.770761797910453e-07, "loss": 0.3541, "step": 13369 }, { "epoch": 0.8374700512065645, "grad_norm": 0.7747659623789664, "learning_rate": 6.765665466200455e-07, "loss": 0.3848, "step": 13370 }, { "epoch": 0.8375326892059067, "grad_norm": 0.9392436184187727, "learning_rate": 6.76057091402369e-07, "loss": 0.4206, "step": 13371 }, { "epoch": 0.8375953272052491, "grad_norm": 0.8537503472646294, "learning_rate": 6.755478141589844e-07, "loss": 0.3416, "step": 13372 }, { "epoch": 0.8376579652045913, "grad_norm": 0.8829636562927211, "learning_rate": 6.75038714910854e-07, "loss": 0.3886, "step": 13373 }, { "epoch": 0.8377206032039337, "grad_norm": 0.9067802038629943, "learning_rate": 6.745297936789341e-07, "loss": 0.4277, "step": 13374 }, { "epoch": 0.837783241203276, "grad_norm": 0.8696998221476943, "learning_rate": 6.74021050484171e-07, "loss": 0.3824, "step": 13375 }, { "epoch": 0.8378458792026182, "grad_norm": 0.8228442136922991, "learning_rate": 6.735124853475039e-07, "loss": 0.3922, "step": 13376 }, { "epoch": 0.8379085172019606, "grad_norm": 0.8390721414624841, "learning_rate": 6.730040982898661e-07, "loss": 0.3835, "step": 13377 }, { "epoch": 0.8379711552013028, "grad_norm": 0.9071869937076835, "learning_rate": 6.724958893321832e-07, "loss": 0.3877, "step": 13378 }, { "epoch": 0.8380337932006452, "grad_norm": 0.802235818599097, "learning_rate": 6.719878584953721e-07, "loss": 0.3805, "step": 13379 }, { "epoch": 0.8380964311999874, "grad_norm": 0.8601543573921945, "learning_rate": 6.714800058003435e-07, "loss": 0.3706, "step": 13380 }, { "epoch": 0.8381590691993298, "grad_norm": 0.8557390001949448, "learning_rate": 6.709723312680017e-07, "loss": 0.3817, "step": 13381 }, { "epoch": 0.8382217071986721, "grad_norm": 1.3013802424528618, "learning_rate": 6.704648349192417e-07, "loss": 0.4012, "step": 13382 }, { "epoch": 0.8382843451980144, "grad_norm": 0.9724764526116069, "learning_rate": 6.699575167749534e-07, "loss": 0.4009, "step": 13383 }, { "epoch": 0.8383469831973567, "grad_norm": 0.897661419638479, "learning_rate": 6.694503768560178e-07, "loss": 0.3995, "step": 13384 }, { "epoch": 0.8384096211966989, "grad_norm": 0.8123614443760716, "learning_rate": 6.689434151833085e-07, "loss": 0.3469, "step": 13385 }, { "epoch": 0.8384722591960413, "grad_norm": 0.9148886154399811, "learning_rate": 6.684366317776919e-07, "loss": 0.3343, "step": 13386 }, { "epoch": 0.8385348971953835, "grad_norm": 0.8420655858934348, "learning_rate": 6.679300266600275e-07, "loss": 0.3673, "step": 13387 }, { "epoch": 0.8385975351947259, "grad_norm": 0.5674007869930938, "learning_rate": 6.674235998511669e-07, "loss": 0.4312, "step": 13388 }, { "epoch": 0.8386601731940682, "grad_norm": 0.881880779663093, "learning_rate": 6.669173513719546e-07, "loss": 0.3972, "step": 13389 }, { "epoch": 0.8387228111934105, "grad_norm": 0.8210278858282445, "learning_rate": 6.664112812432294e-07, "loss": 0.3499, "step": 13390 }, { "epoch": 0.8387854491927528, "grad_norm": 0.9353567103964641, "learning_rate": 6.659053894858186e-07, "loss": 0.4032, "step": 13391 }, { "epoch": 0.8388480871920951, "grad_norm": 0.8565213950948777, "learning_rate": 6.653996761205484e-07, "loss": 0.3624, "step": 13392 }, { "epoch": 0.8389107251914374, "grad_norm": 0.8533902968869246, "learning_rate": 6.648941411682313e-07, "loss": 0.373, "step": 13393 }, { "epoch": 0.8389733631907796, "grad_norm": 0.8083031424577041, "learning_rate": 6.643887846496766e-07, "loss": 0.377, "step": 13394 }, { "epoch": 0.839036001190122, "grad_norm": 0.9077269372145351, "learning_rate": 6.638836065856835e-07, "loss": 0.4189, "step": 13395 }, { "epoch": 0.8390986391894643, "grad_norm": 0.8445598275685782, "learning_rate": 6.633786069970466e-07, "loss": 0.3672, "step": 13396 }, { "epoch": 0.8391612771888066, "grad_norm": 0.807201928917584, "learning_rate": 6.628737859045508e-07, "loss": 0.3186, "step": 13397 }, { "epoch": 0.8392239151881489, "grad_norm": 0.9158004377601894, "learning_rate": 6.623691433289741e-07, "loss": 0.3953, "step": 13398 }, { "epoch": 0.8392865531874912, "grad_norm": 0.8074793227881015, "learning_rate": 6.618646792910893e-07, "loss": 0.3516, "step": 13399 }, { "epoch": 0.8393491911868335, "grad_norm": 0.8232857796053843, "learning_rate": 6.613603938116603e-07, "loss": 0.3348, "step": 13400 }, { "epoch": 0.8394118291861757, "grad_norm": 0.7769459229979659, "learning_rate": 6.608562869114409e-07, "loss": 0.3289, "step": 13401 }, { "epoch": 0.8394744671855181, "grad_norm": 0.8789448488179713, "learning_rate": 6.603523586111832e-07, "loss": 0.3835, "step": 13402 }, { "epoch": 0.8395371051848604, "grad_norm": 0.9137880062940446, "learning_rate": 6.598486089316286e-07, "loss": 0.4289, "step": 13403 }, { "epoch": 0.8395997431842027, "grad_norm": 0.8586776094276706, "learning_rate": 6.593450378935102e-07, "loss": 0.3829, "step": 13404 }, { "epoch": 0.839662381183545, "grad_norm": 0.9168507827650292, "learning_rate": 6.588416455175561e-07, "loss": 0.4322, "step": 13405 }, { "epoch": 0.8397250191828873, "grad_norm": 0.9235478425460929, "learning_rate": 6.58338431824484e-07, "loss": 0.3999, "step": 13406 }, { "epoch": 0.8397876571822296, "grad_norm": 0.8866489192385818, "learning_rate": 6.578353968350093e-07, "loss": 0.375, "step": 13407 }, { "epoch": 0.839850295181572, "grad_norm": 0.9025011987049032, "learning_rate": 6.573325405698355e-07, "loss": 0.4061, "step": 13408 }, { "epoch": 0.8399129331809142, "grad_norm": 0.8578449131160866, "learning_rate": 6.568298630496606e-07, "loss": 0.3903, "step": 13409 }, { "epoch": 0.8399755711802565, "grad_norm": 0.8558058118092244, "learning_rate": 6.563273642951734e-07, "loss": 0.4243, "step": 13410 }, { "epoch": 0.8400382091795988, "grad_norm": 0.8469086665475503, "learning_rate": 6.558250443270597e-07, "loss": 0.3771, "step": 13411 }, { "epoch": 0.8401008471789411, "grad_norm": 0.8139064667010322, "learning_rate": 6.55322903165993e-07, "loss": 0.3471, "step": 13412 }, { "epoch": 0.8401634851782834, "grad_norm": 0.8588018630531655, "learning_rate": 6.548209408326417e-07, "loss": 0.38, "step": 13413 }, { "epoch": 0.8402261231776257, "grad_norm": 0.8164579791450353, "learning_rate": 6.543191573476676e-07, "loss": 0.3641, "step": 13414 }, { "epoch": 0.840288761176968, "grad_norm": 0.9115285290132784, "learning_rate": 6.538175527317219e-07, "loss": 0.4188, "step": 13415 }, { "epoch": 0.8403513991763103, "grad_norm": 0.8381640578260668, "learning_rate": 6.533161270054545e-07, "loss": 0.3479, "step": 13416 }, { "epoch": 0.8404140371756527, "grad_norm": 0.8157501508143342, "learning_rate": 6.528148801895013e-07, "loss": 0.374, "step": 13417 }, { "epoch": 0.8404766751749949, "grad_norm": 0.8903054984898472, "learning_rate": 6.523138123044948e-07, "loss": 0.4018, "step": 13418 }, { "epoch": 0.8405393131743372, "grad_norm": 0.9120818609896209, "learning_rate": 6.518129233710574e-07, "loss": 0.3827, "step": 13419 }, { "epoch": 0.8406019511736795, "grad_norm": 0.9506775313746499, "learning_rate": 6.513122134098083e-07, "loss": 0.3955, "step": 13420 }, { "epoch": 0.8406645891730218, "grad_norm": 0.8240356821510335, "learning_rate": 6.508116824413557e-07, "loss": 0.3882, "step": 13421 }, { "epoch": 0.8407272271723641, "grad_norm": 0.8718175395974685, "learning_rate": 6.503113304863013e-07, "loss": 0.3824, "step": 13422 }, { "epoch": 0.8407898651717064, "grad_norm": 0.8535920528435978, "learning_rate": 6.498111575652399e-07, "loss": 0.4049, "step": 13423 }, { "epoch": 0.8408525031710488, "grad_norm": 0.896789091598635, "learning_rate": 6.493111636987582e-07, "loss": 0.417, "step": 13424 }, { "epoch": 0.840915141170391, "grad_norm": 0.8621167606620789, "learning_rate": 6.488113489074371e-07, "loss": 0.3959, "step": 13425 }, { "epoch": 0.8409777791697333, "grad_norm": 0.8187669954588288, "learning_rate": 6.483117132118483e-07, "loss": 0.3521, "step": 13426 }, { "epoch": 0.8410404171690756, "grad_norm": 0.8672206077009958, "learning_rate": 6.478122566325573e-07, "loss": 0.3686, "step": 13427 }, { "epoch": 0.8411030551684179, "grad_norm": 0.8840812520743236, "learning_rate": 6.473129791901206e-07, "loss": 0.3653, "step": 13428 }, { "epoch": 0.8411656931677602, "grad_norm": 0.8463201133268394, "learning_rate": 6.468138809050911e-07, "loss": 0.3781, "step": 13429 }, { "epoch": 0.8412283311671025, "grad_norm": 0.8781225366273275, "learning_rate": 6.463149617980097e-07, "loss": 0.407, "step": 13430 }, { "epoch": 0.8412909691664449, "grad_norm": 0.8767631358310709, "learning_rate": 6.458162218894127e-07, "loss": 0.3633, "step": 13431 }, { "epoch": 0.8413536071657871, "grad_norm": 0.8354146338969092, "learning_rate": 6.453176611998268e-07, "loss": 0.3687, "step": 13432 }, { "epoch": 0.8414162451651295, "grad_norm": 0.7933776242378255, "learning_rate": 6.44819279749776e-07, "loss": 0.3253, "step": 13433 }, { "epoch": 0.8414788831644717, "grad_norm": 0.8527197988945147, "learning_rate": 6.443210775597714e-07, "loss": 0.4011, "step": 13434 }, { "epoch": 0.841541521163814, "grad_norm": 0.845544656269711, "learning_rate": 6.438230546503199e-07, "loss": 0.3514, "step": 13435 }, { "epoch": 0.8416041591631563, "grad_norm": 0.8413525835787151, "learning_rate": 6.433252110419202e-07, "loss": 0.3791, "step": 13436 }, { "epoch": 0.8416667971624986, "grad_norm": 0.9283534437890152, "learning_rate": 6.428275467550621e-07, "loss": 0.3927, "step": 13437 }, { "epoch": 0.841729435161841, "grad_norm": 0.8272663693749889, "learning_rate": 6.423300618102319e-07, "loss": 0.3458, "step": 13438 }, { "epoch": 0.8417920731611832, "grad_norm": 0.8572203065978905, "learning_rate": 6.418327562279053e-07, "loss": 0.4214, "step": 13439 }, { "epoch": 0.8418547111605256, "grad_norm": 0.771873061687504, "learning_rate": 6.413356300285517e-07, "loss": 0.3614, "step": 13440 }, { "epoch": 0.8419173491598678, "grad_norm": 0.8924971802216533, "learning_rate": 6.408386832326314e-07, "loss": 0.3683, "step": 13441 }, { "epoch": 0.8419799871592102, "grad_norm": 0.8282906609756984, "learning_rate": 6.403419158606005e-07, "loss": 0.3476, "step": 13442 }, { "epoch": 0.8420426251585524, "grad_norm": 0.837090680146753, "learning_rate": 6.398453279329059e-07, "loss": 0.3762, "step": 13443 }, { "epoch": 0.8421052631578947, "grad_norm": 0.8876421501867011, "learning_rate": 6.39348919469987e-07, "loss": 0.4179, "step": 13444 }, { "epoch": 0.842167901157237, "grad_norm": 0.8725016672444784, "learning_rate": 6.388526904922759e-07, "loss": 0.4044, "step": 13445 }, { "epoch": 0.8422305391565793, "grad_norm": 0.7643037820323705, "learning_rate": 6.383566410201974e-07, "loss": 0.3304, "step": 13446 }, { "epoch": 0.8422931771559217, "grad_norm": 0.9063343403928082, "learning_rate": 6.37860771074168e-07, "loss": 0.4036, "step": 13447 }, { "epoch": 0.8423558151552639, "grad_norm": 0.9465322961690545, "learning_rate": 6.373650806746001e-07, "loss": 0.424, "step": 13448 }, { "epoch": 0.8424184531546063, "grad_norm": 0.8689567351144044, "learning_rate": 6.368695698418941e-07, "loss": 0.355, "step": 13449 }, { "epoch": 0.8424810911539485, "grad_norm": 0.9224533735441196, "learning_rate": 6.363742385964478e-07, "loss": 0.3743, "step": 13450 }, { "epoch": 0.8425437291532909, "grad_norm": 0.8341410815871985, "learning_rate": 6.358790869586473e-07, "loss": 0.4216, "step": 13451 }, { "epoch": 0.8426063671526332, "grad_norm": 0.9264091148551347, "learning_rate": 6.353841149488738e-07, "loss": 0.448, "step": 13452 }, { "epoch": 0.8426690051519754, "grad_norm": 0.991757671416981, "learning_rate": 6.348893225875002e-07, "loss": 0.4152, "step": 13453 }, { "epoch": 0.8427316431513178, "grad_norm": 0.8288662897362408, "learning_rate": 6.343947098948927e-07, "loss": 0.3619, "step": 13454 }, { "epoch": 0.84279428115066, "grad_norm": 0.8641143323595434, "learning_rate": 6.339002768914087e-07, "loss": 0.3668, "step": 13455 }, { "epoch": 0.8428569191500024, "grad_norm": 0.8582869699296476, "learning_rate": 6.334060235973988e-07, "loss": 0.3793, "step": 13456 }, { "epoch": 0.8429195571493446, "grad_norm": 0.8875056162899714, "learning_rate": 6.329119500332087e-07, "loss": 0.3984, "step": 13457 }, { "epoch": 0.842982195148687, "grad_norm": 0.873106232542641, "learning_rate": 6.324180562191722e-07, "loss": 0.4055, "step": 13458 }, { "epoch": 0.8430448331480292, "grad_norm": 0.8797998897041956, "learning_rate": 6.319243421756199e-07, "loss": 0.4049, "step": 13459 }, { "epoch": 0.8431074711473715, "grad_norm": 0.9227071860616315, "learning_rate": 6.314308079228726e-07, "loss": 0.3934, "step": 13460 }, { "epoch": 0.8431701091467139, "grad_norm": 0.8270894127392496, "learning_rate": 6.30937453481244e-07, "loss": 0.3922, "step": 13461 }, { "epoch": 0.8432327471460561, "grad_norm": 0.8874719331180978, "learning_rate": 6.304442788710413e-07, "loss": 0.4234, "step": 13462 }, { "epoch": 0.8432953851453985, "grad_norm": 0.8853742511156081, "learning_rate": 6.299512841125626e-07, "loss": 0.3808, "step": 13463 }, { "epoch": 0.8433580231447407, "grad_norm": 0.8716409365955213, "learning_rate": 6.294584692261002e-07, "loss": 0.3864, "step": 13464 }, { "epoch": 0.8434206611440831, "grad_norm": 0.8473789342758893, "learning_rate": 6.289658342319372e-07, "loss": 0.3693, "step": 13465 }, { "epoch": 0.8434832991434253, "grad_norm": 0.8559662899002163, "learning_rate": 6.284733791503533e-07, "loss": 0.4064, "step": 13466 }, { "epoch": 0.8435459371427677, "grad_norm": 0.8445037959365513, "learning_rate": 6.279811040016148e-07, "loss": 0.3886, "step": 13467 }, { "epoch": 0.84360857514211, "grad_norm": 0.5724963556141492, "learning_rate": 6.274890088059871e-07, "loss": 0.4415, "step": 13468 }, { "epoch": 0.8436712131414522, "grad_norm": 0.8805284562472635, "learning_rate": 6.269970935837233e-07, "loss": 0.3948, "step": 13469 }, { "epoch": 0.8437338511407946, "grad_norm": 0.8727643168080884, "learning_rate": 6.265053583550706e-07, "loss": 0.3823, "step": 13470 }, { "epoch": 0.8437964891401368, "grad_norm": 0.9106523884515172, "learning_rate": 6.260138031402696e-07, "loss": 0.4241, "step": 13471 }, { "epoch": 0.8438591271394792, "grad_norm": 0.8593460410587551, "learning_rate": 6.255224279595518e-07, "loss": 0.4101, "step": 13472 }, { "epoch": 0.8439217651388214, "grad_norm": 0.8908902398765288, "learning_rate": 6.250312328331431e-07, "loss": 0.3689, "step": 13473 }, { "epoch": 0.8439844031381638, "grad_norm": 0.8447879167943739, "learning_rate": 6.245402177812599e-07, "loss": 0.4092, "step": 13474 }, { "epoch": 0.8440470411375061, "grad_norm": 0.8603790975812267, "learning_rate": 6.240493828241128e-07, "loss": 0.361, "step": 13475 }, { "epoch": 0.8441096791368484, "grad_norm": 0.8971279279944685, "learning_rate": 6.235587279819072e-07, "loss": 0.4156, "step": 13476 }, { "epoch": 0.8441723171361907, "grad_norm": 0.885161796455134, "learning_rate": 6.230682532748361e-07, "loss": 0.3985, "step": 13477 }, { "epoch": 0.8442349551355329, "grad_norm": 0.8587595859844966, "learning_rate": 6.225779587230885e-07, "loss": 0.398, "step": 13478 }, { "epoch": 0.8442975931348753, "grad_norm": 0.9276407495182495, "learning_rate": 6.220878443468442e-07, "loss": 0.4076, "step": 13479 }, { "epoch": 0.8443602311342175, "grad_norm": 0.8868490353229708, "learning_rate": 6.215979101662773e-07, "loss": 0.3896, "step": 13480 }, { "epoch": 0.8444228691335599, "grad_norm": 0.8815731076610963, "learning_rate": 6.211081562015526e-07, "loss": 0.42, "step": 13481 }, { "epoch": 0.8444855071329022, "grad_norm": 0.6592336872579033, "learning_rate": 6.206185824728295e-07, "loss": 0.4725, "step": 13482 }, { "epoch": 0.8445481451322445, "grad_norm": 0.875774182378357, "learning_rate": 6.20129189000257e-07, "loss": 0.3872, "step": 13483 }, { "epoch": 0.8446107831315868, "grad_norm": 0.8376421434096163, "learning_rate": 6.196399758039795e-07, "loss": 0.3566, "step": 13484 }, { "epoch": 0.844673421130929, "grad_norm": 0.8991582210984852, "learning_rate": 6.191509429041353e-07, "loss": 0.3764, "step": 13485 }, { "epoch": 0.8447360591302714, "grad_norm": 0.8467525462288455, "learning_rate": 6.186620903208512e-07, "loss": 0.3748, "step": 13486 }, { "epoch": 0.8447986971296136, "grad_norm": 0.8292662954653534, "learning_rate": 6.181734180742488e-07, "loss": 0.3453, "step": 13487 }, { "epoch": 0.844861335128956, "grad_norm": 0.8289206052954443, "learning_rate": 6.176849261844419e-07, "loss": 0.3504, "step": 13488 }, { "epoch": 0.8449239731282983, "grad_norm": 0.9179156856456723, "learning_rate": 6.171966146715369e-07, "loss": 0.3635, "step": 13489 }, { "epoch": 0.8449866111276406, "grad_norm": 0.6079404975364304, "learning_rate": 6.167084835556325e-07, "loss": 0.4775, "step": 13490 }, { "epoch": 0.8450492491269829, "grad_norm": 0.853358307620942, "learning_rate": 6.162205328568194e-07, "loss": 0.4022, "step": 13491 }, { "epoch": 0.8451118871263252, "grad_norm": 0.934476966997766, "learning_rate": 6.157327625951843e-07, "loss": 0.3802, "step": 13492 }, { "epoch": 0.8451745251256675, "grad_norm": 0.8951684014997312, "learning_rate": 6.15245172790801e-07, "loss": 0.4186, "step": 13493 }, { "epoch": 0.8452371631250097, "grad_norm": 0.8292029594401948, "learning_rate": 6.147577634637413e-07, "loss": 0.3673, "step": 13494 }, { "epoch": 0.8452998011243521, "grad_norm": 0.8922187926256429, "learning_rate": 6.14270534634066e-07, "loss": 0.3805, "step": 13495 }, { "epoch": 0.8453624391236944, "grad_norm": 0.845809755884411, "learning_rate": 6.137834863218295e-07, "loss": 0.3678, "step": 13496 }, { "epoch": 0.8454250771230367, "grad_norm": 0.8794388990558779, "learning_rate": 6.13296618547079e-07, "loss": 0.3842, "step": 13497 }, { "epoch": 0.845487715122379, "grad_norm": 0.8704064973423138, "learning_rate": 6.128099313298541e-07, "loss": 0.4124, "step": 13498 }, { "epoch": 0.8455503531217213, "grad_norm": 0.9335877945336223, "learning_rate": 6.123234246901866e-07, "loss": 0.3794, "step": 13499 }, { "epoch": 0.8456129911210636, "grad_norm": 0.8471420420935851, "learning_rate": 6.118370986480999e-07, "loss": 0.385, "step": 13500 }, { "epoch": 0.845675629120406, "grad_norm": 0.8252787111283435, "learning_rate": 6.113509532236139e-07, "loss": 0.3682, "step": 13501 }, { "epoch": 0.8457382671197482, "grad_norm": 0.8555479919410461, "learning_rate": 6.108649884367362e-07, "loss": 0.3653, "step": 13502 }, { "epoch": 0.8458009051190905, "grad_norm": 0.8958184971108341, "learning_rate": 6.103792043074713e-07, "loss": 0.3953, "step": 13503 }, { "epoch": 0.8458635431184328, "grad_norm": 0.8750337001785596, "learning_rate": 6.098936008558131e-07, "loss": 0.3998, "step": 13504 }, { "epoch": 0.8459261811177751, "grad_norm": 0.9084089040861821, "learning_rate": 6.094081781017497e-07, "loss": 0.3621, "step": 13505 }, { "epoch": 0.8459888191171174, "grad_norm": 0.9309589479875249, "learning_rate": 6.089229360652599e-07, "loss": 0.3785, "step": 13506 }, { "epoch": 0.8460514571164597, "grad_norm": 0.91643434871522, "learning_rate": 6.084378747663172e-07, "loss": 0.3781, "step": 13507 }, { "epoch": 0.846114095115802, "grad_norm": 0.9074609767558326, "learning_rate": 6.079529942248868e-07, "loss": 0.3716, "step": 13508 }, { "epoch": 0.8461767331151443, "grad_norm": 0.9074489456338919, "learning_rate": 6.07468294460925e-07, "loss": 0.3565, "step": 13509 }, { "epoch": 0.8462393711144865, "grad_norm": 0.8887907413872156, "learning_rate": 6.069837754943853e-07, "loss": 0.3858, "step": 13510 }, { "epoch": 0.8463020091138289, "grad_norm": 0.8113872564801081, "learning_rate": 6.064994373452071e-07, "loss": 0.3146, "step": 13511 }, { "epoch": 0.8463646471131712, "grad_norm": 0.9196473947884839, "learning_rate": 6.060152800333286e-07, "loss": 0.421, "step": 13512 }, { "epoch": 0.8464272851125135, "grad_norm": 0.8817431776195372, "learning_rate": 6.055313035786769e-07, "loss": 0.3808, "step": 13513 }, { "epoch": 0.8464899231118558, "grad_norm": 0.874414527911658, "learning_rate": 6.050475080011725e-07, "loss": 0.4012, "step": 13514 }, { "epoch": 0.8465525611111981, "grad_norm": 0.9567828070255532, "learning_rate": 6.045638933207282e-07, "loss": 0.4257, "step": 13515 }, { "epoch": 0.8466151991105404, "grad_norm": 0.848178873758519, "learning_rate": 6.040804595572497e-07, "loss": 0.3857, "step": 13516 }, { "epoch": 0.8466778371098828, "grad_norm": 0.854881750316407, "learning_rate": 6.03597206730635e-07, "loss": 0.4152, "step": 13517 }, { "epoch": 0.846740475109225, "grad_norm": 0.8407716505888015, "learning_rate": 6.031141348607761e-07, "loss": 0.3885, "step": 13518 }, { "epoch": 0.8468031131085673, "grad_norm": 0.9734085431501454, "learning_rate": 6.026312439675553e-07, "loss": 0.4049, "step": 13519 }, { "epoch": 0.8468657511079096, "grad_norm": 0.8087929054869709, "learning_rate": 6.021485340708477e-07, "loss": 0.3502, "step": 13520 }, { "epoch": 0.8469283891072519, "grad_norm": 0.8899431139964624, "learning_rate": 6.016660051905238e-07, "loss": 0.3517, "step": 13521 }, { "epoch": 0.8469910271065942, "grad_norm": 0.8608179915210714, "learning_rate": 6.01183657346443e-07, "loss": 0.3627, "step": 13522 }, { "epoch": 0.8470536651059365, "grad_norm": 0.8722120279285496, "learning_rate": 6.007014905584602e-07, "loss": 0.3832, "step": 13523 }, { "epoch": 0.8471163031052789, "grad_norm": 0.8690799817086744, "learning_rate": 6.002195048464199e-07, "loss": 0.3894, "step": 13524 }, { "epoch": 0.8471789411046211, "grad_norm": 0.8694013394420095, "learning_rate": 5.997377002301618e-07, "loss": 0.359, "step": 13525 }, { "epoch": 0.8472415791039635, "grad_norm": 0.7993384863797472, "learning_rate": 5.992560767295158e-07, "loss": 0.3851, "step": 13526 }, { "epoch": 0.8473042171033057, "grad_norm": 0.9055435975169182, "learning_rate": 5.98774634364307e-07, "loss": 0.4048, "step": 13527 }, { "epoch": 0.847366855102648, "grad_norm": 0.7934534933565518, "learning_rate": 5.982933731543517e-07, "loss": 0.3224, "step": 13528 }, { "epoch": 0.8474294931019903, "grad_norm": 0.8243557863017559, "learning_rate": 5.978122931194569e-07, "loss": 0.335, "step": 13529 }, { "epoch": 0.8474921311013326, "grad_norm": 0.9019012941182575, "learning_rate": 5.973313942794256e-07, "loss": 0.4091, "step": 13530 }, { "epoch": 0.847554769100675, "grad_norm": 0.8794565688210341, "learning_rate": 5.968506766540521e-07, "loss": 0.3669, "step": 13531 }, { "epoch": 0.8476174071000172, "grad_norm": 0.834386362820316, "learning_rate": 5.963701402631217e-07, "loss": 0.3875, "step": 13532 }, { "epoch": 0.8476800450993596, "grad_norm": 0.8556046812149115, "learning_rate": 5.958897851264139e-07, "loss": 0.3452, "step": 13533 }, { "epoch": 0.8477426830987018, "grad_norm": 0.8369894626784755, "learning_rate": 5.954096112636987e-07, "loss": 0.371, "step": 13534 }, { "epoch": 0.8478053210980441, "grad_norm": 0.9498125880536274, "learning_rate": 5.949296186947429e-07, "loss": 0.3897, "step": 13535 }, { "epoch": 0.8478679590973864, "grad_norm": 0.9415369779676381, "learning_rate": 5.944498074393013e-07, "loss": 0.4592, "step": 13536 }, { "epoch": 0.8479305970967287, "grad_norm": 0.5753217547917915, "learning_rate": 5.93970177517123e-07, "loss": 0.4466, "step": 13537 }, { "epoch": 0.847993235096071, "grad_norm": 0.9319425221783904, "learning_rate": 5.934907289479508e-07, "loss": 0.3551, "step": 13538 }, { "epoch": 0.8480558730954133, "grad_norm": 0.8673981660026089, "learning_rate": 5.930114617515165e-07, "loss": 0.3746, "step": 13539 }, { "epoch": 0.8481185110947557, "grad_norm": 0.9226842193455315, "learning_rate": 5.925323759475494e-07, "loss": 0.4017, "step": 13540 }, { "epoch": 0.8481811490940979, "grad_norm": 0.8609729198902301, "learning_rate": 5.92053471555768e-07, "loss": 0.412, "step": 13541 }, { "epoch": 0.8482437870934403, "grad_norm": 0.8489135368834813, "learning_rate": 5.915747485958834e-07, "loss": 0.4029, "step": 13542 }, { "epoch": 0.8483064250927825, "grad_norm": 0.9414986975533244, "learning_rate": 5.910962070875998e-07, "loss": 0.3936, "step": 13543 }, { "epoch": 0.8483690630921248, "grad_norm": 0.8116632236922077, "learning_rate": 5.906178470506158e-07, "loss": 0.3706, "step": 13544 }, { "epoch": 0.8484317010914671, "grad_norm": 0.852392394787291, "learning_rate": 5.90139668504619e-07, "loss": 0.4068, "step": 13545 }, { "epoch": 0.8484943390908094, "grad_norm": 0.8839829033527947, "learning_rate": 5.896616714692927e-07, "loss": 0.3874, "step": 13546 }, { "epoch": 0.8485569770901518, "grad_norm": 0.8458435528688907, "learning_rate": 5.891838559643104e-07, "loss": 0.3726, "step": 13547 }, { "epoch": 0.848619615089494, "grad_norm": 0.8253035996881153, "learning_rate": 5.887062220093381e-07, "loss": 0.3744, "step": 13548 }, { "epoch": 0.8486822530888364, "grad_norm": 0.7954313606442143, "learning_rate": 5.882287696240374e-07, "loss": 0.3896, "step": 13549 }, { "epoch": 0.8487448910881786, "grad_norm": 0.8703428498709661, "learning_rate": 5.877514988280597e-07, "loss": 0.3654, "step": 13550 }, { "epoch": 0.848807529087521, "grad_norm": 0.8280652887647062, "learning_rate": 5.872744096410487e-07, "loss": 0.3562, "step": 13551 }, { "epoch": 0.8488701670868632, "grad_norm": 0.8915210432397387, "learning_rate": 5.867975020826416e-07, "loss": 0.3731, "step": 13552 }, { "epoch": 0.8489328050862055, "grad_norm": 0.8413047251664084, "learning_rate": 5.863207761724693e-07, "loss": 0.3697, "step": 13553 }, { "epoch": 0.8489954430855479, "grad_norm": 0.9283711805374978, "learning_rate": 5.858442319301532e-07, "loss": 0.4031, "step": 13554 }, { "epoch": 0.8490580810848901, "grad_norm": 0.9249085403960884, "learning_rate": 5.853678693753073e-07, "loss": 0.4184, "step": 13555 }, { "epoch": 0.8491207190842325, "grad_norm": 0.8595576368285577, "learning_rate": 5.848916885275397e-07, "loss": 0.3724, "step": 13556 }, { "epoch": 0.8491833570835747, "grad_norm": 0.5759684904164073, "learning_rate": 5.84415689406449e-07, "loss": 0.4298, "step": 13557 }, { "epoch": 0.8492459950829171, "grad_norm": 0.8552970149158085, "learning_rate": 5.839398720316286e-07, "loss": 0.3618, "step": 13558 }, { "epoch": 0.8493086330822593, "grad_norm": 0.8423071498484144, "learning_rate": 5.834642364226628e-07, "loss": 0.355, "step": 13559 }, { "epoch": 0.8493712710816017, "grad_norm": 0.8633931737450061, "learning_rate": 5.829887825991276e-07, "loss": 0.3853, "step": 13560 }, { "epoch": 0.849433909080944, "grad_norm": 0.841565347111892, "learning_rate": 5.825135105805952e-07, "loss": 0.3516, "step": 13561 }, { "epoch": 0.8494965470802862, "grad_norm": 0.8266688028569953, "learning_rate": 5.820384203866269e-07, "loss": 0.3538, "step": 13562 }, { "epoch": 0.8495591850796286, "grad_norm": 0.8400981257737311, "learning_rate": 5.815635120367769e-07, "loss": 0.3775, "step": 13563 }, { "epoch": 0.8496218230789708, "grad_norm": 0.8426603308713919, "learning_rate": 5.810887855505926e-07, "loss": 0.3927, "step": 13564 }, { "epoch": 0.8496844610783132, "grad_norm": 0.9276927840536991, "learning_rate": 5.806142409476145e-07, "loss": 0.4366, "step": 13565 }, { "epoch": 0.8497470990776554, "grad_norm": 0.600439422972364, "learning_rate": 5.801398782473733e-07, "loss": 0.4543, "step": 13566 }, { "epoch": 0.8498097370769978, "grad_norm": 0.9033021929518298, "learning_rate": 5.796656974693964e-07, "loss": 0.3795, "step": 13567 }, { "epoch": 0.8498723750763401, "grad_norm": 0.9701658700266792, "learning_rate": 5.791916986331996e-07, "loss": 0.4256, "step": 13568 }, { "epoch": 0.8499350130756823, "grad_norm": 0.8875873352257336, "learning_rate": 5.787178817582917e-07, "loss": 0.3761, "step": 13569 }, { "epoch": 0.8499976510750247, "grad_norm": 0.5889435819839354, "learning_rate": 5.78244246864178e-07, "loss": 0.4184, "step": 13570 }, { "epoch": 0.8500602890743669, "grad_norm": 0.8019882765885019, "learning_rate": 5.777707939703515e-07, "loss": 0.3416, "step": 13571 }, { "epoch": 0.8501229270737093, "grad_norm": 0.8750515255606793, "learning_rate": 5.772975230963002e-07, "loss": 0.399, "step": 13572 }, { "epoch": 0.8501855650730515, "grad_norm": 0.8747359650768225, "learning_rate": 5.768244342615031e-07, "loss": 0.3832, "step": 13573 }, { "epoch": 0.8502482030723939, "grad_norm": 0.8198915859990994, "learning_rate": 5.763515274854337e-07, "loss": 0.3818, "step": 13574 }, { "epoch": 0.8503108410717362, "grad_norm": 0.8175970031022336, "learning_rate": 5.758788027875556e-07, "loss": 0.4116, "step": 13575 }, { "epoch": 0.8503734790710785, "grad_norm": 0.8555839239854939, "learning_rate": 5.754062601873278e-07, "loss": 0.3941, "step": 13576 }, { "epoch": 0.8504361170704208, "grad_norm": 0.9021158468077546, "learning_rate": 5.749338997041997e-07, "loss": 0.4334, "step": 13577 }, { "epoch": 0.850498755069763, "grad_norm": 0.9003606038178533, "learning_rate": 5.744617213576131e-07, "loss": 0.4071, "step": 13578 }, { "epoch": 0.8505613930691054, "grad_norm": 0.8912903689857077, "learning_rate": 5.73989725167004e-07, "loss": 0.4158, "step": 13579 }, { "epoch": 0.8506240310684476, "grad_norm": 0.9150853251976403, "learning_rate": 5.735179111517997e-07, "loss": 0.3813, "step": 13580 }, { "epoch": 0.85068666906779, "grad_norm": 0.8949027503327376, "learning_rate": 5.73046279331419e-07, "loss": 0.3705, "step": 13581 }, { "epoch": 0.8507493070671323, "grad_norm": 0.945072085002833, "learning_rate": 5.725748297252759e-07, "loss": 0.4136, "step": 13582 }, { "epoch": 0.8508119450664746, "grad_norm": 0.8088606643403846, "learning_rate": 5.721035623527743e-07, "loss": 0.3658, "step": 13583 }, { "epoch": 0.8508745830658169, "grad_norm": 0.8851857706765173, "learning_rate": 5.716324772333115e-07, "loss": 0.3995, "step": 13584 }, { "epoch": 0.8509372210651592, "grad_norm": 0.8698503550327896, "learning_rate": 5.711615743862775e-07, "loss": 0.3717, "step": 13585 }, { "epoch": 0.8509998590645015, "grad_norm": 0.8911375802140209, "learning_rate": 5.706908538310546e-07, "loss": 0.3896, "step": 13586 }, { "epoch": 0.8510624970638437, "grad_norm": 0.9102401632813593, "learning_rate": 5.7022031558702e-07, "loss": 0.3805, "step": 13587 }, { "epoch": 0.8511251350631861, "grad_norm": 0.8603668603488097, "learning_rate": 5.69749959673539e-07, "loss": 0.3838, "step": 13588 }, { "epoch": 0.8511877730625284, "grad_norm": 0.8356716892866951, "learning_rate": 5.692797861099719e-07, "loss": 0.3531, "step": 13589 }, { "epoch": 0.8512504110618707, "grad_norm": 0.8830975682527782, "learning_rate": 5.688097949156712e-07, "loss": 0.3635, "step": 13590 }, { "epoch": 0.851313049061213, "grad_norm": 0.8381340274066198, "learning_rate": 5.68339986109982e-07, "loss": 0.3701, "step": 13591 }, { "epoch": 0.8513756870605553, "grad_norm": 0.8793360511484049, "learning_rate": 5.678703597122409e-07, "loss": 0.377, "step": 13592 }, { "epoch": 0.8514383250598976, "grad_norm": 0.8305559712599914, "learning_rate": 5.674009157417793e-07, "loss": 0.3426, "step": 13593 }, { "epoch": 0.8515009630592398, "grad_norm": 0.8717425326151015, "learning_rate": 5.669316542179171e-07, "loss": 0.4117, "step": 13594 }, { "epoch": 0.8515636010585822, "grad_norm": 0.8729282447067457, "learning_rate": 5.664625751599711e-07, "loss": 0.3988, "step": 13595 }, { "epoch": 0.8516262390579245, "grad_norm": 0.8537718395847764, "learning_rate": 5.659936785872494e-07, "loss": 0.3808, "step": 13596 }, { "epoch": 0.8516888770572668, "grad_norm": 0.8415484879623086, "learning_rate": 5.655249645190508e-07, "loss": 0.3996, "step": 13597 }, { "epoch": 0.8517515150566091, "grad_norm": 0.7871422364398799, "learning_rate": 5.650564329746677e-07, "loss": 0.3528, "step": 13598 }, { "epoch": 0.8518141530559514, "grad_norm": 0.6146225781811805, "learning_rate": 5.645880839733853e-07, "loss": 0.4566, "step": 13599 }, { "epoch": 0.8518767910552937, "grad_norm": 0.9232299879411361, "learning_rate": 5.641199175344803e-07, "loss": 0.3877, "step": 13600 }, { "epoch": 0.851939429054636, "grad_norm": 0.9274236912383736, "learning_rate": 5.636519336772228e-07, "loss": 0.3777, "step": 13601 }, { "epoch": 0.8520020670539783, "grad_norm": 0.8703513055944838, "learning_rate": 5.631841324208743e-07, "loss": 0.3619, "step": 13602 }, { "epoch": 0.8520647050533205, "grad_norm": 0.8148042335253884, "learning_rate": 5.627165137846919e-07, "loss": 0.3972, "step": 13603 }, { "epoch": 0.8521273430526629, "grad_norm": 0.6280404129256465, "learning_rate": 5.622490777879197e-07, "loss": 0.4539, "step": 13604 }, { "epoch": 0.8521899810520052, "grad_norm": 0.8152891775882993, "learning_rate": 5.617818244498002e-07, "loss": 0.3606, "step": 13605 }, { "epoch": 0.8522526190513475, "grad_norm": 0.8769198245257509, "learning_rate": 5.613147537895647e-07, "loss": 0.376, "step": 13606 }, { "epoch": 0.8523152570506898, "grad_norm": 0.8572978294714848, "learning_rate": 5.608478658264376e-07, "loss": 0.3658, "step": 13607 }, { "epoch": 0.8523778950500321, "grad_norm": 0.8424245264919895, "learning_rate": 5.603811605796366e-07, "loss": 0.3922, "step": 13608 }, { "epoch": 0.8524405330493744, "grad_norm": 0.8666905434734636, "learning_rate": 5.599146380683712e-07, "loss": 0.4107, "step": 13609 }, { "epoch": 0.8525031710487168, "grad_norm": 0.8494694457329665, "learning_rate": 5.594482983118432e-07, "loss": 0.3312, "step": 13610 }, { "epoch": 0.852565809048059, "grad_norm": 0.8620432574549403, "learning_rate": 5.589821413292462e-07, "loss": 0.3344, "step": 13611 }, { "epoch": 0.8526284470474013, "grad_norm": 0.9532780059636449, "learning_rate": 5.585161671397687e-07, "loss": 0.4264, "step": 13612 }, { "epoch": 0.8526910850467436, "grad_norm": 0.9156396831586116, "learning_rate": 5.58050375762591e-07, "loss": 0.411, "step": 13613 }, { "epoch": 0.8527537230460859, "grad_norm": 0.8627191851838208, "learning_rate": 5.575847672168849e-07, "loss": 0.3993, "step": 13614 }, { "epoch": 0.8528163610454282, "grad_norm": 0.8735428206416087, "learning_rate": 5.571193415218135e-07, "loss": 0.3711, "step": 13615 }, { "epoch": 0.8528789990447705, "grad_norm": 0.8577028981359086, "learning_rate": 5.566540986965352e-07, "loss": 0.3873, "step": 13616 }, { "epoch": 0.8529416370441129, "grad_norm": 0.8778330086158294, "learning_rate": 5.561890387601987e-07, "loss": 0.3547, "step": 13617 }, { "epoch": 0.8530042750434551, "grad_norm": 0.6174842030915503, "learning_rate": 5.557241617319465e-07, "loss": 0.4459, "step": 13618 }, { "epoch": 0.8530669130427974, "grad_norm": 0.5808859741150131, "learning_rate": 5.552594676309115e-07, "loss": 0.4377, "step": 13619 }, { "epoch": 0.8531295510421397, "grad_norm": 0.8260630269693926, "learning_rate": 5.547949564762229e-07, "loss": 0.3598, "step": 13620 }, { "epoch": 0.853192189041482, "grad_norm": 0.8694272010962936, "learning_rate": 5.543306282869976e-07, "loss": 0.3967, "step": 13621 }, { "epoch": 0.8532548270408243, "grad_norm": 0.9325510860998407, "learning_rate": 5.538664830823498e-07, "loss": 0.4221, "step": 13622 }, { "epoch": 0.8533174650401666, "grad_norm": 0.8694209470642589, "learning_rate": 5.534025208813831e-07, "loss": 0.4046, "step": 13623 }, { "epoch": 0.853380103039509, "grad_norm": 0.8591403994914437, "learning_rate": 5.529387417031939e-07, "loss": 0.3906, "step": 13624 }, { "epoch": 0.8534427410388512, "grad_norm": 0.8789346384675177, "learning_rate": 5.524751455668715e-07, "loss": 0.3453, "step": 13625 }, { "epoch": 0.8535053790381936, "grad_norm": 0.9363231597249752, "learning_rate": 5.520117324914975e-07, "loss": 0.4344, "step": 13626 }, { "epoch": 0.8535680170375358, "grad_norm": 0.8766747785843215, "learning_rate": 5.515485024961465e-07, "loss": 0.3281, "step": 13627 }, { "epoch": 0.8536306550368781, "grad_norm": 0.8467781769686614, "learning_rate": 5.510854555998834e-07, "loss": 0.4056, "step": 13628 }, { "epoch": 0.8536932930362204, "grad_norm": 0.8285473057869693, "learning_rate": 5.5062259182177e-07, "loss": 0.3487, "step": 13629 }, { "epoch": 0.8537559310355627, "grad_norm": 0.8730928200975602, "learning_rate": 5.501599111808565e-07, "loss": 0.3267, "step": 13630 }, { "epoch": 0.853818569034905, "grad_norm": 0.8111800146790324, "learning_rate": 5.496974136961857e-07, "loss": 0.3259, "step": 13631 }, { "epoch": 0.8538812070342473, "grad_norm": 0.8822865585976095, "learning_rate": 5.492350993867968e-07, "loss": 0.388, "step": 13632 }, { "epoch": 0.8539438450335897, "grad_norm": 0.9054582047294051, "learning_rate": 5.487729682717169e-07, "loss": 0.3763, "step": 13633 }, { "epoch": 0.8540064830329319, "grad_norm": 0.9192378713598537, "learning_rate": 5.483110203699682e-07, "loss": 0.3834, "step": 13634 }, { "epoch": 0.8540691210322743, "grad_norm": 0.8756359321846818, "learning_rate": 5.478492557005638e-07, "loss": 0.3941, "step": 13635 }, { "epoch": 0.8541317590316165, "grad_norm": 0.8372954299336627, "learning_rate": 5.473876742825107e-07, "loss": 0.3511, "step": 13636 }, { "epoch": 0.8541943970309588, "grad_norm": 0.8739980075163479, "learning_rate": 5.469262761348065e-07, "loss": 0.3696, "step": 13637 }, { "epoch": 0.8542570350303011, "grad_norm": 0.5970876987541094, "learning_rate": 5.464650612764438e-07, "loss": 0.4267, "step": 13638 }, { "epoch": 0.8543196730296434, "grad_norm": 0.8468703155177748, "learning_rate": 5.460040297264063e-07, "loss": 0.3641, "step": 13639 }, { "epoch": 0.8543823110289858, "grad_norm": 0.8629078836980016, "learning_rate": 5.455431815036683e-07, "loss": 0.3642, "step": 13640 }, { "epoch": 0.854444949028328, "grad_norm": 0.8979459900377185, "learning_rate": 5.450825166272006e-07, "loss": 0.3924, "step": 13641 }, { "epoch": 0.8545075870276704, "grad_norm": 0.8763008156434878, "learning_rate": 5.446220351159637e-07, "loss": 0.4052, "step": 13642 }, { "epoch": 0.8545702250270126, "grad_norm": 0.9164421200661367, "learning_rate": 5.441617369889108e-07, "loss": 0.3822, "step": 13643 }, { "epoch": 0.8546328630263549, "grad_norm": 0.8880461399585213, "learning_rate": 5.437016222649877e-07, "loss": 0.3781, "step": 13644 }, { "epoch": 0.8546955010256972, "grad_norm": 0.8569740524015446, "learning_rate": 5.432416909631321e-07, "loss": 0.4051, "step": 13645 }, { "epoch": 0.8547581390250395, "grad_norm": 0.876554432042823, "learning_rate": 5.427819431022768e-07, "loss": 0.3924, "step": 13646 }, { "epoch": 0.8548207770243819, "grad_norm": 0.889136602186312, "learning_rate": 5.423223787013438e-07, "loss": 0.3754, "step": 13647 }, { "epoch": 0.8548834150237241, "grad_norm": 0.9236379344613429, "learning_rate": 5.418629977792489e-07, "loss": 0.4308, "step": 13648 }, { "epoch": 0.8549460530230665, "grad_norm": 0.8684483459058218, "learning_rate": 5.414038003549e-07, "loss": 0.3998, "step": 13649 }, { "epoch": 0.8550086910224087, "grad_norm": 0.8004628646174152, "learning_rate": 5.409447864471989e-07, "loss": 0.3713, "step": 13650 }, { "epoch": 0.8550713290217511, "grad_norm": 0.8915323069637459, "learning_rate": 5.40485956075038e-07, "loss": 0.3844, "step": 13651 }, { "epoch": 0.8551339670210933, "grad_norm": 0.9527773922611982, "learning_rate": 5.400273092573028e-07, "loss": 0.4021, "step": 13652 }, { "epoch": 0.8551966050204356, "grad_norm": 0.9168259285886996, "learning_rate": 5.395688460128718e-07, "loss": 0.3753, "step": 13653 }, { "epoch": 0.855259243019778, "grad_norm": 0.9729970410909411, "learning_rate": 5.391105663606133e-07, "loss": 0.4137, "step": 13654 }, { "epoch": 0.8553218810191202, "grad_norm": 0.8601966258095626, "learning_rate": 5.386524703193934e-07, "loss": 0.3712, "step": 13655 }, { "epoch": 0.8553845190184626, "grad_norm": 0.8688997986502452, "learning_rate": 5.381945579080655e-07, "loss": 0.3713, "step": 13656 }, { "epoch": 0.8554471570178048, "grad_norm": 0.9262186943947636, "learning_rate": 5.377368291454782e-07, "loss": 0.3953, "step": 13657 }, { "epoch": 0.8555097950171472, "grad_norm": 0.9671198445364305, "learning_rate": 5.372792840504704e-07, "loss": 0.4671, "step": 13658 }, { "epoch": 0.8555724330164894, "grad_norm": 0.9735898669989651, "learning_rate": 5.368219226418758e-07, "loss": 0.4156, "step": 13659 }, { "epoch": 0.8556350710158318, "grad_norm": 0.9078989502213282, "learning_rate": 5.363647449385201e-07, "loss": 0.3598, "step": 13660 }, { "epoch": 0.8556977090151741, "grad_norm": 0.7806398395034392, "learning_rate": 5.359077509592192e-07, "loss": 0.353, "step": 13661 }, { "epoch": 0.8557603470145163, "grad_norm": 0.8828688419291478, "learning_rate": 5.354509407227842e-07, "loss": 0.3998, "step": 13662 }, { "epoch": 0.8558229850138587, "grad_norm": 0.8150688722798433, "learning_rate": 5.349943142480163e-07, "loss": 0.3643, "step": 13663 }, { "epoch": 0.8558856230132009, "grad_norm": 0.8206207551097752, "learning_rate": 5.34537871553712e-07, "loss": 0.393, "step": 13664 }, { "epoch": 0.8559482610125433, "grad_norm": 0.9076449868437412, "learning_rate": 5.340816126586579e-07, "loss": 0.383, "step": 13665 }, { "epoch": 0.8560108990118855, "grad_norm": 0.9407692310620932, "learning_rate": 5.336255375816334e-07, "loss": 0.3478, "step": 13666 }, { "epoch": 0.8560735370112279, "grad_norm": 0.824791442855143, "learning_rate": 5.33169646341411e-07, "loss": 0.3944, "step": 13667 }, { "epoch": 0.8561361750105702, "grad_norm": 0.6052659712595495, "learning_rate": 5.327139389567537e-07, "loss": 0.4442, "step": 13668 }, { "epoch": 0.8561988130099125, "grad_norm": 0.8013745162958942, "learning_rate": 5.322584154464211e-07, "loss": 0.39, "step": 13669 }, { "epoch": 0.8562614510092548, "grad_norm": 0.9694743385758487, "learning_rate": 5.318030758291609e-07, "loss": 0.353, "step": 13670 }, { "epoch": 0.856324089008597, "grad_norm": 0.8989776325995084, "learning_rate": 5.313479201237149e-07, "loss": 0.3594, "step": 13671 }, { "epoch": 0.8563867270079394, "grad_norm": 0.8046220201450625, "learning_rate": 5.30892948348819e-07, "loss": 0.3732, "step": 13672 }, { "epoch": 0.8564493650072816, "grad_norm": 0.9100291734290538, "learning_rate": 5.304381605231985e-07, "loss": 0.4338, "step": 13673 }, { "epoch": 0.856512003006624, "grad_norm": 0.8854894896867925, "learning_rate": 5.299835566655732e-07, "loss": 0.3961, "step": 13674 }, { "epoch": 0.8565746410059663, "grad_norm": 0.9416181672450481, "learning_rate": 5.295291367946542e-07, "loss": 0.3833, "step": 13675 }, { "epoch": 0.8566372790053086, "grad_norm": 0.8842015682348576, "learning_rate": 5.290749009291457e-07, "loss": 0.386, "step": 13676 }, { "epoch": 0.8566999170046509, "grad_norm": 0.9377192425206471, "learning_rate": 5.286208490877431e-07, "loss": 0.3893, "step": 13677 }, { "epoch": 0.8567625550039931, "grad_norm": 0.9057716056299234, "learning_rate": 5.281669812891371e-07, "loss": 0.3732, "step": 13678 }, { "epoch": 0.8568251930033355, "grad_norm": 0.795428373762524, "learning_rate": 5.277132975520083e-07, "loss": 0.3944, "step": 13679 }, { "epoch": 0.8568878310026777, "grad_norm": 0.8848640798943369, "learning_rate": 5.272597978950294e-07, "loss": 0.4023, "step": 13680 }, { "epoch": 0.8569504690020201, "grad_norm": 0.9168144832975446, "learning_rate": 5.268064823368685e-07, "loss": 0.4085, "step": 13681 }, { "epoch": 0.8570131070013624, "grad_norm": 0.8416359483221546, "learning_rate": 5.263533508961827e-07, "loss": 0.3644, "step": 13682 }, { "epoch": 0.8570757450007047, "grad_norm": 0.876962275011439, "learning_rate": 5.259004035916237e-07, "loss": 0.4226, "step": 13683 }, { "epoch": 0.857138383000047, "grad_norm": 0.8608845327254689, "learning_rate": 5.254476404418341e-07, "loss": 0.384, "step": 13684 }, { "epoch": 0.8572010209993893, "grad_norm": 0.9225313709498415, "learning_rate": 5.249950614654509e-07, "loss": 0.4209, "step": 13685 }, { "epoch": 0.8572636589987316, "grad_norm": 0.9197207801602236, "learning_rate": 5.245426666810999e-07, "loss": 0.35, "step": 13686 }, { "epoch": 0.8573262969980738, "grad_norm": 0.927452834965069, "learning_rate": 5.240904561074045e-07, "loss": 0.3838, "step": 13687 }, { "epoch": 0.8573889349974162, "grad_norm": 0.8499331035056412, "learning_rate": 5.236384297629765e-07, "loss": 0.4205, "step": 13688 }, { "epoch": 0.8574515729967584, "grad_norm": 0.9312937488891896, "learning_rate": 5.23186587666421e-07, "loss": 0.4034, "step": 13689 }, { "epoch": 0.8575142109961008, "grad_norm": 0.8973578347504347, "learning_rate": 5.227349298363376e-07, "loss": 0.4027, "step": 13690 }, { "epoch": 0.8575768489954431, "grad_norm": 0.8652605209401537, "learning_rate": 5.222834562913148e-07, "loss": 0.3747, "step": 13691 }, { "epoch": 0.8576394869947854, "grad_norm": 0.8878975542338722, "learning_rate": 5.218321670499365e-07, "loss": 0.4354, "step": 13692 }, { "epoch": 0.8577021249941277, "grad_norm": 0.885429715112323, "learning_rate": 5.213810621307775e-07, "loss": 0.4013, "step": 13693 }, { "epoch": 0.85776476299347, "grad_norm": 0.5967361298901782, "learning_rate": 5.209301415524054e-07, "loss": 0.4394, "step": 13694 }, { "epoch": 0.8578274009928123, "grad_norm": 0.8655168563700054, "learning_rate": 5.204794053333784e-07, "loss": 0.3636, "step": 13695 }, { "epoch": 0.8578900389921545, "grad_norm": 0.8573350522078227, "learning_rate": 5.200288534922521e-07, "loss": 0.3625, "step": 13696 }, { "epoch": 0.8579526769914969, "grad_norm": 0.5609471891112332, "learning_rate": 5.195784860475684e-07, "loss": 0.4355, "step": 13697 }, { "epoch": 0.8580153149908392, "grad_norm": 0.8737498468310649, "learning_rate": 5.191283030178668e-07, "loss": 0.4006, "step": 13698 }, { "epoch": 0.8580779529901815, "grad_norm": 0.8760419187210269, "learning_rate": 5.186783044216765e-07, "loss": 0.4099, "step": 13699 }, { "epoch": 0.8581405909895238, "grad_norm": 0.8482603888506393, "learning_rate": 5.18228490277518e-07, "loss": 0.3755, "step": 13700 }, { "epoch": 0.8582032289888661, "grad_norm": 0.8758611476429602, "learning_rate": 5.177788606039075e-07, "loss": 0.4156, "step": 13701 }, { "epoch": 0.8582658669882084, "grad_norm": 0.8442818931315692, "learning_rate": 5.173294154193509e-07, "loss": 0.3719, "step": 13702 }, { "epoch": 0.8583285049875506, "grad_norm": 0.8371389265607231, "learning_rate": 5.168801547423474e-07, "loss": 0.3914, "step": 13703 }, { "epoch": 0.858391142986893, "grad_norm": 0.6401703711597313, "learning_rate": 5.164310785913878e-07, "loss": 0.4729, "step": 13704 }, { "epoch": 0.8584537809862353, "grad_norm": 0.962125601695983, "learning_rate": 5.15982186984958e-07, "loss": 0.3672, "step": 13705 }, { "epoch": 0.8585164189855776, "grad_norm": 0.9330807502874855, "learning_rate": 5.155334799415329e-07, "loss": 0.412, "step": 13706 }, { "epoch": 0.8585790569849199, "grad_norm": 0.8895689937792356, "learning_rate": 5.150849574795835e-07, "loss": 0.3788, "step": 13707 }, { "epoch": 0.8586416949842622, "grad_norm": 0.8031167978894036, "learning_rate": 5.146366196175689e-07, "loss": 0.3811, "step": 13708 }, { "epoch": 0.8587043329836045, "grad_norm": 0.8429234782105429, "learning_rate": 5.141884663739444e-07, "loss": 0.3752, "step": 13709 }, { "epoch": 0.8587669709829469, "grad_norm": 0.9547653877167716, "learning_rate": 5.137404977671546e-07, "loss": 0.4172, "step": 13710 }, { "epoch": 0.8588296089822891, "grad_norm": 0.8805012436908901, "learning_rate": 5.13292713815639e-07, "loss": 0.3667, "step": 13711 }, { "epoch": 0.8588922469816314, "grad_norm": 0.8652319340230994, "learning_rate": 5.128451145378278e-07, "loss": 0.4055, "step": 13712 }, { "epoch": 0.8589548849809737, "grad_norm": 0.9168331188597493, "learning_rate": 5.123976999521435e-07, "loss": 0.3792, "step": 13713 }, { "epoch": 0.859017522980316, "grad_norm": 0.8629591358433758, "learning_rate": 5.119504700770039e-07, "loss": 0.4033, "step": 13714 }, { "epoch": 0.8590801609796583, "grad_norm": 0.8334082580813527, "learning_rate": 5.115034249308154e-07, "loss": 0.4087, "step": 13715 }, { "epoch": 0.8591427989790006, "grad_norm": 0.9675392749821111, "learning_rate": 5.110565645319793e-07, "loss": 0.4087, "step": 13716 }, { "epoch": 0.859205436978343, "grad_norm": 0.8665322634921095, "learning_rate": 5.106098888988886e-07, "loss": 0.3809, "step": 13717 }, { "epoch": 0.8592680749776852, "grad_norm": 0.8370386940611247, "learning_rate": 5.101633980499283e-07, "loss": 0.3604, "step": 13718 }, { "epoch": 0.8593307129770276, "grad_norm": 0.8146188066929537, "learning_rate": 5.097170920034755e-07, "loss": 0.3715, "step": 13719 }, { "epoch": 0.8593933509763698, "grad_norm": 0.8477896573222959, "learning_rate": 5.092709707779008e-07, "loss": 0.4042, "step": 13720 }, { "epoch": 0.8594559889757121, "grad_norm": 0.9148307549451704, "learning_rate": 5.088250343915663e-07, "loss": 0.3663, "step": 13721 }, { "epoch": 0.8595186269750544, "grad_norm": 0.8653460522491639, "learning_rate": 5.083792828628264e-07, "loss": 0.3783, "step": 13722 }, { "epoch": 0.8595812649743967, "grad_norm": 0.8758961438646781, "learning_rate": 5.079337162100284e-07, "loss": 0.3485, "step": 13723 }, { "epoch": 0.859643902973739, "grad_norm": 0.6048639263220748, "learning_rate": 5.074883344515136e-07, "loss": 0.4426, "step": 13724 }, { "epoch": 0.8597065409730813, "grad_norm": 0.891984820349102, "learning_rate": 5.070431376056129e-07, "loss": 0.3982, "step": 13725 }, { "epoch": 0.8597691789724237, "grad_norm": 0.8216029025570459, "learning_rate": 5.065981256906505e-07, "loss": 0.3517, "step": 13726 }, { "epoch": 0.8598318169717659, "grad_norm": 0.8552427114359128, "learning_rate": 5.061532987249434e-07, "loss": 0.3558, "step": 13727 }, { "epoch": 0.8598944549711082, "grad_norm": 0.9741076932024423, "learning_rate": 5.057086567268005e-07, "loss": 0.3916, "step": 13728 }, { "epoch": 0.8599570929704505, "grad_norm": 0.8417000829041485, "learning_rate": 5.052641997145235e-07, "loss": 0.3992, "step": 13729 }, { "epoch": 0.8600197309697928, "grad_norm": 0.8657350759366026, "learning_rate": 5.048199277064054e-07, "loss": 0.3922, "step": 13730 }, { "epoch": 0.8600823689691351, "grad_norm": 0.8902305114716077, "learning_rate": 5.043758407207344e-07, "loss": 0.4075, "step": 13731 }, { "epoch": 0.8601450069684774, "grad_norm": 0.8467937480082643, "learning_rate": 5.039319387757874e-07, "loss": 0.3729, "step": 13732 }, { "epoch": 0.8602076449678198, "grad_norm": 0.9774390608383293, "learning_rate": 5.034882218898368e-07, "loss": 0.4039, "step": 13733 }, { "epoch": 0.860270282967162, "grad_norm": 0.8766293980688911, "learning_rate": 5.030446900811459e-07, "loss": 0.3699, "step": 13734 }, { "epoch": 0.8603329209665044, "grad_norm": 0.8421236938517598, "learning_rate": 5.026013433679699e-07, "loss": 0.3861, "step": 13735 }, { "epoch": 0.8603955589658466, "grad_norm": 0.8306821179874758, "learning_rate": 5.021581817685578e-07, "loss": 0.3841, "step": 13736 }, { "epoch": 0.8604581969651889, "grad_norm": 0.8642238597084698, "learning_rate": 5.017152053011499e-07, "loss": 0.3447, "step": 13737 }, { "epoch": 0.8605208349645312, "grad_norm": 0.8943175983776553, "learning_rate": 5.012724139839786e-07, "loss": 0.414, "step": 13738 }, { "epoch": 0.8605834729638735, "grad_norm": 0.6156659528991284, "learning_rate": 5.008298078352686e-07, "loss": 0.4416, "step": 13739 }, { "epoch": 0.8606461109632159, "grad_norm": 0.7898717757786741, "learning_rate": 5.0038738687324e-07, "loss": 0.3505, "step": 13740 }, { "epoch": 0.8607087489625581, "grad_norm": 0.881015970486942, "learning_rate": 4.999451511161013e-07, "loss": 0.3789, "step": 13741 }, { "epoch": 0.8607713869619005, "grad_norm": 0.8408227839598164, "learning_rate": 4.995031005820561e-07, "loss": 0.3874, "step": 13742 }, { "epoch": 0.8608340249612427, "grad_norm": 0.8503344680206444, "learning_rate": 4.990612352892982e-07, "loss": 0.3678, "step": 13743 }, { "epoch": 0.8608966629605851, "grad_norm": 0.9000409448255654, "learning_rate": 4.986195552560158e-07, "loss": 0.3842, "step": 13744 }, { "epoch": 0.8609593009599273, "grad_norm": 0.8741976566138334, "learning_rate": 4.981780605003872e-07, "loss": 0.3492, "step": 13745 }, { "epoch": 0.8610219389592696, "grad_norm": 0.9111996215527114, "learning_rate": 4.977367510405861e-07, "loss": 0.3799, "step": 13746 }, { "epoch": 0.861084576958612, "grad_norm": 0.8117166103369041, "learning_rate": 4.972956268947754e-07, "loss": 0.3907, "step": 13747 }, { "epoch": 0.8611472149579542, "grad_norm": 0.9084631019085709, "learning_rate": 4.96854688081112e-07, "loss": 0.3875, "step": 13748 }, { "epoch": 0.8612098529572966, "grad_norm": 0.897190040670462, "learning_rate": 4.96413934617746e-07, "loss": 0.4214, "step": 13749 }, { "epoch": 0.8612724909566388, "grad_norm": 0.8388433841765985, "learning_rate": 4.959733665228173e-07, "loss": 0.3932, "step": 13750 }, { "epoch": 0.8613351289559812, "grad_norm": 0.8903657351273051, "learning_rate": 4.955329838144624e-07, "loss": 0.4135, "step": 13751 }, { "epoch": 0.8613977669553234, "grad_norm": 0.8654030176744897, "learning_rate": 4.950927865108051e-07, "loss": 0.35, "step": 13752 }, { "epoch": 0.8614604049546657, "grad_norm": 0.8891932726958427, "learning_rate": 4.946527746299656e-07, "loss": 0.3997, "step": 13753 }, { "epoch": 0.8615230429540081, "grad_norm": 0.7780057065769187, "learning_rate": 4.942129481900537e-07, "loss": 0.3559, "step": 13754 }, { "epoch": 0.8615856809533503, "grad_norm": 0.8746983935484882, "learning_rate": 4.937733072091733e-07, "loss": 0.4408, "step": 13755 }, { "epoch": 0.8616483189526927, "grad_norm": 0.8569465589399727, "learning_rate": 4.933338517054193e-07, "loss": 0.3951, "step": 13756 }, { "epoch": 0.8617109569520349, "grad_norm": 0.8864322420174312, "learning_rate": 4.928945816968811e-07, "loss": 0.398, "step": 13757 }, { "epoch": 0.8617735949513773, "grad_norm": 0.9001170112304511, "learning_rate": 4.924554972016382e-07, "loss": 0.3856, "step": 13758 }, { "epoch": 0.8618362329507195, "grad_norm": 0.8890179277310638, "learning_rate": 4.920165982377645e-07, "loss": 0.3828, "step": 13759 }, { "epoch": 0.8618988709500619, "grad_norm": 0.9138836567815534, "learning_rate": 4.915778848233227e-07, "loss": 0.3586, "step": 13760 }, { "epoch": 0.8619615089494042, "grad_norm": 0.8557558411493272, "learning_rate": 4.911393569763728e-07, "loss": 0.3834, "step": 13761 }, { "epoch": 0.8620241469487464, "grad_norm": 0.8242477169403968, "learning_rate": 4.907010147149643e-07, "loss": 0.3769, "step": 13762 }, { "epoch": 0.8620867849480888, "grad_norm": 0.8766060725910297, "learning_rate": 4.902628580571389e-07, "loss": 0.3498, "step": 13763 }, { "epoch": 0.862149422947431, "grad_norm": 0.8590489190228072, "learning_rate": 4.89824887020931e-07, "loss": 0.3719, "step": 13764 }, { "epoch": 0.8622120609467734, "grad_norm": 0.9710226342372779, "learning_rate": 4.893871016243673e-07, "loss": 0.3893, "step": 13765 }, { "epoch": 0.8622746989461156, "grad_norm": 0.9356705850790152, "learning_rate": 4.889495018854684e-07, "loss": 0.375, "step": 13766 }, { "epoch": 0.862337336945458, "grad_norm": 0.8351262949349904, "learning_rate": 4.885120878222455e-07, "loss": 0.3814, "step": 13767 }, { "epoch": 0.8623999749448003, "grad_norm": 0.8044872445154431, "learning_rate": 4.880748594527024e-07, "loss": 0.3308, "step": 13768 }, { "epoch": 0.8624626129441426, "grad_norm": 0.8589294315259303, "learning_rate": 4.876378167948348e-07, "loss": 0.3761, "step": 13769 }, { "epoch": 0.8625252509434849, "grad_norm": 0.8133521086896282, "learning_rate": 4.872009598666327e-07, "loss": 0.3758, "step": 13770 }, { "epoch": 0.8625878889428271, "grad_norm": 0.8298187635633326, "learning_rate": 4.867642886860769e-07, "loss": 0.3772, "step": 13771 }, { "epoch": 0.8626505269421695, "grad_norm": 0.8457973281226696, "learning_rate": 4.863278032711405e-07, "loss": 0.3571, "step": 13772 }, { "epoch": 0.8627131649415117, "grad_norm": 0.865475005926299, "learning_rate": 4.858915036397899e-07, "loss": 0.3851, "step": 13773 }, { "epoch": 0.8627758029408541, "grad_norm": 0.8383037165284962, "learning_rate": 4.854553898099817e-07, "loss": 0.3608, "step": 13774 }, { "epoch": 0.8628384409401963, "grad_norm": 0.8727173896872842, "learning_rate": 4.850194617996684e-07, "loss": 0.3779, "step": 13775 }, { "epoch": 0.8629010789395387, "grad_norm": 0.8682526222668105, "learning_rate": 4.845837196267922e-07, "loss": 0.3959, "step": 13776 }, { "epoch": 0.862963716938881, "grad_norm": 0.8367348947381307, "learning_rate": 4.841481633092881e-07, "loss": 0.3461, "step": 13777 }, { "epoch": 0.8630263549382233, "grad_norm": 0.8503502086892161, "learning_rate": 4.837127928650831e-07, "loss": 0.3956, "step": 13778 }, { "epoch": 0.8630889929375656, "grad_norm": 0.8891158897321481, "learning_rate": 4.832776083120983e-07, "loss": 0.3994, "step": 13779 }, { "epoch": 0.8631516309369078, "grad_norm": 0.8895065971027935, "learning_rate": 4.828426096682459e-07, "loss": 0.3668, "step": 13780 }, { "epoch": 0.8632142689362502, "grad_norm": 0.8071677192741932, "learning_rate": 4.824077969514301e-07, "loss": 0.3943, "step": 13781 }, { "epoch": 0.8632769069355924, "grad_norm": 0.8543507102303598, "learning_rate": 4.819731701795466e-07, "loss": 0.3573, "step": 13782 }, { "epoch": 0.8633395449349348, "grad_norm": 0.8951818612899446, "learning_rate": 4.815387293704871e-07, "loss": 0.3932, "step": 13783 }, { "epoch": 0.8634021829342771, "grad_norm": 0.9442320673352149, "learning_rate": 4.811044745421323e-07, "loss": 0.3739, "step": 13784 }, { "epoch": 0.8634648209336194, "grad_norm": 0.867871468874045, "learning_rate": 4.806704057123562e-07, "loss": 0.3431, "step": 13785 }, { "epoch": 0.8635274589329617, "grad_norm": 0.9183000673225998, "learning_rate": 4.802365228990247e-07, "loss": 0.3896, "step": 13786 }, { "epoch": 0.8635900969323039, "grad_norm": 0.8546154030113424, "learning_rate": 4.798028261199961e-07, "loss": 0.3561, "step": 13787 }, { "epoch": 0.8636527349316463, "grad_norm": 0.8769919651670247, "learning_rate": 4.793693153931228e-07, "loss": 0.3653, "step": 13788 }, { "epoch": 0.8637153729309885, "grad_norm": 0.8145536228873638, "learning_rate": 4.789359907362479e-07, "loss": 0.3743, "step": 13789 }, { "epoch": 0.8637780109303309, "grad_norm": 0.7839745253760134, "learning_rate": 4.785028521672063e-07, "loss": 0.3494, "step": 13790 }, { "epoch": 0.8638406489296732, "grad_norm": 0.8601428138122504, "learning_rate": 4.780698997038264e-07, "loss": 0.3903, "step": 13791 }, { "epoch": 0.8639032869290155, "grad_norm": 0.8653211815365994, "learning_rate": 4.77637133363929e-07, "loss": 0.3692, "step": 13792 }, { "epoch": 0.8639659249283578, "grad_norm": 0.8896887147478905, "learning_rate": 4.772045531653269e-07, "loss": 0.3545, "step": 13793 }, { "epoch": 0.8640285629277001, "grad_norm": 0.5863800216636046, "learning_rate": 4.767721591258245e-07, "loss": 0.4449, "step": 13794 }, { "epoch": 0.8640912009270424, "grad_norm": 0.9042727896399118, "learning_rate": 4.7633995126321996e-07, "loss": 0.3728, "step": 13795 }, { "epoch": 0.8641538389263846, "grad_norm": 0.7484176358255392, "learning_rate": 4.759079295953017e-07, "loss": 0.343, "step": 13796 }, { "epoch": 0.864216476925727, "grad_norm": 0.8354783191604294, "learning_rate": 4.754760941398534e-07, "loss": 0.3768, "step": 13797 }, { "epoch": 0.8642791149250693, "grad_norm": 0.8649937292027429, "learning_rate": 4.7504444491464907e-07, "loss": 0.3918, "step": 13798 }, { "epoch": 0.8643417529244116, "grad_norm": 0.8754148712436157, "learning_rate": 4.746129819374551e-07, "loss": 0.3723, "step": 13799 }, { "epoch": 0.8644043909237539, "grad_norm": 0.7646263675282471, "learning_rate": 4.7418170522603e-07, "loss": 0.3643, "step": 13800 }, { "epoch": 0.8644670289230962, "grad_norm": 0.9299180038050359, "learning_rate": 4.737506147981269e-07, "loss": 0.3915, "step": 13801 }, { "epoch": 0.8645296669224385, "grad_norm": 0.8579285173198999, "learning_rate": 4.733197106714887e-07, "loss": 0.403, "step": 13802 }, { "epoch": 0.8645923049217809, "grad_norm": 0.9095600742703492, "learning_rate": 4.7288899286385136e-07, "loss": 0.376, "step": 13803 }, { "epoch": 0.8646549429211231, "grad_norm": 0.9248934653752223, "learning_rate": 4.7245846139294283e-07, "loss": 0.3796, "step": 13804 }, { "epoch": 0.8647175809204654, "grad_norm": 0.8953194611183068, "learning_rate": 4.72028116276485e-07, "loss": 0.3854, "step": 13805 }, { "epoch": 0.8647802189198077, "grad_norm": 0.8991398728706873, "learning_rate": 4.715979575321894e-07, "loss": 0.4381, "step": 13806 }, { "epoch": 0.86484285691915, "grad_norm": 0.9120380156888438, "learning_rate": 4.7116798517776283e-07, "loss": 0.3803, "step": 13807 }, { "epoch": 0.8649054949184923, "grad_norm": 0.8855047240022172, "learning_rate": 4.707381992309018e-07, "loss": 0.3996, "step": 13808 }, { "epoch": 0.8649681329178346, "grad_norm": 0.8033432435832717, "learning_rate": 4.703085997092982e-07, "loss": 0.3337, "step": 13809 }, { "epoch": 0.865030770917177, "grad_norm": 0.8740501804905639, "learning_rate": 4.698791866306335e-07, "loss": 0.3395, "step": 13810 }, { "epoch": 0.8650934089165192, "grad_norm": 0.8142502506400007, "learning_rate": 4.694499600125824e-07, "loss": 0.3744, "step": 13811 }, { "epoch": 0.8651560469158615, "grad_norm": 0.8500607463718322, "learning_rate": 4.690209198728113e-07, "loss": 0.3868, "step": 13812 }, { "epoch": 0.8652186849152038, "grad_norm": 0.8080796702290612, "learning_rate": 4.6859206622898055e-07, "loss": 0.3911, "step": 13813 }, { "epoch": 0.8652813229145461, "grad_norm": 0.9049681994754131, "learning_rate": 4.681633990987416e-07, "loss": 0.4093, "step": 13814 }, { "epoch": 0.8653439609138884, "grad_norm": 0.8439248722408916, "learning_rate": 4.6773491849973707e-07, "loss": 0.4183, "step": 13815 }, { "epoch": 0.8654065989132307, "grad_norm": 0.9286631619802013, "learning_rate": 4.673066244496055e-07, "loss": 0.4125, "step": 13816 }, { "epoch": 0.865469236912573, "grad_norm": 0.8027528298815414, "learning_rate": 4.668785169659734e-07, "loss": 0.3412, "step": 13817 }, { "epoch": 0.8655318749119153, "grad_norm": 0.8744693270986267, "learning_rate": 4.664505960664639e-07, "loss": 0.3953, "step": 13818 }, { "epoch": 0.8655945129112577, "grad_norm": 0.8913720091561943, "learning_rate": 4.660228617686896e-07, "loss": 0.3887, "step": 13819 }, { "epoch": 0.8656571509105999, "grad_norm": 0.872721748154102, "learning_rate": 4.6559531409025584e-07, "loss": 0.4169, "step": 13820 }, { "epoch": 0.8657197889099422, "grad_norm": 0.8019018608019984, "learning_rate": 4.6516795304876074e-07, "loss": 0.3709, "step": 13821 }, { "epoch": 0.8657824269092845, "grad_norm": 0.6227030743189155, "learning_rate": 4.6474077866179413e-07, "loss": 0.4517, "step": 13822 }, { "epoch": 0.8658450649086268, "grad_norm": 0.8932789221618552, "learning_rate": 4.643137909469386e-07, "loss": 0.4017, "step": 13823 }, { "epoch": 0.8659077029079691, "grad_norm": 0.8888013966685622, "learning_rate": 4.6388698992176904e-07, "loss": 0.4196, "step": 13824 }, { "epoch": 0.8659703409073114, "grad_norm": 0.5789002103597685, "learning_rate": 4.634603756038536e-07, "loss": 0.4393, "step": 13825 }, { "epoch": 0.8660329789066538, "grad_norm": 0.8497385132082178, "learning_rate": 4.6303394801074987e-07, "loss": 0.3731, "step": 13826 }, { "epoch": 0.866095616905996, "grad_norm": 0.9445724191633232, "learning_rate": 4.626077071600121e-07, "loss": 0.436, "step": 13827 }, { "epoch": 0.8661582549053384, "grad_norm": 0.8979856956270541, "learning_rate": 4.6218165306918304e-07, "loss": 0.3693, "step": 13828 }, { "epoch": 0.8662208929046806, "grad_norm": 0.8496615766506856, "learning_rate": 4.617557857557997e-07, "loss": 0.3939, "step": 13829 }, { "epoch": 0.8662835309040229, "grad_norm": 0.8383373980050857, "learning_rate": 4.6133010523739033e-07, "loss": 0.3898, "step": 13830 }, { "epoch": 0.8663461689033652, "grad_norm": 0.911310906065585, "learning_rate": 4.609046115314764e-07, "loss": 0.4036, "step": 13831 }, { "epoch": 0.8664088069027075, "grad_norm": 0.9280652719587484, "learning_rate": 4.604793046555711e-07, "loss": 0.3895, "step": 13832 }, { "epoch": 0.8664714449020499, "grad_norm": 0.8037297671914061, "learning_rate": 4.6005418462717944e-07, "loss": 0.3606, "step": 13833 }, { "epoch": 0.8665340829013921, "grad_norm": 0.9142486345439699, "learning_rate": 4.596292514638001e-07, "loss": 0.4045, "step": 13834 }, { "epoch": 0.8665967209007345, "grad_norm": 0.845092642888046, "learning_rate": 4.5920450518292414e-07, "loss": 0.3852, "step": 13835 }, { "epoch": 0.8666593589000767, "grad_norm": 0.7999766935654136, "learning_rate": 4.587799458020342e-07, "loss": 0.3565, "step": 13836 }, { "epoch": 0.866721996899419, "grad_norm": 0.8919019898054643, "learning_rate": 4.5835557333860414e-07, "loss": 0.3682, "step": 13837 }, { "epoch": 0.8667846348987613, "grad_norm": 0.8759318615884889, "learning_rate": 4.579313878101016e-07, "loss": 0.3841, "step": 13838 }, { "epoch": 0.8668472728981036, "grad_norm": 0.8089519065227289, "learning_rate": 4.5750738923398595e-07, "loss": 0.3825, "step": 13839 }, { "epoch": 0.866909910897446, "grad_norm": 0.9463892895456552, "learning_rate": 4.5708357762770993e-07, "loss": 0.3938, "step": 13840 }, { "epoch": 0.8669725488967882, "grad_norm": 0.8656091335346049, "learning_rate": 4.566599530087157e-07, "loss": 0.3759, "step": 13841 }, { "epoch": 0.8670351868961306, "grad_norm": 0.8542633373212092, "learning_rate": 4.5623651539444204e-07, "loss": 0.4094, "step": 13842 }, { "epoch": 0.8670978248954728, "grad_norm": 0.8538734474958843, "learning_rate": 4.5581326480231616e-07, "loss": 0.3818, "step": 13843 }, { "epoch": 0.8671604628948152, "grad_norm": 0.8655348528370287, "learning_rate": 4.553902012497602e-07, "loss": 0.377, "step": 13844 }, { "epoch": 0.8672231008941574, "grad_norm": 0.8491655335605421, "learning_rate": 4.549673247541875e-07, "loss": 0.3738, "step": 13845 }, { "epoch": 0.8672857388934997, "grad_norm": 0.8221681285006079, "learning_rate": 4.54544635333003e-07, "loss": 0.3795, "step": 13846 }, { "epoch": 0.867348376892842, "grad_norm": 0.845634941347183, "learning_rate": 4.541221330036055e-07, "loss": 0.3463, "step": 13847 }, { "epoch": 0.8674110148921843, "grad_norm": 0.8379125806179762, "learning_rate": 4.53699817783384e-07, "loss": 0.3443, "step": 13848 }, { "epoch": 0.8674736528915267, "grad_norm": 0.8925230543440063, "learning_rate": 4.532776896897223e-07, "loss": 0.3836, "step": 13849 }, { "epoch": 0.8675362908908689, "grad_norm": 0.8611948146945935, "learning_rate": 4.5285574873999374e-07, "loss": 0.3686, "step": 13850 }, { "epoch": 0.8675989288902113, "grad_norm": 0.8398999987053327, "learning_rate": 4.524339949515677e-07, "loss": 0.3573, "step": 13851 }, { "epoch": 0.8676615668895535, "grad_norm": 0.8619327730379104, "learning_rate": 4.5201242834180146e-07, "loss": 0.4405, "step": 13852 }, { "epoch": 0.8677242048888959, "grad_norm": 0.8402619212590428, "learning_rate": 4.515910489280484e-07, "loss": 0.3582, "step": 13853 }, { "epoch": 0.8677868428882382, "grad_norm": 0.8972680099893688, "learning_rate": 4.511698567276523e-07, "loss": 0.4085, "step": 13854 }, { "epoch": 0.8678494808875804, "grad_norm": 0.8751854350606874, "learning_rate": 4.507488517579495e-07, "loss": 0.3866, "step": 13855 }, { "epoch": 0.8679121188869228, "grad_norm": 0.8594588059574562, "learning_rate": 4.5032803403626757e-07, "loss": 0.3626, "step": 13856 }, { "epoch": 0.867974756886265, "grad_norm": 0.9104522090164895, "learning_rate": 4.4990740357992893e-07, "loss": 0.4172, "step": 13857 }, { "epoch": 0.8680373948856074, "grad_norm": 0.8935177577542089, "learning_rate": 4.4948696040624576e-07, "loss": 0.3577, "step": 13858 }, { "epoch": 0.8681000328849496, "grad_norm": 0.909545967000386, "learning_rate": 4.490667045325231e-07, "loss": 0.3553, "step": 13859 }, { "epoch": 0.868162670884292, "grad_norm": 0.9007230130304591, "learning_rate": 4.4864663597606053e-07, "loss": 0.3598, "step": 13860 }, { "epoch": 0.8682253088836343, "grad_norm": 0.8299254824121292, "learning_rate": 4.482267547541463e-07, "loss": 0.3595, "step": 13861 }, { "epoch": 0.8682879468829765, "grad_norm": 0.8865958016141636, "learning_rate": 4.4780706088406455e-07, "loss": 0.3749, "step": 13862 }, { "epoch": 0.8683505848823189, "grad_norm": 0.858016752664445, "learning_rate": 4.4738755438308855e-07, "loss": 0.3628, "step": 13863 }, { "epoch": 0.8684132228816611, "grad_norm": 0.9210708565953851, "learning_rate": 4.469682352684868e-07, "loss": 0.3592, "step": 13864 }, { "epoch": 0.8684758608810035, "grad_norm": 0.8564218084407555, "learning_rate": 4.46549103557517e-07, "loss": 0.4614, "step": 13865 }, { "epoch": 0.8685384988803457, "grad_norm": 0.9124081251975897, "learning_rate": 4.461301592674311e-07, "loss": 0.3749, "step": 13866 }, { "epoch": 0.8686011368796881, "grad_norm": 0.9789294657262992, "learning_rate": 4.457114024154724e-07, "loss": 0.4337, "step": 13867 }, { "epoch": 0.8686637748790303, "grad_norm": 0.8111321819538473, "learning_rate": 4.452928330188788e-07, "loss": 0.3164, "step": 13868 }, { "epoch": 0.8687264128783727, "grad_norm": 0.8889581242826818, "learning_rate": 4.4487445109487715e-07, "loss": 0.3899, "step": 13869 }, { "epoch": 0.868789050877715, "grad_norm": 0.8095997911433007, "learning_rate": 4.4445625666068805e-07, "loss": 0.3255, "step": 13870 }, { "epoch": 0.8688516888770572, "grad_norm": 0.8331175746052548, "learning_rate": 4.4403824973352603e-07, "loss": 0.3491, "step": 13871 }, { "epoch": 0.8689143268763996, "grad_norm": 0.7515904347767565, "learning_rate": 4.4362043033059465e-07, "loss": 0.3566, "step": 13872 }, { "epoch": 0.8689769648757418, "grad_norm": 0.873253877375975, "learning_rate": 4.4320279846909234e-07, "loss": 0.3951, "step": 13873 }, { "epoch": 0.8690396028750842, "grad_norm": 0.8247141535269152, "learning_rate": 4.4278535416620914e-07, "loss": 0.3599, "step": 13874 }, { "epoch": 0.8691022408744264, "grad_norm": 0.8640043254086202, "learning_rate": 4.423680974391259e-07, "loss": 0.3965, "step": 13875 }, { "epoch": 0.8691648788737688, "grad_norm": 0.6846731395503498, "learning_rate": 4.4195102830501713e-07, "loss": 0.4325, "step": 13876 }, { "epoch": 0.8692275168731111, "grad_norm": 0.8089529326371696, "learning_rate": 4.415341467810508e-07, "loss": 0.3688, "step": 13877 }, { "epoch": 0.8692901548724534, "grad_norm": 0.813189553611974, "learning_rate": 4.4111745288438534e-07, "loss": 0.3679, "step": 13878 }, { "epoch": 0.8693527928717957, "grad_norm": 0.9349181066654012, "learning_rate": 4.4070094663217045e-07, "loss": 0.3805, "step": 13879 }, { "epoch": 0.8694154308711379, "grad_norm": 0.8259697898310369, "learning_rate": 4.402846280415518e-07, "loss": 0.389, "step": 13880 }, { "epoch": 0.8694780688704803, "grad_norm": 0.8420366197476745, "learning_rate": 4.3986849712966463e-07, "loss": 0.3756, "step": 13881 }, { "epoch": 0.8695407068698225, "grad_norm": 0.8740170115271229, "learning_rate": 4.394525539136363e-07, "loss": 0.3967, "step": 13882 }, { "epoch": 0.8696033448691649, "grad_norm": 0.8515608594632003, "learning_rate": 4.39036798410587e-07, "loss": 0.4007, "step": 13883 }, { "epoch": 0.8696659828685072, "grad_norm": 0.6120517274976706, "learning_rate": 4.3862123063762973e-07, "loss": 0.4328, "step": 13884 }, { "epoch": 0.8697286208678495, "grad_norm": 0.8516000315963618, "learning_rate": 4.382058506118686e-07, "loss": 0.3951, "step": 13885 }, { "epoch": 0.8697912588671918, "grad_norm": 0.8344926599356235, "learning_rate": 4.3779065835040213e-07, "loss": 0.3749, "step": 13886 }, { "epoch": 0.8698538968665341, "grad_norm": 0.8931245736541353, "learning_rate": 4.3737565387031886e-07, "loss": 0.3549, "step": 13887 }, { "epoch": 0.8699165348658764, "grad_norm": 0.8572852790489308, "learning_rate": 4.3696083718869965e-07, "loss": 0.4382, "step": 13888 }, { "epoch": 0.8699791728652186, "grad_norm": 0.8628940905655298, "learning_rate": 4.3654620832262075e-07, "loss": 0.3887, "step": 13889 }, { "epoch": 0.870041810864561, "grad_norm": 0.8198091114303546, "learning_rate": 4.361317672891463e-07, "loss": 0.3891, "step": 13890 }, { "epoch": 0.8701044488639033, "grad_norm": 0.8987995747482761, "learning_rate": 4.3571751410533545e-07, "loss": 0.3645, "step": 13891 }, { "epoch": 0.8701670868632456, "grad_norm": 0.8776714206541099, "learning_rate": 4.3530344878823907e-07, "loss": 0.3856, "step": 13892 }, { "epoch": 0.8702297248625879, "grad_norm": 0.6165117133523341, "learning_rate": 4.3488957135489895e-07, "loss": 0.4526, "step": 13893 }, { "epoch": 0.8702923628619302, "grad_norm": 0.8162826875658168, "learning_rate": 4.344758818223527e-07, "loss": 0.3955, "step": 13894 }, { "epoch": 0.8703550008612725, "grad_norm": 0.8069022734048876, "learning_rate": 4.3406238020762605e-07, "loss": 0.3914, "step": 13895 }, { "epoch": 0.8704176388606147, "grad_norm": 0.8503107281342543, "learning_rate": 4.336490665277393e-07, "loss": 0.3914, "step": 13896 }, { "epoch": 0.8704802768599571, "grad_norm": 0.789785898744827, "learning_rate": 4.33235940799705e-07, "loss": 0.3957, "step": 13897 }, { "epoch": 0.8705429148592994, "grad_norm": 0.8418693686418479, "learning_rate": 4.328230030405256e-07, "loss": 0.3318, "step": 13898 }, { "epoch": 0.8706055528586417, "grad_norm": 0.8637060931678925, "learning_rate": 4.324102532672003e-07, "loss": 0.3583, "step": 13899 }, { "epoch": 0.870668190857984, "grad_norm": 0.8458314315259697, "learning_rate": 4.319976914967167e-07, "loss": 0.3549, "step": 13900 }, { "epoch": 0.8707308288573263, "grad_norm": 0.8012902978533278, "learning_rate": 4.3158531774605605e-07, "loss": 0.3537, "step": 13901 }, { "epoch": 0.8707934668566686, "grad_norm": 0.9298577359103105, "learning_rate": 4.311731320321905e-07, "loss": 0.3846, "step": 13902 }, { "epoch": 0.870856104856011, "grad_norm": 0.7978926315588077, "learning_rate": 4.30761134372088e-07, "loss": 0.3401, "step": 13903 }, { "epoch": 0.8709187428553532, "grad_norm": 0.8898285396282537, "learning_rate": 4.303493247827051e-07, "loss": 0.4018, "step": 13904 }, { "epoch": 0.8709813808546955, "grad_norm": 0.9074306753158243, "learning_rate": 4.299377032809926e-07, "loss": 0.4164, "step": 13905 }, { "epoch": 0.8710440188540378, "grad_norm": 0.8896132463654455, "learning_rate": 4.2952626988389204e-07, "loss": 0.4352, "step": 13906 }, { "epoch": 0.8711066568533801, "grad_norm": 0.8256147879690885, "learning_rate": 4.2911502460833755e-07, "loss": 0.3801, "step": 13907 }, { "epoch": 0.8711692948527224, "grad_norm": 0.9259281708258635, "learning_rate": 4.2870396747125844e-07, "loss": 0.3746, "step": 13908 }, { "epoch": 0.8712319328520647, "grad_norm": 0.9193399712728869, "learning_rate": 4.282930984895722e-07, "loss": 0.3942, "step": 13909 }, { "epoch": 0.871294570851407, "grad_norm": 0.8896615538660791, "learning_rate": 4.2788241768019046e-07, "loss": 0.3384, "step": 13910 }, { "epoch": 0.8713572088507493, "grad_norm": 0.8328492388533009, "learning_rate": 4.274719250600162e-07, "loss": 0.3765, "step": 13911 }, { "epoch": 0.8714198468500917, "grad_norm": 0.8768002184253157, "learning_rate": 4.2706162064594713e-07, "loss": 0.4126, "step": 13912 }, { "epoch": 0.8714824848494339, "grad_norm": 0.8303829435246688, "learning_rate": 4.266515044548708e-07, "loss": 0.3718, "step": 13913 }, { "epoch": 0.8715451228487762, "grad_norm": 0.841158533688779, "learning_rate": 4.2624157650366706e-07, "loss": 0.3246, "step": 13914 }, { "epoch": 0.8716077608481185, "grad_norm": 0.8720825359106462, "learning_rate": 4.2583183680920905e-07, "loss": 0.3631, "step": 13915 }, { "epoch": 0.8716703988474608, "grad_norm": 0.8267263270454842, "learning_rate": 4.2542228538836104e-07, "loss": 0.4091, "step": 13916 }, { "epoch": 0.8717330368468031, "grad_norm": 0.8575235482032028, "learning_rate": 4.250129222579813e-07, "loss": 0.4016, "step": 13917 }, { "epoch": 0.8717956748461454, "grad_norm": 0.8792651210231471, "learning_rate": 4.2460374743491904e-07, "loss": 0.4145, "step": 13918 }, { "epoch": 0.8718583128454878, "grad_norm": 0.9213290077109294, "learning_rate": 4.2419476093601476e-07, "loss": 0.4103, "step": 13919 }, { "epoch": 0.87192095084483, "grad_norm": 0.8658992957756924, "learning_rate": 4.2378596277810435e-07, "loss": 0.3769, "step": 13920 }, { "epoch": 0.8719835888441723, "grad_norm": 0.8859925058293484, "learning_rate": 4.233773529780133e-07, "loss": 0.4031, "step": 13921 }, { "epoch": 0.8720462268435146, "grad_norm": 0.9508902777898273, "learning_rate": 4.229689315525598e-07, "loss": 0.4, "step": 13922 }, { "epoch": 0.8721088648428569, "grad_norm": 0.8334072039366847, "learning_rate": 4.225606985185543e-07, "loss": 0.3431, "step": 13923 }, { "epoch": 0.8721715028421992, "grad_norm": 0.7707005557231189, "learning_rate": 4.2215265389280057e-07, "loss": 0.3245, "step": 13924 }, { "epoch": 0.8722341408415415, "grad_norm": 0.8966814545196528, "learning_rate": 4.217447976920919e-07, "loss": 0.4051, "step": 13925 }, { "epoch": 0.8722967788408839, "grad_norm": 0.8181578126195963, "learning_rate": 4.213371299332181e-07, "loss": 0.3352, "step": 13926 }, { "epoch": 0.8723594168402261, "grad_norm": 0.8368885987231633, "learning_rate": 4.2092965063295797e-07, "loss": 0.3658, "step": 13927 }, { "epoch": 0.8724220548395685, "grad_norm": 0.9056519817043908, "learning_rate": 4.2052235980808266e-07, "loss": 0.4032, "step": 13928 }, { "epoch": 0.8724846928389107, "grad_norm": 0.8858576369605243, "learning_rate": 4.201152574753581e-07, "loss": 0.3492, "step": 13929 }, { "epoch": 0.872547330838253, "grad_norm": 0.8793644472199093, "learning_rate": 4.1970834365153924e-07, "loss": 0.3984, "step": 13930 }, { "epoch": 0.8726099688375953, "grad_norm": 0.6188823401754527, "learning_rate": 4.193016183533749e-07, "loss": 0.4522, "step": 13931 }, { "epoch": 0.8726726068369376, "grad_norm": 0.901229290419351, "learning_rate": 4.1889508159760617e-07, "loss": 0.38, "step": 13932 }, { "epoch": 0.87273524483628, "grad_norm": 0.8923963908740498, "learning_rate": 4.184887334009663e-07, "loss": 0.4148, "step": 13933 }, { "epoch": 0.8727978828356222, "grad_norm": 0.8703847536349492, "learning_rate": 4.180825737801808e-07, "loss": 0.3247, "step": 13934 }, { "epoch": 0.8728605208349646, "grad_norm": 0.9079613107406762, "learning_rate": 4.1767660275196576e-07, "loss": 0.3752, "step": 13935 }, { "epoch": 0.8729231588343068, "grad_norm": 0.5892297969567482, "learning_rate": 4.1727082033303333e-07, "loss": 0.4544, "step": 13936 }, { "epoch": 0.8729857968336492, "grad_norm": 0.8596083742034788, "learning_rate": 4.1686522654008355e-07, "loss": 0.3854, "step": 13937 }, { "epoch": 0.8730484348329914, "grad_norm": 0.8437883271207367, "learning_rate": 4.16459821389813e-07, "loss": 0.3872, "step": 13938 }, { "epoch": 0.8731110728323337, "grad_norm": 0.8736948253583842, "learning_rate": 4.160546048989067e-07, "loss": 0.3923, "step": 13939 }, { "epoch": 0.873173710831676, "grad_norm": 0.8440985654996446, "learning_rate": 4.156495770840435e-07, "loss": 0.3671, "step": 13940 }, { "epoch": 0.8732363488310183, "grad_norm": 0.5662930023977545, "learning_rate": 4.1524473796189457e-07, "loss": 0.4396, "step": 13941 }, { "epoch": 0.8732989868303607, "grad_norm": 0.9139948407817744, "learning_rate": 4.148400875491232e-07, "loss": 0.3872, "step": 13942 }, { "epoch": 0.8733616248297029, "grad_norm": 0.9214811995581579, "learning_rate": 4.1443562586238495e-07, "loss": 0.433, "step": 13943 }, { "epoch": 0.8734242628290453, "grad_norm": 0.8490096892344934, "learning_rate": 4.1403135291832707e-07, "loss": 0.4061, "step": 13944 }, { "epoch": 0.8734869008283875, "grad_norm": 0.8834315296705941, "learning_rate": 4.1362726873358905e-07, "loss": 0.3755, "step": 13945 }, { "epoch": 0.8735495388277298, "grad_norm": 0.8131168681414157, "learning_rate": 4.132233733248053e-07, "loss": 0.3788, "step": 13946 }, { "epoch": 0.8736121768270722, "grad_norm": 0.9172422705734891, "learning_rate": 4.128196667085987e-07, "loss": 0.3745, "step": 13947 }, { "epoch": 0.8736748148264144, "grad_norm": 0.8943969574150749, "learning_rate": 4.1241614890158657e-07, "loss": 0.4238, "step": 13948 }, { "epoch": 0.8737374528257568, "grad_norm": 0.8657438070541444, "learning_rate": 4.120128199203766e-07, "loss": 0.3999, "step": 13949 }, { "epoch": 0.873800090825099, "grad_norm": 0.8262011971237845, "learning_rate": 4.116096797815705e-07, "loss": 0.3501, "step": 13950 }, { "epoch": 0.8738627288244414, "grad_norm": 0.8866630062525608, "learning_rate": 4.112067285017618e-07, "loss": 0.3735, "step": 13951 }, { "epoch": 0.8739253668237836, "grad_norm": 0.8592342752404211, "learning_rate": 4.1080396609753493e-07, "loss": 0.3969, "step": 13952 }, { "epoch": 0.873988004823126, "grad_norm": 0.8168305209139017, "learning_rate": 4.1040139258546996e-07, "loss": 0.3954, "step": 13953 }, { "epoch": 0.8740506428224682, "grad_norm": 0.8626309616723855, "learning_rate": 4.099990079821342e-07, "loss": 0.4085, "step": 13954 }, { "epoch": 0.8741132808218105, "grad_norm": 0.937247266378566, "learning_rate": 4.0959681230409223e-07, "loss": 0.399, "step": 13955 }, { "epoch": 0.8741759188211529, "grad_norm": 0.9022972887479958, "learning_rate": 4.0919480556789736e-07, "loss": 0.3645, "step": 13956 }, { "epoch": 0.8742385568204951, "grad_norm": 0.8243971427612125, "learning_rate": 4.087929877900965e-07, "loss": 0.3581, "step": 13957 }, { "epoch": 0.8743011948198375, "grad_norm": 0.8953102055632833, "learning_rate": 4.0839135898722846e-07, "loss": 0.3724, "step": 13958 }, { "epoch": 0.8743638328191797, "grad_norm": 0.8953504528062478, "learning_rate": 4.079899191758241e-07, "loss": 0.3875, "step": 13959 }, { "epoch": 0.8744264708185221, "grad_norm": 0.9024800668432699, "learning_rate": 4.075886683724073e-07, "loss": 0.3656, "step": 13960 }, { "epoch": 0.8744891088178643, "grad_norm": 0.8411056267042303, "learning_rate": 4.071876065934921e-07, "loss": 0.3814, "step": 13961 }, { "epoch": 0.8745517468172067, "grad_norm": 0.86152720361257, "learning_rate": 4.067867338555875e-07, "loss": 0.3852, "step": 13962 }, { "epoch": 0.874614384816549, "grad_norm": 0.9116374427866112, "learning_rate": 4.0638605017519483e-07, "loss": 0.4089, "step": 13963 }, { "epoch": 0.8746770228158912, "grad_norm": 0.8519244711766205, "learning_rate": 4.059855555688047e-07, "loss": 0.3867, "step": 13964 }, { "epoch": 0.8747396608152336, "grad_norm": 0.8725372632363664, "learning_rate": 4.055852500529017e-07, "loss": 0.4286, "step": 13965 }, { "epoch": 0.8748022988145758, "grad_norm": 0.9299708417124968, "learning_rate": 4.0518513364396274e-07, "loss": 0.3893, "step": 13966 }, { "epoch": 0.8748649368139182, "grad_norm": 0.6203309127695815, "learning_rate": 4.047852063584562e-07, "loss": 0.4269, "step": 13967 }, { "epoch": 0.8749275748132604, "grad_norm": 0.8747718339064404, "learning_rate": 4.043854682128434e-07, "loss": 0.3758, "step": 13968 }, { "epoch": 0.8749902128126028, "grad_norm": 0.7909883702144181, "learning_rate": 4.0398591922357787e-07, "loss": 0.3591, "step": 13969 }, { "epoch": 0.8750528508119451, "grad_norm": 0.93488494835849, "learning_rate": 4.0358655940710425e-07, "loss": 0.3895, "step": 13970 }, { "epoch": 0.8751154888112873, "grad_norm": 0.8831675389634042, "learning_rate": 4.0318738877986096e-07, "loss": 0.3676, "step": 13971 }, { "epoch": 0.8751781268106297, "grad_norm": 0.7978160658273911, "learning_rate": 4.0278840735827886e-07, "loss": 0.3663, "step": 13972 }, { "epoch": 0.8752407648099719, "grad_norm": 0.8464567616086645, "learning_rate": 4.0238961515877916e-07, "loss": 0.3776, "step": 13973 }, { "epoch": 0.8753034028093143, "grad_norm": 0.8442449807009321, "learning_rate": 4.0199101219777603e-07, "loss": 0.383, "step": 13974 }, { "epoch": 0.8753660408086565, "grad_norm": 0.5858962125536575, "learning_rate": 4.015925984916769e-07, "loss": 0.4509, "step": 13975 }, { "epoch": 0.8754286788079989, "grad_norm": 0.8782590172054058, "learning_rate": 4.011943740568791e-07, "loss": 0.3896, "step": 13976 }, { "epoch": 0.8754913168073412, "grad_norm": 0.8770972773781855, "learning_rate": 4.007963389097752e-07, "loss": 0.3611, "step": 13977 }, { "epoch": 0.8755539548066835, "grad_norm": 0.663842646635357, "learning_rate": 4.0039849306674593e-07, "loss": 0.4435, "step": 13978 }, { "epoch": 0.8756165928060258, "grad_norm": 0.9707687812934754, "learning_rate": 4.0000083654416986e-07, "loss": 0.4263, "step": 13979 }, { "epoch": 0.875679230805368, "grad_norm": 0.6626638693457534, "learning_rate": 3.9960336935841336e-07, "loss": 0.4721, "step": 13980 }, { "epoch": 0.8757418688047104, "grad_norm": 0.6531425008480715, "learning_rate": 3.9920609152583445e-07, "loss": 0.4748, "step": 13981 }, { "epoch": 0.8758045068040526, "grad_norm": 0.8503861230772385, "learning_rate": 3.988090030627884e-07, "loss": 0.3708, "step": 13982 }, { "epoch": 0.875867144803395, "grad_norm": 0.8601808784570271, "learning_rate": 3.984121039856176e-07, "loss": 0.3891, "step": 13983 }, { "epoch": 0.8759297828027373, "grad_norm": 0.8105002372155308, "learning_rate": 3.9801539431065847e-07, "loss": 0.3675, "step": 13984 }, { "epoch": 0.8759924208020796, "grad_norm": 0.8796916883826137, "learning_rate": 3.9761887405423973e-07, "loss": 0.3716, "step": 13985 }, { "epoch": 0.8760550588014219, "grad_norm": 0.5920412668180547, "learning_rate": 3.972225432326826e-07, "loss": 0.4698, "step": 13986 }, { "epoch": 0.8761176968007642, "grad_norm": 0.8514595727323399, "learning_rate": 3.9682640186229914e-07, "loss": 0.3809, "step": 13987 }, { "epoch": 0.8761803348001065, "grad_norm": 0.8191068016837589, "learning_rate": 3.9643044995939626e-07, "loss": 0.3614, "step": 13988 }, { "epoch": 0.8762429727994487, "grad_norm": 0.8568856188588921, "learning_rate": 3.960346875402704e-07, "loss": 0.4032, "step": 13989 }, { "epoch": 0.8763056107987911, "grad_norm": 0.9177572504083826, "learning_rate": 3.956391146212102e-07, "loss": 0.3785, "step": 13990 }, { "epoch": 0.8763682487981334, "grad_norm": 0.8393966374061629, "learning_rate": 3.952437312184998e-07, "loss": 0.4063, "step": 13991 }, { "epoch": 0.8764308867974757, "grad_norm": 0.8957679061940997, "learning_rate": 3.9484853734841234e-07, "loss": 0.3832, "step": 13992 }, { "epoch": 0.876493524796818, "grad_norm": 0.8384232757620583, "learning_rate": 3.944535330272131e-07, "loss": 0.3372, "step": 13993 }, { "epoch": 0.8765561627961603, "grad_norm": 0.8957113675791107, "learning_rate": 3.9405871827116196e-07, "loss": 0.3593, "step": 13994 }, { "epoch": 0.8766188007955026, "grad_norm": 0.8818781757911657, "learning_rate": 3.9366409309650857e-07, "loss": 0.393, "step": 13995 }, { "epoch": 0.876681438794845, "grad_norm": 0.9274266921043032, "learning_rate": 3.932696575194955e-07, "loss": 0.3942, "step": 13996 }, { "epoch": 0.8767440767941872, "grad_norm": 0.7862644920822456, "learning_rate": 3.9287541155635875e-07, "loss": 0.3743, "step": 13997 }, { "epoch": 0.8768067147935295, "grad_norm": 0.9231606640591249, "learning_rate": 3.9248135522332587e-07, "loss": 0.3907, "step": 13998 }, { "epoch": 0.8768693527928718, "grad_norm": 0.8808602474700556, "learning_rate": 3.920874885366144e-07, "loss": 0.4056, "step": 13999 }, { "epoch": 0.8769319907922141, "grad_norm": 0.8659827858195399, "learning_rate": 3.9169381151243803e-07, "loss": 0.3927, "step": 14000 }, { "epoch": 0.8769946287915564, "grad_norm": 0.8687907357353045, "learning_rate": 3.9130032416699935e-07, "loss": 0.3865, "step": 14001 }, { "epoch": 0.8770572667908987, "grad_norm": 0.9505375962799076, "learning_rate": 3.909070265164955e-07, "loss": 0.4055, "step": 14002 }, { "epoch": 0.877119904790241, "grad_norm": 0.9151438818580124, "learning_rate": 3.905139185771134e-07, "loss": 0.362, "step": 14003 }, { "epoch": 0.8771825427895833, "grad_norm": 0.9225728521590839, "learning_rate": 3.9012100036503295e-07, "loss": 0.3852, "step": 14004 }, { "epoch": 0.8772451807889255, "grad_norm": 0.8325311895509329, "learning_rate": 3.89728271896429e-07, "loss": 0.3549, "step": 14005 }, { "epoch": 0.8773078187882679, "grad_norm": 0.8761844960970022, "learning_rate": 3.8933573318746475e-07, "loss": 0.3826, "step": 14006 }, { "epoch": 0.8773704567876102, "grad_norm": 0.8572863028429251, "learning_rate": 3.889433842542978e-07, "loss": 0.3786, "step": 14007 }, { "epoch": 0.8774330947869525, "grad_norm": 0.8942567198737035, "learning_rate": 3.885512251130763e-07, "loss": 0.3745, "step": 14008 }, { "epoch": 0.8774957327862948, "grad_norm": 0.8602726977422481, "learning_rate": 3.8815925577994296e-07, "loss": 0.3854, "step": 14009 }, { "epoch": 0.8775583707856371, "grad_norm": 0.8397052955880704, "learning_rate": 3.877674762710304e-07, "loss": 0.3603, "step": 14010 }, { "epoch": 0.8776210087849794, "grad_norm": 0.8740749811325975, "learning_rate": 3.873758866024652e-07, "loss": 0.3942, "step": 14011 }, { "epoch": 0.8776836467843218, "grad_norm": 0.9524843033579785, "learning_rate": 3.869844867903638e-07, "loss": 0.4259, "step": 14012 }, { "epoch": 0.877746284783664, "grad_norm": 0.5845218943909671, "learning_rate": 3.865932768508368e-07, "loss": 0.4271, "step": 14013 }, { "epoch": 0.8778089227830063, "grad_norm": 0.7981546253108208, "learning_rate": 3.862022567999879e-07, "loss": 0.3729, "step": 14014 }, { "epoch": 0.8778715607823486, "grad_norm": 0.8360023681106464, "learning_rate": 3.8581142665391036e-07, "loss": 0.3699, "step": 14015 }, { "epoch": 0.8779341987816909, "grad_norm": 0.7828346663051295, "learning_rate": 3.8542078642869075e-07, "loss": 0.3666, "step": 14016 }, { "epoch": 0.8779968367810332, "grad_norm": 0.8655193539966062, "learning_rate": 3.850303361404067e-07, "loss": 0.3689, "step": 14017 }, { "epoch": 0.8780594747803755, "grad_norm": 0.6518644144966935, "learning_rate": 3.846400758051322e-07, "loss": 0.4732, "step": 14018 }, { "epoch": 0.8781221127797179, "grad_norm": 0.856924094572329, "learning_rate": 3.8425000543892864e-07, "loss": 0.3882, "step": 14019 }, { "epoch": 0.8781847507790601, "grad_norm": 0.5984445183652974, "learning_rate": 3.8386012505785164e-07, "loss": 0.4558, "step": 14020 }, { "epoch": 0.8782473887784025, "grad_norm": 0.8654805500881209, "learning_rate": 3.8347043467794777e-07, "loss": 0.414, "step": 14021 }, { "epoch": 0.8783100267777447, "grad_norm": 0.9750271354594201, "learning_rate": 3.8308093431525863e-07, "loss": 0.3662, "step": 14022 }, { "epoch": 0.878372664777087, "grad_norm": 0.8297080128228211, "learning_rate": 3.826916239858147e-07, "loss": 0.3386, "step": 14023 }, { "epoch": 0.8784353027764293, "grad_norm": 0.8450030830167623, "learning_rate": 3.82302503705641e-07, "loss": 0.3613, "step": 14024 }, { "epoch": 0.8784979407757716, "grad_norm": 0.8972798699061757, "learning_rate": 3.819135734907531e-07, "loss": 0.4001, "step": 14025 }, { "epoch": 0.878560578775114, "grad_norm": 0.8900471339767992, "learning_rate": 3.8152483335715975e-07, "loss": 0.3809, "step": 14026 }, { "epoch": 0.8786232167744562, "grad_norm": 0.5899951275435747, "learning_rate": 3.8113628332086095e-07, "loss": 0.4238, "step": 14027 }, { "epoch": 0.8786858547737986, "grad_norm": 0.8358224708022404, "learning_rate": 3.807479233978506e-07, "loss": 0.4234, "step": 14028 }, { "epoch": 0.8787484927731408, "grad_norm": 0.901965451966369, "learning_rate": 3.8035975360411317e-07, "loss": 0.3912, "step": 14029 }, { "epoch": 0.8788111307724831, "grad_norm": 0.8673685878857976, "learning_rate": 3.799717739556247e-07, "loss": 0.4124, "step": 14030 }, { "epoch": 0.8788737687718254, "grad_norm": 0.8866022785116802, "learning_rate": 3.795839844683563e-07, "loss": 0.3278, "step": 14031 }, { "epoch": 0.8789364067711677, "grad_norm": 0.6467003243293497, "learning_rate": 3.791963851582692e-07, "loss": 0.4458, "step": 14032 }, { "epoch": 0.87899904477051, "grad_norm": 0.9262926256422755, "learning_rate": 3.788089760413161e-07, "loss": 0.3915, "step": 14033 }, { "epoch": 0.8790616827698523, "grad_norm": 0.9373236466417613, "learning_rate": 3.784217571334436e-07, "loss": 0.3673, "step": 14034 }, { "epoch": 0.8791243207691947, "grad_norm": 0.8316813353975687, "learning_rate": 3.780347284505892e-07, "loss": 0.3865, "step": 14035 }, { "epoch": 0.8791869587685369, "grad_norm": 0.9229254150070696, "learning_rate": 3.7764789000868216e-07, "loss": 0.4069, "step": 14036 }, { "epoch": 0.8792495967678793, "grad_norm": 0.8920173027233362, "learning_rate": 3.772612418236471e-07, "loss": 0.4138, "step": 14037 }, { "epoch": 0.8793122347672215, "grad_norm": 0.9383583481790605, "learning_rate": 3.7687478391139734e-07, "loss": 0.3878, "step": 14038 }, { "epoch": 0.8793748727665638, "grad_norm": 0.916573895500343, "learning_rate": 3.7648851628783844e-07, "loss": 0.3824, "step": 14039 }, { "epoch": 0.8794375107659061, "grad_norm": 0.8637816172082884, "learning_rate": 3.7610243896887164e-07, "loss": 0.3804, "step": 14040 }, { "epoch": 0.8795001487652484, "grad_norm": 0.9185018656814263, "learning_rate": 3.757165519703865e-07, "loss": 0.3667, "step": 14041 }, { "epoch": 0.8795627867645908, "grad_norm": 0.8643257345552875, "learning_rate": 3.753308553082663e-07, "loss": 0.3916, "step": 14042 }, { "epoch": 0.879625424763933, "grad_norm": 0.8711543083968465, "learning_rate": 3.7494534899838674e-07, "loss": 0.3676, "step": 14043 }, { "epoch": 0.8796880627632754, "grad_norm": 0.8672265883413147, "learning_rate": 3.7456003305661457e-07, "loss": 0.3756, "step": 14044 }, { "epoch": 0.8797507007626176, "grad_norm": 0.8869002116073331, "learning_rate": 3.7417490749880937e-07, "loss": 0.318, "step": 14045 }, { "epoch": 0.87981333876196, "grad_norm": 0.8499055453855936, "learning_rate": 3.737899723408245e-07, "loss": 0.3388, "step": 14046 }, { "epoch": 0.8798759767613022, "grad_norm": 0.8611924323968503, "learning_rate": 3.7340522759850226e-07, "loss": 0.3465, "step": 14047 }, { "epoch": 0.8799386147606445, "grad_norm": 0.8693711241509673, "learning_rate": 3.730206732876801e-07, "loss": 0.3687, "step": 14048 }, { "epoch": 0.8800012527599869, "grad_norm": 0.8702292196325648, "learning_rate": 3.7263630942418636e-07, "loss": 0.363, "step": 14049 }, { "epoch": 0.8800638907593291, "grad_norm": 0.8816062489964316, "learning_rate": 3.722521360238407e-07, "loss": 0.3589, "step": 14050 }, { "epoch": 0.8801265287586715, "grad_norm": 0.8841859784624069, "learning_rate": 3.718681531024559e-07, "loss": 0.391, "step": 14051 }, { "epoch": 0.8801891667580137, "grad_norm": 0.9129761228032405, "learning_rate": 3.7148436067583724e-07, "loss": 0.4, "step": 14052 }, { "epoch": 0.8802518047573561, "grad_norm": 0.8482373584734912, "learning_rate": 3.711007587597809e-07, "loss": 0.3888, "step": 14053 }, { "epoch": 0.8803144427566983, "grad_norm": 0.8743831320818731, "learning_rate": 3.7071734737007593e-07, "loss": 0.3456, "step": 14054 }, { "epoch": 0.8803770807560406, "grad_norm": 0.8982528806505776, "learning_rate": 3.7033412652250524e-07, "loss": 0.3714, "step": 14055 }, { "epoch": 0.880439718755383, "grad_norm": 0.8920676945708844, "learning_rate": 3.699510962328401e-07, "loss": 0.3867, "step": 14056 }, { "epoch": 0.8805023567547252, "grad_norm": 0.8446523518959315, "learning_rate": 3.6956825651684846e-07, "loss": 0.3922, "step": 14057 }, { "epoch": 0.8805649947540676, "grad_norm": 0.88424753801567, "learning_rate": 3.691856073902866e-07, "loss": 0.3913, "step": 14058 }, { "epoch": 0.8806276327534098, "grad_norm": 0.7617755219784613, "learning_rate": 3.6880314886890424e-07, "loss": 0.3428, "step": 14059 }, { "epoch": 0.8806902707527522, "grad_norm": 0.9219559189729135, "learning_rate": 3.684208809684442e-07, "loss": 0.4107, "step": 14060 }, { "epoch": 0.8807529087520944, "grad_norm": 0.8110942778954472, "learning_rate": 3.680388037046406e-07, "loss": 0.3415, "step": 14061 }, { "epoch": 0.8808155467514368, "grad_norm": 0.8496581343189424, "learning_rate": 3.6765691709321924e-07, "loss": 0.3417, "step": 14062 }, { "epoch": 0.8808781847507791, "grad_norm": 0.8743632628999576, "learning_rate": 3.672752211498981e-07, "loss": 0.3785, "step": 14063 }, { "epoch": 0.8809408227501213, "grad_norm": 0.853056364552332, "learning_rate": 3.6689371589039013e-07, "loss": 0.4037, "step": 14064 }, { "epoch": 0.8810034607494637, "grad_norm": 0.8873356277306939, "learning_rate": 3.6651240133039557e-07, "loss": 0.377, "step": 14065 }, { "epoch": 0.8810660987488059, "grad_norm": 0.8226704308556567, "learning_rate": 3.661312774856113e-07, "loss": 0.4159, "step": 14066 }, { "epoch": 0.8811287367481483, "grad_norm": 0.8807792092548994, "learning_rate": 3.657503443717236e-07, "loss": 0.4255, "step": 14067 }, { "epoch": 0.8811913747474905, "grad_norm": 0.7892046076307677, "learning_rate": 3.653696020044123e-07, "loss": 0.3687, "step": 14068 }, { "epoch": 0.8812540127468329, "grad_norm": 0.9489188309208688, "learning_rate": 3.6498905039934807e-07, "loss": 0.3895, "step": 14069 }, { "epoch": 0.8813166507461752, "grad_norm": 0.8624131752685421, "learning_rate": 3.646086895721951e-07, "loss": 0.385, "step": 14070 }, { "epoch": 0.8813792887455175, "grad_norm": 0.837574044055321, "learning_rate": 3.642285195386086e-07, "loss": 0.3928, "step": 14071 }, { "epoch": 0.8814419267448598, "grad_norm": 0.9171089083657346, "learning_rate": 3.6384854031423553e-07, "loss": 0.3851, "step": 14072 }, { "epoch": 0.881504564744202, "grad_norm": 0.872999612305069, "learning_rate": 3.6346875191471785e-07, "loss": 0.405, "step": 14073 }, { "epoch": 0.8815672027435444, "grad_norm": 0.8560025676106903, "learning_rate": 3.6308915435568746e-07, "loss": 0.3659, "step": 14074 }, { "epoch": 0.8816298407428866, "grad_norm": 0.8144684885418181, "learning_rate": 3.62709747652768e-07, "loss": 0.3539, "step": 14075 }, { "epoch": 0.881692478742229, "grad_norm": 0.8101483322922356, "learning_rate": 3.6233053182157586e-07, "loss": 0.3535, "step": 14076 }, { "epoch": 0.8817551167415713, "grad_norm": 0.7938247172348455, "learning_rate": 3.619515068777202e-07, "loss": 0.3712, "step": 14077 }, { "epoch": 0.8818177547409136, "grad_norm": 0.8890379648984326, "learning_rate": 3.615726728368013e-07, "loss": 0.3845, "step": 14078 }, { "epoch": 0.8818803927402559, "grad_norm": 0.8904373137281539, "learning_rate": 3.6119402971441175e-07, "loss": 0.3725, "step": 14079 }, { "epoch": 0.8819430307395982, "grad_norm": 0.8743556084524559, "learning_rate": 3.608155775261374e-07, "loss": 0.3717, "step": 14080 }, { "epoch": 0.8820056687389405, "grad_norm": 0.823137991681904, "learning_rate": 3.6043731628755405e-07, "loss": 0.3501, "step": 14081 }, { "epoch": 0.8820683067382827, "grad_norm": 0.5823822220330094, "learning_rate": 3.600592460142316e-07, "loss": 0.4493, "step": 14082 }, { "epoch": 0.8821309447376251, "grad_norm": 0.5993733911583382, "learning_rate": 3.5968136672173304e-07, "loss": 0.4598, "step": 14083 }, { "epoch": 0.8821935827369674, "grad_norm": 0.933344157969982, "learning_rate": 3.5930367842561044e-07, "loss": 0.4586, "step": 14084 }, { "epoch": 0.8822562207363097, "grad_norm": 0.9120339359347482, "learning_rate": 3.589261811414096e-07, "loss": 0.3899, "step": 14085 }, { "epoch": 0.882318858735652, "grad_norm": 0.8700332734489158, "learning_rate": 3.5854887488466883e-07, "loss": 0.3932, "step": 14086 }, { "epoch": 0.8823814967349943, "grad_norm": 0.8431711204302517, "learning_rate": 3.5817175967091776e-07, "loss": 0.3283, "step": 14087 }, { "epoch": 0.8824441347343366, "grad_norm": 0.8150247395336991, "learning_rate": 3.5779483551567905e-07, "loss": 0.3672, "step": 14088 }, { "epoch": 0.8825067727336788, "grad_norm": 0.9323135742470443, "learning_rate": 3.574181024344653e-07, "loss": 0.3989, "step": 14089 }, { "epoch": 0.8825694107330212, "grad_norm": 0.874515238365937, "learning_rate": 3.5704156044278517e-07, "loss": 0.4025, "step": 14090 }, { "epoch": 0.8826320487323635, "grad_norm": 0.8299726112452122, "learning_rate": 3.5666520955613517e-07, "loss": 0.3342, "step": 14091 }, { "epoch": 0.8826946867317058, "grad_norm": 0.8338126150329953, "learning_rate": 3.562890497900079e-07, "loss": 0.3617, "step": 14092 }, { "epoch": 0.8827573247310481, "grad_norm": 0.8397092308966333, "learning_rate": 3.559130811598849e-07, "loss": 0.334, "step": 14093 }, { "epoch": 0.8828199627303904, "grad_norm": 0.8753685149400108, "learning_rate": 3.5553730368124205e-07, "loss": 0.3712, "step": 14094 }, { "epoch": 0.8828826007297327, "grad_norm": 0.6040394088585379, "learning_rate": 3.5516171736954543e-07, "loss": 0.4189, "step": 14095 }, { "epoch": 0.882945238729075, "grad_norm": 0.5581709959139709, "learning_rate": 3.5478632224025477e-07, "loss": 0.4686, "step": 14096 }, { "epoch": 0.8830078767284173, "grad_norm": 0.8594430768100829, "learning_rate": 3.544111183088217e-07, "loss": 0.3867, "step": 14097 }, { "epoch": 0.8830705147277595, "grad_norm": 0.8503566481043175, "learning_rate": 3.540361055906877e-07, "loss": 0.3758, "step": 14098 }, { "epoch": 0.8831331527271019, "grad_norm": 0.9364975446432597, "learning_rate": 3.5366128410129154e-07, "loss": 0.3707, "step": 14099 }, { "epoch": 0.8831957907264442, "grad_norm": 0.8837198544144171, "learning_rate": 3.5328665385605754e-07, "loss": 0.3587, "step": 14100 }, { "epoch": 0.8832584287257865, "grad_norm": 0.8146900635395964, "learning_rate": 3.529122148704089e-07, "loss": 0.3584, "step": 14101 }, { "epoch": 0.8833210667251288, "grad_norm": 0.6328927858042217, "learning_rate": 3.5253796715975564e-07, "loss": 0.4678, "step": 14102 }, { "epoch": 0.8833837047244711, "grad_norm": 0.8810356297421909, "learning_rate": 3.5216391073950253e-07, "loss": 0.4014, "step": 14103 }, { "epoch": 0.8834463427238134, "grad_norm": 0.8742729587890181, "learning_rate": 3.517900456250456e-07, "loss": 0.3792, "step": 14104 }, { "epoch": 0.8835089807231558, "grad_norm": 0.880999493755953, "learning_rate": 3.5141637183177314e-07, "loss": 0.3933, "step": 14105 }, { "epoch": 0.883571618722498, "grad_norm": 0.8723673162840717, "learning_rate": 3.510428893750645e-07, "loss": 0.3953, "step": 14106 }, { "epoch": 0.8836342567218403, "grad_norm": 0.9081683204522124, "learning_rate": 3.5066959827029454e-07, "loss": 0.3773, "step": 14107 }, { "epoch": 0.8836968947211826, "grad_norm": 0.7595608421678699, "learning_rate": 3.5029649853282667e-07, "loss": 0.322, "step": 14108 }, { "epoch": 0.8837595327205249, "grad_norm": 0.8228244424157228, "learning_rate": 3.4992359017801737e-07, "loss": 0.3903, "step": 14109 }, { "epoch": 0.8838221707198672, "grad_norm": 0.9054445471221643, "learning_rate": 3.495508732212166e-07, "loss": 0.3966, "step": 14110 }, { "epoch": 0.8838848087192095, "grad_norm": 0.9392096654073443, "learning_rate": 3.491783476777655e-07, "loss": 0.4065, "step": 14111 }, { "epoch": 0.8839474467185519, "grad_norm": 0.8461453209344013, "learning_rate": 3.488060135629967e-07, "loss": 0.3303, "step": 14112 }, { "epoch": 0.8840100847178941, "grad_norm": 0.8273006003327861, "learning_rate": 3.4843387089223524e-07, "loss": 0.3678, "step": 14113 }, { "epoch": 0.8840727227172364, "grad_norm": 0.8851312032299019, "learning_rate": 3.480619196807994e-07, "loss": 0.3743, "step": 14114 }, { "epoch": 0.8841353607165787, "grad_norm": 0.8621146924212044, "learning_rate": 3.47690159943998e-07, "loss": 0.3658, "step": 14115 }, { "epoch": 0.884197998715921, "grad_norm": 0.8457316012437817, "learning_rate": 3.4731859169713335e-07, "loss": 0.3595, "step": 14116 }, { "epoch": 0.8842606367152633, "grad_norm": 0.8643759071263697, "learning_rate": 3.469472149554992e-07, "loss": 0.35, "step": 14117 }, { "epoch": 0.8843232747146056, "grad_norm": 0.5634378036707722, "learning_rate": 3.465760297343812e-07, "loss": 0.4421, "step": 14118 }, { "epoch": 0.884385912713948, "grad_norm": 0.845646075873081, "learning_rate": 3.462050360490571e-07, "loss": 0.3602, "step": 14119 }, { "epoch": 0.8844485507132902, "grad_norm": 0.9337562296802016, "learning_rate": 3.45834233914798e-07, "loss": 0.3711, "step": 14120 }, { "epoch": 0.8845111887126326, "grad_norm": 0.8477467591396314, "learning_rate": 3.454636233468656e-07, "loss": 0.3568, "step": 14121 }, { "epoch": 0.8845738267119748, "grad_norm": 0.8218167601932878, "learning_rate": 3.4509320436051486e-07, "loss": 0.3867, "step": 14122 }, { "epoch": 0.8846364647113171, "grad_norm": 0.8968271728038693, "learning_rate": 3.4472297697099143e-07, "loss": 0.3817, "step": 14123 }, { "epoch": 0.8846991027106594, "grad_norm": 0.8996502077854004, "learning_rate": 3.443529411935331e-07, "loss": 0.379, "step": 14124 }, { "epoch": 0.8847617407100017, "grad_norm": 0.866806582896012, "learning_rate": 3.4398309704337327e-07, "loss": 0.4121, "step": 14125 }, { "epoch": 0.884824378709344, "grad_norm": 0.9321864450297336, "learning_rate": 3.4361344453573367e-07, "loss": 0.4131, "step": 14126 }, { "epoch": 0.8848870167086863, "grad_norm": 0.8043033437245863, "learning_rate": 3.432439836858281e-07, "loss": 0.3566, "step": 14127 }, { "epoch": 0.8849496547080287, "grad_norm": 0.9006637224311673, "learning_rate": 3.42874714508864e-07, "loss": 0.4239, "step": 14128 }, { "epoch": 0.8850122927073709, "grad_norm": 0.9133230454471212, "learning_rate": 3.4250563702004246e-07, "loss": 0.3617, "step": 14129 }, { "epoch": 0.8850749307067133, "grad_norm": 0.8473170805492286, "learning_rate": 3.4213675123455246e-07, "loss": 0.3653, "step": 14130 }, { "epoch": 0.8851375687060555, "grad_norm": 0.8647850798279353, "learning_rate": 3.417680571675791e-07, "loss": 0.4197, "step": 14131 }, { "epoch": 0.8852002067053978, "grad_norm": 0.8217464676602, "learning_rate": 3.413995548342958e-07, "loss": 0.3714, "step": 14132 }, { "epoch": 0.8852628447047401, "grad_norm": 0.8809711647381091, "learning_rate": 3.4103124424987254e-07, "loss": 0.3536, "step": 14133 }, { "epoch": 0.8853254827040824, "grad_norm": 0.8827847853349143, "learning_rate": 3.406631254294679e-07, "loss": 0.3796, "step": 14134 }, { "epoch": 0.8853881207034248, "grad_norm": 0.7894367457211906, "learning_rate": 3.402951983882341e-07, "loss": 0.3659, "step": 14135 }, { "epoch": 0.885450758702767, "grad_norm": 0.8251006057737569, "learning_rate": 3.3992746314131406e-07, "loss": 0.3677, "step": 14136 }, { "epoch": 0.8855133967021094, "grad_norm": 0.5724255168927116, "learning_rate": 3.395599197038446e-07, "loss": 0.4286, "step": 14137 }, { "epoch": 0.8855760347014516, "grad_norm": 0.9335370595375754, "learning_rate": 3.3919256809095414e-07, "loss": 0.3886, "step": 14138 }, { "epoch": 0.8856386727007939, "grad_norm": 0.8807299720930952, "learning_rate": 3.3882540831776334e-07, "loss": 0.3919, "step": 14139 }, { "epoch": 0.8857013107001362, "grad_norm": 0.9078783482700185, "learning_rate": 3.384584403993829e-07, "loss": 0.3826, "step": 14140 }, { "epoch": 0.8857639486994785, "grad_norm": 0.8497822749481374, "learning_rate": 3.380916643509186e-07, "loss": 0.3727, "step": 14141 }, { "epoch": 0.8858265866988209, "grad_norm": 0.8025360607613096, "learning_rate": 3.3772508018746654e-07, "loss": 0.3719, "step": 14142 }, { "epoch": 0.8858892246981631, "grad_norm": 0.9759372599950686, "learning_rate": 3.3735868792411586e-07, "loss": 0.4315, "step": 14143 }, { "epoch": 0.8859518626975055, "grad_norm": 0.8452484573987421, "learning_rate": 3.369924875759473e-07, "loss": 0.357, "step": 14144 }, { "epoch": 0.8860145006968477, "grad_norm": 0.8358125422950737, "learning_rate": 3.3662647915803316e-07, "loss": 0.3745, "step": 14145 }, { "epoch": 0.8860771386961901, "grad_norm": 0.8891326805850921, "learning_rate": 3.362606626854381e-07, "loss": 0.378, "step": 14146 }, { "epoch": 0.8861397766955323, "grad_norm": 0.8649282077806922, "learning_rate": 3.3589503817322013e-07, "loss": 0.3667, "step": 14147 }, { "epoch": 0.8862024146948746, "grad_norm": 0.8805046215430001, "learning_rate": 3.3552960563642825e-07, "loss": 0.3544, "step": 14148 }, { "epoch": 0.886265052694217, "grad_norm": 0.8927620056005346, "learning_rate": 3.3516436509010376e-07, "loss": 0.3854, "step": 14149 }, { "epoch": 0.8863276906935592, "grad_norm": 0.8670087814445315, "learning_rate": 3.347993165492791e-07, "loss": 0.3733, "step": 14150 }, { "epoch": 0.8863903286929016, "grad_norm": 0.909730038480672, "learning_rate": 3.3443446002898117e-07, "loss": 0.4095, "step": 14151 }, { "epoch": 0.8864529666922438, "grad_norm": 0.8881914015687823, "learning_rate": 3.340697955442268e-07, "loss": 0.3866, "step": 14152 }, { "epoch": 0.8865156046915862, "grad_norm": 0.7965095461813618, "learning_rate": 3.337053231100257e-07, "loss": 0.3276, "step": 14153 }, { "epoch": 0.8865782426909284, "grad_norm": 0.8731046701698273, "learning_rate": 3.333410427413797e-07, "loss": 0.428, "step": 14154 }, { "epoch": 0.8866408806902708, "grad_norm": 0.9898300739279613, "learning_rate": 3.329769544532818e-07, "loss": 0.4239, "step": 14155 }, { "epoch": 0.8867035186896131, "grad_norm": 0.8678281193520022, "learning_rate": 3.32613058260719e-07, "loss": 0.4046, "step": 14156 }, { "epoch": 0.8867661566889553, "grad_norm": 0.8873112320200859, "learning_rate": 3.3224935417866975e-07, "loss": 0.373, "step": 14157 }, { "epoch": 0.8868287946882977, "grad_norm": 0.8729816944464479, "learning_rate": 3.318858422221022e-07, "loss": 0.4055, "step": 14158 }, { "epoch": 0.8868914326876399, "grad_norm": 0.929275924279114, "learning_rate": 3.315225224059809e-07, "loss": 0.3928, "step": 14159 }, { "epoch": 0.8869540706869823, "grad_norm": 0.9248730487723744, "learning_rate": 3.311593947452585e-07, "loss": 0.4144, "step": 14160 }, { "epoch": 0.8870167086863245, "grad_norm": 0.9093388623389898, "learning_rate": 3.3079645925488237e-07, "loss": 0.4028, "step": 14161 }, { "epoch": 0.8870793466856669, "grad_norm": 0.9788734470169854, "learning_rate": 3.304337159497911e-07, "loss": 0.3845, "step": 14162 }, { "epoch": 0.8871419846850092, "grad_norm": 0.8994315783820973, "learning_rate": 3.3007116484491397e-07, "loss": 0.3602, "step": 14163 }, { "epoch": 0.8872046226843514, "grad_norm": 0.8217694884211492, "learning_rate": 3.297088059551751e-07, "loss": 0.371, "step": 14164 }, { "epoch": 0.8872672606836938, "grad_norm": 0.8727464593393304, "learning_rate": 3.293466392954875e-07, "loss": 0.4185, "step": 14165 }, { "epoch": 0.887329898683036, "grad_norm": 0.9468517012075914, "learning_rate": 3.2898466488075984e-07, "loss": 0.3507, "step": 14166 }, { "epoch": 0.8873925366823784, "grad_norm": 0.8874440950162152, "learning_rate": 3.286228827258897e-07, "loss": 0.3992, "step": 14167 }, { "epoch": 0.8874551746817206, "grad_norm": 0.891262905015541, "learning_rate": 3.282612928457696e-07, "loss": 0.3682, "step": 14168 }, { "epoch": 0.887517812681063, "grad_norm": 0.8642864066813948, "learning_rate": 3.27899895255282e-07, "loss": 0.3781, "step": 14169 }, { "epoch": 0.8875804506804053, "grad_norm": 0.8184322939655596, "learning_rate": 3.275386899693017e-07, "loss": 0.3478, "step": 14170 }, { "epoch": 0.8876430886797476, "grad_norm": 0.8135944283011708, "learning_rate": 3.271776770026963e-07, "loss": 0.358, "step": 14171 }, { "epoch": 0.8877057266790899, "grad_norm": 0.9189499078004679, "learning_rate": 3.26816856370325e-07, "loss": 0.3751, "step": 14172 }, { "epoch": 0.8877683646784321, "grad_norm": 0.8749130649599405, "learning_rate": 3.2645622808703867e-07, "loss": 0.3933, "step": 14173 }, { "epoch": 0.8878310026777745, "grad_norm": 0.7569357064970345, "learning_rate": 3.2609579216768163e-07, "loss": 0.3457, "step": 14174 }, { "epoch": 0.8878936406771167, "grad_norm": 0.9429824826366419, "learning_rate": 3.257355486270891e-07, "loss": 0.4076, "step": 14175 }, { "epoch": 0.8879562786764591, "grad_norm": 0.8919815355646258, "learning_rate": 3.253754974800888e-07, "loss": 0.3822, "step": 14176 }, { "epoch": 0.8880189166758014, "grad_norm": 0.825935006127046, "learning_rate": 3.250156387415015e-07, "loss": 0.3509, "step": 14177 }, { "epoch": 0.8880815546751437, "grad_norm": 0.7744085478657369, "learning_rate": 3.2465597242613835e-07, "loss": 0.3299, "step": 14178 }, { "epoch": 0.888144192674486, "grad_norm": 0.9164533516503423, "learning_rate": 3.242964985488029e-07, "loss": 0.3768, "step": 14179 }, { "epoch": 0.8882068306738283, "grad_norm": 0.952609441245086, "learning_rate": 3.2393721712429106e-07, "loss": 0.3917, "step": 14180 }, { "epoch": 0.8882694686731706, "grad_norm": 0.9121089239973922, "learning_rate": 3.2357812816739163e-07, "loss": 0.4207, "step": 14181 }, { "epoch": 0.8883321066725128, "grad_norm": 0.5761341837018129, "learning_rate": 3.232192316928845e-07, "loss": 0.4534, "step": 14182 }, { "epoch": 0.8883947446718552, "grad_norm": 0.8910453729738204, "learning_rate": 3.228605277155411e-07, "loss": 0.3807, "step": 14183 }, { "epoch": 0.8884573826711974, "grad_norm": 0.8834185338363633, "learning_rate": 3.2250201625012633e-07, "loss": 0.4102, "step": 14184 }, { "epoch": 0.8885200206705398, "grad_norm": 0.9100211816038437, "learning_rate": 3.221436973113978e-07, "loss": 0.416, "step": 14185 }, { "epoch": 0.8885826586698821, "grad_norm": 0.8950916872352738, "learning_rate": 3.2178557091410265e-07, "loss": 0.3744, "step": 14186 }, { "epoch": 0.8886452966692244, "grad_norm": 0.8872764601220826, "learning_rate": 3.2142763707298183e-07, "loss": 0.4199, "step": 14187 }, { "epoch": 0.8887079346685667, "grad_norm": 0.8701717967527733, "learning_rate": 3.210698958027675e-07, "loss": 0.4043, "step": 14188 }, { "epoch": 0.888770572667909, "grad_norm": 0.825321906504695, "learning_rate": 3.20712347118185e-07, "loss": 0.3928, "step": 14189 }, { "epoch": 0.8888332106672513, "grad_norm": 0.8875606256155797, "learning_rate": 3.2035499103395097e-07, "loss": 0.3745, "step": 14190 }, { "epoch": 0.8888958486665935, "grad_norm": 0.8754503638669873, "learning_rate": 3.199978275647736e-07, "loss": 0.4061, "step": 14191 }, { "epoch": 0.8889584866659359, "grad_norm": 0.9496106093456748, "learning_rate": 3.196408567253545e-07, "loss": 0.4285, "step": 14192 }, { "epoch": 0.8890211246652782, "grad_norm": 0.9545679278286293, "learning_rate": 3.192840785303858e-07, "loss": 0.411, "step": 14193 }, { "epoch": 0.8890837626646205, "grad_norm": 0.8691831683280746, "learning_rate": 3.189274929945541e-07, "loss": 0.3841, "step": 14194 }, { "epoch": 0.8891464006639628, "grad_norm": 0.9208601229239332, "learning_rate": 3.1857110013253534e-07, "loss": 0.4084, "step": 14195 }, { "epoch": 0.8892090386633051, "grad_norm": 0.8446722442649119, "learning_rate": 3.1821489995899956e-07, "loss": 0.3804, "step": 14196 }, { "epoch": 0.8892716766626474, "grad_norm": 0.9171523850020002, "learning_rate": 3.178588924886067e-07, "loss": 0.4205, "step": 14197 }, { "epoch": 0.8893343146619896, "grad_norm": 0.8722283455678899, "learning_rate": 3.175030777360116e-07, "loss": 0.3822, "step": 14198 }, { "epoch": 0.889396952661332, "grad_norm": 0.9258630987631375, "learning_rate": 3.171474557158588e-07, "loss": 0.4346, "step": 14199 }, { "epoch": 0.8894595906606743, "grad_norm": 0.9061721798033777, "learning_rate": 3.1679202644278484e-07, "loss": 0.3447, "step": 14200 }, { "epoch": 0.8895222286600166, "grad_norm": 0.8432470295542694, "learning_rate": 3.1643678993142135e-07, "loss": 0.3656, "step": 14201 }, { "epoch": 0.8895848666593589, "grad_norm": 0.6018727073089741, "learning_rate": 3.1608174619638775e-07, "loss": 0.4467, "step": 14202 }, { "epoch": 0.8896475046587012, "grad_norm": 0.8722214601106364, "learning_rate": 3.1572689525230014e-07, "loss": 0.388, "step": 14203 }, { "epoch": 0.8897101426580435, "grad_norm": 0.8516150906680048, "learning_rate": 3.153722371137624e-07, "loss": 0.3683, "step": 14204 }, { "epoch": 0.8897727806573859, "grad_norm": 0.9024221976879122, "learning_rate": 3.150177717953734e-07, "loss": 0.3456, "step": 14205 }, { "epoch": 0.8898354186567281, "grad_norm": 0.873899444351979, "learning_rate": 3.14663499311722e-07, "loss": 0.3616, "step": 14206 }, { "epoch": 0.8898980566560704, "grad_norm": 0.8368910439334751, "learning_rate": 3.14309419677391e-07, "loss": 0.3562, "step": 14207 }, { "epoch": 0.8899606946554127, "grad_norm": 0.8463259891920614, "learning_rate": 3.139555329069538e-07, "loss": 0.3561, "step": 14208 }, { "epoch": 0.890023332654755, "grad_norm": 0.8395401272203865, "learning_rate": 3.1360183901497645e-07, "loss": 0.3965, "step": 14209 }, { "epoch": 0.8900859706540973, "grad_norm": 0.8921601699061367, "learning_rate": 3.1324833801601727e-07, "loss": 0.3895, "step": 14210 }, { "epoch": 0.8901486086534396, "grad_norm": 0.8588177079393438, "learning_rate": 3.1289502992462585e-07, "loss": 0.396, "step": 14211 }, { "epoch": 0.890211246652782, "grad_norm": 0.8391175561474526, "learning_rate": 3.12541914755346e-07, "loss": 0.3826, "step": 14212 }, { "epoch": 0.8902738846521242, "grad_norm": 0.8872233729317084, "learning_rate": 3.1218899252271064e-07, "loss": 0.3759, "step": 14213 }, { "epoch": 0.8903365226514666, "grad_norm": 0.8544865413618571, "learning_rate": 3.118362632412464e-07, "loss": 0.4007, "step": 14214 }, { "epoch": 0.8903991606508088, "grad_norm": 0.8111990916113396, "learning_rate": 3.114837269254717e-07, "loss": 0.3793, "step": 14215 }, { "epoch": 0.8904617986501511, "grad_norm": 0.8439602334399934, "learning_rate": 3.1113138358989714e-07, "loss": 0.4032, "step": 14216 }, { "epoch": 0.8905244366494934, "grad_norm": 0.5938635553722339, "learning_rate": 3.1077923324902446e-07, "loss": 0.4466, "step": 14217 }, { "epoch": 0.8905870746488357, "grad_norm": 0.8922524268926572, "learning_rate": 3.104272759173499e-07, "loss": 0.3702, "step": 14218 }, { "epoch": 0.890649712648178, "grad_norm": 0.8401567254487564, "learning_rate": 3.1007551160935847e-07, "loss": 0.381, "step": 14219 }, { "epoch": 0.8907123506475203, "grad_norm": 0.7985084191577794, "learning_rate": 3.0972394033952913e-07, "loss": 0.3544, "step": 14220 }, { "epoch": 0.8907749886468627, "grad_norm": 0.9206210144605022, "learning_rate": 3.093725621223337e-07, "loss": 0.3632, "step": 14221 }, { "epoch": 0.8908376266462049, "grad_norm": 0.8169331511420946, "learning_rate": 3.0902137697223443e-07, "loss": 0.3651, "step": 14222 }, { "epoch": 0.8909002646455472, "grad_norm": 0.8522609145530996, "learning_rate": 3.086703849036854e-07, "loss": 0.3688, "step": 14223 }, { "epoch": 0.8909629026448895, "grad_norm": 0.8836669000377733, "learning_rate": 3.083195859311344e-07, "loss": 0.389, "step": 14224 }, { "epoch": 0.8910255406442318, "grad_norm": 0.8371939977650827, "learning_rate": 3.0796898006902e-07, "loss": 0.3986, "step": 14225 }, { "epoch": 0.8910881786435741, "grad_norm": 0.8424563106499953, "learning_rate": 3.0761856733177273e-07, "loss": 0.3673, "step": 14226 }, { "epoch": 0.8911508166429164, "grad_norm": 0.6295762817358482, "learning_rate": 3.0726834773381673e-07, "loss": 0.4353, "step": 14227 }, { "epoch": 0.8912134546422588, "grad_norm": 0.8528321479668526, "learning_rate": 3.0691832128956657e-07, "loss": 0.3759, "step": 14228 }, { "epoch": 0.891276092641601, "grad_norm": 0.8890280879202304, "learning_rate": 3.0656848801342896e-07, "loss": 0.3764, "step": 14229 }, { "epoch": 0.8913387306409434, "grad_norm": 0.8606850915881802, "learning_rate": 3.062188479198036e-07, "loss": 0.3749, "step": 14230 }, { "epoch": 0.8914013686402856, "grad_norm": 0.8736303435311897, "learning_rate": 3.0586940102308225e-07, "loss": 0.3533, "step": 14231 }, { "epoch": 0.8914640066396279, "grad_norm": 0.6618818023827332, "learning_rate": 3.0552014733764735e-07, "loss": 0.4482, "step": 14232 }, { "epoch": 0.8915266446389702, "grad_norm": 0.8703837610088229, "learning_rate": 3.051710868778746e-07, "loss": 0.4155, "step": 14233 }, { "epoch": 0.8915892826383125, "grad_norm": 0.9094828978281655, "learning_rate": 3.048222196581313e-07, "loss": 0.3973, "step": 14234 }, { "epoch": 0.8916519206376549, "grad_norm": 0.9378379055028611, "learning_rate": 3.044735456927761e-07, "loss": 0.4013, "step": 14235 }, { "epoch": 0.8917145586369971, "grad_norm": 0.7552568791757666, "learning_rate": 3.041250649961619e-07, "loss": 0.3649, "step": 14236 }, { "epoch": 0.8917771966363395, "grad_norm": 0.9076441363414376, "learning_rate": 3.0377677758263224e-07, "loss": 0.4255, "step": 14237 }, { "epoch": 0.8918398346356817, "grad_norm": 0.6843482942907136, "learning_rate": 3.0342868346652065e-07, "loss": 0.4421, "step": 14238 }, { "epoch": 0.8919024726350241, "grad_norm": 0.8312571098340201, "learning_rate": 3.0308078266215733e-07, "loss": 0.3456, "step": 14239 }, { "epoch": 0.8919651106343663, "grad_norm": 0.9339371205812677, "learning_rate": 3.027330751838603e-07, "loss": 0.3615, "step": 14240 }, { "epoch": 0.8920277486337086, "grad_norm": 0.8891884614159249, "learning_rate": 3.0238556104594194e-07, "loss": 0.3855, "step": 14241 }, { "epoch": 0.892090386633051, "grad_norm": 0.9014248571564232, "learning_rate": 3.0203824026270647e-07, "loss": 0.4122, "step": 14242 }, { "epoch": 0.8921530246323932, "grad_norm": 0.9463904306159475, "learning_rate": 3.0169111284844734e-07, "loss": 0.4043, "step": 14243 }, { "epoch": 0.8922156626317356, "grad_norm": 0.8396936380888939, "learning_rate": 3.013441788174554e-07, "loss": 0.3465, "step": 14244 }, { "epoch": 0.8922783006310778, "grad_norm": 0.8422617743237424, "learning_rate": 3.0099743818400915e-07, "loss": 0.39, "step": 14245 }, { "epoch": 0.8923409386304202, "grad_norm": 0.9001467283134987, "learning_rate": 3.006508909623801e-07, "loss": 0.3432, "step": 14246 }, { "epoch": 0.8924035766297624, "grad_norm": 0.8606127445645676, "learning_rate": 3.0030453716683283e-07, "loss": 0.3476, "step": 14247 }, { "epoch": 0.8924662146291047, "grad_norm": 0.8422779015307474, "learning_rate": 2.9995837681162266e-07, "loss": 0.422, "step": 14248 }, { "epoch": 0.8925288526284471, "grad_norm": 0.9865600476303554, "learning_rate": 2.9961240991099815e-07, "loss": 0.4091, "step": 14249 }, { "epoch": 0.8925914906277893, "grad_norm": 0.9040843180141844, "learning_rate": 2.992666364791996e-07, "loss": 0.4058, "step": 14250 }, { "epoch": 0.8926541286271317, "grad_norm": 0.921621279436494, "learning_rate": 2.98921056530459e-07, "loss": 0.4081, "step": 14251 }, { "epoch": 0.8927167666264739, "grad_norm": 0.9154067340901727, "learning_rate": 2.9857567007899936e-07, "loss": 0.3849, "step": 14252 }, { "epoch": 0.8927794046258163, "grad_norm": 0.877227061951378, "learning_rate": 2.9823047713903874e-07, "loss": 0.3624, "step": 14253 }, { "epoch": 0.8928420426251585, "grad_norm": 0.8844977247406983, "learning_rate": 2.9788547772478416e-07, "loss": 0.3796, "step": 14254 }, { "epoch": 0.8929046806245009, "grad_norm": 0.606997204513245, "learning_rate": 2.975406718504359e-07, "loss": 0.4398, "step": 14255 }, { "epoch": 0.8929673186238432, "grad_norm": 0.8936313526669126, "learning_rate": 2.971960595301865e-07, "loss": 0.4026, "step": 14256 }, { "epoch": 0.8930299566231854, "grad_norm": 0.8537703002552546, "learning_rate": 2.968516407782196e-07, "loss": 0.3799, "step": 14257 }, { "epoch": 0.8930925946225278, "grad_norm": 0.9229462677520888, "learning_rate": 2.9650741560871276e-07, "loss": 0.3895, "step": 14258 }, { "epoch": 0.89315523262187, "grad_norm": 0.8241883197760892, "learning_rate": 2.961633840358341e-07, "loss": 0.3579, "step": 14259 }, { "epoch": 0.8932178706212124, "grad_norm": 0.8586553048254622, "learning_rate": 2.9581954607374285e-07, "loss": 0.3643, "step": 14260 }, { "epoch": 0.8932805086205546, "grad_norm": 0.9229188574053797, "learning_rate": 2.9547590173659214e-07, "loss": 0.3675, "step": 14261 }, { "epoch": 0.893343146619897, "grad_norm": 0.6074005858205944, "learning_rate": 2.9513245103852673e-07, "loss": 0.4499, "step": 14262 }, { "epoch": 0.8934057846192393, "grad_norm": 0.8694321991620504, "learning_rate": 2.947891939936831e-07, "loss": 0.3935, "step": 14263 }, { "epoch": 0.8934684226185816, "grad_norm": 0.8003269050269973, "learning_rate": 2.9444613061619e-07, "loss": 0.3572, "step": 14264 }, { "epoch": 0.8935310606179239, "grad_norm": 0.8945601830812837, "learning_rate": 2.941032609201672e-07, "loss": 0.401, "step": 14265 }, { "epoch": 0.8935936986172661, "grad_norm": 0.8558445521079702, "learning_rate": 2.937605849197267e-07, "loss": 0.3892, "step": 14266 }, { "epoch": 0.8936563366166085, "grad_norm": 0.8678800366783391, "learning_rate": 2.934181026289751e-07, "loss": 0.3751, "step": 14267 }, { "epoch": 0.8937189746159507, "grad_norm": 0.8388575190402071, "learning_rate": 2.9307581406200767e-07, "loss": 0.3648, "step": 14268 }, { "epoch": 0.8937816126152931, "grad_norm": 0.8168956216038544, "learning_rate": 2.927337192329127e-07, "loss": 0.3311, "step": 14269 }, { "epoch": 0.8938442506146353, "grad_norm": 0.9052638822604693, "learning_rate": 2.9239181815577276e-07, "loss": 0.3773, "step": 14270 }, { "epoch": 0.8939068886139777, "grad_norm": 0.8580523577522058, "learning_rate": 2.920501108446588e-07, "loss": 0.3686, "step": 14271 }, { "epoch": 0.89396952661332, "grad_norm": 0.6502320752355706, "learning_rate": 2.9170859731363634e-07, "loss": 0.4475, "step": 14272 }, { "epoch": 0.8940321646126622, "grad_norm": 0.8224231152388702, "learning_rate": 2.9136727757676174e-07, "loss": 0.3733, "step": 14273 }, { "epoch": 0.8940948026120046, "grad_norm": 0.8847875245199115, "learning_rate": 2.9102615164808444e-07, "loss": 0.4011, "step": 14274 }, { "epoch": 0.8941574406113468, "grad_norm": 0.8689336099060786, "learning_rate": 2.9068521954164373e-07, "loss": 0.3948, "step": 14275 }, { "epoch": 0.8942200786106892, "grad_norm": 0.7966955632856474, "learning_rate": 2.9034448127147395e-07, "loss": 0.3471, "step": 14276 }, { "epoch": 0.8942827166100314, "grad_norm": 0.9095458048538375, "learning_rate": 2.900039368515994e-07, "loss": 0.3838, "step": 14277 }, { "epoch": 0.8943453546093738, "grad_norm": 0.8702720264499085, "learning_rate": 2.8966358629603674e-07, "loss": 0.4069, "step": 14278 }, { "epoch": 0.8944079926087161, "grad_norm": 0.8350601903000797, "learning_rate": 2.893234296187958e-07, "loss": 0.3752, "step": 14279 }, { "epoch": 0.8944706306080584, "grad_norm": 0.8576602573859469, "learning_rate": 2.88983466833877e-07, "loss": 0.3697, "step": 14280 }, { "epoch": 0.8945332686074007, "grad_norm": 0.8572752220021613, "learning_rate": 2.886436979552726e-07, "loss": 0.3788, "step": 14281 }, { "epoch": 0.8945959066067429, "grad_norm": 0.9052531980586812, "learning_rate": 2.883041229969685e-07, "loss": 0.3847, "step": 14282 }, { "epoch": 0.8946585446060853, "grad_norm": 0.8711931443177112, "learning_rate": 2.879647419729409e-07, "loss": 0.3853, "step": 14283 }, { "epoch": 0.8947211826054275, "grad_norm": 0.583111971250103, "learning_rate": 2.8762555489715904e-07, "loss": 0.4349, "step": 14284 }, { "epoch": 0.8947838206047699, "grad_norm": 0.9131456160112209, "learning_rate": 2.8728656178358403e-07, "loss": 0.3974, "step": 14285 }, { "epoch": 0.8948464586041122, "grad_norm": 0.8617824832701602, "learning_rate": 2.869477626461692e-07, "loss": 0.3968, "step": 14286 }, { "epoch": 0.8949090966034545, "grad_norm": 0.8249753483041508, "learning_rate": 2.866091574988583e-07, "loss": 0.3764, "step": 14287 }, { "epoch": 0.8949717346027968, "grad_norm": 0.8858894821617089, "learning_rate": 2.8627074635559024e-07, "loss": 0.3709, "step": 14288 }, { "epoch": 0.8950343726021391, "grad_norm": 0.8132275951013057, "learning_rate": 2.859325292302928e-07, "loss": 0.3832, "step": 14289 }, { "epoch": 0.8950970106014814, "grad_norm": 0.8707994119636704, "learning_rate": 2.855945061368881e-07, "loss": 0.4024, "step": 14290 }, { "epoch": 0.8951596486008236, "grad_norm": 0.8784313181292672, "learning_rate": 2.852566770892878e-07, "loss": 0.3995, "step": 14291 }, { "epoch": 0.895222286600166, "grad_norm": 0.8252688927769382, "learning_rate": 2.849190421013981e-07, "loss": 0.3196, "step": 14292 }, { "epoch": 0.8952849245995083, "grad_norm": 0.9488189226492064, "learning_rate": 2.845816011871161e-07, "loss": 0.3754, "step": 14293 }, { "epoch": 0.8953475625988506, "grad_norm": 0.8386899188431887, "learning_rate": 2.842443543603296e-07, "loss": 0.3829, "step": 14294 }, { "epoch": 0.8954102005981929, "grad_norm": 0.950962895317383, "learning_rate": 2.8390730163492087e-07, "loss": 0.4105, "step": 14295 }, { "epoch": 0.8954728385975352, "grad_norm": 0.855336416668441, "learning_rate": 2.8357044302476323e-07, "loss": 0.3878, "step": 14296 }, { "epoch": 0.8955354765968775, "grad_norm": 0.8749179516755972, "learning_rate": 2.832337785437222e-07, "loss": 0.3666, "step": 14297 }, { "epoch": 0.8955981145962199, "grad_norm": 0.8372517062641676, "learning_rate": 2.828973082056541e-07, "loss": 0.3914, "step": 14298 }, { "epoch": 0.8956607525955621, "grad_norm": 1.0025817881588908, "learning_rate": 2.825610320244082e-07, "loss": 0.3831, "step": 14299 }, { "epoch": 0.8957233905949044, "grad_norm": 0.8504756876033578, "learning_rate": 2.8222495001382575e-07, "loss": 0.3566, "step": 14300 }, { "epoch": 0.8957860285942467, "grad_norm": 0.8299523301624452, "learning_rate": 2.8188906218774017e-07, "loss": 0.3711, "step": 14301 }, { "epoch": 0.895848666593589, "grad_norm": 0.6952208423337554, "learning_rate": 2.8155336855997583e-07, "loss": 0.4385, "step": 14302 }, { "epoch": 0.8959113045929313, "grad_norm": 0.8895673205881611, "learning_rate": 2.812178691443512e-07, "loss": 0.3872, "step": 14303 }, { "epoch": 0.8959739425922736, "grad_norm": 0.9086479888840019, "learning_rate": 2.8088256395467415e-07, "loss": 0.372, "step": 14304 }, { "epoch": 0.896036580591616, "grad_norm": 0.8486778281935715, "learning_rate": 2.805474530047475e-07, "loss": 0.3725, "step": 14305 }, { "epoch": 0.8960992185909582, "grad_norm": 0.8964174170791154, "learning_rate": 2.8021253630836407e-07, "loss": 0.4062, "step": 14306 }, { "epoch": 0.8961618565903005, "grad_norm": 0.8390722313980362, "learning_rate": 2.7987781387930845e-07, "loss": 0.3729, "step": 14307 }, { "epoch": 0.8962244945896428, "grad_norm": 0.8310924536567917, "learning_rate": 2.795432857313579e-07, "loss": 0.3509, "step": 14308 }, { "epoch": 0.8962871325889851, "grad_norm": 0.8687737278748768, "learning_rate": 2.7920895187828203e-07, "loss": 0.3586, "step": 14309 }, { "epoch": 0.8963497705883274, "grad_norm": 0.8729981348021092, "learning_rate": 2.788748123338414e-07, "loss": 0.3542, "step": 14310 }, { "epoch": 0.8964124085876697, "grad_norm": 0.8188075396144217, "learning_rate": 2.7854086711178953e-07, "loss": 0.3532, "step": 14311 }, { "epoch": 0.896475046587012, "grad_norm": 0.8684432717043526, "learning_rate": 2.782071162258726e-07, "loss": 0.3956, "step": 14312 }, { "epoch": 0.8965376845863543, "grad_norm": 0.9198315314740477, "learning_rate": 2.778735596898263e-07, "loss": 0.4069, "step": 14313 }, { "epoch": 0.8966003225856967, "grad_norm": 0.9048224539095053, "learning_rate": 2.775401975173819e-07, "loss": 0.3881, "step": 14314 }, { "epoch": 0.8966629605850389, "grad_norm": 0.9944439110007334, "learning_rate": 2.77207029722259e-07, "loss": 0.3919, "step": 14315 }, { "epoch": 0.8967255985843812, "grad_norm": 0.9098680227712398, "learning_rate": 2.7687405631817153e-07, "loss": 0.4097, "step": 14316 }, { "epoch": 0.8967882365837235, "grad_norm": 0.8881117467311148, "learning_rate": 2.765412773188247e-07, "loss": 0.3668, "step": 14317 }, { "epoch": 0.8968508745830658, "grad_norm": 0.7936293603409523, "learning_rate": 2.762086927379154e-07, "loss": 0.3444, "step": 14318 }, { "epoch": 0.8969135125824081, "grad_norm": 0.8916321889803542, "learning_rate": 2.758763025891331e-07, "loss": 0.3884, "step": 14319 }, { "epoch": 0.8969761505817504, "grad_norm": 0.8834657223492456, "learning_rate": 2.755441068861586e-07, "loss": 0.3995, "step": 14320 }, { "epoch": 0.8970387885810928, "grad_norm": 0.9089543795199876, "learning_rate": 2.7521210564266544e-07, "loss": 0.3709, "step": 14321 }, { "epoch": 0.897101426580435, "grad_norm": 0.8976842264133892, "learning_rate": 2.7488029887231927e-07, "loss": 0.375, "step": 14322 }, { "epoch": 0.8971640645797774, "grad_norm": 0.8442826184231725, "learning_rate": 2.7454868658877756e-07, "loss": 0.3349, "step": 14323 }, { "epoch": 0.8972267025791196, "grad_norm": 0.8773495453447628, "learning_rate": 2.7421726880568876e-07, "loss": 0.3585, "step": 14324 }, { "epoch": 0.8972893405784619, "grad_norm": 0.9413839925227435, "learning_rate": 2.738860455366943e-07, "loss": 0.4063, "step": 14325 }, { "epoch": 0.8973519785778042, "grad_norm": 0.8913655974760417, "learning_rate": 2.735550167954276e-07, "loss": 0.4182, "step": 14326 }, { "epoch": 0.8974146165771465, "grad_norm": 0.8508440386189441, "learning_rate": 2.7322418259551343e-07, "loss": 0.3927, "step": 14327 }, { "epoch": 0.8974772545764889, "grad_norm": 0.8396421102933385, "learning_rate": 2.728935429505686e-07, "loss": 0.3688, "step": 14328 }, { "epoch": 0.8975398925758311, "grad_norm": 0.8958656884171844, "learning_rate": 2.7256309787420387e-07, "loss": 0.3786, "step": 14329 }, { "epoch": 0.8976025305751735, "grad_norm": 0.9454556329839228, "learning_rate": 2.722328473800184e-07, "loss": 0.3954, "step": 14330 }, { "epoch": 0.8976651685745157, "grad_norm": 0.8256003624733897, "learning_rate": 2.719027914816075e-07, "loss": 0.3552, "step": 14331 }, { "epoch": 0.897727806573858, "grad_norm": 0.6740657054494643, "learning_rate": 2.7157293019255513e-07, "loss": 0.4349, "step": 14332 }, { "epoch": 0.8977904445732003, "grad_norm": 0.6809996305570334, "learning_rate": 2.7124326352643835e-07, "loss": 0.4571, "step": 14333 }, { "epoch": 0.8978530825725426, "grad_norm": 0.9311410540901185, "learning_rate": 2.7091379149682683e-07, "loss": 0.4457, "step": 14334 }, { "epoch": 0.897915720571885, "grad_norm": 0.9027428981278023, "learning_rate": 2.705845141172814e-07, "loss": 0.4034, "step": 14335 }, { "epoch": 0.8979783585712272, "grad_norm": 0.8671635641539329, "learning_rate": 2.70255431401355e-07, "loss": 0.4089, "step": 14336 }, { "epoch": 0.8980409965705696, "grad_norm": 0.8598228437675774, "learning_rate": 2.699265433625925e-07, "loss": 0.4106, "step": 14337 }, { "epoch": 0.8981036345699118, "grad_norm": 0.6301652939104607, "learning_rate": 2.6959785001453187e-07, "loss": 0.446, "step": 14338 }, { "epoch": 0.8981662725692542, "grad_norm": 0.846251183250362, "learning_rate": 2.6926935137070175e-07, "loss": 0.3846, "step": 14339 }, { "epoch": 0.8982289105685964, "grad_norm": 0.9028681905787441, "learning_rate": 2.68941047444623e-07, "loss": 0.3977, "step": 14340 }, { "epoch": 0.8982915485679387, "grad_norm": 0.8050973030138306, "learning_rate": 2.6861293824980874e-07, "loss": 0.3618, "step": 14341 }, { "epoch": 0.898354186567281, "grad_norm": 0.8944452941333808, "learning_rate": 2.682850237997647e-07, "loss": 0.4559, "step": 14342 }, { "epoch": 0.8984168245666233, "grad_norm": 0.8546997562662199, "learning_rate": 2.679573041079875e-07, "loss": 0.3703, "step": 14343 }, { "epoch": 0.8984794625659657, "grad_norm": 0.8273607919730627, "learning_rate": 2.676297791879656e-07, "loss": 0.3732, "step": 14344 }, { "epoch": 0.8985421005653079, "grad_norm": 0.9126584601679723, "learning_rate": 2.6730244905318e-07, "loss": 0.4214, "step": 14345 }, { "epoch": 0.8986047385646503, "grad_norm": 0.8567741548370126, "learning_rate": 2.6697531371710383e-07, "loss": 0.3639, "step": 14346 }, { "epoch": 0.8986673765639925, "grad_norm": 0.886672215601459, "learning_rate": 2.666483731932029e-07, "loss": 0.4224, "step": 14347 }, { "epoch": 0.8987300145633349, "grad_norm": 0.8478267726650823, "learning_rate": 2.663216274949332e-07, "loss": 0.3517, "step": 14348 }, { "epoch": 0.8987926525626772, "grad_norm": 0.6668028519301397, "learning_rate": 2.6599507663574387e-07, "loss": 0.4618, "step": 14349 }, { "epoch": 0.8988552905620194, "grad_norm": 0.9261982836674114, "learning_rate": 2.656687206290759e-07, "loss": 0.4314, "step": 14350 }, { "epoch": 0.8989179285613618, "grad_norm": 0.870072947906364, "learning_rate": 2.653425594883624e-07, "loss": 0.3948, "step": 14351 }, { "epoch": 0.898980566560704, "grad_norm": 0.8733515106669485, "learning_rate": 2.6501659322702765e-07, "loss": 0.3841, "step": 14352 }, { "epoch": 0.8990432045600464, "grad_norm": 0.9430715956108922, "learning_rate": 2.6469082185848926e-07, "loss": 0.3918, "step": 14353 }, { "epoch": 0.8991058425593886, "grad_norm": 0.8545059394755627, "learning_rate": 2.643652453961548e-07, "loss": 0.3895, "step": 14354 }, { "epoch": 0.899168480558731, "grad_norm": 0.897254044339704, "learning_rate": 2.640398638534264e-07, "loss": 0.3723, "step": 14355 }, { "epoch": 0.8992311185580733, "grad_norm": 0.9280590569320921, "learning_rate": 2.6371467724369603e-07, "loss": 0.3733, "step": 14356 }, { "epoch": 0.8992937565574155, "grad_norm": 0.8146484399743495, "learning_rate": 2.633896855803486e-07, "loss": 0.3423, "step": 14357 }, { "epoch": 0.8993563945567579, "grad_norm": 0.8021205909896214, "learning_rate": 2.6306488887676065e-07, "loss": 0.3234, "step": 14358 }, { "epoch": 0.8994190325561001, "grad_norm": 0.6088044986905019, "learning_rate": 2.627402871463014e-07, "loss": 0.4631, "step": 14359 }, { "epoch": 0.8994816705554425, "grad_norm": 0.7854999457070483, "learning_rate": 2.6241588040233136e-07, "loss": 0.3688, "step": 14360 }, { "epoch": 0.8995443085547847, "grad_norm": 0.8757503344093331, "learning_rate": 2.6209166865820257e-07, "loss": 0.3869, "step": 14361 }, { "epoch": 0.8996069465541271, "grad_norm": 0.8611928481024373, "learning_rate": 2.617676519272605e-07, "loss": 0.3522, "step": 14362 }, { "epoch": 0.8996695845534693, "grad_norm": 0.8197197451050644, "learning_rate": 2.6144383022284056e-07, "loss": 0.355, "step": 14363 }, { "epoch": 0.8997322225528117, "grad_norm": 0.8730077339880595, "learning_rate": 2.6112020355827327e-07, "loss": 0.3721, "step": 14364 }, { "epoch": 0.899794860552154, "grad_norm": 0.9199556058031785, "learning_rate": 2.6079677194687737e-07, "loss": 0.4029, "step": 14365 }, { "epoch": 0.8998574985514962, "grad_norm": 0.9507776839171186, "learning_rate": 2.6047353540196606e-07, "loss": 0.4324, "step": 14366 }, { "epoch": 0.8999201365508386, "grad_norm": 0.9401565204335052, "learning_rate": 2.601504939368432e-07, "loss": 0.4291, "step": 14367 }, { "epoch": 0.8999827745501808, "grad_norm": 0.9124622722494016, "learning_rate": 2.598276475648065e-07, "loss": 0.4135, "step": 14368 }, { "epoch": 0.9000454125495232, "grad_norm": 0.8798743802062009, "learning_rate": 2.595049962991436e-07, "loss": 0.3665, "step": 14369 }, { "epoch": 0.9001080505488654, "grad_norm": 0.8726020208403374, "learning_rate": 2.5918254015313506e-07, "loss": 0.3597, "step": 14370 }, { "epoch": 0.9001706885482078, "grad_norm": 0.8950567930691518, "learning_rate": 2.588602791400524e-07, "loss": 0.4136, "step": 14371 }, { "epoch": 0.9002333265475501, "grad_norm": 0.5703242044518737, "learning_rate": 2.585382132731606e-07, "loss": 0.4336, "step": 14372 }, { "epoch": 0.9002959645468924, "grad_norm": 0.8910507101092005, "learning_rate": 2.582163425657169e-07, "loss": 0.3723, "step": 14373 }, { "epoch": 0.9003586025462347, "grad_norm": 0.878492075785251, "learning_rate": 2.5789466703096787e-07, "loss": 0.4246, "step": 14374 }, { "epoch": 0.9004212405455769, "grad_norm": 0.870934467384153, "learning_rate": 2.5757318668215514e-07, "loss": 0.3796, "step": 14375 }, { "epoch": 0.9004838785449193, "grad_norm": 0.9577991353792757, "learning_rate": 2.5725190153250925e-07, "loss": 0.4078, "step": 14376 }, { "epoch": 0.9005465165442615, "grad_norm": 0.6815931758777708, "learning_rate": 2.5693081159525626e-07, "loss": 0.4406, "step": 14377 }, { "epoch": 0.9006091545436039, "grad_norm": 0.8782054545509501, "learning_rate": 2.566099168836117e-07, "loss": 0.3724, "step": 14378 }, { "epoch": 0.9006717925429462, "grad_norm": 0.8951872034579966, "learning_rate": 2.562892174107828e-07, "loss": 0.3855, "step": 14379 }, { "epoch": 0.9007344305422885, "grad_norm": 0.8450140842002598, "learning_rate": 2.559687131899702e-07, "loss": 0.3798, "step": 14380 }, { "epoch": 0.9007970685416308, "grad_norm": 0.882098244326199, "learning_rate": 2.55648404234366e-07, "loss": 0.4098, "step": 14381 }, { "epoch": 0.900859706540973, "grad_norm": 0.7882850187339416, "learning_rate": 2.553282905571541e-07, "loss": 0.3617, "step": 14382 }, { "epoch": 0.9009223445403154, "grad_norm": 0.866853563820558, "learning_rate": 2.5500837217151075e-07, "loss": 0.3808, "step": 14383 }, { "epoch": 0.9009849825396576, "grad_norm": 0.5931417882276466, "learning_rate": 2.5468864909060307e-07, "loss": 0.4332, "step": 14384 }, { "epoch": 0.901047620539, "grad_norm": 0.8585313129115665, "learning_rate": 2.5436912132759175e-07, "loss": 0.3963, "step": 14385 }, { "epoch": 0.9011102585383423, "grad_norm": 0.870269729303079, "learning_rate": 2.540497888956273e-07, "loss": 0.4012, "step": 14386 }, { "epoch": 0.9011728965376846, "grad_norm": 0.8722951040171596, "learning_rate": 2.5373065180785525e-07, "loss": 0.3298, "step": 14387 }, { "epoch": 0.9012355345370269, "grad_norm": 0.5912488748131159, "learning_rate": 2.534117100774108e-07, "loss": 0.452, "step": 14388 }, { "epoch": 0.9012981725363692, "grad_norm": 0.8082891855073645, "learning_rate": 2.5309296371742007e-07, "loss": 0.3646, "step": 14389 }, { "epoch": 0.9013608105357115, "grad_norm": 0.8368153230715094, "learning_rate": 2.5277441274100536e-07, "loss": 0.3685, "step": 14390 }, { "epoch": 0.9014234485350537, "grad_norm": 0.8260312454815939, "learning_rate": 2.524560571612761e-07, "loss": 0.3577, "step": 14391 }, { "epoch": 0.9014860865343961, "grad_norm": 0.8992186601616257, "learning_rate": 2.5213789699133737e-07, "loss": 0.3852, "step": 14392 }, { "epoch": 0.9015487245337384, "grad_norm": 0.9142421357377482, "learning_rate": 2.518199322442838e-07, "loss": 0.4073, "step": 14393 }, { "epoch": 0.9016113625330807, "grad_norm": 0.8416155840986792, "learning_rate": 2.515021629332032e-07, "loss": 0.3994, "step": 14394 }, { "epoch": 0.901674000532423, "grad_norm": 0.874397983904406, "learning_rate": 2.511845890711745e-07, "loss": 0.3862, "step": 14395 }, { "epoch": 0.9017366385317653, "grad_norm": 0.8956852623137096, "learning_rate": 2.508672106712701e-07, "loss": 0.3447, "step": 14396 }, { "epoch": 0.9017992765311076, "grad_norm": 0.8350867065223302, "learning_rate": 2.5055002774655226e-07, "loss": 0.3557, "step": 14397 }, { "epoch": 0.90186191453045, "grad_norm": 0.8831885074392142, "learning_rate": 2.502330403100767e-07, "loss": 0.3745, "step": 14398 }, { "epoch": 0.9019245525297922, "grad_norm": 0.8241025712588292, "learning_rate": 2.499162483748913e-07, "loss": 0.3666, "step": 14399 }, { "epoch": 0.9019871905291345, "grad_norm": 0.9751017462595527, "learning_rate": 2.4959965195403504e-07, "loss": 0.459, "step": 14400 }, { "epoch": 0.9020498285284768, "grad_norm": 0.8480102596897426, "learning_rate": 2.492832510605386e-07, "loss": 0.3642, "step": 14401 }, { "epoch": 0.9021124665278191, "grad_norm": 0.8345207767022091, "learning_rate": 2.4896704570742504e-07, "loss": 0.3952, "step": 14402 }, { "epoch": 0.9021751045271614, "grad_norm": 0.9889788454504901, "learning_rate": 2.486510359077099e-07, "loss": 0.3534, "step": 14403 }, { "epoch": 0.9022377425265037, "grad_norm": 0.890864050087619, "learning_rate": 2.483352216743995e-07, "loss": 0.3486, "step": 14404 }, { "epoch": 0.902300380525846, "grad_norm": 0.8603018663919936, "learning_rate": 2.4801960302049397e-07, "loss": 0.3897, "step": 14405 }, { "epoch": 0.9023630185251883, "grad_norm": 0.9734171398842055, "learning_rate": 2.47704179958983e-07, "loss": 0.4398, "step": 14406 }, { "epoch": 0.9024256565245307, "grad_norm": 0.8145832290407347, "learning_rate": 2.4738895250285054e-07, "loss": 0.3764, "step": 14407 }, { "epoch": 0.9024882945238729, "grad_norm": 0.9308113785097916, "learning_rate": 2.4707392066507077e-07, "loss": 0.4078, "step": 14408 }, { "epoch": 0.9025509325232152, "grad_norm": 0.8892975092171601, "learning_rate": 2.46759084458611e-07, "loss": 0.3873, "step": 14409 }, { "epoch": 0.9026135705225575, "grad_norm": 0.8944351696909383, "learning_rate": 2.464444438964292e-07, "loss": 0.4194, "step": 14410 }, { "epoch": 0.9026762085218998, "grad_norm": 0.9006192166296613, "learning_rate": 2.461299989914767e-07, "loss": 0.3861, "step": 14411 }, { "epoch": 0.9027388465212421, "grad_norm": 0.8287012517650527, "learning_rate": 2.458157497566954e-07, "loss": 0.3562, "step": 14412 }, { "epoch": 0.9028014845205844, "grad_norm": 0.8554093395783705, "learning_rate": 2.4550169620501996e-07, "loss": 0.4021, "step": 14413 }, { "epoch": 0.9028641225199268, "grad_norm": 0.8545475626769443, "learning_rate": 2.451878383493778e-07, "loss": 0.3919, "step": 14414 }, { "epoch": 0.902926760519269, "grad_norm": 0.8581170571147995, "learning_rate": 2.4487417620268625e-07, "loss": 0.3592, "step": 14415 }, { "epoch": 0.9029893985186113, "grad_norm": 0.7824429965350672, "learning_rate": 2.445607097778563e-07, "loss": 0.3474, "step": 14416 }, { "epoch": 0.9030520365179536, "grad_norm": 0.8966511671281959, "learning_rate": 2.442474390877908e-07, "loss": 0.3853, "step": 14417 }, { "epoch": 0.9031146745172959, "grad_norm": 0.8899360853077366, "learning_rate": 2.4393436414538287e-07, "loss": 0.3693, "step": 14418 }, { "epoch": 0.9031773125166382, "grad_norm": 0.8783688488496341, "learning_rate": 2.4362148496351987e-07, "loss": 0.3135, "step": 14419 }, { "epoch": 0.9032399505159805, "grad_norm": 0.8232678739240394, "learning_rate": 2.4330880155507887e-07, "loss": 0.3551, "step": 14420 }, { "epoch": 0.9033025885153229, "grad_norm": 0.8661415200026101, "learning_rate": 2.4299631393293057e-07, "loss": 0.3891, "step": 14421 }, { "epoch": 0.9033652265146651, "grad_norm": 0.6366136877983773, "learning_rate": 2.4268402210993633e-07, "loss": 0.4485, "step": 14422 }, { "epoch": 0.9034278645140075, "grad_norm": 0.9245819591797088, "learning_rate": 2.4237192609895157e-07, "loss": 0.4136, "step": 14423 }, { "epoch": 0.9034905025133497, "grad_norm": 0.9034428390435895, "learning_rate": 2.4206002591282027e-07, "loss": 0.4049, "step": 14424 }, { "epoch": 0.903553140512692, "grad_norm": 0.7624294905256396, "learning_rate": 2.4174832156438287e-07, "loss": 0.3509, "step": 14425 }, { "epoch": 0.9036157785120343, "grad_norm": 0.8651396353382869, "learning_rate": 2.4143681306646684e-07, "loss": 0.3531, "step": 14426 }, { "epoch": 0.9036784165113766, "grad_norm": 0.863425376649713, "learning_rate": 2.411255004318952e-07, "loss": 0.3939, "step": 14427 }, { "epoch": 0.903741054510719, "grad_norm": 0.8872063469276973, "learning_rate": 2.4081438367348163e-07, "loss": 0.4002, "step": 14428 }, { "epoch": 0.9038036925100612, "grad_norm": 0.9009244407051864, "learning_rate": 2.405034628040309e-07, "loss": 0.3766, "step": 14429 }, { "epoch": 0.9038663305094036, "grad_norm": 0.9743765265445637, "learning_rate": 2.401927378363411e-07, "loss": 0.4346, "step": 14430 }, { "epoch": 0.9039289685087458, "grad_norm": 0.8719161107373345, "learning_rate": 2.398822087832009e-07, "loss": 0.3756, "step": 14431 }, { "epoch": 0.9039916065080882, "grad_norm": 0.9690463532025732, "learning_rate": 2.395718756573928e-07, "loss": 0.3966, "step": 14432 }, { "epoch": 0.9040542445074304, "grad_norm": 0.8810532919925511, "learning_rate": 2.3926173847169056e-07, "loss": 0.3809, "step": 14433 }, { "epoch": 0.9041168825067727, "grad_norm": 0.8627233524954493, "learning_rate": 2.3895179723885886e-07, "loss": 0.4181, "step": 14434 }, { "epoch": 0.904179520506115, "grad_norm": 0.8478546718392607, "learning_rate": 2.3864205197165426e-07, "loss": 0.3682, "step": 14435 }, { "epoch": 0.9042421585054573, "grad_norm": 0.8659823669810899, "learning_rate": 2.3833250268282705e-07, "loss": 0.3619, "step": 14436 }, { "epoch": 0.9043047965047997, "grad_norm": 0.8798139203433948, "learning_rate": 2.3802314938511817e-07, "loss": 0.3961, "step": 14437 }, { "epoch": 0.9043674345041419, "grad_norm": 0.8696691348029164, "learning_rate": 2.3771399209125968e-07, "loss": 0.4133, "step": 14438 }, { "epoch": 0.9044300725034843, "grad_norm": 0.885283877231372, "learning_rate": 2.3740503081397693e-07, "loss": 0.3783, "step": 14439 }, { "epoch": 0.9044927105028265, "grad_norm": 0.9248617635464865, "learning_rate": 2.3709626556598752e-07, "loss": 0.4145, "step": 14440 }, { "epoch": 0.9045553485021688, "grad_norm": 0.9204900670019661, "learning_rate": 2.3678769635999909e-07, "loss": 0.4511, "step": 14441 }, { "epoch": 0.9046179865015112, "grad_norm": 0.835234136896742, "learning_rate": 2.3647932320871426e-07, "loss": 0.3832, "step": 14442 }, { "epoch": 0.9046806245008534, "grad_norm": 0.610582583527176, "learning_rate": 2.3617114612482394e-07, "loss": 0.4313, "step": 14443 }, { "epoch": 0.9047432625001958, "grad_norm": 0.8192334585012531, "learning_rate": 2.3586316512101416e-07, "loss": 0.3423, "step": 14444 }, { "epoch": 0.904805900499538, "grad_norm": 0.9610886973173783, "learning_rate": 2.3555538020996027e-07, "loss": 0.4279, "step": 14445 }, { "epoch": 0.9048685384988804, "grad_norm": 0.8353728850468612, "learning_rate": 2.3524779140433163e-07, "loss": 0.3574, "step": 14446 }, { "epoch": 0.9049311764982226, "grad_norm": 0.8605998246142004, "learning_rate": 2.3494039871678808e-07, "loss": 0.3904, "step": 14447 }, { "epoch": 0.904993814497565, "grad_norm": 0.8132921120647685, "learning_rate": 2.3463320215998176e-07, "loss": 0.3601, "step": 14448 }, { "epoch": 0.9050564524969072, "grad_norm": 0.8435621411626051, "learning_rate": 2.3432620174655752e-07, "loss": 0.354, "step": 14449 }, { "epoch": 0.9051190904962495, "grad_norm": 0.9197515517514555, "learning_rate": 2.3401939748915137e-07, "loss": 0.3946, "step": 14450 }, { "epoch": 0.9051817284955919, "grad_norm": 0.8891667347648184, "learning_rate": 2.3371278940039155e-07, "loss": 0.3786, "step": 14451 }, { "epoch": 0.9052443664949341, "grad_norm": 0.862105256016247, "learning_rate": 2.3340637749289796e-07, "loss": 0.3999, "step": 14452 }, { "epoch": 0.9053070044942765, "grad_norm": 0.8836864718129698, "learning_rate": 2.3310016177928274e-07, "loss": 0.3691, "step": 14453 }, { "epoch": 0.9053696424936187, "grad_norm": 0.8920959496937879, "learning_rate": 2.327941422721497e-07, "loss": 0.4249, "step": 14454 }, { "epoch": 0.9054322804929611, "grad_norm": 0.9171864767909105, "learning_rate": 2.3248831898409484e-07, "loss": 0.3998, "step": 14455 }, { "epoch": 0.9054949184923033, "grad_norm": 0.847121601618576, "learning_rate": 2.3218269192770537e-07, "loss": 0.3576, "step": 14456 }, { "epoch": 0.9055575564916457, "grad_norm": 0.8928182277097533, "learning_rate": 2.3187726111556063e-07, "loss": 0.3975, "step": 14457 }, { "epoch": 0.905620194490988, "grad_norm": 0.8739392872772436, "learning_rate": 2.3157202656023335e-07, "loss": 0.3536, "step": 14458 }, { "epoch": 0.9056828324903302, "grad_norm": 0.9051170366784979, "learning_rate": 2.3126698827428572e-07, "loss": 0.416, "step": 14459 }, { "epoch": 0.9057454704896726, "grad_norm": 0.7903708239522551, "learning_rate": 2.309621462702749e-07, "loss": 0.3588, "step": 14460 }, { "epoch": 0.9058081084890148, "grad_norm": 0.8500044732243955, "learning_rate": 2.3065750056074755e-07, "loss": 0.3877, "step": 14461 }, { "epoch": 0.9058707464883572, "grad_norm": 0.9265837096061915, "learning_rate": 2.3035305115824248e-07, "loss": 0.3815, "step": 14462 }, { "epoch": 0.9059333844876994, "grad_norm": 0.8410374295688078, "learning_rate": 2.3004879807529078e-07, "loss": 0.3746, "step": 14463 }, { "epoch": 0.9059960224870418, "grad_norm": 0.9595233541848897, "learning_rate": 2.2974474132441638e-07, "loss": 0.3703, "step": 14464 }, { "epoch": 0.9060586604863841, "grad_norm": 0.8830118589884784, "learning_rate": 2.294408809181331e-07, "loss": 0.372, "step": 14465 }, { "epoch": 0.9061212984857263, "grad_norm": 0.8289650484912829, "learning_rate": 2.2913721686894875e-07, "loss": 0.3637, "step": 14466 }, { "epoch": 0.9061839364850687, "grad_norm": 0.902207428254577, "learning_rate": 2.2883374918936275e-07, "loss": 0.4116, "step": 14467 }, { "epoch": 0.9062465744844109, "grad_norm": 0.9295550773870617, "learning_rate": 2.28530477891864e-07, "loss": 0.4029, "step": 14468 }, { "epoch": 0.9063092124837533, "grad_norm": 0.8569427925556324, "learning_rate": 2.2822740298893754e-07, "loss": 0.377, "step": 14469 }, { "epoch": 0.9063718504830955, "grad_norm": 0.886597382092008, "learning_rate": 2.2792452449305668e-07, "loss": 0.3625, "step": 14470 }, { "epoch": 0.9064344884824379, "grad_norm": 0.9244995358395378, "learning_rate": 2.2762184241668818e-07, "loss": 0.3792, "step": 14471 }, { "epoch": 0.9064971264817802, "grad_norm": 0.8506875288103419, "learning_rate": 2.2731935677229033e-07, "loss": 0.3608, "step": 14472 }, { "epoch": 0.9065597644811225, "grad_norm": 0.839552096796751, "learning_rate": 2.2701706757231378e-07, "loss": 0.373, "step": 14473 }, { "epoch": 0.9066224024804648, "grad_norm": 0.9025039781100302, "learning_rate": 2.267149748291997e-07, "loss": 0.3723, "step": 14474 }, { "epoch": 0.906685040479807, "grad_norm": 0.7967677359527542, "learning_rate": 2.264130785553842e-07, "loss": 0.3693, "step": 14475 }, { "epoch": 0.9067476784791494, "grad_norm": 0.889209257871171, "learning_rate": 2.261113787632918e-07, "loss": 0.3823, "step": 14476 }, { "epoch": 0.9068103164784916, "grad_norm": 0.8992084273983385, "learning_rate": 2.2580987546534207e-07, "loss": 0.3785, "step": 14477 }, { "epoch": 0.906872954477834, "grad_norm": 0.9180612474431262, "learning_rate": 2.2550856867394278e-07, "loss": 0.4192, "step": 14478 }, { "epoch": 0.9069355924771763, "grad_norm": 0.9169431105324396, "learning_rate": 2.2520745840149738e-07, "loss": 0.3898, "step": 14479 }, { "epoch": 0.9069982304765186, "grad_norm": 0.8271375012003271, "learning_rate": 2.2490654466039986e-07, "loss": 0.3717, "step": 14480 }, { "epoch": 0.9070608684758609, "grad_norm": 0.5951050444114396, "learning_rate": 2.2460582746303472e-07, "loss": 0.4636, "step": 14481 }, { "epoch": 0.9071235064752032, "grad_norm": 0.8218655199972478, "learning_rate": 2.2430530682178043e-07, "loss": 0.3322, "step": 14482 }, { "epoch": 0.9071861444745455, "grad_norm": 2.704555899610304, "learning_rate": 2.240049827490054e-07, "loss": 0.4291, "step": 14483 }, { "epoch": 0.9072487824738877, "grad_norm": 0.9365497208953425, "learning_rate": 2.2370485525707197e-07, "loss": 0.3821, "step": 14484 }, { "epoch": 0.9073114204732301, "grad_norm": 0.834089950420208, "learning_rate": 2.234049243583336e-07, "loss": 0.3638, "step": 14485 }, { "epoch": 0.9073740584725724, "grad_norm": 0.9065284463869558, "learning_rate": 2.2310519006513542e-07, "loss": 0.3645, "step": 14486 }, { "epoch": 0.9074366964719147, "grad_norm": 0.845364635226946, "learning_rate": 2.228056523898131e-07, "loss": 0.3629, "step": 14487 }, { "epoch": 0.907499334471257, "grad_norm": 0.8925117627911082, "learning_rate": 2.2250631134469792e-07, "loss": 0.4067, "step": 14488 }, { "epoch": 0.9075619724705993, "grad_norm": 0.9958688989535294, "learning_rate": 2.2220716694210942e-07, "loss": 0.3912, "step": 14489 }, { "epoch": 0.9076246104699416, "grad_norm": 0.8268102721594539, "learning_rate": 2.2190821919436056e-07, "loss": 0.3593, "step": 14490 }, { "epoch": 0.9076872484692838, "grad_norm": 0.911712917894971, "learning_rate": 2.2160946811375596e-07, "loss": 0.4143, "step": 14491 }, { "epoch": 0.9077498864686262, "grad_norm": 0.8334587493760908, "learning_rate": 2.2131091371259295e-07, "loss": 0.3792, "step": 14492 }, { "epoch": 0.9078125244679685, "grad_norm": 0.8448920211394677, "learning_rate": 2.2101255600316007e-07, "loss": 0.3878, "step": 14493 }, { "epoch": 0.9078751624673108, "grad_norm": 0.6182964019852157, "learning_rate": 2.2071439499773694e-07, "loss": 0.4557, "step": 14494 }, { "epoch": 0.9079378004666531, "grad_norm": 0.8754175588265944, "learning_rate": 2.2041643070859653e-07, "loss": 0.4313, "step": 14495 }, { "epoch": 0.9080004384659954, "grad_norm": 0.8996053292381104, "learning_rate": 2.2011866314800234e-07, "loss": 0.3983, "step": 14496 }, { "epoch": 0.9080630764653377, "grad_norm": 0.9542122239884184, "learning_rate": 2.198210923282118e-07, "loss": 0.4016, "step": 14497 }, { "epoch": 0.90812571446468, "grad_norm": 0.8963903683169457, "learning_rate": 2.1952371826147233e-07, "loss": 0.3935, "step": 14498 }, { "epoch": 0.9081883524640223, "grad_norm": 0.9305797885727992, "learning_rate": 2.1922654096002416e-07, "loss": 0.4286, "step": 14499 }, { "epoch": 0.9082509904633645, "grad_norm": 0.8976302851539563, "learning_rate": 2.1892956043609804e-07, "loss": 0.3982, "step": 14500 }, { "epoch": 0.9083136284627069, "grad_norm": 0.7840204598265965, "learning_rate": 2.186327767019192e-07, "loss": 0.3758, "step": 14501 }, { "epoch": 0.9083762664620492, "grad_norm": 0.7946842154361382, "learning_rate": 2.1833618976970284e-07, "loss": 0.3407, "step": 14502 }, { "epoch": 0.9084389044613915, "grad_norm": 0.8662399558515259, "learning_rate": 2.1803979965165646e-07, "loss": 0.3839, "step": 14503 }, { "epoch": 0.9085015424607338, "grad_norm": 0.8784802830767026, "learning_rate": 2.177436063599797e-07, "loss": 0.4042, "step": 14504 }, { "epoch": 0.9085641804600761, "grad_norm": 0.8631294216620459, "learning_rate": 2.1744760990686287e-07, "loss": 0.3569, "step": 14505 }, { "epoch": 0.9086268184594184, "grad_norm": 0.8819106150962902, "learning_rate": 2.1715181030449062e-07, "loss": 0.3829, "step": 14506 }, { "epoch": 0.9086894564587608, "grad_norm": 0.8902747930937875, "learning_rate": 2.1685620756503766e-07, "loss": 0.3732, "step": 14507 }, { "epoch": 0.908752094458103, "grad_norm": 0.899150800174386, "learning_rate": 2.1656080170067094e-07, "loss": 0.4179, "step": 14508 }, { "epoch": 0.9088147324574453, "grad_norm": 0.8036554442096415, "learning_rate": 2.1626559272354908e-07, "loss": 0.3514, "step": 14509 }, { "epoch": 0.9088773704567876, "grad_norm": 0.8831437374791827, "learning_rate": 2.1597058064582398e-07, "loss": 0.3767, "step": 14510 }, { "epoch": 0.9089400084561299, "grad_norm": 0.8124511399584443, "learning_rate": 2.156757654796371e-07, "loss": 0.3401, "step": 14511 }, { "epoch": 0.9090026464554722, "grad_norm": 0.6196710157677477, "learning_rate": 2.1538114723712422e-07, "loss": 0.4505, "step": 14512 }, { "epoch": 0.9090652844548145, "grad_norm": 0.9215828705768413, "learning_rate": 2.150867259304118e-07, "loss": 0.4375, "step": 14513 }, { "epoch": 0.9091279224541569, "grad_norm": 0.8101617248335953, "learning_rate": 2.1479250157161735e-07, "loss": 0.34, "step": 14514 }, { "epoch": 0.9091905604534991, "grad_norm": 0.8623013427544753, "learning_rate": 2.144984741728512e-07, "loss": 0.3252, "step": 14515 }, { "epoch": 0.9092531984528415, "grad_norm": 0.8268906408811817, "learning_rate": 2.1420464374621642e-07, "loss": 0.3766, "step": 14516 }, { "epoch": 0.9093158364521837, "grad_norm": 0.8872473569589333, "learning_rate": 2.1391101030380668e-07, "loss": 0.3596, "step": 14517 }, { "epoch": 0.909378474451526, "grad_norm": 0.9122549876090975, "learning_rate": 2.1361757385770843e-07, "loss": 0.4184, "step": 14518 }, { "epoch": 0.9094411124508683, "grad_norm": 0.870424847267297, "learning_rate": 2.1332433441999923e-07, "loss": 0.3954, "step": 14519 }, { "epoch": 0.9095037504502106, "grad_norm": 0.834374117451356, "learning_rate": 2.1303129200274886e-07, "loss": 0.3725, "step": 14520 }, { "epoch": 0.909566388449553, "grad_norm": 0.8284406014822289, "learning_rate": 2.1273844661801878e-07, "loss": 0.3879, "step": 14521 }, { "epoch": 0.9096290264488952, "grad_norm": 0.5969791788161002, "learning_rate": 2.1244579827786326e-07, "loss": 0.4292, "step": 14522 }, { "epoch": 0.9096916644482376, "grad_norm": 0.5629512855859092, "learning_rate": 2.121533469943271e-07, "loss": 0.4378, "step": 14523 }, { "epoch": 0.9097543024475798, "grad_norm": 0.822374010894613, "learning_rate": 2.1186109277944732e-07, "loss": 0.3504, "step": 14524 }, { "epoch": 0.9098169404469221, "grad_norm": 0.8930091055400947, "learning_rate": 2.1156903564525432e-07, "loss": 0.3761, "step": 14525 }, { "epoch": 0.9098795784462644, "grad_norm": 0.8333905139450267, "learning_rate": 2.1127717560376794e-07, "loss": 0.3563, "step": 14526 }, { "epoch": 0.9099422164456067, "grad_norm": 0.9192758876531361, "learning_rate": 2.109855126670024e-07, "loss": 0.43, "step": 14527 }, { "epoch": 0.910004854444949, "grad_norm": 0.9638136796342194, "learning_rate": 2.1069404684696148e-07, "loss": 0.3936, "step": 14528 }, { "epoch": 0.9100674924442913, "grad_norm": 0.7845641270774674, "learning_rate": 2.1040277815564336e-07, "loss": 0.3731, "step": 14529 }, { "epoch": 0.9101301304436337, "grad_norm": 0.7751697757840038, "learning_rate": 2.1011170660503565e-07, "loss": 0.3296, "step": 14530 }, { "epoch": 0.9101927684429759, "grad_norm": 0.8462184633834104, "learning_rate": 2.098208322071188e-07, "loss": 0.3655, "step": 14531 }, { "epoch": 0.9102554064423183, "grad_norm": 0.8193606633065464, "learning_rate": 2.0953015497386597e-07, "loss": 0.3068, "step": 14532 }, { "epoch": 0.9103180444416605, "grad_norm": 0.833623164964156, "learning_rate": 2.092396749172404e-07, "loss": 0.3507, "step": 14533 }, { "epoch": 0.9103806824410028, "grad_norm": 0.843677412879372, "learning_rate": 2.0894939204919916e-07, "loss": 0.399, "step": 14534 }, { "epoch": 0.9104433204403451, "grad_norm": 0.9164679358555833, "learning_rate": 2.0865930638169053e-07, "loss": 0.3845, "step": 14535 }, { "epoch": 0.9105059584396874, "grad_norm": 0.8918338834525462, "learning_rate": 2.0836941792665432e-07, "loss": 0.4017, "step": 14536 }, { "epoch": 0.9105685964390298, "grad_norm": 0.832323405719235, "learning_rate": 2.0807972669602217e-07, "loss": 0.3889, "step": 14537 }, { "epoch": 0.910631234438372, "grad_norm": 0.8606160096063852, "learning_rate": 2.077902327017184e-07, "loss": 0.3694, "step": 14538 }, { "epoch": 0.9106938724377144, "grad_norm": 0.8965167049908673, "learning_rate": 2.0750093595565735e-07, "loss": 0.3929, "step": 14539 }, { "epoch": 0.9107565104370566, "grad_norm": 0.6184377201607902, "learning_rate": 2.0721183646974785e-07, "loss": 0.4522, "step": 14540 }, { "epoch": 0.910819148436399, "grad_norm": 0.8609456763313632, "learning_rate": 2.069229342558887e-07, "loss": 0.3912, "step": 14541 }, { "epoch": 0.9108817864357412, "grad_norm": 0.8419065430874886, "learning_rate": 2.0663422932597044e-07, "loss": 0.3769, "step": 14542 }, { "epoch": 0.9109444244350835, "grad_norm": 0.9072836301890276, "learning_rate": 2.0634572169187682e-07, "loss": 0.4335, "step": 14543 }, { "epoch": 0.9110070624344259, "grad_norm": 0.6280693700625949, "learning_rate": 2.0605741136548396e-07, "loss": 0.4115, "step": 14544 }, { "epoch": 0.9110697004337681, "grad_norm": 0.6510622909595432, "learning_rate": 2.0576929835865732e-07, "loss": 0.4265, "step": 14545 }, { "epoch": 0.9111323384331105, "grad_norm": 0.9589818628430679, "learning_rate": 2.054813826832569e-07, "loss": 0.4292, "step": 14546 }, { "epoch": 0.9111949764324527, "grad_norm": 0.8697673958160035, "learning_rate": 2.0519366435113208e-07, "loss": 0.3421, "step": 14547 }, { "epoch": 0.9112576144317951, "grad_norm": 0.8690575246531955, "learning_rate": 2.0490614337412618e-07, "loss": 0.4229, "step": 14548 }, { "epoch": 0.9113202524311373, "grad_norm": 0.8832377581795572, "learning_rate": 2.046188197640736e-07, "loss": 0.3587, "step": 14549 }, { "epoch": 0.9113828904304796, "grad_norm": 0.8995928388056197, "learning_rate": 2.043316935327999e-07, "loss": 0.388, "step": 14550 }, { "epoch": 0.911445528429822, "grad_norm": 0.8617696179619232, "learning_rate": 2.0404476469212453e-07, "loss": 0.3613, "step": 14551 }, { "epoch": 0.9115081664291642, "grad_norm": 0.8697711551147823, "learning_rate": 2.0375803325385578e-07, "loss": 0.4149, "step": 14552 }, { "epoch": 0.9115708044285066, "grad_norm": 0.9348521317927773, "learning_rate": 2.034714992297976e-07, "loss": 0.4202, "step": 14553 }, { "epoch": 0.9116334424278488, "grad_norm": 0.9044066204784527, "learning_rate": 2.031851626317427e-07, "loss": 0.3445, "step": 14554 }, { "epoch": 0.9116960804271912, "grad_norm": 0.9336030872966831, "learning_rate": 2.0289902347147672e-07, "loss": 0.4229, "step": 14555 }, { "epoch": 0.9117587184265334, "grad_norm": 0.7917761028379916, "learning_rate": 2.0261308176077743e-07, "loss": 0.345, "step": 14556 }, { "epoch": 0.9118213564258758, "grad_norm": 0.8489674624866208, "learning_rate": 2.0232733751141376e-07, "loss": 0.3807, "step": 14557 }, { "epoch": 0.9118839944252181, "grad_norm": 1.2952041033619557, "learning_rate": 2.0204179073514795e-07, "loss": 0.4087, "step": 14558 }, { "epoch": 0.9119466324245603, "grad_norm": 0.8494492475237228, "learning_rate": 2.017564414437312e-07, "loss": 0.414, "step": 14559 }, { "epoch": 0.9120092704239027, "grad_norm": 0.8514332301504445, "learning_rate": 2.0147128964891126e-07, "loss": 0.3797, "step": 14560 }, { "epoch": 0.9120719084232449, "grad_norm": 0.8575108186935899, "learning_rate": 2.0118633536242272e-07, "loss": 0.3692, "step": 14561 }, { "epoch": 0.9121345464225873, "grad_norm": 0.8971263742967712, "learning_rate": 2.0090157859599614e-07, "loss": 0.3958, "step": 14562 }, { "epoch": 0.9121971844219295, "grad_norm": 0.8852048912540622, "learning_rate": 2.0061701936135102e-07, "loss": 0.3729, "step": 14563 }, { "epoch": 0.9122598224212719, "grad_norm": 0.8091321452144214, "learning_rate": 2.0033265767020026e-07, "loss": 0.359, "step": 14564 }, { "epoch": 0.9123224604206142, "grad_norm": 0.8192105544946708, "learning_rate": 2.0004849353424838e-07, "loss": 0.3858, "step": 14565 }, { "epoch": 0.9123850984199565, "grad_norm": 0.9208487360264388, "learning_rate": 1.9976452696519155e-07, "loss": 0.4269, "step": 14566 }, { "epoch": 0.9124477364192988, "grad_norm": 0.8454606741126122, "learning_rate": 1.9948075797471766e-07, "loss": 0.3661, "step": 14567 }, { "epoch": 0.912510374418641, "grad_norm": 0.7926150349362686, "learning_rate": 1.991971865745057e-07, "loss": 0.3617, "step": 14568 }, { "epoch": 0.9125730124179834, "grad_norm": 0.9233191477009968, "learning_rate": 1.9891381277622968e-07, "loss": 0.3615, "step": 14569 }, { "epoch": 0.9126356504173256, "grad_norm": 0.7796172021920009, "learning_rate": 1.986306365915519e-07, "loss": 0.3642, "step": 14570 }, { "epoch": 0.912698288416668, "grad_norm": 0.9302718848533903, "learning_rate": 1.9834765803212807e-07, "loss": 0.3886, "step": 14571 }, { "epoch": 0.9127609264160103, "grad_norm": 0.8437031841415149, "learning_rate": 1.9806487710960664e-07, "loss": 0.3943, "step": 14572 }, { "epoch": 0.9128235644153526, "grad_norm": 0.8238586193302636, "learning_rate": 1.9778229383562608e-07, "loss": 0.3639, "step": 14573 }, { "epoch": 0.9128862024146949, "grad_norm": 0.6012005467216828, "learning_rate": 1.9749990822181707e-07, "loss": 0.443, "step": 14574 }, { "epoch": 0.9129488404140371, "grad_norm": 0.9049530853214635, "learning_rate": 1.9721772027980368e-07, "loss": 0.4005, "step": 14575 }, { "epoch": 0.9130114784133795, "grad_norm": 0.8884528604523468, "learning_rate": 1.9693573002119937e-07, "loss": 0.3957, "step": 14576 }, { "epoch": 0.9130741164127217, "grad_norm": 0.8316383700627232, "learning_rate": 1.966539374576132e-07, "loss": 0.3998, "step": 14577 }, { "epoch": 0.9131367544120641, "grad_norm": 0.8446552122103382, "learning_rate": 1.9637234260064197e-07, "loss": 0.3993, "step": 14578 }, { "epoch": 0.9131993924114064, "grad_norm": 0.823613454066151, "learning_rate": 1.9609094546187647e-07, "loss": 0.3738, "step": 14579 }, { "epoch": 0.9132620304107487, "grad_norm": 0.8448139865984626, "learning_rate": 1.958097460528996e-07, "loss": 0.3841, "step": 14580 }, { "epoch": 0.913324668410091, "grad_norm": 0.8742507250744033, "learning_rate": 1.955287443852849e-07, "loss": 0.3809, "step": 14581 }, { "epoch": 0.9133873064094333, "grad_norm": 0.8457330393278842, "learning_rate": 1.952479404705998e-07, "loss": 0.3741, "step": 14582 }, { "epoch": 0.9134499444087756, "grad_norm": 0.9210817729944507, "learning_rate": 1.949673343204006e-07, "loss": 0.37, "step": 14583 }, { "epoch": 0.9135125824081178, "grad_norm": 0.8044618509076671, "learning_rate": 1.9468692594623805e-07, "loss": 0.3389, "step": 14584 }, { "epoch": 0.9135752204074602, "grad_norm": 0.9182852108223399, "learning_rate": 1.9440671535965239e-07, "loss": 0.4244, "step": 14585 }, { "epoch": 0.9136378584068024, "grad_norm": 0.8561050781748168, "learning_rate": 1.9412670257217936e-07, "loss": 0.3705, "step": 14586 }, { "epoch": 0.9137004964061448, "grad_norm": 0.9158719573305588, "learning_rate": 1.9384688759534363e-07, "loss": 0.4253, "step": 14587 }, { "epoch": 0.9137631344054871, "grad_norm": 0.8078956568747874, "learning_rate": 1.9356727044066103e-07, "loss": 0.3675, "step": 14588 }, { "epoch": 0.9138257724048294, "grad_norm": 0.8358340722977022, "learning_rate": 1.932878511196423e-07, "loss": 0.3763, "step": 14589 }, { "epoch": 0.9138884104041717, "grad_norm": 0.8142360198572128, "learning_rate": 1.9300862964378774e-07, "loss": 0.4056, "step": 14590 }, { "epoch": 0.913951048403514, "grad_norm": 0.6251688781410166, "learning_rate": 1.9272960602459035e-07, "loss": 0.4482, "step": 14591 }, { "epoch": 0.9140136864028563, "grad_norm": 0.9019783659831833, "learning_rate": 1.9245078027353426e-07, "loss": 0.3971, "step": 14592 }, { "epoch": 0.9140763244021985, "grad_norm": 0.9385954198260807, "learning_rate": 1.9217215240209697e-07, "loss": 0.4379, "step": 14593 }, { "epoch": 0.9141389624015409, "grad_norm": 0.799174369169606, "learning_rate": 1.9189372242174542e-07, "loss": 0.3479, "step": 14594 }, { "epoch": 0.9142016004008832, "grad_norm": 0.8477388708884852, "learning_rate": 1.91615490343941e-07, "loss": 0.4093, "step": 14595 }, { "epoch": 0.9142642384002255, "grad_norm": 1.0180491024602643, "learning_rate": 1.9133745618013567e-07, "loss": 0.4367, "step": 14596 }, { "epoch": 0.9143268763995678, "grad_norm": 0.876495455857534, "learning_rate": 1.9105961994177246e-07, "loss": 0.3915, "step": 14597 }, { "epoch": 0.9143895143989101, "grad_norm": 0.9067958793249522, "learning_rate": 1.9078198164028894e-07, "loss": 0.4106, "step": 14598 }, { "epoch": 0.9144521523982524, "grad_norm": 0.8756331128699929, "learning_rate": 1.9050454128711148e-07, "loss": 0.3874, "step": 14599 }, { "epoch": 0.9145147903975946, "grad_norm": 0.9129895395573933, "learning_rate": 1.9022729889365933e-07, "loss": 0.425, "step": 14600 }, { "epoch": 0.914577428396937, "grad_norm": 0.8349924471808255, "learning_rate": 1.8995025447134496e-07, "loss": 0.3732, "step": 14601 }, { "epoch": 0.9146400663962793, "grad_norm": 0.9229007484398715, "learning_rate": 1.8967340803156985e-07, "loss": 0.4004, "step": 14602 }, { "epoch": 0.9147027043956216, "grad_norm": 0.8457595880349401, "learning_rate": 1.89396759585731e-07, "loss": 0.3228, "step": 14603 }, { "epoch": 0.9147653423949639, "grad_norm": 0.8912367320006188, "learning_rate": 1.891203091452143e-07, "loss": 0.3613, "step": 14604 }, { "epoch": 0.9148279803943062, "grad_norm": 0.9113192261735382, "learning_rate": 1.8884405672139838e-07, "loss": 0.3984, "step": 14605 }, { "epoch": 0.9148906183936485, "grad_norm": 0.9363902083437875, "learning_rate": 1.885680023256542e-07, "loss": 0.3652, "step": 14606 }, { "epoch": 0.9149532563929909, "grad_norm": 0.8392296020760586, "learning_rate": 1.8829214596934376e-07, "loss": 0.3635, "step": 14607 }, { "epoch": 0.9150158943923331, "grad_norm": 0.8469015276185888, "learning_rate": 1.8801648766382187e-07, "loss": 0.369, "step": 14608 }, { "epoch": 0.9150785323916754, "grad_norm": 0.8800578305901238, "learning_rate": 1.8774102742043497e-07, "loss": 0.3427, "step": 14609 }, { "epoch": 0.9151411703910177, "grad_norm": 0.8955785344250305, "learning_rate": 1.8746576525052017e-07, "loss": 0.4333, "step": 14610 }, { "epoch": 0.91520380839036, "grad_norm": 0.8939193355077921, "learning_rate": 1.8719070116540727e-07, "loss": 0.3907, "step": 14611 }, { "epoch": 0.9152664463897023, "grad_norm": 0.9607461838215237, "learning_rate": 1.8691583517641887e-07, "loss": 0.4228, "step": 14612 }, { "epoch": 0.9153290843890446, "grad_norm": 0.8333820953686849, "learning_rate": 1.8664116729486814e-07, "loss": 0.3848, "step": 14613 }, { "epoch": 0.915391722388387, "grad_norm": 0.9906622003629626, "learning_rate": 1.863666975320605e-07, "loss": 0.4518, "step": 14614 }, { "epoch": 0.9154543603877292, "grad_norm": 0.8704696777314882, "learning_rate": 1.86092425899293e-07, "loss": 0.352, "step": 14615 }, { "epoch": 0.9155169983870716, "grad_norm": 0.8739631300968358, "learning_rate": 1.858183524078544e-07, "loss": 0.3658, "step": 14616 }, { "epoch": 0.9155796363864138, "grad_norm": 0.9004253320365077, "learning_rate": 1.8554447706902622e-07, "loss": 0.3906, "step": 14617 }, { "epoch": 0.9156422743857561, "grad_norm": 0.9045262262915919, "learning_rate": 1.8527079989408113e-07, "loss": 0.4091, "step": 14618 }, { "epoch": 0.9157049123850984, "grad_norm": 0.6484553383263393, "learning_rate": 1.849973208942829e-07, "loss": 0.4296, "step": 14619 }, { "epoch": 0.9157675503844407, "grad_norm": 0.8527038114534683, "learning_rate": 1.8472404008088918e-07, "loss": 0.3878, "step": 14620 }, { "epoch": 0.915830188383783, "grad_norm": 0.8903022802402267, "learning_rate": 1.8445095746514763e-07, "loss": 0.3588, "step": 14621 }, { "epoch": 0.9158928263831253, "grad_norm": 0.9076804279059656, "learning_rate": 1.841780730582987e-07, "loss": 0.3805, "step": 14622 }, { "epoch": 0.9159554643824677, "grad_norm": 0.8538237020628726, "learning_rate": 1.83905386871574e-07, "loss": 0.4052, "step": 14623 }, { "epoch": 0.9160181023818099, "grad_norm": 0.8196870752205448, "learning_rate": 1.8363289891619728e-07, "loss": 0.3601, "step": 14624 }, { "epoch": 0.9160807403811523, "grad_norm": 0.8366849545131746, "learning_rate": 1.8336060920338405e-07, "loss": 0.3707, "step": 14625 }, { "epoch": 0.9161433783804945, "grad_norm": 0.8802054485865731, "learning_rate": 1.8308851774434312e-07, "loss": 0.4103, "step": 14626 }, { "epoch": 0.9162060163798368, "grad_norm": 0.8682617357088023, "learning_rate": 1.8281662455027215e-07, "loss": 0.4059, "step": 14627 }, { "epoch": 0.9162686543791791, "grad_norm": 0.8952820263376404, "learning_rate": 1.825449296323628e-07, "loss": 0.3911, "step": 14628 }, { "epoch": 0.9163312923785214, "grad_norm": 0.8673713012727365, "learning_rate": 1.822734330017989e-07, "loss": 0.3861, "step": 14629 }, { "epoch": 0.9163939303778638, "grad_norm": 0.832598636140544, "learning_rate": 1.820021346697548e-07, "loss": 0.3702, "step": 14630 }, { "epoch": 0.916456568377206, "grad_norm": 0.8365237659581025, "learning_rate": 1.8173103464739717e-07, "loss": 0.383, "step": 14631 }, { "epoch": 0.9165192063765484, "grad_norm": 0.8410069306307457, "learning_rate": 1.8146013294588427e-07, "loss": 0.3567, "step": 14632 }, { "epoch": 0.9165818443758906, "grad_norm": 0.8709949044719849, "learning_rate": 1.8118942957636666e-07, "loss": 0.3859, "step": 14633 }, { "epoch": 0.9166444823752329, "grad_norm": 0.8317712005163819, "learning_rate": 1.8091892454998595e-07, "loss": 0.3568, "step": 14634 }, { "epoch": 0.9167071203745752, "grad_norm": 0.8748629190555344, "learning_rate": 1.8064861787787768e-07, "loss": 0.409, "step": 14635 }, { "epoch": 0.9167697583739175, "grad_norm": 0.8306565486180815, "learning_rate": 1.803785095711663e-07, "loss": 0.3652, "step": 14636 }, { "epoch": 0.9168323963732599, "grad_norm": 0.8306150441006032, "learning_rate": 1.801085996409696e-07, "loss": 0.3689, "step": 14637 }, { "epoch": 0.9168950343726021, "grad_norm": 0.9222494702524204, "learning_rate": 1.7983888809839811e-07, "loss": 0.3856, "step": 14638 }, { "epoch": 0.9169576723719445, "grad_norm": 0.980174896540621, "learning_rate": 1.7956937495455295e-07, "loss": 0.425, "step": 14639 }, { "epoch": 0.9170203103712867, "grad_norm": 0.8364452845338674, "learning_rate": 1.7930006022052694e-07, "loss": 0.3725, "step": 14640 }, { "epoch": 0.9170829483706291, "grad_norm": 0.8243818859532487, "learning_rate": 1.7903094390740505e-07, "loss": 0.3907, "step": 14641 }, { "epoch": 0.9171455863699713, "grad_norm": 0.8714631617923024, "learning_rate": 1.7876202602626457e-07, "loss": 0.4006, "step": 14642 }, { "epoch": 0.9172082243693136, "grad_norm": 0.8608068354982259, "learning_rate": 1.7849330658817278e-07, "loss": 0.3799, "step": 14643 }, { "epoch": 0.917270862368656, "grad_norm": 0.9645747703626029, "learning_rate": 1.7822478560419242e-07, "loss": 0.3748, "step": 14644 }, { "epoch": 0.9173335003679982, "grad_norm": 0.8159120042182578, "learning_rate": 1.7795646308537417e-07, "loss": 0.3336, "step": 14645 }, { "epoch": 0.9173961383673406, "grad_norm": 0.9276269374408209, "learning_rate": 1.776883390427636e-07, "loss": 0.3641, "step": 14646 }, { "epoch": 0.9174587763666828, "grad_norm": 0.8982690821382054, "learning_rate": 1.7742041348739636e-07, "loss": 0.3753, "step": 14647 }, { "epoch": 0.9175214143660252, "grad_norm": 0.794047761322713, "learning_rate": 1.771526864302997e-07, "loss": 0.3298, "step": 14648 }, { "epoch": 0.9175840523653674, "grad_norm": 1.009714043720396, "learning_rate": 1.7688515788249316e-07, "loss": 0.394, "step": 14649 }, { "epoch": 0.9176466903647098, "grad_norm": 0.8224101186173519, "learning_rate": 1.7661782785498959e-07, "loss": 0.3397, "step": 14650 }, { "epoch": 0.9177093283640521, "grad_norm": 0.8243043312325466, "learning_rate": 1.7635069635879077e-07, "loss": 0.365, "step": 14651 }, { "epoch": 0.9177719663633943, "grad_norm": 0.8930657760382512, "learning_rate": 1.7608376340489286e-07, "loss": 0.3722, "step": 14652 }, { "epoch": 0.9178346043627367, "grad_norm": 0.8786578291218512, "learning_rate": 1.7581702900428211e-07, "loss": 0.4, "step": 14653 }, { "epoch": 0.9178972423620789, "grad_norm": 0.8830951325357927, "learning_rate": 1.7555049316793804e-07, "loss": 0.4192, "step": 14654 }, { "epoch": 0.9179598803614213, "grad_norm": 0.9441034903145736, "learning_rate": 1.7528415590683133e-07, "loss": 0.387, "step": 14655 }, { "epoch": 0.9180225183607635, "grad_norm": 0.8447530105785896, "learning_rate": 1.7501801723192437e-07, "loss": 0.381, "step": 14656 }, { "epoch": 0.9180851563601059, "grad_norm": 0.8696518803051337, "learning_rate": 1.7475207715417163e-07, "loss": 0.382, "step": 14657 }, { "epoch": 0.9181477943594482, "grad_norm": 0.886163386541185, "learning_rate": 1.744863356845189e-07, "loss": 0.4008, "step": 14658 }, { "epoch": 0.9182104323587904, "grad_norm": 0.8567787738911893, "learning_rate": 1.7422079283390404e-07, "loss": 0.4104, "step": 14659 }, { "epoch": 0.9182730703581328, "grad_norm": 0.871876646413901, "learning_rate": 1.7395544861325718e-07, "loss": 0.3673, "step": 14660 }, { "epoch": 0.918335708357475, "grad_norm": 0.8675901159912227, "learning_rate": 1.736903030334991e-07, "loss": 0.3796, "step": 14661 }, { "epoch": 0.9183983463568174, "grad_norm": 0.9079797443122608, "learning_rate": 1.7342535610554488e-07, "loss": 0.3857, "step": 14662 }, { "epoch": 0.9184609843561596, "grad_norm": 0.56651834085239, "learning_rate": 1.7316060784029808e-07, "loss": 0.4667, "step": 14663 }, { "epoch": 0.918523622355502, "grad_norm": 0.8935188995572623, "learning_rate": 1.7289605824865718e-07, "loss": 0.3888, "step": 14664 }, { "epoch": 0.9185862603548443, "grad_norm": 0.8257029455718833, "learning_rate": 1.7263170734151013e-07, "loss": 0.3782, "step": 14665 }, { "epoch": 0.9186488983541866, "grad_norm": 0.7739864835045562, "learning_rate": 1.7236755512973824e-07, "loss": 0.3326, "step": 14666 }, { "epoch": 0.9187115363535289, "grad_norm": 0.8132792668192733, "learning_rate": 1.7210360162421391e-07, "loss": 0.3326, "step": 14667 }, { "epoch": 0.9187741743528711, "grad_norm": 0.8268939323045604, "learning_rate": 1.718398468358018e-07, "loss": 0.3911, "step": 14668 }, { "epoch": 0.9188368123522135, "grad_norm": 0.9318172145930547, "learning_rate": 1.715762907753571e-07, "loss": 0.3942, "step": 14669 }, { "epoch": 0.9188994503515557, "grad_norm": 0.8719330939825359, "learning_rate": 1.7131293345372835e-07, "loss": 0.3962, "step": 14670 }, { "epoch": 0.9189620883508981, "grad_norm": 0.8554845256202654, "learning_rate": 1.7104977488175522e-07, "loss": 0.3796, "step": 14671 }, { "epoch": 0.9190247263502404, "grad_norm": 0.9468012877077068, "learning_rate": 1.7078681507027017e-07, "loss": 0.3929, "step": 14672 }, { "epoch": 0.9190873643495827, "grad_norm": 0.8927901505399598, "learning_rate": 1.705240540300962e-07, "loss": 0.3962, "step": 14673 }, { "epoch": 0.919150002348925, "grad_norm": 0.7493932371054397, "learning_rate": 1.702614917720491e-07, "loss": 0.3494, "step": 14674 }, { "epoch": 0.9192126403482673, "grad_norm": 0.903848323609536, "learning_rate": 1.6999912830693467e-07, "loss": 0.3721, "step": 14675 }, { "epoch": 0.9192752783476096, "grad_norm": 0.8664793441007657, "learning_rate": 1.6973696364555314e-07, "loss": 0.3833, "step": 14676 }, { "epoch": 0.9193379163469518, "grad_norm": 0.8628408126673278, "learning_rate": 1.6947499779869425e-07, "loss": 0.3756, "step": 14677 }, { "epoch": 0.9194005543462942, "grad_norm": 0.8049012490904607, "learning_rate": 1.69213230777141e-07, "loss": 0.3688, "step": 14678 }, { "epoch": 0.9194631923456364, "grad_norm": 0.9301200680437386, "learning_rate": 1.6895166259166817e-07, "loss": 0.3987, "step": 14679 }, { "epoch": 0.9195258303449788, "grad_norm": 0.9015276109132928, "learning_rate": 1.6869029325304097e-07, "loss": 0.3581, "step": 14680 }, { "epoch": 0.9195884683443211, "grad_norm": 0.5656968627631848, "learning_rate": 1.6842912277201862e-07, "loss": 0.4422, "step": 14681 }, { "epoch": 0.9196511063436634, "grad_norm": 0.8580661346064088, "learning_rate": 1.6816815115935026e-07, "loss": 0.3847, "step": 14682 }, { "epoch": 0.9197137443430057, "grad_norm": 0.5576955191482998, "learning_rate": 1.6790737842577786e-07, "loss": 0.4151, "step": 14683 }, { "epoch": 0.9197763823423479, "grad_norm": 0.8667029746845923, "learning_rate": 1.6764680458203453e-07, "loss": 0.3602, "step": 14684 }, { "epoch": 0.9198390203416903, "grad_norm": 0.8704208810388455, "learning_rate": 1.6738642963884555e-07, "loss": 0.3723, "step": 14685 }, { "epoch": 0.9199016583410325, "grad_norm": 0.8432333727805947, "learning_rate": 1.671262536069279e-07, "loss": 0.3531, "step": 14686 }, { "epoch": 0.9199642963403749, "grad_norm": 0.8010457956969766, "learning_rate": 1.6686627649699028e-07, "loss": 0.358, "step": 14687 }, { "epoch": 0.9200269343397172, "grad_norm": 0.8191031675857771, "learning_rate": 1.6660649831973464e-07, "loss": 0.3808, "step": 14688 }, { "epoch": 0.9200895723390595, "grad_norm": 0.8763759328058821, "learning_rate": 1.6634691908585187e-07, "loss": 0.3987, "step": 14689 }, { "epoch": 0.9201522103384018, "grad_norm": 0.8410040823690403, "learning_rate": 1.660875388060279e-07, "loss": 0.3872, "step": 14690 }, { "epoch": 0.9202148483377441, "grad_norm": 0.9005790939474518, "learning_rate": 1.658283574909375e-07, "loss": 0.371, "step": 14691 }, { "epoch": 0.9202774863370864, "grad_norm": 0.8203826705390574, "learning_rate": 1.6556937515124938e-07, "loss": 0.3866, "step": 14692 }, { "epoch": 0.9203401243364286, "grad_norm": 0.8319142132305005, "learning_rate": 1.6531059179762333e-07, "loss": 0.3574, "step": 14693 }, { "epoch": 0.920402762335771, "grad_norm": 0.9424611396075832, "learning_rate": 1.650520074407108e-07, "loss": 0.4208, "step": 14694 }, { "epoch": 0.9204654003351133, "grad_norm": 0.8674179255005526, "learning_rate": 1.6479362209115445e-07, "loss": 0.3321, "step": 14695 }, { "epoch": 0.9205280383344556, "grad_norm": 0.9531663922781224, "learning_rate": 1.645354357595902e-07, "loss": 0.404, "step": 14696 }, { "epoch": 0.9205906763337979, "grad_norm": 0.8986312321566398, "learning_rate": 1.6427744845664506e-07, "loss": 0.3844, "step": 14697 }, { "epoch": 0.9206533143331402, "grad_norm": 0.9317387134377791, "learning_rate": 1.640196601929378e-07, "loss": 0.4197, "step": 14698 }, { "epoch": 0.9207159523324825, "grad_norm": 0.9229465544677935, "learning_rate": 1.6376207097907826e-07, "loss": 0.4056, "step": 14699 }, { "epoch": 0.9207785903318249, "grad_norm": 0.8951608289670081, "learning_rate": 1.6350468082567018e-07, "loss": 0.4103, "step": 14700 }, { "epoch": 0.9208412283311671, "grad_norm": 0.8778509790032191, "learning_rate": 1.6324748974330728e-07, "loss": 0.3773, "step": 14701 }, { "epoch": 0.9209038663305094, "grad_norm": 0.8701376953624392, "learning_rate": 1.6299049774257503e-07, "loss": 0.3673, "step": 14702 }, { "epoch": 0.9209665043298517, "grad_norm": 0.8703177454258434, "learning_rate": 1.6273370483405216e-07, "loss": 0.3636, "step": 14703 }, { "epoch": 0.921029142329194, "grad_norm": 0.8033007174368356, "learning_rate": 1.6247711102830687e-07, "loss": 0.3976, "step": 14704 }, { "epoch": 0.9210917803285363, "grad_norm": 0.8630477042594399, "learning_rate": 1.622207163359024e-07, "loss": 0.4073, "step": 14705 }, { "epoch": 0.9211544183278786, "grad_norm": 0.8340873092347382, "learning_rate": 1.6196452076739143e-07, "loss": 0.3926, "step": 14706 }, { "epoch": 0.921217056327221, "grad_norm": 0.6381933796970108, "learning_rate": 1.617085243333183e-07, "loss": 0.4362, "step": 14707 }, { "epoch": 0.9212796943265632, "grad_norm": 0.862160194036841, "learning_rate": 1.614527270442201e-07, "loss": 0.3503, "step": 14708 }, { "epoch": 0.9213423323259055, "grad_norm": 0.8701286557549277, "learning_rate": 1.6119712891062566e-07, "loss": 0.3676, "step": 14709 }, { "epoch": 0.9214049703252478, "grad_norm": 0.8345434737563768, "learning_rate": 1.6094172994305603e-07, "loss": 0.3976, "step": 14710 }, { "epoch": 0.9214676083245901, "grad_norm": 0.5860917157941177, "learning_rate": 1.6068653015202328e-07, "loss": 0.4412, "step": 14711 }, { "epoch": 0.9215302463239324, "grad_norm": 0.9074843540978847, "learning_rate": 1.6043152954803077e-07, "loss": 0.392, "step": 14712 }, { "epoch": 0.9215928843232747, "grad_norm": 0.87021248833068, "learning_rate": 1.6017672814157392e-07, "loss": 0.379, "step": 14713 }, { "epoch": 0.921655522322617, "grad_norm": 0.8711845489167479, "learning_rate": 1.5992212594314215e-07, "loss": 0.3888, "step": 14714 }, { "epoch": 0.9217181603219593, "grad_norm": 0.8787048095293242, "learning_rate": 1.596677229632143e-07, "loss": 0.3949, "step": 14715 }, { "epoch": 0.9217807983213017, "grad_norm": 0.8727650926833521, "learning_rate": 1.594135192122609e-07, "loss": 0.383, "step": 14716 }, { "epoch": 0.9218434363206439, "grad_norm": 0.8456146637483302, "learning_rate": 1.591595147007452e-07, "loss": 0.3814, "step": 14717 }, { "epoch": 0.9219060743199862, "grad_norm": 0.8674350305693941, "learning_rate": 1.5890570943912275e-07, "loss": 0.4286, "step": 14718 }, { "epoch": 0.9219687123193285, "grad_norm": 0.9521371063110421, "learning_rate": 1.586521034378402e-07, "loss": 0.3912, "step": 14719 }, { "epoch": 0.9220313503186708, "grad_norm": 0.8941422319783079, "learning_rate": 1.5839869670733532e-07, "loss": 0.4253, "step": 14720 }, { "epoch": 0.9220939883180131, "grad_norm": 0.8971905389329219, "learning_rate": 1.5814548925803863e-07, "loss": 0.3545, "step": 14721 }, { "epoch": 0.9221566263173554, "grad_norm": 0.8870241398058054, "learning_rate": 1.5789248110037237e-07, "loss": 0.4117, "step": 14722 }, { "epoch": 0.9222192643166978, "grad_norm": 0.8066239040174703, "learning_rate": 1.576396722447504e-07, "loss": 0.3532, "step": 14723 }, { "epoch": 0.92228190231604, "grad_norm": 0.8358823176851482, "learning_rate": 1.5738706270157832e-07, "loss": 0.3882, "step": 14724 }, { "epoch": 0.9223445403153824, "grad_norm": 0.870347429869684, "learning_rate": 1.5713465248125338e-07, "loss": 0.3976, "step": 14725 }, { "epoch": 0.9224071783147246, "grad_norm": 0.9476285041159658, "learning_rate": 1.56882441594165e-07, "loss": 0.4198, "step": 14726 }, { "epoch": 0.9224698163140669, "grad_norm": 0.9701170447208158, "learning_rate": 1.5663043005069433e-07, "loss": 0.4657, "step": 14727 }, { "epoch": 0.9225324543134092, "grad_norm": 0.9323239847495163, "learning_rate": 1.5637861786121422e-07, "loss": 0.4054, "step": 14728 }, { "epoch": 0.9225950923127515, "grad_norm": 0.8411628675788348, "learning_rate": 1.561270050360897e-07, "loss": 0.371, "step": 14729 }, { "epoch": 0.9226577303120939, "grad_norm": 0.9323844332405403, "learning_rate": 1.5587559158567578e-07, "loss": 0.4455, "step": 14730 }, { "epoch": 0.9227203683114361, "grad_norm": 0.8627126415481418, "learning_rate": 1.55624377520322e-07, "loss": 0.353, "step": 14731 }, { "epoch": 0.9227830063107785, "grad_norm": 0.8798562791391245, "learning_rate": 1.5537336285036842e-07, "loss": 0.3872, "step": 14732 }, { "epoch": 0.9228456443101207, "grad_norm": 0.8669605469895654, "learning_rate": 1.551225475861462e-07, "loss": 0.3668, "step": 14733 }, { "epoch": 0.9229082823094631, "grad_norm": 0.8469651957055837, "learning_rate": 1.5487193173797875e-07, "loss": 0.3606, "step": 14734 }, { "epoch": 0.9229709203088053, "grad_norm": 0.8818273202018854, "learning_rate": 1.5462151531618174e-07, "loss": 0.377, "step": 14735 }, { "epoch": 0.9230335583081476, "grad_norm": 0.8747548761336114, "learning_rate": 1.5437129833106356e-07, "loss": 0.3772, "step": 14736 }, { "epoch": 0.92309619630749, "grad_norm": 0.866207698472162, "learning_rate": 1.5412128079292154e-07, "loss": 0.3824, "step": 14737 }, { "epoch": 0.9231588343068322, "grad_norm": 0.6501220761793048, "learning_rate": 1.538714627120469e-07, "loss": 0.4498, "step": 14738 }, { "epoch": 0.9232214723061746, "grad_norm": 0.9843882874404228, "learning_rate": 1.5362184409872194e-07, "loss": 0.4079, "step": 14739 }, { "epoch": 0.9232841103055168, "grad_norm": 0.8867269921736035, "learning_rate": 1.5337242496322234e-07, "loss": 0.3621, "step": 14740 }, { "epoch": 0.9233467483048592, "grad_norm": 0.8937706730956598, "learning_rate": 1.531232053158127e-07, "loss": 0.3353, "step": 14741 }, { "epoch": 0.9234093863042014, "grad_norm": 0.928627934025503, "learning_rate": 1.5287418516675202e-07, "loss": 0.4199, "step": 14742 }, { "epoch": 0.9234720243035437, "grad_norm": 0.7880696250055326, "learning_rate": 1.5262536452628929e-07, "loss": 0.3789, "step": 14743 }, { "epoch": 0.9235346623028861, "grad_norm": 0.8925507061483414, "learning_rate": 1.5237674340466635e-07, "loss": 0.393, "step": 14744 }, { "epoch": 0.9235973003022283, "grad_norm": 0.8911311821883656, "learning_rate": 1.5212832181211613e-07, "loss": 0.3669, "step": 14745 }, { "epoch": 0.9236599383015707, "grad_norm": 0.8655947731574158, "learning_rate": 1.518800997588643e-07, "loss": 0.3957, "step": 14746 }, { "epoch": 0.9237225763009129, "grad_norm": 0.8655935167762343, "learning_rate": 1.5163207725512718e-07, "loss": 0.4103, "step": 14747 }, { "epoch": 0.9237852143002553, "grad_norm": 0.8202327832086467, "learning_rate": 1.5138425431111327e-07, "loss": 0.3804, "step": 14748 }, { "epoch": 0.9238478522995975, "grad_norm": 0.8797047388307302, "learning_rate": 1.5113663093702434e-07, "loss": 0.3945, "step": 14749 }, { "epoch": 0.9239104902989399, "grad_norm": 0.9433878849978943, "learning_rate": 1.5088920714305122e-07, "loss": 0.4002, "step": 14750 }, { "epoch": 0.9239731282982822, "grad_norm": 0.8268468528005631, "learning_rate": 1.5064198293937849e-07, "loss": 0.3713, "step": 14751 }, { "epoch": 0.9240357662976244, "grad_norm": 0.926740667748047, "learning_rate": 1.503949583361819e-07, "loss": 0.4132, "step": 14752 }, { "epoch": 0.9240984042969668, "grad_norm": 0.8756922386679483, "learning_rate": 1.501481333436289e-07, "loss": 0.3642, "step": 14753 }, { "epoch": 0.924161042296309, "grad_norm": 0.8992174410982819, "learning_rate": 1.4990150797187854e-07, "loss": 0.3925, "step": 14754 }, { "epoch": 0.9242236802956514, "grad_norm": 0.9211233852893941, "learning_rate": 1.4965508223108272e-07, "loss": 0.3878, "step": 14755 }, { "epoch": 0.9242863182949936, "grad_norm": 0.8192392210080767, "learning_rate": 1.4940885613138335e-07, "loss": 0.3685, "step": 14756 }, { "epoch": 0.924348956294336, "grad_norm": 0.8393876083120878, "learning_rate": 1.4916282968291617e-07, "loss": 0.4181, "step": 14757 }, { "epoch": 0.9244115942936783, "grad_norm": 0.6109444737881098, "learning_rate": 1.4891700289580758e-07, "loss": 0.449, "step": 14758 }, { "epoch": 0.9244742322930206, "grad_norm": 0.7785353875808478, "learning_rate": 1.4867137578017553e-07, "loss": 0.3343, "step": 14759 }, { "epoch": 0.9245368702923629, "grad_norm": 0.8576340463057066, "learning_rate": 1.4842594834612978e-07, "loss": 0.3675, "step": 14760 }, { "epoch": 0.9245995082917051, "grad_norm": 0.9163171630507834, "learning_rate": 1.4818072060377275e-07, "loss": 0.3946, "step": 14761 }, { "epoch": 0.9246621462910475, "grad_norm": 0.9389890631051475, "learning_rate": 1.4793569256319806e-07, "loss": 0.4194, "step": 14762 }, { "epoch": 0.9247247842903897, "grad_norm": 0.8159189939022967, "learning_rate": 1.4769086423448985e-07, "loss": 0.3418, "step": 14763 }, { "epoch": 0.9247874222897321, "grad_norm": 0.8626756667531456, "learning_rate": 1.4744623562772676e-07, "loss": 0.4092, "step": 14764 }, { "epoch": 0.9248500602890743, "grad_norm": 0.9630489414101225, "learning_rate": 1.4720180675297736e-07, "loss": 0.3723, "step": 14765 }, { "epoch": 0.9249126982884167, "grad_norm": 0.8244360493749009, "learning_rate": 1.469575776203025e-07, "loss": 0.3555, "step": 14766 }, { "epoch": 0.924975336287759, "grad_norm": 0.8536466871007101, "learning_rate": 1.467135482397547e-07, "loss": 0.3735, "step": 14767 }, { "epoch": 0.9250379742871012, "grad_norm": 0.8224869151772132, "learning_rate": 1.464697186213776e-07, "loss": 0.3885, "step": 14768 }, { "epoch": 0.9251006122864436, "grad_norm": 0.8176680232550725, "learning_rate": 1.4622608877520816e-07, "loss": 0.425, "step": 14769 }, { "epoch": 0.9251632502857858, "grad_norm": 0.868449000623306, "learning_rate": 1.4598265871127336e-07, "loss": 0.42, "step": 14770 }, { "epoch": 0.9252258882851282, "grad_norm": 0.8806399274656759, "learning_rate": 1.4573942843959355e-07, "loss": 0.3882, "step": 14771 }, { "epoch": 0.9252885262844704, "grad_norm": 0.8529272246331876, "learning_rate": 1.4549639797017955e-07, "loss": 0.3369, "step": 14772 }, { "epoch": 0.9253511642838128, "grad_norm": 0.9493763637548642, "learning_rate": 1.4525356731303508e-07, "loss": 0.383, "step": 14773 }, { "epoch": 0.9254138022831551, "grad_norm": 0.8542992384023457, "learning_rate": 1.450109364781549e-07, "loss": 0.3643, "step": 14774 }, { "epoch": 0.9254764402824974, "grad_norm": 0.8593425841409795, "learning_rate": 1.4476850547552546e-07, "loss": 0.3747, "step": 14775 }, { "epoch": 0.9255390782818397, "grad_norm": 0.8679999164442935, "learning_rate": 1.44526274315126e-07, "loss": 0.3788, "step": 14776 }, { "epoch": 0.9256017162811819, "grad_norm": 0.8616763060917516, "learning_rate": 1.442842430069258e-07, "loss": 0.3895, "step": 14777 }, { "epoch": 0.9256643542805243, "grad_norm": 0.9205589679185712, "learning_rate": 1.4404241156088793e-07, "loss": 0.3986, "step": 14778 }, { "epoch": 0.9257269922798665, "grad_norm": 0.8497547402807707, "learning_rate": 1.4380077998696562e-07, "loss": 0.3743, "step": 14779 }, { "epoch": 0.9257896302792089, "grad_norm": 0.9601213052860507, "learning_rate": 1.435593482951042e-07, "loss": 0.4144, "step": 14780 }, { "epoch": 0.9258522682785512, "grad_norm": 0.8836591760942433, "learning_rate": 1.433181164952413e-07, "loss": 0.3867, "step": 14781 }, { "epoch": 0.9259149062778935, "grad_norm": 0.8760520368926288, "learning_rate": 1.4307708459730619e-07, "loss": 0.4138, "step": 14782 }, { "epoch": 0.9259775442772358, "grad_norm": 0.8895842564908901, "learning_rate": 1.428362526112198e-07, "loss": 0.4087, "step": 14783 }, { "epoch": 0.9260401822765781, "grad_norm": 0.840200838812197, "learning_rate": 1.4259562054689536e-07, "loss": 0.3464, "step": 14784 }, { "epoch": 0.9261028202759204, "grad_norm": 0.8615470422687604, "learning_rate": 1.4235518841423657e-07, "loss": 0.3383, "step": 14785 }, { "epoch": 0.9261654582752626, "grad_norm": 0.9310291339020254, "learning_rate": 1.4211495622313998e-07, "loss": 0.3982, "step": 14786 }, { "epoch": 0.926228096274605, "grad_norm": 0.9075455857482108, "learning_rate": 1.4187492398349324e-07, "loss": 0.362, "step": 14787 }, { "epoch": 0.9262907342739473, "grad_norm": 0.8375365990183425, "learning_rate": 1.416350917051762e-07, "loss": 0.3815, "step": 14788 }, { "epoch": 0.9263533722732896, "grad_norm": 0.8950199824015383, "learning_rate": 1.4139545939805988e-07, "loss": 0.3952, "step": 14789 }, { "epoch": 0.9264160102726319, "grad_norm": 0.8726064605248296, "learning_rate": 1.4115602707200914e-07, "loss": 0.3987, "step": 14790 }, { "epoch": 0.9264786482719742, "grad_norm": 0.8463332551594024, "learning_rate": 1.409167947368778e-07, "loss": 0.3545, "step": 14791 }, { "epoch": 0.9265412862713165, "grad_norm": 0.9275502728661408, "learning_rate": 1.4067776240251352e-07, "loss": 0.4418, "step": 14792 }, { "epoch": 0.9266039242706587, "grad_norm": 0.8339378557199282, "learning_rate": 1.40438930078754e-07, "loss": 0.3697, "step": 14793 }, { "epoch": 0.9266665622700011, "grad_norm": 0.8207154579906358, "learning_rate": 1.4020029777543075e-07, "loss": 0.3304, "step": 14794 }, { "epoch": 0.9267292002693434, "grad_norm": 0.9048810287278517, "learning_rate": 1.399618655023649e-07, "loss": 0.3863, "step": 14795 }, { "epoch": 0.9267918382686857, "grad_norm": 0.8697015934525428, "learning_rate": 1.3972363326937078e-07, "loss": 0.3641, "step": 14796 }, { "epoch": 0.926854476268028, "grad_norm": 0.8352039808586214, "learning_rate": 1.3948560108625386e-07, "loss": 0.4061, "step": 14797 }, { "epoch": 0.9269171142673703, "grad_norm": 0.8626541857777776, "learning_rate": 1.3924776896281135e-07, "loss": 0.354, "step": 14798 }, { "epoch": 0.9269797522667126, "grad_norm": 0.8211911033377771, "learning_rate": 1.3901013690883315e-07, "loss": 0.3897, "step": 14799 }, { "epoch": 0.927042390266055, "grad_norm": 0.8543986577915138, "learning_rate": 1.387727049340998e-07, "loss": 0.3929, "step": 14800 }, { "epoch": 0.9271050282653972, "grad_norm": 0.9144360453186952, "learning_rate": 1.3853547304838454e-07, "loss": 0.3865, "step": 14801 }, { "epoch": 0.9271676662647395, "grad_norm": 0.8372001662173909, "learning_rate": 1.3829844126145186e-07, "loss": 0.3543, "step": 14802 }, { "epoch": 0.9272303042640818, "grad_norm": 0.9095526456520832, "learning_rate": 1.3806160958305727e-07, "loss": 0.4161, "step": 14803 }, { "epoch": 0.9272929422634241, "grad_norm": 0.8595976302790328, "learning_rate": 1.378249780229496e-07, "loss": 0.3997, "step": 14804 }, { "epoch": 0.9273555802627664, "grad_norm": 0.8866447014570322, "learning_rate": 1.375885465908683e-07, "loss": 0.3677, "step": 14805 }, { "epoch": 0.9274182182621087, "grad_norm": 0.8486575829675538, "learning_rate": 1.3735231529654446e-07, "loss": 0.3828, "step": 14806 }, { "epoch": 0.927480856261451, "grad_norm": 0.8787071780766434, "learning_rate": 1.3711628414970203e-07, "loss": 0.39, "step": 14807 }, { "epoch": 0.9275434942607933, "grad_norm": 0.9008417473472965, "learning_rate": 1.3688045316005595e-07, "loss": 0.3688, "step": 14808 }, { "epoch": 0.9276061322601357, "grad_norm": 0.8459581663878987, "learning_rate": 1.366448223373129e-07, "loss": 0.3521, "step": 14809 }, { "epoch": 0.9276687702594779, "grad_norm": 0.8539597241033332, "learning_rate": 1.3640939169117185e-07, "loss": 0.355, "step": 14810 }, { "epoch": 0.9277314082588202, "grad_norm": 0.9188623978334577, "learning_rate": 1.3617416123132275e-07, "loss": 0.3959, "step": 14811 }, { "epoch": 0.9277940462581625, "grad_norm": 0.8822292944047937, "learning_rate": 1.3593913096744848e-07, "loss": 0.3723, "step": 14812 }, { "epoch": 0.9278566842575048, "grad_norm": 0.8248233297112681, "learning_rate": 1.3570430090922238e-07, "loss": 0.388, "step": 14813 }, { "epoch": 0.9279193222568471, "grad_norm": 0.9041088586667478, "learning_rate": 1.3546967106630948e-07, "loss": 0.3848, "step": 14814 }, { "epoch": 0.9279819602561894, "grad_norm": 0.5702302141268994, "learning_rate": 1.352352414483682e-07, "loss": 0.4358, "step": 14815 }, { "epoch": 0.9280445982555318, "grad_norm": 0.9415040254149123, "learning_rate": 1.350010120650469e-07, "loss": 0.4007, "step": 14816 }, { "epoch": 0.928107236254874, "grad_norm": 0.8225867270837077, "learning_rate": 1.347669829259879e-07, "loss": 0.3606, "step": 14817 }, { "epoch": 0.9281698742542163, "grad_norm": 0.9034243581324374, "learning_rate": 1.3453315404082178e-07, "loss": 0.4105, "step": 14818 }, { "epoch": 0.9282325122535586, "grad_norm": 0.9134099059678861, "learning_rate": 1.342995254191748e-07, "loss": 0.4373, "step": 14819 }, { "epoch": 0.9282951502529009, "grad_norm": 0.8194571711691807, "learning_rate": 1.34066097070662e-07, "loss": 0.4004, "step": 14820 }, { "epoch": 0.9283577882522432, "grad_norm": 0.8895313448270207, "learning_rate": 1.3383286900489234e-07, "loss": 0.3348, "step": 14821 }, { "epoch": 0.9284204262515855, "grad_norm": 0.7787666992445146, "learning_rate": 1.3359984123146487e-07, "loss": 0.3488, "step": 14822 }, { "epoch": 0.9284830642509279, "grad_norm": 0.8870908822860342, "learning_rate": 1.333670137599713e-07, "loss": 0.341, "step": 14823 }, { "epoch": 0.9285457022502701, "grad_norm": 0.819408302851515, "learning_rate": 1.33134386599994e-07, "loss": 0.3385, "step": 14824 }, { "epoch": 0.9286083402496125, "grad_norm": 0.8065629235770662, "learning_rate": 1.329019597611092e-07, "loss": 0.3481, "step": 14825 }, { "epoch": 0.9286709782489547, "grad_norm": 0.8549365432085776, "learning_rate": 1.3266973325288314e-07, "loss": 0.3757, "step": 14826 }, { "epoch": 0.928733616248297, "grad_norm": 0.8550025460734466, "learning_rate": 1.324377070848748e-07, "loss": 0.398, "step": 14827 }, { "epoch": 0.9287962542476393, "grad_norm": 0.8495817991751914, "learning_rate": 1.322058812666327e-07, "loss": 0.3709, "step": 14828 }, { "epoch": 0.9288588922469816, "grad_norm": 0.8762292154676864, "learning_rate": 1.3197425580770085e-07, "loss": 0.3699, "step": 14829 }, { "epoch": 0.928921530246324, "grad_norm": 0.8730399124947688, "learning_rate": 1.3174283071761217e-07, "loss": 0.3476, "step": 14830 }, { "epoch": 0.9289841682456662, "grad_norm": 0.8844284552804863, "learning_rate": 1.3151160600589185e-07, "loss": 0.3772, "step": 14831 }, { "epoch": 0.9290468062450086, "grad_norm": 0.7921222323021608, "learning_rate": 1.312805816820578e-07, "loss": 0.3525, "step": 14832 }, { "epoch": 0.9291094442443508, "grad_norm": 0.8324825878217739, "learning_rate": 1.3104975775561802e-07, "loss": 0.3813, "step": 14833 }, { "epoch": 0.9291720822436932, "grad_norm": 0.8347876316994729, "learning_rate": 1.3081913423607484e-07, "loss": 0.3231, "step": 14834 }, { "epoch": 0.9292347202430354, "grad_norm": 0.8505363068282926, "learning_rate": 1.3058871113291905e-07, "loss": 0.3344, "step": 14835 }, { "epoch": 0.9292973582423777, "grad_norm": 0.8546499017360409, "learning_rate": 1.3035848845563637e-07, "loss": 0.3818, "step": 14836 }, { "epoch": 0.92935999624172, "grad_norm": 0.7734214993592076, "learning_rate": 1.3012846621370144e-07, "loss": 0.3188, "step": 14837 }, { "epoch": 0.9294226342410623, "grad_norm": 0.9162882430224971, "learning_rate": 1.298986444165834e-07, "loss": 0.3817, "step": 14838 }, { "epoch": 0.9294852722404047, "grad_norm": 0.835383660757356, "learning_rate": 1.2966902307374073e-07, "loss": 0.3987, "step": 14839 }, { "epoch": 0.9295479102397469, "grad_norm": 0.8143057255523661, "learning_rate": 1.2943960219462537e-07, "loss": 0.3958, "step": 14840 }, { "epoch": 0.9296105482390893, "grad_norm": 0.9256229615024858, "learning_rate": 1.2921038178867972e-07, "loss": 0.4176, "step": 14841 }, { "epoch": 0.9296731862384315, "grad_norm": 0.8689653060955106, "learning_rate": 1.289813618653385e-07, "loss": 0.3489, "step": 14842 }, { "epoch": 0.9297358242377739, "grad_norm": 0.8824659921072899, "learning_rate": 1.2875254243402913e-07, "loss": 0.3594, "step": 14843 }, { "epoch": 0.9297984622371162, "grad_norm": 0.8627417836952641, "learning_rate": 1.285239235041691e-07, "loss": 0.4255, "step": 14844 }, { "epoch": 0.9298611002364584, "grad_norm": 0.8493714958244634, "learning_rate": 1.2829550508516864e-07, "loss": 0.3633, "step": 14845 }, { "epoch": 0.9299237382358008, "grad_norm": 0.8857338694271654, "learning_rate": 1.2806728718642913e-07, "loss": 0.4049, "step": 14846 }, { "epoch": 0.929986376235143, "grad_norm": 0.8849269430732126, "learning_rate": 1.2783926981734474e-07, "loss": 0.3917, "step": 14847 }, { "epoch": 0.9300490142344854, "grad_norm": 0.9105546162479768, "learning_rate": 1.2761145298730016e-07, "loss": 0.4223, "step": 14848 }, { "epoch": 0.9301116522338276, "grad_norm": 0.8287636934119735, "learning_rate": 1.2738383670567234e-07, "loss": 0.3521, "step": 14849 }, { "epoch": 0.93017429023317, "grad_norm": 0.9480743438140772, "learning_rate": 1.271564209818299e-07, "loss": 0.3912, "step": 14850 }, { "epoch": 0.9302369282325123, "grad_norm": 0.8659408455713699, "learning_rate": 1.2692920582513425e-07, "loss": 0.3699, "step": 14851 }, { "epoch": 0.9302995662318545, "grad_norm": 0.8989106039705924, "learning_rate": 1.2670219124493678e-07, "loss": 0.4085, "step": 14852 }, { "epoch": 0.9303622042311969, "grad_norm": 0.936227841389022, "learning_rate": 1.2647537725058222e-07, "loss": 0.4087, "step": 14853 }, { "epoch": 0.9304248422305391, "grad_norm": 0.796468287554183, "learning_rate": 1.2624876385140482e-07, "loss": 0.3674, "step": 14854 }, { "epoch": 0.9304874802298815, "grad_norm": 0.8741601600161825, "learning_rate": 1.2602235105673323e-07, "loss": 0.3568, "step": 14855 }, { "epoch": 0.9305501182292237, "grad_norm": 0.8426381944408357, "learning_rate": 1.2579613887588603e-07, "loss": 0.3807, "step": 14856 }, { "epoch": 0.9306127562285661, "grad_norm": 0.8284514768097777, "learning_rate": 1.2557012731817476e-07, "loss": 0.3468, "step": 14857 }, { "epoch": 0.9306753942279083, "grad_norm": 0.8792954939370311, "learning_rate": 1.253443163929019e-07, "loss": 0.4166, "step": 14858 }, { "epoch": 0.9307380322272507, "grad_norm": 0.8834380715743249, "learning_rate": 1.2511870610936115e-07, "loss": 0.3745, "step": 14859 }, { "epoch": 0.930800670226593, "grad_norm": 0.866331568649274, "learning_rate": 1.2489329647684013e-07, "loss": 0.4207, "step": 14860 }, { "epoch": 0.9308633082259352, "grad_norm": 0.8578320411914555, "learning_rate": 1.246680875046158e-07, "loss": 0.4138, "step": 14861 }, { "epoch": 0.9309259462252776, "grad_norm": 0.8618282090216588, "learning_rate": 1.2444307920195742e-07, "loss": 0.355, "step": 14862 }, { "epoch": 0.9309885842246198, "grad_norm": 0.8626318707935294, "learning_rate": 1.2421827157812705e-07, "loss": 0.4052, "step": 14863 }, { "epoch": 0.9310512222239622, "grad_norm": 0.8332374341115997, "learning_rate": 1.2399366464237728e-07, "loss": 0.3664, "step": 14864 }, { "epoch": 0.9311138602233044, "grad_norm": 0.9098542887360828, "learning_rate": 1.237692584039535e-07, "loss": 0.3891, "step": 14865 }, { "epoch": 0.9311764982226468, "grad_norm": 0.8724387342321722, "learning_rate": 1.2354505287209272e-07, "loss": 0.3686, "step": 14866 }, { "epoch": 0.9312391362219891, "grad_norm": 0.9057349082443297, "learning_rate": 1.2332104805602153e-07, "loss": 0.3943, "step": 14867 }, { "epoch": 0.9313017742213314, "grad_norm": 0.8858741700690045, "learning_rate": 1.2309724396496192e-07, "loss": 0.3801, "step": 14868 }, { "epoch": 0.9313644122206737, "grad_norm": 0.9066682026826358, "learning_rate": 1.228736406081249e-07, "loss": 0.3629, "step": 14869 }, { "epoch": 0.9314270502200159, "grad_norm": 0.8868420095059182, "learning_rate": 1.2265023799471422e-07, "loss": 0.3679, "step": 14870 }, { "epoch": 0.9314896882193583, "grad_norm": 0.8481306255943579, "learning_rate": 1.224270361339247e-07, "loss": 0.3527, "step": 14871 }, { "epoch": 0.9315523262187005, "grad_norm": 0.8374382722773357, "learning_rate": 1.2220403503494405e-07, "loss": 0.3429, "step": 14872 }, { "epoch": 0.9316149642180429, "grad_norm": 0.9280709175444355, "learning_rate": 1.2198123470695044e-07, "loss": 0.3919, "step": 14873 }, { "epoch": 0.9316776022173852, "grad_norm": 0.8407332406081014, "learning_rate": 1.2175863515911434e-07, "loss": 0.3722, "step": 14874 }, { "epoch": 0.9317402402167275, "grad_norm": 0.9117235540767225, "learning_rate": 1.2153623640059843e-07, "loss": 0.4215, "step": 14875 }, { "epoch": 0.9318028782160698, "grad_norm": 0.8971909911983352, "learning_rate": 1.2131403844055645e-07, "loss": 0.3772, "step": 14876 }, { "epoch": 0.931865516215412, "grad_norm": 0.8358524098185524, "learning_rate": 1.2109204128813446e-07, "loss": 0.3972, "step": 14877 }, { "epoch": 0.9319281542147544, "grad_norm": 0.890081534261119, "learning_rate": 1.2087024495246958e-07, "loss": 0.3945, "step": 14878 }, { "epoch": 0.9319907922140966, "grad_norm": 0.9042606416102673, "learning_rate": 1.206486494426906e-07, "loss": 0.3744, "step": 14879 }, { "epoch": 0.932053430213439, "grad_norm": 0.8935887559624153, "learning_rate": 1.2042725476791916e-07, "loss": 0.4037, "step": 14880 }, { "epoch": 0.9321160682127813, "grad_norm": 0.8432360463470633, "learning_rate": 1.2020606093726794e-07, "loss": 0.3943, "step": 14881 }, { "epoch": 0.9321787062121236, "grad_norm": 0.8010867924941264, "learning_rate": 1.1998506795984077e-07, "loss": 0.3369, "step": 14882 }, { "epoch": 0.9322413442114659, "grad_norm": 0.8825084900600176, "learning_rate": 1.197642758447337e-07, "loss": 0.3662, "step": 14883 }, { "epoch": 0.9323039822108082, "grad_norm": 0.9142896129921322, "learning_rate": 1.1954368460103504e-07, "loss": 0.3837, "step": 14884 }, { "epoch": 0.9323666202101505, "grad_norm": 0.9181599635802045, "learning_rate": 1.1932329423782363e-07, "loss": 0.3906, "step": 14885 }, { "epoch": 0.9324292582094927, "grad_norm": 0.5817856240342016, "learning_rate": 1.191031047641722e-07, "loss": 0.4217, "step": 14886 }, { "epoch": 0.9324918962088351, "grad_norm": 0.8601607004318278, "learning_rate": 1.188831161891424e-07, "loss": 0.4175, "step": 14887 }, { "epoch": 0.9325545342081774, "grad_norm": 0.9355345647014428, "learning_rate": 1.1866332852178975e-07, "loss": 0.3964, "step": 14888 }, { "epoch": 0.9326171722075197, "grad_norm": 0.5847617300628062, "learning_rate": 1.1844374177116091e-07, "loss": 0.4703, "step": 14889 }, { "epoch": 0.932679810206862, "grad_norm": 0.9064154445897123, "learning_rate": 1.1822435594629312e-07, "loss": 0.4149, "step": 14890 }, { "epoch": 0.9327424482062043, "grad_norm": 0.6441901940234684, "learning_rate": 1.1800517105621745e-07, "loss": 0.4591, "step": 14891 }, { "epoch": 0.9328050862055466, "grad_norm": 0.819823209810609, "learning_rate": 1.1778618710995393e-07, "loss": 0.3965, "step": 14892 }, { "epoch": 0.932867724204889, "grad_norm": 0.5801108588221369, "learning_rate": 1.1756740411651757e-07, "loss": 0.4351, "step": 14893 }, { "epoch": 0.9329303622042312, "grad_norm": 0.8281394658359641, "learning_rate": 1.1734882208491393e-07, "loss": 0.3883, "step": 14894 }, { "epoch": 0.9329930002035735, "grad_norm": 0.8452440010724656, "learning_rate": 1.1713044102413861e-07, "loss": 0.3674, "step": 14895 }, { "epoch": 0.9330556382029158, "grad_norm": 0.8683728555191663, "learning_rate": 1.1691226094318053e-07, "loss": 0.3848, "step": 14896 }, { "epoch": 0.9331182762022581, "grad_norm": 0.8319066024520386, "learning_rate": 1.1669428185102027e-07, "loss": 0.4023, "step": 14897 }, { "epoch": 0.9331809142016004, "grad_norm": 0.8374522756863786, "learning_rate": 1.1647650375662956e-07, "loss": 0.3685, "step": 14898 }, { "epoch": 0.9332435522009427, "grad_norm": 0.776031069049152, "learning_rate": 1.1625892666897288e-07, "loss": 0.3175, "step": 14899 }, { "epoch": 0.933306190200285, "grad_norm": 0.9117908241665251, "learning_rate": 1.1604155059700417e-07, "loss": 0.3858, "step": 14900 }, { "epoch": 0.9333688281996273, "grad_norm": 0.8330558173417701, "learning_rate": 1.1582437554967241e-07, "loss": 0.3665, "step": 14901 }, { "epoch": 0.9334314661989696, "grad_norm": 0.8509185211188688, "learning_rate": 1.1560740153591598e-07, "loss": 0.3234, "step": 14902 }, { "epoch": 0.9334941041983119, "grad_norm": 0.8566830658180128, "learning_rate": 1.1539062856466499e-07, "loss": 0.3871, "step": 14903 }, { "epoch": 0.9335567421976542, "grad_norm": 1.039867810436582, "learning_rate": 1.1517405664484338e-07, "loss": 0.3954, "step": 14904 }, { "epoch": 0.9336193801969965, "grad_norm": 0.8281852721134726, "learning_rate": 1.1495768578536348e-07, "loss": 0.3264, "step": 14905 }, { "epoch": 0.9336820181963388, "grad_norm": 0.6132401579052688, "learning_rate": 1.1474151599513206e-07, "loss": 0.4536, "step": 14906 }, { "epoch": 0.9337446561956811, "grad_norm": 0.8612396303987934, "learning_rate": 1.1452554728304699e-07, "loss": 0.4071, "step": 14907 }, { "epoch": 0.9338072941950234, "grad_norm": 0.8243059038108371, "learning_rate": 1.1430977965799673e-07, "loss": 0.3722, "step": 14908 }, { "epoch": 0.9338699321943658, "grad_norm": 0.877374230428213, "learning_rate": 1.1409421312886249e-07, "loss": 0.3956, "step": 14909 }, { "epoch": 0.933932570193708, "grad_norm": 0.8745324126909703, "learning_rate": 1.1387884770451773e-07, "loss": 0.4001, "step": 14910 }, { "epoch": 0.9339952081930503, "grad_norm": 0.8311582738807403, "learning_rate": 1.1366368339382649e-07, "loss": 0.3862, "step": 14911 }, { "epoch": 0.9340578461923926, "grad_norm": 0.8382069614807007, "learning_rate": 1.1344872020564502e-07, "loss": 0.3565, "step": 14912 }, { "epoch": 0.9341204841917349, "grad_norm": 0.9540585497575519, "learning_rate": 1.1323395814882121e-07, "loss": 0.396, "step": 14913 }, { "epoch": 0.9341831221910772, "grad_norm": 0.8686014264073995, "learning_rate": 1.1301939723219468e-07, "loss": 0.37, "step": 14914 }, { "epoch": 0.9342457601904195, "grad_norm": 0.8874265054588759, "learning_rate": 1.128050374645967e-07, "loss": 0.401, "step": 14915 }, { "epoch": 0.9343083981897619, "grad_norm": 0.8727242587406843, "learning_rate": 1.1259087885485076e-07, "loss": 0.3613, "step": 14916 }, { "epoch": 0.9343710361891041, "grad_norm": 0.9265680980118758, "learning_rate": 1.1237692141177148e-07, "loss": 0.4319, "step": 14917 }, { "epoch": 0.9344336741884465, "grad_norm": 0.9075158892460304, "learning_rate": 1.1216316514416458e-07, "loss": 0.3868, "step": 14918 }, { "epoch": 0.9344963121877887, "grad_norm": 0.8696507785491144, "learning_rate": 1.1194961006082972e-07, "loss": 0.3928, "step": 14919 }, { "epoch": 0.934558950187131, "grad_norm": 0.9562524196568379, "learning_rate": 1.117362561705554e-07, "loss": 0.4034, "step": 14920 }, { "epoch": 0.9346215881864733, "grad_norm": 0.9268965423506195, "learning_rate": 1.1152310348212458e-07, "loss": 0.3991, "step": 14921 }, { "epoch": 0.9346842261858156, "grad_norm": 0.8522373128349177, "learning_rate": 1.1131015200431028e-07, "loss": 0.3787, "step": 14922 }, { "epoch": 0.934746864185158, "grad_norm": 0.8880620992786948, "learning_rate": 1.1109740174587768e-07, "loss": 0.4087, "step": 14923 }, { "epoch": 0.9348095021845002, "grad_norm": 0.8929341367082161, "learning_rate": 1.1088485271558313e-07, "loss": 0.3966, "step": 14924 }, { "epoch": 0.9348721401838426, "grad_norm": 0.9215826792863062, "learning_rate": 1.1067250492217574e-07, "loss": 0.3795, "step": 14925 }, { "epoch": 0.9349347781831848, "grad_norm": 0.8887104624430465, "learning_rate": 1.1046035837439517e-07, "loss": 0.3701, "step": 14926 }, { "epoch": 0.9349974161825271, "grad_norm": 0.858962210785154, "learning_rate": 1.1024841308097389e-07, "loss": 0.3865, "step": 14927 }, { "epoch": 0.9350600541818694, "grad_norm": 0.7515150448219543, "learning_rate": 1.1003666905063548e-07, "loss": 0.3668, "step": 14928 }, { "epoch": 0.9351226921812117, "grad_norm": 0.8845589376856708, "learning_rate": 1.0982512629209518e-07, "loss": 0.3829, "step": 14929 }, { "epoch": 0.935185330180554, "grad_norm": 0.8646364203795543, "learning_rate": 1.0961378481406104e-07, "loss": 0.3964, "step": 14930 }, { "epoch": 0.9352479681798963, "grad_norm": 0.9416062064011014, "learning_rate": 1.0940264462523109e-07, "loss": 0.4358, "step": 14931 }, { "epoch": 0.9353106061792387, "grad_norm": 0.8132364995271923, "learning_rate": 1.091917057342956e-07, "loss": 0.3729, "step": 14932 }, { "epoch": 0.9353732441785809, "grad_norm": 0.7012394066785659, "learning_rate": 1.0898096814993763e-07, "loss": 0.45, "step": 14933 }, { "epoch": 0.9354358821779233, "grad_norm": 0.8308493328312976, "learning_rate": 1.0877043188083081e-07, "loss": 0.3619, "step": 14934 }, { "epoch": 0.9354985201772655, "grad_norm": 0.8396614289530001, "learning_rate": 1.0856009693564041e-07, "loss": 0.3903, "step": 14935 }, { "epoch": 0.9355611581766078, "grad_norm": 0.9269502216604076, "learning_rate": 1.083499633230245e-07, "loss": 0.3798, "step": 14936 }, { "epoch": 0.9356237961759502, "grad_norm": 0.9039424739597433, "learning_rate": 1.0814003105163229e-07, "loss": 0.413, "step": 14937 }, { "epoch": 0.9356864341752924, "grad_norm": 0.860078920809883, "learning_rate": 1.0793030013010408e-07, "loss": 0.3807, "step": 14938 }, { "epoch": 0.9357490721746348, "grad_norm": 1.0183515065285744, "learning_rate": 1.0772077056707297e-07, "loss": 0.3767, "step": 14939 }, { "epoch": 0.935811710173977, "grad_norm": 0.9220386064898971, "learning_rate": 1.0751144237116317e-07, "loss": 0.3761, "step": 14940 }, { "epoch": 0.9358743481733194, "grad_norm": 0.9171624644242028, "learning_rate": 1.0730231555099002e-07, "loss": 0.3686, "step": 14941 }, { "epoch": 0.9359369861726616, "grad_norm": 0.6394471700600383, "learning_rate": 1.0709339011516218e-07, "loss": 0.4479, "step": 14942 }, { "epoch": 0.935999624172004, "grad_norm": 0.8323268372080883, "learning_rate": 1.0688466607227831e-07, "loss": 0.3964, "step": 14943 }, { "epoch": 0.9360622621713462, "grad_norm": 0.8683920779425349, "learning_rate": 1.0667614343092936e-07, "loss": 0.4211, "step": 14944 }, { "epoch": 0.9361249001706885, "grad_norm": 0.8443952710857786, "learning_rate": 1.0646782219969953e-07, "loss": 0.3555, "step": 14945 }, { "epoch": 0.9361875381700309, "grad_norm": 0.8542372288695079, "learning_rate": 1.06259702387162e-07, "loss": 0.3733, "step": 14946 }, { "epoch": 0.9362501761693731, "grad_norm": 0.8756341473143918, "learning_rate": 1.0605178400188265e-07, "loss": 0.4085, "step": 14947 }, { "epoch": 0.9363128141687155, "grad_norm": 0.8395323694616363, "learning_rate": 1.0584406705242134e-07, "loss": 0.3879, "step": 14948 }, { "epoch": 0.9363754521680577, "grad_norm": 0.8752522875424086, "learning_rate": 1.0563655154732621e-07, "loss": 0.4162, "step": 14949 }, { "epoch": 0.9364380901674001, "grad_norm": 0.8727472902257055, "learning_rate": 1.0542923749513934e-07, "loss": 0.3922, "step": 14950 }, { "epoch": 0.9365007281667423, "grad_norm": 0.9785931016371348, "learning_rate": 1.0522212490439332e-07, "loss": 0.3963, "step": 14951 }, { "epoch": 0.9365633661660847, "grad_norm": 0.835453200218373, "learning_rate": 1.0501521378361246e-07, "loss": 0.3578, "step": 14952 }, { "epoch": 0.936626004165427, "grad_norm": 0.8539136491733887, "learning_rate": 1.0480850414131494e-07, "loss": 0.352, "step": 14953 }, { "epoch": 0.9366886421647692, "grad_norm": 0.900775773879923, "learning_rate": 1.0460199598600729e-07, "loss": 0.3766, "step": 14954 }, { "epoch": 0.9367512801641116, "grad_norm": 0.8530717712453164, "learning_rate": 1.0439568932619048e-07, "loss": 0.3535, "step": 14955 }, { "epoch": 0.9368139181634538, "grad_norm": 0.8577573719642143, "learning_rate": 1.0418958417035551e-07, "loss": 0.3648, "step": 14956 }, { "epoch": 0.9368765561627962, "grad_norm": 0.8542352818234393, "learning_rate": 1.0398368052698616e-07, "loss": 0.3978, "step": 14957 }, { "epoch": 0.9369391941621384, "grad_norm": 0.9034953315849998, "learning_rate": 1.0377797840455727e-07, "loss": 0.3406, "step": 14958 }, { "epoch": 0.9370018321614808, "grad_norm": 0.8400733216734257, "learning_rate": 1.0357247781153601e-07, "loss": 0.3997, "step": 14959 }, { "epoch": 0.9370644701608231, "grad_norm": 0.8642730735896035, "learning_rate": 1.0336717875638003e-07, "loss": 0.352, "step": 14960 }, { "epoch": 0.9371271081601653, "grad_norm": 0.6254392045709497, "learning_rate": 1.0316208124753923e-07, "loss": 0.4494, "step": 14961 }, { "epoch": 0.9371897461595077, "grad_norm": 0.968849501168239, "learning_rate": 1.0295718529345689e-07, "loss": 0.3793, "step": 14962 }, { "epoch": 0.9372523841588499, "grad_norm": 0.8725691216494189, "learning_rate": 1.0275249090256567e-07, "loss": 0.4197, "step": 14963 }, { "epoch": 0.9373150221581923, "grad_norm": 0.9433696454355214, "learning_rate": 1.0254799808329053e-07, "loss": 0.4143, "step": 14964 }, { "epoch": 0.9373776601575345, "grad_norm": 0.9201601452343265, "learning_rate": 1.0234370684404915e-07, "loss": 0.367, "step": 14965 }, { "epoch": 0.9374402981568769, "grad_norm": 0.9122288976991932, "learning_rate": 1.0213961719324927e-07, "loss": 0.4277, "step": 14966 }, { "epoch": 0.9375029361562192, "grad_norm": 0.902421172514538, "learning_rate": 1.0193572913929195e-07, "loss": 0.456, "step": 14967 }, { "epoch": 0.9375655741555615, "grad_norm": 0.926927865999392, "learning_rate": 1.0173204269056936e-07, "loss": 0.3933, "step": 14968 }, { "epoch": 0.9376282121549038, "grad_norm": 0.7752449100387977, "learning_rate": 1.0152855785546534e-07, "loss": 0.3835, "step": 14969 }, { "epoch": 0.937690850154246, "grad_norm": 0.8409916037786301, "learning_rate": 1.0132527464235431e-07, "loss": 0.3621, "step": 14970 }, { "epoch": 0.9377534881535884, "grad_norm": 0.9387741937405402, "learning_rate": 1.0112219305960514e-07, "loss": 0.3894, "step": 14971 }, { "epoch": 0.9378161261529306, "grad_norm": 0.5727712530606373, "learning_rate": 1.0091931311557557e-07, "loss": 0.4377, "step": 14972 }, { "epoch": 0.937878764152273, "grad_norm": 0.8823421174016246, "learning_rate": 1.0071663481861615e-07, "loss": 0.3554, "step": 14973 }, { "epoch": 0.9379414021516153, "grad_norm": 0.9213356261454885, "learning_rate": 1.0051415817706911e-07, "loss": 0.3924, "step": 14974 }, { "epoch": 0.9380040401509576, "grad_norm": 0.906537426064168, "learning_rate": 1.0031188319926887e-07, "loss": 0.3895, "step": 14975 }, { "epoch": 0.9380666781502999, "grad_norm": 0.9045452568728798, "learning_rate": 1.0010980989354102e-07, "loss": 0.4245, "step": 14976 }, { "epoch": 0.9381293161496422, "grad_norm": 0.8936935632060466, "learning_rate": 9.990793826820333e-08, "loss": 0.4335, "step": 14977 }, { "epoch": 0.9381919541489845, "grad_norm": 0.8162069515934607, "learning_rate": 9.970626833156361e-08, "loss": 0.3391, "step": 14978 }, { "epoch": 0.9382545921483267, "grad_norm": 0.8866911733321454, "learning_rate": 9.950480009192354e-08, "loss": 0.3909, "step": 14979 }, { "epoch": 0.9383172301476691, "grad_norm": 0.9686447526653326, "learning_rate": 9.930353355757594e-08, "loss": 0.4082, "step": 14980 }, { "epoch": 0.9383798681470114, "grad_norm": 0.8422468624440923, "learning_rate": 9.910246873680418e-08, "loss": 0.3657, "step": 14981 }, { "epoch": 0.9384425061463537, "grad_norm": 0.9541171253903789, "learning_rate": 9.890160563788498e-08, "loss": 0.4016, "step": 14982 }, { "epoch": 0.938505144145696, "grad_norm": 0.7685938104964755, "learning_rate": 9.870094426908506e-08, "loss": 0.3361, "step": 14983 }, { "epoch": 0.9385677821450383, "grad_norm": 0.6462953196914702, "learning_rate": 9.850048463866336e-08, "loss": 0.4403, "step": 14984 }, { "epoch": 0.9386304201443806, "grad_norm": 0.843608234335462, "learning_rate": 9.830022675487216e-08, "loss": 0.3687, "step": 14985 }, { "epoch": 0.9386930581437228, "grad_norm": 0.9245198949508849, "learning_rate": 9.810017062595322e-08, "loss": 0.384, "step": 14986 }, { "epoch": 0.9387556961430652, "grad_norm": 0.8547119702805077, "learning_rate": 9.790031626014107e-08, "loss": 0.3624, "step": 14987 }, { "epoch": 0.9388183341424075, "grad_norm": 0.899765247436963, "learning_rate": 9.770066366566189e-08, "loss": 0.392, "step": 14988 }, { "epoch": 0.9388809721417498, "grad_norm": 0.8284542660554622, "learning_rate": 9.750121285073355e-08, "loss": 0.3433, "step": 14989 }, { "epoch": 0.9389436101410921, "grad_norm": 0.6139394693173027, "learning_rate": 9.730196382356504e-08, "loss": 0.4652, "step": 14990 }, { "epoch": 0.9390062481404344, "grad_norm": 0.8634082258614973, "learning_rate": 9.71029165923576e-08, "loss": 0.385, "step": 14991 }, { "epoch": 0.9390688861397767, "grad_norm": 0.9024714751119672, "learning_rate": 9.690407116530409e-08, "loss": 0.3607, "step": 14992 }, { "epoch": 0.939131524139119, "grad_norm": 0.9360065497260018, "learning_rate": 9.670542755058909e-08, "loss": 0.3454, "step": 14993 }, { "epoch": 0.9391941621384613, "grad_norm": 0.931992438878749, "learning_rate": 9.650698575638883e-08, "loss": 0.3957, "step": 14994 }, { "epoch": 0.9392568001378035, "grad_norm": 0.879882422065157, "learning_rate": 9.630874579087124e-08, "loss": 0.3859, "step": 14995 }, { "epoch": 0.9393194381371459, "grad_norm": 0.9779258276358039, "learning_rate": 9.611070766219533e-08, "loss": 0.4083, "step": 14996 }, { "epoch": 0.9393820761364882, "grad_norm": 0.867932464455154, "learning_rate": 9.591287137851346e-08, "loss": 0.3713, "step": 14997 }, { "epoch": 0.9394447141358305, "grad_norm": 0.8571455880730287, "learning_rate": 9.571523694796803e-08, "loss": 0.3811, "step": 14998 }, { "epoch": 0.9395073521351728, "grad_norm": 0.8403767786999081, "learning_rate": 9.551780437869362e-08, "loss": 0.4187, "step": 14999 }, { "epoch": 0.9395699901345151, "grad_norm": 0.9188640649905074, "learning_rate": 9.532057367881653e-08, "loss": 0.4296, "step": 15000 }, { "epoch": 0.9396326281338574, "grad_norm": 0.8440263208321244, "learning_rate": 9.512354485645525e-08, "loss": 0.3911, "step": 15001 }, { "epoch": 0.9396952661331998, "grad_norm": 0.8275315943371241, "learning_rate": 9.492671791971886e-08, "loss": 0.3646, "step": 15002 }, { "epoch": 0.939757904132542, "grad_norm": 0.8966430970268109, "learning_rate": 9.473009287670976e-08, "loss": 0.3621, "step": 15003 }, { "epoch": 0.9398205421318843, "grad_norm": 0.8629530687734365, "learning_rate": 9.453366973551981e-08, "loss": 0.405, "step": 15004 }, { "epoch": 0.9398831801312266, "grad_norm": 0.8578869630172165, "learning_rate": 9.433744850423476e-08, "loss": 0.3564, "step": 15005 }, { "epoch": 0.9399458181305689, "grad_norm": 0.873351697704251, "learning_rate": 9.414142919093094e-08, "loss": 0.3676, "step": 15006 }, { "epoch": 0.9400084561299112, "grad_norm": 0.8807952309818285, "learning_rate": 9.39456118036769e-08, "loss": 0.359, "step": 15007 }, { "epoch": 0.9400710941292535, "grad_norm": 0.9198203717044129, "learning_rate": 9.374999635053172e-08, "loss": 0.3985, "step": 15008 }, { "epoch": 0.9401337321285959, "grad_norm": 0.8284009255083131, "learning_rate": 9.355458283954733e-08, "loss": 0.3747, "step": 15009 }, { "epoch": 0.9401963701279381, "grad_norm": 0.8768815568479759, "learning_rate": 9.335937127876727e-08, "loss": 0.3992, "step": 15010 }, { "epoch": 0.9402590081272804, "grad_norm": 0.864491603673651, "learning_rate": 9.316436167622567e-08, "loss": 0.3972, "step": 15011 }, { "epoch": 0.9403216461266227, "grad_norm": 0.9208867569380882, "learning_rate": 9.296955403995e-08, "loss": 0.3785, "step": 15012 }, { "epoch": 0.940384284125965, "grad_norm": 0.9057563514088213, "learning_rate": 9.277494837795775e-08, "loss": 0.4086, "step": 15013 }, { "epoch": 0.9404469221253073, "grad_norm": 0.8918759174071023, "learning_rate": 9.258054469825972e-08, "loss": 0.4103, "step": 15014 }, { "epoch": 0.9405095601246496, "grad_norm": 0.8288735299812139, "learning_rate": 9.238634300885785e-08, "loss": 0.3955, "step": 15015 }, { "epoch": 0.940572198123992, "grad_norm": 0.877303180540676, "learning_rate": 9.219234331774462e-08, "loss": 0.3415, "step": 15016 }, { "epoch": 0.9406348361233342, "grad_norm": 0.9188588153149, "learning_rate": 9.199854563290588e-08, "loss": 0.3884, "step": 15017 }, { "epoch": 0.9406974741226766, "grad_norm": 0.9045105432006103, "learning_rate": 9.180494996231747e-08, "loss": 0.4267, "step": 15018 }, { "epoch": 0.9407601121220188, "grad_norm": 0.8147147901146932, "learning_rate": 9.161155631394858e-08, "loss": 0.3678, "step": 15019 }, { "epoch": 0.9408227501213611, "grad_norm": 0.90698343581865, "learning_rate": 9.141836469575893e-08, "loss": 0.3799, "step": 15020 }, { "epoch": 0.9408853881207034, "grad_norm": 0.8680999491769914, "learning_rate": 9.122537511569996e-08, "loss": 0.3611, "step": 15021 }, { "epoch": 0.9409480261200457, "grad_norm": 0.8494937901651554, "learning_rate": 9.10325875817164e-08, "loss": 0.3749, "step": 15022 }, { "epoch": 0.941010664119388, "grad_norm": 0.8856961129738505, "learning_rate": 9.084000210174248e-08, "loss": 0.3799, "step": 15023 }, { "epoch": 0.9410733021187303, "grad_norm": 0.9282434430789732, "learning_rate": 9.064761868370519e-08, "loss": 0.3984, "step": 15024 }, { "epoch": 0.9411359401180727, "grad_norm": 0.7780995323790996, "learning_rate": 9.04554373355232e-08, "loss": 0.3728, "step": 15025 }, { "epoch": 0.9411985781174149, "grad_norm": 0.8554683251549439, "learning_rate": 9.026345806510683e-08, "loss": 0.3839, "step": 15026 }, { "epoch": 0.9412612161167573, "grad_norm": 0.8513877869088127, "learning_rate": 9.007168088035756e-08, "loss": 0.3775, "step": 15027 }, { "epoch": 0.9413238541160995, "grad_norm": 0.8367091568398095, "learning_rate": 8.988010578916962e-08, "loss": 0.3808, "step": 15028 }, { "epoch": 0.9413864921154418, "grad_norm": 0.8777310807067898, "learning_rate": 8.968873279942724e-08, "loss": 0.3639, "step": 15029 }, { "epoch": 0.9414491301147841, "grad_norm": 0.8473700447344359, "learning_rate": 8.949756191900804e-08, "loss": 0.3372, "step": 15030 }, { "epoch": 0.9415117681141264, "grad_norm": 0.853888837729198, "learning_rate": 8.930659315578127e-08, "loss": 0.3942, "step": 15031 }, { "epoch": 0.9415744061134688, "grad_norm": 0.9311133491957395, "learning_rate": 8.91158265176062e-08, "loss": 0.3962, "step": 15032 }, { "epoch": 0.941637044112811, "grad_norm": 0.8657334304350252, "learning_rate": 8.892526201233543e-08, "loss": 0.3766, "step": 15033 }, { "epoch": 0.9416996821121534, "grad_norm": 0.8692807422768174, "learning_rate": 8.873489964781212e-08, "loss": 0.3811, "step": 15034 }, { "epoch": 0.9417623201114956, "grad_norm": 0.8106144448290615, "learning_rate": 8.854473943187225e-08, "loss": 0.3677, "step": 15035 }, { "epoch": 0.941824958110838, "grad_norm": 0.8849624764857373, "learning_rate": 8.835478137234233e-08, "loss": 0.3661, "step": 15036 }, { "epoch": 0.9418875961101802, "grad_norm": 0.8983354437111402, "learning_rate": 8.816502547704109e-08, "loss": 0.4237, "step": 15037 }, { "epoch": 0.9419502341095225, "grad_norm": 0.8570807125445875, "learning_rate": 8.797547175377897e-08, "loss": 0.3344, "step": 15038 }, { "epoch": 0.9420128721088649, "grad_norm": 0.8285971868945287, "learning_rate": 8.778612021035804e-08, "loss": 0.3631, "step": 15039 }, { "epoch": 0.9420755101082071, "grad_norm": 0.8681284486776323, "learning_rate": 8.759697085457264e-08, "loss": 0.3764, "step": 15040 }, { "epoch": 0.9421381481075495, "grad_norm": 0.8543070758281515, "learning_rate": 8.740802369420764e-08, "loss": 0.3881, "step": 15041 }, { "epoch": 0.9422007861068917, "grad_norm": 0.8950293250706944, "learning_rate": 8.721927873704073e-08, "loss": 0.3883, "step": 15042 }, { "epoch": 0.9422634241062341, "grad_norm": 0.8581271202830603, "learning_rate": 8.703073599083955e-08, "loss": 0.3944, "step": 15043 }, { "epoch": 0.9423260621055763, "grad_norm": 0.5852656299861249, "learning_rate": 8.684239546336515e-08, "loss": 0.4338, "step": 15044 }, { "epoch": 0.9423887001049186, "grad_norm": 0.932925579221156, "learning_rate": 8.665425716236963e-08, "loss": 0.4264, "step": 15045 }, { "epoch": 0.942451338104261, "grad_norm": 0.8465205949018723, "learning_rate": 8.646632109559683e-08, "loss": 0.3716, "step": 15046 }, { "epoch": 0.9425139761036032, "grad_norm": 0.8884334160768661, "learning_rate": 8.62785872707822e-08, "loss": 0.387, "step": 15047 }, { "epoch": 0.9425766141029456, "grad_norm": 0.856945213293012, "learning_rate": 8.60910556956529e-08, "loss": 0.3465, "step": 15048 }, { "epoch": 0.9426392521022878, "grad_norm": 0.9275148367817185, "learning_rate": 8.590372637792832e-08, "loss": 0.4536, "step": 15049 }, { "epoch": 0.9427018901016302, "grad_norm": 0.8574629232050859, "learning_rate": 8.57165993253184e-08, "loss": 0.3616, "step": 15050 }, { "epoch": 0.9427645281009724, "grad_norm": 0.9062461195690011, "learning_rate": 8.552967454552475e-08, "loss": 0.4133, "step": 15051 }, { "epoch": 0.9428271661003148, "grad_norm": 0.9039352241779474, "learning_rate": 8.534295204624232e-08, "loss": 0.3704, "step": 15052 }, { "epoch": 0.9428898040996571, "grad_norm": 0.8735891616919268, "learning_rate": 8.515643183515665e-08, "loss": 0.3464, "step": 15053 }, { "epoch": 0.9429524420989993, "grad_norm": 0.9924452003357368, "learning_rate": 8.497011391994381e-08, "loss": 0.4286, "step": 15054 }, { "epoch": 0.9430150800983417, "grad_norm": 0.8295186577279857, "learning_rate": 8.478399830827321e-08, "loss": 0.3759, "step": 15055 }, { "epoch": 0.9430777180976839, "grad_norm": 0.8779085132111857, "learning_rate": 8.459808500780597e-08, "loss": 0.3677, "step": 15056 }, { "epoch": 0.9431403560970263, "grad_norm": 0.8402429373829123, "learning_rate": 8.441237402619373e-08, "loss": 0.335, "step": 15057 }, { "epoch": 0.9432029940963685, "grad_norm": 0.8038403406607492, "learning_rate": 8.422686537108037e-08, "loss": 0.3641, "step": 15058 }, { "epoch": 0.9432656320957109, "grad_norm": 0.9203091974024096, "learning_rate": 8.404155905010203e-08, "loss": 0.3889, "step": 15059 }, { "epoch": 0.9433282700950532, "grad_norm": 0.7656766000779617, "learning_rate": 8.385645507088536e-08, "loss": 0.3418, "step": 15060 }, { "epoch": 0.9433909080943955, "grad_norm": 0.8313866937484016, "learning_rate": 8.367155344104982e-08, "loss": 0.3689, "step": 15061 }, { "epoch": 0.9434535460937378, "grad_norm": 0.8853392771852597, "learning_rate": 8.348685416820546e-08, "loss": 0.3811, "step": 15062 }, { "epoch": 0.94351618409308, "grad_norm": 0.8989111291103703, "learning_rate": 8.330235725995395e-08, "loss": 0.3747, "step": 15063 }, { "epoch": 0.9435788220924224, "grad_norm": 0.9115295698050226, "learning_rate": 8.311806272389089e-08, "loss": 0.3651, "step": 15064 }, { "epoch": 0.9436414600917646, "grad_norm": 0.9041824540782172, "learning_rate": 8.293397056760078e-08, "loss": 0.3873, "step": 15065 }, { "epoch": 0.943704098091107, "grad_norm": 0.9215098490840182, "learning_rate": 8.275008079866142e-08, "loss": 0.3611, "step": 15066 }, { "epoch": 0.9437667360904493, "grad_norm": 0.9029324620680333, "learning_rate": 8.256639342464068e-08, "loss": 0.3797, "step": 15067 }, { "epoch": 0.9438293740897916, "grad_norm": 0.8882971994184312, "learning_rate": 8.238290845310027e-08, "loss": 0.4231, "step": 15068 }, { "epoch": 0.9438920120891339, "grad_norm": 0.8546895311559969, "learning_rate": 8.219962589159192e-08, "loss": 0.3766, "step": 15069 }, { "epoch": 0.9439546500884761, "grad_norm": 0.9219635817465593, "learning_rate": 8.201654574766016e-08, "loss": 0.4483, "step": 15070 }, { "epoch": 0.9440172880878185, "grad_norm": 0.8736467468696671, "learning_rate": 8.183366802884007e-08, "loss": 0.3589, "step": 15071 }, { "epoch": 0.9440799260871607, "grad_norm": 0.9318194626142455, "learning_rate": 8.165099274265897e-08, "loss": 0.4156, "step": 15072 }, { "epoch": 0.9441425640865031, "grad_norm": 0.8827914191864529, "learning_rate": 8.146851989663585e-08, "loss": 0.3801, "step": 15073 }, { "epoch": 0.9442052020858454, "grad_norm": 0.8478875135479487, "learning_rate": 8.128624949828134e-08, "loss": 0.4021, "step": 15074 }, { "epoch": 0.9442678400851877, "grad_norm": 0.9463648967542433, "learning_rate": 8.110418155509836e-08, "loss": 0.3961, "step": 15075 }, { "epoch": 0.94433047808453, "grad_norm": 0.8013979175614064, "learning_rate": 8.092231607457924e-08, "loss": 0.3784, "step": 15076 }, { "epoch": 0.9443931160838723, "grad_norm": 0.8682717915605606, "learning_rate": 8.074065306421131e-08, "loss": 0.3851, "step": 15077 }, { "epoch": 0.9444557540832146, "grad_norm": 0.9173896154108943, "learning_rate": 8.055919253147082e-08, "loss": 0.3994, "step": 15078 }, { "epoch": 0.9445183920825568, "grad_norm": 0.8420364197043873, "learning_rate": 8.037793448382736e-08, "loss": 0.3749, "step": 15079 }, { "epoch": 0.9445810300818992, "grad_norm": 0.9029445915224482, "learning_rate": 8.019687892874106e-08, "loss": 0.3792, "step": 15080 }, { "epoch": 0.9446436680812414, "grad_norm": 0.8164357510595971, "learning_rate": 8.00160258736643e-08, "loss": 0.3511, "step": 15081 }, { "epoch": 0.9447063060805838, "grad_norm": 0.579050584196693, "learning_rate": 7.983537532604114e-08, "loss": 0.4457, "step": 15082 }, { "epoch": 0.9447689440799261, "grad_norm": 0.8501706104779784, "learning_rate": 7.965492729330725e-08, "loss": 0.3832, "step": 15083 }, { "epoch": 0.9448315820792684, "grad_norm": 0.8502739767099133, "learning_rate": 7.947468178289009e-08, "loss": 0.4133, "step": 15084 }, { "epoch": 0.9448942200786107, "grad_norm": 0.9306206949448025, "learning_rate": 7.929463880220756e-08, "loss": 0.442, "step": 15085 }, { "epoch": 0.944956858077953, "grad_norm": 0.6325357973729725, "learning_rate": 7.911479835867153e-08, "loss": 0.4259, "step": 15086 }, { "epoch": 0.9450194960772953, "grad_norm": 0.8753759279752198, "learning_rate": 7.893516045968386e-08, "loss": 0.381, "step": 15087 }, { "epoch": 0.9450821340766375, "grad_norm": 0.8602884644192695, "learning_rate": 7.875572511263863e-08, "loss": 0.3682, "step": 15088 }, { "epoch": 0.9451447720759799, "grad_norm": 0.8388082108833839, "learning_rate": 7.857649232492049e-08, "loss": 0.3582, "step": 15089 }, { "epoch": 0.9452074100753222, "grad_norm": 0.7862719259706356, "learning_rate": 7.839746210390742e-08, "loss": 0.3568, "step": 15090 }, { "epoch": 0.9452700480746645, "grad_norm": 0.8736361478322904, "learning_rate": 7.821863445696853e-08, "loss": 0.3962, "step": 15091 }, { "epoch": 0.9453326860740068, "grad_norm": 0.8997244324522835, "learning_rate": 7.804000939146461e-08, "loss": 0.376, "step": 15092 }, { "epoch": 0.9453953240733491, "grad_norm": 0.8825800846685155, "learning_rate": 7.786158691474699e-08, "loss": 0.3487, "step": 15093 }, { "epoch": 0.9454579620726914, "grad_norm": 0.9301132749807726, "learning_rate": 7.768336703416035e-08, "loss": 0.4218, "step": 15094 }, { "epoch": 0.9455206000720336, "grad_norm": 0.8879169455258805, "learning_rate": 7.750534975703939e-08, "loss": 0.4122, "step": 15095 }, { "epoch": 0.945583238071376, "grad_norm": 0.8799609519821208, "learning_rate": 7.732753509071212e-08, "loss": 0.3609, "step": 15096 }, { "epoch": 0.9456458760707183, "grad_norm": 0.9059213652778431, "learning_rate": 7.714992304249769e-08, "loss": 0.3892, "step": 15097 }, { "epoch": 0.9457085140700606, "grad_norm": 0.8708754889219446, "learning_rate": 7.697251361970526e-08, "loss": 0.3958, "step": 15098 }, { "epoch": 0.9457711520694029, "grad_norm": 0.8391827622005361, "learning_rate": 7.679530682963843e-08, "loss": 0.3678, "step": 15099 }, { "epoch": 0.9458337900687452, "grad_norm": 0.830268964751129, "learning_rate": 7.66183026795908e-08, "loss": 0.3677, "step": 15100 }, { "epoch": 0.9458964280680875, "grad_norm": 0.9324853484210957, "learning_rate": 7.644150117684767e-08, "loss": 0.3759, "step": 15101 }, { "epoch": 0.9459590660674299, "grad_norm": 0.8331055740304505, "learning_rate": 7.626490232868599e-08, "loss": 0.3571, "step": 15102 }, { "epoch": 0.9460217040667721, "grad_norm": 0.6008206658765382, "learning_rate": 7.60885061423744e-08, "loss": 0.4347, "step": 15103 }, { "epoch": 0.9460843420661144, "grad_norm": 0.809035032656066, "learning_rate": 7.591231262517428e-08, "loss": 0.3238, "step": 15104 }, { "epoch": 0.9461469800654567, "grad_norm": 0.8919737390462726, "learning_rate": 7.57363217843371e-08, "loss": 0.4016, "step": 15105 }, { "epoch": 0.946209618064799, "grad_norm": 0.8401098248494416, "learning_rate": 7.556053362710702e-08, "loss": 0.3801, "step": 15106 }, { "epoch": 0.9462722560641413, "grad_norm": 0.9457438234499866, "learning_rate": 7.53849481607194e-08, "loss": 0.4178, "step": 15107 }, { "epoch": 0.9463348940634836, "grad_norm": 0.9524289002847265, "learning_rate": 7.52095653924012e-08, "loss": 0.4229, "step": 15108 }, { "epoch": 0.946397532062826, "grad_norm": 0.8852801752813008, "learning_rate": 7.503438532937169e-08, "loss": 0.4169, "step": 15109 }, { "epoch": 0.9464601700621682, "grad_norm": 0.9324892131854013, "learning_rate": 7.485940797884118e-08, "loss": 0.403, "step": 15110 }, { "epoch": 0.9465228080615106, "grad_norm": 0.8880535208384499, "learning_rate": 7.468463334801113e-08, "loss": 0.3653, "step": 15111 }, { "epoch": 0.9465854460608528, "grad_norm": 0.8083086875165644, "learning_rate": 7.45100614440758e-08, "loss": 0.3421, "step": 15112 }, { "epoch": 0.9466480840601951, "grad_norm": 0.8836173237227464, "learning_rate": 7.433569227422056e-08, "loss": 0.3835, "step": 15113 }, { "epoch": 0.9467107220595374, "grad_norm": 0.7757431784710443, "learning_rate": 7.416152584562242e-08, "loss": 0.3258, "step": 15114 }, { "epoch": 0.9467733600588797, "grad_norm": 0.872938663273693, "learning_rate": 7.398756216545011e-08, "loss": 0.3888, "step": 15115 }, { "epoch": 0.946835998058222, "grad_norm": 0.6445781179228164, "learning_rate": 7.381380124086457e-08, "loss": 0.4723, "step": 15116 }, { "epoch": 0.9468986360575643, "grad_norm": 0.9466855288261795, "learning_rate": 7.364024307901674e-08, "loss": 0.4269, "step": 15117 }, { "epoch": 0.9469612740569067, "grad_norm": 0.8535603313552126, "learning_rate": 7.346688768705146e-08, "loss": 0.3858, "step": 15118 }, { "epoch": 0.9470239120562489, "grad_norm": 0.7860891134415974, "learning_rate": 7.329373507210302e-08, "loss": 0.3476, "step": 15119 }, { "epoch": 0.9470865500555912, "grad_norm": 0.8996584469426612, "learning_rate": 7.312078524129906e-08, "loss": 0.3961, "step": 15120 }, { "epoch": 0.9471491880549335, "grad_norm": 0.8680335030152334, "learning_rate": 7.294803820175833e-08, "loss": 0.3623, "step": 15121 }, { "epoch": 0.9472118260542758, "grad_norm": 0.8650025392507383, "learning_rate": 7.277549396059013e-08, "loss": 0.3759, "step": 15122 }, { "epoch": 0.9472744640536181, "grad_norm": 0.9643424482817751, "learning_rate": 7.260315252489769e-08, "loss": 0.4138, "step": 15123 }, { "epoch": 0.9473371020529604, "grad_norm": 0.8516200160862064, "learning_rate": 7.243101390177365e-08, "loss": 0.3824, "step": 15124 }, { "epoch": 0.9473997400523028, "grad_norm": 0.8415437457003867, "learning_rate": 7.225907809830401e-08, "loss": 0.3664, "step": 15125 }, { "epoch": 0.947462378051645, "grad_norm": 0.8023137413848173, "learning_rate": 7.208734512156534e-08, "loss": 0.3444, "step": 15126 }, { "epoch": 0.9475250160509874, "grad_norm": 0.8649747159617228, "learning_rate": 7.191581497862588e-08, "loss": 0.3777, "step": 15127 }, { "epoch": 0.9475876540503296, "grad_norm": 0.812909994197236, "learning_rate": 7.174448767654662e-08, "loss": 0.403, "step": 15128 }, { "epoch": 0.9476502920496719, "grad_norm": 0.8587039360717753, "learning_rate": 7.157336322237862e-08, "loss": 0.4062, "step": 15129 }, { "epoch": 0.9477129300490142, "grad_norm": 0.8866525834827717, "learning_rate": 7.140244162316623e-08, "loss": 0.4236, "step": 15130 }, { "epoch": 0.9477755680483565, "grad_norm": 0.9053852611884159, "learning_rate": 7.123172288594327e-08, "loss": 0.3577, "step": 15131 }, { "epoch": 0.9478382060476989, "grad_norm": 0.9015876408314591, "learning_rate": 7.1061207017738e-08, "loss": 0.3814, "step": 15132 }, { "epoch": 0.9479008440470411, "grad_norm": 0.9272912175763967, "learning_rate": 7.089089402556815e-08, "loss": 0.4045, "step": 15133 }, { "epoch": 0.9479634820463835, "grad_norm": 0.7901990319626707, "learning_rate": 7.072078391644421e-08, "loss": 0.3566, "step": 15134 }, { "epoch": 0.9480261200457257, "grad_norm": 0.9149550104214828, "learning_rate": 7.055087669736782e-08, "loss": 0.3951, "step": 15135 }, { "epoch": 0.9480887580450681, "grad_norm": 0.8583201376145567, "learning_rate": 7.038117237533227e-08, "loss": 0.3388, "step": 15136 }, { "epoch": 0.9481513960444103, "grad_norm": 0.805067196436258, "learning_rate": 7.021167095732251e-08, "loss": 0.305, "step": 15137 }, { "epoch": 0.9482140340437526, "grad_norm": 0.9226588394363644, "learning_rate": 7.00423724503152e-08, "loss": 0.3667, "step": 15138 }, { "epoch": 0.948276672043095, "grad_norm": 0.8098700623771306, "learning_rate": 6.987327686127921e-08, "loss": 0.39, "step": 15139 }, { "epoch": 0.9483393100424372, "grad_norm": 0.8252514292232077, "learning_rate": 6.970438419717396e-08, "loss": 0.3621, "step": 15140 }, { "epoch": 0.9484019480417796, "grad_norm": 0.9008757871531998, "learning_rate": 6.953569446495112e-08, "loss": 0.4405, "step": 15141 }, { "epoch": 0.9484645860411218, "grad_norm": 0.9368795598292678, "learning_rate": 6.936720767155459e-08, "loss": 0.4004, "step": 15142 }, { "epoch": 0.9485272240404642, "grad_norm": 0.9726035271030847, "learning_rate": 6.919892382391879e-08, "loss": 0.4238, "step": 15143 }, { "epoch": 0.9485898620398064, "grad_norm": 0.646511547724569, "learning_rate": 6.903084292897044e-08, "loss": 0.4683, "step": 15144 }, { "epoch": 0.9486525000391488, "grad_norm": 0.8478179710725083, "learning_rate": 6.886296499362844e-08, "loss": 0.3685, "step": 15145 }, { "epoch": 0.9487151380384911, "grad_norm": 0.8383188356509546, "learning_rate": 6.869529002480168e-08, "loss": 0.403, "step": 15146 }, { "epoch": 0.9487777760378333, "grad_norm": 0.8815048659386333, "learning_rate": 6.85278180293919e-08, "loss": 0.3836, "step": 15147 }, { "epoch": 0.9488404140371757, "grad_norm": 0.7926881952603503, "learning_rate": 6.836054901429245e-08, "loss": 0.3949, "step": 15148 }, { "epoch": 0.9489030520365179, "grad_norm": 0.9806884527468106, "learning_rate": 6.819348298638839e-08, "loss": 0.4027, "step": 15149 }, { "epoch": 0.9489656900358603, "grad_norm": 0.9373877024027468, "learning_rate": 6.802661995255533e-08, "loss": 0.4429, "step": 15150 }, { "epoch": 0.9490283280352025, "grad_norm": 0.850290935452023, "learning_rate": 6.785995991966276e-08, "loss": 0.3968, "step": 15151 }, { "epoch": 0.9490909660345449, "grad_norm": 0.6667995819587709, "learning_rate": 6.769350289456966e-08, "loss": 0.4353, "step": 15152 }, { "epoch": 0.9491536040338872, "grad_norm": 0.8988012124252165, "learning_rate": 6.75272488841272e-08, "loss": 0.4008, "step": 15153 }, { "epoch": 0.9492162420332294, "grad_norm": 0.7947650443158673, "learning_rate": 6.73611978951788e-08, "loss": 0.3263, "step": 15154 }, { "epoch": 0.9492788800325718, "grad_norm": 0.8374899957768258, "learning_rate": 6.7195349934559e-08, "loss": 0.3828, "step": 15155 }, { "epoch": 0.949341518031914, "grad_norm": 0.6168483389885411, "learning_rate": 6.7029705009094e-08, "loss": 0.4506, "step": 15156 }, { "epoch": 0.9494041560312564, "grad_norm": 0.8318807129083625, "learning_rate": 6.686426312560224e-08, "loss": 0.3852, "step": 15157 }, { "epoch": 0.9494667940305986, "grad_norm": 0.8393372337770555, "learning_rate": 6.66990242908927e-08, "loss": 0.3829, "step": 15158 }, { "epoch": 0.949529432029941, "grad_norm": 0.87593612123407, "learning_rate": 6.653398851176718e-08, "loss": 0.3998, "step": 15159 }, { "epoch": 0.9495920700292833, "grad_norm": 0.908124428373624, "learning_rate": 6.636915579501801e-08, "loss": 0.4054, "step": 15160 }, { "epoch": 0.9496547080286256, "grad_norm": 0.8825382751226057, "learning_rate": 6.620452614743089e-08, "loss": 0.4286, "step": 15161 }, { "epoch": 0.9497173460279679, "grad_norm": 0.8162059340917535, "learning_rate": 6.604009957578095e-08, "loss": 0.3649, "step": 15162 }, { "epoch": 0.9497799840273101, "grad_norm": 0.8709000184141983, "learning_rate": 6.58758760868361e-08, "loss": 0.405, "step": 15163 }, { "epoch": 0.9498426220266525, "grad_norm": 0.8461824038331884, "learning_rate": 6.571185568735594e-08, "loss": 0.3363, "step": 15164 }, { "epoch": 0.9499052600259947, "grad_norm": 0.8482351107950012, "learning_rate": 6.55480383840923e-08, "loss": 0.3688, "step": 15165 }, { "epoch": 0.9499678980253371, "grad_norm": 0.8783912390452302, "learning_rate": 6.538442418378643e-08, "loss": 0.3847, "step": 15166 }, { "epoch": 0.9500305360246794, "grad_norm": 0.8091029985868348, "learning_rate": 6.522101309317352e-08, "loss": 0.3946, "step": 15167 }, { "epoch": 0.9500931740240217, "grad_norm": 0.9746687304978656, "learning_rate": 6.505780511897985e-08, "loss": 0.4584, "step": 15168 }, { "epoch": 0.950155812023364, "grad_norm": 0.5546194949179687, "learning_rate": 6.489480026792339e-08, "loss": 0.4494, "step": 15169 }, { "epoch": 0.9502184500227063, "grad_norm": 0.8545235857456099, "learning_rate": 6.473199854671264e-08, "loss": 0.3924, "step": 15170 }, { "epoch": 0.9502810880220486, "grad_norm": 0.8992308773200962, "learning_rate": 6.456939996204892e-08, "loss": 0.4016, "step": 15171 }, { "epoch": 0.9503437260213908, "grad_norm": 0.8576701654237787, "learning_rate": 6.440700452062465e-08, "loss": 0.3629, "step": 15172 }, { "epoch": 0.9504063640207332, "grad_norm": 0.8036932817332635, "learning_rate": 6.424481222912449e-08, "loss": 0.3307, "step": 15173 }, { "epoch": 0.9504690020200754, "grad_norm": 0.6218048671045283, "learning_rate": 6.408282309422308e-08, "loss": 0.4356, "step": 15174 }, { "epoch": 0.9505316400194178, "grad_norm": 0.8967478035001041, "learning_rate": 6.392103712258957e-08, "loss": 0.4192, "step": 15175 }, { "epoch": 0.9505942780187601, "grad_norm": 0.891487185519194, "learning_rate": 6.375945432088249e-08, "loss": 0.3934, "step": 15176 }, { "epoch": 0.9506569160181024, "grad_norm": 0.8663686283785116, "learning_rate": 6.359807469575153e-08, "loss": 0.3916, "step": 15177 }, { "epoch": 0.9507195540174447, "grad_norm": 0.8546869862189056, "learning_rate": 6.343689825384081e-08, "loss": 0.381, "step": 15178 }, { "epoch": 0.9507821920167869, "grad_norm": 0.8529755531848552, "learning_rate": 6.327592500178393e-08, "loss": 0.3596, "step": 15179 }, { "epoch": 0.9508448300161293, "grad_norm": 0.9240758688583309, "learning_rate": 6.311515494620613e-08, "loss": 0.3971, "step": 15180 }, { "epoch": 0.9509074680154715, "grad_norm": 0.5949771221986326, "learning_rate": 6.295458809372434e-08, "loss": 0.4372, "step": 15181 }, { "epoch": 0.9509701060148139, "grad_norm": 0.9031642832076464, "learning_rate": 6.279422445094829e-08, "loss": 0.3821, "step": 15182 }, { "epoch": 0.9510327440141562, "grad_norm": 0.9778693046006339, "learning_rate": 6.263406402447824e-08, "loss": 0.3559, "step": 15183 }, { "epoch": 0.9510953820134985, "grad_norm": 0.8307377455103001, "learning_rate": 6.247410682090671e-08, "loss": 0.3819, "step": 15184 }, { "epoch": 0.9511580200128408, "grad_norm": 0.9049246454439309, "learning_rate": 6.231435284681786e-08, "loss": 0.3966, "step": 15185 }, { "epoch": 0.9512206580121831, "grad_norm": 0.955229242956645, "learning_rate": 6.215480210878644e-08, "loss": 0.3962, "step": 15186 }, { "epoch": 0.9512832960115254, "grad_norm": 0.8942132938914066, "learning_rate": 6.199545461337941e-08, "loss": 0.3474, "step": 15187 }, { "epoch": 0.9513459340108676, "grad_norm": 0.8165754647841473, "learning_rate": 6.183631036715598e-08, "loss": 0.3496, "step": 15188 }, { "epoch": 0.95140857201021, "grad_norm": 0.8966908377544361, "learning_rate": 6.167736937666702e-08, "loss": 0.3664, "step": 15189 }, { "epoch": 0.9514712100095523, "grad_norm": 0.8562943336959343, "learning_rate": 6.151863164845396e-08, "loss": 0.3535, "step": 15190 }, { "epoch": 0.9515338480088946, "grad_norm": 0.9350706316788144, "learning_rate": 6.136009718905044e-08, "loss": 0.3943, "step": 15191 }, { "epoch": 0.9515964860082369, "grad_norm": 0.6520634803629606, "learning_rate": 6.12017660049824e-08, "loss": 0.4268, "step": 15192 }, { "epoch": 0.9516591240075792, "grad_norm": 0.6130850505446637, "learning_rate": 6.104363810276626e-08, "loss": 0.443, "step": 15193 }, { "epoch": 0.9517217620069215, "grad_norm": 0.9101618799282681, "learning_rate": 6.08857134889107e-08, "loss": 0.4063, "step": 15194 }, { "epoch": 0.9517844000062639, "grad_norm": 0.8383014823639624, "learning_rate": 6.072799216991609e-08, "loss": 0.3762, "step": 15195 }, { "epoch": 0.9518470380056061, "grad_norm": 0.8827515174053461, "learning_rate": 6.057047415227335e-08, "loss": 0.4107, "step": 15196 }, { "epoch": 0.9519096760049484, "grad_norm": 0.8749623569337858, "learning_rate": 6.041315944246728e-08, "loss": 0.3797, "step": 15197 }, { "epoch": 0.9519723140042907, "grad_norm": 0.9580130370878973, "learning_rate": 6.025604804697215e-08, "loss": 0.3602, "step": 15198 }, { "epoch": 0.952034952003633, "grad_norm": 0.8929533683675394, "learning_rate": 6.009913997225559e-08, "loss": 0.3848, "step": 15199 }, { "epoch": 0.9520975900029753, "grad_norm": 0.9007927096604686, "learning_rate": 5.994243522477405e-08, "loss": 0.3766, "step": 15200 }, { "epoch": 0.9521602280023176, "grad_norm": 0.9161683784071581, "learning_rate": 5.978593381097964e-08, "loss": 0.3964, "step": 15201 }, { "epoch": 0.95222286600166, "grad_norm": 0.8142244308388425, "learning_rate": 5.962963573731329e-08, "loss": 0.3709, "step": 15202 }, { "epoch": 0.9522855040010022, "grad_norm": 0.8583021495087235, "learning_rate": 5.947354101020819e-08, "loss": 0.3877, "step": 15203 }, { "epoch": 0.9523481420003445, "grad_norm": 0.8647342606418376, "learning_rate": 5.9317649636088656e-08, "loss": 0.4133, "step": 15204 }, { "epoch": 0.9524107799996868, "grad_norm": 0.8523429609816765, "learning_rate": 5.916196162137178e-08, "loss": 0.387, "step": 15205 }, { "epoch": 0.9524734179990291, "grad_norm": 0.8895751423374791, "learning_rate": 5.9006476972465754e-08, "loss": 0.4193, "step": 15206 }, { "epoch": 0.9525360559983714, "grad_norm": 0.9050616724266982, "learning_rate": 5.8851195695770467e-08, "loss": 0.4108, "step": 15207 }, { "epoch": 0.9525986939977137, "grad_norm": 0.7964308989773079, "learning_rate": 5.869611779767636e-08, "loss": 0.3441, "step": 15208 }, { "epoch": 0.952661331997056, "grad_norm": 0.842916369948338, "learning_rate": 5.854124328456778e-08, "loss": 0.3718, "step": 15209 }, { "epoch": 0.9527239699963983, "grad_norm": 0.9530411615397253, "learning_rate": 5.8386572162818514e-08, "loss": 0.4018, "step": 15210 }, { "epoch": 0.9527866079957407, "grad_norm": 0.8497925526907734, "learning_rate": 5.823210443879568e-08, "loss": 0.3979, "step": 15211 }, { "epoch": 0.9528492459950829, "grad_norm": 0.8752777519650528, "learning_rate": 5.807784011885587e-08, "loss": 0.369, "step": 15212 }, { "epoch": 0.9529118839944252, "grad_norm": 0.8780369617534569, "learning_rate": 5.7923779209350104e-08, "loss": 0.3841, "step": 15213 }, { "epoch": 0.9529745219937675, "grad_norm": 0.6906437893288655, "learning_rate": 5.776992171661833e-08, "loss": 0.4486, "step": 15214 }, { "epoch": 0.9530371599931098, "grad_norm": 0.8271541204829124, "learning_rate": 5.761626764699435e-08, "loss": 0.3915, "step": 15215 }, { "epoch": 0.9530997979924521, "grad_norm": 0.8972640522528235, "learning_rate": 5.7462817006802006e-08, "loss": 0.3887, "step": 15216 }, { "epoch": 0.9531624359917944, "grad_norm": 0.963509829319896, "learning_rate": 5.730956980235736e-08, "loss": 0.4056, "step": 15217 }, { "epoch": 0.9532250739911368, "grad_norm": 0.9076880298166101, "learning_rate": 5.715652603996813e-08, "loss": 0.379, "step": 15218 }, { "epoch": 0.953287711990479, "grad_norm": 0.9114870763500346, "learning_rate": 5.700368572593429e-08, "loss": 0.3683, "step": 15219 }, { "epoch": 0.9533503499898214, "grad_norm": 0.859914603338617, "learning_rate": 5.68510488665458e-08, "loss": 0.393, "step": 15220 }, { "epoch": 0.9534129879891636, "grad_norm": 0.8942071799406345, "learning_rate": 5.669861546808542e-08, "loss": 0.424, "step": 15221 }, { "epoch": 0.9534756259885059, "grad_norm": 0.8561670097500427, "learning_rate": 5.654638553682812e-08, "loss": 0.3498, "step": 15222 }, { "epoch": 0.9535382639878482, "grad_norm": 0.8811259244659206, "learning_rate": 5.639435907903834e-08, "loss": 0.3708, "step": 15223 }, { "epoch": 0.9536009019871905, "grad_norm": 0.9133383112285534, "learning_rate": 5.6242536100974964e-08, "loss": 0.447, "step": 15224 }, { "epoch": 0.9536635399865329, "grad_norm": 0.8444589111367443, "learning_rate": 5.609091660888633e-08, "loss": 0.3699, "step": 15225 }, { "epoch": 0.9537261779858751, "grad_norm": 0.9566524156043652, "learning_rate": 5.5939500609012986e-08, "loss": 0.3937, "step": 15226 }, { "epoch": 0.9537888159852175, "grad_norm": 0.8387826920445289, "learning_rate": 5.578828810758774e-08, "loss": 0.3691, "step": 15227 }, { "epoch": 0.9538514539845597, "grad_norm": 0.8930262895042048, "learning_rate": 5.563727911083394e-08, "loss": 0.395, "step": 15228 }, { "epoch": 0.953914091983902, "grad_norm": 0.9009484591859717, "learning_rate": 5.548647362496773e-08, "loss": 0.3895, "step": 15229 }, { "epoch": 0.9539767299832443, "grad_norm": 0.8951663985320926, "learning_rate": 5.5335871656195804e-08, "loss": 0.3712, "step": 15230 }, { "epoch": 0.9540393679825866, "grad_norm": 0.8614378610643855, "learning_rate": 5.518547321071765e-08, "loss": 0.3706, "step": 15231 }, { "epoch": 0.954102005981929, "grad_norm": 0.9296830845718519, "learning_rate": 5.503527829472277e-08, "loss": 0.3994, "step": 15232 }, { "epoch": 0.9541646439812712, "grad_norm": 0.9262552220034107, "learning_rate": 5.488528691439343e-08, "loss": 0.3768, "step": 15233 }, { "epoch": 0.9542272819806136, "grad_norm": 0.9028274462355177, "learning_rate": 5.4735499075903586e-08, "loss": 0.4121, "step": 15234 }, { "epoch": 0.9542899199799558, "grad_norm": 0.6430835958847367, "learning_rate": 5.458591478541886e-08, "loss": 0.4671, "step": 15235 }, { "epoch": 0.9543525579792982, "grad_norm": 0.8963391449964239, "learning_rate": 5.4436534049095436e-08, "loss": 0.3785, "step": 15236 }, { "epoch": 0.9544151959786404, "grad_norm": 0.9240025824155932, "learning_rate": 5.4287356873082284e-08, "loss": 0.4159, "step": 15237 }, { "epoch": 0.9544778339779827, "grad_norm": 0.8764848645026678, "learning_rate": 5.4138383263520054e-08, "loss": 0.3603, "step": 15238 }, { "epoch": 0.9545404719773251, "grad_norm": 0.8737078942415979, "learning_rate": 5.398961322653939e-08, "loss": 0.3879, "step": 15239 }, { "epoch": 0.9546031099766673, "grad_norm": 0.8048156342816535, "learning_rate": 5.3841046768264846e-08, "loss": 0.3592, "step": 15240 }, { "epoch": 0.9546657479760097, "grad_norm": 0.9274565500265314, "learning_rate": 5.3692683894810414e-08, "loss": 0.385, "step": 15241 }, { "epoch": 0.9547283859753519, "grad_norm": 0.8794174706305752, "learning_rate": 5.3544524612282876e-08, "loss": 0.3946, "step": 15242 }, { "epoch": 0.9547910239746943, "grad_norm": 0.9116926777392583, "learning_rate": 5.339656892678069e-08, "loss": 0.3915, "step": 15243 }, { "epoch": 0.9548536619740365, "grad_norm": 0.9106171366490637, "learning_rate": 5.3248816844394536e-08, "loss": 0.4043, "step": 15244 }, { "epoch": 0.9549162999733789, "grad_norm": 0.8790682716636106, "learning_rate": 5.310126837120511e-08, "loss": 0.3992, "step": 15245 }, { "epoch": 0.9549789379727212, "grad_norm": 0.844796654705665, "learning_rate": 5.295392351328532e-08, "loss": 0.3398, "step": 15246 }, { "epoch": 0.9550415759720634, "grad_norm": 0.8854191039286493, "learning_rate": 5.280678227670088e-08, "loss": 0.3618, "step": 15247 }, { "epoch": 0.9551042139714058, "grad_norm": 1.0037023562393483, "learning_rate": 5.2659844667506954e-08, "loss": 0.3922, "step": 15248 }, { "epoch": 0.955166851970748, "grad_norm": 0.5740176928433699, "learning_rate": 5.2513110691752575e-08, "loss": 0.4411, "step": 15249 }, { "epoch": 0.9552294899700904, "grad_norm": 0.9353010861426966, "learning_rate": 5.236658035547681e-08, "loss": 0.3584, "step": 15250 }, { "epoch": 0.9552921279694326, "grad_norm": 0.9285336075176985, "learning_rate": 5.2220253664710394e-08, "loss": 0.3918, "step": 15251 }, { "epoch": 0.955354765968775, "grad_norm": 0.8925130769824358, "learning_rate": 5.207413062547684e-08, "loss": 0.3731, "step": 15252 }, { "epoch": 0.9554174039681173, "grad_norm": 0.8724212743089182, "learning_rate": 5.1928211243790215e-08, "loss": 0.3481, "step": 15253 }, { "epoch": 0.9554800419674596, "grad_norm": 0.9051831566450509, "learning_rate": 5.178249552565739e-08, "loss": 0.3834, "step": 15254 }, { "epoch": 0.9555426799668019, "grad_norm": 0.8695245780739478, "learning_rate": 5.163698347707524e-08, "loss": 0.3691, "step": 15255 }, { "epoch": 0.9556053179661441, "grad_norm": 0.8629111028847275, "learning_rate": 5.1491675104033415e-08, "loss": 0.3919, "step": 15256 }, { "epoch": 0.9556679559654865, "grad_norm": 0.8328583604156387, "learning_rate": 5.1346570412512674e-08, "loss": 0.3847, "step": 15257 }, { "epoch": 0.9557305939648287, "grad_norm": 0.9151284564858712, "learning_rate": 5.1201669408486034e-08, "loss": 0.3959, "step": 15258 }, { "epoch": 0.9557932319641711, "grad_norm": 0.9031206909069097, "learning_rate": 5.105697209791649e-08, "loss": 0.3886, "step": 15259 }, { "epoch": 0.9558558699635133, "grad_norm": 0.8401680262234839, "learning_rate": 5.091247848676095e-08, "loss": 0.4005, "step": 15260 }, { "epoch": 0.9559185079628557, "grad_norm": 0.8770636343681987, "learning_rate": 5.076818858096577e-08, "loss": 0.3872, "step": 15261 }, { "epoch": 0.955981145962198, "grad_norm": 0.8827060041355576, "learning_rate": 5.06241023864712e-08, "loss": 0.381, "step": 15262 }, { "epoch": 0.9560437839615402, "grad_norm": 0.9026497752806967, "learning_rate": 5.048021990920748e-08, "loss": 0.4168, "step": 15263 }, { "epoch": 0.9561064219608826, "grad_norm": 0.8360569473300837, "learning_rate": 5.0336541155096006e-08, "loss": 0.3757, "step": 15264 }, { "epoch": 0.9561690599602248, "grad_norm": 0.8562164362803881, "learning_rate": 5.019306613005148e-08, "loss": 0.3639, "step": 15265 }, { "epoch": 0.9562316979595672, "grad_norm": 0.9818202293436474, "learning_rate": 5.004979483997918e-08, "loss": 0.3885, "step": 15266 }, { "epoch": 0.9562943359589094, "grad_norm": 0.8978690350702947, "learning_rate": 4.9906727290776055e-08, "loss": 0.4365, "step": 15267 }, { "epoch": 0.9563569739582518, "grad_norm": 0.934801068521005, "learning_rate": 4.976386348833018e-08, "loss": 0.4082, "step": 15268 }, { "epoch": 0.9564196119575941, "grad_norm": 0.8593009793375922, "learning_rate": 4.9621203438522946e-08, "loss": 0.3855, "step": 15269 }, { "epoch": 0.9564822499569364, "grad_norm": 0.8332581823311529, "learning_rate": 4.947874714722578e-08, "loss": 0.3456, "step": 15270 }, { "epoch": 0.9565448879562787, "grad_norm": 0.909906988910217, "learning_rate": 4.933649462030232e-08, "loss": 0.3959, "step": 15271 }, { "epoch": 0.9566075259556209, "grad_norm": 0.8632977407262562, "learning_rate": 4.919444586360789e-08, "loss": 0.4241, "step": 15272 }, { "epoch": 0.9566701639549633, "grad_norm": 0.8844778222207289, "learning_rate": 4.905260088298835e-08, "loss": 0.3936, "step": 15273 }, { "epoch": 0.9567328019543055, "grad_norm": 0.8775418105656873, "learning_rate": 4.89109596842835e-08, "loss": 0.3738, "step": 15274 }, { "epoch": 0.9567954399536479, "grad_norm": 0.8977451832989181, "learning_rate": 4.876952227332199e-08, "loss": 0.3914, "step": 15275 }, { "epoch": 0.9568580779529902, "grad_norm": 0.8717042390319484, "learning_rate": 4.862828865592584e-08, "loss": 0.3881, "step": 15276 }, { "epoch": 0.9569207159523325, "grad_norm": 0.8251224606835292, "learning_rate": 4.848725883790817e-08, "loss": 0.348, "step": 15277 }, { "epoch": 0.9569833539516748, "grad_norm": 0.8366950048258435, "learning_rate": 4.834643282507434e-08, "loss": 0.36, "step": 15278 }, { "epoch": 0.9570459919510171, "grad_norm": 0.5864545573239239, "learning_rate": 4.8205810623220274e-08, "loss": 0.4456, "step": 15279 }, { "epoch": 0.9571086299503594, "grad_norm": 0.8615858268494393, "learning_rate": 4.8065392238134664e-08, "loss": 0.3822, "step": 15280 }, { "epoch": 0.9571712679497016, "grad_norm": 0.8642434648876063, "learning_rate": 4.792517767559623e-08, "loss": 0.3744, "step": 15281 }, { "epoch": 0.957233905949044, "grad_norm": 0.814450208416004, "learning_rate": 4.778516694137647e-08, "loss": 0.3418, "step": 15282 }, { "epoch": 0.9572965439483863, "grad_norm": 0.8956806910980577, "learning_rate": 4.764536004123854e-08, "loss": 0.4317, "step": 15283 }, { "epoch": 0.9573591819477286, "grad_norm": 0.9075066538680118, "learning_rate": 4.750575698093674e-08, "loss": 0.4369, "step": 15284 }, { "epoch": 0.9574218199470709, "grad_norm": 0.8645897916064694, "learning_rate": 4.736635776621701e-08, "loss": 0.3765, "step": 15285 }, { "epoch": 0.9574844579464132, "grad_norm": 0.8586174737368258, "learning_rate": 4.722716240281755e-08, "loss": 0.3235, "step": 15286 }, { "epoch": 0.9575470959457555, "grad_norm": 0.8689420298855414, "learning_rate": 4.708817089646767e-08, "loss": 0.4049, "step": 15287 }, { "epoch": 0.9576097339450977, "grad_norm": 0.8931855200576905, "learning_rate": 4.6949383252887224e-08, "loss": 0.3764, "step": 15288 }, { "epoch": 0.9576723719444401, "grad_norm": 0.8711537083543348, "learning_rate": 4.681079947778999e-08, "loss": 0.3536, "step": 15289 }, { "epoch": 0.9577350099437824, "grad_norm": 0.8794931082806541, "learning_rate": 4.667241957687974e-08, "loss": 0.3676, "step": 15290 }, { "epoch": 0.9577976479431247, "grad_norm": 0.8410188432181981, "learning_rate": 4.65342435558519e-08, "loss": 0.3997, "step": 15291 }, { "epoch": 0.957860285942467, "grad_norm": 0.8108072124762906, "learning_rate": 4.6396271420394156e-08, "loss": 0.3878, "step": 15292 }, { "epoch": 0.9579229239418093, "grad_norm": 0.8724914537789534, "learning_rate": 4.625850317618475e-08, "loss": 0.3531, "step": 15293 }, { "epoch": 0.9579855619411516, "grad_norm": 0.6350242764695084, "learning_rate": 4.612093882889523e-08, "loss": 0.4479, "step": 15294 }, { "epoch": 0.958048199940494, "grad_norm": 0.860772348907484, "learning_rate": 4.598357838418721e-08, "loss": 0.3812, "step": 15295 }, { "epoch": 0.9581108379398362, "grad_norm": 0.6239764831342037, "learning_rate": 4.584642184771504e-08, "loss": 0.4465, "step": 15296 }, { "epoch": 0.9581734759391785, "grad_norm": 0.9386718348732345, "learning_rate": 4.5709469225123096e-08, "loss": 0.3842, "step": 15297 }, { "epoch": 0.9582361139385208, "grad_norm": 0.8504973015566546, "learning_rate": 4.557272052204909e-08, "loss": 0.3999, "step": 15298 }, { "epoch": 0.9582987519378631, "grad_norm": 0.8810495086535847, "learning_rate": 4.543617574412185e-08, "loss": 0.3739, "step": 15299 }, { "epoch": 0.9583613899372054, "grad_norm": 0.8679170919544106, "learning_rate": 4.529983489696077e-08, "loss": 0.3623, "step": 15300 }, { "epoch": 0.9584240279365477, "grad_norm": 0.896068138141103, "learning_rate": 4.516369798617859e-08, "loss": 0.3995, "step": 15301 }, { "epoch": 0.95848666593589, "grad_norm": 0.8723492253812636, "learning_rate": 4.502776501737749e-08, "loss": 0.3838, "step": 15302 }, { "epoch": 0.9585493039352323, "grad_norm": 0.7747608936620805, "learning_rate": 4.489203599615411e-08, "loss": 0.3358, "step": 15303 }, { "epoch": 0.9586119419345747, "grad_norm": 0.8470925841621801, "learning_rate": 4.4756510928093965e-08, "loss": 0.3668, "step": 15304 }, { "epoch": 0.9586745799339169, "grad_norm": 0.8939239232424739, "learning_rate": 4.462118981877539e-08, "loss": 0.3711, "step": 15305 }, { "epoch": 0.9587372179332592, "grad_norm": 0.8635011635000369, "learning_rate": 4.448607267376836e-08, "loss": 0.4024, "step": 15306 }, { "epoch": 0.9587998559326015, "grad_norm": 0.8985912525370945, "learning_rate": 4.435115949863456e-08, "loss": 0.4104, "step": 15307 }, { "epoch": 0.9588624939319438, "grad_norm": 0.8817117091057654, "learning_rate": 4.4216450298926736e-08, "loss": 0.3617, "step": 15308 }, { "epoch": 0.9589251319312861, "grad_norm": 0.905955124032304, "learning_rate": 4.408194508018992e-08, "loss": 0.4174, "step": 15309 }, { "epoch": 0.9589877699306284, "grad_norm": 0.8954924473245595, "learning_rate": 4.394764384796024e-08, "loss": 0.372, "step": 15310 }, { "epoch": 0.9590504079299708, "grad_norm": 0.8634253049096153, "learning_rate": 4.381354660776493e-08, "loss": 0.392, "step": 15311 }, { "epoch": 0.959113045929313, "grad_norm": 0.855700492545292, "learning_rate": 4.367965336512403e-08, "loss": 0.3578, "step": 15312 }, { "epoch": 0.9591756839286553, "grad_norm": 0.8857635696051541, "learning_rate": 4.354596412554868e-08, "loss": 0.4047, "step": 15313 }, { "epoch": 0.9592383219279976, "grad_norm": 0.8358799558002553, "learning_rate": 4.3412478894541145e-08, "loss": 0.3565, "step": 15314 }, { "epoch": 0.9593009599273399, "grad_norm": 0.9259269322925344, "learning_rate": 4.327919767759592e-08, "loss": 0.382, "step": 15315 }, { "epoch": 0.9593635979266822, "grad_norm": 0.8827408557729712, "learning_rate": 4.3146120480199726e-08, "loss": 0.393, "step": 15316 }, { "epoch": 0.9594262359260245, "grad_norm": 0.8495653463173831, "learning_rate": 4.3013247307828746e-08, "loss": 0.3999, "step": 15317 }, { "epoch": 0.9594888739253669, "grad_norm": 0.80530272604344, "learning_rate": 4.288057816595248e-08, "loss": 0.3686, "step": 15318 }, { "epoch": 0.9595515119247091, "grad_norm": 0.8837064988689668, "learning_rate": 4.2748113060031574e-08, "loss": 0.3592, "step": 15319 }, { "epoch": 0.9596141499240515, "grad_norm": 0.8600026123983309, "learning_rate": 4.261585199551832e-08, "loss": 0.3552, "step": 15320 }, { "epoch": 0.9596767879233937, "grad_norm": 0.8654465728683391, "learning_rate": 4.2483794977857264e-08, "loss": 0.338, "step": 15321 }, { "epoch": 0.959739425922736, "grad_norm": 0.9153208149562682, "learning_rate": 4.2351942012483495e-08, "loss": 0.3354, "step": 15322 }, { "epoch": 0.9598020639220783, "grad_norm": 0.866859519564023, "learning_rate": 4.2220293104823227e-08, "loss": 0.3864, "step": 15323 }, { "epoch": 0.9598647019214206, "grad_norm": 0.8863362030696739, "learning_rate": 4.208884826029658e-08, "loss": 0.3936, "step": 15324 }, { "epoch": 0.959927339920763, "grad_norm": 0.8587414642228819, "learning_rate": 4.195760748431254e-08, "loss": 0.4096, "step": 15325 }, { "epoch": 0.9599899779201052, "grad_norm": 0.8432904721126594, "learning_rate": 4.182657078227403e-08, "loss": 0.3506, "step": 15326 }, { "epoch": 0.9600526159194476, "grad_norm": 0.8004953909229986, "learning_rate": 4.16957381595745e-08, "loss": 0.3717, "step": 15327 }, { "epoch": 0.9601152539187898, "grad_norm": 0.8909361408817863, "learning_rate": 4.156510962159799e-08, "loss": 0.3919, "step": 15328 }, { "epoch": 0.9601778919181322, "grad_norm": 0.9332616344343582, "learning_rate": 4.143468517372185e-08, "loss": 0.3865, "step": 15329 }, { "epoch": 0.9602405299174744, "grad_norm": 0.9187151296010319, "learning_rate": 4.1304464821315114e-08, "loss": 0.4433, "step": 15330 }, { "epoch": 0.9603031679168167, "grad_norm": 0.792129185651848, "learning_rate": 4.1174448569736846e-08, "loss": 0.3563, "step": 15331 }, { "epoch": 0.960365805916159, "grad_norm": 0.9254371344397172, "learning_rate": 4.104463642433831e-08, "loss": 0.393, "step": 15332 }, { "epoch": 0.9604284439155013, "grad_norm": 0.9191179374093187, "learning_rate": 4.091502839046302e-08, "loss": 0.3286, "step": 15333 }, { "epoch": 0.9604910819148437, "grad_norm": 0.6435470101395275, "learning_rate": 4.078562447344558e-08, "loss": 0.4481, "step": 15334 }, { "epoch": 0.9605537199141859, "grad_norm": 0.8587576988064328, "learning_rate": 4.065642467861286e-08, "loss": 0.374, "step": 15335 }, { "epoch": 0.9606163579135283, "grad_norm": 0.9007243091756292, "learning_rate": 4.05274290112817e-08, "loss": 0.4232, "step": 15336 }, { "epoch": 0.9606789959128705, "grad_norm": 0.9141629787079678, "learning_rate": 4.039863747676176e-08, "loss": 0.4193, "step": 15337 }, { "epoch": 0.9607416339122128, "grad_norm": 0.9261849861608009, "learning_rate": 4.02700500803549e-08, "loss": 0.4013, "step": 15338 }, { "epoch": 0.9608042719115552, "grad_norm": 0.8482849075669768, "learning_rate": 4.014166682735354e-08, "loss": 0.3231, "step": 15339 }, { "epoch": 0.9608669099108974, "grad_norm": 0.9052748192137557, "learning_rate": 4.0013487723041256e-08, "loss": 0.4023, "step": 15340 }, { "epoch": 0.9609295479102398, "grad_norm": 1.0008872664434418, "learning_rate": 3.988551277269492e-08, "loss": 0.3923, "step": 15341 }, { "epoch": 0.960992185909582, "grad_norm": 0.7785873246734197, "learning_rate": 3.975774198158144e-08, "loss": 0.3642, "step": 15342 }, { "epoch": 0.9610548239089244, "grad_norm": 0.8135601522254703, "learning_rate": 3.963017535495939e-08, "loss": 0.3213, "step": 15343 }, { "epoch": 0.9611174619082666, "grad_norm": 0.8802374030901752, "learning_rate": 3.9502812898080664e-08, "loss": 0.3712, "step": 15344 }, { "epoch": 0.961180099907609, "grad_norm": 0.8740405827297618, "learning_rate": 3.937565461618664e-08, "loss": 0.383, "step": 15345 }, { "epoch": 0.9612427379069513, "grad_norm": 0.9033242104465589, "learning_rate": 3.924870051451146e-08, "loss": 0.4379, "step": 15346 }, { "epoch": 0.9613053759062935, "grad_norm": 0.8853119623081632, "learning_rate": 3.912195059828039e-08, "loss": 0.3942, "step": 15347 }, { "epoch": 0.9613680139056359, "grad_norm": 0.8658876111158962, "learning_rate": 3.899540487271092e-08, "loss": 0.3827, "step": 15348 }, { "epoch": 0.9614306519049781, "grad_norm": 0.8212473629545832, "learning_rate": 3.8869063343011106e-08, "loss": 0.3641, "step": 15349 }, { "epoch": 0.9614932899043205, "grad_norm": 0.8714586178618858, "learning_rate": 3.874292601438179e-08, "loss": 0.3921, "step": 15350 }, { "epoch": 0.9615559279036627, "grad_norm": 0.8613215077712058, "learning_rate": 3.861699289201437e-08, "loss": 0.3401, "step": 15351 }, { "epoch": 0.9616185659030051, "grad_norm": 0.8469106780717525, "learning_rate": 3.849126398109193e-08, "loss": 0.3796, "step": 15352 }, { "epoch": 0.9616812039023473, "grad_norm": 0.896139734344734, "learning_rate": 3.836573928679033e-08, "loss": 0.4266, "step": 15353 }, { "epoch": 0.9617438419016897, "grad_norm": 0.8937707720733193, "learning_rate": 3.824041881427543e-08, "loss": 0.4073, "step": 15354 }, { "epoch": 0.961806479901032, "grad_norm": 0.6374670147624867, "learning_rate": 3.8115302568706435e-08, "loss": 0.4478, "step": 15355 }, { "epoch": 0.9618691179003742, "grad_norm": 0.6442835997468715, "learning_rate": 3.799039055523257e-08, "loss": 0.4496, "step": 15356 }, { "epoch": 0.9619317558997166, "grad_norm": 0.8915447684133789, "learning_rate": 3.7865682778994716e-08, "loss": 0.3495, "step": 15357 }, { "epoch": 0.9619943938990588, "grad_norm": 0.8814450830168428, "learning_rate": 3.7741179245126545e-08, "loss": 0.4177, "step": 15358 }, { "epoch": 0.9620570318984012, "grad_norm": 0.8764824163394052, "learning_rate": 3.761687995875285e-08, "loss": 0.3938, "step": 15359 }, { "epoch": 0.9621196698977434, "grad_norm": 0.8120958720789679, "learning_rate": 3.749278492498898e-08, "loss": 0.3787, "step": 15360 }, { "epoch": 0.9621823078970858, "grad_norm": 0.908215980122687, "learning_rate": 3.7368894148943625e-08, "loss": 0.3706, "step": 15361 }, { "epoch": 0.9622449458964281, "grad_norm": 0.9166272470814161, "learning_rate": 3.724520763571549e-08, "loss": 0.3835, "step": 15362 }, { "epoch": 0.9623075838957704, "grad_norm": 0.9350434424488853, "learning_rate": 3.712172539039549e-08, "loss": 0.3837, "step": 15363 }, { "epoch": 0.9623702218951127, "grad_norm": 0.843709482143319, "learning_rate": 3.699844741806624e-08, "loss": 0.4011, "step": 15364 }, { "epoch": 0.9624328598944549, "grad_norm": 0.7926094324984028, "learning_rate": 3.6875373723802565e-08, "loss": 0.3351, "step": 15365 }, { "epoch": 0.9624954978937973, "grad_norm": 0.8551186438211217, "learning_rate": 3.675250431266986e-08, "loss": 0.3826, "step": 15366 }, { "epoch": 0.9625581358931395, "grad_norm": 0.8944623418581544, "learning_rate": 3.662983918972518e-08, "loss": 0.3777, "step": 15367 }, { "epoch": 0.9626207738924819, "grad_norm": 0.9234970121926226, "learning_rate": 3.650737836001728e-08, "loss": 0.3666, "step": 15368 }, { "epoch": 0.9626834118918242, "grad_norm": 0.8329719825031984, "learning_rate": 3.638512182858711e-08, "loss": 0.3965, "step": 15369 }, { "epoch": 0.9627460498911665, "grad_norm": 0.8199981951948601, "learning_rate": 3.626306960046621e-08, "loss": 0.3828, "step": 15370 }, { "epoch": 0.9628086878905088, "grad_norm": 0.8299375185763909, "learning_rate": 3.614122168067946e-08, "loss": 0.3542, "step": 15371 }, { "epoch": 0.962871325889851, "grad_norm": 0.8939190319146617, "learning_rate": 3.601957807424061e-08, "loss": 0.4338, "step": 15372 }, { "epoch": 0.9629339638891934, "grad_norm": 0.6815749487331217, "learning_rate": 3.589813878615733e-08, "loss": 0.4618, "step": 15373 }, { "epoch": 0.9629966018885356, "grad_norm": 0.9020730381513189, "learning_rate": 3.57769038214284e-08, "loss": 0.3737, "step": 15374 }, { "epoch": 0.963059239887878, "grad_norm": 0.8266436130791119, "learning_rate": 3.565587318504371e-08, "loss": 0.3554, "step": 15375 }, { "epoch": 0.9631218778872203, "grad_norm": 0.6137759692116093, "learning_rate": 3.553504688198428e-08, "loss": 0.4255, "step": 15376 }, { "epoch": 0.9631845158865626, "grad_norm": 0.8334345586445071, "learning_rate": 3.541442491722447e-08, "loss": 0.3622, "step": 15377 }, { "epoch": 0.9632471538859049, "grad_norm": 0.8683782199701677, "learning_rate": 3.5294007295727515e-08, "loss": 0.3821, "step": 15378 }, { "epoch": 0.9633097918852472, "grad_norm": 0.8793176052096168, "learning_rate": 3.517379402245113e-08, "loss": 0.396, "step": 15379 }, { "epoch": 0.9633724298845895, "grad_norm": 0.8761870770501404, "learning_rate": 3.5053785102342476e-08, "loss": 0.377, "step": 15380 }, { "epoch": 0.9634350678839317, "grad_norm": 0.885357661471309, "learning_rate": 3.4933980540342024e-08, "loss": 0.3584, "step": 15381 }, { "epoch": 0.9634977058832741, "grad_norm": 0.8179891477091322, "learning_rate": 3.4814380341380296e-08, "loss": 0.3697, "step": 15382 }, { "epoch": 0.9635603438826164, "grad_norm": 0.8541799036182447, "learning_rate": 3.469498451038056e-08, "loss": 0.3793, "step": 15383 }, { "epoch": 0.9636229818819587, "grad_norm": 0.7858610693931908, "learning_rate": 3.457579305225667e-08, "loss": 0.3598, "step": 15384 }, { "epoch": 0.963685619881301, "grad_norm": 0.8251004743259767, "learning_rate": 3.44568059719147e-08, "loss": 0.3844, "step": 15385 }, { "epoch": 0.9637482578806433, "grad_norm": 0.8334686013298904, "learning_rate": 3.43380232742524e-08, "loss": 0.3824, "step": 15386 }, { "epoch": 0.9638108958799856, "grad_norm": 0.9077021886169134, "learning_rate": 3.421944496415863e-08, "loss": 0.3933, "step": 15387 }, { "epoch": 0.963873533879328, "grad_norm": 0.8033782328970996, "learning_rate": 3.410107104651394e-08, "loss": 0.3561, "step": 15388 }, { "epoch": 0.9639361718786702, "grad_norm": 0.8324352618288592, "learning_rate": 3.3982901526191106e-08, "loss": 0.3535, "step": 15389 }, { "epoch": 0.9639988098780125, "grad_norm": 0.9232741009370211, "learning_rate": 3.386493640805399e-08, "loss": 0.3858, "step": 15390 }, { "epoch": 0.9640614478773548, "grad_norm": 0.9762739272316912, "learning_rate": 3.3747175696957626e-08, "loss": 0.4084, "step": 15391 }, { "epoch": 0.9641240858766971, "grad_norm": 0.8459060839581716, "learning_rate": 3.362961939774923e-08, "loss": 0.4249, "step": 15392 }, { "epoch": 0.9641867238760394, "grad_norm": 0.8376993723899859, "learning_rate": 3.35122675152677e-08, "loss": 0.3599, "step": 15393 }, { "epoch": 0.9642493618753817, "grad_norm": 0.8758462410362228, "learning_rate": 3.339512005434309e-08, "loss": 0.4122, "step": 15394 }, { "epoch": 0.964311999874724, "grad_norm": 0.8817189381216336, "learning_rate": 3.3278177019797075e-08, "loss": 0.3655, "step": 15395 }, { "epoch": 0.9643746378740663, "grad_norm": 0.8806962003683689, "learning_rate": 3.3161438416443036e-08, "loss": 0.3946, "step": 15396 }, { "epoch": 0.9644372758734086, "grad_norm": 0.8822860955924249, "learning_rate": 3.304490424908602e-08, "loss": 0.3788, "step": 15397 }, { "epoch": 0.9644999138727509, "grad_norm": 0.8533238922418697, "learning_rate": 3.292857452252274e-08, "loss": 0.3994, "step": 15398 }, { "epoch": 0.9645625518720932, "grad_norm": 0.6066658964466997, "learning_rate": 3.2812449241541587e-08, "loss": 0.4322, "step": 15399 }, { "epoch": 0.9646251898714355, "grad_norm": 0.6146233853024793, "learning_rate": 3.2696528410921524e-08, "loss": 0.4462, "step": 15400 }, { "epoch": 0.9646878278707778, "grad_norm": 0.7844581900298092, "learning_rate": 3.258081203543484e-08, "loss": 0.4021, "step": 15401 }, { "epoch": 0.9647504658701201, "grad_norm": 0.8370720977185571, "learning_rate": 3.246530011984328e-08, "loss": 0.3853, "step": 15402 }, { "epoch": 0.9648131038694624, "grad_norm": 0.9027322019971741, "learning_rate": 3.234999266890249e-08, "loss": 0.3732, "step": 15403 }, { "epoch": 0.9648757418688048, "grad_norm": 0.8719036342776846, "learning_rate": 3.223488968735811e-08, "loss": 0.3796, "step": 15404 }, { "epoch": 0.964938379868147, "grad_norm": 0.8793114166234907, "learning_rate": 3.2119991179947464e-08, "loss": 0.3685, "step": 15405 }, { "epoch": 0.9650010178674893, "grad_norm": 0.9220126505661838, "learning_rate": 3.2005297151400105e-08, "loss": 0.3355, "step": 15406 }, { "epoch": 0.9650636558668316, "grad_norm": 0.8973396057434276, "learning_rate": 3.1890807606436705e-08, "loss": 0.4094, "step": 15407 }, { "epoch": 0.9651262938661739, "grad_norm": 0.9102606546153139, "learning_rate": 3.1776522549769594e-08, "loss": 0.3805, "step": 15408 }, { "epoch": 0.9651889318655162, "grad_norm": 0.837544816045977, "learning_rate": 3.166244198610336e-08, "loss": 0.3498, "step": 15409 }, { "epoch": 0.9652515698648585, "grad_norm": 0.9243009899231045, "learning_rate": 3.154856592013311e-08, "loss": 0.3638, "step": 15410 }, { "epoch": 0.9653142078642009, "grad_norm": 0.8681198020056655, "learning_rate": 3.143489435654623e-08, "loss": 0.4379, "step": 15411 }, { "epoch": 0.9653768458635431, "grad_norm": 0.9028621300032101, "learning_rate": 3.132142730002119e-08, "loss": 0.3192, "step": 15412 }, { "epoch": 0.9654394838628855, "grad_norm": 0.8946054255959653, "learning_rate": 3.1208164755228145e-08, "loss": 0.3542, "step": 15413 }, { "epoch": 0.9655021218622277, "grad_norm": 0.8639932920786096, "learning_rate": 3.109510672682947e-08, "loss": 0.3799, "step": 15414 }, { "epoch": 0.96556475986157, "grad_norm": 0.8902467252533232, "learning_rate": 3.0982253219478674e-08, "loss": 0.3907, "step": 15415 }, { "epoch": 0.9656273978609123, "grad_norm": 0.8245346305680328, "learning_rate": 3.086960423782037e-08, "loss": 0.3825, "step": 15416 }, { "epoch": 0.9656900358602546, "grad_norm": 0.8679844600605278, "learning_rate": 3.075715978649141e-08, "loss": 0.4119, "step": 15417 }, { "epoch": 0.965752673859597, "grad_norm": 0.856254855634409, "learning_rate": 3.0644919870120304e-08, "loss": 0.3707, "step": 15418 }, { "epoch": 0.9658153118589392, "grad_norm": 0.8074088061015086, "learning_rate": 3.0532884493326695e-08, "loss": 0.3468, "step": 15419 }, { "epoch": 0.9658779498582816, "grad_norm": 0.8357063582157184, "learning_rate": 3.04210536607219e-08, "loss": 0.3903, "step": 15420 }, { "epoch": 0.9659405878576238, "grad_norm": 0.8931454113661679, "learning_rate": 3.0309427376908893e-08, "loss": 0.3637, "step": 15421 }, { "epoch": 0.9660032258569661, "grad_norm": 0.8301947860670977, "learning_rate": 3.019800564648234e-08, "loss": 0.3343, "step": 15422 }, { "epoch": 0.9660658638563084, "grad_norm": 0.9024804483825647, "learning_rate": 3.0086788474028015e-08, "loss": 0.3896, "step": 15423 }, { "epoch": 0.9661285018556507, "grad_norm": 0.8719966482887124, "learning_rate": 2.997577586412448e-08, "loss": 0.3987, "step": 15424 }, { "epoch": 0.966191139854993, "grad_norm": 0.8538237712556744, "learning_rate": 2.9864967821340296e-08, "loss": 0.4214, "step": 15425 }, { "epoch": 0.9662537778543353, "grad_norm": 0.5993328661666445, "learning_rate": 2.9754364350236265e-08, "loss": 0.4196, "step": 15426 }, { "epoch": 0.9663164158536777, "grad_norm": 0.8610070933638883, "learning_rate": 2.9643965455364852e-08, "loss": 0.4037, "step": 15427 }, { "epoch": 0.9663790538530199, "grad_norm": 0.9352625631633119, "learning_rate": 2.9533771141271316e-08, "loss": 0.3709, "step": 15428 }, { "epoch": 0.9664416918523623, "grad_norm": 0.6080258045872599, "learning_rate": 2.9423781412489804e-08, "loss": 0.4405, "step": 15429 }, { "epoch": 0.9665043298517045, "grad_norm": 0.9477778289102553, "learning_rate": 2.9313996273547807e-08, "loss": 0.3948, "step": 15430 }, { "epoch": 0.9665669678510468, "grad_norm": 0.6065947703772894, "learning_rate": 2.9204415728964487e-08, "loss": 0.4312, "step": 15431 }, { "epoch": 0.9666296058503892, "grad_norm": 0.8943844212340013, "learning_rate": 2.909503978325012e-08, "loss": 0.3542, "step": 15432 }, { "epoch": 0.9666922438497314, "grad_norm": 0.8946677555776431, "learning_rate": 2.8985868440906672e-08, "loss": 0.4006, "step": 15433 }, { "epoch": 0.9667548818490738, "grad_norm": 0.8372665316120074, "learning_rate": 2.8876901706427208e-08, "loss": 0.3825, "step": 15434 }, { "epoch": 0.966817519848416, "grad_norm": 0.8727088110971003, "learning_rate": 2.8768139584297026e-08, "loss": 0.3773, "step": 15435 }, { "epoch": 0.9668801578477584, "grad_norm": 0.9136520664291765, "learning_rate": 2.865958207899311e-08, "loss": 0.3876, "step": 15436 }, { "epoch": 0.9669427958471006, "grad_norm": 0.882533458865436, "learning_rate": 2.8551229194984098e-08, "loss": 0.3964, "step": 15437 }, { "epoch": 0.967005433846443, "grad_norm": 0.6267922626957882, "learning_rate": 2.8443080936728652e-08, "loss": 0.4496, "step": 15438 }, { "epoch": 0.9670680718457852, "grad_norm": 0.907557982367982, "learning_rate": 2.833513730867876e-08, "loss": 0.4034, "step": 15439 }, { "epoch": 0.9671307098451275, "grad_norm": 0.8789509610846663, "learning_rate": 2.8227398315277544e-08, "loss": 0.3868, "step": 15440 }, { "epoch": 0.9671933478444699, "grad_norm": 0.8489270152978361, "learning_rate": 2.8119863960959225e-08, "loss": 0.3484, "step": 15441 }, { "epoch": 0.9672559858438121, "grad_norm": 0.8417123667107964, "learning_rate": 2.8012534250150824e-08, "loss": 0.3778, "step": 15442 }, { "epoch": 0.9673186238431545, "grad_norm": 0.8876150916811427, "learning_rate": 2.7905409187268806e-08, "loss": 0.4022, "step": 15443 }, { "epoch": 0.9673812618424967, "grad_norm": 0.9295910295455142, "learning_rate": 2.7798488776722422e-08, "loss": 0.4015, "step": 15444 }, { "epoch": 0.9674438998418391, "grad_norm": 0.9109692630299568, "learning_rate": 2.7691773022914257e-08, "loss": 0.3934, "step": 15445 }, { "epoch": 0.9675065378411813, "grad_norm": 0.8817209310005781, "learning_rate": 2.7585261930234696e-08, "loss": 0.3549, "step": 15446 }, { "epoch": 0.9675691758405236, "grad_norm": 0.8156650657128284, "learning_rate": 2.7478955503069116e-08, "loss": 0.3608, "step": 15447 }, { "epoch": 0.967631813839866, "grad_norm": 0.8998186826492154, "learning_rate": 2.7372853745792905e-08, "loss": 0.3669, "step": 15448 }, { "epoch": 0.9676944518392082, "grad_norm": 0.839900093778335, "learning_rate": 2.726695666277257e-08, "loss": 0.3754, "step": 15449 }, { "epoch": 0.9677570898385506, "grad_norm": 1.0325104531530294, "learning_rate": 2.7161264258367405e-08, "loss": 0.3513, "step": 15450 }, { "epoch": 0.9678197278378928, "grad_norm": 0.6063397094832422, "learning_rate": 2.7055776536927815e-08, "loss": 0.4611, "step": 15451 }, { "epoch": 0.9678823658372352, "grad_norm": 0.8449274323131678, "learning_rate": 2.6950493502795328e-08, "loss": 0.3744, "step": 15452 }, { "epoch": 0.9679450038365774, "grad_norm": 0.8895088164223637, "learning_rate": 2.68454151603037e-08, "loss": 0.3617, "step": 15453 }, { "epoch": 0.9680076418359198, "grad_norm": 0.8926202703977455, "learning_rate": 2.674054151377725e-08, "loss": 0.3769, "step": 15454 }, { "epoch": 0.9680702798352621, "grad_norm": 0.8708195415629615, "learning_rate": 2.6635872567533638e-08, "loss": 0.3817, "step": 15455 }, { "epoch": 0.9681329178346043, "grad_norm": 0.8807694805115854, "learning_rate": 2.653140832588108e-08, "loss": 0.387, "step": 15456 }, { "epoch": 0.9681955558339467, "grad_norm": 0.824042730121043, "learning_rate": 2.6427148793117808e-08, "loss": 0.3682, "step": 15457 }, { "epoch": 0.9682581938332889, "grad_norm": 0.8477880614793277, "learning_rate": 2.632309397353705e-08, "loss": 0.3787, "step": 15458 }, { "epoch": 0.9683208318326313, "grad_norm": 0.8147261519998235, "learning_rate": 2.621924387142094e-08, "loss": 0.3914, "step": 15459 }, { "epoch": 0.9683834698319735, "grad_norm": 0.9216044531334834, "learning_rate": 2.6115598491043837e-08, "loss": 0.3946, "step": 15460 }, { "epoch": 0.9684461078313159, "grad_norm": 0.855202580657381, "learning_rate": 2.601215783667177e-08, "loss": 0.3825, "step": 15461 }, { "epoch": 0.9685087458306582, "grad_norm": 0.8577544287478877, "learning_rate": 2.5908921912562445e-08, "loss": 0.3699, "step": 15462 }, { "epoch": 0.9685713838300005, "grad_norm": 0.8526912880306324, "learning_rate": 2.5805890722964687e-08, "loss": 0.3646, "step": 15463 }, { "epoch": 0.9686340218293428, "grad_norm": 0.8772139232915068, "learning_rate": 2.5703064272120103e-08, "loss": 0.3724, "step": 15464 }, { "epoch": 0.968696659828685, "grad_norm": 0.8280154789355384, "learning_rate": 2.5600442564260864e-08, "loss": 0.3615, "step": 15465 }, { "epoch": 0.9687592978280274, "grad_norm": 0.8215037061541222, "learning_rate": 2.549802560361081e-08, "loss": 0.3752, "step": 15466 }, { "epoch": 0.9688219358273696, "grad_norm": 0.876564630560865, "learning_rate": 2.5395813394384904e-08, "loss": 0.43, "step": 15467 }, { "epoch": 0.968884573826712, "grad_norm": 0.8266428880812274, "learning_rate": 2.5293805940790894e-08, "loss": 0.3727, "step": 15468 }, { "epoch": 0.9689472118260543, "grad_norm": 0.8992013061686593, "learning_rate": 2.5192003247027086e-08, "loss": 0.3616, "step": 15469 }, { "epoch": 0.9690098498253966, "grad_norm": 0.8709869401722116, "learning_rate": 2.5090405317284015e-08, "loss": 0.4017, "step": 15470 }, { "epoch": 0.9690724878247389, "grad_norm": 0.8740690988827476, "learning_rate": 2.4989012155743343e-08, "loss": 0.3349, "step": 15471 }, { "epoch": 0.9691351258240812, "grad_norm": 0.8315350072737148, "learning_rate": 2.4887823766577835e-08, "loss": 0.3689, "step": 15472 }, { "epoch": 0.9691977638234235, "grad_norm": 0.8878894635806208, "learning_rate": 2.478684015395305e-08, "loss": 0.3944, "step": 15473 }, { "epoch": 0.9692604018227657, "grad_norm": 0.8588358493992754, "learning_rate": 2.468606132202511e-08, "loss": 0.3661, "step": 15474 }, { "epoch": 0.9693230398221081, "grad_norm": 0.5639330338370152, "learning_rate": 2.4585487274942922e-08, "loss": 0.4507, "step": 15475 }, { "epoch": 0.9693856778214504, "grad_norm": 0.8738309592213866, "learning_rate": 2.448511801684539e-08, "loss": 0.4056, "step": 15476 }, { "epoch": 0.9694483158207927, "grad_norm": 0.8789353709100012, "learning_rate": 2.4384953551863655e-08, "loss": 0.4265, "step": 15477 }, { "epoch": 0.969510953820135, "grad_norm": 0.8530436814725615, "learning_rate": 2.428499388412109e-08, "loss": 0.3854, "step": 15478 }, { "epoch": 0.9695735918194773, "grad_norm": 0.8590354223729002, "learning_rate": 2.418523901773162e-08, "loss": 0.3817, "step": 15479 }, { "epoch": 0.9696362298188196, "grad_norm": 0.8454693754835346, "learning_rate": 2.408568895680141e-08, "loss": 0.3348, "step": 15480 }, { "epoch": 0.9696988678181618, "grad_norm": 0.8304642686624326, "learning_rate": 2.398634370542774e-08, "loss": 0.3382, "step": 15481 }, { "epoch": 0.9697615058175042, "grad_norm": 0.5667523256130544, "learning_rate": 2.3887203267699554e-08, "loss": 0.4238, "step": 15482 }, { "epoch": 0.9698241438168465, "grad_norm": 0.8777421270979004, "learning_rate": 2.3788267647697484e-08, "loss": 0.3699, "step": 15483 }, { "epoch": 0.9698867818161888, "grad_norm": 0.8437603821639976, "learning_rate": 2.3689536849494375e-08, "loss": 0.3934, "step": 15484 }, { "epoch": 0.9699494198155311, "grad_norm": 0.8913104005894753, "learning_rate": 2.3591010877153653e-08, "loss": 0.3799, "step": 15485 }, { "epoch": 0.9700120578148734, "grad_norm": 0.631730538081904, "learning_rate": 2.34926897347304e-08, "loss": 0.4716, "step": 15486 }, { "epoch": 0.9700746958142157, "grad_norm": 0.8520639787986684, "learning_rate": 2.3394573426271384e-08, "loss": 0.3679, "step": 15487 }, { "epoch": 0.970137333813558, "grad_norm": 0.613195926446623, "learning_rate": 2.3296661955816146e-08, "loss": 0.4389, "step": 15488 }, { "epoch": 0.9701999718129003, "grad_norm": 0.8862931508421793, "learning_rate": 2.319895532739369e-08, "loss": 0.3802, "step": 15489 }, { "epoch": 0.9702626098122425, "grad_norm": 0.8023470388298567, "learning_rate": 2.3101453545025797e-08, "loss": 0.3387, "step": 15490 }, { "epoch": 0.9703252478115849, "grad_norm": 0.8326551876503574, "learning_rate": 2.300415661272537e-08, "loss": 0.3863, "step": 15491 }, { "epoch": 0.9703878858109272, "grad_norm": 0.8836691961038243, "learning_rate": 2.290706453449809e-08, "loss": 0.3962, "step": 15492 }, { "epoch": 0.9704505238102695, "grad_norm": 0.8337424327400395, "learning_rate": 2.281017731434021e-08, "loss": 0.3649, "step": 15493 }, { "epoch": 0.9705131618096118, "grad_norm": 0.8810665943089141, "learning_rate": 2.271349495623909e-08, "loss": 0.3777, "step": 15494 }, { "epoch": 0.9705757998089541, "grad_norm": 0.7815184021376865, "learning_rate": 2.261701746417433e-08, "loss": 0.3025, "step": 15495 }, { "epoch": 0.9706384378082964, "grad_norm": 0.8650993625611085, "learning_rate": 2.2520744842116637e-08, "loss": 0.3546, "step": 15496 }, { "epoch": 0.9707010758076388, "grad_norm": 0.8896082669313604, "learning_rate": 2.242467709402951e-08, "loss": 0.3842, "step": 15497 }, { "epoch": 0.970763713806981, "grad_norm": 1.087475607049159, "learning_rate": 2.2328814223865902e-08, "loss": 0.3781, "step": 15498 }, { "epoch": 0.9708263518063233, "grad_norm": 0.9646850477555676, "learning_rate": 2.223315623557265e-08, "loss": 0.3847, "step": 15499 }, { "epoch": 0.9708889898056656, "grad_norm": 0.8156299032973611, "learning_rate": 2.2137703133086052e-08, "loss": 0.3314, "step": 15500 }, { "epoch": 0.9709516278050079, "grad_norm": 0.916430795838837, "learning_rate": 2.20424549203363e-08, "loss": 0.4117, "step": 15501 }, { "epoch": 0.9710142658043502, "grad_norm": 0.9417271406773436, "learning_rate": 2.1947411601242475e-08, "loss": 0.4319, "step": 15502 }, { "epoch": 0.9710769038036925, "grad_norm": 0.8927067960977508, "learning_rate": 2.185257317971756e-08, "loss": 0.4205, "step": 15503 }, { "epoch": 0.9711395418030349, "grad_norm": 0.8656732348886744, "learning_rate": 2.175793965966455e-08, "loss": 0.33, "step": 15504 }, { "epoch": 0.9712021798023771, "grad_norm": 0.8520842694662745, "learning_rate": 2.1663511044978658e-08, "loss": 0.3819, "step": 15505 }, { "epoch": 0.9712648178017194, "grad_norm": 0.8825635187542115, "learning_rate": 2.156928733954733e-08, "loss": 0.3583, "step": 15506 }, { "epoch": 0.9713274558010617, "grad_norm": 0.9205804375801561, "learning_rate": 2.1475268547247465e-08, "loss": 0.3942, "step": 15507 }, { "epoch": 0.971390093800404, "grad_norm": 0.8452031758732282, "learning_rate": 2.13814546719493e-08, "loss": 0.4157, "step": 15508 }, { "epoch": 0.9714527317997463, "grad_norm": 0.5964417940924551, "learning_rate": 2.12878457175153e-08, "loss": 0.4362, "step": 15509 }, { "epoch": 0.9715153697990886, "grad_norm": 0.8441093085017638, "learning_rate": 2.1194441687797383e-08, "loss": 0.3425, "step": 15510 }, { "epoch": 0.971578007798431, "grad_norm": 0.9187044616850477, "learning_rate": 2.1101242586640258e-08, "loss": 0.4031, "step": 15511 }, { "epoch": 0.9716406457977732, "grad_norm": 0.8136106876328638, "learning_rate": 2.1008248417880295e-08, "loss": 0.3648, "step": 15512 }, { "epoch": 0.9717032837971156, "grad_norm": 0.8921511339361118, "learning_rate": 2.091545918534499e-08, "loss": 0.3638, "step": 15513 }, { "epoch": 0.9717659217964578, "grad_norm": 0.8596286761105801, "learning_rate": 2.082287489285295e-08, "loss": 0.3615, "step": 15514 }, { "epoch": 0.9718285597958001, "grad_norm": 0.818539638186491, "learning_rate": 2.0730495544216134e-08, "loss": 0.3642, "step": 15515 }, { "epoch": 0.9718911977951424, "grad_norm": 0.8219156205183785, "learning_rate": 2.0638321143236496e-08, "loss": 0.3804, "step": 15516 }, { "epoch": 0.9719538357944847, "grad_norm": 1.0047880800569389, "learning_rate": 2.0546351693707113e-08, "loss": 0.3915, "step": 15517 }, { "epoch": 0.972016473793827, "grad_norm": 0.8904642228279224, "learning_rate": 2.04545871994144e-08, "loss": 0.4047, "step": 15518 }, { "epoch": 0.9720791117931693, "grad_norm": 0.8555023635490892, "learning_rate": 2.0363027664135337e-08, "loss": 0.3627, "step": 15519 }, { "epoch": 0.9721417497925117, "grad_norm": 0.9354070127846124, "learning_rate": 2.0271673091638022e-08, "loss": 0.393, "step": 15520 }, { "epoch": 0.9722043877918539, "grad_norm": 0.89223724558515, "learning_rate": 2.0180523485683336e-08, "loss": 0.3732, "step": 15521 }, { "epoch": 0.9722670257911963, "grad_norm": 0.8054023595279617, "learning_rate": 2.0089578850022717e-08, "loss": 0.3817, "step": 15522 }, { "epoch": 0.9723296637905385, "grad_norm": 0.8570897917997682, "learning_rate": 1.9998839188398733e-08, "loss": 0.3813, "step": 15523 }, { "epoch": 0.9723923017898808, "grad_norm": 0.908730777849681, "learning_rate": 1.990830450454728e-08, "loss": 0.3806, "step": 15524 }, { "epoch": 0.9724549397892231, "grad_norm": 0.8802477202936226, "learning_rate": 1.9817974802193717e-08, "loss": 0.4377, "step": 15525 }, { "epoch": 0.9725175777885654, "grad_norm": 0.8495482881452687, "learning_rate": 1.9727850085057287e-08, "loss": 0.3955, "step": 15526 }, { "epoch": 0.9725802157879078, "grad_norm": 0.9219715356078818, "learning_rate": 1.963793035684669e-08, "loss": 0.4025, "step": 15527 }, { "epoch": 0.97264285378725, "grad_norm": 0.9438259131276932, "learning_rate": 1.9548215621263412e-08, "loss": 0.3763, "step": 15528 }, { "epoch": 0.9727054917865924, "grad_norm": 0.9613642943391018, "learning_rate": 1.945870588200005e-08, "loss": 0.4117, "step": 15529 }, { "epoch": 0.9727681297859346, "grad_norm": 0.9390714553653292, "learning_rate": 1.9369401142740886e-08, "loss": 0.3647, "step": 15530 }, { "epoch": 0.9728307677852769, "grad_norm": 0.8669143396108595, "learning_rate": 1.9280301407161304e-08, "loss": 0.3756, "step": 15531 }, { "epoch": 0.9728934057846192, "grad_norm": 0.8341619847556833, "learning_rate": 1.919140667892949e-08, "loss": 0.3942, "step": 15532 }, { "epoch": 0.9729560437839615, "grad_norm": 0.8691716066055313, "learning_rate": 1.910271696170307e-08, "loss": 0.4, "step": 15533 }, { "epoch": 0.9730186817833039, "grad_norm": 0.8211138885859116, "learning_rate": 1.9014232259134125e-08, "loss": 0.3904, "step": 15534 }, { "epoch": 0.9730813197826461, "grad_norm": 0.8775086807461668, "learning_rate": 1.8925952574863627e-08, "loss": 0.3898, "step": 15535 }, { "epoch": 0.9731439577819885, "grad_norm": 0.9256680325498727, "learning_rate": 1.8837877912524784e-08, "loss": 0.4081, "step": 15536 }, { "epoch": 0.9732065957813307, "grad_norm": 0.8905936807019417, "learning_rate": 1.8750008275744136e-08, "loss": 0.3667, "step": 15537 }, { "epoch": 0.9732692337806731, "grad_norm": 0.9510096638958044, "learning_rate": 1.8662343668137685e-08, "loss": 0.4028, "step": 15538 }, { "epoch": 0.9733318717800153, "grad_norm": 0.8457618824338141, "learning_rate": 1.857488409331365e-08, "loss": 0.4045, "step": 15539 }, { "epoch": 0.9733945097793576, "grad_norm": 0.8799760017811097, "learning_rate": 1.8487629554871932e-08, "loss": 0.3944, "step": 15540 }, { "epoch": 0.9734571477787, "grad_norm": 0.8618069016081504, "learning_rate": 1.8400580056404105e-08, "loss": 0.4183, "step": 15541 }, { "epoch": 0.9735197857780422, "grad_norm": 0.5884803615992632, "learning_rate": 1.8313735601492855e-08, "loss": 0.4395, "step": 15542 }, { "epoch": 0.9735824237773846, "grad_norm": 0.8395287484676058, "learning_rate": 1.8227096193713102e-08, "loss": 0.373, "step": 15543 }, { "epoch": 0.9736450617767268, "grad_norm": 0.821531578984945, "learning_rate": 1.8140661836630325e-08, "loss": 0.361, "step": 15544 }, { "epoch": 0.9737076997760692, "grad_norm": 0.8409568930122362, "learning_rate": 1.8054432533803346e-08, "loss": 0.3459, "step": 15545 }, { "epoch": 0.9737703377754114, "grad_norm": 0.9055649304991141, "learning_rate": 1.7968408288779883e-08, "loss": 0.3876, "step": 15546 }, { "epoch": 0.9738329757747538, "grad_norm": 0.6152339420307192, "learning_rate": 1.7882589105101546e-08, "loss": 0.4573, "step": 15547 }, { "epoch": 0.9738956137740961, "grad_norm": 0.8202780242161521, "learning_rate": 1.7796974986301064e-08, "loss": 0.3765, "step": 15548 }, { "epoch": 0.9739582517734383, "grad_norm": 0.8751082787434763, "learning_rate": 1.7711565935901176e-08, "loss": 0.353, "step": 15549 }, { "epoch": 0.9740208897727807, "grad_norm": 0.8753208426160325, "learning_rate": 1.7626361957418514e-08, "loss": 0.3575, "step": 15550 }, { "epoch": 0.9740835277721229, "grad_norm": 0.8890187705877944, "learning_rate": 1.754136305435916e-08, "loss": 0.382, "step": 15551 }, { "epoch": 0.9741461657714653, "grad_norm": 0.844943097766584, "learning_rate": 1.7456569230222543e-08, "loss": 0.3437, "step": 15552 }, { "epoch": 0.9742088037708075, "grad_norm": 0.8924217961087381, "learning_rate": 1.7371980488498087e-08, "loss": 0.3891, "step": 15553 }, { "epoch": 0.9742714417701499, "grad_norm": 0.83003332640552, "learning_rate": 1.7287596832667453e-08, "loss": 0.346, "step": 15554 }, { "epoch": 0.9743340797694922, "grad_norm": 0.7852897493908926, "learning_rate": 1.720341826620453e-08, "loss": 0.3604, "step": 15555 }, { "epoch": 0.9743967177688344, "grad_norm": 0.851837898819784, "learning_rate": 1.711944479257377e-08, "loss": 0.3553, "step": 15556 }, { "epoch": 0.9744593557681768, "grad_norm": 0.8602878636442035, "learning_rate": 1.7035676415231294e-08, "loss": 0.3658, "step": 15557 }, { "epoch": 0.974521993767519, "grad_norm": 0.7925543549742983, "learning_rate": 1.6952113137625458e-08, "loss": 0.3961, "step": 15558 }, { "epoch": 0.9745846317668614, "grad_norm": 0.8365403938391225, "learning_rate": 1.6868754963195176e-08, "loss": 0.3571, "step": 15559 }, { "epoch": 0.9746472697662036, "grad_norm": 0.9112453449871689, "learning_rate": 1.6785601895372705e-08, "loss": 0.4191, "step": 15560 }, { "epoch": 0.974709907765546, "grad_norm": 0.8741393108464988, "learning_rate": 1.6702653937579194e-08, "loss": 0.3478, "step": 15561 }, { "epoch": 0.9747725457648883, "grad_norm": 0.6053530384974088, "learning_rate": 1.6619911093229135e-08, "loss": 0.4542, "step": 15562 }, { "epoch": 0.9748351837642306, "grad_norm": 0.8350430491967757, "learning_rate": 1.6537373365729246e-08, "loss": 0.3901, "step": 15563 }, { "epoch": 0.9748978217635729, "grad_norm": 0.894962998401825, "learning_rate": 1.645504075847515e-08, "loss": 0.4032, "step": 15564 }, { "epoch": 0.9749604597629151, "grad_norm": 0.6530313205758047, "learning_rate": 1.6372913274856907e-08, "loss": 0.4682, "step": 15565 }, { "epoch": 0.9750230977622575, "grad_norm": 0.8539551808324553, "learning_rate": 1.6290990918255144e-08, "loss": 0.3726, "step": 15566 }, { "epoch": 0.9750857357615997, "grad_norm": 0.8968540564407895, "learning_rate": 1.6209273692040507e-08, "loss": 0.3784, "step": 15567 }, { "epoch": 0.9751483737609421, "grad_norm": 0.8302701769595529, "learning_rate": 1.612776159957752e-08, "loss": 0.3507, "step": 15568 }, { "epoch": 0.9752110117602844, "grad_norm": 0.8438879591757934, "learning_rate": 1.604645464422072e-08, "loss": 0.3802, "step": 15569 }, { "epoch": 0.9752736497596267, "grad_norm": 0.6503180422545215, "learning_rate": 1.5965352829316883e-08, "loss": 0.45, "step": 15570 }, { "epoch": 0.975336287758969, "grad_norm": 0.9089955107040247, "learning_rate": 1.588445615820444e-08, "loss": 0.3797, "step": 15571 }, { "epoch": 0.9753989257583113, "grad_norm": 0.8957836808093547, "learning_rate": 1.580376463421296e-08, "loss": 0.4156, "step": 15572 }, { "epoch": 0.9754615637576536, "grad_norm": 0.5977263467429681, "learning_rate": 1.572327826066311e-08, "loss": 0.4258, "step": 15573 }, { "epoch": 0.9755242017569958, "grad_norm": 0.888267168689836, "learning_rate": 1.5642997040868912e-08, "loss": 0.3722, "step": 15574 }, { "epoch": 0.9755868397563382, "grad_norm": 0.6279163489864883, "learning_rate": 1.5562920978133834e-08, "loss": 0.4166, "step": 15575 }, { "epoch": 0.9756494777556804, "grad_norm": 0.8952849857472264, "learning_rate": 1.548305007575357e-08, "loss": 0.3687, "step": 15576 }, { "epoch": 0.9757121157550228, "grad_norm": 0.9240450920671383, "learning_rate": 1.54033843370166e-08, "loss": 0.3781, "step": 15577 }, { "epoch": 0.9757747537543651, "grad_norm": 0.8834392641669582, "learning_rate": 1.5323923765201975e-08, "loss": 0.3572, "step": 15578 }, { "epoch": 0.9758373917537074, "grad_norm": 0.8739250808814808, "learning_rate": 1.5244668363579296e-08, "loss": 0.3437, "step": 15579 }, { "epoch": 0.9759000297530497, "grad_norm": 0.8892102058901472, "learning_rate": 1.516561813541151e-08, "loss": 0.3895, "step": 15580 }, { "epoch": 0.975962667752392, "grad_norm": 0.8611910236892352, "learning_rate": 1.508677308395212e-08, "loss": 0.374, "step": 15581 }, { "epoch": 0.9760253057517343, "grad_norm": 0.9207111056590773, "learning_rate": 1.5008133212445763e-08, "loss": 0.3726, "step": 15582 }, { "epoch": 0.9760879437510765, "grad_norm": 0.7288690506337524, "learning_rate": 1.4929698524130397e-08, "loss": 0.3313, "step": 15583 }, { "epoch": 0.9761505817504189, "grad_norm": 0.8409234871754191, "learning_rate": 1.4851469022234e-08, "loss": 0.3523, "step": 15584 }, { "epoch": 0.9762132197497612, "grad_norm": 0.8900219490907414, "learning_rate": 1.4773444709976214e-08, "loss": 0.358, "step": 15585 }, { "epoch": 0.9762758577491035, "grad_norm": 0.8958946516593604, "learning_rate": 1.4695625590568918e-08, "loss": 0.3784, "step": 15586 }, { "epoch": 0.9763384957484458, "grad_norm": 0.8966450500190144, "learning_rate": 1.4618011667215104e-08, "loss": 0.4144, "step": 15587 }, { "epoch": 0.9764011337477881, "grad_norm": 0.8158735874720768, "learning_rate": 1.4540602943108883e-08, "loss": 0.3551, "step": 15588 }, { "epoch": 0.9764637717471304, "grad_norm": 0.9052695838862146, "learning_rate": 1.4463399421437152e-08, "loss": 0.3871, "step": 15589 }, { "epoch": 0.9765264097464726, "grad_norm": 0.7998960761372458, "learning_rate": 1.4386401105376812e-08, "loss": 0.3487, "step": 15590 }, { "epoch": 0.976589047745815, "grad_norm": 0.6128113198090288, "learning_rate": 1.4309607998098108e-08, "loss": 0.4312, "step": 15591 }, { "epoch": 0.9766516857451573, "grad_norm": 0.8511040762239117, "learning_rate": 1.423302010276073e-08, "loss": 0.3651, "step": 15592 }, { "epoch": 0.9767143237444996, "grad_norm": 0.8543717548599701, "learning_rate": 1.4156637422518271e-08, "loss": 0.3971, "step": 15593 }, { "epoch": 0.9767769617438419, "grad_norm": 0.8294673584483591, "learning_rate": 1.4080459960513771e-08, "loss": 0.375, "step": 15594 }, { "epoch": 0.9768395997431842, "grad_norm": 0.9320882995883175, "learning_rate": 1.4004487719883054e-08, "loss": 0.3787, "step": 15595 }, { "epoch": 0.9769022377425265, "grad_norm": 0.8635636842400572, "learning_rate": 1.3928720703753063e-08, "loss": 0.3766, "step": 15596 }, { "epoch": 0.9769648757418689, "grad_norm": 0.9241114003484158, "learning_rate": 1.385315891524186e-08, "loss": 0.4099, "step": 15597 }, { "epoch": 0.9770275137412111, "grad_norm": 0.9311301288361196, "learning_rate": 1.3777802357460845e-08, "loss": 0.3605, "step": 15598 }, { "epoch": 0.9770901517405534, "grad_norm": 0.9418603320027465, "learning_rate": 1.3702651033510872e-08, "loss": 0.4029, "step": 15599 }, { "epoch": 0.9771527897398957, "grad_norm": 0.8618782680595218, "learning_rate": 1.362770494648502e-08, "loss": 0.3757, "step": 15600 }, { "epoch": 0.977215427739238, "grad_norm": 0.8677808109132824, "learning_rate": 1.3552964099468046e-08, "loss": 0.3869, "step": 15601 }, { "epoch": 0.9772780657385803, "grad_norm": 0.8560425397735254, "learning_rate": 1.3478428495536933e-08, "loss": 0.3934, "step": 15602 }, { "epoch": 0.9773407037379226, "grad_norm": 0.9361531458583241, "learning_rate": 1.3404098137759225e-08, "loss": 0.3611, "step": 15603 }, { "epoch": 0.977403341737265, "grad_norm": 0.9034510572696007, "learning_rate": 1.3329973029194698e-08, "loss": 0.4027, "step": 15604 }, { "epoch": 0.9774659797366072, "grad_norm": 0.878136481889851, "learning_rate": 1.325605317289369e-08, "loss": 0.3539, "step": 15605 }, { "epoch": 0.9775286177359496, "grad_norm": 1.0591924827334729, "learning_rate": 1.3182338571899322e-08, "loss": 0.4058, "step": 15606 }, { "epoch": 0.9775912557352918, "grad_norm": 0.9588534355235336, "learning_rate": 1.3108829229245279e-08, "loss": 0.377, "step": 15607 }, { "epoch": 0.9776538937346341, "grad_norm": 0.8834177479042336, "learning_rate": 1.3035525147957473e-08, "loss": 0.3786, "step": 15608 }, { "epoch": 0.9777165317339764, "grad_norm": 0.876007709925796, "learning_rate": 1.2962426331052935e-08, "loss": 0.3679, "step": 15609 }, { "epoch": 0.9777791697333187, "grad_norm": 0.8842457028728595, "learning_rate": 1.2889532781540925e-08, "loss": 0.382, "step": 15610 }, { "epoch": 0.977841807732661, "grad_norm": 0.8740232980589311, "learning_rate": 1.2816844502421267e-08, "loss": 0.3666, "step": 15611 }, { "epoch": 0.9779044457320033, "grad_norm": 0.8960843747571566, "learning_rate": 1.274436149668601e-08, "loss": 0.3867, "step": 15612 }, { "epoch": 0.9779670837313457, "grad_norm": 0.7850856661139668, "learning_rate": 1.2672083767318322e-08, "loss": 0.3775, "step": 15613 }, { "epoch": 0.9780297217306879, "grad_norm": 0.9085293435061709, "learning_rate": 1.2600011317293603e-08, "loss": 0.4047, "step": 15614 }, { "epoch": 0.9780923597300302, "grad_norm": 0.832881731092479, "learning_rate": 1.2528144149577814e-08, "loss": 0.3292, "step": 15615 }, { "epoch": 0.9781549977293725, "grad_norm": 0.8708891428015757, "learning_rate": 1.24564822671297e-08, "loss": 0.3239, "step": 15616 }, { "epoch": 0.9782176357287148, "grad_norm": 0.9168026675385441, "learning_rate": 1.2385025672898565e-08, "loss": 0.3804, "step": 15617 }, { "epoch": 0.9782802737280571, "grad_norm": 0.7776715817357603, "learning_rate": 1.2313774369824838e-08, "loss": 0.3719, "step": 15618 }, { "epoch": 0.9783429117273994, "grad_norm": 0.8571590431524423, "learning_rate": 1.2242728360842283e-08, "loss": 0.3975, "step": 15619 }, { "epoch": 0.9784055497267418, "grad_norm": 0.9133696877367593, "learning_rate": 1.2171887648875224e-08, "loss": 0.3897, "step": 15620 }, { "epoch": 0.978468187726084, "grad_norm": 0.8529467756398598, "learning_rate": 1.2101252236838556e-08, "loss": 0.3828, "step": 15621 }, { "epoch": 0.9785308257254264, "grad_norm": 0.8555327199668895, "learning_rate": 1.2030822127640506e-08, "loss": 0.4106, "step": 15622 }, { "epoch": 0.9785934637247686, "grad_norm": 0.9070382109357943, "learning_rate": 1.1960597324179313e-08, "loss": 0.3439, "step": 15623 }, { "epoch": 0.9786561017241109, "grad_norm": 0.8923716832710369, "learning_rate": 1.1890577829345996e-08, "loss": 0.4382, "step": 15624 }, { "epoch": 0.9787187397234532, "grad_norm": 0.8068648762141828, "learning_rate": 1.1820763646022137e-08, "loss": 0.3947, "step": 15625 }, { "epoch": 0.9787813777227955, "grad_norm": 0.9559946065678869, "learning_rate": 1.1751154777081552e-08, "loss": 0.4281, "step": 15626 }, { "epoch": 0.9788440157221379, "grad_norm": 0.924798318882835, "learning_rate": 1.168175122538917e-08, "loss": 0.3849, "step": 15627 }, { "epoch": 0.9789066537214801, "grad_norm": 0.8798314198646873, "learning_rate": 1.1612552993801596e-08, "loss": 0.3705, "step": 15628 }, { "epoch": 0.9789692917208225, "grad_norm": 0.8538727970230279, "learning_rate": 1.154356008516766e-08, "loss": 0.3856, "step": 15629 }, { "epoch": 0.9790319297201647, "grad_norm": 0.9111834446903977, "learning_rate": 1.1474772502326759e-08, "loss": 0.3947, "step": 15630 }, { "epoch": 0.9790945677195071, "grad_norm": 0.904330173735385, "learning_rate": 1.140619024810996e-08, "loss": 0.3672, "step": 15631 }, { "epoch": 0.9791572057188493, "grad_norm": 0.8227269002406956, "learning_rate": 1.1337813325340008e-08, "loss": 0.3493, "step": 15632 }, { "epoch": 0.9792198437181916, "grad_norm": 0.8978305511933745, "learning_rate": 1.1269641736832426e-08, "loss": 0.3543, "step": 15633 }, { "epoch": 0.979282481717534, "grad_norm": 0.8585198609071293, "learning_rate": 1.1201675485391639e-08, "loss": 0.3512, "step": 15634 }, { "epoch": 0.9793451197168762, "grad_norm": 0.9296273521574732, "learning_rate": 1.1133914573815963e-08, "loss": 0.4369, "step": 15635 }, { "epoch": 0.9794077577162186, "grad_norm": 0.7921724020046694, "learning_rate": 1.1066359004894279e-08, "loss": 0.3381, "step": 15636 }, { "epoch": 0.9794703957155608, "grad_norm": 0.8762220002671272, "learning_rate": 1.099900878140714e-08, "loss": 0.3464, "step": 15637 }, { "epoch": 0.9795330337149032, "grad_norm": 0.9189152918465481, "learning_rate": 1.0931863906127327e-08, "loss": 0.41, "step": 15638 }, { "epoch": 0.9795956717142454, "grad_norm": 0.8515549822919135, "learning_rate": 1.0864924381817631e-08, "loss": 0.3548, "step": 15639 }, { "epoch": 0.9796583097135877, "grad_norm": 0.8324989381447386, "learning_rate": 1.0798190211233628e-08, "loss": 0.405, "step": 15640 }, { "epoch": 0.9797209477129301, "grad_norm": 0.885558003673698, "learning_rate": 1.073166139712256e-08, "loss": 0.4274, "step": 15641 }, { "epoch": 0.9797835857122723, "grad_norm": 0.9912207783986884, "learning_rate": 1.0665337942221687e-08, "loss": 0.3826, "step": 15642 }, { "epoch": 0.9798462237116147, "grad_norm": 0.9080975702256681, "learning_rate": 1.0599219849262154e-08, "loss": 0.3975, "step": 15643 }, { "epoch": 0.9799088617109569, "grad_norm": 0.6036427308774976, "learning_rate": 1.0533307120964564e-08, "loss": 0.4326, "step": 15644 }, { "epoch": 0.9799714997102993, "grad_norm": 0.8223244130040985, "learning_rate": 1.0467599760042302e-08, "loss": 0.36, "step": 15645 }, { "epoch": 0.9800341377096415, "grad_norm": 0.8981063380968806, "learning_rate": 1.0402097769199315e-08, "loss": 0.3728, "step": 15646 }, { "epoch": 0.9800967757089839, "grad_norm": 0.8481180343840593, "learning_rate": 1.0336801151132336e-08, "loss": 0.3725, "step": 15647 }, { "epoch": 0.9801594137083262, "grad_norm": 0.9330074878375821, "learning_rate": 1.0271709908529214e-08, "loss": 0.3631, "step": 15648 }, { "epoch": 0.9802220517076684, "grad_norm": 0.7887449897315326, "learning_rate": 1.0206824044068365e-08, "loss": 0.3658, "step": 15649 }, { "epoch": 0.9802846897070108, "grad_norm": 0.8552279148086137, "learning_rate": 1.0142143560420425e-08, "loss": 0.3373, "step": 15650 }, { "epoch": 0.980347327706353, "grad_norm": 0.8362413674076916, "learning_rate": 1.0077668460248269e-08, "loss": 0.3968, "step": 15651 }, { "epoch": 0.9804099657056954, "grad_norm": 0.9266330645285011, "learning_rate": 1.0013398746205328e-08, "loss": 0.4053, "step": 15652 }, { "epoch": 0.9804726037050376, "grad_norm": 0.9042584235646881, "learning_rate": 9.949334420937262e-09, "loss": 0.3971, "step": 15653 }, { "epoch": 0.98053524170438, "grad_norm": 0.8505433148831163, "learning_rate": 9.885475487080853e-09, "loss": 0.3455, "step": 15654 }, { "epoch": 0.9805978797037223, "grad_norm": 0.8986593424531159, "learning_rate": 9.821821947264554e-09, "loss": 0.4265, "step": 15655 }, { "epoch": 0.9806605177030646, "grad_norm": 0.9165511496350117, "learning_rate": 9.758373804107935e-09, "loss": 0.3662, "step": 15656 }, { "epoch": 0.9807231557024069, "grad_norm": 0.8809559216625207, "learning_rate": 9.695131060222796e-09, "loss": 0.3695, "step": 15657 }, { "epoch": 0.9807857937017491, "grad_norm": 0.8726399305906581, "learning_rate": 9.63209371821261e-09, "loss": 0.3803, "step": 15658 }, { "epoch": 0.9808484317010915, "grad_norm": 0.8386661272588318, "learning_rate": 9.569261780671413e-09, "loss": 0.3558, "step": 15659 }, { "epoch": 0.9809110697004337, "grad_norm": 0.8529367193034719, "learning_rate": 9.506635250186025e-09, "loss": 0.3774, "step": 15660 }, { "epoch": 0.9809737076997761, "grad_norm": 0.889459451132399, "learning_rate": 9.444214129333828e-09, "loss": 0.406, "step": 15661 }, { "epoch": 0.9810363456991184, "grad_norm": 0.8915189010667778, "learning_rate": 9.38199842068388e-09, "loss": 0.3817, "step": 15662 }, { "epoch": 0.9810989836984607, "grad_norm": 0.8268781095474886, "learning_rate": 9.319988126797463e-09, "loss": 0.3894, "step": 15663 }, { "epoch": 0.981161621697803, "grad_norm": 0.9796150500479677, "learning_rate": 9.258183250226426e-09, "loss": 0.4038, "step": 15664 }, { "epoch": 0.9812242596971452, "grad_norm": 0.9300560942956713, "learning_rate": 9.196583793514291e-09, "loss": 0.4273, "step": 15665 }, { "epoch": 0.9812868976964876, "grad_norm": 0.8145899349026362, "learning_rate": 9.135189759197915e-09, "loss": 0.3731, "step": 15666 }, { "epoch": 0.9813495356958298, "grad_norm": 0.9515651635406651, "learning_rate": 9.074001149803058e-09, "loss": 0.4388, "step": 15667 }, { "epoch": 0.9814121736951722, "grad_norm": 0.869825538050117, "learning_rate": 9.013017967848813e-09, "loss": 0.3936, "step": 15668 }, { "epoch": 0.9814748116945144, "grad_norm": 0.823536682965763, "learning_rate": 8.952240215845398e-09, "loss": 0.3443, "step": 15669 }, { "epoch": 0.9815374496938568, "grad_norm": 0.8620898040940296, "learning_rate": 8.891667896294143e-09, "loss": 0.3745, "step": 15670 }, { "epoch": 0.9816000876931991, "grad_norm": 0.9497539657337036, "learning_rate": 8.831301011688054e-09, "loss": 0.393, "step": 15671 }, { "epoch": 0.9816627256925414, "grad_norm": 0.8878284720224674, "learning_rate": 8.77113956451181e-09, "loss": 0.4035, "step": 15672 }, { "epoch": 0.9817253636918837, "grad_norm": 0.8874282268134375, "learning_rate": 8.711183557241765e-09, "loss": 0.3561, "step": 15673 }, { "epoch": 0.9817880016912259, "grad_norm": 0.902420374375134, "learning_rate": 8.651432992346498e-09, "loss": 0.3544, "step": 15674 }, { "epoch": 0.9818506396905683, "grad_norm": 0.893008877038261, "learning_rate": 8.591887872284044e-09, "loss": 0.3928, "step": 15675 }, { "epoch": 0.9819132776899105, "grad_norm": 0.8463650288791528, "learning_rate": 8.53254819950633e-09, "loss": 0.3663, "step": 15676 }, { "epoch": 0.9819759156892529, "grad_norm": 0.826556738610992, "learning_rate": 8.473413976455292e-09, "loss": 0.3655, "step": 15677 }, { "epoch": 0.9820385536885952, "grad_norm": 0.8671281443621475, "learning_rate": 8.414485205564538e-09, "loss": 0.4183, "step": 15678 }, { "epoch": 0.9821011916879375, "grad_norm": 0.8257724485554745, "learning_rate": 8.35576188926046e-09, "loss": 0.3488, "step": 15679 }, { "epoch": 0.9821638296872798, "grad_norm": 0.8270268719301973, "learning_rate": 8.297244029960017e-09, "loss": 0.3553, "step": 15680 }, { "epoch": 0.9822264676866221, "grad_norm": 0.9168854943473149, "learning_rate": 8.23893163007128e-09, "loss": 0.3968, "step": 15681 }, { "epoch": 0.9822891056859644, "grad_norm": 0.8056851845141761, "learning_rate": 8.180824691994549e-09, "loss": 0.3659, "step": 15682 }, { "epoch": 0.9823517436853066, "grad_norm": 0.9478005563804751, "learning_rate": 8.122923218121803e-09, "loss": 0.3662, "step": 15683 }, { "epoch": 0.982414381684649, "grad_norm": 0.843159692498125, "learning_rate": 8.06522721083558e-09, "loss": 0.3973, "step": 15684 }, { "epoch": 0.9824770196839913, "grad_norm": 0.8800040781782155, "learning_rate": 8.007736672511756e-09, "loss": 0.364, "step": 15685 }, { "epoch": 0.9825396576833336, "grad_norm": 0.884877081834232, "learning_rate": 7.950451605515664e-09, "loss": 0.404, "step": 15686 }, { "epoch": 0.9826022956826759, "grad_norm": 0.9056476659731542, "learning_rate": 7.893372012205413e-09, "loss": 0.4128, "step": 15687 }, { "epoch": 0.9826649336820182, "grad_norm": 0.8596870122272942, "learning_rate": 7.836497894930795e-09, "loss": 0.37, "step": 15688 }, { "epoch": 0.9827275716813605, "grad_norm": 0.8950752307603416, "learning_rate": 7.779829256032711e-09, "loss": 0.4156, "step": 15689 }, { "epoch": 0.9827902096807029, "grad_norm": 0.6377708219106584, "learning_rate": 7.723366097843188e-09, "loss": 0.42, "step": 15690 }, { "epoch": 0.9828528476800451, "grad_norm": 0.8796676369755636, "learning_rate": 7.667108422686475e-09, "loss": 0.4173, "step": 15691 }, { "epoch": 0.9829154856793874, "grad_norm": 0.8671558182823077, "learning_rate": 7.6110562328785e-09, "loss": 0.3591, "step": 15692 }, { "epoch": 0.9829781236787297, "grad_norm": 0.8873482495928546, "learning_rate": 7.555209530725749e-09, "loss": 0.3691, "step": 15693 }, { "epoch": 0.983040761678072, "grad_norm": 0.8968811091560122, "learning_rate": 7.499568318527495e-09, "loss": 0.3566, "step": 15694 }, { "epoch": 0.9831033996774143, "grad_norm": 0.8748620524341113, "learning_rate": 7.444132598573572e-09, "loss": 0.3967, "step": 15695 }, { "epoch": 0.9831660376767566, "grad_norm": 0.8527753523600466, "learning_rate": 7.38890237314549e-09, "loss": 0.3728, "step": 15696 }, { "epoch": 0.983228675676099, "grad_norm": 0.8457947559916966, "learning_rate": 7.33387764451754e-09, "loss": 0.3778, "step": 15697 }, { "epoch": 0.9832913136754412, "grad_norm": 0.8321121480296865, "learning_rate": 7.279058414953466e-09, "loss": 0.3555, "step": 15698 }, { "epoch": 0.9833539516747835, "grad_norm": 0.9059040174930564, "learning_rate": 7.224444686709798e-09, "loss": 0.4044, "step": 15699 }, { "epoch": 0.9834165896741258, "grad_norm": 0.870829289661591, "learning_rate": 7.17003646203529e-09, "loss": 0.3611, "step": 15700 }, { "epoch": 0.9834792276734681, "grad_norm": 0.8081086689837998, "learning_rate": 7.115833743168155e-09, "loss": 0.3615, "step": 15701 }, { "epoch": 0.9835418656728104, "grad_norm": 0.6434305177331117, "learning_rate": 7.061836532340494e-09, "loss": 0.4512, "step": 15702 }, { "epoch": 0.9836045036721527, "grad_norm": 0.9300188495597695, "learning_rate": 7.0080448317738635e-09, "loss": 0.3692, "step": 15703 }, { "epoch": 0.983667141671495, "grad_norm": 0.9048127442966296, "learning_rate": 6.9544586436831595e-09, "loss": 0.3494, "step": 15704 }, { "epoch": 0.9837297796708373, "grad_norm": 0.8591749115982118, "learning_rate": 6.9010779702738396e-09, "loss": 0.3948, "step": 15705 }, { "epoch": 0.9837924176701797, "grad_norm": 0.953186398334508, "learning_rate": 6.847902813743035e-09, "loss": 0.4029, "step": 15706 }, { "epoch": 0.9838550556695219, "grad_norm": 0.8962330834975352, "learning_rate": 6.794933176278995e-09, "loss": 0.3954, "step": 15707 }, { "epoch": 0.9839176936688642, "grad_norm": 0.7974492078912802, "learning_rate": 6.742169060062198e-09, "loss": 0.3773, "step": 15708 }, { "epoch": 0.9839803316682065, "grad_norm": 0.9099982042914235, "learning_rate": 6.689610467264795e-09, "loss": 0.4016, "step": 15709 }, { "epoch": 0.9840429696675488, "grad_norm": 0.9074747419752186, "learning_rate": 6.637257400049501e-09, "loss": 0.4065, "step": 15710 }, { "epoch": 0.9841056076668911, "grad_norm": 0.8659101929468286, "learning_rate": 6.585109860571814e-09, "loss": 0.3493, "step": 15711 }, { "epoch": 0.9841682456662334, "grad_norm": 0.9433390146961821, "learning_rate": 6.5331678509777955e-09, "loss": 0.3766, "step": 15712 }, { "epoch": 0.9842308836655758, "grad_norm": 0.8808238406167979, "learning_rate": 6.4814313734051785e-09, "loss": 0.389, "step": 15713 }, { "epoch": 0.984293521664918, "grad_norm": 0.7860007934356348, "learning_rate": 6.4299004299839266e-09, "loss": 0.3869, "step": 15714 }, { "epoch": 0.9843561596642604, "grad_norm": 0.828435842332852, "learning_rate": 6.3785750228351205e-09, "loss": 0.3896, "step": 15715 }, { "epoch": 0.9844187976636026, "grad_norm": 0.816223034010542, "learning_rate": 6.327455154070406e-09, "loss": 0.4009, "step": 15716 }, { "epoch": 0.9844814356629449, "grad_norm": 0.8984023621330234, "learning_rate": 6.2765408257947636e-09, "loss": 0.4115, "step": 15717 }, { "epoch": 0.9845440736622872, "grad_norm": 0.8674962058669398, "learning_rate": 6.225832040103741e-09, "loss": 0.3713, "step": 15718 }, { "epoch": 0.9846067116616295, "grad_norm": 0.8885875435694954, "learning_rate": 6.175328799084001e-09, "loss": 0.3944, "step": 15719 }, { "epoch": 0.9846693496609719, "grad_norm": 0.8441575867712644, "learning_rate": 6.125031104814994e-09, "loss": 0.3788, "step": 15720 }, { "epoch": 0.9847319876603141, "grad_norm": 0.9152786840080539, "learning_rate": 6.074938959366172e-09, "loss": 0.3703, "step": 15721 }, { "epoch": 0.9847946256596565, "grad_norm": 0.9141340008286434, "learning_rate": 6.025052364800332e-09, "loss": 0.3843, "step": 15722 }, { "epoch": 0.9848572636589987, "grad_norm": 0.8777916509678151, "learning_rate": 5.975371323169721e-09, "loss": 0.3723, "step": 15723 }, { "epoch": 0.984919901658341, "grad_norm": 0.8048099505460024, "learning_rate": 5.9258958365199234e-09, "loss": 0.3242, "step": 15724 }, { "epoch": 0.9849825396576833, "grad_norm": 0.8741928431597806, "learning_rate": 5.876625906887645e-09, "loss": 0.3287, "step": 15725 }, { "epoch": 0.9850451776570256, "grad_norm": 0.8433878962811131, "learning_rate": 5.827561536300152e-09, "loss": 0.3702, "step": 15726 }, { "epoch": 0.985107815656368, "grad_norm": 0.9299252542170622, "learning_rate": 5.778702726776941e-09, "loss": 0.4068, "step": 15727 }, { "epoch": 0.9851704536557102, "grad_norm": 0.951320112795922, "learning_rate": 5.730049480329181e-09, "loss": 0.3954, "step": 15728 }, { "epoch": 0.9852330916550526, "grad_norm": 1.042472639574018, "learning_rate": 5.6816017989597125e-09, "loss": 0.3811, "step": 15729 }, { "epoch": 0.9852957296543948, "grad_norm": 0.943603198416904, "learning_rate": 5.6333596846630536e-09, "loss": 0.4155, "step": 15730 }, { "epoch": 0.9853583676537372, "grad_norm": 0.8424967744584754, "learning_rate": 5.585323139423726e-09, "loss": 0.3795, "step": 15731 }, { "epoch": 0.9854210056530794, "grad_norm": 0.7972387661869396, "learning_rate": 5.537492165219594e-09, "loss": 0.339, "step": 15732 }, { "epoch": 0.9854836436524217, "grad_norm": 0.830750046308477, "learning_rate": 5.4898667640190806e-09, "loss": 0.369, "step": 15733 }, { "epoch": 0.9855462816517641, "grad_norm": 0.8109750245674223, "learning_rate": 5.442446937783396e-09, "loss": 0.3506, "step": 15734 }, { "epoch": 0.9856089196511063, "grad_norm": 0.9470091268509853, "learning_rate": 5.395232688463203e-09, "loss": 0.3994, "step": 15735 }, { "epoch": 0.9856715576504487, "grad_norm": 0.8565121105313072, "learning_rate": 5.348224018001946e-09, "loss": 0.3804, "step": 15736 }, { "epoch": 0.9857341956497909, "grad_norm": 0.9266249768624247, "learning_rate": 5.301420928335299e-09, "loss": 0.3887, "step": 15737 }, { "epoch": 0.9857968336491333, "grad_norm": 0.9410806489314764, "learning_rate": 5.254823421388944e-09, "loss": 0.437, "step": 15738 }, { "epoch": 0.9858594716484755, "grad_norm": 0.8737259984570497, "learning_rate": 5.208431499081345e-09, "loss": 0.3614, "step": 15739 }, { "epoch": 0.9859221096478179, "grad_norm": 0.8734879561305627, "learning_rate": 5.162245163321533e-09, "loss": 0.3307, "step": 15740 }, { "epoch": 0.9859847476471602, "grad_norm": 0.81905799540973, "learning_rate": 5.116264416011318e-09, "loss": 0.3379, "step": 15741 }, { "epoch": 0.9860473856465024, "grad_norm": 0.9291448023777427, "learning_rate": 5.070489259041966e-09, "loss": 0.3975, "step": 15742 }, { "epoch": 0.9861100236458448, "grad_norm": 0.8570474496260845, "learning_rate": 5.024919694299191e-09, "loss": 0.3987, "step": 15743 }, { "epoch": 0.986172661645187, "grad_norm": 0.9201394796121775, "learning_rate": 4.979555723657048e-09, "loss": 0.4051, "step": 15744 }, { "epoch": 0.9862352996445294, "grad_norm": 0.8720829858999735, "learning_rate": 4.934397348984043e-09, "loss": 0.3806, "step": 15745 }, { "epoch": 0.9862979376438716, "grad_norm": 0.8793711108022035, "learning_rate": 4.889444572138135e-09, "loss": 0.3754, "step": 15746 }, { "epoch": 0.986360575643214, "grad_norm": 0.8690142296787807, "learning_rate": 4.844697394970066e-09, "loss": 0.3693, "step": 15747 }, { "epoch": 0.9864232136425563, "grad_norm": 0.8912050348289832, "learning_rate": 4.800155819321139e-09, "loss": 0.3936, "step": 15748 }, { "epoch": 0.9864858516418985, "grad_norm": 0.7748203496513177, "learning_rate": 4.755819847025445e-09, "loss": 0.3261, "step": 15749 }, { "epoch": 0.9865484896412409, "grad_norm": 0.9164196365113978, "learning_rate": 4.711689479906523e-09, "loss": 0.4272, "step": 15750 }, { "epoch": 0.9866111276405831, "grad_norm": 0.9050374387545496, "learning_rate": 4.667764719782364e-09, "loss": 0.3941, "step": 15751 }, { "epoch": 0.9866737656399255, "grad_norm": 0.923408800587226, "learning_rate": 4.6240455684598566e-09, "loss": 0.4132, "step": 15752 }, { "epoch": 0.9867364036392677, "grad_norm": 0.820784166818434, "learning_rate": 4.580532027739226e-09, "loss": 0.3755, "step": 15753 }, { "epoch": 0.9867990416386101, "grad_norm": 0.7871304758126157, "learning_rate": 4.537224099410709e-09, "loss": 0.3498, "step": 15754 }, { "epoch": 0.9868616796379523, "grad_norm": 0.8612473031284489, "learning_rate": 4.4941217852567666e-09, "loss": 0.3792, "step": 15755 }, { "epoch": 0.9869243176372947, "grad_norm": 0.9187713638200249, "learning_rate": 4.451225087052647e-09, "loss": 0.417, "step": 15756 }, { "epoch": 0.986986955636637, "grad_norm": 0.8526022489813601, "learning_rate": 4.408534006563047e-09, "loss": 0.3936, "step": 15757 }, { "epoch": 0.9870495936359792, "grad_norm": 0.8461719893880868, "learning_rate": 4.3660485455460085e-09, "loss": 0.3923, "step": 15758 }, { "epoch": 0.9871122316353216, "grad_norm": 0.5895226919409384, "learning_rate": 4.3237687057490205e-09, "loss": 0.4514, "step": 15759 }, { "epoch": 0.9871748696346638, "grad_norm": 0.9109686183319761, "learning_rate": 4.281694488912913e-09, "loss": 0.3626, "step": 15760 }, { "epoch": 0.9872375076340062, "grad_norm": 0.8402383206271725, "learning_rate": 4.239825896769634e-09, "loss": 0.3949, "step": 15761 }, { "epoch": 0.9873001456333484, "grad_norm": 0.8950614234154214, "learning_rate": 4.198162931042804e-09, "loss": 0.3822, "step": 15762 }, { "epoch": 0.9873627836326908, "grad_norm": 0.9261086686947163, "learning_rate": 4.156705593446608e-09, "loss": 0.4034, "step": 15763 }, { "epoch": 0.9874254216320331, "grad_norm": 0.9035513330898188, "learning_rate": 4.115453885687459e-09, "loss": 0.361, "step": 15764 }, { "epoch": 0.9874880596313754, "grad_norm": 0.9114182779973193, "learning_rate": 4.074407809463443e-09, "loss": 0.3809, "step": 15765 }, { "epoch": 0.9875506976307177, "grad_norm": 0.8649086594205042, "learning_rate": 4.03356736646432e-09, "loss": 0.3493, "step": 15766 }, { "epoch": 0.9876133356300599, "grad_norm": 0.8325483153292591, "learning_rate": 3.992932558370966e-09, "loss": 0.3413, "step": 15767 }, { "epoch": 0.9876759736294023, "grad_norm": 0.8411840660583597, "learning_rate": 3.9525033868553775e-09, "loss": 0.3832, "step": 15768 }, { "epoch": 0.9877386116287445, "grad_norm": 0.9143449156635342, "learning_rate": 3.912279853582334e-09, "loss": 0.4281, "step": 15769 }, { "epoch": 0.9878012496280869, "grad_norm": 0.9337943666846527, "learning_rate": 3.8722619602071795e-09, "loss": 0.3826, "step": 15770 }, { "epoch": 0.9878638876274292, "grad_norm": 0.8206452217726375, "learning_rate": 3.832449708377484e-09, "loss": 0.3611, "step": 15771 }, { "epoch": 0.9879265256267715, "grad_norm": 0.902565349038071, "learning_rate": 3.7928430997308256e-09, "loss": 0.3943, "step": 15772 }, { "epoch": 0.9879891636261138, "grad_norm": 0.8291243094527064, "learning_rate": 3.7534421358981224e-09, "loss": 0.3762, "step": 15773 }, { "epoch": 0.988051801625456, "grad_norm": 0.6298659533638411, "learning_rate": 3.71424681850141e-09, "loss": 0.4651, "step": 15774 }, { "epoch": 0.9881144396247984, "grad_norm": 0.8426397692691254, "learning_rate": 3.6752571491532883e-09, "loss": 0.403, "step": 15775 }, { "epoch": 0.9881770776241406, "grad_norm": 0.6047479154031327, "learning_rate": 3.6364731294591393e-09, "loss": 0.4435, "step": 15776 }, { "epoch": 0.988239715623483, "grad_norm": 0.8520292184818723, "learning_rate": 3.5978947610154634e-09, "loss": 0.3426, "step": 15777 }, { "epoch": 0.9883023536228253, "grad_norm": 0.9057964253422695, "learning_rate": 3.559522045409325e-09, "loss": 0.4003, "step": 15778 }, { "epoch": 0.9883649916221676, "grad_norm": 0.8746718887717504, "learning_rate": 3.5213549842211257e-09, "loss": 0.3623, "step": 15779 }, { "epoch": 0.9884276296215099, "grad_norm": 0.9954812706533137, "learning_rate": 3.4833935790207217e-09, "loss": 0.4053, "step": 15780 }, { "epoch": 0.9884902676208522, "grad_norm": 0.8677829435917213, "learning_rate": 3.445637831371862e-09, "loss": 0.3903, "step": 15781 }, { "epoch": 0.9885529056201945, "grad_norm": 0.8570345514246759, "learning_rate": 3.408087742827748e-09, "loss": 0.3599, "step": 15782 }, { "epoch": 0.9886155436195367, "grad_norm": 0.833219055011574, "learning_rate": 3.370743314933811e-09, "loss": 0.3909, "step": 15783 }, { "epoch": 0.9886781816188791, "grad_norm": 0.8753715999556108, "learning_rate": 3.333604549227709e-09, "loss": 0.4046, "step": 15784 }, { "epoch": 0.9887408196182214, "grad_norm": 0.833313072697003, "learning_rate": 3.2966714472376647e-09, "loss": 0.3956, "step": 15785 }, { "epoch": 0.9888034576175637, "grad_norm": 0.8987306396861439, "learning_rate": 3.2599440104846837e-09, "loss": 0.4178, "step": 15786 }, { "epoch": 0.988866095616906, "grad_norm": 0.6631079013220555, "learning_rate": 3.223422240479224e-09, "loss": 0.4306, "step": 15787 }, { "epoch": 0.9889287336162483, "grad_norm": 0.9364052991697798, "learning_rate": 3.1871061387256376e-09, "loss": 0.4217, "step": 15788 }, { "epoch": 0.9889913716155906, "grad_norm": 0.8354459506959928, "learning_rate": 3.15099570671773e-09, "loss": 0.3451, "step": 15789 }, { "epoch": 0.989054009614933, "grad_norm": 0.8830894652108198, "learning_rate": 3.115090945942645e-09, "loss": 0.3669, "step": 15790 }, { "epoch": 0.9891166476142752, "grad_norm": 0.8581580897101101, "learning_rate": 3.079391857877534e-09, "loss": 0.3897, "step": 15791 }, { "epoch": 0.9891792856136175, "grad_norm": 0.8598571408349418, "learning_rate": 3.0438984439923325e-09, "loss": 0.3525, "step": 15792 }, { "epoch": 0.9892419236129598, "grad_norm": 0.809475342345139, "learning_rate": 3.008610705748094e-09, "loss": 0.3745, "step": 15793 }, { "epoch": 0.9893045616123021, "grad_norm": 0.8491539186852464, "learning_rate": 2.973528644596435e-09, "loss": 0.3759, "step": 15794 }, { "epoch": 0.9893671996116444, "grad_norm": 0.8822505104196008, "learning_rate": 2.9386522619817557e-09, "loss": 0.3879, "step": 15795 }, { "epoch": 0.9894298376109867, "grad_norm": 0.8714860457429187, "learning_rate": 2.90398155934013e-09, "loss": 0.3496, "step": 15796 }, { "epoch": 0.989492475610329, "grad_norm": 0.9018800508241017, "learning_rate": 2.8695165380981936e-09, "loss": 0.4002, "step": 15797 }, { "epoch": 0.9895551136096713, "grad_norm": 0.8835032326813157, "learning_rate": 2.835257199673702e-09, "loss": 0.3576, "step": 15798 }, { "epoch": 0.9896177516090137, "grad_norm": 0.6641592371270497, "learning_rate": 2.8012035454783037e-09, "loss": 0.4515, "step": 15799 }, { "epoch": 0.9896803896083559, "grad_norm": 0.938917026809371, "learning_rate": 2.767355576912545e-09, "loss": 0.4148, "step": 15800 }, { "epoch": 0.9897430276076982, "grad_norm": 0.8431167217867624, "learning_rate": 2.7337132953697555e-09, "loss": 0.3678, "step": 15801 }, { "epoch": 0.9898056656070405, "grad_norm": 0.8346776399682705, "learning_rate": 2.7002767022354935e-09, "loss": 0.3783, "step": 15802 }, { "epoch": 0.9898683036063828, "grad_norm": 0.9062714902129058, "learning_rate": 2.667045798884771e-09, "loss": 0.3845, "step": 15803 }, { "epoch": 0.9899309416057251, "grad_norm": 0.9662813106228351, "learning_rate": 2.6340205866864922e-09, "loss": 0.4166, "step": 15804 }, { "epoch": 0.9899935796050674, "grad_norm": 0.8376426042189882, "learning_rate": 2.6012010669990153e-09, "loss": 0.3991, "step": 15805 }, { "epoch": 0.9900562176044098, "grad_norm": 0.8462475320189239, "learning_rate": 2.5685872411740363e-09, "loss": 0.3572, "step": 15806 }, { "epoch": 0.990118855603752, "grad_norm": 0.8340570003029285, "learning_rate": 2.5361791105532608e-09, "loss": 0.3912, "step": 15807 }, { "epoch": 0.9901814936030943, "grad_norm": 0.8817979307877356, "learning_rate": 2.503976676471176e-09, "loss": 0.38, "step": 15808 }, { "epoch": 0.9902441316024366, "grad_norm": 0.9592019195147049, "learning_rate": 2.4719799402528333e-09, "loss": 0.3861, "step": 15809 }, { "epoch": 0.9903067696017789, "grad_norm": 0.8166214539113525, "learning_rate": 2.4401889032155125e-09, "loss": 0.3632, "step": 15810 }, { "epoch": 0.9903694076011212, "grad_norm": 0.9203647560976765, "learning_rate": 2.408603566667056e-09, "loss": 0.3733, "step": 15811 }, { "epoch": 0.9904320456004635, "grad_norm": 0.9020678572040455, "learning_rate": 2.3772239319086454e-09, "loss": 0.3941, "step": 15812 }, { "epoch": 0.9904946835998059, "grad_norm": 0.9353743037919008, "learning_rate": 2.34605000023147e-09, "loss": 0.3941, "step": 15813 }, { "epoch": 0.9905573215991481, "grad_norm": 0.7761563911233004, "learning_rate": 2.3150817729178375e-09, "loss": 0.3475, "step": 15814 }, { "epoch": 0.9906199595984905, "grad_norm": 0.8643438236504578, "learning_rate": 2.2843192512433943e-09, "loss": 0.3979, "step": 15815 }, { "epoch": 0.9906825975978327, "grad_norm": 0.8873630074576946, "learning_rate": 2.2537624364743494e-09, "loss": 0.4075, "step": 15816 }, { "epoch": 0.990745235597175, "grad_norm": 0.9062747866156118, "learning_rate": 2.223411329867475e-09, "loss": 0.4299, "step": 15817 }, { "epoch": 0.9908078735965173, "grad_norm": 0.8642805400551309, "learning_rate": 2.1932659326728833e-09, "loss": 0.3674, "step": 15818 }, { "epoch": 0.9908705115958596, "grad_norm": 0.8767588610884383, "learning_rate": 2.1633262461312476e-09, "loss": 0.4295, "step": 15819 }, { "epoch": 0.990933149595202, "grad_norm": 0.827033500009648, "learning_rate": 2.133592271474916e-09, "loss": 0.3896, "step": 15820 }, { "epoch": 0.9909957875945442, "grad_norm": 0.9128378793926395, "learning_rate": 2.1040640099267984e-09, "loss": 0.396, "step": 15821 }, { "epoch": 0.9910584255938866, "grad_norm": 0.8578925127570678, "learning_rate": 2.0747414627037e-09, "loss": 0.3576, "step": 15822 }, { "epoch": 0.9911210635932288, "grad_norm": 0.8530033755673532, "learning_rate": 2.0456246310118776e-09, "loss": 0.3748, "step": 15823 }, { "epoch": 0.9911837015925712, "grad_norm": 0.8607885979199853, "learning_rate": 2.0167135160492623e-09, "loss": 0.3939, "step": 15824 }, { "epoch": 0.9912463395919134, "grad_norm": 0.9350233211839329, "learning_rate": 1.988008119007123e-09, "loss": 0.3947, "step": 15825 }, { "epoch": 0.9913089775912557, "grad_norm": 0.5429249956489645, "learning_rate": 1.9595084410656275e-09, "loss": 0.4506, "step": 15826 }, { "epoch": 0.991371615590598, "grad_norm": 0.8896771092025705, "learning_rate": 1.9312144833988356e-09, "loss": 0.3802, "step": 15827 }, { "epoch": 0.9914342535899403, "grad_norm": 0.8610015648315894, "learning_rate": 1.9031262471708168e-09, "loss": 0.4042, "step": 15828 }, { "epoch": 0.9914968915892827, "grad_norm": 0.8644374704742545, "learning_rate": 1.875243733537868e-09, "loss": 0.386, "step": 15829 }, { "epoch": 0.9915595295886249, "grad_norm": 0.8833409181714443, "learning_rate": 1.8475669436474052e-09, "loss": 0.3557, "step": 15830 }, { "epoch": 0.9916221675879673, "grad_norm": 0.8108464013480067, "learning_rate": 1.8200958786390722e-09, "loss": 0.3658, "step": 15831 }, { "epoch": 0.9916848055873095, "grad_norm": 0.9154807836716566, "learning_rate": 1.7928305396430757e-09, "loss": 0.3817, "step": 15832 }, { "epoch": 0.9917474435866518, "grad_norm": 0.9550420713415126, "learning_rate": 1.7657709277818513e-09, "loss": 0.3855, "step": 15833 }, { "epoch": 0.9918100815859942, "grad_norm": 0.8599477570927531, "learning_rate": 1.738917044169508e-09, "loss": 0.3416, "step": 15834 }, { "epoch": 0.9918727195853364, "grad_norm": 0.7677895967690527, "learning_rate": 1.7122688899107176e-09, "loss": 0.3609, "step": 15835 }, { "epoch": 0.9919353575846788, "grad_norm": 0.900446743906251, "learning_rate": 1.6858264661029356e-09, "loss": 0.4283, "step": 15836 }, { "epoch": 0.991997995584021, "grad_norm": 0.5624683025814874, "learning_rate": 1.6595897738341803e-09, "loss": 0.4334, "step": 15837 }, { "epoch": 0.9920606335833634, "grad_norm": 0.7901362385848802, "learning_rate": 1.6335588141846992e-09, "loss": 0.3494, "step": 15838 }, { "epoch": 0.9921232715827056, "grad_norm": 0.8609981334671423, "learning_rate": 1.607733588225302e-09, "loss": 0.348, "step": 15839 }, { "epoch": 0.992185909582048, "grad_norm": 0.8393292119427442, "learning_rate": 1.5821140970190275e-09, "loss": 0.3511, "step": 15840 }, { "epoch": 0.9922485475813903, "grad_norm": 0.7887472406999779, "learning_rate": 1.5567003416211424e-09, "loss": 0.3369, "step": 15841 }, { "epoch": 0.9923111855807325, "grad_norm": 0.9328521182277406, "learning_rate": 1.531492323077477e-09, "loss": 0.4052, "step": 15842 }, { "epoch": 0.9923738235800749, "grad_norm": 0.9094711926459806, "learning_rate": 1.5064900424249794e-09, "loss": 0.3976, "step": 15843 }, { "epoch": 0.9924364615794171, "grad_norm": 0.8401269177400257, "learning_rate": 1.4816935006928268e-09, "loss": 0.4002, "step": 15844 }, { "epoch": 0.9924990995787595, "grad_norm": 0.895430832598951, "learning_rate": 1.4571026989024239e-09, "loss": 0.3651, "step": 15845 }, { "epoch": 0.9925617375781017, "grad_norm": 0.7973102548298426, "learning_rate": 1.4327176380651842e-09, "loss": 0.3202, "step": 15846 }, { "epoch": 0.9926243755774441, "grad_norm": 0.8433919468686695, "learning_rate": 1.4085383191853042e-09, "loss": 0.4005, "step": 15847 }, { "epoch": 0.9926870135767863, "grad_norm": 0.8626739151282595, "learning_rate": 1.3845647432575438e-09, "loss": 0.3996, "step": 15848 }, { "epoch": 0.9927496515761287, "grad_norm": 0.8870753854843522, "learning_rate": 1.3607969112694464e-09, "loss": 0.4203, "step": 15849 }, { "epoch": 0.992812289575471, "grad_norm": 0.8517407461024064, "learning_rate": 1.3372348241980083e-09, "loss": 0.3828, "step": 15850 }, { "epoch": 0.9928749275748132, "grad_norm": 0.9046268895178802, "learning_rate": 1.3138784830141195e-09, "loss": 0.394, "step": 15851 }, { "epoch": 0.9929375655741556, "grad_norm": 0.9590993493815972, "learning_rate": 1.2907278886786779e-09, "loss": 0.3904, "step": 15852 }, { "epoch": 0.9930002035734978, "grad_norm": 0.877204983130624, "learning_rate": 1.26778304214481e-09, "loss": 0.3548, "step": 15853 }, { "epoch": 0.9930628415728402, "grad_norm": 0.9061024452937938, "learning_rate": 1.2450439443567608e-09, "loss": 0.3995, "step": 15854 }, { "epoch": 0.9931254795721824, "grad_norm": 0.6408349839480836, "learning_rate": 1.222510596250448e-09, "loss": 0.44, "step": 15855 }, { "epoch": 0.9931881175715248, "grad_norm": 0.7931307495084391, "learning_rate": 1.200182998753463e-09, "loss": 0.3555, "step": 15856 }, { "epoch": 0.9932507555708671, "grad_norm": 0.8863794856453198, "learning_rate": 1.1780611527845155e-09, "loss": 0.4252, "step": 15857 }, { "epoch": 0.9933133935702093, "grad_norm": 0.8949367436310165, "learning_rate": 1.156145059254543e-09, "loss": 0.3962, "step": 15858 }, { "epoch": 0.9933760315695517, "grad_norm": 0.9082998449227617, "learning_rate": 1.1344347190656024e-09, "loss": 0.4331, "step": 15859 }, { "epoch": 0.9934386695688939, "grad_norm": 0.8504021984310107, "learning_rate": 1.1129301331114229e-09, "loss": 0.367, "step": 15860 }, { "epoch": 0.9935013075682363, "grad_norm": 0.936198474146538, "learning_rate": 1.091631302276297e-09, "loss": 0.3863, "step": 15861 }, { "epoch": 0.9935639455675785, "grad_norm": 0.861719912288722, "learning_rate": 1.0705382274378561e-09, "loss": 0.3564, "step": 15862 }, { "epoch": 0.9936265835669209, "grad_norm": 0.852072962622377, "learning_rate": 1.0496509094637397e-09, "loss": 0.3559, "step": 15863 }, { "epoch": 0.9936892215662632, "grad_norm": 0.876843869292355, "learning_rate": 1.0289693492138153e-09, "loss": 0.4105, "step": 15864 }, { "epoch": 0.9937518595656055, "grad_norm": 0.8354435259540086, "learning_rate": 1.008493547539624e-09, "loss": 0.3768, "step": 15865 }, { "epoch": 0.9938144975649478, "grad_norm": 0.8649256712297377, "learning_rate": 9.8822350528327e-10, "loss": 0.3834, "step": 15866 }, { "epoch": 0.99387713556429, "grad_norm": 0.889539304557597, "learning_rate": 9.681592232801962e-10, "loss": 0.3648, "step": 15867 }, { "epoch": 0.9939397735636324, "grad_norm": 0.800924294373022, "learning_rate": 9.48300702354743e-10, "loss": 0.372, "step": 15868 }, { "epoch": 0.9940024115629746, "grad_norm": 0.9188041158144797, "learning_rate": 9.286479433257e-10, "loss": 0.4096, "step": 15869 }, { "epoch": 0.994065049562317, "grad_norm": 0.9650696879235219, "learning_rate": 9.092009470013097e-10, "loss": 0.3727, "step": 15870 }, { "epoch": 0.9941276875616593, "grad_norm": 0.8671018426007665, "learning_rate": 8.899597141814875e-10, "loss": 0.3619, "step": 15871 }, { "epoch": 0.9941903255610016, "grad_norm": 0.8890605365170589, "learning_rate": 8.70924245659488e-10, "loss": 0.4185, "step": 15872 }, { "epoch": 0.9942529635603439, "grad_norm": 0.7773171759868001, "learning_rate": 8.520945422180183e-10, "loss": 0.3544, "step": 15873 }, { "epoch": 0.9943156015596862, "grad_norm": 0.9134803748102297, "learning_rate": 8.33470604632014e-10, "loss": 0.3668, "step": 15874 }, { "epoch": 0.9943782395590285, "grad_norm": 0.9520278810026416, "learning_rate": 8.150524336680843e-10, "loss": 0.4017, "step": 15875 }, { "epoch": 0.9944408775583707, "grad_norm": 0.8493976032568664, "learning_rate": 7.968400300845114e-10, "loss": 0.3803, "step": 15876 }, { "epoch": 0.9945035155577131, "grad_norm": 0.8416761219054876, "learning_rate": 7.78833394631251e-10, "loss": 0.3667, "step": 15877 }, { "epoch": 0.9945661535570554, "grad_norm": 0.8403776541426486, "learning_rate": 7.610325280488218e-10, "loss": 0.371, "step": 15878 }, { "epoch": 0.9946287915563977, "grad_norm": 0.8670656686686318, "learning_rate": 7.434374310705261e-10, "loss": 0.3887, "step": 15879 }, { "epoch": 0.99469142955574, "grad_norm": 0.7656283494808234, "learning_rate": 7.260481044202294e-10, "loss": 0.3238, "step": 15880 }, { "epoch": 0.9947540675550823, "grad_norm": 0.915908528853314, "learning_rate": 7.088645488134705e-10, "loss": 0.4109, "step": 15881 }, { "epoch": 0.9948167055544246, "grad_norm": 0.862428134132297, "learning_rate": 6.918867649574612e-10, "loss": 0.4025, "step": 15882 }, { "epoch": 0.9948793435537668, "grad_norm": 0.9060615681161723, "learning_rate": 6.751147535516423e-10, "loss": 0.3786, "step": 15883 }, { "epoch": 0.9949419815531092, "grad_norm": 0.6845570748085618, "learning_rate": 6.585485152860171e-10, "loss": 0.4628, "step": 15884 }, { "epoch": 0.9950046195524515, "grad_norm": 0.5967242406131008, "learning_rate": 6.42188050842818e-10, "loss": 0.4274, "step": 15885 }, { "epoch": 0.9950672575517938, "grad_norm": 0.7755071539676966, "learning_rate": 6.260333608948399e-10, "loss": 0.3483, "step": 15886 }, { "epoch": 0.9951298955511361, "grad_norm": 0.9481888784863586, "learning_rate": 6.100844461071065e-10, "loss": 0.4247, "step": 15887 }, { "epoch": 0.9951925335504784, "grad_norm": 0.8937275030460762, "learning_rate": 5.943413071363147e-10, "loss": 0.4007, "step": 15888 }, { "epoch": 0.9952551715498207, "grad_norm": 0.6354198814017397, "learning_rate": 5.788039446302795e-10, "loss": 0.4484, "step": 15889 }, { "epoch": 0.995317809549163, "grad_norm": 0.8873598868902375, "learning_rate": 5.634723592284897e-10, "loss": 0.3904, "step": 15890 }, { "epoch": 0.9953804475485053, "grad_norm": 0.8713032447427973, "learning_rate": 5.483465515621067e-10, "loss": 0.3711, "step": 15891 }, { "epoch": 0.9954430855478476, "grad_norm": 0.6443182019052921, "learning_rate": 5.334265222539658e-10, "loss": 0.416, "step": 15892 }, { "epoch": 0.9955057235471899, "grad_norm": 0.8386117865867101, "learning_rate": 5.187122719180204e-10, "loss": 0.4047, "step": 15893 }, { "epoch": 0.9955683615465322, "grad_norm": 0.9170268242975125, "learning_rate": 5.042038011598971e-10, "loss": 0.4149, "step": 15894 }, { "epoch": 0.9956309995458745, "grad_norm": 0.872388306268055, "learning_rate": 4.899011105763407e-10, "loss": 0.4066, "step": 15895 }, { "epoch": 0.9956936375452168, "grad_norm": 0.9087889929451237, "learning_rate": 4.758042007563246e-10, "loss": 0.3757, "step": 15896 }, { "epoch": 0.9957562755445591, "grad_norm": 0.8473053199216071, "learning_rate": 4.619130722804954e-10, "loss": 0.371, "step": 15897 }, { "epoch": 0.9958189135439014, "grad_norm": 0.8803476769833103, "learning_rate": 4.482277257200629e-10, "loss": 0.4368, "step": 15898 }, { "epoch": 0.9958815515432438, "grad_norm": 0.9885195642595558, "learning_rate": 4.3474816163902033e-10, "loss": 0.4124, "step": 15899 }, { "epoch": 0.995944189542586, "grad_norm": 0.873460731724712, "learning_rate": 4.214743805908139e-10, "loss": 0.3726, "step": 15900 }, { "epoch": 0.9960068275419283, "grad_norm": 0.902600009462044, "learning_rate": 4.084063831233387e-10, "loss": 0.3823, "step": 15901 }, { "epoch": 0.9960694655412706, "grad_norm": 0.8957632060248677, "learning_rate": 3.9554416977394263e-10, "loss": 0.3701, "step": 15902 }, { "epoch": 0.9961321035406129, "grad_norm": 0.8336326507065966, "learning_rate": 3.8288774107164694e-10, "loss": 0.3584, "step": 15903 }, { "epoch": 0.9961947415399552, "grad_norm": 0.825635988139314, "learning_rate": 3.704370975377014e-10, "loss": 0.3956, "step": 15904 }, { "epoch": 0.9962573795392975, "grad_norm": 0.892402440139362, "learning_rate": 3.581922396844739e-10, "loss": 0.4156, "step": 15905 }, { "epoch": 0.9963200175386399, "grad_norm": 0.9341090873158647, "learning_rate": 3.4615316801600575e-10, "loss": 0.397, "step": 15906 }, { "epoch": 0.9963826555379821, "grad_norm": 0.8477132451023782, "learning_rate": 3.3431988302801144e-10, "loss": 0.4034, "step": 15907 }, { "epoch": 0.9964452935373245, "grad_norm": 0.9389648247817085, "learning_rate": 3.226923852073238e-10, "loss": 0.3756, "step": 15908 }, { "epoch": 0.9965079315366667, "grad_norm": 0.8999460786625836, "learning_rate": 3.11270675032449e-10, "loss": 0.3585, "step": 15909 }, { "epoch": 0.996570569536009, "grad_norm": 0.9780194891730372, "learning_rate": 3.0005475297356647e-10, "loss": 0.4496, "step": 15910 }, { "epoch": 0.9966332075353513, "grad_norm": 0.9141230604709402, "learning_rate": 2.8904461949308405e-10, "loss": 0.3397, "step": 15911 }, { "epoch": 0.9966958455346936, "grad_norm": 0.9132930374692361, "learning_rate": 2.7824027504286253e-10, "loss": 0.4403, "step": 15912 }, { "epoch": 0.996758483534036, "grad_norm": 0.8494022497390158, "learning_rate": 2.6764172006865654e-10, "loss": 0.3125, "step": 15913 }, { "epoch": 0.9968211215333782, "grad_norm": 0.8507949387116837, "learning_rate": 2.572489550062285e-10, "loss": 0.3774, "step": 15914 }, { "epoch": 0.9968837595327206, "grad_norm": 0.8893871451376961, "learning_rate": 2.470619802830143e-10, "loss": 0.3821, "step": 15915 }, { "epoch": 0.9969463975320628, "grad_norm": 0.8487261968105335, "learning_rate": 2.3708079631923344e-10, "loss": 0.3833, "step": 15916 }, { "epoch": 0.9970090355314051, "grad_norm": 0.9499569412759221, "learning_rate": 2.273054035251132e-10, "loss": 0.3834, "step": 15917 }, { "epoch": 0.9970716735307474, "grad_norm": 0.8989291962079233, "learning_rate": 2.1773580230255442e-10, "loss": 0.3387, "step": 15918 }, { "epoch": 0.9971343115300897, "grad_norm": 0.9183905928142329, "learning_rate": 2.0837199304679646e-10, "loss": 0.3986, "step": 15919 }, { "epoch": 0.997196949529432, "grad_norm": 0.8033370585039532, "learning_rate": 1.9921397614197645e-10, "loss": 0.3624, "step": 15920 }, { "epoch": 0.9972595875287743, "grad_norm": 0.8936073567062089, "learning_rate": 1.902617519655703e-10, "loss": 0.3826, "step": 15921 }, { "epoch": 0.9973222255281167, "grad_norm": 0.8316492841846548, "learning_rate": 1.8151532088617195e-10, "loss": 0.3737, "step": 15922 }, { "epoch": 0.9973848635274589, "grad_norm": 0.8131774409945176, "learning_rate": 1.7297468326349375e-10, "loss": 0.3643, "step": 15923 }, { "epoch": 0.9974475015268013, "grad_norm": 0.8141310808931004, "learning_rate": 1.6463983944947638e-10, "loss": 0.377, "step": 15924 }, { "epoch": 0.9975101395261435, "grad_norm": 0.9016881227244774, "learning_rate": 1.565107897866236e-10, "loss": 0.3875, "step": 15925 }, { "epoch": 0.9975727775254858, "grad_norm": 0.8568613763292107, "learning_rate": 1.485875346096677e-10, "loss": 0.3697, "step": 15926 }, { "epoch": 0.9976354155248282, "grad_norm": 0.9134120729000144, "learning_rate": 1.408700742450142e-10, "loss": 0.3816, "step": 15927 }, { "epoch": 0.9976980535241704, "grad_norm": 0.8596580561203769, "learning_rate": 1.3335840901018693e-10, "loss": 0.3879, "step": 15928 }, { "epoch": 0.9977606915235128, "grad_norm": 0.8768099201517378, "learning_rate": 1.2605253921438298e-10, "loss": 0.3638, "step": 15929 }, { "epoch": 0.997823329522855, "grad_norm": 0.8903927025385795, "learning_rate": 1.189524651579177e-10, "loss": 0.3874, "step": 15930 }, { "epoch": 0.9978859675221974, "grad_norm": 0.8336435099951921, "learning_rate": 1.1205818713388994e-10, "loss": 0.4045, "step": 15931 }, { "epoch": 0.9979486055215396, "grad_norm": 0.8743756739415384, "learning_rate": 1.0536970542540659e-10, "loss": 0.3804, "step": 15932 }, { "epoch": 0.998011243520882, "grad_norm": 0.908754327541972, "learning_rate": 9.888702030780295e-11, "loss": 0.4114, "step": 15933 }, { "epoch": 0.9980738815202242, "grad_norm": 0.8771856333154879, "learning_rate": 9.261013204808767e-11, "loss": 0.3855, "step": 15934 }, { "epoch": 0.9981365195195665, "grad_norm": 0.8523308799604129, "learning_rate": 8.653904090438758e-11, "loss": 0.3801, "step": 15935 }, { "epoch": 0.9981991575189089, "grad_norm": 0.9188546690196592, "learning_rate": 8.0673747127058e-11, "loss": 0.3494, "step": 15936 }, { "epoch": 0.9982617955182511, "grad_norm": 0.9227705571814729, "learning_rate": 7.50142509570173e-11, "loss": 0.4268, "step": 15937 }, { "epoch": 0.9983244335175935, "grad_norm": 0.8646767520239018, "learning_rate": 6.956055262796746e-11, "loss": 0.4147, "step": 15938 }, { "epoch": 0.9983870715169357, "grad_norm": 0.842861575965517, "learning_rate": 6.431265236306327e-11, "loss": 0.3808, "step": 15939 }, { "epoch": 0.9984497095162781, "grad_norm": 0.9218101315939139, "learning_rate": 5.927055037935337e-11, "loss": 0.4192, "step": 15940 }, { "epoch": 0.9985123475156203, "grad_norm": 0.8663234260581287, "learning_rate": 5.4434246884449423e-11, "loss": 0.3535, "step": 15941 }, { "epoch": 0.9985749855149626, "grad_norm": 0.873843351933299, "learning_rate": 4.980374207652627e-11, "loss": 0.4063, "step": 15942 }, { "epoch": 0.998637623514305, "grad_norm": 0.8841067709514158, "learning_rate": 4.5379036147097375e-11, "loss": 0.3705, "step": 15943 }, { "epoch": 0.9987002615136472, "grad_norm": 0.8260284517679759, "learning_rate": 4.11601292776842e-11, "loss": 0.3692, "step": 15944 }, { "epoch": 0.9987628995129896, "grad_norm": 0.894007415895992, "learning_rate": 3.7147021642591764e-11, "loss": 0.3583, "step": 15945 }, { "epoch": 0.9988255375123318, "grad_norm": 0.8777327052772096, "learning_rate": 3.3339713406133067e-11, "loss": 0.3877, "step": 15946 }, { "epoch": 0.9988881755116742, "grad_norm": 0.8769129627307914, "learning_rate": 2.9738204725404674e-11, "loss": 0.3742, "step": 15947 }, { "epoch": 0.9989508135110164, "grad_norm": 0.8455403979131051, "learning_rate": 2.6342495749176465e-11, "loss": 0.3978, "step": 15948 }, { "epoch": 0.9990134515103588, "grad_norm": 0.5901786539196062, "learning_rate": 2.3152586616226323e-11, "loss": 0.4378, "step": 15949 }, { "epoch": 0.9990760895097011, "grad_norm": 0.8335914656626412, "learning_rate": 2.0168477458670787e-11, "loss": 0.3737, "step": 15950 }, { "epoch": 0.9991387275090433, "grad_norm": 0.8479999144983759, "learning_rate": 1.7390168398634387e-11, "loss": 0.3962, "step": 15951 }, { "epoch": 0.9992013655083857, "grad_norm": 0.8480530706392897, "learning_rate": 1.481765955102521e-11, "loss": 0.3801, "step": 15952 }, { "epoch": 0.9992640035077279, "grad_norm": 0.7969139932908254, "learning_rate": 1.2450951021869551e-11, "loss": 0.3684, "step": 15953 }, { "epoch": 0.9993266415070703, "grad_norm": 0.8445161769800401, "learning_rate": 1.0290042907756815e-11, "loss": 0.357, "step": 15954 }, { "epoch": 0.9993892795064125, "grad_norm": 0.9293412490739961, "learning_rate": 8.334935298615066e-12, "loss": 0.4363, "step": 15955 }, { "epoch": 0.9994519175057549, "grad_norm": 0.6858112326885529, "learning_rate": 6.5856282743803625e-12, "loss": 0.4503, "step": 15956 }, { "epoch": 0.9995145555050972, "grad_norm": 0.8584014483871996, "learning_rate": 5.04212190666209e-12, "loss": 0.3943, "step": 15957 }, { "epoch": 0.9995771935044395, "grad_norm": 0.8576438971321227, "learning_rate": 3.704416259853183e-12, "loss": 0.4347, "step": 15958 }, { "epoch": 0.9996398315037818, "grad_norm": 0.8606148090560346, "learning_rate": 2.572511388909682e-12, "loss": 0.376, "step": 15959 }, { "epoch": 0.999702469503124, "grad_norm": 0.8634623360478229, "learning_rate": 1.6464073399058422e-12, "loss": 0.3389, "step": 15960 }, { "epoch": 0.9997651075024664, "grad_norm": 0.8731434209772633, "learning_rate": 9.261041505892465e-13, "loss": 0.3737, "step": 15961 }, { "epoch": 0.9998277455018086, "grad_norm": 0.9143078052733866, "learning_rate": 4.116018520461396e-13, "loss": 0.3844, "step": 15962 }, { "epoch": 0.999890383501151, "grad_norm": 0.8724485662472965, "learning_rate": 1.0290046426053579e-13, "loss": 0.416, "step": 15963 }, { "epoch": 0.9999530215004933, "grad_norm": 0.8785715791011841, "learning_rate": 0.0, "loss": 0.3779, "step": 15964 }, { "epoch": 0.9999530215004933, "step": 15964, "total_flos": 3.566379079447347e+16, "train_loss": 0.41283787322302745, "train_runtime": 284241.4636, "train_samples_per_second": 14.379, "train_steps_per_second": 0.056 } ], "logging_steps": 1.0, "max_steps": 15964, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.566379079447347e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }