{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 31968, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.384384384384384e-05, "grad_norm": 18.832452357949645, "learning_rate": 0.0, "loss": 1.2886, "step": 1 }, { "epoch": 0.00018768768768768769, "grad_norm": 17.56368181184898, "learning_rate": 3.1279324366593687e-09, "loss": 1.3487, "step": 2 }, { "epoch": 0.00028153153153153153, "grad_norm": 16.65063653192825, "learning_rate": 6.2558648733187375e-09, "loss": 1.2794, "step": 3 }, { "epoch": 0.00037537537537537537, "grad_norm": 19.839430467837815, "learning_rate": 9.383797309978106e-09, "loss": 1.2552, "step": 4 }, { "epoch": 0.0004692192192192192, "grad_norm": 15.780660917877176, "learning_rate": 1.2511729746637475e-08, "loss": 1.3173, "step": 5 }, { "epoch": 0.0005630630630630631, "grad_norm": 15.802817871296325, "learning_rate": 1.5639662183296842e-08, "loss": 1.3039, "step": 6 }, { "epoch": 0.000656906906906907, "grad_norm": 18.415203713659047, "learning_rate": 1.876759461995621e-08, "loss": 1.3249, "step": 7 }, { "epoch": 0.0007507507507507507, "grad_norm": 15.940445604878414, "learning_rate": 2.189552705661558e-08, "loss": 1.2826, "step": 8 }, { "epoch": 0.0008445945945945946, "grad_norm": 15.17403369337047, "learning_rate": 2.502345949327495e-08, "loss": 1.2428, "step": 9 }, { "epoch": 0.0009384384384384384, "grad_norm": 16.739092251698423, "learning_rate": 2.8151391929934316e-08, "loss": 1.2766, "step": 10 }, { "epoch": 0.0010322822822822822, "grad_norm": 15.613614330950357, "learning_rate": 3.1279324366593685e-08, "loss": 1.1967, "step": 11 }, { "epoch": 0.0011261261261261261, "grad_norm": 16.793710762263636, "learning_rate": 3.440725680325306e-08, "loss": 1.2239, "step": 12 }, { "epoch": 0.00121996996996997, "grad_norm": 14.849090866786895, "learning_rate": 3.753518923991242e-08, "loss": 1.2653, "step": 13 }, { "epoch": 0.001313813813813814, "grad_norm": 16.121863362781756, "learning_rate": 4.066312167657179e-08, "loss": 1.232, "step": 14 }, { "epoch": 0.0014076576576576576, "grad_norm": 16.45040614323163, "learning_rate": 4.379105411323116e-08, "loss": 1.2431, "step": 15 }, { "epoch": 0.0015015015015015015, "grad_norm": 16.281583166039404, "learning_rate": 4.691898654989053e-08, "loss": 1.2416, "step": 16 }, { "epoch": 0.0015953453453453454, "grad_norm": 16.76726218696687, "learning_rate": 5.00469189865499e-08, "loss": 1.3566, "step": 17 }, { "epoch": 0.0016891891891891893, "grad_norm": 15.758434241016245, "learning_rate": 5.3174851423209266e-08, "loss": 1.2356, "step": 18 }, { "epoch": 0.001783033033033033, "grad_norm": 15.175673849183033, "learning_rate": 5.630278385986863e-08, "loss": 1.2793, "step": 19 }, { "epoch": 0.0018768768768768769, "grad_norm": 17.177101912243696, "learning_rate": 5.9430716296528004e-08, "loss": 1.3559, "step": 20 }, { "epoch": 0.0019707207207207205, "grad_norm": 17.403360040896224, "learning_rate": 6.255864873318737e-08, "loss": 1.3824, "step": 21 }, { "epoch": 0.0020645645645645644, "grad_norm": 14.229950367759708, "learning_rate": 6.568658116984674e-08, "loss": 1.2402, "step": 22 }, { "epoch": 0.0021584084084084083, "grad_norm": 15.68961261649414, "learning_rate": 6.881451360650611e-08, "loss": 1.2846, "step": 23 }, { "epoch": 0.0022522522522522522, "grad_norm": 16.58712617724437, "learning_rate": 7.194244604316547e-08, "loss": 1.2769, "step": 24 }, { "epoch": 0.002346096096096096, "grad_norm": 18.283840518127896, "learning_rate": 7.507037847982485e-08, "loss": 1.3121, "step": 25 }, { "epoch": 0.00243993993993994, "grad_norm": 16.947724448130227, "learning_rate": 7.819831091648422e-08, "loss": 1.3023, "step": 26 }, { "epoch": 0.002533783783783784, "grad_norm": 15.981653875629059, "learning_rate": 8.132624335314358e-08, "loss": 1.2842, "step": 27 }, { "epoch": 0.002627627627627628, "grad_norm": 13.900791722160225, "learning_rate": 8.445417578980295e-08, "loss": 1.0899, "step": 28 }, { "epoch": 0.0027214714714714713, "grad_norm": 14.456890857332203, "learning_rate": 8.758210822646232e-08, "loss": 1.2527, "step": 29 }, { "epoch": 0.002815315315315315, "grad_norm": 15.162765255438055, "learning_rate": 9.07100406631217e-08, "loss": 1.2188, "step": 30 }, { "epoch": 0.002909159159159159, "grad_norm": 16.869793079826316, "learning_rate": 9.383797309978105e-08, "loss": 1.2148, "step": 31 }, { "epoch": 0.003003003003003003, "grad_norm": 15.784288189047501, "learning_rate": 9.696590553644043e-08, "loss": 1.2752, "step": 32 }, { "epoch": 0.003096846846846847, "grad_norm": 16.7807360462967, "learning_rate": 1.000938379730998e-07, "loss": 1.3127, "step": 33 }, { "epoch": 0.0031906906906906908, "grad_norm": 15.503996069748643, "learning_rate": 1.0322177040975916e-07, "loss": 1.266, "step": 34 }, { "epoch": 0.0032845345345345347, "grad_norm": 18.629950784814753, "learning_rate": 1.0634970284641853e-07, "loss": 1.3968, "step": 35 }, { "epoch": 0.0033783783783783786, "grad_norm": 14.870121481248939, "learning_rate": 1.094776352830779e-07, "loss": 1.2301, "step": 36 }, { "epoch": 0.003472222222222222, "grad_norm": 15.307513540745884, "learning_rate": 1.1260556771973726e-07, "loss": 1.2557, "step": 37 }, { "epoch": 0.003566066066066066, "grad_norm": 16.283708585048238, "learning_rate": 1.1573350015639664e-07, "loss": 1.327, "step": 38 }, { "epoch": 0.00365990990990991, "grad_norm": 13.966142051762933, "learning_rate": 1.1886143259305601e-07, "loss": 1.1544, "step": 39 }, { "epoch": 0.0037537537537537537, "grad_norm": 16.48385680539478, "learning_rate": 1.2198936502971537e-07, "loss": 1.4012, "step": 40 }, { "epoch": 0.0038475975975975976, "grad_norm": 15.170254534033914, "learning_rate": 1.2511729746637474e-07, "loss": 1.2953, "step": 41 }, { "epoch": 0.003941441441441441, "grad_norm": 15.620482581270235, "learning_rate": 1.282452299030341e-07, "loss": 1.347, "step": 42 }, { "epoch": 0.004035285285285285, "grad_norm": 15.554897933398465, "learning_rate": 1.3137316233969348e-07, "loss": 1.2302, "step": 43 }, { "epoch": 0.004129129129129129, "grad_norm": 15.15916878676933, "learning_rate": 1.3450109477635286e-07, "loss": 1.123, "step": 44 }, { "epoch": 0.004222972972972973, "grad_norm": 14.689491573805904, "learning_rate": 1.3762902721301223e-07, "loss": 1.2722, "step": 45 }, { "epoch": 0.004316816816816817, "grad_norm": 15.886439464728971, "learning_rate": 1.4075695964967158e-07, "loss": 1.2244, "step": 46 }, { "epoch": 0.004410660660660661, "grad_norm": 13.54781887151892, "learning_rate": 1.4388489208633095e-07, "loss": 1.1556, "step": 47 }, { "epoch": 0.0045045045045045045, "grad_norm": 15.391092833609333, "learning_rate": 1.4701282452299032e-07, "loss": 1.3116, "step": 48 }, { "epoch": 0.004598348348348348, "grad_norm": 17.26848896080798, "learning_rate": 1.501407569596497e-07, "loss": 1.3393, "step": 49 }, { "epoch": 0.004692192192192192, "grad_norm": 16.516650885717443, "learning_rate": 1.5326868939630906e-07, "loss": 1.2824, "step": 50 }, { "epoch": 0.004786036036036036, "grad_norm": 12.729160437474901, "learning_rate": 1.5639662183296844e-07, "loss": 1.098, "step": 51 }, { "epoch": 0.00487987987987988, "grad_norm": 12.367919400404958, "learning_rate": 1.595245542696278e-07, "loss": 1.1093, "step": 52 }, { "epoch": 0.0049737237237237235, "grad_norm": 14.805349057306438, "learning_rate": 1.6265248670628716e-07, "loss": 1.2762, "step": 53 }, { "epoch": 0.005067567567567568, "grad_norm": 12.79631529664134, "learning_rate": 1.6578041914294653e-07, "loss": 1.1365, "step": 54 }, { "epoch": 0.005161411411411411, "grad_norm": 13.375629507371562, "learning_rate": 1.689083515796059e-07, "loss": 1.206, "step": 55 }, { "epoch": 0.005255255255255256, "grad_norm": 13.773175648753908, "learning_rate": 1.7203628401626527e-07, "loss": 1.2685, "step": 56 }, { "epoch": 0.005349099099099099, "grad_norm": 13.214650059192564, "learning_rate": 1.7516421645292465e-07, "loss": 1.2043, "step": 57 }, { "epoch": 0.005442942942942943, "grad_norm": 13.005362066583698, "learning_rate": 1.7829214888958402e-07, "loss": 1.2117, "step": 58 }, { "epoch": 0.005536786786786787, "grad_norm": 13.335740441709929, "learning_rate": 1.814200813262434e-07, "loss": 1.2682, "step": 59 }, { "epoch": 0.00563063063063063, "grad_norm": 12.58962410011341, "learning_rate": 1.8454801376290274e-07, "loss": 1.1315, "step": 60 }, { "epoch": 0.005724474474474475, "grad_norm": 12.667225082180535, "learning_rate": 1.876759461995621e-07, "loss": 1.1897, "step": 61 }, { "epoch": 0.005818318318318318, "grad_norm": 12.559198949220681, "learning_rate": 1.9080387863622148e-07, "loss": 1.204, "step": 62 }, { "epoch": 0.0059121621621621625, "grad_norm": 11.776115282128803, "learning_rate": 1.9393181107288085e-07, "loss": 1.1489, "step": 63 }, { "epoch": 0.006006006006006006, "grad_norm": 13.247370347125406, "learning_rate": 1.9705974350954023e-07, "loss": 1.2682, "step": 64 }, { "epoch": 0.006099849849849849, "grad_norm": 24.08169729189658, "learning_rate": 2.001876759461996e-07, "loss": 1.1628, "step": 65 }, { "epoch": 0.006193693693693694, "grad_norm": 12.537223210052312, "learning_rate": 2.0331560838285894e-07, "loss": 1.1515, "step": 66 }, { "epoch": 0.006287537537537537, "grad_norm": 12.508273694697282, "learning_rate": 2.0644354081951832e-07, "loss": 1.1956, "step": 67 }, { "epoch": 0.0063813813813813815, "grad_norm": 12.71880821113464, "learning_rate": 2.095714732561777e-07, "loss": 1.246, "step": 68 }, { "epoch": 0.006475225225225225, "grad_norm": 13.198822433022567, "learning_rate": 2.1269940569283706e-07, "loss": 1.2625, "step": 69 }, { "epoch": 0.006569069069069069, "grad_norm": 12.098082923186496, "learning_rate": 2.1582733812949643e-07, "loss": 1.0854, "step": 70 }, { "epoch": 0.006662912912912913, "grad_norm": 12.599387578300801, "learning_rate": 2.189552705661558e-07, "loss": 1.2117, "step": 71 }, { "epoch": 0.006756756756756757, "grad_norm": 9.128796927191777, "learning_rate": 2.2208320300281518e-07, "loss": 1.0195, "step": 72 }, { "epoch": 0.006850600600600601, "grad_norm": 9.825240583762367, "learning_rate": 2.2521113543947453e-07, "loss": 1.1069, "step": 73 }, { "epoch": 0.006944444444444444, "grad_norm": 9.01709301412562, "learning_rate": 2.283390678761339e-07, "loss": 1.0788, "step": 74 }, { "epoch": 0.007038288288288288, "grad_norm": 9.77009379800081, "learning_rate": 2.3146700031279327e-07, "loss": 1.1863, "step": 75 }, { "epoch": 0.007132132132132132, "grad_norm": 8.23861691073359, "learning_rate": 2.3459493274945264e-07, "loss": 1.0572, "step": 76 }, { "epoch": 0.007225975975975976, "grad_norm": 8.732150452550492, "learning_rate": 2.3772286518611202e-07, "loss": 1.1429, "step": 77 }, { "epoch": 0.00731981981981982, "grad_norm": 7.538223899289297, "learning_rate": 2.4085079762277136e-07, "loss": 1.0394, "step": 78 }, { "epoch": 0.007413663663663664, "grad_norm": 7.654768900866635, "learning_rate": 2.4397873005943073e-07, "loss": 1.0701, "step": 79 }, { "epoch": 0.0075075075075075074, "grad_norm": 7.315041796551025, "learning_rate": 2.471066624960901e-07, "loss": 1.0742, "step": 80 }, { "epoch": 0.007601351351351352, "grad_norm": 7.676833271832479, "learning_rate": 2.502345949327495e-07, "loss": 1.093, "step": 81 }, { "epoch": 0.007695195195195195, "grad_norm": 6.7193237233382055, "learning_rate": 2.5336252736940885e-07, "loss": 1.0086, "step": 82 }, { "epoch": 0.007789039039039039, "grad_norm": 7.3736508650451915, "learning_rate": 2.564904598060682e-07, "loss": 1.0421, "step": 83 }, { "epoch": 0.007882882882882882, "grad_norm": 7.24794701622045, "learning_rate": 2.596183922427276e-07, "loss": 1.0711, "step": 84 }, { "epoch": 0.007976726726726727, "grad_norm": 7.37497725654748, "learning_rate": 2.6274632467938697e-07, "loss": 1.0357, "step": 85 }, { "epoch": 0.00807057057057057, "grad_norm": 6.577303654317609, "learning_rate": 2.6587425711604634e-07, "loss": 0.9599, "step": 86 }, { "epoch": 0.008164414414414414, "grad_norm": 7.2175490065202625, "learning_rate": 2.690021895527057e-07, "loss": 1.1093, "step": 87 }, { "epoch": 0.008258258258258258, "grad_norm": 6.552852137973587, "learning_rate": 2.721301219893651e-07, "loss": 1.0771, "step": 88 }, { "epoch": 0.008352102102102103, "grad_norm": 6.664026476565812, "learning_rate": 2.7525805442602446e-07, "loss": 1.0831, "step": 89 }, { "epoch": 0.008445945945945946, "grad_norm": 6.467865048596798, "learning_rate": 2.783859868626838e-07, "loss": 1.0175, "step": 90 }, { "epoch": 0.00853978978978979, "grad_norm": 7.040483792383359, "learning_rate": 2.8151391929934315e-07, "loss": 1.0717, "step": 91 }, { "epoch": 0.008633633633633633, "grad_norm": 6.060993318633368, "learning_rate": 2.846418517360025e-07, "loss": 1.0368, "step": 92 }, { "epoch": 0.008727477477477477, "grad_norm": 5.549781046149201, "learning_rate": 2.877697841726619e-07, "loss": 0.9143, "step": 93 }, { "epoch": 0.008821321321321322, "grad_norm": 6.215641532696371, "learning_rate": 2.9089771660932127e-07, "loss": 1.0405, "step": 94 }, { "epoch": 0.008915165165165165, "grad_norm": 5.64272882373066, "learning_rate": 2.9402564904598064e-07, "loss": 1.0148, "step": 95 }, { "epoch": 0.009009009009009009, "grad_norm": 5.237480137124845, "learning_rate": 2.9715358148264e-07, "loss": 1.028, "step": 96 }, { "epoch": 0.009102852852852852, "grad_norm": 5.574629459498996, "learning_rate": 3.002815139192994e-07, "loss": 0.9934, "step": 97 }, { "epoch": 0.009196696696696696, "grad_norm": 6.490488812955929, "learning_rate": 3.0340944635595876e-07, "loss": 0.972, "step": 98 }, { "epoch": 0.009290540540540541, "grad_norm": 4.820181043831159, "learning_rate": 3.0653737879261813e-07, "loss": 0.9453, "step": 99 }, { "epoch": 0.009384384384384385, "grad_norm": 4.709993345810176, "learning_rate": 3.096653112292775e-07, "loss": 1.0331, "step": 100 }, { "epoch": 0.009478228228228228, "grad_norm": 4.581640614554616, "learning_rate": 3.127932436659369e-07, "loss": 0.8956, "step": 101 }, { "epoch": 0.009572072072072071, "grad_norm": 4.615034193289159, "learning_rate": 3.159211761025962e-07, "loss": 0.9213, "step": 102 }, { "epoch": 0.009665915915915917, "grad_norm": 3.9689540180550287, "learning_rate": 3.190491085392556e-07, "loss": 0.8739, "step": 103 }, { "epoch": 0.00975975975975976, "grad_norm": 5.660590554249074, "learning_rate": 3.2217704097591494e-07, "loss": 0.9718, "step": 104 }, { "epoch": 0.009853603603603604, "grad_norm": 4.553805912956958, "learning_rate": 3.253049734125743e-07, "loss": 0.9948, "step": 105 }, { "epoch": 0.009947447447447447, "grad_norm": 3.7632211177495822, "learning_rate": 3.284329058492337e-07, "loss": 0.9539, "step": 106 }, { "epoch": 0.01004129129129129, "grad_norm": 3.7766099686520467, "learning_rate": 3.3156083828589306e-07, "loss": 0.978, "step": 107 }, { "epoch": 0.010135135135135136, "grad_norm": 4.129104240366342, "learning_rate": 3.3468877072255243e-07, "loss": 0.8498, "step": 108 }, { "epoch": 0.01022897897897898, "grad_norm": 3.942675256744849, "learning_rate": 3.378167031592118e-07, "loss": 0.9005, "step": 109 }, { "epoch": 0.010322822822822823, "grad_norm": 4.08616746356657, "learning_rate": 3.409446355958711e-07, "loss": 0.8784, "step": 110 }, { "epoch": 0.010416666666666666, "grad_norm": 3.5163996710064325, "learning_rate": 3.4407256803253055e-07, "loss": 0.8686, "step": 111 }, { "epoch": 0.010510510510510511, "grad_norm": 4.0974519071195905, "learning_rate": 3.4720050046918987e-07, "loss": 0.9585, "step": 112 }, { "epoch": 0.010604354354354355, "grad_norm": 3.264025544863903, "learning_rate": 3.503284329058493e-07, "loss": 0.8865, "step": 113 }, { "epoch": 0.010698198198198198, "grad_norm": 3.1673959974352344, "learning_rate": 3.534563653425086e-07, "loss": 0.8936, "step": 114 }, { "epoch": 0.010792042042042042, "grad_norm": 3.342498812007377, "learning_rate": 3.5658429777916804e-07, "loss": 0.9169, "step": 115 }, { "epoch": 0.010885885885885885, "grad_norm": 3.2904950230793206, "learning_rate": 3.5971223021582736e-07, "loss": 0.9424, "step": 116 }, { "epoch": 0.01097972972972973, "grad_norm": 3.6804551764583424, "learning_rate": 3.628401626524868e-07, "loss": 0.8792, "step": 117 }, { "epoch": 0.011073573573573574, "grad_norm": 4.851916690937378, "learning_rate": 3.659680950891461e-07, "loss": 0.9474, "step": 118 }, { "epoch": 0.011167417417417417, "grad_norm": 3.1175682441852466, "learning_rate": 3.6909602752580547e-07, "loss": 0.8769, "step": 119 }, { "epoch": 0.01126126126126126, "grad_norm": 3.381427946372186, "learning_rate": 3.7222395996246485e-07, "loss": 0.9464, "step": 120 }, { "epoch": 0.011355105105105106, "grad_norm": 6.455106625731381, "learning_rate": 3.753518923991242e-07, "loss": 0.8462, "step": 121 }, { "epoch": 0.01144894894894895, "grad_norm": 3.6427630991302253, "learning_rate": 3.7847982483578354e-07, "loss": 0.868, "step": 122 }, { "epoch": 0.011542792792792793, "grad_norm": 2.983530881834476, "learning_rate": 3.8160775727244296e-07, "loss": 0.8718, "step": 123 }, { "epoch": 0.011636636636636636, "grad_norm": 2.737099011859125, "learning_rate": 3.847356897091023e-07, "loss": 0.8727, "step": 124 }, { "epoch": 0.01173048048048048, "grad_norm": 3.528825857313906, "learning_rate": 3.878636221457617e-07, "loss": 0.8887, "step": 125 }, { "epoch": 0.011824324324324325, "grad_norm": 2.983273558045758, "learning_rate": 3.9099155458242103e-07, "loss": 0.9414, "step": 126 }, { "epoch": 0.011918168168168168, "grad_norm": 3.188512411320661, "learning_rate": 3.9411948701908045e-07, "loss": 0.8808, "step": 127 }, { "epoch": 0.012012012012012012, "grad_norm": 2.756885034287822, "learning_rate": 3.9724741945573977e-07, "loss": 0.7806, "step": 128 }, { "epoch": 0.012105855855855855, "grad_norm": 2.5771941799429974, "learning_rate": 4.003753518923992e-07, "loss": 0.7944, "step": 129 }, { "epoch": 0.012199699699699699, "grad_norm": 3.4860557628388826, "learning_rate": 4.035032843290585e-07, "loss": 0.9446, "step": 130 }, { "epoch": 0.012293543543543544, "grad_norm": 2.7887733581054883, "learning_rate": 4.066312167657179e-07, "loss": 0.7341, "step": 131 }, { "epoch": 0.012387387387387387, "grad_norm": 3.333823093804399, "learning_rate": 4.0975914920237726e-07, "loss": 0.9639, "step": 132 }, { "epoch": 0.012481231231231231, "grad_norm": 3.305601301166128, "learning_rate": 4.1288708163903663e-07, "loss": 0.9074, "step": 133 }, { "epoch": 0.012575075075075074, "grad_norm": 2.598492947105503, "learning_rate": 4.16015014075696e-07, "loss": 0.8526, "step": 134 }, { "epoch": 0.01266891891891892, "grad_norm": 3.0422207934886942, "learning_rate": 4.191429465123554e-07, "loss": 0.9028, "step": 135 }, { "epoch": 0.012762762762762763, "grad_norm": 2.961422096125288, "learning_rate": 4.222708789490147e-07, "loss": 0.823, "step": 136 }, { "epoch": 0.012856606606606607, "grad_norm": 3.8847305763397584, "learning_rate": 4.253988113856741e-07, "loss": 0.8865, "step": 137 }, { "epoch": 0.01295045045045045, "grad_norm": 3.4972340386078082, "learning_rate": 4.2852674382233344e-07, "loss": 0.7934, "step": 138 }, { "epoch": 0.013044294294294293, "grad_norm": 3.0044452734426654, "learning_rate": 4.3165467625899287e-07, "loss": 0.8892, "step": 139 }, { "epoch": 0.013138138138138139, "grad_norm": 3.051878989180435, "learning_rate": 4.347826086956522e-07, "loss": 0.842, "step": 140 }, { "epoch": 0.013231981981981982, "grad_norm": 2.6940243300023554, "learning_rate": 4.379105411323116e-07, "loss": 0.7761, "step": 141 }, { "epoch": 0.013325825825825826, "grad_norm": 2.8666081928883522, "learning_rate": 4.4103847356897093e-07, "loss": 0.8243, "step": 142 }, { "epoch": 0.013419669669669669, "grad_norm": 5.131182147126366, "learning_rate": 4.4416640600563036e-07, "loss": 0.8, "step": 143 }, { "epoch": 0.013513513513513514, "grad_norm": 2.6311159998229714, "learning_rate": 4.472943384422897e-07, "loss": 0.8405, "step": 144 }, { "epoch": 0.013607357357357358, "grad_norm": 3.403195168485844, "learning_rate": 4.5042227087894905e-07, "loss": 0.822, "step": 145 }, { "epoch": 0.013701201201201201, "grad_norm": 3.1070231089888622, "learning_rate": 4.535502033156084e-07, "loss": 0.8461, "step": 146 }, { "epoch": 0.013795045045045045, "grad_norm": 3.168217056262293, "learning_rate": 4.566781357522678e-07, "loss": 0.8749, "step": 147 }, { "epoch": 0.013888888888888888, "grad_norm": 2.5712506584067545, "learning_rate": 4.5980606818892717e-07, "loss": 0.8188, "step": 148 }, { "epoch": 0.013982732732732733, "grad_norm": 2.551323350976561, "learning_rate": 4.6293400062558654e-07, "loss": 0.685, "step": 149 }, { "epoch": 0.014076576576576577, "grad_norm": 2.96185645333232, "learning_rate": 4.6606193306224586e-07, "loss": 0.8115, "step": 150 }, { "epoch": 0.01417042042042042, "grad_norm": 3.724181492176994, "learning_rate": 4.691898654989053e-07, "loss": 0.8175, "step": 151 }, { "epoch": 0.014264264264264264, "grad_norm": 2.289382617204156, "learning_rate": 4.723177979355646e-07, "loss": 0.7163, "step": 152 }, { "epoch": 0.014358108108108109, "grad_norm": 3.0612548600197336, "learning_rate": 4.7544573037222403e-07, "loss": 0.8314, "step": 153 }, { "epoch": 0.014451951951951952, "grad_norm": 2.6873396521469473, "learning_rate": 4.785736628088834e-07, "loss": 0.8045, "step": 154 }, { "epoch": 0.014545795795795796, "grad_norm": 3.0490159839879727, "learning_rate": 4.817015952455427e-07, "loss": 0.7671, "step": 155 }, { "epoch": 0.01463963963963964, "grad_norm": 2.4133328330259856, "learning_rate": 4.848295276822021e-07, "loss": 0.8024, "step": 156 }, { "epoch": 0.014733483483483483, "grad_norm": 2.8949900525332546, "learning_rate": 4.879574601188615e-07, "loss": 0.7968, "step": 157 }, { "epoch": 0.014827327327327328, "grad_norm": 2.4208935467864063, "learning_rate": 4.910853925555208e-07, "loss": 0.7417, "step": 158 }, { "epoch": 0.014921171171171171, "grad_norm": 2.5039387970618576, "learning_rate": 4.942133249921802e-07, "loss": 0.6901, "step": 159 }, { "epoch": 0.015015015015015015, "grad_norm": 2.935043083990418, "learning_rate": 4.973412574288396e-07, "loss": 0.8719, "step": 160 }, { "epoch": 0.015108858858858858, "grad_norm": 3.3741099789561533, "learning_rate": 5.00469189865499e-07, "loss": 0.8245, "step": 161 }, { "epoch": 0.015202702702702704, "grad_norm": 2.312195015706183, "learning_rate": 5.035971223021583e-07, "loss": 0.6898, "step": 162 }, { "epoch": 0.015296546546546547, "grad_norm": 2.8718381413316196, "learning_rate": 5.067250547388177e-07, "loss": 0.7698, "step": 163 }, { "epoch": 0.01539039039039039, "grad_norm": 2.861757658533961, "learning_rate": 5.098529871754771e-07, "loss": 0.82, "step": 164 }, { "epoch": 0.015484234234234234, "grad_norm": 2.378853986812856, "learning_rate": 5.129809196121364e-07, "loss": 0.7409, "step": 165 }, { "epoch": 0.015578078078078077, "grad_norm": 2.7392933256103853, "learning_rate": 5.161088520487958e-07, "loss": 0.8075, "step": 166 }, { "epoch": 0.015671921921921923, "grad_norm": 4.7980467474948, "learning_rate": 5.192367844854552e-07, "loss": 0.8521, "step": 167 }, { "epoch": 0.015765765765765764, "grad_norm": 3.1216318954229965, "learning_rate": 5.223647169221146e-07, "loss": 0.7859, "step": 168 }, { "epoch": 0.01585960960960961, "grad_norm": 2.6153680764881417, "learning_rate": 5.254926493587739e-07, "loss": 0.7964, "step": 169 }, { "epoch": 0.015953453453453455, "grad_norm": 3.0184438222283947, "learning_rate": 5.286205817954332e-07, "loss": 0.8189, "step": 170 }, { "epoch": 0.016047297297297296, "grad_norm": 2.835491460379126, "learning_rate": 5.317485142320927e-07, "loss": 0.7862, "step": 171 }, { "epoch": 0.01614114114114114, "grad_norm": 3.1348422081497866, "learning_rate": 5.34876446668752e-07, "loss": 0.7436, "step": 172 }, { "epoch": 0.016234984984984983, "grad_norm": 3.3381121362201864, "learning_rate": 5.380043791054114e-07, "loss": 0.7375, "step": 173 }, { "epoch": 0.01632882882882883, "grad_norm": 2.4497413787062414, "learning_rate": 5.411323115420707e-07, "loss": 0.8031, "step": 174 }, { "epoch": 0.016422672672672674, "grad_norm": 2.601563304153984, "learning_rate": 5.442602439787302e-07, "loss": 0.8605, "step": 175 }, { "epoch": 0.016516516516516516, "grad_norm": 2.456084294407952, "learning_rate": 5.473881764153894e-07, "loss": 0.7857, "step": 176 }, { "epoch": 0.01661036036036036, "grad_norm": 3.428669190714414, "learning_rate": 5.505161088520489e-07, "loss": 0.8189, "step": 177 }, { "epoch": 0.016704204204204206, "grad_norm": 2.61714701848109, "learning_rate": 5.536440412887082e-07, "loss": 0.8112, "step": 178 }, { "epoch": 0.016798048048048048, "grad_norm": 3.050426469562784, "learning_rate": 5.567719737253676e-07, "loss": 0.8116, "step": 179 }, { "epoch": 0.016891891891891893, "grad_norm": 2.795843895601452, "learning_rate": 5.598999061620269e-07, "loss": 0.7963, "step": 180 }, { "epoch": 0.016985735735735735, "grad_norm": 2.8120678022459002, "learning_rate": 5.630278385986863e-07, "loss": 0.7615, "step": 181 }, { "epoch": 0.01707957957957958, "grad_norm": 2.330901442373248, "learning_rate": 5.661557710353457e-07, "loss": 0.6738, "step": 182 }, { "epoch": 0.017173423423423425, "grad_norm": 2.5581447013587564, "learning_rate": 5.69283703472005e-07, "loss": 0.7285, "step": 183 }, { "epoch": 0.017267267267267267, "grad_norm": 2.738035619207259, "learning_rate": 5.724116359086644e-07, "loss": 0.7252, "step": 184 }, { "epoch": 0.017361111111111112, "grad_norm": 2.682168422039159, "learning_rate": 5.755395683453238e-07, "loss": 0.7265, "step": 185 }, { "epoch": 0.017454954954954954, "grad_norm": 2.4073560074163125, "learning_rate": 5.786675007819832e-07, "loss": 0.7859, "step": 186 }, { "epoch": 0.0175487987987988, "grad_norm": 2.2858672119146433, "learning_rate": 5.817954332186425e-07, "loss": 0.7379, "step": 187 }, { "epoch": 0.017642642642642644, "grad_norm": 2.338247977779584, "learning_rate": 5.849233656553019e-07, "loss": 0.7727, "step": 188 }, { "epoch": 0.017736486486486486, "grad_norm": 2.744016943106608, "learning_rate": 5.880512980919613e-07, "loss": 0.8035, "step": 189 }, { "epoch": 0.01783033033033033, "grad_norm": 2.9177179794328088, "learning_rate": 5.911792305286207e-07, "loss": 0.8051, "step": 190 }, { "epoch": 0.017924174174174173, "grad_norm": 2.8116143890152996, "learning_rate": 5.9430716296528e-07, "loss": 0.7464, "step": 191 }, { "epoch": 0.018018018018018018, "grad_norm": 2.280996386617162, "learning_rate": 5.974350954019394e-07, "loss": 0.7491, "step": 192 }, { "epoch": 0.018111861861861863, "grad_norm": 2.523988988313979, "learning_rate": 6.005630278385988e-07, "loss": 0.7747, "step": 193 }, { "epoch": 0.018205705705705705, "grad_norm": 2.797071401585852, "learning_rate": 6.036909602752581e-07, "loss": 0.7311, "step": 194 }, { "epoch": 0.01829954954954955, "grad_norm": 3.443157786439724, "learning_rate": 6.068188927119175e-07, "loss": 0.7409, "step": 195 }, { "epoch": 0.01839339339339339, "grad_norm": 3.4888174306724653, "learning_rate": 6.099468251485769e-07, "loss": 0.7494, "step": 196 }, { "epoch": 0.018487237237237237, "grad_norm": 2.845136123982356, "learning_rate": 6.130747575852363e-07, "loss": 0.7388, "step": 197 }, { "epoch": 0.018581081081081082, "grad_norm": 2.5159663149889626, "learning_rate": 6.162026900218955e-07, "loss": 0.716, "step": 198 }, { "epoch": 0.018674924924924924, "grad_norm": 3.888522712509841, "learning_rate": 6.19330622458555e-07, "loss": 0.6985, "step": 199 }, { "epoch": 0.01876876876876877, "grad_norm": 2.4196849273382486, "learning_rate": 6.224585548952143e-07, "loss": 0.7804, "step": 200 }, { "epoch": 0.018862612612612614, "grad_norm": 2.5430707418523695, "learning_rate": 6.255864873318737e-07, "loss": 0.6815, "step": 201 }, { "epoch": 0.018956456456456456, "grad_norm": 2.5901707753163348, "learning_rate": 6.287144197685331e-07, "loss": 0.7658, "step": 202 }, { "epoch": 0.0190503003003003, "grad_norm": 3.0805404307253843, "learning_rate": 6.318423522051924e-07, "loss": 0.7942, "step": 203 }, { "epoch": 0.019144144144144143, "grad_norm": 2.2097789730959536, "learning_rate": 6.349702846418518e-07, "loss": 0.7584, "step": 204 }, { "epoch": 0.019237987987987988, "grad_norm": 2.1747838042918146, "learning_rate": 6.380982170785112e-07, "loss": 0.7039, "step": 205 }, { "epoch": 0.019331831831831833, "grad_norm": 3.0461978642761873, "learning_rate": 6.412261495151706e-07, "loss": 0.7697, "step": 206 }, { "epoch": 0.019425675675675675, "grad_norm": 3.0759785194140874, "learning_rate": 6.443540819518299e-07, "loss": 0.8744, "step": 207 }, { "epoch": 0.01951951951951952, "grad_norm": 2.685506904869619, "learning_rate": 6.474820143884893e-07, "loss": 0.7508, "step": 208 }, { "epoch": 0.019613363363363362, "grad_norm": 2.3185303304883296, "learning_rate": 6.506099468251486e-07, "loss": 0.7284, "step": 209 }, { "epoch": 0.019707207207207207, "grad_norm": 4.2385819470551045, "learning_rate": 6.537378792618081e-07, "loss": 0.8098, "step": 210 }, { "epoch": 0.019801051051051052, "grad_norm": 2.7055271080786523, "learning_rate": 6.568658116984674e-07, "loss": 0.693, "step": 211 }, { "epoch": 0.019894894894894894, "grad_norm": 2.3211257865603967, "learning_rate": 6.599937441351267e-07, "loss": 0.7225, "step": 212 }, { "epoch": 0.01998873873873874, "grad_norm": 2.4834462713418244, "learning_rate": 6.631216765717861e-07, "loss": 0.7688, "step": 213 }, { "epoch": 0.02008258258258258, "grad_norm": 2.680843626792745, "learning_rate": 6.662496090084456e-07, "loss": 0.7376, "step": 214 }, { "epoch": 0.020176426426426426, "grad_norm": 2.3705355919748166, "learning_rate": 6.693775414451049e-07, "loss": 0.7282, "step": 215 }, { "epoch": 0.02027027027027027, "grad_norm": 3.0202370113988826, "learning_rate": 6.725054738817642e-07, "loss": 0.7253, "step": 216 }, { "epoch": 0.020364114114114113, "grad_norm": 2.3220999058330087, "learning_rate": 6.756334063184236e-07, "loss": 0.7027, "step": 217 }, { "epoch": 0.02045795795795796, "grad_norm": 3.187132313290879, "learning_rate": 6.787613387550829e-07, "loss": 0.7273, "step": 218 }, { "epoch": 0.020551801801801804, "grad_norm": 2.5274521879187333, "learning_rate": 6.818892711917422e-07, "loss": 0.7312, "step": 219 }, { "epoch": 0.020645645645645645, "grad_norm": 3.0912194560567636, "learning_rate": 6.850172036284017e-07, "loss": 0.6798, "step": 220 }, { "epoch": 0.02073948948948949, "grad_norm": 2.8032195135288864, "learning_rate": 6.881451360650611e-07, "loss": 0.6984, "step": 221 }, { "epoch": 0.020833333333333332, "grad_norm": 2.4977036625904674, "learning_rate": 6.912730685017204e-07, "loss": 0.6542, "step": 222 }, { "epoch": 0.020927177177177177, "grad_norm": 2.415732245622113, "learning_rate": 6.944010009383797e-07, "loss": 0.6706, "step": 223 }, { "epoch": 0.021021021021021023, "grad_norm": 2.7788490056109656, "learning_rate": 6.975289333750392e-07, "loss": 0.7601, "step": 224 }, { "epoch": 0.021114864864864864, "grad_norm": 2.779501229030509, "learning_rate": 7.006568658116986e-07, "loss": 0.7524, "step": 225 }, { "epoch": 0.02120870870870871, "grad_norm": 2.6241183536377877, "learning_rate": 7.037847982483578e-07, "loss": 0.6831, "step": 226 }, { "epoch": 0.02130255255255255, "grad_norm": 2.4192693353684707, "learning_rate": 7.069127306850172e-07, "loss": 0.7613, "step": 227 }, { "epoch": 0.021396396396396396, "grad_norm": 2.7210079288368543, "learning_rate": 7.100406631216766e-07, "loss": 0.7197, "step": 228 }, { "epoch": 0.02149024024024024, "grad_norm": 2.9599837522244608, "learning_rate": 7.131685955583361e-07, "loss": 0.6831, "step": 229 }, { "epoch": 0.021584084084084083, "grad_norm": 2.3474863253763822, "learning_rate": 7.162965279949953e-07, "loss": 0.7185, "step": 230 }, { "epoch": 0.02167792792792793, "grad_norm": 2.687848298844963, "learning_rate": 7.194244604316547e-07, "loss": 0.7238, "step": 231 }, { "epoch": 0.02177177177177177, "grad_norm": 2.5464386692019336, "learning_rate": 7.225523928683141e-07, "loss": 0.7223, "step": 232 }, { "epoch": 0.021865615615615615, "grad_norm": 2.238937123081139, "learning_rate": 7.256803253049736e-07, "loss": 0.7408, "step": 233 }, { "epoch": 0.02195945945945946, "grad_norm": 2.4587818085585917, "learning_rate": 7.288082577416327e-07, "loss": 0.7888, "step": 234 }, { "epoch": 0.022053303303303302, "grad_norm": 2.004453347201395, "learning_rate": 7.319361901782922e-07, "loss": 0.768, "step": 235 }, { "epoch": 0.022147147147147148, "grad_norm": 2.987195775140512, "learning_rate": 7.350641226149516e-07, "loss": 0.7669, "step": 236 }, { "epoch": 0.02224099099099099, "grad_norm": 2.3182315997556673, "learning_rate": 7.381920550516109e-07, "loss": 0.7876, "step": 237 }, { "epoch": 0.022334834834834835, "grad_norm": 2.635806775079582, "learning_rate": 7.413199874882702e-07, "loss": 0.6737, "step": 238 }, { "epoch": 0.02242867867867868, "grad_norm": 2.3889895260250382, "learning_rate": 7.444479199249297e-07, "loss": 0.7032, "step": 239 }, { "epoch": 0.02252252252252252, "grad_norm": 2.296297972130682, "learning_rate": 7.475758523615891e-07, "loss": 0.7149, "step": 240 }, { "epoch": 0.022616366366366367, "grad_norm": 3.024389518021028, "learning_rate": 7.507037847982484e-07, "loss": 0.7697, "step": 241 }, { "epoch": 0.022710210210210212, "grad_norm": 4.93596873894388, "learning_rate": 7.538317172349077e-07, "loss": 0.812, "step": 242 }, { "epoch": 0.022804054054054054, "grad_norm": 2.7656848001935153, "learning_rate": 7.569596496715671e-07, "loss": 0.7317, "step": 243 }, { "epoch": 0.0228978978978979, "grad_norm": 2.5863886190685177, "learning_rate": 7.600875821082266e-07, "loss": 0.7321, "step": 244 }, { "epoch": 0.02299174174174174, "grad_norm": 2.8970284990975093, "learning_rate": 7.632155145448859e-07, "loss": 0.7355, "step": 245 }, { "epoch": 0.023085585585585586, "grad_norm": 4.94605189958749, "learning_rate": 7.663434469815452e-07, "loss": 0.6784, "step": 246 }, { "epoch": 0.02317942942942943, "grad_norm": 2.4927640530601427, "learning_rate": 7.694713794182046e-07, "loss": 0.6712, "step": 247 }, { "epoch": 0.023273273273273273, "grad_norm": 2.6390951891094026, "learning_rate": 7.72599311854864e-07, "loss": 0.8021, "step": 248 }, { "epoch": 0.023367117117117118, "grad_norm": 2.512992081426313, "learning_rate": 7.757272442915234e-07, "loss": 0.6649, "step": 249 }, { "epoch": 0.02346096096096096, "grad_norm": 2.231959022572805, "learning_rate": 7.788551767281827e-07, "loss": 0.7158, "step": 250 }, { "epoch": 0.023554804804804805, "grad_norm": 3.1551264077525802, "learning_rate": 7.819831091648421e-07, "loss": 0.7329, "step": 251 }, { "epoch": 0.02364864864864865, "grad_norm": 2.480487889739852, "learning_rate": 7.851110416015014e-07, "loss": 0.6647, "step": 252 }, { "epoch": 0.02374249249249249, "grad_norm": 1.8801259998880928, "learning_rate": 7.882389740381609e-07, "loss": 0.6835, "step": 253 }, { "epoch": 0.023836336336336337, "grad_norm": 2.400641750141136, "learning_rate": 7.913669064748202e-07, "loss": 0.6374, "step": 254 }, { "epoch": 0.02393018018018018, "grad_norm": 2.738186735228455, "learning_rate": 7.944948389114795e-07, "loss": 0.7339, "step": 255 }, { "epoch": 0.024024024024024024, "grad_norm": 2.896180470932027, "learning_rate": 7.976227713481389e-07, "loss": 0.7339, "step": 256 }, { "epoch": 0.02411786786786787, "grad_norm": 2.3905043827967822, "learning_rate": 8.007507037847984e-07, "loss": 0.6944, "step": 257 }, { "epoch": 0.02421171171171171, "grad_norm": 2.7276828127438733, "learning_rate": 8.038786362214577e-07, "loss": 0.7297, "step": 258 }, { "epoch": 0.024305555555555556, "grad_norm": 2.869972633116998, "learning_rate": 8.07006568658117e-07, "loss": 0.6515, "step": 259 }, { "epoch": 0.024399399399399398, "grad_norm": 2.2973247480695234, "learning_rate": 8.101345010947764e-07, "loss": 0.6583, "step": 260 }, { "epoch": 0.024493243243243243, "grad_norm": 2.649019017267653, "learning_rate": 8.132624335314358e-07, "loss": 0.6848, "step": 261 }, { "epoch": 0.024587087087087088, "grad_norm": 2.6915894211201747, "learning_rate": 8.16390365968095e-07, "loss": 0.7179, "step": 262 }, { "epoch": 0.02468093093093093, "grad_norm": 2.709904824488113, "learning_rate": 8.195182984047545e-07, "loss": 0.6036, "step": 263 }, { "epoch": 0.024774774774774775, "grad_norm": 2.918777162404174, "learning_rate": 8.226462308414139e-07, "loss": 0.7335, "step": 264 }, { "epoch": 0.02486861861861862, "grad_norm": 3.218729417526737, "learning_rate": 8.257741632780733e-07, "loss": 0.6616, "step": 265 }, { "epoch": 0.024962462462462462, "grad_norm": 2.9063743106220548, "learning_rate": 8.289020957147325e-07, "loss": 0.7263, "step": 266 }, { "epoch": 0.025056306306306307, "grad_norm": 2.3221472120716156, "learning_rate": 8.32030028151392e-07, "loss": 0.7296, "step": 267 }, { "epoch": 0.02515015015015015, "grad_norm": 2.4900200916485957, "learning_rate": 8.351579605880514e-07, "loss": 0.6731, "step": 268 }, { "epoch": 0.025243993993993994, "grad_norm": 2.439016264150023, "learning_rate": 8.382858930247108e-07, "loss": 0.6625, "step": 269 }, { "epoch": 0.02533783783783784, "grad_norm": 2.322733307875388, "learning_rate": 8.4141382546137e-07, "loss": 0.6303, "step": 270 }, { "epoch": 0.02543168168168168, "grad_norm": 2.233787406249723, "learning_rate": 8.445417578980294e-07, "loss": 0.7386, "step": 271 }, { "epoch": 0.025525525525525526, "grad_norm": 4.852710365129129, "learning_rate": 8.476696903346889e-07, "loss": 0.6974, "step": 272 }, { "epoch": 0.025619369369369368, "grad_norm": 2.1736864374051668, "learning_rate": 8.507976227713482e-07, "loss": 0.6729, "step": 273 }, { "epoch": 0.025713213213213213, "grad_norm": 2.5561319198056442, "learning_rate": 8.539255552080075e-07, "loss": 0.669, "step": 274 }, { "epoch": 0.02580705705705706, "grad_norm": 2.7205423949361385, "learning_rate": 8.570534876446669e-07, "loss": 0.7287, "step": 275 }, { "epoch": 0.0259009009009009, "grad_norm": 2.451747689036882, "learning_rate": 8.601814200813264e-07, "loss": 0.6923, "step": 276 }, { "epoch": 0.025994744744744745, "grad_norm": 2.574444706455036, "learning_rate": 8.633093525179857e-07, "loss": 0.7246, "step": 277 }, { "epoch": 0.026088588588588587, "grad_norm": 2.7691227110349863, "learning_rate": 8.66437284954645e-07, "loss": 0.7044, "step": 278 }, { "epoch": 0.026182432432432432, "grad_norm": 2.3567557133611414, "learning_rate": 8.695652173913044e-07, "loss": 0.7377, "step": 279 }, { "epoch": 0.026276276276276277, "grad_norm": 2.945785297833105, "learning_rate": 8.726931498279637e-07, "loss": 0.7135, "step": 280 }, { "epoch": 0.02637012012012012, "grad_norm": 2.5211464390269462, "learning_rate": 8.758210822646232e-07, "loss": 0.6812, "step": 281 }, { "epoch": 0.026463963963963964, "grad_norm": 2.1401123256673547, "learning_rate": 8.789490147012825e-07, "loss": 0.6082, "step": 282 }, { "epoch": 0.02655780780780781, "grad_norm": 2.4714006920968448, "learning_rate": 8.820769471379419e-07, "loss": 0.6937, "step": 283 }, { "epoch": 0.02665165165165165, "grad_norm": 2.6017565038168047, "learning_rate": 8.852048795746012e-07, "loss": 0.6424, "step": 284 }, { "epoch": 0.026745495495495496, "grad_norm": 2.899848582694218, "learning_rate": 8.883328120112607e-07, "loss": 0.6863, "step": 285 }, { "epoch": 0.026839339339339338, "grad_norm": 2.3278624982337983, "learning_rate": 8.9146074444792e-07, "loss": 0.7042, "step": 286 }, { "epoch": 0.026933183183183183, "grad_norm": 3.318050966742932, "learning_rate": 8.945886768845794e-07, "loss": 0.6623, "step": 287 }, { "epoch": 0.02702702702702703, "grad_norm": 2.1964190700486212, "learning_rate": 8.977166093212387e-07, "loss": 0.7042, "step": 288 }, { "epoch": 0.02712087087087087, "grad_norm": 2.611898301653501, "learning_rate": 9.008445417578981e-07, "loss": 0.5869, "step": 289 }, { "epoch": 0.027214714714714715, "grad_norm": 3.4559475446862518, "learning_rate": 9.039724741945574e-07, "loss": 0.6899, "step": 290 }, { "epoch": 0.027308558558558557, "grad_norm": 2.9807783734234627, "learning_rate": 9.071004066312168e-07, "loss": 0.706, "step": 291 }, { "epoch": 0.027402402402402402, "grad_norm": 2.2164235291385648, "learning_rate": 9.102283390678762e-07, "loss": 0.6323, "step": 292 }, { "epoch": 0.027496246246246248, "grad_norm": 2.534777523718213, "learning_rate": 9.133562715045356e-07, "loss": 0.7337, "step": 293 }, { "epoch": 0.02759009009009009, "grad_norm": 2.539165849211124, "learning_rate": 9.164842039411949e-07, "loss": 0.7593, "step": 294 }, { "epoch": 0.027683933933933935, "grad_norm": 2.571921938930268, "learning_rate": 9.196121363778543e-07, "loss": 0.6792, "step": 295 }, { "epoch": 0.027777777777777776, "grad_norm": 3.0099651452701783, "learning_rate": 9.227400688145137e-07, "loss": 0.6482, "step": 296 }, { "epoch": 0.02787162162162162, "grad_norm": 2.301485291525057, "learning_rate": 9.258680012511731e-07, "loss": 0.678, "step": 297 }, { "epoch": 0.027965465465465467, "grad_norm": 1.936505368962842, "learning_rate": 9.289959336878323e-07, "loss": 0.6266, "step": 298 }, { "epoch": 0.02805930930930931, "grad_norm": 2.631566381855497, "learning_rate": 9.321238661244917e-07, "loss": 0.6783, "step": 299 }, { "epoch": 0.028153153153153154, "grad_norm": 2.4082556330166547, "learning_rate": 9.352517985611512e-07, "loss": 0.6603, "step": 300 }, { "epoch": 0.028246996996996995, "grad_norm": 2.3702117674387715, "learning_rate": 9.383797309978106e-07, "loss": 0.5859, "step": 301 }, { "epoch": 0.02834084084084084, "grad_norm": 2.659164010331785, "learning_rate": 9.415076634344698e-07, "loss": 0.7195, "step": 302 }, { "epoch": 0.028434684684684686, "grad_norm": 5.152865910523947, "learning_rate": 9.446355958711292e-07, "loss": 0.7217, "step": 303 }, { "epoch": 0.028528528528528527, "grad_norm": 2.3363154656566483, "learning_rate": 9.477635283077887e-07, "loss": 0.6986, "step": 304 }, { "epoch": 0.028622372372372373, "grad_norm": 2.0424347464196444, "learning_rate": 9.508914607444481e-07, "loss": 0.65, "step": 305 }, { "epoch": 0.028716216216216218, "grad_norm": 2.7275813013927475, "learning_rate": 9.540193931811073e-07, "loss": 0.7109, "step": 306 }, { "epoch": 0.02881006006006006, "grad_norm": 3.5315396012938227, "learning_rate": 9.571473256177667e-07, "loss": 0.7284, "step": 307 }, { "epoch": 0.028903903903903905, "grad_norm": 2.7215399790135146, "learning_rate": 9.60275258054426e-07, "loss": 0.6877, "step": 308 }, { "epoch": 0.028997747747747746, "grad_norm": 2.5811659954051565, "learning_rate": 9.634031904910854e-07, "loss": 0.7111, "step": 309 }, { "epoch": 0.02909159159159159, "grad_norm": 2.340838304999448, "learning_rate": 9.665311229277448e-07, "loss": 0.6497, "step": 310 }, { "epoch": 0.029185435435435437, "grad_norm": 2.1889853193831454, "learning_rate": 9.696590553644042e-07, "loss": 0.6666, "step": 311 }, { "epoch": 0.02927927927927928, "grad_norm": 2.2630867133419708, "learning_rate": 9.727869878010636e-07, "loss": 0.6535, "step": 312 }, { "epoch": 0.029373123123123124, "grad_norm": 2.811030683426717, "learning_rate": 9.75914920237723e-07, "loss": 0.6809, "step": 313 }, { "epoch": 0.029466966966966966, "grad_norm": 1.944279352108113, "learning_rate": 9.790428526743823e-07, "loss": 0.6261, "step": 314 }, { "epoch": 0.02956081081081081, "grad_norm": 3.371136331479708, "learning_rate": 9.821707851110417e-07, "loss": 0.6495, "step": 315 }, { "epoch": 0.029654654654654656, "grad_norm": 2.471448049313944, "learning_rate": 9.85298717547701e-07, "loss": 0.6692, "step": 316 }, { "epoch": 0.029748498498498498, "grad_norm": 7.012944327527563, "learning_rate": 9.884266499843604e-07, "loss": 0.599, "step": 317 }, { "epoch": 0.029842342342342343, "grad_norm": 2.8988833184912237, "learning_rate": 9.915545824210198e-07, "loss": 0.639, "step": 318 }, { "epoch": 0.029936186186186185, "grad_norm": 12.781801890870373, "learning_rate": 9.946825148576792e-07, "loss": 0.6295, "step": 319 }, { "epoch": 0.03003003003003003, "grad_norm": 53.79524149179188, "learning_rate": 9.978104472943385e-07, "loss": 0.6596, "step": 320 }, { "epoch": 0.030123873873873875, "grad_norm": 2.5139914867348745, "learning_rate": 1.000938379730998e-06, "loss": 0.6348, "step": 321 }, { "epoch": 0.030217717717717717, "grad_norm": 3.4395799624364334, "learning_rate": 1.0040663121676573e-06, "loss": 0.6619, "step": 322 }, { "epoch": 0.030311561561561562, "grad_norm": 2.4820802643609907, "learning_rate": 1.0071942446043167e-06, "loss": 0.6664, "step": 323 }, { "epoch": 0.030405405405405407, "grad_norm": 2.5726092724830023, "learning_rate": 1.010322177040976e-06, "loss": 0.5608, "step": 324 }, { "epoch": 0.03049924924924925, "grad_norm": 2.2024657669459464, "learning_rate": 1.0134501094776354e-06, "loss": 0.6266, "step": 325 }, { "epoch": 0.030593093093093094, "grad_norm": 2.2448434353207993, "learning_rate": 1.0165780419142948e-06, "loss": 0.6983, "step": 326 }, { "epoch": 0.030686936936936936, "grad_norm": 2.6619602300820446, "learning_rate": 1.0197059743509541e-06, "loss": 0.6811, "step": 327 }, { "epoch": 0.03078078078078078, "grad_norm": 2.1241802561710896, "learning_rate": 1.0228339067876135e-06, "loss": 0.6085, "step": 328 }, { "epoch": 0.030874624624624626, "grad_norm": 2.466709308977618, "learning_rate": 1.025961839224273e-06, "loss": 0.7331, "step": 329 }, { "epoch": 0.030968468468468468, "grad_norm": 2.0275895987036354, "learning_rate": 1.029089771660932e-06, "loss": 0.5982, "step": 330 }, { "epoch": 0.031062312312312313, "grad_norm": 4.778171118974855, "learning_rate": 1.0322177040975916e-06, "loss": 0.6877, "step": 331 }, { "epoch": 0.031156156156156155, "grad_norm": 2.170949505149953, "learning_rate": 1.035345636534251e-06, "loss": 0.6201, "step": 332 }, { "epoch": 0.03125, "grad_norm": 2.0043025568218695, "learning_rate": 1.0384735689709104e-06, "loss": 0.6084, "step": 333 }, { "epoch": 0.031343843843843845, "grad_norm": 2.072986530758786, "learning_rate": 1.0416015014075695e-06, "loss": 0.6344, "step": 334 }, { "epoch": 0.03143768768768769, "grad_norm": 2.864001001737747, "learning_rate": 1.0447294338442291e-06, "loss": 0.7489, "step": 335 }, { "epoch": 0.03153153153153153, "grad_norm": 2.011767380348235, "learning_rate": 1.0478573662808885e-06, "loss": 0.6461, "step": 336 }, { "epoch": 0.031625375375375374, "grad_norm": 2.3904023844638256, "learning_rate": 1.0509852987175479e-06, "loss": 0.6981, "step": 337 }, { "epoch": 0.03171921921921922, "grad_norm": 2.048523520901469, "learning_rate": 1.054113231154207e-06, "loss": 0.6357, "step": 338 }, { "epoch": 0.031813063063063064, "grad_norm": 2.2441031559326934, "learning_rate": 1.0572411635908664e-06, "loss": 0.6316, "step": 339 }, { "epoch": 0.03190690690690691, "grad_norm": 2.162130638610654, "learning_rate": 1.060369096027526e-06, "loss": 0.674, "step": 340 }, { "epoch": 0.03200075075075075, "grad_norm": 2.943521010358512, "learning_rate": 1.0634970284641854e-06, "loss": 0.6687, "step": 341 }, { "epoch": 0.03209459459459459, "grad_norm": 2.0415200015851367, "learning_rate": 1.0666249609008445e-06, "loss": 0.6194, "step": 342 }, { "epoch": 0.03218843843843844, "grad_norm": 2.4567968107075777, "learning_rate": 1.069752893337504e-06, "loss": 0.6979, "step": 343 }, { "epoch": 0.03228228228228228, "grad_norm": 2.4050148823890556, "learning_rate": 1.0728808257741635e-06, "loss": 0.6616, "step": 344 }, { "epoch": 0.03237612612612613, "grad_norm": 2.2800978962989022, "learning_rate": 1.0760087582108229e-06, "loss": 0.6711, "step": 345 }, { "epoch": 0.03246996996996997, "grad_norm": 2.804842337581489, "learning_rate": 1.079136690647482e-06, "loss": 0.7046, "step": 346 }, { "epoch": 0.03256381381381381, "grad_norm": 2.496175011296808, "learning_rate": 1.0822646230841414e-06, "loss": 0.6864, "step": 347 }, { "epoch": 0.03265765765765766, "grad_norm": 2.0320578824118525, "learning_rate": 1.0853925555208008e-06, "loss": 0.7053, "step": 348 }, { "epoch": 0.0327515015015015, "grad_norm": 2.313308551350351, "learning_rate": 1.0885204879574603e-06, "loss": 0.6667, "step": 349 }, { "epoch": 0.03284534534534535, "grad_norm": 2.9650497188738942, "learning_rate": 1.0916484203941195e-06, "loss": 0.658, "step": 350 }, { "epoch": 0.032939189189189186, "grad_norm": 2.2565934115834896, "learning_rate": 1.0947763528307789e-06, "loss": 0.6918, "step": 351 }, { "epoch": 0.03303303303303303, "grad_norm": 2.8995069112945417, "learning_rate": 1.0979042852674382e-06, "loss": 0.6479, "step": 352 }, { "epoch": 0.033126876876876876, "grad_norm": 2.4616578731627747, "learning_rate": 1.1010322177040978e-06, "loss": 0.5919, "step": 353 }, { "epoch": 0.03322072072072072, "grad_norm": 2.038229638667791, "learning_rate": 1.104160150140757e-06, "loss": 0.5789, "step": 354 }, { "epoch": 0.03331456456456457, "grad_norm": 2.5529861267275207, "learning_rate": 1.1072880825774164e-06, "loss": 0.6243, "step": 355 }, { "epoch": 0.03340840840840841, "grad_norm": 1.9453095711830215, "learning_rate": 1.1104160150140757e-06, "loss": 0.663, "step": 356 }, { "epoch": 0.03350225225225225, "grad_norm": 2.555878910074913, "learning_rate": 1.1135439474507351e-06, "loss": 0.6354, "step": 357 }, { "epoch": 0.033596096096096095, "grad_norm": 2.300574813860463, "learning_rate": 1.1166718798873945e-06, "loss": 0.6041, "step": 358 }, { "epoch": 0.03368993993993994, "grad_norm": 2.381943474077214, "learning_rate": 1.1197998123240539e-06, "loss": 0.6975, "step": 359 }, { "epoch": 0.033783783783783786, "grad_norm": 2.2359636446331868, "learning_rate": 1.1229277447607132e-06, "loss": 0.7046, "step": 360 }, { "epoch": 0.03387762762762763, "grad_norm": 2.5858563486925017, "learning_rate": 1.1260556771973726e-06, "loss": 0.6899, "step": 361 }, { "epoch": 0.03397147147147147, "grad_norm": 2.757742500772339, "learning_rate": 1.129183609634032e-06, "loss": 0.6852, "step": 362 }, { "epoch": 0.034065315315315314, "grad_norm": 2.1271358719735503, "learning_rate": 1.1323115420706913e-06, "loss": 0.601, "step": 363 }, { "epoch": 0.03415915915915916, "grad_norm": 2.632793315360076, "learning_rate": 1.1354394745073507e-06, "loss": 0.6641, "step": 364 }, { "epoch": 0.034253003003003005, "grad_norm": 1.8777920903321694, "learning_rate": 1.13856740694401e-06, "loss": 0.6314, "step": 365 }, { "epoch": 0.03434684684684685, "grad_norm": 3.0870508689234994, "learning_rate": 1.1416953393806695e-06, "loss": 0.6258, "step": 366 }, { "epoch": 0.03444069069069069, "grad_norm": 2.8332915425300307, "learning_rate": 1.1448232718173288e-06, "loss": 0.6794, "step": 367 }, { "epoch": 0.03453453453453453, "grad_norm": 2.3801815827882793, "learning_rate": 1.1479512042539882e-06, "loss": 0.658, "step": 368 }, { "epoch": 0.03462837837837838, "grad_norm": 2.835443703546854, "learning_rate": 1.1510791366906476e-06, "loss": 0.6418, "step": 369 }, { "epoch": 0.034722222222222224, "grad_norm": 3.6316391625545696, "learning_rate": 1.154207069127307e-06, "loss": 0.693, "step": 370 }, { "epoch": 0.03481606606606607, "grad_norm": 2.454249812981114, "learning_rate": 1.1573350015639663e-06, "loss": 0.6397, "step": 371 }, { "epoch": 0.03490990990990991, "grad_norm": 2.418110939625931, "learning_rate": 1.1604629340006257e-06, "loss": 0.6742, "step": 372 }, { "epoch": 0.03500375375375375, "grad_norm": 2.324552388267983, "learning_rate": 1.163590866437285e-06, "loss": 0.6459, "step": 373 }, { "epoch": 0.0350975975975976, "grad_norm": 2.3331811353881386, "learning_rate": 1.1667187988739444e-06, "loss": 0.6659, "step": 374 }, { "epoch": 0.03519144144144144, "grad_norm": 2.249302370104681, "learning_rate": 1.1698467313106038e-06, "loss": 0.6227, "step": 375 }, { "epoch": 0.03528528528528529, "grad_norm": 2.0937671762892114, "learning_rate": 1.1729746637472632e-06, "loss": 0.6256, "step": 376 }, { "epoch": 0.035379129129129126, "grad_norm": 2.648175027188946, "learning_rate": 1.1761025961839226e-06, "loss": 0.6099, "step": 377 }, { "epoch": 0.03547297297297297, "grad_norm": 2.4208351164474378, "learning_rate": 1.179230528620582e-06, "loss": 0.6621, "step": 378 }, { "epoch": 0.03556681681681682, "grad_norm": 2.2136953636761345, "learning_rate": 1.1823584610572413e-06, "loss": 0.6628, "step": 379 }, { "epoch": 0.03566066066066066, "grad_norm": 2.2908161756941707, "learning_rate": 1.1854863934939007e-06, "loss": 0.6261, "step": 380 }, { "epoch": 0.03575450450450451, "grad_norm": 2.415802144265505, "learning_rate": 1.18861432593056e-06, "loss": 0.7165, "step": 381 }, { "epoch": 0.035848348348348345, "grad_norm": 2.272345171097273, "learning_rate": 1.1917422583672194e-06, "loss": 0.6215, "step": 382 }, { "epoch": 0.03594219219219219, "grad_norm": 2.7068563326230692, "learning_rate": 1.1948701908038788e-06, "loss": 0.6942, "step": 383 }, { "epoch": 0.036036036036036036, "grad_norm": 1.9803238884287584, "learning_rate": 1.1979981232405382e-06, "loss": 0.639, "step": 384 }, { "epoch": 0.03612987987987988, "grad_norm": 2.5932981385203058, "learning_rate": 1.2011260556771975e-06, "loss": 0.6165, "step": 385 }, { "epoch": 0.036223723723723726, "grad_norm": 1.9928414427214491, "learning_rate": 1.2042539881138567e-06, "loss": 0.6252, "step": 386 }, { "epoch": 0.036317567567567564, "grad_norm": 2.277232050263943, "learning_rate": 1.2073819205505163e-06, "loss": 0.7091, "step": 387 }, { "epoch": 0.03641141141141141, "grad_norm": 2.334123739421346, "learning_rate": 1.2105098529871757e-06, "loss": 0.6728, "step": 388 }, { "epoch": 0.036505255255255255, "grad_norm": 2.238683922999547, "learning_rate": 1.213637785423835e-06, "loss": 0.6493, "step": 389 }, { "epoch": 0.0365990990990991, "grad_norm": 2.353554446743507, "learning_rate": 1.2167657178604942e-06, "loss": 0.6527, "step": 390 }, { "epoch": 0.036692942942942945, "grad_norm": 2.823483947012371, "learning_rate": 1.2198936502971538e-06, "loss": 0.6199, "step": 391 }, { "epoch": 0.03678678678678678, "grad_norm": 3.892794960673601, "learning_rate": 1.2230215827338131e-06, "loss": 0.634, "step": 392 }, { "epoch": 0.03688063063063063, "grad_norm": 3.2379896645625115, "learning_rate": 1.2261495151704725e-06, "loss": 0.605, "step": 393 }, { "epoch": 0.036974474474474474, "grad_norm": 2.5681327935666856, "learning_rate": 1.2292774476071317e-06, "loss": 0.6909, "step": 394 }, { "epoch": 0.03706831831831832, "grad_norm": 2.401633535366461, "learning_rate": 1.232405380043791e-06, "loss": 0.6265, "step": 395 }, { "epoch": 0.037162162162162164, "grad_norm": 2.9039248768765296, "learning_rate": 1.2355333124804506e-06, "loss": 0.6335, "step": 396 }, { "epoch": 0.03725600600600601, "grad_norm": 2.6340254842651754, "learning_rate": 1.23866124491711e-06, "loss": 0.7178, "step": 397 }, { "epoch": 0.03734984984984985, "grad_norm": 2.1211448362475402, "learning_rate": 1.2417891773537692e-06, "loss": 0.5545, "step": 398 }, { "epoch": 0.03744369369369369, "grad_norm": 2.635012420278949, "learning_rate": 1.2449171097904285e-06, "loss": 0.6691, "step": 399 }, { "epoch": 0.03753753753753754, "grad_norm": 2.365559810112873, "learning_rate": 1.2480450422270881e-06, "loss": 0.7014, "step": 400 }, { "epoch": 0.03763138138138138, "grad_norm": 2.563537877413815, "learning_rate": 1.2511729746637475e-06, "loss": 0.6589, "step": 401 }, { "epoch": 0.03772522522522523, "grad_norm": 2.3539195763499112, "learning_rate": 1.2543009071004067e-06, "loss": 0.6589, "step": 402 }, { "epoch": 0.03781906906906907, "grad_norm": 2.204790539850708, "learning_rate": 1.2574288395370662e-06, "loss": 0.6125, "step": 403 }, { "epoch": 0.03791291291291291, "grad_norm": 2.248722766811529, "learning_rate": 1.2605567719737254e-06, "loss": 0.6833, "step": 404 }, { "epoch": 0.03800675675675676, "grad_norm": 2.370544236948174, "learning_rate": 1.2636847044103848e-06, "loss": 0.6101, "step": 405 }, { "epoch": 0.0381006006006006, "grad_norm": 2.8303002419368366, "learning_rate": 1.2668126368470444e-06, "loss": 0.6322, "step": 406 }, { "epoch": 0.03819444444444445, "grad_norm": 2.9006803712352807, "learning_rate": 1.2699405692837035e-06, "loss": 0.6262, "step": 407 }, { "epoch": 0.038288288288288286, "grad_norm": 2.3742409168872753, "learning_rate": 1.273068501720363e-06, "loss": 0.6084, "step": 408 }, { "epoch": 0.03838213213213213, "grad_norm": 2.159796042879535, "learning_rate": 1.2761964341570225e-06, "loss": 0.6618, "step": 409 }, { "epoch": 0.038475975975975976, "grad_norm": 2.3108486565138904, "learning_rate": 1.2793243665936816e-06, "loss": 0.6454, "step": 410 }, { "epoch": 0.03856981981981982, "grad_norm": 2.75131997060012, "learning_rate": 1.2824522990303412e-06, "loss": 0.6658, "step": 411 }, { "epoch": 0.03866366366366367, "grad_norm": 2.0800489222821974, "learning_rate": 1.2855802314670004e-06, "loss": 0.7239, "step": 412 }, { "epoch": 0.038757507507507505, "grad_norm": 2.9248583011086584, "learning_rate": 1.2887081639036598e-06, "loss": 0.6488, "step": 413 }, { "epoch": 0.03885135135135135, "grad_norm": 2.0775913801621764, "learning_rate": 1.2918360963403193e-06, "loss": 0.5917, "step": 414 }, { "epoch": 0.038945195195195195, "grad_norm": 2.5357323605021764, "learning_rate": 1.2949640287769785e-06, "loss": 0.6931, "step": 415 }, { "epoch": 0.03903903903903904, "grad_norm": 2.117754476931723, "learning_rate": 1.2980919612136379e-06, "loss": 0.5994, "step": 416 }, { "epoch": 0.039132882882882886, "grad_norm": 2.433086800236419, "learning_rate": 1.3012198936502972e-06, "loss": 0.696, "step": 417 }, { "epoch": 0.039226726726726724, "grad_norm": 2.1380396954449763, "learning_rate": 1.3043478260869566e-06, "loss": 0.593, "step": 418 }, { "epoch": 0.03932057057057057, "grad_norm": 2.9033039315968465, "learning_rate": 1.3074757585236162e-06, "loss": 0.6492, "step": 419 }, { "epoch": 0.039414414414414414, "grad_norm": 2.6025397005589057, "learning_rate": 1.3106036909602754e-06, "loss": 0.6531, "step": 420 }, { "epoch": 0.03950825825825826, "grad_norm": 3.0376108660873187, "learning_rate": 1.3137316233969347e-06, "loss": 0.6292, "step": 421 }, { "epoch": 0.039602102102102105, "grad_norm": 2.83839365193535, "learning_rate": 1.3168595558335941e-06, "loss": 0.633, "step": 422 }, { "epoch": 0.03969594594594594, "grad_norm": 2.140625902214509, "learning_rate": 1.3199874882702535e-06, "loss": 0.5826, "step": 423 }, { "epoch": 0.03978978978978979, "grad_norm": 2.4702599665254947, "learning_rate": 1.3231154207069126e-06, "loss": 0.6642, "step": 424 }, { "epoch": 0.03988363363363363, "grad_norm": 2.467166173728345, "learning_rate": 1.3262433531435722e-06, "loss": 0.6076, "step": 425 }, { "epoch": 0.03997747747747748, "grad_norm": 6.427278154205219, "learning_rate": 1.3293712855802316e-06, "loss": 0.6901, "step": 426 }, { "epoch": 0.040071321321321324, "grad_norm": 3.1025918849965866, "learning_rate": 1.3324992180168912e-06, "loss": 0.6606, "step": 427 }, { "epoch": 0.04016516516516516, "grad_norm": 3.484820824339866, "learning_rate": 1.3356271504535503e-06, "loss": 0.7303, "step": 428 }, { "epoch": 0.04025900900900901, "grad_norm": 3.216815581232274, "learning_rate": 1.3387550828902097e-06, "loss": 0.5919, "step": 429 }, { "epoch": 0.04035285285285285, "grad_norm": 2.5063294939181997, "learning_rate": 1.341883015326869e-06, "loss": 0.6526, "step": 430 }, { "epoch": 0.0404466966966967, "grad_norm": 2.777424127808545, "learning_rate": 1.3450109477635285e-06, "loss": 0.6298, "step": 431 }, { "epoch": 0.04054054054054054, "grad_norm": 2.9216081660112656, "learning_rate": 1.3481388802001876e-06, "loss": 0.6543, "step": 432 }, { "epoch": 0.04063438438438438, "grad_norm": 2.6028467745783934, "learning_rate": 1.3512668126368472e-06, "loss": 0.6247, "step": 433 }, { "epoch": 0.040728228228228226, "grad_norm": 2.1807844456051004, "learning_rate": 1.3543947450735066e-06, "loss": 0.6307, "step": 434 }, { "epoch": 0.04082207207207207, "grad_norm": 2.506459201190973, "learning_rate": 1.3575226775101657e-06, "loss": 0.6709, "step": 435 }, { "epoch": 0.04091591591591592, "grad_norm": 2.420389411414867, "learning_rate": 1.3606506099468253e-06, "loss": 0.6001, "step": 436 }, { "epoch": 0.04100975975975976, "grad_norm": 2.015342276203169, "learning_rate": 1.3637785423834845e-06, "loss": 0.6147, "step": 437 }, { "epoch": 0.04110360360360361, "grad_norm": 2.1288378932641, "learning_rate": 1.366906474820144e-06, "loss": 0.675, "step": 438 }, { "epoch": 0.041197447447447445, "grad_norm": 2.4587756036992463, "learning_rate": 1.3700344072568034e-06, "loss": 0.5915, "step": 439 }, { "epoch": 0.04129129129129129, "grad_norm": 2.890801349168186, "learning_rate": 1.3731623396934626e-06, "loss": 0.6058, "step": 440 }, { "epoch": 0.041385135135135136, "grad_norm": 2.5107221862078184, "learning_rate": 1.3762902721301222e-06, "loss": 0.6832, "step": 441 }, { "epoch": 0.04147897897897898, "grad_norm": 2.2618832705989798, "learning_rate": 1.3794182045667813e-06, "loss": 0.6574, "step": 442 }, { "epoch": 0.041572822822822826, "grad_norm": 2.6665117298479637, "learning_rate": 1.3825461370034407e-06, "loss": 0.6711, "step": 443 }, { "epoch": 0.041666666666666664, "grad_norm": 2.551494899255517, "learning_rate": 1.3856740694401003e-06, "loss": 0.5662, "step": 444 }, { "epoch": 0.04176051051051051, "grad_norm": 2.263560159301054, "learning_rate": 1.3888020018767595e-06, "loss": 0.6513, "step": 445 }, { "epoch": 0.041854354354354355, "grad_norm": 2.718879238708351, "learning_rate": 1.391929934313419e-06, "loss": 0.6741, "step": 446 }, { "epoch": 0.0419481981981982, "grad_norm": 4.0629483566267925, "learning_rate": 1.3950578667500784e-06, "loss": 0.6328, "step": 447 }, { "epoch": 0.042042042042042045, "grad_norm": 3.674597016739851, "learning_rate": 1.3981857991867376e-06, "loss": 0.6177, "step": 448 }, { "epoch": 0.04213588588588588, "grad_norm": 2.2776082585970574, "learning_rate": 1.4013137316233972e-06, "loss": 0.6816, "step": 449 }, { "epoch": 0.04222972972972973, "grad_norm": 3.5387485513024823, "learning_rate": 1.4044416640600563e-06, "loss": 0.6389, "step": 450 }, { "epoch": 0.042323573573573574, "grad_norm": 3.297860234211898, "learning_rate": 1.4075695964967157e-06, "loss": 0.5856, "step": 451 }, { "epoch": 0.04241741741741742, "grad_norm": 2.5435209125220073, "learning_rate": 1.4106975289333753e-06, "loss": 0.697, "step": 452 }, { "epoch": 0.042511261261261264, "grad_norm": 5.7225243469135005, "learning_rate": 1.4138254613700344e-06, "loss": 0.666, "step": 453 }, { "epoch": 0.0426051051051051, "grad_norm": 4.4583843790933795, "learning_rate": 1.416953393806694e-06, "loss": 0.6836, "step": 454 }, { "epoch": 0.04269894894894895, "grad_norm": 4.29993022911029, "learning_rate": 1.4200813262433532e-06, "loss": 0.6158, "step": 455 }, { "epoch": 0.04279279279279279, "grad_norm": 2.471129413377005, "learning_rate": 1.4232092586800126e-06, "loss": 0.6612, "step": 456 }, { "epoch": 0.04288663663663664, "grad_norm": 2.069480680980259, "learning_rate": 1.4263371911166721e-06, "loss": 0.5761, "step": 457 }, { "epoch": 0.04298048048048048, "grad_norm": 2.36180009755541, "learning_rate": 1.4294651235533313e-06, "loss": 0.609, "step": 458 }, { "epoch": 0.04307432432432432, "grad_norm": 3.4119640598503786, "learning_rate": 1.4325930559899907e-06, "loss": 0.5919, "step": 459 }, { "epoch": 0.04316816816816817, "grad_norm": 2.2586679274804795, "learning_rate": 1.43572098842665e-06, "loss": 0.5749, "step": 460 }, { "epoch": 0.04326201201201201, "grad_norm": 2.338765537084143, "learning_rate": 1.4388489208633094e-06, "loss": 0.6652, "step": 461 }, { "epoch": 0.04335585585585586, "grad_norm": 2.270634543628297, "learning_rate": 1.441976853299969e-06, "loss": 0.5937, "step": 462 }, { "epoch": 0.0434496996996997, "grad_norm": 2.4435616775234683, "learning_rate": 1.4451047857366282e-06, "loss": 0.5867, "step": 463 }, { "epoch": 0.04354354354354354, "grad_norm": 2.7849991218807273, "learning_rate": 1.4482327181732875e-06, "loss": 0.6179, "step": 464 }, { "epoch": 0.043637387387387386, "grad_norm": 2.1736559507209225, "learning_rate": 1.4513606506099471e-06, "loss": 0.6709, "step": 465 }, { "epoch": 0.04373123123123123, "grad_norm": 2.610184508691879, "learning_rate": 1.4544885830466063e-06, "loss": 0.5836, "step": 466 }, { "epoch": 0.043825075075075076, "grad_norm": 7.290313947242542, "learning_rate": 1.4576165154832654e-06, "loss": 0.6109, "step": 467 }, { "epoch": 0.04391891891891892, "grad_norm": 2.182836033255348, "learning_rate": 1.460744447919925e-06, "loss": 0.6181, "step": 468 }, { "epoch": 0.04401276276276276, "grad_norm": 2.9936888112277282, "learning_rate": 1.4638723803565844e-06, "loss": 0.6567, "step": 469 }, { "epoch": 0.044106606606606605, "grad_norm": 2.5001752727980437, "learning_rate": 1.467000312793244e-06, "loss": 0.6323, "step": 470 }, { "epoch": 0.04420045045045045, "grad_norm": 1.8125877901865026, "learning_rate": 1.4701282452299031e-06, "loss": 0.6763, "step": 471 }, { "epoch": 0.044294294294294295, "grad_norm": 2.040449934443249, "learning_rate": 1.4732561776665625e-06, "loss": 0.6244, "step": 472 }, { "epoch": 0.04438813813813814, "grad_norm": 4.350059191343659, "learning_rate": 1.4763841101032219e-06, "loss": 0.5437, "step": 473 }, { "epoch": 0.04448198198198198, "grad_norm": 2.6531213339555686, "learning_rate": 1.4795120425398813e-06, "loss": 0.6408, "step": 474 }, { "epoch": 0.044575825825825824, "grad_norm": 2.4832421219509384, "learning_rate": 1.4826399749765404e-06, "loss": 0.584, "step": 475 }, { "epoch": 0.04466966966966967, "grad_norm": 3.042643104965048, "learning_rate": 1.4857679074132e-06, "loss": 0.5872, "step": 476 }, { "epoch": 0.044763513513513514, "grad_norm": 2.4585882761399858, "learning_rate": 1.4888958398498594e-06, "loss": 0.5747, "step": 477 }, { "epoch": 0.04485735735735736, "grad_norm": 2.2952787034622526, "learning_rate": 1.4920237722865188e-06, "loss": 0.6097, "step": 478 }, { "epoch": 0.044951201201201205, "grad_norm": 2.6625787756212254, "learning_rate": 1.4951517047231781e-06, "loss": 0.6066, "step": 479 }, { "epoch": 0.04504504504504504, "grad_norm": 2.239967357154761, "learning_rate": 1.4982796371598373e-06, "loss": 0.6794, "step": 480 }, { "epoch": 0.04513888888888889, "grad_norm": 2.7155427374370658, "learning_rate": 1.5014075695964969e-06, "loss": 0.5735, "step": 481 }, { "epoch": 0.04523273273273273, "grad_norm": 2.1326132335824655, "learning_rate": 1.5045355020331562e-06, "loss": 0.6085, "step": 482 }, { "epoch": 0.04532657657657658, "grad_norm": 2.1206582729017933, "learning_rate": 1.5076634344698154e-06, "loss": 0.5917, "step": 483 }, { "epoch": 0.045420420420420424, "grad_norm": 2.520618058853737, "learning_rate": 1.510791366906475e-06, "loss": 0.6285, "step": 484 }, { "epoch": 0.04551426426426426, "grad_norm": 2.21285373133644, "learning_rate": 1.5139192993431342e-06, "loss": 0.5599, "step": 485 }, { "epoch": 0.04560810810810811, "grad_norm": 1.9887716704710636, "learning_rate": 1.5170472317797937e-06, "loss": 0.559, "step": 486 }, { "epoch": 0.04570195195195195, "grad_norm": 2.5173995289721676, "learning_rate": 1.5201751642164531e-06, "loss": 0.5979, "step": 487 }, { "epoch": 0.0457957957957958, "grad_norm": 2.989477718303918, "learning_rate": 1.5233030966531123e-06, "loss": 0.6859, "step": 488 }, { "epoch": 0.04588963963963964, "grad_norm": 2.2744459757240882, "learning_rate": 1.5264310290897719e-06, "loss": 0.5854, "step": 489 }, { "epoch": 0.04598348348348348, "grad_norm": 2.1590120301114344, "learning_rate": 1.5295589615264312e-06, "loss": 0.6246, "step": 490 }, { "epoch": 0.046077327327327326, "grad_norm": 2.3632527435853383, "learning_rate": 1.5326868939630904e-06, "loss": 0.6599, "step": 491 }, { "epoch": 0.04617117117117117, "grad_norm": 2.6255459682914593, "learning_rate": 1.53581482639975e-06, "loss": 0.6373, "step": 492 }, { "epoch": 0.04626501501501502, "grad_norm": 2.440745619669431, "learning_rate": 1.5389427588364091e-06, "loss": 0.618, "step": 493 }, { "epoch": 0.04635885885885886, "grad_norm": 2.576186583855961, "learning_rate": 1.5420706912730687e-06, "loss": 0.6543, "step": 494 }, { "epoch": 0.0464527027027027, "grad_norm": 2.4654627269557032, "learning_rate": 1.545198623709728e-06, "loss": 0.5904, "step": 495 }, { "epoch": 0.046546546546546545, "grad_norm": 2.1207417551774044, "learning_rate": 1.5483265561463872e-06, "loss": 0.5171, "step": 496 }, { "epoch": 0.04664039039039039, "grad_norm": 5.994887583610954, "learning_rate": 1.5514544885830468e-06, "loss": 0.5753, "step": 497 }, { "epoch": 0.046734234234234236, "grad_norm": 2.393256924723006, "learning_rate": 1.554582421019706e-06, "loss": 0.5083, "step": 498 }, { "epoch": 0.04682807807807808, "grad_norm": 2.3379323302112036, "learning_rate": 1.5577103534563654e-06, "loss": 0.6216, "step": 499 }, { "epoch": 0.04692192192192192, "grad_norm": 2.2708354521393277, "learning_rate": 1.560838285893025e-06, "loss": 0.6623, "step": 500 }, { "epoch": 0.047015765765765764, "grad_norm": 2.4712126926354694, "learning_rate": 1.5639662183296841e-06, "loss": 0.6156, "step": 501 }, { "epoch": 0.04710960960960961, "grad_norm": 2.3381551525815323, "learning_rate": 1.5670941507663437e-06, "loss": 0.6193, "step": 502 }, { "epoch": 0.047203453453453455, "grad_norm": 1.9288702194283942, "learning_rate": 1.5702220832030029e-06, "loss": 0.6259, "step": 503 }, { "epoch": 0.0472972972972973, "grad_norm": 2.570896246881711, "learning_rate": 1.5733500156396622e-06, "loss": 0.5622, "step": 504 }, { "epoch": 0.04739114114114114, "grad_norm": 2.3183828366932873, "learning_rate": 1.5764779480763218e-06, "loss": 0.6313, "step": 505 }, { "epoch": 0.04748498498498498, "grad_norm": 3.4438073984039086, "learning_rate": 1.579605880512981e-06, "loss": 0.6185, "step": 506 }, { "epoch": 0.04757882882882883, "grad_norm": 2.149760806823914, "learning_rate": 1.5827338129496403e-06, "loss": 0.6627, "step": 507 }, { "epoch": 0.047672672672672674, "grad_norm": 2.585864439512029, "learning_rate": 1.5858617453863e-06, "loss": 0.6283, "step": 508 }, { "epoch": 0.04776651651651652, "grad_norm": 13.805749993152233, "learning_rate": 1.588989677822959e-06, "loss": 0.5779, "step": 509 }, { "epoch": 0.04786036036036036, "grad_norm": 2.4568981851976655, "learning_rate": 1.5921176102596187e-06, "loss": 0.6705, "step": 510 }, { "epoch": 0.0479542042042042, "grad_norm": 2.404319081037833, "learning_rate": 1.5952455426962778e-06, "loss": 0.6121, "step": 511 }, { "epoch": 0.04804804804804805, "grad_norm": 2.2219688689821684, "learning_rate": 1.5983734751329372e-06, "loss": 0.5673, "step": 512 }, { "epoch": 0.04814189189189189, "grad_norm": 2.1707353222631194, "learning_rate": 1.6015014075695968e-06, "loss": 0.5833, "step": 513 }, { "epoch": 0.04823573573573574, "grad_norm": 1.9271333858043092, "learning_rate": 1.604629340006256e-06, "loss": 0.5901, "step": 514 }, { "epoch": 0.048329579579579576, "grad_norm": 2.140155290375549, "learning_rate": 1.6077572724429153e-06, "loss": 0.595, "step": 515 }, { "epoch": 0.04842342342342342, "grad_norm": 2.385933228702192, "learning_rate": 1.6108852048795747e-06, "loss": 0.61, "step": 516 }, { "epoch": 0.04851726726726727, "grad_norm": 2.1993553589582926, "learning_rate": 1.614013137316234e-06, "loss": 0.5848, "step": 517 }, { "epoch": 0.04861111111111111, "grad_norm": 1.9268682207663699, "learning_rate": 1.6171410697528937e-06, "loss": 0.6396, "step": 518 }, { "epoch": 0.04870495495495496, "grad_norm": 2.273640550424007, "learning_rate": 1.6202690021895528e-06, "loss": 0.6092, "step": 519 }, { "epoch": 0.048798798798798795, "grad_norm": 2.88167006592645, "learning_rate": 1.6233969346262122e-06, "loss": 0.6426, "step": 520 }, { "epoch": 0.04889264264264264, "grad_norm": 4.141288265859222, "learning_rate": 1.6265248670628716e-06, "loss": 0.6159, "step": 521 }, { "epoch": 0.048986486486486486, "grad_norm": 3.213180392803827, "learning_rate": 1.629652799499531e-06, "loss": 0.6018, "step": 522 }, { "epoch": 0.04908033033033033, "grad_norm": 2.8264103075292346, "learning_rate": 1.63278073193619e-06, "loss": 0.5782, "step": 523 }, { "epoch": 0.049174174174174176, "grad_norm": 2.438788845350679, "learning_rate": 1.6359086643728497e-06, "loss": 0.6962, "step": 524 }, { "epoch": 0.04926801801801802, "grad_norm": 2.767873278492073, "learning_rate": 1.639036596809509e-06, "loss": 0.5597, "step": 525 }, { "epoch": 0.04936186186186186, "grad_norm": 4.960845537641869, "learning_rate": 1.6421645292461686e-06, "loss": 0.5988, "step": 526 }, { "epoch": 0.049455705705705705, "grad_norm": 1.9976839809921325, "learning_rate": 1.6452924616828278e-06, "loss": 0.5861, "step": 527 }, { "epoch": 0.04954954954954955, "grad_norm": 2.083855638175149, "learning_rate": 1.6484203941194872e-06, "loss": 0.5796, "step": 528 }, { "epoch": 0.049643393393393395, "grad_norm": 2.4471640906108965, "learning_rate": 1.6515483265561465e-06, "loss": 0.6109, "step": 529 }, { "epoch": 0.04973723723723724, "grad_norm": 2.4169148791571216, "learning_rate": 1.654676258992806e-06, "loss": 0.5763, "step": 530 }, { "epoch": 0.04983108108108108, "grad_norm": 2.8384460350831855, "learning_rate": 1.657804191429465e-06, "loss": 0.5918, "step": 531 }, { "epoch": 0.049924924924924924, "grad_norm": 3.1572711143614103, "learning_rate": 1.6609321238661247e-06, "loss": 0.6726, "step": 532 }, { "epoch": 0.05001876876876877, "grad_norm": 2.154999100101601, "learning_rate": 1.664060056302784e-06, "loss": 0.6219, "step": 533 }, { "epoch": 0.050112612612612614, "grad_norm": 2.5065485261736433, "learning_rate": 1.6671879887394434e-06, "loss": 0.5932, "step": 534 }, { "epoch": 0.05020645645645646, "grad_norm": 2.570939330651501, "learning_rate": 1.6703159211761028e-06, "loss": 0.6163, "step": 535 }, { "epoch": 0.0503003003003003, "grad_norm": 2.4594495783546004, "learning_rate": 1.673443853612762e-06, "loss": 0.6426, "step": 536 }, { "epoch": 0.05039414414414414, "grad_norm": 2.649617664868619, "learning_rate": 1.6765717860494215e-06, "loss": 0.6851, "step": 537 }, { "epoch": 0.05048798798798799, "grad_norm": 1.9115375714286174, "learning_rate": 1.6796997184860809e-06, "loss": 0.5986, "step": 538 }, { "epoch": 0.05058183183183183, "grad_norm": 2.303787149534606, "learning_rate": 1.68282765092274e-06, "loss": 0.5968, "step": 539 }, { "epoch": 0.05067567567567568, "grad_norm": 2.059332842554781, "learning_rate": 1.6859555833593996e-06, "loss": 0.5385, "step": 540 }, { "epoch": 0.05076951951951952, "grad_norm": 2.7784842695656735, "learning_rate": 1.6890835157960588e-06, "loss": 0.5978, "step": 541 }, { "epoch": 0.05086336336336336, "grad_norm": 2.293592935878827, "learning_rate": 1.6922114482327184e-06, "loss": 0.6806, "step": 542 }, { "epoch": 0.05095720720720721, "grad_norm": 3.1234156886732003, "learning_rate": 1.6953393806693778e-06, "loss": 0.5816, "step": 543 }, { "epoch": 0.05105105105105105, "grad_norm": 2.432268881525819, "learning_rate": 1.698467313106037e-06, "loss": 0.5808, "step": 544 }, { "epoch": 0.0511448948948949, "grad_norm": 2.6229735929326186, "learning_rate": 1.7015952455426965e-06, "loss": 0.6624, "step": 545 }, { "epoch": 0.051238738738738736, "grad_norm": 2.493586005062035, "learning_rate": 1.7047231779793559e-06, "loss": 0.5996, "step": 546 }, { "epoch": 0.05133258258258258, "grad_norm": 2.776739175926621, "learning_rate": 1.707851110416015e-06, "loss": 0.6157, "step": 547 }, { "epoch": 0.051426426426426426, "grad_norm": 6.8297865187540845, "learning_rate": 1.7109790428526746e-06, "loss": 0.5963, "step": 548 }, { "epoch": 0.05152027027027027, "grad_norm": 2.048923982407405, "learning_rate": 1.7141069752893338e-06, "loss": 0.637, "step": 549 }, { "epoch": 0.05161411411411412, "grad_norm": 1.9633316658564874, "learning_rate": 1.7172349077259934e-06, "loss": 0.5628, "step": 550 }, { "epoch": 0.051707957957957955, "grad_norm": 2.451991913718273, "learning_rate": 1.7203628401626527e-06, "loss": 0.6638, "step": 551 }, { "epoch": 0.0518018018018018, "grad_norm": 3.123851955277921, "learning_rate": 1.7234907725993119e-06, "loss": 0.5845, "step": 552 }, { "epoch": 0.051895645645645645, "grad_norm": 2.4439395120396847, "learning_rate": 1.7266187050359715e-06, "loss": 0.6198, "step": 553 }, { "epoch": 0.05198948948948949, "grad_norm": 2.1522284248184453, "learning_rate": 1.7297466374726306e-06, "loss": 0.5795, "step": 554 }, { "epoch": 0.052083333333333336, "grad_norm": 2.0769467013861167, "learning_rate": 1.73287456990929e-06, "loss": 0.6254, "step": 555 }, { "epoch": 0.052177177177177174, "grad_norm": 2.714604105970657, "learning_rate": 1.7360025023459496e-06, "loss": 0.6503, "step": 556 }, { "epoch": 0.05227102102102102, "grad_norm": 2.322023418389926, "learning_rate": 1.7391304347826088e-06, "loss": 0.6014, "step": 557 }, { "epoch": 0.052364864864864864, "grad_norm": 2.3537720997028733, "learning_rate": 1.7422583672192683e-06, "loss": 0.589, "step": 558 }, { "epoch": 0.05245870870870871, "grad_norm": 2.491365465926005, "learning_rate": 1.7453862996559275e-06, "loss": 0.6065, "step": 559 }, { "epoch": 0.052552552552552555, "grad_norm": 2.2346374422443573, "learning_rate": 1.7485142320925869e-06, "loss": 0.6852, "step": 560 }, { "epoch": 0.05264639639639639, "grad_norm": 2.7458281456570335, "learning_rate": 1.7516421645292465e-06, "loss": 0.5782, "step": 561 }, { "epoch": 0.05274024024024024, "grad_norm": 2.0956126074375367, "learning_rate": 1.7547700969659056e-06, "loss": 0.5819, "step": 562 }, { "epoch": 0.05283408408408408, "grad_norm": 3.3731223952598537, "learning_rate": 1.757898029402565e-06, "loss": 0.6647, "step": 563 }, { "epoch": 0.05292792792792793, "grad_norm": 2.6940467783040796, "learning_rate": 1.7610259618392246e-06, "loss": 0.6062, "step": 564 }, { "epoch": 0.053021771771771774, "grad_norm": 2.712727483182917, "learning_rate": 1.7641538942758837e-06, "loss": 0.6188, "step": 565 }, { "epoch": 0.05311561561561562, "grad_norm": 2.6049174953829923, "learning_rate": 1.7672818267125433e-06, "loss": 0.5565, "step": 566 }, { "epoch": 0.05320945945945946, "grad_norm": 2.3444116806030983, "learning_rate": 1.7704097591492025e-06, "loss": 0.6596, "step": 567 }, { "epoch": 0.0533033033033033, "grad_norm": 3.9364450264112656, "learning_rate": 1.7735376915858619e-06, "loss": 0.6222, "step": 568 }, { "epoch": 0.05339714714714715, "grad_norm": 2.4944839757761237, "learning_rate": 1.7766656240225214e-06, "loss": 0.6366, "step": 569 }, { "epoch": 0.05349099099099099, "grad_norm": 4.6818196862466985, "learning_rate": 1.7797935564591806e-06, "loss": 0.636, "step": 570 }, { "epoch": 0.05358483483483484, "grad_norm": 2.3394140351194648, "learning_rate": 1.78292148889584e-06, "loss": 0.6377, "step": 571 }, { "epoch": 0.053678678678678676, "grad_norm": 2.7387004958129557, "learning_rate": 1.7860494213324993e-06, "loss": 0.598, "step": 572 }, { "epoch": 0.05377252252252252, "grad_norm": 2.566143717943941, "learning_rate": 1.7891773537691587e-06, "loss": 0.6088, "step": 573 }, { "epoch": 0.05386636636636637, "grad_norm": 3.9836252820184477, "learning_rate": 1.7923052862058183e-06, "loss": 0.5637, "step": 574 }, { "epoch": 0.05396021021021021, "grad_norm": 3.3171734142802194, "learning_rate": 1.7954332186424775e-06, "loss": 0.6122, "step": 575 }, { "epoch": 0.05405405405405406, "grad_norm": 2.3451034820368952, "learning_rate": 1.7985611510791368e-06, "loss": 0.6008, "step": 576 }, { "epoch": 0.054147897897897895, "grad_norm": 2.2764672911360564, "learning_rate": 1.8016890835157962e-06, "loss": 0.5603, "step": 577 }, { "epoch": 0.05424174174174174, "grad_norm": 2.2508695889707737, "learning_rate": 1.8048170159524556e-06, "loss": 0.5961, "step": 578 }, { "epoch": 0.054335585585585586, "grad_norm": 2.9427847183263975, "learning_rate": 1.8079449483891147e-06, "loss": 0.579, "step": 579 }, { "epoch": 0.05442942942942943, "grad_norm": 2.735916094050949, "learning_rate": 1.8110728808257743e-06, "loss": 0.6104, "step": 580 }, { "epoch": 0.054523273273273276, "grad_norm": 3.7916878358841455, "learning_rate": 1.8142008132624337e-06, "loss": 0.5867, "step": 581 }, { "epoch": 0.054617117117117114, "grad_norm": 2.2790222848287462, "learning_rate": 1.8173287456990933e-06, "loss": 0.6403, "step": 582 }, { "epoch": 0.05471096096096096, "grad_norm": 2.2234652422967724, "learning_rate": 1.8204566781357524e-06, "loss": 0.5426, "step": 583 }, { "epoch": 0.054804804804804805, "grad_norm": 2.104195040874136, "learning_rate": 1.8235846105724118e-06, "loss": 0.6334, "step": 584 }, { "epoch": 0.05489864864864865, "grad_norm": 2.1968323001778134, "learning_rate": 1.8267125430090712e-06, "loss": 0.5983, "step": 585 }, { "epoch": 0.054992492492492495, "grad_norm": 2.266247150012719, "learning_rate": 1.8298404754457306e-06, "loss": 0.5866, "step": 586 }, { "epoch": 0.05508633633633633, "grad_norm": 2.66669448016517, "learning_rate": 1.8329684078823897e-06, "loss": 0.6487, "step": 587 }, { "epoch": 0.05518018018018018, "grad_norm": 2.0916759135214775, "learning_rate": 1.8360963403190493e-06, "loss": 0.5468, "step": 588 }, { "epoch": 0.055274024024024024, "grad_norm": 2.0718058732162477, "learning_rate": 1.8392242727557087e-06, "loss": 0.6726, "step": 589 }, { "epoch": 0.05536786786786787, "grad_norm": 3.081590224762539, "learning_rate": 1.842352205192368e-06, "loss": 0.6349, "step": 590 }, { "epoch": 0.055461711711711714, "grad_norm": 2.743799472910774, "learning_rate": 1.8454801376290274e-06, "loss": 0.6201, "step": 591 }, { "epoch": 0.05555555555555555, "grad_norm": 2.651898809199542, "learning_rate": 1.8486080700656866e-06, "loss": 0.5947, "step": 592 }, { "epoch": 0.0556493993993994, "grad_norm": 2.452090601190199, "learning_rate": 1.8517360025023462e-06, "loss": 0.6625, "step": 593 }, { "epoch": 0.05574324324324324, "grad_norm": 2.5617101150666106, "learning_rate": 1.8548639349390055e-06, "loss": 0.6135, "step": 594 }, { "epoch": 0.05583708708708709, "grad_norm": 7.1990613249004065, "learning_rate": 1.8579918673756647e-06, "loss": 0.6152, "step": 595 }, { "epoch": 0.05593093093093093, "grad_norm": 2.2719220339304713, "learning_rate": 1.8611197998123243e-06, "loss": 0.6132, "step": 596 }, { "epoch": 0.05602477477477477, "grad_norm": 2.548186219347169, "learning_rate": 1.8642477322489834e-06, "loss": 0.551, "step": 597 }, { "epoch": 0.05611861861861862, "grad_norm": 2.0862811775540653, "learning_rate": 1.867375664685643e-06, "loss": 0.667, "step": 598 }, { "epoch": 0.05621246246246246, "grad_norm": 3.267764801486778, "learning_rate": 1.8705035971223024e-06, "loss": 0.6046, "step": 599 }, { "epoch": 0.05630630630630631, "grad_norm": 2.373270850363496, "learning_rate": 1.8736315295589616e-06, "loss": 0.6135, "step": 600 }, { "epoch": 0.05640015015015015, "grad_norm": 5.287449930004447, "learning_rate": 1.8767594619956211e-06, "loss": 0.6479, "step": 601 }, { "epoch": 0.05649399399399399, "grad_norm": 2.0883001264491408, "learning_rate": 1.8798873944322805e-06, "loss": 0.5303, "step": 602 }, { "epoch": 0.056587837837837836, "grad_norm": 2.591744030936817, "learning_rate": 1.8830153268689397e-06, "loss": 0.5471, "step": 603 }, { "epoch": 0.05668168168168168, "grad_norm": 2.082424358949169, "learning_rate": 1.8861432593055993e-06, "loss": 0.5796, "step": 604 }, { "epoch": 0.056775525525525526, "grad_norm": 2.7806056004850666, "learning_rate": 1.8892711917422584e-06, "loss": 0.5839, "step": 605 }, { "epoch": 0.05686936936936937, "grad_norm": 2.5559439682276084, "learning_rate": 1.892399124178918e-06, "loss": 0.6232, "step": 606 }, { "epoch": 0.05696321321321322, "grad_norm": 2.075625727118021, "learning_rate": 1.8955270566155774e-06, "loss": 0.5729, "step": 607 }, { "epoch": 0.057057057057057055, "grad_norm": 2.2337381219321837, "learning_rate": 1.8986549890522365e-06, "loss": 0.6042, "step": 608 }, { "epoch": 0.0571509009009009, "grad_norm": 4.427937312778197, "learning_rate": 1.9017829214888961e-06, "loss": 0.6671, "step": 609 }, { "epoch": 0.057244744744744745, "grad_norm": 2.5738469405141156, "learning_rate": 1.9049108539255553e-06, "loss": 0.5039, "step": 610 }, { "epoch": 0.05733858858858859, "grad_norm": 2.0064007202410643, "learning_rate": 1.9080387863622147e-06, "loss": 0.6336, "step": 611 }, { "epoch": 0.057432432432432436, "grad_norm": 2.328254961354987, "learning_rate": 1.9111667187988742e-06, "loss": 0.6203, "step": 612 }, { "epoch": 0.057526276276276274, "grad_norm": 2.1023909648140786, "learning_rate": 1.9142946512355334e-06, "loss": 0.635, "step": 613 }, { "epoch": 0.05762012012012012, "grad_norm": 2.4618208851871457, "learning_rate": 1.917422583672193e-06, "loss": 0.6134, "step": 614 }, { "epoch": 0.057713963963963964, "grad_norm": 2.098827673686888, "learning_rate": 1.920550516108852e-06, "loss": 0.584, "step": 615 }, { "epoch": 0.05780780780780781, "grad_norm": 2.102025190103612, "learning_rate": 1.9236784485455113e-06, "loss": 0.6056, "step": 616 }, { "epoch": 0.057901651651651655, "grad_norm": 2.232867598328096, "learning_rate": 1.926806380982171e-06, "loss": 0.6285, "step": 617 }, { "epoch": 0.05799549549549549, "grad_norm": 2.172692233496675, "learning_rate": 1.9299343134188305e-06, "loss": 0.619, "step": 618 }, { "epoch": 0.05808933933933934, "grad_norm": 2.2055853430633023, "learning_rate": 1.9330622458554896e-06, "loss": 0.5555, "step": 619 }, { "epoch": 0.05818318318318318, "grad_norm": 2.3065325794746347, "learning_rate": 1.9361901782921492e-06, "loss": 0.6274, "step": 620 }, { "epoch": 0.05827702702702703, "grad_norm": 2.252805251586936, "learning_rate": 1.9393181107288084e-06, "loss": 0.5938, "step": 621 }, { "epoch": 0.058370870870870874, "grad_norm": 2.1896012407126486, "learning_rate": 1.942446043165468e-06, "loss": 0.556, "step": 622 }, { "epoch": 0.05846471471471471, "grad_norm": 2.902312549286153, "learning_rate": 1.945573975602127e-06, "loss": 0.6269, "step": 623 }, { "epoch": 0.05855855855855856, "grad_norm": 2.2563322039655174, "learning_rate": 1.9487019080387863e-06, "loss": 0.5855, "step": 624 }, { "epoch": 0.0586524024024024, "grad_norm": 6.146880903582973, "learning_rate": 1.951829840475446e-06, "loss": 0.578, "step": 625 }, { "epoch": 0.05874624624624625, "grad_norm": 2.90638309677645, "learning_rate": 1.9549577729121055e-06, "loss": 0.6583, "step": 626 }, { "epoch": 0.05884009009009009, "grad_norm": 2.2261369331244865, "learning_rate": 1.9580857053487646e-06, "loss": 0.5413, "step": 627 }, { "epoch": 0.05893393393393393, "grad_norm": 2.3815731538793345, "learning_rate": 1.961213637785424e-06, "loss": 0.6143, "step": 628 }, { "epoch": 0.059027777777777776, "grad_norm": 2.36879025200118, "learning_rate": 1.9643415702220834e-06, "loss": 0.5664, "step": 629 }, { "epoch": 0.05912162162162162, "grad_norm": 2.8818864182629644, "learning_rate": 1.967469502658743e-06, "loss": 0.6007, "step": 630 }, { "epoch": 0.05921546546546547, "grad_norm": 2.361147731146654, "learning_rate": 1.970597435095402e-06, "loss": 0.6496, "step": 631 }, { "epoch": 0.05930930930930931, "grad_norm": 1.90041100354402, "learning_rate": 1.9737253675320613e-06, "loss": 0.5726, "step": 632 }, { "epoch": 0.05940315315315315, "grad_norm": 2.3345605702688834, "learning_rate": 1.976853299968721e-06, "loss": 0.6098, "step": 633 }, { "epoch": 0.059496996996996995, "grad_norm": 2.362276237667762, "learning_rate": 1.97998123240538e-06, "loss": 0.5879, "step": 634 }, { "epoch": 0.05959084084084084, "grad_norm": 2.588394503793484, "learning_rate": 1.9831091648420396e-06, "loss": 0.6269, "step": 635 }, { "epoch": 0.059684684684684686, "grad_norm": 3.638544977294538, "learning_rate": 1.986237097278699e-06, "loss": 0.5757, "step": 636 }, { "epoch": 0.05977852852852853, "grad_norm": 2.1133093783188004, "learning_rate": 1.9893650297153583e-06, "loss": 0.622, "step": 637 }, { "epoch": 0.05987237237237237, "grad_norm": 2.1871166329394764, "learning_rate": 1.992492962152018e-06, "loss": 0.6269, "step": 638 }, { "epoch": 0.059966216216216214, "grad_norm": 2.0923394770668478, "learning_rate": 1.995620894588677e-06, "loss": 0.5838, "step": 639 }, { "epoch": 0.06006006006006006, "grad_norm": 3.3349111369840583, "learning_rate": 1.9987488270253362e-06, "loss": 0.6086, "step": 640 }, { "epoch": 0.060153903903903905, "grad_norm": 2.037111203888246, "learning_rate": 2.001876759461996e-06, "loss": 0.6168, "step": 641 }, { "epoch": 0.06024774774774775, "grad_norm": 1.9435443610947936, "learning_rate": 2.005004691898655e-06, "loss": 0.6088, "step": 642 }, { "epoch": 0.06034159159159159, "grad_norm": 1.8402685416532913, "learning_rate": 2.0081326243353146e-06, "loss": 0.5521, "step": 643 }, { "epoch": 0.06043543543543543, "grad_norm": 2.4445651845285754, "learning_rate": 2.011260556771974e-06, "loss": 0.6118, "step": 644 }, { "epoch": 0.06052927927927928, "grad_norm": 2.370977632067055, "learning_rate": 2.0143884892086333e-06, "loss": 0.7048, "step": 645 }, { "epoch": 0.060623123123123124, "grad_norm": 2.978155095048351, "learning_rate": 2.0175164216452925e-06, "loss": 0.5951, "step": 646 }, { "epoch": 0.06071696696696697, "grad_norm": 2.219294275419054, "learning_rate": 2.020644354081952e-06, "loss": 0.5342, "step": 647 }, { "epoch": 0.060810810810810814, "grad_norm": 2.747436767607921, "learning_rate": 2.0237722865186112e-06, "loss": 0.5546, "step": 648 }, { "epoch": 0.06090465465465465, "grad_norm": 1.9458444999381077, "learning_rate": 2.026900218955271e-06, "loss": 0.5568, "step": 649 }, { "epoch": 0.0609984984984985, "grad_norm": 2.2950382125281457, "learning_rate": 2.03002815139193e-06, "loss": 0.6094, "step": 650 }, { "epoch": 0.06109234234234234, "grad_norm": 2.777756162269937, "learning_rate": 2.0331560838285896e-06, "loss": 0.5958, "step": 651 }, { "epoch": 0.06118618618618619, "grad_norm": 2.510192805792623, "learning_rate": 2.0362840162652487e-06, "loss": 0.6321, "step": 652 }, { "epoch": 0.06128003003003003, "grad_norm": 2.2146099364105587, "learning_rate": 2.0394119487019083e-06, "loss": 0.6555, "step": 653 }, { "epoch": 0.06137387387387387, "grad_norm": 3.2094829089133534, "learning_rate": 2.0425398811385675e-06, "loss": 0.5952, "step": 654 }, { "epoch": 0.06146771771771772, "grad_norm": 2.311276168114188, "learning_rate": 2.045667813575227e-06, "loss": 0.5367, "step": 655 }, { "epoch": 0.06156156156156156, "grad_norm": 3.974021116084875, "learning_rate": 2.048795746011886e-06, "loss": 0.6751, "step": 656 }, { "epoch": 0.06165540540540541, "grad_norm": 2.373005235225002, "learning_rate": 2.051923678448546e-06, "loss": 0.5811, "step": 657 }, { "epoch": 0.06174924924924925, "grad_norm": 2.2965586042813606, "learning_rate": 2.055051610885205e-06, "loss": 0.578, "step": 658 }, { "epoch": 0.06184309309309309, "grad_norm": 2.2185500875619946, "learning_rate": 2.058179543321864e-06, "loss": 0.6539, "step": 659 }, { "epoch": 0.061936936936936936, "grad_norm": 2.5010596165233205, "learning_rate": 2.0613074757585237e-06, "loss": 0.5651, "step": 660 }, { "epoch": 0.06203078078078078, "grad_norm": 2.121047105814424, "learning_rate": 2.0644354081951833e-06, "loss": 0.6222, "step": 661 }, { "epoch": 0.062124624624624626, "grad_norm": 3.4655898229020354, "learning_rate": 2.0675633406318424e-06, "loss": 0.6192, "step": 662 }, { "epoch": 0.06221846846846847, "grad_norm": 4.705846647972184, "learning_rate": 2.070691273068502e-06, "loss": 0.5686, "step": 663 }, { "epoch": 0.06231231231231231, "grad_norm": 2.414347952309469, "learning_rate": 2.073819205505161e-06, "loss": 0.5635, "step": 664 }, { "epoch": 0.062406156156156155, "grad_norm": 3.0426736129887297, "learning_rate": 2.0769471379418208e-06, "loss": 0.6713, "step": 665 }, { "epoch": 0.0625, "grad_norm": 2.229887521033893, "learning_rate": 2.08007507037848e-06, "loss": 0.5909, "step": 666 }, { "epoch": 0.06259384384384384, "grad_norm": 2.227364362960132, "learning_rate": 2.083203002815139e-06, "loss": 0.6489, "step": 667 }, { "epoch": 0.06268768768768769, "grad_norm": 2.29479130191147, "learning_rate": 2.0863309352517987e-06, "loss": 0.5887, "step": 668 }, { "epoch": 0.06278153153153153, "grad_norm": 2.373049021240486, "learning_rate": 2.0894588676884583e-06, "loss": 0.628, "step": 669 }, { "epoch": 0.06287537537537538, "grad_norm": 2.652660803563937, "learning_rate": 2.0925868001251174e-06, "loss": 0.6107, "step": 670 }, { "epoch": 0.06296921921921922, "grad_norm": 2.6318640245793183, "learning_rate": 2.095714732561777e-06, "loss": 0.5879, "step": 671 }, { "epoch": 0.06306306306306306, "grad_norm": 2.336640946552939, "learning_rate": 2.098842664998436e-06, "loss": 0.5589, "step": 672 }, { "epoch": 0.06315690690690691, "grad_norm": 2.402430251059773, "learning_rate": 2.1019705974350957e-06, "loss": 0.5884, "step": 673 }, { "epoch": 0.06325075075075075, "grad_norm": 2.20132675172062, "learning_rate": 2.105098529871755e-06, "loss": 0.5438, "step": 674 }, { "epoch": 0.0633445945945946, "grad_norm": 2.135567721928307, "learning_rate": 2.108226462308414e-06, "loss": 0.6466, "step": 675 }, { "epoch": 0.06343843843843844, "grad_norm": 12.620822839520114, "learning_rate": 2.1113543947450737e-06, "loss": 0.5812, "step": 676 }, { "epoch": 0.06353228228228228, "grad_norm": 1.849366374605163, "learning_rate": 2.114482327181733e-06, "loss": 0.6192, "step": 677 }, { "epoch": 0.06362612612612613, "grad_norm": 1.96280557163736, "learning_rate": 2.1176102596183924e-06, "loss": 0.5889, "step": 678 }, { "epoch": 0.06371996996996997, "grad_norm": 5.800318614819026, "learning_rate": 2.120738192055052e-06, "loss": 0.611, "step": 679 }, { "epoch": 0.06381381381381382, "grad_norm": 2.27381104282483, "learning_rate": 2.123866124491711e-06, "loss": 0.5391, "step": 680 }, { "epoch": 0.06390765765765766, "grad_norm": 2.6725104281222043, "learning_rate": 2.1269940569283707e-06, "loss": 0.5947, "step": 681 }, { "epoch": 0.0640015015015015, "grad_norm": 2.1175941103825524, "learning_rate": 2.13012198936503e-06, "loss": 0.6268, "step": 682 }, { "epoch": 0.06409534534534535, "grad_norm": 2.2027667217048603, "learning_rate": 2.133249921801689e-06, "loss": 0.5867, "step": 683 }, { "epoch": 0.06418918918918919, "grad_norm": 2.244924448776754, "learning_rate": 2.1363778542383486e-06, "loss": 0.5594, "step": 684 }, { "epoch": 0.06428303303303304, "grad_norm": 2.074241961416583, "learning_rate": 2.139505786675008e-06, "loss": 0.5815, "step": 685 }, { "epoch": 0.06437687687687688, "grad_norm": 2.3847791575370585, "learning_rate": 2.1426337191116674e-06, "loss": 0.6295, "step": 686 }, { "epoch": 0.06447072072072071, "grad_norm": 1.997949031985884, "learning_rate": 2.145761651548327e-06, "loss": 0.6012, "step": 687 }, { "epoch": 0.06456456456456457, "grad_norm": 2.0911404697734923, "learning_rate": 2.148889583984986e-06, "loss": 0.6242, "step": 688 }, { "epoch": 0.0646584084084084, "grad_norm": 2.177452192868679, "learning_rate": 2.1520175164216457e-06, "loss": 0.5728, "step": 689 }, { "epoch": 0.06475225225225226, "grad_norm": 2.3806189228995067, "learning_rate": 2.155145448858305e-06, "loss": 0.5721, "step": 690 }, { "epoch": 0.0648460960960961, "grad_norm": 2.053897932028559, "learning_rate": 2.158273381294964e-06, "loss": 0.5359, "step": 691 }, { "epoch": 0.06493993993993993, "grad_norm": 2.9592255475535376, "learning_rate": 2.1614013137316236e-06, "loss": 0.5804, "step": 692 }, { "epoch": 0.06503378378378379, "grad_norm": 1.9060914922325058, "learning_rate": 2.1645292461682828e-06, "loss": 0.5693, "step": 693 }, { "epoch": 0.06512762762762762, "grad_norm": 2.5435680660146054, "learning_rate": 2.1676571786049424e-06, "loss": 0.5797, "step": 694 }, { "epoch": 0.06522147147147148, "grad_norm": 3.0133203462777716, "learning_rate": 2.1707851110416015e-06, "loss": 0.5823, "step": 695 }, { "epoch": 0.06531531531531531, "grad_norm": 2.190440926725674, "learning_rate": 2.173913043478261e-06, "loss": 0.5805, "step": 696 }, { "epoch": 0.06540915915915915, "grad_norm": 2.408476778401619, "learning_rate": 2.1770409759149207e-06, "loss": 0.6016, "step": 697 }, { "epoch": 0.065503003003003, "grad_norm": 2.125251420746095, "learning_rate": 2.18016890835158e-06, "loss": 0.5436, "step": 698 }, { "epoch": 0.06559684684684684, "grad_norm": 2.577445680375977, "learning_rate": 2.183296840788239e-06, "loss": 0.5938, "step": 699 }, { "epoch": 0.0656906906906907, "grad_norm": 2.993930969977202, "learning_rate": 2.1864247732248986e-06, "loss": 0.6177, "step": 700 }, { "epoch": 0.06578453453453453, "grad_norm": 2.0301655992147127, "learning_rate": 2.1895527056615578e-06, "loss": 0.5664, "step": 701 }, { "epoch": 0.06587837837837837, "grad_norm": 2.856365198710951, "learning_rate": 2.1926806380982173e-06, "loss": 0.6088, "step": 702 }, { "epoch": 0.06597222222222222, "grad_norm": 2.1295801097975366, "learning_rate": 2.1958085705348765e-06, "loss": 0.5887, "step": 703 }, { "epoch": 0.06606606606606606, "grad_norm": 2.560897799040281, "learning_rate": 2.198936502971536e-06, "loss": 0.5759, "step": 704 }, { "epoch": 0.06615990990990991, "grad_norm": 2.848541613733762, "learning_rate": 2.2020644354081957e-06, "loss": 0.6073, "step": 705 }, { "epoch": 0.06625375375375375, "grad_norm": 2.2517807650531956, "learning_rate": 2.205192367844855e-06, "loss": 0.6839, "step": 706 }, { "epoch": 0.06634759759759759, "grad_norm": 4.407596701833581, "learning_rate": 2.208320300281514e-06, "loss": 0.5814, "step": 707 }, { "epoch": 0.06644144144144144, "grad_norm": 29.17387692915536, "learning_rate": 2.2114482327181736e-06, "loss": 0.5535, "step": 708 }, { "epoch": 0.06653528528528528, "grad_norm": 1.9673827887438615, "learning_rate": 2.2145761651548327e-06, "loss": 0.6051, "step": 709 }, { "epoch": 0.06662912912912913, "grad_norm": 2.631064975993296, "learning_rate": 2.217704097591492e-06, "loss": 0.5984, "step": 710 }, { "epoch": 0.06672297297297297, "grad_norm": 2.389918957592088, "learning_rate": 2.2208320300281515e-06, "loss": 0.5897, "step": 711 }, { "epoch": 0.06681681681681682, "grad_norm": 3.2533905249541806, "learning_rate": 2.223959962464811e-06, "loss": 0.5806, "step": 712 }, { "epoch": 0.06691066066066066, "grad_norm": 2.2788868613886017, "learning_rate": 2.2270878949014702e-06, "loss": 0.5897, "step": 713 }, { "epoch": 0.0670045045045045, "grad_norm": 2.4373507171291737, "learning_rate": 2.23021582733813e-06, "loss": 0.5765, "step": 714 }, { "epoch": 0.06709834834834835, "grad_norm": 2.542892368326286, "learning_rate": 2.233343759774789e-06, "loss": 0.5666, "step": 715 }, { "epoch": 0.06719219219219219, "grad_norm": 2.451925542363799, "learning_rate": 2.2364716922114486e-06, "loss": 0.6045, "step": 716 }, { "epoch": 0.06728603603603604, "grad_norm": 2.549521118003136, "learning_rate": 2.2395996246481077e-06, "loss": 0.5859, "step": 717 }, { "epoch": 0.06737987987987988, "grad_norm": 2.4237251302239633, "learning_rate": 2.242727557084767e-06, "loss": 0.5944, "step": 718 }, { "epoch": 0.06747372372372372, "grad_norm": 2.618611753914501, "learning_rate": 2.2458554895214265e-06, "loss": 0.567, "step": 719 }, { "epoch": 0.06756756756756757, "grad_norm": 1.9214554963438721, "learning_rate": 2.248983421958086e-06, "loss": 0.4943, "step": 720 }, { "epoch": 0.06766141141141141, "grad_norm": 2.2584995027045984, "learning_rate": 2.252111354394745e-06, "loss": 0.5868, "step": 721 }, { "epoch": 0.06775525525525526, "grad_norm": 1.9139098030195874, "learning_rate": 2.2552392868314048e-06, "loss": 0.5988, "step": 722 }, { "epoch": 0.0678490990990991, "grad_norm": 2.232608305328435, "learning_rate": 2.258367219268064e-06, "loss": 0.5757, "step": 723 }, { "epoch": 0.06794294294294294, "grad_norm": 2.2473607480569733, "learning_rate": 2.2614951517047235e-06, "loss": 0.607, "step": 724 }, { "epoch": 0.06803678678678679, "grad_norm": 2.1348209110614107, "learning_rate": 2.2646230841413827e-06, "loss": 0.5818, "step": 725 }, { "epoch": 0.06813063063063063, "grad_norm": 2.2118703110343105, "learning_rate": 2.267751016578042e-06, "loss": 0.5636, "step": 726 }, { "epoch": 0.06822447447447448, "grad_norm": 7.672853267205728, "learning_rate": 2.2708789490147014e-06, "loss": 0.6341, "step": 727 }, { "epoch": 0.06831831831831832, "grad_norm": 2.6381549754297944, "learning_rate": 2.2740068814513606e-06, "loss": 0.5076, "step": 728 }, { "epoch": 0.06841216216216216, "grad_norm": 2.709219264583749, "learning_rate": 2.27713481388802e-06, "loss": 0.6271, "step": 729 }, { "epoch": 0.06850600600600601, "grad_norm": 2.0988231465692806, "learning_rate": 2.2802627463246798e-06, "loss": 0.5302, "step": 730 }, { "epoch": 0.06859984984984985, "grad_norm": 4.295352513687167, "learning_rate": 2.283390678761339e-06, "loss": 0.5917, "step": 731 }, { "epoch": 0.0686936936936937, "grad_norm": 2.343532614230006, "learning_rate": 2.2865186111979985e-06, "loss": 0.6268, "step": 732 }, { "epoch": 0.06878753753753754, "grad_norm": 2.401577358588665, "learning_rate": 2.2896465436346577e-06, "loss": 0.6114, "step": 733 }, { "epoch": 0.06888138138138138, "grad_norm": 2.0431820368090508, "learning_rate": 2.292774476071317e-06, "loss": 0.5357, "step": 734 }, { "epoch": 0.06897522522522523, "grad_norm": 2.127184809713533, "learning_rate": 2.2959024085079764e-06, "loss": 0.5172, "step": 735 }, { "epoch": 0.06906906906906907, "grad_norm": 2.3976684495004825, "learning_rate": 2.2990303409446356e-06, "loss": 0.555, "step": 736 }, { "epoch": 0.06916291291291292, "grad_norm": 2.212135563822484, "learning_rate": 2.302158273381295e-06, "loss": 0.6274, "step": 737 }, { "epoch": 0.06925675675675676, "grad_norm": 1.8641361717061742, "learning_rate": 2.3052862058179547e-06, "loss": 0.5808, "step": 738 }, { "epoch": 0.0693506006006006, "grad_norm": 2.1922435989811584, "learning_rate": 2.308414138254614e-06, "loss": 0.5613, "step": 739 }, { "epoch": 0.06944444444444445, "grad_norm": 2.314438193174371, "learning_rate": 2.3115420706912735e-06, "loss": 0.6059, "step": 740 }, { "epoch": 0.06953828828828829, "grad_norm": 2.180486094255799, "learning_rate": 2.3146700031279327e-06, "loss": 0.5486, "step": 741 }, { "epoch": 0.06963213213213214, "grad_norm": 2.4600268524146696, "learning_rate": 2.317797935564592e-06, "loss": 0.5596, "step": 742 }, { "epoch": 0.06972597597597598, "grad_norm": 3.102400410924301, "learning_rate": 2.3209258680012514e-06, "loss": 0.5162, "step": 743 }, { "epoch": 0.06981981981981981, "grad_norm": 3.1603806527133607, "learning_rate": 2.3240538004379106e-06, "loss": 0.6522, "step": 744 }, { "epoch": 0.06991366366366367, "grad_norm": 2.7540959050929548, "learning_rate": 2.32718173287457e-06, "loss": 0.6202, "step": 745 }, { "epoch": 0.0700075075075075, "grad_norm": 2.4589856319348793, "learning_rate": 2.3303096653112293e-06, "loss": 0.5485, "step": 746 }, { "epoch": 0.07010135135135136, "grad_norm": 2.2726969441599274, "learning_rate": 2.333437597747889e-06, "loss": 0.5932, "step": 747 }, { "epoch": 0.0701951951951952, "grad_norm": 1.8845578915686405, "learning_rate": 2.3365655301845485e-06, "loss": 0.5192, "step": 748 }, { "epoch": 0.07028903903903903, "grad_norm": 2.6708554529924964, "learning_rate": 2.3396934626212076e-06, "loss": 0.5894, "step": 749 }, { "epoch": 0.07038288288288289, "grad_norm": 2.247402541191821, "learning_rate": 2.342821395057867e-06, "loss": 0.6165, "step": 750 }, { "epoch": 0.07047672672672672, "grad_norm": 2.1576161983478954, "learning_rate": 2.3459493274945264e-06, "loss": 0.5932, "step": 751 }, { "epoch": 0.07057057057057058, "grad_norm": 2.818002137356883, "learning_rate": 2.3490772599311855e-06, "loss": 0.5933, "step": 752 }, { "epoch": 0.07066441441441441, "grad_norm": 2.5335900010201065, "learning_rate": 2.352205192367845e-06, "loss": 0.5809, "step": 753 }, { "epoch": 0.07075825825825825, "grad_norm": 2.5258856315944493, "learning_rate": 2.3553331248045043e-06, "loss": 0.561, "step": 754 }, { "epoch": 0.0708521021021021, "grad_norm": 2.164634602379473, "learning_rate": 2.358461057241164e-06, "loss": 0.5427, "step": 755 }, { "epoch": 0.07094594594594594, "grad_norm": 2.3029334720263477, "learning_rate": 2.3615889896778234e-06, "loss": 0.6001, "step": 756 }, { "epoch": 0.0710397897897898, "grad_norm": 2.255087627487009, "learning_rate": 2.3647169221144826e-06, "loss": 0.5872, "step": 757 }, { "epoch": 0.07113363363363363, "grad_norm": 3.0517507114541313, "learning_rate": 2.3678448545511418e-06, "loss": 0.5423, "step": 758 }, { "epoch": 0.07122747747747747, "grad_norm": 2.0482254893824114, "learning_rate": 2.3709727869878014e-06, "loss": 0.6377, "step": 759 }, { "epoch": 0.07132132132132132, "grad_norm": 2.6264135158598445, "learning_rate": 2.3741007194244605e-06, "loss": 0.513, "step": 760 }, { "epoch": 0.07141516516516516, "grad_norm": 2.2350298050509445, "learning_rate": 2.37722865186112e-06, "loss": 0.5612, "step": 761 }, { "epoch": 0.07150900900900901, "grad_norm": 2.3730308071565394, "learning_rate": 2.3803565842977793e-06, "loss": 0.6044, "step": 762 }, { "epoch": 0.07160285285285285, "grad_norm": 2.2612793940868574, "learning_rate": 2.383484516734439e-06, "loss": 0.6339, "step": 763 }, { "epoch": 0.07169669669669669, "grad_norm": 4.988230140025222, "learning_rate": 2.386612449171098e-06, "loss": 0.5582, "step": 764 }, { "epoch": 0.07179054054054054, "grad_norm": 2.814405121403503, "learning_rate": 2.3897403816077576e-06, "loss": 0.5566, "step": 765 }, { "epoch": 0.07188438438438438, "grad_norm": 2.3931956355208617, "learning_rate": 2.3928683140444168e-06, "loss": 0.6853, "step": 766 }, { "epoch": 0.07197822822822823, "grad_norm": 2.3961212291337572, "learning_rate": 2.3959962464810763e-06, "loss": 0.544, "step": 767 }, { "epoch": 0.07207207207207207, "grad_norm": 2.165070524554209, "learning_rate": 2.3991241789177355e-06, "loss": 0.6136, "step": 768 }, { "epoch": 0.07216591591591591, "grad_norm": 6.166243563981889, "learning_rate": 2.402252111354395e-06, "loss": 0.5482, "step": 769 }, { "epoch": 0.07225975975975976, "grad_norm": 2.124374290460139, "learning_rate": 2.4053800437910542e-06, "loss": 0.5234, "step": 770 }, { "epoch": 0.0723536036036036, "grad_norm": 2.4958938851447687, "learning_rate": 2.4085079762277134e-06, "loss": 0.5907, "step": 771 }, { "epoch": 0.07244744744744745, "grad_norm": 2.587309504147115, "learning_rate": 2.411635908664373e-06, "loss": 0.5488, "step": 772 }, { "epoch": 0.07254129129129129, "grad_norm": 1.9332131991358077, "learning_rate": 2.4147638411010326e-06, "loss": 0.5306, "step": 773 }, { "epoch": 0.07263513513513513, "grad_norm": 4.732547996104944, "learning_rate": 2.4178917735376917e-06, "loss": 0.5456, "step": 774 }, { "epoch": 0.07272897897897898, "grad_norm": 2.708056385238022, "learning_rate": 2.4210197059743513e-06, "loss": 0.5057, "step": 775 }, { "epoch": 0.07282282282282282, "grad_norm": 2.0749246087504827, "learning_rate": 2.4241476384110105e-06, "loss": 0.5763, "step": 776 }, { "epoch": 0.07291666666666667, "grad_norm": 2.5619333353667835, "learning_rate": 2.42727557084767e-06, "loss": 0.5524, "step": 777 }, { "epoch": 0.07301051051051051, "grad_norm": 2.030238391996853, "learning_rate": 2.4304035032843292e-06, "loss": 0.6223, "step": 778 }, { "epoch": 0.07310435435435435, "grad_norm": 3.846773882393738, "learning_rate": 2.4335314357209884e-06, "loss": 0.5926, "step": 779 }, { "epoch": 0.0731981981981982, "grad_norm": 3.153539836821605, "learning_rate": 2.436659368157648e-06, "loss": 0.6539, "step": 780 }, { "epoch": 0.07329204204204204, "grad_norm": 3.4227256140475344, "learning_rate": 2.4397873005943075e-06, "loss": 0.5519, "step": 781 }, { "epoch": 0.07338588588588589, "grad_norm": 2.283451359680633, "learning_rate": 2.4429152330309667e-06, "loss": 0.606, "step": 782 }, { "epoch": 0.07347972972972973, "grad_norm": 2.308836414796066, "learning_rate": 2.4460431654676263e-06, "loss": 0.5791, "step": 783 }, { "epoch": 0.07357357357357357, "grad_norm": 2.2327908027994186, "learning_rate": 2.4491710979042855e-06, "loss": 0.5925, "step": 784 }, { "epoch": 0.07366741741741742, "grad_norm": 2.255598849361275, "learning_rate": 2.452299030340945e-06, "loss": 0.547, "step": 785 }, { "epoch": 0.07376126126126126, "grad_norm": 2.0980061703575292, "learning_rate": 2.455426962777604e-06, "loss": 0.5074, "step": 786 }, { "epoch": 0.07385510510510511, "grad_norm": 3.8836626084398413, "learning_rate": 2.4585548952142634e-06, "loss": 0.579, "step": 787 }, { "epoch": 0.07394894894894895, "grad_norm": 2.4265745796663687, "learning_rate": 2.461682827650923e-06, "loss": 0.5404, "step": 788 }, { "epoch": 0.07404279279279279, "grad_norm": 2.268221959377736, "learning_rate": 2.464810760087582e-06, "loss": 0.5701, "step": 789 }, { "epoch": 0.07413663663663664, "grad_norm": 2.1567401240332122, "learning_rate": 2.4679386925242417e-06, "loss": 0.5213, "step": 790 }, { "epoch": 0.07423048048048048, "grad_norm": 2.6511204752914073, "learning_rate": 2.4710666249609013e-06, "loss": 0.6185, "step": 791 }, { "epoch": 0.07432432432432433, "grad_norm": 2.091696120492805, "learning_rate": 2.4741945573975604e-06, "loss": 0.5501, "step": 792 }, { "epoch": 0.07441816816816817, "grad_norm": 2.3583668703537852, "learning_rate": 2.47732248983422e-06, "loss": 0.5996, "step": 793 }, { "epoch": 0.07451201201201202, "grad_norm": 2.240918413903003, "learning_rate": 2.480450422270879e-06, "loss": 0.5329, "step": 794 }, { "epoch": 0.07460585585585586, "grad_norm": 2.6989123670514146, "learning_rate": 2.4835783547075383e-06, "loss": 0.6043, "step": 795 }, { "epoch": 0.0746996996996997, "grad_norm": 2.661434416714799, "learning_rate": 2.486706287144198e-06, "loss": 0.5545, "step": 796 }, { "epoch": 0.07479354354354355, "grad_norm": 2.063883622224196, "learning_rate": 2.489834219580857e-06, "loss": 0.601, "step": 797 }, { "epoch": 0.07488738738738739, "grad_norm": 2.3951039722434744, "learning_rate": 2.4929621520175167e-06, "loss": 0.61, "step": 798 }, { "epoch": 0.07498123123123124, "grad_norm": 2.801994306298524, "learning_rate": 2.4960900844541763e-06, "loss": 0.6052, "step": 799 }, { "epoch": 0.07507507507507508, "grad_norm": 2.2078004802527813, "learning_rate": 2.4992180168908354e-06, "loss": 0.5765, "step": 800 }, { "epoch": 0.07516891891891891, "grad_norm": 2.2530056337133235, "learning_rate": 2.502345949327495e-06, "loss": 0.5963, "step": 801 }, { "epoch": 0.07526276276276277, "grad_norm": 2.20578606875047, "learning_rate": 2.5054738817641537e-06, "loss": 0.5987, "step": 802 }, { "epoch": 0.0753566066066066, "grad_norm": 2.734221601378888, "learning_rate": 2.5086018142008133e-06, "loss": 0.5677, "step": 803 }, { "epoch": 0.07545045045045046, "grad_norm": 2.735910908619944, "learning_rate": 2.511729746637473e-06, "loss": 0.6068, "step": 804 }, { "epoch": 0.0755442942942943, "grad_norm": 1.922493473054778, "learning_rate": 2.5148576790741325e-06, "loss": 0.5517, "step": 805 }, { "epoch": 0.07563813813813813, "grad_norm": 2.1024048231692785, "learning_rate": 2.5179856115107916e-06, "loss": 0.5947, "step": 806 }, { "epoch": 0.07573198198198199, "grad_norm": 2.4915113620251823, "learning_rate": 2.521113543947451e-06, "loss": 0.5561, "step": 807 }, { "epoch": 0.07582582582582582, "grad_norm": 2.052664085660054, "learning_rate": 2.5242414763841104e-06, "loss": 0.5797, "step": 808 }, { "epoch": 0.07591966966966968, "grad_norm": 4.634642533703118, "learning_rate": 2.5273694088207696e-06, "loss": 0.6433, "step": 809 }, { "epoch": 0.07601351351351351, "grad_norm": 2.4800458457869055, "learning_rate": 2.530497341257429e-06, "loss": 0.5724, "step": 810 }, { "epoch": 0.07610735735735735, "grad_norm": 2.153062030915762, "learning_rate": 2.5336252736940887e-06, "loss": 0.6517, "step": 811 }, { "epoch": 0.0762012012012012, "grad_norm": 2.147906742344554, "learning_rate": 2.5367532061307475e-06, "loss": 0.5713, "step": 812 }, { "epoch": 0.07629504504504504, "grad_norm": 2.1894797386341303, "learning_rate": 2.539881138567407e-06, "loss": 0.5534, "step": 813 }, { "epoch": 0.0763888888888889, "grad_norm": 2.2953595851019144, "learning_rate": 2.5430090710040666e-06, "loss": 0.612, "step": 814 }, { "epoch": 0.07648273273273273, "grad_norm": 2.660980985096225, "learning_rate": 2.546137003440726e-06, "loss": 0.5662, "step": 815 }, { "epoch": 0.07657657657657657, "grad_norm": 3.1240204634727937, "learning_rate": 2.5492649358773854e-06, "loss": 0.5708, "step": 816 }, { "epoch": 0.07667042042042042, "grad_norm": 1.9428673147299498, "learning_rate": 2.552392868314045e-06, "loss": 0.5408, "step": 817 }, { "epoch": 0.07676426426426426, "grad_norm": 2.5557882010724597, "learning_rate": 2.5555208007507037e-06, "loss": 0.479, "step": 818 }, { "epoch": 0.07685810810810811, "grad_norm": 2.992414949561814, "learning_rate": 2.5586487331873633e-06, "loss": 0.5723, "step": 819 }, { "epoch": 0.07695195195195195, "grad_norm": 2.877225155224419, "learning_rate": 2.561776665624023e-06, "loss": 0.5903, "step": 820 }, { "epoch": 0.07704579579579579, "grad_norm": 2.885326436833143, "learning_rate": 2.5649045980606824e-06, "loss": 0.5434, "step": 821 }, { "epoch": 0.07713963963963964, "grad_norm": 3.5202822697082015, "learning_rate": 2.568032530497341e-06, "loss": 0.5659, "step": 822 }, { "epoch": 0.07723348348348348, "grad_norm": 2.0778443742019435, "learning_rate": 2.5711604629340008e-06, "loss": 0.5578, "step": 823 }, { "epoch": 0.07732732732732733, "grad_norm": 2.3781290446656995, "learning_rate": 2.5742883953706604e-06, "loss": 0.5575, "step": 824 }, { "epoch": 0.07742117117117117, "grad_norm": 2.325963727105167, "learning_rate": 2.5774163278073195e-06, "loss": 0.5989, "step": 825 }, { "epoch": 0.07751501501501501, "grad_norm": 3.168741257375592, "learning_rate": 2.580544260243979e-06, "loss": 0.592, "step": 826 }, { "epoch": 0.07760885885885886, "grad_norm": 2.0798893178157822, "learning_rate": 2.5836721926806387e-06, "loss": 0.5858, "step": 827 }, { "epoch": 0.0777027027027027, "grad_norm": 2.1932953187837074, "learning_rate": 2.5868001251172974e-06, "loss": 0.5298, "step": 828 }, { "epoch": 0.07779654654654655, "grad_norm": 4.630435414662803, "learning_rate": 2.589928057553957e-06, "loss": 0.5803, "step": 829 }, { "epoch": 0.07789039039039039, "grad_norm": 2.176870206184172, "learning_rate": 2.5930559899906166e-06, "loss": 0.6724, "step": 830 }, { "epoch": 0.07798423423423423, "grad_norm": 1.9816954635325812, "learning_rate": 2.5961839224272757e-06, "loss": 0.5576, "step": 831 }, { "epoch": 0.07807807807807808, "grad_norm": 2.2144309784502973, "learning_rate": 2.599311854863935e-06, "loss": 0.5359, "step": 832 }, { "epoch": 0.07817192192192192, "grad_norm": 2.7013146680544238, "learning_rate": 2.6024397873005945e-06, "loss": 0.5538, "step": 833 }, { "epoch": 0.07826576576576577, "grad_norm": 2.338923997660058, "learning_rate": 2.6055677197372537e-06, "loss": 0.578, "step": 834 }, { "epoch": 0.07835960960960961, "grad_norm": 2.1921455387183784, "learning_rate": 2.6086956521739132e-06, "loss": 0.5602, "step": 835 }, { "epoch": 0.07845345345345345, "grad_norm": 2.806477929619707, "learning_rate": 2.611823584610573e-06, "loss": 0.5589, "step": 836 }, { "epoch": 0.0785472972972973, "grad_norm": 2.4802901276378737, "learning_rate": 2.6149515170472324e-06, "loss": 0.6061, "step": 837 }, { "epoch": 0.07864114114114114, "grad_norm": 2.686815054991699, "learning_rate": 2.618079449483891e-06, "loss": 0.5371, "step": 838 }, { "epoch": 0.07873498498498499, "grad_norm": 6.473844497810295, "learning_rate": 2.6212073819205507e-06, "loss": 0.6202, "step": 839 }, { "epoch": 0.07882882882882883, "grad_norm": 2.3223745275007444, "learning_rate": 2.6243353143572103e-06, "loss": 0.5428, "step": 840 }, { "epoch": 0.07892267267267267, "grad_norm": 2.193635985755038, "learning_rate": 2.6274632467938695e-06, "loss": 0.6199, "step": 841 }, { "epoch": 0.07901651651651652, "grad_norm": 2.6854237985501883, "learning_rate": 2.630591179230529e-06, "loss": 0.6431, "step": 842 }, { "epoch": 0.07911036036036036, "grad_norm": 57.679505645205495, "learning_rate": 2.6337191116671882e-06, "loss": 0.5845, "step": 843 }, { "epoch": 0.07920420420420421, "grad_norm": 2.8203175291889657, "learning_rate": 2.6368470441038474e-06, "loss": 0.5784, "step": 844 }, { "epoch": 0.07929804804804805, "grad_norm": 4.668982418940882, "learning_rate": 2.639974976540507e-06, "loss": 0.5788, "step": 845 }, { "epoch": 0.07939189189189189, "grad_norm": 2.697619541724111, "learning_rate": 2.6431029089771665e-06, "loss": 0.5789, "step": 846 }, { "epoch": 0.07948573573573574, "grad_norm": 2.2829001678404346, "learning_rate": 2.6462308414138253e-06, "loss": 0.5452, "step": 847 }, { "epoch": 0.07957957957957958, "grad_norm": 2.298145834823505, "learning_rate": 2.649358773850485e-06, "loss": 0.5904, "step": 848 }, { "epoch": 0.07967342342342343, "grad_norm": 3.1809468838814987, "learning_rate": 2.6524867062871445e-06, "loss": 0.6222, "step": 849 }, { "epoch": 0.07976726726726727, "grad_norm": 2.47333514772555, "learning_rate": 2.6556146387238036e-06, "loss": 0.6234, "step": 850 }, { "epoch": 0.0798611111111111, "grad_norm": 3.18050656521093, "learning_rate": 2.658742571160463e-06, "loss": 0.6253, "step": 851 }, { "epoch": 0.07995495495495496, "grad_norm": 1.8888381274698365, "learning_rate": 2.6618705035971228e-06, "loss": 0.557, "step": 852 }, { "epoch": 0.0800487987987988, "grad_norm": 1.9289749761672834, "learning_rate": 2.6649984360337824e-06, "loss": 0.6287, "step": 853 }, { "epoch": 0.08014264264264265, "grad_norm": 3.798246116891678, "learning_rate": 2.668126368470441e-06, "loss": 0.5415, "step": 854 }, { "epoch": 0.08023648648648649, "grad_norm": 2.2737400196727147, "learning_rate": 2.6712543009071007e-06, "loss": 0.6172, "step": 855 }, { "epoch": 0.08033033033033032, "grad_norm": 2.1348610220315107, "learning_rate": 2.6743822333437603e-06, "loss": 0.5952, "step": 856 }, { "epoch": 0.08042417417417418, "grad_norm": 2.450596650407801, "learning_rate": 2.6775101657804194e-06, "loss": 0.5748, "step": 857 }, { "epoch": 0.08051801801801801, "grad_norm": 1.913720302350583, "learning_rate": 2.6806380982170786e-06, "loss": 0.5293, "step": 858 }, { "epoch": 0.08061186186186187, "grad_norm": 1.9974812086062887, "learning_rate": 2.683766030653738e-06, "loss": 0.6095, "step": 859 }, { "epoch": 0.0807057057057057, "grad_norm": 2.0280903304350524, "learning_rate": 2.6868939630903973e-06, "loss": 0.5631, "step": 860 }, { "epoch": 0.08079954954954954, "grad_norm": 2.223325069240265, "learning_rate": 2.690021895527057e-06, "loss": 0.5235, "step": 861 }, { "epoch": 0.0808933933933934, "grad_norm": 2.2006175118918674, "learning_rate": 2.6931498279637165e-06, "loss": 0.5645, "step": 862 }, { "epoch": 0.08098723723723723, "grad_norm": 2.494415486012368, "learning_rate": 2.6962777604003752e-06, "loss": 0.6092, "step": 863 }, { "epoch": 0.08108108108108109, "grad_norm": 2.3331408365165496, "learning_rate": 2.699405692837035e-06, "loss": 0.6071, "step": 864 }, { "epoch": 0.08117492492492492, "grad_norm": 2.127343007686841, "learning_rate": 2.7025336252736944e-06, "loss": 0.6038, "step": 865 }, { "epoch": 0.08126876876876876, "grad_norm": 2.2210653817041397, "learning_rate": 2.7056615577103536e-06, "loss": 0.5472, "step": 866 }, { "epoch": 0.08136261261261261, "grad_norm": 1.9068724536804966, "learning_rate": 2.708789490147013e-06, "loss": 0.5753, "step": 867 }, { "epoch": 0.08145645645645645, "grad_norm": 2.0920193379103704, "learning_rate": 2.7119174225836723e-06, "loss": 0.5679, "step": 868 }, { "epoch": 0.0815503003003003, "grad_norm": 4.426314841011764, "learning_rate": 2.7150453550203315e-06, "loss": 0.5968, "step": 869 }, { "epoch": 0.08164414414414414, "grad_norm": 1.9622275015027357, "learning_rate": 2.718173287456991e-06, "loss": 0.5305, "step": 870 }, { "epoch": 0.08173798798798798, "grad_norm": 1.980275203045201, "learning_rate": 2.7213012198936506e-06, "loss": 0.561, "step": 871 }, { "epoch": 0.08183183183183183, "grad_norm": 1.9650242662030073, "learning_rate": 2.7244291523303102e-06, "loss": 0.5355, "step": 872 }, { "epoch": 0.08192567567567567, "grad_norm": 2.301750630536184, "learning_rate": 2.727557084766969e-06, "loss": 0.5687, "step": 873 }, { "epoch": 0.08201951951951952, "grad_norm": 1.9999972075465295, "learning_rate": 2.7306850172036286e-06, "loss": 0.5856, "step": 874 }, { "epoch": 0.08211336336336336, "grad_norm": 2.3813727621068224, "learning_rate": 2.733812949640288e-06, "loss": 0.6141, "step": 875 }, { "epoch": 0.08220720720720721, "grad_norm": 2.79024127787013, "learning_rate": 2.7369408820769473e-06, "loss": 0.5442, "step": 876 }, { "epoch": 0.08230105105105105, "grad_norm": 1.9294802396064565, "learning_rate": 2.740068814513607e-06, "loss": 0.5817, "step": 877 }, { "epoch": 0.08239489489489489, "grad_norm": 2.4007190257423403, "learning_rate": 2.7431967469502665e-06, "loss": 0.4538, "step": 878 }, { "epoch": 0.08248873873873874, "grad_norm": 2.065153327975479, "learning_rate": 2.746324679386925e-06, "loss": 0.5411, "step": 879 }, { "epoch": 0.08258258258258258, "grad_norm": 2.115264640377381, "learning_rate": 2.7494526118235848e-06, "loss": 0.5928, "step": 880 }, { "epoch": 0.08267642642642643, "grad_norm": 3.1933145845092805, "learning_rate": 2.7525805442602444e-06, "loss": 0.5828, "step": 881 }, { "epoch": 0.08277027027027027, "grad_norm": 1.8566513458126612, "learning_rate": 2.7557084766969035e-06, "loss": 0.4764, "step": 882 }, { "epoch": 0.08286411411411411, "grad_norm": 2.2220517528562436, "learning_rate": 2.7588364091335627e-06, "loss": 0.5358, "step": 883 }, { "epoch": 0.08295795795795796, "grad_norm": 3.6324144977997057, "learning_rate": 2.7619643415702223e-06, "loss": 0.5764, "step": 884 }, { "epoch": 0.0830518018018018, "grad_norm": 2.1353122374556697, "learning_rate": 2.7650922740068814e-06, "loss": 0.5391, "step": 885 }, { "epoch": 0.08314564564564565, "grad_norm": 3.953027353122582, "learning_rate": 2.768220206443541e-06, "loss": 0.6103, "step": 886 }, { "epoch": 0.08323948948948949, "grad_norm": 2.202256312531811, "learning_rate": 2.7713481388802006e-06, "loss": 0.5924, "step": 887 }, { "epoch": 0.08333333333333333, "grad_norm": 6.928346702844369, "learning_rate": 2.77447607131686e-06, "loss": 0.535, "step": 888 }, { "epoch": 0.08342717717717718, "grad_norm": 2.4434025259499346, "learning_rate": 2.777604003753519e-06, "loss": 0.5908, "step": 889 }, { "epoch": 0.08352102102102102, "grad_norm": 2.337097443323234, "learning_rate": 2.7807319361901785e-06, "loss": 0.5707, "step": 890 }, { "epoch": 0.08361486486486487, "grad_norm": 2.2903796551861513, "learning_rate": 2.783859868626838e-06, "loss": 0.563, "step": 891 }, { "epoch": 0.08370870870870871, "grad_norm": 2.096043191897608, "learning_rate": 2.7869878010634973e-06, "loss": 0.5704, "step": 892 }, { "epoch": 0.08380255255255255, "grad_norm": 2.0655690232601684, "learning_rate": 2.790115733500157e-06, "loss": 0.5528, "step": 893 }, { "epoch": 0.0838963963963964, "grad_norm": 2.4944573746514953, "learning_rate": 2.793243665936816e-06, "loss": 0.564, "step": 894 }, { "epoch": 0.08399024024024024, "grad_norm": 2.081893727324139, "learning_rate": 2.796371598373475e-06, "loss": 0.5874, "step": 895 }, { "epoch": 0.08408408408408409, "grad_norm": 2.2004005506330566, "learning_rate": 2.7994995308101347e-06, "loss": 0.5587, "step": 896 }, { "epoch": 0.08417792792792793, "grad_norm": 2.4424887800367183, "learning_rate": 2.8026274632467943e-06, "loss": 0.5655, "step": 897 }, { "epoch": 0.08427177177177177, "grad_norm": 1.948829821468454, "learning_rate": 2.805755395683453e-06, "loss": 0.5474, "step": 898 }, { "epoch": 0.08436561561561562, "grad_norm": 2.4322037513501384, "learning_rate": 2.8088833281201127e-06, "loss": 0.5544, "step": 899 }, { "epoch": 0.08445945945945946, "grad_norm": 2.5931371419178117, "learning_rate": 2.8120112605567722e-06, "loss": 0.5696, "step": 900 }, { "epoch": 0.08455330330330331, "grad_norm": 3.995666167003037, "learning_rate": 2.8151391929934314e-06, "loss": 0.5225, "step": 901 }, { "epoch": 0.08464714714714715, "grad_norm": 2.135093968483641, "learning_rate": 2.818267125430091e-06, "loss": 0.5097, "step": 902 }, { "epoch": 0.08474099099099099, "grad_norm": 2.4959477102493604, "learning_rate": 2.8213950578667506e-06, "loss": 0.6139, "step": 903 }, { "epoch": 0.08483483483483484, "grad_norm": 2.1517920815505143, "learning_rate": 2.8245229903034097e-06, "loss": 0.5622, "step": 904 }, { "epoch": 0.08492867867867868, "grad_norm": 2.1268062180110507, "learning_rate": 2.827650922740069e-06, "loss": 0.555, "step": 905 }, { "epoch": 0.08502252252252253, "grad_norm": 3.299554815753772, "learning_rate": 2.8307788551767285e-06, "loss": 0.6306, "step": 906 }, { "epoch": 0.08511636636636637, "grad_norm": 2.2243653997612873, "learning_rate": 2.833906787613388e-06, "loss": 0.5532, "step": 907 }, { "epoch": 0.0852102102102102, "grad_norm": 2.337798067141395, "learning_rate": 2.837034720050047e-06, "loss": 0.5491, "step": 908 }, { "epoch": 0.08530405405405406, "grad_norm": 2.1207158388618037, "learning_rate": 2.8401626524867064e-06, "loss": 0.5528, "step": 909 }, { "epoch": 0.0853978978978979, "grad_norm": 2.1320113590360443, "learning_rate": 2.843290584923366e-06, "loss": 0.6337, "step": 910 }, { "epoch": 0.08549174174174175, "grad_norm": 2.1678470294734975, "learning_rate": 2.846418517360025e-06, "loss": 0.5925, "step": 911 }, { "epoch": 0.08558558558558559, "grad_norm": 2.0185543798526893, "learning_rate": 2.8495464497966847e-06, "loss": 0.5668, "step": 912 }, { "epoch": 0.08567942942942942, "grad_norm": 2.027500548395578, "learning_rate": 2.8526743822333443e-06, "loss": 0.5078, "step": 913 }, { "epoch": 0.08577327327327328, "grad_norm": 2.265030663429937, "learning_rate": 2.855802314670003e-06, "loss": 0.5693, "step": 914 }, { "epoch": 0.08586711711711711, "grad_norm": 2.002500658423593, "learning_rate": 2.8589302471066626e-06, "loss": 0.5687, "step": 915 }, { "epoch": 0.08596096096096097, "grad_norm": 2.283832078217732, "learning_rate": 2.862058179543322e-06, "loss": 0.5675, "step": 916 }, { "epoch": 0.0860548048048048, "grad_norm": 2.523863272906467, "learning_rate": 2.8651861119799814e-06, "loss": 0.5501, "step": 917 }, { "epoch": 0.08614864864864864, "grad_norm": 2.556876670358958, "learning_rate": 2.868314044416641e-06, "loss": 0.6038, "step": 918 }, { "epoch": 0.0862424924924925, "grad_norm": 2.0424594671436114, "learning_rate": 2.8714419768533e-06, "loss": 0.5809, "step": 919 }, { "epoch": 0.08633633633633633, "grad_norm": 2.0015646887983976, "learning_rate": 2.8745699092899597e-06, "loss": 0.5864, "step": 920 }, { "epoch": 0.08643018018018019, "grad_norm": 2.2776765462905524, "learning_rate": 2.877697841726619e-06, "loss": 0.5785, "step": 921 }, { "epoch": 0.08652402402402402, "grad_norm": 3.020116559007729, "learning_rate": 2.8808257741632784e-06, "loss": 0.5733, "step": 922 }, { "epoch": 0.08661786786786786, "grad_norm": 2.292623814329313, "learning_rate": 2.883953706599938e-06, "loss": 0.517, "step": 923 }, { "epoch": 0.08671171171171171, "grad_norm": 2.01861202108983, "learning_rate": 2.8870816390365968e-06, "loss": 0.5765, "step": 924 }, { "epoch": 0.08680555555555555, "grad_norm": 2.6202646704210117, "learning_rate": 2.8902095714732563e-06, "loss": 0.5758, "step": 925 }, { "epoch": 0.0868993993993994, "grad_norm": 3.185174331199342, "learning_rate": 2.893337503909916e-06, "loss": 0.5552, "step": 926 }, { "epoch": 0.08699324324324324, "grad_norm": 2.155655267576894, "learning_rate": 2.896465436346575e-06, "loss": 0.5561, "step": 927 }, { "epoch": 0.08708708708708708, "grad_norm": 2.1559855428198382, "learning_rate": 2.8995933687832347e-06, "loss": 0.5177, "step": 928 }, { "epoch": 0.08718093093093093, "grad_norm": 1.8802969955836972, "learning_rate": 2.9027213012198942e-06, "loss": 0.531, "step": 929 }, { "epoch": 0.08727477477477477, "grad_norm": 2.483958609858957, "learning_rate": 2.905849233656553e-06, "loss": 0.5872, "step": 930 }, { "epoch": 0.08736861861861862, "grad_norm": 7.569691866705792, "learning_rate": 2.9089771660932126e-06, "loss": 0.5861, "step": 931 }, { "epoch": 0.08746246246246246, "grad_norm": 2.139188521504847, "learning_rate": 2.912105098529872e-06, "loss": 0.564, "step": 932 }, { "epoch": 0.0875563063063063, "grad_norm": 1.9414050387303545, "learning_rate": 2.915233030966531e-06, "loss": 0.5557, "step": 933 }, { "epoch": 0.08765015015015015, "grad_norm": 3.824955221061669, "learning_rate": 2.9183609634031905e-06, "loss": 0.5788, "step": 934 }, { "epoch": 0.08774399399399399, "grad_norm": 16.903977429049895, "learning_rate": 2.92148889583985e-06, "loss": 0.5176, "step": 935 }, { "epoch": 0.08783783783783784, "grad_norm": 2.1682378361041246, "learning_rate": 2.9246168282765096e-06, "loss": 0.537, "step": 936 }, { "epoch": 0.08793168168168168, "grad_norm": 4.395055650534568, "learning_rate": 2.927744760713169e-06, "loss": 0.5685, "step": 937 }, { "epoch": 0.08802552552552552, "grad_norm": 3.3897932030038995, "learning_rate": 2.9308726931498284e-06, "loss": 0.5931, "step": 938 }, { "epoch": 0.08811936936936937, "grad_norm": 2.204732522209389, "learning_rate": 2.934000625586488e-06, "loss": 0.5499, "step": 939 }, { "epoch": 0.08821321321321321, "grad_norm": 2.551973635468441, "learning_rate": 2.9371285580231467e-06, "loss": 0.6368, "step": 940 }, { "epoch": 0.08830705705705706, "grad_norm": 2.161156820253394, "learning_rate": 2.9402564904598063e-06, "loss": 0.5926, "step": 941 }, { "epoch": 0.0884009009009009, "grad_norm": 1.9613737456182296, "learning_rate": 2.943384422896466e-06, "loss": 0.4917, "step": 942 }, { "epoch": 0.08849474474474474, "grad_norm": 2.0610783116357254, "learning_rate": 2.946512355333125e-06, "loss": 0.5669, "step": 943 }, { "epoch": 0.08858858858858859, "grad_norm": 2.3768098281944594, "learning_rate": 2.949640287769784e-06, "loss": 0.5631, "step": 944 }, { "epoch": 0.08868243243243243, "grad_norm": 2.1626863592139465, "learning_rate": 2.9527682202064438e-06, "loss": 0.5495, "step": 945 }, { "epoch": 0.08877627627627628, "grad_norm": 2.384752505110542, "learning_rate": 2.955896152643103e-06, "loss": 0.5817, "step": 946 }, { "epoch": 0.08887012012012012, "grad_norm": 2.203898064287337, "learning_rate": 2.9590240850797625e-06, "loss": 0.5517, "step": 947 }, { "epoch": 0.08896396396396396, "grad_norm": 2.8374419363139842, "learning_rate": 2.962152017516422e-06, "loss": 0.5926, "step": 948 }, { "epoch": 0.08905780780780781, "grad_norm": 2.1536872127218856, "learning_rate": 2.965279949953081e-06, "loss": 0.5536, "step": 949 }, { "epoch": 0.08915165165165165, "grad_norm": 1.9600626387858608, "learning_rate": 2.9684078823897404e-06, "loss": 0.5454, "step": 950 }, { "epoch": 0.0892454954954955, "grad_norm": 2.007501077974103, "learning_rate": 2.9715358148264e-06, "loss": 0.5577, "step": 951 }, { "epoch": 0.08933933933933934, "grad_norm": 2.1045791671246445, "learning_rate": 2.9746637472630596e-06, "loss": 0.5749, "step": 952 }, { "epoch": 0.08943318318318318, "grad_norm": 5.500109026308995, "learning_rate": 2.9777916796997188e-06, "loss": 0.5442, "step": 953 }, { "epoch": 0.08952702702702703, "grad_norm": 2.135381689491945, "learning_rate": 2.9809196121363783e-06, "loss": 0.6244, "step": 954 }, { "epoch": 0.08962087087087087, "grad_norm": 3.601559403732888, "learning_rate": 2.9840475445730375e-06, "loss": 0.5964, "step": 955 }, { "epoch": 0.08971471471471472, "grad_norm": 1.9351586712384092, "learning_rate": 2.9871754770096967e-06, "loss": 0.5559, "step": 956 }, { "epoch": 0.08980855855855856, "grad_norm": 2.4478844132290685, "learning_rate": 2.9903034094463563e-06, "loss": 0.5814, "step": 957 }, { "epoch": 0.08990240240240241, "grad_norm": 2.344565316390317, "learning_rate": 2.993431341883016e-06, "loss": 0.6114, "step": 958 }, { "epoch": 0.08999624624624625, "grad_norm": 1.8713255271813993, "learning_rate": 2.9965592743196746e-06, "loss": 0.5799, "step": 959 }, { "epoch": 0.09009009009009009, "grad_norm": 2.2911037378652144, "learning_rate": 2.999687206756334e-06, "loss": 0.5507, "step": 960 }, { "epoch": 0.09018393393393394, "grad_norm": 2.2305933191833054, "learning_rate": 3.0028151391929937e-06, "loss": 0.5582, "step": 961 }, { "epoch": 0.09027777777777778, "grad_norm": 1.8036606184656796, "learning_rate": 3.005943071629653e-06, "loss": 0.5626, "step": 962 }, { "epoch": 0.09037162162162163, "grad_norm": 3.979755180482466, "learning_rate": 3.0090710040663125e-06, "loss": 0.6013, "step": 963 }, { "epoch": 0.09046546546546547, "grad_norm": 1.9218053255839398, "learning_rate": 3.012198936502972e-06, "loss": 0.4976, "step": 964 }, { "epoch": 0.0905593093093093, "grad_norm": 2.3159901463513592, "learning_rate": 3.015326868939631e-06, "loss": 0.5806, "step": 965 }, { "epoch": 0.09065315315315316, "grad_norm": 1.8826439947598193, "learning_rate": 3.0184548013762904e-06, "loss": 0.5847, "step": 966 }, { "epoch": 0.090746996996997, "grad_norm": 2.245440272077196, "learning_rate": 3.02158273381295e-06, "loss": 0.628, "step": 967 }, { "epoch": 0.09084084084084085, "grad_norm": 2.1295720634971564, "learning_rate": 3.0247106662496096e-06, "loss": 0.6228, "step": 968 }, { "epoch": 0.09093468468468469, "grad_norm": 2.2564193145354805, "learning_rate": 3.0278385986862683e-06, "loss": 0.6011, "step": 969 }, { "epoch": 0.09102852852852852, "grad_norm": 1.9601934400213537, "learning_rate": 3.030966531122928e-06, "loss": 0.589, "step": 970 }, { "epoch": 0.09112237237237238, "grad_norm": 2.0961695565559717, "learning_rate": 3.0340944635595875e-06, "loss": 0.5878, "step": 971 }, { "epoch": 0.09121621621621621, "grad_norm": 1.768558124539744, "learning_rate": 3.0372223959962466e-06, "loss": 0.6034, "step": 972 }, { "epoch": 0.09131006006006007, "grad_norm": 4.705620695745603, "learning_rate": 3.0403503284329062e-06, "loss": 0.5648, "step": 973 }, { "epoch": 0.0914039039039039, "grad_norm": 2.2140372415029517, "learning_rate": 3.043478260869566e-06, "loss": 0.5294, "step": 974 }, { "epoch": 0.09149774774774774, "grad_norm": 2.5158646712045853, "learning_rate": 3.0466061933062245e-06, "loss": 0.5323, "step": 975 }, { "epoch": 0.0915915915915916, "grad_norm": 2.2541578704186733, "learning_rate": 3.049734125742884e-06, "loss": 0.5645, "step": 976 }, { "epoch": 0.09168543543543543, "grad_norm": 2.024448150953367, "learning_rate": 3.0528620581795437e-06, "loss": 0.5979, "step": 977 }, { "epoch": 0.09177927927927929, "grad_norm": 2.557029551428739, "learning_rate": 3.055989990616203e-06, "loss": 0.4921, "step": 978 }, { "epoch": 0.09187312312312312, "grad_norm": 3.985836980609802, "learning_rate": 3.0591179230528624e-06, "loss": 0.6139, "step": 979 }, { "epoch": 0.09196696696696696, "grad_norm": 1.9649204054855394, "learning_rate": 3.0622458554895216e-06, "loss": 0.5734, "step": 980 }, { "epoch": 0.09206081081081081, "grad_norm": 8.32803285972049, "learning_rate": 3.0653737879261808e-06, "loss": 0.5775, "step": 981 }, { "epoch": 0.09215465465465465, "grad_norm": 3.632599037105236, "learning_rate": 3.0685017203628404e-06, "loss": 0.5717, "step": 982 }, { "epoch": 0.0922484984984985, "grad_norm": 1.9601293357892717, "learning_rate": 3.0716296527995e-06, "loss": 0.5717, "step": 983 }, { "epoch": 0.09234234234234234, "grad_norm": 2.567891725291499, "learning_rate": 3.0747575852361595e-06, "loss": 0.5963, "step": 984 }, { "epoch": 0.09243618618618618, "grad_norm": 1.908779377062107, "learning_rate": 3.0778855176728183e-06, "loss": 0.5427, "step": 985 }, { "epoch": 0.09253003003003003, "grad_norm": 2.780847382957155, "learning_rate": 3.081013450109478e-06, "loss": 0.5644, "step": 986 }, { "epoch": 0.09262387387387387, "grad_norm": 2.022991985077043, "learning_rate": 3.0841413825461374e-06, "loss": 0.6292, "step": 987 }, { "epoch": 0.09271771771771772, "grad_norm": 2.102646907378194, "learning_rate": 3.0872693149827966e-06, "loss": 0.5858, "step": 988 }, { "epoch": 0.09281156156156156, "grad_norm": 2.2122616251792095, "learning_rate": 3.090397247419456e-06, "loss": 0.5172, "step": 989 }, { "epoch": 0.0929054054054054, "grad_norm": 2.1517347830756557, "learning_rate": 3.0935251798561158e-06, "loss": 0.5469, "step": 990 }, { "epoch": 0.09299924924924925, "grad_norm": 2.038879715990076, "learning_rate": 3.0966531122927745e-06, "loss": 0.5783, "step": 991 }, { "epoch": 0.09309309309309309, "grad_norm": 2.154900463443959, "learning_rate": 3.099781044729434e-06, "loss": 0.5914, "step": 992 }, { "epoch": 0.09318693693693694, "grad_norm": 2.198127706802774, "learning_rate": 3.1029089771660937e-06, "loss": 0.5257, "step": 993 }, { "epoch": 0.09328078078078078, "grad_norm": 2.339969865846749, "learning_rate": 3.106036909602753e-06, "loss": 0.6296, "step": 994 }, { "epoch": 0.09337462462462462, "grad_norm": 2.053974775200492, "learning_rate": 3.109164842039412e-06, "loss": 0.5834, "step": 995 }, { "epoch": 0.09346846846846847, "grad_norm": 2.0652010514863766, "learning_rate": 3.1122927744760716e-06, "loss": 0.5655, "step": 996 }, { "epoch": 0.09356231231231231, "grad_norm": 1.938126692513794, "learning_rate": 3.1154207069127307e-06, "loss": 0.5201, "step": 997 }, { "epoch": 0.09365615615615616, "grad_norm": 2.0255822917910677, "learning_rate": 3.1185486393493903e-06, "loss": 0.5781, "step": 998 }, { "epoch": 0.09375, "grad_norm": 1.932450665139219, "learning_rate": 3.12167657178605e-06, "loss": 0.5644, "step": 999 }, { "epoch": 0.09384384384384384, "grad_norm": 2.3515170492435615, "learning_rate": 3.1248045042227095e-06, "loss": 0.569, "step": 1000 }, { "epoch": 0.09393768768768769, "grad_norm": 2.1290391073581856, "learning_rate": 3.1279324366593682e-06, "loss": 0.5788, "step": 1001 }, { "epoch": 0.09403153153153153, "grad_norm": 2.366412260796367, "learning_rate": 3.131060369096028e-06, "loss": 0.5738, "step": 1002 }, { "epoch": 0.09412537537537538, "grad_norm": 3.9145993224815796, "learning_rate": 3.1341883015326874e-06, "loss": 0.5559, "step": 1003 }, { "epoch": 0.09421921921921922, "grad_norm": 3.3041193438033782, "learning_rate": 3.1373162339693465e-06, "loss": 0.539, "step": 1004 }, { "epoch": 0.09431306306306306, "grad_norm": 2.661870071575611, "learning_rate": 3.1404441664060057e-06, "loss": 0.5045, "step": 1005 }, { "epoch": 0.09440690690690691, "grad_norm": 1.9616343299678862, "learning_rate": 3.1435720988426653e-06, "loss": 0.5163, "step": 1006 }, { "epoch": 0.09450075075075075, "grad_norm": 1.9450840903714635, "learning_rate": 3.1467000312793245e-06, "loss": 0.6053, "step": 1007 }, { "epoch": 0.0945945945945946, "grad_norm": 2.199308276426639, "learning_rate": 3.149827963715984e-06, "loss": 0.5475, "step": 1008 }, { "epoch": 0.09468843843843844, "grad_norm": 1.9839162950407945, "learning_rate": 3.1529558961526436e-06, "loss": 0.5872, "step": 1009 }, { "epoch": 0.09478228228228228, "grad_norm": 2.2393732104533783, "learning_rate": 3.1560838285893024e-06, "loss": 0.6324, "step": 1010 }, { "epoch": 0.09487612612612613, "grad_norm": 6.344563925565948, "learning_rate": 3.159211761025962e-06, "loss": 0.57, "step": 1011 }, { "epoch": 0.09496996996996997, "grad_norm": 2.2108388184251555, "learning_rate": 3.1623396934626215e-06, "loss": 0.5258, "step": 1012 }, { "epoch": 0.09506381381381382, "grad_norm": 2.1357414046814247, "learning_rate": 3.1654676258992807e-06, "loss": 0.5698, "step": 1013 }, { "epoch": 0.09515765765765766, "grad_norm": 2.058941860975386, "learning_rate": 3.1685955583359403e-06, "loss": 0.5383, "step": 1014 }, { "epoch": 0.0952515015015015, "grad_norm": 2.057242934640074, "learning_rate": 3.1717234907726e-06, "loss": 0.5582, "step": 1015 }, { "epoch": 0.09534534534534535, "grad_norm": 2.1454715299160374, "learning_rate": 3.174851423209259e-06, "loss": 0.5129, "step": 1016 }, { "epoch": 0.09543918918918919, "grad_norm": 2.4076611274309414, "learning_rate": 3.177979355645918e-06, "loss": 0.6022, "step": 1017 }, { "epoch": 0.09553303303303304, "grad_norm": 2.324274435588543, "learning_rate": 3.1811072880825778e-06, "loss": 0.58, "step": 1018 }, { "epoch": 0.09562687687687688, "grad_norm": 2.4611187368321743, "learning_rate": 3.1842352205192373e-06, "loss": 0.5303, "step": 1019 }, { "epoch": 0.09572072072072071, "grad_norm": 2.1821257568516237, "learning_rate": 3.187363152955896e-06, "loss": 0.5537, "step": 1020 }, { "epoch": 0.09581456456456457, "grad_norm": 2.144404663391851, "learning_rate": 3.1904910853925557e-06, "loss": 0.4792, "step": 1021 }, { "epoch": 0.0959084084084084, "grad_norm": 2.377816250540905, "learning_rate": 3.1936190178292153e-06, "loss": 0.5302, "step": 1022 }, { "epoch": 0.09600225225225226, "grad_norm": 1.9156232213417095, "learning_rate": 3.1967469502658744e-06, "loss": 0.538, "step": 1023 }, { "epoch": 0.0960960960960961, "grad_norm": 2.002616499502242, "learning_rate": 3.199874882702534e-06, "loss": 0.5036, "step": 1024 }, { "epoch": 0.09618993993993993, "grad_norm": 2.740836088753826, "learning_rate": 3.2030028151391936e-06, "loss": 0.6241, "step": 1025 }, { "epoch": 0.09628378378378379, "grad_norm": 1.7476934339883157, "learning_rate": 3.2061307475758523e-06, "loss": 0.5489, "step": 1026 }, { "epoch": 0.09637762762762762, "grad_norm": 2.2259648720684257, "learning_rate": 3.209258680012512e-06, "loss": 0.5286, "step": 1027 }, { "epoch": 0.09647147147147148, "grad_norm": 2.281309254391647, "learning_rate": 3.2123866124491715e-06, "loss": 0.5467, "step": 1028 }, { "epoch": 0.09656531531531531, "grad_norm": 2.1461239905801826, "learning_rate": 3.2155145448858306e-06, "loss": 0.645, "step": 1029 }, { "epoch": 0.09665915915915915, "grad_norm": 2.3869470081070556, "learning_rate": 3.2186424773224902e-06, "loss": 0.5274, "step": 1030 }, { "epoch": 0.096753003003003, "grad_norm": 1.929799606151892, "learning_rate": 3.2217704097591494e-06, "loss": 0.53, "step": 1031 }, { "epoch": 0.09684684684684684, "grad_norm": 2.3475862367842155, "learning_rate": 3.224898342195809e-06, "loss": 0.5283, "step": 1032 }, { "epoch": 0.0969406906906907, "grad_norm": 2.1036507535149616, "learning_rate": 3.228026274632468e-06, "loss": 0.5676, "step": 1033 }, { "epoch": 0.09703453453453453, "grad_norm": 2.4405804630672203, "learning_rate": 3.2311542070691277e-06, "loss": 0.5301, "step": 1034 }, { "epoch": 0.09712837837837837, "grad_norm": 1.953626764754654, "learning_rate": 3.2342821395057873e-06, "loss": 0.5827, "step": 1035 }, { "epoch": 0.09722222222222222, "grad_norm": 3.504894356613674, "learning_rate": 3.237410071942446e-06, "loss": 0.6098, "step": 1036 }, { "epoch": 0.09731606606606606, "grad_norm": 2.3609306324725097, "learning_rate": 3.2405380043791056e-06, "loss": 0.6156, "step": 1037 }, { "epoch": 0.09740990990990991, "grad_norm": 2.3110914619540996, "learning_rate": 3.243665936815765e-06, "loss": 0.5302, "step": 1038 }, { "epoch": 0.09750375375375375, "grad_norm": 2.006252785166137, "learning_rate": 3.2467938692524244e-06, "loss": 0.5372, "step": 1039 }, { "epoch": 0.09759759759759759, "grad_norm": 3.036979911354387, "learning_rate": 3.249921801689084e-06, "loss": 0.5796, "step": 1040 }, { "epoch": 0.09769144144144144, "grad_norm": 2.3533228811849827, "learning_rate": 3.253049734125743e-06, "loss": 0.5781, "step": 1041 }, { "epoch": 0.09778528528528528, "grad_norm": 2.2975203987446142, "learning_rate": 3.2561776665624023e-06, "loss": 0.5838, "step": 1042 }, { "epoch": 0.09787912912912913, "grad_norm": 2.0404616348472886, "learning_rate": 3.259305598999062e-06, "loss": 0.5734, "step": 1043 }, { "epoch": 0.09797297297297297, "grad_norm": 2.0062581397842187, "learning_rate": 3.2624335314357214e-06, "loss": 0.6023, "step": 1044 }, { "epoch": 0.09806681681681682, "grad_norm": 2.1793966364202846, "learning_rate": 3.26556146387238e-06, "loss": 0.6054, "step": 1045 }, { "epoch": 0.09816066066066066, "grad_norm": 2.230901596110041, "learning_rate": 3.2686893963090398e-06, "loss": 0.6246, "step": 1046 }, { "epoch": 0.0982545045045045, "grad_norm": 1.8891206476737366, "learning_rate": 3.2718173287456994e-06, "loss": 0.5655, "step": 1047 }, { "epoch": 0.09834834834834835, "grad_norm": 2.247799288130519, "learning_rate": 3.274945261182359e-06, "loss": 0.5863, "step": 1048 }, { "epoch": 0.09844219219219219, "grad_norm": 2.5340081869516093, "learning_rate": 3.278073193619018e-06, "loss": 0.5496, "step": 1049 }, { "epoch": 0.09853603603603604, "grad_norm": 2.120988008258499, "learning_rate": 3.2812011260556777e-06, "loss": 0.5618, "step": 1050 }, { "epoch": 0.09862987987987988, "grad_norm": 2.1879960348051153, "learning_rate": 3.2843290584923373e-06, "loss": 0.5535, "step": 1051 }, { "epoch": 0.09872372372372372, "grad_norm": 2.287643836687262, "learning_rate": 3.287456990928996e-06, "loss": 0.5514, "step": 1052 }, { "epoch": 0.09881756756756757, "grad_norm": 3.2151748859453955, "learning_rate": 3.2905849233656556e-06, "loss": 0.5209, "step": 1053 }, { "epoch": 0.09891141141141141, "grad_norm": 2.035128306558533, "learning_rate": 3.293712855802315e-06, "loss": 0.5391, "step": 1054 }, { "epoch": 0.09900525525525526, "grad_norm": 3.003198216328017, "learning_rate": 3.2968407882389743e-06, "loss": 0.6074, "step": 1055 }, { "epoch": 0.0990990990990991, "grad_norm": 1.8749291745978194, "learning_rate": 3.2999687206756335e-06, "loss": 0.4752, "step": 1056 }, { "epoch": 0.09919294294294294, "grad_norm": 2.2007600149543753, "learning_rate": 3.303096653112293e-06, "loss": 0.5207, "step": 1057 }, { "epoch": 0.09928678678678679, "grad_norm": 2.671653980102906, "learning_rate": 3.3062245855489522e-06, "loss": 0.6168, "step": 1058 }, { "epoch": 0.09938063063063063, "grad_norm": 1.9863343739443509, "learning_rate": 3.309352517985612e-06, "loss": 0.5439, "step": 1059 }, { "epoch": 0.09947447447447448, "grad_norm": 2.693145894463581, "learning_rate": 3.3124804504222714e-06, "loss": 0.541, "step": 1060 }, { "epoch": 0.09956831831831832, "grad_norm": 4.095596257551906, "learning_rate": 3.31560838285893e-06, "loss": 0.4955, "step": 1061 }, { "epoch": 0.09966216216216216, "grad_norm": 2.040638787211276, "learning_rate": 3.3187363152955897e-06, "loss": 0.5517, "step": 1062 }, { "epoch": 0.09975600600600601, "grad_norm": 1.7662183414822044, "learning_rate": 3.3218642477322493e-06, "loss": 0.4964, "step": 1063 }, { "epoch": 0.09984984984984985, "grad_norm": 2.247868376160564, "learning_rate": 3.324992180168909e-06, "loss": 0.5504, "step": 1064 }, { "epoch": 0.0999436936936937, "grad_norm": 4.30200720479904, "learning_rate": 3.328120112605568e-06, "loss": 0.5763, "step": 1065 }, { "epoch": 0.10003753753753754, "grad_norm": 2.1212418524242818, "learning_rate": 3.3312480450422276e-06, "loss": 0.5779, "step": 1066 }, { "epoch": 0.10013138138138138, "grad_norm": 2.800218090287, "learning_rate": 3.334375977478887e-06, "loss": 0.5407, "step": 1067 }, { "epoch": 0.10022522522522523, "grad_norm": 3.288639262780801, "learning_rate": 3.337503909915546e-06, "loss": 0.5378, "step": 1068 }, { "epoch": 0.10031906906906907, "grad_norm": 2.2986681932878796, "learning_rate": 3.3406318423522055e-06, "loss": 0.5844, "step": 1069 }, { "epoch": 0.10041291291291292, "grad_norm": 2.041095407676196, "learning_rate": 3.343759774788865e-06, "loss": 0.547, "step": 1070 }, { "epoch": 0.10050675675675676, "grad_norm": 2.0088594602031415, "learning_rate": 3.346887707225524e-06, "loss": 0.5424, "step": 1071 }, { "epoch": 0.1006006006006006, "grad_norm": 2.583527058521348, "learning_rate": 3.3500156396621835e-06, "loss": 0.5243, "step": 1072 }, { "epoch": 0.10069444444444445, "grad_norm": 2.2542743661758844, "learning_rate": 3.353143572098843e-06, "loss": 0.559, "step": 1073 }, { "epoch": 0.10078828828828829, "grad_norm": 3.5392847895898147, "learning_rate": 3.356271504535502e-06, "loss": 0.5732, "step": 1074 }, { "epoch": 0.10088213213213214, "grad_norm": 2.5747748977216474, "learning_rate": 3.3593994369721618e-06, "loss": 0.5553, "step": 1075 }, { "epoch": 0.10097597597597598, "grad_norm": 2.2196761146949493, "learning_rate": 3.3625273694088214e-06, "loss": 0.5882, "step": 1076 }, { "epoch": 0.10106981981981981, "grad_norm": 3.1179657822845397, "learning_rate": 3.36565530184548e-06, "loss": 0.5271, "step": 1077 }, { "epoch": 0.10116366366366367, "grad_norm": 2.5187005774309594, "learning_rate": 3.3687832342821397e-06, "loss": 0.5519, "step": 1078 }, { "epoch": 0.1012575075075075, "grad_norm": 2.6558227013045768, "learning_rate": 3.3719111667187993e-06, "loss": 0.6054, "step": 1079 }, { "epoch": 0.10135135135135136, "grad_norm": 2.0332715539196435, "learning_rate": 3.3750390991554584e-06, "loss": 0.5182, "step": 1080 }, { "epoch": 0.1014451951951952, "grad_norm": 2.1625905635355114, "learning_rate": 3.3781670315921176e-06, "loss": 0.5216, "step": 1081 }, { "epoch": 0.10153903903903903, "grad_norm": 2.4096034895031333, "learning_rate": 3.381294964028777e-06, "loss": 0.5493, "step": 1082 }, { "epoch": 0.10163288288288289, "grad_norm": 2.124790358379489, "learning_rate": 3.3844228964654368e-06, "loss": 0.5774, "step": 1083 }, { "epoch": 0.10172672672672672, "grad_norm": 2.1040228325371593, "learning_rate": 3.387550828902096e-06, "loss": 0.5158, "step": 1084 }, { "epoch": 0.10182057057057058, "grad_norm": 2.4678767873424587, "learning_rate": 3.3906787613387555e-06, "loss": 0.5778, "step": 1085 }, { "epoch": 0.10191441441441441, "grad_norm": 2.106925849106885, "learning_rate": 3.393806693775415e-06, "loss": 0.5548, "step": 1086 }, { "epoch": 0.10200825825825825, "grad_norm": 2.0600142946387954, "learning_rate": 3.396934626212074e-06, "loss": 0.605, "step": 1087 }, { "epoch": 0.1021021021021021, "grad_norm": 3.367522750208518, "learning_rate": 3.4000625586487334e-06, "loss": 0.5784, "step": 1088 }, { "epoch": 0.10219594594594594, "grad_norm": 2.1805714391063367, "learning_rate": 3.403190491085393e-06, "loss": 0.4957, "step": 1089 }, { "epoch": 0.1022897897897898, "grad_norm": 2.021311496437906, "learning_rate": 3.406318423522052e-06, "loss": 0.5675, "step": 1090 }, { "epoch": 0.10238363363363363, "grad_norm": 2.018609685842182, "learning_rate": 3.4094463559587117e-06, "loss": 0.5872, "step": 1091 }, { "epoch": 0.10247747747747747, "grad_norm": 2.120466474049351, "learning_rate": 3.412574288395371e-06, "loss": 0.627, "step": 1092 }, { "epoch": 0.10257132132132132, "grad_norm": 2.062128865462151, "learning_rate": 3.41570222083203e-06, "loss": 0.5479, "step": 1093 }, { "epoch": 0.10266516516516516, "grad_norm": 2.291280328343781, "learning_rate": 3.4188301532686896e-06, "loss": 0.5494, "step": 1094 }, { "epoch": 0.10275900900900901, "grad_norm": 2.0450337592836823, "learning_rate": 3.4219580857053492e-06, "loss": 0.4857, "step": 1095 }, { "epoch": 0.10285285285285285, "grad_norm": 2.113912622611828, "learning_rate": 3.425086018142008e-06, "loss": 0.5588, "step": 1096 }, { "epoch": 0.10294669669669669, "grad_norm": 2.9119116858635805, "learning_rate": 3.4282139505786676e-06, "loss": 0.5932, "step": 1097 }, { "epoch": 0.10304054054054054, "grad_norm": 2.338611630380699, "learning_rate": 3.431341883015327e-06, "loss": 0.6195, "step": 1098 }, { "epoch": 0.10313438438438438, "grad_norm": 2.024976835809703, "learning_rate": 3.4344698154519867e-06, "loss": 0.5362, "step": 1099 }, { "epoch": 0.10322822822822823, "grad_norm": 2.77001338927488, "learning_rate": 3.437597747888646e-06, "loss": 0.5181, "step": 1100 }, { "epoch": 0.10332207207207207, "grad_norm": 2.1595804393201514, "learning_rate": 3.4407256803253055e-06, "loss": 0.6167, "step": 1101 }, { "epoch": 0.10341591591591591, "grad_norm": 1.9852519560819364, "learning_rate": 3.443853612761965e-06, "loss": 0.5232, "step": 1102 }, { "epoch": 0.10350975975975976, "grad_norm": 1.8850461665101972, "learning_rate": 3.4469815451986238e-06, "loss": 0.5846, "step": 1103 }, { "epoch": 0.1036036036036036, "grad_norm": 3.4831967282753267, "learning_rate": 3.4501094776352834e-06, "loss": 0.552, "step": 1104 }, { "epoch": 0.10369744744744745, "grad_norm": 2.026868860707684, "learning_rate": 3.453237410071943e-06, "loss": 0.5106, "step": 1105 }, { "epoch": 0.10379129129129129, "grad_norm": 6.094905298655813, "learning_rate": 3.4563653425086017e-06, "loss": 0.5851, "step": 1106 }, { "epoch": 0.10388513513513513, "grad_norm": 1.803106983207924, "learning_rate": 3.4594932749452613e-06, "loss": 0.4847, "step": 1107 }, { "epoch": 0.10397897897897898, "grad_norm": 3.163173785542437, "learning_rate": 3.462621207381921e-06, "loss": 0.5215, "step": 1108 }, { "epoch": 0.10407282282282282, "grad_norm": 2.8993804588447167, "learning_rate": 3.46574913981858e-06, "loss": 0.5399, "step": 1109 }, { "epoch": 0.10416666666666667, "grad_norm": 2.417837678750671, "learning_rate": 3.4688770722552396e-06, "loss": 0.6125, "step": 1110 }, { "epoch": 0.10426051051051051, "grad_norm": 2.840572564879856, "learning_rate": 3.472005004691899e-06, "loss": 0.5224, "step": 1111 }, { "epoch": 0.10435435435435435, "grad_norm": 2.320487363246699, "learning_rate": 3.475132937128558e-06, "loss": 0.5627, "step": 1112 }, { "epoch": 0.1044481981981982, "grad_norm": 2.058690134081316, "learning_rate": 3.4782608695652175e-06, "loss": 0.5648, "step": 1113 }, { "epoch": 0.10454204204204204, "grad_norm": 2.1078289505826913, "learning_rate": 3.481388802001877e-06, "loss": 0.5402, "step": 1114 }, { "epoch": 0.10463588588588589, "grad_norm": 2.4023117434717465, "learning_rate": 3.4845167344385367e-06, "loss": 0.5311, "step": 1115 }, { "epoch": 0.10472972972972973, "grad_norm": 5.067143547064238, "learning_rate": 3.487644666875196e-06, "loss": 0.6097, "step": 1116 }, { "epoch": 0.10482357357357357, "grad_norm": 2.2346936468750997, "learning_rate": 3.490772599311855e-06, "loss": 0.5728, "step": 1117 }, { "epoch": 0.10491741741741742, "grad_norm": 2.414489701624181, "learning_rate": 3.4939005317485146e-06, "loss": 0.6031, "step": 1118 }, { "epoch": 0.10501126126126126, "grad_norm": 2.392068588243331, "learning_rate": 3.4970284641851737e-06, "loss": 0.6041, "step": 1119 }, { "epoch": 0.10510510510510511, "grad_norm": 2.9446046358658524, "learning_rate": 3.5001563966218333e-06, "loss": 0.5993, "step": 1120 }, { "epoch": 0.10519894894894895, "grad_norm": 1.942208179753855, "learning_rate": 3.503284329058493e-06, "loss": 0.5513, "step": 1121 }, { "epoch": 0.10529279279279279, "grad_norm": 1.9819033877210543, "learning_rate": 3.5064122614951517e-06, "loss": 0.5558, "step": 1122 }, { "epoch": 0.10538663663663664, "grad_norm": 1.9218483166562192, "learning_rate": 3.5095401939318112e-06, "loss": 0.5358, "step": 1123 }, { "epoch": 0.10548048048048048, "grad_norm": 2.7222207276622536, "learning_rate": 3.512668126368471e-06, "loss": 0.5161, "step": 1124 }, { "epoch": 0.10557432432432433, "grad_norm": 4.399757822882535, "learning_rate": 3.51579605880513e-06, "loss": 0.5737, "step": 1125 }, { "epoch": 0.10566816816816817, "grad_norm": 2.03377987327115, "learning_rate": 3.5189239912417896e-06, "loss": 0.5649, "step": 1126 }, { "epoch": 0.10576201201201202, "grad_norm": 2.0007606788664654, "learning_rate": 3.522051923678449e-06, "loss": 0.6012, "step": 1127 }, { "epoch": 0.10585585585585586, "grad_norm": 2.6217434873442325, "learning_rate": 3.525179856115108e-06, "loss": 0.5104, "step": 1128 }, { "epoch": 0.1059496996996997, "grad_norm": 2.1729350486650416, "learning_rate": 3.5283077885517675e-06, "loss": 0.5369, "step": 1129 }, { "epoch": 0.10604354354354355, "grad_norm": 1.9980404546511334, "learning_rate": 3.531435720988427e-06, "loss": 0.5599, "step": 1130 }, { "epoch": 0.10613738738738739, "grad_norm": 2.0663412899732108, "learning_rate": 3.5345636534250866e-06, "loss": 0.5542, "step": 1131 }, { "epoch": 0.10623123123123124, "grad_norm": 2.0723788672423207, "learning_rate": 3.5376915858617454e-06, "loss": 0.5082, "step": 1132 }, { "epoch": 0.10632507507507508, "grad_norm": 2.174719621567928, "learning_rate": 3.540819518298405e-06, "loss": 0.5278, "step": 1133 }, { "epoch": 0.10641891891891891, "grad_norm": 2.0225134741474897, "learning_rate": 3.5439474507350645e-06, "loss": 0.5138, "step": 1134 }, { "epoch": 0.10651276276276277, "grad_norm": 2.2379485510879893, "learning_rate": 3.5470753831717237e-06, "loss": 0.5883, "step": 1135 }, { "epoch": 0.1066066066066066, "grad_norm": 2.54811067907011, "learning_rate": 3.5502033156083833e-06, "loss": 0.4985, "step": 1136 }, { "epoch": 0.10670045045045046, "grad_norm": 2.1781833636843175, "learning_rate": 3.553331248045043e-06, "loss": 0.4937, "step": 1137 }, { "epoch": 0.1067942942942943, "grad_norm": 2.2531499498145493, "learning_rate": 3.5564591804817016e-06, "loss": 0.6193, "step": 1138 }, { "epoch": 0.10688813813813813, "grad_norm": 1.8901836683312085, "learning_rate": 3.559587112918361e-06, "loss": 0.5358, "step": 1139 }, { "epoch": 0.10698198198198199, "grad_norm": 4.099013244121398, "learning_rate": 3.5627150453550208e-06, "loss": 0.4738, "step": 1140 }, { "epoch": 0.10707582582582582, "grad_norm": 1.9230851422909463, "learning_rate": 3.56584297779168e-06, "loss": 0.6091, "step": 1141 }, { "epoch": 0.10716966966966968, "grad_norm": 2.0754444434659596, "learning_rate": 3.568970910228339e-06, "loss": 0.5976, "step": 1142 }, { "epoch": 0.10726351351351351, "grad_norm": 2.723613486232721, "learning_rate": 3.5720988426649987e-06, "loss": 0.5331, "step": 1143 }, { "epoch": 0.10735735735735735, "grad_norm": 2.5059487687945525, "learning_rate": 3.575226775101658e-06, "loss": 0.5461, "step": 1144 }, { "epoch": 0.1074512012012012, "grad_norm": 2.0475267276039255, "learning_rate": 3.5783547075383174e-06, "loss": 0.5818, "step": 1145 }, { "epoch": 0.10754504504504504, "grad_norm": 5.239329390059535, "learning_rate": 3.581482639974977e-06, "loss": 0.5958, "step": 1146 }, { "epoch": 0.1076388888888889, "grad_norm": 2.3411748327712294, "learning_rate": 3.5846105724116366e-06, "loss": 0.5803, "step": 1147 }, { "epoch": 0.10773273273273273, "grad_norm": 1.9870824240775509, "learning_rate": 3.5877385048482953e-06, "loss": 0.5629, "step": 1148 }, { "epoch": 0.10782657657657657, "grad_norm": 1.8650100061058077, "learning_rate": 3.590866437284955e-06, "loss": 0.5636, "step": 1149 }, { "epoch": 0.10792042042042042, "grad_norm": 2.269559140124047, "learning_rate": 3.5939943697216145e-06, "loss": 0.5342, "step": 1150 }, { "epoch": 0.10801426426426426, "grad_norm": 4.868443408134084, "learning_rate": 3.5971223021582737e-06, "loss": 0.5692, "step": 1151 }, { "epoch": 0.10810810810810811, "grad_norm": 2.0759304194640054, "learning_rate": 3.6002502345949332e-06, "loss": 0.5919, "step": 1152 }, { "epoch": 0.10820195195195195, "grad_norm": 2.6440028307984176, "learning_rate": 3.6033781670315924e-06, "loss": 0.5848, "step": 1153 }, { "epoch": 0.10829579579579579, "grad_norm": 1.9756218464524378, "learning_rate": 3.6065060994682516e-06, "loss": 0.5554, "step": 1154 }, { "epoch": 0.10838963963963964, "grad_norm": 2.160004850382239, "learning_rate": 3.609634031904911e-06, "loss": 0.5922, "step": 1155 }, { "epoch": 0.10848348348348348, "grad_norm": 1.9934919982719224, "learning_rate": 3.6127619643415707e-06, "loss": 0.5697, "step": 1156 }, { "epoch": 0.10857732732732733, "grad_norm": 1.8823596127466105, "learning_rate": 3.6158898967782295e-06, "loss": 0.5484, "step": 1157 }, { "epoch": 0.10867117117117117, "grad_norm": 1.9243046500404135, "learning_rate": 3.619017829214889e-06, "loss": 0.5337, "step": 1158 }, { "epoch": 0.10876501501501501, "grad_norm": 2.0397017689180346, "learning_rate": 3.6221457616515486e-06, "loss": 0.5536, "step": 1159 }, { "epoch": 0.10885885885885886, "grad_norm": 2.322301032157328, "learning_rate": 3.625273694088208e-06, "loss": 0.5306, "step": 1160 }, { "epoch": 0.1089527027027027, "grad_norm": 2.4090925888045813, "learning_rate": 3.6284016265248674e-06, "loss": 0.5237, "step": 1161 }, { "epoch": 0.10904654654654655, "grad_norm": 4.6191487241424625, "learning_rate": 3.631529558961527e-06, "loss": 0.5118, "step": 1162 }, { "epoch": 0.10914039039039039, "grad_norm": 2.4992489219255742, "learning_rate": 3.6346574913981866e-06, "loss": 0.5067, "step": 1163 }, { "epoch": 0.10923423423423423, "grad_norm": 2.9906010373257237, "learning_rate": 3.6377854238348453e-06, "loss": 0.5502, "step": 1164 }, { "epoch": 0.10932807807807808, "grad_norm": 2.5094608581754207, "learning_rate": 3.640913356271505e-06, "loss": 0.5829, "step": 1165 }, { "epoch": 0.10942192192192192, "grad_norm": 2.1877312964563393, "learning_rate": 3.6440412887081645e-06, "loss": 0.5216, "step": 1166 }, { "epoch": 0.10951576576576577, "grad_norm": 4.460989170408921, "learning_rate": 3.6471692211448236e-06, "loss": 0.5313, "step": 1167 }, { "epoch": 0.10960960960960961, "grad_norm": 2.5004279890450802, "learning_rate": 3.6502971535814828e-06, "loss": 0.5794, "step": 1168 }, { "epoch": 0.10970345345345345, "grad_norm": 1.886854760537313, "learning_rate": 3.6534250860181424e-06, "loss": 0.5304, "step": 1169 }, { "epoch": 0.1097972972972973, "grad_norm": 1.9150250802753082, "learning_rate": 3.6565530184548015e-06, "loss": 0.4891, "step": 1170 }, { "epoch": 0.10989114114114114, "grad_norm": 2.126473076376414, "learning_rate": 3.659680950891461e-06, "loss": 0.5363, "step": 1171 }, { "epoch": 0.10998498498498499, "grad_norm": 2.1309869970021436, "learning_rate": 3.6628088833281207e-06, "loss": 0.5376, "step": 1172 }, { "epoch": 0.11007882882882883, "grad_norm": 1.924184787100001, "learning_rate": 3.6659368157647794e-06, "loss": 0.5424, "step": 1173 }, { "epoch": 0.11017267267267267, "grad_norm": 6.235342737151376, "learning_rate": 3.669064748201439e-06, "loss": 0.5609, "step": 1174 }, { "epoch": 0.11026651651651652, "grad_norm": 1.8632093402817391, "learning_rate": 3.6721926806380986e-06, "loss": 0.5422, "step": 1175 }, { "epoch": 0.11036036036036036, "grad_norm": 1.9772421138011895, "learning_rate": 3.6753206130747578e-06, "loss": 0.513, "step": 1176 }, { "epoch": 0.11045420420420421, "grad_norm": 2.068629032629185, "learning_rate": 3.6784485455114173e-06, "loss": 0.5615, "step": 1177 }, { "epoch": 0.11054804804804805, "grad_norm": 2.7352729363315995, "learning_rate": 3.6815764779480765e-06, "loss": 0.503, "step": 1178 }, { "epoch": 0.11064189189189189, "grad_norm": 18.454967897913818, "learning_rate": 3.684704410384736e-06, "loss": 0.4966, "step": 1179 }, { "epoch": 0.11073573573573574, "grad_norm": 2.372905313389038, "learning_rate": 3.6878323428213953e-06, "loss": 0.5457, "step": 1180 }, { "epoch": 0.11082957957957958, "grad_norm": 3.122119490826369, "learning_rate": 3.690960275258055e-06, "loss": 0.5533, "step": 1181 }, { "epoch": 0.11092342342342343, "grad_norm": 2.301862299054168, "learning_rate": 3.6940882076947144e-06, "loss": 0.5441, "step": 1182 }, { "epoch": 0.11101726726726727, "grad_norm": 2.577322558463145, "learning_rate": 3.697216140131373e-06, "loss": 0.4655, "step": 1183 }, { "epoch": 0.1111111111111111, "grad_norm": 1.9416567796678676, "learning_rate": 3.7003440725680327e-06, "loss": 0.5111, "step": 1184 }, { "epoch": 0.11120495495495496, "grad_norm": 2.196751387445326, "learning_rate": 3.7034720050046923e-06, "loss": 0.541, "step": 1185 }, { "epoch": 0.1112987987987988, "grad_norm": 2.644915571311042, "learning_rate": 3.7065999374413515e-06, "loss": 0.5454, "step": 1186 }, { "epoch": 0.11139264264264265, "grad_norm": 2.332388808636686, "learning_rate": 3.709727869878011e-06, "loss": 0.5251, "step": 1187 }, { "epoch": 0.11148648648648649, "grad_norm": 2.992267524886804, "learning_rate": 3.7128558023146707e-06, "loss": 0.5895, "step": 1188 }, { "epoch": 0.11158033033033032, "grad_norm": 2.5121368605208088, "learning_rate": 3.7159837347513294e-06, "loss": 0.5575, "step": 1189 }, { "epoch": 0.11167417417417418, "grad_norm": 1.9292007606829327, "learning_rate": 3.719111667187989e-06, "loss": 0.5365, "step": 1190 }, { "epoch": 0.11176801801801801, "grad_norm": 2.122150812714756, "learning_rate": 3.7222395996246486e-06, "loss": 0.5573, "step": 1191 }, { "epoch": 0.11186186186186187, "grad_norm": 2.0345202224915386, "learning_rate": 3.7253675320613077e-06, "loss": 0.5425, "step": 1192 }, { "epoch": 0.1119557057057057, "grad_norm": 2.404891359947671, "learning_rate": 3.728495464497967e-06, "loss": 0.5719, "step": 1193 }, { "epoch": 0.11204954954954954, "grad_norm": 2.2455262328685484, "learning_rate": 3.7316233969346265e-06, "loss": 0.5483, "step": 1194 }, { "epoch": 0.1121433933933934, "grad_norm": 2.1618857395767015, "learning_rate": 3.734751329371286e-06, "loss": 0.5863, "step": 1195 }, { "epoch": 0.11223723723723723, "grad_norm": 2.3160793611264143, "learning_rate": 3.7378792618079452e-06, "loss": 0.5577, "step": 1196 }, { "epoch": 0.11233108108108109, "grad_norm": 2.035028312330026, "learning_rate": 3.741007194244605e-06, "loss": 0.6028, "step": 1197 }, { "epoch": 0.11242492492492492, "grad_norm": 1.8948339315178901, "learning_rate": 3.7441351266812644e-06, "loss": 0.5182, "step": 1198 }, { "epoch": 0.11251876876876876, "grad_norm": 1.8661119948897928, "learning_rate": 3.747263059117923e-06, "loss": 0.5445, "step": 1199 }, { "epoch": 0.11261261261261261, "grad_norm": 17.926550186516092, "learning_rate": 3.7503909915545827e-06, "loss": 0.5533, "step": 1200 }, { "epoch": 0.11270645645645645, "grad_norm": 2.5154170834714438, "learning_rate": 3.7535189239912423e-06, "loss": 0.582, "step": 1201 }, { "epoch": 0.1128003003003003, "grad_norm": 2.3519662887554103, "learning_rate": 3.7566468564279014e-06, "loss": 0.5745, "step": 1202 }, { "epoch": 0.11289414414414414, "grad_norm": 2.000591783577145, "learning_rate": 3.759774788864561e-06, "loss": 0.5379, "step": 1203 }, { "epoch": 0.11298798798798798, "grad_norm": 2.194597602473863, "learning_rate": 3.76290272130122e-06, "loss": 0.5382, "step": 1204 }, { "epoch": 0.11308183183183183, "grad_norm": 1.8239442997695352, "learning_rate": 3.7660306537378794e-06, "loss": 0.5033, "step": 1205 }, { "epoch": 0.11317567567567567, "grad_norm": 2.2063975953027857, "learning_rate": 3.769158586174539e-06, "loss": 0.5858, "step": 1206 }, { "epoch": 0.11326951951951952, "grad_norm": 1.9513133385539774, "learning_rate": 3.7722865186111985e-06, "loss": 0.5747, "step": 1207 }, { "epoch": 0.11336336336336336, "grad_norm": 2.00266843065953, "learning_rate": 3.7754144510478573e-06, "loss": 0.5891, "step": 1208 }, { "epoch": 0.11345720720720721, "grad_norm": 2.51948038643706, "learning_rate": 3.778542383484517e-06, "loss": 0.5219, "step": 1209 }, { "epoch": 0.11355105105105105, "grad_norm": 1.7736522352723723, "learning_rate": 3.7816703159211764e-06, "loss": 0.5026, "step": 1210 }, { "epoch": 0.11364489489489489, "grad_norm": 1.9154541827592255, "learning_rate": 3.784798248357836e-06, "loss": 0.5452, "step": 1211 }, { "epoch": 0.11373873873873874, "grad_norm": 2.0739499818893847, "learning_rate": 3.787926180794495e-06, "loss": 0.5625, "step": 1212 }, { "epoch": 0.11383258258258258, "grad_norm": 2.290834700021399, "learning_rate": 3.7910541132311548e-06, "loss": 0.524, "step": 1213 }, { "epoch": 0.11392642642642643, "grad_norm": 2.6220454696567668, "learning_rate": 3.794182045667814e-06, "loss": 0.6197, "step": 1214 }, { "epoch": 0.11402027027027027, "grad_norm": 2.2358428519055895, "learning_rate": 3.797309978104473e-06, "loss": 0.5625, "step": 1215 }, { "epoch": 0.11411411411411411, "grad_norm": 2.8096106144177, "learning_rate": 3.8004379105411327e-06, "loss": 0.5253, "step": 1216 }, { "epoch": 0.11420795795795796, "grad_norm": 2.114345524563669, "learning_rate": 3.8035658429777922e-06, "loss": 0.5227, "step": 1217 }, { "epoch": 0.1143018018018018, "grad_norm": 2.2907581884361066, "learning_rate": 3.806693775414451e-06, "loss": 0.5713, "step": 1218 }, { "epoch": 0.11439564564564565, "grad_norm": 2.1966120435484284, "learning_rate": 3.8098217078511106e-06, "loss": 0.5327, "step": 1219 }, { "epoch": 0.11448948948948949, "grad_norm": 2.4162505808799297, "learning_rate": 3.81294964028777e-06, "loss": 0.5471, "step": 1220 }, { "epoch": 0.11458333333333333, "grad_norm": 2.1371777361728284, "learning_rate": 3.816077572724429e-06, "loss": 0.5735, "step": 1221 }, { "epoch": 0.11467717717717718, "grad_norm": 2.2655676045352497, "learning_rate": 3.8192055051610885e-06, "loss": 0.5782, "step": 1222 }, { "epoch": 0.11477102102102102, "grad_norm": 2.231697309447917, "learning_rate": 3.8223334375977485e-06, "loss": 0.5207, "step": 1223 }, { "epoch": 0.11486486486486487, "grad_norm": 2.306536918693169, "learning_rate": 3.825461370034408e-06, "loss": 0.5176, "step": 1224 }, { "epoch": 0.11495870870870871, "grad_norm": 1.9509011573304547, "learning_rate": 3.828589302471067e-06, "loss": 0.5411, "step": 1225 }, { "epoch": 0.11505255255255255, "grad_norm": 2.070302813527364, "learning_rate": 3.831717234907727e-06, "loss": 0.5458, "step": 1226 }, { "epoch": 0.1151463963963964, "grad_norm": 2.105021913439609, "learning_rate": 3.834845167344386e-06, "loss": 0.5448, "step": 1227 }, { "epoch": 0.11524024024024024, "grad_norm": 1.793512704027708, "learning_rate": 3.837973099781045e-06, "loss": 0.5793, "step": 1228 }, { "epoch": 0.11533408408408409, "grad_norm": 1.9563190469929466, "learning_rate": 3.841101032217704e-06, "loss": 0.6173, "step": 1229 }, { "epoch": 0.11542792792792793, "grad_norm": 2.320896800741785, "learning_rate": 3.844228964654364e-06, "loss": 0.5022, "step": 1230 }, { "epoch": 0.11552177177177177, "grad_norm": 2.1376987167979693, "learning_rate": 3.847356897091023e-06, "loss": 0.5851, "step": 1231 }, { "epoch": 0.11561561561561562, "grad_norm": 2.2311871827075356, "learning_rate": 3.850484829527683e-06, "loss": 0.5524, "step": 1232 }, { "epoch": 0.11570945945945946, "grad_norm": 2.8468211002705637, "learning_rate": 3.853612761964342e-06, "loss": 0.5149, "step": 1233 }, { "epoch": 0.11580330330330331, "grad_norm": 2.3793079834678434, "learning_rate": 3.856740694401001e-06, "loss": 0.5509, "step": 1234 }, { "epoch": 0.11589714714714715, "grad_norm": 2.278589623651029, "learning_rate": 3.859868626837661e-06, "loss": 0.5898, "step": 1235 }, { "epoch": 0.11599099099099099, "grad_norm": 1.919476190785532, "learning_rate": 3.86299655927432e-06, "loss": 0.493, "step": 1236 }, { "epoch": 0.11608483483483484, "grad_norm": 2.323819366466326, "learning_rate": 3.866124491710979e-06, "loss": 0.5877, "step": 1237 }, { "epoch": 0.11617867867867868, "grad_norm": 2.194764514345015, "learning_rate": 3.8692524241476384e-06, "loss": 0.539, "step": 1238 }, { "epoch": 0.11627252252252253, "grad_norm": 1.7638791542078522, "learning_rate": 3.8723803565842984e-06, "loss": 0.5516, "step": 1239 }, { "epoch": 0.11636636636636637, "grad_norm": 2.0675056710170354, "learning_rate": 3.875508289020958e-06, "loss": 0.5573, "step": 1240 }, { "epoch": 0.1164602102102102, "grad_norm": 2.2073661716476742, "learning_rate": 3.878636221457617e-06, "loss": 0.6118, "step": 1241 }, { "epoch": 0.11655405405405406, "grad_norm": 2.368904287035754, "learning_rate": 3.881764153894276e-06, "loss": 0.5553, "step": 1242 }, { "epoch": 0.1166478978978979, "grad_norm": 2.7197478725739317, "learning_rate": 3.884892086330936e-06, "loss": 0.5529, "step": 1243 }, { "epoch": 0.11674174174174175, "grad_norm": 1.9842057938110416, "learning_rate": 3.888020018767595e-06, "loss": 0.5185, "step": 1244 }, { "epoch": 0.11683558558558559, "grad_norm": 5.56534559948982, "learning_rate": 3.891147951204254e-06, "loss": 0.5597, "step": 1245 }, { "epoch": 0.11692942942942942, "grad_norm": 3.3279003785224615, "learning_rate": 3.894275883640914e-06, "loss": 0.5566, "step": 1246 }, { "epoch": 0.11702327327327328, "grad_norm": 2.385823460751581, "learning_rate": 3.8974038160775726e-06, "loss": 0.5684, "step": 1247 }, { "epoch": 0.11711711711711711, "grad_norm": 2.1454352194820903, "learning_rate": 3.900531748514233e-06, "loss": 0.5915, "step": 1248 }, { "epoch": 0.11721096096096097, "grad_norm": 2.2132254531461197, "learning_rate": 3.903659680950892e-06, "loss": 0.5429, "step": 1249 }, { "epoch": 0.1173048048048048, "grad_norm": 2.194293383790253, "learning_rate": 3.906787613387551e-06, "loss": 0.6016, "step": 1250 }, { "epoch": 0.11739864864864864, "grad_norm": 2.2208761515153346, "learning_rate": 3.909915545824211e-06, "loss": 0.5186, "step": 1251 }, { "epoch": 0.1174924924924925, "grad_norm": 1.7226921971073896, "learning_rate": 3.91304347826087e-06, "loss": 0.5881, "step": 1252 }, { "epoch": 0.11758633633633633, "grad_norm": 1.9146406782778125, "learning_rate": 3.916171410697529e-06, "loss": 0.5625, "step": 1253 }, { "epoch": 0.11768018018018019, "grad_norm": 2.307163955905805, "learning_rate": 3.919299343134188e-06, "loss": 0.5759, "step": 1254 }, { "epoch": 0.11777402402402402, "grad_norm": 2.865797772893895, "learning_rate": 3.922427275570848e-06, "loss": 0.5708, "step": 1255 }, { "epoch": 0.11786786786786786, "grad_norm": 2.0464518862169427, "learning_rate": 3.925555208007507e-06, "loss": 0.515, "step": 1256 }, { "epoch": 0.11796171171171171, "grad_norm": 1.9058307155508096, "learning_rate": 3.928683140444167e-06, "loss": 0.5419, "step": 1257 }, { "epoch": 0.11805555555555555, "grad_norm": 2.259825576802688, "learning_rate": 3.931811072880826e-06, "loss": 0.5408, "step": 1258 }, { "epoch": 0.1181493993993994, "grad_norm": 2.2852611866349055, "learning_rate": 3.934939005317486e-06, "loss": 0.5786, "step": 1259 }, { "epoch": 0.11824324324324324, "grad_norm": 2.0223779271760804, "learning_rate": 3.938066937754145e-06, "loss": 0.4955, "step": 1260 }, { "epoch": 0.11833708708708708, "grad_norm": 2.103825266477072, "learning_rate": 3.941194870190804e-06, "loss": 0.5779, "step": 1261 }, { "epoch": 0.11843093093093093, "grad_norm": 2.0268297963360653, "learning_rate": 3.944322802627464e-06, "loss": 0.5225, "step": 1262 }, { "epoch": 0.11852477477477477, "grad_norm": 1.8319974940129413, "learning_rate": 3.9474507350641225e-06, "loss": 0.4934, "step": 1263 }, { "epoch": 0.11861861861861862, "grad_norm": 4.126700958874211, "learning_rate": 3.9505786675007825e-06, "loss": 0.5618, "step": 1264 }, { "epoch": 0.11871246246246246, "grad_norm": 2.473850888027996, "learning_rate": 3.953706599937442e-06, "loss": 0.5892, "step": 1265 }, { "epoch": 0.1188063063063063, "grad_norm": 1.9219268053860656, "learning_rate": 3.956834532374101e-06, "loss": 0.5905, "step": 1266 }, { "epoch": 0.11890015015015015, "grad_norm": 2.1383967184413746, "learning_rate": 3.95996246481076e-06, "loss": 0.5841, "step": 1267 }, { "epoch": 0.11899399399399399, "grad_norm": 5.023458247316333, "learning_rate": 3.96309039724742e-06, "loss": 0.5772, "step": 1268 }, { "epoch": 0.11908783783783784, "grad_norm": 2.1629663030926936, "learning_rate": 3.966218329684079e-06, "loss": 0.5666, "step": 1269 }, { "epoch": 0.11918168168168168, "grad_norm": 2.4038026697693353, "learning_rate": 3.969346262120738e-06, "loss": 0.5237, "step": 1270 }, { "epoch": 0.11927552552552552, "grad_norm": 2.139621878151589, "learning_rate": 3.972474194557398e-06, "loss": 0.5591, "step": 1271 }, { "epoch": 0.11936936936936937, "grad_norm": 2.112217525962969, "learning_rate": 3.975602126994057e-06, "loss": 0.6437, "step": 1272 }, { "epoch": 0.11946321321321321, "grad_norm": 2.184489333565292, "learning_rate": 3.978730059430717e-06, "loss": 0.5803, "step": 1273 }, { "epoch": 0.11955705705705706, "grad_norm": 2.3374488959334734, "learning_rate": 3.981857991867376e-06, "loss": 0.5769, "step": 1274 }, { "epoch": 0.1196509009009009, "grad_norm": 2.0254153985276973, "learning_rate": 3.984985924304036e-06, "loss": 0.5414, "step": 1275 }, { "epoch": 0.11974474474474474, "grad_norm": 2.028440243404696, "learning_rate": 3.988113856740695e-06, "loss": 0.5577, "step": 1276 }, { "epoch": 0.11983858858858859, "grad_norm": 1.788420177510913, "learning_rate": 3.991241789177354e-06, "loss": 0.5121, "step": 1277 }, { "epoch": 0.11993243243243243, "grad_norm": 3.094052109081166, "learning_rate": 3.994369721614013e-06, "loss": 0.5467, "step": 1278 }, { "epoch": 0.12002627627627628, "grad_norm": 2.4036136742889687, "learning_rate": 3.9974976540506725e-06, "loss": 0.5963, "step": 1279 }, { "epoch": 0.12012012012012012, "grad_norm": 2.214897556732431, "learning_rate": 4.0006255864873325e-06, "loss": 0.5398, "step": 1280 }, { "epoch": 0.12021396396396396, "grad_norm": 2.2108043105409116, "learning_rate": 4.003753518923992e-06, "loss": 0.5318, "step": 1281 }, { "epoch": 0.12030780780780781, "grad_norm": 2.435152567139981, "learning_rate": 4.006881451360651e-06, "loss": 0.5605, "step": 1282 }, { "epoch": 0.12040165165165165, "grad_norm": 2.273633410080579, "learning_rate": 4.01000938379731e-06, "loss": 0.5789, "step": 1283 }, { "epoch": 0.1204954954954955, "grad_norm": 1.7030232486803032, "learning_rate": 4.01313731623397e-06, "loss": 0.4636, "step": 1284 }, { "epoch": 0.12058933933933934, "grad_norm": 2.06216499551836, "learning_rate": 4.016265248670629e-06, "loss": 0.6207, "step": 1285 }, { "epoch": 0.12068318318318318, "grad_norm": 1.9372343252187334, "learning_rate": 4.019393181107288e-06, "loss": 0.5434, "step": 1286 }, { "epoch": 0.12077702702702703, "grad_norm": 2.1787840092899327, "learning_rate": 4.022521113543948e-06, "loss": 0.5114, "step": 1287 }, { "epoch": 0.12087087087087087, "grad_norm": 2.8671030405963815, "learning_rate": 4.025649045980607e-06, "loss": 0.5699, "step": 1288 }, { "epoch": 0.12096471471471472, "grad_norm": 3.018631972546843, "learning_rate": 4.028776978417267e-06, "loss": 0.5473, "step": 1289 }, { "epoch": 0.12105855855855856, "grad_norm": 1.9919862496589262, "learning_rate": 4.031904910853926e-06, "loss": 0.5367, "step": 1290 }, { "epoch": 0.12115240240240241, "grad_norm": 2.193687956706349, "learning_rate": 4.035032843290585e-06, "loss": 0.5917, "step": 1291 }, { "epoch": 0.12124624624624625, "grad_norm": 1.8371231159342227, "learning_rate": 4.038160775727244e-06, "loss": 0.5072, "step": 1292 }, { "epoch": 0.12134009009009009, "grad_norm": 1.7476181314867398, "learning_rate": 4.041288708163904e-06, "loss": 0.4918, "step": 1293 }, { "epoch": 0.12143393393393394, "grad_norm": 2.2148919721872007, "learning_rate": 4.044416640600563e-06, "loss": 0.5381, "step": 1294 }, { "epoch": 0.12152777777777778, "grad_norm": 2.7025950673084513, "learning_rate": 4.0475445730372224e-06, "loss": 0.5756, "step": 1295 }, { "epoch": 0.12162162162162163, "grad_norm": 3.5889229772899576, "learning_rate": 4.0506725054738825e-06, "loss": 0.5571, "step": 1296 }, { "epoch": 0.12171546546546547, "grad_norm": 3.0309379876890934, "learning_rate": 4.053800437910542e-06, "loss": 0.5536, "step": 1297 }, { "epoch": 0.1218093093093093, "grad_norm": 1.8987199146072589, "learning_rate": 4.056928370347201e-06, "loss": 0.5272, "step": 1298 }, { "epoch": 0.12190315315315316, "grad_norm": 3.102814118463554, "learning_rate": 4.06005630278386e-06, "loss": 0.5345, "step": 1299 }, { "epoch": 0.121996996996997, "grad_norm": 1.789370592525419, "learning_rate": 4.06318423522052e-06, "loss": 0.5423, "step": 1300 }, { "epoch": 0.12209084084084085, "grad_norm": 2.2365412317439826, "learning_rate": 4.066312167657179e-06, "loss": 0.5318, "step": 1301 }, { "epoch": 0.12218468468468469, "grad_norm": 2.405537266330517, "learning_rate": 4.069440100093838e-06, "loss": 0.5131, "step": 1302 }, { "epoch": 0.12227852852852852, "grad_norm": 2.0435354766877034, "learning_rate": 4.0725680325304974e-06, "loss": 0.5397, "step": 1303 }, { "epoch": 0.12237237237237238, "grad_norm": 2.1488011516953844, "learning_rate": 4.075695964967157e-06, "loss": 0.5398, "step": 1304 }, { "epoch": 0.12246621621621621, "grad_norm": 4.778873447818951, "learning_rate": 4.078823897403817e-06, "loss": 0.5846, "step": 1305 }, { "epoch": 0.12256006006006007, "grad_norm": 2.3691693839244765, "learning_rate": 4.081951829840476e-06, "loss": 0.5551, "step": 1306 }, { "epoch": 0.1226539039039039, "grad_norm": 2.360287005042546, "learning_rate": 4.085079762277135e-06, "loss": 0.4743, "step": 1307 }, { "epoch": 0.12274774774774774, "grad_norm": 3.0473028378618374, "learning_rate": 4.088207694713794e-06, "loss": 0.5635, "step": 1308 }, { "epoch": 0.1228415915915916, "grad_norm": 2.6134574260477774, "learning_rate": 4.091335627150454e-06, "loss": 0.4847, "step": 1309 }, { "epoch": 0.12293543543543543, "grad_norm": 10.579805745548724, "learning_rate": 4.094463559587113e-06, "loss": 0.5358, "step": 1310 }, { "epoch": 0.12302927927927929, "grad_norm": 1.8630270387640147, "learning_rate": 4.097591492023772e-06, "loss": 0.4778, "step": 1311 }, { "epoch": 0.12312312312312312, "grad_norm": 2.3560967235153583, "learning_rate": 4.100719424460432e-06, "loss": 0.5129, "step": 1312 }, { "epoch": 0.12321696696696696, "grad_norm": 2.4329735304079216, "learning_rate": 4.103847356897092e-06, "loss": 0.5101, "step": 1313 }, { "epoch": 0.12331081081081081, "grad_norm": 2.096675328422486, "learning_rate": 4.106975289333751e-06, "loss": 0.5255, "step": 1314 }, { "epoch": 0.12340465465465465, "grad_norm": 2.6785721931431397, "learning_rate": 4.11010322177041e-06, "loss": 0.562, "step": 1315 }, { "epoch": 0.1234984984984985, "grad_norm": 2.49622872551483, "learning_rate": 4.11323115420707e-06, "loss": 0.5573, "step": 1316 }, { "epoch": 0.12359234234234234, "grad_norm": 2.320443295859026, "learning_rate": 4.116359086643728e-06, "loss": 0.5818, "step": 1317 }, { "epoch": 0.12368618618618618, "grad_norm": 2.5114753166225796, "learning_rate": 4.119487019080388e-06, "loss": 0.5456, "step": 1318 }, { "epoch": 0.12378003003003003, "grad_norm": 2.943989628370687, "learning_rate": 4.122614951517047e-06, "loss": 0.6161, "step": 1319 }, { "epoch": 0.12387387387387387, "grad_norm": 2.56881899487057, "learning_rate": 4.1257428839537065e-06, "loss": 0.5353, "step": 1320 }, { "epoch": 0.12396771771771772, "grad_norm": 2.6158000637045613, "learning_rate": 4.1288708163903666e-06, "loss": 0.5493, "step": 1321 }, { "epoch": 0.12406156156156156, "grad_norm": 4.128969075229963, "learning_rate": 4.131998748827026e-06, "loss": 0.5817, "step": 1322 }, { "epoch": 0.1241554054054054, "grad_norm": 1.9910910393007621, "learning_rate": 4.135126681263685e-06, "loss": 0.5181, "step": 1323 }, { "epoch": 0.12424924924924925, "grad_norm": 1.9846046888959241, "learning_rate": 4.138254613700344e-06, "loss": 0.5753, "step": 1324 }, { "epoch": 0.12434309309309309, "grad_norm": 1.9689543818453188, "learning_rate": 4.141382546137004e-06, "loss": 0.5419, "step": 1325 }, { "epoch": 0.12443693693693694, "grad_norm": 8.523566580802791, "learning_rate": 4.144510478573663e-06, "loss": 0.5253, "step": 1326 }, { "epoch": 0.12453078078078078, "grad_norm": 1.9711605157915348, "learning_rate": 4.147638411010322e-06, "loss": 0.6204, "step": 1327 }, { "epoch": 0.12462462462462462, "grad_norm": 2.0778232534917507, "learning_rate": 4.1507663434469815e-06, "loss": 0.5849, "step": 1328 }, { "epoch": 0.12471846846846847, "grad_norm": 2.208943927339261, "learning_rate": 4.1538942758836415e-06, "loss": 0.5158, "step": 1329 }, { "epoch": 0.12481231231231231, "grad_norm": 2.6102346783264236, "learning_rate": 4.157022208320301e-06, "loss": 0.5852, "step": 1330 }, { "epoch": 0.12490615615615616, "grad_norm": 2.0465787785963645, "learning_rate": 4.16015014075696e-06, "loss": 0.5216, "step": 1331 }, { "epoch": 0.125, "grad_norm": 1.9513487842320898, "learning_rate": 4.16327807319362e-06, "loss": 0.5912, "step": 1332 }, { "epoch": 0.12509384384384384, "grad_norm": 1.953248056900136, "learning_rate": 4.166406005630278e-06, "loss": 0.5543, "step": 1333 }, { "epoch": 0.12518768768768768, "grad_norm": 2.0798613260995564, "learning_rate": 4.169533938066938e-06, "loss": 0.5454, "step": 1334 }, { "epoch": 0.12528153153153154, "grad_norm": 5.387507858478982, "learning_rate": 4.172661870503597e-06, "loss": 0.5599, "step": 1335 }, { "epoch": 0.12537537537537538, "grad_norm": 2.115117401710588, "learning_rate": 4.1757898029402565e-06, "loss": 0.5695, "step": 1336 }, { "epoch": 0.12546921921921922, "grad_norm": 2.341508101447922, "learning_rate": 4.1789177353769165e-06, "loss": 0.5318, "step": 1337 }, { "epoch": 0.12556306306306306, "grad_norm": 2.0911163726399065, "learning_rate": 4.182045667813576e-06, "loss": 0.5788, "step": 1338 }, { "epoch": 0.1256569069069069, "grad_norm": 2.2842759947961473, "learning_rate": 4.185173600250235e-06, "loss": 0.586, "step": 1339 }, { "epoch": 0.12575075075075076, "grad_norm": 2.3540622772796937, "learning_rate": 4.188301532686894e-06, "loss": 0.5258, "step": 1340 }, { "epoch": 0.1258445945945946, "grad_norm": 1.5595007150850606, "learning_rate": 4.191429465123554e-06, "loss": 0.5373, "step": 1341 }, { "epoch": 0.12593843843843844, "grad_norm": 2.129096840265918, "learning_rate": 4.194557397560213e-06, "loss": 0.5281, "step": 1342 }, { "epoch": 0.12603228228228228, "grad_norm": 2.25009676113524, "learning_rate": 4.197685329996872e-06, "loss": 0.5201, "step": 1343 }, { "epoch": 0.12612612612612611, "grad_norm": 2.3401826986026784, "learning_rate": 4.2008132624335315e-06, "loss": 0.5315, "step": 1344 }, { "epoch": 0.12621996996996998, "grad_norm": 2.682955838543724, "learning_rate": 4.2039411948701915e-06, "loss": 0.5038, "step": 1345 }, { "epoch": 0.12631381381381382, "grad_norm": 3.1630691767724213, "learning_rate": 4.207069127306851e-06, "loss": 0.5157, "step": 1346 }, { "epoch": 0.12640765765765766, "grad_norm": 2.0090133217893578, "learning_rate": 4.21019705974351e-06, "loss": 0.5332, "step": 1347 }, { "epoch": 0.1265015015015015, "grad_norm": 2.100549010012698, "learning_rate": 4.21332499218017e-06, "loss": 0.5099, "step": 1348 }, { "epoch": 0.12659534534534533, "grad_norm": 2.473080913505927, "learning_rate": 4.216452924616828e-06, "loss": 0.5274, "step": 1349 }, { "epoch": 0.1266891891891892, "grad_norm": 2.322105857322399, "learning_rate": 4.219580857053488e-06, "loss": 0.5688, "step": 1350 }, { "epoch": 0.12678303303303304, "grad_norm": 2.1431620842119314, "learning_rate": 4.222708789490147e-06, "loss": 0.5223, "step": 1351 }, { "epoch": 0.12687687687687688, "grad_norm": 2.0888240736223906, "learning_rate": 4.2258367219268065e-06, "loss": 0.5817, "step": 1352 }, { "epoch": 0.12697072072072071, "grad_norm": 2.992179412030645, "learning_rate": 4.228964654363466e-06, "loss": 0.5548, "step": 1353 }, { "epoch": 0.12706456456456455, "grad_norm": 2.2643664929516025, "learning_rate": 4.232092586800126e-06, "loss": 0.5318, "step": 1354 }, { "epoch": 0.12715840840840842, "grad_norm": 2.9257265459559827, "learning_rate": 4.235220519236785e-06, "loss": 0.5889, "step": 1355 }, { "epoch": 0.12725225225225226, "grad_norm": 2.9201950586921206, "learning_rate": 4.238348451673444e-06, "loss": 0.4927, "step": 1356 }, { "epoch": 0.1273460960960961, "grad_norm": 1.7775752974660752, "learning_rate": 4.241476384110104e-06, "loss": 0.4723, "step": 1357 }, { "epoch": 0.12743993993993993, "grad_norm": 2.3296461191483933, "learning_rate": 4.244604316546763e-06, "loss": 0.5571, "step": 1358 }, { "epoch": 0.12753378378378377, "grad_norm": 2.683583215299485, "learning_rate": 4.247732248983422e-06, "loss": 0.527, "step": 1359 }, { "epoch": 0.12762762762762764, "grad_norm": 3.112526287565826, "learning_rate": 4.2508601814200814e-06, "loss": 0.5798, "step": 1360 }, { "epoch": 0.12772147147147148, "grad_norm": 2.360403866844843, "learning_rate": 4.2539881138567415e-06, "loss": 0.5753, "step": 1361 }, { "epoch": 0.12781531531531531, "grad_norm": 2.0512725562488856, "learning_rate": 4.257116046293401e-06, "loss": 0.5334, "step": 1362 }, { "epoch": 0.12790915915915915, "grad_norm": 3.230250948403069, "learning_rate": 4.26024397873006e-06, "loss": 0.5734, "step": 1363 }, { "epoch": 0.128003003003003, "grad_norm": 2.7285836885304655, "learning_rate": 4.263371911166719e-06, "loss": 0.5665, "step": 1364 }, { "epoch": 0.12809684684684686, "grad_norm": 2.2455357962606115, "learning_rate": 4.266499843603378e-06, "loss": 0.515, "step": 1365 }, { "epoch": 0.1281906906906907, "grad_norm": 2.0486006815482525, "learning_rate": 4.269627776040038e-06, "loss": 0.5052, "step": 1366 }, { "epoch": 0.12828453453453453, "grad_norm": 2.4091752439889778, "learning_rate": 4.272755708476697e-06, "loss": 0.5508, "step": 1367 }, { "epoch": 0.12837837837837837, "grad_norm": 3.0841792376601855, "learning_rate": 4.2758836409133564e-06, "loss": 0.54, "step": 1368 }, { "epoch": 0.1284722222222222, "grad_norm": 2.2310743276190386, "learning_rate": 4.279011573350016e-06, "loss": 0.557, "step": 1369 }, { "epoch": 0.12856606606606608, "grad_norm": 2.3219837370030723, "learning_rate": 4.282139505786676e-06, "loss": 0.4875, "step": 1370 }, { "epoch": 0.12865990990990991, "grad_norm": 2.3507687053875843, "learning_rate": 4.285267438223335e-06, "loss": 0.5731, "step": 1371 }, { "epoch": 0.12875375375375375, "grad_norm": 2.025091187633092, "learning_rate": 4.288395370659994e-06, "loss": 0.5648, "step": 1372 }, { "epoch": 0.1288475975975976, "grad_norm": 2.0007421004699513, "learning_rate": 4.291523303096654e-06, "loss": 0.573, "step": 1373 }, { "epoch": 0.12894144144144143, "grad_norm": 2.30215117284068, "learning_rate": 4.294651235533313e-06, "loss": 0.5161, "step": 1374 }, { "epoch": 0.1290352852852853, "grad_norm": 2.1470062929708016, "learning_rate": 4.297779167969972e-06, "loss": 0.561, "step": 1375 }, { "epoch": 0.12912912912912913, "grad_norm": 2.609467786382938, "learning_rate": 4.300907100406631e-06, "loss": 0.5401, "step": 1376 }, { "epoch": 0.12922297297297297, "grad_norm": 1.7937438042708818, "learning_rate": 4.304035032843291e-06, "loss": 0.5591, "step": 1377 }, { "epoch": 0.1293168168168168, "grad_norm": 2.8053948251694756, "learning_rate": 4.30716296527995e-06, "loss": 0.5661, "step": 1378 }, { "epoch": 0.12941066066066065, "grad_norm": 2.9853359942499695, "learning_rate": 4.31029089771661e-06, "loss": 0.5657, "step": 1379 }, { "epoch": 0.12950450450450451, "grad_norm": 3.1337903425179796, "learning_rate": 4.313418830153269e-06, "loss": 0.564, "step": 1380 }, { "epoch": 0.12959834834834835, "grad_norm": 1.9361724369014857, "learning_rate": 4.316546762589928e-06, "loss": 0.5506, "step": 1381 }, { "epoch": 0.1296921921921922, "grad_norm": 1.8832445006089196, "learning_rate": 4.319674695026588e-06, "loss": 0.4781, "step": 1382 }, { "epoch": 0.12978603603603603, "grad_norm": 4.652601406972478, "learning_rate": 4.322802627463247e-06, "loss": 0.5938, "step": 1383 }, { "epoch": 0.12987987987987987, "grad_norm": 3.2507516985865266, "learning_rate": 4.325930559899906e-06, "loss": 0.5679, "step": 1384 }, { "epoch": 0.12997372372372373, "grad_norm": 2.4990118746235943, "learning_rate": 4.3290584923365655e-06, "loss": 0.5236, "step": 1385 }, { "epoch": 0.13006756756756757, "grad_norm": 3.5640781795728214, "learning_rate": 4.3321864247732256e-06, "loss": 0.5914, "step": 1386 }, { "epoch": 0.1301614114114114, "grad_norm": 2.2762791651644947, "learning_rate": 4.335314357209885e-06, "loss": 0.5764, "step": 1387 }, { "epoch": 0.13025525525525525, "grad_norm": 2.878265993274455, "learning_rate": 4.338442289646544e-06, "loss": 0.5566, "step": 1388 }, { "epoch": 0.13034909909909909, "grad_norm": 24.13877055595361, "learning_rate": 4.341570222083203e-06, "loss": 0.5404, "step": 1389 }, { "epoch": 0.13044294294294295, "grad_norm": 2.1605003649783034, "learning_rate": 4.344698154519863e-06, "loss": 0.4887, "step": 1390 }, { "epoch": 0.1305367867867868, "grad_norm": 4.009504890715502, "learning_rate": 4.347826086956522e-06, "loss": 0.5717, "step": 1391 }, { "epoch": 0.13063063063063063, "grad_norm": 1.6743995749525171, "learning_rate": 4.350954019393181e-06, "loss": 0.5535, "step": 1392 }, { "epoch": 0.13072447447447447, "grad_norm": 2.0906369763262793, "learning_rate": 4.354081951829841e-06, "loss": 0.5314, "step": 1393 }, { "epoch": 0.1308183183183183, "grad_norm": 2.301361396053821, "learning_rate": 4.3572098842665e-06, "loss": 0.5589, "step": 1394 }, { "epoch": 0.13091216216216217, "grad_norm": 2.7270198402949486, "learning_rate": 4.36033781670316e-06, "loss": 0.5295, "step": 1395 }, { "epoch": 0.131006006006006, "grad_norm": 2.186752968196721, "learning_rate": 4.363465749139819e-06, "loss": 0.4993, "step": 1396 }, { "epoch": 0.13109984984984985, "grad_norm": 2.0537018738327446, "learning_rate": 4.366593681576478e-06, "loss": 0.5737, "step": 1397 }, { "epoch": 0.13119369369369369, "grad_norm": 2.170081413585867, "learning_rate": 4.369721614013138e-06, "loss": 0.5642, "step": 1398 }, { "epoch": 0.13128753753753752, "grad_norm": 5.921283075123021, "learning_rate": 4.372849546449797e-06, "loss": 0.5511, "step": 1399 }, { "epoch": 0.1313813813813814, "grad_norm": 2.334358589626714, "learning_rate": 4.375977478886456e-06, "loss": 0.5068, "step": 1400 }, { "epoch": 0.13147522522522523, "grad_norm": 1.9540817597175417, "learning_rate": 4.3791054113231155e-06, "loss": 0.6186, "step": 1401 }, { "epoch": 0.13156906906906907, "grad_norm": 2.088742048844517, "learning_rate": 4.3822333437597755e-06, "loss": 0.5341, "step": 1402 }, { "epoch": 0.1316629129129129, "grad_norm": 2.1199253821830375, "learning_rate": 4.385361276196435e-06, "loss": 0.5278, "step": 1403 }, { "epoch": 0.13175675675675674, "grad_norm": 1.9884666844146084, "learning_rate": 4.388489208633094e-06, "loss": 0.5408, "step": 1404 }, { "epoch": 0.1318506006006006, "grad_norm": 2.920030984094954, "learning_rate": 4.391617141069753e-06, "loss": 0.5483, "step": 1405 }, { "epoch": 0.13194444444444445, "grad_norm": 1.7162686060023002, "learning_rate": 4.394745073506413e-06, "loss": 0.4909, "step": 1406 }, { "epoch": 0.13203828828828829, "grad_norm": 4.037635698181726, "learning_rate": 4.397873005943072e-06, "loss": 0.4924, "step": 1407 }, { "epoch": 0.13213213213213212, "grad_norm": 3.4750101559024165, "learning_rate": 4.401000938379731e-06, "loss": 0.5094, "step": 1408 }, { "epoch": 0.13222597597597596, "grad_norm": 2.7912687043183846, "learning_rate": 4.404128870816391e-06, "loss": 0.5575, "step": 1409 }, { "epoch": 0.13231981981981983, "grad_norm": 2.6480319826456022, "learning_rate": 4.40725680325305e-06, "loss": 0.5509, "step": 1410 }, { "epoch": 0.13241366366366367, "grad_norm": 4.9575033898272975, "learning_rate": 4.41038473568971e-06, "loss": 0.5279, "step": 1411 }, { "epoch": 0.1325075075075075, "grad_norm": 2.007696688852104, "learning_rate": 4.413512668126369e-06, "loss": 0.5762, "step": 1412 }, { "epoch": 0.13260135135135134, "grad_norm": 1.795859390594599, "learning_rate": 4.416640600563028e-06, "loss": 0.5776, "step": 1413 }, { "epoch": 0.13269519519519518, "grad_norm": 4.010176127219908, "learning_rate": 4.419768532999687e-06, "loss": 0.5249, "step": 1414 }, { "epoch": 0.13278903903903905, "grad_norm": 2.3026078059761774, "learning_rate": 4.422896465436347e-06, "loss": 0.5476, "step": 1415 }, { "epoch": 0.13288288288288289, "grad_norm": 1.923550589551025, "learning_rate": 4.426024397873006e-06, "loss": 0.5341, "step": 1416 }, { "epoch": 0.13297672672672672, "grad_norm": 2.751314949780634, "learning_rate": 4.4291523303096655e-06, "loss": 0.5455, "step": 1417 }, { "epoch": 0.13307057057057056, "grad_norm": 2.0797413527626047, "learning_rate": 4.4322802627463255e-06, "loss": 0.4853, "step": 1418 }, { "epoch": 0.13316441441441443, "grad_norm": 2.316333711714632, "learning_rate": 4.435408195182984e-06, "loss": 0.605, "step": 1419 }, { "epoch": 0.13325825825825827, "grad_norm": 3.046064019982915, "learning_rate": 4.438536127619644e-06, "loss": 0.5927, "step": 1420 }, { "epoch": 0.1333521021021021, "grad_norm": 2.1782384379275426, "learning_rate": 4.441664060056303e-06, "loss": 0.4903, "step": 1421 }, { "epoch": 0.13344594594594594, "grad_norm": 2.0092555024659458, "learning_rate": 4.444791992492963e-06, "loss": 0.5593, "step": 1422 }, { "epoch": 0.13353978978978978, "grad_norm": 2.38335850981485, "learning_rate": 4.447919924929622e-06, "loss": 0.5254, "step": 1423 }, { "epoch": 0.13363363363363365, "grad_norm": 2.0080304434811445, "learning_rate": 4.451047857366281e-06, "loss": 0.5444, "step": 1424 }, { "epoch": 0.13372747747747749, "grad_norm": 2.0770622137757226, "learning_rate": 4.4541757898029404e-06, "loss": 0.5814, "step": 1425 }, { "epoch": 0.13382132132132132, "grad_norm": 2.5447225439681596, "learning_rate": 4.4573037222396e-06, "loss": 0.5302, "step": 1426 }, { "epoch": 0.13391516516516516, "grad_norm": 3.416412798030206, "learning_rate": 4.46043165467626e-06, "loss": 0.5267, "step": 1427 }, { "epoch": 0.134009009009009, "grad_norm": 2.013139510789556, "learning_rate": 4.463559587112919e-06, "loss": 0.5406, "step": 1428 }, { "epoch": 0.13410285285285287, "grad_norm": 5.9289623815155466, "learning_rate": 4.466687519549578e-06, "loss": 0.5365, "step": 1429 }, { "epoch": 0.1341966966966967, "grad_norm": 2.018578626875402, "learning_rate": 4.469815451986237e-06, "loss": 0.5314, "step": 1430 }, { "epoch": 0.13429054054054054, "grad_norm": 1.9774603742146681, "learning_rate": 4.472943384422897e-06, "loss": 0.5105, "step": 1431 }, { "epoch": 0.13438438438438438, "grad_norm": 3.615756502467627, "learning_rate": 4.476071316859556e-06, "loss": 0.53, "step": 1432 }, { "epoch": 0.13447822822822822, "grad_norm": 2.264924048390281, "learning_rate": 4.4791992492962154e-06, "loss": 0.5216, "step": 1433 }, { "epoch": 0.13457207207207209, "grad_norm": 2.4293196527829397, "learning_rate": 4.4823271817328754e-06, "loss": 0.5579, "step": 1434 }, { "epoch": 0.13466591591591592, "grad_norm": 1.9857781509266317, "learning_rate": 4.485455114169534e-06, "loss": 0.5615, "step": 1435 }, { "epoch": 0.13475975975975976, "grad_norm": 2.0277594664891394, "learning_rate": 4.488583046606194e-06, "loss": 0.5237, "step": 1436 }, { "epoch": 0.1348536036036036, "grad_norm": 2.3702430906703285, "learning_rate": 4.491710979042853e-06, "loss": 0.5327, "step": 1437 }, { "epoch": 0.13494744744744744, "grad_norm": 2.0196728414224268, "learning_rate": 4.494838911479513e-06, "loss": 0.5598, "step": 1438 }, { "epoch": 0.1350412912912913, "grad_norm": 1.7089720141329405, "learning_rate": 4.497966843916172e-06, "loss": 0.4937, "step": 1439 }, { "epoch": 0.13513513513513514, "grad_norm": 1.8600937565739983, "learning_rate": 4.501094776352831e-06, "loss": 0.5756, "step": 1440 }, { "epoch": 0.13522897897897898, "grad_norm": 2.1806103878655203, "learning_rate": 4.50422270878949e-06, "loss": 0.5473, "step": 1441 }, { "epoch": 0.13532282282282282, "grad_norm": 2.9174007626219725, "learning_rate": 4.5073506412261496e-06, "loss": 0.5877, "step": 1442 }, { "epoch": 0.13541666666666666, "grad_norm": 2.215134015040503, "learning_rate": 4.5104785736628096e-06, "loss": 0.525, "step": 1443 }, { "epoch": 0.13551051051051052, "grad_norm": 2.2960362137917265, "learning_rate": 4.513606506099469e-06, "loss": 0.5031, "step": 1444 }, { "epoch": 0.13560435435435436, "grad_norm": 2.0604464851141597, "learning_rate": 4.516734438536128e-06, "loss": 0.5157, "step": 1445 }, { "epoch": 0.1356981981981982, "grad_norm": 2.1603725290463274, "learning_rate": 4.519862370972787e-06, "loss": 0.5292, "step": 1446 }, { "epoch": 0.13579204204204204, "grad_norm": 2.2908910680370616, "learning_rate": 4.522990303409447e-06, "loss": 0.5211, "step": 1447 }, { "epoch": 0.13588588588588588, "grad_norm": 1.8813938869139366, "learning_rate": 4.526118235846106e-06, "loss": 0.5376, "step": 1448 }, { "epoch": 0.13597972972972974, "grad_norm": 1.9214171769171753, "learning_rate": 4.529246168282765e-06, "loss": 0.5511, "step": 1449 }, { "epoch": 0.13607357357357358, "grad_norm": 1.726014642673216, "learning_rate": 4.5323741007194245e-06, "loss": 0.4672, "step": 1450 }, { "epoch": 0.13616741741741742, "grad_norm": 4.192709774265717, "learning_rate": 4.535502033156084e-06, "loss": 0.5589, "step": 1451 }, { "epoch": 0.13626126126126126, "grad_norm": 1.816006414711554, "learning_rate": 4.538629965592744e-06, "loss": 0.5497, "step": 1452 }, { "epoch": 0.1363551051051051, "grad_norm": 2.0652465707595837, "learning_rate": 4.541757898029403e-06, "loss": 0.5355, "step": 1453 }, { "epoch": 0.13644894894894896, "grad_norm": 2.3979430430519564, "learning_rate": 4.544885830466063e-06, "loss": 0.5258, "step": 1454 }, { "epoch": 0.1365427927927928, "grad_norm": 2.1498896534324055, "learning_rate": 4.548013762902721e-06, "loss": 0.5293, "step": 1455 }, { "epoch": 0.13663663663663664, "grad_norm": 2.1579954533200105, "learning_rate": 4.551141695339381e-06, "loss": 0.5346, "step": 1456 }, { "epoch": 0.13673048048048048, "grad_norm": 2.170329201718317, "learning_rate": 4.55426962777604e-06, "loss": 0.5897, "step": 1457 }, { "epoch": 0.13682432432432431, "grad_norm": 1.8417164610297454, "learning_rate": 4.5573975602126995e-06, "loss": 0.5919, "step": 1458 }, { "epoch": 0.13691816816816818, "grad_norm": 1.7696839491406908, "learning_rate": 4.5605254926493595e-06, "loss": 0.5417, "step": 1459 }, { "epoch": 0.13701201201201202, "grad_norm": 1.744019068208589, "learning_rate": 4.563653425086019e-06, "loss": 0.4958, "step": 1460 }, { "epoch": 0.13710585585585586, "grad_norm": 2.9180815336457293, "learning_rate": 4.566781357522678e-06, "loss": 0.5007, "step": 1461 }, { "epoch": 0.1371996996996997, "grad_norm": 1.8070625920804442, "learning_rate": 4.569909289959337e-06, "loss": 0.5106, "step": 1462 }, { "epoch": 0.13729354354354353, "grad_norm": 2.1553936275210046, "learning_rate": 4.573037222395997e-06, "loss": 0.5459, "step": 1463 }, { "epoch": 0.1373873873873874, "grad_norm": 1.8860968233198137, "learning_rate": 4.576165154832656e-06, "loss": 0.5118, "step": 1464 }, { "epoch": 0.13748123123123124, "grad_norm": 2.668401917242723, "learning_rate": 4.579293087269315e-06, "loss": 0.6101, "step": 1465 }, { "epoch": 0.13757507507507508, "grad_norm": 1.9061496486510894, "learning_rate": 4.5824210197059745e-06, "loss": 0.4883, "step": 1466 }, { "epoch": 0.13766891891891891, "grad_norm": 1.9364292053539622, "learning_rate": 4.585548952142634e-06, "loss": 0.503, "step": 1467 }, { "epoch": 0.13776276276276275, "grad_norm": 1.8731041484879782, "learning_rate": 4.588676884579294e-06, "loss": 0.5076, "step": 1468 }, { "epoch": 0.13785660660660662, "grad_norm": 1.8919719054343664, "learning_rate": 4.591804817015953e-06, "loss": 0.5131, "step": 1469 }, { "epoch": 0.13795045045045046, "grad_norm": 2.252977561534937, "learning_rate": 4.594932749452613e-06, "loss": 0.5763, "step": 1470 }, { "epoch": 0.1380442942942943, "grad_norm": 1.9155014999855586, "learning_rate": 4.598060681889271e-06, "loss": 0.5292, "step": 1471 }, { "epoch": 0.13813813813813813, "grad_norm": 9.174643623580002, "learning_rate": 4.601188614325931e-06, "loss": 0.5941, "step": 1472 }, { "epoch": 0.13823198198198197, "grad_norm": 1.8906944050303232, "learning_rate": 4.60431654676259e-06, "loss": 0.5054, "step": 1473 }, { "epoch": 0.13832582582582584, "grad_norm": 1.7827670810925134, "learning_rate": 4.6074444791992495e-06, "loss": 0.4979, "step": 1474 }, { "epoch": 0.13841966966966968, "grad_norm": 3.537414365876501, "learning_rate": 4.6105724116359095e-06, "loss": 0.5561, "step": 1475 }, { "epoch": 0.13851351351351351, "grad_norm": 1.8504514727956276, "learning_rate": 4.613700344072569e-06, "loss": 0.4936, "step": 1476 }, { "epoch": 0.13860735735735735, "grad_norm": 1.6946141069037988, "learning_rate": 4.616828276509228e-06, "loss": 0.5236, "step": 1477 }, { "epoch": 0.1387012012012012, "grad_norm": 1.9712725086778906, "learning_rate": 4.619956208945887e-06, "loss": 0.5228, "step": 1478 }, { "epoch": 0.13879504504504506, "grad_norm": 2.512150222114302, "learning_rate": 4.623084141382547e-06, "loss": 0.5201, "step": 1479 }, { "epoch": 0.1388888888888889, "grad_norm": 2.317568792708569, "learning_rate": 4.626212073819205e-06, "loss": 0.5599, "step": 1480 }, { "epoch": 0.13898273273273273, "grad_norm": 3.1790555071315447, "learning_rate": 4.629340006255865e-06, "loss": 0.5061, "step": 1481 }, { "epoch": 0.13907657657657657, "grad_norm": 2.1863425226417355, "learning_rate": 4.6324679386925245e-06, "loss": 0.6688, "step": 1482 }, { "epoch": 0.1391704204204204, "grad_norm": 2.0035367872573313, "learning_rate": 4.635595871129184e-06, "loss": 0.5243, "step": 1483 }, { "epoch": 0.13926426426426428, "grad_norm": 2.0755762411458503, "learning_rate": 4.638723803565844e-06, "loss": 0.5274, "step": 1484 }, { "epoch": 0.13935810810810811, "grad_norm": 1.8724521957995859, "learning_rate": 4.641851736002503e-06, "loss": 0.56, "step": 1485 }, { "epoch": 0.13945195195195195, "grad_norm": 1.7917219269793572, "learning_rate": 4.644979668439162e-06, "loss": 0.5354, "step": 1486 }, { "epoch": 0.1395457957957958, "grad_norm": 1.6316140478670569, "learning_rate": 4.648107600875821e-06, "loss": 0.5188, "step": 1487 }, { "epoch": 0.13963963963963963, "grad_norm": 1.7633672300458754, "learning_rate": 4.651235533312481e-06, "loss": 0.5195, "step": 1488 }, { "epoch": 0.1397334834834835, "grad_norm": 2.556799977243169, "learning_rate": 4.65436346574914e-06, "loss": 0.507, "step": 1489 }, { "epoch": 0.13982732732732733, "grad_norm": 3.4488666578282086, "learning_rate": 4.6574913981857994e-06, "loss": 0.5556, "step": 1490 }, { "epoch": 0.13992117117117117, "grad_norm": 2.008287127340385, "learning_rate": 4.660619330622459e-06, "loss": 0.5545, "step": 1491 }, { "epoch": 0.140015015015015, "grad_norm": 1.963727855487502, "learning_rate": 4.663747263059119e-06, "loss": 0.5512, "step": 1492 }, { "epoch": 0.14010885885885885, "grad_norm": 1.989131185108304, "learning_rate": 4.666875195495778e-06, "loss": 0.5576, "step": 1493 }, { "epoch": 0.14020270270270271, "grad_norm": 2.0826366597606274, "learning_rate": 4.670003127932437e-06, "loss": 0.546, "step": 1494 }, { "epoch": 0.14029654654654655, "grad_norm": 2.3947241930012493, "learning_rate": 4.673131060369097e-06, "loss": 0.5319, "step": 1495 }, { "epoch": 0.1403903903903904, "grad_norm": 1.9317019736608014, "learning_rate": 4.676258992805755e-06, "loss": 0.5268, "step": 1496 }, { "epoch": 0.14048423423423423, "grad_norm": 2.0447299979408213, "learning_rate": 4.679386925242415e-06, "loss": 0.5453, "step": 1497 }, { "epoch": 0.14057807807807807, "grad_norm": 1.7433214388192382, "learning_rate": 4.682514857679074e-06, "loss": 0.4898, "step": 1498 }, { "epoch": 0.14067192192192193, "grad_norm": 2.2498811163975074, "learning_rate": 4.685642790115734e-06, "loss": 0.5672, "step": 1499 }, { "epoch": 0.14076576576576577, "grad_norm": 3.303435277469517, "learning_rate": 4.688770722552394e-06, "loss": 0.5501, "step": 1500 }, { "epoch": 0.1408596096096096, "grad_norm": 5.318245557724294, "learning_rate": 4.691898654989053e-06, "loss": 0.4654, "step": 1501 }, { "epoch": 0.14095345345345345, "grad_norm": 2.6292206305450363, "learning_rate": 4.695026587425712e-06, "loss": 0.533, "step": 1502 }, { "epoch": 0.14104729729729729, "grad_norm": 1.8482580876474002, "learning_rate": 4.698154519862371e-06, "loss": 0.55, "step": 1503 }, { "epoch": 0.14114114114114115, "grad_norm": 4.533327434572361, "learning_rate": 4.701282452299031e-06, "loss": 0.5538, "step": 1504 }, { "epoch": 0.141234984984985, "grad_norm": 3.447843345985858, "learning_rate": 4.70441038473569e-06, "loss": 0.5077, "step": 1505 }, { "epoch": 0.14132882882882883, "grad_norm": 2.318669617174099, "learning_rate": 4.707538317172349e-06, "loss": 0.5549, "step": 1506 }, { "epoch": 0.14142267267267267, "grad_norm": 2.0471855153019427, "learning_rate": 4.7106662496090086e-06, "loss": 0.5958, "step": 1507 }, { "epoch": 0.1415165165165165, "grad_norm": 2.8510422370885973, "learning_rate": 4.7137941820456686e-06, "loss": 0.5057, "step": 1508 }, { "epoch": 0.14161036036036037, "grad_norm": 1.9341112705094075, "learning_rate": 4.716922114482328e-06, "loss": 0.5322, "step": 1509 }, { "epoch": 0.1417042042042042, "grad_norm": 2.40497561940164, "learning_rate": 4.720050046918987e-06, "loss": 0.5293, "step": 1510 }, { "epoch": 0.14179804804804805, "grad_norm": 1.970059434220401, "learning_rate": 4.723177979355647e-06, "loss": 0.512, "step": 1511 }, { "epoch": 0.14189189189189189, "grad_norm": 2.1124534159877495, "learning_rate": 4.726305911792305e-06, "loss": 0.5427, "step": 1512 }, { "epoch": 0.14198573573573572, "grad_norm": 2.1048438108992134, "learning_rate": 4.729433844228965e-06, "loss": 0.5416, "step": 1513 }, { "epoch": 0.1420795795795796, "grad_norm": 1.9155488071324527, "learning_rate": 4.732561776665624e-06, "loss": 0.512, "step": 1514 }, { "epoch": 0.14217342342342343, "grad_norm": 1.8165242489876052, "learning_rate": 4.7356897091022835e-06, "loss": 0.5727, "step": 1515 }, { "epoch": 0.14226726726726727, "grad_norm": 2.0654583926635177, "learning_rate": 4.738817641538943e-06, "loss": 0.5408, "step": 1516 }, { "epoch": 0.1423611111111111, "grad_norm": 2.8907529434594657, "learning_rate": 4.741945573975603e-06, "loss": 0.5374, "step": 1517 }, { "epoch": 0.14245495495495494, "grad_norm": 3.61322508142297, "learning_rate": 4.745073506412262e-06, "loss": 0.5291, "step": 1518 }, { "epoch": 0.1425487987987988, "grad_norm": 3.266298182974794, "learning_rate": 4.748201438848921e-06, "loss": 0.5282, "step": 1519 }, { "epoch": 0.14264264264264265, "grad_norm": 2.003714651198655, "learning_rate": 4.751329371285581e-06, "loss": 0.562, "step": 1520 }, { "epoch": 0.14273648648648649, "grad_norm": 2.6282711927836715, "learning_rate": 4.75445730372224e-06, "loss": 0.5617, "step": 1521 }, { "epoch": 0.14283033033033032, "grad_norm": 1.694676298577371, "learning_rate": 4.757585236158899e-06, "loss": 0.4914, "step": 1522 }, { "epoch": 0.14292417417417416, "grad_norm": 2.333734259447635, "learning_rate": 4.7607131685955585e-06, "loss": 0.478, "step": 1523 }, { "epoch": 0.14301801801801803, "grad_norm": 5.563757826647285, "learning_rate": 4.7638411010322185e-06, "loss": 0.5614, "step": 1524 }, { "epoch": 0.14311186186186187, "grad_norm": 2.9593493155029265, "learning_rate": 4.766969033468878e-06, "loss": 0.5716, "step": 1525 }, { "epoch": 0.1432057057057057, "grad_norm": 1.9755375047098234, "learning_rate": 4.770096965905537e-06, "loss": 0.5321, "step": 1526 }, { "epoch": 0.14329954954954954, "grad_norm": 4.58268318099452, "learning_rate": 4.773224898342196e-06, "loss": 0.5674, "step": 1527 }, { "epoch": 0.14339339339339338, "grad_norm": 1.7561087557970276, "learning_rate": 4.776352830778855e-06, "loss": 0.4842, "step": 1528 }, { "epoch": 0.14348723723723725, "grad_norm": 4.487304502372746, "learning_rate": 4.779480763215515e-06, "loss": 0.5007, "step": 1529 }, { "epoch": 0.14358108108108109, "grad_norm": 1.9771495280353568, "learning_rate": 4.782608695652174e-06, "loss": 0.5643, "step": 1530 }, { "epoch": 0.14367492492492492, "grad_norm": 2.0221959280370325, "learning_rate": 4.7857366280888335e-06, "loss": 0.5244, "step": 1531 }, { "epoch": 0.14376876876876876, "grad_norm": 1.9238729712123734, "learning_rate": 4.788864560525493e-06, "loss": 0.5478, "step": 1532 }, { "epoch": 0.1438626126126126, "grad_norm": 3.6062945882277044, "learning_rate": 4.791992492962153e-06, "loss": 0.5463, "step": 1533 }, { "epoch": 0.14395645645645647, "grad_norm": 2.437636232762534, "learning_rate": 4.795120425398812e-06, "loss": 0.5505, "step": 1534 }, { "epoch": 0.1440503003003003, "grad_norm": 1.6293366305791765, "learning_rate": 4.798248357835471e-06, "loss": 0.5111, "step": 1535 }, { "epoch": 0.14414414414414414, "grad_norm": 1.8809471895996117, "learning_rate": 4.801376290272131e-06, "loss": 0.5861, "step": 1536 }, { "epoch": 0.14423798798798798, "grad_norm": 2.0957356844823525, "learning_rate": 4.80450422270879e-06, "loss": 0.5762, "step": 1537 }, { "epoch": 0.14433183183183182, "grad_norm": 1.9911737415662067, "learning_rate": 4.807632155145449e-06, "loss": 0.5226, "step": 1538 }, { "epoch": 0.14442567567567569, "grad_norm": 2.378074018531537, "learning_rate": 4.8107600875821085e-06, "loss": 0.5707, "step": 1539 }, { "epoch": 0.14451951951951952, "grad_norm": 2.2102103725003723, "learning_rate": 4.8138880200187685e-06, "loss": 0.5008, "step": 1540 }, { "epoch": 0.14461336336336336, "grad_norm": 2.2157829600823558, "learning_rate": 4.817015952455427e-06, "loss": 0.561, "step": 1541 }, { "epoch": 0.1447072072072072, "grad_norm": 1.8200383321127067, "learning_rate": 4.820143884892087e-06, "loss": 0.5134, "step": 1542 }, { "epoch": 0.14480105105105104, "grad_norm": 2.2200382361160975, "learning_rate": 4.823271817328746e-06, "loss": 0.5387, "step": 1543 }, { "epoch": 0.1448948948948949, "grad_norm": 1.9136410523610017, "learning_rate": 4.826399749765405e-06, "loss": 0.5707, "step": 1544 }, { "epoch": 0.14498873873873874, "grad_norm": 3.840252019472985, "learning_rate": 4.829527682202065e-06, "loss": 0.5067, "step": 1545 }, { "epoch": 0.14508258258258258, "grad_norm": 2.164559063994277, "learning_rate": 4.832655614638724e-06, "loss": 0.5229, "step": 1546 }, { "epoch": 0.14517642642642642, "grad_norm": 2.740188252739313, "learning_rate": 4.8357835470753835e-06, "loss": 0.5456, "step": 1547 }, { "epoch": 0.14527027027027026, "grad_norm": 1.832591971430516, "learning_rate": 4.838911479512043e-06, "loss": 0.5936, "step": 1548 }, { "epoch": 0.14536411411411412, "grad_norm": 2.0302296482002307, "learning_rate": 4.842039411948703e-06, "loss": 0.5605, "step": 1549 }, { "epoch": 0.14545795795795796, "grad_norm": 2.4728642371468883, "learning_rate": 4.845167344385362e-06, "loss": 0.5793, "step": 1550 }, { "epoch": 0.1455518018018018, "grad_norm": 1.986791863487037, "learning_rate": 4.848295276822021e-06, "loss": 0.6069, "step": 1551 }, { "epoch": 0.14564564564564564, "grad_norm": 2.0274036224090213, "learning_rate": 4.85142320925868e-06, "loss": 0.5205, "step": 1552 }, { "epoch": 0.14573948948948948, "grad_norm": 2.8576789828529603, "learning_rate": 4.85455114169534e-06, "loss": 0.5242, "step": 1553 }, { "epoch": 0.14583333333333334, "grad_norm": 2.0846290856624705, "learning_rate": 4.857679074131999e-06, "loss": 0.5756, "step": 1554 }, { "epoch": 0.14592717717717718, "grad_norm": 1.9898421631868324, "learning_rate": 4.8608070065686584e-06, "loss": 0.5424, "step": 1555 }, { "epoch": 0.14602102102102102, "grad_norm": 2.3980237223938285, "learning_rate": 4.8639349390053184e-06, "loss": 0.5195, "step": 1556 }, { "epoch": 0.14611486486486486, "grad_norm": 2.1601982512616087, "learning_rate": 4.867062871441977e-06, "loss": 0.5352, "step": 1557 }, { "epoch": 0.1462087087087087, "grad_norm": 1.9622235625655624, "learning_rate": 4.870190803878637e-06, "loss": 0.4627, "step": 1558 }, { "epoch": 0.14630255255255256, "grad_norm": 1.8858717233324596, "learning_rate": 4.873318736315296e-06, "loss": 0.492, "step": 1559 }, { "epoch": 0.1463963963963964, "grad_norm": 1.7979454895272065, "learning_rate": 4.876446668751955e-06, "loss": 0.5457, "step": 1560 }, { "epoch": 0.14649024024024024, "grad_norm": 3.354477509986686, "learning_rate": 4.879574601188615e-06, "loss": 0.5529, "step": 1561 }, { "epoch": 0.14658408408408408, "grad_norm": 2.295840656444505, "learning_rate": 4.882702533625274e-06, "loss": 0.5126, "step": 1562 }, { "epoch": 0.14667792792792791, "grad_norm": 2.154528109518198, "learning_rate": 4.885830466061933e-06, "loss": 0.6467, "step": 1563 }, { "epoch": 0.14677177177177178, "grad_norm": 2.232618367105991, "learning_rate": 4.888958398498593e-06, "loss": 0.5266, "step": 1564 }, { "epoch": 0.14686561561561562, "grad_norm": 1.9280840292742751, "learning_rate": 4.892086330935253e-06, "loss": 0.5434, "step": 1565 }, { "epoch": 0.14695945945945946, "grad_norm": 1.8016734350395385, "learning_rate": 4.895214263371911e-06, "loss": 0.4747, "step": 1566 }, { "epoch": 0.1470533033033033, "grad_norm": 2.744094756040515, "learning_rate": 4.898342195808571e-06, "loss": 0.5514, "step": 1567 }, { "epoch": 0.14714714714714713, "grad_norm": 2.121013256192556, "learning_rate": 4.90147012824523e-06, "loss": 0.5102, "step": 1568 }, { "epoch": 0.147240990990991, "grad_norm": 2.3286371643477675, "learning_rate": 4.90459806068189e-06, "loss": 0.5372, "step": 1569 }, { "epoch": 0.14733483483483484, "grad_norm": 1.990195380408997, "learning_rate": 4.907725993118549e-06, "loss": 0.5259, "step": 1570 }, { "epoch": 0.14742867867867868, "grad_norm": 2.8975744312290974, "learning_rate": 4.910853925555208e-06, "loss": 0.5515, "step": 1571 }, { "epoch": 0.14752252252252251, "grad_norm": 2.659680903545972, "learning_rate": 4.913981857991868e-06, "loss": 0.5168, "step": 1572 }, { "epoch": 0.14761636636636635, "grad_norm": 1.8516841457729654, "learning_rate": 4.917109790428527e-06, "loss": 0.5348, "step": 1573 }, { "epoch": 0.14771021021021022, "grad_norm": 1.9656784272427106, "learning_rate": 4.920237722865187e-06, "loss": 0.5042, "step": 1574 }, { "epoch": 0.14780405405405406, "grad_norm": 4.937471567918905, "learning_rate": 4.923365655301846e-06, "loss": 0.5393, "step": 1575 }, { "epoch": 0.1478978978978979, "grad_norm": 1.6745024896841139, "learning_rate": 4.926493587738505e-06, "loss": 0.5045, "step": 1576 }, { "epoch": 0.14799174174174173, "grad_norm": 3.1214162172935866, "learning_rate": 4.929621520175164e-06, "loss": 0.536, "step": 1577 }, { "epoch": 0.14808558558558557, "grad_norm": 2.6725926823647823, "learning_rate": 4.932749452611824e-06, "loss": 0.533, "step": 1578 }, { "epoch": 0.14817942942942944, "grad_norm": 2.235855759867319, "learning_rate": 4.935877385048483e-06, "loss": 0.5696, "step": 1579 }, { "epoch": 0.14827327327327328, "grad_norm": 2.3086667733477895, "learning_rate": 4.9390053174851425e-06, "loss": 0.5157, "step": 1580 }, { "epoch": 0.14836711711711711, "grad_norm": 2.208860866949685, "learning_rate": 4.9421332499218025e-06, "loss": 0.5667, "step": 1581 }, { "epoch": 0.14846096096096095, "grad_norm": 1.810309659083635, "learning_rate": 4.945261182358461e-06, "loss": 0.5397, "step": 1582 }, { "epoch": 0.14855480480480482, "grad_norm": 1.922777526795265, "learning_rate": 4.948389114795121e-06, "loss": 0.5022, "step": 1583 }, { "epoch": 0.14864864864864866, "grad_norm": 1.9555576930810579, "learning_rate": 4.95151704723178e-06, "loss": 0.5276, "step": 1584 }, { "epoch": 0.1487424924924925, "grad_norm": 1.820662911494321, "learning_rate": 4.95464497966844e-06, "loss": 0.5469, "step": 1585 }, { "epoch": 0.14883633633633633, "grad_norm": 2.4966782025582255, "learning_rate": 4.957772912105099e-06, "loss": 0.51, "step": 1586 }, { "epoch": 0.14893018018018017, "grad_norm": 1.968746944799214, "learning_rate": 4.960900844541758e-06, "loss": 0.4644, "step": 1587 }, { "epoch": 0.14902402402402404, "grad_norm": 1.9338006279246902, "learning_rate": 4.9640287769784175e-06, "loss": 0.5184, "step": 1588 }, { "epoch": 0.14911786786786788, "grad_norm": 3.5106563684771297, "learning_rate": 4.967156709415077e-06, "loss": 0.5466, "step": 1589 }, { "epoch": 0.14921171171171171, "grad_norm": 2.467180539356911, "learning_rate": 4.970284641851737e-06, "loss": 0.513, "step": 1590 }, { "epoch": 0.14930555555555555, "grad_norm": 2.0178664230610956, "learning_rate": 4.973412574288396e-06, "loss": 0.5754, "step": 1591 }, { "epoch": 0.1493993993993994, "grad_norm": 3.9225830066765237, "learning_rate": 4.976540506725055e-06, "loss": 0.573, "step": 1592 }, { "epoch": 0.14949324324324326, "grad_norm": 2.0046471991753694, "learning_rate": 4.979668439161714e-06, "loss": 0.5933, "step": 1593 }, { "epoch": 0.1495870870870871, "grad_norm": 1.9501514115520917, "learning_rate": 4.982796371598374e-06, "loss": 0.5468, "step": 1594 }, { "epoch": 0.14968093093093093, "grad_norm": 1.9452811516113697, "learning_rate": 4.985924304035033e-06, "loss": 0.5596, "step": 1595 }, { "epoch": 0.14977477477477477, "grad_norm": 1.886201859380312, "learning_rate": 4.9890522364716925e-06, "loss": 0.4493, "step": 1596 }, { "epoch": 0.1498686186186186, "grad_norm": 1.9466156942233097, "learning_rate": 4.9921801689083525e-06, "loss": 0.5126, "step": 1597 }, { "epoch": 0.14996246246246248, "grad_norm": 6.04579425246962, "learning_rate": 4.995308101345011e-06, "loss": 0.5444, "step": 1598 }, { "epoch": 0.15005630630630631, "grad_norm": 1.7547411006257307, "learning_rate": 4.998436033781671e-06, "loss": 0.5762, "step": 1599 }, { "epoch": 0.15015015015015015, "grad_norm": 1.8440991126372661, "learning_rate": 5.00156396621833e-06, "loss": 0.5381, "step": 1600 }, { "epoch": 0.150243993993994, "grad_norm": 2.148166945471818, "learning_rate": 5.00469189865499e-06, "loss": 0.5738, "step": 1601 }, { "epoch": 0.15033783783783783, "grad_norm": 2.275738753992005, "learning_rate": 5.007819831091649e-06, "loss": 0.5545, "step": 1602 }, { "epoch": 0.1504316816816817, "grad_norm": 1.5996347024980107, "learning_rate": 5.0109477635283075e-06, "loss": 0.5102, "step": 1603 }, { "epoch": 0.15052552552552553, "grad_norm": 1.575926443610356, "learning_rate": 5.0140756959649675e-06, "loss": 0.4902, "step": 1604 }, { "epoch": 0.15061936936936937, "grad_norm": 5.064839347582082, "learning_rate": 5.017203628401627e-06, "loss": 0.6087, "step": 1605 }, { "epoch": 0.1507132132132132, "grad_norm": 2.1397926458200387, "learning_rate": 5.020331560838287e-06, "loss": 0.5019, "step": 1606 }, { "epoch": 0.15080705705705705, "grad_norm": 2.486517277693156, "learning_rate": 5.023459493274946e-06, "loss": 0.5111, "step": 1607 }, { "epoch": 0.15090090090090091, "grad_norm": 2.150988373168894, "learning_rate": 5.026587425711606e-06, "loss": 0.5605, "step": 1608 }, { "epoch": 0.15099474474474475, "grad_norm": 1.8195262543190833, "learning_rate": 5.029715358148265e-06, "loss": 0.5102, "step": 1609 }, { "epoch": 0.1510885885885886, "grad_norm": 1.7444609249905099, "learning_rate": 5.032843290584923e-06, "loss": 0.4975, "step": 1610 }, { "epoch": 0.15118243243243243, "grad_norm": 2.441544999841758, "learning_rate": 5.035971223021583e-06, "loss": 0.5742, "step": 1611 }, { "epoch": 0.15127627627627627, "grad_norm": 2.31504741220132, "learning_rate": 5.0390991554582425e-06, "loss": 0.5706, "step": 1612 }, { "epoch": 0.15137012012012013, "grad_norm": 2.1928941271944664, "learning_rate": 5.042227087894902e-06, "loss": 0.5157, "step": 1613 }, { "epoch": 0.15146396396396397, "grad_norm": 1.6377323069272243, "learning_rate": 5.045355020331562e-06, "loss": 0.5499, "step": 1614 }, { "epoch": 0.1515578078078078, "grad_norm": 1.8603094300616438, "learning_rate": 5.048482952768221e-06, "loss": 0.4986, "step": 1615 }, { "epoch": 0.15165165165165165, "grad_norm": 1.7456834908023477, "learning_rate": 5.051610885204879e-06, "loss": 0.5591, "step": 1616 }, { "epoch": 0.15174549549549549, "grad_norm": 1.8733040897771427, "learning_rate": 5.054738817641539e-06, "loss": 0.5463, "step": 1617 }, { "epoch": 0.15183933933933935, "grad_norm": 1.9191424665961996, "learning_rate": 5.057866750078198e-06, "loss": 0.5028, "step": 1618 }, { "epoch": 0.1519331831831832, "grad_norm": 1.7641590699388756, "learning_rate": 5.060994682514858e-06, "loss": 0.5274, "step": 1619 }, { "epoch": 0.15202702702702703, "grad_norm": 2.3957033471264673, "learning_rate": 5.0641226149515174e-06, "loss": 0.5232, "step": 1620 }, { "epoch": 0.15212087087087087, "grad_norm": 1.9188820789771028, "learning_rate": 5.0672505473881774e-06, "loss": 0.534, "step": 1621 }, { "epoch": 0.1522147147147147, "grad_norm": 2.1027393644514207, "learning_rate": 5.070378479824836e-06, "loss": 0.4877, "step": 1622 }, { "epoch": 0.15230855855855857, "grad_norm": 2.148516486003119, "learning_rate": 5.073506412261495e-06, "loss": 0.5539, "step": 1623 }, { "epoch": 0.1524024024024024, "grad_norm": 2.2848192239669936, "learning_rate": 5.076634344698155e-06, "loss": 0.4634, "step": 1624 }, { "epoch": 0.15249624624624625, "grad_norm": 1.8515191351379, "learning_rate": 5.079762277134814e-06, "loss": 0.573, "step": 1625 }, { "epoch": 0.15259009009009009, "grad_norm": 6.492029575371258, "learning_rate": 5.082890209571474e-06, "loss": 0.5034, "step": 1626 }, { "epoch": 0.15268393393393392, "grad_norm": 1.68858226517627, "learning_rate": 5.086018142008133e-06, "loss": 0.5174, "step": 1627 }, { "epoch": 0.1527777777777778, "grad_norm": 2.5913226520323676, "learning_rate": 5.089146074444793e-06, "loss": 0.5294, "step": 1628 }, { "epoch": 0.15287162162162163, "grad_norm": 1.959272717204824, "learning_rate": 5.092274006881452e-06, "loss": 0.5738, "step": 1629 }, { "epoch": 0.15296546546546547, "grad_norm": 2.0725334988172612, "learning_rate": 5.095401939318111e-06, "loss": 0.575, "step": 1630 }, { "epoch": 0.1530593093093093, "grad_norm": 1.7467717823624425, "learning_rate": 5.098529871754771e-06, "loss": 0.5288, "step": 1631 }, { "epoch": 0.15315315315315314, "grad_norm": 1.7914745607221356, "learning_rate": 5.10165780419143e-06, "loss": 0.5201, "step": 1632 }, { "epoch": 0.153246996996997, "grad_norm": 1.8845020091608473, "learning_rate": 5.10478573662809e-06, "loss": 0.56, "step": 1633 }, { "epoch": 0.15334084084084085, "grad_norm": 2.3534043414641537, "learning_rate": 5.107913669064749e-06, "loss": 0.5234, "step": 1634 }, { "epoch": 0.15343468468468469, "grad_norm": 1.7700537440405775, "learning_rate": 5.111041601501407e-06, "loss": 0.5508, "step": 1635 }, { "epoch": 0.15352852852852852, "grad_norm": 2.239592813053389, "learning_rate": 5.114169533938067e-06, "loss": 0.6045, "step": 1636 }, { "epoch": 0.15362237237237236, "grad_norm": 2.3018305910980152, "learning_rate": 5.1172974663747266e-06, "loss": 0.5479, "step": 1637 }, { "epoch": 0.15371621621621623, "grad_norm": 2.14324816606082, "learning_rate": 5.120425398811386e-06, "loss": 0.5215, "step": 1638 }, { "epoch": 0.15381006006006007, "grad_norm": 3.1930741582714366, "learning_rate": 5.123553331248046e-06, "loss": 0.5778, "step": 1639 }, { "epoch": 0.1539039039039039, "grad_norm": 2.188709904443202, "learning_rate": 5.126681263684705e-06, "loss": 0.5659, "step": 1640 }, { "epoch": 0.15399774774774774, "grad_norm": 2.003096645571683, "learning_rate": 5.129809196121365e-06, "loss": 0.5401, "step": 1641 }, { "epoch": 0.15409159159159158, "grad_norm": 1.9240439897747976, "learning_rate": 5.132937128558023e-06, "loss": 0.4855, "step": 1642 }, { "epoch": 0.15418543543543545, "grad_norm": 2.4706202121417307, "learning_rate": 5.136065060994682e-06, "loss": 0.527, "step": 1643 }, { "epoch": 0.15427927927927929, "grad_norm": 1.9640494738264684, "learning_rate": 5.139192993431342e-06, "loss": 0.5425, "step": 1644 }, { "epoch": 0.15437312312312312, "grad_norm": 2.516260979046117, "learning_rate": 5.1423209258680015e-06, "loss": 0.5721, "step": 1645 }, { "epoch": 0.15446696696696696, "grad_norm": 2.2595519190586173, "learning_rate": 5.1454488583046615e-06, "loss": 0.4965, "step": 1646 }, { "epoch": 0.1545608108108108, "grad_norm": 3.5395035224394307, "learning_rate": 5.148576790741321e-06, "loss": 0.5667, "step": 1647 }, { "epoch": 0.15465465465465467, "grad_norm": 1.8650257263128363, "learning_rate": 5.151704723177979e-06, "loss": 0.5143, "step": 1648 }, { "epoch": 0.1547484984984985, "grad_norm": 2.549299994886157, "learning_rate": 5.154832655614639e-06, "loss": 0.5566, "step": 1649 }, { "epoch": 0.15484234234234234, "grad_norm": 1.9140461032885387, "learning_rate": 5.157960588051298e-06, "loss": 0.6101, "step": 1650 }, { "epoch": 0.15493618618618618, "grad_norm": 1.7266986422529915, "learning_rate": 5.161088520487958e-06, "loss": 0.5091, "step": 1651 }, { "epoch": 0.15503003003003002, "grad_norm": 2.749620295801227, "learning_rate": 5.164216452924617e-06, "loss": 0.6305, "step": 1652 }, { "epoch": 0.15512387387387389, "grad_norm": 2.3145801104942993, "learning_rate": 5.167344385361277e-06, "loss": 0.5476, "step": 1653 }, { "epoch": 0.15521771771771772, "grad_norm": 7.916220666943615, "learning_rate": 5.170472317797936e-06, "loss": 0.4979, "step": 1654 }, { "epoch": 0.15531156156156156, "grad_norm": 2.424917411593017, "learning_rate": 5.173600250234595e-06, "loss": 0.5329, "step": 1655 }, { "epoch": 0.1554054054054054, "grad_norm": 2.3185643926213735, "learning_rate": 5.176728182671255e-06, "loss": 0.513, "step": 1656 }, { "epoch": 0.15549924924924924, "grad_norm": 1.8397557110952423, "learning_rate": 5.179856115107914e-06, "loss": 0.5494, "step": 1657 }, { "epoch": 0.1555930930930931, "grad_norm": 1.96636342238, "learning_rate": 5.182984047544574e-06, "loss": 0.5107, "step": 1658 }, { "epoch": 0.15568693693693694, "grad_norm": 5.007508055906378, "learning_rate": 5.186111979981233e-06, "loss": 0.6206, "step": 1659 }, { "epoch": 0.15578078078078078, "grad_norm": 1.9400576008909214, "learning_rate": 5.189239912417892e-06, "loss": 0.4979, "step": 1660 }, { "epoch": 0.15587462462462462, "grad_norm": 2.36791177391986, "learning_rate": 5.1923678448545515e-06, "loss": 0.5685, "step": 1661 }, { "epoch": 0.15596846846846846, "grad_norm": 2.449160218090824, "learning_rate": 5.195495777291211e-06, "loss": 0.5374, "step": 1662 }, { "epoch": 0.15606231231231232, "grad_norm": 3.191262500735621, "learning_rate": 5.19862370972787e-06, "loss": 0.5635, "step": 1663 }, { "epoch": 0.15615615615615616, "grad_norm": 2.4306759725279274, "learning_rate": 5.20175164216453e-06, "loss": 0.5528, "step": 1664 }, { "epoch": 0.15625, "grad_norm": 1.8083683991116213, "learning_rate": 5.204879574601189e-06, "loss": 0.5191, "step": 1665 }, { "epoch": 0.15634384384384384, "grad_norm": 2.1864905872248035, "learning_rate": 5.208007507037849e-06, "loss": 0.5159, "step": 1666 }, { "epoch": 0.15643768768768768, "grad_norm": 2.0124232235463118, "learning_rate": 5.211135439474507e-06, "loss": 0.506, "step": 1667 }, { "epoch": 0.15653153153153154, "grad_norm": 3.0002784688188724, "learning_rate": 5.2142633719111665e-06, "loss": 0.5218, "step": 1668 }, { "epoch": 0.15662537537537538, "grad_norm": 1.8108267278340757, "learning_rate": 5.2173913043478265e-06, "loss": 0.5263, "step": 1669 }, { "epoch": 0.15671921921921922, "grad_norm": 2.1353971516785917, "learning_rate": 5.220519236784486e-06, "loss": 0.5651, "step": 1670 }, { "epoch": 0.15681306306306306, "grad_norm": 1.9742576328385517, "learning_rate": 5.223647169221146e-06, "loss": 0.514, "step": 1671 }, { "epoch": 0.1569069069069069, "grad_norm": 1.7657364848593684, "learning_rate": 5.226775101657805e-06, "loss": 0.5622, "step": 1672 }, { "epoch": 0.15700075075075076, "grad_norm": 1.7795639905443499, "learning_rate": 5.229903034094465e-06, "loss": 0.578, "step": 1673 }, { "epoch": 0.1570945945945946, "grad_norm": 2.0274537768925605, "learning_rate": 5.233030966531123e-06, "loss": 0.5134, "step": 1674 }, { "epoch": 0.15718843843843844, "grad_norm": 1.867113465675082, "learning_rate": 5.236158898967782e-06, "loss": 0.5233, "step": 1675 }, { "epoch": 0.15728228228228228, "grad_norm": 1.8634057841799267, "learning_rate": 5.239286831404442e-06, "loss": 0.5791, "step": 1676 }, { "epoch": 0.15737612612612611, "grad_norm": 1.7501482170263365, "learning_rate": 5.2424147638411015e-06, "loss": 0.5021, "step": 1677 }, { "epoch": 0.15746996996996998, "grad_norm": 11.723182204707436, "learning_rate": 5.2455426962777615e-06, "loss": 0.4691, "step": 1678 }, { "epoch": 0.15756381381381382, "grad_norm": 1.8599911844151633, "learning_rate": 5.248670628714421e-06, "loss": 0.6028, "step": 1679 }, { "epoch": 0.15765765765765766, "grad_norm": 1.844563297496644, "learning_rate": 5.251798561151079e-06, "loss": 0.4971, "step": 1680 }, { "epoch": 0.1577515015015015, "grad_norm": 1.6436637103072558, "learning_rate": 5.254926493587739e-06, "loss": 0.514, "step": 1681 }, { "epoch": 0.15784534534534533, "grad_norm": 2.009614972681039, "learning_rate": 5.258054426024398e-06, "loss": 0.4851, "step": 1682 }, { "epoch": 0.1579391891891892, "grad_norm": 1.8105965140667755, "learning_rate": 5.261182358461058e-06, "loss": 0.5565, "step": 1683 }, { "epoch": 0.15803303303303304, "grad_norm": 1.9543776112571276, "learning_rate": 5.264310290897717e-06, "loss": 0.4932, "step": 1684 }, { "epoch": 0.15812687687687688, "grad_norm": 2.0755186989511962, "learning_rate": 5.2674382233343764e-06, "loss": 0.6145, "step": 1685 }, { "epoch": 0.15822072072072071, "grad_norm": 8.216501763823604, "learning_rate": 5.270566155771036e-06, "loss": 0.5848, "step": 1686 }, { "epoch": 0.15831456456456455, "grad_norm": 2.510270087306177, "learning_rate": 5.273694088207695e-06, "loss": 0.5249, "step": 1687 }, { "epoch": 0.15840840840840842, "grad_norm": 1.888163876375961, "learning_rate": 5.276822020644354e-06, "loss": 0.5228, "step": 1688 }, { "epoch": 0.15850225225225226, "grad_norm": 2.1126896921911773, "learning_rate": 5.279949953081014e-06, "loss": 0.5295, "step": 1689 }, { "epoch": 0.1585960960960961, "grad_norm": 2.199326726047398, "learning_rate": 5.283077885517673e-06, "loss": 0.5511, "step": 1690 }, { "epoch": 0.15868993993993993, "grad_norm": 2.3036434474402943, "learning_rate": 5.286205817954333e-06, "loss": 0.543, "step": 1691 }, { "epoch": 0.15878378378378377, "grad_norm": 3.9226652500491053, "learning_rate": 5.289333750390992e-06, "loss": 0.5405, "step": 1692 }, { "epoch": 0.15887762762762764, "grad_norm": 2.25277870525058, "learning_rate": 5.2924616828276506e-06, "loss": 0.5314, "step": 1693 }, { "epoch": 0.15897147147147148, "grad_norm": 2.0931897856773434, "learning_rate": 5.295589615264311e-06, "loss": 0.5262, "step": 1694 }, { "epoch": 0.15906531531531531, "grad_norm": 1.9934133624951633, "learning_rate": 5.29871754770097e-06, "loss": 0.6214, "step": 1695 }, { "epoch": 0.15915915915915915, "grad_norm": 2.103693715551854, "learning_rate": 5.30184548013763e-06, "loss": 0.52, "step": 1696 }, { "epoch": 0.159253003003003, "grad_norm": 2.219395580926849, "learning_rate": 5.304973412574289e-06, "loss": 0.515, "step": 1697 }, { "epoch": 0.15934684684684686, "grad_norm": 2.507367237107173, "learning_rate": 5.308101345010949e-06, "loss": 0.5554, "step": 1698 }, { "epoch": 0.1594406906906907, "grad_norm": 2.3827440600592116, "learning_rate": 5.311229277447607e-06, "loss": 0.568, "step": 1699 }, { "epoch": 0.15953453453453453, "grad_norm": 2.251613418727302, "learning_rate": 5.314357209884266e-06, "loss": 0.5286, "step": 1700 }, { "epoch": 0.15962837837837837, "grad_norm": 2.2082079917044792, "learning_rate": 5.317485142320926e-06, "loss": 0.5226, "step": 1701 }, { "epoch": 0.1597222222222222, "grad_norm": 2.0766541518383366, "learning_rate": 5.3206130747575856e-06, "loss": 0.5412, "step": 1702 }, { "epoch": 0.15981606606606608, "grad_norm": 2.037976252463236, "learning_rate": 5.3237410071942456e-06, "loss": 0.5213, "step": 1703 }, { "epoch": 0.15990990990990991, "grad_norm": 2.0065434140029694, "learning_rate": 5.326868939630905e-06, "loss": 0.5718, "step": 1704 }, { "epoch": 0.16000375375375375, "grad_norm": 1.7339270305982244, "learning_rate": 5.329996872067565e-06, "loss": 0.5309, "step": 1705 }, { "epoch": 0.1600975975975976, "grad_norm": 1.9792967323235313, "learning_rate": 5.333124804504223e-06, "loss": 0.5263, "step": 1706 }, { "epoch": 0.16019144144144143, "grad_norm": 2.1214301936814577, "learning_rate": 5.336252736940882e-06, "loss": 0.5435, "step": 1707 }, { "epoch": 0.1602852852852853, "grad_norm": 4.043580942805707, "learning_rate": 5.339380669377542e-06, "loss": 0.5799, "step": 1708 }, { "epoch": 0.16037912912912913, "grad_norm": 1.849669144900422, "learning_rate": 5.342508601814201e-06, "loss": 0.5495, "step": 1709 }, { "epoch": 0.16047297297297297, "grad_norm": 2.1185063685081373, "learning_rate": 5.3456365342508605e-06, "loss": 0.5895, "step": 1710 }, { "epoch": 0.1605668168168168, "grad_norm": 2.3785282963154963, "learning_rate": 5.3487644666875205e-06, "loss": 0.5912, "step": 1711 }, { "epoch": 0.16066066066066065, "grad_norm": 3.030244597847424, "learning_rate": 5.351892399124179e-06, "loss": 0.4996, "step": 1712 }, { "epoch": 0.16075450450450451, "grad_norm": 2.8996170291853103, "learning_rate": 5.355020331560839e-06, "loss": 0.5615, "step": 1713 }, { "epoch": 0.16084834834834835, "grad_norm": 2.8491167913312214, "learning_rate": 5.358148263997498e-06, "loss": 0.5736, "step": 1714 }, { "epoch": 0.1609421921921922, "grad_norm": 3.4543569215919243, "learning_rate": 5.361276196434157e-06, "loss": 0.5467, "step": 1715 }, { "epoch": 0.16103603603603603, "grad_norm": 3.1356829951730907, "learning_rate": 5.364404128870817e-06, "loss": 0.5947, "step": 1716 }, { "epoch": 0.16112987987987987, "grad_norm": 2.2553628782569723, "learning_rate": 5.367532061307476e-06, "loss": 0.4982, "step": 1717 }, { "epoch": 0.16122372372372373, "grad_norm": 3.5767734961562025, "learning_rate": 5.370659993744135e-06, "loss": 0.5144, "step": 1718 }, { "epoch": 0.16131756756756757, "grad_norm": 2.0753693615355533, "learning_rate": 5.373787926180795e-06, "loss": 0.5889, "step": 1719 }, { "epoch": 0.1614114114114114, "grad_norm": 2.4849122969982456, "learning_rate": 5.376915858617454e-06, "loss": 0.5393, "step": 1720 }, { "epoch": 0.16150525525525525, "grad_norm": 1.8003041339032702, "learning_rate": 5.380043791054114e-06, "loss": 0.551, "step": 1721 }, { "epoch": 0.16159909909909909, "grad_norm": 1.6396596576185336, "learning_rate": 5.383171723490773e-06, "loss": 0.5251, "step": 1722 }, { "epoch": 0.16169294294294295, "grad_norm": 1.6241411337308318, "learning_rate": 5.386299655927433e-06, "loss": 0.5287, "step": 1723 }, { "epoch": 0.1617867867867868, "grad_norm": 1.9363745627194664, "learning_rate": 5.389427588364092e-06, "loss": 0.5484, "step": 1724 }, { "epoch": 0.16188063063063063, "grad_norm": 1.789152092070657, "learning_rate": 5.3925555208007505e-06, "loss": 0.5356, "step": 1725 }, { "epoch": 0.16197447447447447, "grad_norm": 2.082040123266829, "learning_rate": 5.3956834532374105e-06, "loss": 0.5279, "step": 1726 }, { "epoch": 0.1620683183183183, "grad_norm": 2.078758865050361, "learning_rate": 5.39881138567407e-06, "loss": 0.5635, "step": 1727 }, { "epoch": 0.16216216216216217, "grad_norm": 1.9585112146314698, "learning_rate": 5.40193931811073e-06, "loss": 0.5396, "step": 1728 }, { "epoch": 0.162256006006006, "grad_norm": 3.1929522696251973, "learning_rate": 5.405067250547389e-06, "loss": 0.4991, "step": 1729 }, { "epoch": 0.16234984984984985, "grad_norm": 1.891559915631046, "learning_rate": 5.408195182984049e-06, "loss": 0.5478, "step": 1730 }, { "epoch": 0.16244369369369369, "grad_norm": 1.5442398578665393, "learning_rate": 5.411323115420707e-06, "loss": 0.5101, "step": 1731 }, { "epoch": 0.16253753753753752, "grad_norm": 2.2133313101009993, "learning_rate": 5.414451047857366e-06, "loss": 0.5722, "step": 1732 }, { "epoch": 0.1626313813813814, "grad_norm": 3.1357724108262652, "learning_rate": 5.417578980294026e-06, "loss": 0.5242, "step": 1733 }, { "epoch": 0.16272522522522523, "grad_norm": 1.7295620507679805, "learning_rate": 5.4207069127306855e-06, "loss": 0.5224, "step": 1734 }, { "epoch": 0.16281906906906907, "grad_norm": 1.5335001834981024, "learning_rate": 5.423834845167345e-06, "loss": 0.4792, "step": 1735 }, { "epoch": 0.1629129129129129, "grad_norm": 2.181834473840547, "learning_rate": 5.426962777604005e-06, "loss": 0.4961, "step": 1736 }, { "epoch": 0.16300675675675674, "grad_norm": 1.7558440667701996, "learning_rate": 5.430090710040663e-06, "loss": 0.5684, "step": 1737 }, { "epoch": 0.1631006006006006, "grad_norm": 1.9172603367626888, "learning_rate": 5.433218642477323e-06, "loss": 0.5444, "step": 1738 }, { "epoch": 0.16319444444444445, "grad_norm": 1.6834436155365124, "learning_rate": 5.436346574913982e-06, "loss": 0.5029, "step": 1739 }, { "epoch": 0.16328828828828829, "grad_norm": 2.056388583382157, "learning_rate": 5.439474507350641e-06, "loss": 0.5213, "step": 1740 }, { "epoch": 0.16338213213213212, "grad_norm": 2.680859165413758, "learning_rate": 5.442602439787301e-06, "loss": 0.5416, "step": 1741 }, { "epoch": 0.16347597597597596, "grad_norm": 1.8878782724513123, "learning_rate": 5.4457303722239605e-06, "loss": 0.6062, "step": 1742 }, { "epoch": 0.16356981981981983, "grad_norm": 2.3288830056552805, "learning_rate": 5.4488583046606205e-06, "loss": 0.5054, "step": 1743 }, { "epoch": 0.16366366366366367, "grad_norm": 2.4186326136802827, "learning_rate": 5.451986237097279e-06, "loss": 0.5226, "step": 1744 }, { "epoch": 0.1637575075075075, "grad_norm": 1.776925243650813, "learning_rate": 5.455114169533938e-06, "loss": 0.5485, "step": 1745 }, { "epoch": 0.16385135135135134, "grad_norm": 2.1515548124501067, "learning_rate": 5.458242101970598e-06, "loss": 0.5311, "step": 1746 }, { "epoch": 0.16394519519519518, "grad_norm": 2.050768890855737, "learning_rate": 5.461370034407257e-06, "loss": 0.5536, "step": 1747 }, { "epoch": 0.16403903903903905, "grad_norm": 1.7153025484468962, "learning_rate": 5.464497966843917e-06, "loss": 0.5341, "step": 1748 }, { "epoch": 0.16413288288288289, "grad_norm": 1.9608481853815263, "learning_rate": 5.467625899280576e-06, "loss": 0.5422, "step": 1749 }, { "epoch": 0.16422672672672672, "grad_norm": 2.030986947494877, "learning_rate": 5.470753831717235e-06, "loss": 0.5315, "step": 1750 }, { "epoch": 0.16432057057057056, "grad_norm": 2.225573528421858, "learning_rate": 5.473881764153895e-06, "loss": 0.5544, "step": 1751 }, { "epoch": 0.16441441441441443, "grad_norm": 1.559220710653767, "learning_rate": 5.477009696590554e-06, "loss": 0.4995, "step": 1752 }, { "epoch": 0.16450825825825827, "grad_norm": 2.2889907075913505, "learning_rate": 5.480137629027214e-06, "loss": 0.5195, "step": 1753 }, { "epoch": 0.1646021021021021, "grad_norm": 2.2721347010062973, "learning_rate": 5.483265561463873e-06, "loss": 0.5337, "step": 1754 }, { "epoch": 0.16469594594594594, "grad_norm": 5.544506386668742, "learning_rate": 5.486393493900533e-06, "loss": 0.4714, "step": 1755 }, { "epoch": 0.16478978978978978, "grad_norm": 1.8024894493955435, "learning_rate": 5.489521426337192e-06, "loss": 0.519, "step": 1756 }, { "epoch": 0.16488363363363365, "grad_norm": 2.0550131086106016, "learning_rate": 5.49264935877385e-06, "loss": 0.5512, "step": 1757 }, { "epoch": 0.16497747747747749, "grad_norm": 2.0147997228973336, "learning_rate": 5.49577729121051e-06, "loss": 0.5797, "step": 1758 }, { "epoch": 0.16507132132132132, "grad_norm": 1.9050477009902838, "learning_rate": 5.4989052236471696e-06, "loss": 0.533, "step": 1759 }, { "epoch": 0.16516516516516516, "grad_norm": 2.5447505942354076, "learning_rate": 5.502033156083829e-06, "loss": 0.5326, "step": 1760 }, { "epoch": 0.165259009009009, "grad_norm": 1.9833727155655558, "learning_rate": 5.505161088520489e-06, "loss": 0.5674, "step": 1761 }, { "epoch": 0.16535285285285287, "grad_norm": 2.0215776207876592, "learning_rate": 5.508289020957148e-06, "loss": 0.6019, "step": 1762 }, { "epoch": 0.1654466966966967, "grad_norm": 1.76570753484795, "learning_rate": 5.511416953393807e-06, "loss": 0.5094, "step": 1763 }, { "epoch": 0.16554054054054054, "grad_norm": 1.8844807496157285, "learning_rate": 5.514544885830466e-06, "loss": 0.5444, "step": 1764 }, { "epoch": 0.16563438438438438, "grad_norm": 1.7764982457143572, "learning_rate": 5.517672818267125e-06, "loss": 0.5024, "step": 1765 }, { "epoch": 0.16572822822822822, "grad_norm": 2.151891591052822, "learning_rate": 5.520800750703785e-06, "loss": 0.5292, "step": 1766 }, { "epoch": 0.16582207207207209, "grad_norm": 1.7124061387771468, "learning_rate": 5.5239286831404446e-06, "loss": 0.5329, "step": 1767 }, { "epoch": 0.16591591591591592, "grad_norm": 2.2555420328853835, "learning_rate": 5.5270566155771046e-06, "loss": 0.5775, "step": 1768 }, { "epoch": 0.16600975975975976, "grad_norm": 2.1906154782076497, "learning_rate": 5.530184548013763e-06, "loss": 0.559, "step": 1769 }, { "epoch": 0.1661036036036036, "grad_norm": 2.2003921843412173, "learning_rate": 5.533312480450422e-06, "loss": 0.4988, "step": 1770 }, { "epoch": 0.16619744744744744, "grad_norm": 4.568595945594202, "learning_rate": 5.536440412887082e-06, "loss": 0.5579, "step": 1771 }, { "epoch": 0.1662912912912913, "grad_norm": 1.6803004313063534, "learning_rate": 5.539568345323741e-06, "loss": 0.5085, "step": 1772 }, { "epoch": 0.16638513513513514, "grad_norm": 1.8145261157278652, "learning_rate": 5.542696277760401e-06, "loss": 0.4658, "step": 1773 }, { "epoch": 0.16647897897897898, "grad_norm": 2.0264601482723252, "learning_rate": 5.54582421019706e-06, "loss": 0.5165, "step": 1774 }, { "epoch": 0.16657282282282282, "grad_norm": 1.8761774488188727, "learning_rate": 5.54895214263372e-06, "loss": 0.5456, "step": 1775 }, { "epoch": 0.16666666666666666, "grad_norm": 2.3512916971966167, "learning_rate": 5.552080075070379e-06, "loss": 0.5482, "step": 1776 }, { "epoch": 0.16676051051051052, "grad_norm": 1.8110709506885345, "learning_rate": 5.555208007507038e-06, "loss": 0.5565, "step": 1777 }, { "epoch": 0.16685435435435436, "grad_norm": 1.9638559061039715, "learning_rate": 5.558335939943698e-06, "loss": 0.5362, "step": 1778 }, { "epoch": 0.1669481981981982, "grad_norm": 2.667685019321078, "learning_rate": 5.561463872380357e-06, "loss": 0.5744, "step": 1779 }, { "epoch": 0.16704204204204204, "grad_norm": 1.8583102061305299, "learning_rate": 5.564591804817017e-06, "loss": 0.4635, "step": 1780 }, { "epoch": 0.16713588588588588, "grad_norm": 1.8800455419414468, "learning_rate": 5.567719737253676e-06, "loss": 0.5891, "step": 1781 }, { "epoch": 0.16722972972972974, "grad_norm": 1.817867816794718, "learning_rate": 5.5708476696903345e-06, "loss": 0.5106, "step": 1782 }, { "epoch": 0.16732357357357358, "grad_norm": 1.945476378851802, "learning_rate": 5.5739756021269945e-06, "loss": 0.5549, "step": 1783 }, { "epoch": 0.16741741741741742, "grad_norm": 2.302650786479491, "learning_rate": 5.577103534563654e-06, "loss": 0.5688, "step": 1784 }, { "epoch": 0.16751126126126126, "grad_norm": 2.2895877590801774, "learning_rate": 5.580231467000314e-06, "loss": 0.548, "step": 1785 }, { "epoch": 0.1676051051051051, "grad_norm": 1.6239771612770824, "learning_rate": 5.583359399436973e-06, "loss": 0.479, "step": 1786 }, { "epoch": 0.16769894894894896, "grad_norm": 10.6993696416702, "learning_rate": 5.586487331873632e-06, "loss": 0.5441, "step": 1787 }, { "epoch": 0.1677927927927928, "grad_norm": 2.16607677169821, "learning_rate": 5.589615264310292e-06, "loss": 0.5636, "step": 1788 }, { "epoch": 0.16788663663663664, "grad_norm": 2.2201588833369827, "learning_rate": 5.59274319674695e-06, "loss": 0.4635, "step": 1789 }, { "epoch": 0.16798048048048048, "grad_norm": 2.133054585635744, "learning_rate": 5.5958711291836095e-06, "loss": 0.6111, "step": 1790 }, { "epoch": 0.16807432432432431, "grad_norm": 2.0911072905216166, "learning_rate": 5.5989990616202695e-06, "loss": 0.4784, "step": 1791 }, { "epoch": 0.16816816816816818, "grad_norm": 2.010864513253652, "learning_rate": 5.602126994056929e-06, "loss": 0.5066, "step": 1792 }, { "epoch": 0.16826201201201202, "grad_norm": 2.0885359213313985, "learning_rate": 5.605254926493589e-06, "loss": 0.5436, "step": 1793 }, { "epoch": 0.16835585585585586, "grad_norm": 1.9615171869707355, "learning_rate": 5.608382858930248e-06, "loss": 0.5214, "step": 1794 }, { "epoch": 0.1684496996996997, "grad_norm": 1.7307197525765794, "learning_rate": 5.611510791366906e-06, "loss": 0.4612, "step": 1795 }, { "epoch": 0.16854354354354353, "grad_norm": 1.7262927030224602, "learning_rate": 5.614638723803566e-06, "loss": 0.5523, "step": 1796 }, { "epoch": 0.1686373873873874, "grad_norm": 1.9775309347631778, "learning_rate": 5.617766656240225e-06, "loss": 0.4972, "step": 1797 }, { "epoch": 0.16873123123123124, "grad_norm": 1.970074830960554, "learning_rate": 5.620894588676885e-06, "loss": 0.5323, "step": 1798 }, { "epoch": 0.16882507507507508, "grad_norm": 1.6692770755603985, "learning_rate": 5.6240225211135445e-06, "loss": 0.6222, "step": 1799 }, { "epoch": 0.16891891891891891, "grad_norm": 1.7402415524081556, "learning_rate": 5.6271504535502045e-06, "loss": 0.5641, "step": 1800 }, { "epoch": 0.16901276276276275, "grad_norm": 1.4845389218722744, "learning_rate": 5.630278385986863e-06, "loss": 0.5047, "step": 1801 }, { "epoch": 0.16910660660660662, "grad_norm": 2.4260868349908766, "learning_rate": 5.633406318423522e-06, "loss": 0.5287, "step": 1802 }, { "epoch": 0.16920045045045046, "grad_norm": 1.8120971592524517, "learning_rate": 5.636534250860182e-06, "loss": 0.5282, "step": 1803 }, { "epoch": 0.1692942942942943, "grad_norm": 1.8619459920055201, "learning_rate": 5.639662183296841e-06, "loss": 0.53, "step": 1804 }, { "epoch": 0.16938813813813813, "grad_norm": 1.7044661266212358, "learning_rate": 5.642790115733501e-06, "loss": 0.5179, "step": 1805 }, { "epoch": 0.16948198198198197, "grad_norm": 1.7971317772240276, "learning_rate": 5.64591804817016e-06, "loss": 0.533, "step": 1806 }, { "epoch": 0.16957582582582584, "grad_norm": 2.4267995766497323, "learning_rate": 5.6490459806068195e-06, "loss": 0.5303, "step": 1807 }, { "epoch": 0.16966966966966968, "grad_norm": 1.8577230363749522, "learning_rate": 5.652173913043479e-06, "loss": 0.5616, "step": 1808 }, { "epoch": 0.16976351351351351, "grad_norm": 2.651053631158886, "learning_rate": 5.655301845480138e-06, "loss": 0.5064, "step": 1809 }, { "epoch": 0.16985735735735735, "grad_norm": 1.841196972278522, "learning_rate": 5.658429777916798e-06, "loss": 0.5037, "step": 1810 }, { "epoch": 0.1699512012012012, "grad_norm": 2.030023042581501, "learning_rate": 5.661557710353457e-06, "loss": 0.5098, "step": 1811 }, { "epoch": 0.17004504504504506, "grad_norm": 1.714996738837268, "learning_rate": 5.664685642790116e-06, "loss": 0.5716, "step": 1812 }, { "epoch": 0.1701388888888889, "grad_norm": 2.2558850394961127, "learning_rate": 5.667813575226776e-06, "loss": 0.5418, "step": 1813 }, { "epoch": 0.17023273273273273, "grad_norm": 1.9963247689500734, "learning_rate": 5.6709415076634344e-06, "loss": 0.5479, "step": 1814 }, { "epoch": 0.17032657657657657, "grad_norm": 1.9094991859658934, "learning_rate": 5.674069440100094e-06, "loss": 0.5621, "step": 1815 }, { "epoch": 0.1704204204204204, "grad_norm": 1.6369809430778348, "learning_rate": 5.677197372536754e-06, "loss": 0.5593, "step": 1816 }, { "epoch": 0.17051426426426428, "grad_norm": 2.1912557618120947, "learning_rate": 5.680325304973413e-06, "loss": 0.5628, "step": 1817 }, { "epoch": 0.17060810810810811, "grad_norm": 2.383477614468437, "learning_rate": 5.683453237410073e-06, "loss": 0.5177, "step": 1818 }, { "epoch": 0.17070195195195195, "grad_norm": 1.9959940851579856, "learning_rate": 5.686581169846732e-06, "loss": 0.5707, "step": 1819 }, { "epoch": 0.1707957957957958, "grad_norm": 2.164541307336708, "learning_rate": 5.689709102283392e-06, "loss": 0.507, "step": 1820 }, { "epoch": 0.17088963963963963, "grad_norm": 2.261396449501976, "learning_rate": 5.69283703472005e-06, "loss": 0.578, "step": 1821 }, { "epoch": 0.1709834834834835, "grad_norm": 13.942696267075222, "learning_rate": 5.695964967156709e-06, "loss": 0.5185, "step": 1822 }, { "epoch": 0.17107732732732733, "grad_norm": 2.318940724785211, "learning_rate": 5.699092899593369e-06, "loss": 0.54, "step": 1823 }, { "epoch": 0.17117117117117117, "grad_norm": 2.5196418817904545, "learning_rate": 5.7022208320300286e-06, "loss": 0.5455, "step": 1824 }, { "epoch": 0.171265015015015, "grad_norm": 1.701490523199021, "learning_rate": 5.705348764466689e-06, "loss": 0.4628, "step": 1825 }, { "epoch": 0.17135885885885885, "grad_norm": 3.321759190439543, "learning_rate": 5.708476696903348e-06, "loss": 0.52, "step": 1826 }, { "epoch": 0.17145270270270271, "grad_norm": 1.6805079429229701, "learning_rate": 5.711604629340006e-06, "loss": 0.5528, "step": 1827 }, { "epoch": 0.17154654654654655, "grad_norm": 2.7256941745723404, "learning_rate": 5.714732561776666e-06, "loss": 0.519, "step": 1828 }, { "epoch": 0.1716403903903904, "grad_norm": 2.4226880973707674, "learning_rate": 5.717860494213325e-06, "loss": 0.4797, "step": 1829 }, { "epoch": 0.17173423423423423, "grad_norm": 1.7346614852889042, "learning_rate": 5.720988426649985e-06, "loss": 0.5218, "step": 1830 }, { "epoch": 0.17182807807807807, "grad_norm": 1.5667498364964216, "learning_rate": 5.724116359086644e-06, "loss": 0.5083, "step": 1831 }, { "epoch": 0.17192192192192193, "grad_norm": 1.810826606047938, "learning_rate": 5.7272442915233036e-06, "loss": 0.4826, "step": 1832 }, { "epoch": 0.17201576576576577, "grad_norm": 2.015674041103638, "learning_rate": 5.730372223959963e-06, "loss": 0.4987, "step": 1833 }, { "epoch": 0.1721096096096096, "grad_norm": 1.8508236498250796, "learning_rate": 5.733500156396622e-06, "loss": 0.5202, "step": 1834 }, { "epoch": 0.17220345345345345, "grad_norm": 2.046142055030391, "learning_rate": 5.736628088833282e-06, "loss": 0.4807, "step": 1835 }, { "epoch": 0.17229729729729729, "grad_norm": 2.5115523764829, "learning_rate": 5.739756021269941e-06, "loss": 0.5452, "step": 1836 }, { "epoch": 0.17239114114114115, "grad_norm": 1.8423456637418412, "learning_rate": 5.7428839537066e-06, "loss": 0.5661, "step": 1837 }, { "epoch": 0.172484984984985, "grad_norm": 2.030075603567123, "learning_rate": 5.74601188614326e-06, "loss": 0.5896, "step": 1838 }, { "epoch": 0.17257882882882883, "grad_norm": 1.6387564223787032, "learning_rate": 5.749139818579919e-06, "loss": 0.5189, "step": 1839 }, { "epoch": 0.17267267267267267, "grad_norm": 2.141236092607261, "learning_rate": 5.752267751016578e-06, "loss": 0.5166, "step": 1840 }, { "epoch": 0.1727665165165165, "grad_norm": 2.1541689349847943, "learning_rate": 5.755395683453238e-06, "loss": 0.4804, "step": 1841 }, { "epoch": 0.17286036036036037, "grad_norm": 2.1584602479120645, "learning_rate": 5.758523615889897e-06, "loss": 0.4668, "step": 1842 }, { "epoch": 0.1729542042042042, "grad_norm": 3.8060522149843057, "learning_rate": 5.761651548326557e-06, "loss": 0.5193, "step": 1843 }, { "epoch": 0.17304804804804805, "grad_norm": 2.1086811778963868, "learning_rate": 5.764779480763216e-06, "loss": 0.5407, "step": 1844 }, { "epoch": 0.17314189189189189, "grad_norm": 2.8062035638694818, "learning_rate": 5.767907413199876e-06, "loss": 0.5136, "step": 1845 }, { "epoch": 0.17323573573573572, "grad_norm": 3.737494498984866, "learning_rate": 5.771035345636534e-06, "loss": 0.5683, "step": 1846 }, { "epoch": 0.1733295795795796, "grad_norm": 1.8544760110944207, "learning_rate": 5.7741632780731935e-06, "loss": 0.5437, "step": 1847 }, { "epoch": 0.17342342342342343, "grad_norm": 1.8749561700500454, "learning_rate": 5.7772912105098535e-06, "loss": 0.5447, "step": 1848 }, { "epoch": 0.17351726726726727, "grad_norm": 1.9838462268635642, "learning_rate": 5.780419142946513e-06, "loss": 0.5927, "step": 1849 }, { "epoch": 0.1736111111111111, "grad_norm": 2.0526785741496605, "learning_rate": 5.783547075383173e-06, "loss": 0.5557, "step": 1850 }, { "epoch": 0.17370495495495494, "grad_norm": 2.594744410912754, "learning_rate": 5.786675007819832e-06, "loss": 0.5004, "step": 1851 }, { "epoch": 0.1737987987987988, "grad_norm": 3.432592762783028, "learning_rate": 5.789802940256492e-06, "loss": 0.5514, "step": 1852 }, { "epoch": 0.17389264264264265, "grad_norm": 2.4780347943005148, "learning_rate": 5.79293087269315e-06, "loss": 0.4915, "step": 1853 }, { "epoch": 0.17398648648648649, "grad_norm": 1.9241598757451377, "learning_rate": 5.796058805129809e-06, "loss": 0.5389, "step": 1854 }, { "epoch": 0.17408033033033032, "grad_norm": 2.0228682447907085, "learning_rate": 5.799186737566469e-06, "loss": 0.4968, "step": 1855 }, { "epoch": 0.17417417417417416, "grad_norm": 1.937772132099993, "learning_rate": 5.8023146700031285e-06, "loss": 0.4818, "step": 1856 }, { "epoch": 0.17426801801801803, "grad_norm": 2.1952943689776707, "learning_rate": 5.8054426024397885e-06, "loss": 0.4893, "step": 1857 }, { "epoch": 0.17436186186186187, "grad_norm": 1.8159350395795388, "learning_rate": 5.808570534876448e-06, "loss": 0.4891, "step": 1858 }, { "epoch": 0.1744557057057057, "grad_norm": 1.8546151863124738, "learning_rate": 5.811698467313106e-06, "loss": 0.5372, "step": 1859 }, { "epoch": 0.17454954954954954, "grad_norm": 1.7557449623751455, "learning_rate": 5.814826399749766e-06, "loss": 0.5095, "step": 1860 }, { "epoch": 0.17464339339339338, "grad_norm": 2.1064177010713254, "learning_rate": 5.817954332186425e-06, "loss": 0.4922, "step": 1861 }, { "epoch": 0.17473723723723725, "grad_norm": 2.7484931539212503, "learning_rate": 5.821082264623084e-06, "loss": 0.5111, "step": 1862 }, { "epoch": 0.17483108108108109, "grad_norm": 2.2252348018512156, "learning_rate": 5.824210197059744e-06, "loss": 0.5979, "step": 1863 }, { "epoch": 0.17492492492492492, "grad_norm": 5.043370464175951, "learning_rate": 5.8273381294964035e-06, "loss": 0.5241, "step": 1864 }, { "epoch": 0.17501876876876876, "grad_norm": 1.8092328406869989, "learning_rate": 5.830466061933062e-06, "loss": 0.5035, "step": 1865 }, { "epoch": 0.1751126126126126, "grad_norm": 1.6782899502235846, "learning_rate": 5.833593994369722e-06, "loss": 0.5277, "step": 1866 }, { "epoch": 0.17520645645645647, "grad_norm": 1.8399053172733284, "learning_rate": 5.836721926806381e-06, "loss": 0.5289, "step": 1867 }, { "epoch": 0.1753003003003003, "grad_norm": 2.2326463836671904, "learning_rate": 5.839849859243041e-06, "loss": 0.4983, "step": 1868 }, { "epoch": 0.17539414414414414, "grad_norm": 1.8230954126259296, "learning_rate": 5.8429777916797e-06, "loss": 0.5717, "step": 1869 }, { "epoch": 0.17548798798798798, "grad_norm": 1.5776312819160974, "learning_rate": 5.84610572411636e-06, "loss": 0.4988, "step": 1870 }, { "epoch": 0.17558183183183182, "grad_norm": 1.8218553461931462, "learning_rate": 5.849233656553019e-06, "loss": 0.4731, "step": 1871 }, { "epoch": 0.17567567567567569, "grad_norm": 1.7445584875866462, "learning_rate": 5.852361588989678e-06, "loss": 0.4964, "step": 1872 }, { "epoch": 0.17576951951951952, "grad_norm": 1.7995256702312152, "learning_rate": 5.855489521426338e-06, "loss": 0.5172, "step": 1873 }, { "epoch": 0.17586336336336336, "grad_norm": 1.9082803018664958, "learning_rate": 5.858617453862997e-06, "loss": 0.5829, "step": 1874 }, { "epoch": 0.1759572072072072, "grad_norm": 1.9772715973034163, "learning_rate": 5.861745386299657e-06, "loss": 0.4912, "step": 1875 }, { "epoch": 0.17605105105105104, "grad_norm": 3.248463915039392, "learning_rate": 5.864873318736316e-06, "loss": 0.5605, "step": 1876 }, { "epoch": 0.1761448948948949, "grad_norm": 1.9872209538047252, "learning_rate": 5.868001251172976e-06, "loss": 0.4865, "step": 1877 }, { "epoch": 0.17623873873873874, "grad_norm": 2.256499313544717, "learning_rate": 5.871129183609634e-06, "loss": 0.5151, "step": 1878 }, { "epoch": 0.17633258258258258, "grad_norm": 3.261639212079086, "learning_rate": 5.874257116046293e-06, "loss": 0.5282, "step": 1879 }, { "epoch": 0.17642642642642642, "grad_norm": 1.8288646245675042, "learning_rate": 5.8773850484829534e-06, "loss": 0.5821, "step": 1880 }, { "epoch": 0.17652027027027026, "grad_norm": 3.932269406033051, "learning_rate": 5.880512980919613e-06, "loss": 0.5015, "step": 1881 }, { "epoch": 0.17661411411411412, "grad_norm": 1.923007717953018, "learning_rate": 5.883640913356273e-06, "loss": 0.5893, "step": 1882 }, { "epoch": 0.17670795795795796, "grad_norm": 1.876073563678065, "learning_rate": 5.886768845792932e-06, "loss": 0.5671, "step": 1883 }, { "epoch": 0.1768018018018018, "grad_norm": 2.0318593362752257, "learning_rate": 5.889896778229591e-06, "loss": 0.5345, "step": 1884 }, { "epoch": 0.17689564564564564, "grad_norm": 2.113605592488298, "learning_rate": 5.89302471066625e-06, "loss": 0.5464, "step": 1885 }, { "epoch": 0.17698948948948948, "grad_norm": 1.7288149903104042, "learning_rate": 5.896152643102909e-06, "loss": 0.5027, "step": 1886 }, { "epoch": 0.17708333333333334, "grad_norm": 1.9245092264129962, "learning_rate": 5.899280575539568e-06, "loss": 0.5913, "step": 1887 }, { "epoch": 0.17717717717717718, "grad_norm": 2.6249483209583477, "learning_rate": 5.902408507976228e-06, "loss": 0.4453, "step": 1888 }, { "epoch": 0.17727102102102102, "grad_norm": 2.0999198468628153, "learning_rate": 5.9055364404128876e-06, "loss": 0.5147, "step": 1889 }, { "epoch": 0.17736486486486486, "grad_norm": 1.8782048646621232, "learning_rate": 5.908664372849548e-06, "loss": 0.531, "step": 1890 }, { "epoch": 0.1774587087087087, "grad_norm": 2.10639192200829, "learning_rate": 5.911792305286206e-06, "loss": 0.5077, "step": 1891 }, { "epoch": 0.17755255255255256, "grad_norm": 1.618940404821947, "learning_rate": 5.914920237722865e-06, "loss": 0.5271, "step": 1892 }, { "epoch": 0.1776463963963964, "grad_norm": 1.8128377919181313, "learning_rate": 5.918048170159525e-06, "loss": 0.563, "step": 1893 }, { "epoch": 0.17774024024024024, "grad_norm": 1.6332690732599653, "learning_rate": 5.921176102596184e-06, "loss": 0.5091, "step": 1894 }, { "epoch": 0.17783408408408408, "grad_norm": 1.7053631554826676, "learning_rate": 5.924304035032844e-06, "loss": 0.5529, "step": 1895 }, { "epoch": 0.17792792792792791, "grad_norm": 2.493122260306159, "learning_rate": 5.927431967469503e-06, "loss": 0.4712, "step": 1896 }, { "epoch": 0.17802177177177178, "grad_norm": 1.987638393599794, "learning_rate": 5.930559899906162e-06, "loss": 0.5322, "step": 1897 }, { "epoch": 0.17811561561561562, "grad_norm": 5.323289284589887, "learning_rate": 5.933687832342822e-06, "loss": 0.5618, "step": 1898 }, { "epoch": 0.17820945945945946, "grad_norm": 1.589778459110594, "learning_rate": 5.936815764779481e-06, "loss": 0.5271, "step": 1899 }, { "epoch": 0.1783033033033033, "grad_norm": 1.7705399995360174, "learning_rate": 5.939943697216141e-06, "loss": 0.5289, "step": 1900 }, { "epoch": 0.17839714714714713, "grad_norm": 2.0429385063221774, "learning_rate": 5.9430716296528e-06, "loss": 0.5539, "step": 1901 }, { "epoch": 0.178490990990991, "grad_norm": 1.8352041892394502, "learning_rate": 5.94619956208946e-06, "loss": 0.606, "step": 1902 }, { "epoch": 0.17858483483483484, "grad_norm": 1.9030510695319502, "learning_rate": 5.949327494526119e-06, "loss": 0.5489, "step": 1903 }, { "epoch": 0.17867867867867868, "grad_norm": 2.057361337864836, "learning_rate": 5.9524554269627775e-06, "loss": 0.5631, "step": 1904 }, { "epoch": 0.17877252252252251, "grad_norm": 1.5508891997318535, "learning_rate": 5.9555833593994375e-06, "loss": 0.5389, "step": 1905 }, { "epoch": 0.17886636636636635, "grad_norm": 1.618937302476965, "learning_rate": 5.958711291836097e-06, "loss": 0.5223, "step": 1906 }, { "epoch": 0.17896021021021022, "grad_norm": 1.6678017821954259, "learning_rate": 5.961839224272757e-06, "loss": 0.517, "step": 1907 }, { "epoch": 0.17905405405405406, "grad_norm": 1.7893389279220842, "learning_rate": 5.964967156709416e-06, "loss": 0.5164, "step": 1908 }, { "epoch": 0.1791478978978979, "grad_norm": 2.912206989969486, "learning_rate": 5.968095089146075e-06, "loss": 0.5163, "step": 1909 }, { "epoch": 0.17924174174174173, "grad_norm": 2.0550671693064015, "learning_rate": 5.971223021582734e-06, "loss": 0.4955, "step": 1910 }, { "epoch": 0.17933558558558557, "grad_norm": 1.816355969964052, "learning_rate": 5.974350954019393e-06, "loss": 0.5546, "step": 1911 }, { "epoch": 0.17942942942942944, "grad_norm": 1.7255338712511428, "learning_rate": 5.9774788864560525e-06, "loss": 0.5373, "step": 1912 }, { "epoch": 0.17952327327327328, "grad_norm": 1.6809710068950483, "learning_rate": 5.9806068188927125e-06, "loss": 0.4814, "step": 1913 }, { "epoch": 0.17961711711711711, "grad_norm": 1.9173701229754974, "learning_rate": 5.983734751329372e-06, "loss": 0.5285, "step": 1914 }, { "epoch": 0.17971096096096095, "grad_norm": 1.9944872971066936, "learning_rate": 5.986862683766032e-06, "loss": 0.5512, "step": 1915 }, { "epoch": 0.17980480480480482, "grad_norm": 1.7799110165490384, "learning_rate": 5.989990616202691e-06, "loss": 0.5269, "step": 1916 }, { "epoch": 0.17989864864864866, "grad_norm": 2.003183046298369, "learning_rate": 5.993118548639349e-06, "loss": 0.5643, "step": 1917 }, { "epoch": 0.1799924924924925, "grad_norm": 1.5716778204623636, "learning_rate": 5.996246481076009e-06, "loss": 0.5074, "step": 1918 }, { "epoch": 0.18008633633633633, "grad_norm": 1.7846708086097112, "learning_rate": 5.999374413512668e-06, "loss": 0.5355, "step": 1919 }, { "epoch": 0.18018018018018017, "grad_norm": 1.5496187238315662, "learning_rate": 6.002502345949328e-06, "loss": 0.536, "step": 1920 }, { "epoch": 0.18027402402402404, "grad_norm": 1.5122797305137303, "learning_rate": 6.0056302783859875e-06, "loss": 0.5153, "step": 1921 }, { "epoch": 0.18036786786786788, "grad_norm": 1.6492433730447336, "learning_rate": 6.0087582108226475e-06, "loss": 0.5567, "step": 1922 }, { "epoch": 0.18046171171171171, "grad_norm": 1.5517026176860804, "learning_rate": 6.011886143259306e-06, "loss": 0.5399, "step": 1923 }, { "epoch": 0.18055555555555555, "grad_norm": 1.9556220235964699, "learning_rate": 6.015014075695965e-06, "loss": 0.5173, "step": 1924 }, { "epoch": 0.1806493993993994, "grad_norm": 2.5154126614428365, "learning_rate": 6.018142008132625e-06, "loss": 0.5148, "step": 1925 }, { "epoch": 0.18074324324324326, "grad_norm": 1.6196178474340088, "learning_rate": 6.021269940569284e-06, "loss": 0.5327, "step": 1926 }, { "epoch": 0.1808370870870871, "grad_norm": 1.607945828912033, "learning_rate": 6.024397873005944e-06, "loss": 0.4692, "step": 1927 }, { "epoch": 0.18093093093093093, "grad_norm": 1.4853706713177321, "learning_rate": 6.027525805442603e-06, "loss": 0.5534, "step": 1928 }, { "epoch": 0.18102477477477477, "grad_norm": 1.7967694446699736, "learning_rate": 6.030653737879262e-06, "loss": 0.5353, "step": 1929 }, { "epoch": 0.1811186186186186, "grad_norm": 2.1405358099440597, "learning_rate": 6.033781670315922e-06, "loss": 0.5452, "step": 1930 }, { "epoch": 0.18121246246246248, "grad_norm": 1.5052146909045536, "learning_rate": 6.036909602752581e-06, "loss": 0.4904, "step": 1931 }, { "epoch": 0.18130630630630631, "grad_norm": 1.6063549320759263, "learning_rate": 6.040037535189241e-06, "loss": 0.5245, "step": 1932 }, { "epoch": 0.18140015015015015, "grad_norm": 3.98039795406885, "learning_rate": 6.0431654676259e-06, "loss": 0.5339, "step": 1933 }, { "epoch": 0.181493993993994, "grad_norm": 1.7778933058663142, "learning_rate": 6.046293400062559e-06, "loss": 0.5809, "step": 1934 }, { "epoch": 0.18158783783783783, "grad_norm": 4.93394964713865, "learning_rate": 6.049421332499219e-06, "loss": 0.5449, "step": 1935 }, { "epoch": 0.1816816816816817, "grad_norm": 2.552511768821708, "learning_rate": 6.0525492649358774e-06, "loss": 0.5476, "step": 1936 }, { "epoch": 0.18177552552552553, "grad_norm": 2.7383592260215956, "learning_rate": 6.055677197372537e-06, "loss": 0.5424, "step": 1937 }, { "epoch": 0.18186936936936937, "grad_norm": 1.782557907549461, "learning_rate": 6.058805129809197e-06, "loss": 0.5159, "step": 1938 }, { "epoch": 0.1819632132132132, "grad_norm": 6.792082341997555, "learning_rate": 6.061933062245856e-06, "loss": 0.5166, "step": 1939 }, { "epoch": 0.18205705705705705, "grad_norm": 6.152402267655315, "learning_rate": 6.065060994682516e-06, "loss": 0.5075, "step": 1940 }, { "epoch": 0.18215090090090091, "grad_norm": 1.5497191247600153, "learning_rate": 6.068188927119175e-06, "loss": 0.5556, "step": 1941 }, { "epoch": 0.18224474474474475, "grad_norm": 1.6458024639396867, "learning_rate": 6.071316859555833e-06, "loss": 0.5498, "step": 1942 }, { "epoch": 0.1823385885885886, "grad_norm": 1.629971744334342, "learning_rate": 6.074444791992493e-06, "loss": 0.5088, "step": 1943 }, { "epoch": 0.18243243243243243, "grad_norm": 2.563603930187991, "learning_rate": 6.077572724429152e-06, "loss": 0.5033, "step": 1944 }, { "epoch": 0.18252627627627627, "grad_norm": 1.8543103944085004, "learning_rate": 6.0807006568658124e-06, "loss": 0.5582, "step": 1945 }, { "epoch": 0.18262012012012013, "grad_norm": 2.3106558958084915, "learning_rate": 6.083828589302472e-06, "loss": 0.5399, "step": 1946 }, { "epoch": 0.18271396396396397, "grad_norm": 1.7059463515034814, "learning_rate": 6.086956521739132e-06, "loss": 0.5564, "step": 1947 }, { "epoch": 0.1828078078078078, "grad_norm": 1.6096184779558604, "learning_rate": 6.09008445417579e-06, "loss": 0.5381, "step": 1948 }, { "epoch": 0.18290165165165165, "grad_norm": 1.6274864162066354, "learning_rate": 6.093212386612449e-06, "loss": 0.5749, "step": 1949 }, { "epoch": 0.18299549549549549, "grad_norm": 2.189363803370657, "learning_rate": 6.096340319049109e-06, "loss": 0.5267, "step": 1950 }, { "epoch": 0.18308933933933935, "grad_norm": 3.347907102077346, "learning_rate": 6.099468251485768e-06, "loss": 0.5126, "step": 1951 }, { "epoch": 0.1831831831831832, "grad_norm": 1.6998187915721867, "learning_rate": 6.102596183922428e-06, "loss": 0.5294, "step": 1952 }, { "epoch": 0.18327702702702703, "grad_norm": 1.848807161362, "learning_rate": 6.105724116359087e-06, "loss": 0.5571, "step": 1953 }, { "epoch": 0.18337087087087087, "grad_norm": 1.7921753400084952, "learning_rate": 6.108852048795747e-06, "loss": 0.5215, "step": 1954 }, { "epoch": 0.1834647147147147, "grad_norm": 1.9216512400690349, "learning_rate": 6.111979981232406e-06, "loss": 0.5045, "step": 1955 }, { "epoch": 0.18355855855855857, "grad_norm": 1.7681854164745026, "learning_rate": 6.115107913669065e-06, "loss": 0.5542, "step": 1956 }, { "epoch": 0.1836524024024024, "grad_norm": 1.7924248725949488, "learning_rate": 6.118235846105725e-06, "loss": 0.5382, "step": 1957 }, { "epoch": 0.18374624624624625, "grad_norm": 2.1422880335664836, "learning_rate": 6.121363778542384e-06, "loss": 0.5309, "step": 1958 }, { "epoch": 0.18384009009009009, "grad_norm": 2.069750309609182, "learning_rate": 6.124491710979043e-06, "loss": 0.5189, "step": 1959 }, { "epoch": 0.18393393393393392, "grad_norm": 2.5250804790002968, "learning_rate": 6.127619643415703e-06, "loss": 0.5897, "step": 1960 }, { "epoch": 0.1840277777777778, "grad_norm": 1.9255050662975273, "learning_rate": 6.1307475758523615e-06, "loss": 0.4876, "step": 1961 }, { "epoch": 0.18412162162162163, "grad_norm": 1.9009445242389005, "learning_rate": 6.133875508289021e-06, "loss": 0.5417, "step": 1962 }, { "epoch": 0.18421546546546547, "grad_norm": 1.6094662062101153, "learning_rate": 6.137003440725681e-06, "loss": 0.5346, "step": 1963 }, { "epoch": 0.1843093093093093, "grad_norm": 1.7885179386254173, "learning_rate": 6.14013137316234e-06, "loss": 0.5977, "step": 1964 }, { "epoch": 0.18440315315315314, "grad_norm": 1.8996715598930942, "learning_rate": 6.143259305599e-06, "loss": 0.4947, "step": 1965 }, { "epoch": 0.184496996996997, "grad_norm": 1.8181714040568333, "learning_rate": 6.146387238035659e-06, "loss": 0.4976, "step": 1966 }, { "epoch": 0.18459084084084085, "grad_norm": 1.616969055471485, "learning_rate": 6.149515170472319e-06, "loss": 0.4677, "step": 1967 }, { "epoch": 0.18468468468468469, "grad_norm": 1.8950639315073434, "learning_rate": 6.152643102908977e-06, "loss": 0.5317, "step": 1968 }, { "epoch": 0.18477852852852852, "grad_norm": 1.896112242392531, "learning_rate": 6.1557710353456365e-06, "loss": 0.5021, "step": 1969 }, { "epoch": 0.18487237237237236, "grad_norm": 2.721503891061479, "learning_rate": 6.1588989677822965e-06, "loss": 0.5781, "step": 1970 }, { "epoch": 0.18496621621621623, "grad_norm": 1.767814267859377, "learning_rate": 6.162026900218956e-06, "loss": 0.5113, "step": 1971 }, { "epoch": 0.18506006006006007, "grad_norm": 1.8787445973654957, "learning_rate": 6.165154832655616e-06, "loss": 0.547, "step": 1972 }, { "epoch": 0.1851539039039039, "grad_norm": 1.8764734252594948, "learning_rate": 6.168282765092275e-06, "loss": 0.5904, "step": 1973 }, { "epoch": 0.18524774774774774, "grad_norm": 1.892813522702546, "learning_rate": 6.171410697528933e-06, "loss": 0.5344, "step": 1974 }, { "epoch": 0.18534159159159158, "grad_norm": 1.673793460777609, "learning_rate": 6.174538629965593e-06, "loss": 0.533, "step": 1975 }, { "epoch": 0.18543543543543545, "grad_norm": 1.6661808220770686, "learning_rate": 6.177666562402252e-06, "loss": 0.5162, "step": 1976 }, { "epoch": 0.18552927927927929, "grad_norm": 1.6867095793720028, "learning_rate": 6.180794494838912e-06, "loss": 0.5225, "step": 1977 }, { "epoch": 0.18562312312312312, "grad_norm": 1.8533958099193504, "learning_rate": 6.1839224272755715e-06, "loss": 0.535, "step": 1978 }, { "epoch": 0.18571696696696696, "grad_norm": 2.241465445118882, "learning_rate": 6.1870503597122315e-06, "loss": 0.5489, "step": 1979 }, { "epoch": 0.1858108108108108, "grad_norm": 2.2112274614304734, "learning_rate": 6.19017829214889e-06, "loss": 0.5273, "step": 1980 }, { "epoch": 0.18590465465465467, "grad_norm": 2.0212927918893335, "learning_rate": 6.193306224585549e-06, "loss": 0.5537, "step": 1981 }, { "epoch": 0.1859984984984985, "grad_norm": 3.158666655227477, "learning_rate": 6.196434157022209e-06, "loss": 0.5266, "step": 1982 }, { "epoch": 0.18609234234234234, "grad_norm": 2.05749879191305, "learning_rate": 6.199562089458868e-06, "loss": 0.5335, "step": 1983 }, { "epoch": 0.18618618618618618, "grad_norm": 1.6633598711015152, "learning_rate": 6.202690021895527e-06, "loss": 0.519, "step": 1984 }, { "epoch": 0.18628003003003002, "grad_norm": 1.5801195167409392, "learning_rate": 6.205817954332187e-06, "loss": 0.5227, "step": 1985 }, { "epoch": 0.18637387387387389, "grad_norm": 1.6071893746823647, "learning_rate": 6.2089458867688465e-06, "loss": 0.5603, "step": 1986 }, { "epoch": 0.18646771771771772, "grad_norm": 1.7476532620767464, "learning_rate": 6.212073819205506e-06, "loss": 0.5261, "step": 1987 }, { "epoch": 0.18656156156156156, "grad_norm": 1.9775243067012145, "learning_rate": 6.215201751642165e-06, "loss": 0.5166, "step": 1988 }, { "epoch": 0.1866554054054054, "grad_norm": 1.822565037905469, "learning_rate": 6.218329684078824e-06, "loss": 0.5419, "step": 1989 }, { "epoch": 0.18674924924924924, "grad_norm": 1.8428247841222325, "learning_rate": 6.221457616515484e-06, "loss": 0.5317, "step": 1990 }, { "epoch": 0.1868430930930931, "grad_norm": 1.743419083862006, "learning_rate": 6.224585548952143e-06, "loss": 0.5763, "step": 1991 }, { "epoch": 0.18693693693693694, "grad_norm": 1.693259332450631, "learning_rate": 6.227713481388803e-06, "loss": 0.5453, "step": 1992 }, { "epoch": 0.18703078078078078, "grad_norm": 1.7635597745535576, "learning_rate": 6.2308414138254615e-06, "loss": 0.5422, "step": 1993 }, { "epoch": 0.18712462462462462, "grad_norm": 2.6278850043920983, "learning_rate": 6.233969346262121e-06, "loss": 0.5147, "step": 1994 }, { "epoch": 0.18721846846846846, "grad_norm": 1.9709849912401909, "learning_rate": 6.237097278698781e-06, "loss": 0.5646, "step": 1995 }, { "epoch": 0.18731231231231232, "grad_norm": 1.4575243006564158, "learning_rate": 6.24022521113544e-06, "loss": 0.5569, "step": 1996 }, { "epoch": 0.18740615615615616, "grad_norm": 1.8806553637746977, "learning_rate": 6.2433531435721e-06, "loss": 0.4909, "step": 1997 }, { "epoch": 0.1875, "grad_norm": 2.0902464629472735, "learning_rate": 6.246481076008759e-06, "loss": 0.6454, "step": 1998 }, { "epoch": 0.18759384384384384, "grad_norm": 1.5480807859701848, "learning_rate": 6.249609008445419e-06, "loss": 0.5313, "step": 1999 }, { "epoch": 0.18768768768768768, "grad_norm": 1.5010221009907623, "learning_rate": 6.252736940882077e-06, "loss": 0.5378, "step": 2000 }, { "epoch": 0.18778153153153154, "grad_norm": 1.4762120484894432, "learning_rate": 6.2558648733187364e-06, "loss": 0.5157, "step": 2001 }, { "epoch": 0.18787537537537538, "grad_norm": 1.702378041357641, "learning_rate": 6.2589928057553964e-06, "loss": 0.5041, "step": 2002 }, { "epoch": 0.18796921921921922, "grad_norm": 1.8789286227310595, "learning_rate": 6.262120738192056e-06, "loss": 0.5771, "step": 2003 }, { "epoch": 0.18806306306306306, "grad_norm": 1.9225301951005678, "learning_rate": 6.265248670628716e-06, "loss": 0.5133, "step": 2004 }, { "epoch": 0.1881569069069069, "grad_norm": 2.9915236226780078, "learning_rate": 6.268376603065375e-06, "loss": 0.509, "step": 2005 }, { "epoch": 0.18825075075075076, "grad_norm": 1.5576271653035292, "learning_rate": 6.271504535502033e-06, "loss": 0.533, "step": 2006 }, { "epoch": 0.1883445945945946, "grad_norm": 1.961138933990576, "learning_rate": 6.274632467938693e-06, "loss": 0.5378, "step": 2007 }, { "epoch": 0.18843843843843844, "grad_norm": 1.8890647705004995, "learning_rate": 6.277760400375352e-06, "loss": 0.5608, "step": 2008 }, { "epoch": 0.18853228228228228, "grad_norm": 1.8986853558525558, "learning_rate": 6.280888332812011e-06, "loss": 0.5294, "step": 2009 }, { "epoch": 0.18862612612612611, "grad_norm": 2.6160114970561352, "learning_rate": 6.2840162652486714e-06, "loss": 0.4989, "step": 2010 }, { "epoch": 0.18871996996996998, "grad_norm": 1.5670236162180788, "learning_rate": 6.287144197685331e-06, "loss": 0.5619, "step": 2011 }, { "epoch": 0.18881381381381382, "grad_norm": 1.6277427465425804, "learning_rate": 6.29027213012199e-06, "loss": 0.4674, "step": 2012 }, { "epoch": 0.18890765765765766, "grad_norm": 1.5320484835908086, "learning_rate": 6.293400062558649e-06, "loss": 0.4984, "step": 2013 }, { "epoch": 0.1890015015015015, "grad_norm": 2.365734172838379, "learning_rate": 6.296527994995308e-06, "loss": 0.5234, "step": 2014 }, { "epoch": 0.18909534534534533, "grad_norm": 1.9349802333675399, "learning_rate": 6.299655927431968e-06, "loss": 0.5097, "step": 2015 }, { "epoch": 0.1891891891891892, "grad_norm": 1.5976934690590592, "learning_rate": 6.302783859868627e-06, "loss": 0.5252, "step": 2016 }, { "epoch": 0.18928303303303304, "grad_norm": 1.7581797513619881, "learning_rate": 6.305911792305287e-06, "loss": 0.5617, "step": 2017 }, { "epoch": 0.18937687687687688, "grad_norm": 1.69624982700178, "learning_rate": 6.309039724741946e-06, "loss": 0.5229, "step": 2018 }, { "epoch": 0.18947072072072071, "grad_norm": 1.5868188352454737, "learning_rate": 6.312167657178605e-06, "loss": 0.5163, "step": 2019 }, { "epoch": 0.18956456456456455, "grad_norm": 1.657382354383611, "learning_rate": 6.315295589615265e-06, "loss": 0.5108, "step": 2020 }, { "epoch": 0.18965840840840842, "grad_norm": 1.6800025155905745, "learning_rate": 6.318423522051924e-06, "loss": 0.4671, "step": 2021 }, { "epoch": 0.18975225225225226, "grad_norm": 1.7775240951759927, "learning_rate": 6.321551454488584e-06, "loss": 0.5002, "step": 2022 }, { "epoch": 0.1898460960960961, "grad_norm": 1.8206476080512126, "learning_rate": 6.324679386925243e-06, "loss": 0.4883, "step": 2023 }, { "epoch": 0.18993993993993993, "grad_norm": 3.626110532577723, "learning_rate": 6.327807319361903e-06, "loss": 0.5757, "step": 2024 }, { "epoch": 0.19003378378378377, "grad_norm": 1.6623263318050117, "learning_rate": 6.330935251798561e-06, "loss": 0.4891, "step": 2025 }, { "epoch": 0.19012762762762764, "grad_norm": 2.8475918915816734, "learning_rate": 6.3340631842352205e-06, "loss": 0.4868, "step": 2026 }, { "epoch": 0.19022147147147148, "grad_norm": 1.8394022467372435, "learning_rate": 6.3371911166718805e-06, "loss": 0.4864, "step": 2027 }, { "epoch": 0.19031531531531531, "grad_norm": 1.7446926557396498, "learning_rate": 6.34031904910854e-06, "loss": 0.4897, "step": 2028 }, { "epoch": 0.19040915915915915, "grad_norm": 4.222263586909974, "learning_rate": 6.3434469815452e-06, "loss": 0.5383, "step": 2029 }, { "epoch": 0.190503003003003, "grad_norm": 2.5871092926813333, "learning_rate": 6.346574913981859e-06, "loss": 0.5572, "step": 2030 }, { "epoch": 0.19059684684684686, "grad_norm": 2.2239996513860745, "learning_rate": 6.349702846418518e-06, "loss": 0.545, "step": 2031 }, { "epoch": 0.1906906906906907, "grad_norm": 1.4490477156573585, "learning_rate": 6.352830778855177e-06, "loss": 0.5253, "step": 2032 }, { "epoch": 0.19078453453453453, "grad_norm": 1.7438109034030373, "learning_rate": 6.355958711291836e-06, "loss": 0.5334, "step": 2033 }, { "epoch": 0.19087837837837837, "grad_norm": 2.120614603122782, "learning_rate": 6.3590866437284955e-06, "loss": 0.5544, "step": 2034 }, { "epoch": 0.1909722222222222, "grad_norm": 1.6415683212669894, "learning_rate": 6.3622145761651555e-06, "loss": 0.5537, "step": 2035 }, { "epoch": 0.19106606606606608, "grad_norm": 1.6008715662039528, "learning_rate": 6.365342508601815e-06, "loss": 0.5517, "step": 2036 }, { "epoch": 0.19115990990990991, "grad_norm": 1.4607035774850796, "learning_rate": 6.368470441038475e-06, "loss": 0.5317, "step": 2037 }, { "epoch": 0.19125375375375375, "grad_norm": 1.66758594309565, "learning_rate": 6.371598373475133e-06, "loss": 0.5052, "step": 2038 }, { "epoch": 0.1913475975975976, "grad_norm": 2.139539767691819, "learning_rate": 6.374726305911792e-06, "loss": 0.4912, "step": 2039 }, { "epoch": 0.19144144144144143, "grad_norm": 2.045437174472956, "learning_rate": 6.377854238348452e-06, "loss": 0.5095, "step": 2040 }, { "epoch": 0.1915352852852853, "grad_norm": 1.5159201079566154, "learning_rate": 6.380982170785111e-06, "loss": 0.5383, "step": 2041 }, { "epoch": 0.19162912912912913, "grad_norm": 1.9553831095018825, "learning_rate": 6.384110103221771e-06, "loss": 0.6124, "step": 2042 }, { "epoch": 0.19172297297297297, "grad_norm": 1.9965241090120285, "learning_rate": 6.3872380356584305e-06, "loss": 0.4802, "step": 2043 }, { "epoch": 0.1918168168168168, "grad_norm": 1.9795559279879753, "learning_rate": 6.390365968095089e-06, "loss": 0.512, "step": 2044 }, { "epoch": 0.19191066066066065, "grad_norm": 1.938209997067974, "learning_rate": 6.393493900531749e-06, "loss": 0.5885, "step": 2045 }, { "epoch": 0.19200450450450451, "grad_norm": 1.6720379890008112, "learning_rate": 6.396621832968408e-06, "loss": 0.5026, "step": 2046 }, { "epoch": 0.19209834834834835, "grad_norm": 2.3010907449015847, "learning_rate": 6.399749765405068e-06, "loss": 0.5435, "step": 2047 }, { "epoch": 0.1921921921921922, "grad_norm": 1.685641533862163, "learning_rate": 6.402877697841727e-06, "loss": 0.498, "step": 2048 }, { "epoch": 0.19228603603603603, "grad_norm": 1.8590382338147209, "learning_rate": 6.406005630278387e-06, "loss": 0.6045, "step": 2049 }, { "epoch": 0.19237987987987987, "grad_norm": 2.7973123510892086, "learning_rate": 6.409133562715046e-06, "loss": 0.4824, "step": 2050 }, { "epoch": 0.19247372372372373, "grad_norm": 1.4737919109524364, "learning_rate": 6.412261495151705e-06, "loss": 0.5398, "step": 2051 }, { "epoch": 0.19256756756756757, "grad_norm": 1.9451878949036299, "learning_rate": 6.415389427588365e-06, "loss": 0.585, "step": 2052 }, { "epoch": 0.1926614114114114, "grad_norm": 1.938272872598762, "learning_rate": 6.418517360025024e-06, "loss": 0.5297, "step": 2053 }, { "epoch": 0.19275525525525525, "grad_norm": 4.525782425990771, "learning_rate": 6.421645292461684e-06, "loss": 0.4731, "step": 2054 }, { "epoch": 0.19284909909909909, "grad_norm": 2.28136285944662, "learning_rate": 6.424773224898343e-06, "loss": 0.5129, "step": 2055 }, { "epoch": 0.19294294294294295, "grad_norm": 1.8093712359073681, "learning_rate": 6.427901157335002e-06, "loss": 0.5787, "step": 2056 }, { "epoch": 0.1930367867867868, "grad_norm": 2.396580325371361, "learning_rate": 6.431029089771661e-06, "loss": 0.5757, "step": 2057 }, { "epoch": 0.19313063063063063, "grad_norm": 1.84665907644247, "learning_rate": 6.4341570222083205e-06, "loss": 0.5684, "step": 2058 }, { "epoch": 0.19322447447447447, "grad_norm": 1.6001305184205392, "learning_rate": 6.4372849546449805e-06, "loss": 0.503, "step": 2059 }, { "epoch": 0.1933183183183183, "grad_norm": 1.639462479775208, "learning_rate": 6.44041288708164e-06, "loss": 0.5377, "step": 2060 }, { "epoch": 0.19341216216216217, "grad_norm": 1.5900639767521796, "learning_rate": 6.443540819518299e-06, "loss": 0.5718, "step": 2061 }, { "epoch": 0.193506006006006, "grad_norm": 1.5523746364361126, "learning_rate": 6.446668751954959e-06, "loss": 0.4596, "step": 2062 }, { "epoch": 0.19359984984984985, "grad_norm": 1.450064925772331, "learning_rate": 6.449796684391618e-06, "loss": 0.491, "step": 2063 }, { "epoch": 0.19369369369369369, "grad_norm": 2.629231959526645, "learning_rate": 6.452924616828276e-06, "loss": 0.536, "step": 2064 }, { "epoch": 0.19378753753753752, "grad_norm": 1.4430720931570875, "learning_rate": 6.456052549264936e-06, "loss": 0.547, "step": 2065 }, { "epoch": 0.1938813813813814, "grad_norm": 1.459427709886434, "learning_rate": 6.4591804817015954e-06, "loss": 0.5214, "step": 2066 }, { "epoch": 0.19397522522522523, "grad_norm": 1.9046675926558887, "learning_rate": 6.4623084141382554e-06, "loss": 0.5664, "step": 2067 }, { "epoch": 0.19406906906906907, "grad_norm": 1.6886754685699374, "learning_rate": 6.465436346574915e-06, "loss": 0.5254, "step": 2068 }, { "epoch": 0.1941629129129129, "grad_norm": 1.5197012449141063, "learning_rate": 6.468564279011575e-06, "loss": 0.4876, "step": 2069 }, { "epoch": 0.19425675675675674, "grad_norm": 1.7168445537208417, "learning_rate": 6.471692211448233e-06, "loss": 0.5298, "step": 2070 }, { "epoch": 0.1943506006006006, "grad_norm": 1.706599810615238, "learning_rate": 6.474820143884892e-06, "loss": 0.5602, "step": 2071 }, { "epoch": 0.19444444444444445, "grad_norm": 1.8434463118828404, "learning_rate": 6.477948076321552e-06, "loss": 0.5055, "step": 2072 }, { "epoch": 0.19453828828828829, "grad_norm": 1.580720874713507, "learning_rate": 6.481076008758211e-06, "loss": 0.4844, "step": 2073 }, { "epoch": 0.19463213213213212, "grad_norm": 1.7889327819928187, "learning_rate": 6.484203941194871e-06, "loss": 0.5214, "step": 2074 }, { "epoch": 0.19472597597597596, "grad_norm": 1.6644547651406554, "learning_rate": 6.48733187363153e-06, "loss": 0.4961, "step": 2075 }, { "epoch": 0.19481981981981983, "grad_norm": 1.8492278274953848, "learning_rate": 6.490459806068189e-06, "loss": 0.5734, "step": 2076 }, { "epoch": 0.19491366366366367, "grad_norm": 2.1270901211865056, "learning_rate": 6.493587738504849e-06, "loss": 0.5702, "step": 2077 }, { "epoch": 0.1950075075075075, "grad_norm": 1.9844457860936089, "learning_rate": 6.496715670941508e-06, "loss": 0.4892, "step": 2078 }, { "epoch": 0.19510135135135134, "grad_norm": 1.9700955884479574, "learning_rate": 6.499843603378168e-06, "loss": 0.5226, "step": 2079 }, { "epoch": 0.19519519519519518, "grad_norm": 1.876437710990519, "learning_rate": 6.502971535814827e-06, "loss": 0.5404, "step": 2080 }, { "epoch": 0.19528903903903905, "grad_norm": 1.7737358670726027, "learning_rate": 6.506099468251486e-06, "loss": 0.494, "step": 2081 }, { "epoch": 0.19538288288288289, "grad_norm": 2.0598123877840053, "learning_rate": 6.509227400688146e-06, "loss": 0.5189, "step": 2082 }, { "epoch": 0.19547672672672672, "grad_norm": 1.5442524562485909, "learning_rate": 6.5123553331248046e-06, "loss": 0.4783, "step": 2083 }, { "epoch": 0.19557057057057056, "grad_norm": 1.7017436941883386, "learning_rate": 6.5154832655614646e-06, "loss": 0.5124, "step": 2084 }, { "epoch": 0.19566441441441443, "grad_norm": 2.2230385620573587, "learning_rate": 6.518611197998124e-06, "loss": 0.5026, "step": 2085 }, { "epoch": 0.19575825825825827, "grad_norm": 2.3588041161573896, "learning_rate": 6.521739130434783e-06, "loss": 0.5388, "step": 2086 }, { "epoch": 0.1958521021021021, "grad_norm": 1.6044088757089678, "learning_rate": 6.524867062871443e-06, "loss": 0.52, "step": 2087 }, { "epoch": 0.19594594594594594, "grad_norm": 7.526732246905775, "learning_rate": 6.527994995308102e-06, "loss": 0.5114, "step": 2088 }, { "epoch": 0.19603978978978978, "grad_norm": 1.8503246198795602, "learning_rate": 6.53112292774476e-06, "loss": 0.5384, "step": 2089 }, { "epoch": 0.19613363363363365, "grad_norm": 2.5744131065585902, "learning_rate": 6.53425086018142e-06, "loss": 0.5527, "step": 2090 }, { "epoch": 0.19622747747747749, "grad_norm": 3.999779041488698, "learning_rate": 6.5373787926180795e-06, "loss": 0.5895, "step": 2091 }, { "epoch": 0.19632132132132132, "grad_norm": 1.984122637374993, "learning_rate": 6.5405067250547395e-06, "loss": 0.5504, "step": 2092 }, { "epoch": 0.19641516516516516, "grad_norm": 2.6017298212320243, "learning_rate": 6.543634657491399e-06, "loss": 0.4854, "step": 2093 }, { "epoch": 0.196509009009009, "grad_norm": 1.888988042970257, "learning_rate": 6.546762589928059e-06, "loss": 0.5722, "step": 2094 }, { "epoch": 0.19660285285285287, "grad_norm": 1.7328377590582875, "learning_rate": 6.549890522364718e-06, "loss": 0.5162, "step": 2095 }, { "epoch": 0.1966966966966967, "grad_norm": 2.225849212451876, "learning_rate": 6.553018454801376e-06, "loss": 0.5195, "step": 2096 }, { "epoch": 0.19679054054054054, "grad_norm": 1.62429619159361, "learning_rate": 6.556146387238036e-06, "loss": 0.5188, "step": 2097 }, { "epoch": 0.19688438438438438, "grad_norm": 1.727479967188683, "learning_rate": 6.559274319674695e-06, "loss": 0.5193, "step": 2098 }, { "epoch": 0.19697822822822822, "grad_norm": 1.657140551403681, "learning_rate": 6.562402252111355e-06, "loss": 0.4972, "step": 2099 }, { "epoch": 0.19707207207207209, "grad_norm": 1.5937302214818267, "learning_rate": 6.5655301845480145e-06, "loss": 0.4516, "step": 2100 }, { "epoch": 0.19716591591591592, "grad_norm": 1.6201578827245413, "learning_rate": 6.5686581169846745e-06, "loss": 0.5448, "step": 2101 }, { "epoch": 0.19725975975975976, "grad_norm": 2.1598725776569907, "learning_rate": 6.571786049421333e-06, "loss": 0.5436, "step": 2102 }, { "epoch": 0.1973536036036036, "grad_norm": 1.7726786496257207, "learning_rate": 6.574913981857992e-06, "loss": 0.5726, "step": 2103 }, { "epoch": 0.19744744744744744, "grad_norm": 2.047289033461762, "learning_rate": 6.578041914294652e-06, "loss": 0.5226, "step": 2104 }, { "epoch": 0.1975412912912913, "grad_norm": 2.8397117499621167, "learning_rate": 6.581169846731311e-06, "loss": 0.457, "step": 2105 }, { "epoch": 0.19763513513513514, "grad_norm": 1.6278804277888443, "learning_rate": 6.58429777916797e-06, "loss": 0.5465, "step": 2106 }, { "epoch": 0.19772897897897898, "grad_norm": 1.6814337986434802, "learning_rate": 6.58742571160463e-06, "loss": 0.5433, "step": 2107 }, { "epoch": 0.19782282282282282, "grad_norm": 1.7613713110648217, "learning_rate": 6.590553644041289e-06, "loss": 0.5783, "step": 2108 }, { "epoch": 0.19791666666666666, "grad_norm": 1.9964140845073661, "learning_rate": 6.593681576477949e-06, "loss": 0.5902, "step": 2109 }, { "epoch": 0.19801051051051052, "grad_norm": 2.3077760070304394, "learning_rate": 6.596809508914608e-06, "loss": 0.5358, "step": 2110 }, { "epoch": 0.19810435435435436, "grad_norm": 2.0406498343556914, "learning_rate": 6.599937441351267e-06, "loss": 0.4817, "step": 2111 }, { "epoch": 0.1981981981981982, "grad_norm": 1.4984614114468173, "learning_rate": 6.603065373787927e-06, "loss": 0.5328, "step": 2112 }, { "epoch": 0.19829204204204204, "grad_norm": 1.6222319542083536, "learning_rate": 6.606193306224586e-06, "loss": 0.5356, "step": 2113 }, { "epoch": 0.19838588588588588, "grad_norm": 2.1738889891767, "learning_rate": 6.609321238661246e-06, "loss": 0.5731, "step": 2114 }, { "epoch": 0.19847972972972974, "grad_norm": 1.5407143014189868, "learning_rate": 6.6124491710979045e-06, "loss": 0.5594, "step": 2115 }, { "epoch": 0.19857357357357358, "grad_norm": 2.12206511351053, "learning_rate": 6.615577103534564e-06, "loss": 0.5909, "step": 2116 }, { "epoch": 0.19866741741741742, "grad_norm": 1.7450764275926005, "learning_rate": 6.618705035971224e-06, "loss": 0.5586, "step": 2117 }, { "epoch": 0.19876126126126126, "grad_norm": 1.9022719872281968, "learning_rate": 6.621832968407883e-06, "loss": 0.5259, "step": 2118 }, { "epoch": 0.1988551051051051, "grad_norm": 2.1454917295311104, "learning_rate": 6.624960900844543e-06, "loss": 0.5109, "step": 2119 }, { "epoch": 0.19894894894894896, "grad_norm": 1.769299297961439, "learning_rate": 6.628088833281202e-06, "loss": 0.4849, "step": 2120 }, { "epoch": 0.1990427927927928, "grad_norm": 2.196052237764968, "learning_rate": 6.63121676571786e-06, "loss": 0.5276, "step": 2121 }, { "epoch": 0.19913663663663664, "grad_norm": 1.5572111439207263, "learning_rate": 6.63434469815452e-06, "loss": 0.5119, "step": 2122 }, { "epoch": 0.19923048048048048, "grad_norm": 1.5218516753084668, "learning_rate": 6.6374726305911795e-06, "loss": 0.4655, "step": 2123 }, { "epoch": 0.19932432432432431, "grad_norm": 1.921139701473248, "learning_rate": 6.6406005630278395e-06, "loss": 0.5434, "step": 2124 }, { "epoch": 0.19941816816816818, "grad_norm": 1.695625401833198, "learning_rate": 6.643728495464499e-06, "loss": 0.5268, "step": 2125 }, { "epoch": 0.19951201201201202, "grad_norm": 1.8686806443217312, "learning_rate": 6.646856427901159e-06, "loss": 0.4998, "step": 2126 }, { "epoch": 0.19960585585585586, "grad_norm": 1.6259538790178953, "learning_rate": 6.649984360337818e-06, "loss": 0.6018, "step": 2127 }, { "epoch": 0.1996996996996997, "grad_norm": 1.7819323562077105, "learning_rate": 6.653112292774476e-06, "loss": 0.5447, "step": 2128 }, { "epoch": 0.19979354354354353, "grad_norm": 1.7470377430089847, "learning_rate": 6.656240225211136e-06, "loss": 0.5688, "step": 2129 }, { "epoch": 0.1998873873873874, "grad_norm": 1.6853739233872571, "learning_rate": 6.659368157647795e-06, "loss": 0.4712, "step": 2130 }, { "epoch": 0.19998123123123124, "grad_norm": 1.918845295396769, "learning_rate": 6.662496090084455e-06, "loss": 0.5267, "step": 2131 }, { "epoch": 0.20007507507507508, "grad_norm": 2.3152320269087747, "learning_rate": 6.6656240225211144e-06, "loss": 0.5709, "step": 2132 }, { "epoch": 0.20016891891891891, "grad_norm": 1.6328989485080234, "learning_rate": 6.668751954957774e-06, "loss": 0.5388, "step": 2133 }, { "epoch": 0.20026276276276275, "grad_norm": 2.843620483727878, "learning_rate": 6.671879887394433e-06, "loss": 0.583, "step": 2134 }, { "epoch": 0.20035660660660662, "grad_norm": 1.6396810653948155, "learning_rate": 6.675007819831092e-06, "loss": 0.5105, "step": 2135 }, { "epoch": 0.20045045045045046, "grad_norm": 1.6639021589862413, "learning_rate": 6.678135752267751e-06, "loss": 0.517, "step": 2136 }, { "epoch": 0.2005442942942943, "grad_norm": 2.0018000535050833, "learning_rate": 6.681263684704411e-06, "loss": 0.5073, "step": 2137 }, { "epoch": 0.20063813813813813, "grad_norm": 1.9333055037937275, "learning_rate": 6.68439161714107e-06, "loss": 0.5522, "step": 2138 }, { "epoch": 0.20073198198198197, "grad_norm": 1.7128788494045768, "learning_rate": 6.68751954957773e-06, "loss": 0.5435, "step": 2139 }, { "epoch": 0.20082582582582584, "grad_norm": 1.7855934573189898, "learning_rate": 6.6906474820143886e-06, "loss": 0.5304, "step": 2140 }, { "epoch": 0.20091966966966968, "grad_norm": 1.5886900081642603, "learning_rate": 6.693775414451048e-06, "loss": 0.5369, "step": 2141 }, { "epoch": 0.20101351351351351, "grad_norm": 3.523418493761438, "learning_rate": 6.696903346887708e-06, "loss": 0.5335, "step": 2142 }, { "epoch": 0.20110735735735735, "grad_norm": 3.6602889319111753, "learning_rate": 6.700031279324367e-06, "loss": 0.4787, "step": 2143 }, { "epoch": 0.2012012012012012, "grad_norm": 1.7929914010316785, "learning_rate": 6.703159211761027e-06, "loss": 0.5007, "step": 2144 }, { "epoch": 0.20129504504504506, "grad_norm": 1.8870782667230632, "learning_rate": 6.706287144197686e-06, "loss": 0.5225, "step": 2145 }, { "epoch": 0.2013888888888889, "grad_norm": 1.7445826172754124, "learning_rate": 6.709415076634346e-06, "loss": 0.5153, "step": 2146 }, { "epoch": 0.20148273273273273, "grad_norm": 2.7678779428936013, "learning_rate": 6.712543009071004e-06, "loss": 0.5598, "step": 2147 }, { "epoch": 0.20157657657657657, "grad_norm": 1.9539898486130636, "learning_rate": 6.7156709415076636e-06, "loss": 0.5229, "step": 2148 }, { "epoch": 0.2016704204204204, "grad_norm": 1.7791882404312604, "learning_rate": 6.7187988739443236e-06, "loss": 0.54, "step": 2149 }, { "epoch": 0.20176426426426428, "grad_norm": 2.9246035784002022, "learning_rate": 6.721926806380983e-06, "loss": 0.5339, "step": 2150 }, { "epoch": 0.20185810810810811, "grad_norm": 1.6905056074641744, "learning_rate": 6.725054738817643e-06, "loss": 0.5341, "step": 2151 }, { "epoch": 0.20195195195195195, "grad_norm": 1.7210294322054138, "learning_rate": 6.728182671254302e-06, "loss": 0.5734, "step": 2152 }, { "epoch": 0.2020457957957958, "grad_norm": 1.7439996403091207, "learning_rate": 6.73131060369096e-06, "loss": 0.5644, "step": 2153 }, { "epoch": 0.20213963963963963, "grad_norm": 1.828748975823045, "learning_rate": 6.73443853612762e-06, "loss": 0.527, "step": 2154 }, { "epoch": 0.2022334834834835, "grad_norm": 1.5165712403614395, "learning_rate": 6.737566468564279e-06, "loss": 0.4831, "step": 2155 }, { "epoch": 0.20232732732732733, "grad_norm": 1.7822352680429543, "learning_rate": 6.740694401000939e-06, "loss": 0.5608, "step": 2156 }, { "epoch": 0.20242117117117117, "grad_norm": 1.8573091838239137, "learning_rate": 6.7438223334375985e-06, "loss": 0.516, "step": 2157 }, { "epoch": 0.202515015015015, "grad_norm": 2.4686504908319065, "learning_rate": 6.746950265874258e-06, "loss": 0.506, "step": 2158 }, { "epoch": 0.20260885885885885, "grad_norm": 2.2443224848253074, "learning_rate": 6.750078198310917e-06, "loss": 0.5477, "step": 2159 }, { "epoch": 0.20270270270270271, "grad_norm": 2.1081748034598453, "learning_rate": 6.753206130747576e-06, "loss": 0.5386, "step": 2160 }, { "epoch": 0.20279654654654655, "grad_norm": 2.165700705753106, "learning_rate": 6.756334063184235e-06, "loss": 0.4657, "step": 2161 }, { "epoch": 0.2028903903903904, "grad_norm": 1.6265434762631752, "learning_rate": 6.759461995620895e-06, "loss": 0.5035, "step": 2162 }, { "epoch": 0.20298423423423423, "grad_norm": 1.4977703474809199, "learning_rate": 6.762589928057554e-06, "loss": 0.5345, "step": 2163 }, { "epoch": 0.20307807807807807, "grad_norm": 1.8913166344370198, "learning_rate": 6.765717860494214e-06, "loss": 0.5218, "step": 2164 }, { "epoch": 0.20317192192192193, "grad_norm": 1.6737160720205748, "learning_rate": 6.7688457929308735e-06, "loss": 0.5226, "step": 2165 }, { "epoch": 0.20326576576576577, "grad_norm": 2.1693379644748725, "learning_rate": 6.771973725367532e-06, "loss": 0.5016, "step": 2166 }, { "epoch": 0.2033596096096096, "grad_norm": 1.4064545579607277, "learning_rate": 6.775101657804192e-06, "loss": 0.4804, "step": 2167 }, { "epoch": 0.20345345345345345, "grad_norm": 1.56439149287625, "learning_rate": 6.778229590240851e-06, "loss": 0.4917, "step": 2168 }, { "epoch": 0.20354729729729729, "grad_norm": 1.5872821424670833, "learning_rate": 6.781357522677511e-06, "loss": 0.5313, "step": 2169 }, { "epoch": 0.20364114114114115, "grad_norm": 1.5151400604271934, "learning_rate": 6.78448545511417e-06, "loss": 0.5788, "step": 2170 }, { "epoch": 0.203734984984985, "grad_norm": 2.3559349045827833, "learning_rate": 6.78761338755083e-06, "loss": 0.5608, "step": 2171 }, { "epoch": 0.20382882882882883, "grad_norm": 2.066345234452218, "learning_rate": 6.7907413199874885e-06, "loss": 0.4923, "step": 2172 }, { "epoch": 0.20392267267267267, "grad_norm": 1.9484705710618302, "learning_rate": 6.793869252424148e-06, "loss": 0.5655, "step": 2173 }, { "epoch": 0.2040165165165165, "grad_norm": 1.513604142012602, "learning_rate": 6.796997184860808e-06, "loss": 0.5273, "step": 2174 }, { "epoch": 0.20411036036036037, "grad_norm": 1.4892411083352328, "learning_rate": 6.800125117297467e-06, "loss": 0.5082, "step": 2175 }, { "epoch": 0.2042042042042042, "grad_norm": 2.0215894669201337, "learning_rate": 6.803253049734127e-06, "loss": 0.4918, "step": 2176 }, { "epoch": 0.20429804804804805, "grad_norm": 1.610907545729977, "learning_rate": 6.806380982170786e-06, "loss": 0.5309, "step": 2177 }, { "epoch": 0.20439189189189189, "grad_norm": 2.196684565207243, "learning_rate": 6.809508914607445e-06, "loss": 0.5631, "step": 2178 }, { "epoch": 0.20448573573573572, "grad_norm": 1.6962735812801255, "learning_rate": 6.812636847044104e-06, "loss": 0.5339, "step": 2179 }, { "epoch": 0.2045795795795796, "grad_norm": 1.6625846953155388, "learning_rate": 6.8157647794807635e-06, "loss": 0.4738, "step": 2180 }, { "epoch": 0.20467342342342343, "grad_norm": 3.0585516200106477, "learning_rate": 6.8188927119174235e-06, "loss": 0.5155, "step": 2181 }, { "epoch": 0.20476726726726727, "grad_norm": 1.9263401755986935, "learning_rate": 6.822020644354083e-06, "loss": 0.5457, "step": 2182 }, { "epoch": 0.2048611111111111, "grad_norm": 1.712327435079304, "learning_rate": 6.825148576790742e-06, "loss": 0.548, "step": 2183 }, { "epoch": 0.20495495495495494, "grad_norm": 1.8473808588201006, "learning_rate": 6.828276509227402e-06, "loss": 0.448, "step": 2184 }, { "epoch": 0.2050487987987988, "grad_norm": 1.5374528564754995, "learning_rate": 6.83140444166406e-06, "loss": 0.5071, "step": 2185 }, { "epoch": 0.20514264264264265, "grad_norm": 4.280961485978255, "learning_rate": 6.834532374100719e-06, "loss": 0.5303, "step": 2186 }, { "epoch": 0.20523648648648649, "grad_norm": 2.206879776686726, "learning_rate": 6.837660306537379e-06, "loss": 0.5601, "step": 2187 }, { "epoch": 0.20533033033033032, "grad_norm": 1.8673213620604396, "learning_rate": 6.8407882389740385e-06, "loss": 0.5702, "step": 2188 }, { "epoch": 0.20542417417417416, "grad_norm": 1.6723971533012456, "learning_rate": 6.8439161714106985e-06, "loss": 0.4778, "step": 2189 }, { "epoch": 0.20551801801801803, "grad_norm": 2.5964287362080074, "learning_rate": 6.847044103847358e-06, "loss": 0.564, "step": 2190 }, { "epoch": 0.20561186186186187, "grad_norm": 1.7118758821872564, "learning_rate": 6.850172036284016e-06, "loss": 0.5245, "step": 2191 }, { "epoch": 0.2057057057057057, "grad_norm": 1.9728545387715408, "learning_rate": 6.853299968720676e-06, "loss": 0.5929, "step": 2192 }, { "epoch": 0.20579954954954954, "grad_norm": 1.619258344086149, "learning_rate": 6.856427901157335e-06, "loss": 0.5124, "step": 2193 }, { "epoch": 0.20589339339339338, "grad_norm": 3.4517920163463747, "learning_rate": 6.859555833593995e-06, "loss": 0.4894, "step": 2194 }, { "epoch": 0.20598723723723725, "grad_norm": 1.7195840897291887, "learning_rate": 6.862683766030654e-06, "loss": 0.5379, "step": 2195 }, { "epoch": 0.20608108108108109, "grad_norm": 1.961170472617962, "learning_rate": 6.865811698467314e-06, "loss": 0.5388, "step": 2196 }, { "epoch": 0.20617492492492492, "grad_norm": 1.7018462862361747, "learning_rate": 6.8689396309039734e-06, "loss": 0.5096, "step": 2197 }, { "epoch": 0.20626876876876876, "grad_norm": 2.1372475450233557, "learning_rate": 6.872067563340632e-06, "loss": 0.5358, "step": 2198 }, { "epoch": 0.2063626126126126, "grad_norm": 1.8684372616293623, "learning_rate": 6.875195495777292e-06, "loss": 0.5481, "step": 2199 }, { "epoch": 0.20645645645645647, "grad_norm": 1.6161139099030415, "learning_rate": 6.878323428213951e-06, "loss": 0.5002, "step": 2200 }, { "epoch": 0.2065503003003003, "grad_norm": 3.498859781022698, "learning_rate": 6.881451360650611e-06, "loss": 0.5683, "step": 2201 }, { "epoch": 0.20664414414414414, "grad_norm": 1.8330090111591137, "learning_rate": 6.88457929308727e-06, "loss": 0.5465, "step": 2202 }, { "epoch": 0.20673798798798798, "grad_norm": 1.9194332138831145, "learning_rate": 6.88770722552393e-06, "loss": 0.5877, "step": 2203 }, { "epoch": 0.20683183183183182, "grad_norm": 2.2145892010862447, "learning_rate": 6.890835157960588e-06, "loss": 0.4968, "step": 2204 }, { "epoch": 0.20692567567567569, "grad_norm": 1.5686396781299108, "learning_rate": 6.8939630903972476e-06, "loss": 0.5296, "step": 2205 }, { "epoch": 0.20701951951951952, "grad_norm": 1.465592568061966, "learning_rate": 6.897091022833908e-06, "loss": 0.5222, "step": 2206 }, { "epoch": 0.20711336336336336, "grad_norm": 1.976125983177005, "learning_rate": 6.900218955270567e-06, "loss": 0.4834, "step": 2207 }, { "epoch": 0.2072072072072072, "grad_norm": 1.5916611973854893, "learning_rate": 6.903346887707226e-06, "loss": 0.5357, "step": 2208 }, { "epoch": 0.20730105105105104, "grad_norm": 2.1065414904592217, "learning_rate": 6.906474820143886e-06, "loss": 0.5396, "step": 2209 }, { "epoch": 0.2073948948948949, "grad_norm": 1.7281565634751803, "learning_rate": 6.909602752580545e-06, "loss": 0.5573, "step": 2210 }, { "epoch": 0.20748873873873874, "grad_norm": 1.5216969817275634, "learning_rate": 6.912730685017203e-06, "loss": 0.5079, "step": 2211 }, { "epoch": 0.20758258258258258, "grad_norm": 1.9547881174776474, "learning_rate": 6.915858617453863e-06, "loss": 0.5414, "step": 2212 }, { "epoch": 0.20767642642642642, "grad_norm": 1.5979834662523267, "learning_rate": 6.9189865498905226e-06, "loss": 0.5046, "step": 2213 }, { "epoch": 0.20777027027027026, "grad_norm": 1.7247001056856972, "learning_rate": 6.9221144823271826e-06, "loss": 0.5288, "step": 2214 }, { "epoch": 0.20786411411411412, "grad_norm": 1.7441219751907366, "learning_rate": 6.925242414763842e-06, "loss": 0.5154, "step": 2215 }, { "epoch": 0.20795795795795796, "grad_norm": 1.916367449861654, "learning_rate": 6.928370347200502e-06, "loss": 0.6088, "step": 2216 }, { "epoch": 0.2080518018018018, "grad_norm": 2.0844461851362137, "learning_rate": 6.93149827963716e-06, "loss": 0.5827, "step": 2217 }, { "epoch": 0.20814564564564564, "grad_norm": 1.7101875619235751, "learning_rate": 6.934626212073819e-06, "loss": 0.4789, "step": 2218 }, { "epoch": 0.20823948948948948, "grad_norm": 1.4568801753083211, "learning_rate": 6.937754144510479e-06, "loss": 0.5179, "step": 2219 }, { "epoch": 0.20833333333333334, "grad_norm": 2.554621317318509, "learning_rate": 6.940882076947138e-06, "loss": 0.5538, "step": 2220 }, { "epoch": 0.20842717717717718, "grad_norm": 6.298390446200674, "learning_rate": 6.944010009383798e-06, "loss": 0.5319, "step": 2221 }, { "epoch": 0.20852102102102102, "grad_norm": 1.7942550659188137, "learning_rate": 6.9471379418204575e-06, "loss": 0.5244, "step": 2222 }, { "epoch": 0.20861486486486486, "grad_norm": 2.0817173193072684, "learning_rate": 6.950265874257116e-06, "loss": 0.5745, "step": 2223 }, { "epoch": 0.2087087087087087, "grad_norm": 1.9287418630431912, "learning_rate": 6.953393806693776e-06, "loss": 0.5507, "step": 2224 }, { "epoch": 0.20880255255255256, "grad_norm": 4.167725015139605, "learning_rate": 6.956521739130435e-06, "loss": 0.5229, "step": 2225 }, { "epoch": 0.2088963963963964, "grad_norm": 2.0242274417953423, "learning_rate": 6.959649671567095e-06, "loss": 0.5384, "step": 2226 }, { "epoch": 0.20899024024024024, "grad_norm": 1.5579961226737244, "learning_rate": 6.962777604003754e-06, "loss": 0.5347, "step": 2227 }, { "epoch": 0.20908408408408408, "grad_norm": 1.590025991356571, "learning_rate": 6.965905536440414e-06, "loss": 0.4287, "step": 2228 }, { "epoch": 0.20917792792792791, "grad_norm": 1.6782491125861654, "learning_rate": 6.969033468877073e-06, "loss": 0.5442, "step": 2229 }, { "epoch": 0.20927177177177178, "grad_norm": 3.0062230237260756, "learning_rate": 6.972161401313732e-06, "loss": 0.5184, "step": 2230 }, { "epoch": 0.20936561561561562, "grad_norm": 1.5036237152033254, "learning_rate": 6.975289333750392e-06, "loss": 0.5445, "step": 2231 }, { "epoch": 0.20945945945945946, "grad_norm": 2.1595633265404146, "learning_rate": 6.978417266187051e-06, "loss": 0.4738, "step": 2232 }, { "epoch": 0.2095533033033033, "grad_norm": 2.014326315822599, "learning_rate": 6.98154519862371e-06, "loss": 0.5466, "step": 2233 }, { "epoch": 0.20964714714714713, "grad_norm": 1.7349941560389084, "learning_rate": 6.98467313106037e-06, "loss": 0.4989, "step": 2234 }, { "epoch": 0.209740990990991, "grad_norm": 1.9988652099133521, "learning_rate": 6.987801063497029e-06, "loss": 0.5578, "step": 2235 }, { "epoch": 0.20983483483483484, "grad_norm": 1.6306833385817945, "learning_rate": 6.9909289959336875e-06, "loss": 0.5239, "step": 2236 }, { "epoch": 0.20992867867867868, "grad_norm": 2.3590948081549423, "learning_rate": 6.9940569283703475e-06, "loss": 0.5102, "step": 2237 }, { "epoch": 0.21002252252252251, "grad_norm": 1.5457420052809117, "learning_rate": 6.997184860807007e-06, "loss": 0.491, "step": 2238 }, { "epoch": 0.21011636636636635, "grad_norm": 1.4339287468830164, "learning_rate": 7.000312793243667e-06, "loss": 0.5019, "step": 2239 }, { "epoch": 0.21021021021021022, "grad_norm": 1.8067110229799572, "learning_rate": 7.003440725680326e-06, "loss": 0.5486, "step": 2240 }, { "epoch": 0.21030405405405406, "grad_norm": 1.6180660859964326, "learning_rate": 7.006568658116986e-06, "loss": 0.5447, "step": 2241 }, { "epoch": 0.2103978978978979, "grad_norm": 1.6014481262991551, "learning_rate": 7.009696590553645e-06, "loss": 0.5246, "step": 2242 }, { "epoch": 0.21049174174174173, "grad_norm": 1.4169976830022215, "learning_rate": 7.012824522990303e-06, "loss": 0.4833, "step": 2243 }, { "epoch": 0.21058558558558557, "grad_norm": 3.5736047855938105, "learning_rate": 7.015952455426963e-06, "loss": 0.4864, "step": 2244 }, { "epoch": 0.21067942942942944, "grad_norm": 1.879157380853568, "learning_rate": 7.0190803878636225e-06, "loss": 0.4725, "step": 2245 }, { "epoch": 0.21077327327327328, "grad_norm": 1.8891036185634895, "learning_rate": 7.0222083203002825e-06, "loss": 0.521, "step": 2246 }, { "epoch": 0.21086711711711711, "grad_norm": 1.6351876538233097, "learning_rate": 7.025336252736942e-06, "loss": 0.4946, "step": 2247 }, { "epoch": 0.21096096096096095, "grad_norm": 2.7775555559389242, "learning_rate": 7.028464185173602e-06, "loss": 0.532, "step": 2248 }, { "epoch": 0.21105480480480482, "grad_norm": 1.5998374123754966, "learning_rate": 7.03159211761026e-06, "loss": 0.4841, "step": 2249 }, { "epoch": 0.21114864864864866, "grad_norm": 1.74706698428869, "learning_rate": 7.034720050046919e-06, "loss": 0.5768, "step": 2250 }, { "epoch": 0.2112424924924925, "grad_norm": 1.5794707214753045, "learning_rate": 7.037847982483579e-06, "loss": 0.5724, "step": 2251 }, { "epoch": 0.21133633633633633, "grad_norm": 1.5394482036083672, "learning_rate": 7.040975914920238e-06, "loss": 0.4879, "step": 2252 }, { "epoch": 0.21143018018018017, "grad_norm": 1.4868706634454623, "learning_rate": 7.044103847356898e-06, "loss": 0.5175, "step": 2253 }, { "epoch": 0.21152402402402404, "grad_norm": 2.2084114722749404, "learning_rate": 7.0472317797935575e-06, "loss": 0.4812, "step": 2254 }, { "epoch": 0.21161786786786788, "grad_norm": 1.9504800479941773, "learning_rate": 7.050359712230216e-06, "loss": 0.5385, "step": 2255 }, { "epoch": 0.21171171171171171, "grad_norm": 1.7833964204307369, "learning_rate": 7.053487644666876e-06, "loss": 0.5678, "step": 2256 }, { "epoch": 0.21180555555555555, "grad_norm": 1.4615229060535537, "learning_rate": 7.056615577103535e-06, "loss": 0.5417, "step": 2257 }, { "epoch": 0.2118993993993994, "grad_norm": 1.4826902450892498, "learning_rate": 7.059743509540194e-06, "loss": 0.5827, "step": 2258 }, { "epoch": 0.21199324324324326, "grad_norm": 1.5822471236137226, "learning_rate": 7.062871441976854e-06, "loss": 0.5261, "step": 2259 }, { "epoch": 0.2120870870870871, "grad_norm": 1.5193892003863958, "learning_rate": 7.065999374413513e-06, "loss": 0.553, "step": 2260 }, { "epoch": 0.21218093093093093, "grad_norm": 1.552976937631925, "learning_rate": 7.069127306850173e-06, "loss": 0.4887, "step": 2261 }, { "epoch": 0.21227477477477477, "grad_norm": 2.0386231407800555, "learning_rate": 7.072255239286832e-06, "loss": 0.5665, "step": 2262 }, { "epoch": 0.2123686186186186, "grad_norm": 2.0999290622308875, "learning_rate": 7.075383171723491e-06, "loss": 0.5764, "step": 2263 }, { "epoch": 0.21246246246246248, "grad_norm": 1.9663232197722784, "learning_rate": 7.078511104160151e-06, "loss": 0.5745, "step": 2264 }, { "epoch": 0.21255630630630631, "grad_norm": 1.60059957359306, "learning_rate": 7.08163903659681e-06, "loss": 0.4914, "step": 2265 }, { "epoch": 0.21265015015015015, "grad_norm": 1.69189282836606, "learning_rate": 7.08476696903347e-06, "loss": 0.501, "step": 2266 }, { "epoch": 0.212743993993994, "grad_norm": 2.0136261051717192, "learning_rate": 7.087894901470129e-06, "loss": 0.5278, "step": 2267 }, { "epoch": 0.21283783783783783, "grad_norm": 2.3970492730839723, "learning_rate": 7.091022833906787e-06, "loss": 0.5844, "step": 2268 }, { "epoch": 0.2129316816816817, "grad_norm": 1.474863151098819, "learning_rate": 7.094150766343447e-06, "loss": 0.5393, "step": 2269 }, { "epoch": 0.21302552552552553, "grad_norm": 1.5865797601898055, "learning_rate": 7.0972786987801066e-06, "loss": 0.4988, "step": 2270 }, { "epoch": 0.21311936936936937, "grad_norm": 1.9419335783765337, "learning_rate": 7.100406631216767e-06, "loss": 0.5244, "step": 2271 }, { "epoch": 0.2132132132132132, "grad_norm": 1.3948475031218144, "learning_rate": 7.103534563653426e-06, "loss": 0.4662, "step": 2272 }, { "epoch": 0.21330705705705705, "grad_norm": 1.5999842848807078, "learning_rate": 7.106662496090086e-06, "loss": 0.5147, "step": 2273 }, { "epoch": 0.21340090090090091, "grad_norm": 1.599845656489736, "learning_rate": 7.109790428526745e-06, "loss": 0.4946, "step": 2274 }, { "epoch": 0.21349474474474475, "grad_norm": 1.7699030977895398, "learning_rate": 7.112918360963403e-06, "loss": 0.5338, "step": 2275 }, { "epoch": 0.2135885885885886, "grad_norm": 1.6790857382346793, "learning_rate": 7.116046293400063e-06, "loss": 0.5446, "step": 2276 }, { "epoch": 0.21368243243243243, "grad_norm": 1.7868440665887484, "learning_rate": 7.119174225836722e-06, "loss": 0.5774, "step": 2277 }, { "epoch": 0.21377627627627627, "grad_norm": 1.446049369965634, "learning_rate": 7.122302158273382e-06, "loss": 0.5383, "step": 2278 }, { "epoch": 0.21387012012012013, "grad_norm": 1.4589380850003069, "learning_rate": 7.1254300907100416e-06, "loss": 0.5324, "step": 2279 }, { "epoch": 0.21396396396396397, "grad_norm": 1.4239145441814252, "learning_rate": 7.128558023146701e-06, "loss": 0.49, "step": 2280 }, { "epoch": 0.2140578078078078, "grad_norm": 1.4248529535028602, "learning_rate": 7.13168595558336e-06, "loss": 0.4931, "step": 2281 }, { "epoch": 0.21415165165165165, "grad_norm": 1.7597803949163384, "learning_rate": 7.134813888020019e-06, "loss": 0.5306, "step": 2282 }, { "epoch": 0.21424549549549549, "grad_norm": 1.7566977490652367, "learning_rate": 7.137941820456678e-06, "loss": 0.5426, "step": 2283 }, { "epoch": 0.21433933933933935, "grad_norm": 1.6718665725792259, "learning_rate": 7.141069752893338e-06, "loss": 0.5245, "step": 2284 }, { "epoch": 0.2144331831831832, "grad_norm": 1.6619958267561938, "learning_rate": 7.144197685329997e-06, "loss": 0.5187, "step": 2285 }, { "epoch": 0.21452702702702703, "grad_norm": 1.6538802591225212, "learning_rate": 7.147325617766657e-06, "loss": 0.5136, "step": 2286 }, { "epoch": 0.21462087087087087, "grad_norm": 1.652917685967004, "learning_rate": 7.150453550203316e-06, "loss": 0.5164, "step": 2287 }, { "epoch": 0.2147147147147147, "grad_norm": 3.595443039970787, "learning_rate": 7.153581482639975e-06, "loss": 0.5256, "step": 2288 }, { "epoch": 0.21480855855855857, "grad_norm": 2.077373456137981, "learning_rate": 7.156709415076635e-06, "loss": 0.5283, "step": 2289 }, { "epoch": 0.2149024024024024, "grad_norm": 1.6445969183088749, "learning_rate": 7.159837347513294e-06, "loss": 0.4694, "step": 2290 }, { "epoch": 0.21499624624624625, "grad_norm": 2.340241597624625, "learning_rate": 7.162965279949954e-06, "loss": 0.5106, "step": 2291 }, { "epoch": 0.21509009009009009, "grad_norm": 5.238702854799872, "learning_rate": 7.166093212386613e-06, "loss": 0.5014, "step": 2292 }, { "epoch": 0.21518393393393392, "grad_norm": 1.9849924949514257, "learning_rate": 7.169221144823273e-06, "loss": 0.5186, "step": 2293 }, { "epoch": 0.2152777777777778, "grad_norm": 1.741945261410383, "learning_rate": 7.1723490772599315e-06, "loss": 0.5769, "step": 2294 }, { "epoch": 0.21537162162162163, "grad_norm": 1.3270554400911967, "learning_rate": 7.175477009696591e-06, "loss": 0.4609, "step": 2295 }, { "epoch": 0.21546546546546547, "grad_norm": 2.210984348728526, "learning_rate": 7.178604942133251e-06, "loss": 0.5331, "step": 2296 }, { "epoch": 0.2155593093093093, "grad_norm": 1.6951149766166027, "learning_rate": 7.18173287456991e-06, "loss": 0.5238, "step": 2297 }, { "epoch": 0.21565315315315314, "grad_norm": 1.5682027044229978, "learning_rate": 7.18486080700657e-06, "loss": 0.4744, "step": 2298 }, { "epoch": 0.215746996996997, "grad_norm": 1.405017603850976, "learning_rate": 7.187988739443229e-06, "loss": 0.5163, "step": 2299 }, { "epoch": 0.21584084084084085, "grad_norm": 1.9523495884866726, "learning_rate": 7.191116671879887e-06, "loss": 0.5531, "step": 2300 }, { "epoch": 0.21593468468468469, "grad_norm": 5.016634622450942, "learning_rate": 7.194244604316547e-06, "loss": 0.5594, "step": 2301 }, { "epoch": 0.21602852852852852, "grad_norm": 1.975667744689984, "learning_rate": 7.1973725367532065e-06, "loss": 0.5383, "step": 2302 }, { "epoch": 0.21612237237237236, "grad_norm": 1.6970510712811202, "learning_rate": 7.2005004691898665e-06, "loss": 0.543, "step": 2303 }, { "epoch": 0.21621621621621623, "grad_norm": 1.5551898296483126, "learning_rate": 7.203628401626526e-06, "loss": 0.5162, "step": 2304 }, { "epoch": 0.21631006006006007, "grad_norm": 1.9737138111999493, "learning_rate": 7.206756334063185e-06, "loss": 0.5708, "step": 2305 }, { "epoch": 0.2164039039039039, "grad_norm": 1.5412496002029652, "learning_rate": 7.209884266499845e-06, "loss": 0.5209, "step": 2306 }, { "epoch": 0.21649774774774774, "grad_norm": 1.8559440866539536, "learning_rate": 7.213012198936503e-06, "loss": 0.5552, "step": 2307 }, { "epoch": 0.21659159159159158, "grad_norm": 1.5939749100581517, "learning_rate": 7.216140131373162e-06, "loss": 0.5347, "step": 2308 }, { "epoch": 0.21668543543543545, "grad_norm": 1.943730749779042, "learning_rate": 7.219268063809822e-06, "loss": 0.5423, "step": 2309 }, { "epoch": 0.21677927927927929, "grad_norm": 1.8793053030173328, "learning_rate": 7.2223959962464815e-06, "loss": 0.525, "step": 2310 }, { "epoch": 0.21687312312312312, "grad_norm": 2.342989884786762, "learning_rate": 7.2255239286831415e-06, "loss": 0.5246, "step": 2311 }, { "epoch": 0.21696696696696696, "grad_norm": 1.5547098345691364, "learning_rate": 7.228651861119801e-06, "loss": 0.5431, "step": 2312 }, { "epoch": 0.2170608108108108, "grad_norm": 1.7547260597302394, "learning_rate": 7.231779793556459e-06, "loss": 0.522, "step": 2313 }, { "epoch": 0.21715465465465467, "grad_norm": 2.4401926077428113, "learning_rate": 7.234907725993119e-06, "loss": 0.5423, "step": 2314 }, { "epoch": 0.2172484984984985, "grad_norm": 1.7540012657910546, "learning_rate": 7.238035658429778e-06, "loss": 0.5865, "step": 2315 }, { "epoch": 0.21734234234234234, "grad_norm": 10.322680380660266, "learning_rate": 7.241163590866438e-06, "loss": 0.5495, "step": 2316 }, { "epoch": 0.21743618618618618, "grad_norm": 2.6286702908797133, "learning_rate": 7.244291523303097e-06, "loss": 0.5436, "step": 2317 }, { "epoch": 0.21753003003003002, "grad_norm": 1.5654372753963708, "learning_rate": 7.247419455739757e-06, "loss": 0.4986, "step": 2318 }, { "epoch": 0.21762387387387389, "grad_norm": 1.65993827536412, "learning_rate": 7.250547388176416e-06, "loss": 0.5203, "step": 2319 }, { "epoch": 0.21771771771771772, "grad_norm": 2.5227293909055155, "learning_rate": 7.253675320613075e-06, "loss": 0.503, "step": 2320 }, { "epoch": 0.21781156156156156, "grad_norm": 4.406182477014431, "learning_rate": 7.256803253049735e-06, "loss": 0.5232, "step": 2321 }, { "epoch": 0.2179054054054054, "grad_norm": 1.3872937107650378, "learning_rate": 7.259931185486394e-06, "loss": 0.5364, "step": 2322 }, { "epoch": 0.21799924924924924, "grad_norm": 1.5450361618665827, "learning_rate": 7.263059117923054e-06, "loss": 0.4529, "step": 2323 }, { "epoch": 0.2180930930930931, "grad_norm": 1.5129206124115386, "learning_rate": 7.266187050359713e-06, "loss": 0.484, "step": 2324 }, { "epoch": 0.21818693693693694, "grad_norm": 2.1770935821127915, "learning_rate": 7.269314982796373e-06, "loss": 0.4986, "step": 2325 }, { "epoch": 0.21828078078078078, "grad_norm": 2.009842918856628, "learning_rate": 7.2724429152330314e-06, "loss": 0.5668, "step": 2326 }, { "epoch": 0.21837462462462462, "grad_norm": 1.8962517734440416, "learning_rate": 7.275570847669691e-06, "loss": 0.5635, "step": 2327 }, { "epoch": 0.21846846846846846, "grad_norm": 1.596982273583742, "learning_rate": 7.278698780106351e-06, "loss": 0.4256, "step": 2328 }, { "epoch": 0.21856231231231232, "grad_norm": 1.6133051643167287, "learning_rate": 7.28182671254301e-06, "loss": 0.5557, "step": 2329 }, { "epoch": 0.21865615615615616, "grad_norm": 2.163230563438488, "learning_rate": 7.284954644979669e-06, "loss": 0.5538, "step": 2330 }, { "epoch": 0.21875, "grad_norm": 1.9164689125535879, "learning_rate": 7.288082577416329e-06, "loss": 0.5578, "step": 2331 }, { "epoch": 0.21884384384384384, "grad_norm": 1.7896314481709132, "learning_rate": 7.291210509852987e-06, "loss": 0.4902, "step": 2332 }, { "epoch": 0.21893768768768768, "grad_norm": 1.7345500564548046, "learning_rate": 7.294338442289647e-06, "loss": 0.5464, "step": 2333 }, { "epoch": 0.21903153153153154, "grad_norm": 2.4105359256844987, "learning_rate": 7.297466374726306e-06, "loss": 0.5746, "step": 2334 }, { "epoch": 0.21912537537537538, "grad_norm": 2.003597685963839, "learning_rate": 7.3005943071629656e-06, "loss": 0.5171, "step": 2335 }, { "epoch": 0.21921921921921922, "grad_norm": 1.515150979341115, "learning_rate": 7.3037222395996256e-06, "loss": 0.4899, "step": 2336 }, { "epoch": 0.21931306306306306, "grad_norm": 1.683166557080094, "learning_rate": 7.306850172036285e-06, "loss": 0.5341, "step": 2337 }, { "epoch": 0.2194069069069069, "grad_norm": 1.8173473348729405, "learning_rate": 7.309978104472945e-06, "loss": 0.5106, "step": 2338 }, { "epoch": 0.21950075075075076, "grad_norm": 2.017971079583891, "learning_rate": 7.313106036909603e-06, "loss": 0.5363, "step": 2339 }, { "epoch": 0.2195945945945946, "grad_norm": 1.463078720908552, "learning_rate": 7.316233969346262e-06, "loss": 0.5038, "step": 2340 }, { "epoch": 0.21968843843843844, "grad_norm": 2.821169653902464, "learning_rate": 7.319361901782922e-06, "loss": 0.5607, "step": 2341 }, { "epoch": 0.21978228228228228, "grad_norm": 2.2478415391703948, "learning_rate": 7.322489834219581e-06, "loss": 0.5287, "step": 2342 }, { "epoch": 0.21987612612612611, "grad_norm": 1.655283302685012, "learning_rate": 7.325617766656241e-06, "loss": 0.4998, "step": 2343 }, { "epoch": 0.21996996996996998, "grad_norm": 1.9512077118686848, "learning_rate": 7.3287456990929006e-06, "loss": 0.5971, "step": 2344 }, { "epoch": 0.22006381381381382, "grad_norm": 2.263607797062326, "learning_rate": 7.331873631529559e-06, "loss": 0.5031, "step": 2345 }, { "epoch": 0.22015765765765766, "grad_norm": 5.133169997397489, "learning_rate": 7.335001563966219e-06, "loss": 0.4852, "step": 2346 }, { "epoch": 0.2202515015015015, "grad_norm": 1.8138028590553945, "learning_rate": 7.338129496402878e-06, "loss": 0.5034, "step": 2347 }, { "epoch": 0.22034534534534533, "grad_norm": 1.8044559198748369, "learning_rate": 7.341257428839538e-06, "loss": 0.5983, "step": 2348 }, { "epoch": 0.2204391891891892, "grad_norm": 2.961981041934651, "learning_rate": 7.344385361276197e-06, "loss": 0.5153, "step": 2349 }, { "epoch": 0.22053303303303304, "grad_norm": 4.316389411505883, "learning_rate": 7.347513293712857e-06, "loss": 0.4765, "step": 2350 }, { "epoch": 0.22062687687687688, "grad_norm": 2.160039813127478, "learning_rate": 7.3506412261495155e-06, "loss": 0.5668, "step": 2351 }, { "epoch": 0.22072072072072071, "grad_norm": 1.8580790467387425, "learning_rate": 7.353769158586175e-06, "loss": 0.551, "step": 2352 }, { "epoch": 0.22081456456456455, "grad_norm": 1.8143551255265895, "learning_rate": 7.356897091022835e-06, "loss": 0.4885, "step": 2353 }, { "epoch": 0.22090840840840842, "grad_norm": 1.854784800681038, "learning_rate": 7.360025023459494e-06, "loss": 0.5449, "step": 2354 }, { "epoch": 0.22100225225225226, "grad_norm": 1.702439466089269, "learning_rate": 7.363152955896153e-06, "loss": 0.4556, "step": 2355 }, { "epoch": 0.2210960960960961, "grad_norm": 1.7725073634197912, "learning_rate": 7.366280888332813e-06, "loss": 0.5045, "step": 2356 }, { "epoch": 0.22118993993993993, "grad_norm": 1.723600478129435, "learning_rate": 7.369408820769472e-06, "loss": 0.4966, "step": 2357 }, { "epoch": 0.22128378378378377, "grad_norm": 1.7844540495571817, "learning_rate": 7.372536753206131e-06, "loss": 0.5246, "step": 2358 }, { "epoch": 0.22137762762762764, "grad_norm": 1.4358308550218677, "learning_rate": 7.3756646856427905e-06, "loss": 0.5547, "step": 2359 }, { "epoch": 0.22147147147147148, "grad_norm": 1.66195488618228, "learning_rate": 7.37879261807945e-06, "loss": 0.4787, "step": 2360 }, { "epoch": 0.22156531531531531, "grad_norm": 1.772826157229592, "learning_rate": 7.38192055051611e-06, "loss": 0.5283, "step": 2361 }, { "epoch": 0.22165915915915915, "grad_norm": 1.5790917862249982, "learning_rate": 7.385048482952769e-06, "loss": 0.5425, "step": 2362 }, { "epoch": 0.221753003003003, "grad_norm": 1.6629099428326977, "learning_rate": 7.388176415389429e-06, "loss": 0.5571, "step": 2363 }, { "epoch": 0.22184684684684686, "grad_norm": 1.4817387201187877, "learning_rate": 7.391304347826087e-06, "loss": 0.5344, "step": 2364 }, { "epoch": 0.2219406906906907, "grad_norm": 1.5390973154555656, "learning_rate": 7.394432280262746e-06, "loss": 0.5262, "step": 2365 }, { "epoch": 0.22203453453453453, "grad_norm": 1.7023135994337064, "learning_rate": 7.397560212699406e-06, "loss": 0.5627, "step": 2366 }, { "epoch": 0.22212837837837837, "grad_norm": 1.9546845286303332, "learning_rate": 7.4006881451360655e-06, "loss": 0.5479, "step": 2367 }, { "epoch": 0.2222222222222222, "grad_norm": 1.9731303517953396, "learning_rate": 7.4038160775727255e-06, "loss": 0.5847, "step": 2368 }, { "epoch": 0.22231606606606608, "grad_norm": 2.5665217985796334, "learning_rate": 7.406944010009385e-06, "loss": 0.5146, "step": 2369 }, { "epoch": 0.22240990990990991, "grad_norm": 1.4809839203353397, "learning_rate": 7.410071942446043e-06, "loss": 0.4837, "step": 2370 }, { "epoch": 0.22250375375375375, "grad_norm": 1.6448822178694507, "learning_rate": 7.413199874882703e-06, "loss": 0.527, "step": 2371 }, { "epoch": 0.2225975975975976, "grad_norm": 1.3582002122326038, "learning_rate": 7.416327807319362e-06, "loss": 0.4999, "step": 2372 }, { "epoch": 0.22269144144144143, "grad_norm": 1.5088046452916428, "learning_rate": 7.419455739756022e-06, "loss": 0.5698, "step": 2373 }, { "epoch": 0.2227852852852853, "grad_norm": 1.3594440975446047, "learning_rate": 7.422583672192681e-06, "loss": 0.4651, "step": 2374 }, { "epoch": 0.22287912912912913, "grad_norm": 1.4393461328454606, "learning_rate": 7.425711604629341e-06, "loss": 0.4676, "step": 2375 }, { "epoch": 0.22297297297297297, "grad_norm": 1.7030310011724075, "learning_rate": 7.4288395370660005e-06, "loss": 0.504, "step": 2376 }, { "epoch": 0.2230668168168168, "grad_norm": 7.651269290522925, "learning_rate": 7.431967469502659e-06, "loss": 0.5424, "step": 2377 }, { "epoch": 0.22316066066066065, "grad_norm": 1.810343425234677, "learning_rate": 7.435095401939319e-06, "loss": 0.4797, "step": 2378 }, { "epoch": 0.22325450450450451, "grad_norm": 1.630342028165963, "learning_rate": 7.438223334375978e-06, "loss": 0.5178, "step": 2379 }, { "epoch": 0.22334834834834835, "grad_norm": 1.679453780410413, "learning_rate": 7.441351266812637e-06, "loss": 0.4565, "step": 2380 }, { "epoch": 0.2234421921921922, "grad_norm": 25.63120282733759, "learning_rate": 7.444479199249297e-06, "loss": 0.4518, "step": 2381 }, { "epoch": 0.22353603603603603, "grad_norm": 2.113941603929206, "learning_rate": 7.447607131685956e-06, "loss": 0.5197, "step": 2382 }, { "epoch": 0.22362987987987987, "grad_norm": 1.8119547388970678, "learning_rate": 7.4507350641226154e-06, "loss": 0.5083, "step": 2383 }, { "epoch": 0.22372372372372373, "grad_norm": 1.4971341330993466, "learning_rate": 7.453862996559275e-06, "loss": 0.498, "step": 2384 }, { "epoch": 0.22381756756756757, "grad_norm": 1.7379313078152765, "learning_rate": 7.456990928995934e-06, "loss": 0.5844, "step": 2385 }, { "epoch": 0.2239114114114114, "grad_norm": 1.9357113623017648, "learning_rate": 7.460118861432594e-06, "loss": 0.5331, "step": 2386 }, { "epoch": 0.22400525525525525, "grad_norm": 1.5399689710480955, "learning_rate": 7.463246793869253e-06, "loss": 0.514, "step": 2387 }, { "epoch": 0.22409909909909909, "grad_norm": 1.5864725137302713, "learning_rate": 7.466374726305913e-06, "loss": 0.4973, "step": 2388 }, { "epoch": 0.22419294294294295, "grad_norm": 1.9278969866403837, "learning_rate": 7.469502658742572e-06, "loss": 0.5054, "step": 2389 }, { "epoch": 0.2242867867867868, "grad_norm": 1.4135018066345957, "learning_rate": 7.47263059117923e-06, "loss": 0.5305, "step": 2390 }, { "epoch": 0.22438063063063063, "grad_norm": 1.8112108940032827, "learning_rate": 7.4757585236158904e-06, "loss": 0.573, "step": 2391 }, { "epoch": 0.22447447447447447, "grad_norm": 3.1414008371248676, "learning_rate": 7.47888645605255e-06, "loss": 0.5654, "step": 2392 }, { "epoch": 0.2245683183183183, "grad_norm": 1.528077772031718, "learning_rate": 7.48201438848921e-06, "loss": 0.5248, "step": 2393 }, { "epoch": 0.22466216216216217, "grad_norm": 1.7680366417228492, "learning_rate": 7.485142320925869e-06, "loss": 0.529, "step": 2394 }, { "epoch": 0.224756006006006, "grad_norm": 1.5359339024570833, "learning_rate": 7.488270253362529e-06, "loss": 0.5435, "step": 2395 }, { "epoch": 0.22484984984984985, "grad_norm": 1.9555709974002506, "learning_rate": 7.491398185799187e-06, "loss": 0.4743, "step": 2396 }, { "epoch": 0.22494369369369369, "grad_norm": 1.659188181768809, "learning_rate": 7.494526118235846e-06, "loss": 0.5502, "step": 2397 }, { "epoch": 0.22503753753753752, "grad_norm": 3.2721905955783788, "learning_rate": 7.497654050672506e-06, "loss": 0.4897, "step": 2398 }, { "epoch": 0.2251313813813814, "grad_norm": 2.144534573642006, "learning_rate": 7.500781983109165e-06, "loss": 0.5837, "step": 2399 }, { "epoch": 0.22522522522522523, "grad_norm": 1.706847595047566, "learning_rate": 7.503909915545825e-06, "loss": 0.482, "step": 2400 }, { "epoch": 0.22531906906906907, "grad_norm": 1.4564490685424907, "learning_rate": 7.5070378479824846e-06, "loss": 0.5367, "step": 2401 }, { "epoch": 0.2254129129129129, "grad_norm": 1.669777322720617, "learning_rate": 7.510165780419143e-06, "loss": 0.4985, "step": 2402 }, { "epoch": 0.22550675675675674, "grad_norm": 1.713048827211883, "learning_rate": 7.513293712855803e-06, "loss": 0.535, "step": 2403 }, { "epoch": 0.2256006006006006, "grad_norm": 2.813071380400169, "learning_rate": 7.516421645292462e-06, "loss": 0.5549, "step": 2404 }, { "epoch": 0.22569444444444445, "grad_norm": 1.8897632416382684, "learning_rate": 7.519549577729122e-06, "loss": 0.5153, "step": 2405 }, { "epoch": 0.22578828828828829, "grad_norm": 1.362777854240662, "learning_rate": 7.522677510165781e-06, "loss": 0.4667, "step": 2406 }, { "epoch": 0.22588213213213212, "grad_norm": 5.171132087178801, "learning_rate": 7.52580544260244e-06, "loss": 0.5072, "step": 2407 }, { "epoch": 0.22597597597597596, "grad_norm": 2.5801272445213974, "learning_rate": 7.5289333750391e-06, "loss": 0.5304, "step": 2408 }, { "epoch": 0.22606981981981983, "grad_norm": 4.954620338997029, "learning_rate": 7.532061307475759e-06, "loss": 0.5318, "step": 2409 }, { "epoch": 0.22616366366366367, "grad_norm": 1.384994061455444, "learning_rate": 7.535189239912418e-06, "loss": 0.51, "step": 2410 }, { "epoch": 0.2262575075075075, "grad_norm": 2.168938049428518, "learning_rate": 7.538317172349078e-06, "loss": 0.4955, "step": 2411 }, { "epoch": 0.22635135135135134, "grad_norm": 1.5910198721810744, "learning_rate": 7.541445104785737e-06, "loss": 0.5408, "step": 2412 }, { "epoch": 0.22644519519519518, "grad_norm": 1.28787972746402, "learning_rate": 7.544573037222397e-06, "loss": 0.4839, "step": 2413 }, { "epoch": 0.22653903903903905, "grad_norm": 1.5384978809345045, "learning_rate": 7.547700969659056e-06, "loss": 0.5115, "step": 2414 }, { "epoch": 0.22663288288288289, "grad_norm": 1.8018118657253877, "learning_rate": 7.5508289020957145e-06, "loss": 0.4713, "step": 2415 }, { "epoch": 0.22672672672672672, "grad_norm": 1.3972671173921347, "learning_rate": 7.5539568345323745e-06, "loss": 0.4644, "step": 2416 }, { "epoch": 0.22682057057057056, "grad_norm": 2.6692995386835703, "learning_rate": 7.557084766969034e-06, "loss": 0.4891, "step": 2417 }, { "epoch": 0.22691441441441443, "grad_norm": 1.7891978354307048, "learning_rate": 7.560212699405694e-06, "loss": 0.5311, "step": 2418 }, { "epoch": 0.22700825825825827, "grad_norm": 1.723837624527255, "learning_rate": 7.563340631842353e-06, "loss": 0.4589, "step": 2419 }, { "epoch": 0.2271021021021021, "grad_norm": 1.5485201417979253, "learning_rate": 7.566468564279013e-06, "loss": 0.5344, "step": 2420 }, { "epoch": 0.22719594594594594, "grad_norm": 2.084292956923314, "learning_rate": 7.569596496715672e-06, "loss": 0.5463, "step": 2421 }, { "epoch": 0.22728978978978978, "grad_norm": 1.8376777130398, "learning_rate": 7.57272442915233e-06, "loss": 0.5019, "step": 2422 }, { "epoch": 0.22738363363363365, "grad_norm": 1.7634686332554605, "learning_rate": 7.57585236158899e-06, "loss": 0.501, "step": 2423 }, { "epoch": 0.22747747747747749, "grad_norm": 2.088990861676029, "learning_rate": 7.5789802940256495e-06, "loss": 0.5636, "step": 2424 }, { "epoch": 0.22757132132132132, "grad_norm": 2.38027736461394, "learning_rate": 7.5821082264623095e-06, "loss": 0.5411, "step": 2425 }, { "epoch": 0.22766516516516516, "grad_norm": 1.6186457711722388, "learning_rate": 7.585236158898969e-06, "loss": 0.5027, "step": 2426 }, { "epoch": 0.227759009009009, "grad_norm": 1.7876690890253377, "learning_rate": 7.588364091335628e-06, "loss": 0.5303, "step": 2427 }, { "epoch": 0.22785285285285287, "grad_norm": 1.6947888058359828, "learning_rate": 7.591492023772287e-06, "loss": 0.5493, "step": 2428 }, { "epoch": 0.2279466966966967, "grad_norm": 1.4372853520655928, "learning_rate": 7.594619956208946e-06, "loss": 0.5241, "step": 2429 }, { "epoch": 0.22804054054054054, "grad_norm": 1.7001400946757708, "learning_rate": 7.597747888645606e-06, "loss": 0.5359, "step": 2430 }, { "epoch": 0.22813438438438438, "grad_norm": 1.6738245032043095, "learning_rate": 7.600875821082265e-06, "loss": 0.5606, "step": 2431 }, { "epoch": 0.22822822822822822, "grad_norm": 1.8077116314647788, "learning_rate": 7.6040037535189245e-06, "loss": 0.5029, "step": 2432 }, { "epoch": 0.22832207207207209, "grad_norm": 1.6871527146064282, "learning_rate": 7.6071316859555845e-06, "loss": 0.5479, "step": 2433 }, { "epoch": 0.22841591591591592, "grad_norm": 5.657414155751408, "learning_rate": 7.610259618392243e-06, "loss": 0.5607, "step": 2434 }, { "epoch": 0.22850975975975976, "grad_norm": 1.6993917280033266, "learning_rate": 7.613387550828902e-06, "loss": 0.5118, "step": 2435 }, { "epoch": 0.2286036036036036, "grad_norm": 1.6961392332119258, "learning_rate": 7.616515483265562e-06, "loss": 0.5914, "step": 2436 }, { "epoch": 0.22869744744744744, "grad_norm": 1.690592839508821, "learning_rate": 7.619643415702221e-06, "loss": 0.5082, "step": 2437 }, { "epoch": 0.2287912912912913, "grad_norm": 2.0435624447136482, "learning_rate": 7.622771348138881e-06, "loss": 0.5082, "step": 2438 }, { "epoch": 0.22888513513513514, "grad_norm": 7.201258795072978, "learning_rate": 7.62589928057554e-06, "loss": 0.4683, "step": 2439 }, { "epoch": 0.22897897897897898, "grad_norm": 1.4524211272405243, "learning_rate": 7.6290272130122e-06, "loss": 0.4077, "step": 2440 }, { "epoch": 0.22907282282282282, "grad_norm": 1.5212736293258617, "learning_rate": 7.632155145448859e-06, "loss": 0.5172, "step": 2441 }, { "epoch": 0.22916666666666666, "grad_norm": 1.83675529602365, "learning_rate": 7.635283077885518e-06, "loss": 0.5202, "step": 2442 }, { "epoch": 0.22926051051051052, "grad_norm": 1.510718092265353, "learning_rate": 7.638411010322177e-06, "loss": 0.4643, "step": 2443 }, { "epoch": 0.22935435435435436, "grad_norm": 1.4671477195328106, "learning_rate": 7.641538942758838e-06, "loss": 0.507, "step": 2444 }, { "epoch": 0.2294481981981982, "grad_norm": 2.0099571302656343, "learning_rate": 7.644666875195497e-06, "loss": 0.5825, "step": 2445 }, { "epoch": 0.22954204204204204, "grad_norm": 1.6745252282384993, "learning_rate": 7.647794807632156e-06, "loss": 0.5802, "step": 2446 }, { "epoch": 0.22963588588588588, "grad_norm": 1.7767939912347273, "learning_rate": 7.650922740068815e-06, "loss": 0.555, "step": 2447 }, { "epoch": 0.22972972972972974, "grad_norm": 1.5222088737281414, "learning_rate": 7.654050672505474e-06, "loss": 0.4928, "step": 2448 }, { "epoch": 0.22982357357357358, "grad_norm": 1.7121807940988039, "learning_rate": 7.657178604942134e-06, "loss": 0.5115, "step": 2449 }, { "epoch": 0.22991741741741742, "grad_norm": 1.6793303407052453, "learning_rate": 7.660306537378793e-06, "loss": 0.5069, "step": 2450 }, { "epoch": 0.23001126126126126, "grad_norm": 1.9659378227272184, "learning_rate": 7.663434469815454e-06, "loss": 0.4865, "step": 2451 }, { "epoch": 0.2301051051051051, "grad_norm": 1.6645722990324232, "learning_rate": 7.666562402252113e-06, "loss": 0.5431, "step": 2452 }, { "epoch": 0.23019894894894896, "grad_norm": 1.9380655711770323, "learning_rate": 7.669690334688772e-06, "loss": 0.4911, "step": 2453 }, { "epoch": 0.2302927927927928, "grad_norm": 1.6093284355747173, "learning_rate": 7.672818267125431e-06, "loss": 0.4956, "step": 2454 }, { "epoch": 0.23038663663663664, "grad_norm": 1.5648730259891286, "learning_rate": 7.67594619956209e-06, "loss": 0.4958, "step": 2455 }, { "epoch": 0.23048048048048048, "grad_norm": 1.4306388537649526, "learning_rate": 7.67907413199875e-06, "loss": 0.4523, "step": 2456 }, { "epoch": 0.23057432432432431, "grad_norm": 1.3948037560113442, "learning_rate": 7.682202064435409e-06, "loss": 0.5166, "step": 2457 }, { "epoch": 0.23066816816816818, "grad_norm": 1.5723377268931737, "learning_rate": 7.685329996872068e-06, "loss": 0.4819, "step": 2458 }, { "epoch": 0.23076201201201202, "grad_norm": 1.635927302823533, "learning_rate": 7.688457929308729e-06, "loss": 0.5199, "step": 2459 }, { "epoch": 0.23085585585585586, "grad_norm": 13.912887748794212, "learning_rate": 7.691585861745386e-06, "loss": 0.5748, "step": 2460 }, { "epoch": 0.2309496996996997, "grad_norm": 2.662970602014381, "learning_rate": 7.694713794182045e-06, "loss": 0.5268, "step": 2461 }, { "epoch": 0.23104354354354353, "grad_norm": 1.491252027937193, "learning_rate": 7.697841726618706e-06, "loss": 0.5307, "step": 2462 }, { "epoch": 0.2311373873873874, "grad_norm": 2.7988343454263225, "learning_rate": 7.700969659055365e-06, "loss": 0.5017, "step": 2463 }, { "epoch": 0.23123123123123124, "grad_norm": 1.622676900451987, "learning_rate": 7.704097591492024e-06, "loss": 0.4961, "step": 2464 }, { "epoch": 0.23132507507507508, "grad_norm": 1.3561917851814884, "learning_rate": 7.707225523928684e-06, "loss": 0.4943, "step": 2465 }, { "epoch": 0.23141891891891891, "grad_norm": 1.7248191184240598, "learning_rate": 7.710353456365343e-06, "loss": 0.5033, "step": 2466 }, { "epoch": 0.23151276276276275, "grad_norm": 1.8092973371859675, "learning_rate": 7.713481388802002e-06, "loss": 0.4943, "step": 2467 }, { "epoch": 0.23160660660660662, "grad_norm": 1.354714681321835, "learning_rate": 7.716609321238661e-06, "loss": 0.4716, "step": 2468 }, { "epoch": 0.23170045045045046, "grad_norm": 2.872477200274248, "learning_rate": 7.719737253675322e-06, "loss": 0.5197, "step": 2469 }, { "epoch": 0.2317942942942943, "grad_norm": 1.3779963335153995, "learning_rate": 7.722865186111981e-06, "loss": 0.5482, "step": 2470 }, { "epoch": 0.23188813813813813, "grad_norm": 1.4560633532696665, "learning_rate": 7.72599311854864e-06, "loss": 0.4776, "step": 2471 }, { "epoch": 0.23198198198198197, "grad_norm": 1.5422515973530997, "learning_rate": 7.7291210509853e-06, "loss": 0.5112, "step": 2472 }, { "epoch": 0.23207582582582584, "grad_norm": 1.4534186444448698, "learning_rate": 7.732248983421959e-06, "loss": 0.5409, "step": 2473 }, { "epoch": 0.23216966966966968, "grad_norm": 1.7122677918196352, "learning_rate": 7.735376915858618e-06, "loss": 0.5641, "step": 2474 }, { "epoch": 0.23226351351351351, "grad_norm": 1.788744359764919, "learning_rate": 7.738504848295277e-06, "loss": 0.5324, "step": 2475 }, { "epoch": 0.23235735735735735, "grad_norm": 1.5499978136579031, "learning_rate": 7.741632780731938e-06, "loss": 0.4874, "step": 2476 }, { "epoch": 0.2324512012012012, "grad_norm": 1.6552787399427278, "learning_rate": 7.744760713168597e-06, "loss": 0.4898, "step": 2477 }, { "epoch": 0.23254504504504506, "grad_norm": 2.2590363362593604, "learning_rate": 7.747888645605256e-06, "loss": 0.563, "step": 2478 }, { "epoch": 0.2326388888888889, "grad_norm": 1.5834769413789977, "learning_rate": 7.751016578041915e-06, "loss": 0.4491, "step": 2479 }, { "epoch": 0.23273273273273273, "grad_norm": 1.6410456146832435, "learning_rate": 7.754144510478574e-06, "loss": 0.5357, "step": 2480 }, { "epoch": 0.23282657657657657, "grad_norm": 1.5288118374508024, "learning_rate": 7.757272442915234e-06, "loss": 0.53, "step": 2481 }, { "epoch": 0.2329204204204204, "grad_norm": 1.7353062486961803, "learning_rate": 7.760400375351893e-06, "loss": 0.5276, "step": 2482 }, { "epoch": 0.23301426426426428, "grad_norm": 2.6008248081116476, "learning_rate": 7.763528307788552e-06, "loss": 0.5271, "step": 2483 }, { "epoch": 0.23310810810810811, "grad_norm": 2.017343099184053, "learning_rate": 7.766656240225213e-06, "loss": 0.4688, "step": 2484 }, { "epoch": 0.23320195195195195, "grad_norm": 2.022803372688665, "learning_rate": 7.769784172661872e-06, "loss": 0.4804, "step": 2485 }, { "epoch": 0.2332957957957958, "grad_norm": 2.315406758813776, "learning_rate": 7.77291210509853e-06, "loss": 0.538, "step": 2486 }, { "epoch": 0.23338963963963963, "grad_norm": 2.5580435812892826, "learning_rate": 7.77604003753519e-06, "loss": 0.5575, "step": 2487 }, { "epoch": 0.2334834834834835, "grad_norm": 1.5547516390740992, "learning_rate": 7.77916796997185e-06, "loss": 0.4991, "step": 2488 }, { "epoch": 0.23357732732732733, "grad_norm": 1.5543167911853026, "learning_rate": 7.782295902408509e-06, "loss": 0.5107, "step": 2489 }, { "epoch": 0.23367117117117117, "grad_norm": 2.2444798523353757, "learning_rate": 7.785423834845168e-06, "loss": 0.5202, "step": 2490 }, { "epoch": 0.233765015015015, "grad_norm": 1.4903823140428651, "learning_rate": 7.788551767281829e-06, "loss": 0.5245, "step": 2491 }, { "epoch": 0.23385885885885885, "grad_norm": 1.5622120708249128, "learning_rate": 7.791679699718486e-06, "loss": 0.5229, "step": 2492 }, { "epoch": 0.23395270270270271, "grad_norm": 1.6475837780211826, "learning_rate": 7.794807632155145e-06, "loss": 0.5701, "step": 2493 }, { "epoch": 0.23404654654654655, "grad_norm": 1.8265690053301944, "learning_rate": 7.797935564591806e-06, "loss": 0.4866, "step": 2494 }, { "epoch": 0.2341403903903904, "grad_norm": 5.677878984243591, "learning_rate": 7.801063497028465e-06, "loss": 0.5078, "step": 2495 }, { "epoch": 0.23423423423423423, "grad_norm": 3.72243713529153, "learning_rate": 7.804191429465124e-06, "loss": 0.5185, "step": 2496 }, { "epoch": 0.23432807807807807, "grad_norm": 1.42787377416781, "learning_rate": 7.807319361901783e-06, "loss": 0.4784, "step": 2497 }, { "epoch": 0.23442192192192193, "grad_norm": 1.8803580063746694, "learning_rate": 7.810447294338443e-06, "loss": 0.4917, "step": 2498 }, { "epoch": 0.23451576576576577, "grad_norm": 2.860758618297448, "learning_rate": 7.813575226775102e-06, "loss": 0.5698, "step": 2499 }, { "epoch": 0.2346096096096096, "grad_norm": 1.481149771974109, "learning_rate": 7.816703159211761e-06, "loss": 0.5107, "step": 2500 }, { "epoch": 0.23470345345345345, "grad_norm": 1.4103876397547426, "learning_rate": 7.819831091648422e-06, "loss": 0.5148, "step": 2501 }, { "epoch": 0.23479729729729729, "grad_norm": 2.8494220582838214, "learning_rate": 7.822959024085081e-06, "loss": 0.512, "step": 2502 }, { "epoch": 0.23489114114114115, "grad_norm": 1.9090095840359753, "learning_rate": 7.82608695652174e-06, "loss": 0.4843, "step": 2503 }, { "epoch": 0.234984984984985, "grad_norm": 1.3201046293712841, "learning_rate": 7.8292148889584e-06, "loss": 0.5197, "step": 2504 }, { "epoch": 0.23507882882882883, "grad_norm": 1.733462967931746, "learning_rate": 7.832342821395058e-06, "loss": 0.507, "step": 2505 }, { "epoch": 0.23517267267267267, "grad_norm": 1.8460816744727926, "learning_rate": 7.835470753831718e-06, "loss": 0.5241, "step": 2506 }, { "epoch": 0.2352665165165165, "grad_norm": 1.9204589590615713, "learning_rate": 7.838598686268377e-06, "loss": 0.6025, "step": 2507 }, { "epoch": 0.23536036036036037, "grad_norm": 1.9609333954899204, "learning_rate": 7.841726618705036e-06, "loss": 0.5263, "step": 2508 }, { "epoch": 0.2354542042042042, "grad_norm": 6.422666910143657, "learning_rate": 7.844854551141697e-06, "loss": 0.5417, "step": 2509 }, { "epoch": 0.23554804804804805, "grad_norm": 2.019955415363173, "learning_rate": 7.847982483578356e-06, "loss": 0.5026, "step": 2510 }, { "epoch": 0.23564189189189189, "grad_norm": 1.5696240213766444, "learning_rate": 7.851110416015013e-06, "loss": 0.5038, "step": 2511 }, { "epoch": 0.23573573573573572, "grad_norm": 1.597822461973698, "learning_rate": 7.854238348451674e-06, "loss": 0.5333, "step": 2512 }, { "epoch": 0.2358295795795796, "grad_norm": 1.6237693068705863, "learning_rate": 7.857366280888333e-06, "loss": 0.5325, "step": 2513 }, { "epoch": 0.23592342342342343, "grad_norm": 1.6510217072183735, "learning_rate": 7.860494213324993e-06, "loss": 0.5373, "step": 2514 }, { "epoch": 0.23601726726726727, "grad_norm": 2.1743199610116393, "learning_rate": 7.863622145761652e-06, "loss": 0.5357, "step": 2515 }, { "epoch": 0.2361111111111111, "grad_norm": 1.6218003922946966, "learning_rate": 7.866750078198313e-06, "loss": 0.5516, "step": 2516 }, { "epoch": 0.23620495495495494, "grad_norm": 1.6360848508884926, "learning_rate": 7.869878010634972e-06, "loss": 0.478, "step": 2517 }, { "epoch": 0.2362987987987988, "grad_norm": 2.7105551192267128, "learning_rate": 7.87300594307163e-06, "loss": 0.5726, "step": 2518 }, { "epoch": 0.23639264264264265, "grad_norm": 1.458824293496011, "learning_rate": 7.87613387550829e-06, "loss": 0.4434, "step": 2519 }, { "epoch": 0.23648648648648649, "grad_norm": 1.7629771665647567, "learning_rate": 7.87926180794495e-06, "loss": 0.4876, "step": 2520 }, { "epoch": 0.23658033033033032, "grad_norm": 1.7746749839702742, "learning_rate": 7.882389740381608e-06, "loss": 0.5553, "step": 2521 }, { "epoch": 0.23667417417417416, "grad_norm": 1.5308450777625526, "learning_rate": 7.885517672818268e-06, "loss": 0.499, "step": 2522 }, { "epoch": 0.23676801801801803, "grad_norm": 5.977388179320714, "learning_rate": 7.888645605254928e-06, "loss": 0.5468, "step": 2523 }, { "epoch": 0.23686186186186187, "grad_norm": 1.5244100787074546, "learning_rate": 7.891773537691586e-06, "loss": 0.5217, "step": 2524 }, { "epoch": 0.2369557057057057, "grad_norm": 1.9800958376355429, "learning_rate": 7.894901470128245e-06, "loss": 0.563, "step": 2525 }, { "epoch": 0.23704954954954954, "grad_norm": 1.695879487166718, "learning_rate": 7.898029402564906e-06, "loss": 0.4913, "step": 2526 }, { "epoch": 0.23714339339339338, "grad_norm": 1.8126231147028657, "learning_rate": 7.901157335001565e-06, "loss": 0.5353, "step": 2527 }, { "epoch": 0.23723723723723725, "grad_norm": 2.2069636925346257, "learning_rate": 7.904285267438224e-06, "loss": 0.5479, "step": 2528 }, { "epoch": 0.23733108108108109, "grad_norm": 1.803959872481971, "learning_rate": 7.907413199874883e-06, "loss": 0.5368, "step": 2529 }, { "epoch": 0.23742492492492492, "grad_norm": 1.6881259724069317, "learning_rate": 7.910541132311543e-06, "loss": 0.5135, "step": 2530 }, { "epoch": 0.23751876876876876, "grad_norm": 1.5658166942040088, "learning_rate": 7.913669064748202e-06, "loss": 0.4907, "step": 2531 }, { "epoch": 0.2376126126126126, "grad_norm": 1.5627643598058047, "learning_rate": 7.916796997184861e-06, "loss": 0.4981, "step": 2532 }, { "epoch": 0.23770645645645647, "grad_norm": 1.9322618666453422, "learning_rate": 7.91992492962152e-06, "loss": 0.5317, "step": 2533 }, { "epoch": 0.2378003003003003, "grad_norm": 3.5656770163219624, "learning_rate": 7.923052862058181e-06, "loss": 0.5586, "step": 2534 }, { "epoch": 0.23789414414414414, "grad_norm": 1.5089995323125522, "learning_rate": 7.92618079449484e-06, "loss": 0.5094, "step": 2535 }, { "epoch": 0.23798798798798798, "grad_norm": 1.6874316046888653, "learning_rate": 7.9293087269315e-06, "loss": 0.557, "step": 2536 }, { "epoch": 0.23808183183183182, "grad_norm": 1.7925147554800556, "learning_rate": 7.932436659368158e-06, "loss": 0.5143, "step": 2537 }, { "epoch": 0.23817567567567569, "grad_norm": 1.5760829782178682, "learning_rate": 7.935564591804818e-06, "loss": 0.5648, "step": 2538 }, { "epoch": 0.23826951951951952, "grad_norm": 19.777146752650726, "learning_rate": 7.938692524241477e-06, "loss": 0.5177, "step": 2539 }, { "epoch": 0.23836336336336336, "grad_norm": 3.4348581896767905, "learning_rate": 7.941820456678136e-06, "loss": 0.5422, "step": 2540 }, { "epoch": 0.2384572072072072, "grad_norm": 1.9860536255035437, "learning_rate": 7.944948389114797e-06, "loss": 0.4673, "step": 2541 }, { "epoch": 0.23855105105105104, "grad_norm": 1.623815051122917, "learning_rate": 7.948076321551456e-06, "loss": 0.5531, "step": 2542 }, { "epoch": 0.2386448948948949, "grad_norm": 1.3044339927253685, "learning_rate": 7.951204253988113e-06, "loss": 0.55, "step": 2543 }, { "epoch": 0.23873873873873874, "grad_norm": 106.63653646508224, "learning_rate": 7.954332186424774e-06, "loss": 0.4461, "step": 2544 }, { "epoch": 0.23883258258258258, "grad_norm": 1.4131226852832062, "learning_rate": 7.957460118861433e-06, "loss": 0.5439, "step": 2545 }, { "epoch": 0.23892642642642642, "grad_norm": 1.680368671863304, "learning_rate": 7.960588051298093e-06, "loss": 0.5067, "step": 2546 }, { "epoch": 0.23902027027027026, "grad_norm": 1.497837539425782, "learning_rate": 7.963715983734752e-06, "loss": 0.519, "step": 2547 }, { "epoch": 0.23911411411411412, "grad_norm": 1.4091745354209375, "learning_rate": 7.966843916171413e-06, "loss": 0.5281, "step": 2548 }, { "epoch": 0.23920795795795796, "grad_norm": 1.73772264518803, "learning_rate": 7.969971848608072e-06, "loss": 0.5087, "step": 2549 }, { "epoch": 0.2393018018018018, "grad_norm": 1.9790148689180551, "learning_rate": 7.97309978104473e-06, "loss": 0.4929, "step": 2550 }, { "epoch": 0.23939564564564564, "grad_norm": 1.8786734627981876, "learning_rate": 7.97622771348139e-06, "loss": 0.5939, "step": 2551 }, { "epoch": 0.23948948948948948, "grad_norm": 1.99045218795746, "learning_rate": 7.97935564591805e-06, "loss": 0.5706, "step": 2552 }, { "epoch": 0.23958333333333334, "grad_norm": 2.263607075204972, "learning_rate": 7.982483578354708e-06, "loss": 0.4931, "step": 2553 }, { "epoch": 0.23967717717717718, "grad_norm": 2.6352265404829924, "learning_rate": 7.985611510791367e-06, "loss": 0.5023, "step": 2554 }, { "epoch": 0.23977102102102102, "grad_norm": 1.7316984588365827, "learning_rate": 7.988739443228027e-06, "loss": 0.5421, "step": 2555 }, { "epoch": 0.23986486486486486, "grad_norm": 1.6827528141934742, "learning_rate": 7.991867375664686e-06, "loss": 0.5302, "step": 2556 }, { "epoch": 0.2399587087087087, "grad_norm": 1.3628175123893753, "learning_rate": 7.994995308101345e-06, "loss": 0.523, "step": 2557 }, { "epoch": 0.24005255255255256, "grad_norm": 1.5534819168497243, "learning_rate": 7.998123240538004e-06, "loss": 0.5809, "step": 2558 }, { "epoch": 0.2401463963963964, "grad_norm": 1.4027915747378061, "learning_rate": 8.001251172974665e-06, "loss": 0.5251, "step": 2559 }, { "epoch": 0.24024024024024024, "grad_norm": 2.2325692844098235, "learning_rate": 8.004379105411324e-06, "loss": 0.5136, "step": 2560 }, { "epoch": 0.24033408408408408, "grad_norm": 1.8384478551291998, "learning_rate": 8.007507037847983e-06, "loss": 0.5808, "step": 2561 }, { "epoch": 0.24042792792792791, "grad_norm": 1.7204351196666823, "learning_rate": 8.010634970284642e-06, "loss": 0.5113, "step": 2562 }, { "epoch": 0.24052177177177178, "grad_norm": 2.07332001981382, "learning_rate": 8.013762902721302e-06, "loss": 0.523, "step": 2563 }, { "epoch": 0.24061561561561562, "grad_norm": 2.6366268283410794, "learning_rate": 8.01689083515796e-06, "loss": 0.5153, "step": 2564 }, { "epoch": 0.24070945945945946, "grad_norm": 1.6093592627057705, "learning_rate": 8.02001876759462e-06, "loss": 0.5373, "step": 2565 }, { "epoch": 0.2408033033033033, "grad_norm": 1.5671080397494348, "learning_rate": 8.02314670003128e-06, "loss": 0.5178, "step": 2566 }, { "epoch": 0.24089714714714713, "grad_norm": 1.4962292131221329, "learning_rate": 8.02627463246794e-06, "loss": 0.539, "step": 2567 }, { "epoch": 0.240990990990991, "grad_norm": 1.8778834618962295, "learning_rate": 8.029402564904599e-06, "loss": 0.5325, "step": 2568 }, { "epoch": 0.24108483483483484, "grad_norm": 1.726396858517535, "learning_rate": 8.032530497341258e-06, "loss": 0.4826, "step": 2569 }, { "epoch": 0.24117867867867868, "grad_norm": 1.497892622592552, "learning_rate": 8.035658429777917e-06, "loss": 0.4915, "step": 2570 }, { "epoch": 0.24127252252252251, "grad_norm": 4.8717731523754075, "learning_rate": 8.038786362214577e-06, "loss": 0.5425, "step": 2571 }, { "epoch": 0.24136636636636635, "grad_norm": 1.4434426169351278, "learning_rate": 8.041914294651236e-06, "loss": 0.5001, "step": 2572 }, { "epoch": 0.24146021021021022, "grad_norm": 2.5055012611556715, "learning_rate": 8.045042227087897e-06, "loss": 0.5349, "step": 2573 }, { "epoch": 0.24155405405405406, "grad_norm": 1.527806933708563, "learning_rate": 8.048170159524556e-06, "loss": 0.5146, "step": 2574 }, { "epoch": 0.2416478978978979, "grad_norm": 1.6947334551585636, "learning_rate": 8.051298091961213e-06, "loss": 0.5363, "step": 2575 }, { "epoch": 0.24174174174174173, "grad_norm": 2.115502695614235, "learning_rate": 8.054426024397874e-06, "loss": 0.5148, "step": 2576 }, { "epoch": 0.24183558558558557, "grad_norm": 1.9068028835914022, "learning_rate": 8.057553956834533e-06, "loss": 0.5383, "step": 2577 }, { "epoch": 0.24192942942942944, "grad_norm": 6.004287057872623, "learning_rate": 8.060681889271192e-06, "loss": 0.5655, "step": 2578 }, { "epoch": 0.24202327327327328, "grad_norm": 4.190014028765601, "learning_rate": 8.063809821707852e-06, "loss": 0.5372, "step": 2579 }, { "epoch": 0.24211711711711711, "grad_norm": 1.7192697032235005, "learning_rate": 8.06693775414451e-06, "loss": 0.528, "step": 2580 }, { "epoch": 0.24221096096096095, "grad_norm": 3.222479352463538, "learning_rate": 8.07006568658117e-06, "loss": 0.5169, "step": 2581 }, { "epoch": 0.24230480480480482, "grad_norm": 1.8528595523329776, "learning_rate": 8.073193619017829e-06, "loss": 0.5106, "step": 2582 }, { "epoch": 0.24239864864864866, "grad_norm": 1.6815563119150592, "learning_rate": 8.076321551454488e-06, "loss": 0.5537, "step": 2583 }, { "epoch": 0.2424924924924925, "grad_norm": 1.6106407782498715, "learning_rate": 8.079449483891149e-06, "loss": 0.5209, "step": 2584 }, { "epoch": 0.24258633633633633, "grad_norm": 2.143167385807634, "learning_rate": 8.082577416327808e-06, "loss": 0.5041, "step": 2585 }, { "epoch": 0.24268018018018017, "grad_norm": 1.402104313981157, "learning_rate": 8.085705348764467e-06, "loss": 0.4958, "step": 2586 }, { "epoch": 0.24277402402402404, "grad_norm": 1.4968989560093722, "learning_rate": 8.088833281201127e-06, "loss": 0.5234, "step": 2587 }, { "epoch": 0.24286786786786788, "grad_norm": 2.3737508011817967, "learning_rate": 8.091961213637786e-06, "loss": 0.5331, "step": 2588 }, { "epoch": 0.24296171171171171, "grad_norm": 1.428565825320651, "learning_rate": 8.095089146074445e-06, "loss": 0.5306, "step": 2589 }, { "epoch": 0.24305555555555555, "grad_norm": 1.2286188944241874, "learning_rate": 8.098217078511104e-06, "loss": 0.4886, "step": 2590 }, { "epoch": 0.2431493993993994, "grad_norm": 3.6031067140779554, "learning_rate": 8.101345010947765e-06, "loss": 0.509, "step": 2591 }, { "epoch": 0.24324324324324326, "grad_norm": 1.8682355918182643, "learning_rate": 8.104472943384424e-06, "loss": 0.5467, "step": 2592 }, { "epoch": 0.2433370870870871, "grad_norm": 1.3969808656068645, "learning_rate": 8.107600875821083e-06, "loss": 0.529, "step": 2593 }, { "epoch": 0.24343093093093093, "grad_norm": 2.515385062160382, "learning_rate": 8.110728808257742e-06, "loss": 0.5391, "step": 2594 }, { "epoch": 0.24352477477477477, "grad_norm": 1.917903142106855, "learning_rate": 8.113856740694402e-06, "loss": 0.5708, "step": 2595 }, { "epoch": 0.2436186186186186, "grad_norm": 2.0646105345184824, "learning_rate": 8.11698467313106e-06, "loss": 0.589, "step": 2596 }, { "epoch": 0.24371246246246248, "grad_norm": 1.6019148241276384, "learning_rate": 8.12011260556772e-06, "loss": 0.4849, "step": 2597 }, { "epoch": 0.24380630630630631, "grad_norm": 1.7974070445856902, "learning_rate": 8.12324053800438e-06, "loss": 0.4966, "step": 2598 }, { "epoch": 0.24390015015015015, "grad_norm": 1.7927976487954609, "learning_rate": 8.12636847044104e-06, "loss": 0.5267, "step": 2599 }, { "epoch": 0.243993993993994, "grad_norm": 1.6966218420028305, "learning_rate": 8.129496402877699e-06, "loss": 0.4912, "step": 2600 }, { "epoch": 0.24408783783783783, "grad_norm": 3.191623669888294, "learning_rate": 8.132624335314358e-06, "loss": 0.4934, "step": 2601 }, { "epoch": 0.2441816816816817, "grad_norm": 1.9880342905668307, "learning_rate": 8.135752267751017e-06, "loss": 0.5134, "step": 2602 }, { "epoch": 0.24427552552552553, "grad_norm": 7.493467471680232, "learning_rate": 8.138880200187677e-06, "loss": 0.5457, "step": 2603 }, { "epoch": 0.24436936936936937, "grad_norm": 1.5756897653542208, "learning_rate": 8.142008132624336e-06, "loss": 0.5899, "step": 2604 }, { "epoch": 0.2444632132132132, "grad_norm": 2.1229702545072144, "learning_rate": 8.145136065060995e-06, "loss": 0.4987, "step": 2605 }, { "epoch": 0.24455705705705705, "grad_norm": 1.4058237066839845, "learning_rate": 8.148263997497656e-06, "loss": 0.5848, "step": 2606 }, { "epoch": 0.24465090090090091, "grad_norm": 1.5176776990822032, "learning_rate": 8.151391929934313e-06, "loss": 0.4632, "step": 2607 }, { "epoch": 0.24474474474474475, "grad_norm": 1.6425968639444974, "learning_rate": 8.154519862370972e-06, "loss": 0.4904, "step": 2608 }, { "epoch": 0.2448385885885886, "grad_norm": 2.54512877094732, "learning_rate": 8.157647794807633e-06, "loss": 0.5467, "step": 2609 }, { "epoch": 0.24493243243243243, "grad_norm": 1.8392652087039256, "learning_rate": 8.160775727244292e-06, "loss": 0.4997, "step": 2610 }, { "epoch": 0.24502627627627627, "grad_norm": 1.5428175577705794, "learning_rate": 8.163903659680952e-06, "loss": 0.5381, "step": 2611 }, { "epoch": 0.24512012012012013, "grad_norm": 1.5885536975278851, "learning_rate": 8.16703159211761e-06, "loss": 0.5223, "step": 2612 }, { "epoch": 0.24521396396396397, "grad_norm": 1.3492790747995715, "learning_rate": 8.17015952455427e-06, "loss": 0.4908, "step": 2613 }, { "epoch": 0.2453078078078078, "grad_norm": 1.4881804983137392, "learning_rate": 8.173287456990929e-06, "loss": 0.5188, "step": 2614 }, { "epoch": 0.24540165165165165, "grad_norm": 1.7861389818994444, "learning_rate": 8.176415389427588e-06, "loss": 0.4962, "step": 2615 }, { "epoch": 0.24549549549549549, "grad_norm": 1.412283340705043, "learning_rate": 8.179543321864249e-06, "loss": 0.5194, "step": 2616 }, { "epoch": 0.24558933933933935, "grad_norm": 2.601970513516846, "learning_rate": 8.182671254300908e-06, "loss": 0.4924, "step": 2617 }, { "epoch": 0.2456831831831832, "grad_norm": 1.5123573104902621, "learning_rate": 8.185799186737567e-06, "loss": 0.5089, "step": 2618 }, { "epoch": 0.24577702702702703, "grad_norm": 1.760575269136024, "learning_rate": 8.188927119174226e-06, "loss": 0.4903, "step": 2619 }, { "epoch": 0.24587087087087087, "grad_norm": 1.8644655725731956, "learning_rate": 8.192055051610886e-06, "loss": 0.5363, "step": 2620 }, { "epoch": 0.2459647147147147, "grad_norm": 1.615176080651577, "learning_rate": 8.195182984047545e-06, "loss": 0.5143, "step": 2621 }, { "epoch": 0.24605855855855857, "grad_norm": 13.488649997142423, "learning_rate": 8.198310916484204e-06, "loss": 0.505, "step": 2622 }, { "epoch": 0.2461524024024024, "grad_norm": 1.5041325835595796, "learning_rate": 8.201438848920865e-06, "loss": 0.5112, "step": 2623 }, { "epoch": 0.24624624624624625, "grad_norm": 3.071410893973914, "learning_rate": 8.204566781357524e-06, "loss": 0.569, "step": 2624 }, { "epoch": 0.24634009009009009, "grad_norm": 3.646106295799527, "learning_rate": 8.207694713794183e-06, "loss": 0.5312, "step": 2625 }, { "epoch": 0.24643393393393392, "grad_norm": 1.7108063749910214, "learning_rate": 8.210822646230842e-06, "loss": 0.5134, "step": 2626 }, { "epoch": 0.2465277777777778, "grad_norm": 1.8830101242624924, "learning_rate": 8.213950578667501e-06, "loss": 0.5382, "step": 2627 }, { "epoch": 0.24662162162162163, "grad_norm": 1.389054845244744, "learning_rate": 8.21707851110416e-06, "loss": 0.4707, "step": 2628 }, { "epoch": 0.24671546546546547, "grad_norm": 1.6132206297298999, "learning_rate": 8.22020644354082e-06, "loss": 0.5677, "step": 2629 }, { "epoch": 0.2468093093093093, "grad_norm": 1.4585694691407989, "learning_rate": 8.223334375977479e-06, "loss": 0.5539, "step": 2630 }, { "epoch": 0.24690315315315314, "grad_norm": 2.048849832789228, "learning_rate": 8.22646230841414e-06, "loss": 0.5437, "step": 2631 }, { "epoch": 0.246996996996997, "grad_norm": 1.342051503299413, "learning_rate": 8.229590240850799e-06, "loss": 0.5333, "step": 2632 }, { "epoch": 0.24709084084084085, "grad_norm": 1.6951231720571194, "learning_rate": 8.232718173287456e-06, "loss": 0.4899, "step": 2633 }, { "epoch": 0.24718468468468469, "grad_norm": 7.182797239243159, "learning_rate": 8.235846105724117e-06, "loss": 0.4942, "step": 2634 }, { "epoch": 0.24727852852852852, "grad_norm": 1.439654167653694, "learning_rate": 8.238974038160776e-06, "loss": 0.5099, "step": 2635 }, { "epoch": 0.24737237237237236, "grad_norm": 1.529569406867368, "learning_rate": 8.242101970597436e-06, "loss": 0.5278, "step": 2636 }, { "epoch": 0.24746621621621623, "grad_norm": 1.404562900794159, "learning_rate": 8.245229903034095e-06, "loss": 0.5161, "step": 2637 }, { "epoch": 0.24756006006006007, "grad_norm": 1.8286254439816625, "learning_rate": 8.248357835470756e-06, "loss": 0.5314, "step": 2638 }, { "epoch": 0.2476539039039039, "grad_norm": 1.524683508775528, "learning_rate": 8.251485767907413e-06, "loss": 0.5457, "step": 2639 }, { "epoch": 0.24774774774774774, "grad_norm": 1.6133726271684863, "learning_rate": 8.254613700344072e-06, "loss": 0.4778, "step": 2640 }, { "epoch": 0.24784159159159158, "grad_norm": 1.6780564249486605, "learning_rate": 8.257741632780733e-06, "loss": 0.5432, "step": 2641 }, { "epoch": 0.24793543543543545, "grad_norm": 1.6111961572401854, "learning_rate": 8.260869565217392e-06, "loss": 0.51, "step": 2642 }, { "epoch": 0.24802927927927929, "grad_norm": 1.3463899929817023, "learning_rate": 8.263997497654051e-06, "loss": 0.5142, "step": 2643 }, { "epoch": 0.24812312312312312, "grad_norm": 1.5917035061903793, "learning_rate": 8.26712543009071e-06, "loss": 0.5538, "step": 2644 }, { "epoch": 0.24821696696696696, "grad_norm": 1.6712519757140374, "learning_rate": 8.27025336252737e-06, "loss": 0.5104, "step": 2645 }, { "epoch": 0.2483108108108108, "grad_norm": 1.616174748797472, "learning_rate": 8.273381294964029e-06, "loss": 0.5354, "step": 2646 }, { "epoch": 0.24840465465465467, "grad_norm": 1.9074622195271516, "learning_rate": 8.276509227400688e-06, "loss": 0.5112, "step": 2647 }, { "epoch": 0.2484984984984985, "grad_norm": 1.6244431500618077, "learning_rate": 8.279637159837349e-06, "loss": 0.5428, "step": 2648 }, { "epoch": 0.24859234234234234, "grad_norm": 1.4965605601244996, "learning_rate": 8.282765092274008e-06, "loss": 0.4855, "step": 2649 }, { "epoch": 0.24868618618618618, "grad_norm": 1.4946379020228842, "learning_rate": 8.285893024710667e-06, "loss": 0.4937, "step": 2650 }, { "epoch": 0.24878003003003002, "grad_norm": 5.4819275945029355, "learning_rate": 8.289020957147326e-06, "loss": 0.5556, "step": 2651 }, { "epoch": 0.24887387387387389, "grad_norm": 1.4998587127872105, "learning_rate": 8.292148889583986e-06, "loss": 0.5023, "step": 2652 }, { "epoch": 0.24896771771771772, "grad_norm": 1.6900027473640704, "learning_rate": 8.295276822020645e-06, "loss": 0.5278, "step": 2653 }, { "epoch": 0.24906156156156156, "grad_norm": 1.4351057445916309, "learning_rate": 8.298404754457304e-06, "loss": 0.5271, "step": 2654 }, { "epoch": 0.2491554054054054, "grad_norm": 1.570321530517403, "learning_rate": 8.301532686893963e-06, "loss": 0.4981, "step": 2655 }, { "epoch": 0.24924924924924924, "grad_norm": 1.3919256631809769, "learning_rate": 8.304660619330624e-06, "loss": 0.4803, "step": 2656 }, { "epoch": 0.2493430930930931, "grad_norm": 1.5459181846654337, "learning_rate": 8.307788551767283e-06, "loss": 0.5009, "step": 2657 }, { "epoch": 0.24943693693693694, "grad_norm": 1.4960871805593197, "learning_rate": 8.31091648420394e-06, "loss": 0.495, "step": 2658 }, { "epoch": 0.24953078078078078, "grad_norm": 4.209533654098735, "learning_rate": 8.314044416640601e-06, "loss": 0.541, "step": 2659 }, { "epoch": 0.24962462462462462, "grad_norm": 1.421334609881783, "learning_rate": 8.31717234907726e-06, "loss": 0.5461, "step": 2660 }, { "epoch": 0.24971846846846846, "grad_norm": 3.6904842344607123, "learning_rate": 8.32030028151392e-06, "loss": 0.4759, "step": 2661 }, { "epoch": 0.24981231231231232, "grad_norm": 1.6371174573428933, "learning_rate": 8.323428213950579e-06, "loss": 0.5163, "step": 2662 }, { "epoch": 0.24990615615615616, "grad_norm": 1.523429807949937, "learning_rate": 8.32655614638724e-06, "loss": 0.513, "step": 2663 }, { "epoch": 0.25, "grad_norm": 1.3961732655786303, "learning_rate": 8.329684078823899e-06, "loss": 0.4867, "step": 2664 }, { "epoch": 0.25009384384384387, "grad_norm": 1.3951856370227056, "learning_rate": 8.332812011260556e-06, "loss": 0.5394, "step": 2665 }, { "epoch": 0.2501876876876877, "grad_norm": 1.5069136634578697, "learning_rate": 8.335939943697217e-06, "loss": 0.5251, "step": 2666 }, { "epoch": 0.25028153153153154, "grad_norm": 2.3239474241089093, "learning_rate": 8.339067876133876e-06, "loss": 0.5057, "step": 2667 }, { "epoch": 0.25037537537537535, "grad_norm": 1.7170264486147857, "learning_rate": 8.342195808570536e-06, "loss": 0.5172, "step": 2668 }, { "epoch": 0.2504692192192192, "grad_norm": 1.4765760784889113, "learning_rate": 8.345323741007195e-06, "loss": 0.5167, "step": 2669 }, { "epoch": 0.2505630630630631, "grad_norm": 1.893310371741658, "learning_rate": 8.348451673443856e-06, "loss": 0.4865, "step": 2670 }, { "epoch": 0.2506569069069069, "grad_norm": 1.4202404283149113, "learning_rate": 8.351579605880513e-06, "loss": 0.51, "step": 2671 }, { "epoch": 0.25075075075075076, "grad_norm": 1.314954604792067, "learning_rate": 8.354707538317172e-06, "loss": 0.4883, "step": 2672 }, { "epoch": 0.25084459459459457, "grad_norm": 1.6017675475079465, "learning_rate": 8.357835470753833e-06, "loss": 0.5499, "step": 2673 }, { "epoch": 0.25093843843843844, "grad_norm": 2.34348112070633, "learning_rate": 8.360963403190492e-06, "loss": 0.4618, "step": 2674 }, { "epoch": 0.2510322822822823, "grad_norm": 2.0737879620367936, "learning_rate": 8.364091335627151e-06, "loss": 0.5182, "step": 2675 }, { "epoch": 0.2511261261261261, "grad_norm": 1.4083110693273786, "learning_rate": 8.36721926806381e-06, "loss": 0.5964, "step": 2676 }, { "epoch": 0.25121996996997, "grad_norm": 2.0907051346045606, "learning_rate": 8.37034720050047e-06, "loss": 0.5255, "step": 2677 }, { "epoch": 0.2513138138138138, "grad_norm": 1.4810508837612448, "learning_rate": 8.373475132937129e-06, "loss": 0.6135, "step": 2678 }, { "epoch": 0.25140765765765766, "grad_norm": 1.9331653401911817, "learning_rate": 8.376603065373788e-06, "loss": 0.5027, "step": 2679 }, { "epoch": 0.2515015015015015, "grad_norm": 1.5328674678572742, "learning_rate": 8.379730997810447e-06, "loss": 0.5008, "step": 2680 }, { "epoch": 0.25159534534534533, "grad_norm": 1.3854758827889522, "learning_rate": 8.382858930247108e-06, "loss": 0.5086, "step": 2681 }, { "epoch": 0.2516891891891892, "grad_norm": 1.4956947807420005, "learning_rate": 8.385986862683767e-06, "loss": 0.5102, "step": 2682 }, { "epoch": 0.251783033033033, "grad_norm": 1.5867918769320208, "learning_rate": 8.389114795120426e-06, "loss": 0.5451, "step": 2683 }, { "epoch": 0.2518768768768769, "grad_norm": 1.424375709450446, "learning_rate": 8.392242727557085e-06, "loss": 0.523, "step": 2684 }, { "epoch": 0.25197072072072074, "grad_norm": 1.5673581567700519, "learning_rate": 8.395370659993745e-06, "loss": 0.5059, "step": 2685 }, { "epoch": 0.25206456456456455, "grad_norm": 1.5354759824593873, "learning_rate": 8.398498592430404e-06, "loss": 0.5392, "step": 2686 }, { "epoch": 0.2521584084084084, "grad_norm": 1.6108511356368922, "learning_rate": 8.401626524867063e-06, "loss": 0.5331, "step": 2687 }, { "epoch": 0.25225225225225223, "grad_norm": 1.9310389035688809, "learning_rate": 8.404754457303724e-06, "loss": 0.4845, "step": 2688 }, { "epoch": 0.2523460960960961, "grad_norm": 1.8423625898871316, "learning_rate": 8.407882389740383e-06, "loss": 0.5318, "step": 2689 }, { "epoch": 0.25243993993993996, "grad_norm": 1.5145618093919861, "learning_rate": 8.41101032217704e-06, "loss": 0.5838, "step": 2690 }, { "epoch": 0.25253378378378377, "grad_norm": 1.5393011906385383, "learning_rate": 8.414138254613701e-06, "loss": 0.4796, "step": 2691 }, { "epoch": 0.25262762762762764, "grad_norm": 1.677343770303144, "learning_rate": 8.41726618705036e-06, "loss": 0.5573, "step": 2692 }, { "epoch": 0.25272147147147145, "grad_norm": 1.6480665593931063, "learning_rate": 8.42039411948702e-06, "loss": 0.5343, "step": 2693 }, { "epoch": 0.2528153153153153, "grad_norm": 1.2913299303846908, "learning_rate": 8.423522051923679e-06, "loss": 0.5366, "step": 2694 }, { "epoch": 0.2529091591591592, "grad_norm": 2.049226009844777, "learning_rate": 8.42664998436034e-06, "loss": 0.5673, "step": 2695 }, { "epoch": 0.253003003003003, "grad_norm": 1.4375353867511171, "learning_rate": 8.429777916796999e-06, "loss": 0.5181, "step": 2696 }, { "epoch": 0.25309684684684686, "grad_norm": 1.8347923989891886, "learning_rate": 8.432905849233656e-06, "loss": 0.4754, "step": 2697 }, { "epoch": 0.25319069069069067, "grad_norm": 1.5314240192367108, "learning_rate": 8.436033781670317e-06, "loss": 0.567, "step": 2698 }, { "epoch": 0.25328453453453453, "grad_norm": 1.3287039436886348, "learning_rate": 8.439161714106976e-06, "loss": 0.5794, "step": 2699 }, { "epoch": 0.2533783783783784, "grad_norm": 1.7723945767563798, "learning_rate": 8.442289646543635e-06, "loss": 0.5512, "step": 2700 }, { "epoch": 0.2534722222222222, "grad_norm": 1.3182644535483037, "learning_rate": 8.445417578980295e-06, "loss": 0.4657, "step": 2701 }, { "epoch": 0.2535660660660661, "grad_norm": 2.2697465344708863, "learning_rate": 8.448545511416954e-06, "loss": 0.5277, "step": 2702 }, { "epoch": 0.2536599099099099, "grad_norm": 1.29549012869726, "learning_rate": 8.451673443853613e-06, "loss": 0.4615, "step": 2703 }, { "epoch": 0.25375375375375375, "grad_norm": 1.6855287724069072, "learning_rate": 8.454801376290272e-06, "loss": 0.5203, "step": 2704 }, { "epoch": 0.2538475975975976, "grad_norm": 1.4513161220128863, "learning_rate": 8.457929308726931e-06, "loss": 0.5619, "step": 2705 }, { "epoch": 0.25394144144144143, "grad_norm": 1.4212684656013392, "learning_rate": 8.461057241163592e-06, "loss": 0.5278, "step": 2706 }, { "epoch": 0.2540352852852853, "grad_norm": 1.6868208113378989, "learning_rate": 8.464185173600251e-06, "loss": 0.5355, "step": 2707 }, { "epoch": 0.2541291291291291, "grad_norm": 1.9216111582163355, "learning_rate": 8.46731310603691e-06, "loss": 0.5343, "step": 2708 }, { "epoch": 0.25422297297297297, "grad_norm": 2.132680645436017, "learning_rate": 8.47044103847357e-06, "loss": 0.5173, "step": 2709 }, { "epoch": 0.25431681681681684, "grad_norm": 1.6703321648047018, "learning_rate": 8.473568970910229e-06, "loss": 0.5209, "step": 2710 }, { "epoch": 0.25441066066066065, "grad_norm": 1.4722736985752178, "learning_rate": 8.476696903346888e-06, "loss": 0.5614, "step": 2711 }, { "epoch": 0.2545045045045045, "grad_norm": 2.4908646175892897, "learning_rate": 8.479824835783547e-06, "loss": 0.5353, "step": 2712 }, { "epoch": 0.2545983483483483, "grad_norm": 2.6328554485240097, "learning_rate": 8.482952768220208e-06, "loss": 0.5377, "step": 2713 }, { "epoch": 0.2546921921921922, "grad_norm": 1.5806826834634, "learning_rate": 8.486080700656867e-06, "loss": 0.5381, "step": 2714 }, { "epoch": 0.25478603603603606, "grad_norm": 1.4075581082081152, "learning_rate": 8.489208633093526e-06, "loss": 0.5281, "step": 2715 }, { "epoch": 0.25487987987987987, "grad_norm": 1.790624879216402, "learning_rate": 8.492336565530185e-06, "loss": 0.5127, "step": 2716 }, { "epoch": 0.25497372372372373, "grad_norm": 13.963356749195807, "learning_rate": 8.495464497966845e-06, "loss": 0.551, "step": 2717 }, { "epoch": 0.25506756756756754, "grad_norm": 1.5651315077251413, "learning_rate": 8.498592430403504e-06, "loss": 0.5173, "step": 2718 }, { "epoch": 0.2551614114114114, "grad_norm": 1.9371791891577226, "learning_rate": 8.501720362840163e-06, "loss": 0.4951, "step": 2719 }, { "epoch": 0.2552552552552553, "grad_norm": 1.6144128430059157, "learning_rate": 8.504848295276824e-06, "loss": 0.515, "step": 2720 }, { "epoch": 0.2553490990990991, "grad_norm": 1.3773635654499117, "learning_rate": 8.507976227713483e-06, "loss": 0.5118, "step": 2721 }, { "epoch": 0.25544294294294295, "grad_norm": 2.0461123729429826, "learning_rate": 8.51110416015014e-06, "loss": 0.4757, "step": 2722 }, { "epoch": 0.25553678678678676, "grad_norm": 1.3822676502517082, "learning_rate": 8.514232092586801e-06, "loss": 0.4705, "step": 2723 }, { "epoch": 0.25563063063063063, "grad_norm": 1.498054380481966, "learning_rate": 8.51736002502346e-06, "loss": 0.5307, "step": 2724 }, { "epoch": 0.2557244744744745, "grad_norm": 1.870121261212527, "learning_rate": 8.52048795746012e-06, "loss": 0.5452, "step": 2725 }, { "epoch": 0.2558183183183183, "grad_norm": 1.3400506740740197, "learning_rate": 8.523615889896779e-06, "loss": 0.5277, "step": 2726 }, { "epoch": 0.25591216216216217, "grad_norm": 1.3825755031187814, "learning_rate": 8.526743822333438e-06, "loss": 0.5076, "step": 2727 }, { "epoch": 0.256006006006006, "grad_norm": 1.5420461201172118, "learning_rate": 8.529871754770099e-06, "loss": 0.5338, "step": 2728 }, { "epoch": 0.25609984984984985, "grad_norm": 1.2577516160945537, "learning_rate": 8.532999687206756e-06, "loss": 0.5123, "step": 2729 }, { "epoch": 0.2561936936936937, "grad_norm": 1.4431366916418167, "learning_rate": 8.536127619643415e-06, "loss": 0.5475, "step": 2730 }, { "epoch": 0.2562875375375375, "grad_norm": 2.148937113836785, "learning_rate": 8.539255552080076e-06, "loss": 0.5476, "step": 2731 }, { "epoch": 0.2563813813813814, "grad_norm": 1.8579370560761168, "learning_rate": 8.542383484516735e-06, "loss": 0.5065, "step": 2732 }, { "epoch": 0.2564752252252252, "grad_norm": 1.4907002986210485, "learning_rate": 8.545511416953395e-06, "loss": 0.5074, "step": 2733 }, { "epoch": 0.25656906906906907, "grad_norm": 1.3854794989917767, "learning_rate": 8.548639349390054e-06, "loss": 0.5326, "step": 2734 }, { "epoch": 0.25666291291291293, "grad_norm": 1.4896440169023502, "learning_rate": 8.551767281826713e-06, "loss": 0.5153, "step": 2735 }, { "epoch": 0.25675675675675674, "grad_norm": 3.945518659026028, "learning_rate": 8.554895214263372e-06, "loss": 0.4643, "step": 2736 }, { "epoch": 0.2568506006006006, "grad_norm": 1.5677767139091923, "learning_rate": 8.558023146700031e-06, "loss": 0.5097, "step": 2737 }, { "epoch": 0.2569444444444444, "grad_norm": 1.6629471853694937, "learning_rate": 8.561151079136692e-06, "loss": 0.5082, "step": 2738 }, { "epoch": 0.2570382882882883, "grad_norm": 1.4245627676261454, "learning_rate": 8.564279011573351e-06, "loss": 0.5292, "step": 2739 }, { "epoch": 0.25713213213213215, "grad_norm": 1.620962368353068, "learning_rate": 8.56740694401001e-06, "loss": 0.5303, "step": 2740 }, { "epoch": 0.25722597597597596, "grad_norm": 1.7240342628365932, "learning_rate": 8.57053487644667e-06, "loss": 0.4731, "step": 2741 }, { "epoch": 0.25731981981981983, "grad_norm": 1.6457553082396161, "learning_rate": 8.573662808883329e-06, "loss": 0.4991, "step": 2742 }, { "epoch": 0.25741366366366364, "grad_norm": 1.3408562644626236, "learning_rate": 8.576790741319988e-06, "loss": 0.4666, "step": 2743 }, { "epoch": 0.2575075075075075, "grad_norm": 1.2926856205985369, "learning_rate": 8.579918673756647e-06, "loss": 0.5189, "step": 2744 }, { "epoch": 0.25760135135135137, "grad_norm": 1.4892114544830513, "learning_rate": 8.583046606193308e-06, "loss": 0.4867, "step": 2745 }, { "epoch": 0.2576951951951952, "grad_norm": 2.5745323636827577, "learning_rate": 8.586174538629967e-06, "loss": 0.5098, "step": 2746 }, { "epoch": 0.25778903903903905, "grad_norm": 1.5834324378372318, "learning_rate": 8.589302471066626e-06, "loss": 0.5494, "step": 2747 }, { "epoch": 0.25788288288288286, "grad_norm": 1.3476648537366982, "learning_rate": 8.592430403503285e-06, "loss": 0.5088, "step": 2748 }, { "epoch": 0.2579767267267267, "grad_norm": 1.4376543125704002, "learning_rate": 8.595558335939944e-06, "loss": 0.5471, "step": 2749 }, { "epoch": 0.2580705705705706, "grad_norm": 1.3262078913716453, "learning_rate": 8.598686268376604e-06, "loss": 0.5156, "step": 2750 }, { "epoch": 0.2581644144144144, "grad_norm": 1.5465011800000141, "learning_rate": 8.601814200813263e-06, "loss": 0.4995, "step": 2751 }, { "epoch": 0.25825825825825827, "grad_norm": 1.5087018903635565, "learning_rate": 8.604942133249922e-06, "loss": 0.5664, "step": 2752 }, { "epoch": 0.2583521021021021, "grad_norm": 1.3883763698836382, "learning_rate": 8.608070065686583e-06, "loss": 0.4659, "step": 2753 }, { "epoch": 0.25844594594594594, "grad_norm": 1.5490149317947681, "learning_rate": 8.61119799812324e-06, "loss": 0.5328, "step": 2754 }, { "epoch": 0.2585397897897898, "grad_norm": 2.9513664936116064, "learning_rate": 8.6143259305599e-06, "loss": 0.5206, "step": 2755 }, { "epoch": 0.2586336336336336, "grad_norm": 1.7078640526669793, "learning_rate": 8.61745386299656e-06, "loss": 0.5387, "step": 2756 }, { "epoch": 0.2587274774774775, "grad_norm": 1.706935870385541, "learning_rate": 8.62058179543322e-06, "loss": 0.4879, "step": 2757 }, { "epoch": 0.2588213213213213, "grad_norm": 3.23902987391855, "learning_rate": 8.623709727869879e-06, "loss": 0.5284, "step": 2758 }, { "epoch": 0.25891516516516516, "grad_norm": 2.3966771572536065, "learning_rate": 8.626837660306538e-06, "loss": 0.5254, "step": 2759 }, { "epoch": 0.25900900900900903, "grad_norm": 1.4502207617467582, "learning_rate": 8.629965592743199e-06, "loss": 0.5354, "step": 2760 }, { "epoch": 0.25910285285285284, "grad_norm": 1.3776396687251948, "learning_rate": 8.633093525179856e-06, "loss": 0.5039, "step": 2761 }, { "epoch": 0.2591966966966967, "grad_norm": 1.7533748657009909, "learning_rate": 8.636221457616515e-06, "loss": 0.5252, "step": 2762 }, { "epoch": 0.2592905405405405, "grad_norm": 1.837426509937043, "learning_rate": 8.639349390053176e-06, "loss": 0.4823, "step": 2763 }, { "epoch": 0.2593843843843844, "grad_norm": 4.322190532330962, "learning_rate": 8.642477322489835e-06, "loss": 0.5466, "step": 2764 }, { "epoch": 0.25947822822822825, "grad_norm": 1.4622820909450176, "learning_rate": 8.645605254926494e-06, "loss": 0.5669, "step": 2765 }, { "epoch": 0.25957207207207206, "grad_norm": 1.8431496800636478, "learning_rate": 8.648733187363154e-06, "loss": 0.5098, "step": 2766 }, { "epoch": 0.2596659159159159, "grad_norm": 1.590529447375289, "learning_rate": 8.651861119799813e-06, "loss": 0.498, "step": 2767 }, { "epoch": 0.25975975975975973, "grad_norm": 1.6965103805713793, "learning_rate": 8.654989052236472e-06, "loss": 0.5336, "step": 2768 }, { "epoch": 0.2598536036036036, "grad_norm": 1.4935459458681468, "learning_rate": 8.658116984673131e-06, "loss": 0.5323, "step": 2769 }, { "epoch": 0.25994744744744747, "grad_norm": 1.29500396691502, "learning_rate": 8.661244917109792e-06, "loss": 0.5169, "step": 2770 }, { "epoch": 0.2600412912912913, "grad_norm": 1.4163773604113947, "learning_rate": 8.664372849546451e-06, "loss": 0.5368, "step": 2771 }, { "epoch": 0.26013513513513514, "grad_norm": 1.4177689726496747, "learning_rate": 8.66750078198311e-06, "loss": 0.4846, "step": 2772 }, { "epoch": 0.26022897897897895, "grad_norm": 1.340391626598316, "learning_rate": 8.67062871441977e-06, "loss": 0.4857, "step": 2773 }, { "epoch": 0.2603228228228228, "grad_norm": 1.5515126390906857, "learning_rate": 8.673756646856429e-06, "loss": 0.511, "step": 2774 }, { "epoch": 0.2604166666666667, "grad_norm": 1.4627463918513606, "learning_rate": 8.676884579293088e-06, "loss": 0.4858, "step": 2775 }, { "epoch": 0.2605105105105105, "grad_norm": 1.9926756738679599, "learning_rate": 8.680012511729747e-06, "loss": 0.4926, "step": 2776 }, { "epoch": 0.26060435435435436, "grad_norm": 2.06806162474234, "learning_rate": 8.683140444166406e-06, "loss": 0.6031, "step": 2777 }, { "epoch": 0.26069819819819817, "grad_norm": 1.6336211271945995, "learning_rate": 8.686268376603067e-06, "loss": 0.5564, "step": 2778 }, { "epoch": 0.26079204204204204, "grad_norm": 1.3510010106186447, "learning_rate": 8.689396309039726e-06, "loss": 0.5275, "step": 2779 }, { "epoch": 0.2608858858858859, "grad_norm": 1.5813965691386438, "learning_rate": 8.692524241476385e-06, "loss": 0.5607, "step": 2780 }, { "epoch": 0.2609797297297297, "grad_norm": 1.8136658792411642, "learning_rate": 8.695652173913044e-06, "loss": 0.515, "step": 2781 }, { "epoch": 0.2610735735735736, "grad_norm": 1.4118070036446646, "learning_rate": 8.698780106349704e-06, "loss": 0.5257, "step": 2782 }, { "epoch": 0.2611674174174174, "grad_norm": 1.8738600504342797, "learning_rate": 8.701908038786363e-06, "loss": 0.5246, "step": 2783 }, { "epoch": 0.26126126126126126, "grad_norm": 1.5806412053612928, "learning_rate": 8.705035971223022e-06, "loss": 0.5345, "step": 2784 }, { "epoch": 0.2613551051051051, "grad_norm": 2.402804755292347, "learning_rate": 8.708163903659683e-06, "loss": 0.4958, "step": 2785 }, { "epoch": 0.26144894894894893, "grad_norm": 1.558512106913808, "learning_rate": 8.71129183609634e-06, "loss": 0.5824, "step": 2786 }, { "epoch": 0.2615427927927928, "grad_norm": 1.3339056539710878, "learning_rate": 8.714419768533e-06, "loss": 0.5336, "step": 2787 }, { "epoch": 0.2616366366366366, "grad_norm": 2.0032155076412126, "learning_rate": 8.71754770096966e-06, "loss": 0.5431, "step": 2788 }, { "epoch": 0.2617304804804805, "grad_norm": 2.3922399360396915, "learning_rate": 8.72067563340632e-06, "loss": 0.4941, "step": 2789 }, { "epoch": 0.26182432432432434, "grad_norm": 3.624660839062145, "learning_rate": 8.723803565842979e-06, "loss": 0.4911, "step": 2790 }, { "epoch": 0.26191816816816815, "grad_norm": 1.7835435786487985, "learning_rate": 8.726931498279638e-06, "loss": 0.4941, "step": 2791 }, { "epoch": 0.262012012012012, "grad_norm": 1.4483931405918107, "learning_rate": 8.730059430716297e-06, "loss": 0.5105, "step": 2792 }, { "epoch": 0.26210585585585583, "grad_norm": 1.745503081489346, "learning_rate": 8.733187363152956e-06, "loss": 0.4867, "step": 2793 }, { "epoch": 0.2621996996996997, "grad_norm": 2.11319801020257, "learning_rate": 8.736315295589615e-06, "loss": 0.5134, "step": 2794 }, { "epoch": 0.26229354354354356, "grad_norm": 3.4765938528378797, "learning_rate": 8.739443228026276e-06, "loss": 0.5613, "step": 2795 }, { "epoch": 0.26238738738738737, "grad_norm": 2.856141339701863, "learning_rate": 8.742571160462935e-06, "loss": 0.4974, "step": 2796 }, { "epoch": 0.26248123123123124, "grad_norm": 1.3035300197050217, "learning_rate": 8.745699092899594e-06, "loss": 0.4813, "step": 2797 }, { "epoch": 0.26257507507507505, "grad_norm": 1.7082258159887111, "learning_rate": 8.748827025336254e-06, "loss": 0.5193, "step": 2798 }, { "epoch": 0.2626689189189189, "grad_norm": 1.8802361073182856, "learning_rate": 8.751954957772913e-06, "loss": 0.5592, "step": 2799 }, { "epoch": 0.2627627627627628, "grad_norm": 2.0183103830524862, "learning_rate": 8.755082890209572e-06, "loss": 0.5659, "step": 2800 }, { "epoch": 0.2628566066066066, "grad_norm": 1.515320133575404, "learning_rate": 8.758210822646231e-06, "loss": 0.5097, "step": 2801 }, { "epoch": 0.26295045045045046, "grad_norm": 1.5400136658075427, "learning_rate": 8.76133875508289e-06, "loss": 0.5025, "step": 2802 }, { "epoch": 0.26304429429429427, "grad_norm": 1.36759946971494, "learning_rate": 8.764466687519551e-06, "loss": 0.5075, "step": 2803 }, { "epoch": 0.26313813813813813, "grad_norm": 1.2973693857321673, "learning_rate": 8.76759461995621e-06, "loss": 0.4478, "step": 2804 }, { "epoch": 0.263231981981982, "grad_norm": 1.3533637683574946, "learning_rate": 8.77072255239287e-06, "loss": 0.4805, "step": 2805 }, { "epoch": 0.2633258258258258, "grad_norm": 2.9165994580503654, "learning_rate": 8.773850484829529e-06, "loss": 0.5453, "step": 2806 }, { "epoch": 0.2634196696696697, "grad_norm": 1.3968108112702422, "learning_rate": 8.776978417266188e-06, "loss": 0.5216, "step": 2807 }, { "epoch": 0.2635135135135135, "grad_norm": 1.5418493602201604, "learning_rate": 8.780106349702847e-06, "loss": 0.5633, "step": 2808 }, { "epoch": 0.26360735735735735, "grad_norm": 1.731074305792783, "learning_rate": 8.783234282139506e-06, "loss": 0.5308, "step": 2809 }, { "epoch": 0.2637012012012012, "grad_norm": 1.3848007061607646, "learning_rate": 8.786362214576167e-06, "loss": 0.5059, "step": 2810 }, { "epoch": 0.26379504504504503, "grad_norm": 1.5480159891812755, "learning_rate": 8.789490147012826e-06, "loss": 0.5377, "step": 2811 }, { "epoch": 0.2638888888888889, "grad_norm": 1.3393920690786405, "learning_rate": 8.792618079449483e-06, "loss": 0.5566, "step": 2812 }, { "epoch": 0.2639827327327327, "grad_norm": 2.738253735340526, "learning_rate": 8.795746011886144e-06, "loss": 0.5396, "step": 2813 }, { "epoch": 0.26407657657657657, "grad_norm": 1.4120652992085057, "learning_rate": 8.798873944322803e-06, "loss": 0.4989, "step": 2814 }, { "epoch": 0.26417042042042044, "grad_norm": 1.6080605429177108, "learning_rate": 8.802001876759463e-06, "loss": 0.4882, "step": 2815 }, { "epoch": 0.26426426426426425, "grad_norm": 1.3738030887678288, "learning_rate": 8.805129809196122e-06, "loss": 0.4703, "step": 2816 }, { "epoch": 0.2643581081081081, "grad_norm": 1.3883961365330868, "learning_rate": 8.808257741632783e-06, "loss": 0.48, "step": 2817 }, { "epoch": 0.2644519519519519, "grad_norm": 1.3683899560315385, "learning_rate": 8.81138567406944e-06, "loss": 0.4986, "step": 2818 }, { "epoch": 0.2645457957957958, "grad_norm": 1.549654101315582, "learning_rate": 8.8145136065061e-06, "loss": 0.5097, "step": 2819 }, { "epoch": 0.26463963963963966, "grad_norm": 1.8340199700839206, "learning_rate": 8.81764153894276e-06, "loss": 0.5018, "step": 2820 }, { "epoch": 0.26473348348348347, "grad_norm": 8.274832282516206, "learning_rate": 8.82076947137942e-06, "loss": 0.5402, "step": 2821 }, { "epoch": 0.26482732732732733, "grad_norm": 1.7833758824738195, "learning_rate": 8.823897403816078e-06, "loss": 0.6002, "step": 2822 }, { "epoch": 0.26492117117117114, "grad_norm": 2.38955675596969, "learning_rate": 8.827025336252738e-06, "loss": 0.4718, "step": 2823 }, { "epoch": 0.265015015015015, "grad_norm": 1.6469347266147014, "learning_rate": 8.830153268689397e-06, "loss": 0.5562, "step": 2824 }, { "epoch": 0.2651088588588589, "grad_norm": 1.3785834742724892, "learning_rate": 8.833281201126056e-06, "loss": 0.5425, "step": 2825 }, { "epoch": 0.2652027027027027, "grad_norm": 5.974119147632294, "learning_rate": 8.836409133562715e-06, "loss": 0.4871, "step": 2826 }, { "epoch": 0.26529654654654655, "grad_norm": 1.946125113966653, "learning_rate": 8.839537065999374e-06, "loss": 0.4953, "step": 2827 }, { "epoch": 0.26539039039039036, "grad_norm": 1.7381780145808539, "learning_rate": 8.842664998436035e-06, "loss": 0.5269, "step": 2828 }, { "epoch": 0.26548423423423423, "grad_norm": 1.4314095673680265, "learning_rate": 8.845792930872694e-06, "loss": 0.5271, "step": 2829 }, { "epoch": 0.2655780780780781, "grad_norm": 1.7856879726380872, "learning_rate": 8.848920863309353e-06, "loss": 0.5302, "step": 2830 }, { "epoch": 0.2656719219219219, "grad_norm": 1.316089245215726, "learning_rate": 8.852048795746013e-06, "loss": 0.5594, "step": 2831 }, { "epoch": 0.26576576576576577, "grad_norm": 1.643487805995296, "learning_rate": 8.855176728182672e-06, "loss": 0.5157, "step": 2832 }, { "epoch": 0.26585960960960964, "grad_norm": 2.024511782896662, "learning_rate": 8.858304660619331e-06, "loss": 0.487, "step": 2833 }, { "epoch": 0.26595345345345345, "grad_norm": 1.4400489262998653, "learning_rate": 8.86143259305599e-06, "loss": 0.4869, "step": 2834 }, { "epoch": 0.2660472972972973, "grad_norm": 1.453057737879772, "learning_rate": 8.864560525492651e-06, "loss": 0.4993, "step": 2835 }, { "epoch": 0.2661411411411411, "grad_norm": 2.0567585657579723, "learning_rate": 8.86768845792931e-06, "loss": 0.5511, "step": 2836 }, { "epoch": 0.266234984984985, "grad_norm": 1.7806300438491853, "learning_rate": 8.870816390365968e-06, "loss": 0.496, "step": 2837 }, { "epoch": 0.26632882882882886, "grad_norm": 1.7595439326988045, "learning_rate": 8.873944322802628e-06, "loss": 0.5212, "step": 2838 }, { "epoch": 0.26642267267267267, "grad_norm": 1.4065387769223747, "learning_rate": 8.877072255239288e-06, "loss": 0.5709, "step": 2839 }, { "epoch": 0.26651651651651653, "grad_norm": 1.2817412538982353, "learning_rate": 8.880200187675947e-06, "loss": 0.5672, "step": 2840 }, { "epoch": 0.26661036036036034, "grad_norm": 1.6419102529627105, "learning_rate": 8.883328120112606e-06, "loss": 0.4825, "step": 2841 }, { "epoch": 0.2667042042042042, "grad_norm": 2.091532246113929, "learning_rate": 8.886456052549267e-06, "loss": 0.5225, "step": 2842 }, { "epoch": 0.2667980480480481, "grad_norm": 1.3745627447460795, "learning_rate": 8.889583984985926e-06, "loss": 0.4674, "step": 2843 }, { "epoch": 0.2668918918918919, "grad_norm": 1.425028913937949, "learning_rate": 8.892711917422583e-06, "loss": 0.5131, "step": 2844 }, { "epoch": 0.26698573573573575, "grad_norm": 1.8018764935475216, "learning_rate": 8.895839849859244e-06, "loss": 0.4996, "step": 2845 }, { "epoch": 0.26707957957957956, "grad_norm": 1.5021418650332738, "learning_rate": 8.898967782295903e-06, "loss": 0.5353, "step": 2846 }, { "epoch": 0.26717342342342343, "grad_norm": 1.4624097386457056, "learning_rate": 8.902095714732563e-06, "loss": 0.5393, "step": 2847 }, { "epoch": 0.2672672672672673, "grad_norm": 1.5236364761969943, "learning_rate": 8.905223647169222e-06, "loss": 0.5758, "step": 2848 }, { "epoch": 0.2673611111111111, "grad_norm": 1.2540986532731828, "learning_rate": 8.908351579605881e-06, "loss": 0.4602, "step": 2849 }, { "epoch": 0.26745495495495497, "grad_norm": 1.4693597859977316, "learning_rate": 8.91147951204254e-06, "loss": 0.512, "step": 2850 }, { "epoch": 0.2675487987987988, "grad_norm": 1.8452520693417813, "learning_rate": 8.9146074444792e-06, "loss": 0.535, "step": 2851 }, { "epoch": 0.26764264264264265, "grad_norm": 1.3773100580145559, "learning_rate": 8.91773537691586e-06, "loss": 0.5385, "step": 2852 }, { "epoch": 0.2677364864864865, "grad_norm": 1.625923029333424, "learning_rate": 8.92086330935252e-06, "loss": 0.4907, "step": 2853 }, { "epoch": 0.2678303303303303, "grad_norm": 5.058740177825704, "learning_rate": 8.923991241789178e-06, "loss": 0.4591, "step": 2854 }, { "epoch": 0.2679241741741742, "grad_norm": 1.366406156935109, "learning_rate": 8.927119174225838e-06, "loss": 0.5232, "step": 2855 }, { "epoch": 0.268018018018018, "grad_norm": 1.4772471618383967, "learning_rate": 8.930247106662497e-06, "loss": 0.5411, "step": 2856 }, { "epoch": 0.26811186186186187, "grad_norm": 1.3689276258917396, "learning_rate": 8.933375039099156e-06, "loss": 0.5158, "step": 2857 }, { "epoch": 0.26820570570570573, "grad_norm": 1.752138410244526, "learning_rate": 8.936502971535815e-06, "loss": 0.5364, "step": 2858 }, { "epoch": 0.26829954954954954, "grad_norm": 1.6301646929111624, "learning_rate": 8.939630903972474e-06, "loss": 0.5303, "step": 2859 }, { "epoch": 0.2683933933933934, "grad_norm": 1.9541706482790218, "learning_rate": 8.942758836409135e-06, "loss": 0.5231, "step": 2860 }, { "epoch": 0.2684872372372372, "grad_norm": 1.9745833287625152, "learning_rate": 8.945886768845794e-06, "loss": 0.5025, "step": 2861 }, { "epoch": 0.2685810810810811, "grad_norm": 1.7278878133012472, "learning_rate": 8.949014701282453e-06, "loss": 0.532, "step": 2862 }, { "epoch": 0.26867492492492495, "grad_norm": 1.9502958550949459, "learning_rate": 8.952142633719113e-06, "loss": 0.4763, "step": 2863 }, { "epoch": 0.26876876876876876, "grad_norm": 1.8401459028577845, "learning_rate": 8.955270566155772e-06, "loss": 0.505, "step": 2864 }, { "epoch": 0.26886261261261263, "grad_norm": 4.5775336521407, "learning_rate": 8.958398498592431e-06, "loss": 0.5028, "step": 2865 }, { "epoch": 0.26895645645645644, "grad_norm": 1.5297744426968856, "learning_rate": 8.96152643102909e-06, "loss": 0.528, "step": 2866 }, { "epoch": 0.2690503003003003, "grad_norm": 4.9039475397005186, "learning_rate": 8.964654363465751e-06, "loss": 0.5397, "step": 2867 }, { "epoch": 0.26914414414414417, "grad_norm": 1.3323981857597955, "learning_rate": 8.96778229590241e-06, "loss": 0.4978, "step": 2868 }, { "epoch": 0.269237987987988, "grad_norm": 4.966334534628378, "learning_rate": 8.970910228339067e-06, "loss": 0.5372, "step": 2869 }, { "epoch": 0.26933183183183185, "grad_norm": 2.0551892677639003, "learning_rate": 8.974038160775728e-06, "loss": 0.5074, "step": 2870 }, { "epoch": 0.26942567567567566, "grad_norm": 1.6253462156251568, "learning_rate": 8.977166093212388e-06, "loss": 0.5708, "step": 2871 }, { "epoch": 0.2695195195195195, "grad_norm": 1.4537868791968016, "learning_rate": 8.980294025649047e-06, "loss": 0.5482, "step": 2872 }, { "epoch": 0.2696133633633634, "grad_norm": 1.2905790431432809, "learning_rate": 8.983421958085706e-06, "loss": 0.4994, "step": 2873 }, { "epoch": 0.2697072072072072, "grad_norm": 1.422439019954582, "learning_rate": 8.986549890522365e-06, "loss": 0.5374, "step": 2874 }, { "epoch": 0.26980105105105107, "grad_norm": 1.616373294201033, "learning_rate": 8.989677822959026e-06, "loss": 0.5186, "step": 2875 }, { "epoch": 0.2698948948948949, "grad_norm": 1.4185169497416805, "learning_rate": 8.992805755395683e-06, "loss": 0.5146, "step": 2876 }, { "epoch": 0.26998873873873874, "grad_norm": 1.686732811568012, "learning_rate": 8.995933687832344e-06, "loss": 0.4912, "step": 2877 }, { "epoch": 0.2700825825825826, "grad_norm": 1.6322975609461354, "learning_rate": 8.999061620269003e-06, "loss": 0.5221, "step": 2878 }, { "epoch": 0.2701764264264264, "grad_norm": 1.392959306387997, "learning_rate": 9.002189552705662e-06, "loss": 0.5044, "step": 2879 }, { "epoch": 0.2702702702702703, "grad_norm": 1.9898134364269602, "learning_rate": 9.005317485142322e-06, "loss": 0.5334, "step": 2880 }, { "epoch": 0.2703641141141141, "grad_norm": 1.7002187948487943, "learning_rate": 9.00844541757898e-06, "loss": 0.5063, "step": 2881 }, { "epoch": 0.27045795795795796, "grad_norm": 1.5814728860667804, "learning_rate": 9.01157335001564e-06, "loss": 0.5389, "step": 2882 }, { "epoch": 0.27055180180180183, "grad_norm": 2.721048339736594, "learning_rate": 9.014701282452299e-06, "loss": 0.559, "step": 2883 }, { "epoch": 0.27064564564564564, "grad_norm": 2.2796178413259662, "learning_rate": 9.017829214888958e-06, "loss": 0.5449, "step": 2884 }, { "epoch": 0.2707394894894895, "grad_norm": 1.6291842845625288, "learning_rate": 9.020957147325619e-06, "loss": 0.5527, "step": 2885 }, { "epoch": 0.2708333333333333, "grad_norm": 1.7802739740318898, "learning_rate": 9.024085079762278e-06, "loss": 0.5069, "step": 2886 }, { "epoch": 0.2709271771771772, "grad_norm": 1.6925932450871572, "learning_rate": 9.027213012198937e-06, "loss": 0.5414, "step": 2887 }, { "epoch": 0.27102102102102105, "grad_norm": 1.7239258616023978, "learning_rate": 9.030340944635597e-06, "loss": 0.5581, "step": 2888 }, { "epoch": 0.27111486486486486, "grad_norm": 2.898558438813039, "learning_rate": 9.033468877072256e-06, "loss": 0.4978, "step": 2889 }, { "epoch": 0.2712087087087087, "grad_norm": 1.627771640407675, "learning_rate": 9.036596809508915e-06, "loss": 0.4758, "step": 2890 }, { "epoch": 0.27130255255255253, "grad_norm": 2.5123069540062994, "learning_rate": 9.039724741945574e-06, "loss": 0.4806, "step": 2891 }, { "epoch": 0.2713963963963964, "grad_norm": 3.032589114944661, "learning_rate": 9.042852674382235e-06, "loss": 0.5218, "step": 2892 }, { "epoch": 0.27149024024024027, "grad_norm": 1.6292712609457924, "learning_rate": 9.045980606818894e-06, "loss": 0.5251, "step": 2893 }, { "epoch": 0.2715840840840841, "grad_norm": 1.9583886112412818, "learning_rate": 9.049108539255553e-06, "loss": 0.5477, "step": 2894 }, { "epoch": 0.27167792792792794, "grad_norm": 1.706136874210274, "learning_rate": 9.052236471692212e-06, "loss": 0.5214, "step": 2895 }, { "epoch": 0.27177177177177175, "grad_norm": 4.766257214500767, "learning_rate": 9.055364404128872e-06, "loss": 0.5081, "step": 2896 }, { "epoch": 0.2718656156156156, "grad_norm": 1.5932195222919152, "learning_rate": 9.05849233656553e-06, "loss": 0.5356, "step": 2897 }, { "epoch": 0.2719594594594595, "grad_norm": 1.3578606292990385, "learning_rate": 9.06162026900219e-06, "loss": 0.5224, "step": 2898 }, { "epoch": 0.2720533033033033, "grad_norm": 1.3434739103497482, "learning_rate": 9.064748201438849e-06, "loss": 0.5175, "step": 2899 }, { "epoch": 0.27214714714714716, "grad_norm": 1.658951928160002, "learning_rate": 9.06787613387551e-06, "loss": 0.5183, "step": 2900 }, { "epoch": 0.27224099099099097, "grad_norm": 1.705848066665559, "learning_rate": 9.071004066312167e-06, "loss": 0.5308, "step": 2901 }, { "epoch": 0.27233483483483484, "grad_norm": 1.4511580898006908, "learning_rate": 9.074131998748828e-06, "loss": 0.4953, "step": 2902 }, { "epoch": 0.2724286786786787, "grad_norm": 1.4867111709884346, "learning_rate": 9.077259931185487e-06, "loss": 0.4454, "step": 2903 }, { "epoch": 0.2725225225225225, "grad_norm": 1.2347319480832657, "learning_rate": 9.080387863622147e-06, "loss": 0.455, "step": 2904 }, { "epoch": 0.2726163663663664, "grad_norm": 1.6148675059679334, "learning_rate": 9.083515796058806e-06, "loss": 0.5282, "step": 2905 }, { "epoch": 0.2727102102102102, "grad_norm": 1.5507048804510597, "learning_rate": 9.086643728495465e-06, "loss": 0.5049, "step": 2906 }, { "epoch": 0.27280405405405406, "grad_norm": 1.3239082024699442, "learning_rate": 9.089771660932126e-06, "loss": 0.4638, "step": 2907 }, { "epoch": 0.2728978978978979, "grad_norm": 1.4533618701459614, "learning_rate": 9.092899593368783e-06, "loss": 0.5167, "step": 2908 }, { "epoch": 0.27299174174174173, "grad_norm": 2.8918650484857102, "learning_rate": 9.096027525805442e-06, "loss": 0.4799, "step": 2909 }, { "epoch": 0.2730855855855856, "grad_norm": 1.788167179248099, "learning_rate": 9.099155458242103e-06, "loss": 0.4773, "step": 2910 }, { "epoch": 0.2731794294294294, "grad_norm": 1.4331037936206001, "learning_rate": 9.102283390678762e-06, "loss": 0.5206, "step": 2911 }, { "epoch": 0.2732732732732733, "grad_norm": 1.4342909495602885, "learning_rate": 9.105411323115422e-06, "loss": 0.5046, "step": 2912 }, { "epoch": 0.27336711711711714, "grad_norm": 1.8643555785186001, "learning_rate": 9.10853925555208e-06, "loss": 0.5413, "step": 2913 }, { "epoch": 0.27346096096096095, "grad_norm": 1.4372679389671426, "learning_rate": 9.11166718798874e-06, "loss": 0.5389, "step": 2914 }, { "epoch": 0.2735548048048048, "grad_norm": 1.782089522120114, "learning_rate": 9.114795120425399e-06, "loss": 0.5517, "step": 2915 }, { "epoch": 0.27364864864864863, "grad_norm": 1.3650197291364774, "learning_rate": 9.117923052862058e-06, "loss": 0.4537, "step": 2916 }, { "epoch": 0.2737424924924925, "grad_norm": 1.6103379316712527, "learning_rate": 9.121050985298719e-06, "loss": 0.5013, "step": 2917 }, { "epoch": 0.27383633633633636, "grad_norm": 1.8075739601843546, "learning_rate": 9.124178917735378e-06, "loss": 0.5141, "step": 2918 }, { "epoch": 0.27393018018018017, "grad_norm": 1.5317262998481809, "learning_rate": 9.127306850172037e-06, "loss": 0.5212, "step": 2919 }, { "epoch": 0.27402402402402404, "grad_norm": 1.5744594354311712, "learning_rate": 9.130434782608697e-06, "loss": 0.5333, "step": 2920 }, { "epoch": 0.27411786786786785, "grad_norm": 1.6983462654487107, "learning_rate": 9.133562715045356e-06, "loss": 0.5306, "step": 2921 }, { "epoch": 0.2742117117117117, "grad_norm": 1.686395297765915, "learning_rate": 9.136690647482015e-06, "loss": 0.5392, "step": 2922 }, { "epoch": 0.2743055555555556, "grad_norm": 1.5287942065857996, "learning_rate": 9.139818579918674e-06, "loss": 0.5539, "step": 2923 }, { "epoch": 0.2743993993993994, "grad_norm": 1.8839703857452543, "learning_rate": 9.142946512355335e-06, "loss": 0.513, "step": 2924 }, { "epoch": 0.27449324324324326, "grad_norm": 1.380870499913614, "learning_rate": 9.146074444791994e-06, "loss": 0.4786, "step": 2925 }, { "epoch": 0.27458708708708707, "grad_norm": 5.461893471370356, "learning_rate": 9.149202377228653e-06, "loss": 0.5181, "step": 2926 }, { "epoch": 0.27468093093093093, "grad_norm": 2.0614413568752035, "learning_rate": 9.152330309665312e-06, "loss": 0.5872, "step": 2927 }, { "epoch": 0.2747747747747748, "grad_norm": 2.350866737715886, "learning_rate": 9.155458242101972e-06, "loss": 0.5199, "step": 2928 }, { "epoch": 0.2748686186186186, "grad_norm": 2.2652115841893603, "learning_rate": 9.15858617453863e-06, "loss": 0.5685, "step": 2929 }, { "epoch": 0.2749624624624625, "grad_norm": 3.331479993206833, "learning_rate": 9.16171410697529e-06, "loss": 0.536, "step": 2930 }, { "epoch": 0.2750563063063063, "grad_norm": 1.2077158762988027, "learning_rate": 9.164842039411949e-06, "loss": 0.4842, "step": 2931 }, { "epoch": 0.27515015015015015, "grad_norm": 3.7780978502886766, "learning_rate": 9.16796997184861e-06, "loss": 0.498, "step": 2932 }, { "epoch": 0.275243993993994, "grad_norm": 1.7215955672434005, "learning_rate": 9.171097904285267e-06, "loss": 0.4468, "step": 2933 }, { "epoch": 0.27533783783783783, "grad_norm": 1.3508172035534154, "learning_rate": 9.174225836721926e-06, "loss": 0.4688, "step": 2934 }, { "epoch": 0.2754316816816817, "grad_norm": 1.4470013666584443, "learning_rate": 9.177353769158587e-06, "loss": 0.5431, "step": 2935 }, { "epoch": 0.2755255255255255, "grad_norm": 1.3763290340492234, "learning_rate": 9.180481701595247e-06, "loss": 0.5481, "step": 2936 }, { "epoch": 0.27561936936936937, "grad_norm": 1.2810943509670252, "learning_rate": 9.183609634031906e-06, "loss": 0.4893, "step": 2937 }, { "epoch": 0.27571321321321324, "grad_norm": 1.3238309976245215, "learning_rate": 9.186737566468565e-06, "loss": 0.5047, "step": 2938 }, { "epoch": 0.27580705705705705, "grad_norm": 1.5071585509542709, "learning_rate": 9.189865498905226e-06, "loss": 0.5242, "step": 2939 }, { "epoch": 0.2759009009009009, "grad_norm": 2.8000594236759357, "learning_rate": 9.192993431341883e-06, "loss": 0.5245, "step": 2940 }, { "epoch": 0.2759947447447447, "grad_norm": 1.1856415391691448, "learning_rate": 9.196121363778542e-06, "loss": 0.4949, "step": 2941 }, { "epoch": 0.2760885885885886, "grad_norm": 1.4462596568429011, "learning_rate": 9.199249296215203e-06, "loss": 0.4449, "step": 2942 }, { "epoch": 0.27618243243243246, "grad_norm": 1.6198795360559637, "learning_rate": 9.202377228651862e-06, "loss": 0.4643, "step": 2943 }, { "epoch": 0.27627627627627627, "grad_norm": 1.6584133092857085, "learning_rate": 9.205505161088521e-06, "loss": 0.5358, "step": 2944 }, { "epoch": 0.27637012012012013, "grad_norm": 2.30202641661892, "learning_rate": 9.20863309352518e-06, "loss": 0.5327, "step": 2945 }, { "epoch": 0.27646396396396394, "grad_norm": 1.408618222157484, "learning_rate": 9.21176102596184e-06, "loss": 0.4768, "step": 2946 }, { "epoch": 0.2765578078078078, "grad_norm": 1.6107794912832216, "learning_rate": 9.214888958398499e-06, "loss": 0.508, "step": 2947 }, { "epoch": 0.2766516516516517, "grad_norm": 1.479911321106824, "learning_rate": 9.218016890835158e-06, "loss": 0.5548, "step": 2948 }, { "epoch": 0.2767454954954955, "grad_norm": 1.6540245458264475, "learning_rate": 9.221144823271819e-06, "loss": 0.5455, "step": 2949 }, { "epoch": 0.27683933933933935, "grad_norm": 1.525297035277019, "learning_rate": 9.224272755708478e-06, "loss": 0.5047, "step": 2950 }, { "epoch": 0.27693318318318316, "grad_norm": 1.6845573260739, "learning_rate": 9.227400688145137e-06, "loss": 0.4984, "step": 2951 }, { "epoch": 0.27702702702702703, "grad_norm": 1.7672791766702518, "learning_rate": 9.230528620581796e-06, "loss": 0.5208, "step": 2952 }, { "epoch": 0.2771208708708709, "grad_norm": 1.4054517585050303, "learning_rate": 9.233656553018456e-06, "loss": 0.4844, "step": 2953 }, { "epoch": 0.2772147147147147, "grad_norm": 1.5899223321671037, "learning_rate": 9.236784485455115e-06, "loss": 0.4699, "step": 2954 }, { "epoch": 0.27730855855855857, "grad_norm": 1.697988518806814, "learning_rate": 9.239912417891774e-06, "loss": 0.5822, "step": 2955 }, { "epoch": 0.2774024024024024, "grad_norm": 1.4486023855057173, "learning_rate": 9.243040350328433e-06, "loss": 0.511, "step": 2956 }, { "epoch": 0.27749624624624625, "grad_norm": 2.1159248010570355, "learning_rate": 9.246168282765094e-06, "loss": 0.5311, "step": 2957 }, { "epoch": 0.2775900900900901, "grad_norm": 1.5018945505235448, "learning_rate": 9.249296215201753e-06, "loss": 0.5181, "step": 2958 }, { "epoch": 0.2776839339339339, "grad_norm": 2.380020529110034, "learning_rate": 9.25242414763841e-06, "loss": 0.565, "step": 2959 }, { "epoch": 0.2777777777777778, "grad_norm": 2.1536781245612064, "learning_rate": 9.255552080075071e-06, "loss": 0.5747, "step": 2960 }, { "epoch": 0.2778716216216216, "grad_norm": 1.514861864438676, "learning_rate": 9.25868001251173e-06, "loss": 0.4627, "step": 2961 }, { "epoch": 0.27796546546546547, "grad_norm": 1.285775973182927, "learning_rate": 9.26180794494839e-06, "loss": 0.5658, "step": 2962 }, { "epoch": 0.27805930930930933, "grad_norm": 2.0109948090956156, "learning_rate": 9.264935877385049e-06, "loss": 0.5105, "step": 2963 }, { "epoch": 0.27815315315315314, "grad_norm": 1.9232952888352335, "learning_rate": 9.26806380982171e-06, "loss": 0.5138, "step": 2964 }, { "epoch": 0.278246996996997, "grad_norm": 1.5182917596735273, "learning_rate": 9.271191742258367e-06, "loss": 0.5185, "step": 2965 }, { "epoch": 0.2783408408408408, "grad_norm": 2.0529194320543067, "learning_rate": 9.274319674695026e-06, "loss": 0.58, "step": 2966 }, { "epoch": 0.2784346846846847, "grad_norm": 1.3499931935796987, "learning_rate": 9.277447607131687e-06, "loss": 0.5051, "step": 2967 }, { "epoch": 0.27852852852852855, "grad_norm": 1.3558265843831594, "learning_rate": 9.280575539568346e-06, "loss": 0.4554, "step": 2968 }, { "epoch": 0.27862237237237236, "grad_norm": 3.3091438818004213, "learning_rate": 9.283703472005006e-06, "loss": 0.4593, "step": 2969 }, { "epoch": 0.27871621621621623, "grad_norm": 1.7131752900102495, "learning_rate": 9.286831404441665e-06, "loss": 0.5212, "step": 2970 }, { "epoch": 0.27881006006006004, "grad_norm": 2.834058509538187, "learning_rate": 9.289959336878324e-06, "loss": 0.5153, "step": 2971 }, { "epoch": 0.2789039039039039, "grad_norm": 1.4954541904230465, "learning_rate": 9.293087269314983e-06, "loss": 0.5223, "step": 2972 }, { "epoch": 0.27899774774774777, "grad_norm": 1.5278751679121434, "learning_rate": 9.296215201751642e-06, "loss": 0.5467, "step": 2973 }, { "epoch": 0.2790915915915916, "grad_norm": 1.3678995424017704, "learning_rate": 9.299343134188303e-06, "loss": 0.4698, "step": 2974 }, { "epoch": 0.27918543543543545, "grad_norm": 3.168564599524668, "learning_rate": 9.302471066624962e-06, "loss": 0.4805, "step": 2975 }, { "epoch": 0.27927927927927926, "grad_norm": 1.8603044212424402, "learning_rate": 9.305598999061621e-06, "loss": 0.5521, "step": 2976 }, { "epoch": 0.2793731231231231, "grad_norm": 1.3342181128353119, "learning_rate": 9.30872693149828e-06, "loss": 0.4872, "step": 2977 }, { "epoch": 0.279466966966967, "grad_norm": 2.5218366577517246, "learning_rate": 9.31185486393494e-06, "loss": 0.499, "step": 2978 }, { "epoch": 0.2795608108108108, "grad_norm": 1.2828635512419035, "learning_rate": 9.314982796371599e-06, "loss": 0.4994, "step": 2979 }, { "epoch": 0.27965465465465467, "grad_norm": 1.7404719439188407, "learning_rate": 9.318110728808258e-06, "loss": 0.518, "step": 2980 }, { "epoch": 0.2797484984984985, "grad_norm": 7.487995019200367, "learning_rate": 9.321238661244917e-06, "loss": 0.5225, "step": 2981 }, { "epoch": 0.27984234234234234, "grad_norm": 1.811961196765858, "learning_rate": 9.324366593681578e-06, "loss": 0.5449, "step": 2982 }, { "epoch": 0.2799361861861862, "grad_norm": 2.060699571905655, "learning_rate": 9.327494526118237e-06, "loss": 0.4682, "step": 2983 }, { "epoch": 0.28003003003003, "grad_norm": 1.5244360287185357, "learning_rate": 9.330622458554895e-06, "loss": 0.5251, "step": 2984 }, { "epoch": 0.2801238738738739, "grad_norm": 1.5311004916772073, "learning_rate": 9.333750390991556e-06, "loss": 0.5377, "step": 2985 }, { "epoch": 0.2802177177177177, "grad_norm": 1.456312862466615, "learning_rate": 9.336878323428215e-06, "loss": 0.5389, "step": 2986 }, { "epoch": 0.28031156156156156, "grad_norm": 1.4912174404367877, "learning_rate": 9.340006255864874e-06, "loss": 0.494, "step": 2987 }, { "epoch": 0.28040540540540543, "grad_norm": 1.6483844561307655, "learning_rate": 9.343134188301533e-06, "loss": 0.5474, "step": 2988 }, { "epoch": 0.28049924924924924, "grad_norm": 4.334196184455461, "learning_rate": 9.346262120738194e-06, "loss": 0.5196, "step": 2989 }, { "epoch": 0.2805930930930931, "grad_norm": 1.6051718453422021, "learning_rate": 9.349390053174853e-06, "loss": 0.5242, "step": 2990 }, { "epoch": 0.2806869369369369, "grad_norm": 1.4274544069138098, "learning_rate": 9.35251798561151e-06, "loss": 0.5148, "step": 2991 }, { "epoch": 0.2807807807807808, "grad_norm": 1.4516634708907856, "learning_rate": 9.355645918048171e-06, "loss": 0.4643, "step": 2992 }, { "epoch": 0.28087462462462465, "grad_norm": 1.5730259128575563, "learning_rate": 9.35877385048483e-06, "loss": 0.4937, "step": 2993 }, { "epoch": 0.28096846846846846, "grad_norm": 1.4529197055143173, "learning_rate": 9.36190178292149e-06, "loss": 0.4763, "step": 2994 }, { "epoch": 0.2810623123123123, "grad_norm": 1.7096840093131835, "learning_rate": 9.365029715358149e-06, "loss": 0.5132, "step": 2995 }, { "epoch": 0.28115615615615613, "grad_norm": 1.9720229741535742, "learning_rate": 9.368157647794808e-06, "loss": 0.5633, "step": 2996 }, { "epoch": 0.28125, "grad_norm": 1.746047179773616, "learning_rate": 9.371285580231467e-06, "loss": 0.5091, "step": 2997 }, { "epoch": 0.28134384384384387, "grad_norm": 2.4354433800267565, "learning_rate": 9.374413512668126e-06, "loss": 0.5159, "step": 2998 }, { "epoch": 0.2814376876876877, "grad_norm": 1.616164324797412, "learning_rate": 9.377541445104787e-06, "loss": 0.5535, "step": 2999 }, { "epoch": 0.28153153153153154, "grad_norm": 1.3284671728289936, "learning_rate": 9.380669377541446e-06, "loss": 0.4927, "step": 3000 }, { "epoch": 0.28162537537537535, "grad_norm": 1.506866909277748, "learning_rate": 9.383797309978106e-06, "loss": 0.5526, "step": 3001 }, { "epoch": 0.2817192192192192, "grad_norm": 1.2764282029758105, "learning_rate": 9.386925242414765e-06, "loss": 0.5016, "step": 3002 }, { "epoch": 0.2818130630630631, "grad_norm": 2.0298901619908696, "learning_rate": 9.390053174851424e-06, "loss": 0.5812, "step": 3003 }, { "epoch": 0.2819069069069069, "grad_norm": 1.581797219664076, "learning_rate": 9.393181107288083e-06, "loss": 0.5342, "step": 3004 }, { "epoch": 0.28200075075075076, "grad_norm": 1.5905504540331281, "learning_rate": 9.396309039724742e-06, "loss": 0.5418, "step": 3005 }, { "epoch": 0.28209459459459457, "grad_norm": 1.600392647209934, "learning_rate": 9.399436972161401e-06, "loss": 0.5389, "step": 3006 }, { "epoch": 0.28218843843843844, "grad_norm": 1.390509004980457, "learning_rate": 9.402564904598062e-06, "loss": 0.5038, "step": 3007 }, { "epoch": 0.2822822822822823, "grad_norm": 1.3414641771236766, "learning_rate": 9.405692837034721e-06, "loss": 0.5178, "step": 3008 }, { "epoch": 0.2823761261261261, "grad_norm": 1.6770403107042138, "learning_rate": 9.40882076947138e-06, "loss": 0.5219, "step": 3009 }, { "epoch": 0.28246996996997, "grad_norm": 1.4503071094315738, "learning_rate": 9.41194870190804e-06, "loss": 0.5332, "step": 3010 }, { "epoch": 0.2825638138138138, "grad_norm": 1.3790477372400074, "learning_rate": 9.415076634344699e-06, "loss": 0.5015, "step": 3011 }, { "epoch": 0.28265765765765766, "grad_norm": 1.440822922419064, "learning_rate": 9.418204566781358e-06, "loss": 0.584, "step": 3012 }, { "epoch": 0.2827515015015015, "grad_norm": 1.9342861281856072, "learning_rate": 9.421332499218017e-06, "loss": 0.463, "step": 3013 }, { "epoch": 0.28284534534534533, "grad_norm": 1.4947289891607565, "learning_rate": 9.424460431654678e-06, "loss": 0.4997, "step": 3014 }, { "epoch": 0.2829391891891892, "grad_norm": 1.2809802338997394, "learning_rate": 9.427588364091337e-06, "loss": 0.5598, "step": 3015 }, { "epoch": 0.283033033033033, "grad_norm": 1.496857446408644, "learning_rate": 9.430716296527995e-06, "loss": 0.5385, "step": 3016 }, { "epoch": 0.2831268768768769, "grad_norm": 1.4069313263037777, "learning_rate": 9.433844228964655e-06, "loss": 0.5225, "step": 3017 }, { "epoch": 0.28322072072072074, "grad_norm": 1.4652095037670942, "learning_rate": 9.436972161401315e-06, "loss": 0.5294, "step": 3018 }, { "epoch": 0.28331456456456455, "grad_norm": 1.5366504821357834, "learning_rate": 9.440100093837974e-06, "loss": 0.5586, "step": 3019 }, { "epoch": 0.2834084084084084, "grad_norm": 23.78359045543789, "learning_rate": 9.443228026274633e-06, "loss": 0.4621, "step": 3020 }, { "epoch": 0.28350225225225223, "grad_norm": 1.7770635699164197, "learning_rate": 9.446355958711294e-06, "loss": 0.5089, "step": 3021 }, { "epoch": 0.2835960960960961, "grad_norm": 1.9386839078168876, "learning_rate": 9.449483891147953e-06, "loss": 0.5282, "step": 3022 }, { "epoch": 0.28368993993993996, "grad_norm": 2.147225043490637, "learning_rate": 9.45261182358461e-06, "loss": 0.5094, "step": 3023 }, { "epoch": 0.28378378378378377, "grad_norm": 1.5612131952653372, "learning_rate": 9.455739756021271e-06, "loss": 0.5234, "step": 3024 }, { "epoch": 0.28387762762762764, "grad_norm": 2.206532945599886, "learning_rate": 9.45886768845793e-06, "loss": 0.5312, "step": 3025 }, { "epoch": 0.28397147147147145, "grad_norm": 1.5689738761344183, "learning_rate": 9.46199562089459e-06, "loss": 0.5148, "step": 3026 }, { "epoch": 0.2840653153153153, "grad_norm": 1.3997590410368281, "learning_rate": 9.465123553331249e-06, "loss": 0.5132, "step": 3027 }, { "epoch": 0.2841591591591592, "grad_norm": 1.7102009219949803, "learning_rate": 9.468251485767908e-06, "loss": 0.5571, "step": 3028 }, { "epoch": 0.284253003003003, "grad_norm": 1.5676214379425266, "learning_rate": 9.471379418204567e-06, "loss": 0.5111, "step": 3029 }, { "epoch": 0.28434684684684686, "grad_norm": 2.0372394071684363, "learning_rate": 9.474507350641226e-06, "loss": 0.5293, "step": 3030 }, { "epoch": 0.28444069069069067, "grad_norm": 1.4403733092299351, "learning_rate": 9.477635283077885e-06, "loss": 0.5277, "step": 3031 }, { "epoch": 0.28453453453453453, "grad_norm": 1.3932435221538624, "learning_rate": 9.480763215514546e-06, "loss": 0.539, "step": 3032 }, { "epoch": 0.2846283783783784, "grad_norm": 3.0145602632913158, "learning_rate": 9.483891147951205e-06, "loss": 0.524, "step": 3033 }, { "epoch": 0.2847222222222222, "grad_norm": 1.9247541694494092, "learning_rate": 9.487019080387865e-06, "loss": 0.528, "step": 3034 }, { "epoch": 0.2848160660660661, "grad_norm": 2.448052314075797, "learning_rate": 9.490147012824524e-06, "loss": 0.5292, "step": 3035 }, { "epoch": 0.2849099099099099, "grad_norm": 1.6282203513897384, "learning_rate": 9.493274945261183e-06, "loss": 0.5489, "step": 3036 }, { "epoch": 0.28500375375375375, "grad_norm": 1.4840710555617984, "learning_rate": 9.496402877697842e-06, "loss": 0.4813, "step": 3037 }, { "epoch": 0.2850975975975976, "grad_norm": 1.897063669452515, "learning_rate": 9.499530810134501e-06, "loss": 0.5146, "step": 3038 }, { "epoch": 0.28519144144144143, "grad_norm": 1.8052867039694893, "learning_rate": 9.502658742571162e-06, "loss": 0.5302, "step": 3039 }, { "epoch": 0.2852852852852853, "grad_norm": 1.7102289921745502, "learning_rate": 9.505786675007821e-06, "loss": 0.5313, "step": 3040 }, { "epoch": 0.2853791291291291, "grad_norm": 1.3966933496327794, "learning_rate": 9.50891460744448e-06, "loss": 0.5051, "step": 3041 }, { "epoch": 0.28547297297297297, "grad_norm": 1.4783307788720506, "learning_rate": 9.51204253988114e-06, "loss": 0.5032, "step": 3042 }, { "epoch": 0.28556681681681684, "grad_norm": 2.0916957679277104, "learning_rate": 9.515170472317799e-06, "loss": 0.5386, "step": 3043 }, { "epoch": 0.28566066066066065, "grad_norm": 3.7126489764644117, "learning_rate": 9.518298404754458e-06, "loss": 0.5064, "step": 3044 }, { "epoch": 0.2857545045045045, "grad_norm": 4.38779166883691, "learning_rate": 9.521426337191117e-06, "loss": 0.4974, "step": 3045 }, { "epoch": 0.2858483483483483, "grad_norm": 1.9503021240973553, "learning_rate": 9.524554269627778e-06, "loss": 0.4992, "step": 3046 }, { "epoch": 0.2859421921921922, "grad_norm": 1.484481418096964, "learning_rate": 9.527682202064437e-06, "loss": 0.4857, "step": 3047 }, { "epoch": 0.28603603603603606, "grad_norm": 1.3945338263182707, "learning_rate": 9.530810134501095e-06, "loss": 0.5008, "step": 3048 }, { "epoch": 0.28612987987987987, "grad_norm": 1.5304641218458912, "learning_rate": 9.533938066937755e-06, "loss": 0.5263, "step": 3049 }, { "epoch": 0.28622372372372373, "grad_norm": 1.4857259308412263, "learning_rate": 9.537065999374415e-06, "loss": 0.5471, "step": 3050 }, { "epoch": 0.28631756756756754, "grad_norm": 2.0640876424599583, "learning_rate": 9.540193931811074e-06, "loss": 0.5378, "step": 3051 }, { "epoch": 0.2864114114114114, "grad_norm": 1.4373222644991763, "learning_rate": 9.543321864247733e-06, "loss": 0.4908, "step": 3052 }, { "epoch": 0.2865052552552553, "grad_norm": 1.6306799656800062, "learning_rate": 9.546449796684392e-06, "loss": 0.5009, "step": 3053 }, { "epoch": 0.2865990990990991, "grad_norm": 1.4839441987739062, "learning_rate": 9.549577729121053e-06, "loss": 0.4708, "step": 3054 }, { "epoch": 0.28669294294294295, "grad_norm": 1.5047612568244824, "learning_rate": 9.55270566155771e-06, "loss": 0.4822, "step": 3055 }, { "epoch": 0.28678678678678676, "grad_norm": 1.396732040690659, "learning_rate": 9.55583359399437e-06, "loss": 0.4932, "step": 3056 }, { "epoch": 0.28688063063063063, "grad_norm": 1.7857863159852077, "learning_rate": 9.55896152643103e-06, "loss": 0.5783, "step": 3057 }, { "epoch": 0.2869744744744745, "grad_norm": 1.5721645246435352, "learning_rate": 9.56208945886769e-06, "loss": 0.5217, "step": 3058 }, { "epoch": 0.2870683183183183, "grad_norm": 1.5181679388313005, "learning_rate": 9.565217391304349e-06, "loss": 0.4819, "step": 3059 }, { "epoch": 0.28716216216216217, "grad_norm": 1.3347038735415948, "learning_rate": 9.568345323741008e-06, "loss": 0.5789, "step": 3060 }, { "epoch": 0.287256006006006, "grad_norm": 1.8235971460181641, "learning_rate": 9.571473256177667e-06, "loss": 0.5279, "step": 3061 }, { "epoch": 0.28734984984984985, "grad_norm": 1.3837022994955586, "learning_rate": 9.574601188614326e-06, "loss": 0.5198, "step": 3062 }, { "epoch": 0.2874436936936937, "grad_norm": 3.4075386451900935, "learning_rate": 9.577729121050985e-06, "loss": 0.5336, "step": 3063 }, { "epoch": 0.2875375375375375, "grad_norm": 1.2452814245810215, "learning_rate": 9.580857053487646e-06, "loss": 0.5171, "step": 3064 }, { "epoch": 0.2876313813813814, "grad_norm": 4.680301095415597, "learning_rate": 9.583984985924305e-06, "loss": 0.509, "step": 3065 }, { "epoch": 0.2877252252252252, "grad_norm": 1.724165844992969, "learning_rate": 9.587112918360965e-06, "loss": 0.4736, "step": 3066 }, { "epoch": 0.28781906906906907, "grad_norm": 1.7491004391133314, "learning_rate": 9.590240850797624e-06, "loss": 0.5581, "step": 3067 }, { "epoch": 0.28791291291291293, "grad_norm": 1.7061485495601922, "learning_rate": 9.593368783234283e-06, "loss": 0.4961, "step": 3068 }, { "epoch": 0.28800675675675674, "grad_norm": 1.543528493351051, "learning_rate": 9.596496715670942e-06, "loss": 0.6183, "step": 3069 }, { "epoch": 0.2881006006006006, "grad_norm": 1.3052051423802575, "learning_rate": 9.599624648107601e-06, "loss": 0.5347, "step": 3070 }, { "epoch": 0.2881944444444444, "grad_norm": 1.5667949223510698, "learning_rate": 9.602752580544262e-06, "loss": 0.5155, "step": 3071 }, { "epoch": 0.2882882882882883, "grad_norm": 1.4626730078007133, "learning_rate": 9.605880512980921e-06, "loss": 0.5179, "step": 3072 }, { "epoch": 0.28838213213213215, "grad_norm": 1.869302541590115, "learning_rate": 9.60900844541758e-06, "loss": 0.5202, "step": 3073 }, { "epoch": 0.28847597597597596, "grad_norm": 3.6155845768678807, "learning_rate": 9.61213637785424e-06, "loss": 0.4826, "step": 3074 }, { "epoch": 0.28856981981981983, "grad_norm": 1.5138997483702858, "learning_rate": 9.615264310290899e-06, "loss": 0.6088, "step": 3075 }, { "epoch": 0.28866366366366364, "grad_norm": 1.6861786947954727, "learning_rate": 9.618392242727558e-06, "loss": 0.5132, "step": 3076 }, { "epoch": 0.2887575075075075, "grad_norm": 5.631501804004073, "learning_rate": 9.621520175164217e-06, "loss": 0.5346, "step": 3077 }, { "epoch": 0.28885135135135137, "grad_norm": 1.716001444516398, "learning_rate": 9.624648107600876e-06, "loss": 0.5584, "step": 3078 }, { "epoch": 0.2889451951951952, "grad_norm": 1.446417190186534, "learning_rate": 9.627776040037537e-06, "loss": 0.5479, "step": 3079 }, { "epoch": 0.28903903903903905, "grad_norm": 6.88377246617426, "learning_rate": 9.630903972474194e-06, "loss": 0.4922, "step": 3080 }, { "epoch": 0.28913288288288286, "grad_norm": 1.5293652530686035, "learning_rate": 9.634031904910854e-06, "loss": 0.5104, "step": 3081 }, { "epoch": 0.2892267267267267, "grad_norm": 1.405320191735976, "learning_rate": 9.637159837347514e-06, "loss": 0.4574, "step": 3082 }, { "epoch": 0.2893205705705706, "grad_norm": 1.6406073759755324, "learning_rate": 9.640287769784174e-06, "loss": 0.4554, "step": 3083 }, { "epoch": 0.2894144144144144, "grad_norm": 1.1892806948141805, "learning_rate": 9.643415702220833e-06, "loss": 0.43, "step": 3084 }, { "epoch": 0.28950825825825827, "grad_norm": 1.4973026240323528, "learning_rate": 9.646543634657492e-06, "loss": 0.5372, "step": 3085 }, { "epoch": 0.2896021021021021, "grad_norm": 1.5564885384806166, "learning_rate": 9.649671567094153e-06, "loss": 0.5324, "step": 3086 }, { "epoch": 0.28969594594594594, "grad_norm": 1.6071231208965824, "learning_rate": 9.65279949953081e-06, "loss": 0.5398, "step": 3087 }, { "epoch": 0.2897897897897898, "grad_norm": 1.4787835256607824, "learning_rate": 9.65592743196747e-06, "loss": 0.5715, "step": 3088 }, { "epoch": 0.2898836336336336, "grad_norm": 1.3869791742132913, "learning_rate": 9.65905536440413e-06, "loss": 0.5186, "step": 3089 }, { "epoch": 0.2899774774774775, "grad_norm": 1.6811073784191861, "learning_rate": 9.66218329684079e-06, "loss": 0.4676, "step": 3090 }, { "epoch": 0.2900713213213213, "grad_norm": 1.3696042678552791, "learning_rate": 9.665311229277449e-06, "loss": 0.4903, "step": 3091 }, { "epoch": 0.29016516516516516, "grad_norm": 1.3996711796618388, "learning_rate": 9.668439161714108e-06, "loss": 0.5396, "step": 3092 }, { "epoch": 0.29025900900900903, "grad_norm": 1.6717621660693551, "learning_rate": 9.671567094150767e-06, "loss": 0.5234, "step": 3093 }, { "epoch": 0.29035285285285284, "grad_norm": 1.690257786249317, "learning_rate": 9.674695026587426e-06, "loss": 0.5135, "step": 3094 }, { "epoch": 0.2904466966966967, "grad_norm": 2.022023865218415, "learning_rate": 9.677822959024085e-06, "loss": 0.5509, "step": 3095 }, { "epoch": 0.2905405405405405, "grad_norm": 1.6320435349927034, "learning_rate": 9.680950891460746e-06, "loss": 0.5069, "step": 3096 }, { "epoch": 0.2906343843843844, "grad_norm": 1.3433906371936535, "learning_rate": 9.684078823897405e-06, "loss": 0.4712, "step": 3097 }, { "epoch": 0.29072822822822825, "grad_norm": 1.4654166665836355, "learning_rate": 9.687206756334064e-06, "loss": 0.5042, "step": 3098 }, { "epoch": 0.29082207207207206, "grad_norm": 1.6370756592110418, "learning_rate": 9.690334688770724e-06, "loss": 0.5198, "step": 3099 }, { "epoch": 0.2909159159159159, "grad_norm": 2.2649631117439535, "learning_rate": 9.693462621207383e-06, "loss": 0.4769, "step": 3100 }, { "epoch": 0.29100975975975973, "grad_norm": 1.4648105514446907, "learning_rate": 9.696590553644042e-06, "loss": 0.53, "step": 3101 }, { "epoch": 0.2911036036036036, "grad_norm": 3.7601672295005795, "learning_rate": 9.699718486080701e-06, "loss": 0.5222, "step": 3102 }, { "epoch": 0.29119744744744747, "grad_norm": 3.578414086174395, "learning_rate": 9.70284641851736e-06, "loss": 0.5135, "step": 3103 }, { "epoch": 0.2912912912912913, "grad_norm": 1.3349549465035095, "learning_rate": 9.705974350954021e-06, "loss": 0.5137, "step": 3104 }, { "epoch": 0.29138513513513514, "grad_norm": 1.4234982349879781, "learning_rate": 9.70910228339068e-06, "loss": 0.5496, "step": 3105 }, { "epoch": 0.29147897897897895, "grad_norm": 1.7375007281543775, "learning_rate": 9.712230215827338e-06, "loss": 0.5142, "step": 3106 }, { "epoch": 0.2915728228228228, "grad_norm": 1.6449138397191418, "learning_rate": 9.715358148263999e-06, "loss": 0.4412, "step": 3107 }, { "epoch": 0.2916666666666667, "grad_norm": 1.2556535635232104, "learning_rate": 9.718486080700658e-06, "loss": 0.4284, "step": 3108 }, { "epoch": 0.2917605105105105, "grad_norm": 1.1831248303085198, "learning_rate": 9.721614013137317e-06, "loss": 0.5032, "step": 3109 }, { "epoch": 0.29185435435435436, "grad_norm": 1.980164998157672, "learning_rate": 9.724741945573976e-06, "loss": 0.5542, "step": 3110 }, { "epoch": 0.29194819819819817, "grad_norm": 1.3831447326237782, "learning_rate": 9.727869878010637e-06, "loss": 0.512, "step": 3111 }, { "epoch": 0.29204204204204204, "grad_norm": 1.4493377193266441, "learning_rate": 9.730997810447294e-06, "loss": 0.5023, "step": 3112 }, { "epoch": 0.2921358858858859, "grad_norm": 1.3978083666759509, "learning_rate": 9.734125742883954e-06, "loss": 0.5397, "step": 3113 }, { "epoch": 0.2922297297297297, "grad_norm": 1.4121284269125562, "learning_rate": 9.737253675320614e-06, "loss": 0.4603, "step": 3114 }, { "epoch": 0.2923235735735736, "grad_norm": 2.151656092308561, "learning_rate": 9.740381607757274e-06, "loss": 0.515, "step": 3115 }, { "epoch": 0.2924174174174174, "grad_norm": 1.8516693179518349, "learning_rate": 9.743509540193933e-06, "loss": 0.5063, "step": 3116 }, { "epoch": 0.29251126126126126, "grad_norm": 5.335714966945571, "learning_rate": 9.746637472630592e-06, "loss": 0.5381, "step": 3117 }, { "epoch": 0.2926051051051051, "grad_norm": 2.497885166068548, "learning_rate": 9.749765405067253e-06, "loss": 0.5, "step": 3118 }, { "epoch": 0.29269894894894893, "grad_norm": 2.858535651950868, "learning_rate": 9.75289333750391e-06, "loss": 0.5271, "step": 3119 }, { "epoch": 0.2927927927927928, "grad_norm": 1.3841537241403155, "learning_rate": 9.75602126994057e-06, "loss": 0.5151, "step": 3120 }, { "epoch": 0.2928866366366366, "grad_norm": 1.7243956288644793, "learning_rate": 9.75914920237723e-06, "loss": 0.5273, "step": 3121 }, { "epoch": 0.2929804804804805, "grad_norm": 1.5887701368524947, "learning_rate": 9.76227713481389e-06, "loss": 0.5199, "step": 3122 }, { "epoch": 0.29307432432432434, "grad_norm": 3.616033162065747, "learning_rate": 9.765405067250549e-06, "loss": 0.478, "step": 3123 }, { "epoch": 0.29316816816816815, "grad_norm": 1.6589649771024424, "learning_rate": 9.768532999687208e-06, "loss": 0.5781, "step": 3124 }, { "epoch": 0.293262012012012, "grad_norm": 2.056453245597793, "learning_rate": 9.771660932123867e-06, "loss": 0.5162, "step": 3125 }, { "epoch": 0.29335585585585583, "grad_norm": 1.3935759228449813, "learning_rate": 9.774788864560526e-06, "loss": 0.5448, "step": 3126 }, { "epoch": 0.2934496996996997, "grad_norm": 1.3136298303717546, "learning_rate": 9.777916796997185e-06, "loss": 0.4938, "step": 3127 }, { "epoch": 0.29354354354354356, "grad_norm": 1.6343836843820354, "learning_rate": 9.781044729433844e-06, "loss": 0.5196, "step": 3128 }, { "epoch": 0.29363738738738737, "grad_norm": 1.3328579700764287, "learning_rate": 9.784172661870505e-06, "loss": 0.4973, "step": 3129 }, { "epoch": 0.29373123123123124, "grad_norm": 1.775768994382039, "learning_rate": 9.787300594307164e-06, "loss": 0.4849, "step": 3130 }, { "epoch": 0.29382507507507505, "grad_norm": 1.5369283457894887, "learning_rate": 9.790428526743822e-06, "loss": 0.5497, "step": 3131 }, { "epoch": 0.2939189189189189, "grad_norm": 1.5449113613130234, "learning_rate": 9.793556459180483e-06, "loss": 0.5042, "step": 3132 }, { "epoch": 0.2940127627627628, "grad_norm": 4.205666924841208, "learning_rate": 9.796684391617142e-06, "loss": 0.4926, "step": 3133 }, { "epoch": 0.2941066066066066, "grad_norm": 1.5803129655217973, "learning_rate": 9.799812324053801e-06, "loss": 0.4826, "step": 3134 }, { "epoch": 0.29420045045045046, "grad_norm": 1.870458271361524, "learning_rate": 9.80294025649046e-06, "loss": 0.5341, "step": 3135 }, { "epoch": 0.29429429429429427, "grad_norm": 1.449032200630654, "learning_rate": 9.806068188927121e-06, "loss": 0.4921, "step": 3136 }, { "epoch": 0.29438813813813813, "grad_norm": 1.1598609062861602, "learning_rate": 9.80919612136378e-06, "loss": 0.4619, "step": 3137 }, { "epoch": 0.294481981981982, "grad_norm": 1.2952368973794808, "learning_rate": 9.812324053800438e-06, "loss": 0.4453, "step": 3138 }, { "epoch": 0.2945758258258258, "grad_norm": 2.5101106508912983, "learning_rate": 9.815451986237098e-06, "loss": 0.4884, "step": 3139 }, { "epoch": 0.2946696696696697, "grad_norm": 1.4491916697804905, "learning_rate": 9.818579918673758e-06, "loss": 0.5252, "step": 3140 }, { "epoch": 0.2947635135135135, "grad_norm": 1.5320819820952496, "learning_rate": 9.821707851110417e-06, "loss": 0.5724, "step": 3141 }, { "epoch": 0.29485735735735735, "grad_norm": 1.414491931290001, "learning_rate": 9.824835783547076e-06, "loss": 0.5006, "step": 3142 }, { "epoch": 0.2949512012012012, "grad_norm": 1.4922581334812282, "learning_rate": 9.827963715983737e-06, "loss": 0.5182, "step": 3143 }, { "epoch": 0.29504504504504503, "grad_norm": 1.3182614986946888, "learning_rate": 9.831091648420394e-06, "loss": 0.5299, "step": 3144 }, { "epoch": 0.2951388888888889, "grad_norm": 2.2635559508135383, "learning_rate": 9.834219580857053e-06, "loss": 0.5143, "step": 3145 }, { "epoch": 0.2952327327327327, "grad_norm": 1.3713443247621562, "learning_rate": 9.837347513293714e-06, "loss": 0.5065, "step": 3146 }, { "epoch": 0.29532657657657657, "grad_norm": 1.6700350874067897, "learning_rate": 9.840475445730373e-06, "loss": 0.5183, "step": 3147 }, { "epoch": 0.29542042042042044, "grad_norm": 1.5830662326830498, "learning_rate": 9.843603378167033e-06, "loss": 0.5649, "step": 3148 }, { "epoch": 0.29551426426426425, "grad_norm": 8.184860645975347, "learning_rate": 9.846731310603692e-06, "loss": 0.5264, "step": 3149 }, { "epoch": 0.2956081081081081, "grad_norm": 2.2378138567834376, "learning_rate": 9.849859243040351e-06, "loss": 0.527, "step": 3150 }, { "epoch": 0.2957019519519519, "grad_norm": 1.3936987275466683, "learning_rate": 9.85298717547701e-06, "loss": 0.5148, "step": 3151 }, { "epoch": 0.2957957957957958, "grad_norm": 1.2660766529725556, "learning_rate": 9.85611510791367e-06, "loss": 0.4487, "step": 3152 }, { "epoch": 0.29588963963963966, "grad_norm": 1.3586006005399147, "learning_rate": 9.859243040350328e-06, "loss": 0.5198, "step": 3153 }, { "epoch": 0.29598348348348347, "grad_norm": 1.9447051984892092, "learning_rate": 9.86237097278699e-06, "loss": 0.4956, "step": 3154 }, { "epoch": 0.29607732732732733, "grad_norm": 2.154710334595168, "learning_rate": 9.865498905223648e-06, "loss": 0.5417, "step": 3155 }, { "epoch": 0.29617117117117114, "grad_norm": 2.6471789782287383, "learning_rate": 9.868626837660308e-06, "loss": 0.546, "step": 3156 }, { "epoch": 0.296265015015015, "grad_norm": 1.3480251632915685, "learning_rate": 9.871754770096967e-06, "loss": 0.4855, "step": 3157 }, { "epoch": 0.2963588588588589, "grad_norm": 1.8106363007901787, "learning_rate": 9.874882702533626e-06, "loss": 0.5932, "step": 3158 }, { "epoch": 0.2964527027027027, "grad_norm": 1.675694822498998, "learning_rate": 9.878010634970285e-06, "loss": 0.5391, "step": 3159 }, { "epoch": 0.29654654654654655, "grad_norm": 1.4968672008556838, "learning_rate": 9.881138567406944e-06, "loss": 0.5196, "step": 3160 }, { "epoch": 0.29664039039039036, "grad_norm": 1.309093124751554, "learning_rate": 9.884266499843605e-06, "loss": 0.5198, "step": 3161 }, { "epoch": 0.29673423423423423, "grad_norm": 2.0731107831453848, "learning_rate": 9.887394432280264e-06, "loss": 0.5691, "step": 3162 }, { "epoch": 0.2968280780780781, "grad_norm": 1.385073952408725, "learning_rate": 9.890522364716922e-06, "loss": 0.5087, "step": 3163 }, { "epoch": 0.2969219219219219, "grad_norm": 1.4412518259644096, "learning_rate": 9.893650297153583e-06, "loss": 0.4894, "step": 3164 }, { "epoch": 0.29701576576576577, "grad_norm": 1.5525249668995926, "learning_rate": 9.896778229590242e-06, "loss": 0.5185, "step": 3165 }, { "epoch": 0.29710960960960964, "grad_norm": 1.3492760869914373, "learning_rate": 9.899906162026901e-06, "loss": 0.5236, "step": 3166 }, { "epoch": 0.29720345345345345, "grad_norm": 1.4102401901860462, "learning_rate": 9.90303409446356e-06, "loss": 0.5191, "step": 3167 }, { "epoch": 0.2972972972972973, "grad_norm": 1.6752653725341775, "learning_rate": 9.906162026900221e-06, "loss": 0.5006, "step": 3168 }, { "epoch": 0.2973911411411411, "grad_norm": 1.3374509327834996, "learning_rate": 9.90928995933688e-06, "loss": 0.5199, "step": 3169 }, { "epoch": 0.297484984984985, "grad_norm": 1.6727382447184165, "learning_rate": 9.912417891773538e-06, "loss": 0.5331, "step": 3170 }, { "epoch": 0.29757882882882886, "grad_norm": 1.4979852968581437, "learning_rate": 9.915545824210198e-06, "loss": 0.5661, "step": 3171 }, { "epoch": 0.29767267267267267, "grad_norm": 1.5238986072729488, "learning_rate": 9.918673756646858e-06, "loss": 0.5292, "step": 3172 }, { "epoch": 0.29776651651651653, "grad_norm": 2.4425515101585535, "learning_rate": 9.921801689083517e-06, "loss": 0.5521, "step": 3173 }, { "epoch": 0.29786036036036034, "grad_norm": 2.018805302429218, "learning_rate": 9.924929621520176e-06, "loss": 0.4985, "step": 3174 }, { "epoch": 0.2979542042042042, "grad_norm": 1.4103831821430357, "learning_rate": 9.928057553956835e-06, "loss": 0.519, "step": 3175 }, { "epoch": 0.2980480480480481, "grad_norm": 1.8452821410897835, "learning_rate": 9.931185486393494e-06, "loss": 0.5404, "step": 3176 }, { "epoch": 0.2981418918918919, "grad_norm": 1.4408788354909299, "learning_rate": 9.934313418830153e-06, "loss": 0.5442, "step": 3177 }, { "epoch": 0.29823573573573575, "grad_norm": 2.704360389460123, "learning_rate": 9.937441351266813e-06, "loss": 0.5041, "step": 3178 }, { "epoch": 0.29832957957957956, "grad_norm": 1.539503918466408, "learning_rate": 9.940569283703473e-06, "loss": 0.4803, "step": 3179 }, { "epoch": 0.29842342342342343, "grad_norm": 2.418244282051282, "learning_rate": 9.943697216140133e-06, "loss": 0.5392, "step": 3180 }, { "epoch": 0.2985172672672673, "grad_norm": 1.4400441934205206, "learning_rate": 9.946825148576792e-06, "loss": 0.4868, "step": 3181 }, { "epoch": 0.2986111111111111, "grad_norm": 1.6915918238482934, "learning_rate": 9.949953081013451e-06, "loss": 0.593, "step": 3182 }, { "epoch": 0.29870495495495497, "grad_norm": 4.035472687215123, "learning_rate": 9.95308101345011e-06, "loss": 0.5427, "step": 3183 }, { "epoch": 0.2987987987987988, "grad_norm": 1.5150297424103538, "learning_rate": 9.95620894588677e-06, "loss": 0.5351, "step": 3184 }, { "epoch": 0.29889264264264265, "grad_norm": 3.602054962563025, "learning_rate": 9.959336878323428e-06, "loss": 0.4669, "step": 3185 }, { "epoch": 0.2989864864864865, "grad_norm": 1.5085719846142538, "learning_rate": 9.96246481076009e-06, "loss": 0.516, "step": 3186 }, { "epoch": 0.2990803303303303, "grad_norm": 1.4807434703786657, "learning_rate": 9.965592743196748e-06, "loss": 0.5234, "step": 3187 }, { "epoch": 0.2991741741741742, "grad_norm": 1.3725708468069022, "learning_rate": 9.968720675633408e-06, "loss": 0.56, "step": 3188 }, { "epoch": 0.299268018018018, "grad_norm": 1.4579595228442628, "learning_rate": 9.971848608070067e-06, "loss": 0.5468, "step": 3189 }, { "epoch": 0.29936186186186187, "grad_norm": 1.4586691078030645, "learning_rate": 9.974976540506726e-06, "loss": 0.4961, "step": 3190 }, { "epoch": 0.29945570570570573, "grad_norm": 1.4809085096154897, "learning_rate": 9.978104472943385e-06, "loss": 0.5672, "step": 3191 }, { "epoch": 0.29954954954954954, "grad_norm": 1.668992182308525, "learning_rate": 9.981232405380044e-06, "loss": 0.5373, "step": 3192 }, { "epoch": 0.2996433933933934, "grad_norm": 3.978179931228785, "learning_rate": 9.984360337816705e-06, "loss": 0.4653, "step": 3193 }, { "epoch": 0.2997372372372372, "grad_norm": 1.4565389973413199, "learning_rate": 9.987488270253364e-06, "loss": 0.5044, "step": 3194 }, { "epoch": 0.2998310810810811, "grad_norm": 2.059679751839651, "learning_rate": 9.990616202690022e-06, "loss": 0.4616, "step": 3195 }, { "epoch": 0.29992492492492495, "grad_norm": 1.6079839782986924, "learning_rate": 9.993744135126682e-06, "loss": 0.5799, "step": 3196 }, { "epoch": 0.30001876876876876, "grad_norm": 1.3504652298674913, "learning_rate": 9.996872067563342e-06, "loss": 0.5354, "step": 3197 }, { "epoch": 0.30011261261261263, "grad_norm": 1.3963734810796349, "learning_rate": 1e-05, "loss": 0.5056, "step": 3198 }, { "epoch": 0.30020645645645644, "grad_norm": 1.2216535010754555, "learning_rate": 9.999999970192207e-06, "loss": 0.5231, "step": 3199 }, { "epoch": 0.3003003003003003, "grad_norm": 2.0475934851162214, "learning_rate": 9.999999880768824e-06, "loss": 0.4652, "step": 3200 }, { "epoch": 0.30039414414414417, "grad_norm": 1.2977098943991368, "learning_rate": 9.999999731729855e-06, "loss": 0.4915, "step": 3201 }, { "epoch": 0.300487987987988, "grad_norm": 2.089796350595701, "learning_rate": 9.9999995230753e-06, "loss": 0.5042, "step": 3202 }, { "epoch": 0.30058183183183185, "grad_norm": 1.4488791600715172, "learning_rate": 9.999999254805162e-06, "loss": 0.4796, "step": 3203 }, { "epoch": 0.30067567567567566, "grad_norm": 1.6601323940141166, "learning_rate": 9.999998926919445e-06, "loss": 0.4894, "step": 3204 }, { "epoch": 0.3007695195195195, "grad_norm": 1.2412414685196083, "learning_rate": 9.999998539418154e-06, "loss": 0.5054, "step": 3205 }, { "epoch": 0.3008633633633634, "grad_norm": 1.2884352590300998, "learning_rate": 9.999998092301289e-06, "loss": 0.5344, "step": 3206 }, { "epoch": 0.3009572072072072, "grad_norm": 1.3404598175679296, "learning_rate": 9.99999758556886e-06, "loss": 0.5443, "step": 3207 }, { "epoch": 0.30105105105105107, "grad_norm": 1.6940160777505815, "learning_rate": 9.999997019220871e-06, "loss": 0.5801, "step": 3208 }, { "epoch": 0.3011448948948949, "grad_norm": 1.601261477516457, "learning_rate": 9.999996393257328e-06, "loss": 0.4984, "step": 3209 }, { "epoch": 0.30123873873873874, "grad_norm": 2.2535412617987984, "learning_rate": 9.99999570767824e-06, "loss": 0.5449, "step": 3210 }, { "epoch": 0.3013325825825826, "grad_norm": 1.2819009258780365, "learning_rate": 9.999994962483617e-06, "loss": 0.5299, "step": 3211 }, { "epoch": 0.3014264264264264, "grad_norm": 2.3840290395298007, "learning_rate": 9.999994157673463e-06, "loss": 0.5786, "step": 3212 }, { "epoch": 0.3015202702702703, "grad_norm": 1.2313847970066714, "learning_rate": 9.999993293247792e-06, "loss": 0.492, "step": 3213 }, { "epoch": 0.3016141141141141, "grad_norm": 1.6075267275562561, "learning_rate": 9.99999236920661e-06, "loss": 0.5342, "step": 3214 }, { "epoch": 0.30170795795795796, "grad_norm": 1.3678974891778573, "learning_rate": 9.999991385549932e-06, "loss": 0.495, "step": 3215 }, { "epoch": 0.30180180180180183, "grad_norm": 1.6207628783973211, "learning_rate": 9.999990342277767e-06, "loss": 0.5183, "step": 3216 }, { "epoch": 0.30189564564564564, "grad_norm": 2.754837769928498, "learning_rate": 9.999989239390131e-06, "loss": 0.543, "step": 3217 }, { "epoch": 0.3019894894894895, "grad_norm": 1.5055638787057632, "learning_rate": 9.999988076887034e-06, "loss": 0.5001, "step": 3218 }, { "epoch": 0.3020833333333333, "grad_norm": 1.4876590299187145, "learning_rate": 9.99998685476849e-06, "loss": 0.531, "step": 3219 }, { "epoch": 0.3021771771771772, "grad_norm": 1.4503031094128345, "learning_rate": 9.999985573034516e-06, "loss": 0.5339, "step": 3220 }, { "epoch": 0.30227102102102105, "grad_norm": 1.5393056874600808, "learning_rate": 9.999984231685124e-06, "loss": 0.515, "step": 3221 }, { "epoch": 0.30236486486486486, "grad_norm": 1.490905052536758, "learning_rate": 9.999982830720331e-06, "loss": 0.5063, "step": 3222 }, { "epoch": 0.3024587087087087, "grad_norm": 1.3811275631370392, "learning_rate": 9.999981370140154e-06, "loss": 0.5091, "step": 3223 }, { "epoch": 0.30255255255255253, "grad_norm": 1.7858708555668037, "learning_rate": 9.999979849944613e-06, "loss": 0.4947, "step": 3224 }, { "epoch": 0.3026463963963964, "grad_norm": 1.4337441331593996, "learning_rate": 9.999978270133722e-06, "loss": 0.5007, "step": 3225 }, { "epoch": 0.30274024024024027, "grad_norm": 1.3226695366451557, "learning_rate": 9.999976630707502e-06, "loss": 0.5371, "step": 3226 }, { "epoch": 0.3028340840840841, "grad_norm": 1.3936552401799307, "learning_rate": 9.999974931665972e-06, "loss": 0.4832, "step": 3227 }, { "epoch": 0.30292792792792794, "grad_norm": 1.7469668564830634, "learning_rate": 9.999973173009153e-06, "loss": 0.573, "step": 3228 }, { "epoch": 0.30302177177177175, "grad_norm": 1.585390784603566, "learning_rate": 9.999971354737066e-06, "loss": 0.4809, "step": 3229 }, { "epoch": 0.3031156156156156, "grad_norm": 3.2330530237984774, "learning_rate": 9.999969476849731e-06, "loss": 0.5615, "step": 3230 }, { "epoch": 0.3032094594594595, "grad_norm": 1.8151586640479047, "learning_rate": 9.999967539347171e-06, "loss": 0.4609, "step": 3231 }, { "epoch": 0.3033033033033033, "grad_norm": 1.4260124654527984, "learning_rate": 9.99996554222941e-06, "loss": 0.5187, "step": 3232 }, { "epoch": 0.30339714714714716, "grad_norm": 1.3481049417660234, "learning_rate": 9.999963485496471e-06, "loss": 0.5457, "step": 3233 }, { "epoch": 0.30349099099099097, "grad_norm": 1.6737625702817653, "learning_rate": 9.99996136914838e-06, "loss": 0.5178, "step": 3234 }, { "epoch": 0.30358483483483484, "grad_norm": 2.068654169506885, "learning_rate": 9.999959193185161e-06, "loss": 0.5281, "step": 3235 }, { "epoch": 0.3036786786786787, "grad_norm": 1.5918674797923882, "learning_rate": 9.99995695760684e-06, "loss": 0.5605, "step": 3236 }, { "epoch": 0.3037725225225225, "grad_norm": 1.5792630459553971, "learning_rate": 9.999954662413443e-06, "loss": 0.5582, "step": 3237 }, { "epoch": 0.3038663663663664, "grad_norm": 1.7193850075313633, "learning_rate": 9.999952307604998e-06, "loss": 0.5008, "step": 3238 }, { "epoch": 0.3039602102102102, "grad_norm": 1.4126114219074708, "learning_rate": 9.999949893181533e-06, "loss": 0.5072, "step": 3239 }, { "epoch": 0.30405405405405406, "grad_norm": 1.511705864425692, "learning_rate": 9.999947419143079e-06, "loss": 0.5148, "step": 3240 }, { "epoch": 0.3041478978978979, "grad_norm": 1.3346176248891284, "learning_rate": 9.99994488548966e-06, "loss": 0.5198, "step": 3241 }, { "epoch": 0.30424174174174173, "grad_norm": 1.6647944584013006, "learning_rate": 9.999942292221314e-06, "loss": 0.4806, "step": 3242 }, { "epoch": 0.3043355855855856, "grad_norm": 1.578246826173137, "learning_rate": 9.999939639338065e-06, "loss": 0.5121, "step": 3243 }, { "epoch": 0.3044294294294294, "grad_norm": 1.3981263285579122, "learning_rate": 9.999936926839947e-06, "loss": 0.5084, "step": 3244 }, { "epoch": 0.3045232732732733, "grad_norm": 1.2404056772057164, "learning_rate": 9.999934154726994e-06, "loss": 0.4869, "step": 3245 }, { "epoch": 0.30461711711711714, "grad_norm": 1.4654832202032062, "learning_rate": 9.999931322999237e-06, "loss": 0.5597, "step": 3246 }, { "epoch": 0.30471096096096095, "grad_norm": 1.4382008596700209, "learning_rate": 9.99992843165671e-06, "loss": 0.5185, "step": 3247 }, { "epoch": 0.3048048048048048, "grad_norm": 1.288555423359657, "learning_rate": 9.999925480699447e-06, "loss": 0.5148, "step": 3248 }, { "epoch": 0.30489864864864863, "grad_norm": 1.2640413886661308, "learning_rate": 9.999922470127486e-06, "loss": 0.5194, "step": 3249 }, { "epoch": 0.3049924924924925, "grad_norm": 1.9513060879345463, "learning_rate": 9.999919399940859e-06, "loss": 0.4247, "step": 3250 }, { "epoch": 0.30508633633633636, "grad_norm": 1.2286503531174158, "learning_rate": 9.999916270139604e-06, "loss": 0.4859, "step": 3251 }, { "epoch": 0.30518018018018017, "grad_norm": 1.7462818294217533, "learning_rate": 9.999913080723762e-06, "loss": 0.55, "step": 3252 }, { "epoch": 0.30527402402402404, "grad_norm": 1.20945841990943, "learning_rate": 9.999909831693365e-06, "loss": 0.4954, "step": 3253 }, { "epoch": 0.30536786786786785, "grad_norm": 1.560612734870711, "learning_rate": 9.999906523048458e-06, "loss": 0.5676, "step": 3254 }, { "epoch": 0.3054617117117117, "grad_norm": 1.5610857482027551, "learning_rate": 9.999903154789073e-06, "loss": 0.5428, "step": 3255 }, { "epoch": 0.3055555555555556, "grad_norm": 2.039666189081436, "learning_rate": 9.999899726915257e-06, "loss": 0.4906, "step": 3256 }, { "epoch": 0.3056493993993994, "grad_norm": 1.3266585919848215, "learning_rate": 9.999896239427047e-06, "loss": 0.5312, "step": 3257 }, { "epoch": 0.30574324324324326, "grad_norm": 1.4966333351630952, "learning_rate": 9.999892692324486e-06, "loss": 0.5369, "step": 3258 }, { "epoch": 0.30583708708708707, "grad_norm": 1.330801460338839, "learning_rate": 9.999889085607615e-06, "loss": 0.5079, "step": 3259 }, { "epoch": 0.30593093093093093, "grad_norm": 1.4159266008815001, "learning_rate": 9.999885419276479e-06, "loss": 0.5238, "step": 3260 }, { "epoch": 0.3060247747747748, "grad_norm": 2.07534968379521, "learning_rate": 9.999881693331121e-06, "loss": 0.4904, "step": 3261 }, { "epoch": 0.3061186186186186, "grad_norm": 1.9227886954449631, "learning_rate": 9.999877907771585e-06, "loss": 0.5021, "step": 3262 }, { "epoch": 0.3062124624624625, "grad_norm": 1.275852885172379, "learning_rate": 9.999874062597916e-06, "loss": 0.4974, "step": 3263 }, { "epoch": 0.3063063063063063, "grad_norm": 1.5686784225845154, "learning_rate": 9.999870157810158e-06, "loss": 0.5347, "step": 3264 }, { "epoch": 0.30640015015015015, "grad_norm": 1.2844383537451693, "learning_rate": 9.999866193408363e-06, "loss": 0.4571, "step": 3265 }, { "epoch": 0.306493993993994, "grad_norm": 1.5674221178135233, "learning_rate": 9.999862169392573e-06, "loss": 0.5632, "step": 3266 }, { "epoch": 0.30658783783783783, "grad_norm": 1.2963080760226415, "learning_rate": 9.999858085762839e-06, "loss": 0.5144, "step": 3267 }, { "epoch": 0.3066816816816817, "grad_norm": 1.6824856143857132, "learning_rate": 9.999853942519209e-06, "loss": 0.5412, "step": 3268 }, { "epoch": 0.3067755255255255, "grad_norm": 1.5788847108912132, "learning_rate": 9.99984973966173e-06, "loss": 0.4899, "step": 3269 }, { "epoch": 0.30686936936936937, "grad_norm": 1.4736721302719977, "learning_rate": 9.999845477190455e-06, "loss": 0.5645, "step": 3270 }, { "epoch": 0.30696321321321324, "grad_norm": 1.4235561927676927, "learning_rate": 9.999841155105433e-06, "loss": 0.4844, "step": 3271 }, { "epoch": 0.30705705705705705, "grad_norm": 2.9209448850706887, "learning_rate": 9.99983677340672e-06, "loss": 0.4381, "step": 3272 }, { "epoch": 0.3071509009009009, "grad_norm": 1.7185262166664828, "learning_rate": 9.99983233209436e-06, "loss": 0.4956, "step": 3273 }, { "epoch": 0.3072447447447447, "grad_norm": 1.3745594913448262, "learning_rate": 9.999827831168414e-06, "loss": 0.5486, "step": 3274 }, { "epoch": 0.3073385885885886, "grad_norm": 3.133685304349132, "learning_rate": 9.999823270628932e-06, "loss": 0.5197, "step": 3275 }, { "epoch": 0.30743243243243246, "grad_norm": 5.2647807142075465, "learning_rate": 9.999818650475968e-06, "loss": 0.5226, "step": 3276 }, { "epoch": 0.30752627627627627, "grad_norm": 2.145639141564573, "learning_rate": 9.999813970709577e-06, "loss": 0.4598, "step": 3277 }, { "epoch": 0.30762012012012013, "grad_norm": 1.6585593392138094, "learning_rate": 9.999809231329816e-06, "loss": 0.5273, "step": 3278 }, { "epoch": 0.30771396396396394, "grad_norm": 1.7788195939506486, "learning_rate": 9.999804432336741e-06, "loss": 0.5025, "step": 3279 }, { "epoch": 0.3078078078078078, "grad_norm": 1.3439848758155097, "learning_rate": 9.99979957373041e-06, "loss": 0.4842, "step": 3280 }, { "epoch": 0.3079016516516517, "grad_norm": 1.2585101129737382, "learning_rate": 9.99979465551088e-06, "loss": 0.5299, "step": 3281 }, { "epoch": 0.3079954954954955, "grad_norm": 1.3109865371857679, "learning_rate": 9.99978967767821e-06, "loss": 0.5603, "step": 3282 }, { "epoch": 0.30808933933933935, "grad_norm": 1.898375537645134, "learning_rate": 9.999784640232459e-06, "loss": 0.5203, "step": 3283 }, { "epoch": 0.30818318318318316, "grad_norm": 1.3776279401931848, "learning_rate": 9.999779543173687e-06, "loss": 0.4889, "step": 3284 }, { "epoch": 0.30827702702702703, "grad_norm": 2.439448990480942, "learning_rate": 9.999774386501956e-06, "loss": 0.4978, "step": 3285 }, { "epoch": 0.3083708708708709, "grad_norm": 3.9437816258185494, "learning_rate": 9.999769170217326e-06, "loss": 0.5554, "step": 3286 }, { "epoch": 0.3084647147147147, "grad_norm": 1.896142763084448, "learning_rate": 9.99976389431986e-06, "loss": 0.5705, "step": 3287 }, { "epoch": 0.30855855855855857, "grad_norm": 1.4349670560901941, "learning_rate": 9.99975855880962e-06, "loss": 0.4831, "step": 3288 }, { "epoch": 0.3086524024024024, "grad_norm": 1.5692733419581493, "learning_rate": 9.999753163686671e-06, "loss": 0.5192, "step": 3289 }, { "epoch": 0.30874624624624625, "grad_norm": 1.56708055910919, "learning_rate": 9.999747708951075e-06, "loss": 0.5462, "step": 3290 }, { "epoch": 0.3088400900900901, "grad_norm": 1.160454738351432, "learning_rate": 9.9997421946029e-06, "loss": 0.4924, "step": 3291 }, { "epoch": 0.3089339339339339, "grad_norm": 1.3578969193598731, "learning_rate": 9.999736620642212e-06, "loss": 0.5348, "step": 3292 }, { "epoch": 0.3090277777777778, "grad_norm": 1.3307368150290866, "learning_rate": 9.999730987069073e-06, "loss": 0.4883, "step": 3293 }, { "epoch": 0.3091216216216216, "grad_norm": 2.353257931976695, "learning_rate": 9.999725293883553e-06, "loss": 0.55, "step": 3294 }, { "epoch": 0.30921546546546547, "grad_norm": 3.667782411757515, "learning_rate": 9.999719541085721e-06, "loss": 0.5475, "step": 3295 }, { "epoch": 0.30930930930930933, "grad_norm": 1.2550450529887895, "learning_rate": 9.999713728675645e-06, "loss": 0.5157, "step": 3296 }, { "epoch": 0.30940315315315314, "grad_norm": 1.3232009414356627, "learning_rate": 9.999707856653393e-06, "loss": 0.4939, "step": 3297 }, { "epoch": 0.309496996996997, "grad_norm": 1.4612161108400514, "learning_rate": 9.999701925019036e-06, "loss": 0.5611, "step": 3298 }, { "epoch": 0.3095908408408408, "grad_norm": 1.6136139137755505, "learning_rate": 9.999695933772645e-06, "loss": 0.5147, "step": 3299 }, { "epoch": 0.3096846846846847, "grad_norm": 1.2336179100848739, "learning_rate": 9.99968988291429e-06, "loss": 0.5572, "step": 3300 }, { "epoch": 0.30977852852852855, "grad_norm": 1.3566062344937864, "learning_rate": 9.999683772444044e-06, "loss": 0.5188, "step": 3301 }, { "epoch": 0.30987237237237236, "grad_norm": 1.4087318215209363, "learning_rate": 9.999677602361982e-06, "loss": 0.517, "step": 3302 }, { "epoch": 0.30996621621621623, "grad_norm": 1.4087280653741987, "learning_rate": 9.999671372668174e-06, "loss": 0.4997, "step": 3303 }, { "epoch": 0.31006006006006004, "grad_norm": 1.252117970754358, "learning_rate": 9.999665083362695e-06, "loss": 0.5129, "step": 3304 }, { "epoch": 0.3101539039039039, "grad_norm": 1.4782516671409667, "learning_rate": 9.999658734445621e-06, "loss": 0.5768, "step": 3305 }, { "epoch": 0.31024774774774777, "grad_norm": 16.286186845388983, "learning_rate": 9.999652325917028e-06, "loss": 0.4661, "step": 3306 }, { "epoch": 0.3103415915915916, "grad_norm": 1.67761464943341, "learning_rate": 9.999645857776992e-06, "loss": 0.4898, "step": 3307 }, { "epoch": 0.31043543543543545, "grad_norm": 1.5918567740286267, "learning_rate": 9.99963933002559e-06, "loss": 0.5055, "step": 3308 }, { "epoch": 0.31052927927927926, "grad_norm": 2.042052191813137, "learning_rate": 9.9996327426629e-06, "loss": 0.5, "step": 3309 }, { "epoch": 0.3106231231231231, "grad_norm": 1.6517662123189705, "learning_rate": 9.999626095689e-06, "loss": 0.5087, "step": 3310 }, { "epoch": 0.310716966966967, "grad_norm": 1.331631429039027, "learning_rate": 9.99961938910397e-06, "loss": 0.4813, "step": 3311 }, { "epoch": 0.3108108108108108, "grad_norm": 1.1705096527572527, "learning_rate": 9.99961262290789e-06, "loss": 0.4785, "step": 3312 }, { "epoch": 0.31090465465465467, "grad_norm": 1.2141842166991954, "learning_rate": 9.99960579710084e-06, "loss": 0.5435, "step": 3313 }, { "epoch": 0.3109984984984985, "grad_norm": 1.6350287158930612, "learning_rate": 9.9995989116829e-06, "loss": 0.4624, "step": 3314 }, { "epoch": 0.31109234234234234, "grad_norm": 1.5410108541147365, "learning_rate": 9.999591966654156e-06, "loss": 0.4978, "step": 3315 }, { "epoch": 0.3111861861861862, "grad_norm": 1.0962664320439501, "learning_rate": 9.999584962014688e-06, "loss": 0.483, "step": 3316 }, { "epoch": 0.31128003003003, "grad_norm": 1.1866427634766086, "learning_rate": 9.999577897764578e-06, "loss": 0.4382, "step": 3317 }, { "epoch": 0.3113738738738739, "grad_norm": 2.0039729548317067, "learning_rate": 9.999570773903915e-06, "loss": 0.5114, "step": 3318 }, { "epoch": 0.3114677177177177, "grad_norm": 1.4194541723598775, "learning_rate": 9.99956359043278e-06, "loss": 0.5168, "step": 3319 }, { "epoch": 0.31156156156156156, "grad_norm": 1.4347775128039453, "learning_rate": 9.99955634735126e-06, "loss": 0.5265, "step": 3320 }, { "epoch": 0.31165540540540543, "grad_norm": 1.3091801680516775, "learning_rate": 9.999549044659442e-06, "loss": 0.5403, "step": 3321 }, { "epoch": 0.31174924924924924, "grad_norm": 1.8313556546626264, "learning_rate": 9.999541682357411e-06, "loss": 0.5712, "step": 3322 }, { "epoch": 0.3118430930930931, "grad_norm": 2.119899549976865, "learning_rate": 9.999534260445258e-06, "loss": 0.529, "step": 3323 }, { "epoch": 0.3119369369369369, "grad_norm": 1.2507243928358451, "learning_rate": 9.999526778923068e-06, "loss": 0.4444, "step": 3324 }, { "epoch": 0.3120307807807808, "grad_norm": 1.4636681998417038, "learning_rate": 9.999519237790934e-06, "loss": 0.4852, "step": 3325 }, { "epoch": 0.31212462462462465, "grad_norm": 2.459914865202891, "learning_rate": 9.999511637048941e-06, "loss": 0.5473, "step": 3326 }, { "epoch": 0.31221846846846846, "grad_norm": 1.686566980173804, "learning_rate": 9.999503976697182e-06, "loss": 0.5255, "step": 3327 }, { "epoch": 0.3123123123123123, "grad_norm": 1.2247040457380796, "learning_rate": 9.999496256735751e-06, "loss": 0.5222, "step": 3328 }, { "epoch": 0.31240615615615613, "grad_norm": 1.4607973142624913, "learning_rate": 9.999488477164735e-06, "loss": 0.5133, "step": 3329 }, { "epoch": 0.3125, "grad_norm": 1.5235961029389495, "learning_rate": 9.999480637984233e-06, "loss": 0.5373, "step": 3330 }, { "epoch": 0.31259384384384387, "grad_norm": 1.3511827091428044, "learning_rate": 9.999472739194333e-06, "loss": 0.5577, "step": 3331 }, { "epoch": 0.3126876876876877, "grad_norm": 1.3725779263135827, "learning_rate": 9.999464780795131e-06, "loss": 0.4862, "step": 3332 }, { "epoch": 0.31278153153153154, "grad_norm": 1.5973143429058165, "learning_rate": 9.999456762786723e-06, "loss": 0.5263, "step": 3333 }, { "epoch": 0.31287537537537535, "grad_norm": 1.5045949886713568, "learning_rate": 9.999448685169202e-06, "loss": 0.5558, "step": 3334 }, { "epoch": 0.3129692192192192, "grad_norm": 1.4467343688357723, "learning_rate": 9.999440547942667e-06, "loss": 0.5578, "step": 3335 }, { "epoch": 0.3130630630630631, "grad_norm": 1.4382815244476181, "learning_rate": 9.999432351107214e-06, "loss": 0.5212, "step": 3336 }, { "epoch": 0.3131569069069069, "grad_norm": 1.420140002529463, "learning_rate": 9.99942409466294e-06, "loss": 0.4985, "step": 3337 }, { "epoch": 0.31325075075075076, "grad_norm": 1.3906240272421209, "learning_rate": 9.999415778609945e-06, "loss": 0.4892, "step": 3338 }, { "epoch": 0.31334459459459457, "grad_norm": 1.1692445649741419, "learning_rate": 9.999407402948327e-06, "loss": 0.4962, "step": 3339 }, { "epoch": 0.31343843843843844, "grad_norm": 1.9820827996425991, "learning_rate": 9.999398967678187e-06, "loss": 0.5073, "step": 3340 }, { "epoch": 0.3135322822822823, "grad_norm": 1.9656333609864185, "learning_rate": 9.999390472799622e-06, "loss": 0.5342, "step": 3341 }, { "epoch": 0.3136261261261261, "grad_norm": 1.5709463378171022, "learning_rate": 9.999381918312737e-06, "loss": 0.5375, "step": 3342 }, { "epoch": 0.31371996996997, "grad_norm": 1.4672841890878605, "learning_rate": 9.999373304217635e-06, "loss": 0.5316, "step": 3343 }, { "epoch": 0.3138138138138138, "grad_norm": 1.7926385566207614, "learning_rate": 9.999364630514417e-06, "loss": 0.5671, "step": 3344 }, { "epoch": 0.31390765765765766, "grad_norm": 1.3534793179676767, "learning_rate": 9.999355897203183e-06, "loss": 0.4842, "step": 3345 }, { "epoch": 0.3140015015015015, "grad_norm": 2.437111144463581, "learning_rate": 9.999347104284043e-06, "loss": 0.5118, "step": 3346 }, { "epoch": 0.31409534534534533, "grad_norm": 1.1716503242266685, "learning_rate": 9.999338251757098e-06, "loss": 0.5115, "step": 3347 }, { "epoch": 0.3141891891891892, "grad_norm": 1.8074757320442107, "learning_rate": 9.999329339622453e-06, "loss": 0.5435, "step": 3348 }, { "epoch": 0.314283033033033, "grad_norm": 1.42132605264477, "learning_rate": 9.999320367880218e-06, "loss": 0.5021, "step": 3349 }, { "epoch": 0.3143768768768769, "grad_norm": 2.0521350232743503, "learning_rate": 9.999311336530497e-06, "loss": 0.4961, "step": 3350 }, { "epoch": 0.31447072072072074, "grad_norm": 2.322452972227847, "learning_rate": 9.9993022455734e-06, "loss": 0.5039, "step": 3351 }, { "epoch": 0.31456456456456455, "grad_norm": 1.6825034655981488, "learning_rate": 9.999293095009031e-06, "loss": 0.5469, "step": 3352 }, { "epoch": 0.3146584084084084, "grad_norm": 2.0588295231013096, "learning_rate": 9.999283884837504e-06, "loss": 0.5021, "step": 3353 }, { "epoch": 0.31475225225225223, "grad_norm": 1.5510120950500592, "learning_rate": 9.999274615058927e-06, "loss": 0.4903, "step": 3354 }, { "epoch": 0.3148460960960961, "grad_norm": 1.3329575934031836, "learning_rate": 9.99926528567341e-06, "loss": 0.5473, "step": 3355 }, { "epoch": 0.31493993993993996, "grad_norm": 1.2562125156928388, "learning_rate": 9.999255896681063e-06, "loss": 0.5049, "step": 3356 }, { "epoch": 0.31503378378378377, "grad_norm": 1.2857145194007809, "learning_rate": 9.999246448082003e-06, "loss": 0.4842, "step": 3357 }, { "epoch": 0.31512762762762764, "grad_norm": 1.3009849632434651, "learning_rate": 9.999236939876338e-06, "loss": 0.4911, "step": 3358 }, { "epoch": 0.31522147147147145, "grad_norm": 2.2567213606768877, "learning_rate": 9.999227372064182e-06, "loss": 0.5429, "step": 3359 }, { "epoch": 0.3153153153153153, "grad_norm": 1.4173335648123013, "learning_rate": 9.999217744645648e-06, "loss": 0.5443, "step": 3360 }, { "epoch": 0.3154091591591592, "grad_norm": 1.5902562248213348, "learning_rate": 9.999208057620855e-06, "loss": 0.541, "step": 3361 }, { "epoch": 0.315503003003003, "grad_norm": 1.8795447665989413, "learning_rate": 9.999198310989915e-06, "loss": 0.5287, "step": 3362 }, { "epoch": 0.31559684684684686, "grad_norm": 1.6991365685226865, "learning_rate": 9.999188504752945e-06, "loss": 0.5226, "step": 3363 }, { "epoch": 0.31569069069069067, "grad_norm": 1.301070363525701, "learning_rate": 9.999178638910063e-06, "loss": 0.5266, "step": 3364 }, { "epoch": 0.31578453453453453, "grad_norm": 1.815369416840626, "learning_rate": 9.999168713461384e-06, "loss": 0.4921, "step": 3365 }, { "epoch": 0.3158783783783784, "grad_norm": 1.6356611254641016, "learning_rate": 9.99915872840703e-06, "loss": 0.4595, "step": 3366 }, { "epoch": 0.3159722222222222, "grad_norm": 1.3123436517496092, "learning_rate": 9.999148683747118e-06, "loss": 0.555, "step": 3367 }, { "epoch": 0.3160660660660661, "grad_norm": 1.4743310970168673, "learning_rate": 9.999138579481766e-06, "loss": 0.5676, "step": 3368 }, { "epoch": 0.3161599099099099, "grad_norm": 2.2449719800056784, "learning_rate": 9.999128415611097e-06, "loss": 0.533, "step": 3369 }, { "epoch": 0.31625375375375375, "grad_norm": 2.104398014466554, "learning_rate": 9.999118192135233e-06, "loss": 0.5369, "step": 3370 }, { "epoch": 0.3163475975975976, "grad_norm": 1.9350953193599572, "learning_rate": 9.999107909054293e-06, "loss": 0.5235, "step": 3371 }, { "epoch": 0.31644144144144143, "grad_norm": 2.8326049058541503, "learning_rate": 9.999097566368401e-06, "loss": 0.5325, "step": 3372 }, { "epoch": 0.3165352852852853, "grad_norm": 2.3251914385132464, "learning_rate": 9.999087164077682e-06, "loss": 0.4538, "step": 3373 }, { "epoch": 0.3166291291291291, "grad_norm": 1.5120560396839102, "learning_rate": 9.999076702182256e-06, "loss": 0.4892, "step": 3374 }, { "epoch": 0.31672297297297297, "grad_norm": 1.69448766012094, "learning_rate": 9.999066180682252e-06, "loss": 0.484, "step": 3375 }, { "epoch": 0.31681681681681684, "grad_norm": 1.5931638057703528, "learning_rate": 9.999055599577793e-06, "loss": 0.5223, "step": 3376 }, { "epoch": 0.31691066066066065, "grad_norm": 1.2936437896598525, "learning_rate": 9.999044958869005e-06, "loss": 0.4676, "step": 3377 }, { "epoch": 0.3170045045045045, "grad_norm": 4.027559859945748, "learning_rate": 9.999034258556016e-06, "loss": 0.4632, "step": 3378 }, { "epoch": 0.3170983483483483, "grad_norm": 1.923081431180064, "learning_rate": 9.999023498638953e-06, "loss": 0.519, "step": 3379 }, { "epoch": 0.3171921921921922, "grad_norm": 6.336071889519447, "learning_rate": 9.999012679117945e-06, "loss": 0.5176, "step": 3380 }, { "epoch": 0.31728603603603606, "grad_norm": 1.1891259211801823, "learning_rate": 9.999001799993121e-06, "loss": 0.5138, "step": 3381 }, { "epoch": 0.31737987987987987, "grad_norm": 1.1622973032671724, "learning_rate": 9.99899086126461e-06, "loss": 0.4895, "step": 3382 }, { "epoch": 0.31747372372372373, "grad_norm": 2.3606128706248315, "learning_rate": 9.998979862932541e-06, "loss": 0.503, "step": 3383 }, { "epoch": 0.31756756756756754, "grad_norm": 1.82181666241825, "learning_rate": 9.99896880499705e-06, "loss": 0.5031, "step": 3384 }, { "epoch": 0.3176614114114114, "grad_norm": 1.2480621282183535, "learning_rate": 9.998957687458262e-06, "loss": 0.5372, "step": 3385 }, { "epoch": 0.3177552552552553, "grad_norm": 1.2732293257037197, "learning_rate": 9.998946510316315e-06, "loss": 0.4828, "step": 3386 }, { "epoch": 0.3178490990990991, "grad_norm": 1.2533051454825783, "learning_rate": 9.99893527357134e-06, "loss": 0.4786, "step": 3387 }, { "epoch": 0.31794294294294295, "grad_norm": 1.2525417917477102, "learning_rate": 9.998923977223471e-06, "loss": 0.5166, "step": 3388 }, { "epoch": 0.31803678678678676, "grad_norm": 1.2744230911256418, "learning_rate": 9.998912621272844e-06, "loss": 0.5524, "step": 3389 }, { "epoch": 0.31813063063063063, "grad_norm": 1.1565103756063981, "learning_rate": 9.998901205719593e-06, "loss": 0.4703, "step": 3390 }, { "epoch": 0.3182244744744745, "grad_norm": 1.3679029502256526, "learning_rate": 9.998889730563854e-06, "loss": 0.5295, "step": 3391 }, { "epoch": 0.3183183183183183, "grad_norm": 1.580881547063607, "learning_rate": 9.998878195805765e-06, "loss": 0.4981, "step": 3392 }, { "epoch": 0.31841216216216217, "grad_norm": 1.3245244199562518, "learning_rate": 9.998866601445463e-06, "loss": 0.5358, "step": 3393 }, { "epoch": 0.318506006006006, "grad_norm": 1.2964904289365424, "learning_rate": 9.998854947483085e-06, "loss": 0.4947, "step": 3394 }, { "epoch": 0.31859984984984985, "grad_norm": 1.3645063210299226, "learning_rate": 9.998843233918773e-06, "loss": 0.5607, "step": 3395 }, { "epoch": 0.3186936936936937, "grad_norm": 1.3124386346294392, "learning_rate": 9.998831460752665e-06, "loss": 0.44, "step": 3396 }, { "epoch": 0.3187875375375375, "grad_norm": 1.2550364192863979, "learning_rate": 9.9988196279849e-06, "loss": 0.4885, "step": 3397 }, { "epoch": 0.3188813813813814, "grad_norm": 1.2848866602271405, "learning_rate": 9.99880773561562e-06, "loss": 0.5218, "step": 3398 }, { "epoch": 0.3189752252252252, "grad_norm": 3.8173748544042616, "learning_rate": 9.998795783644969e-06, "loss": 0.4961, "step": 3399 }, { "epoch": 0.31906906906906907, "grad_norm": 1.3487322103811554, "learning_rate": 9.998783772073084e-06, "loss": 0.4807, "step": 3400 }, { "epoch": 0.31916291291291293, "grad_norm": 1.2475378837539146, "learning_rate": 9.998771700900113e-06, "loss": 0.4878, "step": 3401 }, { "epoch": 0.31925675675675674, "grad_norm": 1.6575689210208486, "learning_rate": 9.9987595701262e-06, "loss": 0.5535, "step": 3402 }, { "epoch": 0.3193506006006006, "grad_norm": 1.432677740277665, "learning_rate": 9.998747379751487e-06, "loss": 0.514, "step": 3403 }, { "epoch": 0.3194444444444444, "grad_norm": 1.270129395741204, "learning_rate": 9.998735129776122e-06, "loss": 0.502, "step": 3404 }, { "epoch": 0.3195382882882883, "grad_norm": 1.3552513533855683, "learning_rate": 9.998722820200251e-06, "loss": 0.5353, "step": 3405 }, { "epoch": 0.31963213213213215, "grad_norm": 1.2252609657048306, "learning_rate": 9.998710451024016e-06, "loss": 0.5262, "step": 3406 }, { "epoch": 0.31972597597597596, "grad_norm": 1.4712217418768265, "learning_rate": 9.99869802224757e-06, "loss": 0.5152, "step": 3407 }, { "epoch": 0.31981981981981983, "grad_norm": 2.1227890001292655, "learning_rate": 9.99868553387106e-06, "loss": 0.4921, "step": 3408 }, { "epoch": 0.31991366366366364, "grad_norm": 1.6365223823436927, "learning_rate": 9.998672985894632e-06, "loss": 0.5566, "step": 3409 }, { "epoch": 0.3200075075075075, "grad_norm": 1.2903522189968442, "learning_rate": 9.998660378318438e-06, "loss": 0.4968, "step": 3410 }, { "epoch": 0.32010135135135137, "grad_norm": 1.6592721881851176, "learning_rate": 9.998647711142628e-06, "loss": 0.4734, "step": 3411 }, { "epoch": 0.3201951951951952, "grad_norm": 1.563523616398869, "learning_rate": 9.998634984367355e-06, "loss": 0.5301, "step": 3412 }, { "epoch": 0.32028903903903905, "grad_norm": 1.336077037179677, "learning_rate": 9.998622197992767e-06, "loss": 0.508, "step": 3413 }, { "epoch": 0.32038288288288286, "grad_norm": 1.7845888676534456, "learning_rate": 9.998609352019018e-06, "loss": 0.5312, "step": 3414 }, { "epoch": 0.3204767267267267, "grad_norm": 2.038158693889411, "learning_rate": 9.998596446446262e-06, "loss": 0.5073, "step": 3415 }, { "epoch": 0.3205705705705706, "grad_norm": 1.3312228826868662, "learning_rate": 9.998583481274652e-06, "loss": 0.4892, "step": 3416 }, { "epoch": 0.3206644144144144, "grad_norm": 1.6241935716685012, "learning_rate": 9.998570456504344e-06, "loss": 0.4651, "step": 3417 }, { "epoch": 0.32075825825825827, "grad_norm": 1.5115338135530991, "learning_rate": 9.99855737213549e-06, "loss": 0.5473, "step": 3418 }, { "epoch": 0.3208521021021021, "grad_norm": 1.3961809667363632, "learning_rate": 9.99854422816825e-06, "loss": 0.5065, "step": 3419 }, { "epoch": 0.32094594594594594, "grad_norm": 1.4658318176732552, "learning_rate": 9.998531024602779e-06, "loss": 0.4951, "step": 3420 }, { "epoch": 0.3210397897897898, "grad_norm": 1.3821980622480325, "learning_rate": 9.998517761439234e-06, "loss": 0.4958, "step": 3421 }, { "epoch": 0.3211336336336336, "grad_norm": 1.3997541614143325, "learning_rate": 9.998504438677772e-06, "loss": 0.4878, "step": 3422 }, { "epoch": 0.3212274774774775, "grad_norm": 1.5302364157715234, "learning_rate": 9.998491056318556e-06, "loss": 0.548, "step": 3423 }, { "epoch": 0.3213213213213213, "grad_norm": 1.173956086248531, "learning_rate": 9.99847761436174e-06, "loss": 0.494, "step": 3424 }, { "epoch": 0.32141516516516516, "grad_norm": 1.117793061120843, "learning_rate": 9.99846411280749e-06, "loss": 0.4979, "step": 3425 }, { "epoch": 0.32150900900900903, "grad_norm": 3.9371486629067802, "learning_rate": 9.998450551655961e-06, "loss": 0.5138, "step": 3426 }, { "epoch": 0.32160285285285284, "grad_norm": 1.229686855644096, "learning_rate": 9.998436930907318e-06, "loss": 0.5343, "step": 3427 }, { "epoch": 0.3216966966966967, "grad_norm": 1.3846844043765392, "learning_rate": 9.998423250561724e-06, "loss": 0.5527, "step": 3428 }, { "epoch": 0.3217905405405405, "grad_norm": 1.4346474182641036, "learning_rate": 9.998409510619342e-06, "loss": 0.525, "step": 3429 }, { "epoch": 0.3218843843843844, "grad_norm": 1.2646434624291047, "learning_rate": 9.998395711080336e-06, "loss": 0.5141, "step": 3430 }, { "epoch": 0.32197822822822825, "grad_norm": 1.7983279547322448, "learning_rate": 9.998381851944868e-06, "loss": 0.4889, "step": 3431 }, { "epoch": 0.32207207207207206, "grad_norm": 1.3018181805839038, "learning_rate": 9.998367933213105e-06, "loss": 0.5335, "step": 3432 }, { "epoch": 0.3221659159159159, "grad_norm": 1.2458994709861568, "learning_rate": 9.998353954885214e-06, "loss": 0.5165, "step": 3433 }, { "epoch": 0.32225975975975973, "grad_norm": 1.7860716772492504, "learning_rate": 9.998339916961359e-06, "loss": 0.5317, "step": 3434 }, { "epoch": 0.3223536036036036, "grad_norm": 1.4764639053133357, "learning_rate": 9.99832581944171e-06, "loss": 0.4768, "step": 3435 }, { "epoch": 0.32244744744744747, "grad_norm": 1.2239209902906794, "learning_rate": 9.998311662326433e-06, "loss": 0.5145, "step": 3436 }, { "epoch": 0.3225412912912913, "grad_norm": 1.4515926027592065, "learning_rate": 9.9982974456157e-06, "loss": 0.4951, "step": 3437 }, { "epoch": 0.32263513513513514, "grad_norm": 1.3719941471511792, "learning_rate": 9.998283169309674e-06, "loss": 0.4723, "step": 3438 }, { "epoch": 0.32272897897897895, "grad_norm": 1.0800011238948017, "learning_rate": 9.998268833408534e-06, "loss": 0.4916, "step": 3439 }, { "epoch": 0.3228228228228228, "grad_norm": 1.755495648111712, "learning_rate": 9.998254437912445e-06, "loss": 0.5232, "step": 3440 }, { "epoch": 0.3229166666666667, "grad_norm": 1.409833909289955, "learning_rate": 9.998239982821579e-06, "loss": 0.4774, "step": 3441 }, { "epoch": 0.3230105105105105, "grad_norm": 1.5551178255427613, "learning_rate": 9.99822546813611e-06, "loss": 0.4906, "step": 3442 }, { "epoch": 0.32310435435435436, "grad_norm": 1.2982412644369492, "learning_rate": 9.998210893856211e-06, "loss": 0.4964, "step": 3443 }, { "epoch": 0.32319819819819817, "grad_norm": 1.3540917200610272, "learning_rate": 9.998196259982053e-06, "loss": 0.5063, "step": 3444 }, { "epoch": 0.32329204204204204, "grad_norm": 1.4568984641882456, "learning_rate": 9.998181566513816e-06, "loss": 0.4989, "step": 3445 }, { "epoch": 0.3233858858858859, "grad_norm": 1.2097969249827516, "learning_rate": 9.998166813451671e-06, "loss": 0.5016, "step": 3446 }, { "epoch": 0.3234797297297297, "grad_norm": 1.4966169092705937, "learning_rate": 9.998152000795793e-06, "loss": 0.5465, "step": 3447 }, { "epoch": 0.3235735735735736, "grad_norm": 1.4228318783817702, "learning_rate": 9.998137128546362e-06, "loss": 0.5341, "step": 3448 }, { "epoch": 0.3236674174174174, "grad_norm": 1.2885968234943113, "learning_rate": 9.998122196703553e-06, "loss": 0.5464, "step": 3449 }, { "epoch": 0.32376126126126126, "grad_norm": 1.2519329130422623, "learning_rate": 9.998107205267546e-06, "loss": 0.5105, "step": 3450 }, { "epoch": 0.3238551051051051, "grad_norm": 1.2953146762845353, "learning_rate": 9.998092154238518e-06, "loss": 0.5282, "step": 3451 }, { "epoch": 0.32394894894894893, "grad_norm": 1.288752243342256, "learning_rate": 9.99807704361665e-06, "loss": 0.5146, "step": 3452 }, { "epoch": 0.3240427927927928, "grad_norm": 1.3848233593084571, "learning_rate": 9.998061873402118e-06, "loss": 0.5023, "step": 3453 }, { "epoch": 0.3241366366366366, "grad_norm": 1.452956558934201, "learning_rate": 9.998046643595109e-06, "loss": 0.5396, "step": 3454 }, { "epoch": 0.3242304804804805, "grad_norm": 1.377139433340206, "learning_rate": 9.9980313541958e-06, "loss": 0.4932, "step": 3455 }, { "epoch": 0.32432432432432434, "grad_norm": 1.223797070602211, "learning_rate": 9.998016005204376e-06, "loss": 0.506, "step": 3456 }, { "epoch": 0.32441816816816815, "grad_norm": 1.3195937286547341, "learning_rate": 9.998000596621019e-06, "loss": 0.4916, "step": 3457 }, { "epoch": 0.324512012012012, "grad_norm": 1.5703703041149137, "learning_rate": 9.997985128445913e-06, "loss": 0.5298, "step": 3458 }, { "epoch": 0.32460585585585583, "grad_norm": 1.3427635348114142, "learning_rate": 9.99796960067924e-06, "loss": 0.5158, "step": 3459 }, { "epoch": 0.3246996996996997, "grad_norm": 1.2642821309407424, "learning_rate": 9.99795401332119e-06, "loss": 0.5589, "step": 3460 }, { "epoch": 0.32479354354354356, "grad_norm": 1.723674601773786, "learning_rate": 9.997938366371945e-06, "loss": 0.4651, "step": 3461 }, { "epoch": 0.32488738738738737, "grad_norm": 3.694100119489046, "learning_rate": 9.997922659831691e-06, "loss": 0.5341, "step": 3462 }, { "epoch": 0.32498123123123124, "grad_norm": 1.5806517052749947, "learning_rate": 9.997906893700618e-06, "loss": 0.5393, "step": 3463 }, { "epoch": 0.32507507507507505, "grad_norm": 1.5522417741168941, "learning_rate": 9.997891067978914e-06, "loss": 0.4822, "step": 3464 }, { "epoch": 0.3251689189189189, "grad_norm": 1.702327932756499, "learning_rate": 9.997875182666764e-06, "loss": 0.5796, "step": 3465 }, { "epoch": 0.3252627627627628, "grad_norm": 1.2671433076838257, "learning_rate": 9.997859237764363e-06, "loss": 0.5079, "step": 3466 }, { "epoch": 0.3253566066066066, "grad_norm": 1.7871658520481428, "learning_rate": 9.997843233271895e-06, "loss": 0.5345, "step": 3467 }, { "epoch": 0.32545045045045046, "grad_norm": 1.4181052179181939, "learning_rate": 9.997827169189558e-06, "loss": 0.4903, "step": 3468 }, { "epoch": 0.32554429429429427, "grad_norm": 1.1980798050142645, "learning_rate": 9.997811045517536e-06, "loss": 0.4723, "step": 3469 }, { "epoch": 0.32563813813813813, "grad_norm": 3.735939970748066, "learning_rate": 9.997794862256025e-06, "loss": 0.5026, "step": 3470 }, { "epoch": 0.325731981981982, "grad_norm": 1.2690030762848041, "learning_rate": 9.997778619405219e-06, "loss": 0.5327, "step": 3471 }, { "epoch": 0.3258258258258258, "grad_norm": 1.1655234980221783, "learning_rate": 9.99776231696531e-06, "loss": 0.4628, "step": 3472 }, { "epoch": 0.3259196696696697, "grad_norm": 1.8122913254615147, "learning_rate": 9.997745954936492e-06, "loss": 0.556, "step": 3473 }, { "epoch": 0.3260135135135135, "grad_norm": 1.456064356028035, "learning_rate": 9.99772953331896e-06, "loss": 0.5317, "step": 3474 }, { "epoch": 0.32610735735735735, "grad_norm": 1.2807281330169218, "learning_rate": 9.997713052112912e-06, "loss": 0.5614, "step": 3475 }, { "epoch": 0.3262012012012012, "grad_norm": 1.7889992901233058, "learning_rate": 9.997696511318543e-06, "loss": 0.4742, "step": 3476 }, { "epoch": 0.32629504504504503, "grad_norm": 1.697836554108132, "learning_rate": 9.99767991093605e-06, "loss": 0.5668, "step": 3477 }, { "epoch": 0.3263888888888889, "grad_norm": 2.582120552297597, "learning_rate": 9.997663250965633e-06, "loss": 0.4386, "step": 3478 }, { "epoch": 0.3264827327327327, "grad_norm": 1.8884326339551842, "learning_rate": 9.997646531407487e-06, "loss": 0.5032, "step": 3479 }, { "epoch": 0.32657657657657657, "grad_norm": 1.8486127516614548, "learning_rate": 9.997629752261812e-06, "loss": 0.4923, "step": 3480 }, { "epoch": 0.32667042042042044, "grad_norm": 1.472122309778069, "learning_rate": 9.997612913528812e-06, "loss": 0.5346, "step": 3481 }, { "epoch": 0.32676426426426425, "grad_norm": 1.4997532907073303, "learning_rate": 9.997596015208683e-06, "loss": 0.4133, "step": 3482 }, { "epoch": 0.3268581081081081, "grad_norm": 1.4245711323167254, "learning_rate": 9.99757905730163e-06, "loss": 0.4749, "step": 3483 }, { "epoch": 0.3269519519519519, "grad_norm": 1.2786871202276404, "learning_rate": 9.997562039807853e-06, "loss": 0.4936, "step": 3484 }, { "epoch": 0.3270457957957958, "grad_norm": 1.2528488164027216, "learning_rate": 9.997544962727554e-06, "loss": 0.5078, "step": 3485 }, { "epoch": 0.32713963963963966, "grad_norm": 1.818408206174438, "learning_rate": 9.997527826060941e-06, "loss": 0.4796, "step": 3486 }, { "epoch": 0.32723348348348347, "grad_norm": 1.550560141889479, "learning_rate": 9.997510629808215e-06, "loss": 0.5552, "step": 3487 }, { "epoch": 0.32732732732732733, "grad_norm": 1.2465040317338894, "learning_rate": 9.99749337396958e-06, "loss": 0.4946, "step": 3488 }, { "epoch": 0.32742117117117114, "grad_norm": 1.0652250041179805, "learning_rate": 9.997476058545245e-06, "loss": 0.5356, "step": 3489 }, { "epoch": 0.327515015015015, "grad_norm": 1.2735792199751963, "learning_rate": 9.997458683535414e-06, "loss": 0.5426, "step": 3490 }, { "epoch": 0.3276088588588589, "grad_norm": 1.737132498444451, "learning_rate": 9.997441248940295e-06, "loss": 0.4773, "step": 3491 }, { "epoch": 0.3277027027027027, "grad_norm": 1.37043241236298, "learning_rate": 9.997423754760097e-06, "loss": 0.5121, "step": 3492 }, { "epoch": 0.32779654654654655, "grad_norm": 1.6229125821876058, "learning_rate": 9.997406200995026e-06, "loss": 0.5646, "step": 3493 }, { "epoch": 0.32789039039039036, "grad_norm": 1.5134345621379353, "learning_rate": 9.997388587645291e-06, "loss": 0.5345, "step": 3494 }, { "epoch": 0.32798423423423423, "grad_norm": 1.4694467953308286, "learning_rate": 9.997370914711107e-06, "loss": 0.5629, "step": 3495 }, { "epoch": 0.3280780780780781, "grad_norm": 1.3442622105617625, "learning_rate": 9.997353182192679e-06, "loss": 0.4765, "step": 3496 }, { "epoch": 0.3281719219219219, "grad_norm": 1.2243387746114447, "learning_rate": 9.997335390090223e-06, "loss": 0.5159, "step": 3497 }, { "epoch": 0.32826576576576577, "grad_norm": 1.9703946181351784, "learning_rate": 9.997317538403948e-06, "loss": 0.5072, "step": 3498 }, { "epoch": 0.32835960960960964, "grad_norm": 1.5587214010885586, "learning_rate": 9.997299627134067e-06, "loss": 0.5538, "step": 3499 }, { "epoch": 0.32845345345345345, "grad_norm": 1.9048812703600504, "learning_rate": 9.997281656280797e-06, "loss": 0.4463, "step": 3500 }, { "epoch": 0.3285472972972973, "grad_norm": 2.1614307768506182, "learning_rate": 9.997263625844347e-06, "loss": 0.565, "step": 3501 }, { "epoch": 0.3286411411411411, "grad_norm": 1.1254995868313828, "learning_rate": 9.997245535824936e-06, "loss": 0.4457, "step": 3502 }, { "epoch": 0.328734984984985, "grad_norm": 1.4268193086760153, "learning_rate": 9.997227386222777e-06, "loss": 0.551, "step": 3503 }, { "epoch": 0.32882882882882886, "grad_norm": 1.442312901550375, "learning_rate": 9.997209177038088e-06, "loss": 0.5592, "step": 3504 }, { "epoch": 0.32892267267267267, "grad_norm": 1.669677485077408, "learning_rate": 9.997190908271087e-06, "loss": 0.545, "step": 3505 }, { "epoch": 0.32901651651651653, "grad_norm": 1.1954642580789332, "learning_rate": 9.99717257992199e-06, "loss": 0.4884, "step": 3506 }, { "epoch": 0.32911036036036034, "grad_norm": 1.640739189345562, "learning_rate": 9.997154191991016e-06, "loss": 0.5266, "step": 3507 }, { "epoch": 0.3292042042042042, "grad_norm": 1.7692136514160155, "learning_rate": 9.997135744478386e-06, "loss": 0.541, "step": 3508 }, { "epoch": 0.3292980480480481, "grad_norm": 1.6509681906740135, "learning_rate": 9.997117237384317e-06, "loss": 0.4758, "step": 3509 }, { "epoch": 0.3293918918918919, "grad_norm": 1.255056586039335, "learning_rate": 9.997098670709032e-06, "loss": 0.5625, "step": 3510 }, { "epoch": 0.32948573573573575, "grad_norm": 1.379128278463351, "learning_rate": 9.99708004445275e-06, "loss": 0.5097, "step": 3511 }, { "epoch": 0.32957957957957956, "grad_norm": 1.3528267120159725, "learning_rate": 9.997061358615694e-06, "loss": 0.4935, "step": 3512 }, { "epoch": 0.32967342342342343, "grad_norm": 1.7746322224200033, "learning_rate": 9.997042613198088e-06, "loss": 0.5386, "step": 3513 }, { "epoch": 0.3297672672672673, "grad_norm": 1.374635197195743, "learning_rate": 9.997023808200157e-06, "loss": 0.5447, "step": 3514 }, { "epoch": 0.3298611111111111, "grad_norm": 1.17201605616146, "learning_rate": 9.99700494362212e-06, "loss": 0.5224, "step": 3515 }, { "epoch": 0.32995495495495497, "grad_norm": 1.45187091436432, "learning_rate": 9.996986019464205e-06, "loss": 0.5185, "step": 3516 }, { "epoch": 0.3300487987987988, "grad_norm": 1.6384972470353232, "learning_rate": 9.99696703572664e-06, "loss": 0.4871, "step": 3517 }, { "epoch": 0.33014264264264265, "grad_norm": 1.441641626386979, "learning_rate": 9.996947992409647e-06, "loss": 0.5267, "step": 3518 }, { "epoch": 0.3302364864864865, "grad_norm": 1.222273906851615, "learning_rate": 9.996928889513454e-06, "loss": 0.5185, "step": 3519 }, { "epoch": 0.3303303303303303, "grad_norm": 2.3927736377214965, "learning_rate": 9.996909727038293e-06, "loss": 0.5547, "step": 3520 }, { "epoch": 0.3304241741741742, "grad_norm": 1.8582284403901987, "learning_rate": 9.996890504984387e-06, "loss": 0.499, "step": 3521 }, { "epoch": 0.330518018018018, "grad_norm": 1.2779193119081138, "learning_rate": 9.99687122335197e-06, "loss": 0.5104, "step": 3522 }, { "epoch": 0.33061186186186187, "grad_norm": 5.021057422759228, "learning_rate": 9.996851882141267e-06, "loss": 0.4773, "step": 3523 }, { "epoch": 0.33070570570570573, "grad_norm": 2.115414927545711, "learning_rate": 9.99683248135251e-06, "loss": 0.5244, "step": 3524 }, { "epoch": 0.33079954954954954, "grad_norm": 1.4339322107047607, "learning_rate": 9.996813020985932e-06, "loss": 0.5377, "step": 3525 }, { "epoch": 0.3308933933933934, "grad_norm": 1.5250083471113582, "learning_rate": 9.996793501041765e-06, "loss": 0.5265, "step": 3526 }, { "epoch": 0.3309872372372372, "grad_norm": 1.2121615097000895, "learning_rate": 9.996773921520242e-06, "loss": 0.5251, "step": 3527 }, { "epoch": 0.3310810810810811, "grad_norm": 1.3839954854418939, "learning_rate": 9.996754282421594e-06, "loss": 0.549, "step": 3528 }, { "epoch": 0.33117492492492495, "grad_norm": 1.4457642109419158, "learning_rate": 9.996734583746056e-06, "loss": 0.4821, "step": 3529 }, { "epoch": 0.33126876876876876, "grad_norm": 1.947724197102794, "learning_rate": 9.996714825493865e-06, "loss": 0.472, "step": 3530 }, { "epoch": 0.33136261261261263, "grad_norm": 1.3192816114812558, "learning_rate": 9.996695007665253e-06, "loss": 0.5361, "step": 3531 }, { "epoch": 0.33145645645645644, "grad_norm": 1.3228105918677548, "learning_rate": 9.99667513026046e-06, "loss": 0.4764, "step": 3532 }, { "epoch": 0.3315503003003003, "grad_norm": 2.3730567143336274, "learning_rate": 9.99665519327972e-06, "loss": 0.4922, "step": 3533 }, { "epoch": 0.33164414414414417, "grad_norm": 1.4012495493702455, "learning_rate": 9.996635196723274e-06, "loss": 0.5019, "step": 3534 }, { "epoch": 0.331737987987988, "grad_norm": 1.304583119534192, "learning_rate": 9.996615140591356e-06, "loss": 0.5139, "step": 3535 }, { "epoch": 0.33183183183183185, "grad_norm": 1.2559116253064366, "learning_rate": 9.99659502488421e-06, "loss": 0.5233, "step": 3536 }, { "epoch": 0.33192567567567566, "grad_norm": 1.4389319835177805, "learning_rate": 9.996574849602073e-06, "loss": 0.4703, "step": 3537 }, { "epoch": 0.3320195195195195, "grad_norm": 1.2673936474103766, "learning_rate": 9.996554614745186e-06, "loss": 0.569, "step": 3538 }, { "epoch": 0.3321133633633634, "grad_norm": 1.532565654594264, "learning_rate": 9.99653432031379e-06, "loss": 0.5492, "step": 3539 }, { "epoch": 0.3322072072072072, "grad_norm": 1.4676846118904279, "learning_rate": 9.996513966308126e-06, "loss": 0.5035, "step": 3540 }, { "epoch": 0.33230105105105107, "grad_norm": 2.456642838222468, "learning_rate": 9.996493552728438e-06, "loss": 0.4989, "step": 3541 }, { "epoch": 0.3323948948948949, "grad_norm": 1.5175738591742212, "learning_rate": 9.996473079574972e-06, "loss": 0.5235, "step": 3542 }, { "epoch": 0.33248873873873874, "grad_norm": 1.5067475641302461, "learning_rate": 9.996452546847968e-06, "loss": 0.5096, "step": 3543 }, { "epoch": 0.3325825825825826, "grad_norm": 1.3084015599500813, "learning_rate": 9.996431954547672e-06, "loss": 0.5236, "step": 3544 }, { "epoch": 0.3326764264264264, "grad_norm": 1.4444037598592534, "learning_rate": 9.99641130267433e-06, "loss": 0.5159, "step": 3545 }, { "epoch": 0.3327702702702703, "grad_norm": 1.6845856930365966, "learning_rate": 9.996390591228186e-06, "loss": 0.5221, "step": 3546 }, { "epoch": 0.3328641141141141, "grad_norm": 1.83380526981108, "learning_rate": 9.996369820209492e-06, "loss": 0.5116, "step": 3547 }, { "epoch": 0.33295795795795796, "grad_norm": 1.3270405426200966, "learning_rate": 9.996348989618492e-06, "loss": 0.5251, "step": 3548 }, { "epoch": 0.33305180180180183, "grad_norm": 1.8425670678226214, "learning_rate": 9.996328099455432e-06, "loss": 0.5652, "step": 3549 }, { "epoch": 0.33314564564564564, "grad_norm": 1.7059202133579678, "learning_rate": 9.996307149720568e-06, "loss": 0.5045, "step": 3550 }, { "epoch": 0.3332394894894895, "grad_norm": 3.7251337189803024, "learning_rate": 9.996286140414144e-06, "loss": 0.5174, "step": 3551 }, { "epoch": 0.3333333333333333, "grad_norm": 1.4004696865698472, "learning_rate": 9.996265071536412e-06, "loss": 0.5252, "step": 3552 }, { "epoch": 0.3334271771771772, "grad_norm": 1.318711380971433, "learning_rate": 9.996243943087624e-06, "loss": 0.5226, "step": 3553 }, { "epoch": 0.33352102102102105, "grad_norm": 1.246734735221339, "learning_rate": 9.99622275506803e-06, "loss": 0.5191, "step": 3554 }, { "epoch": 0.33361486486486486, "grad_norm": 1.5449672968817265, "learning_rate": 9.996201507477886e-06, "loss": 0.5444, "step": 3555 }, { "epoch": 0.3337087087087087, "grad_norm": 1.3918453119238816, "learning_rate": 9.996180200317442e-06, "loss": 0.4818, "step": 3556 }, { "epoch": 0.33380255255255253, "grad_norm": 1.7949548591486089, "learning_rate": 9.996158833586956e-06, "loss": 0.5216, "step": 3557 }, { "epoch": 0.3338963963963964, "grad_norm": 1.67752865596452, "learning_rate": 9.996137407286677e-06, "loss": 0.5145, "step": 3558 }, { "epoch": 0.33399024024024027, "grad_norm": 1.9724466219924293, "learning_rate": 9.996115921416867e-06, "loss": 0.4951, "step": 3559 }, { "epoch": 0.3340840840840841, "grad_norm": 1.2984308425778543, "learning_rate": 9.996094375977775e-06, "loss": 0.4612, "step": 3560 }, { "epoch": 0.33417792792792794, "grad_norm": 1.343675613624529, "learning_rate": 9.996072770969666e-06, "loss": 0.5513, "step": 3561 }, { "epoch": 0.33427177177177175, "grad_norm": 1.3304502389769235, "learning_rate": 9.99605110639279e-06, "loss": 0.4755, "step": 3562 }, { "epoch": 0.3343656156156156, "grad_norm": 1.2796694073662918, "learning_rate": 9.99602938224741e-06, "loss": 0.4729, "step": 3563 }, { "epoch": 0.3344594594594595, "grad_norm": 1.3436642905277785, "learning_rate": 9.996007598533784e-06, "loss": 0.4576, "step": 3564 }, { "epoch": 0.3345533033033033, "grad_norm": 1.9037324620101845, "learning_rate": 9.99598575525217e-06, "loss": 0.5058, "step": 3565 }, { "epoch": 0.33464714714714716, "grad_norm": 1.4182275472763486, "learning_rate": 9.99596385240283e-06, "loss": 0.5268, "step": 3566 }, { "epoch": 0.33474099099099097, "grad_norm": 1.582139869594775, "learning_rate": 9.995941889986026e-06, "loss": 0.5115, "step": 3567 }, { "epoch": 0.33483483483483484, "grad_norm": 1.4697026330543965, "learning_rate": 9.99591986800202e-06, "loss": 0.5096, "step": 3568 }, { "epoch": 0.3349286786786787, "grad_norm": 2.0921744606489066, "learning_rate": 9.995897786451071e-06, "loss": 0.5753, "step": 3569 }, { "epoch": 0.3350225225225225, "grad_norm": 8.305443491326105, "learning_rate": 9.995875645333445e-06, "loss": 0.5087, "step": 3570 }, { "epoch": 0.3351163663663664, "grad_norm": 1.5498570983063014, "learning_rate": 9.995853444649407e-06, "loss": 0.4923, "step": 3571 }, { "epoch": 0.3352102102102102, "grad_norm": 1.2932744562333034, "learning_rate": 9.99583118439922e-06, "loss": 0.5459, "step": 3572 }, { "epoch": 0.33530405405405406, "grad_norm": 1.351865979348406, "learning_rate": 9.99580886458315e-06, "loss": 0.5489, "step": 3573 }, { "epoch": 0.3353978978978979, "grad_norm": 1.8230000137458071, "learning_rate": 9.995786485201462e-06, "loss": 0.4795, "step": 3574 }, { "epoch": 0.33549174174174173, "grad_norm": 2.089761987363336, "learning_rate": 9.995764046254424e-06, "loss": 0.5354, "step": 3575 }, { "epoch": 0.3355855855855856, "grad_norm": 1.1974341571458529, "learning_rate": 9.995741547742304e-06, "loss": 0.5265, "step": 3576 }, { "epoch": 0.3356794294294294, "grad_norm": 1.190263656867072, "learning_rate": 9.995718989665368e-06, "loss": 0.5374, "step": 3577 }, { "epoch": 0.3357732732732733, "grad_norm": 1.7760523346611221, "learning_rate": 9.995696372023888e-06, "loss": 0.4944, "step": 3578 }, { "epoch": 0.33586711711711714, "grad_norm": 1.7453714915249914, "learning_rate": 9.995673694818133e-06, "loss": 0.5084, "step": 3579 }, { "epoch": 0.33596096096096095, "grad_norm": 1.1822790288553597, "learning_rate": 9.99565095804837e-06, "loss": 0.4635, "step": 3580 }, { "epoch": 0.3360548048048048, "grad_norm": 1.5054521534757461, "learning_rate": 9.995628161714874e-06, "loss": 0.4616, "step": 3581 }, { "epoch": 0.33614864864864863, "grad_norm": 1.211144660796515, "learning_rate": 9.995605305817917e-06, "loss": 0.4762, "step": 3582 }, { "epoch": 0.3362424924924925, "grad_norm": 1.227626793618642, "learning_rate": 9.995582390357767e-06, "loss": 0.4717, "step": 3583 }, { "epoch": 0.33633633633633636, "grad_norm": 1.165033848577572, "learning_rate": 9.995559415334702e-06, "loss": 0.5474, "step": 3584 }, { "epoch": 0.33643018018018017, "grad_norm": 1.969126261262959, "learning_rate": 9.995536380748995e-06, "loss": 0.4953, "step": 3585 }, { "epoch": 0.33652402402402404, "grad_norm": 1.1929875512000103, "learning_rate": 9.995513286600918e-06, "loss": 0.4677, "step": 3586 }, { "epoch": 0.33661786786786785, "grad_norm": 1.3428556730713261, "learning_rate": 9.995490132890749e-06, "loss": 0.5588, "step": 3587 }, { "epoch": 0.3367117117117117, "grad_norm": 1.2021951870773098, "learning_rate": 9.995466919618763e-06, "loss": 0.4701, "step": 3588 }, { "epoch": 0.3368055555555556, "grad_norm": 1.3461567516899122, "learning_rate": 9.995443646785236e-06, "loss": 0.5303, "step": 3589 }, { "epoch": 0.3368993993993994, "grad_norm": 4.751275416713571, "learning_rate": 9.99542031439045e-06, "loss": 0.5244, "step": 3590 }, { "epoch": 0.33699324324324326, "grad_norm": 1.3717828104153587, "learning_rate": 9.995396922434677e-06, "loss": 0.4769, "step": 3591 }, { "epoch": 0.33708708708708707, "grad_norm": 1.6849513152766382, "learning_rate": 9.995373470918198e-06, "loss": 0.5463, "step": 3592 }, { "epoch": 0.33718093093093093, "grad_norm": 1.361211130246632, "learning_rate": 9.995349959841295e-06, "loss": 0.5285, "step": 3593 }, { "epoch": 0.3372747747747748, "grad_norm": 1.3182067261455088, "learning_rate": 9.995326389204245e-06, "loss": 0.5031, "step": 3594 }, { "epoch": 0.3373686186186186, "grad_norm": 1.0818863359025348, "learning_rate": 9.99530275900733e-06, "loss": 0.4719, "step": 3595 }, { "epoch": 0.3374624624624625, "grad_norm": 1.4220134375959876, "learning_rate": 9.995279069250834e-06, "loss": 0.506, "step": 3596 }, { "epoch": 0.3375563063063063, "grad_norm": 3.89171197900631, "learning_rate": 9.995255319935036e-06, "loss": 0.5135, "step": 3597 }, { "epoch": 0.33765015015015015, "grad_norm": 1.0636891189074733, "learning_rate": 9.995231511060222e-06, "loss": 0.4594, "step": 3598 }, { "epoch": 0.337743993993994, "grad_norm": 1.1369721588336246, "learning_rate": 9.995207642626675e-06, "loss": 0.5142, "step": 3599 }, { "epoch": 0.33783783783783783, "grad_norm": 1.1931193602572687, "learning_rate": 9.99518371463468e-06, "loss": 0.5617, "step": 3600 }, { "epoch": 0.3379316816816817, "grad_norm": 1.2136783105214393, "learning_rate": 9.99515972708452e-06, "loss": 0.5114, "step": 3601 }, { "epoch": 0.3380255255255255, "grad_norm": 1.1052232599034664, "learning_rate": 9.995135679976484e-06, "loss": 0.48, "step": 3602 }, { "epoch": 0.33811936936936937, "grad_norm": 1.2253835314755415, "learning_rate": 9.995111573310856e-06, "loss": 0.5207, "step": 3603 }, { "epoch": 0.33821321321321324, "grad_norm": 1.1382842274485463, "learning_rate": 9.995087407087925e-06, "loss": 0.4967, "step": 3604 }, { "epoch": 0.33830705705705705, "grad_norm": 1.5529773409603975, "learning_rate": 9.99506318130798e-06, "loss": 0.5083, "step": 3605 }, { "epoch": 0.3384009009009009, "grad_norm": 1.301922969206265, "learning_rate": 9.99503889597131e-06, "loss": 0.5761, "step": 3606 }, { "epoch": 0.3384947447447447, "grad_norm": 1.2773201884827827, "learning_rate": 9.9950145510782e-06, "loss": 0.5472, "step": 3607 }, { "epoch": 0.3385885885885886, "grad_norm": 1.3846633744007983, "learning_rate": 9.994990146628946e-06, "loss": 0.5112, "step": 3608 }, { "epoch": 0.33868243243243246, "grad_norm": 1.54213623938142, "learning_rate": 9.994965682623836e-06, "loss": 0.5311, "step": 3609 }, { "epoch": 0.33877627627627627, "grad_norm": 1.1341546940187377, "learning_rate": 9.994941159063162e-06, "loss": 0.4886, "step": 3610 }, { "epoch": 0.33887012012012013, "grad_norm": 2.505601832842823, "learning_rate": 9.994916575947217e-06, "loss": 0.5375, "step": 3611 }, { "epoch": 0.33896396396396394, "grad_norm": 1.370976797141387, "learning_rate": 9.994891933276294e-06, "loss": 0.5735, "step": 3612 }, { "epoch": 0.3390578078078078, "grad_norm": 1.3232393841338057, "learning_rate": 9.994867231050687e-06, "loss": 0.5252, "step": 3613 }, { "epoch": 0.3391516516516517, "grad_norm": 3.4764341661901677, "learning_rate": 9.994842469270689e-06, "loss": 0.562, "step": 3614 }, { "epoch": 0.3392454954954955, "grad_norm": 1.4393879261160454, "learning_rate": 9.994817647936597e-06, "loss": 0.5121, "step": 3615 }, { "epoch": 0.33933933933933935, "grad_norm": 1.754584467012954, "learning_rate": 9.994792767048706e-06, "loss": 0.5005, "step": 3616 }, { "epoch": 0.33943318318318316, "grad_norm": 1.3553612228414331, "learning_rate": 9.994767826607313e-06, "loss": 0.526, "step": 3617 }, { "epoch": 0.33952702702702703, "grad_norm": 1.264830241723636, "learning_rate": 9.994742826612715e-06, "loss": 0.502, "step": 3618 }, { "epoch": 0.3396208708708709, "grad_norm": 1.153214461068684, "learning_rate": 9.994717767065211e-06, "loss": 0.5115, "step": 3619 }, { "epoch": 0.3397147147147147, "grad_norm": 1.231693082909941, "learning_rate": 9.994692647965101e-06, "loss": 0.5103, "step": 3620 }, { "epoch": 0.33980855855855857, "grad_norm": 1.1280378873512402, "learning_rate": 9.99466746931268e-06, "loss": 0.5146, "step": 3621 }, { "epoch": 0.3399024024024024, "grad_norm": 1.391417411909603, "learning_rate": 9.994642231108251e-06, "loss": 0.5064, "step": 3622 }, { "epoch": 0.33999624624624625, "grad_norm": 1.4382488231798416, "learning_rate": 9.994616933352118e-06, "loss": 0.5685, "step": 3623 }, { "epoch": 0.3400900900900901, "grad_norm": 1.506329752302932, "learning_rate": 9.994591576044576e-06, "loss": 0.5044, "step": 3624 }, { "epoch": 0.3401839339339339, "grad_norm": 1.40218737084109, "learning_rate": 9.994566159185932e-06, "loss": 0.5053, "step": 3625 }, { "epoch": 0.3402777777777778, "grad_norm": 1.5025160430595932, "learning_rate": 9.994540682776488e-06, "loss": 0.6006, "step": 3626 }, { "epoch": 0.3403716216216216, "grad_norm": 1.1167086894626488, "learning_rate": 9.994515146816548e-06, "loss": 0.4853, "step": 3627 }, { "epoch": 0.34046546546546547, "grad_norm": 1.1140144848023634, "learning_rate": 9.994489551306414e-06, "loss": 0.5012, "step": 3628 }, { "epoch": 0.34055930930930933, "grad_norm": 1.1886468745973424, "learning_rate": 9.994463896246396e-06, "loss": 0.5089, "step": 3629 }, { "epoch": 0.34065315315315314, "grad_norm": 1.1936668774319434, "learning_rate": 9.994438181636796e-06, "loss": 0.4816, "step": 3630 }, { "epoch": 0.340746996996997, "grad_norm": 1.4236187092260848, "learning_rate": 9.99441240747792e-06, "loss": 0.5635, "step": 3631 }, { "epoch": 0.3408408408408408, "grad_norm": 1.4982093246777388, "learning_rate": 9.99438657377008e-06, "loss": 0.5263, "step": 3632 }, { "epoch": 0.3409346846846847, "grad_norm": 1.2611419711666678, "learning_rate": 9.994360680513579e-06, "loss": 0.4933, "step": 3633 }, { "epoch": 0.34102852852852855, "grad_norm": 1.2826142826112226, "learning_rate": 9.994334727708729e-06, "loss": 0.4869, "step": 3634 }, { "epoch": 0.34112237237237236, "grad_norm": 1.2114489384965865, "learning_rate": 9.994308715355838e-06, "loss": 0.4867, "step": 3635 }, { "epoch": 0.34121621621621623, "grad_norm": 1.3003628415634223, "learning_rate": 9.994282643455215e-06, "loss": 0.4882, "step": 3636 }, { "epoch": 0.34131006006006004, "grad_norm": 1.4326891943095572, "learning_rate": 9.994256512007174e-06, "loss": 0.5418, "step": 3637 }, { "epoch": 0.3414039039039039, "grad_norm": 1.5353179083430422, "learning_rate": 9.994230321012025e-06, "loss": 0.5088, "step": 3638 }, { "epoch": 0.34149774774774777, "grad_norm": 1.729816383871648, "learning_rate": 9.99420407047008e-06, "loss": 0.5447, "step": 3639 }, { "epoch": 0.3415915915915916, "grad_norm": 1.4336846491567343, "learning_rate": 9.994177760381652e-06, "loss": 0.5099, "step": 3640 }, { "epoch": 0.34168543543543545, "grad_norm": 4.10625302992626, "learning_rate": 9.994151390747053e-06, "loss": 0.499, "step": 3641 }, { "epoch": 0.34177927927927926, "grad_norm": 1.3332602651964658, "learning_rate": 9.9941249615666e-06, "loss": 0.5066, "step": 3642 }, { "epoch": 0.3418731231231231, "grad_norm": 1.5772093399877922, "learning_rate": 9.994098472840607e-06, "loss": 0.5383, "step": 3643 }, { "epoch": 0.341966966966967, "grad_norm": 1.4503804173923713, "learning_rate": 9.994071924569393e-06, "loss": 0.5458, "step": 3644 }, { "epoch": 0.3420608108108108, "grad_norm": 1.296644453199209, "learning_rate": 9.994045316753269e-06, "loss": 0.5122, "step": 3645 }, { "epoch": 0.34215465465465467, "grad_norm": 1.2472091238794705, "learning_rate": 9.994018649392555e-06, "loss": 0.5412, "step": 3646 }, { "epoch": 0.3422484984984985, "grad_norm": 1.4469440037636023, "learning_rate": 9.993991922487569e-06, "loss": 0.5523, "step": 3647 }, { "epoch": 0.34234234234234234, "grad_norm": 1.6293899373284568, "learning_rate": 9.99396513603863e-06, "loss": 0.4996, "step": 3648 }, { "epoch": 0.3424361861861862, "grad_norm": 1.2619230600470017, "learning_rate": 9.993938290046056e-06, "loss": 0.4956, "step": 3649 }, { "epoch": 0.34253003003003, "grad_norm": 1.277949756203282, "learning_rate": 9.99391138451017e-06, "loss": 0.4733, "step": 3650 }, { "epoch": 0.3426238738738739, "grad_norm": 1.4927287788140584, "learning_rate": 9.99388441943129e-06, "loss": 0.5474, "step": 3651 }, { "epoch": 0.3427177177177177, "grad_norm": 1.238524248251714, "learning_rate": 9.993857394809738e-06, "loss": 0.5305, "step": 3652 }, { "epoch": 0.34281156156156156, "grad_norm": 1.3428865319848333, "learning_rate": 9.993830310645836e-06, "loss": 0.4942, "step": 3653 }, { "epoch": 0.34290540540540543, "grad_norm": 1.1761532466242388, "learning_rate": 9.993803166939908e-06, "loss": 0.5164, "step": 3654 }, { "epoch": 0.34299924924924924, "grad_norm": 1.30641112880646, "learning_rate": 9.993775963692277e-06, "loss": 0.5277, "step": 3655 }, { "epoch": 0.3430930930930931, "grad_norm": 2.023452462757985, "learning_rate": 9.993748700903267e-06, "loss": 0.4906, "step": 3656 }, { "epoch": 0.3431869369369369, "grad_norm": 1.6217565608826805, "learning_rate": 9.993721378573205e-06, "loss": 0.506, "step": 3657 }, { "epoch": 0.3432807807807808, "grad_norm": 1.1679476906530217, "learning_rate": 9.993693996702413e-06, "loss": 0.5108, "step": 3658 }, { "epoch": 0.34337462462462465, "grad_norm": 1.2689240740092527, "learning_rate": 9.993666555291221e-06, "loss": 0.5201, "step": 3659 }, { "epoch": 0.34346846846846846, "grad_norm": 2.749971338994083, "learning_rate": 9.993639054339956e-06, "loss": 0.5243, "step": 3660 }, { "epoch": 0.3435623123123123, "grad_norm": 2.101312017997155, "learning_rate": 9.993611493848945e-06, "loss": 0.4973, "step": 3661 }, { "epoch": 0.34365615615615613, "grad_norm": 1.2752030778647814, "learning_rate": 9.993583873818515e-06, "loss": 0.4732, "step": 3662 }, { "epoch": 0.34375, "grad_norm": 3.9791631863542514, "learning_rate": 9.993556194248997e-06, "loss": 0.4706, "step": 3663 }, { "epoch": 0.34384384384384387, "grad_norm": 1.7814546339711068, "learning_rate": 9.99352845514072e-06, "loss": 0.477, "step": 3664 }, { "epoch": 0.3439376876876877, "grad_norm": 1.5243653745502992, "learning_rate": 9.993500656494018e-06, "loss": 0.5252, "step": 3665 }, { "epoch": 0.34403153153153154, "grad_norm": 1.5552323174486051, "learning_rate": 9.993472798309217e-06, "loss": 0.5439, "step": 3666 }, { "epoch": 0.34412537537537535, "grad_norm": 1.619446417897773, "learning_rate": 9.993444880586653e-06, "loss": 0.5161, "step": 3667 }, { "epoch": 0.3442192192192192, "grad_norm": 1.235217887284147, "learning_rate": 9.993416903326657e-06, "loss": 0.5229, "step": 3668 }, { "epoch": 0.3443130630630631, "grad_norm": 1.3638874832264556, "learning_rate": 9.993388866529567e-06, "loss": 0.5358, "step": 3669 }, { "epoch": 0.3444069069069069, "grad_norm": 1.9404549001986873, "learning_rate": 9.993360770195713e-06, "loss": 0.5554, "step": 3670 }, { "epoch": 0.34450075075075076, "grad_norm": 1.3063064015694028, "learning_rate": 9.993332614325428e-06, "loss": 0.5287, "step": 3671 }, { "epoch": 0.34459459459459457, "grad_norm": 1.4345099946312627, "learning_rate": 9.993304398919053e-06, "loss": 0.5161, "step": 3672 }, { "epoch": 0.34468843843843844, "grad_norm": 1.3245794218293043, "learning_rate": 9.99327612397692e-06, "loss": 0.5092, "step": 3673 }, { "epoch": 0.3447822822822823, "grad_norm": 1.1797920338368855, "learning_rate": 9.993247789499369e-06, "loss": 0.4596, "step": 3674 }, { "epoch": 0.3448761261261261, "grad_norm": 1.3249133280926837, "learning_rate": 9.993219395486739e-06, "loss": 0.5196, "step": 3675 }, { "epoch": 0.34496996996997, "grad_norm": 1.0856821183814036, "learning_rate": 9.993190941939364e-06, "loss": 0.4826, "step": 3676 }, { "epoch": 0.3450638138138138, "grad_norm": 1.483621604100552, "learning_rate": 9.993162428857586e-06, "loss": 0.5112, "step": 3677 }, { "epoch": 0.34515765765765766, "grad_norm": 1.3060089064269986, "learning_rate": 9.993133856241745e-06, "loss": 0.4818, "step": 3678 }, { "epoch": 0.3452515015015015, "grad_norm": 1.3868449881393392, "learning_rate": 9.993105224092183e-06, "loss": 0.5021, "step": 3679 }, { "epoch": 0.34534534534534533, "grad_norm": 2.5079468601368293, "learning_rate": 9.993076532409239e-06, "loss": 0.529, "step": 3680 }, { "epoch": 0.3454391891891892, "grad_norm": 1.2371817666830414, "learning_rate": 9.993047781193255e-06, "loss": 0.5418, "step": 3681 }, { "epoch": 0.345533033033033, "grad_norm": 1.3487596374161759, "learning_rate": 9.993018970444575e-06, "loss": 0.5091, "step": 3682 }, { "epoch": 0.3456268768768769, "grad_norm": 1.5655349298624106, "learning_rate": 9.992990100163542e-06, "loss": 0.5286, "step": 3683 }, { "epoch": 0.34572072072072074, "grad_norm": 1.3288389220431633, "learning_rate": 9.9929611703505e-06, "loss": 0.5003, "step": 3684 }, { "epoch": 0.34581456456456455, "grad_norm": 1.1904723550260443, "learning_rate": 9.992932181005794e-06, "loss": 0.5402, "step": 3685 }, { "epoch": 0.3459084084084084, "grad_norm": 1.49149628663401, "learning_rate": 9.99290313212977e-06, "loss": 0.5408, "step": 3686 }, { "epoch": 0.34600225225225223, "grad_norm": 2.1636762185736753, "learning_rate": 9.992874023722777e-06, "loss": 0.549, "step": 3687 }, { "epoch": 0.3460960960960961, "grad_norm": 1.1940196967361634, "learning_rate": 9.992844855785155e-06, "loss": 0.4906, "step": 3688 }, { "epoch": 0.34618993993993996, "grad_norm": 1.311005879996304, "learning_rate": 9.99281562831726e-06, "loss": 0.4967, "step": 3689 }, { "epoch": 0.34628378378378377, "grad_norm": 1.7812807116860379, "learning_rate": 9.992786341319435e-06, "loss": 0.4532, "step": 3690 }, { "epoch": 0.34637762762762764, "grad_norm": 1.4693160443333277, "learning_rate": 9.99275699479203e-06, "loss": 0.4753, "step": 3691 }, { "epoch": 0.34647147147147145, "grad_norm": 1.4067539946378946, "learning_rate": 9.992727588735399e-06, "loss": 0.5262, "step": 3692 }, { "epoch": 0.3465653153153153, "grad_norm": 1.4401063513058567, "learning_rate": 9.992698123149886e-06, "loss": 0.5834, "step": 3693 }, { "epoch": 0.3466591591591592, "grad_norm": 1.5318395689308981, "learning_rate": 9.992668598035846e-06, "loss": 0.509, "step": 3694 }, { "epoch": 0.346753003003003, "grad_norm": 1.2750023319964137, "learning_rate": 9.992639013393632e-06, "loss": 0.535, "step": 3695 }, { "epoch": 0.34684684684684686, "grad_norm": 1.6627350377844798, "learning_rate": 9.992609369223594e-06, "loss": 0.4966, "step": 3696 }, { "epoch": 0.34694069069069067, "grad_norm": 1.3559996516603845, "learning_rate": 9.992579665526088e-06, "loss": 0.5445, "step": 3697 }, { "epoch": 0.34703453453453453, "grad_norm": 1.3702890805471069, "learning_rate": 9.992549902301468e-06, "loss": 0.5586, "step": 3698 }, { "epoch": 0.3471283783783784, "grad_norm": 1.4135309568103074, "learning_rate": 9.992520079550086e-06, "loss": 0.5444, "step": 3699 }, { "epoch": 0.3472222222222222, "grad_norm": 1.5160144253337424, "learning_rate": 9.9924901972723e-06, "loss": 0.5371, "step": 3700 }, { "epoch": 0.3473160660660661, "grad_norm": 1.9580740916827941, "learning_rate": 9.992460255468468e-06, "loss": 0.5228, "step": 3701 }, { "epoch": 0.3474099099099099, "grad_norm": 1.2897348944258011, "learning_rate": 9.992430254138942e-06, "loss": 0.5058, "step": 3702 }, { "epoch": 0.34750375375375375, "grad_norm": 1.4452796728805728, "learning_rate": 9.992400193284083e-06, "loss": 0.5069, "step": 3703 }, { "epoch": 0.3475975975975976, "grad_norm": 1.454100489878059, "learning_rate": 9.99237007290425e-06, "loss": 0.4884, "step": 3704 }, { "epoch": 0.34769144144144143, "grad_norm": 1.1994810550644992, "learning_rate": 9.9923398929998e-06, "loss": 0.4976, "step": 3705 }, { "epoch": 0.3477852852852853, "grad_norm": 1.9688821908788154, "learning_rate": 9.992309653571094e-06, "loss": 0.4873, "step": 3706 }, { "epoch": 0.3478791291291291, "grad_norm": 1.6718122915272369, "learning_rate": 9.992279354618492e-06, "loss": 0.4876, "step": 3707 }, { "epoch": 0.34797297297297297, "grad_norm": 1.129210475072306, "learning_rate": 9.992248996142358e-06, "loss": 0.4794, "step": 3708 }, { "epoch": 0.34806681681681684, "grad_norm": 1.3361645415263872, "learning_rate": 9.99221857814305e-06, "loss": 0.5259, "step": 3709 }, { "epoch": 0.34816066066066065, "grad_norm": 1.2850537199712082, "learning_rate": 9.992188100620932e-06, "loss": 0.473, "step": 3710 }, { "epoch": 0.3482545045045045, "grad_norm": 5.176046305775118, "learning_rate": 9.992157563576368e-06, "loss": 0.5233, "step": 3711 }, { "epoch": 0.3483483483483483, "grad_norm": 1.3970046383022332, "learning_rate": 9.992126967009721e-06, "loss": 0.4948, "step": 3712 }, { "epoch": 0.3484421921921922, "grad_norm": 1.6577640417138433, "learning_rate": 9.992096310921358e-06, "loss": 0.5284, "step": 3713 }, { "epoch": 0.34853603603603606, "grad_norm": 1.2382858873136244, "learning_rate": 9.992065595311641e-06, "loss": 0.4835, "step": 3714 }, { "epoch": 0.34862987987987987, "grad_norm": 1.5696083139964925, "learning_rate": 9.992034820180942e-06, "loss": 0.5172, "step": 3715 }, { "epoch": 0.34872372372372373, "grad_norm": 1.4371447811653622, "learning_rate": 9.99200398552962e-06, "loss": 0.5351, "step": 3716 }, { "epoch": 0.34881756756756754, "grad_norm": 2.3676471249429145, "learning_rate": 9.991973091358051e-06, "loss": 0.4099, "step": 3717 }, { "epoch": 0.3489114114114114, "grad_norm": 1.3465688332293348, "learning_rate": 9.991942137666598e-06, "loss": 0.466, "step": 3718 }, { "epoch": 0.3490052552552553, "grad_norm": 1.0936063712721906, "learning_rate": 9.991911124455632e-06, "loss": 0.5209, "step": 3719 }, { "epoch": 0.3490990990990991, "grad_norm": 1.4268054999090958, "learning_rate": 9.99188005172552e-06, "loss": 0.5099, "step": 3720 }, { "epoch": 0.34919294294294295, "grad_norm": 1.2286901479013932, "learning_rate": 9.991848919476637e-06, "loss": 0.5248, "step": 3721 }, { "epoch": 0.34928678678678676, "grad_norm": 2.0226121112838253, "learning_rate": 9.99181772770935e-06, "loss": 0.4711, "step": 3722 }, { "epoch": 0.34938063063063063, "grad_norm": 2.0196763246803777, "learning_rate": 9.991786476424032e-06, "loss": 0.5702, "step": 3723 }, { "epoch": 0.3494744744744745, "grad_norm": 1.3838886228967984, "learning_rate": 9.991755165621059e-06, "loss": 0.544, "step": 3724 }, { "epoch": 0.3495683183183183, "grad_norm": 1.2322259036736471, "learning_rate": 9.9917237953008e-06, "loss": 0.5437, "step": 3725 }, { "epoch": 0.34966216216216217, "grad_norm": 1.3001470927669032, "learning_rate": 9.991692365463632e-06, "loss": 0.5043, "step": 3726 }, { "epoch": 0.349756006006006, "grad_norm": 2.306422437647388, "learning_rate": 9.991660876109927e-06, "loss": 0.5877, "step": 3727 }, { "epoch": 0.34984984984984985, "grad_norm": 1.2105564421974429, "learning_rate": 9.991629327240061e-06, "loss": 0.4986, "step": 3728 }, { "epoch": 0.3499436936936937, "grad_norm": 1.290698411579225, "learning_rate": 9.991597718854414e-06, "loss": 0.4562, "step": 3729 }, { "epoch": 0.3500375375375375, "grad_norm": 1.1367286060624455, "learning_rate": 9.991566050953358e-06, "loss": 0.5196, "step": 3730 }, { "epoch": 0.3501313813813814, "grad_norm": 1.328177687197742, "learning_rate": 9.991534323537272e-06, "loss": 0.5266, "step": 3731 }, { "epoch": 0.3502252252252252, "grad_norm": 1.132223939041125, "learning_rate": 9.991502536606537e-06, "loss": 0.5503, "step": 3732 }, { "epoch": 0.35031906906906907, "grad_norm": 1.424326698792379, "learning_rate": 9.991470690161528e-06, "loss": 0.4844, "step": 3733 }, { "epoch": 0.35041291291291293, "grad_norm": 1.2000306776372682, "learning_rate": 9.991438784202628e-06, "loss": 0.521, "step": 3734 }, { "epoch": 0.35050675675675674, "grad_norm": 1.7834225347250838, "learning_rate": 9.991406818730214e-06, "loss": 0.5459, "step": 3735 }, { "epoch": 0.3506006006006006, "grad_norm": 1.8862333574912964, "learning_rate": 9.991374793744669e-06, "loss": 0.5091, "step": 3736 }, { "epoch": 0.3506944444444444, "grad_norm": 1.512644105554941, "learning_rate": 9.991342709246376e-06, "loss": 0.4919, "step": 3737 }, { "epoch": 0.3507882882882883, "grad_norm": 28.893430123362656, "learning_rate": 9.991310565235717e-06, "loss": 0.4884, "step": 3738 }, { "epoch": 0.35088213213213215, "grad_norm": 1.2072444486305223, "learning_rate": 9.991278361713072e-06, "loss": 0.5098, "step": 3739 }, { "epoch": 0.35097597597597596, "grad_norm": 4.796946287919748, "learning_rate": 9.991246098678831e-06, "loss": 0.4796, "step": 3740 }, { "epoch": 0.35106981981981983, "grad_norm": 1.3701715222328594, "learning_rate": 9.991213776133373e-06, "loss": 0.4654, "step": 3741 }, { "epoch": 0.35116366366366364, "grad_norm": 1.1358238934384166, "learning_rate": 9.991181394077088e-06, "loss": 0.4862, "step": 3742 }, { "epoch": 0.3512575075075075, "grad_norm": 1.2381580507302872, "learning_rate": 9.991148952510358e-06, "loss": 0.5462, "step": 3743 }, { "epoch": 0.35135135135135137, "grad_norm": 1.1961063505793723, "learning_rate": 9.991116451433572e-06, "loss": 0.4845, "step": 3744 }, { "epoch": 0.3514451951951952, "grad_norm": 1.2985192023400627, "learning_rate": 9.991083890847117e-06, "loss": 0.5024, "step": 3745 }, { "epoch": 0.35153903903903905, "grad_norm": 1.261224543887986, "learning_rate": 9.991051270751382e-06, "loss": 0.5185, "step": 3746 }, { "epoch": 0.35163288288288286, "grad_norm": 2.058581026738431, "learning_rate": 9.991018591146754e-06, "loss": 0.5729, "step": 3747 }, { "epoch": 0.3517267267267267, "grad_norm": 1.136575551659277, "learning_rate": 9.990985852033625e-06, "loss": 0.5217, "step": 3748 }, { "epoch": 0.3518205705705706, "grad_norm": 1.5478401563381652, "learning_rate": 9.990953053412384e-06, "loss": 0.466, "step": 3749 }, { "epoch": 0.3519144144144144, "grad_norm": 1.2377225667686018, "learning_rate": 9.990920195283421e-06, "loss": 0.5096, "step": 3750 }, { "epoch": 0.35200825825825827, "grad_norm": 1.640858291772302, "learning_rate": 9.990887277647133e-06, "loss": 0.5416, "step": 3751 }, { "epoch": 0.3521021021021021, "grad_norm": 1.276179850730377, "learning_rate": 9.990854300503905e-06, "loss": 0.5128, "step": 3752 }, { "epoch": 0.35219594594594594, "grad_norm": 1.271108011270606, "learning_rate": 9.990821263854134e-06, "loss": 0.4893, "step": 3753 }, { "epoch": 0.3522897897897898, "grad_norm": 1.6679889370081957, "learning_rate": 9.990788167698215e-06, "loss": 0.5501, "step": 3754 }, { "epoch": 0.3523836336336336, "grad_norm": 1.0965162908960921, "learning_rate": 9.990755012036541e-06, "loss": 0.4702, "step": 3755 }, { "epoch": 0.3524774774774775, "grad_norm": 1.1823665637631262, "learning_rate": 9.990721796869506e-06, "loss": 0.4775, "step": 3756 }, { "epoch": 0.3525713213213213, "grad_norm": 1.4445404189595754, "learning_rate": 9.990688522197509e-06, "loss": 0.5257, "step": 3757 }, { "epoch": 0.35266516516516516, "grad_norm": 1.426213757922932, "learning_rate": 9.990655188020946e-06, "loss": 0.5043, "step": 3758 }, { "epoch": 0.35275900900900903, "grad_norm": 1.3057746188008659, "learning_rate": 9.990621794340215e-06, "loss": 0.5399, "step": 3759 }, { "epoch": 0.35285285285285284, "grad_norm": 1.669998118783276, "learning_rate": 9.990588341155709e-06, "loss": 0.4785, "step": 3760 }, { "epoch": 0.3529466966966967, "grad_norm": 1.3361109941597689, "learning_rate": 9.990554828467833e-06, "loss": 0.5484, "step": 3761 }, { "epoch": 0.3530405405405405, "grad_norm": 1.3669225248456303, "learning_rate": 9.990521256276985e-06, "loss": 0.5161, "step": 3762 }, { "epoch": 0.3531343843843844, "grad_norm": 1.5961160642720955, "learning_rate": 9.990487624583565e-06, "loss": 0.5176, "step": 3763 }, { "epoch": 0.35322822822822825, "grad_norm": 1.264954561234558, "learning_rate": 9.990453933387973e-06, "loss": 0.5483, "step": 3764 }, { "epoch": 0.35332207207207206, "grad_norm": 1.3264082900948735, "learning_rate": 9.99042018269061e-06, "loss": 0.5003, "step": 3765 }, { "epoch": 0.3534159159159159, "grad_norm": 1.3140666631823033, "learning_rate": 9.990386372491883e-06, "loss": 0.5226, "step": 3766 }, { "epoch": 0.35350975975975973, "grad_norm": 2.2086181451729625, "learning_rate": 9.990352502792188e-06, "loss": 0.5224, "step": 3767 }, { "epoch": 0.3536036036036036, "grad_norm": 1.5129697406285214, "learning_rate": 9.990318573591936e-06, "loss": 0.4592, "step": 3768 }, { "epoch": 0.35369744744744747, "grad_norm": 1.2222396951230476, "learning_rate": 9.990284584891528e-06, "loss": 0.519, "step": 3769 }, { "epoch": 0.3537912912912913, "grad_norm": 1.0936636200566918, "learning_rate": 9.990250536691368e-06, "loss": 0.5126, "step": 3770 }, { "epoch": 0.35388513513513514, "grad_norm": 1.2571464587126477, "learning_rate": 9.990216428991864e-06, "loss": 0.5266, "step": 3771 }, { "epoch": 0.35397897897897895, "grad_norm": 1.2164378969791358, "learning_rate": 9.990182261793422e-06, "loss": 0.4906, "step": 3772 }, { "epoch": 0.3540728228228228, "grad_norm": 1.4602694387416228, "learning_rate": 9.99014803509645e-06, "loss": 0.502, "step": 3773 }, { "epoch": 0.3541666666666667, "grad_norm": 2.713407444115041, "learning_rate": 9.990113748901356e-06, "loss": 0.4956, "step": 3774 }, { "epoch": 0.3542605105105105, "grad_norm": 2.698237916282939, "learning_rate": 9.990079403208547e-06, "loss": 0.4847, "step": 3775 }, { "epoch": 0.35435435435435436, "grad_norm": 1.832441274604584, "learning_rate": 9.990044998018437e-06, "loss": 0.5258, "step": 3776 }, { "epoch": 0.35444819819819817, "grad_norm": 1.3950264532938796, "learning_rate": 9.99001053333143e-06, "loss": 0.544, "step": 3777 }, { "epoch": 0.35454204204204204, "grad_norm": 2.0250491683580107, "learning_rate": 9.989976009147943e-06, "loss": 0.5144, "step": 3778 }, { "epoch": 0.3546358858858859, "grad_norm": 6.569957520252247, "learning_rate": 9.989941425468383e-06, "loss": 0.5086, "step": 3779 }, { "epoch": 0.3547297297297297, "grad_norm": 1.3219608198645258, "learning_rate": 9.989906782293164e-06, "loss": 0.4731, "step": 3780 }, { "epoch": 0.3548235735735736, "grad_norm": 1.3512573183496106, "learning_rate": 9.9898720796227e-06, "loss": 0.541, "step": 3781 }, { "epoch": 0.3549174174174174, "grad_norm": 2.095375067391804, "learning_rate": 9.989837317457403e-06, "loss": 0.5394, "step": 3782 }, { "epoch": 0.35501126126126126, "grad_norm": 1.1432012106358467, "learning_rate": 9.989802495797689e-06, "loss": 0.5361, "step": 3783 }, { "epoch": 0.3551051051051051, "grad_norm": 1.3497150454642926, "learning_rate": 9.989767614643973e-06, "loss": 0.5172, "step": 3784 }, { "epoch": 0.35519894894894893, "grad_norm": 1.1669031149270779, "learning_rate": 9.989732673996669e-06, "loss": 0.5054, "step": 3785 }, { "epoch": 0.3552927927927928, "grad_norm": 1.2868404618174638, "learning_rate": 9.989697673856196e-06, "loss": 0.4688, "step": 3786 }, { "epoch": 0.3553866366366366, "grad_norm": 1.4392144090518892, "learning_rate": 9.98966261422297e-06, "loss": 0.5247, "step": 3787 }, { "epoch": 0.3554804804804805, "grad_norm": 1.1429601804387162, "learning_rate": 9.989627495097409e-06, "loss": 0.4782, "step": 3788 }, { "epoch": 0.35557432432432434, "grad_norm": 2.25596818789108, "learning_rate": 9.989592316479934e-06, "loss": 0.5248, "step": 3789 }, { "epoch": 0.35566816816816815, "grad_norm": 1.230857613628972, "learning_rate": 9.989557078370962e-06, "loss": 0.485, "step": 3790 }, { "epoch": 0.355762012012012, "grad_norm": 1.1804028578343222, "learning_rate": 9.989521780770912e-06, "loss": 0.4984, "step": 3791 }, { "epoch": 0.35585585585585583, "grad_norm": 1.1743081572231504, "learning_rate": 9.989486423680208e-06, "loss": 0.4844, "step": 3792 }, { "epoch": 0.3559496996996997, "grad_norm": 1.257591328119596, "learning_rate": 9.989451007099269e-06, "loss": 0.5248, "step": 3793 }, { "epoch": 0.35604354354354356, "grad_norm": 1.2882834147559903, "learning_rate": 9.98941553102852e-06, "loss": 0.5228, "step": 3794 }, { "epoch": 0.35613738738738737, "grad_norm": 1.2529040280647803, "learning_rate": 9.98937999546838e-06, "loss": 0.5352, "step": 3795 }, { "epoch": 0.35623123123123124, "grad_norm": 1.2368766268139446, "learning_rate": 9.989344400419279e-06, "loss": 0.5279, "step": 3796 }, { "epoch": 0.35632507507507505, "grad_norm": 1.8692498171945489, "learning_rate": 9.989308745881634e-06, "loss": 0.4384, "step": 3797 }, { "epoch": 0.3564189189189189, "grad_norm": 1.406402039740976, "learning_rate": 9.989273031855875e-06, "loss": 0.499, "step": 3798 }, { "epoch": 0.3565127627627628, "grad_norm": 1.362882884096299, "learning_rate": 9.989237258342425e-06, "loss": 0.5186, "step": 3799 }, { "epoch": 0.3566066066066066, "grad_norm": 1.2824297354745529, "learning_rate": 9.989201425341715e-06, "loss": 0.4792, "step": 3800 }, { "epoch": 0.35670045045045046, "grad_norm": 2.081848430647506, "learning_rate": 9.989165532854167e-06, "loss": 0.5478, "step": 3801 }, { "epoch": 0.35679429429429427, "grad_norm": 1.108108751689193, "learning_rate": 9.989129580880213e-06, "loss": 0.4597, "step": 3802 }, { "epoch": 0.35688813813813813, "grad_norm": 1.107871629899946, "learning_rate": 9.989093569420277e-06, "loss": 0.4829, "step": 3803 }, { "epoch": 0.356981981981982, "grad_norm": 1.2644578630776202, "learning_rate": 9.989057498474793e-06, "loss": 0.5118, "step": 3804 }, { "epoch": 0.3570758258258258, "grad_norm": 1.1217871309775427, "learning_rate": 9.989021368044189e-06, "loss": 0.4998, "step": 3805 }, { "epoch": 0.3571696696696697, "grad_norm": 1.1180504322383988, "learning_rate": 9.988985178128895e-06, "loss": 0.4381, "step": 3806 }, { "epoch": 0.3572635135135135, "grad_norm": 1.6140374074759474, "learning_rate": 9.988948928729344e-06, "loss": 0.4693, "step": 3807 }, { "epoch": 0.35735735735735735, "grad_norm": 1.1238011115016409, "learning_rate": 9.98891261984597e-06, "loss": 0.5402, "step": 3808 }, { "epoch": 0.3574512012012012, "grad_norm": 1.474797600004638, "learning_rate": 9.988876251479201e-06, "loss": 0.4799, "step": 3809 }, { "epoch": 0.35754504504504503, "grad_norm": 1.2230758802033692, "learning_rate": 9.988839823629474e-06, "loss": 0.5084, "step": 3810 }, { "epoch": 0.3576388888888889, "grad_norm": 1.1705588650342436, "learning_rate": 9.988803336297223e-06, "loss": 0.5364, "step": 3811 }, { "epoch": 0.3577327327327327, "grad_norm": 1.4273531988691686, "learning_rate": 9.988766789482881e-06, "loss": 0.5603, "step": 3812 }, { "epoch": 0.35782657657657657, "grad_norm": 1.1364889422341786, "learning_rate": 9.988730183186889e-06, "loss": 0.4901, "step": 3813 }, { "epoch": 0.35792042042042044, "grad_norm": 1.3735602840949408, "learning_rate": 9.988693517409678e-06, "loss": 0.5407, "step": 3814 }, { "epoch": 0.35801426426426425, "grad_norm": 1.4036316137537421, "learning_rate": 9.988656792151686e-06, "loss": 0.4956, "step": 3815 }, { "epoch": 0.3581081081081081, "grad_norm": 1.2161593677764608, "learning_rate": 9.988620007413354e-06, "loss": 0.4735, "step": 3816 }, { "epoch": 0.3582019519519519, "grad_norm": 1.3646653489421783, "learning_rate": 9.988583163195115e-06, "loss": 0.537, "step": 3817 }, { "epoch": 0.3582957957957958, "grad_norm": 1.1614013550601507, "learning_rate": 9.988546259497415e-06, "loss": 0.462, "step": 3818 }, { "epoch": 0.35838963963963966, "grad_norm": 1.3411214858491995, "learning_rate": 9.988509296320691e-06, "loss": 0.4919, "step": 3819 }, { "epoch": 0.35848348348348347, "grad_norm": 1.0726538576389772, "learning_rate": 9.988472273665382e-06, "loss": 0.4819, "step": 3820 }, { "epoch": 0.35857732732732733, "grad_norm": 2.42625663178483, "learning_rate": 9.988435191531932e-06, "loss": 0.4504, "step": 3821 }, { "epoch": 0.35867117117117114, "grad_norm": 1.5669269308214195, "learning_rate": 9.988398049920782e-06, "loss": 0.5648, "step": 3822 }, { "epoch": 0.358765015015015, "grad_norm": 1.333886157975964, "learning_rate": 9.988360848832374e-06, "loss": 0.5232, "step": 3823 }, { "epoch": 0.3588588588588589, "grad_norm": 1.1251737937697515, "learning_rate": 9.988323588267152e-06, "loss": 0.4703, "step": 3824 }, { "epoch": 0.3589527027027027, "grad_norm": 1.4683530452555875, "learning_rate": 9.988286268225562e-06, "loss": 0.5454, "step": 3825 }, { "epoch": 0.35904654654654655, "grad_norm": 1.1507823493280467, "learning_rate": 9.988248888708047e-06, "loss": 0.5236, "step": 3826 }, { "epoch": 0.35914039039039036, "grad_norm": 1.4219584018837204, "learning_rate": 9.988211449715056e-06, "loss": 0.4882, "step": 3827 }, { "epoch": 0.35923423423423423, "grad_norm": 1.413231448380462, "learning_rate": 9.988173951247029e-06, "loss": 0.5014, "step": 3828 }, { "epoch": 0.3593280780780781, "grad_norm": 1.3313439411980486, "learning_rate": 9.98813639330442e-06, "loss": 0.5536, "step": 3829 }, { "epoch": 0.3594219219219219, "grad_norm": 1.1783251411327786, "learning_rate": 9.988098775887672e-06, "loss": 0.5235, "step": 3830 }, { "epoch": 0.35951576576576577, "grad_norm": 2.0878613840380504, "learning_rate": 9.988061098997235e-06, "loss": 0.481, "step": 3831 }, { "epoch": 0.35960960960960964, "grad_norm": 1.358401144648268, "learning_rate": 9.98802336263356e-06, "loss": 0.5132, "step": 3832 }, { "epoch": 0.35970345345345345, "grad_norm": 1.525056852068838, "learning_rate": 9.987985566797093e-06, "loss": 0.4791, "step": 3833 }, { "epoch": 0.3597972972972973, "grad_norm": 1.3093663296748719, "learning_rate": 9.987947711488289e-06, "loss": 0.5061, "step": 3834 }, { "epoch": 0.3598911411411411, "grad_norm": 1.3022741675124574, "learning_rate": 9.987909796707597e-06, "loss": 0.5024, "step": 3835 }, { "epoch": 0.359984984984985, "grad_norm": 1.7774237784653832, "learning_rate": 9.98787182245547e-06, "loss": 0.5707, "step": 3836 }, { "epoch": 0.36007882882882886, "grad_norm": 1.4404908794394735, "learning_rate": 9.987833788732359e-06, "loss": 0.5319, "step": 3837 }, { "epoch": 0.36017267267267267, "grad_norm": 1.4994692642362806, "learning_rate": 9.98779569553872e-06, "loss": 0.4805, "step": 3838 }, { "epoch": 0.36026651651651653, "grad_norm": 1.5266238676285113, "learning_rate": 9.987757542875005e-06, "loss": 0.532, "step": 3839 }, { "epoch": 0.36036036036036034, "grad_norm": 1.2177013368591816, "learning_rate": 9.98771933074167e-06, "loss": 0.5368, "step": 3840 }, { "epoch": 0.3604542042042042, "grad_norm": 1.2642791650882812, "learning_rate": 9.987681059139168e-06, "loss": 0.487, "step": 3841 }, { "epoch": 0.3605480480480481, "grad_norm": 1.3144374111779302, "learning_rate": 9.987642728067962e-06, "loss": 0.4834, "step": 3842 }, { "epoch": 0.3606418918918919, "grad_norm": 1.419457136746279, "learning_rate": 9.987604337528503e-06, "loss": 0.5339, "step": 3843 }, { "epoch": 0.36073573573573575, "grad_norm": 1.4105766804177715, "learning_rate": 9.98756588752125e-06, "loss": 0.5088, "step": 3844 }, { "epoch": 0.36082957957957956, "grad_norm": 1.5553263600744591, "learning_rate": 9.987527378046662e-06, "loss": 0.5381, "step": 3845 }, { "epoch": 0.36092342342342343, "grad_norm": 1.4735987514789988, "learning_rate": 9.9874888091052e-06, "loss": 0.4895, "step": 3846 }, { "epoch": 0.3610172672672673, "grad_norm": 1.7187561484970382, "learning_rate": 9.987450180697317e-06, "loss": 0.5165, "step": 3847 }, { "epoch": 0.3611111111111111, "grad_norm": 1.497281454685548, "learning_rate": 9.987411492823483e-06, "loss": 0.5277, "step": 3848 }, { "epoch": 0.36120495495495497, "grad_norm": 1.5172916046300442, "learning_rate": 9.987372745484155e-06, "loss": 0.5318, "step": 3849 }, { "epoch": 0.3612987987987988, "grad_norm": 1.4785551258919893, "learning_rate": 9.987333938679792e-06, "loss": 0.5265, "step": 3850 }, { "epoch": 0.36139264264264265, "grad_norm": 1.2146948622995013, "learning_rate": 9.987295072410861e-06, "loss": 0.537, "step": 3851 }, { "epoch": 0.3614864864864865, "grad_norm": 1.2597851784881067, "learning_rate": 9.987256146677824e-06, "loss": 0.5092, "step": 3852 }, { "epoch": 0.3615803303303303, "grad_norm": 1.329995403205095, "learning_rate": 9.987217161481144e-06, "loss": 0.4721, "step": 3853 }, { "epoch": 0.3616741741741742, "grad_norm": 1.1822816748823695, "learning_rate": 9.987178116821288e-06, "loss": 0.5122, "step": 3854 }, { "epoch": 0.361768018018018, "grad_norm": 2.3910007529321624, "learning_rate": 9.98713901269872e-06, "loss": 0.4864, "step": 3855 }, { "epoch": 0.36186186186186187, "grad_norm": 1.3503546281464534, "learning_rate": 9.987099849113906e-06, "loss": 0.4827, "step": 3856 }, { "epoch": 0.36195570570570573, "grad_norm": 1.6816419571435015, "learning_rate": 9.987060626067313e-06, "loss": 0.4474, "step": 3857 }, { "epoch": 0.36204954954954954, "grad_norm": 1.962771788198377, "learning_rate": 9.987021343559411e-06, "loss": 0.5426, "step": 3858 }, { "epoch": 0.3621433933933934, "grad_norm": 1.3709915881828216, "learning_rate": 9.986982001590665e-06, "loss": 0.514, "step": 3859 }, { "epoch": 0.3622372372372372, "grad_norm": 1.2933776485919741, "learning_rate": 9.986942600161547e-06, "loss": 0.489, "step": 3860 }, { "epoch": 0.3623310810810811, "grad_norm": 1.1062250605224726, "learning_rate": 9.986903139272524e-06, "loss": 0.4764, "step": 3861 }, { "epoch": 0.36242492492492495, "grad_norm": 1.3233423920554663, "learning_rate": 9.98686361892407e-06, "loss": 0.4666, "step": 3862 }, { "epoch": 0.36251876876876876, "grad_norm": 1.2041644270397482, "learning_rate": 9.986824039116652e-06, "loss": 0.4806, "step": 3863 }, { "epoch": 0.36261261261261263, "grad_norm": 2.0231108305254306, "learning_rate": 9.986784399850744e-06, "loss": 0.4753, "step": 3864 }, { "epoch": 0.36270645645645644, "grad_norm": 1.3084712435080461, "learning_rate": 9.98674470112682e-06, "loss": 0.5077, "step": 3865 }, { "epoch": 0.3628003003003003, "grad_norm": 1.418342861897476, "learning_rate": 9.986704942945352e-06, "loss": 0.4576, "step": 3866 }, { "epoch": 0.36289414414414417, "grad_norm": 3.5477568716217296, "learning_rate": 9.986665125306813e-06, "loss": 0.5296, "step": 3867 }, { "epoch": 0.362987987987988, "grad_norm": 1.3884275270414679, "learning_rate": 9.98662524821168e-06, "loss": 0.5794, "step": 3868 }, { "epoch": 0.36308183183183185, "grad_norm": 1.1843726630944564, "learning_rate": 9.986585311660427e-06, "loss": 0.4615, "step": 3869 }, { "epoch": 0.36317567567567566, "grad_norm": 6.390746664254987, "learning_rate": 9.986545315653531e-06, "loss": 0.529, "step": 3870 }, { "epoch": 0.3632695195195195, "grad_norm": 1.396297432663167, "learning_rate": 9.986505260191468e-06, "loss": 0.5126, "step": 3871 }, { "epoch": 0.3633633633633634, "grad_norm": 1.1587749716425675, "learning_rate": 9.986465145274715e-06, "loss": 0.4705, "step": 3872 }, { "epoch": 0.3634572072072072, "grad_norm": 1.1487471801625413, "learning_rate": 9.986424970903752e-06, "loss": 0.4803, "step": 3873 }, { "epoch": 0.36355105105105107, "grad_norm": 1.1599491567958504, "learning_rate": 9.986384737079056e-06, "loss": 0.5378, "step": 3874 }, { "epoch": 0.3636448948948949, "grad_norm": 1.9522897898417015, "learning_rate": 9.986344443801108e-06, "loss": 0.5381, "step": 3875 }, { "epoch": 0.36373873873873874, "grad_norm": 1.928352805070156, "learning_rate": 9.98630409107039e-06, "loss": 0.4766, "step": 3876 }, { "epoch": 0.3638325825825826, "grad_norm": 1.7075996833230764, "learning_rate": 9.98626367888738e-06, "loss": 0.5234, "step": 3877 }, { "epoch": 0.3639264264264264, "grad_norm": 1.1456908199436104, "learning_rate": 9.986223207252563e-06, "loss": 0.5139, "step": 3878 }, { "epoch": 0.3640202702702703, "grad_norm": 1.069952067338123, "learning_rate": 9.986182676166419e-06, "loss": 0.4927, "step": 3879 }, { "epoch": 0.3641141141141141, "grad_norm": 1.991804537285703, "learning_rate": 9.986142085629431e-06, "loss": 0.526, "step": 3880 }, { "epoch": 0.36420795795795796, "grad_norm": 1.1451945894821272, "learning_rate": 9.986101435642084e-06, "loss": 0.5068, "step": 3881 }, { "epoch": 0.36430180180180183, "grad_norm": 1.2254492315598275, "learning_rate": 9.986060726204864e-06, "loss": 0.5026, "step": 3882 }, { "epoch": 0.36439564564564564, "grad_norm": 1.1896995581193706, "learning_rate": 9.986019957318256e-06, "loss": 0.5256, "step": 3883 }, { "epoch": 0.3644894894894895, "grad_norm": 1.3094229211713928, "learning_rate": 9.985979128982743e-06, "loss": 0.4864, "step": 3884 }, { "epoch": 0.3645833333333333, "grad_norm": 1.1196304578862528, "learning_rate": 9.985938241198816e-06, "loss": 0.4994, "step": 3885 }, { "epoch": 0.3646771771771772, "grad_norm": 1.3250861049775908, "learning_rate": 9.985897293966959e-06, "loss": 0.4673, "step": 3886 }, { "epoch": 0.36477102102102105, "grad_norm": 1.3225387433512932, "learning_rate": 9.985856287287663e-06, "loss": 0.4819, "step": 3887 }, { "epoch": 0.36486486486486486, "grad_norm": 1.168656127820458, "learning_rate": 9.985815221161415e-06, "loss": 0.4519, "step": 3888 }, { "epoch": 0.3649587087087087, "grad_norm": 1.3733191359242076, "learning_rate": 9.985774095588706e-06, "loss": 0.4934, "step": 3889 }, { "epoch": 0.36505255255255253, "grad_norm": 1.1914342717804929, "learning_rate": 9.985732910570024e-06, "loss": 0.4813, "step": 3890 }, { "epoch": 0.3651463963963964, "grad_norm": 1.3214820952964714, "learning_rate": 9.985691666105863e-06, "loss": 0.5385, "step": 3891 }, { "epoch": 0.36524024024024027, "grad_norm": 1.2336180906909042, "learning_rate": 9.985650362196712e-06, "loss": 0.4764, "step": 3892 }, { "epoch": 0.3653340840840841, "grad_norm": 1.3198812805047362, "learning_rate": 9.985608998843068e-06, "loss": 0.4779, "step": 3893 }, { "epoch": 0.36542792792792794, "grad_norm": 1.216908356923057, "learning_rate": 9.985567576045418e-06, "loss": 0.5009, "step": 3894 }, { "epoch": 0.36552177177177175, "grad_norm": 1.41130648092839, "learning_rate": 9.985526093804261e-06, "loss": 0.5072, "step": 3895 }, { "epoch": 0.3656156156156156, "grad_norm": 1.4283151387244022, "learning_rate": 9.985484552120087e-06, "loss": 0.4981, "step": 3896 }, { "epoch": 0.3657094594594595, "grad_norm": 1.7112053860092507, "learning_rate": 9.985442950993398e-06, "loss": 0.5186, "step": 3897 }, { "epoch": 0.3658033033033033, "grad_norm": 1.3159010389823305, "learning_rate": 9.985401290424684e-06, "loss": 0.5247, "step": 3898 }, { "epoch": 0.36589714714714716, "grad_norm": 1.379529539549004, "learning_rate": 9.985359570414444e-06, "loss": 0.4912, "step": 3899 }, { "epoch": 0.36599099099099097, "grad_norm": 1.2066871300611854, "learning_rate": 9.985317790963173e-06, "loss": 0.4933, "step": 3900 }, { "epoch": 0.36608483483483484, "grad_norm": 1.4913497010469623, "learning_rate": 9.985275952071374e-06, "loss": 0.509, "step": 3901 }, { "epoch": 0.3661786786786787, "grad_norm": 1.3582683222678442, "learning_rate": 9.985234053739541e-06, "loss": 0.5739, "step": 3902 }, { "epoch": 0.3662725225225225, "grad_norm": 1.2092203888202266, "learning_rate": 9.985192095968178e-06, "loss": 0.5231, "step": 3903 }, { "epoch": 0.3663663663663664, "grad_norm": 1.0894581767308378, "learning_rate": 9.985150078757781e-06, "loss": 0.4963, "step": 3904 }, { "epoch": 0.3664602102102102, "grad_norm": 1.4146134052821688, "learning_rate": 9.985108002108853e-06, "loss": 0.5131, "step": 3905 }, { "epoch": 0.36655405405405406, "grad_norm": 1.2100513882964856, "learning_rate": 9.985065866021896e-06, "loss": 0.5348, "step": 3906 }, { "epoch": 0.3666478978978979, "grad_norm": 1.7132947825070073, "learning_rate": 9.985023670497411e-06, "loss": 0.5279, "step": 3907 }, { "epoch": 0.36674174174174173, "grad_norm": 1.3142848346825313, "learning_rate": 9.984981415535903e-06, "loss": 0.4929, "step": 3908 }, { "epoch": 0.3668355855855856, "grad_norm": 1.3690417620223545, "learning_rate": 9.984939101137877e-06, "loss": 0.52, "step": 3909 }, { "epoch": 0.3669294294294294, "grad_norm": 1.6672299650417939, "learning_rate": 9.984896727303833e-06, "loss": 0.5262, "step": 3910 }, { "epoch": 0.3670232732732733, "grad_norm": 1.7247445415578857, "learning_rate": 9.984854294034278e-06, "loss": 0.5049, "step": 3911 }, { "epoch": 0.36711711711711714, "grad_norm": 1.4629287424154804, "learning_rate": 9.984811801329719e-06, "loss": 0.49, "step": 3912 }, { "epoch": 0.36721096096096095, "grad_norm": 1.300976583691061, "learning_rate": 9.984769249190662e-06, "loss": 0.4638, "step": 3913 }, { "epoch": 0.3673048048048048, "grad_norm": 1.510521707991466, "learning_rate": 9.984726637617616e-06, "loss": 0.4934, "step": 3914 }, { "epoch": 0.36739864864864863, "grad_norm": 1.171156662602701, "learning_rate": 9.984683966611087e-06, "loss": 0.4713, "step": 3915 }, { "epoch": 0.3674924924924925, "grad_norm": 1.4652225477818466, "learning_rate": 9.984641236171584e-06, "loss": 0.494, "step": 3916 }, { "epoch": 0.36758633633633636, "grad_norm": 1.41476874155407, "learning_rate": 9.984598446299617e-06, "loss": 0.5274, "step": 3917 }, { "epoch": 0.36768018018018017, "grad_norm": 1.2278674210518445, "learning_rate": 9.984555596995697e-06, "loss": 0.5068, "step": 3918 }, { "epoch": 0.36777402402402404, "grad_norm": 1.3052507118751058, "learning_rate": 9.984512688260333e-06, "loss": 0.5188, "step": 3919 }, { "epoch": 0.36786786786786785, "grad_norm": 1.134110727107998, "learning_rate": 9.984469720094038e-06, "loss": 0.4973, "step": 3920 }, { "epoch": 0.3679617117117117, "grad_norm": 1.6192291643228254, "learning_rate": 9.984426692497324e-06, "loss": 0.5285, "step": 3921 }, { "epoch": 0.3680555555555556, "grad_norm": 1.1633644729874797, "learning_rate": 9.984383605470703e-06, "loss": 0.4591, "step": 3922 }, { "epoch": 0.3681493993993994, "grad_norm": 1.3519534030971738, "learning_rate": 9.984340459014689e-06, "loss": 0.4884, "step": 3923 }, { "epoch": 0.36824324324324326, "grad_norm": 1.1334674969389886, "learning_rate": 9.9842972531298e-06, "loss": 0.4267, "step": 3924 }, { "epoch": 0.36833708708708707, "grad_norm": 1.311068526082983, "learning_rate": 9.984253987816545e-06, "loss": 0.5284, "step": 3925 }, { "epoch": 0.36843093093093093, "grad_norm": 1.3123946945961609, "learning_rate": 9.984210663075447e-06, "loss": 0.5139, "step": 3926 }, { "epoch": 0.3685247747747748, "grad_norm": 1.684689309787679, "learning_rate": 9.984167278907016e-06, "loss": 0.5109, "step": 3927 }, { "epoch": 0.3686186186186186, "grad_norm": 1.3801184218945848, "learning_rate": 9.984123835311773e-06, "loss": 0.4947, "step": 3928 }, { "epoch": 0.3687124624624625, "grad_norm": 2.606531199367679, "learning_rate": 9.984080332290233e-06, "loss": 0.5264, "step": 3929 }, { "epoch": 0.3688063063063063, "grad_norm": 1.39081263112969, "learning_rate": 9.984036769842918e-06, "loss": 0.4566, "step": 3930 }, { "epoch": 0.36890015015015015, "grad_norm": 1.369342823346845, "learning_rate": 9.983993147970346e-06, "loss": 0.5463, "step": 3931 }, { "epoch": 0.368993993993994, "grad_norm": 1.4606166216557095, "learning_rate": 9.983949466673034e-06, "loss": 0.5128, "step": 3932 }, { "epoch": 0.36908783783783783, "grad_norm": 1.154522776724547, "learning_rate": 9.98390572595151e-06, "loss": 0.4978, "step": 3933 }, { "epoch": 0.3691816816816817, "grad_norm": 1.1802460139346964, "learning_rate": 9.98386192580629e-06, "loss": 0.5242, "step": 3934 }, { "epoch": 0.3692755255255255, "grad_norm": 1.148306487036214, "learning_rate": 9.983818066237899e-06, "loss": 0.481, "step": 3935 }, { "epoch": 0.36936936936936937, "grad_norm": 1.2281933924122337, "learning_rate": 9.983774147246857e-06, "loss": 0.5153, "step": 3936 }, { "epoch": 0.36946321321321324, "grad_norm": 1.215861498125214, "learning_rate": 9.98373016883369e-06, "loss": 0.5135, "step": 3937 }, { "epoch": 0.36955705705705705, "grad_norm": 1.5097728662690553, "learning_rate": 9.983686130998921e-06, "loss": 0.4987, "step": 3938 }, { "epoch": 0.3696509009009009, "grad_norm": 1.2224511381425487, "learning_rate": 9.983642033743077e-06, "loss": 0.5269, "step": 3939 }, { "epoch": 0.3697447447447447, "grad_norm": 1.5450887763399175, "learning_rate": 9.983597877066681e-06, "loss": 0.5083, "step": 3940 }, { "epoch": 0.3698385885885886, "grad_norm": 1.530502378261613, "learning_rate": 9.983553660970261e-06, "loss": 0.5306, "step": 3941 }, { "epoch": 0.36993243243243246, "grad_norm": 1.1094209621338027, "learning_rate": 9.983509385454347e-06, "loss": 0.4792, "step": 3942 }, { "epoch": 0.37002627627627627, "grad_norm": 1.3475381193880311, "learning_rate": 9.98346505051946e-06, "loss": 0.4849, "step": 3943 }, { "epoch": 0.37012012012012013, "grad_norm": 3.2219434471133286, "learning_rate": 9.983420656166136e-06, "loss": 0.5005, "step": 3944 }, { "epoch": 0.37021396396396394, "grad_norm": 1.2472525151099003, "learning_rate": 9.983376202394898e-06, "loss": 0.4971, "step": 3945 }, { "epoch": 0.3703078078078078, "grad_norm": 1.4731705780629547, "learning_rate": 9.983331689206282e-06, "loss": 0.5346, "step": 3946 }, { "epoch": 0.3704016516516517, "grad_norm": 1.1336398229807478, "learning_rate": 9.983287116600813e-06, "loss": 0.4604, "step": 3947 }, { "epoch": 0.3704954954954955, "grad_norm": 2.177118593926322, "learning_rate": 9.983242484579027e-06, "loss": 0.486, "step": 3948 }, { "epoch": 0.37058933933933935, "grad_norm": 1.3638770288407374, "learning_rate": 9.983197793141455e-06, "loss": 0.5393, "step": 3949 }, { "epoch": 0.37068318318318316, "grad_norm": 1.428233458292047, "learning_rate": 9.983153042288627e-06, "loss": 0.496, "step": 3950 }, { "epoch": 0.37077702702702703, "grad_norm": 1.3274497281672082, "learning_rate": 9.98310823202108e-06, "loss": 0.5208, "step": 3951 }, { "epoch": 0.3708708708708709, "grad_norm": 1.4772595150425034, "learning_rate": 9.983063362339348e-06, "loss": 0.4886, "step": 3952 }, { "epoch": 0.3709647147147147, "grad_norm": 1.868967301069459, "learning_rate": 9.983018433243963e-06, "loss": 0.4808, "step": 3953 }, { "epoch": 0.37105855855855857, "grad_norm": 1.4267789224161225, "learning_rate": 9.982973444735464e-06, "loss": 0.4844, "step": 3954 }, { "epoch": 0.3711524024024024, "grad_norm": 1.4960525372297375, "learning_rate": 9.982928396814386e-06, "loss": 0.521, "step": 3955 }, { "epoch": 0.37124624624624625, "grad_norm": 1.722216740999983, "learning_rate": 9.982883289481266e-06, "loss": 0.5061, "step": 3956 }, { "epoch": 0.3713400900900901, "grad_norm": 1.280321503956746, "learning_rate": 9.982838122736642e-06, "loss": 0.4839, "step": 3957 }, { "epoch": 0.3714339339339339, "grad_norm": 1.266904378894749, "learning_rate": 9.982792896581054e-06, "loss": 0.5212, "step": 3958 }, { "epoch": 0.3715277777777778, "grad_norm": 1.255200074323885, "learning_rate": 9.982747611015038e-06, "loss": 0.5038, "step": 3959 }, { "epoch": 0.3716216216216216, "grad_norm": 1.3762121641301461, "learning_rate": 9.982702266039138e-06, "loss": 0.5147, "step": 3960 }, { "epoch": 0.37171546546546547, "grad_norm": 1.0099747115021869, "learning_rate": 9.98265686165389e-06, "loss": 0.3814, "step": 3961 }, { "epoch": 0.37180930930930933, "grad_norm": 1.1660349113494757, "learning_rate": 9.982611397859839e-06, "loss": 0.5193, "step": 3962 }, { "epoch": 0.37190315315315314, "grad_norm": 1.3850683832385116, "learning_rate": 9.982565874657527e-06, "loss": 0.4988, "step": 3963 }, { "epoch": 0.371996996996997, "grad_norm": 1.8086862746358372, "learning_rate": 9.982520292047493e-06, "loss": 0.5278, "step": 3964 }, { "epoch": 0.3720908408408408, "grad_norm": 1.2888335496925918, "learning_rate": 9.982474650030286e-06, "loss": 0.4869, "step": 3965 }, { "epoch": 0.3721846846846847, "grad_norm": 1.194299740655169, "learning_rate": 9.982428948606444e-06, "loss": 0.4646, "step": 3966 }, { "epoch": 0.37227852852852855, "grad_norm": 1.482000366068331, "learning_rate": 9.982383187776518e-06, "loss": 0.5415, "step": 3967 }, { "epoch": 0.37237237237237236, "grad_norm": 1.1951753846707585, "learning_rate": 9.982337367541049e-06, "loss": 0.4915, "step": 3968 }, { "epoch": 0.37246621621621623, "grad_norm": 1.560954891263227, "learning_rate": 9.982291487900586e-06, "loss": 0.4945, "step": 3969 }, { "epoch": 0.37256006006006004, "grad_norm": 1.2499667311443983, "learning_rate": 9.982245548855676e-06, "loss": 0.5071, "step": 3970 }, { "epoch": 0.3726539039039039, "grad_norm": 1.3133832323076353, "learning_rate": 9.982199550406866e-06, "loss": 0.5536, "step": 3971 }, { "epoch": 0.37274774774774777, "grad_norm": 1.244030167350113, "learning_rate": 9.982153492554703e-06, "loss": 0.5199, "step": 3972 }, { "epoch": 0.3728415915915916, "grad_norm": 1.2093777015401752, "learning_rate": 9.98210737529974e-06, "loss": 0.5191, "step": 3973 }, { "epoch": 0.37293543543543545, "grad_norm": 1.2910562555158507, "learning_rate": 9.982061198642522e-06, "loss": 0.4848, "step": 3974 }, { "epoch": 0.37302927927927926, "grad_norm": 1.1567169987312627, "learning_rate": 9.982014962583602e-06, "loss": 0.5131, "step": 3975 }, { "epoch": 0.3731231231231231, "grad_norm": 1.153626003130628, "learning_rate": 9.981968667123532e-06, "loss": 0.5217, "step": 3976 }, { "epoch": 0.373216966966967, "grad_norm": 1.4196082439018736, "learning_rate": 9.981922312262862e-06, "loss": 0.5035, "step": 3977 }, { "epoch": 0.3733108108108108, "grad_norm": 1.8844211937999036, "learning_rate": 9.981875898002147e-06, "loss": 0.5201, "step": 3978 }, { "epoch": 0.37340465465465467, "grad_norm": 1.2225278057870312, "learning_rate": 9.98182942434194e-06, "loss": 0.4959, "step": 3979 }, { "epoch": 0.3734984984984985, "grad_norm": 1.7627059029436158, "learning_rate": 9.981782891282794e-06, "loss": 0.5236, "step": 3980 }, { "epoch": 0.37359234234234234, "grad_norm": 1.625504213866565, "learning_rate": 9.981736298825265e-06, "loss": 0.4561, "step": 3981 }, { "epoch": 0.3736861861861862, "grad_norm": 1.6194002844955928, "learning_rate": 9.981689646969906e-06, "loss": 0.5096, "step": 3982 }, { "epoch": 0.37378003003003, "grad_norm": 1.2085332981073516, "learning_rate": 9.981642935717276e-06, "loss": 0.5197, "step": 3983 }, { "epoch": 0.3738738738738739, "grad_norm": 1.195401032829375, "learning_rate": 9.981596165067931e-06, "loss": 0.467, "step": 3984 }, { "epoch": 0.3739677177177177, "grad_norm": 1.3628843997200373, "learning_rate": 9.981549335022429e-06, "loss": 0.5066, "step": 3985 }, { "epoch": 0.37406156156156156, "grad_norm": 1.659776648182921, "learning_rate": 9.981502445581326e-06, "loss": 0.5709, "step": 3986 }, { "epoch": 0.37415540540540543, "grad_norm": 1.1708230671197786, "learning_rate": 9.981455496745185e-06, "loss": 0.4891, "step": 3987 }, { "epoch": 0.37424924924924924, "grad_norm": 1.629832183468575, "learning_rate": 9.981408488514564e-06, "loss": 0.4893, "step": 3988 }, { "epoch": 0.3743430930930931, "grad_norm": 1.3935362172936716, "learning_rate": 9.981361420890022e-06, "loss": 0.465, "step": 3989 }, { "epoch": 0.3744369369369369, "grad_norm": 1.5804670757842747, "learning_rate": 9.981314293872122e-06, "loss": 0.5169, "step": 3990 }, { "epoch": 0.3745307807807808, "grad_norm": 1.2372022075874343, "learning_rate": 9.981267107461425e-06, "loss": 0.5238, "step": 3991 }, { "epoch": 0.37462462462462465, "grad_norm": 2.3330464066038803, "learning_rate": 9.981219861658495e-06, "loss": 0.5176, "step": 3992 }, { "epoch": 0.37471846846846846, "grad_norm": 1.186807934084373, "learning_rate": 9.981172556463894e-06, "loss": 0.4874, "step": 3993 }, { "epoch": 0.3748123123123123, "grad_norm": 1.1415703514747773, "learning_rate": 9.981125191878187e-06, "loss": 0.5278, "step": 3994 }, { "epoch": 0.37490615615615613, "grad_norm": 1.2820458974159705, "learning_rate": 9.981077767901937e-06, "loss": 0.5766, "step": 3995 }, { "epoch": 0.375, "grad_norm": 1.0604811148888305, "learning_rate": 9.981030284535711e-06, "loss": 0.4817, "step": 3996 }, { "epoch": 0.37509384384384387, "grad_norm": 1.1895355786801491, "learning_rate": 9.980982741780075e-06, "loss": 0.471, "step": 3997 }, { "epoch": 0.3751876876876877, "grad_norm": 1.8769078310946565, "learning_rate": 9.980935139635594e-06, "loss": 0.5003, "step": 3998 }, { "epoch": 0.37528153153153154, "grad_norm": 2.7642762945839423, "learning_rate": 9.980887478102838e-06, "loss": 0.5247, "step": 3999 }, { "epoch": 0.37537537537537535, "grad_norm": 1.331896620387533, "learning_rate": 9.980839757182375e-06, "loss": 0.5387, "step": 4000 }, { "epoch": 0.3754692192192192, "grad_norm": 1.7135268352484014, "learning_rate": 9.980791976874773e-06, "loss": 0.4914, "step": 4001 }, { "epoch": 0.3755630630630631, "grad_norm": 8.652937166489103, "learning_rate": 9.980744137180602e-06, "loss": 0.5506, "step": 4002 }, { "epoch": 0.3756569069069069, "grad_norm": 1.1064121193898482, "learning_rate": 9.980696238100433e-06, "loss": 0.4988, "step": 4003 }, { "epoch": 0.37575075075075076, "grad_norm": 1.3880533440111296, "learning_rate": 9.980648279634836e-06, "loss": 0.5686, "step": 4004 }, { "epoch": 0.37584459459459457, "grad_norm": 1.8912316950573909, "learning_rate": 9.980600261784383e-06, "loss": 0.4702, "step": 4005 }, { "epoch": 0.37593843843843844, "grad_norm": 1.177590981005998, "learning_rate": 9.980552184549647e-06, "loss": 0.4546, "step": 4006 }, { "epoch": 0.3760322822822823, "grad_norm": 1.33060498099614, "learning_rate": 9.980504047931202e-06, "loss": 0.522, "step": 4007 }, { "epoch": 0.3761261261261261, "grad_norm": 1.4360098866293127, "learning_rate": 9.98045585192962e-06, "loss": 0.4613, "step": 4008 }, { "epoch": 0.37621996996997, "grad_norm": 1.2671090049618083, "learning_rate": 9.980407596545478e-06, "loss": 0.5132, "step": 4009 }, { "epoch": 0.3763138138138138, "grad_norm": 1.4981624061413268, "learning_rate": 9.980359281779348e-06, "loss": 0.5068, "step": 4010 }, { "epoch": 0.37640765765765766, "grad_norm": 1.2160200118252815, "learning_rate": 9.98031090763181e-06, "loss": 0.4823, "step": 4011 }, { "epoch": 0.3765015015015015, "grad_norm": 1.2718657326963532, "learning_rate": 9.980262474103439e-06, "loss": 0.5252, "step": 4012 }, { "epoch": 0.37659534534534533, "grad_norm": 1.4460395822570358, "learning_rate": 9.980213981194811e-06, "loss": 0.5833, "step": 4013 }, { "epoch": 0.3766891891891892, "grad_norm": 1.5458684000486216, "learning_rate": 9.980165428906506e-06, "loss": 0.5059, "step": 4014 }, { "epoch": 0.376783033033033, "grad_norm": 1.2513755053639952, "learning_rate": 9.980116817239102e-06, "loss": 0.4499, "step": 4015 }, { "epoch": 0.3768768768768769, "grad_norm": 1.3262718527511614, "learning_rate": 9.980068146193181e-06, "loss": 0.4775, "step": 4016 }, { "epoch": 0.37697072072072074, "grad_norm": 1.456854653378047, "learning_rate": 9.98001941576932e-06, "loss": 0.5263, "step": 4017 }, { "epoch": 0.37706456456456455, "grad_norm": 1.3608919901076573, "learning_rate": 9.979970625968101e-06, "loss": 0.49, "step": 4018 }, { "epoch": 0.3771584084084084, "grad_norm": 1.1382899681541114, "learning_rate": 9.979921776790107e-06, "loss": 0.4936, "step": 4019 }, { "epoch": 0.37725225225225223, "grad_norm": 1.8946632025578483, "learning_rate": 9.97987286823592e-06, "loss": 0.4886, "step": 4020 }, { "epoch": 0.3773460960960961, "grad_norm": 1.1366234593734266, "learning_rate": 9.97982390030612e-06, "loss": 0.5111, "step": 4021 }, { "epoch": 0.37743993993993996, "grad_norm": 1.326433700583588, "learning_rate": 9.979774873001296e-06, "loss": 0.57, "step": 4022 }, { "epoch": 0.37753378378378377, "grad_norm": 1.717509160750516, "learning_rate": 9.97972578632203e-06, "loss": 0.5003, "step": 4023 }, { "epoch": 0.37762762762762764, "grad_norm": 1.9598402921791525, "learning_rate": 9.979676640268908e-06, "loss": 0.457, "step": 4024 }, { "epoch": 0.37772147147147145, "grad_norm": 1.13627744239639, "learning_rate": 9.979627434842514e-06, "loss": 0.504, "step": 4025 }, { "epoch": 0.3778153153153153, "grad_norm": 1.9150828011031023, "learning_rate": 9.979578170043436e-06, "loss": 0.4686, "step": 4026 }, { "epoch": 0.3779091591591592, "grad_norm": 1.1874854762170324, "learning_rate": 9.979528845872261e-06, "loss": 0.4456, "step": 4027 }, { "epoch": 0.378003003003003, "grad_norm": 1.3825823235382977, "learning_rate": 9.979479462329577e-06, "loss": 0.5022, "step": 4028 }, { "epoch": 0.37809684684684686, "grad_norm": 1.2017496717234006, "learning_rate": 9.979430019415976e-06, "loss": 0.5144, "step": 4029 }, { "epoch": 0.37819069069069067, "grad_norm": 1.2123177853627762, "learning_rate": 9.979380517132042e-06, "loss": 0.4834, "step": 4030 }, { "epoch": 0.37828453453453453, "grad_norm": 2.4907291012844865, "learning_rate": 9.97933095547837e-06, "loss": 0.487, "step": 4031 }, { "epoch": 0.3783783783783784, "grad_norm": 1.225556811686721, "learning_rate": 9.979281334455548e-06, "loss": 0.5286, "step": 4032 }, { "epoch": 0.3784722222222222, "grad_norm": 1.1952148557181386, "learning_rate": 9.97923165406417e-06, "loss": 0.4916, "step": 4033 }, { "epoch": 0.3785660660660661, "grad_norm": 2.135029853838636, "learning_rate": 9.979181914304825e-06, "loss": 0.4728, "step": 4034 }, { "epoch": 0.3786599099099099, "grad_norm": 1.2977588659323325, "learning_rate": 9.97913211517811e-06, "loss": 0.4961, "step": 4035 }, { "epoch": 0.37875375375375375, "grad_norm": 1.1544235667894376, "learning_rate": 9.979082256684615e-06, "loss": 0.4552, "step": 4036 }, { "epoch": 0.3788475975975976, "grad_norm": 1.1782309582172625, "learning_rate": 9.979032338824937e-06, "loss": 0.4967, "step": 4037 }, { "epoch": 0.37894144144144143, "grad_norm": 1.3923280464377712, "learning_rate": 9.978982361599671e-06, "loss": 0.4801, "step": 4038 }, { "epoch": 0.3790352852852853, "grad_norm": 1.2465210898441097, "learning_rate": 9.978932325009411e-06, "loss": 0.5278, "step": 4039 }, { "epoch": 0.3791291291291291, "grad_norm": 1.0932875675833225, "learning_rate": 9.978882229054756e-06, "loss": 0.4854, "step": 4040 }, { "epoch": 0.37922297297297297, "grad_norm": 1.3944025844084194, "learning_rate": 9.978832073736303e-06, "loss": 0.4344, "step": 4041 }, { "epoch": 0.37931681681681684, "grad_norm": 1.2562843936967514, "learning_rate": 9.978781859054647e-06, "loss": 0.5019, "step": 4042 }, { "epoch": 0.37941066066066065, "grad_norm": 2.415694934853156, "learning_rate": 9.978731585010391e-06, "loss": 0.4917, "step": 4043 }, { "epoch": 0.3795045045045045, "grad_norm": 1.2613434948857298, "learning_rate": 9.97868125160413e-06, "loss": 0.4798, "step": 4044 }, { "epoch": 0.3795983483483483, "grad_norm": 1.7810770642151057, "learning_rate": 9.978630858836468e-06, "loss": 0.5182, "step": 4045 }, { "epoch": 0.3796921921921922, "grad_norm": 1.1049165714910272, "learning_rate": 9.978580406708004e-06, "loss": 0.4728, "step": 4046 }, { "epoch": 0.37978603603603606, "grad_norm": 1.1324345421030833, "learning_rate": 9.978529895219339e-06, "loss": 0.4774, "step": 4047 }, { "epoch": 0.37987987987987987, "grad_norm": 1.172866945529522, "learning_rate": 9.978479324371078e-06, "loss": 0.453, "step": 4048 }, { "epoch": 0.37997372372372373, "grad_norm": 1.4334655899493278, "learning_rate": 9.978428694163821e-06, "loss": 0.5551, "step": 4049 }, { "epoch": 0.38006756756756754, "grad_norm": 1.280964511034104, "learning_rate": 9.978378004598173e-06, "loss": 0.5455, "step": 4050 }, { "epoch": 0.3801614114114114, "grad_norm": 1.524712677321027, "learning_rate": 9.978327255674738e-06, "loss": 0.5171, "step": 4051 }, { "epoch": 0.3802552552552553, "grad_norm": 2.4997225520986954, "learning_rate": 9.978276447394122e-06, "loss": 0.5633, "step": 4052 }, { "epoch": 0.3803490990990991, "grad_norm": 1.4955753536236267, "learning_rate": 9.978225579756927e-06, "loss": 0.5103, "step": 4053 }, { "epoch": 0.38044294294294295, "grad_norm": 1.1596993399442048, "learning_rate": 9.978174652763765e-06, "loss": 0.4543, "step": 4054 }, { "epoch": 0.38053678678678676, "grad_norm": 2.6180684526271283, "learning_rate": 9.978123666415243e-06, "loss": 0.4705, "step": 4055 }, { "epoch": 0.38063063063063063, "grad_norm": 1.25273445899753, "learning_rate": 9.978072620711963e-06, "loss": 0.4738, "step": 4056 }, { "epoch": 0.3807244744744745, "grad_norm": 1.3968467713001183, "learning_rate": 9.97802151565454e-06, "loss": 0.5118, "step": 4057 }, { "epoch": 0.3808183183183183, "grad_norm": 1.4148265318175195, "learning_rate": 9.977970351243578e-06, "loss": 0.5678, "step": 4058 }, { "epoch": 0.38091216216216217, "grad_norm": 1.4042585719583736, "learning_rate": 9.977919127479689e-06, "loss": 0.5283, "step": 4059 }, { "epoch": 0.381006006006006, "grad_norm": 1.5031653396766953, "learning_rate": 9.977867844363487e-06, "loss": 0.5693, "step": 4060 }, { "epoch": 0.38109984984984985, "grad_norm": 1.6654598064550838, "learning_rate": 9.97781650189558e-06, "loss": 0.5039, "step": 4061 }, { "epoch": 0.3811936936936937, "grad_norm": 1.1404943543122492, "learning_rate": 9.977765100076579e-06, "loss": 0.4975, "step": 4062 }, { "epoch": 0.3812875375375375, "grad_norm": 1.1450879028950793, "learning_rate": 9.977713638907103e-06, "loss": 0.4452, "step": 4063 }, { "epoch": 0.3813813813813814, "grad_norm": 1.476868520996318, "learning_rate": 9.97766211838776e-06, "loss": 0.4972, "step": 4064 }, { "epoch": 0.3814752252252252, "grad_norm": 0.988665379620248, "learning_rate": 9.977610538519162e-06, "loss": 0.4748, "step": 4065 }, { "epoch": 0.38156906906906907, "grad_norm": 2.249059099058319, "learning_rate": 9.977558899301932e-06, "loss": 0.4531, "step": 4066 }, { "epoch": 0.38166291291291293, "grad_norm": 1.2743081190910035, "learning_rate": 9.97750720073668e-06, "loss": 0.4883, "step": 4067 }, { "epoch": 0.38175675675675674, "grad_norm": 1.307581926591623, "learning_rate": 9.977455442824023e-06, "loss": 0.4986, "step": 4068 }, { "epoch": 0.3818506006006006, "grad_norm": 2.4907666065184837, "learning_rate": 9.977403625564579e-06, "loss": 0.5458, "step": 4069 }, { "epoch": 0.3819444444444444, "grad_norm": 1.0974194783242444, "learning_rate": 9.977351748958968e-06, "loss": 0.4926, "step": 4070 }, { "epoch": 0.3820382882882883, "grad_norm": 1.6337738015639094, "learning_rate": 9.977299813007804e-06, "loss": 0.4653, "step": 4071 }, { "epoch": 0.38213213213213215, "grad_norm": 2.0168849816429057, "learning_rate": 9.97724781771171e-06, "loss": 0.5054, "step": 4072 }, { "epoch": 0.38222597597597596, "grad_norm": 1.3030284470372009, "learning_rate": 9.977195763071304e-06, "loss": 0.5404, "step": 4073 }, { "epoch": 0.38231981981981983, "grad_norm": 1.2588067765516213, "learning_rate": 9.977143649087206e-06, "loss": 0.5146, "step": 4074 }, { "epoch": 0.38241366366366364, "grad_norm": 1.1730311015475094, "learning_rate": 9.97709147576004e-06, "loss": 0.5099, "step": 4075 }, { "epoch": 0.3825075075075075, "grad_norm": 1.288456142718751, "learning_rate": 9.977039243090426e-06, "loss": 0.5267, "step": 4076 }, { "epoch": 0.38260135135135137, "grad_norm": 1.2462332107961203, "learning_rate": 9.976986951078986e-06, "loss": 0.4888, "step": 4077 }, { "epoch": 0.3826951951951952, "grad_norm": 1.3124744846238239, "learning_rate": 9.976934599726346e-06, "loss": 0.495, "step": 4078 }, { "epoch": 0.38278903903903905, "grad_norm": 1.2437403634165474, "learning_rate": 9.97688218903313e-06, "loss": 0.5224, "step": 4079 }, { "epoch": 0.38288288288288286, "grad_norm": 1.17061299118246, "learning_rate": 9.97682971899996e-06, "loss": 0.51, "step": 4080 }, { "epoch": 0.3829767267267267, "grad_norm": 1.3313783981210763, "learning_rate": 9.976777189627466e-06, "loss": 0.5254, "step": 4081 }, { "epoch": 0.3830705705705706, "grad_norm": 2.040808359261857, "learning_rate": 9.97672460091627e-06, "loss": 0.5056, "step": 4082 }, { "epoch": 0.3831644144144144, "grad_norm": 2.4370545994539716, "learning_rate": 9.976671952867001e-06, "loss": 0.4776, "step": 4083 }, { "epoch": 0.38325825825825827, "grad_norm": 4.542374253485455, "learning_rate": 9.976619245480286e-06, "loss": 0.4351, "step": 4084 }, { "epoch": 0.3833521021021021, "grad_norm": 1.2504336162154703, "learning_rate": 9.976566478756755e-06, "loss": 0.4831, "step": 4085 }, { "epoch": 0.38344594594594594, "grad_norm": 1.333581435573528, "learning_rate": 9.976513652697037e-06, "loss": 0.5194, "step": 4086 }, { "epoch": 0.3835397897897898, "grad_norm": 1.4385734408875208, "learning_rate": 9.97646076730176e-06, "loss": 0.5205, "step": 4087 }, { "epoch": 0.3836336336336336, "grad_norm": 1.1383839084490233, "learning_rate": 9.976407822571556e-06, "loss": 0.5089, "step": 4088 }, { "epoch": 0.3837274774774775, "grad_norm": 1.175351022488492, "learning_rate": 9.976354818507055e-06, "loss": 0.511, "step": 4089 }, { "epoch": 0.3838213213213213, "grad_norm": 1.1855111129611053, "learning_rate": 9.97630175510889e-06, "loss": 0.5245, "step": 4090 }, { "epoch": 0.38391516516516516, "grad_norm": 1.5690541175870043, "learning_rate": 9.976248632377694e-06, "loss": 0.4969, "step": 4091 }, { "epoch": 0.38400900900900903, "grad_norm": 1.2740969133618703, "learning_rate": 9.976195450314098e-06, "loss": 0.4713, "step": 4092 }, { "epoch": 0.38410285285285284, "grad_norm": 1.1801543418575, "learning_rate": 9.976142208918743e-06, "loss": 0.4836, "step": 4093 }, { "epoch": 0.3841966966966967, "grad_norm": 1.9105562285581383, "learning_rate": 9.976088908192255e-06, "loss": 0.4984, "step": 4094 }, { "epoch": 0.3842905405405405, "grad_norm": 1.2513636128805572, "learning_rate": 9.976035548135272e-06, "loss": 0.4868, "step": 4095 }, { "epoch": 0.3843843843843844, "grad_norm": 1.3618539879430565, "learning_rate": 9.975982128748433e-06, "loss": 0.5647, "step": 4096 }, { "epoch": 0.38447822822822825, "grad_norm": 1.1590158858173079, "learning_rate": 9.975928650032374e-06, "loss": 0.4896, "step": 4097 }, { "epoch": 0.38457207207207206, "grad_norm": 1.595342058168626, "learning_rate": 9.975875111987732e-06, "loss": 0.484, "step": 4098 }, { "epoch": 0.3846659159159159, "grad_norm": 1.1168476659447102, "learning_rate": 9.975821514615144e-06, "loss": 0.4651, "step": 4099 }, { "epoch": 0.38475975975975973, "grad_norm": 1.2532759242918503, "learning_rate": 9.975767857915252e-06, "loss": 0.4961, "step": 4100 }, { "epoch": 0.3848536036036036, "grad_norm": 1.1973370534535237, "learning_rate": 9.975714141888695e-06, "loss": 0.5119, "step": 4101 }, { "epoch": 0.38494744744744747, "grad_norm": 1.2984119007562314, "learning_rate": 9.97566036653611e-06, "loss": 0.51, "step": 4102 }, { "epoch": 0.3850412912912913, "grad_norm": 1.1840662686755448, "learning_rate": 9.975606531858143e-06, "loss": 0.4541, "step": 4103 }, { "epoch": 0.38513513513513514, "grad_norm": 1.3654823524649131, "learning_rate": 9.975552637855434e-06, "loss": 0.4734, "step": 4104 }, { "epoch": 0.38522897897897895, "grad_norm": 2.0825046457990406, "learning_rate": 9.975498684528623e-06, "loss": 0.541, "step": 4105 }, { "epoch": 0.3853228228228228, "grad_norm": 1.4175423074105558, "learning_rate": 9.975444671878358e-06, "loss": 0.5185, "step": 4106 }, { "epoch": 0.3854166666666667, "grad_norm": 2.6170008986922686, "learning_rate": 9.97539059990528e-06, "loss": 0.5697, "step": 4107 }, { "epoch": 0.3855105105105105, "grad_norm": 1.357018522205996, "learning_rate": 9.975336468610034e-06, "loss": 0.518, "step": 4108 }, { "epoch": 0.38560435435435436, "grad_norm": 1.252838598444716, "learning_rate": 9.975282277993266e-06, "loss": 0.4925, "step": 4109 }, { "epoch": 0.38569819819819817, "grad_norm": 1.3183335552946773, "learning_rate": 9.97522802805562e-06, "loss": 0.5222, "step": 4110 }, { "epoch": 0.38579204204204204, "grad_norm": 1.125680271787643, "learning_rate": 9.975173718797746e-06, "loss": 0.5485, "step": 4111 }, { "epoch": 0.3858858858858859, "grad_norm": 1.5443414789385863, "learning_rate": 9.975119350220291e-06, "loss": 0.4711, "step": 4112 }, { "epoch": 0.3859797297297297, "grad_norm": 1.2551407151833036, "learning_rate": 9.975064922323902e-06, "loss": 0.5305, "step": 4113 }, { "epoch": 0.3860735735735736, "grad_norm": 1.2035430398783415, "learning_rate": 9.975010435109227e-06, "loss": 0.5609, "step": 4114 }, { "epoch": 0.3861674174174174, "grad_norm": 2.420157905958131, "learning_rate": 9.974955888576919e-06, "loss": 0.48, "step": 4115 }, { "epoch": 0.38626126126126126, "grad_norm": 1.164342848379267, "learning_rate": 9.974901282727625e-06, "loss": 0.4982, "step": 4116 }, { "epoch": 0.3863551051051051, "grad_norm": 1.1491630378690554, "learning_rate": 9.974846617561999e-06, "loss": 0.5299, "step": 4117 }, { "epoch": 0.38644894894894893, "grad_norm": 2.0139565798366985, "learning_rate": 9.974791893080689e-06, "loss": 0.4895, "step": 4118 }, { "epoch": 0.3865427927927928, "grad_norm": 1.475453413421753, "learning_rate": 9.974737109284351e-06, "loss": 0.4973, "step": 4119 }, { "epoch": 0.3866366366366366, "grad_norm": 1.248335202939025, "learning_rate": 9.97468226617364e-06, "loss": 0.5162, "step": 4120 }, { "epoch": 0.3867304804804805, "grad_norm": 1.5395397732702243, "learning_rate": 9.974627363749202e-06, "loss": 0.4911, "step": 4121 }, { "epoch": 0.38682432432432434, "grad_norm": 1.3535398790541955, "learning_rate": 9.974572402011699e-06, "loss": 0.4972, "step": 4122 }, { "epoch": 0.38691816816816815, "grad_norm": 1.056619589309783, "learning_rate": 9.974517380961783e-06, "loss": 0.4631, "step": 4123 }, { "epoch": 0.387012012012012, "grad_norm": 1.2128391952989193, "learning_rate": 9.974462300600111e-06, "loss": 0.4429, "step": 4124 }, { "epoch": 0.38710585585585583, "grad_norm": 2.676564184979527, "learning_rate": 9.97440716092734e-06, "loss": 0.5439, "step": 4125 }, { "epoch": 0.3871996996996997, "grad_norm": 1.0739792129003967, "learning_rate": 9.974351961944127e-06, "loss": 0.4931, "step": 4126 }, { "epoch": 0.38729354354354356, "grad_norm": 1.4566609072254664, "learning_rate": 9.974296703651128e-06, "loss": 0.5243, "step": 4127 }, { "epoch": 0.38738738738738737, "grad_norm": 1.3209895322739262, "learning_rate": 9.974241386049007e-06, "loss": 0.4965, "step": 4128 }, { "epoch": 0.38748123123123124, "grad_norm": 1.2742170604670944, "learning_rate": 9.974186009138418e-06, "loss": 0.521, "step": 4129 }, { "epoch": 0.38757507507507505, "grad_norm": 1.3473326687947729, "learning_rate": 9.974130572920025e-06, "loss": 0.5179, "step": 4130 }, { "epoch": 0.3876689189189189, "grad_norm": 1.2315770747620802, "learning_rate": 9.974075077394488e-06, "loss": 0.4885, "step": 4131 }, { "epoch": 0.3877627627627628, "grad_norm": 1.3022020544728994, "learning_rate": 9.974019522562469e-06, "loss": 0.4486, "step": 4132 }, { "epoch": 0.3878566066066066, "grad_norm": 1.276167378530109, "learning_rate": 9.973963908424628e-06, "loss": 0.4878, "step": 4133 }, { "epoch": 0.38795045045045046, "grad_norm": 1.1454258392316716, "learning_rate": 9.97390823498163e-06, "loss": 0.4823, "step": 4134 }, { "epoch": 0.38804429429429427, "grad_norm": 1.1987312662368046, "learning_rate": 9.97385250223414e-06, "loss": 0.4782, "step": 4135 }, { "epoch": 0.38813813813813813, "grad_norm": 1.3647360713532002, "learning_rate": 9.97379671018282e-06, "loss": 0.4918, "step": 4136 }, { "epoch": 0.388231981981982, "grad_norm": 1.2960944748185332, "learning_rate": 9.973740858828339e-06, "loss": 0.4627, "step": 4137 }, { "epoch": 0.3883258258258258, "grad_norm": 1.4074455930956564, "learning_rate": 9.973684948171356e-06, "loss": 0.4996, "step": 4138 }, { "epoch": 0.3884196696696697, "grad_norm": 1.3699783638589809, "learning_rate": 9.973628978212546e-06, "loss": 0.5059, "step": 4139 }, { "epoch": 0.3885135135135135, "grad_norm": 1.267899211371926, "learning_rate": 9.97357294895257e-06, "loss": 0.4458, "step": 4140 }, { "epoch": 0.38860735735735735, "grad_norm": 1.3002314899206677, "learning_rate": 9.9735168603921e-06, "loss": 0.5359, "step": 4141 }, { "epoch": 0.3887012012012012, "grad_norm": 1.2412879428060657, "learning_rate": 9.973460712531802e-06, "loss": 0.5627, "step": 4142 }, { "epoch": 0.38879504504504503, "grad_norm": 1.1840343519005703, "learning_rate": 9.973404505372348e-06, "loss": 0.4791, "step": 4143 }, { "epoch": 0.3888888888888889, "grad_norm": 1.0589559366704122, "learning_rate": 9.973348238914404e-06, "loss": 0.5272, "step": 4144 }, { "epoch": 0.3889827327327327, "grad_norm": 1.0536344976279846, "learning_rate": 9.973291913158646e-06, "loss": 0.4802, "step": 4145 }, { "epoch": 0.38907657657657657, "grad_norm": 3.0386351006649353, "learning_rate": 9.973235528105743e-06, "loss": 0.5357, "step": 4146 }, { "epoch": 0.38917042042042044, "grad_norm": 1.3175498845586187, "learning_rate": 9.973179083756365e-06, "loss": 0.4674, "step": 4147 }, { "epoch": 0.38926426426426425, "grad_norm": 0.9940256465549215, "learning_rate": 9.973122580111191e-06, "loss": 0.4735, "step": 4148 }, { "epoch": 0.3893581081081081, "grad_norm": 1.265495641929711, "learning_rate": 9.973066017170889e-06, "loss": 0.5059, "step": 4149 }, { "epoch": 0.3894519519519519, "grad_norm": 1.2907609599388579, "learning_rate": 9.973009394936137e-06, "loss": 0.4702, "step": 4150 }, { "epoch": 0.3895457957957958, "grad_norm": 1.0857464869333182, "learning_rate": 9.972952713407607e-06, "loss": 0.522, "step": 4151 }, { "epoch": 0.38963963963963966, "grad_norm": 1.2500822496900243, "learning_rate": 9.972895972585978e-06, "loss": 0.5096, "step": 4152 }, { "epoch": 0.38973348348348347, "grad_norm": 1.114495884762726, "learning_rate": 9.972839172471924e-06, "loss": 0.5372, "step": 4153 }, { "epoch": 0.38982732732732733, "grad_norm": 1.1623466127855755, "learning_rate": 9.972782313066125e-06, "loss": 0.5231, "step": 4154 }, { "epoch": 0.38992117117117114, "grad_norm": 1.2164331827658164, "learning_rate": 9.972725394369255e-06, "loss": 0.4878, "step": 4155 }, { "epoch": 0.390015015015015, "grad_norm": 1.2428435715946373, "learning_rate": 9.972668416381996e-06, "loss": 0.4889, "step": 4156 }, { "epoch": 0.3901088588588589, "grad_norm": 1.1614395178126704, "learning_rate": 9.972611379105027e-06, "loss": 0.5269, "step": 4157 }, { "epoch": 0.3902027027027027, "grad_norm": 2.2766637777861978, "learning_rate": 9.972554282539025e-06, "loss": 0.5308, "step": 4158 }, { "epoch": 0.39029654654654655, "grad_norm": 1.6375206460129754, "learning_rate": 9.972497126684675e-06, "loss": 0.4948, "step": 4159 }, { "epoch": 0.39039039039039036, "grad_norm": 1.3015351140293168, "learning_rate": 9.972439911542656e-06, "loss": 0.5018, "step": 4160 }, { "epoch": 0.39048423423423423, "grad_norm": 1.2266501973884538, "learning_rate": 9.97238263711365e-06, "loss": 0.5035, "step": 4161 }, { "epoch": 0.3905780780780781, "grad_norm": 1.8844910228556073, "learning_rate": 9.972325303398341e-06, "loss": 0.4636, "step": 4162 }, { "epoch": 0.3906719219219219, "grad_norm": 1.1113028150415407, "learning_rate": 9.972267910397413e-06, "loss": 0.487, "step": 4163 }, { "epoch": 0.39076576576576577, "grad_norm": 1.0791274301502167, "learning_rate": 9.972210458111549e-06, "loss": 0.4707, "step": 4164 }, { "epoch": 0.39085960960960964, "grad_norm": 1.207209174420063, "learning_rate": 9.972152946541434e-06, "loss": 0.5151, "step": 4165 }, { "epoch": 0.39095345345345345, "grad_norm": 1.24588326359598, "learning_rate": 9.972095375687754e-06, "loss": 0.4827, "step": 4166 }, { "epoch": 0.3910472972972973, "grad_norm": 1.1464521632319187, "learning_rate": 9.972037745551197e-06, "loss": 0.4843, "step": 4167 }, { "epoch": 0.3911411411411411, "grad_norm": 1.4918686525024065, "learning_rate": 9.971980056132447e-06, "loss": 0.4881, "step": 4168 }, { "epoch": 0.391234984984985, "grad_norm": 1.0776594983487366, "learning_rate": 9.971922307432196e-06, "loss": 0.443, "step": 4169 }, { "epoch": 0.39132882882882886, "grad_norm": 2.5328146745734923, "learning_rate": 9.971864499451127e-06, "loss": 0.5052, "step": 4170 }, { "epoch": 0.39142267267267267, "grad_norm": 1.0506631392948351, "learning_rate": 9.971806632189935e-06, "loss": 0.4897, "step": 4171 }, { "epoch": 0.39151651651651653, "grad_norm": 3.1535866241752775, "learning_rate": 9.971748705649307e-06, "loss": 0.4884, "step": 4172 }, { "epoch": 0.39161036036036034, "grad_norm": 1.4215531780369732, "learning_rate": 9.971690719829934e-06, "loss": 0.5771, "step": 4173 }, { "epoch": 0.3917042042042042, "grad_norm": 3.336844151430037, "learning_rate": 9.971632674732507e-06, "loss": 0.5489, "step": 4174 }, { "epoch": 0.3917980480480481, "grad_norm": 1.163189996685051, "learning_rate": 9.971574570357718e-06, "loss": 0.5038, "step": 4175 }, { "epoch": 0.3918918918918919, "grad_norm": 1.2332138582059353, "learning_rate": 9.971516406706261e-06, "loss": 0.475, "step": 4176 }, { "epoch": 0.39198573573573575, "grad_norm": 1.0496138011228462, "learning_rate": 9.971458183778829e-06, "loss": 0.4902, "step": 4177 }, { "epoch": 0.39207957957957956, "grad_norm": 1.3040091575083248, "learning_rate": 9.971399901576116e-06, "loss": 0.4717, "step": 4178 }, { "epoch": 0.39217342342342343, "grad_norm": 1.4221623210299659, "learning_rate": 9.971341560098816e-06, "loss": 0.4909, "step": 4179 }, { "epoch": 0.3922672672672673, "grad_norm": 1.3243985898938921, "learning_rate": 9.971283159347627e-06, "loss": 0.5551, "step": 4180 }, { "epoch": 0.3923611111111111, "grad_norm": 1.372661062814225, "learning_rate": 9.971224699323242e-06, "loss": 0.4812, "step": 4181 }, { "epoch": 0.39245495495495497, "grad_norm": 5.467156583947544, "learning_rate": 9.97116618002636e-06, "loss": 0.5067, "step": 4182 }, { "epoch": 0.3925487987987988, "grad_norm": 1.2015383765696073, "learning_rate": 9.97110760145768e-06, "loss": 0.5069, "step": 4183 }, { "epoch": 0.39264264264264265, "grad_norm": 1.2607031166458633, "learning_rate": 9.971048963617898e-06, "loss": 0.4711, "step": 4184 }, { "epoch": 0.3927364864864865, "grad_norm": 1.163121265865259, "learning_rate": 9.970990266507715e-06, "loss": 0.5115, "step": 4185 }, { "epoch": 0.3928303303303303, "grad_norm": 1.4588480468827656, "learning_rate": 9.970931510127828e-06, "loss": 0.5419, "step": 4186 }, { "epoch": 0.3929241741741742, "grad_norm": 1.1697246009870035, "learning_rate": 9.970872694478942e-06, "loss": 0.5263, "step": 4187 }, { "epoch": 0.393018018018018, "grad_norm": 1.0992072153020291, "learning_rate": 9.970813819561753e-06, "loss": 0.5417, "step": 4188 }, { "epoch": 0.39311186186186187, "grad_norm": 1.3743626497822363, "learning_rate": 9.970754885376967e-06, "loss": 0.5356, "step": 4189 }, { "epoch": 0.39320570570570573, "grad_norm": 1.2503636396407798, "learning_rate": 9.970695891925287e-06, "loss": 0.5429, "step": 4190 }, { "epoch": 0.39329954954954954, "grad_norm": 1.2678197992544447, "learning_rate": 9.970636839207414e-06, "loss": 0.4667, "step": 4191 }, { "epoch": 0.3933933933933934, "grad_norm": 1.3036288973045718, "learning_rate": 9.970577727224053e-06, "loss": 0.512, "step": 4192 }, { "epoch": 0.3934872372372372, "grad_norm": 1.4327301016904213, "learning_rate": 9.970518555975907e-06, "loss": 0.5406, "step": 4193 }, { "epoch": 0.3935810810810811, "grad_norm": 1.3272270106080628, "learning_rate": 9.970459325463686e-06, "loss": 0.5199, "step": 4194 }, { "epoch": 0.39367492492492495, "grad_norm": 1.2398081027803338, "learning_rate": 9.970400035688092e-06, "loss": 0.4569, "step": 4195 }, { "epoch": 0.39376876876876876, "grad_norm": 1.261798825205933, "learning_rate": 9.970340686649833e-06, "loss": 0.4981, "step": 4196 }, { "epoch": 0.39386261261261263, "grad_norm": 1.2509312642196881, "learning_rate": 9.970281278349619e-06, "loss": 0.5057, "step": 4197 }, { "epoch": 0.39395645645645644, "grad_norm": 1.7359297725341523, "learning_rate": 9.970221810788154e-06, "loss": 0.4617, "step": 4198 }, { "epoch": 0.3940503003003003, "grad_norm": 1.966995552134873, "learning_rate": 9.97016228396615e-06, "loss": 0.485, "step": 4199 }, { "epoch": 0.39414414414414417, "grad_norm": 1.6747374806972255, "learning_rate": 9.970102697884318e-06, "loss": 0.5436, "step": 4200 }, { "epoch": 0.394237987987988, "grad_norm": 1.5098001242433303, "learning_rate": 9.970043052543365e-06, "loss": 0.5825, "step": 4201 }, { "epoch": 0.39433183183183185, "grad_norm": 2.0208610969417036, "learning_rate": 9.969983347944004e-06, "loss": 0.5046, "step": 4202 }, { "epoch": 0.39442567567567566, "grad_norm": 1.479677595365836, "learning_rate": 9.969923584086946e-06, "loss": 0.4619, "step": 4203 }, { "epoch": 0.3945195195195195, "grad_norm": 1.5497107726823047, "learning_rate": 9.969863760972904e-06, "loss": 0.5239, "step": 4204 }, { "epoch": 0.3946133633633634, "grad_norm": 1.2744130246065355, "learning_rate": 9.969803878602592e-06, "loss": 0.4375, "step": 4205 }, { "epoch": 0.3947072072072072, "grad_norm": 1.266280002573145, "learning_rate": 9.969743936976723e-06, "loss": 0.5255, "step": 4206 }, { "epoch": 0.39480105105105107, "grad_norm": 1.0972202115721799, "learning_rate": 9.969683936096013e-06, "loss": 0.5225, "step": 4207 }, { "epoch": 0.3948948948948949, "grad_norm": 1.5108001063676342, "learning_rate": 9.969623875961177e-06, "loss": 0.5282, "step": 4208 }, { "epoch": 0.39498873873873874, "grad_norm": 1.1395301066901669, "learning_rate": 9.96956375657293e-06, "loss": 0.4935, "step": 4209 }, { "epoch": 0.3950825825825826, "grad_norm": 1.3311184098425901, "learning_rate": 9.96950357793199e-06, "loss": 0.5045, "step": 4210 }, { "epoch": 0.3951764264264264, "grad_norm": 1.1351950450748838, "learning_rate": 9.969443340039072e-06, "loss": 0.5145, "step": 4211 }, { "epoch": 0.3952702702702703, "grad_norm": 1.2414465180955843, "learning_rate": 9.969383042894899e-06, "loss": 0.5073, "step": 4212 }, { "epoch": 0.3953641141141141, "grad_norm": 1.1755026231442334, "learning_rate": 9.969322686500185e-06, "loss": 0.5431, "step": 4213 }, { "epoch": 0.39545795795795796, "grad_norm": 1.1824857710207335, "learning_rate": 9.96926227085565e-06, "loss": 0.4796, "step": 4214 }, { "epoch": 0.39555180180180183, "grad_norm": 1.3292199753381215, "learning_rate": 9.96920179596202e-06, "loss": 0.512, "step": 4215 }, { "epoch": 0.39564564564564564, "grad_norm": 1.337054240872743, "learning_rate": 9.96914126182001e-06, "loss": 0.5223, "step": 4216 }, { "epoch": 0.3957394894894895, "grad_norm": 1.1490244463437376, "learning_rate": 9.969080668430344e-06, "loss": 0.4779, "step": 4217 }, { "epoch": 0.3958333333333333, "grad_norm": 1.1916870349698907, "learning_rate": 9.969020015793746e-06, "loss": 0.415, "step": 4218 }, { "epoch": 0.3959271771771772, "grad_norm": 1.1045628873757836, "learning_rate": 9.968959303910936e-06, "loss": 0.5168, "step": 4219 }, { "epoch": 0.39602102102102105, "grad_norm": 1.263155232380357, "learning_rate": 9.96889853278264e-06, "loss": 0.5045, "step": 4220 }, { "epoch": 0.39611486486486486, "grad_norm": 1.0813374080141471, "learning_rate": 9.968837702409581e-06, "loss": 0.4677, "step": 4221 }, { "epoch": 0.3962087087087087, "grad_norm": 1.2225419575458956, "learning_rate": 9.968776812792487e-06, "loss": 0.515, "step": 4222 }, { "epoch": 0.39630255255255253, "grad_norm": 1.2295601748087788, "learning_rate": 9.96871586393208e-06, "loss": 0.476, "step": 4223 }, { "epoch": 0.3963963963963964, "grad_norm": 1.3819975023043412, "learning_rate": 9.96865485582909e-06, "loss": 0.524, "step": 4224 }, { "epoch": 0.39649024024024027, "grad_norm": 1.2001106109642214, "learning_rate": 9.968593788484245e-06, "loss": 0.4689, "step": 4225 }, { "epoch": 0.3965840840840841, "grad_norm": 1.036988457706122, "learning_rate": 9.96853266189827e-06, "loss": 0.4342, "step": 4226 }, { "epoch": 0.39667792792792794, "grad_norm": 1.2104321261545496, "learning_rate": 9.968471476071895e-06, "loss": 0.5206, "step": 4227 }, { "epoch": 0.39677177177177175, "grad_norm": 1.327701875150805, "learning_rate": 9.96841023100585e-06, "loss": 0.5635, "step": 4228 }, { "epoch": 0.3968656156156156, "grad_norm": 1.3896835595027524, "learning_rate": 9.968348926700865e-06, "loss": 0.5258, "step": 4229 }, { "epoch": 0.3969594594594595, "grad_norm": 1.426091195444243, "learning_rate": 9.968287563157672e-06, "loss": 0.5225, "step": 4230 }, { "epoch": 0.3970533033033033, "grad_norm": 2.049520844458972, "learning_rate": 9.968226140377e-06, "loss": 0.4803, "step": 4231 }, { "epoch": 0.39714714714714716, "grad_norm": 1.2211727663856775, "learning_rate": 9.968164658359584e-06, "loss": 0.5375, "step": 4232 }, { "epoch": 0.39724099099099097, "grad_norm": 2.204724271731995, "learning_rate": 9.968103117106156e-06, "loss": 0.4863, "step": 4233 }, { "epoch": 0.39733483483483484, "grad_norm": 2.2856965321126186, "learning_rate": 9.96804151661745e-06, "loss": 0.5032, "step": 4234 }, { "epoch": 0.3974286786786787, "grad_norm": 1.1648375071712178, "learning_rate": 9.9679798568942e-06, "loss": 0.4801, "step": 4235 }, { "epoch": 0.3975225225225225, "grad_norm": 1.159535577080204, "learning_rate": 9.967918137937141e-06, "loss": 0.4782, "step": 4236 }, { "epoch": 0.3976163663663664, "grad_norm": 1.2499335065887098, "learning_rate": 9.96785635974701e-06, "loss": 0.4632, "step": 4237 }, { "epoch": 0.3977102102102102, "grad_norm": 1.2161177557323273, "learning_rate": 9.967794522324544e-06, "loss": 0.509, "step": 4238 }, { "epoch": 0.39780405405405406, "grad_norm": 1.2283077125915878, "learning_rate": 9.967732625670477e-06, "loss": 0.4927, "step": 4239 }, { "epoch": 0.3978978978978979, "grad_norm": 1.2683322945411641, "learning_rate": 9.96767066978555e-06, "loss": 0.5381, "step": 4240 }, { "epoch": 0.39799174174174173, "grad_norm": 1.053401618190575, "learning_rate": 9.967608654670502e-06, "loss": 0.4654, "step": 4241 }, { "epoch": 0.3980855855855856, "grad_norm": 1.2874280511845495, "learning_rate": 9.96754658032607e-06, "loss": 0.5144, "step": 4242 }, { "epoch": 0.3981794294294294, "grad_norm": 1.2001777885423002, "learning_rate": 9.967484446752997e-06, "loss": 0.5023, "step": 4243 }, { "epoch": 0.3982732732732733, "grad_norm": 1.235428382550118, "learning_rate": 9.967422253952022e-06, "loss": 0.5655, "step": 4244 }, { "epoch": 0.39836711711711714, "grad_norm": 1.2201949995409889, "learning_rate": 9.967360001923884e-06, "loss": 0.4989, "step": 4245 }, { "epoch": 0.39846096096096095, "grad_norm": 1.7725250687661236, "learning_rate": 9.967297690669332e-06, "loss": 0.4988, "step": 4246 }, { "epoch": 0.3985548048048048, "grad_norm": 1.0980554123230224, "learning_rate": 9.967235320189104e-06, "loss": 0.5306, "step": 4247 }, { "epoch": 0.39864864864864863, "grad_norm": 1.1802454466683434, "learning_rate": 9.967172890483942e-06, "loss": 0.5218, "step": 4248 }, { "epoch": 0.3987424924924925, "grad_norm": 1.2128718161339227, "learning_rate": 9.967110401554595e-06, "loss": 0.5372, "step": 4249 }, { "epoch": 0.39883633633633636, "grad_norm": 1.258799987590594, "learning_rate": 9.967047853401808e-06, "loss": 0.5054, "step": 4250 }, { "epoch": 0.39893018018018017, "grad_norm": 1.0644398725311472, "learning_rate": 9.966985246026322e-06, "loss": 0.4645, "step": 4251 }, { "epoch": 0.39902402402402404, "grad_norm": 1.23000887921767, "learning_rate": 9.966922579428888e-06, "loss": 0.4336, "step": 4252 }, { "epoch": 0.39911786786786785, "grad_norm": 1.6871511575798548, "learning_rate": 9.96685985361025e-06, "loss": 0.5063, "step": 4253 }, { "epoch": 0.3992117117117117, "grad_norm": 1.2672869124929034, "learning_rate": 9.966797068571159e-06, "loss": 0.5284, "step": 4254 }, { "epoch": 0.3993055555555556, "grad_norm": 1.1551586303177523, "learning_rate": 9.96673422431236e-06, "loss": 0.4605, "step": 4255 }, { "epoch": 0.3993993993993994, "grad_norm": 1.2285086841224016, "learning_rate": 9.966671320834606e-06, "loss": 0.4955, "step": 4256 }, { "epoch": 0.39949324324324326, "grad_norm": 1.0933270992711268, "learning_rate": 9.966608358138645e-06, "loss": 0.4518, "step": 4257 }, { "epoch": 0.39958708708708707, "grad_norm": 1.0440778621127496, "learning_rate": 9.966545336225227e-06, "loss": 0.4622, "step": 4258 }, { "epoch": 0.39968093093093093, "grad_norm": 1.1514256615506395, "learning_rate": 9.966482255095106e-06, "loss": 0.4784, "step": 4259 }, { "epoch": 0.3997747747747748, "grad_norm": 1.380187858122555, "learning_rate": 9.966419114749031e-06, "loss": 0.5562, "step": 4260 }, { "epoch": 0.3998686186186186, "grad_norm": 1.3094456833578807, "learning_rate": 9.966355915187758e-06, "loss": 0.4846, "step": 4261 }, { "epoch": 0.3999624624624625, "grad_norm": 1.4079542557998954, "learning_rate": 9.966292656412038e-06, "loss": 0.4728, "step": 4262 }, { "epoch": 0.4000563063063063, "grad_norm": 5.454654118588429, "learning_rate": 9.966229338422626e-06, "loss": 0.5317, "step": 4263 }, { "epoch": 0.40015015015015015, "grad_norm": 1.1008841984838895, "learning_rate": 9.966165961220277e-06, "loss": 0.5164, "step": 4264 }, { "epoch": 0.400243993993994, "grad_norm": 1.2297578979621206, "learning_rate": 9.966102524805747e-06, "loss": 0.5268, "step": 4265 }, { "epoch": 0.40033783783783783, "grad_norm": 1.438516919801444, "learning_rate": 9.966039029179793e-06, "loss": 0.5052, "step": 4266 }, { "epoch": 0.4004316816816817, "grad_norm": 1.3284156334781343, "learning_rate": 9.965975474343171e-06, "loss": 0.446, "step": 4267 }, { "epoch": 0.4005255255255255, "grad_norm": 1.600168000141991, "learning_rate": 9.965911860296637e-06, "loss": 0.531, "step": 4268 }, { "epoch": 0.40061936936936937, "grad_norm": 1.6753727245685175, "learning_rate": 9.965848187040953e-06, "loss": 0.4644, "step": 4269 }, { "epoch": 0.40071321321321324, "grad_norm": 1.224342478178752, "learning_rate": 9.965784454576877e-06, "loss": 0.4749, "step": 4270 }, { "epoch": 0.40080705705705705, "grad_norm": 1.5328576268753722, "learning_rate": 9.965720662905167e-06, "loss": 0.5437, "step": 4271 }, { "epoch": 0.4009009009009009, "grad_norm": 1.2469514873127714, "learning_rate": 9.965656812026586e-06, "loss": 0.5098, "step": 4272 }, { "epoch": 0.4009947447447447, "grad_norm": 1.1945844891116655, "learning_rate": 9.965592901941893e-06, "loss": 0.5153, "step": 4273 }, { "epoch": 0.4010885885885886, "grad_norm": 1.1070490425917194, "learning_rate": 9.965528932651854e-06, "loss": 0.4986, "step": 4274 }, { "epoch": 0.40118243243243246, "grad_norm": 1.1595571318863829, "learning_rate": 9.965464904157228e-06, "loss": 0.4801, "step": 4275 }, { "epoch": 0.40127627627627627, "grad_norm": 1.1755980016380185, "learning_rate": 9.96540081645878e-06, "loss": 0.5403, "step": 4276 }, { "epoch": 0.40137012012012013, "grad_norm": 2.726601560120434, "learning_rate": 9.965336669557272e-06, "loss": 0.531, "step": 4277 }, { "epoch": 0.40146396396396394, "grad_norm": 1.3264360330273888, "learning_rate": 9.965272463453473e-06, "loss": 0.5305, "step": 4278 }, { "epoch": 0.4015578078078078, "grad_norm": 1.1598709826028837, "learning_rate": 9.965208198148145e-06, "loss": 0.4792, "step": 4279 }, { "epoch": 0.4016516516516517, "grad_norm": 1.2060953078596224, "learning_rate": 9.965143873642053e-06, "loss": 0.5402, "step": 4280 }, { "epoch": 0.4017454954954955, "grad_norm": 1.0876478078936014, "learning_rate": 9.965079489935969e-06, "loss": 0.4984, "step": 4281 }, { "epoch": 0.40183933933933935, "grad_norm": 2.3080695034119523, "learning_rate": 9.965015047030657e-06, "loss": 0.506, "step": 4282 }, { "epoch": 0.40193318318318316, "grad_norm": 1.2019609039175434, "learning_rate": 9.964950544926887e-06, "loss": 0.5165, "step": 4283 }, { "epoch": 0.40202702702702703, "grad_norm": 3.095195186627356, "learning_rate": 9.964885983625427e-06, "loss": 0.5643, "step": 4284 }, { "epoch": 0.4021208708708709, "grad_norm": 1.2159730594259786, "learning_rate": 9.964821363127047e-06, "loss": 0.5704, "step": 4285 }, { "epoch": 0.4022147147147147, "grad_norm": 1.4959877919402271, "learning_rate": 9.964756683432516e-06, "loss": 0.5124, "step": 4286 }, { "epoch": 0.40230855855855857, "grad_norm": 1.1452811880291378, "learning_rate": 9.964691944542608e-06, "loss": 0.5158, "step": 4287 }, { "epoch": 0.4024024024024024, "grad_norm": 1.2271791072588478, "learning_rate": 9.964627146458095e-06, "loss": 0.5243, "step": 4288 }, { "epoch": 0.40249624624624625, "grad_norm": 1.1323043382926428, "learning_rate": 9.964562289179746e-06, "loss": 0.4781, "step": 4289 }, { "epoch": 0.4025900900900901, "grad_norm": 1.15282468139491, "learning_rate": 9.964497372708337e-06, "loss": 0.5193, "step": 4290 }, { "epoch": 0.4026839339339339, "grad_norm": 1.3220193593745349, "learning_rate": 9.964432397044642e-06, "loss": 0.4729, "step": 4291 }, { "epoch": 0.4027777777777778, "grad_norm": 1.189082775571802, "learning_rate": 9.964367362189435e-06, "loss": 0.4872, "step": 4292 }, { "epoch": 0.4028716216216216, "grad_norm": 1.4839054276412513, "learning_rate": 9.964302268143491e-06, "loss": 0.5318, "step": 4293 }, { "epoch": 0.40296546546546547, "grad_norm": 1.4072050166579906, "learning_rate": 9.964237114907588e-06, "loss": 0.4909, "step": 4294 }, { "epoch": 0.40305930930930933, "grad_norm": 1.2474944719805583, "learning_rate": 9.964171902482501e-06, "loss": 0.5052, "step": 4295 }, { "epoch": 0.40315315315315314, "grad_norm": 1.2064929791407049, "learning_rate": 9.964106630869009e-06, "loss": 0.5137, "step": 4296 }, { "epoch": 0.403246996996997, "grad_norm": 1.3531369656134178, "learning_rate": 9.96404130006789e-06, "loss": 0.5205, "step": 4297 }, { "epoch": 0.4033408408408408, "grad_norm": 1.282776548329594, "learning_rate": 9.96397591007992e-06, "loss": 0.4928, "step": 4298 }, { "epoch": 0.4034346846846847, "grad_norm": 1.1088816624199453, "learning_rate": 9.963910460905882e-06, "loss": 0.4607, "step": 4299 }, { "epoch": 0.40352852852852855, "grad_norm": 1.2327630984014901, "learning_rate": 9.963844952546557e-06, "loss": 0.4867, "step": 4300 }, { "epoch": 0.40362237237237236, "grad_norm": 1.3113857240237117, "learning_rate": 9.963779385002723e-06, "loss": 0.4728, "step": 4301 }, { "epoch": 0.40371621621621623, "grad_norm": 1.0268458368077813, "learning_rate": 9.963713758275161e-06, "loss": 0.4847, "step": 4302 }, { "epoch": 0.40381006006006004, "grad_norm": 1.427142332046685, "learning_rate": 9.96364807236466e-06, "loss": 0.5031, "step": 4303 }, { "epoch": 0.4039039039039039, "grad_norm": 1.3849601869319668, "learning_rate": 9.963582327271996e-06, "loss": 0.4966, "step": 4304 }, { "epoch": 0.40399774774774777, "grad_norm": 1.2487377232461074, "learning_rate": 9.963516522997956e-06, "loss": 0.538, "step": 4305 }, { "epoch": 0.4040915915915916, "grad_norm": 2.691901650926927, "learning_rate": 9.963450659543324e-06, "loss": 0.4817, "step": 4306 }, { "epoch": 0.40418543543543545, "grad_norm": 1.317758105604103, "learning_rate": 9.963384736908887e-06, "loss": 0.4918, "step": 4307 }, { "epoch": 0.40427927927927926, "grad_norm": 0.9634304311008732, "learning_rate": 9.963318755095428e-06, "loss": 0.4435, "step": 4308 }, { "epoch": 0.4043731231231231, "grad_norm": 1.9061136692109004, "learning_rate": 9.963252714103737e-06, "loss": 0.4855, "step": 4309 }, { "epoch": 0.404466966966967, "grad_norm": 1.2350177991459994, "learning_rate": 9.9631866139346e-06, "loss": 0.5146, "step": 4310 }, { "epoch": 0.4045608108108108, "grad_norm": 1.2248087097030786, "learning_rate": 9.963120454588802e-06, "loss": 0.4807, "step": 4311 }, { "epoch": 0.40465465465465467, "grad_norm": 1.1506054531451373, "learning_rate": 9.963054236067137e-06, "loss": 0.5126, "step": 4312 }, { "epoch": 0.4047484984984985, "grad_norm": 1.449798687123382, "learning_rate": 9.962987958370392e-06, "loss": 0.5494, "step": 4313 }, { "epoch": 0.40484234234234234, "grad_norm": 1.2928684332235498, "learning_rate": 9.962921621499358e-06, "loss": 0.5104, "step": 4314 }, { "epoch": 0.4049361861861862, "grad_norm": 1.207144954154509, "learning_rate": 9.962855225454823e-06, "loss": 0.431, "step": 4315 }, { "epoch": 0.40503003003003, "grad_norm": 1.2645297908550563, "learning_rate": 9.962788770237584e-06, "loss": 0.5032, "step": 4316 }, { "epoch": 0.4051238738738739, "grad_norm": 1.1839337420172815, "learning_rate": 9.96272225584843e-06, "loss": 0.5283, "step": 4317 }, { "epoch": 0.4052177177177177, "grad_norm": 1.0194296801213891, "learning_rate": 9.962655682288153e-06, "loss": 0.5115, "step": 4318 }, { "epoch": 0.40531156156156156, "grad_norm": 1.2694164662986656, "learning_rate": 9.962589049557548e-06, "loss": 0.4812, "step": 4319 }, { "epoch": 0.40540540540540543, "grad_norm": 1.0872062836918348, "learning_rate": 9.96252235765741e-06, "loss": 0.4799, "step": 4320 }, { "epoch": 0.40549924924924924, "grad_norm": 1.226144049897166, "learning_rate": 9.962455606588535e-06, "loss": 0.4663, "step": 4321 }, { "epoch": 0.4055930930930931, "grad_norm": 1.3057813523384465, "learning_rate": 9.962388796351717e-06, "loss": 0.5333, "step": 4322 }, { "epoch": 0.4056869369369369, "grad_norm": 1.1584658188595147, "learning_rate": 9.962321926947753e-06, "loss": 0.4476, "step": 4323 }, { "epoch": 0.4057807807807808, "grad_norm": 1.0961129248358468, "learning_rate": 9.962254998377442e-06, "loss": 0.4933, "step": 4324 }, { "epoch": 0.40587462462462465, "grad_norm": 1.106119451909729, "learning_rate": 9.96218801064158e-06, "loss": 0.488, "step": 4325 }, { "epoch": 0.40596846846846846, "grad_norm": 1.3369261590927184, "learning_rate": 9.962120963740966e-06, "loss": 0.4841, "step": 4326 }, { "epoch": 0.4060623123123123, "grad_norm": 1.676039545130964, "learning_rate": 9.9620538576764e-06, "loss": 0.5261, "step": 4327 }, { "epoch": 0.40615615615615613, "grad_norm": 1.205070676019884, "learning_rate": 9.961986692448681e-06, "loss": 0.4548, "step": 4328 }, { "epoch": 0.40625, "grad_norm": 1.0888697368767584, "learning_rate": 9.961919468058611e-06, "loss": 0.4178, "step": 4329 }, { "epoch": 0.40634384384384387, "grad_norm": 1.33524422605455, "learning_rate": 9.961852184506992e-06, "loss": 0.5576, "step": 4330 }, { "epoch": 0.4064376876876877, "grad_norm": 1.242379954888344, "learning_rate": 9.961784841794624e-06, "loss": 0.4892, "step": 4331 }, { "epoch": 0.40653153153153154, "grad_norm": 1.3122328176717635, "learning_rate": 9.961717439922314e-06, "loss": 0.4736, "step": 4332 }, { "epoch": 0.40662537537537535, "grad_norm": 1.346179919314903, "learning_rate": 9.961649978890861e-06, "loss": 0.5414, "step": 4333 }, { "epoch": 0.4067192192192192, "grad_norm": 1.3979234700995633, "learning_rate": 9.96158245870107e-06, "loss": 0.4735, "step": 4334 }, { "epoch": 0.4068130630630631, "grad_norm": 1.185510817549807, "learning_rate": 9.96151487935375e-06, "loss": 0.4533, "step": 4335 }, { "epoch": 0.4069069069069069, "grad_norm": 1.8200018646932667, "learning_rate": 9.961447240849703e-06, "loss": 0.4961, "step": 4336 }, { "epoch": 0.40700075075075076, "grad_norm": 1.0676872753017908, "learning_rate": 9.961379543189737e-06, "loss": 0.4813, "step": 4337 }, { "epoch": 0.40709459459459457, "grad_norm": 1.123882262744494, "learning_rate": 9.961311786374659e-06, "loss": 0.4467, "step": 4338 }, { "epoch": 0.40718843843843844, "grad_norm": 1.0805898451947111, "learning_rate": 9.961243970405276e-06, "loss": 0.4536, "step": 4339 }, { "epoch": 0.4072822822822823, "grad_norm": 1.2042685402191522, "learning_rate": 9.961176095282397e-06, "loss": 0.4954, "step": 4340 }, { "epoch": 0.4073761261261261, "grad_norm": 1.140794711916872, "learning_rate": 9.961108161006832e-06, "loss": 0.521, "step": 4341 }, { "epoch": 0.40746996996997, "grad_norm": 5.629268665125242, "learning_rate": 9.961040167579391e-06, "loss": 0.4885, "step": 4342 }, { "epoch": 0.4075638138138138, "grad_norm": 1.3293816487201404, "learning_rate": 9.960972115000884e-06, "loss": 0.485, "step": 4343 }, { "epoch": 0.40765765765765766, "grad_norm": 1.1835205131169133, "learning_rate": 9.960904003272123e-06, "loss": 0.5244, "step": 4344 }, { "epoch": 0.4077515015015015, "grad_norm": 1.211007699754388, "learning_rate": 9.960835832393918e-06, "loss": 0.5115, "step": 4345 }, { "epoch": 0.40784534534534533, "grad_norm": 1.0988489351162263, "learning_rate": 9.960767602367085e-06, "loss": 0.5057, "step": 4346 }, { "epoch": 0.4079391891891892, "grad_norm": 1.2218997743808568, "learning_rate": 9.960699313192435e-06, "loss": 0.5046, "step": 4347 }, { "epoch": 0.408033033033033, "grad_norm": 1.3340132990639841, "learning_rate": 9.960630964870783e-06, "loss": 0.4792, "step": 4348 }, { "epoch": 0.4081268768768769, "grad_norm": 1.4179511928055628, "learning_rate": 9.960562557402947e-06, "loss": 0.5428, "step": 4349 }, { "epoch": 0.40822072072072074, "grad_norm": 2.1615501967740385, "learning_rate": 9.960494090789736e-06, "loss": 0.4904, "step": 4350 }, { "epoch": 0.40831456456456455, "grad_norm": 1.4187231774427647, "learning_rate": 9.960425565031973e-06, "loss": 0.5178, "step": 4351 }, { "epoch": 0.4084084084084084, "grad_norm": 1.114290354192932, "learning_rate": 9.96035698013047e-06, "loss": 0.4931, "step": 4352 }, { "epoch": 0.40850225225225223, "grad_norm": 1.0892339986098731, "learning_rate": 9.960288336086049e-06, "loss": 0.4722, "step": 4353 }, { "epoch": 0.4085960960960961, "grad_norm": 1.0941144687824638, "learning_rate": 9.960219632899525e-06, "loss": 0.4796, "step": 4354 }, { "epoch": 0.40868993993993996, "grad_norm": 1.0916008902803558, "learning_rate": 9.960150870571718e-06, "loss": 0.4984, "step": 4355 }, { "epoch": 0.40878378378378377, "grad_norm": 1.592229069857114, "learning_rate": 9.96008204910345e-06, "loss": 0.4722, "step": 4356 }, { "epoch": 0.40887762762762764, "grad_norm": 1.0751046253755197, "learning_rate": 9.96001316849554e-06, "loss": 0.4676, "step": 4357 }, { "epoch": 0.40897147147147145, "grad_norm": 1.2806464629310004, "learning_rate": 9.959944228748808e-06, "loss": 0.5601, "step": 4358 }, { "epoch": 0.4090653153153153, "grad_norm": 1.1460042754349362, "learning_rate": 9.959875229864077e-06, "loss": 0.5502, "step": 4359 }, { "epoch": 0.4091591591591592, "grad_norm": 1.3080580698193651, "learning_rate": 9.95980617184217e-06, "loss": 0.5777, "step": 4360 }, { "epoch": 0.409253003003003, "grad_norm": 1.1771912262690727, "learning_rate": 9.959737054683912e-06, "loss": 0.5268, "step": 4361 }, { "epoch": 0.40934684684684686, "grad_norm": 1.4534321258113538, "learning_rate": 9.959667878390124e-06, "loss": 0.5403, "step": 4362 }, { "epoch": 0.40944069069069067, "grad_norm": 1.093993251418149, "learning_rate": 9.959598642961633e-06, "loss": 0.5018, "step": 4363 }, { "epoch": 0.40953453453453453, "grad_norm": 1.4334769748634066, "learning_rate": 9.959529348399262e-06, "loss": 0.4995, "step": 4364 }, { "epoch": 0.4096283783783784, "grad_norm": 1.2101221478725144, "learning_rate": 9.95945999470384e-06, "loss": 0.5049, "step": 4365 }, { "epoch": 0.4097222222222222, "grad_norm": 1.4034393858665744, "learning_rate": 9.959390581876194e-06, "loss": 0.492, "step": 4366 }, { "epoch": 0.4098160660660661, "grad_norm": 1.0555977544323658, "learning_rate": 9.959321109917149e-06, "loss": 0.5151, "step": 4367 }, { "epoch": 0.4099099099099099, "grad_norm": 1.1045728069610716, "learning_rate": 9.959251578827534e-06, "loss": 0.4397, "step": 4368 }, { "epoch": 0.41000375375375375, "grad_norm": 1.326063234466467, "learning_rate": 9.95918198860818e-06, "loss": 0.4831, "step": 4369 }, { "epoch": 0.4100975975975976, "grad_norm": 1.1343115237411543, "learning_rate": 9.959112339259915e-06, "loss": 0.4771, "step": 4370 }, { "epoch": 0.41019144144144143, "grad_norm": 1.2319964414609812, "learning_rate": 9.959042630783571e-06, "loss": 0.5069, "step": 4371 }, { "epoch": 0.4102852852852853, "grad_norm": 1.2104854249060475, "learning_rate": 9.958972863179976e-06, "loss": 0.4064, "step": 4372 }, { "epoch": 0.4103791291291291, "grad_norm": 1.2262745084286906, "learning_rate": 9.958903036449964e-06, "loss": 0.4728, "step": 4373 }, { "epoch": 0.41047297297297297, "grad_norm": 1.2478280160408597, "learning_rate": 9.95883315059437e-06, "loss": 0.5223, "step": 4374 }, { "epoch": 0.41056681681681684, "grad_norm": 1.6511728128163814, "learning_rate": 9.958763205614023e-06, "loss": 0.4807, "step": 4375 }, { "epoch": 0.41066066066066065, "grad_norm": 1.1043678022713692, "learning_rate": 9.958693201509759e-06, "loss": 0.4886, "step": 4376 }, { "epoch": 0.4107545045045045, "grad_norm": 1.237436845087592, "learning_rate": 9.958623138282412e-06, "loss": 0.496, "step": 4377 }, { "epoch": 0.4108483483483483, "grad_norm": 1.585424769435019, "learning_rate": 9.958553015932818e-06, "loss": 0.4839, "step": 4378 }, { "epoch": 0.4109421921921922, "grad_norm": 1.4035916296197035, "learning_rate": 9.958482834461813e-06, "loss": 0.4981, "step": 4379 }, { "epoch": 0.41103603603603606, "grad_norm": 1.019474585041138, "learning_rate": 9.958412593870233e-06, "loss": 0.4696, "step": 4380 }, { "epoch": 0.41112987987987987, "grad_norm": 1.4375148737650287, "learning_rate": 9.958342294158916e-06, "loss": 0.5017, "step": 4381 }, { "epoch": 0.41122372372372373, "grad_norm": 1.203427331194027, "learning_rate": 9.9582719353287e-06, "loss": 0.4045, "step": 4382 }, { "epoch": 0.41131756756756754, "grad_norm": 1.8443243252326225, "learning_rate": 9.958201517380426e-06, "loss": 0.5164, "step": 4383 }, { "epoch": 0.4114114114114114, "grad_norm": 3.0661562442656307, "learning_rate": 9.958131040314931e-06, "loss": 0.5558, "step": 4384 }, { "epoch": 0.4115052552552553, "grad_norm": 1.2579726056924987, "learning_rate": 9.958060504133056e-06, "loss": 0.4753, "step": 4385 }, { "epoch": 0.4115990990990991, "grad_norm": 1.2543698938144936, "learning_rate": 9.95798990883564e-06, "loss": 0.5309, "step": 4386 }, { "epoch": 0.41169294294294295, "grad_norm": 1.194180382477655, "learning_rate": 9.957919254423529e-06, "loss": 0.5258, "step": 4387 }, { "epoch": 0.41178678678678676, "grad_norm": 1.0673141769831829, "learning_rate": 9.957848540897564e-06, "loss": 0.4719, "step": 4388 }, { "epoch": 0.41188063063063063, "grad_norm": 1.1310505553776087, "learning_rate": 9.957777768258585e-06, "loss": 0.4564, "step": 4389 }, { "epoch": 0.4119744744744745, "grad_norm": 1.5887072924080292, "learning_rate": 9.957706936507439e-06, "loss": 0.4865, "step": 4390 }, { "epoch": 0.4120683183183183, "grad_norm": 1.5544867952470183, "learning_rate": 9.95763604564497e-06, "loss": 0.5326, "step": 4391 }, { "epoch": 0.41216216216216217, "grad_norm": 1.541591179928637, "learning_rate": 9.957565095672024e-06, "loss": 0.5524, "step": 4392 }, { "epoch": 0.412256006006006, "grad_norm": 1.4166387178334263, "learning_rate": 9.957494086589445e-06, "loss": 0.4672, "step": 4393 }, { "epoch": 0.41234984984984985, "grad_norm": 1.0894633269077327, "learning_rate": 9.957423018398081e-06, "loss": 0.482, "step": 4394 }, { "epoch": 0.4124436936936937, "grad_norm": 1.3187137202438197, "learning_rate": 9.957351891098776e-06, "loss": 0.4752, "step": 4395 }, { "epoch": 0.4125375375375375, "grad_norm": 1.2021126854910718, "learning_rate": 9.957280704692385e-06, "loss": 0.498, "step": 4396 }, { "epoch": 0.4126313813813814, "grad_norm": 1.1168755459817787, "learning_rate": 9.95720945917975e-06, "loss": 0.5176, "step": 4397 }, { "epoch": 0.4127252252252252, "grad_norm": 1.0427450045554827, "learning_rate": 9.957138154561724e-06, "loss": 0.4783, "step": 4398 }, { "epoch": 0.41281906906906907, "grad_norm": 2.2209288310615913, "learning_rate": 9.957066790839155e-06, "loss": 0.487, "step": 4399 }, { "epoch": 0.41291291291291293, "grad_norm": 1.4787813625818262, "learning_rate": 9.956995368012898e-06, "loss": 0.4812, "step": 4400 }, { "epoch": 0.41300675675675674, "grad_norm": 1.2393314517284786, "learning_rate": 9.9569238860838e-06, "loss": 0.5171, "step": 4401 }, { "epoch": 0.4131006006006006, "grad_norm": 1.1798078688575335, "learning_rate": 9.956852345052717e-06, "loss": 0.4917, "step": 4402 }, { "epoch": 0.4131944444444444, "grad_norm": 1.0135277104209044, "learning_rate": 9.956780744920498e-06, "loss": 0.4663, "step": 4403 }, { "epoch": 0.4132882882882883, "grad_norm": 1.3774568925480177, "learning_rate": 9.956709085688e-06, "loss": 0.5272, "step": 4404 }, { "epoch": 0.41338213213213215, "grad_norm": 1.198833720164949, "learning_rate": 9.956637367356075e-06, "loss": 0.4651, "step": 4405 }, { "epoch": 0.41347597597597596, "grad_norm": 1.215477794820926, "learning_rate": 9.95656558992558e-06, "loss": 0.5258, "step": 4406 }, { "epoch": 0.41356981981981983, "grad_norm": 1.1737601053755957, "learning_rate": 9.956493753397371e-06, "loss": 0.4699, "step": 4407 }, { "epoch": 0.41366366366366364, "grad_norm": 1.098531703411459, "learning_rate": 9.956421857772303e-06, "loss": 0.4802, "step": 4408 }, { "epoch": 0.4137575075075075, "grad_norm": 1.105823028357988, "learning_rate": 9.956349903051233e-06, "loss": 0.5039, "step": 4409 }, { "epoch": 0.41385135135135137, "grad_norm": 1.1888697021189467, "learning_rate": 9.956277889235019e-06, "loss": 0.5295, "step": 4410 }, { "epoch": 0.4139451951951952, "grad_norm": 1.2849620496567695, "learning_rate": 9.956205816324522e-06, "loss": 0.5505, "step": 4411 }, { "epoch": 0.41403903903903905, "grad_norm": 1.2658792228631521, "learning_rate": 9.956133684320601e-06, "loss": 0.4851, "step": 4412 }, { "epoch": 0.41413288288288286, "grad_norm": 1.4763353057974171, "learning_rate": 9.956061493224113e-06, "loss": 0.4984, "step": 4413 }, { "epoch": 0.4142267267267267, "grad_norm": 1.0882289844202269, "learning_rate": 9.95598924303592e-06, "loss": 0.5153, "step": 4414 }, { "epoch": 0.4143205705705706, "grad_norm": 1.341202329054079, "learning_rate": 9.955916933756886e-06, "loss": 0.503, "step": 4415 }, { "epoch": 0.4144144144144144, "grad_norm": 1.0862953846646088, "learning_rate": 9.95584456538787e-06, "loss": 0.4529, "step": 4416 }, { "epoch": 0.41450825825825827, "grad_norm": 1.2706795211851412, "learning_rate": 9.955772137929736e-06, "loss": 0.5042, "step": 4417 }, { "epoch": 0.4146021021021021, "grad_norm": 1.428132542669863, "learning_rate": 9.955699651383347e-06, "loss": 0.4767, "step": 4418 }, { "epoch": 0.41469594594594594, "grad_norm": 1.3385565380858007, "learning_rate": 9.955627105749569e-06, "loss": 0.5699, "step": 4419 }, { "epoch": 0.4147897897897898, "grad_norm": 1.0154696868907376, "learning_rate": 9.955554501029264e-06, "loss": 0.5077, "step": 4420 }, { "epoch": 0.4148836336336336, "grad_norm": 1.7725174688690062, "learning_rate": 9.955481837223299e-06, "loss": 0.4902, "step": 4421 }, { "epoch": 0.4149774774774775, "grad_norm": 1.3076637814782426, "learning_rate": 9.955409114332544e-06, "loss": 0.5183, "step": 4422 }, { "epoch": 0.4150713213213213, "grad_norm": 1.2740721172288705, "learning_rate": 9.95533633235786e-06, "loss": 0.4824, "step": 4423 }, { "epoch": 0.41516516516516516, "grad_norm": 1.171686512196271, "learning_rate": 9.95526349130012e-06, "loss": 0.4733, "step": 4424 }, { "epoch": 0.41525900900900903, "grad_norm": 2.014805353615335, "learning_rate": 9.95519059116019e-06, "loss": 0.4633, "step": 4425 }, { "epoch": 0.41535285285285284, "grad_norm": 1.2992600812616057, "learning_rate": 9.955117631938938e-06, "loss": 0.5387, "step": 4426 }, { "epoch": 0.4154466966966967, "grad_norm": 1.534676822843875, "learning_rate": 9.955044613637234e-06, "loss": 0.5611, "step": 4427 }, { "epoch": 0.4155405405405405, "grad_norm": 1.1044947570825798, "learning_rate": 9.954971536255954e-06, "loss": 0.4436, "step": 4428 }, { "epoch": 0.4156343843843844, "grad_norm": 1.4879444409295923, "learning_rate": 9.954898399795961e-06, "loss": 0.4995, "step": 4429 }, { "epoch": 0.41572822822822825, "grad_norm": 1.3505881973781602, "learning_rate": 9.954825204258134e-06, "loss": 0.4961, "step": 4430 }, { "epoch": 0.41582207207207206, "grad_norm": 1.582153999652439, "learning_rate": 9.954751949643343e-06, "loss": 0.4644, "step": 4431 }, { "epoch": 0.4159159159159159, "grad_norm": 1.2405001100253437, "learning_rate": 9.95467863595246e-06, "loss": 0.5173, "step": 4432 }, { "epoch": 0.41600975975975973, "grad_norm": 1.2625411355851146, "learning_rate": 9.95460526318636e-06, "loss": 0.4738, "step": 4433 }, { "epoch": 0.4161036036036036, "grad_norm": 1.847773956770853, "learning_rate": 9.95453183134592e-06, "loss": 0.5273, "step": 4434 }, { "epoch": 0.41619744744744747, "grad_norm": 2.469142272281677, "learning_rate": 9.954458340432012e-06, "loss": 0.5255, "step": 4435 }, { "epoch": 0.4162912912912913, "grad_norm": 1.2501215013365001, "learning_rate": 9.954384790445516e-06, "loss": 0.5488, "step": 4436 }, { "epoch": 0.41638513513513514, "grad_norm": 1.9099717974366854, "learning_rate": 9.954311181387305e-06, "loss": 0.4815, "step": 4437 }, { "epoch": 0.41647897897897895, "grad_norm": 1.1258899759692136, "learning_rate": 9.95423751325826e-06, "loss": 0.5412, "step": 4438 }, { "epoch": 0.4165728228228228, "grad_norm": 1.3981876238274067, "learning_rate": 9.95416378605926e-06, "loss": 0.4931, "step": 4439 }, { "epoch": 0.4166666666666667, "grad_norm": 1.1719880812773926, "learning_rate": 9.954089999791179e-06, "loss": 0.4848, "step": 4440 }, { "epoch": 0.4167605105105105, "grad_norm": 1.1926986807701363, "learning_rate": 9.9540161544549e-06, "loss": 0.4445, "step": 4441 }, { "epoch": 0.41685435435435436, "grad_norm": 1.1935500762171793, "learning_rate": 9.953942250051305e-06, "loss": 0.5367, "step": 4442 }, { "epoch": 0.41694819819819817, "grad_norm": 1.134467294647976, "learning_rate": 9.953868286581272e-06, "loss": 0.4588, "step": 4443 }, { "epoch": 0.41704204204204204, "grad_norm": 1.2853725785997392, "learning_rate": 9.953794264045685e-06, "loss": 0.4878, "step": 4444 }, { "epoch": 0.4171358858858859, "grad_norm": 1.1535100404420224, "learning_rate": 9.953720182445427e-06, "loss": 0.496, "step": 4445 }, { "epoch": 0.4172297297297297, "grad_norm": 1.2919747769189038, "learning_rate": 9.953646041781379e-06, "loss": 0.4887, "step": 4446 }, { "epoch": 0.4173235735735736, "grad_norm": 1.0714133231690413, "learning_rate": 9.953571842054426e-06, "loss": 0.5275, "step": 4447 }, { "epoch": 0.4174174174174174, "grad_norm": 1.319983115481187, "learning_rate": 9.953497583265455e-06, "loss": 0.4575, "step": 4448 }, { "epoch": 0.41751126126126126, "grad_norm": 1.0934458766517794, "learning_rate": 9.953423265415348e-06, "loss": 0.5002, "step": 4449 }, { "epoch": 0.4176051051051051, "grad_norm": 1.5491032011863566, "learning_rate": 9.953348888504991e-06, "loss": 0.5397, "step": 4450 }, { "epoch": 0.41769894894894893, "grad_norm": 1.4620934539423827, "learning_rate": 9.953274452535272e-06, "loss": 0.4917, "step": 4451 }, { "epoch": 0.4177927927927928, "grad_norm": 1.2801092598767854, "learning_rate": 9.953199957507081e-06, "loss": 0.5081, "step": 4452 }, { "epoch": 0.4178866366366366, "grad_norm": 1.223630563086364, "learning_rate": 9.953125403421304e-06, "loss": 0.5119, "step": 4453 }, { "epoch": 0.4179804804804805, "grad_norm": 1.1838058016823563, "learning_rate": 9.953050790278829e-06, "loss": 0.4874, "step": 4454 }, { "epoch": 0.41807432432432434, "grad_norm": 1.0710257762765714, "learning_rate": 9.952976118080545e-06, "loss": 0.4942, "step": 4455 }, { "epoch": 0.41816816816816815, "grad_norm": 1.143333224754414, "learning_rate": 9.952901386827347e-06, "loss": 0.5397, "step": 4456 }, { "epoch": 0.418262012012012, "grad_norm": 1.190229001853675, "learning_rate": 9.952826596520119e-06, "loss": 0.4678, "step": 4457 }, { "epoch": 0.41835585585585583, "grad_norm": 1.0855852883892, "learning_rate": 9.95275174715976e-06, "loss": 0.4442, "step": 4458 }, { "epoch": 0.4184496996996997, "grad_norm": 1.17623568706435, "learning_rate": 9.952676838747157e-06, "loss": 0.495, "step": 4459 }, { "epoch": 0.41854354354354356, "grad_norm": 1.778269371680043, "learning_rate": 9.952601871283207e-06, "loss": 0.5414, "step": 4460 }, { "epoch": 0.41863738738738737, "grad_norm": 1.4168396418387874, "learning_rate": 9.9525268447688e-06, "loss": 0.5858, "step": 4461 }, { "epoch": 0.41873123123123124, "grad_norm": 1.1005049855967985, "learning_rate": 9.952451759204834e-06, "loss": 0.4895, "step": 4462 }, { "epoch": 0.41882507507507505, "grad_norm": 2.0883340296608814, "learning_rate": 9.952376614592203e-06, "loss": 0.4365, "step": 4463 }, { "epoch": 0.4189189189189189, "grad_norm": 1.071668052951787, "learning_rate": 9.952301410931801e-06, "loss": 0.4985, "step": 4464 }, { "epoch": 0.4190127627627628, "grad_norm": 1.077320955199456, "learning_rate": 9.952226148224527e-06, "loss": 0.4518, "step": 4465 }, { "epoch": 0.4191066066066066, "grad_norm": 1.3140255701162267, "learning_rate": 9.95215082647128e-06, "loss": 0.4737, "step": 4466 }, { "epoch": 0.41920045045045046, "grad_norm": 1.2775983481718785, "learning_rate": 9.952075445672952e-06, "loss": 0.5446, "step": 4467 }, { "epoch": 0.41929429429429427, "grad_norm": 2.0652232704091484, "learning_rate": 9.952000005830449e-06, "loss": 0.506, "step": 4468 }, { "epoch": 0.41938813813813813, "grad_norm": 1.1962118803355413, "learning_rate": 9.951924506944666e-06, "loss": 0.5193, "step": 4469 }, { "epoch": 0.419481981981982, "grad_norm": 5.559255933747519, "learning_rate": 9.951848949016503e-06, "loss": 0.4674, "step": 4470 }, { "epoch": 0.4195758258258258, "grad_norm": 1.2617635202151989, "learning_rate": 9.951773332046864e-06, "loss": 0.5248, "step": 4471 }, { "epoch": 0.4196696696696697, "grad_norm": 1.648739264875015, "learning_rate": 9.951697656036647e-06, "loss": 0.4841, "step": 4472 }, { "epoch": 0.4197635135135135, "grad_norm": 1.113110143636785, "learning_rate": 9.951621920986757e-06, "loss": 0.5294, "step": 4473 }, { "epoch": 0.41985735735735735, "grad_norm": 1.3705047442479716, "learning_rate": 9.951546126898097e-06, "loss": 0.5537, "step": 4474 }, { "epoch": 0.4199512012012012, "grad_norm": 1.2948113482249153, "learning_rate": 9.951470273771568e-06, "loss": 0.4964, "step": 4475 }, { "epoch": 0.42004504504504503, "grad_norm": 2.4067555144331463, "learning_rate": 9.951394361608078e-06, "loss": 0.5244, "step": 4476 }, { "epoch": 0.4201388888888889, "grad_norm": 1.863751340911403, "learning_rate": 9.951318390408529e-06, "loss": 0.475, "step": 4477 }, { "epoch": 0.4202327327327327, "grad_norm": 1.794460960045749, "learning_rate": 9.951242360173829e-06, "loss": 0.5264, "step": 4478 }, { "epoch": 0.42032657657657657, "grad_norm": 1.1833331361848354, "learning_rate": 9.951166270904883e-06, "loss": 0.4608, "step": 4479 }, { "epoch": 0.42042042042042044, "grad_norm": 1.6830265132805997, "learning_rate": 9.9510901226026e-06, "loss": 0.4348, "step": 4480 }, { "epoch": 0.42051426426426425, "grad_norm": 3.262236871586201, "learning_rate": 9.951013915267884e-06, "loss": 0.5403, "step": 4481 }, { "epoch": 0.4206081081081081, "grad_norm": 1.531128676658614, "learning_rate": 9.950937648901649e-06, "loss": 0.4772, "step": 4482 }, { "epoch": 0.4207019519519519, "grad_norm": 1.0764373240626985, "learning_rate": 9.950861323504801e-06, "loss": 0.5004, "step": 4483 }, { "epoch": 0.4207957957957958, "grad_norm": 1.2144430135360378, "learning_rate": 9.95078493907825e-06, "loss": 0.5062, "step": 4484 }, { "epoch": 0.42088963963963966, "grad_norm": 8.057100915662971, "learning_rate": 9.950708495622907e-06, "loss": 0.4716, "step": 4485 }, { "epoch": 0.42098348348348347, "grad_norm": 1.3053955582273258, "learning_rate": 9.950631993139685e-06, "loss": 0.5482, "step": 4486 }, { "epoch": 0.42107732732732733, "grad_norm": 1.1055458888256962, "learning_rate": 9.950555431629495e-06, "loss": 0.5866, "step": 4487 }, { "epoch": 0.42117117117117114, "grad_norm": 1.4680363429872634, "learning_rate": 9.950478811093252e-06, "loss": 0.5447, "step": 4488 }, { "epoch": 0.421265015015015, "grad_norm": 1.1607779235426325, "learning_rate": 9.950402131531865e-06, "loss": 0.5199, "step": 4489 }, { "epoch": 0.4213588588588589, "grad_norm": 1.0620906520037146, "learning_rate": 9.950325392946252e-06, "loss": 0.4592, "step": 4490 }, { "epoch": 0.4214527027027027, "grad_norm": 1.196035439465285, "learning_rate": 9.950248595337325e-06, "loss": 0.4952, "step": 4491 }, { "epoch": 0.42154654654654655, "grad_norm": 1.2973262461851165, "learning_rate": 9.950171738706003e-06, "loss": 0.5419, "step": 4492 }, { "epoch": 0.42164039039039036, "grad_norm": 2.176301637450235, "learning_rate": 9.9500948230532e-06, "loss": 0.4741, "step": 4493 }, { "epoch": 0.42173423423423423, "grad_norm": 1.7521364415512952, "learning_rate": 9.950017848379834e-06, "loss": 0.4991, "step": 4494 }, { "epoch": 0.4218280780780781, "grad_norm": 1.2569922166630614, "learning_rate": 9.949940814686823e-06, "loss": 0.5064, "step": 4495 }, { "epoch": 0.4219219219219219, "grad_norm": 1.5755763810670906, "learning_rate": 9.949863721975083e-06, "loss": 0.4925, "step": 4496 }, { "epoch": 0.42201576576576577, "grad_norm": 5.339738307681667, "learning_rate": 9.949786570245537e-06, "loss": 0.5029, "step": 4497 }, { "epoch": 0.42210960960960964, "grad_norm": 1.3021936169403536, "learning_rate": 9.949709359499103e-06, "loss": 0.459, "step": 4498 }, { "epoch": 0.42220345345345345, "grad_norm": 1.4624411356436178, "learning_rate": 9.949632089736703e-06, "loss": 0.4974, "step": 4499 }, { "epoch": 0.4222972972972973, "grad_norm": 1.0955263521359138, "learning_rate": 9.949554760959254e-06, "loss": 0.4704, "step": 4500 }, { "epoch": 0.4223911411411411, "grad_norm": 4.113143910919946, "learning_rate": 9.949477373167684e-06, "loss": 0.529, "step": 4501 }, { "epoch": 0.422484984984985, "grad_norm": 1.1987295736991914, "learning_rate": 9.949399926362913e-06, "loss": 0.5067, "step": 4502 }, { "epoch": 0.42257882882882886, "grad_norm": 1.2551794855693412, "learning_rate": 9.949322420545861e-06, "loss": 0.5079, "step": 4503 }, { "epoch": 0.42267267267267267, "grad_norm": 2.076147054207353, "learning_rate": 9.949244855717457e-06, "loss": 0.47, "step": 4504 }, { "epoch": 0.42276651651651653, "grad_norm": 1.6955486910258102, "learning_rate": 9.949167231878625e-06, "loss": 0.5097, "step": 4505 }, { "epoch": 0.42286036036036034, "grad_norm": 1.3579692010307738, "learning_rate": 9.94908954903029e-06, "loss": 0.5007, "step": 4506 }, { "epoch": 0.4229542042042042, "grad_norm": 1.6210140371883826, "learning_rate": 9.949011807173377e-06, "loss": 0.4837, "step": 4507 }, { "epoch": 0.4230480480480481, "grad_norm": 1.0763814969524597, "learning_rate": 9.948934006308812e-06, "loss": 0.4691, "step": 4508 }, { "epoch": 0.4231418918918919, "grad_norm": 1.141110521588332, "learning_rate": 9.948856146437526e-06, "loss": 0.4386, "step": 4509 }, { "epoch": 0.42323573573573575, "grad_norm": 1.238421076392466, "learning_rate": 9.948778227560445e-06, "loss": 0.4754, "step": 4510 }, { "epoch": 0.42332957957957956, "grad_norm": 1.213773883743041, "learning_rate": 9.9487002496785e-06, "loss": 0.5346, "step": 4511 }, { "epoch": 0.42342342342342343, "grad_norm": 1.5619519953662655, "learning_rate": 9.948622212792618e-06, "loss": 0.5047, "step": 4512 }, { "epoch": 0.4235172672672673, "grad_norm": 1.536340981579494, "learning_rate": 9.948544116903733e-06, "loss": 0.4912, "step": 4513 }, { "epoch": 0.4236111111111111, "grad_norm": 2.099693730874737, "learning_rate": 9.948465962012773e-06, "loss": 0.4676, "step": 4514 }, { "epoch": 0.42370495495495497, "grad_norm": 1.2902914258856837, "learning_rate": 9.94838774812067e-06, "loss": 0.4754, "step": 4515 }, { "epoch": 0.4237987987987988, "grad_norm": 1.278932021834025, "learning_rate": 9.948309475228357e-06, "loss": 0.5026, "step": 4516 }, { "epoch": 0.42389264264264265, "grad_norm": 1.3031235844422238, "learning_rate": 9.948231143336771e-06, "loss": 0.5965, "step": 4517 }, { "epoch": 0.4239864864864865, "grad_norm": 1.390632739410933, "learning_rate": 9.94815275244684e-06, "loss": 0.4699, "step": 4518 }, { "epoch": 0.4240803303303303, "grad_norm": 3.429042547603802, "learning_rate": 9.948074302559502e-06, "loss": 0.493, "step": 4519 }, { "epoch": 0.4241741741741742, "grad_norm": 8.787288480556441, "learning_rate": 9.94799579367569e-06, "loss": 0.5086, "step": 4520 }, { "epoch": 0.424268018018018, "grad_norm": 1.2359471277684024, "learning_rate": 9.947917225796346e-06, "loss": 0.4546, "step": 4521 }, { "epoch": 0.42436186186186187, "grad_norm": 1.322257886722987, "learning_rate": 9.9478385989224e-06, "loss": 0.5199, "step": 4522 }, { "epoch": 0.42445570570570573, "grad_norm": 1.2139384266473077, "learning_rate": 9.947759913054792e-06, "loss": 0.4694, "step": 4523 }, { "epoch": 0.42454954954954954, "grad_norm": 1.1177656258816504, "learning_rate": 9.947681168194461e-06, "loss": 0.521, "step": 4524 }, { "epoch": 0.4246433933933934, "grad_norm": 3.7133412690129295, "learning_rate": 9.947602364342344e-06, "loss": 0.5047, "step": 4525 }, { "epoch": 0.4247372372372372, "grad_norm": 1.2690849591584568, "learning_rate": 9.947523501499382e-06, "loss": 0.5126, "step": 4526 }, { "epoch": 0.4248310810810811, "grad_norm": 1.5278221465758206, "learning_rate": 9.947444579666516e-06, "loss": 0.4885, "step": 4527 }, { "epoch": 0.42492492492492495, "grad_norm": 4.702241660619801, "learning_rate": 9.947365598844686e-06, "loss": 0.4478, "step": 4528 }, { "epoch": 0.42501876876876876, "grad_norm": 2.3806769737814792, "learning_rate": 9.947286559034832e-06, "loss": 0.5137, "step": 4529 }, { "epoch": 0.42511261261261263, "grad_norm": 1.4690560699559796, "learning_rate": 9.947207460237899e-06, "loss": 0.5631, "step": 4530 }, { "epoch": 0.42520645645645644, "grad_norm": 1.2517381668791518, "learning_rate": 9.94712830245483e-06, "loss": 0.4871, "step": 4531 }, { "epoch": 0.4253003003003003, "grad_norm": 1.121276284121526, "learning_rate": 9.947049085686567e-06, "loss": 0.4613, "step": 4532 }, { "epoch": 0.42539414414414417, "grad_norm": 1.741862800175665, "learning_rate": 9.946969809934054e-06, "loss": 0.5492, "step": 4533 }, { "epoch": 0.425487987987988, "grad_norm": 1.1988352320260607, "learning_rate": 9.94689047519824e-06, "loss": 0.4902, "step": 4534 }, { "epoch": 0.42558183183183185, "grad_norm": 1.4853359607495273, "learning_rate": 9.946811081480068e-06, "loss": 0.5004, "step": 4535 }, { "epoch": 0.42567567567567566, "grad_norm": 2.3141329073000536, "learning_rate": 9.946731628780486e-06, "loss": 0.4958, "step": 4536 }, { "epoch": 0.4257695195195195, "grad_norm": 1.3664321339773782, "learning_rate": 9.94665211710044e-06, "loss": 0.5207, "step": 4537 }, { "epoch": 0.4258633633633634, "grad_norm": 1.14514492340648, "learning_rate": 9.946572546440877e-06, "loss": 0.4694, "step": 4538 }, { "epoch": 0.4259572072072072, "grad_norm": 1.328713406492187, "learning_rate": 9.94649291680275e-06, "loss": 0.491, "step": 4539 }, { "epoch": 0.42605105105105107, "grad_norm": 1.0336733713340112, "learning_rate": 9.946413228187006e-06, "loss": 0.485, "step": 4540 }, { "epoch": 0.4261448948948949, "grad_norm": 1.1305580672533362, "learning_rate": 9.946333480594594e-06, "loss": 0.425, "step": 4541 }, { "epoch": 0.42623873873873874, "grad_norm": 1.3685255321868774, "learning_rate": 9.946253674026464e-06, "loss": 0.5013, "step": 4542 }, { "epoch": 0.4263325825825826, "grad_norm": 1.3203959993960197, "learning_rate": 9.94617380848357e-06, "loss": 0.5126, "step": 4543 }, { "epoch": 0.4264264264264264, "grad_norm": 1.0754505134555072, "learning_rate": 9.946093883966866e-06, "loss": 0.4546, "step": 4544 }, { "epoch": 0.4265202702702703, "grad_norm": 1.1788216994837202, "learning_rate": 9.9460139004773e-06, "loss": 0.4234, "step": 4545 }, { "epoch": 0.4266141141141141, "grad_norm": 1.5215933793453542, "learning_rate": 9.94593385801583e-06, "loss": 0.4701, "step": 4546 }, { "epoch": 0.42670795795795796, "grad_norm": 1.3039153563722592, "learning_rate": 9.945853756583407e-06, "loss": 0.4838, "step": 4547 }, { "epoch": 0.42680180180180183, "grad_norm": 1.890919136999122, "learning_rate": 9.945773596180987e-06, "loss": 0.4977, "step": 4548 }, { "epoch": 0.42689564564564564, "grad_norm": 1.089895234535356, "learning_rate": 9.945693376809528e-06, "loss": 0.4696, "step": 4549 }, { "epoch": 0.4269894894894895, "grad_norm": 1.510488550630606, "learning_rate": 9.945613098469985e-06, "loss": 0.4964, "step": 4550 }, { "epoch": 0.4270833333333333, "grad_norm": 1.1691783279083587, "learning_rate": 9.945532761163313e-06, "loss": 0.4712, "step": 4551 }, { "epoch": 0.4271771771771772, "grad_norm": 1.1889584771822188, "learning_rate": 9.945452364890473e-06, "loss": 0.4759, "step": 4552 }, { "epoch": 0.42727102102102105, "grad_norm": 1.0462867068222599, "learning_rate": 9.945371909652423e-06, "loss": 0.4237, "step": 4553 }, { "epoch": 0.42736486486486486, "grad_norm": 1.3880795888065065, "learning_rate": 9.945291395450121e-06, "loss": 0.5061, "step": 4554 }, { "epoch": 0.4274587087087087, "grad_norm": 1.2806254705019398, "learning_rate": 9.945210822284528e-06, "loss": 0.4597, "step": 4555 }, { "epoch": 0.42755255255255253, "grad_norm": 1.4845254423208327, "learning_rate": 9.945130190156604e-06, "loss": 0.5015, "step": 4556 }, { "epoch": 0.4276463963963964, "grad_norm": 1.1314866542291089, "learning_rate": 9.94504949906731e-06, "loss": 0.5054, "step": 4557 }, { "epoch": 0.42774024024024027, "grad_norm": 1.236887200389782, "learning_rate": 9.94496874901761e-06, "loss": 0.5514, "step": 4558 }, { "epoch": 0.4278340840840841, "grad_norm": 1.0834514123313674, "learning_rate": 9.944887940008465e-06, "loss": 0.4592, "step": 4559 }, { "epoch": 0.42792792792792794, "grad_norm": 1.089222470231765, "learning_rate": 9.944807072040839e-06, "loss": 0.5048, "step": 4560 }, { "epoch": 0.42802177177177175, "grad_norm": 1.5424110349309414, "learning_rate": 9.944726145115698e-06, "loss": 0.5061, "step": 4561 }, { "epoch": 0.4281156156156156, "grad_norm": 1.2800599152290917, "learning_rate": 9.944645159234003e-06, "loss": 0.477, "step": 4562 }, { "epoch": 0.4282094594594595, "grad_norm": 1.4937607377955453, "learning_rate": 9.944564114396722e-06, "loss": 0.5254, "step": 4563 }, { "epoch": 0.4283033033033033, "grad_norm": 1.1428289741034465, "learning_rate": 9.944483010604822e-06, "loss": 0.5061, "step": 4564 }, { "epoch": 0.42839714714714716, "grad_norm": 1.0374728803399877, "learning_rate": 9.944401847859269e-06, "loss": 0.4536, "step": 4565 }, { "epoch": 0.42849099099099097, "grad_norm": 1.1887800867715754, "learning_rate": 9.944320626161031e-06, "loss": 0.4852, "step": 4566 }, { "epoch": 0.42858483483483484, "grad_norm": 1.2627455783649981, "learning_rate": 9.944239345511075e-06, "loss": 0.5011, "step": 4567 }, { "epoch": 0.4286786786786787, "grad_norm": 1.3561033660497535, "learning_rate": 9.944158005910375e-06, "loss": 0.4461, "step": 4568 }, { "epoch": 0.4287725225225225, "grad_norm": 1.1430590756264563, "learning_rate": 9.944076607359892e-06, "loss": 0.4839, "step": 4569 }, { "epoch": 0.4288663663663664, "grad_norm": 1.0718539567902943, "learning_rate": 9.943995149860604e-06, "loss": 0.5033, "step": 4570 }, { "epoch": 0.4289602102102102, "grad_norm": 1.0035729378258325, "learning_rate": 9.943913633413481e-06, "loss": 0.4103, "step": 4571 }, { "epoch": 0.42905405405405406, "grad_norm": 1.1516762827932752, "learning_rate": 9.943832058019494e-06, "loss": 0.4177, "step": 4572 }, { "epoch": 0.4291478978978979, "grad_norm": 1.9564270303084248, "learning_rate": 9.943750423679615e-06, "loss": 0.5082, "step": 4573 }, { "epoch": 0.42924174174174173, "grad_norm": 1.1486015433828116, "learning_rate": 9.943668730394817e-06, "loss": 0.5043, "step": 4574 }, { "epoch": 0.4293355855855856, "grad_norm": 1.1686342048587057, "learning_rate": 9.943586978166076e-06, "loss": 0.4982, "step": 4575 }, { "epoch": 0.4294294294294294, "grad_norm": 1.3499420464956513, "learning_rate": 9.943505166994364e-06, "loss": 0.5011, "step": 4576 }, { "epoch": 0.4295232732732733, "grad_norm": 1.3459441530691294, "learning_rate": 9.943423296880659e-06, "loss": 0.4672, "step": 4577 }, { "epoch": 0.42961711711711714, "grad_norm": 1.2334933250598161, "learning_rate": 9.943341367825937e-06, "loss": 0.4953, "step": 4578 }, { "epoch": 0.42971096096096095, "grad_norm": 1.1939435428857306, "learning_rate": 9.943259379831173e-06, "loss": 0.4673, "step": 4579 }, { "epoch": 0.4298048048048048, "grad_norm": 1.1408819784682243, "learning_rate": 9.943177332897345e-06, "loss": 0.4907, "step": 4580 }, { "epoch": 0.42989864864864863, "grad_norm": 1.3120709979059673, "learning_rate": 9.943095227025432e-06, "loss": 0.5246, "step": 4581 }, { "epoch": 0.4299924924924925, "grad_norm": 1.1304169706198357, "learning_rate": 9.943013062216415e-06, "loss": 0.522, "step": 4582 }, { "epoch": 0.43008633633633636, "grad_norm": 1.165692049310154, "learning_rate": 9.94293083847127e-06, "loss": 0.5018, "step": 4583 }, { "epoch": 0.43018018018018017, "grad_norm": 2.096115126345307, "learning_rate": 9.942848555790979e-06, "loss": 0.5411, "step": 4584 }, { "epoch": 0.43027402402402404, "grad_norm": 7.6665972284587145, "learning_rate": 9.942766214176524e-06, "loss": 0.5872, "step": 4585 }, { "epoch": 0.43036786786786785, "grad_norm": 1.1289144855471933, "learning_rate": 9.942683813628884e-06, "loss": 0.4727, "step": 4586 }, { "epoch": 0.4304617117117117, "grad_norm": 1.217623554374186, "learning_rate": 9.942601354149044e-06, "loss": 0.4845, "step": 4587 }, { "epoch": 0.4305555555555556, "grad_norm": 1.1290164396726168, "learning_rate": 9.942518835737986e-06, "loss": 0.5194, "step": 4588 }, { "epoch": 0.4306493993993994, "grad_norm": 1.454664877462721, "learning_rate": 9.942436258396694e-06, "loss": 0.4905, "step": 4589 }, { "epoch": 0.43074324324324326, "grad_norm": 1.7031088902039722, "learning_rate": 9.942353622126153e-06, "loss": 0.4956, "step": 4590 }, { "epoch": 0.43083708708708707, "grad_norm": 1.2610799204434247, "learning_rate": 9.942270926927349e-06, "loss": 0.5385, "step": 4591 }, { "epoch": 0.43093093093093093, "grad_norm": 1.7730289540991369, "learning_rate": 9.942188172801265e-06, "loss": 0.4892, "step": 4592 }, { "epoch": 0.4310247747747748, "grad_norm": 17.367961815080616, "learning_rate": 9.942105359748892e-06, "loss": 0.4978, "step": 4593 }, { "epoch": 0.4311186186186186, "grad_norm": 1.3084204436229714, "learning_rate": 9.942022487771215e-06, "loss": 0.5392, "step": 4594 }, { "epoch": 0.4312124624624625, "grad_norm": 1.2457004910546263, "learning_rate": 9.941939556869223e-06, "loss": 0.5613, "step": 4595 }, { "epoch": 0.4313063063063063, "grad_norm": 2.2638709213015944, "learning_rate": 9.941856567043901e-06, "loss": 0.4929, "step": 4596 }, { "epoch": 0.43140015015015015, "grad_norm": 1.216873717718439, "learning_rate": 9.941773518296243e-06, "loss": 0.5202, "step": 4597 }, { "epoch": 0.431493993993994, "grad_norm": 1.3897125743518508, "learning_rate": 9.941690410627239e-06, "loss": 0.5512, "step": 4598 }, { "epoch": 0.43158783783783783, "grad_norm": 2.4931941895956284, "learning_rate": 9.941607244037877e-06, "loss": 0.5152, "step": 4599 }, { "epoch": 0.4316816816816817, "grad_norm": 1.7703575303012837, "learning_rate": 9.94152401852915e-06, "loss": 0.5322, "step": 4600 }, { "epoch": 0.4317755255255255, "grad_norm": 1.8133880449329411, "learning_rate": 9.941440734102051e-06, "loss": 0.4905, "step": 4601 }, { "epoch": 0.43186936936936937, "grad_norm": 1.09884144557152, "learning_rate": 9.941357390757573e-06, "loss": 0.4569, "step": 4602 }, { "epoch": 0.43196321321321324, "grad_norm": 1.5237123389120784, "learning_rate": 9.94127398849671e-06, "loss": 0.5294, "step": 4603 }, { "epoch": 0.43205705705705705, "grad_norm": 1.1506855675369752, "learning_rate": 9.941190527320454e-06, "loss": 0.4842, "step": 4604 }, { "epoch": 0.4321509009009009, "grad_norm": 1.1920271805070894, "learning_rate": 9.941107007229802e-06, "loss": 0.4986, "step": 4605 }, { "epoch": 0.4322447447447447, "grad_norm": 2.642599553866428, "learning_rate": 9.941023428225751e-06, "loss": 0.5004, "step": 4606 }, { "epoch": 0.4323385885885886, "grad_norm": 1.6030074633345122, "learning_rate": 9.940939790309294e-06, "loss": 0.4859, "step": 4607 }, { "epoch": 0.43243243243243246, "grad_norm": 57.14239882654569, "learning_rate": 9.940856093481432e-06, "loss": 0.5074, "step": 4608 }, { "epoch": 0.43252627627627627, "grad_norm": 1.268785021618823, "learning_rate": 9.940772337743161e-06, "loss": 0.501, "step": 4609 }, { "epoch": 0.43262012012012013, "grad_norm": 1.5590464957126224, "learning_rate": 9.94068852309548e-06, "loss": 0.4454, "step": 4610 }, { "epoch": 0.43271396396396394, "grad_norm": 1.6977287462322326, "learning_rate": 9.940604649539388e-06, "loss": 0.4875, "step": 4611 }, { "epoch": 0.4328078078078078, "grad_norm": 1.1942808024931013, "learning_rate": 9.940520717075886e-06, "loss": 0.4852, "step": 4612 }, { "epoch": 0.4329016516516517, "grad_norm": 1.244112918229235, "learning_rate": 9.940436725705973e-06, "loss": 0.5262, "step": 4613 }, { "epoch": 0.4329954954954955, "grad_norm": 1.1706607740940995, "learning_rate": 9.940352675430651e-06, "loss": 0.502, "step": 4614 }, { "epoch": 0.43308933933933935, "grad_norm": 1.1645929174725036, "learning_rate": 9.940268566250923e-06, "loss": 0.4954, "step": 4615 }, { "epoch": 0.43318318318318316, "grad_norm": 1.194593771171777, "learning_rate": 9.940184398167794e-06, "loss": 0.4593, "step": 4616 }, { "epoch": 0.43327702702702703, "grad_norm": 1.4931069263231127, "learning_rate": 9.940100171182262e-06, "loss": 0.463, "step": 4617 }, { "epoch": 0.4333708708708709, "grad_norm": 1.1845033747885712, "learning_rate": 9.940015885295335e-06, "loss": 0.48, "step": 4618 }, { "epoch": 0.4334647147147147, "grad_norm": 1.1744587183038457, "learning_rate": 9.939931540508016e-06, "loss": 0.5509, "step": 4619 }, { "epoch": 0.43355855855855857, "grad_norm": 1.556825153716627, "learning_rate": 9.939847136821314e-06, "loss": 0.5065, "step": 4620 }, { "epoch": 0.4336524024024024, "grad_norm": 1.6326264948619806, "learning_rate": 9.939762674236232e-06, "loss": 0.4945, "step": 4621 }, { "epoch": 0.43374624624624625, "grad_norm": 1.331213066091584, "learning_rate": 9.93967815275378e-06, "loss": 0.4171, "step": 4622 }, { "epoch": 0.4338400900900901, "grad_norm": 1.4491140461189072, "learning_rate": 9.939593572374962e-06, "loss": 0.5508, "step": 4623 }, { "epoch": 0.4339339339339339, "grad_norm": 1.4959530952085074, "learning_rate": 9.93950893310079e-06, "loss": 0.5384, "step": 4624 }, { "epoch": 0.4340277777777778, "grad_norm": 1.2137458960481824, "learning_rate": 9.93942423493227e-06, "loss": 0.4674, "step": 4625 }, { "epoch": 0.4341216216216216, "grad_norm": 1.1485166101651612, "learning_rate": 9.939339477870415e-06, "loss": 0.4533, "step": 4626 }, { "epoch": 0.43421546546546547, "grad_norm": 1.146402528971795, "learning_rate": 9.939254661916233e-06, "loss": 0.5086, "step": 4627 }, { "epoch": 0.43430930930930933, "grad_norm": 1.8193003567454438, "learning_rate": 9.939169787070736e-06, "loss": 0.5346, "step": 4628 }, { "epoch": 0.43440315315315314, "grad_norm": 1.1759592461977575, "learning_rate": 9.939084853334937e-06, "loss": 0.4751, "step": 4629 }, { "epoch": 0.434496996996997, "grad_norm": 1.355104325976811, "learning_rate": 9.938999860709847e-06, "loss": 0.5458, "step": 4630 }, { "epoch": 0.4345908408408408, "grad_norm": 1.1843083482779, "learning_rate": 9.938914809196482e-06, "loss": 0.4801, "step": 4631 }, { "epoch": 0.4346846846846847, "grad_norm": 1.678713647573737, "learning_rate": 9.938829698795854e-06, "loss": 0.553, "step": 4632 }, { "epoch": 0.43477852852852855, "grad_norm": 1.2381153451828086, "learning_rate": 9.93874452950898e-06, "loss": 0.498, "step": 4633 }, { "epoch": 0.43487237237237236, "grad_norm": 2.016691284626845, "learning_rate": 9.93865930133687e-06, "loss": 0.4838, "step": 4634 }, { "epoch": 0.43496621621621623, "grad_norm": 1.2325398633006084, "learning_rate": 9.938574014280546e-06, "loss": 0.4683, "step": 4635 }, { "epoch": 0.43506006006006004, "grad_norm": 1.0377882403365757, "learning_rate": 9.938488668341022e-06, "loss": 0.4421, "step": 4636 }, { "epoch": 0.4351539039039039, "grad_norm": 1.045078789491148, "learning_rate": 9.938403263519317e-06, "loss": 0.5064, "step": 4637 }, { "epoch": 0.43524774774774777, "grad_norm": 1.122677353727516, "learning_rate": 9.938317799816448e-06, "loss": 0.4813, "step": 4638 }, { "epoch": 0.4353415915915916, "grad_norm": 1.7138364780327315, "learning_rate": 9.938232277233436e-06, "loss": 0.5647, "step": 4639 }, { "epoch": 0.43543543543543545, "grad_norm": 1.8347332210433638, "learning_rate": 9.938146695771297e-06, "loss": 0.5003, "step": 4640 }, { "epoch": 0.43552927927927926, "grad_norm": 1.296662368137687, "learning_rate": 9.938061055431056e-06, "loss": 0.477, "step": 4641 }, { "epoch": 0.4356231231231231, "grad_norm": 1.13632739768961, "learning_rate": 9.937975356213731e-06, "loss": 0.4695, "step": 4642 }, { "epoch": 0.435716966966967, "grad_norm": 1.2338496625522115, "learning_rate": 9.937889598120346e-06, "loss": 0.5179, "step": 4643 }, { "epoch": 0.4358108108108108, "grad_norm": 1.046006746740647, "learning_rate": 9.93780378115192e-06, "loss": 0.4834, "step": 4644 }, { "epoch": 0.43590465465465467, "grad_norm": 1.319997634823138, "learning_rate": 9.937717905309481e-06, "loss": 0.4637, "step": 4645 }, { "epoch": 0.4359984984984985, "grad_norm": 1.371820646080141, "learning_rate": 9.937631970594049e-06, "loss": 0.5626, "step": 4646 }, { "epoch": 0.43609234234234234, "grad_norm": 1.664633955104783, "learning_rate": 9.937545977006649e-06, "loss": 0.4906, "step": 4647 }, { "epoch": 0.4361861861861862, "grad_norm": 1.167312211354837, "learning_rate": 9.937459924548307e-06, "loss": 0.4821, "step": 4648 }, { "epoch": 0.43628003003003, "grad_norm": 1.3592612517584526, "learning_rate": 9.93737381322005e-06, "loss": 0.4561, "step": 4649 }, { "epoch": 0.4363738738738739, "grad_norm": 1.0706190922605994, "learning_rate": 9.937287643022904e-06, "loss": 0.4295, "step": 4650 }, { "epoch": 0.4364677177177177, "grad_norm": 1.034800896588344, "learning_rate": 9.937201413957895e-06, "loss": 0.4319, "step": 4651 }, { "epoch": 0.43656156156156156, "grad_norm": 1.2003039973492649, "learning_rate": 9.937115126026054e-06, "loss": 0.4844, "step": 4652 }, { "epoch": 0.43665540540540543, "grad_norm": 1.161526467392938, "learning_rate": 9.937028779228409e-06, "loss": 0.5092, "step": 4653 }, { "epoch": 0.43674924924924924, "grad_norm": 1.5224159577366498, "learning_rate": 9.936942373565988e-06, "loss": 0.4922, "step": 4654 }, { "epoch": 0.4368430930930931, "grad_norm": 1.5282464396829463, "learning_rate": 9.936855909039821e-06, "loss": 0.5511, "step": 4655 }, { "epoch": 0.4369369369369369, "grad_norm": 1.1442579414526681, "learning_rate": 9.936769385650941e-06, "loss": 0.4851, "step": 4656 }, { "epoch": 0.4370307807807808, "grad_norm": 1.3487456443813304, "learning_rate": 9.936682803400378e-06, "loss": 0.461, "step": 4657 }, { "epoch": 0.43712462462462465, "grad_norm": 1.6672892045655863, "learning_rate": 9.936596162289164e-06, "loss": 0.4923, "step": 4658 }, { "epoch": 0.43721846846846846, "grad_norm": 2.485031674008092, "learning_rate": 9.936509462318335e-06, "loss": 0.4953, "step": 4659 }, { "epoch": 0.4373123123123123, "grad_norm": 1.3520224272658874, "learning_rate": 9.936422703488922e-06, "loss": 0.5574, "step": 4660 }, { "epoch": 0.43740615615615613, "grad_norm": 1.1434671998218413, "learning_rate": 9.93633588580196e-06, "loss": 0.5012, "step": 4661 }, { "epoch": 0.4375, "grad_norm": 1.6053928179196002, "learning_rate": 9.936249009258486e-06, "loss": 0.5418, "step": 4662 }, { "epoch": 0.43759384384384387, "grad_norm": 1.5454292337431692, "learning_rate": 9.93616207385953e-06, "loss": 0.5434, "step": 4663 }, { "epoch": 0.4376876876876877, "grad_norm": 1.2842193519074345, "learning_rate": 9.936075079606136e-06, "loss": 0.4698, "step": 4664 }, { "epoch": 0.43778153153153154, "grad_norm": 1.4244669145252709, "learning_rate": 9.935988026499338e-06, "loss": 0.4764, "step": 4665 }, { "epoch": 0.43787537537537535, "grad_norm": 1.1113615252040419, "learning_rate": 9.935900914540171e-06, "loss": 0.4644, "step": 4666 }, { "epoch": 0.4379692192192192, "grad_norm": 1.9681350650681328, "learning_rate": 9.93581374372968e-06, "loss": 0.553, "step": 4667 }, { "epoch": 0.4380630630630631, "grad_norm": 1.2964923388928062, "learning_rate": 9.935726514068897e-06, "loss": 0.4746, "step": 4668 }, { "epoch": 0.4381569069069069, "grad_norm": 3.165027919506945, "learning_rate": 9.935639225558869e-06, "loss": 0.5183, "step": 4669 }, { "epoch": 0.43825075075075076, "grad_norm": 1.2088897010309978, "learning_rate": 9.935551878200634e-06, "loss": 0.5062, "step": 4670 }, { "epoch": 0.43834459459459457, "grad_norm": 1.345817243095088, "learning_rate": 9.93546447199523e-06, "loss": 0.5195, "step": 4671 }, { "epoch": 0.43843843843843844, "grad_norm": 1.4352123872447224, "learning_rate": 9.935377006943704e-06, "loss": 0.5696, "step": 4672 }, { "epoch": 0.4385322822822823, "grad_norm": 1.1209824679097855, "learning_rate": 9.935289483047097e-06, "loss": 0.4966, "step": 4673 }, { "epoch": 0.4386261261261261, "grad_norm": 1.1966695224025816, "learning_rate": 9.935201900306453e-06, "loss": 0.4517, "step": 4674 }, { "epoch": 0.43871996996997, "grad_norm": 1.142446628853802, "learning_rate": 9.935114258722816e-06, "loss": 0.5306, "step": 4675 }, { "epoch": 0.4388138138138138, "grad_norm": 1.478823852521956, "learning_rate": 9.935026558297229e-06, "loss": 0.4867, "step": 4676 }, { "epoch": 0.43890765765765766, "grad_norm": 1.3216604039247803, "learning_rate": 9.93493879903074e-06, "loss": 0.4985, "step": 4677 }, { "epoch": 0.4390015015015015, "grad_norm": 1.2112392203429398, "learning_rate": 9.934850980924396e-06, "loss": 0.4515, "step": 4678 }, { "epoch": 0.43909534534534533, "grad_norm": 1.4737990362316626, "learning_rate": 9.934763103979243e-06, "loss": 0.5519, "step": 4679 }, { "epoch": 0.4391891891891892, "grad_norm": 1.1645205085476342, "learning_rate": 9.934675168196328e-06, "loss": 0.4754, "step": 4680 }, { "epoch": 0.439283033033033, "grad_norm": 1.097628190264621, "learning_rate": 9.934587173576701e-06, "loss": 0.4928, "step": 4681 }, { "epoch": 0.4393768768768769, "grad_norm": 1.2922532548627665, "learning_rate": 9.93449912012141e-06, "loss": 0.4499, "step": 4682 }, { "epoch": 0.43947072072072074, "grad_norm": 1.6425402778159366, "learning_rate": 9.934411007831507e-06, "loss": 0.4576, "step": 4683 }, { "epoch": 0.43956456456456455, "grad_norm": 1.201278093161411, "learning_rate": 9.934322836708037e-06, "loss": 0.5314, "step": 4684 }, { "epoch": 0.4396584084084084, "grad_norm": 1.1956438983880506, "learning_rate": 9.934234606752059e-06, "loss": 0.4909, "step": 4685 }, { "epoch": 0.43975225225225223, "grad_norm": 1.2260748465270348, "learning_rate": 9.934146317964618e-06, "loss": 0.4531, "step": 4686 }, { "epoch": 0.4398460960960961, "grad_norm": 1.3353916868405142, "learning_rate": 9.934057970346771e-06, "loss": 0.5566, "step": 4687 }, { "epoch": 0.43993993993993996, "grad_norm": 1.0901378301783635, "learning_rate": 9.93396956389957e-06, "loss": 0.4898, "step": 4688 }, { "epoch": 0.44003378378378377, "grad_norm": 1.13771255292699, "learning_rate": 9.933881098624069e-06, "loss": 0.5266, "step": 4689 }, { "epoch": 0.44012762762762764, "grad_norm": 1.1317277146618823, "learning_rate": 9.933792574521323e-06, "loss": 0.5012, "step": 4690 }, { "epoch": 0.44022147147147145, "grad_norm": 1.254024630133548, "learning_rate": 9.933703991592387e-06, "loss": 0.5298, "step": 4691 }, { "epoch": 0.4403153153153153, "grad_norm": 1.137331662959074, "learning_rate": 9.93361534983832e-06, "loss": 0.5062, "step": 4692 }, { "epoch": 0.4404091591591592, "grad_norm": 1.3249963268935856, "learning_rate": 9.933526649260173e-06, "loss": 0.5178, "step": 4693 }, { "epoch": 0.440503003003003, "grad_norm": 1.1792827058537967, "learning_rate": 9.93343788985901e-06, "loss": 0.4943, "step": 4694 }, { "epoch": 0.44059684684684686, "grad_norm": 1.5055576996587319, "learning_rate": 9.933349071635885e-06, "loss": 0.4861, "step": 4695 }, { "epoch": 0.44069069069069067, "grad_norm": 1.2346113916677985, "learning_rate": 9.933260194591858e-06, "loss": 0.4699, "step": 4696 }, { "epoch": 0.44078453453453453, "grad_norm": 1.1507414133862024, "learning_rate": 9.93317125872799e-06, "loss": 0.4779, "step": 4697 }, { "epoch": 0.4408783783783784, "grad_norm": 1.6257635976192615, "learning_rate": 9.933082264045339e-06, "loss": 0.4778, "step": 4698 }, { "epoch": 0.4409722222222222, "grad_norm": 1.1347114258693136, "learning_rate": 9.93299321054497e-06, "loss": 0.4935, "step": 4699 }, { "epoch": 0.4410660660660661, "grad_norm": 1.2967599081438983, "learning_rate": 9.932904098227941e-06, "loss": 0.4995, "step": 4700 }, { "epoch": 0.4411599099099099, "grad_norm": 1.1472126502878697, "learning_rate": 9.932814927095315e-06, "loss": 0.4882, "step": 4701 }, { "epoch": 0.44125375375375375, "grad_norm": 1.905654813661901, "learning_rate": 9.932725697148157e-06, "loss": 0.4649, "step": 4702 }, { "epoch": 0.4413475975975976, "grad_norm": 1.2560450758013681, "learning_rate": 9.932636408387532e-06, "loss": 0.4957, "step": 4703 }, { "epoch": 0.44144144144144143, "grad_norm": 1.2030618818376273, "learning_rate": 9.9325470608145e-06, "loss": 0.4927, "step": 4704 }, { "epoch": 0.4415352852852853, "grad_norm": 1.216420233796803, "learning_rate": 9.93245765443013e-06, "loss": 0.5233, "step": 4705 }, { "epoch": 0.4416291291291291, "grad_norm": 1.4399590762564565, "learning_rate": 9.932368189235486e-06, "loss": 0.4976, "step": 4706 }, { "epoch": 0.44172297297297297, "grad_norm": 1.0502949510884387, "learning_rate": 9.932278665231636e-06, "loss": 0.5501, "step": 4707 }, { "epoch": 0.44181681681681684, "grad_norm": 1.3212672361903792, "learning_rate": 9.932189082419646e-06, "loss": 0.5334, "step": 4708 }, { "epoch": 0.44191066066066065, "grad_norm": 1.1883450259445945, "learning_rate": 9.932099440800587e-06, "loss": 0.482, "step": 4709 }, { "epoch": 0.4420045045045045, "grad_norm": 1.3850722028758622, "learning_rate": 9.932009740375525e-06, "loss": 0.5117, "step": 4710 }, { "epoch": 0.4420983483483483, "grad_norm": 1.0671041860286692, "learning_rate": 9.93191998114553e-06, "loss": 0.501, "step": 4711 }, { "epoch": 0.4421921921921922, "grad_norm": 1.0355610311979808, "learning_rate": 9.931830163111673e-06, "loss": 0.4906, "step": 4712 }, { "epoch": 0.44228603603603606, "grad_norm": 1.1842660419996178, "learning_rate": 9.931740286275025e-06, "loss": 0.5292, "step": 4713 }, { "epoch": 0.44237987987987987, "grad_norm": 1.167003247999819, "learning_rate": 9.931650350636656e-06, "loss": 0.5307, "step": 4714 }, { "epoch": 0.44247372372372373, "grad_norm": 1.1297229676747138, "learning_rate": 9.93156035619764e-06, "loss": 0.4697, "step": 4715 }, { "epoch": 0.44256756756756754, "grad_norm": 6.432506210036424, "learning_rate": 9.93147030295905e-06, "loss": 0.5107, "step": 4716 }, { "epoch": 0.4426614114114114, "grad_norm": 1.1659580807488155, "learning_rate": 9.931380190921958e-06, "loss": 0.5015, "step": 4717 }, { "epoch": 0.4427552552552553, "grad_norm": 15.595072867806458, "learning_rate": 9.93129002008744e-06, "loss": 0.4856, "step": 4718 }, { "epoch": 0.4428490990990991, "grad_norm": 1.5096828280146832, "learning_rate": 9.931199790456572e-06, "loss": 0.5169, "step": 4719 }, { "epoch": 0.44294294294294295, "grad_norm": 1.0824564717369292, "learning_rate": 9.931109502030428e-06, "loss": 0.4935, "step": 4720 }, { "epoch": 0.44303678678678676, "grad_norm": 1.198550245837084, "learning_rate": 9.931019154810083e-06, "loss": 0.5088, "step": 4721 }, { "epoch": 0.44313063063063063, "grad_norm": 1.7617615965804314, "learning_rate": 9.930928748796617e-06, "loss": 0.509, "step": 4722 }, { "epoch": 0.4432244744744745, "grad_norm": 1.181579146823211, "learning_rate": 9.930838283991108e-06, "loss": 0.4913, "step": 4723 }, { "epoch": 0.4433183183183183, "grad_norm": 1.178995456674467, "learning_rate": 9.930747760394634e-06, "loss": 0.4879, "step": 4724 }, { "epoch": 0.44341216216216217, "grad_norm": 1.13331602978779, "learning_rate": 9.930657178008274e-06, "loss": 0.4737, "step": 4725 }, { "epoch": 0.443506006006006, "grad_norm": 1.2639909568890706, "learning_rate": 9.930566536833109e-06, "loss": 0.4395, "step": 4726 }, { "epoch": 0.44359984984984985, "grad_norm": 1.1305206858596364, "learning_rate": 9.930475836870216e-06, "loss": 0.4893, "step": 4727 }, { "epoch": 0.4436936936936937, "grad_norm": 1.2916276503851465, "learning_rate": 9.930385078120683e-06, "loss": 0.5334, "step": 4728 }, { "epoch": 0.4437875375375375, "grad_norm": 23.06813849068475, "learning_rate": 9.930294260585587e-06, "loss": 0.4861, "step": 4729 }, { "epoch": 0.4438813813813814, "grad_norm": 10.145512433977192, "learning_rate": 9.930203384266012e-06, "loss": 0.4848, "step": 4730 }, { "epoch": 0.4439752252252252, "grad_norm": 1.1593068852624275, "learning_rate": 9.93011244916304e-06, "loss": 0.4954, "step": 4731 }, { "epoch": 0.44406906906906907, "grad_norm": 1.2667397049378766, "learning_rate": 9.93002145527776e-06, "loss": 0.4906, "step": 4732 }, { "epoch": 0.44416291291291293, "grad_norm": 1.1144762535343677, "learning_rate": 9.929930402611253e-06, "loss": 0.5051, "step": 4733 }, { "epoch": 0.44425675675675674, "grad_norm": 1.33260424815252, "learning_rate": 9.929839291164605e-06, "loss": 0.52, "step": 4734 }, { "epoch": 0.4443506006006006, "grad_norm": 1.228053589569711, "learning_rate": 9.929748120938904e-06, "loss": 0.4345, "step": 4735 }, { "epoch": 0.4444444444444444, "grad_norm": 1.5250467230449958, "learning_rate": 9.929656891935235e-06, "loss": 0.524, "step": 4736 }, { "epoch": 0.4445382882882883, "grad_norm": 1.4299284518427813, "learning_rate": 9.929565604154688e-06, "loss": 0.453, "step": 4737 }, { "epoch": 0.44463213213213215, "grad_norm": 1.0903183956967228, "learning_rate": 9.929474257598348e-06, "loss": 0.4982, "step": 4738 }, { "epoch": 0.44472597597597596, "grad_norm": 1.149639012530398, "learning_rate": 9.929382852267307e-06, "loss": 0.4329, "step": 4739 }, { "epoch": 0.44481981981981983, "grad_norm": 1.1598577757152038, "learning_rate": 9.929291388162654e-06, "loss": 0.4923, "step": 4740 }, { "epoch": 0.44491366366366364, "grad_norm": 1.645110780059203, "learning_rate": 9.929199865285481e-06, "loss": 0.5004, "step": 4741 }, { "epoch": 0.4450075075075075, "grad_norm": 0.9485781175234073, "learning_rate": 9.929108283636876e-06, "loss": 0.4544, "step": 4742 }, { "epoch": 0.44510135135135137, "grad_norm": 1.3690331940343405, "learning_rate": 9.929016643217934e-06, "loss": 0.5426, "step": 4743 }, { "epoch": 0.4451951951951952, "grad_norm": 1.716547563896384, "learning_rate": 9.928924944029746e-06, "loss": 0.5111, "step": 4744 }, { "epoch": 0.44528903903903905, "grad_norm": 1.379177735541758, "learning_rate": 9.928833186073405e-06, "loss": 0.5351, "step": 4745 }, { "epoch": 0.44538288288288286, "grad_norm": 1.3330887397661617, "learning_rate": 9.928741369350008e-06, "loss": 0.515, "step": 4746 }, { "epoch": 0.4454767267267267, "grad_norm": 1.30260964166197, "learning_rate": 9.928649493860646e-06, "loss": 0.5, "step": 4747 }, { "epoch": 0.4455705705705706, "grad_norm": 1.1472507816093462, "learning_rate": 9.928557559606417e-06, "loss": 0.5275, "step": 4748 }, { "epoch": 0.4456644144144144, "grad_norm": 1.1225243549182071, "learning_rate": 9.928465566588415e-06, "loss": 0.4598, "step": 4749 }, { "epoch": 0.44575825825825827, "grad_norm": 1.1223076726759116, "learning_rate": 9.928373514807739e-06, "loss": 0.518, "step": 4750 }, { "epoch": 0.4458521021021021, "grad_norm": 1.1679075972955513, "learning_rate": 9.928281404265486e-06, "loss": 0.4343, "step": 4751 }, { "epoch": 0.44594594594594594, "grad_norm": 1.3262286537518928, "learning_rate": 9.928189234962751e-06, "loss": 0.5369, "step": 4752 }, { "epoch": 0.4460397897897898, "grad_norm": 1.1501319372515353, "learning_rate": 9.92809700690064e-06, "loss": 0.4675, "step": 4753 }, { "epoch": 0.4461336336336336, "grad_norm": 1.2394389244192534, "learning_rate": 9.928004720080243e-06, "loss": 0.4894, "step": 4754 }, { "epoch": 0.4462274774774775, "grad_norm": 2.6002221376062225, "learning_rate": 9.927912374502671e-06, "loss": 0.5476, "step": 4755 }, { "epoch": 0.4463213213213213, "grad_norm": 1.1994736983696757, "learning_rate": 9.927819970169017e-06, "loss": 0.5247, "step": 4756 }, { "epoch": 0.44641516516516516, "grad_norm": 1.3453806946365494, "learning_rate": 9.927727507080386e-06, "loss": 0.4641, "step": 4757 }, { "epoch": 0.44650900900900903, "grad_norm": 4.419063547698367, "learning_rate": 9.92763498523788e-06, "loss": 0.4823, "step": 4758 }, { "epoch": 0.44660285285285284, "grad_norm": 1.3952505070562093, "learning_rate": 9.927542404642602e-06, "loss": 0.4667, "step": 4759 }, { "epoch": 0.4466966966966967, "grad_norm": 1.1727190862431467, "learning_rate": 9.927449765295655e-06, "loss": 0.5724, "step": 4760 }, { "epoch": 0.4467905405405405, "grad_norm": 1.3747537778182441, "learning_rate": 9.927357067198148e-06, "loss": 0.4719, "step": 4761 }, { "epoch": 0.4468843843843844, "grad_norm": 1.118589846793824, "learning_rate": 9.927264310351178e-06, "loss": 0.4996, "step": 4762 }, { "epoch": 0.44697822822822825, "grad_norm": 1.135221462587827, "learning_rate": 9.927171494755859e-06, "loss": 0.5451, "step": 4763 }, { "epoch": 0.44707207207207206, "grad_norm": 1.4462857339978208, "learning_rate": 9.927078620413294e-06, "loss": 0.4739, "step": 4764 }, { "epoch": 0.4471659159159159, "grad_norm": 2.507167327113668, "learning_rate": 9.92698568732459e-06, "loss": 0.5021, "step": 4765 }, { "epoch": 0.44725975975975973, "grad_norm": 1.0519732011725877, "learning_rate": 9.926892695490855e-06, "loss": 0.5, "step": 4766 }, { "epoch": 0.4473536036036036, "grad_norm": 1.387235698992273, "learning_rate": 9.9267996449132e-06, "loss": 0.5749, "step": 4767 }, { "epoch": 0.44744744744744747, "grad_norm": 1.2851838092260155, "learning_rate": 9.926706535592731e-06, "loss": 0.5187, "step": 4768 }, { "epoch": 0.4475412912912913, "grad_norm": 1.4773595151639671, "learning_rate": 9.926613367530562e-06, "loss": 0.5245, "step": 4769 }, { "epoch": 0.44763513513513514, "grad_norm": 1.0278703561416063, "learning_rate": 9.926520140727802e-06, "loss": 0.4544, "step": 4770 }, { "epoch": 0.44772897897897895, "grad_norm": 1.0555623181024683, "learning_rate": 9.926426855185563e-06, "loss": 0.5167, "step": 4771 }, { "epoch": 0.4478228228228228, "grad_norm": 1.2903432314020722, "learning_rate": 9.926333510904955e-06, "loss": 0.4576, "step": 4772 }, { "epoch": 0.4479166666666667, "grad_norm": 1.152866784962865, "learning_rate": 9.926240107887094e-06, "loss": 0.4986, "step": 4773 }, { "epoch": 0.4480105105105105, "grad_norm": 1.1981243266091812, "learning_rate": 9.926146646133092e-06, "loss": 0.4799, "step": 4774 }, { "epoch": 0.44810435435435436, "grad_norm": 1.1836044389675549, "learning_rate": 9.926053125644064e-06, "loss": 0.5215, "step": 4775 }, { "epoch": 0.44819819819819817, "grad_norm": 1.260141139543885, "learning_rate": 9.925959546421124e-06, "loss": 0.5123, "step": 4776 }, { "epoch": 0.44829204204204204, "grad_norm": 1.0805503827000142, "learning_rate": 9.92586590846539e-06, "loss": 0.4749, "step": 4777 }, { "epoch": 0.4483858858858859, "grad_norm": 1.2203055405804062, "learning_rate": 9.925772211777977e-06, "loss": 0.5666, "step": 4778 }, { "epoch": 0.4484797297297297, "grad_norm": 1.5003062906864488, "learning_rate": 9.92567845636e-06, "loss": 0.4872, "step": 4779 }, { "epoch": 0.4485735735735736, "grad_norm": 1.018445055882507, "learning_rate": 9.925584642212581e-06, "loss": 0.4552, "step": 4780 }, { "epoch": 0.4486674174174174, "grad_norm": 1.3300247369049931, "learning_rate": 9.925490769336837e-06, "loss": 0.537, "step": 4781 }, { "epoch": 0.44876126126126126, "grad_norm": 1.2526932141981486, "learning_rate": 9.925396837733885e-06, "loss": 0.4299, "step": 4782 }, { "epoch": 0.4488551051051051, "grad_norm": 1.1117278450537635, "learning_rate": 9.925302847404847e-06, "loss": 0.4529, "step": 4783 }, { "epoch": 0.44894894894894893, "grad_norm": 1.4869689405905633, "learning_rate": 9.925208798350845e-06, "loss": 0.494, "step": 4784 }, { "epoch": 0.4490427927927928, "grad_norm": 1.4728658294861856, "learning_rate": 9.925114690572998e-06, "loss": 0.5162, "step": 4785 }, { "epoch": 0.4491366366366366, "grad_norm": 1.1764255621231061, "learning_rate": 9.925020524072429e-06, "loss": 0.4782, "step": 4786 }, { "epoch": 0.4492304804804805, "grad_norm": 1.05911556063359, "learning_rate": 9.924926298850261e-06, "loss": 0.5165, "step": 4787 }, { "epoch": 0.44932432432432434, "grad_norm": 1.1102136624135317, "learning_rate": 9.924832014907617e-06, "loss": 0.5243, "step": 4788 }, { "epoch": 0.44941816816816815, "grad_norm": 1.1702475690541885, "learning_rate": 9.924737672245621e-06, "loss": 0.4989, "step": 4789 }, { "epoch": 0.449512012012012, "grad_norm": 1.3924692721252288, "learning_rate": 9.924643270865398e-06, "loss": 0.4907, "step": 4790 }, { "epoch": 0.44960585585585583, "grad_norm": 1.3283419953362592, "learning_rate": 9.924548810768073e-06, "loss": 0.5335, "step": 4791 }, { "epoch": 0.4496996996996997, "grad_norm": 1.2149690124182264, "learning_rate": 9.924454291954775e-06, "loss": 0.4701, "step": 4792 }, { "epoch": 0.44979354354354356, "grad_norm": 1.1087784972918127, "learning_rate": 9.924359714426626e-06, "loss": 0.5098, "step": 4793 }, { "epoch": 0.44988738738738737, "grad_norm": 1.0979131227764891, "learning_rate": 9.924265078184758e-06, "loss": 0.5132, "step": 4794 }, { "epoch": 0.44998123123123124, "grad_norm": 1.2622256269135517, "learning_rate": 9.9241703832303e-06, "loss": 0.4861, "step": 4795 }, { "epoch": 0.45007507507507505, "grad_norm": 1.4801137599242349, "learning_rate": 9.924075629564376e-06, "loss": 0.484, "step": 4796 }, { "epoch": 0.4501689189189189, "grad_norm": 1.029356797316225, "learning_rate": 9.92398081718812e-06, "loss": 0.4887, "step": 4797 }, { "epoch": 0.4502627627627628, "grad_norm": 1.3715540879446133, "learning_rate": 9.92388594610266e-06, "loss": 0.4993, "step": 4798 }, { "epoch": 0.4503566066066066, "grad_norm": 1.3922650373642234, "learning_rate": 9.92379101630913e-06, "loss": 0.5271, "step": 4799 }, { "epoch": 0.45045045045045046, "grad_norm": 1.3805914347334791, "learning_rate": 9.92369602780866e-06, "loss": 0.528, "step": 4800 }, { "epoch": 0.45054429429429427, "grad_norm": 1.0770697105838227, "learning_rate": 9.923600980602382e-06, "loss": 0.526, "step": 4801 }, { "epoch": 0.45063813813813813, "grad_norm": 1.4933732395600974, "learning_rate": 9.92350587469143e-06, "loss": 0.4876, "step": 4802 }, { "epoch": 0.450731981981982, "grad_norm": 1.3545709102085028, "learning_rate": 9.92341071007694e-06, "loss": 0.4544, "step": 4803 }, { "epoch": 0.4508258258258258, "grad_norm": 1.6306880803139612, "learning_rate": 9.923315486760043e-06, "loss": 0.5067, "step": 4804 }, { "epoch": 0.4509196696696697, "grad_norm": 1.092466924182582, "learning_rate": 9.923220204741875e-06, "loss": 0.4743, "step": 4805 }, { "epoch": 0.4510135135135135, "grad_norm": 1.1612545440961524, "learning_rate": 9.923124864023575e-06, "loss": 0.441, "step": 4806 }, { "epoch": 0.45110735735735735, "grad_norm": 1.170042366818755, "learning_rate": 9.923029464606276e-06, "loss": 0.5375, "step": 4807 }, { "epoch": 0.4512012012012012, "grad_norm": 1.3248341909917283, "learning_rate": 9.922934006491118e-06, "loss": 0.4932, "step": 4808 }, { "epoch": 0.45129504504504503, "grad_norm": 1.194556560345009, "learning_rate": 9.92283848967924e-06, "loss": 0.526, "step": 4809 }, { "epoch": 0.4513888888888889, "grad_norm": 1.2165494979208584, "learning_rate": 9.922742914171776e-06, "loss": 0.5063, "step": 4810 }, { "epoch": 0.4514827327327327, "grad_norm": 1.2537487615990857, "learning_rate": 9.922647279969872e-06, "loss": 0.5493, "step": 4811 }, { "epoch": 0.45157657657657657, "grad_norm": 1.4627888358210717, "learning_rate": 9.922551587074665e-06, "loss": 0.5443, "step": 4812 }, { "epoch": 0.45167042042042044, "grad_norm": 1.37430381939393, "learning_rate": 9.922455835487294e-06, "loss": 0.4883, "step": 4813 }, { "epoch": 0.45176426426426425, "grad_norm": 1.2061909195895986, "learning_rate": 9.922360025208904e-06, "loss": 0.5267, "step": 4814 }, { "epoch": 0.4518581081081081, "grad_norm": 1.6181602999820797, "learning_rate": 9.922264156240636e-06, "loss": 0.5314, "step": 4815 }, { "epoch": 0.4519519519519519, "grad_norm": 1.1565886130359846, "learning_rate": 9.922168228583634e-06, "loss": 0.4497, "step": 4816 }, { "epoch": 0.4520457957957958, "grad_norm": 1.1255369741215058, "learning_rate": 9.922072242239039e-06, "loss": 0.5084, "step": 4817 }, { "epoch": 0.45213963963963966, "grad_norm": 0.999793951540297, "learning_rate": 9.921976197207998e-06, "loss": 0.4497, "step": 4818 }, { "epoch": 0.45223348348348347, "grad_norm": 1.1781950223050153, "learning_rate": 9.921880093491656e-06, "loss": 0.4952, "step": 4819 }, { "epoch": 0.45232732732732733, "grad_norm": 1.0444518701176113, "learning_rate": 9.921783931091158e-06, "loss": 0.4953, "step": 4820 }, { "epoch": 0.45242117117117114, "grad_norm": 1.1365273626033556, "learning_rate": 9.92168771000765e-06, "loss": 0.4977, "step": 4821 }, { "epoch": 0.452515015015015, "grad_norm": 1.2945475380140261, "learning_rate": 9.921591430242282e-06, "loss": 0.4872, "step": 4822 }, { "epoch": 0.4526088588588589, "grad_norm": 1.0988521815476564, "learning_rate": 9.921495091796199e-06, "loss": 0.4732, "step": 4823 }, { "epoch": 0.4527027027027027, "grad_norm": 2.1123936612426473, "learning_rate": 9.921398694670552e-06, "loss": 0.5612, "step": 4824 }, { "epoch": 0.45279654654654655, "grad_norm": 1.0361603585537835, "learning_rate": 9.921302238866488e-06, "loss": 0.483, "step": 4825 }, { "epoch": 0.45289039039039036, "grad_norm": 1.0867154939166888, "learning_rate": 9.92120572438516e-06, "loss": 0.4518, "step": 4826 }, { "epoch": 0.45298423423423423, "grad_norm": 1.4174590140009415, "learning_rate": 9.921109151227714e-06, "loss": 0.4706, "step": 4827 }, { "epoch": 0.4530780780780781, "grad_norm": 1.0133414271803594, "learning_rate": 9.921012519395306e-06, "loss": 0.4705, "step": 4828 }, { "epoch": 0.4531719219219219, "grad_norm": 2.3158809121376245, "learning_rate": 9.920915828889086e-06, "loss": 0.498, "step": 4829 }, { "epoch": 0.45326576576576577, "grad_norm": 1.4575432798386225, "learning_rate": 9.920819079710209e-06, "loss": 0.4821, "step": 4830 }, { "epoch": 0.45335960960960964, "grad_norm": 1.0824189029807039, "learning_rate": 9.920722271859825e-06, "loss": 0.5028, "step": 4831 }, { "epoch": 0.45345345345345345, "grad_norm": 1.1746075121547193, "learning_rate": 9.92062540533909e-06, "loss": 0.4729, "step": 4832 }, { "epoch": 0.4535472972972973, "grad_norm": 1.5237175823814908, "learning_rate": 9.92052848014916e-06, "loss": 0.5364, "step": 4833 }, { "epoch": 0.4536411411411411, "grad_norm": 1.000066809652714, "learning_rate": 9.920431496291192e-06, "loss": 0.4984, "step": 4834 }, { "epoch": 0.453734984984985, "grad_norm": 1.1713752984959038, "learning_rate": 9.920334453766338e-06, "loss": 0.4849, "step": 4835 }, { "epoch": 0.45382882882882886, "grad_norm": 1.045473649163636, "learning_rate": 9.920237352575757e-06, "loss": 0.477, "step": 4836 }, { "epoch": 0.45392267267267267, "grad_norm": 1.0952507792851247, "learning_rate": 9.920140192720608e-06, "loss": 0.4631, "step": 4837 }, { "epoch": 0.45401651651651653, "grad_norm": 1.1262055428640207, "learning_rate": 9.920042974202047e-06, "loss": 0.5369, "step": 4838 }, { "epoch": 0.45411036036036034, "grad_norm": 1.2020232093979746, "learning_rate": 9.919945697021237e-06, "loss": 0.4769, "step": 4839 }, { "epoch": 0.4542042042042042, "grad_norm": 1.2038582617448006, "learning_rate": 9.919848361179332e-06, "loss": 0.5431, "step": 4840 }, { "epoch": 0.4542980480480481, "grad_norm": 1.5535403034054984, "learning_rate": 9.919750966677497e-06, "loss": 0.5031, "step": 4841 }, { "epoch": 0.4543918918918919, "grad_norm": 1.1000497971194958, "learning_rate": 9.919653513516893e-06, "loss": 0.4288, "step": 4842 }, { "epoch": 0.45448573573573575, "grad_norm": 1.2604154712402937, "learning_rate": 9.919556001698683e-06, "loss": 0.5064, "step": 4843 }, { "epoch": 0.45457957957957956, "grad_norm": 1.2979308668408633, "learning_rate": 9.919458431224025e-06, "loss": 0.5656, "step": 4844 }, { "epoch": 0.45467342342342343, "grad_norm": 1.2613181364378823, "learning_rate": 9.919360802094086e-06, "loss": 0.5417, "step": 4845 }, { "epoch": 0.4547672672672673, "grad_norm": 1.1067420282566314, "learning_rate": 9.91926311431003e-06, "loss": 0.5524, "step": 4846 }, { "epoch": 0.4548611111111111, "grad_norm": 1.0962463282142305, "learning_rate": 9.919165367873018e-06, "loss": 0.428, "step": 4847 }, { "epoch": 0.45495495495495497, "grad_norm": 1.0390866539260717, "learning_rate": 9.91906756278422e-06, "loss": 0.5071, "step": 4848 }, { "epoch": 0.4550487987987988, "grad_norm": 1.4322002473556064, "learning_rate": 9.918969699044802e-06, "loss": 0.5473, "step": 4849 }, { "epoch": 0.45514264264264265, "grad_norm": 1.57889780270082, "learning_rate": 9.918871776655929e-06, "loss": 0.5382, "step": 4850 }, { "epoch": 0.4552364864864865, "grad_norm": 1.0931249184248155, "learning_rate": 9.918773795618767e-06, "loss": 0.4621, "step": 4851 }, { "epoch": 0.4553303303303303, "grad_norm": 1.1350282859818062, "learning_rate": 9.918675755934488e-06, "loss": 0.4991, "step": 4852 }, { "epoch": 0.4554241741741742, "grad_norm": 1.0650674964900688, "learning_rate": 9.918577657604258e-06, "loss": 0.4626, "step": 4853 }, { "epoch": 0.455518018018018, "grad_norm": 1.095551384819439, "learning_rate": 9.91847950062925e-06, "loss": 0.4835, "step": 4854 }, { "epoch": 0.45561186186186187, "grad_norm": 1.203544319641006, "learning_rate": 9.918381285010628e-06, "loss": 0.4747, "step": 4855 }, { "epoch": 0.45570570570570573, "grad_norm": 1.2079316636343964, "learning_rate": 9.91828301074957e-06, "loss": 0.5128, "step": 4856 }, { "epoch": 0.45579954954954954, "grad_norm": 1.0927677393599808, "learning_rate": 9.918184677847244e-06, "loss": 0.5183, "step": 4857 }, { "epoch": 0.4558933933933934, "grad_norm": 1.1145937121973803, "learning_rate": 9.918086286304823e-06, "loss": 0.4612, "step": 4858 }, { "epoch": 0.4559872372372372, "grad_norm": 1.21061299020437, "learning_rate": 9.917987836123481e-06, "loss": 0.4879, "step": 4859 }, { "epoch": 0.4560810810810811, "grad_norm": 1.2378524606239, "learning_rate": 9.91788932730439e-06, "loss": 0.5115, "step": 4860 }, { "epoch": 0.45617492492492495, "grad_norm": 1.3359155115059633, "learning_rate": 9.917790759848728e-06, "loss": 0.4745, "step": 4861 }, { "epoch": 0.45626876876876876, "grad_norm": 1.083052157493556, "learning_rate": 9.917692133757666e-06, "loss": 0.4206, "step": 4862 }, { "epoch": 0.45636261261261263, "grad_norm": 1.3266491361629063, "learning_rate": 9.917593449032383e-06, "loss": 0.5204, "step": 4863 }, { "epoch": 0.45645645645645644, "grad_norm": 1.1331606162599965, "learning_rate": 9.917494705674053e-06, "loss": 0.5071, "step": 4864 }, { "epoch": 0.4565503003003003, "grad_norm": 1.2063049072233865, "learning_rate": 9.917395903683856e-06, "loss": 0.5134, "step": 4865 }, { "epoch": 0.45664414414414417, "grad_norm": 2.1882137726739073, "learning_rate": 9.91729704306297e-06, "loss": 0.5212, "step": 4866 }, { "epoch": 0.456737987987988, "grad_norm": 1.155715369853059, "learning_rate": 9.91719812381257e-06, "loss": 0.4855, "step": 4867 }, { "epoch": 0.45683183183183185, "grad_norm": 1.2847026360231495, "learning_rate": 9.917099145933839e-06, "loss": 0.4771, "step": 4868 }, { "epoch": 0.45692567567567566, "grad_norm": 1.034942964019176, "learning_rate": 9.917000109427958e-06, "loss": 0.4578, "step": 4869 }, { "epoch": 0.4570195195195195, "grad_norm": 1.1310536157127282, "learning_rate": 9.916901014296104e-06, "loss": 0.5391, "step": 4870 }, { "epoch": 0.4571133633633634, "grad_norm": 2.2714987187405633, "learning_rate": 9.916801860539463e-06, "loss": 0.4944, "step": 4871 }, { "epoch": 0.4572072072072072, "grad_norm": 0.9948070444274109, "learning_rate": 9.916702648159211e-06, "loss": 0.4885, "step": 4872 }, { "epoch": 0.45730105105105107, "grad_norm": 0.9206730957045522, "learning_rate": 9.916603377156537e-06, "loss": 0.4428, "step": 4873 }, { "epoch": 0.4573948948948949, "grad_norm": 1.4775801833596482, "learning_rate": 9.916504047532623e-06, "loss": 0.5375, "step": 4874 }, { "epoch": 0.45748873873873874, "grad_norm": 1.241360546340711, "learning_rate": 9.91640465928865e-06, "loss": 0.4458, "step": 4875 }, { "epoch": 0.4575825825825826, "grad_norm": 1.0445561057814714, "learning_rate": 9.916305212425808e-06, "loss": 0.4705, "step": 4876 }, { "epoch": 0.4576764264264264, "grad_norm": 1.0981547473054087, "learning_rate": 9.916205706945279e-06, "loss": 0.5113, "step": 4877 }, { "epoch": 0.4577702702702703, "grad_norm": 1.3440309212165122, "learning_rate": 9.91610614284825e-06, "loss": 0.4782, "step": 4878 }, { "epoch": 0.4578641141141141, "grad_norm": 0.998071774830902, "learning_rate": 9.91600652013591e-06, "loss": 0.4597, "step": 4879 }, { "epoch": 0.45795795795795796, "grad_norm": 1.239099445803809, "learning_rate": 9.915906838809446e-06, "loss": 0.4619, "step": 4880 }, { "epoch": 0.45805180180180183, "grad_norm": 1.0800205414182078, "learning_rate": 9.915807098870048e-06, "loss": 0.4679, "step": 4881 }, { "epoch": 0.45814564564564564, "grad_norm": 1.2751068606506888, "learning_rate": 9.915707300318899e-06, "loss": 0.4701, "step": 4882 }, { "epoch": 0.4582394894894895, "grad_norm": 1.0552701309381936, "learning_rate": 9.915607443157195e-06, "loss": 0.496, "step": 4883 }, { "epoch": 0.4583333333333333, "grad_norm": 1.1597472897486438, "learning_rate": 9.915507527386126e-06, "loss": 0.4894, "step": 4884 }, { "epoch": 0.4584271771771772, "grad_norm": 1.4338330035673426, "learning_rate": 9.915407553006884e-06, "loss": 0.5018, "step": 4885 }, { "epoch": 0.45852102102102105, "grad_norm": 1.1357014850535965, "learning_rate": 9.915307520020656e-06, "loss": 0.5194, "step": 4886 }, { "epoch": 0.45861486486486486, "grad_norm": 1.477914804404461, "learning_rate": 9.91520742842864e-06, "loss": 0.5063, "step": 4887 }, { "epoch": 0.4587087087087087, "grad_norm": 1.0271826195679241, "learning_rate": 9.915107278232024e-06, "loss": 0.4529, "step": 4888 }, { "epoch": 0.45880255255255253, "grad_norm": 1.259281758531371, "learning_rate": 9.915007069432007e-06, "loss": 0.5299, "step": 4889 }, { "epoch": 0.4588963963963964, "grad_norm": 1.1634192250301094, "learning_rate": 9.914906802029784e-06, "loss": 0.4857, "step": 4890 }, { "epoch": 0.45899024024024027, "grad_norm": 2.735776914961144, "learning_rate": 9.914806476026548e-06, "loss": 0.4965, "step": 4891 }, { "epoch": 0.4590840840840841, "grad_norm": 1.1990213851123812, "learning_rate": 9.914706091423496e-06, "loss": 0.5038, "step": 4892 }, { "epoch": 0.45917792792792794, "grad_norm": 1.567828629082375, "learning_rate": 9.914605648221824e-06, "loss": 0.4823, "step": 4893 }, { "epoch": 0.45927177177177175, "grad_norm": 1.0734722083607369, "learning_rate": 9.91450514642273e-06, "loss": 0.4997, "step": 4894 }, { "epoch": 0.4593656156156156, "grad_norm": 1.0740289053099998, "learning_rate": 9.914404586027416e-06, "loss": 0.5132, "step": 4895 }, { "epoch": 0.4594594594594595, "grad_norm": 2.234506728569927, "learning_rate": 9.914303967037074e-06, "loss": 0.5316, "step": 4896 }, { "epoch": 0.4595533033033033, "grad_norm": 1.0657371085666116, "learning_rate": 9.914203289452909e-06, "loss": 0.4528, "step": 4897 }, { "epoch": 0.45964714714714716, "grad_norm": 1.2239224276931344, "learning_rate": 9.91410255327612e-06, "loss": 0.5084, "step": 4898 }, { "epoch": 0.45974099099099097, "grad_norm": 1.0749723404149474, "learning_rate": 9.914001758507907e-06, "loss": 0.4724, "step": 4899 }, { "epoch": 0.45983483483483484, "grad_norm": 1.183745608267018, "learning_rate": 9.913900905149474e-06, "loss": 0.5298, "step": 4900 }, { "epoch": 0.4599286786786787, "grad_norm": 1.021891751130581, "learning_rate": 9.91379999320202e-06, "loss": 0.5366, "step": 4901 }, { "epoch": 0.4600225225225225, "grad_norm": 1.082816662180796, "learning_rate": 9.913699022666751e-06, "loss": 0.4594, "step": 4902 }, { "epoch": 0.4601163663663664, "grad_norm": 2.1675998629811084, "learning_rate": 9.913597993544873e-06, "loss": 0.4762, "step": 4903 }, { "epoch": 0.4602102102102102, "grad_norm": 1.4259162327225825, "learning_rate": 9.913496905837584e-06, "loss": 0.4742, "step": 4904 }, { "epoch": 0.46030405405405406, "grad_norm": 1.0817235234536888, "learning_rate": 9.913395759546096e-06, "loss": 0.4524, "step": 4905 }, { "epoch": 0.4603978978978979, "grad_norm": 1.2887070273908432, "learning_rate": 9.913294554671612e-06, "loss": 0.4683, "step": 4906 }, { "epoch": 0.46049174174174173, "grad_norm": 1.3904058442414076, "learning_rate": 9.913193291215338e-06, "loss": 0.4613, "step": 4907 }, { "epoch": 0.4605855855855856, "grad_norm": 1.4112440769782428, "learning_rate": 9.913091969178482e-06, "loss": 0.4559, "step": 4908 }, { "epoch": 0.4606794294294294, "grad_norm": 1.040089183899052, "learning_rate": 9.91299058856225e-06, "loss": 0.4736, "step": 4909 }, { "epoch": 0.4607732732732733, "grad_norm": 1.1746877668364777, "learning_rate": 9.912889149367856e-06, "loss": 0.4959, "step": 4910 }, { "epoch": 0.46086711711711714, "grad_norm": 1.419242841540361, "learning_rate": 9.912787651596506e-06, "loss": 0.5309, "step": 4911 }, { "epoch": 0.46096096096096095, "grad_norm": 1.6095949088114239, "learning_rate": 9.91268609524941e-06, "loss": 0.4941, "step": 4912 }, { "epoch": 0.4610548048048048, "grad_norm": 1.4203679484471676, "learning_rate": 9.91258448032778e-06, "loss": 0.4993, "step": 4913 }, { "epoch": 0.46114864864864863, "grad_norm": 1.1513951320079108, "learning_rate": 9.912482806832826e-06, "loss": 0.4754, "step": 4914 }, { "epoch": 0.4612424924924925, "grad_norm": 1.529438412004348, "learning_rate": 9.912381074765762e-06, "loss": 0.4731, "step": 4915 }, { "epoch": 0.46133633633633636, "grad_norm": 1.3170571083475289, "learning_rate": 9.912279284127801e-06, "loss": 0.5262, "step": 4916 }, { "epoch": 0.46143018018018017, "grad_norm": 1.4004860903211929, "learning_rate": 9.912177434920155e-06, "loss": 0.5288, "step": 4917 }, { "epoch": 0.46152402402402404, "grad_norm": 1.4298188950765216, "learning_rate": 9.912075527144039e-06, "loss": 0.4875, "step": 4918 }, { "epoch": 0.46161786786786785, "grad_norm": 1.251041095132186, "learning_rate": 9.911973560800669e-06, "loss": 0.5481, "step": 4919 }, { "epoch": 0.4617117117117117, "grad_norm": 1.166072103793532, "learning_rate": 9.91187153589126e-06, "loss": 0.541, "step": 4920 }, { "epoch": 0.4618055555555556, "grad_norm": 1.6550256910261558, "learning_rate": 9.911769452417028e-06, "loss": 0.4923, "step": 4921 }, { "epoch": 0.4618993993993994, "grad_norm": 1.9490223596656115, "learning_rate": 9.911667310379191e-06, "loss": 0.5381, "step": 4922 }, { "epoch": 0.46199324324324326, "grad_norm": 1.124733723108233, "learning_rate": 9.91156510977897e-06, "loss": 0.5023, "step": 4923 }, { "epoch": 0.46208708708708707, "grad_norm": 0.9290769675810383, "learning_rate": 9.911462850617575e-06, "loss": 0.4626, "step": 4924 }, { "epoch": 0.46218093093093093, "grad_norm": 1.0782008770698979, "learning_rate": 9.911360532896232e-06, "loss": 0.4758, "step": 4925 }, { "epoch": 0.4622747747747748, "grad_norm": 1.0864504545596787, "learning_rate": 9.911258156616161e-06, "loss": 0.525, "step": 4926 }, { "epoch": 0.4623686186186186, "grad_norm": 1.673099836909472, "learning_rate": 9.91115572177858e-06, "loss": 0.467, "step": 4927 }, { "epoch": 0.4624624624624625, "grad_norm": 1.126316872541012, "learning_rate": 9.91105322838471e-06, "loss": 0.4573, "step": 4928 }, { "epoch": 0.4625563063063063, "grad_norm": 1.1444388249207091, "learning_rate": 9.910950676435775e-06, "loss": 0.5265, "step": 4929 }, { "epoch": 0.46265015015015015, "grad_norm": 1.0390103955494208, "learning_rate": 9.910848065932999e-06, "loss": 0.5499, "step": 4930 }, { "epoch": 0.462743993993994, "grad_norm": 1.2359557667977559, "learning_rate": 9.910745396877602e-06, "loss": 0.4532, "step": 4931 }, { "epoch": 0.46283783783783783, "grad_norm": 1.0906310860410298, "learning_rate": 9.91064266927081e-06, "loss": 0.4625, "step": 4932 }, { "epoch": 0.4629316816816817, "grad_norm": 1.0198979631777347, "learning_rate": 9.910539883113848e-06, "loss": 0.5116, "step": 4933 }, { "epoch": 0.4630255255255255, "grad_norm": 1.2527516916374706, "learning_rate": 9.91043703840794e-06, "loss": 0.4959, "step": 4934 }, { "epoch": 0.46311936936936937, "grad_norm": 1.0118900805364932, "learning_rate": 9.910334135154314e-06, "loss": 0.4772, "step": 4935 }, { "epoch": 0.46321321321321324, "grad_norm": 1.180163552685306, "learning_rate": 9.910231173354195e-06, "loss": 0.5159, "step": 4936 }, { "epoch": 0.46330705705705705, "grad_norm": 1.3331276365586304, "learning_rate": 9.910128153008813e-06, "loss": 0.5332, "step": 4937 }, { "epoch": 0.4634009009009009, "grad_norm": 1.1130996719394448, "learning_rate": 9.910025074119394e-06, "loss": 0.4604, "step": 4938 }, { "epoch": 0.4634947447447447, "grad_norm": 1.1937241681803192, "learning_rate": 9.909921936687169e-06, "loss": 0.4352, "step": 4939 }, { "epoch": 0.4635885885885886, "grad_norm": 1.215496656010028, "learning_rate": 9.909818740713365e-06, "loss": 0.5131, "step": 4940 }, { "epoch": 0.46368243243243246, "grad_norm": 1.4192124094570913, "learning_rate": 9.909715486199218e-06, "loss": 0.5409, "step": 4941 }, { "epoch": 0.46377627627627627, "grad_norm": 1.400312645682826, "learning_rate": 9.909612173145954e-06, "loss": 0.5398, "step": 4942 }, { "epoch": 0.46387012012012013, "grad_norm": 1.0820068307675559, "learning_rate": 9.909508801554806e-06, "loss": 0.4084, "step": 4943 }, { "epoch": 0.46396396396396394, "grad_norm": 1.1739505360464875, "learning_rate": 9.909405371427006e-06, "loss": 0.5333, "step": 4944 }, { "epoch": 0.4640578078078078, "grad_norm": 1.2655605745469216, "learning_rate": 9.909301882763788e-06, "loss": 0.4966, "step": 4945 }, { "epoch": 0.4641516516516517, "grad_norm": 1.2061873273959234, "learning_rate": 9.909198335566388e-06, "loss": 0.4552, "step": 4946 }, { "epoch": 0.4642454954954955, "grad_norm": 1.1057105926005313, "learning_rate": 9.909094729836035e-06, "loss": 0.4969, "step": 4947 }, { "epoch": 0.46433933933933935, "grad_norm": 1.1653204324042405, "learning_rate": 9.90899106557397e-06, "loss": 0.4732, "step": 4948 }, { "epoch": 0.46443318318318316, "grad_norm": 1.228097688571737, "learning_rate": 9.908887342781425e-06, "loss": 0.4872, "step": 4949 }, { "epoch": 0.46452702702702703, "grad_norm": 1.2286445728088493, "learning_rate": 9.90878356145964e-06, "loss": 0.4626, "step": 4950 }, { "epoch": 0.4646208708708709, "grad_norm": 1.3316517319897587, "learning_rate": 9.908679721609851e-06, "loss": 0.4869, "step": 4951 }, { "epoch": 0.4647147147147147, "grad_norm": 1.3555018126550775, "learning_rate": 9.908575823233297e-06, "loss": 0.5413, "step": 4952 }, { "epoch": 0.46480855855855857, "grad_norm": 1.2609421835406922, "learning_rate": 9.908471866331213e-06, "loss": 0.4802, "step": 4953 }, { "epoch": 0.4649024024024024, "grad_norm": 1.4056875709066967, "learning_rate": 9.908367850904843e-06, "loss": 0.5158, "step": 4954 }, { "epoch": 0.46499624624624625, "grad_norm": 1.7272513468964272, "learning_rate": 9.908263776955425e-06, "loss": 0.4989, "step": 4955 }, { "epoch": 0.4650900900900901, "grad_norm": 1.4050041681262648, "learning_rate": 9.9081596444842e-06, "loss": 0.5028, "step": 4956 }, { "epoch": 0.4651839339339339, "grad_norm": 1.365334939170973, "learning_rate": 9.908055453492412e-06, "loss": 0.5676, "step": 4957 }, { "epoch": 0.4652777777777778, "grad_norm": 1.3162375614608404, "learning_rate": 9.907951203981299e-06, "loss": 0.5197, "step": 4958 }, { "epoch": 0.4653716216216216, "grad_norm": 1.2018916316495627, "learning_rate": 9.907846895952106e-06, "loss": 0.5108, "step": 4959 }, { "epoch": 0.46546546546546547, "grad_norm": 0.962627646877977, "learning_rate": 9.907742529406077e-06, "loss": 0.4481, "step": 4960 }, { "epoch": 0.46555930930930933, "grad_norm": 1.3542682948961478, "learning_rate": 9.907638104344458e-06, "loss": 0.51, "step": 4961 }, { "epoch": 0.46565315315315314, "grad_norm": 1.957911602948657, "learning_rate": 9.90753362076849e-06, "loss": 0.4977, "step": 4962 }, { "epoch": 0.465746996996997, "grad_norm": 1.3071463531945011, "learning_rate": 9.907429078679422e-06, "loss": 0.4828, "step": 4963 }, { "epoch": 0.4658408408408408, "grad_norm": 1.038683831765185, "learning_rate": 9.907324478078499e-06, "loss": 0.5135, "step": 4964 }, { "epoch": 0.4659346846846847, "grad_norm": 1.2181474381418151, "learning_rate": 9.907219818966969e-06, "loss": 0.4835, "step": 4965 }, { "epoch": 0.46602852852852855, "grad_norm": 1.0485330470995515, "learning_rate": 9.907115101346079e-06, "loss": 0.412, "step": 4966 }, { "epoch": 0.46612237237237236, "grad_norm": 1.6160891507079735, "learning_rate": 9.90701032521708e-06, "loss": 0.47, "step": 4967 }, { "epoch": 0.46621621621621623, "grad_norm": 2.1505256085744198, "learning_rate": 9.906905490581217e-06, "loss": 0.5558, "step": 4968 }, { "epoch": 0.46631006006006004, "grad_norm": 1.584233128765541, "learning_rate": 9.906800597439746e-06, "loss": 0.5136, "step": 4969 }, { "epoch": 0.4664039039039039, "grad_norm": 1.2722235261486485, "learning_rate": 9.90669564579391e-06, "loss": 0.4994, "step": 4970 }, { "epoch": 0.46649774774774777, "grad_norm": 1.3209680044257222, "learning_rate": 9.906590635644966e-06, "loss": 0.4631, "step": 4971 }, { "epoch": 0.4665915915915916, "grad_norm": 1.5325182050349184, "learning_rate": 9.906485566994165e-06, "loss": 0.5343, "step": 4972 }, { "epoch": 0.46668543543543545, "grad_norm": 1.1325233782931212, "learning_rate": 9.906380439842758e-06, "loss": 0.4434, "step": 4973 }, { "epoch": 0.46677927927927926, "grad_norm": 1.098337384865214, "learning_rate": 9.906275254192e-06, "loss": 0.5153, "step": 4974 }, { "epoch": 0.4668731231231231, "grad_norm": 1.3404187538235783, "learning_rate": 9.906170010043144e-06, "loss": 0.4286, "step": 4975 }, { "epoch": 0.466966966966967, "grad_norm": 1.0996205934656804, "learning_rate": 9.906064707397446e-06, "loss": 0.5304, "step": 4976 }, { "epoch": 0.4670608108108108, "grad_norm": 1.6568512684649759, "learning_rate": 9.905959346256162e-06, "loss": 0.4893, "step": 4977 }, { "epoch": 0.46715465465465467, "grad_norm": 1.3532239042087468, "learning_rate": 9.905853926620545e-06, "loss": 0.5452, "step": 4978 }, { "epoch": 0.4672484984984985, "grad_norm": 1.1321462958516935, "learning_rate": 9.905748448491857e-06, "loss": 0.5468, "step": 4979 }, { "epoch": 0.46734234234234234, "grad_norm": 1.1215307101007823, "learning_rate": 9.905642911871351e-06, "loss": 0.5012, "step": 4980 }, { "epoch": 0.4674361861861862, "grad_norm": 1.1368647491887258, "learning_rate": 9.905537316760288e-06, "loss": 0.4845, "step": 4981 }, { "epoch": 0.46753003003003, "grad_norm": 1.1376747289585818, "learning_rate": 9.905431663159926e-06, "loss": 0.4915, "step": 4982 }, { "epoch": 0.4676238738738739, "grad_norm": 1.3856235299204769, "learning_rate": 9.905325951071526e-06, "loss": 0.5198, "step": 4983 }, { "epoch": 0.4677177177177177, "grad_norm": 2.5388110171236926, "learning_rate": 9.905220180496346e-06, "loss": 0.4995, "step": 4984 }, { "epoch": 0.46781156156156156, "grad_norm": 1.7816012265896302, "learning_rate": 9.905114351435648e-06, "loss": 0.5176, "step": 4985 }, { "epoch": 0.46790540540540543, "grad_norm": 1.1121856196012954, "learning_rate": 9.905008463890695e-06, "loss": 0.5004, "step": 4986 }, { "epoch": 0.46799924924924924, "grad_norm": 1.1307873848723442, "learning_rate": 9.904902517862748e-06, "loss": 0.4771, "step": 4987 }, { "epoch": 0.4680930930930931, "grad_norm": 1.006051083492832, "learning_rate": 9.904796513353074e-06, "loss": 0.4893, "step": 4988 }, { "epoch": 0.4681869369369369, "grad_norm": 2.803166975952071, "learning_rate": 9.904690450362932e-06, "loss": 0.5656, "step": 4989 }, { "epoch": 0.4682807807807808, "grad_norm": 1.1719353653466762, "learning_rate": 9.904584328893587e-06, "loss": 0.4931, "step": 4990 }, { "epoch": 0.46837462462462465, "grad_norm": 2.542456425792083, "learning_rate": 9.904478148946308e-06, "loss": 0.5194, "step": 4991 }, { "epoch": 0.46846846846846846, "grad_norm": 1.186256595459727, "learning_rate": 9.904371910522358e-06, "loss": 0.5061, "step": 4992 }, { "epoch": 0.4685623123123123, "grad_norm": 1.0524417334844973, "learning_rate": 9.904265613623005e-06, "loss": 0.4921, "step": 4993 }, { "epoch": 0.46865615615615613, "grad_norm": 1.2631268696161966, "learning_rate": 9.904159258249516e-06, "loss": 0.5511, "step": 4994 }, { "epoch": 0.46875, "grad_norm": 1.3173264337240944, "learning_rate": 9.904052844403158e-06, "loss": 0.5077, "step": 4995 }, { "epoch": 0.46884384384384387, "grad_norm": 1.809374238508126, "learning_rate": 9.9039463720852e-06, "loss": 0.4658, "step": 4996 }, { "epoch": 0.4689376876876877, "grad_norm": 1.156208177316152, "learning_rate": 9.903839841296915e-06, "loss": 0.5114, "step": 4997 }, { "epoch": 0.46903153153153154, "grad_norm": 1.1960515459695975, "learning_rate": 9.903733252039566e-06, "loss": 0.5035, "step": 4998 }, { "epoch": 0.46912537537537535, "grad_norm": 1.7304067892264337, "learning_rate": 9.903626604314432e-06, "loss": 0.4962, "step": 4999 }, { "epoch": 0.4692192192192192, "grad_norm": 1.4690690846146393, "learning_rate": 9.90351989812278e-06, "loss": 0.4736, "step": 5000 }, { "epoch": 0.4693130630630631, "grad_norm": 1.4256314141546604, "learning_rate": 9.903413133465885e-06, "loss": 0.4892, "step": 5001 }, { "epoch": 0.4694069069069069, "grad_norm": 1.257461640846136, "learning_rate": 9.903306310345015e-06, "loss": 0.4745, "step": 5002 }, { "epoch": 0.46950075075075076, "grad_norm": 2.304790908661898, "learning_rate": 9.903199428761448e-06, "loss": 0.4395, "step": 5003 }, { "epoch": 0.46959459459459457, "grad_norm": 1.1524337934798559, "learning_rate": 9.903092488716457e-06, "loss": 0.4811, "step": 5004 }, { "epoch": 0.46968843843843844, "grad_norm": 1.0954874696901056, "learning_rate": 9.902985490211316e-06, "loss": 0.4551, "step": 5005 }, { "epoch": 0.4697822822822823, "grad_norm": 1.3191950297047716, "learning_rate": 9.902878433247304e-06, "loss": 0.5454, "step": 5006 }, { "epoch": 0.4698761261261261, "grad_norm": 1.4226481822678219, "learning_rate": 9.902771317825695e-06, "loss": 0.5163, "step": 5007 }, { "epoch": 0.46996996996997, "grad_norm": 1.4092077795955955, "learning_rate": 9.902664143947766e-06, "loss": 0.4769, "step": 5008 }, { "epoch": 0.4700638138138138, "grad_norm": 1.2344292366973062, "learning_rate": 9.902556911614795e-06, "loss": 0.4964, "step": 5009 }, { "epoch": 0.47015765765765766, "grad_norm": 1.3042428932214285, "learning_rate": 9.902449620828063e-06, "loss": 0.5397, "step": 5010 }, { "epoch": 0.4702515015015015, "grad_norm": 1.360940049303028, "learning_rate": 9.902342271588844e-06, "loss": 0.4827, "step": 5011 }, { "epoch": 0.47034534534534533, "grad_norm": 2.072632817768655, "learning_rate": 9.902234863898425e-06, "loss": 0.5123, "step": 5012 }, { "epoch": 0.4704391891891892, "grad_norm": 0.981759889292859, "learning_rate": 9.90212739775808e-06, "loss": 0.445, "step": 5013 }, { "epoch": 0.470533033033033, "grad_norm": 1.0804453119973951, "learning_rate": 9.902019873169094e-06, "loss": 0.5731, "step": 5014 }, { "epoch": 0.4706268768768769, "grad_norm": 1.4200006521968167, "learning_rate": 9.901912290132747e-06, "loss": 0.4438, "step": 5015 }, { "epoch": 0.47072072072072074, "grad_norm": 8.10280724499925, "learning_rate": 9.901804648650325e-06, "loss": 0.4961, "step": 5016 }, { "epoch": 0.47081456456456455, "grad_norm": 1.1270502209483062, "learning_rate": 9.901696948723108e-06, "loss": 0.5084, "step": 5017 }, { "epoch": 0.4709084084084084, "grad_norm": 0.9810269581852644, "learning_rate": 9.901589190352381e-06, "loss": 0.4682, "step": 5018 }, { "epoch": 0.47100225225225223, "grad_norm": 1.230546573220228, "learning_rate": 9.901481373539429e-06, "loss": 0.5051, "step": 5019 }, { "epoch": 0.4710960960960961, "grad_norm": 1.5042616241548006, "learning_rate": 9.901373498285538e-06, "loss": 0.4964, "step": 5020 }, { "epoch": 0.47118993993993996, "grad_norm": 1.3709479172472512, "learning_rate": 9.901265564591993e-06, "loss": 0.4853, "step": 5021 }, { "epoch": 0.47128378378378377, "grad_norm": 1.3057064466814106, "learning_rate": 9.901157572460084e-06, "loss": 0.4756, "step": 5022 }, { "epoch": 0.47137762762762764, "grad_norm": 1.1635509463638414, "learning_rate": 9.901049521891095e-06, "loss": 0.5172, "step": 5023 }, { "epoch": 0.47147147147147145, "grad_norm": 1.3951532595607663, "learning_rate": 9.900941412886316e-06, "loss": 0.4815, "step": 5024 }, { "epoch": 0.4715653153153153, "grad_norm": 1.4819186246979983, "learning_rate": 9.900833245447036e-06, "loss": 0.5157, "step": 5025 }, { "epoch": 0.4716591591591592, "grad_norm": 1.1189278799984927, "learning_rate": 9.900725019574543e-06, "loss": 0.5013, "step": 5026 }, { "epoch": 0.471753003003003, "grad_norm": 1.1150599846580647, "learning_rate": 9.900616735270131e-06, "loss": 0.4755, "step": 5027 }, { "epoch": 0.47184684684684686, "grad_norm": 1.0442099303796162, "learning_rate": 9.900508392535088e-06, "loss": 0.5205, "step": 5028 }, { "epoch": 0.47194069069069067, "grad_norm": 1.0585604120032146, "learning_rate": 9.900399991370707e-06, "loss": 0.4362, "step": 5029 }, { "epoch": 0.47203453453453453, "grad_norm": 1.1353226349832255, "learning_rate": 9.90029153177828e-06, "loss": 0.4739, "step": 5030 }, { "epoch": 0.4721283783783784, "grad_norm": 1.1306294736719507, "learning_rate": 9.9001830137591e-06, "loss": 0.5436, "step": 5031 }, { "epoch": 0.4722222222222222, "grad_norm": 1.0373204173682733, "learning_rate": 9.900074437314462e-06, "loss": 0.5046, "step": 5032 }, { "epoch": 0.4723160660660661, "grad_norm": 1.0526463215831825, "learning_rate": 9.899965802445659e-06, "loss": 0.4626, "step": 5033 }, { "epoch": 0.4724099099099099, "grad_norm": 1.2425906492310135, "learning_rate": 9.899857109153987e-06, "loss": 0.5617, "step": 5034 }, { "epoch": 0.47250375375375375, "grad_norm": 1.546392753233567, "learning_rate": 9.899748357440742e-06, "loss": 0.4945, "step": 5035 }, { "epoch": 0.4725975975975976, "grad_norm": 1.0375570433059957, "learning_rate": 9.899639547307221e-06, "loss": 0.4724, "step": 5036 }, { "epoch": 0.47269144144144143, "grad_norm": 1.15806220782867, "learning_rate": 9.89953067875472e-06, "loss": 0.5145, "step": 5037 }, { "epoch": 0.4727852852852853, "grad_norm": 1.2163905004397186, "learning_rate": 9.899421751784539e-06, "loss": 0.471, "step": 5038 }, { "epoch": 0.4728791291291291, "grad_norm": 1.0583270200439296, "learning_rate": 9.899312766397977e-06, "loss": 0.5014, "step": 5039 }, { "epoch": 0.47297297297297297, "grad_norm": 1.849844222484133, "learning_rate": 9.899203722596331e-06, "loss": 0.4758, "step": 5040 }, { "epoch": 0.47306681681681684, "grad_norm": 1.1171746858739253, "learning_rate": 9.899094620380902e-06, "loss": 0.4451, "step": 5041 }, { "epoch": 0.47316066066066065, "grad_norm": 1.1538419445905566, "learning_rate": 9.898985459752992e-06, "loss": 0.4903, "step": 5042 }, { "epoch": 0.4732545045045045, "grad_norm": 1.632832248198439, "learning_rate": 9.898876240713901e-06, "loss": 0.4725, "step": 5043 }, { "epoch": 0.4733483483483483, "grad_norm": 1.4035480401754874, "learning_rate": 9.898766963264932e-06, "loss": 0.4908, "step": 5044 }, { "epoch": 0.4734421921921922, "grad_norm": 1.3586009889830426, "learning_rate": 9.898657627407388e-06, "loss": 0.4883, "step": 5045 }, { "epoch": 0.47353603603603606, "grad_norm": 1.2414638813880625, "learning_rate": 9.898548233142574e-06, "loss": 0.49, "step": 5046 }, { "epoch": 0.47362987987987987, "grad_norm": 1.511992418903021, "learning_rate": 9.898438780471791e-06, "loss": 0.5765, "step": 5047 }, { "epoch": 0.47372372372372373, "grad_norm": 1.1633513041407983, "learning_rate": 9.898329269396348e-06, "loss": 0.5134, "step": 5048 }, { "epoch": 0.47381756756756754, "grad_norm": 1.2685724006361696, "learning_rate": 9.898219699917548e-06, "loss": 0.4748, "step": 5049 }, { "epoch": 0.4739114114114114, "grad_norm": 1.2950888970414511, "learning_rate": 9.898110072036696e-06, "loss": 0.4899, "step": 5050 }, { "epoch": 0.4740052552552553, "grad_norm": 1.0478894561223717, "learning_rate": 9.898000385755104e-06, "loss": 0.4928, "step": 5051 }, { "epoch": 0.4740990990990991, "grad_norm": 1.115790673383984, "learning_rate": 9.897890641074076e-06, "loss": 0.5122, "step": 5052 }, { "epoch": 0.47419294294294295, "grad_norm": 1.0044663986084477, "learning_rate": 9.89778083799492e-06, "loss": 0.4925, "step": 5053 }, { "epoch": 0.47428678678678676, "grad_norm": 1.008575573825588, "learning_rate": 9.897670976518946e-06, "loss": 0.4426, "step": 5054 }, { "epoch": 0.47438063063063063, "grad_norm": 1.0862336983288472, "learning_rate": 9.897561056647466e-06, "loss": 0.4883, "step": 5055 }, { "epoch": 0.4744744744744745, "grad_norm": 1.1954337703214164, "learning_rate": 9.89745107838179e-06, "loss": 0.5046, "step": 5056 }, { "epoch": 0.4745683183183183, "grad_norm": 1.4847925600226446, "learning_rate": 9.897341041723227e-06, "loss": 0.5447, "step": 5057 }, { "epoch": 0.47466216216216217, "grad_norm": 1.2247875041585634, "learning_rate": 9.897230946673091e-06, "loss": 0.4898, "step": 5058 }, { "epoch": 0.474756006006006, "grad_norm": 1.1172631391758212, "learning_rate": 9.897120793232694e-06, "loss": 0.514, "step": 5059 }, { "epoch": 0.47484984984984985, "grad_norm": 1.0232138693319568, "learning_rate": 9.89701058140335e-06, "loss": 0.4934, "step": 5060 }, { "epoch": 0.4749436936936937, "grad_norm": 1.632298834359731, "learning_rate": 9.896900311186373e-06, "loss": 0.5025, "step": 5061 }, { "epoch": 0.4750375375375375, "grad_norm": 1.0641589973504015, "learning_rate": 9.896789982583076e-06, "loss": 0.5372, "step": 5062 }, { "epoch": 0.4751313813813814, "grad_norm": 1.2323240397619688, "learning_rate": 9.896679595594777e-06, "loss": 0.4829, "step": 5063 }, { "epoch": 0.4752252252252252, "grad_norm": 1.4709100243289248, "learning_rate": 9.896569150222789e-06, "loss": 0.5309, "step": 5064 }, { "epoch": 0.47531906906906907, "grad_norm": 1.4357756772107013, "learning_rate": 9.896458646468431e-06, "loss": 0.4446, "step": 5065 }, { "epoch": 0.47541291291291293, "grad_norm": 1.2667255841656244, "learning_rate": 9.896348084333021e-06, "loss": 0.4677, "step": 5066 }, { "epoch": 0.47550675675675674, "grad_norm": 1.010570773688859, "learning_rate": 9.896237463817876e-06, "loss": 0.5104, "step": 5067 }, { "epoch": 0.4756006006006006, "grad_norm": 1.0790357372442778, "learning_rate": 9.896126784924315e-06, "loss": 0.4889, "step": 5068 }, { "epoch": 0.4756944444444444, "grad_norm": 1.1415652724151835, "learning_rate": 9.896016047653659e-06, "loss": 0.461, "step": 5069 }, { "epoch": 0.4757882882882883, "grad_norm": 1.3579533996479836, "learning_rate": 9.895905252007228e-06, "loss": 0.4603, "step": 5070 }, { "epoch": 0.47588213213213215, "grad_norm": 1.0735487134439878, "learning_rate": 9.895794397986343e-06, "loss": 0.4794, "step": 5071 }, { "epoch": 0.47597597597597596, "grad_norm": 1.5417162156572592, "learning_rate": 9.895683485592323e-06, "loss": 0.498, "step": 5072 }, { "epoch": 0.47606981981981983, "grad_norm": 1.27726801752465, "learning_rate": 9.895572514826492e-06, "loss": 0.5348, "step": 5073 }, { "epoch": 0.47616366366366364, "grad_norm": 1.057423610121978, "learning_rate": 9.895461485690176e-06, "loss": 0.5059, "step": 5074 }, { "epoch": 0.4762575075075075, "grad_norm": 1.2034105293470347, "learning_rate": 9.895350398184696e-06, "loss": 0.4403, "step": 5075 }, { "epoch": 0.47635135135135137, "grad_norm": 1.602201124617177, "learning_rate": 9.895239252311377e-06, "loss": 0.4818, "step": 5076 }, { "epoch": 0.4764451951951952, "grad_norm": 1.445582282452665, "learning_rate": 9.895128048071545e-06, "loss": 0.5273, "step": 5077 }, { "epoch": 0.47653903903903905, "grad_norm": 2.576978273815718, "learning_rate": 9.895016785466523e-06, "loss": 0.5081, "step": 5078 }, { "epoch": 0.47663288288288286, "grad_norm": 1.0600970468407818, "learning_rate": 9.894905464497641e-06, "loss": 0.5207, "step": 5079 }, { "epoch": 0.4767267267267267, "grad_norm": 1.007960056247748, "learning_rate": 9.894794085166226e-06, "loss": 0.4061, "step": 5080 }, { "epoch": 0.4768205705705706, "grad_norm": 1.4941477121100761, "learning_rate": 9.894682647473604e-06, "loss": 0.5425, "step": 5081 }, { "epoch": 0.4769144144144144, "grad_norm": 0.9862949388111513, "learning_rate": 9.894571151421106e-06, "loss": 0.4968, "step": 5082 }, { "epoch": 0.47700825825825827, "grad_norm": 1.3788916871381112, "learning_rate": 9.894459597010057e-06, "loss": 0.4788, "step": 5083 }, { "epoch": 0.4771021021021021, "grad_norm": 1.068631589381968, "learning_rate": 9.894347984241793e-06, "loss": 0.4909, "step": 5084 }, { "epoch": 0.47719594594594594, "grad_norm": 1.1199511569003333, "learning_rate": 9.894236313117641e-06, "loss": 0.5173, "step": 5085 }, { "epoch": 0.4772897897897898, "grad_norm": 1.2214544283010977, "learning_rate": 9.894124583638933e-06, "loss": 0.4604, "step": 5086 }, { "epoch": 0.4773836336336336, "grad_norm": 1.2429128272930998, "learning_rate": 9.894012795807002e-06, "loss": 0.4876, "step": 5087 }, { "epoch": 0.4774774774774775, "grad_norm": 1.1699843050732603, "learning_rate": 9.893900949623181e-06, "loss": 0.4645, "step": 5088 }, { "epoch": 0.4775713213213213, "grad_norm": 1.589644855724171, "learning_rate": 9.893789045088804e-06, "loss": 0.4547, "step": 5089 }, { "epoch": 0.47766516516516516, "grad_norm": 1.2836689919222102, "learning_rate": 9.893677082205202e-06, "loss": 0.465, "step": 5090 }, { "epoch": 0.47775900900900903, "grad_norm": 1.0733292278945086, "learning_rate": 9.893565060973713e-06, "loss": 0.491, "step": 5091 }, { "epoch": 0.47785285285285284, "grad_norm": 1.2616091432854057, "learning_rate": 9.893452981395671e-06, "loss": 0.5035, "step": 5092 }, { "epoch": 0.4779466966966967, "grad_norm": 2.089589369773063, "learning_rate": 9.893340843472414e-06, "loss": 0.5113, "step": 5093 }, { "epoch": 0.4780405405405405, "grad_norm": 1.1788126680349187, "learning_rate": 9.893228647205278e-06, "loss": 0.5134, "step": 5094 }, { "epoch": 0.4781343843843844, "grad_norm": 1.2178901684370578, "learning_rate": 9.893116392595603e-06, "loss": 0.4812, "step": 5095 }, { "epoch": 0.47822822822822825, "grad_norm": 1.0739619765489725, "learning_rate": 9.893004079644722e-06, "loss": 0.4666, "step": 5096 }, { "epoch": 0.47832207207207206, "grad_norm": 1.124399321332673, "learning_rate": 9.89289170835398e-06, "loss": 0.4749, "step": 5097 }, { "epoch": 0.4784159159159159, "grad_norm": 1.119120949073898, "learning_rate": 9.892779278724713e-06, "loss": 0.4778, "step": 5098 }, { "epoch": 0.47850975975975973, "grad_norm": 0.9485984991434833, "learning_rate": 9.892666790758263e-06, "loss": 0.4897, "step": 5099 }, { "epoch": 0.4786036036036036, "grad_norm": 1.2116994201398752, "learning_rate": 9.89255424445597e-06, "loss": 0.5464, "step": 5100 }, { "epoch": 0.47869744744744747, "grad_norm": 1.7272921926344316, "learning_rate": 9.892441639819179e-06, "loss": 0.4814, "step": 5101 }, { "epoch": 0.4787912912912913, "grad_norm": 1.3124304270793279, "learning_rate": 9.89232897684923e-06, "loss": 0.5044, "step": 5102 }, { "epoch": 0.47888513513513514, "grad_norm": 1.6953963890756085, "learning_rate": 9.892216255547467e-06, "loss": 0.4601, "step": 5103 }, { "epoch": 0.47897897897897895, "grad_norm": 1.2081432897513698, "learning_rate": 9.89210347591523e-06, "loss": 0.5045, "step": 5104 }, { "epoch": 0.4790728228228228, "grad_norm": 1.24833354727978, "learning_rate": 9.891990637953873e-06, "loss": 0.4606, "step": 5105 }, { "epoch": 0.4791666666666667, "grad_norm": 1.2212299846656904, "learning_rate": 9.891877741664733e-06, "loss": 0.4337, "step": 5106 }, { "epoch": 0.4792605105105105, "grad_norm": 1.0753276783057713, "learning_rate": 9.89176478704916e-06, "loss": 0.4645, "step": 5107 }, { "epoch": 0.47935435435435436, "grad_norm": 1.79621816571199, "learning_rate": 9.8916517741085e-06, "loss": 0.4824, "step": 5108 }, { "epoch": 0.47944819819819817, "grad_norm": 1.1387167686114694, "learning_rate": 9.8915387028441e-06, "loss": 0.4729, "step": 5109 }, { "epoch": 0.47954204204204204, "grad_norm": 1.1284383239166111, "learning_rate": 9.891425573257306e-06, "loss": 0.4714, "step": 5110 }, { "epoch": 0.4796358858858859, "grad_norm": 1.1387879739319746, "learning_rate": 9.891312385349471e-06, "loss": 0.486, "step": 5111 }, { "epoch": 0.4797297297297297, "grad_norm": 1.1905762067014447, "learning_rate": 9.891199139121943e-06, "loss": 0.5272, "step": 5112 }, { "epoch": 0.4798235735735736, "grad_norm": 1.0112820448428848, "learning_rate": 9.89108583457607e-06, "loss": 0.4071, "step": 5113 }, { "epoch": 0.4799174174174174, "grad_norm": 1.030558237602509, "learning_rate": 9.890972471713206e-06, "loss": 0.4557, "step": 5114 }, { "epoch": 0.48001126126126126, "grad_norm": 1.1192907206618203, "learning_rate": 9.890859050534704e-06, "loss": 0.5048, "step": 5115 }, { "epoch": 0.4801051051051051, "grad_norm": 1.0082482177804422, "learning_rate": 9.890745571041911e-06, "loss": 0.5473, "step": 5116 }, { "epoch": 0.48019894894894893, "grad_norm": 1.1158714363486029, "learning_rate": 9.890632033236184e-06, "loss": 0.5051, "step": 5117 }, { "epoch": 0.4802927927927928, "grad_norm": 0.9095066979009044, "learning_rate": 9.890518437118876e-06, "loss": 0.4934, "step": 5118 }, { "epoch": 0.4803866366366366, "grad_norm": 1.767400515631839, "learning_rate": 9.890404782691342e-06, "loss": 0.4517, "step": 5119 }, { "epoch": 0.4804804804804805, "grad_norm": 1.1143248548701623, "learning_rate": 9.890291069954934e-06, "loss": 0.475, "step": 5120 }, { "epoch": 0.48057432432432434, "grad_norm": 1.335015649068289, "learning_rate": 9.890177298911013e-06, "loss": 0.4776, "step": 5121 }, { "epoch": 0.48066816816816815, "grad_norm": 1.1770663905084209, "learning_rate": 9.890063469560928e-06, "loss": 0.5404, "step": 5122 }, { "epoch": 0.480762012012012, "grad_norm": 1.0850983805198025, "learning_rate": 9.889949581906045e-06, "loss": 0.5258, "step": 5123 }, { "epoch": 0.48085585585585583, "grad_norm": 0.9771681296187379, "learning_rate": 9.889835635947716e-06, "loss": 0.4919, "step": 5124 }, { "epoch": 0.4809496996996997, "grad_norm": 1.0232162716753554, "learning_rate": 9.889721631687301e-06, "loss": 0.4953, "step": 5125 }, { "epoch": 0.48104354354354356, "grad_norm": 1.075673725577829, "learning_rate": 9.889607569126159e-06, "loss": 0.4325, "step": 5126 }, { "epoch": 0.48113738738738737, "grad_norm": 2.277204023357213, "learning_rate": 9.889493448265653e-06, "loss": 0.5484, "step": 5127 }, { "epoch": 0.48123123123123124, "grad_norm": 1.4969938127049045, "learning_rate": 9.889379269107138e-06, "loss": 0.5079, "step": 5128 }, { "epoch": 0.48132507507507505, "grad_norm": 1.3808079440519814, "learning_rate": 9.88926503165198e-06, "loss": 0.4707, "step": 5129 }, { "epoch": 0.4814189189189189, "grad_norm": 1.0185984770225336, "learning_rate": 9.889150735901538e-06, "loss": 0.4867, "step": 5130 }, { "epoch": 0.4815127627627628, "grad_norm": 1.2296579924544613, "learning_rate": 9.889036381857179e-06, "loss": 0.505, "step": 5131 }, { "epoch": 0.4816066066066066, "grad_norm": 1.2916247982080564, "learning_rate": 9.888921969520263e-06, "loss": 0.4511, "step": 5132 }, { "epoch": 0.48170045045045046, "grad_norm": 1.3822176099370285, "learning_rate": 9.888807498892155e-06, "loss": 0.5078, "step": 5133 }, { "epoch": 0.48179429429429427, "grad_norm": 1.120690497915563, "learning_rate": 9.888692969974218e-06, "loss": 0.5125, "step": 5134 }, { "epoch": 0.48188813813813813, "grad_norm": 1.1808067236553463, "learning_rate": 9.888578382767821e-06, "loss": 0.4923, "step": 5135 }, { "epoch": 0.481981981981982, "grad_norm": 1.0851843516186022, "learning_rate": 9.888463737274327e-06, "loss": 0.5063, "step": 5136 }, { "epoch": 0.4820758258258258, "grad_norm": 1.852199595806504, "learning_rate": 9.888349033495107e-06, "loss": 0.5223, "step": 5137 }, { "epoch": 0.4821696696696697, "grad_norm": 1.0618806233968991, "learning_rate": 9.888234271431524e-06, "loss": 0.5111, "step": 5138 }, { "epoch": 0.4822635135135135, "grad_norm": 1.1505934798820172, "learning_rate": 9.88811945108495e-06, "loss": 0.4392, "step": 5139 }, { "epoch": 0.48235735735735735, "grad_norm": 1.4407893560374174, "learning_rate": 9.888004572456752e-06, "loss": 0.5136, "step": 5140 }, { "epoch": 0.4824512012012012, "grad_norm": 1.0830042350386966, "learning_rate": 9.8878896355483e-06, "loss": 0.517, "step": 5141 }, { "epoch": 0.48254504504504503, "grad_norm": 1.0576492635501453, "learning_rate": 9.887774640360965e-06, "loss": 0.4481, "step": 5142 }, { "epoch": 0.4826388888888889, "grad_norm": 1.1056102508921817, "learning_rate": 9.887659586896117e-06, "loss": 0.448, "step": 5143 }, { "epoch": 0.4827327327327327, "grad_norm": 1.1914257779897521, "learning_rate": 9.88754447515513e-06, "loss": 0.5082, "step": 5144 }, { "epoch": 0.48282657657657657, "grad_norm": 1.1236518246400449, "learning_rate": 9.887429305139373e-06, "loss": 0.4741, "step": 5145 }, { "epoch": 0.48292042042042044, "grad_norm": 1.1731006900194574, "learning_rate": 9.887314076850221e-06, "loss": 0.5574, "step": 5146 }, { "epoch": 0.48301426426426425, "grad_norm": 1.213920729886719, "learning_rate": 9.88719879028905e-06, "loss": 0.5315, "step": 5147 }, { "epoch": 0.4831081081081081, "grad_norm": 4.98547484143792, "learning_rate": 9.887083445457232e-06, "loss": 0.5003, "step": 5148 }, { "epoch": 0.4832019519519519, "grad_norm": 1.2195069987328173, "learning_rate": 9.886968042356141e-06, "loss": 0.5058, "step": 5149 }, { "epoch": 0.4832957957957958, "grad_norm": 1.5754956664367092, "learning_rate": 9.886852580987158e-06, "loss": 0.5269, "step": 5150 }, { "epoch": 0.48338963963963966, "grad_norm": 1.0468239951395728, "learning_rate": 9.886737061351654e-06, "loss": 0.4709, "step": 5151 }, { "epoch": 0.48348348348348347, "grad_norm": 1.0937500288396298, "learning_rate": 9.88662148345101e-06, "loss": 0.5378, "step": 5152 }, { "epoch": 0.48357732732732733, "grad_norm": 1.506859936643137, "learning_rate": 9.886505847286604e-06, "loss": 0.4898, "step": 5153 }, { "epoch": 0.48367117117117114, "grad_norm": 1.2727899650613013, "learning_rate": 9.886390152859813e-06, "loss": 0.4516, "step": 5154 }, { "epoch": 0.483765015015015, "grad_norm": 1.122335975979065, "learning_rate": 9.886274400172016e-06, "loss": 0.4775, "step": 5155 }, { "epoch": 0.4838588588588589, "grad_norm": 1.059424927691488, "learning_rate": 9.886158589224594e-06, "loss": 0.4931, "step": 5156 }, { "epoch": 0.4839527027027027, "grad_norm": 1.1411019953484063, "learning_rate": 9.886042720018928e-06, "loss": 0.5171, "step": 5157 }, { "epoch": 0.48404654654654655, "grad_norm": 1.1710057819103623, "learning_rate": 9.885926792556402e-06, "loss": 0.4889, "step": 5158 }, { "epoch": 0.48414039039039036, "grad_norm": 1.0812592059378567, "learning_rate": 9.885810806838395e-06, "loss": 0.4527, "step": 5159 }, { "epoch": 0.48423423423423423, "grad_norm": 1.100232433900766, "learning_rate": 9.88569476286629e-06, "loss": 0.4916, "step": 5160 }, { "epoch": 0.4843280780780781, "grad_norm": 1.2081103726659335, "learning_rate": 9.88557866064147e-06, "loss": 0.5377, "step": 5161 }, { "epoch": 0.4844219219219219, "grad_norm": 0.9818889674464479, "learning_rate": 9.885462500165323e-06, "loss": 0.458, "step": 5162 }, { "epoch": 0.48451576576576577, "grad_norm": 0.9525840907819619, "learning_rate": 9.88534628143923e-06, "loss": 0.4873, "step": 5163 }, { "epoch": 0.48460960960960964, "grad_norm": 7.773536636616019, "learning_rate": 9.885230004464579e-06, "loss": 0.5146, "step": 5164 }, { "epoch": 0.48470345345345345, "grad_norm": 1.125914210240023, "learning_rate": 9.885113669242756e-06, "loss": 0.5096, "step": 5165 }, { "epoch": 0.4847972972972973, "grad_norm": 1.4140424810575556, "learning_rate": 9.884997275775148e-06, "loss": 0.4963, "step": 5166 }, { "epoch": 0.4848911411411411, "grad_norm": 1.105138274746088, "learning_rate": 9.884880824063141e-06, "loss": 0.4652, "step": 5167 }, { "epoch": 0.484984984984985, "grad_norm": 1.1995952105951055, "learning_rate": 9.884764314108127e-06, "loss": 0.4474, "step": 5168 }, { "epoch": 0.48507882882882886, "grad_norm": 0.9816118110164163, "learning_rate": 9.884647745911491e-06, "loss": 0.473, "step": 5169 }, { "epoch": 0.48517267267267267, "grad_norm": 1.1511612169230119, "learning_rate": 9.884531119474627e-06, "loss": 0.5221, "step": 5170 }, { "epoch": 0.48526651651651653, "grad_norm": 1.1526847303432002, "learning_rate": 9.884414434798923e-06, "loss": 0.4845, "step": 5171 }, { "epoch": 0.48536036036036034, "grad_norm": 1.1602119720805035, "learning_rate": 9.884297691885771e-06, "loss": 0.5533, "step": 5172 }, { "epoch": 0.4854542042042042, "grad_norm": 1.0352491352382012, "learning_rate": 9.88418089073656e-06, "loss": 0.4684, "step": 5173 }, { "epoch": 0.4855480480480481, "grad_norm": 1.2239561102163938, "learning_rate": 9.88406403135269e-06, "loss": 0.5071, "step": 5174 }, { "epoch": 0.4856418918918919, "grad_norm": 1.428938485199758, "learning_rate": 9.883947113735546e-06, "loss": 0.5146, "step": 5175 }, { "epoch": 0.48573573573573575, "grad_norm": 1.0718591910070188, "learning_rate": 9.883830137886527e-06, "loss": 0.4817, "step": 5176 }, { "epoch": 0.48582957957957956, "grad_norm": 1.0127457237027093, "learning_rate": 9.883713103807029e-06, "loss": 0.4997, "step": 5177 }, { "epoch": 0.48592342342342343, "grad_norm": 1.3067147092643439, "learning_rate": 9.883596011498442e-06, "loss": 0.4719, "step": 5178 }, { "epoch": 0.4860172672672673, "grad_norm": 1.2037753466195893, "learning_rate": 9.883478860962166e-06, "loss": 0.5031, "step": 5179 }, { "epoch": 0.4861111111111111, "grad_norm": 1.1423751491044554, "learning_rate": 9.883361652199595e-06, "loss": 0.5525, "step": 5180 }, { "epoch": 0.48620495495495497, "grad_norm": 1.5304186430244147, "learning_rate": 9.88324438521213e-06, "loss": 0.4594, "step": 5181 }, { "epoch": 0.4862987987987988, "grad_norm": 1.1096719389860126, "learning_rate": 9.883127060001167e-06, "loss": 0.5282, "step": 5182 }, { "epoch": 0.48639264264264265, "grad_norm": 1.1481904257663271, "learning_rate": 9.883009676568107e-06, "loss": 0.5401, "step": 5183 }, { "epoch": 0.4864864864864865, "grad_norm": 1.1310364990548913, "learning_rate": 9.882892234914345e-06, "loss": 0.4893, "step": 5184 }, { "epoch": 0.4865803303303303, "grad_norm": 1.522838873988634, "learning_rate": 9.882774735041287e-06, "loss": 0.568, "step": 5185 }, { "epoch": 0.4866741741741742, "grad_norm": 1.3939132088569266, "learning_rate": 9.88265717695033e-06, "loss": 0.4687, "step": 5186 }, { "epoch": 0.486768018018018, "grad_norm": 1.0259341775330706, "learning_rate": 9.882539560642875e-06, "loss": 0.5149, "step": 5187 }, { "epoch": 0.48686186186186187, "grad_norm": 1.2409776518966973, "learning_rate": 9.882421886120327e-06, "loss": 0.4801, "step": 5188 }, { "epoch": 0.48695570570570573, "grad_norm": 1.0918615445182713, "learning_rate": 9.88230415338409e-06, "loss": 0.501, "step": 5189 }, { "epoch": 0.48704954954954954, "grad_norm": 1.1135646655362679, "learning_rate": 9.882186362435566e-06, "loss": 0.4469, "step": 5190 }, { "epoch": 0.4871433933933934, "grad_norm": 1.061078026114499, "learning_rate": 9.882068513276158e-06, "loss": 0.4719, "step": 5191 }, { "epoch": 0.4872372372372372, "grad_norm": 1.1548097862504834, "learning_rate": 9.881950605907273e-06, "loss": 0.4904, "step": 5192 }, { "epoch": 0.4873310810810811, "grad_norm": 1.8417268928847095, "learning_rate": 9.881832640330315e-06, "loss": 0.5498, "step": 5193 }, { "epoch": 0.48742492492492495, "grad_norm": 1.4220279601542514, "learning_rate": 9.881714616546694e-06, "loss": 0.5798, "step": 5194 }, { "epoch": 0.48751876876876876, "grad_norm": 1.9871623891459413, "learning_rate": 9.881596534557813e-06, "loss": 0.5062, "step": 5195 }, { "epoch": 0.48761261261261263, "grad_norm": 1.1376893556546404, "learning_rate": 9.881478394365084e-06, "loss": 0.4906, "step": 5196 }, { "epoch": 0.48770645645645644, "grad_norm": 1.0736791206360305, "learning_rate": 9.881360195969914e-06, "loss": 0.47, "step": 5197 }, { "epoch": 0.4878003003003003, "grad_norm": 2.250291245515581, "learning_rate": 9.88124193937371e-06, "loss": 0.4968, "step": 5198 }, { "epoch": 0.48789414414414417, "grad_norm": 1.448813666688184, "learning_rate": 9.881123624577884e-06, "loss": 0.4999, "step": 5199 }, { "epoch": 0.487987987987988, "grad_norm": 0.9714914668292498, "learning_rate": 9.881005251583847e-06, "loss": 0.4252, "step": 5200 }, { "epoch": 0.48808183183183185, "grad_norm": 0.9674773313137941, "learning_rate": 9.88088682039301e-06, "loss": 0.4761, "step": 5201 }, { "epoch": 0.48817567567567566, "grad_norm": 1.07245536923249, "learning_rate": 9.880768331006785e-06, "loss": 0.4869, "step": 5202 }, { "epoch": 0.4882695195195195, "grad_norm": 1.2559877422651713, "learning_rate": 9.880649783426586e-06, "loss": 0.5153, "step": 5203 }, { "epoch": 0.4883633633633634, "grad_norm": 1.1551662846884758, "learning_rate": 9.880531177653823e-06, "loss": 0.5228, "step": 5204 }, { "epoch": 0.4884572072072072, "grad_norm": 1.1153764264082273, "learning_rate": 9.880412513689913e-06, "loss": 0.4787, "step": 5205 }, { "epoch": 0.48855105105105107, "grad_norm": 1.1168317331704218, "learning_rate": 9.88029379153627e-06, "loss": 0.5169, "step": 5206 }, { "epoch": 0.4886448948948949, "grad_norm": 1.0508318112644683, "learning_rate": 9.88017501119431e-06, "loss": 0.4542, "step": 5207 }, { "epoch": 0.48873873873873874, "grad_norm": 1.4153178198095313, "learning_rate": 9.88005617266545e-06, "loss": 0.4699, "step": 5208 }, { "epoch": 0.4888325825825826, "grad_norm": 1.1879591128055542, "learning_rate": 9.879937275951106e-06, "loss": 0.4508, "step": 5209 }, { "epoch": 0.4889264264264264, "grad_norm": 1.2294021073660282, "learning_rate": 9.879818321052694e-06, "loss": 0.4719, "step": 5210 }, { "epoch": 0.4890202702702703, "grad_norm": 1.3743921821423417, "learning_rate": 9.879699307971635e-06, "loss": 0.4786, "step": 5211 }, { "epoch": 0.4891141141141141, "grad_norm": 1.1950140680158232, "learning_rate": 9.879580236709345e-06, "loss": 0.5283, "step": 5212 }, { "epoch": 0.48920795795795796, "grad_norm": 1.1564007224763466, "learning_rate": 9.879461107267247e-06, "loss": 0.5219, "step": 5213 }, { "epoch": 0.48930180180180183, "grad_norm": 1.1758747248433843, "learning_rate": 9.879341919646762e-06, "loss": 0.4576, "step": 5214 }, { "epoch": 0.48939564564564564, "grad_norm": 1.0112984665145248, "learning_rate": 9.879222673849304e-06, "loss": 0.477, "step": 5215 }, { "epoch": 0.4894894894894895, "grad_norm": 1.0580783280158355, "learning_rate": 9.879103369876305e-06, "loss": 0.5449, "step": 5216 }, { "epoch": 0.4895833333333333, "grad_norm": 1.218431665529895, "learning_rate": 9.878984007729178e-06, "loss": 0.444, "step": 5217 }, { "epoch": 0.4896771771771772, "grad_norm": 1.14837753342897, "learning_rate": 9.878864587409354e-06, "loss": 0.5, "step": 5218 }, { "epoch": 0.48977102102102105, "grad_norm": 1.127387453369761, "learning_rate": 9.878745108918251e-06, "loss": 0.4655, "step": 5219 }, { "epoch": 0.48986486486486486, "grad_norm": 1.2754861089345157, "learning_rate": 9.878625572257296e-06, "loss": 0.5121, "step": 5220 }, { "epoch": 0.4899587087087087, "grad_norm": 1.1448647122767857, "learning_rate": 9.878505977427915e-06, "loss": 0.5412, "step": 5221 }, { "epoch": 0.49005255255255253, "grad_norm": 1.061175882661887, "learning_rate": 9.878386324431532e-06, "loss": 0.4825, "step": 5222 }, { "epoch": 0.4901463963963964, "grad_norm": 1.1368896168974056, "learning_rate": 9.878266613269576e-06, "loss": 0.478, "step": 5223 }, { "epoch": 0.49024024024024027, "grad_norm": 1.154699669702057, "learning_rate": 9.878146843943473e-06, "loss": 0.4994, "step": 5224 }, { "epoch": 0.4903340840840841, "grad_norm": 1.0507633159008836, "learning_rate": 9.87802701645465e-06, "loss": 0.5147, "step": 5225 }, { "epoch": 0.49042792792792794, "grad_norm": 1.0066838952824124, "learning_rate": 9.877907130804538e-06, "loss": 0.4205, "step": 5226 }, { "epoch": 0.49052177177177175, "grad_norm": 1.1132778596599406, "learning_rate": 9.877787186994564e-06, "loss": 0.4578, "step": 5227 }, { "epoch": 0.4906156156156156, "grad_norm": 1.2968300026367023, "learning_rate": 9.87766718502616e-06, "loss": 0.4629, "step": 5228 }, { "epoch": 0.4907094594594595, "grad_norm": 1.2843173732015156, "learning_rate": 9.877547124900756e-06, "loss": 0.5049, "step": 5229 }, { "epoch": 0.4908033033033033, "grad_norm": 1.228531575858328, "learning_rate": 9.877427006619783e-06, "loss": 0.4622, "step": 5230 }, { "epoch": 0.49089714714714716, "grad_norm": 1.848095573898479, "learning_rate": 9.877306830184673e-06, "loss": 0.5618, "step": 5231 }, { "epoch": 0.49099099099099097, "grad_norm": 1.1765601456942612, "learning_rate": 9.87718659559686e-06, "loss": 0.5413, "step": 5232 }, { "epoch": 0.49108483483483484, "grad_norm": 1.3958012994183646, "learning_rate": 9.87706630285778e-06, "loss": 0.5028, "step": 5233 }, { "epoch": 0.4911786786786787, "grad_norm": 1.0495263534669173, "learning_rate": 9.876945951968862e-06, "loss": 0.5468, "step": 5234 }, { "epoch": 0.4912725225225225, "grad_norm": 1.064388259532852, "learning_rate": 9.876825542931545e-06, "loss": 0.4998, "step": 5235 }, { "epoch": 0.4913663663663664, "grad_norm": 1.2249990422414985, "learning_rate": 9.876705075747261e-06, "loss": 0.4785, "step": 5236 }, { "epoch": 0.4914602102102102, "grad_norm": 1.1428846916335675, "learning_rate": 9.87658455041745e-06, "loss": 0.471, "step": 5237 }, { "epoch": 0.49155405405405406, "grad_norm": 1.1558006834397152, "learning_rate": 9.876463966943547e-06, "loss": 0.5098, "step": 5238 }, { "epoch": 0.4916478978978979, "grad_norm": 6.967988932602237, "learning_rate": 9.87634332532699e-06, "loss": 0.4972, "step": 5239 }, { "epoch": 0.49174174174174173, "grad_norm": 4.595140452155057, "learning_rate": 9.876222625569218e-06, "loss": 0.4734, "step": 5240 }, { "epoch": 0.4918355855855856, "grad_norm": 1.0367313091636892, "learning_rate": 9.876101867671669e-06, "loss": 0.4732, "step": 5241 }, { "epoch": 0.4919294294294294, "grad_norm": 1.4187434996526918, "learning_rate": 9.875981051635784e-06, "loss": 0.4841, "step": 5242 }, { "epoch": 0.4920232732732733, "grad_norm": 1.080639173864921, "learning_rate": 9.875860177463003e-06, "loss": 0.4721, "step": 5243 }, { "epoch": 0.49211711711711714, "grad_norm": 1.301845896169822, "learning_rate": 9.875739245154767e-06, "loss": 0.464, "step": 5244 }, { "epoch": 0.49221096096096095, "grad_norm": 1.038311725314022, "learning_rate": 9.87561825471252e-06, "loss": 0.4704, "step": 5245 }, { "epoch": 0.4923048048048048, "grad_norm": 1.1309700088706196, "learning_rate": 9.875497206137701e-06, "loss": 0.4867, "step": 5246 }, { "epoch": 0.49239864864864863, "grad_norm": 1.4007569228075394, "learning_rate": 9.875376099431756e-06, "loss": 0.4771, "step": 5247 }, { "epoch": 0.4924924924924925, "grad_norm": 1.37163298474555, "learning_rate": 9.875254934596126e-06, "loss": 0.4815, "step": 5248 }, { "epoch": 0.49258633633633636, "grad_norm": 1.065353529074741, "learning_rate": 9.875133711632259e-06, "loss": 0.4734, "step": 5249 }, { "epoch": 0.49268018018018017, "grad_norm": 1.2584128839349968, "learning_rate": 9.875012430541598e-06, "loss": 0.5116, "step": 5250 }, { "epoch": 0.49277402402402404, "grad_norm": 1.2808596576570448, "learning_rate": 9.87489109132559e-06, "loss": 0.4749, "step": 5251 }, { "epoch": 0.49286786786786785, "grad_norm": 1.1634874752825484, "learning_rate": 9.874769693985684e-06, "loss": 0.5371, "step": 5252 }, { "epoch": 0.4929617117117117, "grad_norm": 1.083759907620484, "learning_rate": 9.874648238523323e-06, "loss": 0.4799, "step": 5253 }, { "epoch": 0.4930555555555556, "grad_norm": 1.2737210835403556, "learning_rate": 9.874526724939958e-06, "loss": 0.5564, "step": 5254 }, { "epoch": 0.4931493993993994, "grad_norm": 1.0203645561528172, "learning_rate": 9.874405153237037e-06, "loss": 0.4186, "step": 5255 }, { "epoch": 0.49324324324324326, "grad_norm": 1.1891642383661196, "learning_rate": 9.87428352341601e-06, "loss": 0.441, "step": 5256 }, { "epoch": 0.49333708708708707, "grad_norm": 1.234639996118758, "learning_rate": 9.874161835478326e-06, "loss": 0.5244, "step": 5257 }, { "epoch": 0.49343093093093093, "grad_norm": 2.778384006717041, "learning_rate": 9.874040089425439e-06, "loss": 0.4705, "step": 5258 }, { "epoch": 0.4935247747747748, "grad_norm": 1.0489551703836348, "learning_rate": 9.873918285258797e-06, "loss": 0.4698, "step": 5259 }, { "epoch": 0.4936186186186186, "grad_norm": 1.172776645026794, "learning_rate": 9.873796422979854e-06, "loss": 0.4544, "step": 5260 }, { "epoch": 0.4937124624624625, "grad_norm": 1.3180468708646627, "learning_rate": 9.873674502590061e-06, "loss": 0.5204, "step": 5261 }, { "epoch": 0.4938063063063063, "grad_norm": 1.2098281617214184, "learning_rate": 9.873552524090876e-06, "loss": 0.4848, "step": 5262 }, { "epoch": 0.49390015015015015, "grad_norm": 2.1559155249097675, "learning_rate": 9.873430487483748e-06, "loss": 0.5133, "step": 5263 }, { "epoch": 0.493993993993994, "grad_norm": 1.2487315713014338, "learning_rate": 9.873308392770136e-06, "loss": 0.5412, "step": 5264 }, { "epoch": 0.49408783783783783, "grad_norm": 1.76860962460344, "learning_rate": 9.873186239951494e-06, "loss": 0.4521, "step": 5265 }, { "epoch": 0.4941816816816817, "grad_norm": 1.3776428891984513, "learning_rate": 9.87306402902928e-06, "loss": 0.4988, "step": 5266 }, { "epoch": 0.4942755255255255, "grad_norm": 1.158928987585525, "learning_rate": 9.872941760004949e-06, "loss": 0.5029, "step": 5267 }, { "epoch": 0.49436936936936937, "grad_norm": 1.184680083862924, "learning_rate": 9.87281943287996e-06, "loss": 0.4849, "step": 5268 }, { "epoch": 0.49446321321321324, "grad_norm": 1.3002059394158505, "learning_rate": 9.87269704765577e-06, "loss": 0.4393, "step": 5269 }, { "epoch": 0.49455705705705705, "grad_norm": 1.54734966412793, "learning_rate": 9.872574604333842e-06, "loss": 0.4702, "step": 5270 }, { "epoch": 0.4946509009009009, "grad_norm": 1.0766125857502251, "learning_rate": 9.872452102915632e-06, "loss": 0.4801, "step": 5271 }, { "epoch": 0.4947447447447447, "grad_norm": 1.072930376565849, "learning_rate": 9.872329543402605e-06, "loss": 0.502, "step": 5272 }, { "epoch": 0.4948385885885886, "grad_norm": 0.9664913075965395, "learning_rate": 9.872206925796217e-06, "loss": 0.4464, "step": 5273 }, { "epoch": 0.49493243243243246, "grad_norm": 1.0897397813550553, "learning_rate": 9.872084250097932e-06, "loss": 0.482, "step": 5274 }, { "epoch": 0.49502627627627627, "grad_norm": 1.1477964222149033, "learning_rate": 9.871961516309214e-06, "loss": 0.5318, "step": 5275 }, { "epoch": 0.49512012012012013, "grad_norm": 1.0007909538028117, "learning_rate": 9.871838724431526e-06, "loss": 0.4558, "step": 5276 }, { "epoch": 0.49521396396396394, "grad_norm": 1.042125360989973, "learning_rate": 9.87171587446633e-06, "loss": 0.4988, "step": 5277 }, { "epoch": 0.4953078078078078, "grad_norm": 1.0863594820198192, "learning_rate": 9.871592966415094e-06, "loss": 0.4811, "step": 5278 }, { "epoch": 0.4954016516516517, "grad_norm": 1.2146567073545387, "learning_rate": 9.87147000027928e-06, "loss": 0.4977, "step": 5279 }, { "epoch": 0.4954954954954955, "grad_norm": 1.0042831250922573, "learning_rate": 9.871346976060357e-06, "loss": 0.4808, "step": 5280 }, { "epoch": 0.49558933933933935, "grad_norm": 1.0790205499003858, "learning_rate": 9.87122389375979e-06, "loss": 0.4787, "step": 5281 }, { "epoch": 0.49568318318318316, "grad_norm": 1.142509816074787, "learning_rate": 9.871100753379046e-06, "loss": 0.4879, "step": 5282 }, { "epoch": 0.49577702702702703, "grad_norm": 1.1768282782379837, "learning_rate": 9.870977554919596e-06, "loss": 0.4658, "step": 5283 }, { "epoch": 0.4958708708708709, "grad_norm": 1.1933020432032426, "learning_rate": 9.870854298382909e-06, "loss": 0.4866, "step": 5284 }, { "epoch": 0.4959647147147147, "grad_norm": 1.107266981583643, "learning_rate": 9.87073098377045e-06, "loss": 0.4536, "step": 5285 }, { "epoch": 0.49605855855855857, "grad_norm": 1.481474434764869, "learning_rate": 9.870607611083694e-06, "loss": 0.4923, "step": 5286 }, { "epoch": 0.4961524024024024, "grad_norm": 1.1137283130739424, "learning_rate": 9.87048418032411e-06, "loss": 0.4849, "step": 5287 }, { "epoch": 0.49624624624624625, "grad_norm": 1.1911725319285311, "learning_rate": 9.870360691493168e-06, "loss": 0.512, "step": 5288 }, { "epoch": 0.4963400900900901, "grad_norm": 1.2087009858877595, "learning_rate": 9.870237144592345e-06, "loss": 0.5167, "step": 5289 }, { "epoch": 0.4964339339339339, "grad_norm": 1.104137591289424, "learning_rate": 9.87011353962311e-06, "loss": 0.4668, "step": 5290 }, { "epoch": 0.4965277777777778, "grad_norm": 1.5434449609230352, "learning_rate": 9.869989876586937e-06, "loss": 0.4911, "step": 5291 }, { "epoch": 0.4966216216216216, "grad_norm": 1.1774238706919653, "learning_rate": 9.869866155485304e-06, "loss": 0.5688, "step": 5292 }, { "epoch": 0.49671546546546547, "grad_norm": 1.1152989561288715, "learning_rate": 9.869742376319682e-06, "loss": 0.4261, "step": 5293 }, { "epoch": 0.49680930930930933, "grad_norm": 1.357701510753643, "learning_rate": 9.869618539091548e-06, "loss": 0.5361, "step": 5294 }, { "epoch": 0.49690315315315314, "grad_norm": 1.2749192794107487, "learning_rate": 9.86949464380238e-06, "loss": 0.5048, "step": 5295 }, { "epoch": 0.496996996996997, "grad_norm": 1.236428552088625, "learning_rate": 9.869370690453654e-06, "loss": 0.4596, "step": 5296 }, { "epoch": 0.4970908408408408, "grad_norm": 8.168001832277637, "learning_rate": 9.869246679046848e-06, "loss": 0.5288, "step": 5297 }, { "epoch": 0.4971846846846847, "grad_norm": 1.2333719558305907, "learning_rate": 9.86912260958344e-06, "loss": 0.5301, "step": 5298 }, { "epoch": 0.49727852852852855, "grad_norm": 1.1236579552404695, "learning_rate": 9.868998482064913e-06, "loss": 0.4685, "step": 5299 }, { "epoch": 0.49737237237237236, "grad_norm": 1.5325710205772378, "learning_rate": 9.868874296492742e-06, "loss": 0.5194, "step": 5300 }, { "epoch": 0.49746621621621623, "grad_norm": 1.240492836662417, "learning_rate": 9.86875005286841e-06, "loss": 0.4893, "step": 5301 }, { "epoch": 0.49756006006006004, "grad_norm": 1.1493359539222208, "learning_rate": 9.868625751193398e-06, "loss": 0.5353, "step": 5302 }, { "epoch": 0.4976539039039039, "grad_norm": 1.0349951261831474, "learning_rate": 9.868501391469187e-06, "loss": 0.4406, "step": 5303 }, { "epoch": 0.49774774774774777, "grad_norm": 1.1118002019894295, "learning_rate": 9.868376973697263e-06, "loss": 0.4751, "step": 5304 }, { "epoch": 0.4978415915915916, "grad_norm": 1.3816845670615774, "learning_rate": 9.868252497879108e-06, "loss": 0.4965, "step": 5305 }, { "epoch": 0.49793543543543545, "grad_norm": 2.2924363453916192, "learning_rate": 9.868127964016204e-06, "loss": 0.508, "step": 5306 }, { "epoch": 0.49802927927927926, "grad_norm": 1.5484478513536981, "learning_rate": 9.868003372110035e-06, "loss": 0.4273, "step": 5307 }, { "epoch": 0.4981231231231231, "grad_norm": 0.9812552656153165, "learning_rate": 9.867878722162092e-06, "loss": 0.4891, "step": 5308 }, { "epoch": 0.498216966966967, "grad_norm": 3.8137586285943113, "learning_rate": 9.867754014173858e-06, "loss": 0.5043, "step": 5309 }, { "epoch": 0.4983108108108108, "grad_norm": 1.7693396125114436, "learning_rate": 9.86762924814682e-06, "loss": 0.4698, "step": 5310 }, { "epoch": 0.49840465465465467, "grad_norm": 1.0775237741977401, "learning_rate": 9.867504424082464e-06, "loss": 0.4012, "step": 5311 }, { "epoch": 0.4984984984984985, "grad_norm": 1.1582298853278394, "learning_rate": 9.86737954198228e-06, "loss": 0.4609, "step": 5312 }, { "epoch": 0.49859234234234234, "grad_norm": 1.0670133728207767, "learning_rate": 9.867254601847758e-06, "loss": 0.4369, "step": 5313 }, { "epoch": 0.4986861861861862, "grad_norm": 1.1123793429045006, "learning_rate": 9.867129603680385e-06, "loss": 0.5082, "step": 5314 }, { "epoch": 0.49878003003003, "grad_norm": 1.1943926744244813, "learning_rate": 9.867004547481655e-06, "loss": 0.5184, "step": 5315 }, { "epoch": 0.4988738738738739, "grad_norm": 1.43702682472674, "learning_rate": 9.866879433253056e-06, "loss": 0.4479, "step": 5316 }, { "epoch": 0.4989677177177177, "grad_norm": 1.1628002169855782, "learning_rate": 9.866754260996082e-06, "loss": 0.4153, "step": 5317 }, { "epoch": 0.49906156156156156, "grad_norm": 1.8499660917619123, "learning_rate": 9.866629030712221e-06, "loss": 0.4897, "step": 5318 }, { "epoch": 0.49915540540540543, "grad_norm": 1.2340324532859077, "learning_rate": 9.866503742402971e-06, "loss": 0.5478, "step": 5319 }, { "epoch": 0.49924924924924924, "grad_norm": 1.0210518590888418, "learning_rate": 9.866378396069825e-06, "loss": 0.4759, "step": 5320 }, { "epoch": 0.4993430930930931, "grad_norm": 1.0528782329689523, "learning_rate": 9.866252991714275e-06, "loss": 0.4723, "step": 5321 }, { "epoch": 0.4994369369369369, "grad_norm": 1.0935606126086268, "learning_rate": 9.866127529337818e-06, "loss": 0.4816, "step": 5322 }, { "epoch": 0.4995307807807808, "grad_norm": 1.462330719764553, "learning_rate": 9.866002008941951e-06, "loss": 0.4986, "step": 5323 }, { "epoch": 0.49962462462462465, "grad_norm": 1.7159396261710365, "learning_rate": 9.86587643052817e-06, "loss": 0.5209, "step": 5324 }, { "epoch": 0.49971846846846846, "grad_norm": 1.2330561305209713, "learning_rate": 9.86575079409797e-06, "loss": 0.4777, "step": 5325 }, { "epoch": 0.4998123123123123, "grad_norm": 2.4593495036188435, "learning_rate": 9.865625099652851e-06, "loss": 0.4647, "step": 5326 }, { "epoch": 0.49990615615615613, "grad_norm": 1.0807201879913813, "learning_rate": 9.865499347194312e-06, "loss": 0.5005, "step": 5327 }, { "epoch": 0.5, "grad_norm": 1.8077600872615003, "learning_rate": 9.86537353672385e-06, "loss": 0.4861, "step": 5328 }, { "epoch": 0.5000938438438438, "grad_norm": 1.2946798806521673, "learning_rate": 9.865247668242968e-06, "loss": 0.5723, "step": 5329 }, { "epoch": 0.5001876876876877, "grad_norm": 1.292403998538272, "learning_rate": 9.865121741753165e-06, "loss": 0.4387, "step": 5330 }, { "epoch": 0.5002815315315315, "grad_norm": 1.0800401069212957, "learning_rate": 9.864995757255945e-06, "loss": 0.5366, "step": 5331 }, { "epoch": 0.5003753753753754, "grad_norm": 1.5568983949641013, "learning_rate": 9.864869714752804e-06, "loss": 0.5583, "step": 5332 }, { "epoch": 0.5004692192192193, "grad_norm": 2.55202798743541, "learning_rate": 9.86474361424525e-06, "loss": 0.4842, "step": 5333 }, { "epoch": 0.5005630630630631, "grad_norm": 1.146858740195505, "learning_rate": 9.864617455734788e-06, "loss": 0.4829, "step": 5334 }, { "epoch": 0.5006569069069069, "grad_norm": 1.149028918079224, "learning_rate": 9.864491239222918e-06, "loss": 0.4973, "step": 5335 }, { "epoch": 0.5007507507507507, "grad_norm": 1.0885344986470877, "learning_rate": 9.864364964711146e-06, "loss": 0.475, "step": 5336 }, { "epoch": 0.5008445945945946, "grad_norm": 1.0956664858995822, "learning_rate": 9.864238632200977e-06, "loss": 0.4773, "step": 5337 }, { "epoch": 0.5009384384384384, "grad_norm": 1.134702097695148, "learning_rate": 9.86411224169392e-06, "loss": 0.4716, "step": 5338 }, { "epoch": 0.5010322822822822, "grad_norm": 1.2384419886766698, "learning_rate": 9.863985793191479e-06, "loss": 0.5208, "step": 5339 }, { "epoch": 0.5011261261261262, "grad_norm": 0.9125675078563342, "learning_rate": 9.863859286695165e-06, "loss": 0.4426, "step": 5340 }, { "epoch": 0.50121996996997, "grad_norm": 0.8546249169366271, "learning_rate": 9.863732722206482e-06, "loss": 0.3995, "step": 5341 }, { "epoch": 0.5013138138138138, "grad_norm": 1.1260034586914254, "learning_rate": 9.863606099726943e-06, "loss": 0.5053, "step": 5342 }, { "epoch": 0.5014076576576577, "grad_norm": 1.086301143523105, "learning_rate": 9.863479419258055e-06, "loss": 0.5056, "step": 5343 }, { "epoch": 0.5015015015015015, "grad_norm": 1.1022932406044244, "learning_rate": 9.86335268080133e-06, "loss": 0.5054, "step": 5344 }, { "epoch": 0.5015953453453453, "grad_norm": 1.4395953388265488, "learning_rate": 9.863225884358278e-06, "loss": 0.5252, "step": 5345 }, { "epoch": 0.5016891891891891, "grad_norm": 1.0284725690143888, "learning_rate": 9.863099029930413e-06, "loss": 0.4595, "step": 5346 }, { "epoch": 0.5017830330330331, "grad_norm": 2.1199322363965063, "learning_rate": 9.862972117519245e-06, "loss": 0.4869, "step": 5347 }, { "epoch": 0.5018768768768769, "grad_norm": 1.0881766786222085, "learning_rate": 9.862845147126287e-06, "loss": 0.4417, "step": 5348 }, { "epoch": 0.5019707207207207, "grad_norm": 1.316989042795032, "learning_rate": 9.862718118753056e-06, "loss": 0.5201, "step": 5349 }, { "epoch": 0.5020645645645646, "grad_norm": 1.0890816383834485, "learning_rate": 9.862591032401063e-06, "loss": 0.4768, "step": 5350 }, { "epoch": 0.5021584084084084, "grad_norm": 1.4327943997354322, "learning_rate": 9.862463888071825e-06, "loss": 0.4957, "step": 5351 }, { "epoch": 0.5022522522522522, "grad_norm": 1.637765716361284, "learning_rate": 9.862336685766858e-06, "loss": 0.5195, "step": 5352 }, { "epoch": 0.5023460960960962, "grad_norm": 1.402625621287748, "learning_rate": 9.862209425487678e-06, "loss": 0.4724, "step": 5353 }, { "epoch": 0.50243993993994, "grad_norm": 1.09986843286435, "learning_rate": 9.862082107235803e-06, "loss": 0.4295, "step": 5354 }, { "epoch": 0.5025337837837838, "grad_norm": 1.168104207451624, "learning_rate": 9.861954731012751e-06, "loss": 0.4785, "step": 5355 }, { "epoch": 0.5026276276276276, "grad_norm": 1.532209864981162, "learning_rate": 9.861827296820041e-06, "loss": 0.5053, "step": 5356 }, { "epoch": 0.5027214714714715, "grad_norm": 1.088810606348257, "learning_rate": 9.861699804659192e-06, "loss": 0.428, "step": 5357 }, { "epoch": 0.5028153153153153, "grad_norm": 1.1494179132018805, "learning_rate": 9.861572254531724e-06, "loss": 0.492, "step": 5358 }, { "epoch": 0.5029091591591591, "grad_norm": 2.1360358389378833, "learning_rate": 9.861444646439157e-06, "loss": 0.4821, "step": 5359 }, { "epoch": 0.503003003003003, "grad_norm": 1.259935099321998, "learning_rate": 9.861316980383013e-06, "loss": 0.47, "step": 5360 }, { "epoch": 0.5030968468468469, "grad_norm": 1.315769416200806, "learning_rate": 9.861189256364816e-06, "loss": 0.5579, "step": 5361 }, { "epoch": 0.5031906906906907, "grad_norm": 1.2331760245824688, "learning_rate": 9.861061474386087e-06, "loss": 0.4641, "step": 5362 }, { "epoch": 0.5032845345345346, "grad_norm": 1.548918816027342, "learning_rate": 9.860933634448348e-06, "loss": 0.4592, "step": 5363 }, { "epoch": 0.5033783783783784, "grad_norm": 1.2028012425155028, "learning_rate": 9.860805736553128e-06, "loss": 0.4928, "step": 5364 }, { "epoch": 0.5034722222222222, "grad_norm": 1.0935938832389742, "learning_rate": 9.860677780701947e-06, "loss": 0.4806, "step": 5365 }, { "epoch": 0.503566066066066, "grad_norm": 0.9919494584190583, "learning_rate": 9.860549766896335e-06, "loss": 0.4893, "step": 5366 }, { "epoch": 0.5036599099099099, "grad_norm": 1.0426455392185798, "learning_rate": 9.860421695137814e-06, "loss": 0.4722, "step": 5367 }, { "epoch": 0.5037537537537538, "grad_norm": 1.1237843201070203, "learning_rate": 9.860293565427913e-06, "loss": 0.458, "step": 5368 }, { "epoch": 0.5038475975975976, "grad_norm": 1.18571726583308, "learning_rate": 9.86016537776816e-06, "loss": 0.5037, "step": 5369 }, { "epoch": 0.5039414414414415, "grad_norm": 1.3137290898143068, "learning_rate": 9.860037132160084e-06, "loss": 0.4529, "step": 5370 }, { "epoch": 0.5040352852852853, "grad_norm": 0.9305073131032161, "learning_rate": 9.859908828605214e-06, "loss": 0.4954, "step": 5371 }, { "epoch": 0.5041291291291291, "grad_norm": 1.3409754561932121, "learning_rate": 9.859780467105077e-06, "loss": 0.4919, "step": 5372 }, { "epoch": 0.504222972972973, "grad_norm": 1.2140634539962027, "learning_rate": 9.859652047661207e-06, "loss": 0.4577, "step": 5373 }, { "epoch": 0.5043168168168168, "grad_norm": 1.0902624888294847, "learning_rate": 9.859523570275134e-06, "loss": 0.4989, "step": 5374 }, { "epoch": 0.5044106606606606, "grad_norm": 5.598306313548107, "learning_rate": 9.859395034948387e-06, "loss": 0.4776, "step": 5375 }, { "epoch": 0.5045045045045045, "grad_norm": 1.0746938131312467, "learning_rate": 9.859266441682504e-06, "loss": 0.5055, "step": 5376 }, { "epoch": 0.5045983483483484, "grad_norm": 1.0328291074891613, "learning_rate": 9.859137790479013e-06, "loss": 0.4541, "step": 5377 }, { "epoch": 0.5046921921921922, "grad_norm": 1.0330079167291757, "learning_rate": 9.859009081339452e-06, "loss": 0.5301, "step": 5378 }, { "epoch": 0.504786036036036, "grad_norm": 0.9513462105161065, "learning_rate": 9.858880314265353e-06, "loss": 0.422, "step": 5379 }, { "epoch": 0.5048798798798799, "grad_norm": 1.1208511893805158, "learning_rate": 9.858751489258252e-06, "loss": 0.567, "step": 5380 }, { "epoch": 0.5049737237237237, "grad_norm": 1.1112857857892602, "learning_rate": 9.858622606319686e-06, "loss": 0.5088, "step": 5381 }, { "epoch": 0.5050675675675675, "grad_norm": 1.1167558075207402, "learning_rate": 9.858493665451189e-06, "loss": 0.484, "step": 5382 }, { "epoch": 0.5051614114114115, "grad_norm": 1.0817664499189095, "learning_rate": 9.858364666654303e-06, "loss": 0.5224, "step": 5383 }, { "epoch": 0.5052552552552553, "grad_norm": 1.3879070281013455, "learning_rate": 9.858235609930562e-06, "loss": 0.4489, "step": 5384 }, { "epoch": 0.5053490990990991, "grad_norm": 1.1552023338432547, "learning_rate": 9.858106495281506e-06, "loss": 0.4426, "step": 5385 }, { "epoch": 0.5054429429429429, "grad_norm": 1.088716406506292, "learning_rate": 9.857977322708674e-06, "loss": 0.4631, "step": 5386 }, { "epoch": 0.5055367867867868, "grad_norm": 1.0611042810244782, "learning_rate": 9.857848092213606e-06, "loss": 0.5531, "step": 5387 }, { "epoch": 0.5056306306306306, "grad_norm": 1.1554508702049051, "learning_rate": 9.857718803797846e-06, "loss": 0.479, "step": 5388 }, { "epoch": 0.5057244744744744, "grad_norm": 1.669776570986659, "learning_rate": 9.85758945746293e-06, "loss": 0.4857, "step": 5389 }, { "epoch": 0.5058183183183184, "grad_norm": 1.0194748252914216, "learning_rate": 9.857460053210405e-06, "loss": 0.4677, "step": 5390 }, { "epoch": 0.5059121621621622, "grad_norm": 1.4037734698995576, "learning_rate": 9.857330591041813e-06, "loss": 0.5168, "step": 5391 }, { "epoch": 0.506006006006006, "grad_norm": 1.055193228600111, "learning_rate": 9.857201070958695e-06, "loss": 0.501, "step": 5392 }, { "epoch": 0.5060998498498499, "grad_norm": 1.0190633457223088, "learning_rate": 9.857071492962597e-06, "loss": 0.4919, "step": 5393 }, { "epoch": 0.5061936936936937, "grad_norm": 1.1135486617347703, "learning_rate": 9.856941857055064e-06, "loss": 0.4538, "step": 5394 }, { "epoch": 0.5062875375375375, "grad_norm": 0.9623887592755239, "learning_rate": 9.856812163237643e-06, "loss": 0.4556, "step": 5395 }, { "epoch": 0.5063813813813813, "grad_norm": 1.2139114801998743, "learning_rate": 9.856682411511878e-06, "loss": 0.4372, "step": 5396 }, { "epoch": 0.5064752252252253, "grad_norm": 1.1849688100544236, "learning_rate": 9.856552601879317e-06, "loss": 0.4967, "step": 5397 }, { "epoch": 0.5065690690690691, "grad_norm": 1.107373611306403, "learning_rate": 9.856422734341508e-06, "loss": 0.5007, "step": 5398 }, { "epoch": 0.5066629129129129, "grad_norm": 1.125512049541433, "learning_rate": 9.856292808899999e-06, "loss": 0.4807, "step": 5399 }, { "epoch": 0.5067567567567568, "grad_norm": 1.3016256472869094, "learning_rate": 9.856162825556339e-06, "loss": 0.4765, "step": 5400 }, { "epoch": 0.5068506006006006, "grad_norm": 1.1908478817581383, "learning_rate": 9.856032784312078e-06, "loss": 0.5103, "step": 5401 }, { "epoch": 0.5069444444444444, "grad_norm": 1.328848382762961, "learning_rate": 9.855902685168765e-06, "loss": 0.513, "step": 5402 }, { "epoch": 0.5070382882882883, "grad_norm": 2.103460399326669, "learning_rate": 9.855772528127956e-06, "loss": 0.5352, "step": 5403 }, { "epoch": 0.5071321321321322, "grad_norm": 1.067245437795418, "learning_rate": 9.855642313191199e-06, "loss": 0.5207, "step": 5404 }, { "epoch": 0.507225975975976, "grad_norm": 1.1804412019530046, "learning_rate": 9.855512040360046e-06, "loss": 0.4702, "step": 5405 }, { "epoch": 0.5073198198198198, "grad_norm": 1.455581842322351, "learning_rate": 9.855381709636052e-06, "loss": 0.5049, "step": 5406 }, { "epoch": 0.5074136636636637, "grad_norm": 1.2458254798553818, "learning_rate": 9.85525132102077e-06, "loss": 0.5133, "step": 5407 }, { "epoch": 0.5075075075075075, "grad_norm": 1.3183735605523539, "learning_rate": 9.855120874515756e-06, "loss": 0.4415, "step": 5408 }, { "epoch": 0.5076013513513513, "grad_norm": 1.1137838768006048, "learning_rate": 9.854990370122563e-06, "loss": 0.4881, "step": 5409 }, { "epoch": 0.5076951951951952, "grad_norm": 1.6884790505830403, "learning_rate": 9.85485980784275e-06, "loss": 0.5237, "step": 5410 }, { "epoch": 0.507789039039039, "grad_norm": 1.1438980539954267, "learning_rate": 9.854729187677871e-06, "loss": 0.4911, "step": 5411 }, { "epoch": 0.5078828828828829, "grad_norm": 1.698493524164949, "learning_rate": 9.854598509629485e-06, "loss": 0.5434, "step": 5412 }, { "epoch": 0.5079767267267268, "grad_norm": 1.1091404012947599, "learning_rate": 9.854467773699149e-06, "loss": 0.5142, "step": 5413 }, { "epoch": 0.5080705705705706, "grad_norm": 1.3788903824494194, "learning_rate": 9.854336979888423e-06, "loss": 0.5322, "step": 5414 }, { "epoch": 0.5081644144144144, "grad_norm": 1.226425483362278, "learning_rate": 9.854206128198866e-06, "loss": 0.5233, "step": 5415 }, { "epoch": 0.5082582582582582, "grad_norm": 1.4494267682308253, "learning_rate": 9.854075218632037e-06, "loss": 0.4812, "step": 5416 }, { "epoch": 0.5083521021021021, "grad_norm": 1.378388390780846, "learning_rate": 9.8539442511895e-06, "loss": 0.4643, "step": 5417 }, { "epoch": 0.5084459459459459, "grad_norm": 1.0180092222061587, "learning_rate": 9.853813225872812e-06, "loss": 0.4667, "step": 5418 }, { "epoch": 0.5085397897897898, "grad_norm": 1.0425179835610854, "learning_rate": 9.853682142683538e-06, "loss": 0.4953, "step": 5419 }, { "epoch": 0.5086336336336337, "grad_norm": 1.0216541709967157, "learning_rate": 9.853551001623243e-06, "loss": 0.4725, "step": 5420 }, { "epoch": 0.5087274774774775, "grad_norm": 1.4709615625102122, "learning_rate": 9.853419802693486e-06, "loss": 0.5798, "step": 5421 }, { "epoch": 0.5088213213213213, "grad_norm": 1.4448193702562857, "learning_rate": 9.853288545895834e-06, "loss": 0.4822, "step": 5422 }, { "epoch": 0.5089151651651652, "grad_norm": 1.1496446150732234, "learning_rate": 9.853157231231851e-06, "loss": 0.5266, "step": 5423 }, { "epoch": 0.509009009009009, "grad_norm": 1.1660890910370703, "learning_rate": 9.853025858703103e-06, "loss": 0.5173, "step": 5424 }, { "epoch": 0.5091028528528528, "grad_norm": 0.9573187056119464, "learning_rate": 9.852894428311157e-06, "loss": 0.5008, "step": 5425 }, { "epoch": 0.5091966966966966, "grad_norm": 1.110166523684939, "learning_rate": 9.852762940057581e-06, "loss": 0.4978, "step": 5426 }, { "epoch": 0.5092905405405406, "grad_norm": 1.0971185754532806, "learning_rate": 9.852631393943939e-06, "loss": 0.5243, "step": 5427 }, { "epoch": 0.5093843843843844, "grad_norm": 3.3009926225196216, "learning_rate": 9.852499789971804e-06, "loss": 0.5102, "step": 5428 }, { "epoch": 0.5094782282282282, "grad_norm": 1.17693362247112, "learning_rate": 9.85236812814274e-06, "loss": 0.4809, "step": 5429 }, { "epoch": 0.5095720720720721, "grad_norm": 1.1771861433906698, "learning_rate": 9.852236408458322e-06, "loss": 0.5032, "step": 5430 }, { "epoch": 0.5096659159159159, "grad_norm": 1.1251895308855888, "learning_rate": 9.852104630920118e-06, "loss": 0.5196, "step": 5431 }, { "epoch": 0.5097597597597597, "grad_norm": 1.1363679612083195, "learning_rate": 9.851972795529698e-06, "loss": 0.4839, "step": 5432 }, { "epoch": 0.5098536036036037, "grad_norm": 1.1862544360904792, "learning_rate": 9.851840902288636e-06, "loss": 0.4675, "step": 5433 }, { "epoch": 0.5099474474474475, "grad_norm": 1.5377185726484501, "learning_rate": 9.851708951198506e-06, "loss": 0.4595, "step": 5434 }, { "epoch": 0.5100412912912913, "grad_norm": 1.0129055386145416, "learning_rate": 9.851576942260877e-06, "loss": 0.5166, "step": 5435 }, { "epoch": 0.5101351351351351, "grad_norm": 4.163781916761974, "learning_rate": 9.851444875477326e-06, "loss": 0.4892, "step": 5436 }, { "epoch": 0.510228978978979, "grad_norm": 1.172746228364837, "learning_rate": 9.851312750849425e-06, "loss": 0.5132, "step": 5437 }, { "epoch": 0.5103228228228228, "grad_norm": 1.6463802539041505, "learning_rate": 9.851180568378754e-06, "loss": 0.5048, "step": 5438 }, { "epoch": 0.5104166666666666, "grad_norm": 1.3792541807388334, "learning_rate": 9.851048328066883e-06, "loss": 0.4868, "step": 5439 }, { "epoch": 0.5105105105105106, "grad_norm": 1.3419065003668345, "learning_rate": 9.850916029915394e-06, "loss": 0.4663, "step": 5440 }, { "epoch": 0.5106043543543544, "grad_norm": 1.2146262486140411, "learning_rate": 9.850783673925861e-06, "loss": 0.4751, "step": 5441 }, { "epoch": 0.5106981981981982, "grad_norm": 1.5597097847952877, "learning_rate": 9.850651260099864e-06, "loss": 0.439, "step": 5442 }, { "epoch": 0.5107920420420421, "grad_norm": 1.116241196709563, "learning_rate": 9.850518788438982e-06, "loss": 0.476, "step": 5443 }, { "epoch": 0.5108858858858859, "grad_norm": 0.9502926963306397, "learning_rate": 9.850386258944792e-06, "loss": 0.4653, "step": 5444 }, { "epoch": 0.5109797297297297, "grad_norm": 1.1414662093183325, "learning_rate": 9.850253671618878e-06, "loss": 0.5553, "step": 5445 }, { "epoch": 0.5110735735735735, "grad_norm": 1.1766654998927923, "learning_rate": 9.850121026462815e-06, "loss": 0.4493, "step": 5446 }, { "epoch": 0.5111674174174174, "grad_norm": 1.2298005286417706, "learning_rate": 9.84998832347819e-06, "loss": 0.4246, "step": 5447 }, { "epoch": 0.5112612612612613, "grad_norm": 1.1674157974224637, "learning_rate": 9.849855562666584e-06, "loss": 0.486, "step": 5448 }, { "epoch": 0.5113551051051051, "grad_norm": 1.2111561281745002, "learning_rate": 9.84972274402958e-06, "loss": 0.5559, "step": 5449 }, { "epoch": 0.511448948948949, "grad_norm": 1.0616598181752848, "learning_rate": 9.84958986756876e-06, "loss": 0.4596, "step": 5450 }, { "epoch": 0.5115427927927928, "grad_norm": 0.8863245359407728, "learning_rate": 9.849456933285708e-06, "loss": 0.3932, "step": 5451 }, { "epoch": 0.5116366366366366, "grad_norm": 1.036412370874783, "learning_rate": 9.849323941182012e-06, "loss": 0.4636, "step": 5452 }, { "epoch": 0.5117304804804805, "grad_norm": 0.9175840862868327, "learning_rate": 9.849190891259255e-06, "loss": 0.4397, "step": 5453 }, { "epoch": 0.5118243243243243, "grad_norm": 1.072491712712044, "learning_rate": 9.849057783519025e-06, "loss": 0.4986, "step": 5454 }, { "epoch": 0.5119181681681682, "grad_norm": 1.1386618420024388, "learning_rate": 9.848924617962908e-06, "loss": 0.5134, "step": 5455 }, { "epoch": 0.512012012012012, "grad_norm": 1.485811704582615, "learning_rate": 9.848791394592492e-06, "loss": 0.4714, "step": 5456 }, { "epoch": 0.5121058558558559, "grad_norm": 1.0860251250742265, "learning_rate": 9.848658113409366e-06, "loss": 0.4995, "step": 5457 }, { "epoch": 0.5121996996996997, "grad_norm": 1.187458047241493, "learning_rate": 9.848524774415118e-06, "loss": 0.5241, "step": 5458 }, { "epoch": 0.5122935435435435, "grad_norm": 1.0834856307031249, "learning_rate": 9.84839137761134e-06, "loss": 0.4955, "step": 5459 }, { "epoch": 0.5123873873873874, "grad_norm": 1.053159674116458, "learning_rate": 9.848257922999618e-06, "loss": 0.4031, "step": 5460 }, { "epoch": 0.5124812312312312, "grad_norm": 1.080993385174569, "learning_rate": 9.848124410581547e-06, "loss": 0.5046, "step": 5461 }, { "epoch": 0.512575075075075, "grad_norm": 0.9930788236604189, "learning_rate": 9.84799084035872e-06, "loss": 0.4778, "step": 5462 }, { "epoch": 0.512668918918919, "grad_norm": 1.05901505615378, "learning_rate": 9.847857212332727e-06, "loss": 0.4431, "step": 5463 }, { "epoch": 0.5127627627627628, "grad_norm": 1.1052950319664379, "learning_rate": 9.847723526505162e-06, "loss": 0.5197, "step": 5464 }, { "epoch": 0.5128566066066066, "grad_norm": 1.0997897502407699, "learning_rate": 9.847589782877618e-06, "loss": 0.488, "step": 5465 }, { "epoch": 0.5129504504504504, "grad_norm": 1.3507127418388982, "learning_rate": 9.847455981451691e-06, "loss": 0.4832, "step": 5466 }, { "epoch": 0.5130442942942943, "grad_norm": 1.1217010854168687, "learning_rate": 9.847322122228975e-06, "loss": 0.585, "step": 5467 }, { "epoch": 0.5131381381381381, "grad_norm": 1.2059878643126687, "learning_rate": 9.847188205211067e-06, "loss": 0.4595, "step": 5468 }, { "epoch": 0.5132319819819819, "grad_norm": 1.2245184385366905, "learning_rate": 9.847054230399564e-06, "loss": 0.539, "step": 5469 }, { "epoch": 0.5133258258258259, "grad_norm": 1.0392735693230712, "learning_rate": 9.846920197796064e-06, "loss": 0.4855, "step": 5470 }, { "epoch": 0.5134196696696697, "grad_norm": 1.9865443405494079, "learning_rate": 9.846786107402162e-06, "loss": 0.5108, "step": 5471 }, { "epoch": 0.5135135135135135, "grad_norm": 1.2318032982319447, "learning_rate": 9.84665195921946e-06, "loss": 0.452, "step": 5472 }, { "epoch": 0.5136073573573574, "grad_norm": 1.4168311332605386, "learning_rate": 9.846517753249555e-06, "loss": 0.5452, "step": 5473 }, { "epoch": 0.5137012012012012, "grad_norm": 2.0248504709797626, "learning_rate": 9.846383489494051e-06, "loss": 0.5063, "step": 5474 }, { "epoch": 0.513795045045045, "grad_norm": 1.0918959286237682, "learning_rate": 9.846249167954544e-06, "loss": 0.4806, "step": 5475 }, { "epoch": 0.5138888888888888, "grad_norm": 1.4313506219894179, "learning_rate": 9.84611478863264e-06, "loss": 0.5194, "step": 5476 }, { "epoch": 0.5139827327327328, "grad_norm": 1.1088781048268932, "learning_rate": 9.845980351529936e-06, "loss": 0.503, "step": 5477 }, { "epoch": 0.5140765765765766, "grad_norm": 1.1016032358865655, "learning_rate": 9.84584585664804e-06, "loss": 0.4939, "step": 5478 }, { "epoch": 0.5141704204204204, "grad_norm": 9.199433400146178, "learning_rate": 9.845711303988552e-06, "loss": 0.4972, "step": 5479 }, { "epoch": 0.5142642642642643, "grad_norm": 1.1167561256243417, "learning_rate": 9.84557669355308e-06, "loss": 0.4693, "step": 5480 }, { "epoch": 0.5143581081081081, "grad_norm": 1.3401607124121921, "learning_rate": 9.845442025343226e-06, "loss": 0.4907, "step": 5481 }, { "epoch": 0.5144519519519519, "grad_norm": 0.9119983772654102, "learning_rate": 9.845307299360596e-06, "loss": 0.4693, "step": 5482 }, { "epoch": 0.5145457957957958, "grad_norm": 1.307802657875819, "learning_rate": 9.845172515606795e-06, "loss": 0.5347, "step": 5483 }, { "epoch": 0.5146396396396397, "grad_norm": 3.519181189434024, "learning_rate": 9.845037674083435e-06, "loss": 0.5231, "step": 5484 }, { "epoch": 0.5147334834834835, "grad_norm": 1.398458283401241, "learning_rate": 9.84490277479212e-06, "loss": 0.5272, "step": 5485 }, { "epoch": 0.5148273273273273, "grad_norm": 0.9012027386346827, "learning_rate": 9.844767817734458e-06, "loss": 0.4559, "step": 5486 }, { "epoch": 0.5149211711711712, "grad_norm": 1.314318979190488, "learning_rate": 9.84463280291206e-06, "loss": 0.4964, "step": 5487 }, { "epoch": 0.515015015015015, "grad_norm": 1.1288696162944214, "learning_rate": 9.844497730326534e-06, "loss": 0.517, "step": 5488 }, { "epoch": 0.5151088588588588, "grad_norm": 1.166485314655846, "learning_rate": 9.84436259997949e-06, "loss": 0.5154, "step": 5489 }, { "epoch": 0.5152027027027027, "grad_norm": 1.06338350042133, "learning_rate": 9.844227411872544e-06, "loss": 0.4599, "step": 5490 }, { "epoch": 0.5152965465465466, "grad_norm": 1.0463872923885604, "learning_rate": 9.844092166007301e-06, "loss": 0.4415, "step": 5491 }, { "epoch": 0.5153903903903904, "grad_norm": 1.1664471624597017, "learning_rate": 9.84395686238538e-06, "loss": 0.5125, "step": 5492 }, { "epoch": 0.5154842342342343, "grad_norm": 1.2006639430620567, "learning_rate": 9.84382150100839e-06, "loss": 0.4607, "step": 5493 }, { "epoch": 0.5155780780780781, "grad_norm": 1.5649341110655992, "learning_rate": 9.843686081877946e-06, "loss": 0.4962, "step": 5494 }, { "epoch": 0.5156719219219219, "grad_norm": 1.2414318090407452, "learning_rate": 9.843550604995663e-06, "loss": 0.5167, "step": 5495 }, { "epoch": 0.5157657657657657, "grad_norm": 1.436064369193388, "learning_rate": 9.843415070363156e-06, "loss": 0.4891, "step": 5496 }, { "epoch": 0.5158596096096096, "grad_norm": 1.1467163035483687, "learning_rate": 9.843279477982041e-06, "loss": 0.4875, "step": 5497 }, { "epoch": 0.5159534534534534, "grad_norm": 1.545695741089436, "learning_rate": 9.843143827853934e-06, "loss": 0.4191, "step": 5498 }, { "epoch": 0.5160472972972973, "grad_norm": 1.2883802270739524, "learning_rate": 9.843008119980456e-06, "loss": 0.4805, "step": 5499 }, { "epoch": 0.5161411411411412, "grad_norm": 1.258138409877896, "learning_rate": 9.842872354363218e-06, "loss": 0.48, "step": 5500 }, { "epoch": 0.516234984984985, "grad_norm": 1.3010649110063635, "learning_rate": 9.842736531003847e-06, "loss": 0.4729, "step": 5501 }, { "epoch": 0.5163288288288288, "grad_norm": 1.0139802044505657, "learning_rate": 9.842600649903955e-06, "loss": 0.4608, "step": 5502 }, { "epoch": 0.5164226726726727, "grad_norm": 1.0698165656665488, "learning_rate": 9.842464711065168e-06, "loss": 0.5043, "step": 5503 }, { "epoch": 0.5165165165165165, "grad_norm": 1.0230586259128287, "learning_rate": 9.842328714489102e-06, "loss": 0.4614, "step": 5504 }, { "epoch": 0.5166103603603603, "grad_norm": 1.2838990259839325, "learning_rate": 9.842192660177383e-06, "loss": 0.5072, "step": 5505 }, { "epoch": 0.5167042042042042, "grad_norm": 1.155878090568881, "learning_rate": 9.84205654813163e-06, "loss": 0.5052, "step": 5506 }, { "epoch": 0.5167980480480481, "grad_norm": 0.9275984934118499, "learning_rate": 9.841920378353467e-06, "loss": 0.5015, "step": 5507 }, { "epoch": 0.5168918918918919, "grad_norm": 1.2532920968848515, "learning_rate": 9.841784150844517e-06, "loss": 0.4877, "step": 5508 }, { "epoch": 0.5169857357357357, "grad_norm": 1.2912179542915991, "learning_rate": 9.841647865606406e-06, "loss": 0.5357, "step": 5509 }, { "epoch": 0.5170795795795796, "grad_norm": 1.20273322415397, "learning_rate": 9.841511522640756e-06, "loss": 0.4798, "step": 5510 }, { "epoch": 0.5171734234234234, "grad_norm": 2.1627405527095407, "learning_rate": 9.841375121949195e-06, "loss": 0.4775, "step": 5511 }, { "epoch": 0.5172672672672672, "grad_norm": 2.439964239809729, "learning_rate": 9.84123866353335e-06, "loss": 0.5207, "step": 5512 }, { "epoch": 0.5173611111111112, "grad_norm": 1.1670879426007734, "learning_rate": 9.841102147394846e-06, "loss": 0.5046, "step": 5513 }, { "epoch": 0.517454954954955, "grad_norm": 1.0421268221539142, "learning_rate": 9.840965573535311e-06, "loss": 0.4608, "step": 5514 }, { "epoch": 0.5175487987987988, "grad_norm": 1.2510251975884965, "learning_rate": 9.840828941956373e-06, "loss": 0.4992, "step": 5515 }, { "epoch": 0.5176426426426426, "grad_norm": 0.9604545854007701, "learning_rate": 9.84069225265966e-06, "loss": 0.4385, "step": 5516 }, { "epoch": 0.5177364864864865, "grad_norm": 2.9785405801579237, "learning_rate": 9.840555505646806e-06, "loss": 0.4994, "step": 5517 }, { "epoch": 0.5178303303303303, "grad_norm": 1.1623707888476307, "learning_rate": 9.840418700919441e-06, "loss": 0.4706, "step": 5518 }, { "epoch": 0.5179241741741741, "grad_norm": 1.0917296171772553, "learning_rate": 9.840281838479191e-06, "loss": 0.4888, "step": 5519 }, { "epoch": 0.5180180180180181, "grad_norm": 1.7149668962610956, "learning_rate": 9.840144918327691e-06, "loss": 0.4898, "step": 5520 }, { "epoch": 0.5181118618618619, "grad_norm": 1.1152141290869901, "learning_rate": 9.840007940466576e-06, "loss": 0.5009, "step": 5521 }, { "epoch": 0.5182057057057057, "grad_norm": 2.300996245016872, "learning_rate": 9.839870904897475e-06, "loss": 0.4266, "step": 5522 }, { "epoch": 0.5182995495495496, "grad_norm": 1.0687433860516615, "learning_rate": 9.839733811622024e-06, "loss": 0.4925, "step": 5523 }, { "epoch": 0.5183933933933934, "grad_norm": 2.7310285861584602, "learning_rate": 9.839596660641857e-06, "loss": 0.4637, "step": 5524 }, { "epoch": 0.5184872372372372, "grad_norm": 1.0632944524820576, "learning_rate": 9.83945945195861e-06, "loss": 0.4834, "step": 5525 }, { "epoch": 0.518581081081081, "grad_norm": 1.5863228676550136, "learning_rate": 9.839322185573917e-06, "loss": 0.4798, "step": 5526 }, { "epoch": 0.518674924924925, "grad_norm": 0.984009317183603, "learning_rate": 9.839184861489416e-06, "loss": 0.4708, "step": 5527 }, { "epoch": 0.5187687687687688, "grad_norm": 1.4640430286941675, "learning_rate": 9.839047479706744e-06, "loss": 0.5387, "step": 5528 }, { "epoch": 0.5188626126126126, "grad_norm": 1.0494398481662868, "learning_rate": 9.838910040227542e-06, "loss": 0.5303, "step": 5529 }, { "epoch": 0.5189564564564565, "grad_norm": 1.1341313801523183, "learning_rate": 9.838772543053444e-06, "loss": 0.4784, "step": 5530 }, { "epoch": 0.5190503003003003, "grad_norm": 1.1441539442828121, "learning_rate": 9.838634988186093e-06, "loss": 0.5299, "step": 5531 }, { "epoch": 0.5191441441441441, "grad_norm": 1.3395604277052793, "learning_rate": 9.838497375627126e-06, "loss": 0.4978, "step": 5532 }, { "epoch": 0.519237987987988, "grad_norm": 1.216728862627099, "learning_rate": 9.838359705378186e-06, "loss": 0.5905, "step": 5533 }, { "epoch": 0.5193318318318318, "grad_norm": 1.847968414067206, "learning_rate": 9.838221977440914e-06, "loss": 0.4924, "step": 5534 }, { "epoch": 0.5194256756756757, "grad_norm": 1.3037583582087169, "learning_rate": 9.83808419181695e-06, "loss": 0.4899, "step": 5535 }, { "epoch": 0.5195195195195195, "grad_norm": 1.0585953356387199, "learning_rate": 9.83794634850794e-06, "loss": 0.4732, "step": 5536 }, { "epoch": 0.5196133633633634, "grad_norm": 1.1328827798156056, "learning_rate": 9.837808447515527e-06, "loss": 0.5033, "step": 5537 }, { "epoch": 0.5197072072072072, "grad_norm": 1.0725981697336573, "learning_rate": 9.837670488841356e-06, "loss": 0.5097, "step": 5538 }, { "epoch": 0.519801051051051, "grad_norm": 1.1152118406437486, "learning_rate": 9.837532472487068e-06, "loss": 0.5185, "step": 5539 }, { "epoch": 0.5198948948948949, "grad_norm": 1.407636905600522, "learning_rate": 9.837394398454312e-06, "loss": 0.5279, "step": 5540 }, { "epoch": 0.5199887387387387, "grad_norm": 1.166553720052075, "learning_rate": 9.837256266744732e-06, "loss": 0.4457, "step": 5541 }, { "epoch": 0.5200825825825826, "grad_norm": 1.0130212174646431, "learning_rate": 9.837118077359977e-06, "loss": 0.443, "step": 5542 }, { "epoch": 0.5201764264264265, "grad_norm": 1.1227281806286216, "learning_rate": 9.836979830301694e-06, "loss": 0.4761, "step": 5543 }, { "epoch": 0.5202702702702703, "grad_norm": 1.2575714002940466, "learning_rate": 9.83684152557153e-06, "loss": 0.4925, "step": 5544 }, { "epoch": 0.5203641141141141, "grad_norm": 0.9725647727322683, "learning_rate": 9.836703163171138e-06, "loss": 0.4786, "step": 5545 }, { "epoch": 0.5204579579579579, "grad_norm": 1.4212656131056487, "learning_rate": 9.836564743102162e-06, "loss": 0.4813, "step": 5546 }, { "epoch": 0.5205518018018018, "grad_norm": 1.2237421529988877, "learning_rate": 9.836426265366256e-06, "loss": 0.463, "step": 5547 }, { "epoch": 0.5206456456456456, "grad_norm": 1.2267497335333475, "learning_rate": 9.836287729965073e-06, "loss": 0.5123, "step": 5548 }, { "epoch": 0.5207394894894894, "grad_norm": 1.1444116996936529, "learning_rate": 9.83614913690026e-06, "loss": 0.5414, "step": 5549 }, { "epoch": 0.5208333333333334, "grad_norm": 1.1344473284088277, "learning_rate": 9.83601048617347e-06, "loss": 0.5258, "step": 5550 }, { "epoch": 0.5209271771771772, "grad_norm": 1.071812348971305, "learning_rate": 9.835871777786361e-06, "loss": 0.4759, "step": 5551 }, { "epoch": 0.521021021021021, "grad_norm": 1.0088269051677499, "learning_rate": 9.835733011740583e-06, "loss": 0.5043, "step": 5552 }, { "epoch": 0.5211148648648649, "grad_norm": 1.0202759444830252, "learning_rate": 9.835594188037789e-06, "loss": 0.467, "step": 5553 }, { "epoch": 0.5212087087087087, "grad_norm": 1.0901807501093401, "learning_rate": 9.835455306679638e-06, "loss": 0.4504, "step": 5554 }, { "epoch": 0.5213025525525525, "grad_norm": 1.060690619287862, "learning_rate": 9.835316367667784e-06, "loss": 0.4758, "step": 5555 }, { "epoch": 0.5213963963963963, "grad_norm": 2.769420967786438, "learning_rate": 9.835177371003884e-06, "loss": 0.4747, "step": 5556 }, { "epoch": 0.5214902402402403, "grad_norm": 1.065496989074916, "learning_rate": 9.835038316689595e-06, "loss": 0.4736, "step": 5557 }, { "epoch": 0.5215840840840841, "grad_norm": 1.2717099265968674, "learning_rate": 9.834899204726576e-06, "loss": 0.474, "step": 5558 }, { "epoch": 0.5216779279279279, "grad_norm": 1.2891897795678022, "learning_rate": 9.834760035116483e-06, "loss": 0.5487, "step": 5559 }, { "epoch": 0.5217717717717718, "grad_norm": 0.9591754084390366, "learning_rate": 9.834620807860979e-06, "loss": 0.4871, "step": 5560 }, { "epoch": 0.5218656156156156, "grad_norm": 1.3332347570352114, "learning_rate": 9.834481522961721e-06, "loss": 0.496, "step": 5561 }, { "epoch": 0.5219594594594594, "grad_norm": 1.2874584807166296, "learning_rate": 9.83434218042037e-06, "loss": 0.516, "step": 5562 }, { "epoch": 0.5220533033033034, "grad_norm": 1.0126830792942096, "learning_rate": 9.83420278023859e-06, "loss": 0.4554, "step": 5563 }, { "epoch": 0.5221471471471472, "grad_norm": 1.1481956742133461, "learning_rate": 9.834063322418041e-06, "loss": 0.4979, "step": 5564 }, { "epoch": 0.522240990990991, "grad_norm": 1.1453187195126253, "learning_rate": 9.833923806960386e-06, "loss": 0.5523, "step": 5565 }, { "epoch": 0.5223348348348348, "grad_norm": 1.082780674640794, "learning_rate": 9.833784233867288e-06, "loss": 0.4561, "step": 5566 }, { "epoch": 0.5224286786786787, "grad_norm": 1.0760375674875922, "learning_rate": 9.83364460314041e-06, "loss": 0.467, "step": 5567 }, { "epoch": 0.5225225225225225, "grad_norm": 1.4347279418822185, "learning_rate": 9.833504914781422e-06, "loss": 0.4825, "step": 5568 }, { "epoch": 0.5226163663663663, "grad_norm": 1.1431482584677324, "learning_rate": 9.833365168791985e-06, "loss": 0.5027, "step": 5569 }, { "epoch": 0.5227102102102102, "grad_norm": 1.2633205273760195, "learning_rate": 9.833225365173763e-06, "loss": 0.5009, "step": 5570 }, { "epoch": 0.5228040540540541, "grad_norm": 1.0597165098052623, "learning_rate": 9.83308550392843e-06, "loss": 0.4984, "step": 5571 }, { "epoch": 0.5228978978978979, "grad_norm": 1.156991318488865, "learning_rate": 9.832945585057649e-06, "loss": 0.522, "step": 5572 }, { "epoch": 0.5229917417417418, "grad_norm": 1.1365412488460382, "learning_rate": 9.832805608563088e-06, "loss": 0.4573, "step": 5573 }, { "epoch": 0.5230855855855856, "grad_norm": 0.9867735452464305, "learning_rate": 9.832665574446416e-06, "loss": 0.4969, "step": 5574 }, { "epoch": 0.5231794294294294, "grad_norm": 0.978636773228996, "learning_rate": 9.832525482709306e-06, "loss": 0.489, "step": 5575 }, { "epoch": 0.5232732732732732, "grad_norm": 1.3635099800143857, "learning_rate": 9.832385333353425e-06, "loss": 0.5095, "step": 5576 }, { "epoch": 0.5233671171171171, "grad_norm": 1.3199139782549498, "learning_rate": 9.832245126380445e-06, "loss": 0.5042, "step": 5577 }, { "epoch": 0.523460960960961, "grad_norm": 0.9588976095095911, "learning_rate": 9.832104861792037e-06, "loss": 0.4412, "step": 5578 }, { "epoch": 0.5235548048048048, "grad_norm": 1.5803536143245867, "learning_rate": 9.831964539589874e-06, "loss": 0.5198, "step": 5579 }, { "epoch": 0.5236486486486487, "grad_norm": 1.3370819853795506, "learning_rate": 9.831824159775629e-06, "loss": 0.5039, "step": 5580 }, { "epoch": 0.5237424924924925, "grad_norm": 1.0187846676650483, "learning_rate": 9.831683722350978e-06, "loss": 0.4556, "step": 5581 }, { "epoch": 0.5238363363363363, "grad_norm": 1.1126293080497416, "learning_rate": 9.83154322731759e-06, "loss": 0.4617, "step": 5582 }, { "epoch": 0.5239301801801802, "grad_norm": 1.0738001068226612, "learning_rate": 9.831402674677146e-06, "loss": 0.4753, "step": 5583 }, { "epoch": 0.524024024024024, "grad_norm": 1.2209926469779464, "learning_rate": 9.83126206443132e-06, "loss": 0.5009, "step": 5584 }, { "epoch": 0.5241178678678678, "grad_norm": 1.3966492287921228, "learning_rate": 9.831121396581784e-06, "loss": 0.5299, "step": 5585 }, { "epoch": 0.5242117117117117, "grad_norm": 1.059077615167264, "learning_rate": 9.830980671130221e-06, "loss": 0.5087, "step": 5586 }, { "epoch": 0.5243055555555556, "grad_norm": 1.28227841934812, "learning_rate": 9.830839888078308e-06, "loss": 0.4533, "step": 5587 }, { "epoch": 0.5243993993993994, "grad_norm": 1.2636098462968564, "learning_rate": 9.83069904742772e-06, "loss": 0.5051, "step": 5588 }, { "epoch": 0.5244932432432432, "grad_norm": 1.1971247606797937, "learning_rate": 9.830558149180141e-06, "loss": 0.4571, "step": 5589 }, { "epoch": 0.5245870870870871, "grad_norm": 1.0758877768479655, "learning_rate": 9.830417193337249e-06, "loss": 0.48, "step": 5590 }, { "epoch": 0.5246809309309309, "grad_norm": 1.463428684442627, "learning_rate": 9.830276179900722e-06, "loss": 0.4957, "step": 5591 }, { "epoch": 0.5247747747747747, "grad_norm": 1.754125772330432, "learning_rate": 9.830135108872245e-06, "loss": 0.4882, "step": 5592 }, { "epoch": 0.5248686186186187, "grad_norm": 1.0228928017147159, "learning_rate": 9.829993980253498e-06, "loss": 0.4336, "step": 5593 }, { "epoch": 0.5249624624624625, "grad_norm": 1.0097729102500768, "learning_rate": 9.829852794046167e-06, "loss": 0.4712, "step": 5594 }, { "epoch": 0.5250563063063063, "grad_norm": 1.0583099278482258, "learning_rate": 9.829711550251929e-06, "loss": 0.4564, "step": 5595 }, { "epoch": 0.5251501501501501, "grad_norm": 1.1064529712487432, "learning_rate": 9.829570248872474e-06, "loss": 0.4844, "step": 5596 }, { "epoch": 0.525243993993994, "grad_norm": 1.0686338357067504, "learning_rate": 9.829428889909483e-06, "loss": 0.5204, "step": 5597 }, { "epoch": 0.5253378378378378, "grad_norm": 1.0427228302634897, "learning_rate": 9.829287473364644e-06, "loss": 0.5038, "step": 5598 }, { "epoch": 0.5254316816816816, "grad_norm": 1.507581247131329, "learning_rate": 9.829145999239642e-06, "loss": 0.4642, "step": 5599 }, { "epoch": 0.5255255255255256, "grad_norm": 1.6284746203647635, "learning_rate": 9.829004467536162e-06, "loss": 0.5336, "step": 5600 }, { "epoch": 0.5256193693693694, "grad_norm": 1.1181462664699324, "learning_rate": 9.828862878255896e-06, "loss": 0.5463, "step": 5601 }, { "epoch": 0.5257132132132132, "grad_norm": 1.1097632988137494, "learning_rate": 9.828721231400528e-06, "loss": 0.5094, "step": 5602 }, { "epoch": 0.5258070570570571, "grad_norm": 1.0242147706129068, "learning_rate": 9.82857952697175e-06, "loss": 0.4865, "step": 5603 }, { "epoch": 0.5259009009009009, "grad_norm": 1.111747056040423, "learning_rate": 9.828437764971249e-06, "loss": 0.4826, "step": 5604 }, { "epoch": 0.5259947447447447, "grad_norm": 1.24132064759234, "learning_rate": 9.828295945400716e-06, "loss": 0.5101, "step": 5605 }, { "epoch": 0.5260885885885885, "grad_norm": 1.6422319096820792, "learning_rate": 9.828154068261844e-06, "loss": 0.5201, "step": 5606 }, { "epoch": 0.5261824324324325, "grad_norm": 2.174112529492093, "learning_rate": 9.828012133556319e-06, "loss": 0.4822, "step": 5607 }, { "epoch": 0.5262762762762763, "grad_norm": 1.1755397059359007, "learning_rate": 9.82787014128584e-06, "loss": 0.5296, "step": 5608 }, { "epoch": 0.5263701201201201, "grad_norm": 1.1912246651564167, "learning_rate": 9.827728091452098e-06, "loss": 0.481, "step": 5609 }, { "epoch": 0.526463963963964, "grad_norm": 1.4723574564725708, "learning_rate": 9.827585984056784e-06, "loss": 0.4693, "step": 5610 }, { "epoch": 0.5265578078078078, "grad_norm": 1.001341033196872, "learning_rate": 9.827443819101595e-06, "loss": 0.4596, "step": 5611 }, { "epoch": 0.5266516516516516, "grad_norm": 0.9671324245979863, "learning_rate": 9.827301596588223e-06, "loss": 0.4896, "step": 5612 }, { "epoch": 0.5267454954954955, "grad_norm": 1.1790365622614973, "learning_rate": 9.827159316518368e-06, "loss": 0.5124, "step": 5613 }, { "epoch": 0.5268393393393394, "grad_norm": 0.9537350229446772, "learning_rate": 9.827016978893724e-06, "loss": 0.5178, "step": 5614 }, { "epoch": 0.5269331831831832, "grad_norm": 0.9461735786894041, "learning_rate": 9.826874583715989e-06, "loss": 0.4511, "step": 5615 }, { "epoch": 0.527027027027027, "grad_norm": 1.0897277710087894, "learning_rate": 9.826732130986858e-06, "loss": 0.4318, "step": 5616 }, { "epoch": 0.5271208708708709, "grad_norm": 0.916068816029455, "learning_rate": 9.826589620708034e-06, "loss": 0.4621, "step": 5617 }, { "epoch": 0.5272147147147147, "grad_norm": 1.245134930463832, "learning_rate": 9.826447052881214e-06, "loss": 0.5075, "step": 5618 }, { "epoch": 0.5273085585585585, "grad_norm": 1.3039917873193605, "learning_rate": 9.826304427508096e-06, "loss": 0.5168, "step": 5619 }, { "epoch": 0.5274024024024024, "grad_norm": 1.4455796037839146, "learning_rate": 9.826161744590383e-06, "loss": 0.4951, "step": 5620 }, { "epoch": 0.5274962462462462, "grad_norm": 0.9858162846029942, "learning_rate": 9.826019004129776e-06, "loss": 0.4738, "step": 5621 }, { "epoch": 0.5275900900900901, "grad_norm": 1.226209812321071, "learning_rate": 9.825876206127976e-06, "loss": 0.4793, "step": 5622 }, { "epoch": 0.527683933933934, "grad_norm": 1.1998080062915473, "learning_rate": 9.825733350586686e-06, "loss": 0.4976, "step": 5623 }, { "epoch": 0.5277777777777778, "grad_norm": 1.14690712026234, "learning_rate": 9.82559043750761e-06, "loss": 0.4777, "step": 5624 }, { "epoch": 0.5278716216216216, "grad_norm": 1.2923160505923907, "learning_rate": 9.825447466892451e-06, "loss": 0.4712, "step": 5625 }, { "epoch": 0.5279654654654654, "grad_norm": 1.1271922421806053, "learning_rate": 9.825304438742912e-06, "loss": 0.4887, "step": 5626 }, { "epoch": 0.5280593093093093, "grad_norm": 1.0531216045936158, "learning_rate": 9.825161353060704e-06, "loss": 0.4523, "step": 5627 }, { "epoch": 0.5281531531531531, "grad_norm": 1.091650932356352, "learning_rate": 9.825018209847526e-06, "loss": 0.4886, "step": 5628 }, { "epoch": 0.528246996996997, "grad_norm": 1.0086523446724833, "learning_rate": 9.82487500910509e-06, "loss": 0.4671, "step": 5629 }, { "epoch": 0.5283408408408409, "grad_norm": 1.2760108805761536, "learning_rate": 9.824731750835101e-06, "loss": 0.5063, "step": 5630 }, { "epoch": 0.5284346846846847, "grad_norm": 1.1286359253570555, "learning_rate": 9.824588435039267e-06, "loss": 0.4857, "step": 5631 }, { "epoch": 0.5285285285285285, "grad_norm": 1.0331128800654892, "learning_rate": 9.824445061719298e-06, "loss": 0.5371, "step": 5632 }, { "epoch": 0.5286223723723724, "grad_norm": 1.1637351016880364, "learning_rate": 9.824301630876903e-06, "loss": 0.4818, "step": 5633 }, { "epoch": 0.5287162162162162, "grad_norm": 2.2879718276957797, "learning_rate": 9.824158142513788e-06, "loss": 0.4755, "step": 5634 }, { "epoch": 0.52881006006006, "grad_norm": 1.1349702806914306, "learning_rate": 9.824014596631673e-06, "loss": 0.5037, "step": 5635 }, { "epoch": 0.5289039039039038, "grad_norm": 1.1114535144200381, "learning_rate": 9.823870993232261e-06, "loss": 0.5118, "step": 5636 }, { "epoch": 0.5289977477477478, "grad_norm": 1.897967143223919, "learning_rate": 9.823727332317269e-06, "loss": 0.5322, "step": 5637 }, { "epoch": 0.5290915915915916, "grad_norm": 1.0003991269070782, "learning_rate": 9.823583613888408e-06, "loss": 0.4786, "step": 5638 }, { "epoch": 0.5291854354354354, "grad_norm": 1.5603647980610063, "learning_rate": 9.82343983794739e-06, "loss": 0.4785, "step": 5639 }, { "epoch": 0.5292792792792793, "grad_norm": 0.9915844300853197, "learning_rate": 9.823296004495934e-06, "loss": 0.4913, "step": 5640 }, { "epoch": 0.5293731231231231, "grad_norm": 1.4580141089190826, "learning_rate": 9.82315211353575e-06, "loss": 0.4881, "step": 5641 }, { "epoch": 0.5294669669669669, "grad_norm": 1.1384552214238066, "learning_rate": 9.823008165068557e-06, "loss": 0.5001, "step": 5642 }, { "epoch": 0.5295608108108109, "grad_norm": 1.100337059476216, "learning_rate": 9.822864159096068e-06, "loss": 0.4812, "step": 5643 }, { "epoch": 0.5296546546546547, "grad_norm": 1.2893286717272165, "learning_rate": 9.822720095620004e-06, "loss": 0.5022, "step": 5644 }, { "epoch": 0.5297484984984985, "grad_norm": 1.3211622684732307, "learning_rate": 9.82257597464208e-06, "loss": 0.5167, "step": 5645 }, { "epoch": 0.5298423423423423, "grad_norm": 1.159142113009476, "learning_rate": 9.822431796164015e-06, "loss": 0.5065, "step": 5646 }, { "epoch": 0.5299361861861862, "grad_norm": 1.1239646789389295, "learning_rate": 9.822287560187529e-06, "loss": 0.4334, "step": 5647 }, { "epoch": 0.53003003003003, "grad_norm": 1.0288362552471926, "learning_rate": 9.82214326671434e-06, "loss": 0.4867, "step": 5648 }, { "epoch": 0.5301238738738738, "grad_norm": 1.4505565237633133, "learning_rate": 9.821998915746169e-06, "loss": 0.5405, "step": 5649 }, { "epoch": 0.5302177177177178, "grad_norm": 1.016778200675099, "learning_rate": 9.821854507284738e-06, "loss": 0.4747, "step": 5650 }, { "epoch": 0.5303115615615616, "grad_norm": 1.177292481866016, "learning_rate": 9.821710041331769e-06, "loss": 0.4538, "step": 5651 }, { "epoch": 0.5304054054054054, "grad_norm": 1.6763297879235737, "learning_rate": 9.821565517888982e-06, "loss": 0.4822, "step": 5652 }, { "epoch": 0.5304992492492493, "grad_norm": 1.150436326894096, "learning_rate": 9.821420936958104e-06, "loss": 0.4553, "step": 5653 }, { "epoch": 0.5305930930930931, "grad_norm": 1.3098446865286952, "learning_rate": 9.821276298540853e-06, "loss": 0.4907, "step": 5654 }, { "epoch": 0.5306869369369369, "grad_norm": 1.4637440885211543, "learning_rate": 9.821131602638961e-06, "loss": 0.5017, "step": 5655 }, { "epoch": 0.5307807807807807, "grad_norm": 1.129654981311654, "learning_rate": 9.820986849254149e-06, "loss": 0.4963, "step": 5656 }, { "epoch": 0.5308746246246246, "grad_norm": 1.5045025677567136, "learning_rate": 9.82084203838814e-06, "loss": 0.4714, "step": 5657 }, { "epoch": 0.5309684684684685, "grad_norm": 1.1780959845466048, "learning_rate": 9.820697170042668e-06, "loss": 0.4265, "step": 5658 }, { "epoch": 0.5310623123123123, "grad_norm": 1.0485813194259346, "learning_rate": 9.820552244219454e-06, "loss": 0.4731, "step": 5659 }, { "epoch": 0.5311561561561562, "grad_norm": 1.096912059529357, "learning_rate": 9.82040726092023e-06, "loss": 0.494, "step": 5660 }, { "epoch": 0.53125, "grad_norm": 1.3054432955432818, "learning_rate": 9.820262220146722e-06, "loss": 0.5035, "step": 5661 }, { "epoch": 0.5313438438438438, "grad_norm": 6.111606242418113, "learning_rate": 9.820117121900661e-06, "loss": 0.5277, "step": 5662 }, { "epoch": 0.5314376876876877, "grad_norm": 1.4448922419894052, "learning_rate": 9.819971966183774e-06, "loss": 0.4977, "step": 5663 }, { "epoch": 0.5315315315315315, "grad_norm": 1.2859858951915561, "learning_rate": 9.819826752997797e-06, "loss": 0.5071, "step": 5664 }, { "epoch": 0.5316253753753754, "grad_norm": 1.0637779132860228, "learning_rate": 9.819681482344455e-06, "loss": 0.45, "step": 5665 }, { "epoch": 0.5317192192192193, "grad_norm": 1.0475293610592435, "learning_rate": 9.819536154225485e-06, "loss": 0.4587, "step": 5666 }, { "epoch": 0.5318130630630631, "grad_norm": 0.974063491487285, "learning_rate": 9.81939076864262e-06, "loss": 0.4642, "step": 5667 }, { "epoch": 0.5319069069069069, "grad_norm": 1.2115404953000342, "learning_rate": 9.819245325597588e-06, "loss": 0.5025, "step": 5668 }, { "epoch": 0.5320007507507507, "grad_norm": 1.0843447705415254, "learning_rate": 9.81909982509213e-06, "loss": 0.465, "step": 5669 }, { "epoch": 0.5320945945945946, "grad_norm": 1.0923770784344444, "learning_rate": 9.818954267127975e-06, "loss": 0.4971, "step": 5670 }, { "epoch": 0.5321884384384384, "grad_norm": 1.0455359053273374, "learning_rate": 9.818808651706863e-06, "loss": 0.4014, "step": 5671 }, { "epoch": 0.5322822822822822, "grad_norm": 1.2011219664739088, "learning_rate": 9.81866297883053e-06, "loss": 0.4598, "step": 5672 }, { "epoch": 0.5323761261261262, "grad_norm": 1.0522740608102754, "learning_rate": 9.818517248500707e-06, "loss": 0.5012, "step": 5673 }, { "epoch": 0.53246996996997, "grad_norm": 1.3782265298821423, "learning_rate": 9.818371460719138e-06, "loss": 0.4681, "step": 5674 }, { "epoch": 0.5325638138138138, "grad_norm": 1.2910187597167266, "learning_rate": 9.81822561548756e-06, "loss": 0.5744, "step": 5675 }, { "epoch": 0.5326576576576577, "grad_norm": 1.299411505822768, "learning_rate": 9.81807971280771e-06, "loss": 0.5121, "step": 5676 }, { "epoch": 0.5327515015015015, "grad_norm": 2.454115446624934, "learning_rate": 9.817933752681327e-06, "loss": 0.4576, "step": 5677 }, { "epoch": 0.5328453453453453, "grad_norm": 2.3946020938932095, "learning_rate": 9.817787735110154e-06, "loss": 0.5149, "step": 5678 }, { "epoch": 0.5329391891891891, "grad_norm": 1.3143258535195492, "learning_rate": 9.817641660095931e-06, "loss": 0.5056, "step": 5679 }, { "epoch": 0.5330330330330331, "grad_norm": 1.0402348829231638, "learning_rate": 9.8174955276404e-06, "loss": 0.5042, "step": 5680 }, { "epoch": 0.5331268768768769, "grad_norm": 1.6367330354219645, "learning_rate": 9.817349337745301e-06, "loss": 0.4644, "step": 5681 }, { "epoch": 0.5332207207207207, "grad_norm": 4.0943793653857385, "learning_rate": 9.817203090412381e-06, "loss": 0.4629, "step": 5682 }, { "epoch": 0.5333145645645646, "grad_norm": 6.090017936234363, "learning_rate": 9.81705678564338e-06, "loss": 0.5124, "step": 5683 }, { "epoch": 0.5334084084084084, "grad_norm": 1.4617692805934945, "learning_rate": 9.816910423440044e-06, "loss": 0.5031, "step": 5684 }, { "epoch": 0.5335022522522522, "grad_norm": 1.1434973515520295, "learning_rate": 9.81676400380412e-06, "loss": 0.467, "step": 5685 }, { "epoch": 0.5335960960960962, "grad_norm": 1.1806199158943653, "learning_rate": 9.81661752673735e-06, "loss": 0.4558, "step": 5686 }, { "epoch": 0.53368993993994, "grad_norm": 1.244812365776017, "learning_rate": 9.816470992241482e-06, "loss": 0.5264, "step": 5687 }, { "epoch": 0.5337837837837838, "grad_norm": 1.3162481113941953, "learning_rate": 9.816324400318266e-06, "loss": 0.4953, "step": 5688 }, { "epoch": 0.5338776276276276, "grad_norm": 1.1166982597473696, "learning_rate": 9.816177750969446e-06, "loss": 0.4691, "step": 5689 }, { "epoch": 0.5339714714714715, "grad_norm": 0.8999229265493476, "learning_rate": 9.816031044196772e-06, "loss": 0.4054, "step": 5690 }, { "epoch": 0.5340653153153153, "grad_norm": 4.869505726041163, "learning_rate": 9.815884280001992e-06, "loss": 0.5077, "step": 5691 }, { "epoch": 0.5341591591591591, "grad_norm": 1.4282807416506396, "learning_rate": 9.815737458386858e-06, "loss": 0.4695, "step": 5692 }, { "epoch": 0.534253003003003, "grad_norm": 1.4842316364862551, "learning_rate": 9.815590579353118e-06, "loss": 0.4868, "step": 5693 }, { "epoch": 0.5343468468468469, "grad_norm": 1.4929672377344925, "learning_rate": 9.815443642902527e-06, "loss": 0.4599, "step": 5694 }, { "epoch": 0.5344406906906907, "grad_norm": 27.017049215348226, "learning_rate": 9.815296649036832e-06, "loss": 0.4479, "step": 5695 }, { "epoch": 0.5345345345345346, "grad_norm": 1.0268578642010255, "learning_rate": 9.815149597757791e-06, "loss": 0.4236, "step": 5696 }, { "epoch": 0.5346283783783784, "grad_norm": 1.4371541763341258, "learning_rate": 9.815002489067153e-06, "loss": 0.4577, "step": 5697 }, { "epoch": 0.5347222222222222, "grad_norm": 1.1954257265022155, "learning_rate": 9.814855322966675e-06, "loss": 0.4836, "step": 5698 }, { "epoch": 0.534816066066066, "grad_norm": 0.9827750362845529, "learning_rate": 9.814708099458109e-06, "loss": 0.4571, "step": 5699 }, { "epoch": 0.5349099099099099, "grad_norm": 1.1772157466742272, "learning_rate": 9.814560818543212e-06, "loss": 0.5701, "step": 5700 }, { "epoch": 0.5350037537537538, "grad_norm": 1.2208820908271758, "learning_rate": 9.81441348022374e-06, "loss": 0.4566, "step": 5701 }, { "epoch": 0.5350975975975976, "grad_norm": 1.0654855740406115, "learning_rate": 9.814266084501449e-06, "loss": 0.4369, "step": 5702 }, { "epoch": 0.5351914414414415, "grad_norm": 2.7548329563417773, "learning_rate": 9.814118631378097e-06, "loss": 0.4713, "step": 5703 }, { "epoch": 0.5352852852852853, "grad_norm": 1.5227987397073743, "learning_rate": 9.813971120855442e-06, "loss": 0.4698, "step": 5704 }, { "epoch": 0.5353791291291291, "grad_norm": 1.0786128970063373, "learning_rate": 9.813823552935243e-06, "loss": 0.4935, "step": 5705 }, { "epoch": 0.535472972972973, "grad_norm": 1.2664529972983989, "learning_rate": 9.81367592761926e-06, "loss": 0.4814, "step": 5706 }, { "epoch": 0.5355668168168168, "grad_norm": 1.2030579356987305, "learning_rate": 9.813528244909251e-06, "loss": 0.4867, "step": 5707 }, { "epoch": 0.5356606606606606, "grad_norm": 1.068074123451244, "learning_rate": 9.813380504806978e-06, "loss": 0.4647, "step": 5708 }, { "epoch": 0.5357545045045045, "grad_norm": 1.0668331593388993, "learning_rate": 9.813232707314204e-06, "loss": 0.5474, "step": 5709 }, { "epoch": 0.5358483483483484, "grad_norm": 1.6385900381970135, "learning_rate": 9.81308485243269e-06, "loss": 0.4899, "step": 5710 }, { "epoch": 0.5359421921921922, "grad_norm": 1.068763656873099, "learning_rate": 9.812936940164197e-06, "loss": 0.5142, "step": 5711 }, { "epoch": 0.536036036036036, "grad_norm": 1.0791233080194393, "learning_rate": 9.812788970510492e-06, "loss": 0.4909, "step": 5712 }, { "epoch": 0.5361298798798799, "grad_norm": 0.9791868228698032, "learning_rate": 9.812640943473338e-06, "loss": 0.4661, "step": 5713 }, { "epoch": 0.5362237237237237, "grad_norm": 1.455945307491939, "learning_rate": 9.812492859054499e-06, "loss": 0.517, "step": 5714 }, { "epoch": 0.5363175675675675, "grad_norm": 1.1450961516991074, "learning_rate": 9.812344717255739e-06, "loss": 0.4639, "step": 5715 }, { "epoch": 0.5364114114114115, "grad_norm": 1.112964506010904, "learning_rate": 9.812196518078828e-06, "loss": 0.5021, "step": 5716 }, { "epoch": 0.5365052552552553, "grad_norm": 1.280187464906991, "learning_rate": 9.812048261525534e-06, "loss": 0.48, "step": 5717 }, { "epoch": 0.5365990990990991, "grad_norm": 1.0776370312394323, "learning_rate": 9.81189994759762e-06, "loss": 0.5549, "step": 5718 }, { "epoch": 0.5366929429429429, "grad_norm": 1.4303061388899825, "learning_rate": 9.811751576296857e-06, "loss": 0.4921, "step": 5719 }, { "epoch": 0.5367867867867868, "grad_norm": 0.9851813708503351, "learning_rate": 9.811603147625014e-06, "loss": 0.4496, "step": 5720 }, { "epoch": 0.5368806306306306, "grad_norm": 1.1139543770328721, "learning_rate": 9.811454661583862e-06, "loss": 0.4525, "step": 5721 }, { "epoch": 0.5369744744744744, "grad_norm": 1.0005289350353168, "learning_rate": 9.811306118175166e-06, "loss": 0.5221, "step": 5722 }, { "epoch": 0.5370683183183184, "grad_norm": 1.1654183525876556, "learning_rate": 9.811157517400703e-06, "loss": 0.4998, "step": 5723 }, { "epoch": 0.5371621621621622, "grad_norm": 1.0640207830638027, "learning_rate": 9.811008859262243e-06, "loss": 0.48, "step": 5724 }, { "epoch": 0.537256006006006, "grad_norm": 1.0906562947510048, "learning_rate": 9.810860143761557e-06, "loss": 0.5039, "step": 5725 }, { "epoch": 0.5373498498498499, "grad_norm": 1.3269209807945073, "learning_rate": 9.810711370900421e-06, "loss": 0.4951, "step": 5726 }, { "epoch": 0.5374436936936937, "grad_norm": 1.44597635193823, "learning_rate": 9.810562540680605e-06, "loss": 0.4848, "step": 5727 }, { "epoch": 0.5375375375375375, "grad_norm": 1.266499723970397, "learning_rate": 9.810413653103887e-06, "loss": 0.4779, "step": 5728 }, { "epoch": 0.5376313813813813, "grad_norm": 1.2492201353188377, "learning_rate": 9.810264708172041e-06, "loss": 0.5145, "step": 5729 }, { "epoch": 0.5377252252252253, "grad_norm": 1.1460085330505099, "learning_rate": 9.810115705886842e-06, "loss": 0.4447, "step": 5730 }, { "epoch": 0.5378190690690691, "grad_norm": 1.0306309868722068, "learning_rate": 9.809966646250068e-06, "loss": 0.4434, "step": 5731 }, { "epoch": 0.5379129129129129, "grad_norm": 1.2329881730453, "learning_rate": 9.809817529263494e-06, "loss": 0.4889, "step": 5732 }, { "epoch": 0.5380067567567568, "grad_norm": 2.0857383649947794, "learning_rate": 9.8096683549289e-06, "loss": 0.4653, "step": 5733 }, { "epoch": 0.5381006006006006, "grad_norm": 1.4116300088325824, "learning_rate": 9.809519123248066e-06, "loss": 0.4841, "step": 5734 }, { "epoch": 0.5381944444444444, "grad_norm": 1.1545237450452963, "learning_rate": 9.809369834222768e-06, "loss": 0.4423, "step": 5735 }, { "epoch": 0.5382882882882883, "grad_norm": 1.3195969228145215, "learning_rate": 9.809220487854788e-06, "loss": 0.4745, "step": 5736 }, { "epoch": 0.5383821321321322, "grad_norm": 1.242721169425572, "learning_rate": 9.809071084145906e-06, "loss": 0.4461, "step": 5737 }, { "epoch": 0.538475975975976, "grad_norm": 1.0838143927432198, "learning_rate": 9.808921623097903e-06, "loss": 0.4643, "step": 5738 }, { "epoch": 0.5385698198198198, "grad_norm": 1.2904797558317083, "learning_rate": 9.808772104712563e-06, "loss": 0.5026, "step": 5739 }, { "epoch": 0.5386636636636637, "grad_norm": 1.078699934636195, "learning_rate": 9.808622528991665e-06, "loss": 0.4596, "step": 5740 }, { "epoch": 0.5387575075075075, "grad_norm": 2.7095728258050973, "learning_rate": 9.808472895936995e-06, "loss": 0.5188, "step": 5741 }, { "epoch": 0.5388513513513513, "grad_norm": 1.0038182226471397, "learning_rate": 9.80832320555034e-06, "loss": 0.5163, "step": 5742 }, { "epoch": 0.5389451951951952, "grad_norm": 1.3936329994134862, "learning_rate": 9.808173457833476e-06, "loss": 0.4587, "step": 5743 }, { "epoch": 0.539039039039039, "grad_norm": 1.055207461594761, "learning_rate": 9.808023652788197e-06, "loss": 0.4516, "step": 5744 }, { "epoch": 0.5391328828828829, "grad_norm": 1.0411798729298405, "learning_rate": 9.807873790416287e-06, "loss": 0.5104, "step": 5745 }, { "epoch": 0.5392267267267268, "grad_norm": 1.1381154844890935, "learning_rate": 9.807723870719531e-06, "loss": 0.4969, "step": 5746 }, { "epoch": 0.5393205705705706, "grad_norm": 1.6300017561065043, "learning_rate": 9.807573893699718e-06, "loss": 0.4602, "step": 5747 }, { "epoch": 0.5394144144144144, "grad_norm": 1.1069121393084778, "learning_rate": 9.807423859358635e-06, "loss": 0.4659, "step": 5748 }, { "epoch": 0.5395082582582582, "grad_norm": 1.0830613413963341, "learning_rate": 9.80727376769807e-06, "loss": 0.4852, "step": 5749 }, { "epoch": 0.5396021021021021, "grad_norm": 1.6747029452840732, "learning_rate": 9.807123618719817e-06, "loss": 0.4973, "step": 5750 }, { "epoch": 0.5396959459459459, "grad_norm": 1.757079898727289, "learning_rate": 9.806973412425662e-06, "loss": 0.4825, "step": 5751 }, { "epoch": 0.5397897897897898, "grad_norm": 0.9293356528743487, "learning_rate": 9.806823148817397e-06, "loss": 0.4356, "step": 5752 }, { "epoch": 0.5398836336336337, "grad_norm": 1.053156718802265, "learning_rate": 9.806672827896813e-06, "loss": 0.5147, "step": 5753 }, { "epoch": 0.5399774774774775, "grad_norm": 1.0823592821661572, "learning_rate": 9.806522449665706e-06, "loss": 0.4765, "step": 5754 }, { "epoch": 0.5400713213213213, "grad_norm": 1.1572936706830315, "learning_rate": 9.806372014125863e-06, "loss": 0.4972, "step": 5755 }, { "epoch": 0.5401651651651652, "grad_norm": 1.3764078812251672, "learning_rate": 9.806221521279082e-06, "loss": 0.4832, "step": 5756 }, { "epoch": 0.540259009009009, "grad_norm": 0.996388903045472, "learning_rate": 9.806070971127157e-06, "loss": 0.4362, "step": 5757 }, { "epoch": 0.5403528528528528, "grad_norm": 1.1007253561526322, "learning_rate": 9.805920363671882e-06, "loss": 0.546, "step": 5758 }, { "epoch": 0.5404466966966966, "grad_norm": 1.8622255723219938, "learning_rate": 9.805769698915052e-06, "loss": 0.4388, "step": 5759 }, { "epoch": 0.5405405405405406, "grad_norm": 1.3930619951802523, "learning_rate": 9.805618976858464e-06, "loss": 0.5003, "step": 5760 }, { "epoch": 0.5406343843843844, "grad_norm": 1.2288061583292202, "learning_rate": 9.805468197503916e-06, "loss": 0.4562, "step": 5761 }, { "epoch": 0.5407282282282282, "grad_norm": 1.1574928358211287, "learning_rate": 9.805317360853206e-06, "loss": 0.5477, "step": 5762 }, { "epoch": 0.5408220720720721, "grad_norm": 1.395419337985258, "learning_rate": 9.805166466908131e-06, "loss": 0.5219, "step": 5763 }, { "epoch": 0.5409159159159159, "grad_norm": 1.81070592381537, "learning_rate": 9.80501551567049e-06, "loss": 0.4969, "step": 5764 }, { "epoch": 0.5410097597597597, "grad_norm": 1.3346402981193586, "learning_rate": 9.804864507142083e-06, "loss": 0.4405, "step": 5765 }, { "epoch": 0.5411036036036037, "grad_norm": 0.9854468130669571, "learning_rate": 9.804713441324713e-06, "loss": 0.4633, "step": 5766 }, { "epoch": 0.5411974474474475, "grad_norm": 1.2123168572630751, "learning_rate": 9.804562318220177e-06, "loss": 0.4607, "step": 5767 }, { "epoch": 0.5412912912912913, "grad_norm": 2.3247574310442762, "learning_rate": 9.804411137830281e-06, "loss": 0.523, "step": 5768 }, { "epoch": 0.5413851351351351, "grad_norm": 2.099388948177053, "learning_rate": 9.804259900156824e-06, "loss": 0.4957, "step": 5769 }, { "epoch": 0.541478978978979, "grad_norm": 2.354166561673403, "learning_rate": 9.80410860520161e-06, "loss": 0.4111, "step": 5770 }, { "epoch": 0.5415728228228228, "grad_norm": 1.761737031327717, "learning_rate": 9.803957252966446e-06, "loss": 0.4464, "step": 5771 }, { "epoch": 0.5416666666666666, "grad_norm": 1.0668858354348068, "learning_rate": 9.803805843453133e-06, "loss": 0.447, "step": 5772 }, { "epoch": 0.5417605105105106, "grad_norm": 1.2881361787186134, "learning_rate": 9.803654376663478e-06, "loss": 0.5177, "step": 5773 }, { "epoch": 0.5418543543543544, "grad_norm": 1.0485981668706006, "learning_rate": 9.803502852599288e-06, "loss": 0.5085, "step": 5774 }, { "epoch": 0.5419481981981982, "grad_norm": 1.1935474912892792, "learning_rate": 9.803351271262365e-06, "loss": 0.5319, "step": 5775 }, { "epoch": 0.5420420420420421, "grad_norm": 1.2318500504918592, "learning_rate": 9.803199632654521e-06, "loss": 0.4939, "step": 5776 }, { "epoch": 0.5421358858858859, "grad_norm": 2.265194260276599, "learning_rate": 9.803047936777563e-06, "loss": 0.464, "step": 5777 }, { "epoch": 0.5422297297297297, "grad_norm": 1.1826111496243867, "learning_rate": 9.802896183633299e-06, "loss": 0.5326, "step": 5778 }, { "epoch": 0.5423235735735735, "grad_norm": 1.2956774871564145, "learning_rate": 9.802744373223537e-06, "loss": 0.5042, "step": 5779 }, { "epoch": 0.5424174174174174, "grad_norm": 1.143796280202212, "learning_rate": 9.80259250555009e-06, "loss": 0.444, "step": 5780 }, { "epoch": 0.5425112612612613, "grad_norm": 1.2349807014112497, "learning_rate": 9.802440580614767e-06, "loss": 0.4861, "step": 5781 }, { "epoch": 0.5426051051051051, "grad_norm": 1.060049196619811, "learning_rate": 9.80228859841938e-06, "loss": 0.4781, "step": 5782 }, { "epoch": 0.542698948948949, "grad_norm": 0.9899013986533419, "learning_rate": 9.80213655896574e-06, "loss": 0.4473, "step": 5783 }, { "epoch": 0.5427927927927928, "grad_norm": 1.2067188862973042, "learning_rate": 9.801984462255661e-06, "loss": 0.4477, "step": 5784 }, { "epoch": 0.5428866366366366, "grad_norm": 1.462679083919982, "learning_rate": 9.801832308290957e-06, "loss": 0.4582, "step": 5785 }, { "epoch": 0.5429804804804805, "grad_norm": 1.0391030319251078, "learning_rate": 9.80168009707344e-06, "loss": 0.4748, "step": 5786 }, { "epoch": 0.5430743243243243, "grad_norm": 1.1415686728542354, "learning_rate": 9.801527828604927e-06, "loss": 0.4727, "step": 5787 }, { "epoch": 0.5431681681681682, "grad_norm": 1.039523176583281, "learning_rate": 9.80137550288723e-06, "loss": 0.483, "step": 5788 }, { "epoch": 0.543262012012012, "grad_norm": 1.4617702236405778, "learning_rate": 9.801223119922172e-06, "loss": 0.4731, "step": 5789 }, { "epoch": 0.5433558558558559, "grad_norm": 1.8984914899214296, "learning_rate": 9.801070679711565e-06, "loss": 0.4667, "step": 5790 }, { "epoch": 0.5434496996996997, "grad_norm": 1.8562479355410004, "learning_rate": 9.800918182257223e-06, "loss": 0.4752, "step": 5791 }, { "epoch": 0.5435435435435435, "grad_norm": 1.1972849402268164, "learning_rate": 9.800765627560973e-06, "loss": 0.4476, "step": 5792 }, { "epoch": 0.5436373873873874, "grad_norm": 1.1389730317608342, "learning_rate": 9.800613015624628e-06, "loss": 0.4767, "step": 5793 }, { "epoch": 0.5437312312312312, "grad_norm": 1.2591547272325962, "learning_rate": 9.800460346450009e-06, "loss": 0.4816, "step": 5794 }, { "epoch": 0.543825075075075, "grad_norm": 1.204595794853574, "learning_rate": 9.800307620038937e-06, "loss": 0.4868, "step": 5795 }, { "epoch": 0.543918918918919, "grad_norm": 1.3034368588776764, "learning_rate": 9.80015483639323e-06, "loss": 0.4339, "step": 5796 }, { "epoch": 0.5440127627627628, "grad_norm": 1.1125880384753282, "learning_rate": 9.800001995514715e-06, "loss": 0.4096, "step": 5797 }, { "epoch": 0.5441066066066066, "grad_norm": 1.298227814937315, "learning_rate": 9.799849097405209e-06, "loss": 0.4481, "step": 5798 }, { "epoch": 0.5442004504504504, "grad_norm": 1.033346962152031, "learning_rate": 9.799696142066537e-06, "loss": 0.4544, "step": 5799 }, { "epoch": 0.5442942942942943, "grad_norm": 1.0333672179986306, "learning_rate": 9.799543129500524e-06, "loss": 0.424, "step": 5800 }, { "epoch": 0.5443881381381381, "grad_norm": 1.0362395139084533, "learning_rate": 9.799390059708993e-06, "loss": 0.4306, "step": 5801 }, { "epoch": 0.5444819819819819, "grad_norm": 1.0055506747784633, "learning_rate": 9.79923693269377e-06, "loss": 0.4484, "step": 5802 }, { "epoch": 0.5445758258258259, "grad_norm": 1.0017907222683864, "learning_rate": 9.79908374845668e-06, "loss": 0.4902, "step": 5803 }, { "epoch": 0.5446696696696697, "grad_norm": 0.9490520368993607, "learning_rate": 9.79893050699955e-06, "loss": 0.4091, "step": 5804 }, { "epoch": 0.5447635135135135, "grad_norm": 1.3023340071342857, "learning_rate": 9.798777208324207e-06, "loss": 0.4713, "step": 5805 }, { "epoch": 0.5448573573573574, "grad_norm": 0.9906583130019515, "learning_rate": 9.798623852432477e-06, "loss": 0.4854, "step": 5806 }, { "epoch": 0.5449512012012012, "grad_norm": 1.1084166299550395, "learning_rate": 9.79847043932619e-06, "loss": 0.4693, "step": 5807 }, { "epoch": 0.545045045045045, "grad_norm": 1.0278681148036128, "learning_rate": 9.798316969007178e-06, "loss": 0.4759, "step": 5808 }, { "epoch": 0.5451388888888888, "grad_norm": 1.0421381033663955, "learning_rate": 9.798163441477266e-06, "loss": 0.4344, "step": 5809 }, { "epoch": 0.5452327327327328, "grad_norm": 1.0383547551911223, "learning_rate": 9.798009856738286e-06, "loss": 0.5284, "step": 5810 }, { "epoch": 0.5453265765765766, "grad_norm": 1.1524887320281159, "learning_rate": 9.797856214792071e-06, "loss": 0.5062, "step": 5811 }, { "epoch": 0.5454204204204204, "grad_norm": 0.9918357833278224, "learning_rate": 9.797702515640451e-06, "loss": 0.4855, "step": 5812 }, { "epoch": 0.5455142642642643, "grad_norm": 1.0638060213286566, "learning_rate": 9.79754875928526e-06, "loss": 0.4988, "step": 5813 }, { "epoch": 0.5456081081081081, "grad_norm": 1.0404155155970942, "learning_rate": 9.79739494572833e-06, "loss": 0.5112, "step": 5814 }, { "epoch": 0.5457019519519519, "grad_norm": 1.238086120335164, "learning_rate": 9.797241074971495e-06, "loss": 0.5778, "step": 5815 }, { "epoch": 0.5457957957957958, "grad_norm": 1.1516879657019703, "learning_rate": 9.79708714701659e-06, "loss": 0.5016, "step": 5816 }, { "epoch": 0.5458896396396397, "grad_norm": 0.9343888437177121, "learning_rate": 9.796933161865451e-06, "loss": 0.5093, "step": 5817 }, { "epoch": 0.5459834834834835, "grad_norm": 1.2101591091879214, "learning_rate": 9.796779119519915e-06, "loss": 0.5151, "step": 5818 }, { "epoch": 0.5460773273273273, "grad_norm": 1.091396136394165, "learning_rate": 9.796625019981814e-06, "loss": 0.4881, "step": 5819 }, { "epoch": 0.5461711711711712, "grad_norm": 1.1174279753701428, "learning_rate": 9.796470863252991e-06, "loss": 0.4815, "step": 5820 }, { "epoch": 0.546265015015015, "grad_norm": 1.2559895291035268, "learning_rate": 9.79631664933528e-06, "loss": 0.502, "step": 5821 }, { "epoch": 0.5463588588588588, "grad_norm": 1.3570491624867993, "learning_rate": 9.79616237823052e-06, "loss": 0.479, "step": 5822 }, { "epoch": 0.5464527027027027, "grad_norm": 1.1197170083873431, "learning_rate": 9.796008049940554e-06, "loss": 0.5038, "step": 5823 }, { "epoch": 0.5465465465465466, "grad_norm": 1.7803611362630254, "learning_rate": 9.795853664467217e-06, "loss": 0.4951, "step": 5824 }, { "epoch": 0.5466403903903904, "grad_norm": 1.022111867759695, "learning_rate": 9.795699221812354e-06, "loss": 0.4573, "step": 5825 }, { "epoch": 0.5467342342342343, "grad_norm": 0.9485356097677975, "learning_rate": 9.795544721977803e-06, "loss": 0.421, "step": 5826 }, { "epoch": 0.5468280780780781, "grad_norm": 0.9809273747171209, "learning_rate": 9.79539016496541e-06, "loss": 0.4856, "step": 5827 }, { "epoch": 0.5469219219219219, "grad_norm": 1.1666490119197255, "learning_rate": 9.795235550777014e-06, "loss": 0.5061, "step": 5828 }, { "epoch": 0.5470157657657657, "grad_norm": 1.2013458571896711, "learning_rate": 9.795080879414462e-06, "loss": 0.4703, "step": 5829 }, { "epoch": 0.5471096096096096, "grad_norm": 1.3470938484576989, "learning_rate": 9.794926150879595e-06, "loss": 0.5304, "step": 5830 }, { "epoch": 0.5472034534534534, "grad_norm": 1.0952122761159782, "learning_rate": 9.794771365174258e-06, "loss": 0.4472, "step": 5831 }, { "epoch": 0.5472972972972973, "grad_norm": 1.041042635818109, "learning_rate": 9.7946165223003e-06, "loss": 0.4706, "step": 5832 }, { "epoch": 0.5473911411411412, "grad_norm": 1.0530621348512055, "learning_rate": 9.794461622259564e-06, "loss": 0.4825, "step": 5833 }, { "epoch": 0.547484984984985, "grad_norm": 0.9829270016016192, "learning_rate": 9.794306665053899e-06, "loss": 0.4683, "step": 5834 }, { "epoch": 0.5475788288288288, "grad_norm": 1.0621201788956094, "learning_rate": 9.794151650685148e-06, "loss": 0.4526, "step": 5835 }, { "epoch": 0.5476726726726727, "grad_norm": 0.9386920311526487, "learning_rate": 9.793996579155166e-06, "loss": 0.4979, "step": 5836 }, { "epoch": 0.5477665165165165, "grad_norm": 0.9338368984631926, "learning_rate": 9.793841450465797e-06, "loss": 0.4578, "step": 5837 }, { "epoch": 0.5478603603603603, "grad_norm": 1.0159657686270263, "learning_rate": 9.793686264618894e-06, "loss": 0.5036, "step": 5838 }, { "epoch": 0.5479542042042042, "grad_norm": 1.14442638485371, "learning_rate": 9.793531021616303e-06, "loss": 0.4921, "step": 5839 }, { "epoch": 0.5480480480480481, "grad_norm": 1.1527770444905605, "learning_rate": 9.79337572145988e-06, "loss": 0.4697, "step": 5840 }, { "epoch": 0.5481418918918919, "grad_norm": 1.011246282103231, "learning_rate": 9.793220364151471e-06, "loss": 0.5012, "step": 5841 }, { "epoch": 0.5482357357357357, "grad_norm": 0.9874689433268972, "learning_rate": 9.793064949692933e-06, "loss": 0.523, "step": 5842 }, { "epoch": 0.5483295795795796, "grad_norm": 1.0790105597785749, "learning_rate": 9.792909478086118e-06, "loss": 0.5154, "step": 5843 }, { "epoch": 0.5484234234234234, "grad_norm": 1.2044460121727285, "learning_rate": 9.792753949332879e-06, "loss": 0.4932, "step": 5844 }, { "epoch": 0.5485172672672672, "grad_norm": 0.9765400917068087, "learning_rate": 9.792598363435071e-06, "loss": 0.515, "step": 5845 }, { "epoch": 0.5486111111111112, "grad_norm": 1.0100160851450946, "learning_rate": 9.792442720394547e-06, "loss": 0.4499, "step": 5846 }, { "epoch": 0.548704954954955, "grad_norm": 1.1666142323315905, "learning_rate": 9.792287020213164e-06, "loss": 0.4818, "step": 5847 }, { "epoch": 0.5487987987987988, "grad_norm": 1.065572484797132, "learning_rate": 9.792131262892781e-06, "loss": 0.4255, "step": 5848 }, { "epoch": 0.5488926426426426, "grad_norm": 1.111915884090612, "learning_rate": 9.791975448435253e-06, "loss": 0.4654, "step": 5849 }, { "epoch": 0.5489864864864865, "grad_norm": 1.040452467904575, "learning_rate": 9.791819576842436e-06, "loss": 0.4741, "step": 5850 }, { "epoch": 0.5490803303303303, "grad_norm": 1.643584221016417, "learning_rate": 9.791663648116193e-06, "loss": 0.5272, "step": 5851 }, { "epoch": 0.5491741741741741, "grad_norm": 1.158774343655138, "learning_rate": 9.791507662258377e-06, "loss": 0.4732, "step": 5852 }, { "epoch": 0.5492680180180181, "grad_norm": 0.9856514279363752, "learning_rate": 9.791351619270853e-06, "loss": 0.4744, "step": 5853 }, { "epoch": 0.5493618618618619, "grad_norm": 0.9262614093020568, "learning_rate": 9.79119551915548e-06, "loss": 0.441, "step": 5854 }, { "epoch": 0.5494557057057057, "grad_norm": 1.3362789724004067, "learning_rate": 9.791039361914119e-06, "loss": 0.5112, "step": 5855 }, { "epoch": 0.5495495495495496, "grad_norm": 1.183277550049234, "learning_rate": 9.790883147548631e-06, "loss": 0.4805, "step": 5856 }, { "epoch": 0.5496433933933934, "grad_norm": 1.036334928452029, "learning_rate": 9.790726876060879e-06, "loss": 0.4952, "step": 5857 }, { "epoch": 0.5497372372372372, "grad_norm": 1.0642808571252216, "learning_rate": 9.790570547452727e-06, "loss": 0.481, "step": 5858 }, { "epoch": 0.549831081081081, "grad_norm": 1.1150825463095444, "learning_rate": 9.790414161726039e-06, "loss": 0.4925, "step": 5859 }, { "epoch": 0.549924924924925, "grad_norm": 1.1548312267043657, "learning_rate": 9.790257718882679e-06, "loss": 0.5526, "step": 5860 }, { "epoch": 0.5500187687687688, "grad_norm": 0.9711467637396409, "learning_rate": 9.790101218924512e-06, "loss": 0.4116, "step": 5861 }, { "epoch": 0.5501126126126126, "grad_norm": 0.9399334982167906, "learning_rate": 9.789944661853406e-06, "loss": 0.4918, "step": 5862 }, { "epoch": 0.5502064564564565, "grad_norm": 1.3321766109626718, "learning_rate": 9.789788047671226e-06, "loss": 0.516, "step": 5863 }, { "epoch": 0.5503003003003003, "grad_norm": 1.13822112407494, "learning_rate": 9.789631376379836e-06, "loss": 0.5305, "step": 5864 }, { "epoch": 0.5503941441441441, "grad_norm": 1.0711241931600963, "learning_rate": 9.78947464798111e-06, "loss": 0.4994, "step": 5865 }, { "epoch": 0.550487987987988, "grad_norm": 2.627679447334794, "learning_rate": 9.789317862476914e-06, "loss": 0.5097, "step": 5866 }, { "epoch": 0.5505818318318318, "grad_norm": 1.207369090262856, "learning_rate": 9.789161019869117e-06, "loss": 0.5194, "step": 5867 }, { "epoch": 0.5506756756756757, "grad_norm": 1.2050570224435413, "learning_rate": 9.789004120159589e-06, "loss": 0.5484, "step": 5868 }, { "epoch": 0.5507695195195195, "grad_norm": 21.491887885455217, "learning_rate": 9.788847163350203e-06, "loss": 0.4675, "step": 5869 }, { "epoch": 0.5508633633633634, "grad_norm": 1.3776325115090444, "learning_rate": 9.788690149442829e-06, "loss": 0.5005, "step": 5870 }, { "epoch": 0.5509572072072072, "grad_norm": 1.0720474789703722, "learning_rate": 9.788533078439335e-06, "loss": 0.5049, "step": 5871 }, { "epoch": 0.551051051051051, "grad_norm": 1.0791221400059336, "learning_rate": 9.7883759503416e-06, "loss": 0.5144, "step": 5872 }, { "epoch": 0.5511448948948949, "grad_norm": 1.1438134553806265, "learning_rate": 9.788218765151495e-06, "loss": 0.4899, "step": 5873 }, { "epoch": 0.5512387387387387, "grad_norm": 1.108952020248136, "learning_rate": 9.788061522870892e-06, "loss": 0.5132, "step": 5874 }, { "epoch": 0.5513325825825826, "grad_norm": 0.9910182322681518, "learning_rate": 9.787904223501672e-06, "loss": 0.4346, "step": 5875 }, { "epoch": 0.5514264264264265, "grad_norm": 1.0747525041292016, "learning_rate": 9.787746867045704e-06, "loss": 0.4864, "step": 5876 }, { "epoch": 0.5515202702702703, "grad_norm": 1.1002026593944199, "learning_rate": 9.787589453504866e-06, "loss": 0.4962, "step": 5877 }, { "epoch": 0.5516141141141141, "grad_norm": 1.1086883407348553, "learning_rate": 9.787431982881037e-06, "loss": 0.5043, "step": 5878 }, { "epoch": 0.5517079579579579, "grad_norm": 1.0261229328302601, "learning_rate": 9.787274455176093e-06, "loss": 0.4426, "step": 5879 }, { "epoch": 0.5518018018018018, "grad_norm": 1.1518538769053248, "learning_rate": 9.78711687039191e-06, "loss": 0.5026, "step": 5880 }, { "epoch": 0.5518956456456456, "grad_norm": 1.064310509777776, "learning_rate": 9.786959228530373e-06, "loss": 0.5397, "step": 5881 }, { "epoch": 0.5519894894894894, "grad_norm": 1.3302933432656294, "learning_rate": 9.786801529593354e-06, "loss": 0.5297, "step": 5882 }, { "epoch": 0.5520833333333334, "grad_norm": 1.0586998367336424, "learning_rate": 9.786643773582739e-06, "loss": 0.5318, "step": 5883 }, { "epoch": 0.5521771771771772, "grad_norm": 2.4460094277318194, "learning_rate": 9.786485960500408e-06, "loss": 0.4889, "step": 5884 }, { "epoch": 0.552271021021021, "grad_norm": 1.1948235264543328, "learning_rate": 9.786328090348239e-06, "loss": 0.501, "step": 5885 }, { "epoch": 0.5523648648648649, "grad_norm": 1.0183832990438106, "learning_rate": 9.786170163128116e-06, "loss": 0.4274, "step": 5886 }, { "epoch": 0.5524587087087087, "grad_norm": 1.1467107018649147, "learning_rate": 9.786012178841925e-06, "loss": 0.4592, "step": 5887 }, { "epoch": 0.5525525525525525, "grad_norm": 1.1439250653334052, "learning_rate": 9.785854137491548e-06, "loss": 0.5269, "step": 5888 }, { "epoch": 0.5526463963963963, "grad_norm": 1.046153339357065, "learning_rate": 9.785696039078869e-06, "loss": 0.4982, "step": 5889 }, { "epoch": 0.5527402402402403, "grad_norm": 1.2323870436039224, "learning_rate": 9.785537883605771e-06, "loss": 0.4925, "step": 5890 }, { "epoch": 0.5528340840840841, "grad_norm": 1.0413981205975458, "learning_rate": 9.785379671074142e-06, "loss": 0.483, "step": 5891 }, { "epoch": 0.5529279279279279, "grad_norm": 1.098552441111042, "learning_rate": 9.78522140148587e-06, "loss": 0.5247, "step": 5892 }, { "epoch": 0.5530217717717718, "grad_norm": 0.9952379221442695, "learning_rate": 9.785063074842838e-06, "loss": 0.4756, "step": 5893 }, { "epoch": 0.5531156156156156, "grad_norm": 1.076385441038388, "learning_rate": 9.784904691146936e-06, "loss": 0.4378, "step": 5894 }, { "epoch": 0.5532094594594594, "grad_norm": 1.361610206129767, "learning_rate": 9.784746250400051e-06, "loss": 0.4914, "step": 5895 }, { "epoch": 0.5533033033033034, "grad_norm": 0.9908023750528382, "learning_rate": 9.784587752604075e-06, "loss": 0.4116, "step": 5896 }, { "epoch": 0.5533971471471472, "grad_norm": 1.113184987168741, "learning_rate": 9.784429197760897e-06, "loss": 0.4918, "step": 5897 }, { "epoch": 0.553490990990991, "grad_norm": 1.1184077979781455, "learning_rate": 9.784270585872407e-06, "loss": 0.4578, "step": 5898 }, { "epoch": 0.5535848348348348, "grad_norm": 0.9852019381267015, "learning_rate": 9.784111916940493e-06, "loss": 0.4555, "step": 5899 }, { "epoch": 0.5536786786786787, "grad_norm": 1.1098336917638012, "learning_rate": 9.78395319096705e-06, "loss": 0.5059, "step": 5900 }, { "epoch": 0.5537725225225225, "grad_norm": 1.1939491844961372, "learning_rate": 9.783794407953973e-06, "loss": 0.5314, "step": 5901 }, { "epoch": 0.5538663663663663, "grad_norm": 1.0361802879523125, "learning_rate": 9.78363556790315e-06, "loss": 0.5129, "step": 5902 }, { "epoch": 0.5539602102102102, "grad_norm": 0.9566429613799619, "learning_rate": 9.783476670816477e-06, "loss": 0.4296, "step": 5903 }, { "epoch": 0.5540540540540541, "grad_norm": 1.1147633738511769, "learning_rate": 9.783317716695849e-06, "loss": 0.5371, "step": 5904 }, { "epoch": 0.5541478978978979, "grad_norm": 1.0852545446793678, "learning_rate": 9.78315870554316e-06, "loss": 0.4886, "step": 5905 }, { "epoch": 0.5542417417417418, "grad_norm": 0.879484041886273, "learning_rate": 9.782999637360307e-06, "loss": 0.4784, "step": 5906 }, { "epoch": 0.5543355855855856, "grad_norm": 1.8429827340800593, "learning_rate": 9.782840512149188e-06, "loss": 0.4895, "step": 5907 }, { "epoch": 0.5544294294294294, "grad_norm": 1.2768542013995596, "learning_rate": 9.782681329911698e-06, "loss": 0.5182, "step": 5908 }, { "epoch": 0.5545232732732732, "grad_norm": 1.0724589770520223, "learning_rate": 9.782522090649735e-06, "loss": 0.4932, "step": 5909 }, { "epoch": 0.5546171171171171, "grad_norm": 1.5604158182144596, "learning_rate": 9.782362794365198e-06, "loss": 0.3938, "step": 5910 }, { "epoch": 0.554710960960961, "grad_norm": 1.0658806388103679, "learning_rate": 9.782203441059987e-06, "loss": 0.549, "step": 5911 }, { "epoch": 0.5548048048048048, "grad_norm": 1.2594729058975074, "learning_rate": 9.782044030736003e-06, "loss": 0.4683, "step": 5912 }, { "epoch": 0.5548986486486487, "grad_norm": 1.3929392989928924, "learning_rate": 9.781884563395144e-06, "loss": 0.4857, "step": 5913 }, { "epoch": 0.5549924924924925, "grad_norm": 1.04968870924077, "learning_rate": 9.781725039039311e-06, "loss": 0.5125, "step": 5914 }, { "epoch": 0.5550863363363363, "grad_norm": 1.5767927476619144, "learning_rate": 9.78156545767041e-06, "loss": 0.4592, "step": 5915 }, { "epoch": 0.5551801801801802, "grad_norm": 1.1098552987079295, "learning_rate": 9.78140581929034e-06, "loss": 0.4741, "step": 5916 }, { "epoch": 0.555274024024024, "grad_norm": 1.2677315510111948, "learning_rate": 9.781246123901004e-06, "loss": 0.4617, "step": 5917 }, { "epoch": 0.5553678678678678, "grad_norm": 5.388681675703586, "learning_rate": 9.78108637150431e-06, "loss": 0.4986, "step": 5918 }, { "epoch": 0.5554617117117117, "grad_norm": 0.9937871010547977, "learning_rate": 9.780926562102163e-06, "loss": 0.4822, "step": 5919 }, { "epoch": 0.5555555555555556, "grad_norm": 1.06937378501505, "learning_rate": 9.780766695696462e-06, "loss": 0.4834, "step": 5920 }, { "epoch": 0.5556493993993994, "grad_norm": 0.8695867631007386, "learning_rate": 9.780606772289117e-06, "loss": 0.4152, "step": 5921 }, { "epoch": 0.5557432432432432, "grad_norm": 1.35297802428503, "learning_rate": 9.780446791882037e-06, "loss": 0.4858, "step": 5922 }, { "epoch": 0.5558370870870871, "grad_norm": 1.044481706086128, "learning_rate": 9.780286754477127e-06, "loss": 0.5142, "step": 5923 }, { "epoch": 0.5559309309309309, "grad_norm": 2.567607825618351, "learning_rate": 9.780126660076293e-06, "loss": 0.4568, "step": 5924 }, { "epoch": 0.5560247747747747, "grad_norm": 1.028354798143115, "learning_rate": 9.779966508681449e-06, "loss": 0.4505, "step": 5925 }, { "epoch": 0.5561186186186187, "grad_norm": 1.222996482403991, "learning_rate": 9.7798063002945e-06, "loss": 0.4847, "step": 5926 }, { "epoch": 0.5562124624624625, "grad_norm": 2.0756145011518394, "learning_rate": 9.779646034917359e-06, "loss": 0.496, "step": 5927 }, { "epoch": 0.5563063063063063, "grad_norm": 0.9788816585108903, "learning_rate": 9.779485712551934e-06, "loss": 0.4792, "step": 5928 }, { "epoch": 0.5564001501501501, "grad_norm": 1.0023206395350295, "learning_rate": 9.77932533320014e-06, "loss": 0.428, "step": 5929 }, { "epoch": 0.556493993993994, "grad_norm": 1.3660335358531959, "learning_rate": 9.779164896863888e-06, "loss": 0.4793, "step": 5930 }, { "epoch": 0.5565878378378378, "grad_norm": 0.9910717882107402, "learning_rate": 9.779004403545088e-06, "loss": 0.4996, "step": 5931 }, { "epoch": 0.5566816816816816, "grad_norm": 1.1033543256373786, "learning_rate": 9.778843853245659e-06, "loss": 0.4925, "step": 5932 }, { "epoch": 0.5567755255255256, "grad_norm": 1.3786890970155332, "learning_rate": 9.778683245967511e-06, "loss": 0.4646, "step": 5933 }, { "epoch": 0.5568693693693694, "grad_norm": 0.9588151273008135, "learning_rate": 9.77852258171256e-06, "loss": 0.5173, "step": 5934 }, { "epoch": 0.5569632132132132, "grad_norm": 0.9845806746880748, "learning_rate": 9.778361860482722e-06, "loss": 0.4882, "step": 5935 }, { "epoch": 0.5570570570570571, "grad_norm": 1.307829975694823, "learning_rate": 9.778201082279913e-06, "loss": 0.4993, "step": 5936 }, { "epoch": 0.5571509009009009, "grad_norm": 1.2175387827469766, "learning_rate": 9.778040247106051e-06, "loss": 0.532, "step": 5937 }, { "epoch": 0.5572447447447447, "grad_norm": 1.116305011624619, "learning_rate": 9.77787935496305e-06, "loss": 0.4833, "step": 5938 }, { "epoch": 0.5573385885885885, "grad_norm": 1.2016648781820998, "learning_rate": 9.777718405852835e-06, "loss": 0.5204, "step": 5939 }, { "epoch": 0.5574324324324325, "grad_norm": 1.191947074217586, "learning_rate": 9.77755739977732e-06, "loss": 0.5077, "step": 5940 }, { "epoch": 0.5575262762762763, "grad_norm": 1.015951381973472, "learning_rate": 9.777396336738426e-06, "loss": 0.4974, "step": 5941 }, { "epoch": 0.5576201201201201, "grad_norm": 1.0674183790874656, "learning_rate": 9.777235216738071e-06, "loss": 0.4303, "step": 5942 }, { "epoch": 0.557713963963964, "grad_norm": 1.0080310600407794, "learning_rate": 9.77707403977818e-06, "loss": 0.4794, "step": 5943 }, { "epoch": 0.5578078078078078, "grad_norm": 1.1622683000722127, "learning_rate": 9.776912805860674e-06, "loss": 0.494, "step": 5944 }, { "epoch": 0.5579016516516516, "grad_norm": 1.1109246742483325, "learning_rate": 9.776751514987472e-06, "loss": 0.518, "step": 5945 }, { "epoch": 0.5579954954954955, "grad_norm": 1.1531217356591907, "learning_rate": 9.7765901671605e-06, "loss": 0.4812, "step": 5946 }, { "epoch": 0.5580893393393394, "grad_norm": 1.109196111979492, "learning_rate": 9.776428762381682e-06, "loss": 0.5627, "step": 5947 }, { "epoch": 0.5581831831831832, "grad_norm": 1.1066472219120769, "learning_rate": 9.776267300652942e-06, "loss": 0.4702, "step": 5948 }, { "epoch": 0.558277027027027, "grad_norm": 1.1843747067367263, "learning_rate": 9.776105781976205e-06, "loss": 0.5187, "step": 5949 }, { "epoch": 0.5583708708708709, "grad_norm": 1.0957949452690456, "learning_rate": 9.775944206353395e-06, "loss": 0.4741, "step": 5950 }, { "epoch": 0.5584647147147147, "grad_norm": 1.116149050057396, "learning_rate": 9.775782573786442e-06, "loss": 0.5341, "step": 5951 }, { "epoch": 0.5585585585585585, "grad_norm": 1.0705445991954865, "learning_rate": 9.77562088427727e-06, "loss": 0.4997, "step": 5952 }, { "epoch": 0.5586524024024024, "grad_norm": 3.341463681469783, "learning_rate": 9.77545913782781e-06, "loss": 0.4611, "step": 5953 }, { "epoch": 0.5587462462462462, "grad_norm": 2.2090136362375414, "learning_rate": 9.775297334439986e-06, "loss": 0.4844, "step": 5954 }, { "epoch": 0.5588400900900901, "grad_norm": 1.3628049394483706, "learning_rate": 9.775135474115731e-06, "loss": 0.4897, "step": 5955 }, { "epoch": 0.558933933933934, "grad_norm": 1.1564832868399617, "learning_rate": 9.774973556856974e-06, "loss": 0.4678, "step": 5956 }, { "epoch": 0.5590277777777778, "grad_norm": 1.380043982931106, "learning_rate": 9.774811582665647e-06, "loss": 0.5232, "step": 5957 }, { "epoch": 0.5591216216216216, "grad_norm": 1.086388283911513, "learning_rate": 9.774649551543676e-06, "loss": 0.4401, "step": 5958 }, { "epoch": 0.5592154654654654, "grad_norm": 0.9846655816834977, "learning_rate": 9.774487463492999e-06, "loss": 0.466, "step": 5959 }, { "epoch": 0.5593093093093093, "grad_norm": 1.18436382622395, "learning_rate": 9.774325318515544e-06, "loss": 0.5349, "step": 5960 }, { "epoch": 0.5594031531531531, "grad_norm": 1.0177181941703852, "learning_rate": 9.77416311661325e-06, "loss": 0.4332, "step": 5961 }, { "epoch": 0.559496996996997, "grad_norm": 1.2929378667712377, "learning_rate": 9.774000857788045e-06, "loss": 0.5104, "step": 5962 }, { "epoch": 0.5595908408408409, "grad_norm": 1.1317044899115172, "learning_rate": 9.773838542041865e-06, "loss": 0.4614, "step": 5963 }, { "epoch": 0.5596846846846847, "grad_norm": 1.47932073160333, "learning_rate": 9.773676169376647e-06, "loss": 0.4557, "step": 5964 }, { "epoch": 0.5597785285285285, "grad_norm": 1.1877629094322129, "learning_rate": 9.773513739794325e-06, "loss": 0.4941, "step": 5965 }, { "epoch": 0.5598723723723724, "grad_norm": 1.3073964495407238, "learning_rate": 9.773351253296839e-06, "loss": 0.515, "step": 5966 }, { "epoch": 0.5599662162162162, "grad_norm": 5.442744602537554, "learning_rate": 9.773188709886123e-06, "loss": 0.5175, "step": 5967 }, { "epoch": 0.56006006006006, "grad_norm": 1.2592118202905027, "learning_rate": 9.773026109564117e-06, "loss": 0.563, "step": 5968 }, { "epoch": 0.5601539039039038, "grad_norm": 1.7681508171469802, "learning_rate": 9.772863452332758e-06, "loss": 0.5089, "step": 5969 }, { "epoch": 0.5602477477477478, "grad_norm": 1.0310505840741848, "learning_rate": 9.772700738193986e-06, "loss": 0.4835, "step": 5970 }, { "epoch": 0.5603415915915916, "grad_norm": 1.132603500073375, "learning_rate": 9.772537967149742e-06, "loss": 0.4667, "step": 5971 }, { "epoch": 0.5604354354354354, "grad_norm": 0.9775006816186321, "learning_rate": 9.772375139201967e-06, "loss": 0.4857, "step": 5972 }, { "epoch": 0.5605292792792793, "grad_norm": 1.1215150220820171, "learning_rate": 9.7722122543526e-06, "loss": 0.4939, "step": 5973 }, { "epoch": 0.5606231231231231, "grad_norm": 0.9357673803762584, "learning_rate": 9.772049312603587e-06, "loss": 0.4403, "step": 5974 }, { "epoch": 0.5607169669669669, "grad_norm": 1.2791679548283996, "learning_rate": 9.771886313956867e-06, "loss": 0.4732, "step": 5975 }, { "epoch": 0.5608108108108109, "grad_norm": 0.9772401166156419, "learning_rate": 9.771723258414383e-06, "loss": 0.5239, "step": 5976 }, { "epoch": 0.5609046546546547, "grad_norm": 1.0363453482954417, "learning_rate": 9.771560145978084e-06, "loss": 0.4685, "step": 5977 }, { "epoch": 0.5609984984984985, "grad_norm": 1.0682998404392237, "learning_rate": 9.77139697664991e-06, "loss": 0.4525, "step": 5978 }, { "epoch": 0.5610923423423423, "grad_norm": 0.9967944207035377, "learning_rate": 9.771233750431808e-06, "loss": 0.5232, "step": 5979 }, { "epoch": 0.5611861861861862, "grad_norm": 1.0259670744801657, "learning_rate": 9.771070467325726e-06, "loss": 0.4993, "step": 5980 }, { "epoch": 0.56128003003003, "grad_norm": 1.204127080346622, "learning_rate": 9.770907127333608e-06, "loss": 0.5191, "step": 5981 }, { "epoch": 0.5613738738738738, "grad_norm": 1.3727943968836596, "learning_rate": 9.770743730457404e-06, "loss": 0.5252, "step": 5982 }, { "epoch": 0.5614677177177178, "grad_norm": 0.9955596303996539, "learning_rate": 9.77058027669906e-06, "loss": 0.495, "step": 5983 }, { "epoch": 0.5615615615615616, "grad_norm": 1.2606757225821015, "learning_rate": 9.770416766060527e-06, "loss": 0.5336, "step": 5984 }, { "epoch": 0.5616554054054054, "grad_norm": 1.1591075049632604, "learning_rate": 9.77025319854375e-06, "loss": 0.4395, "step": 5985 }, { "epoch": 0.5617492492492493, "grad_norm": 1.044924254417148, "learning_rate": 9.770089574150687e-06, "loss": 0.4728, "step": 5986 }, { "epoch": 0.5618430930930931, "grad_norm": 1.071719050422615, "learning_rate": 9.769925892883282e-06, "loss": 0.4951, "step": 5987 }, { "epoch": 0.5619369369369369, "grad_norm": 1.144638882034013, "learning_rate": 9.769762154743492e-06, "loss": 0.4937, "step": 5988 }, { "epoch": 0.5620307807807807, "grad_norm": 1.0771815771901878, "learning_rate": 9.769598359733263e-06, "loss": 0.4638, "step": 5989 }, { "epoch": 0.5621246246246246, "grad_norm": 0.9614898171545229, "learning_rate": 9.769434507854555e-06, "loss": 0.5166, "step": 5990 }, { "epoch": 0.5622184684684685, "grad_norm": 1.1078274309814977, "learning_rate": 9.769270599109315e-06, "loss": 0.4361, "step": 5991 }, { "epoch": 0.5623123123123123, "grad_norm": 0.9588383782179889, "learning_rate": 9.769106633499501e-06, "loss": 0.4465, "step": 5992 }, { "epoch": 0.5624061561561562, "grad_norm": 1.179901011896747, "learning_rate": 9.768942611027068e-06, "loss": 0.5204, "step": 5993 }, { "epoch": 0.5625, "grad_norm": 0.9868705741423914, "learning_rate": 9.76877853169397e-06, "loss": 0.4308, "step": 5994 }, { "epoch": 0.5625938438438438, "grad_norm": 1.3279627189224477, "learning_rate": 9.768614395502165e-06, "loss": 0.5063, "step": 5995 }, { "epoch": 0.5626876876876877, "grad_norm": 5.935406771388594, "learning_rate": 9.76845020245361e-06, "loss": 0.4589, "step": 5996 }, { "epoch": 0.5627815315315315, "grad_norm": 1.0900533967511836, "learning_rate": 9.76828595255026e-06, "loss": 0.4784, "step": 5997 }, { "epoch": 0.5628753753753754, "grad_norm": 1.4870347420706054, "learning_rate": 9.768121645794078e-06, "loss": 0.5047, "step": 5998 }, { "epoch": 0.5629692192192193, "grad_norm": 5.528302092089786, "learning_rate": 9.76795728218702e-06, "loss": 0.5115, "step": 5999 }, { "epoch": 0.5630630630630631, "grad_norm": 1.2866546789832376, "learning_rate": 9.767792861731045e-06, "loss": 0.5222, "step": 6000 }, { "epoch": 0.5631569069069069, "grad_norm": 1.0959630633864441, "learning_rate": 9.767628384428113e-06, "loss": 0.4823, "step": 6001 }, { "epoch": 0.5632507507507507, "grad_norm": 1.0780023875011848, "learning_rate": 9.76746385028019e-06, "loss": 0.537, "step": 6002 }, { "epoch": 0.5633445945945946, "grad_norm": 0.9241983428864303, "learning_rate": 9.767299259289234e-06, "loss": 0.4506, "step": 6003 }, { "epoch": 0.5634384384384384, "grad_norm": 1.2796071773960331, "learning_rate": 9.767134611457206e-06, "loss": 0.5077, "step": 6004 }, { "epoch": 0.5635322822822822, "grad_norm": 1.4293928178265125, "learning_rate": 9.766969906786072e-06, "loss": 0.5003, "step": 6005 }, { "epoch": 0.5636261261261262, "grad_norm": 1.1125386022968575, "learning_rate": 9.766805145277794e-06, "loss": 0.4701, "step": 6006 }, { "epoch": 0.56371996996997, "grad_norm": 1.371564759273828, "learning_rate": 9.766640326934338e-06, "loss": 0.51, "step": 6007 }, { "epoch": 0.5638138138138138, "grad_norm": 1.2202864475555146, "learning_rate": 9.766475451757667e-06, "loss": 0.4724, "step": 6008 }, { "epoch": 0.5639076576576577, "grad_norm": 1.1214624617014126, "learning_rate": 9.766310519749751e-06, "loss": 0.4805, "step": 6009 }, { "epoch": 0.5640015015015015, "grad_norm": 1.0737066584448005, "learning_rate": 9.766145530912551e-06, "loss": 0.4802, "step": 6010 }, { "epoch": 0.5640953453453453, "grad_norm": 1.0005875915743265, "learning_rate": 9.76598048524804e-06, "loss": 0.4922, "step": 6011 }, { "epoch": 0.5641891891891891, "grad_norm": 1.217036562044856, "learning_rate": 9.76581538275818e-06, "loss": 0.4855, "step": 6012 }, { "epoch": 0.5642830330330331, "grad_norm": 1.201960837735583, "learning_rate": 9.765650223444942e-06, "loss": 0.4955, "step": 6013 }, { "epoch": 0.5643768768768769, "grad_norm": 1.0908556029508851, "learning_rate": 9.765485007310296e-06, "loss": 0.4944, "step": 6014 }, { "epoch": 0.5644707207207207, "grad_norm": 1.2653792857082986, "learning_rate": 9.765319734356213e-06, "loss": 0.4607, "step": 6015 }, { "epoch": 0.5645645645645646, "grad_norm": 1.0926786247128843, "learning_rate": 9.76515440458466e-06, "loss": 0.494, "step": 6016 }, { "epoch": 0.5646584084084084, "grad_norm": 1.6231935245269187, "learning_rate": 9.76498901799761e-06, "loss": 0.5052, "step": 6017 }, { "epoch": 0.5647522522522522, "grad_norm": 1.7402662832163698, "learning_rate": 9.764823574597037e-06, "loss": 0.4817, "step": 6018 }, { "epoch": 0.5648460960960962, "grad_norm": 0.9832798680941696, "learning_rate": 9.764658074384912e-06, "loss": 0.4868, "step": 6019 }, { "epoch": 0.56493993993994, "grad_norm": 1.7466193873429512, "learning_rate": 9.764492517363207e-06, "loss": 0.4477, "step": 6020 }, { "epoch": 0.5650337837837838, "grad_norm": 0.926392600385822, "learning_rate": 9.764326903533896e-06, "loss": 0.4475, "step": 6021 }, { "epoch": 0.5651276276276276, "grad_norm": 0.9579611175284706, "learning_rate": 9.764161232898955e-06, "loss": 0.5117, "step": 6022 }, { "epoch": 0.5652214714714715, "grad_norm": 1.6391338976512602, "learning_rate": 9.76399550546036e-06, "loss": 0.4881, "step": 6023 }, { "epoch": 0.5653153153153153, "grad_norm": 1.1000209250447854, "learning_rate": 9.763829721220085e-06, "loss": 0.4538, "step": 6024 }, { "epoch": 0.5654091591591591, "grad_norm": 1.5489988391839695, "learning_rate": 9.763663880180108e-06, "loss": 0.5041, "step": 6025 }, { "epoch": 0.565503003003003, "grad_norm": 1.050217847145962, "learning_rate": 9.763497982342406e-06, "loss": 0.5184, "step": 6026 }, { "epoch": 0.5655968468468469, "grad_norm": 1.2032652485490347, "learning_rate": 9.763332027708956e-06, "loss": 0.4507, "step": 6027 }, { "epoch": 0.5656906906906907, "grad_norm": 1.1820306269438403, "learning_rate": 9.763166016281739e-06, "loss": 0.5449, "step": 6028 }, { "epoch": 0.5657845345345346, "grad_norm": 1.103289115054707, "learning_rate": 9.762999948062733e-06, "loss": 0.5178, "step": 6029 }, { "epoch": 0.5658783783783784, "grad_norm": 1.4797754600576192, "learning_rate": 9.762833823053917e-06, "loss": 0.4992, "step": 6030 }, { "epoch": 0.5659722222222222, "grad_norm": 1.204718056593944, "learning_rate": 9.762667641257272e-06, "loss": 0.4827, "step": 6031 }, { "epoch": 0.566066066066066, "grad_norm": 1.1554968185838974, "learning_rate": 9.762501402674782e-06, "loss": 0.4886, "step": 6032 }, { "epoch": 0.5661599099099099, "grad_norm": 1.4325862105875256, "learning_rate": 9.762335107308428e-06, "loss": 0.4511, "step": 6033 }, { "epoch": 0.5662537537537538, "grad_norm": 1.0326251272766263, "learning_rate": 9.762168755160188e-06, "loss": 0.5198, "step": 6034 }, { "epoch": 0.5663475975975976, "grad_norm": 1.0032968832054452, "learning_rate": 9.762002346232054e-06, "loss": 0.4655, "step": 6035 }, { "epoch": 0.5664414414414415, "grad_norm": 1.081849942032585, "learning_rate": 9.761835880526003e-06, "loss": 0.4642, "step": 6036 }, { "epoch": 0.5665352852852853, "grad_norm": 0.9287280262712083, "learning_rate": 9.761669358044022e-06, "loss": 0.4479, "step": 6037 }, { "epoch": 0.5666291291291291, "grad_norm": 0.9540935174251306, "learning_rate": 9.761502778788099e-06, "loss": 0.4508, "step": 6038 }, { "epoch": 0.566722972972973, "grad_norm": 2.841206506145289, "learning_rate": 9.761336142760216e-06, "loss": 0.4487, "step": 6039 }, { "epoch": 0.5668168168168168, "grad_norm": 1.0900720730873092, "learning_rate": 9.761169449962363e-06, "loss": 0.5234, "step": 6040 }, { "epoch": 0.5669106606606606, "grad_norm": 1.0532080691298116, "learning_rate": 9.761002700396525e-06, "loss": 0.5106, "step": 6041 }, { "epoch": 0.5670045045045045, "grad_norm": 0.9342588795485609, "learning_rate": 9.760835894064693e-06, "loss": 0.5039, "step": 6042 }, { "epoch": 0.5670983483483484, "grad_norm": 1.1481827324634635, "learning_rate": 9.760669030968854e-06, "loss": 0.4805, "step": 6043 }, { "epoch": 0.5671921921921922, "grad_norm": 1.260404559583579, "learning_rate": 9.760502111110997e-06, "loss": 0.46, "step": 6044 }, { "epoch": 0.567286036036036, "grad_norm": 1.3342186149851518, "learning_rate": 9.760335134493112e-06, "loss": 0.4843, "step": 6045 }, { "epoch": 0.5673798798798799, "grad_norm": 1.2691691432047592, "learning_rate": 9.760168101117193e-06, "loss": 0.4775, "step": 6046 }, { "epoch": 0.5674737237237237, "grad_norm": 1.034558149221816, "learning_rate": 9.760001010985229e-06, "loss": 0.4779, "step": 6047 }, { "epoch": 0.5675675675675675, "grad_norm": 1.2224190459622355, "learning_rate": 9.759833864099212e-06, "loss": 0.4445, "step": 6048 }, { "epoch": 0.5676614114114115, "grad_norm": 1.010117003657005, "learning_rate": 9.759666660461138e-06, "loss": 0.4555, "step": 6049 }, { "epoch": 0.5677552552552553, "grad_norm": 1.156327009112543, "learning_rate": 9.759499400072996e-06, "loss": 0.4861, "step": 6050 }, { "epoch": 0.5678490990990991, "grad_norm": 0.9905816507780675, "learning_rate": 9.759332082936783e-06, "loss": 0.5047, "step": 6051 }, { "epoch": 0.5679429429429429, "grad_norm": 1.0639369567550894, "learning_rate": 9.759164709054493e-06, "loss": 0.4549, "step": 6052 }, { "epoch": 0.5680367867867868, "grad_norm": 2.5025089338220146, "learning_rate": 9.758997278428123e-06, "loss": 0.4953, "step": 6053 }, { "epoch": 0.5681306306306306, "grad_norm": 1.149567224939416, "learning_rate": 9.758829791059668e-06, "loss": 0.4557, "step": 6054 }, { "epoch": 0.5682244744744744, "grad_norm": 1.3062591808891335, "learning_rate": 9.758662246951126e-06, "loss": 0.4956, "step": 6055 }, { "epoch": 0.5683183183183184, "grad_norm": 1.0654166327259853, "learning_rate": 9.758494646104493e-06, "loss": 0.4546, "step": 6056 }, { "epoch": 0.5684121621621622, "grad_norm": 1.0467864933703268, "learning_rate": 9.758326988521767e-06, "loss": 0.495, "step": 6057 }, { "epoch": 0.568506006006006, "grad_norm": 1.0720653290482767, "learning_rate": 9.75815927420495e-06, "loss": 0.4837, "step": 6058 }, { "epoch": 0.5685998498498499, "grad_norm": 1.093808558020667, "learning_rate": 9.757991503156038e-06, "loss": 0.4764, "step": 6059 }, { "epoch": 0.5686936936936937, "grad_norm": 1.1320422029718802, "learning_rate": 9.757823675377036e-06, "loss": 0.5086, "step": 6060 }, { "epoch": 0.5687875375375375, "grad_norm": 1.1858862618568438, "learning_rate": 9.757655790869941e-06, "loss": 0.5103, "step": 6061 }, { "epoch": 0.5688813813813813, "grad_norm": 0.9933396203576371, "learning_rate": 9.757487849636757e-06, "loss": 0.4852, "step": 6062 }, { "epoch": 0.5689752252252253, "grad_norm": 1.386027752889515, "learning_rate": 9.757319851679485e-06, "loss": 0.5049, "step": 6063 }, { "epoch": 0.5690690690690691, "grad_norm": 1.7805882331767422, "learning_rate": 9.757151797000129e-06, "loss": 0.5017, "step": 6064 }, { "epoch": 0.5691629129129129, "grad_norm": 1.0660645727765914, "learning_rate": 9.756983685600692e-06, "loss": 0.3942, "step": 6065 }, { "epoch": 0.5692567567567568, "grad_norm": 1.3342142337343263, "learning_rate": 9.756815517483179e-06, "loss": 0.4956, "step": 6066 }, { "epoch": 0.5693506006006006, "grad_norm": 1.011187724946873, "learning_rate": 9.756647292649593e-06, "loss": 0.4965, "step": 6067 }, { "epoch": 0.5694444444444444, "grad_norm": 0.9706827566018438, "learning_rate": 9.756479011101942e-06, "loss": 0.4775, "step": 6068 }, { "epoch": 0.5695382882882883, "grad_norm": 1.0343289471503867, "learning_rate": 9.756310672842233e-06, "loss": 0.489, "step": 6069 }, { "epoch": 0.5696321321321322, "grad_norm": 1.605119004887019, "learning_rate": 9.756142277872471e-06, "loss": 0.5292, "step": 6070 }, { "epoch": 0.569725975975976, "grad_norm": 1.0381589641820248, "learning_rate": 9.755973826194665e-06, "loss": 0.5311, "step": 6071 }, { "epoch": 0.5698198198198198, "grad_norm": 1.089895737882458, "learning_rate": 9.755805317810825e-06, "loss": 0.4488, "step": 6072 }, { "epoch": 0.5699136636636637, "grad_norm": 1.0647130775216236, "learning_rate": 9.755636752722956e-06, "loss": 0.5419, "step": 6073 }, { "epoch": 0.5700075075075075, "grad_norm": 1.3000982429449888, "learning_rate": 9.755468130933071e-06, "loss": 0.5505, "step": 6074 }, { "epoch": 0.5701013513513513, "grad_norm": 1.1406237895874225, "learning_rate": 9.755299452443181e-06, "loss": 0.4192, "step": 6075 }, { "epoch": 0.5701951951951952, "grad_norm": 1.0190065421278347, "learning_rate": 9.755130717255295e-06, "loss": 0.5098, "step": 6076 }, { "epoch": 0.570289039039039, "grad_norm": 1.096081503804375, "learning_rate": 9.754961925371425e-06, "loss": 0.4687, "step": 6077 }, { "epoch": 0.5703828828828829, "grad_norm": 1.187494415008521, "learning_rate": 9.754793076793587e-06, "loss": 0.4289, "step": 6078 }, { "epoch": 0.5704767267267268, "grad_norm": 0.9673915804396613, "learning_rate": 9.75462417152379e-06, "loss": 0.4661, "step": 6079 }, { "epoch": 0.5705705705705706, "grad_norm": 1.6644492578990817, "learning_rate": 9.754455209564049e-06, "loss": 0.5026, "step": 6080 }, { "epoch": 0.5706644144144144, "grad_norm": 1.2779658475610558, "learning_rate": 9.754286190916377e-06, "loss": 0.506, "step": 6081 }, { "epoch": 0.5707582582582582, "grad_norm": 0.9854250278454941, "learning_rate": 9.754117115582796e-06, "loss": 0.5027, "step": 6082 }, { "epoch": 0.5708521021021021, "grad_norm": 1.072435235052664, "learning_rate": 9.753947983565312e-06, "loss": 0.4693, "step": 6083 }, { "epoch": 0.5709459459459459, "grad_norm": 1.005076994773379, "learning_rate": 9.753778794865951e-06, "loss": 0.4406, "step": 6084 }, { "epoch": 0.5710397897897898, "grad_norm": 1.0292500372666256, "learning_rate": 9.753609549486725e-06, "loss": 0.4781, "step": 6085 }, { "epoch": 0.5711336336336337, "grad_norm": 1.018830340106096, "learning_rate": 9.75344024742965e-06, "loss": 0.4725, "step": 6086 }, { "epoch": 0.5712274774774775, "grad_norm": 1.1015958107795978, "learning_rate": 9.753270888696751e-06, "loss": 0.4286, "step": 6087 }, { "epoch": 0.5713213213213213, "grad_norm": 1.1072389437628507, "learning_rate": 9.753101473290041e-06, "loss": 0.4688, "step": 6088 }, { "epoch": 0.5714151651651652, "grad_norm": 0.9802815069473001, "learning_rate": 9.752932001211545e-06, "loss": 0.4784, "step": 6089 }, { "epoch": 0.571509009009009, "grad_norm": 1.2391160325726003, "learning_rate": 9.752762472463279e-06, "loss": 0.5005, "step": 6090 }, { "epoch": 0.5716028528528528, "grad_norm": 1.1870767721449083, "learning_rate": 9.75259288704727e-06, "loss": 0.5374, "step": 6091 }, { "epoch": 0.5716966966966966, "grad_norm": 1.206706839651938, "learning_rate": 9.752423244965535e-06, "loss": 0.5009, "step": 6092 }, { "epoch": 0.5717905405405406, "grad_norm": 1.1658317166157677, "learning_rate": 9.752253546220097e-06, "loss": 0.4397, "step": 6093 }, { "epoch": 0.5718843843843844, "grad_norm": 1.1353166298424597, "learning_rate": 9.752083790812981e-06, "loss": 0.4986, "step": 6094 }, { "epoch": 0.5719782282282282, "grad_norm": 1.0651790658896905, "learning_rate": 9.751913978746211e-06, "loss": 0.4733, "step": 6095 }, { "epoch": 0.5720720720720721, "grad_norm": 1.0686373617952427, "learning_rate": 9.751744110021813e-06, "loss": 0.4774, "step": 6096 }, { "epoch": 0.5721659159159159, "grad_norm": 1.0175511612944779, "learning_rate": 9.751574184641809e-06, "loss": 0.4452, "step": 6097 }, { "epoch": 0.5722597597597597, "grad_norm": 1.0166838109826377, "learning_rate": 9.751404202608227e-06, "loss": 0.5199, "step": 6098 }, { "epoch": 0.5723536036036037, "grad_norm": 1.2231251694899272, "learning_rate": 9.751234163923093e-06, "loss": 0.4842, "step": 6099 }, { "epoch": 0.5724474474474475, "grad_norm": 1.2751996116510795, "learning_rate": 9.751064068588436e-06, "loss": 0.4909, "step": 6100 }, { "epoch": 0.5725412912912913, "grad_norm": 0.9402237693165473, "learning_rate": 9.750893916606283e-06, "loss": 0.4888, "step": 6101 }, { "epoch": 0.5726351351351351, "grad_norm": 1.0696300625330843, "learning_rate": 9.750723707978661e-06, "loss": 0.4573, "step": 6102 }, { "epoch": 0.572728978978979, "grad_norm": 1.206287284380606, "learning_rate": 9.750553442707603e-06, "loss": 0.509, "step": 6103 }, { "epoch": 0.5728228228228228, "grad_norm": 1.6812287568499582, "learning_rate": 9.750383120795137e-06, "loss": 0.4933, "step": 6104 }, { "epoch": 0.5729166666666666, "grad_norm": 1.380881723069484, "learning_rate": 9.750212742243293e-06, "loss": 0.461, "step": 6105 }, { "epoch": 0.5730105105105106, "grad_norm": 1.232995846413703, "learning_rate": 9.750042307054103e-06, "loss": 0.4673, "step": 6106 }, { "epoch": 0.5731043543543544, "grad_norm": 0.9469437262053418, "learning_rate": 9.749871815229601e-06, "loss": 0.4819, "step": 6107 }, { "epoch": 0.5731981981981982, "grad_norm": 1.040735946197105, "learning_rate": 9.749701266771817e-06, "loss": 0.4558, "step": 6108 }, { "epoch": 0.5732920420420421, "grad_norm": 1.195955129553819, "learning_rate": 9.749530661682786e-06, "loss": 0.5082, "step": 6109 }, { "epoch": 0.5733858858858859, "grad_norm": 1.1835252041871849, "learning_rate": 9.749359999964542e-06, "loss": 0.4749, "step": 6110 }, { "epoch": 0.5734797297297297, "grad_norm": 1.1123464318054044, "learning_rate": 9.749189281619122e-06, "loss": 0.4681, "step": 6111 }, { "epoch": 0.5735735735735735, "grad_norm": 1.071363066270392, "learning_rate": 9.749018506648558e-06, "loss": 0.4852, "step": 6112 }, { "epoch": 0.5736674174174174, "grad_norm": 1.1614663912888537, "learning_rate": 9.748847675054887e-06, "loss": 0.5121, "step": 6113 }, { "epoch": 0.5737612612612613, "grad_norm": 0.9932905530637574, "learning_rate": 9.748676786840145e-06, "loss": 0.5007, "step": 6114 }, { "epoch": 0.5738551051051051, "grad_norm": 74.28958035155337, "learning_rate": 9.748505842006371e-06, "loss": 0.5052, "step": 6115 }, { "epoch": 0.573948948948949, "grad_norm": 1.1208817254783783, "learning_rate": 9.748334840555604e-06, "loss": 0.4673, "step": 6116 }, { "epoch": 0.5740427927927928, "grad_norm": 1.2945701385779433, "learning_rate": 9.748163782489882e-06, "loss": 0.5302, "step": 6117 }, { "epoch": 0.5741366366366366, "grad_norm": 1.2534654696222287, "learning_rate": 9.747992667811244e-06, "loss": 0.5105, "step": 6118 }, { "epoch": 0.5742304804804805, "grad_norm": 1.4984974328609186, "learning_rate": 9.747821496521731e-06, "loss": 0.4895, "step": 6119 }, { "epoch": 0.5743243243243243, "grad_norm": 1.7966389937309766, "learning_rate": 9.747650268623384e-06, "loss": 0.4676, "step": 6120 }, { "epoch": 0.5744181681681682, "grad_norm": 1.234428659309628, "learning_rate": 9.747478984118243e-06, "loss": 0.5045, "step": 6121 }, { "epoch": 0.574512012012012, "grad_norm": 1.4608848778657653, "learning_rate": 9.747307643008351e-06, "loss": 0.535, "step": 6122 }, { "epoch": 0.5746058558558559, "grad_norm": 1.2223565719000817, "learning_rate": 9.747136245295751e-06, "loss": 0.5116, "step": 6123 }, { "epoch": 0.5746996996996997, "grad_norm": 1.4401267551425465, "learning_rate": 9.74696479098249e-06, "loss": 0.5119, "step": 6124 }, { "epoch": 0.5747935435435435, "grad_norm": 1.4091394496094518, "learning_rate": 9.746793280070606e-06, "loss": 0.4451, "step": 6125 }, { "epoch": 0.5748873873873874, "grad_norm": 1.4895828596459117, "learning_rate": 9.746621712562147e-06, "loss": 0.4538, "step": 6126 }, { "epoch": 0.5749812312312312, "grad_norm": 1.172205474070677, "learning_rate": 9.74645008845916e-06, "loss": 0.4798, "step": 6127 }, { "epoch": 0.575075075075075, "grad_norm": 1.116766344401164, "learning_rate": 9.74627840776369e-06, "loss": 0.4907, "step": 6128 }, { "epoch": 0.575168918918919, "grad_norm": 0.946417472857895, "learning_rate": 9.746106670477782e-06, "loss": 0.4694, "step": 6129 }, { "epoch": 0.5752627627627628, "grad_norm": 1.0050737439943906, "learning_rate": 9.745934876603488e-06, "loss": 0.4981, "step": 6130 }, { "epoch": 0.5753566066066066, "grad_norm": 1.132448704710447, "learning_rate": 9.745763026142853e-06, "loss": 0.4684, "step": 6131 }, { "epoch": 0.5754504504504504, "grad_norm": 1.2438950438898728, "learning_rate": 9.745591119097925e-06, "loss": 0.552, "step": 6132 }, { "epoch": 0.5755442942942943, "grad_norm": 1.1750749966744478, "learning_rate": 9.745419155470758e-06, "loss": 0.4956, "step": 6133 }, { "epoch": 0.5756381381381381, "grad_norm": 1.070479718016781, "learning_rate": 9.745247135263401e-06, "loss": 0.4384, "step": 6134 }, { "epoch": 0.5757319819819819, "grad_norm": 1.228442022189596, "learning_rate": 9.745075058477902e-06, "loss": 0.4348, "step": 6135 }, { "epoch": 0.5758258258258259, "grad_norm": 0.9614488058993567, "learning_rate": 9.744902925116315e-06, "loss": 0.4497, "step": 6136 }, { "epoch": 0.5759196696696697, "grad_norm": 1.1394201310920318, "learning_rate": 9.74473073518069e-06, "loss": 0.4504, "step": 6137 }, { "epoch": 0.5760135135135135, "grad_norm": 1.4524258792972418, "learning_rate": 9.744558488673084e-06, "loss": 0.4712, "step": 6138 }, { "epoch": 0.5761073573573574, "grad_norm": 1.1603698451619882, "learning_rate": 9.744386185595548e-06, "loss": 0.4757, "step": 6139 }, { "epoch": 0.5762012012012012, "grad_norm": 0.9373792301267491, "learning_rate": 9.74421382595014e-06, "loss": 0.4394, "step": 6140 }, { "epoch": 0.576295045045045, "grad_norm": 1.108770321647544, "learning_rate": 9.74404140973891e-06, "loss": 0.4632, "step": 6141 }, { "epoch": 0.5763888888888888, "grad_norm": 0.9296189564617504, "learning_rate": 9.743868936963913e-06, "loss": 0.4656, "step": 6142 }, { "epoch": 0.5764827327327328, "grad_norm": 36.692767836249395, "learning_rate": 9.743696407627212e-06, "loss": 0.5064, "step": 6143 }, { "epoch": 0.5765765765765766, "grad_norm": 1.2709921965159274, "learning_rate": 9.74352382173086e-06, "loss": 0.4915, "step": 6144 }, { "epoch": 0.5766704204204204, "grad_norm": 1.0565558273234614, "learning_rate": 9.743351179276913e-06, "loss": 0.5302, "step": 6145 }, { "epoch": 0.5767642642642643, "grad_norm": 1.2631946760817678, "learning_rate": 9.743178480267434e-06, "loss": 0.4653, "step": 6146 }, { "epoch": 0.5768581081081081, "grad_norm": 1.115375018157759, "learning_rate": 9.74300572470448e-06, "loss": 0.4807, "step": 6147 }, { "epoch": 0.5769519519519519, "grad_norm": 1.0942387183225362, "learning_rate": 9.74283291259011e-06, "loss": 0.5186, "step": 6148 }, { "epoch": 0.5770457957957958, "grad_norm": 1.2375338605871478, "learning_rate": 9.742660043926385e-06, "loss": 0.4616, "step": 6149 }, { "epoch": 0.5771396396396397, "grad_norm": 1.0798595796835908, "learning_rate": 9.742487118715365e-06, "loss": 0.5485, "step": 6150 }, { "epoch": 0.5772334834834835, "grad_norm": 1.061541804378265, "learning_rate": 9.742314136959115e-06, "loss": 0.5185, "step": 6151 }, { "epoch": 0.5773273273273273, "grad_norm": 1.2283067747534717, "learning_rate": 9.742141098659692e-06, "loss": 0.4836, "step": 6152 }, { "epoch": 0.5774211711711712, "grad_norm": 1.0202570103735287, "learning_rate": 9.741968003819165e-06, "loss": 0.4569, "step": 6153 }, { "epoch": 0.577515015015015, "grad_norm": 1.6947418214834475, "learning_rate": 9.741794852439595e-06, "loss": 0.507, "step": 6154 }, { "epoch": 0.5776088588588588, "grad_norm": 0.9557640949688, "learning_rate": 9.741621644523047e-06, "loss": 0.4905, "step": 6155 }, { "epoch": 0.5777027027027027, "grad_norm": 1.2407886680199491, "learning_rate": 9.741448380071587e-06, "loss": 0.4219, "step": 6156 }, { "epoch": 0.5777965465465466, "grad_norm": 0.9661822758460932, "learning_rate": 9.741275059087278e-06, "loss": 0.4576, "step": 6157 }, { "epoch": 0.5778903903903904, "grad_norm": 2.1060409476662207, "learning_rate": 9.74110168157219e-06, "loss": 0.4998, "step": 6158 }, { "epoch": 0.5779842342342343, "grad_norm": 1.009769606664284, "learning_rate": 9.740928247528387e-06, "loss": 0.4586, "step": 6159 }, { "epoch": 0.5780780780780781, "grad_norm": 1.0207113446361786, "learning_rate": 9.74075475695794e-06, "loss": 0.4632, "step": 6160 }, { "epoch": 0.5781719219219219, "grad_norm": 1.1081074944704332, "learning_rate": 9.740581209862917e-06, "loss": 0.4594, "step": 6161 }, { "epoch": 0.5782657657657657, "grad_norm": 2.013510819524424, "learning_rate": 9.740407606245384e-06, "loss": 0.4958, "step": 6162 }, { "epoch": 0.5783596096096096, "grad_norm": 2.0390096575925587, "learning_rate": 9.740233946107414e-06, "loss": 0.5075, "step": 6163 }, { "epoch": 0.5784534534534534, "grad_norm": 1.0894597423162873, "learning_rate": 9.740060229451075e-06, "loss": 0.4671, "step": 6164 }, { "epoch": 0.5785472972972973, "grad_norm": 1.2861532180361812, "learning_rate": 9.739886456278444e-06, "loss": 0.4425, "step": 6165 }, { "epoch": 0.5786411411411412, "grad_norm": 1.134167549463132, "learning_rate": 9.739712626591586e-06, "loss": 0.5392, "step": 6166 }, { "epoch": 0.578734984984985, "grad_norm": 1.7610507481908761, "learning_rate": 9.739538740392578e-06, "loss": 0.4832, "step": 6167 }, { "epoch": 0.5788288288288288, "grad_norm": 1.1169677418896995, "learning_rate": 9.73936479768349e-06, "loss": 0.5068, "step": 6168 }, { "epoch": 0.5789226726726727, "grad_norm": 1.3103604842863146, "learning_rate": 9.7391907984664e-06, "loss": 0.5236, "step": 6169 }, { "epoch": 0.5790165165165165, "grad_norm": 1.0844084041055049, "learning_rate": 9.73901674274338e-06, "loss": 0.4289, "step": 6170 }, { "epoch": 0.5791103603603603, "grad_norm": 0.9919521424856839, "learning_rate": 9.738842630516503e-06, "loss": 0.5258, "step": 6171 }, { "epoch": 0.5792042042042042, "grad_norm": 1.0319436610757267, "learning_rate": 9.73866846178785e-06, "loss": 0.4894, "step": 6172 }, { "epoch": 0.5792980480480481, "grad_norm": 1.4785681110259805, "learning_rate": 9.738494236559496e-06, "loss": 0.5327, "step": 6173 }, { "epoch": 0.5793918918918919, "grad_norm": 1.3782717545115826, "learning_rate": 9.738319954833516e-06, "loss": 0.4444, "step": 6174 }, { "epoch": 0.5794857357357357, "grad_norm": 0.951836010870641, "learning_rate": 9.738145616611988e-06, "loss": 0.4734, "step": 6175 }, { "epoch": 0.5795795795795796, "grad_norm": 0.9757229027447998, "learning_rate": 9.737971221896994e-06, "loss": 0.4886, "step": 6176 }, { "epoch": 0.5796734234234234, "grad_norm": 1.1620747272439538, "learning_rate": 9.73779677069061e-06, "loss": 0.4375, "step": 6177 }, { "epoch": 0.5797672672672672, "grad_norm": 1.143331533601172, "learning_rate": 9.737622262994921e-06, "loss": 0.5058, "step": 6178 }, { "epoch": 0.5798611111111112, "grad_norm": 1.1568850343674575, "learning_rate": 9.737447698812002e-06, "loss": 0.4867, "step": 6179 }, { "epoch": 0.579954954954955, "grad_norm": 1.403098796190223, "learning_rate": 9.737273078143936e-06, "loss": 0.4935, "step": 6180 }, { "epoch": 0.5800487987987988, "grad_norm": 1.0098474431788205, "learning_rate": 9.737098400992805e-06, "loss": 0.494, "step": 6181 }, { "epoch": 0.5801426426426426, "grad_norm": 0.9936578114976385, "learning_rate": 9.736923667360694e-06, "loss": 0.5013, "step": 6182 }, { "epoch": 0.5802364864864865, "grad_norm": 0.9211947631258223, "learning_rate": 9.736748877249685e-06, "loss": 0.4856, "step": 6183 }, { "epoch": 0.5803303303303303, "grad_norm": 1.759947958200236, "learning_rate": 9.736574030661859e-06, "loss": 0.4544, "step": 6184 }, { "epoch": 0.5804241741741741, "grad_norm": 1.0721533959828184, "learning_rate": 9.736399127599306e-06, "loss": 0.4789, "step": 6185 }, { "epoch": 0.5805180180180181, "grad_norm": 0.989550197827508, "learning_rate": 9.736224168064107e-06, "loss": 0.43, "step": 6186 }, { "epoch": 0.5806118618618619, "grad_norm": 1.101158860388825, "learning_rate": 9.73604915205835e-06, "loss": 0.5437, "step": 6187 }, { "epoch": 0.5807057057057057, "grad_norm": 1.72477300435992, "learning_rate": 9.735874079584122e-06, "loss": 0.5098, "step": 6188 }, { "epoch": 0.5807995495495496, "grad_norm": 2.202847252941227, "learning_rate": 9.73569895064351e-06, "loss": 0.4367, "step": 6189 }, { "epoch": 0.5808933933933934, "grad_norm": 1.0191889168407684, "learning_rate": 9.735523765238602e-06, "loss": 0.4928, "step": 6190 }, { "epoch": 0.5809872372372372, "grad_norm": 1.297654928877232, "learning_rate": 9.735348523371488e-06, "loss": 0.4704, "step": 6191 }, { "epoch": 0.581081081081081, "grad_norm": 0.9438221952006753, "learning_rate": 9.735173225044254e-06, "loss": 0.5052, "step": 6192 }, { "epoch": 0.581174924924925, "grad_norm": 1.1425728855831827, "learning_rate": 9.734997870258993e-06, "loss": 0.4885, "step": 6193 }, { "epoch": 0.5812687687687688, "grad_norm": 1.2453303243787892, "learning_rate": 9.734822459017797e-06, "loss": 0.4472, "step": 6194 }, { "epoch": 0.5813626126126126, "grad_norm": 0.8591589082685636, "learning_rate": 9.734646991322754e-06, "loss": 0.4592, "step": 6195 }, { "epoch": 0.5814564564564565, "grad_norm": 1.2688135552967608, "learning_rate": 9.734471467175957e-06, "loss": 0.4965, "step": 6196 }, { "epoch": 0.5815503003003003, "grad_norm": 1.793076272148869, "learning_rate": 9.734295886579502e-06, "loss": 0.4743, "step": 6197 }, { "epoch": 0.5816441441441441, "grad_norm": 0.9862066174366156, "learning_rate": 9.734120249535476e-06, "loss": 0.4855, "step": 6198 }, { "epoch": 0.581737987987988, "grad_norm": 1.7893210905852268, "learning_rate": 9.73394455604598e-06, "loss": 0.4509, "step": 6199 }, { "epoch": 0.5818318318318318, "grad_norm": 0.9623732157705236, "learning_rate": 9.733768806113105e-06, "loss": 0.4534, "step": 6200 }, { "epoch": 0.5819256756756757, "grad_norm": 1.7050281131491722, "learning_rate": 9.733592999738948e-06, "loss": 0.4755, "step": 6201 }, { "epoch": 0.5820195195195195, "grad_norm": 1.834027728084402, "learning_rate": 9.733417136925606e-06, "loss": 0.5251, "step": 6202 }, { "epoch": 0.5821133633633634, "grad_norm": 1.028752433303904, "learning_rate": 9.733241217675171e-06, "loss": 0.4835, "step": 6203 }, { "epoch": 0.5822072072072072, "grad_norm": 1.0106763021875658, "learning_rate": 9.733065241989745e-06, "loss": 0.5079, "step": 6204 }, { "epoch": 0.582301051051051, "grad_norm": 0.9750618332191006, "learning_rate": 9.732889209871426e-06, "loss": 0.483, "step": 6205 }, { "epoch": 0.5823948948948949, "grad_norm": 1.1094660173067716, "learning_rate": 9.732713121322311e-06, "loss": 0.5372, "step": 6206 }, { "epoch": 0.5824887387387387, "grad_norm": 1.1932840676511558, "learning_rate": 9.732536976344501e-06, "loss": 0.5085, "step": 6207 }, { "epoch": 0.5825825825825826, "grad_norm": 1.066984213840215, "learning_rate": 9.732360774940095e-06, "loss": 0.4655, "step": 6208 }, { "epoch": 0.5826764264264265, "grad_norm": 1.1366525954275415, "learning_rate": 9.732184517111196e-06, "loss": 0.4675, "step": 6209 }, { "epoch": 0.5827702702702703, "grad_norm": 1.1836019681973615, "learning_rate": 9.732008202859903e-06, "loss": 0.4993, "step": 6210 }, { "epoch": 0.5828641141141141, "grad_norm": 1.169860288736391, "learning_rate": 9.731831832188319e-06, "loss": 0.4657, "step": 6211 }, { "epoch": 0.5829579579579579, "grad_norm": 1.5992108065342894, "learning_rate": 9.731655405098547e-06, "loss": 0.5018, "step": 6212 }, { "epoch": 0.5830518018018018, "grad_norm": 1.0995137716048031, "learning_rate": 9.731478921592692e-06, "loss": 0.4783, "step": 6213 }, { "epoch": 0.5831456456456456, "grad_norm": 1.156995159345501, "learning_rate": 9.731302381672856e-06, "loss": 0.5303, "step": 6214 }, { "epoch": 0.5832394894894894, "grad_norm": 1.0825426871546424, "learning_rate": 9.731125785341145e-06, "loss": 0.4385, "step": 6215 }, { "epoch": 0.5833333333333334, "grad_norm": 1.0911094598136861, "learning_rate": 9.730949132599665e-06, "loss": 0.4697, "step": 6216 }, { "epoch": 0.5834271771771772, "grad_norm": 1.149872210858736, "learning_rate": 9.73077242345052e-06, "loss": 0.4821, "step": 6217 }, { "epoch": 0.583521021021021, "grad_norm": 1.1421595078763802, "learning_rate": 9.73059565789582e-06, "loss": 0.4917, "step": 6218 }, { "epoch": 0.5836148648648649, "grad_norm": 2.1215992040641445, "learning_rate": 9.730418835937671e-06, "loss": 0.4833, "step": 6219 }, { "epoch": 0.5837087087087087, "grad_norm": 0.9660192899234914, "learning_rate": 9.730241957578181e-06, "loss": 0.4634, "step": 6220 }, { "epoch": 0.5838025525525525, "grad_norm": 1.1056921805564977, "learning_rate": 9.73006502281946e-06, "loss": 0.4863, "step": 6221 }, { "epoch": 0.5838963963963963, "grad_norm": 1.0895563243911068, "learning_rate": 9.729888031663617e-06, "loss": 0.4702, "step": 6222 }, { "epoch": 0.5839902402402403, "grad_norm": 1.2728116167496555, "learning_rate": 9.729710984112763e-06, "loss": 0.4965, "step": 6223 }, { "epoch": 0.5840840840840841, "grad_norm": 1.0434981270192594, "learning_rate": 9.729533880169006e-06, "loss": 0.4846, "step": 6224 }, { "epoch": 0.5841779279279279, "grad_norm": 0.9615237442177815, "learning_rate": 9.729356719834462e-06, "loss": 0.4943, "step": 6225 }, { "epoch": 0.5842717717717718, "grad_norm": 1.0699327418172093, "learning_rate": 9.72917950311124e-06, "loss": 0.4988, "step": 6226 }, { "epoch": 0.5843656156156156, "grad_norm": 1.1571252173349214, "learning_rate": 9.729002230001456e-06, "loss": 0.5066, "step": 6227 }, { "epoch": 0.5844594594594594, "grad_norm": 1.0428979845374844, "learning_rate": 9.728824900507221e-06, "loss": 0.5261, "step": 6228 }, { "epoch": 0.5845533033033034, "grad_norm": 1.4356671350492969, "learning_rate": 9.72864751463065e-06, "loss": 0.4798, "step": 6229 }, { "epoch": 0.5846471471471472, "grad_norm": 1.1125677399179783, "learning_rate": 9.728470072373857e-06, "loss": 0.4555, "step": 6230 }, { "epoch": 0.584740990990991, "grad_norm": 1.3634297802749604, "learning_rate": 9.72829257373896e-06, "loss": 0.4497, "step": 6231 }, { "epoch": 0.5848348348348348, "grad_norm": 4.676375593110323, "learning_rate": 9.728115018728075e-06, "loss": 0.4405, "step": 6232 }, { "epoch": 0.5849286786786787, "grad_norm": 1.1104443087341436, "learning_rate": 9.727937407343316e-06, "loss": 0.5367, "step": 6233 }, { "epoch": 0.5850225225225225, "grad_norm": 1.253539385361494, "learning_rate": 9.727759739586804e-06, "loss": 0.4999, "step": 6234 }, { "epoch": 0.5851163663663663, "grad_norm": 1.8981760614620775, "learning_rate": 9.727582015460658e-06, "loss": 0.4948, "step": 6235 }, { "epoch": 0.5852102102102102, "grad_norm": 1.1985740573617174, "learning_rate": 9.727404234966992e-06, "loss": 0.4981, "step": 6236 }, { "epoch": 0.5853040540540541, "grad_norm": 1.1005022903405954, "learning_rate": 9.72722639810793e-06, "loss": 0.4431, "step": 6237 }, { "epoch": 0.5853978978978979, "grad_norm": 0.9930143743072107, "learning_rate": 9.727048504885592e-06, "loss": 0.4764, "step": 6238 }, { "epoch": 0.5854917417417418, "grad_norm": 1.3112705099534967, "learning_rate": 9.726870555302098e-06, "loss": 0.4859, "step": 6239 }, { "epoch": 0.5855855855855856, "grad_norm": 1.182514390393265, "learning_rate": 9.72669254935957e-06, "loss": 0.4966, "step": 6240 }, { "epoch": 0.5856794294294294, "grad_norm": 0.9676000968192371, "learning_rate": 9.72651448706013e-06, "loss": 0.5116, "step": 6241 }, { "epoch": 0.5857732732732732, "grad_norm": 0.9117485237138861, "learning_rate": 9.726336368405902e-06, "loss": 0.4421, "step": 6242 }, { "epoch": 0.5858671171171171, "grad_norm": 1.249731888310434, "learning_rate": 9.72615819339901e-06, "loss": 0.4825, "step": 6243 }, { "epoch": 0.585960960960961, "grad_norm": 1.4234270361226613, "learning_rate": 9.725979962041575e-06, "loss": 0.4734, "step": 6244 }, { "epoch": 0.5860548048048048, "grad_norm": 0.8705030262412554, "learning_rate": 9.725801674335728e-06, "loss": 0.4727, "step": 6245 }, { "epoch": 0.5861486486486487, "grad_norm": 0.9960495806737278, "learning_rate": 9.72562333028359e-06, "loss": 0.5004, "step": 6246 }, { "epoch": 0.5862424924924925, "grad_norm": 1.2665597188922377, "learning_rate": 9.725444929887288e-06, "loss": 0.4556, "step": 6247 }, { "epoch": 0.5863363363363363, "grad_norm": 1.0501732176327296, "learning_rate": 9.72526647314895e-06, "loss": 0.4701, "step": 6248 }, { "epoch": 0.5864301801801802, "grad_norm": 1.2297694254225506, "learning_rate": 9.725087960070704e-06, "loss": 0.4334, "step": 6249 }, { "epoch": 0.586524024024024, "grad_norm": 0.8440910124671128, "learning_rate": 9.72490939065468e-06, "loss": 0.4522, "step": 6250 }, { "epoch": 0.5866178678678678, "grad_norm": 0.9941319800692668, "learning_rate": 9.724730764903004e-06, "loss": 0.4906, "step": 6251 }, { "epoch": 0.5867117117117117, "grad_norm": 1.1968763941243454, "learning_rate": 9.724552082817807e-06, "loss": 0.4842, "step": 6252 }, { "epoch": 0.5868055555555556, "grad_norm": 0.8278171069804535, "learning_rate": 9.724373344401219e-06, "loss": 0.4592, "step": 6253 }, { "epoch": 0.5868993993993994, "grad_norm": 1.1122079796104352, "learning_rate": 9.724194549655371e-06, "loss": 0.4764, "step": 6254 }, { "epoch": 0.5869932432432432, "grad_norm": 1.906298745092844, "learning_rate": 9.724015698582397e-06, "loss": 0.5229, "step": 6255 }, { "epoch": 0.5870870870870871, "grad_norm": 1.2837403130476428, "learning_rate": 9.723836791184426e-06, "loss": 0.502, "step": 6256 }, { "epoch": 0.5871809309309309, "grad_norm": 1.409196902745809, "learning_rate": 9.723657827463595e-06, "loss": 0.4635, "step": 6257 }, { "epoch": 0.5872747747747747, "grad_norm": 0.9801431118663647, "learning_rate": 9.723478807422035e-06, "loss": 0.4644, "step": 6258 }, { "epoch": 0.5873686186186187, "grad_norm": 1.0852195303060712, "learning_rate": 9.72329973106188e-06, "loss": 0.4501, "step": 6259 }, { "epoch": 0.5874624624624625, "grad_norm": 1.1973543620417622, "learning_rate": 9.723120598385267e-06, "loss": 0.509, "step": 6260 }, { "epoch": 0.5875563063063063, "grad_norm": 0.9989525757074862, "learning_rate": 9.722941409394332e-06, "loss": 0.4229, "step": 6261 }, { "epoch": 0.5876501501501501, "grad_norm": 1.0910456950428151, "learning_rate": 9.722762164091211e-06, "loss": 0.4784, "step": 6262 }, { "epoch": 0.587743993993994, "grad_norm": 1.0546494010004328, "learning_rate": 9.72258286247804e-06, "loss": 0.4365, "step": 6263 }, { "epoch": 0.5878378378378378, "grad_norm": 1.1390794054374658, "learning_rate": 9.72240350455696e-06, "loss": 0.472, "step": 6264 }, { "epoch": 0.5879316816816816, "grad_norm": 1.1720690032196284, "learning_rate": 9.722224090330103e-06, "loss": 0.457, "step": 6265 }, { "epoch": 0.5880255255255256, "grad_norm": 0.9960283560002763, "learning_rate": 9.722044619799615e-06, "loss": 0.4715, "step": 6266 }, { "epoch": 0.5881193693693694, "grad_norm": 1.3123540396384767, "learning_rate": 9.721865092967633e-06, "loss": 0.5139, "step": 6267 }, { "epoch": 0.5882132132132132, "grad_norm": 1.0125410529323895, "learning_rate": 9.721685509836298e-06, "loss": 0.522, "step": 6268 }, { "epoch": 0.5883070570570571, "grad_norm": 1.0817542028758385, "learning_rate": 9.72150587040775e-06, "loss": 0.4512, "step": 6269 }, { "epoch": 0.5884009009009009, "grad_norm": 1.2018781028446153, "learning_rate": 9.721326174684134e-06, "loss": 0.4742, "step": 6270 }, { "epoch": 0.5884947447447447, "grad_norm": 0.9554348597819631, "learning_rate": 9.721146422667587e-06, "loss": 0.4436, "step": 6271 }, { "epoch": 0.5885885885885885, "grad_norm": 2.222076866576324, "learning_rate": 9.720966614360258e-06, "loss": 0.4583, "step": 6272 }, { "epoch": 0.5886824324324325, "grad_norm": 1.1873421286904149, "learning_rate": 9.720786749764288e-06, "loss": 0.4568, "step": 6273 }, { "epoch": 0.5887762762762763, "grad_norm": 1.0478320174434699, "learning_rate": 9.720606828881822e-06, "loss": 0.5244, "step": 6274 }, { "epoch": 0.5888701201201201, "grad_norm": 1.3183717787266047, "learning_rate": 9.720426851715004e-06, "loss": 0.5229, "step": 6275 }, { "epoch": 0.588963963963964, "grad_norm": 1.6138393289902528, "learning_rate": 9.720246818265982e-06, "loss": 0.5294, "step": 6276 }, { "epoch": 0.5890578078078078, "grad_norm": 2.0326746096370245, "learning_rate": 9.720066728536903e-06, "loss": 0.4543, "step": 6277 }, { "epoch": 0.5891516516516516, "grad_norm": 1.176164927634935, "learning_rate": 9.719886582529911e-06, "loss": 0.5041, "step": 6278 }, { "epoch": 0.5892454954954955, "grad_norm": 1.0421221470445903, "learning_rate": 9.719706380247155e-06, "loss": 0.4553, "step": 6279 }, { "epoch": 0.5893393393393394, "grad_norm": 0.9769527754225625, "learning_rate": 9.719526121690786e-06, "loss": 0.496, "step": 6280 }, { "epoch": 0.5894331831831832, "grad_norm": 1.1529820567353222, "learning_rate": 9.719345806862952e-06, "loss": 0.44, "step": 6281 }, { "epoch": 0.589527027027027, "grad_norm": 1.737161374056446, "learning_rate": 9.719165435765801e-06, "loss": 0.5238, "step": 6282 }, { "epoch": 0.5896208708708709, "grad_norm": 1.1782793108028138, "learning_rate": 9.718985008401486e-06, "loss": 0.4872, "step": 6283 }, { "epoch": 0.5897147147147147, "grad_norm": 1.9637541538342465, "learning_rate": 9.718804524772158e-06, "loss": 0.4987, "step": 6284 }, { "epoch": 0.5898085585585585, "grad_norm": 1.028557608625853, "learning_rate": 9.718623984879967e-06, "loss": 0.453, "step": 6285 }, { "epoch": 0.5899024024024024, "grad_norm": 1.1289717470150287, "learning_rate": 9.718443388727068e-06, "loss": 0.5239, "step": 6286 }, { "epoch": 0.5899962462462462, "grad_norm": 1.736591100390265, "learning_rate": 9.718262736315611e-06, "loss": 0.4696, "step": 6287 }, { "epoch": 0.5900900900900901, "grad_norm": 1.0469155786861593, "learning_rate": 9.718082027647755e-06, "loss": 0.476, "step": 6288 }, { "epoch": 0.590183933933934, "grad_norm": 0.9304985444164, "learning_rate": 9.71790126272565e-06, "loss": 0.4659, "step": 6289 }, { "epoch": 0.5902777777777778, "grad_norm": 1.3776884367130056, "learning_rate": 9.717720441551453e-06, "loss": 0.449, "step": 6290 }, { "epoch": 0.5903716216216216, "grad_norm": 0.9612358757017945, "learning_rate": 9.71753956412732e-06, "loss": 0.5044, "step": 6291 }, { "epoch": 0.5904654654654654, "grad_norm": 1.1353969883075676, "learning_rate": 9.717358630455408e-06, "loss": 0.4709, "step": 6292 }, { "epoch": 0.5905593093093093, "grad_norm": 1.053937677423093, "learning_rate": 9.717177640537874e-06, "loss": 0.4431, "step": 6293 }, { "epoch": 0.5906531531531531, "grad_norm": 1.4002809016873752, "learning_rate": 9.716996594376876e-06, "loss": 0.4726, "step": 6294 }, { "epoch": 0.590746996996997, "grad_norm": 1.7491030710593443, "learning_rate": 9.71681549197457e-06, "loss": 0.4638, "step": 6295 }, { "epoch": 0.5908408408408409, "grad_norm": 1.190560446744793, "learning_rate": 9.716634333333119e-06, "loss": 0.5225, "step": 6296 }, { "epoch": 0.5909346846846847, "grad_norm": 1.1006497585048398, "learning_rate": 9.716453118454684e-06, "loss": 0.5032, "step": 6297 }, { "epoch": 0.5910285285285285, "grad_norm": 1.4380870020243097, "learning_rate": 9.716271847341421e-06, "loss": 0.5326, "step": 6298 }, { "epoch": 0.5911223723723724, "grad_norm": 1.1325951058186345, "learning_rate": 9.716090519995494e-06, "loss": 0.4931, "step": 6299 }, { "epoch": 0.5912162162162162, "grad_norm": 1.0252131590632816, "learning_rate": 9.715909136419067e-06, "loss": 0.5478, "step": 6300 }, { "epoch": 0.59131006006006, "grad_norm": 3.358286567284722, "learning_rate": 9.715727696614297e-06, "loss": 0.4931, "step": 6301 }, { "epoch": 0.5914039039039038, "grad_norm": 0.9842415526216295, "learning_rate": 9.715546200583353e-06, "loss": 0.4891, "step": 6302 }, { "epoch": 0.5914977477477478, "grad_norm": 1.832664722708152, "learning_rate": 9.715364648328393e-06, "loss": 0.4933, "step": 6303 }, { "epoch": 0.5915915915915916, "grad_norm": 1.0927431404680876, "learning_rate": 9.71518303985159e-06, "loss": 0.4985, "step": 6304 }, { "epoch": 0.5916854354354354, "grad_norm": 1.2297568495802034, "learning_rate": 9.715001375155102e-06, "loss": 0.4812, "step": 6305 }, { "epoch": 0.5917792792792793, "grad_norm": 1.803751727352429, "learning_rate": 9.714819654241099e-06, "loss": 0.4426, "step": 6306 }, { "epoch": 0.5918731231231231, "grad_norm": 0.9909113482110854, "learning_rate": 9.714637877111746e-06, "loss": 0.4741, "step": 6307 }, { "epoch": 0.5919669669669669, "grad_norm": 1.0979475115417392, "learning_rate": 9.714456043769209e-06, "loss": 0.4576, "step": 6308 }, { "epoch": 0.5920608108108109, "grad_norm": 1.5757368357336552, "learning_rate": 9.71427415421566e-06, "loss": 0.4453, "step": 6309 }, { "epoch": 0.5921546546546547, "grad_norm": 1.1191387608459418, "learning_rate": 9.714092208453266e-06, "loss": 0.4811, "step": 6310 }, { "epoch": 0.5922484984984985, "grad_norm": 1.0949458745263814, "learning_rate": 9.713910206484192e-06, "loss": 0.4868, "step": 6311 }, { "epoch": 0.5923423423423423, "grad_norm": 2.4252022558647215, "learning_rate": 9.713728148310615e-06, "loss": 0.4358, "step": 6312 }, { "epoch": 0.5924361861861862, "grad_norm": 1.227971126318948, "learning_rate": 9.713546033934702e-06, "loss": 0.4716, "step": 6313 }, { "epoch": 0.59253003003003, "grad_norm": 0.9413555176631201, "learning_rate": 9.713363863358624e-06, "loss": 0.4454, "step": 6314 }, { "epoch": 0.5926238738738738, "grad_norm": 1.1008933525115703, "learning_rate": 9.713181636584554e-06, "loss": 0.5065, "step": 6315 }, { "epoch": 0.5927177177177178, "grad_norm": 1.126969124932388, "learning_rate": 9.712999353614665e-06, "loss": 0.5022, "step": 6316 }, { "epoch": 0.5928115615615616, "grad_norm": 1.303559999623468, "learning_rate": 9.712817014451132e-06, "loss": 0.469, "step": 6317 }, { "epoch": 0.5929054054054054, "grad_norm": 1.3132603868819528, "learning_rate": 9.712634619096124e-06, "loss": 0.4799, "step": 6318 }, { "epoch": 0.5929992492492493, "grad_norm": 1.3862840300364994, "learning_rate": 9.71245216755182e-06, "loss": 0.4317, "step": 6319 }, { "epoch": 0.5930930930930931, "grad_norm": 1.4459518473387698, "learning_rate": 9.712269659820394e-06, "loss": 0.463, "step": 6320 }, { "epoch": 0.5931869369369369, "grad_norm": 1.3679168557689272, "learning_rate": 9.712087095904024e-06, "loss": 0.4538, "step": 6321 }, { "epoch": 0.5932807807807807, "grad_norm": 1.0420469359268942, "learning_rate": 9.711904475804883e-06, "loss": 0.4843, "step": 6322 }, { "epoch": 0.5933746246246246, "grad_norm": 1.1342316111478077, "learning_rate": 9.71172179952515e-06, "loss": 0.4914, "step": 6323 }, { "epoch": 0.5934684684684685, "grad_norm": 1.2937736136195774, "learning_rate": 9.711539067067003e-06, "loss": 0.5238, "step": 6324 }, { "epoch": 0.5935623123123123, "grad_norm": 0.9824000665164317, "learning_rate": 9.711356278432625e-06, "loss": 0.4803, "step": 6325 }, { "epoch": 0.5936561561561562, "grad_norm": 1.1396079925426328, "learning_rate": 9.711173433624188e-06, "loss": 0.5547, "step": 6326 }, { "epoch": 0.59375, "grad_norm": 1.0881456204751816, "learning_rate": 9.710990532643877e-06, "loss": 0.4557, "step": 6327 }, { "epoch": 0.5938438438438438, "grad_norm": 1.4595594317662837, "learning_rate": 9.71080757549387e-06, "loss": 0.5396, "step": 6328 }, { "epoch": 0.5939376876876877, "grad_norm": 1.0038471972872158, "learning_rate": 9.710624562176352e-06, "loss": 0.4788, "step": 6329 }, { "epoch": 0.5940315315315315, "grad_norm": 0.9831311844396613, "learning_rate": 9.7104414926935e-06, "loss": 0.5125, "step": 6330 }, { "epoch": 0.5941253753753754, "grad_norm": 1.2875969391809796, "learning_rate": 9.710258367047502e-06, "loss": 0.4701, "step": 6331 }, { "epoch": 0.5942192192192193, "grad_norm": 1.2217041343244566, "learning_rate": 9.71007518524054e-06, "loss": 0.4913, "step": 6332 }, { "epoch": 0.5943130630630631, "grad_norm": 3.3103924576609516, "learning_rate": 9.709891947274796e-06, "loss": 0.4887, "step": 6333 }, { "epoch": 0.5944069069069069, "grad_norm": 1.1572289280148877, "learning_rate": 9.709708653152457e-06, "loss": 0.4813, "step": 6334 }, { "epoch": 0.5945007507507507, "grad_norm": 1.4796922170011444, "learning_rate": 9.709525302875705e-06, "loss": 0.4855, "step": 6335 }, { "epoch": 0.5945945945945946, "grad_norm": 1.1013573518964452, "learning_rate": 9.70934189644673e-06, "loss": 0.4371, "step": 6336 }, { "epoch": 0.5946884384384384, "grad_norm": 1.1529052166305493, "learning_rate": 9.709158433867718e-06, "loss": 0.5024, "step": 6337 }, { "epoch": 0.5947822822822822, "grad_norm": 1.7013809685900074, "learning_rate": 9.708974915140854e-06, "loss": 0.4384, "step": 6338 }, { "epoch": 0.5948761261261262, "grad_norm": 1.1904421994377694, "learning_rate": 9.708791340268329e-06, "loss": 0.4746, "step": 6339 }, { "epoch": 0.59496996996997, "grad_norm": 1.1227828130955546, "learning_rate": 9.708607709252332e-06, "loss": 0.4456, "step": 6340 }, { "epoch": 0.5950638138138138, "grad_norm": 1.2159401202313411, "learning_rate": 9.708424022095048e-06, "loss": 0.4988, "step": 6341 }, { "epoch": 0.5951576576576577, "grad_norm": 1.0581637927010519, "learning_rate": 9.708240278798671e-06, "loss": 0.5405, "step": 6342 }, { "epoch": 0.5952515015015015, "grad_norm": 0.9684755379927417, "learning_rate": 9.708056479365394e-06, "loss": 0.4684, "step": 6343 }, { "epoch": 0.5953453453453453, "grad_norm": 0.887510968468236, "learning_rate": 9.707872623797401e-06, "loss": 0.4414, "step": 6344 }, { "epoch": 0.5954391891891891, "grad_norm": 1.2431348027447198, "learning_rate": 9.707688712096893e-06, "loss": 0.4886, "step": 6345 }, { "epoch": 0.5955330330330331, "grad_norm": 0.9606401947063811, "learning_rate": 9.707504744266056e-06, "loss": 0.4752, "step": 6346 }, { "epoch": 0.5956268768768769, "grad_norm": 1.1178533368997383, "learning_rate": 9.707320720307085e-06, "loss": 0.5116, "step": 6347 }, { "epoch": 0.5957207207207207, "grad_norm": 1.1486768266261143, "learning_rate": 9.707136640222176e-06, "loss": 0.4805, "step": 6348 }, { "epoch": 0.5958145645645646, "grad_norm": 1.041602512271657, "learning_rate": 9.706952504013525e-06, "loss": 0.4606, "step": 6349 }, { "epoch": 0.5959084084084084, "grad_norm": 1.2804330512312418, "learning_rate": 9.706768311683323e-06, "loss": 0.4677, "step": 6350 }, { "epoch": 0.5960022522522522, "grad_norm": 1.24387518672346, "learning_rate": 9.706584063233768e-06, "loss": 0.4956, "step": 6351 }, { "epoch": 0.5960960960960962, "grad_norm": 1.084849713570432, "learning_rate": 9.706399758667058e-06, "loss": 0.4603, "step": 6352 }, { "epoch": 0.59618993993994, "grad_norm": 1.0321803941605032, "learning_rate": 9.706215397985391e-06, "loss": 0.4154, "step": 6353 }, { "epoch": 0.5962837837837838, "grad_norm": 1.0481090186030648, "learning_rate": 9.706030981190962e-06, "loss": 0.4816, "step": 6354 }, { "epoch": 0.5963776276276276, "grad_norm": 1.083364215669499, "learning_rate": 9.705846508285973e-06, "loss": 0.5023, "step": 6355 }, { "epoch": 0.5964714714714715, "grad_norm": 1.074121931940951, "learning_rate": 9.705661979272623e-06, "loss": 0.4924, "step": 6356 }, { "epoch": 0.5965653153153153, "grad_norm": 1.0844478535862896, "learning_rate": 9.705477394153108e-06, "loss": 0.5, "step": 6357 }, { "epoch": 0.5966591591591591, "grad_norm": 1.2183934835254704, "learning_rate": 9.705292752929636e-06, "loss": 0.4936, "step": 6358 }, { "epoch": 0.596753003003003, "grad_norm": 1.0005642268031245, "learning_rate": 9.705108055604403e-06, "loss": 0.4373, "step": 6359 }, { "epoch": 0.5968468468468469, "grad_norm": 1.0206906554398956, "learning_rate": 9.704923302179613e-06, "loss": 0.4802, "step": 6360 }, { "epoch": 0.5969406906906907, "grad_norm": 1.0965601794600621, "learning_rate": 9.704738492657469e-06, "loss": 0.5166, "step": 6361 }, { "epoch": 0.5970345345345346, "grad_norm": 1.3168456302732567, "learning_rate": 9.704553627040175e-06, "loss": 0.4721, "step": 6362 }, { "epoch": 0.5971283783783784, "grad_norm": 1.0611119238296114, "learning_rate": 9.704368705329931e-06, "loss": 0.5158, "step": 6363 }, { "epoch": 0.5972222222222222, "grad_norm": 0.9507010069015062, "learning_rate": 9.70418372752895e-06, "loss": 0.4302, "step": 6364 }, { "epoch": 0.597316066066066, "grad_norm": 1.1464256312375862, "learning_rate": 9.70399869363943e-06, "loss": 0.5184, "step": 6365 }, { "epoch": 0.5974099099099099, "grad_norm": 0.924417075540796, "learning_rate": 9.70381360366358e-06, "loss": 0.4242, "step": 6366 }, { "epoch": 0.5975037537537538, "grad_norm": 1.036154082710868, "learning_rate": 9.703628457603607e-06, "loss": 0.4861, "step": 6367 }, { "epoch": 0.5975975975975976, "grad_norm": 1.0936176288264632, "learning_rate": 9.703443255461719e-06, "loss": 0.4954, "step": 6368 }, { "epoch": 0.5976914414414415, "grad_norm": 1.5288225692112438, "learning_rate": 9.703257997240124e-06, "loss": 0.4191, "step": 6369 }, { "epoch": 0.5977852852852853, "grad_norm": 1.099530902831772, "learning_rate": 9.703072682941027e-06, "loss": 0.529, "step": 6370 }, { "epoch": 0.5978791291291291, "grad_norm": 1.345183169408481, "learning_rate": 9.702887312566644e-06, "loss": 0.4771, "step": 6371 }, { "epoch": 0.597972972972973, "grad_norm": 1.0533843095116957, "learning_rate": 9.702701886119181e-06, "loss": 0.5153, "step": 6372 }, { "epoch": 0.5980668168168168, "grad_norm": 1.0434772266169594, "learning_rate": 9.702516403600851e-06, "loss": 0.4419, "step": 6373 }, { "epoch": 0.5981606606606606, "grad_norm": 2.7545944282987085, "learning_rate": 9.702330865013863e-06, "loss": 0.4547, "step": 6374 }, { "epoch": 0.5982545045045045, "grad_norm": 1.1204723933936147, "learning_rate": 9.702145270360433e-06, "loss": 0.4641, "step": 6375 }, { "epoch": 0.5983483483483484, "grad_norm": 1.0316062504468704, "learning_rate": 9.701959619642769e-06, "loss": 0.5125, "step": 6376 }, { "epoch": 0.5984421921921922, "grad_norm": 1.0048827631991615, "learning_rate": 9.701773912863088e-06, "loss": 0.4915, "step": 6377 }, { "epoch": 0.598536036036036, "grad_norm": 1.0148480393299297, "learning_rate": 9.701588150023603e-06, "loss": 0.4928, "step": 6378 }, { "epoch": 0.5986298798798799, "grad_norm": 1.104932958533323, "learning_rate": 9.70140233112653e-06, "loss": 0.4974, "step": 6379 }, { "epoch": 0.5987237237237237, "grad_norm": 0.9822262135423969, "learning_rate": 9.701216456174083e-06, "loss": 0.4475, "step": 6380 }, { "epoch": 0.5988175675675675, "grad_norm": 1.0768547442824619, "learning_rate": 9.701030525168479e-06, "loss": 0.4759, "step": 6381 }, { "epoch": 0.5989114114114115, "grad_norm": 0.9622060278818878, "learning_rate": 9.700844538111936e-06, "loss": 0.4388, "step": 6382 }, { "epoch": 0.5990052552552553, "grad_norm": 1.2269023447578415, "learning_rate": 9.700658495006668e-06, "loss": 0.4392, "step": 6383 }, { "epoch": 0.5990990990990991, "grad_norm": 1.013123880662709, "learning_rate": 9.700472395854896e-06, "loss": 0.4873, "step": 6384 }, { "epoch": 0.5991929429429429, "grad_norm": 1.5656686172651086, "learning_rate": 9.70028624065884e-06, "loss": 0.4921, "step": 6385 }, { "epoch": 0.5992867867867868, "grad_norm": 1.1181399453895362, "learning_rate": 9.700100029420716e-06, "loss": 0.4689, "step": 6386 }, { "epoch": 0.5993806306306306, "grad_norm": 1.3133651604454721, "learning_rate": 9.699913762142747e-06, "loss": 0.5112, "step": 6387 }, { "epoch": 0.5994744744744744, "grad_norm": 1.1545508011818844, "learning_rate": 9.699727438827153e-06, "loss": 0.5174, "step": 6388 }, { "epoch": 0.5995683183183184, "grad_norm": 1.0386980799443892, "learning_rate": 9.699541059476156e-06, "loss": 0.4727, "step": 6389 }, { "epoch": 0.5996621621621622, "grad_norm": 1.0968005158433798, "learning_rate": 9.69935462409198e-06, "loss": 0.4801, "step": 6390 }, { "epoch": 0.599756006006006, "grad_norm": 1.1149109608113101, "learning_rate": 9.699168132676841e-06, "loss": 0.5124, "step": 6391 }, { "epoch": 0.5998498498498499, "grad_norm": 1.147655213592995, "learning_rate": 9.698981585232969e-06, "loss": 0.4551, "step": 6392 }, { "epoch": 0.5999436936936937, "grad_norm": 2.0024511809008696, "learning_rate": 9.698794981762589e-06, "loss": 0.4103, "step": 6393 }, { "epoch": 0.6000375375375375, "grad_norm": 1.914878708474706, "learning_rate": 9.698608322267921e-06, "loss": 0.4534, "step": 6394 }, { "epoch": 0.6001313813813813, "grad_norm": 1.1604945115900098, "learning_rate": 9.698421606751194e-06, "loss": 0.4693, "step": 6395 }, { "epoch": 0.6002252252252253, "grad_norm": 1.1931232810191807, "learning_rate": 9.698234835214633e-06, "loss": 0.4944, "step": 6396 }, { "epoch": 0.6003190690690691, "grad_norm": 0.9380901376431007, "learning_rate": 9.698048007660465e-06, "loss": 0.4728, "step": 6397 }, { "epoch": 0.6004129129129129, "grad_norm": 1.1801544442206984, "learning_rate": 9.697861124090917e-06, "loss": 0.4626, "step": 6398 }, { "epoch": 0.6005067567567568, "grad_norm": 1.159290961022165, "learning_rate": 9.697674184508219e-06, "loss": 0.4746, "step": 6399 }, { "epoch": 0.6006006006006006, "grad_norm": 1.1069313710115771, "learning_rate": 9.697487188914598e-06, "loss": 0.4869, "step": 6400 }, { "epoch": 0.6006944444444444, "grad_norm": 1.4521480340122666, "learning_rate": 9.697300137312284e-06, "loss": 0.5357, "step": 6401 }, { "epoch": 0.6007882882882883, "grad_norm": 1.1182629594005882, "learning_rate": 9.697113029703509e-06, "loss": 0.4663, "step": 6402 }, { "epoch": 0.6008821321321322, "grad_norm": 0.9884748125068962, "learning_rate": 9.696925866090501e-06, "loss": 0.4085, "step": 6403 }, { "epoch": 0.600975975975976, "grad_norm": 1.1317360711539242, "learning_rate": 9.696738646475493e-06, "loss": 0.5504, "step": 6404 }, { "epoch": 0.6010698198198198, "grad_norm": 1.5156850332270881, "learning_rate": 9.69655137086072e-06, "loss": 0.4776, "step": 6405 }, { "epoch": 0.6011636636636637, "grad_norm": 0.94401777169918, "learning_rate": 9.69636403924841e-06, "loss": 0.4362, "step": 6406 }, { "epoch": 0.6012575075075075, "grad_norm": 1.0850276012897513, "learning_rate": 9.6961766516408e-06, "loss": 0.5138, "step": 6407 }, { "epoch": 0.6013513513513513, "grad_norm": 1.1464621950961325, "learning_rate": 9.695989208040121e-06, "loss": 0.5041, "step": 6408 }, { "epoch": 0.6014451951951952, "grad_norm": 1.2300685766203898, "learning_rate": 9.695801708448613e-06, "loss": 0.4172, "step": 6409 }, { "epoch": 0.601539039039039, "grad_norm": 1.0226660294732457, "learning_rate": 9.695614152868507e-06, "loss": 0.4303, "step": 6410 }, { "epoch": 0.6016328828828829, "grad_norm": 1.1206299074269117, "learning_rate": 9.69542654130204e-06, "loss": 0.4438, "step": 6411 }, { "epoch": 0.6017267267267268, "grad_norm": 1.2504624764126182, "learning_rate": 9.695238873751452e-06, "loss": 0.4504, "step": 6412 }, { "epoch": 0.6018205705705706, "grad_norm": 1.133466464898301, "learning_rate": 9.695051150218976e-06, "loss": 0.466, "step": 6413 }, { "epoch": 0.6019144144144144, "grad_norm": 1.073427005851622, "learning_rate": 9.694863370706852e-06, "loss": 0.5003, "step": 6414 }, { "epoch": 0.6020082582582582, "grad_norm": 1.132656631914525, "learning_rate": 9.694675535217321e-06, "loss": 0.4778, "step": 6415 }, { "epoch": 0.6021021021021021, "grad_norm": 1.302350740095634, "learning_rate": 9.694487643752622e-06, "loss": 0.4705, "step": 6416 }, { "epoch": 0.6021959459459459, "grad_norm": 1.0624978619250938, "learning_rate": 9.694299696314993e-06, "loss": 0.5447, "step": 6417 }, { "epoch": 0.6022897897897898, "grad_norm": 0.9929232201001503, "learning_rate": 9.694111692906676e-06, "loss": 0.5044, "step": 6418 }, { "epoch": 0.6023836336336337, "grad_norm": 1.0338154442072527, "learning_rate": 9.693923633529914e-06, "loss": 0.476, "step": 6419 }, { "epoch": 0.6024774774774775, "grad_norm": 0.9025557571593749, "learning_rate": 9.693735518186948e-06, "loss": 0.4399, "step": 6420 }, { "epoch": 0.6025713213213213, "grad_norm": 1.0691685978887524, "learning_rate": 9.693547346880019e-06, "loss": 0.4871, "step": 6421 }, { "epoch": 0.6026651651651652, "grad_norm": 0.9485679130018978, "learning_rate": 9.693359119611375e-06, "loss": 0.4728, "step": 6422 }, { "epoch": 0.602759009009009, "grad_norm": 1.0849552926866344, "learning_rate": 9.693170836383256e-06, "loss": 0.4729, "step": 6423 }, { "epoch": 0.6028528528528528, "grad_norm": 1.1700862972099608, "learning_rate": 9.692982497197909e-06, "loss": 0.4956, "step": 6424 }, { "epoch": 0.6029466966966966, "grad_norm": 1.2341329162805312, "learning_rate": 9.692794102057582e-06, "loss": 0.4903, "step": 6425 }, { "epoch": 0.6030405405405406, "grad_norm": 1.0634034289880465, "learning_rate": 9.692605650964515e-06, "loss": 0.5221, "step": 6426 }, { "epoch": 0.6031343843843844, "grad_norm": 1.1900982354849452, "learning_rate": 9.692417143920961e-06, "loss": 0.4862, "step": 6427 }, { "epoch": 0.6032282282282282, "grad_norm": 1.4341340501260054, "learning_rate": 9.692228580929165e-06, "loss": 0.5617, "step": 6428 }, { "epoch": 0.6033220720720721, "grad_norm": 1.239650954259503, "learning_rate": 9.692039961991373e-06, "loss": 0.4549, "step": 6429 }, { "epoch": 0.6034159159159159, "grad_norm": 1.2731077228884993, "learning_rate": 9.691851287109838e-06, "loss": 0.4839, "step": 6430 }, { "epoch": 0.6035097597597597, "grad_norm": 1.0717870843068182, "learning_rate": 9.691662556286808e-06, "loss": 0.5132, "step": 6431 }, { "epoch": 0.6036036036036037, "grad_norm": 1.3179889701909557, "learning_rate": 9.691473769524533e-06, "loss": 0.4699, "step": 6432 }, { "epoch": 0.6036974474474475, "grad_norm": 1.2512542475207065, "learning_rate": 9.691284926825265e-06, "loss": 0.4731, "step": 6433 }, { "epoch": 0.6037912912912913, "grad_norm": 1.7404969915337585, "learning_rate": 9.691096028191252e-06, "loss": 0.485, "step": 6434 }, { "epoch": 0.6038851351351351, "grad_norm": 1.1991057034552421, "learning_rate": 9.69090707362475e-06, "loss": 0.4699, "step": 6435 }, { "epoch": 0.603978978978979, "grad_norm": 1.5378401182970656, "learning_rate": 9.690718063128013e-06, "loss": 0.4784, "step": 6436 }, { "epoch": 0.6040728228228228, "grad_norm": 1.0253441256745606, "learning_rate": 9.69052899670329e-06, "loss": 0.5265, "step": 6437 }, { "epoch": 0.6041666666666666, "grad_norm": 1.214716184078357, "learning_rate": 9.690339874352839e-06, "loss": 0.4769, "step": 6438 }, { "epoch": 0.6042605105105106, "grad_norm": 1.2162824285167655, "learning_rate": 9.690150696078914e-06, "loss": 0.5106, "step": 6439 }, { "epoch": 0.6043543543543544, "grad_norm": 1.084643380014021, "learning_rate": 9.689961461883768e-06, "loss": 0.4544, "step": 6440 }, { "epoch": 0.6044481981981982, "grad_norm": 16.24752738017614, "learning_rate": 9.689772171769662e-06, "loss": 0.5029, "step": 6441 }, { "epoch": 0.6045420420420421, "grad_norm": 1.0388083883267962, "learning_rate": 9.689582825738851e-06, "loss": 0.4575, "step": 6442 }, { "epoch": 0.6046358858858859, "grad_norm": 1.1345696369843508, "learning_rate": 9.689393423793589e-06, "loss": 0.4961, "step": 6443 }, { "epoch": 0.6047297297297297, "grad_norm": 1.9168274202594195, "learning_rate": 9.68920396593614e-06, "loss": 0.5331, "step": 6444 }, { "epoch": 0.6048235735735735, "grad_norm": 1.0340486351624, "learning_rate": 9.689014452168759e-06, "loss": 0.5175, "step": 6445 }, { "epoch": 0.6049174174174174, "grad_norm": 1.6699712657314858, "learning_rate": 9.688824882493706e-06, "loss": 0.4905, "step": 6446 }, { "epoch": 0.6050112612612613, "grad_norm": 2.6386387233715207, "learning_rate": 9.688635256913243e-06, "loss": 0.4765, "step": 6447 }, { "epoch": 0.6051051051051051, "grad_norm": 1.2590750423060901, "learning_rate": 9.68844557542963e-06, "loss": 0.4691, "step": 6448 }, { "epoch": 0.605198948948949, "grad_norm": 1.0288460370254615, "learning_rate": 9.688255838045128e-06, "loss": 0.4742, "step": 6449 }, { "epoch": 0.6052927927927928, "grad_norm": 1.408077227028168, "learning_rate": 9.688066044762002e-06, "loss": 0.445, "step": 6450 }, { "epoch": 0.6053866366366366, "grad_norm": 1.5838521036064024, "learning_rate": 9.687876195582512e-06, "loss": 0.4841, "step": 6451 }, { "epoch": 0.6054804804804805, "grad_norm": 1.1692360100335129, "learning_rate": 9.68768629050892e-06, "loss": 0.4709, "step": 6452 }, { "epoch": 0.6055743243243243, "grad_norm": 1.0460503559127, "learning_rate": 9.687496329543495e-06, "loss": 0.437, "step": 6453 }, { "epoch": 0.6056681681681682, "grad_norm": 1.3130481435641665, "learning_rate": 9.687306312688498e-06, "loss": 0.5246, "step": 6454 }, { "epoch": 0.605762012012012, "grad_norm": 1.1283185774037026, "learning_rate": 9.687116239946197e-06, "loss": 0.5138, "step": 6455 }, { "epoch": 0.6058558558558559, "grad_norm": 1.0620159038417396, "learning_rate": 9.686926111318859e-06, "loss": 0.4536, "step": 6456 }, { "epoch": 0.6059496996996997, "grad_norm": 2.2303033379987958, "learning_rate": 9.686735926808749e-06, "loss": 0.4922, "step": 6457 }, { "epoch": 0.6060435435435435, "grad_norm": 1.1540340670823592, "learning_rate": 9.686545686418132e-06, "loss": 0.5169, "step": 6458 }, { "epoch": 0.6061373873873874, "grad_norm": 1.0404503221755184, "learning_rate": 9.68635539014928e-06, "loss": 0.4732, "step": 6459 }, { "epoch": 0.6062312312312312, "grad_norm": 0.9161272502781551, "learning_rate": 9.686165038004462e-06, "loss": 0.4352, "step": 6460 }, { "epoch": 0.606325075075075, "grad_norm": 1.2532454811439806, "learning_rate": 9.685974629985947e-06, "loss": 0.4907, "step": 6461 }, { "epoch": 0.606418918918919, "grad_norm": 1.284418965021624, "learning_rate": 9.685784166096006e-06, "loss": 0.4741, "step": 6462 }, { "epoch": 0.6065127627627628, "grad_norm": 1.2145940682416767, "learning_rate": 9.685593646336908e-06, "loss": 0.4937, "step": 6463 }, { "epoch": 0.6066066066066066, "grad_norm": 0.9313661795173204, "learning_rate": 9.685403070710924e-06, "loss": 0.4429, "step": 6464 }, { "epoch": 0.6067004504504504, "grad_norm": 1.0439083973342587, "learning_rate": 9.68521243922033e-06, "loss": 0.4931, "step": 6465 }, { "epoch": 0.6067942942942943, "grad_norm": 1.058377229744849, "learning_rate": 9.685021751867397e-06, "loss": 0.4708, "step": 6466 }, { "epoch": 0.6068881381381381, "grad_norm": 1.313904928093253, "learning_rate": 9.684831008654395e-06, "loss": 0.5202, "step": 6467 }, { "epoch": 0.6069819819819819, "grad_norm": 0.9792384281641154, "learning_rate": 9.684640209583605e-06, "loss": 0.4567, "step": 6468 }, { "epoch": 0.6070758258258259, "grad_norm": 1.3436028249885403, "learning_rate": 9.684449354657295e-06, "loss": 0.5601, "step": 6469 }, { "epoch": 0.6071696696696697, "grad_norm": 1.2204112513920748, "learning_rate": 9.684258443877746e-06, "loss": 0.5344, "step": 6470 }, { "epoch": 0.6072635135135135, "grad_norm": 1.4679655604902884, "learning_rate": 9.684067477247232e-06, "loss": 0.5607, "step": 6471 }, { "epoch": 0.6073573573573574, "grad_norm": 3.984920848521494, "learning_rate": 9.68387645476803e-06, "loss": 0.5041, "step": 6472 }, { "epoch": 0.6074512012012012, "grad_norm": 1.3276062299646434, "learning_rate": 9.683685376442418e-06, "loss": 0.4461, "step": 6473 }, { "epoch": 0.607545045045045, "grad_norm": 1.1582651307076455, "learning_rate": 9.683494242272675e-06, "loss": 0.5025, "step": 6474 }, { "epoch": 0.6076388888888888, "grad_norm": 1.1984466233054358, "learning_rate": 9.683303052261078e-06, "loss": 0.485, "step": 6475 }, { "epoch": 0.6077327327327328, "grad_norm": 1.1619305153661459, "learning_rate": 9.683111806409908e-06, "loss": 0.4668, "step": 6476 }, { "epoch": 0.6078265765765766, "grad_norm": 1.241707866518683, "learning_rate": 9.682920504721444e-06, "loss": 0.462, "step": 6477 }, { "epoch": 0.6079204204204204, "grad_norm": 2.477053885322046, "learning_rate": 9.682729147197967e-06, "loss": 0.4714, "step": 6478 }, { "epoch": 0.6080142642642643, "grad_norm": 1.7790542755850847, "learning_rate": 9.682537733841761e-06, "loss": 0.5233, "step": 6479 }, { "epoch": 0.6081081081081081, "grad_norm": 1.2106029522799866, "learning_rate": 9.682346264655105e-06, "loss": 0.4832, "step": 6480 }, { "epoch": 0.6082019519519519, "grad_norm": 1.1153307003643762, "learning_rate": 9.682154739640285e-06, "loss": 0.4974, "step": 6481 }, { "epoch": 0.6082957957957958, "grad_norm": 1.3182800696251842, "learning_rate": 9.681963158799582e-06, "loss": 0.5064, "step": 6482 }, { "epoch": 0.6083896396396397, "grad_norm": 1.0193596025408318, "learning_rate": 9.681771522135281e-06, "loss": 0.458, "step": 6483 }, { "epoch": 0.6084834834834835, "grad_norm": 1.058482829242692, "learning_rate": 9.681579829649668e-06, "loss": 0.4673, "step": 6484 }, { "epoch": 0.6085773273273273, "grad_norm": 1.0631705322103304, "learning_rate": 9.681388081345027e-06, "loss": 0.4721, "step": 6485 }, { "epoch": 0.6086711711711712, "grad_norm": 1.0233025945555827, "learning_rate": 9.681196277223645e-06, "loss": 0.4408, "step": 6486 }, { "epoch": 0.608765015015015, "grad_norm": 1.1260517527786125, "learning_rate": 9.681004417287809e-06, "loss": 0.4618, "step": 6487 }, { "epoch": 0.6088588588588588, "grad_norm": 1.2399243419461428, "learning_rate": 9.680812501539807e-06, "loss": 0.4727, "step": 6488 }, { "epoch": 0.6089527027027027, "grad_norm": 1.0576398090929757, "learning_rate": 9.680620529981925e-06, "loss": 0.5106, "step": 6489 }, { "epoch": 0.6090465465465466, "grad_norm": 1.1199040945935295, "learning_rate": 9.680428502616455e-06, "loss": 0.525, "step": 6490 }, { "epoch": 0.6091403903903904, "grad_norm": 1.1974928045342834, "learning_rate": 9.680236419445685e-06, "loss": 0.5029, "step": 6491 }, { "epoch": 0.6092342342342343, "grad_norm": 1.16085264254149, "learning_rate": 9.680044280471903e-06, "loss": 0.4676, "step": 6492 }, { "epoch": 0.6093280780780781, "grad_norm": 1.3190184361556367, "learning_rate": 9.679852085697405e-06, "loss": 0.4479, "step": 6493 }, { "epoch": 0.6094219219219219, "grad_norm": 1.1957879457921814, "learning_rate": 9.67965983512448e-06, "loss": 0.4819, "step": 6494 }, { "epoch": 0.6095157657657657, "grad_norm": 1.2237834534738434, "learning_rate": 9.679467528755419e-06, "loss": 0.4689, "step": 6495 }, { "epoch": 0.6096096096096096, "grad_norm": 1.1017131294542508, "learning_rate": 9.679275166592516e-06, "loss": 0.5375, "step": 6496 }, { "epoch": 0.6097034534534534, "grad_norm": 2.231624762509973, "learning_rate": 9.679082748638064e-06, "loss": 0.465, "step": 6497 }, { "epoch": 0.6097972972972973, "grad_norm": 1.056574546212893, "learning_rate": 9.678890274894357e-06, "loss": 0.4521, "step": 6498 }, { "epoch": 0.6098911411411412, "grad_norm": 1.0876806704401842, "learning_rate": 9.678697745363692e-06, "loss": 0.5098, "step": 6499 }, { "epoch": 0.609984984984985, "grad_norm": 0.9671795031652355, "learning_rate": 9.678505160048364e-06, "loss": 0.4391, "step": 6500 }, { "epoch": 0.6100788288288288, "grad_norm": 1.1901248484787363, "learning_rate": 9.678312518950666e-06, "loss": 0.493, "step": 6501 }, { "epoch": 0.6101726726726727, "grad_norm": 0.912404771548737, "learning_rate": 9.678119822072899e-06, "loss": 0.4893, "step": 6502 }, { "epoch": 0.6102665165165165, "grad_norm": 1.3529544115718062, "learning_rate": 9.677927069417358e-06, "loss": 0.4806, "step": 6503 }, { "epoch": 0.6103603603603603, "grad_norm": 1.0568956480704452, "learning_rate": 9.677734260986342e-06, "loss": 0.4896, "step": 6504 }, { "epoch": 0.6104542042042042, "grad_norm": 1.2820745237799915, "learning_rate": 9.67754139678215e-06, "loss": 0.5165, "step": 6505 }, { "epoch": 0.6105480480480481, "grad_norm": 1.1529708032115502, "learning_rate": 9.677348476807082e-06, "loss": 0.463, "step": 6506 }, { "epoch": 0.6106418918918919, "grad_norm": 0.9867494812349115, "learning_rate": 9.677155501063436e-06, "loss": 0.4409, "step": 6507 }, { "epoch": 0.6107357357357357, "grad_norm": 2.9149381229961535, "learning_rate": 9.676962469553516e-06, "loss": 0.4661, "step": 6508 }, { "epoch": 0.6108295795795796, "grad_norm": 1.3809421177338743, "learning_rate": 9.67676938227962e-06, "loss": 0.5336, "step": 6509 }, { "epoch": 0.6109234234234234, "grad_norm": 0.9866984482474213, "learning_rate": 9.676576239244055e-06, "loss": 0.4725, "step": 6510 }, { "epoch": 0.6110172672672672, "grad_norm": 1.0572277598380608, "learning_rate": 9.67638304044912e-06, "loss": 0.5017, "step": 6511 }, { "epoch": 0.6111111111111112, "grad_norm": 1.201117109827929, "learning_rate": 9.676189785897118e-06, "loss": 0.4709, "step": 6512 }, { "epoch": 0.611204954954955, "grad_norm": 1.5315820997492091, "learning_rate": 9.675996475590358e-06, "loss": 0.5209, "step": 6513 }, { "epoch": 0.6112987987987988, "grad_norm": 1.311190148161062, "learning_rate": 9.67580310953114e-06, "loss": 0.4711, "step": 6514 }, { "epoch": 0.6113926426426426, "grad_norm": 1.1224943655625728, "learning_rate": 9.675609687721771e-06, "loss": 0.445, "step": 6515 }, { "epoch": 0.6114864864864865, "grad_norm": 0.9671854380098195, "learning_rate": 9.675416210164557e-06, "loss": 0.4722, "step": 6516 }, { "epoch": 0.6115803303303303, "grad_norm": 1.1551663447188731, "learning_rate": 9.675222676861806e-06, "loss": 0.5175, "step": 6517 }, { "epoch": 0.6116741741741741, "grad_norm": 0.9939148226243738, "learning_rate": 9.675029087815825e-06, "loss": 0.4105, "step": 6518 }, { "epoch": 0.6117680180180181, "grad_norm": 1.0918116658318, "learning_rate": 9.674835443028921e-06, "loss": 0.4631, "step": 6519 }, { "epoch": 0.6118618618618619, "grad_norm": 1.129442565913835, "learning_rate": 9.674641742503406e-06, "loss": 0.4706, "step": 6520 }, { "epoch": 0.6119557057057057, "grad_norm": 1.2166936267598076, "learning_rate": 9.674447986241586e-06, "loss": 0.4539, "step": 6521 }, { "epoch": 0.6120495495495496, "grad_norm": 0.951853290617988, "learning_rate": 9.674254174245772e-06, "loss": 0.4203, "step": 6522 }, { "epoch": 0.6121433933933934, "grad_norm": 1.1944223880137663, "learning_rate": 9.674060306518274e-06, "loss": 0.5137, "step": 6523 }, { "epoch": 0.6122372372372372, "grad_norm": 1.3420085455143655, "learning_rate": 9.673866383061407e-06, "loss": 0.5201, "step": 6524 }, { "epoch": 0.612331081081081, "grad_norm": 1.0225037072679857, "learning_rate": 9.673672403877481e-06, "loss": 0.4965, "step": 6525 }, { "epoch": 0.612424924924925, "grad_norm": 1.05101106309684, "learning_rate": 9.673478368968808e-06, "loss": 0.4897, "step": 6526 }, { "epoch": 0.6125187687687688, "grad_norm": 1.1153190995053188, "learning_rate": 9.673284278337702e-06, "loss": 0.4555, "step": 6527 }, { "epoch": 0.6126126126126126, "grad_norm": 1.0788839849938323, "learning_rate": 9.67309013198648e-06, "loss": 0.5087, "step": 6528 }, { "epoch": 0.6127064564564565, "grad_norm": 1.3327594114242407, "learning_rate": 9.672895929917452e-06, "loss": 0.4685, "step": 6529 }, { "epoch": 0.6128003003003003, "grad_norm": 1.18267862871735, "learning_rate": 9.672701672132937e-06, "loss": 0.5011, "step": 6530 }, { "epoch": 0.6128941441441441, "grad_norm": 0.9684266048860017, "learning_rate": 9.67250735863525e-06, "loss": 0.4881, "step": 6531 }, { "epoch": 0.612987987987988, "grad_norm": 1.2508267868611542, "learning_rate": 9.672312989426706e-06, "loss": 0.4615, "step": 6532 }, { "epoch": 0.6130818318318318, "grad_norm": 10.172751555640122, "learning_rate": 9.672118564509627e-06, "loss": 0.462, "step": 6533 }, { "epoch": 0.6131756756756757, "grad_norm": 1.0423183897775623, "learning_rate": 9.671924083886327e-06, "loss": 0.5085, "step": 6534 }, { "epoch": 0.6132695195195195, "grad_norm": 1.1853450071543705, "learning_rate": 9.671729547559126e-06, "loss": 0.5332, "step": 6535 }, { "epoch": 0.6133633633633634, "grad_norm": 0.9700828856259426, "learning_rate": 9.671534955530345e-06, "loss": 0.4926, "step": 6536 }, { "epoch": 0.6134572072072072, "grad_norm": 0.942285058161185, "learning_rate": 9.671340307802301e-06, "loss": 0.4624, "step": 6537 }, { "epoch": 0.613551051051051, "grad_norm": 0.9368232224756287, "learning_rate": 9.671145604377318e-06, "loss": 0.4248, "step": 6538 }, { "epoch": 0.6136448948948949, "grad_norm": 1.4124585060976813, "learning_rate": 9.670950845257716e-06, "loss": 0.4153, "step": 6539 }, { "epoch": 0.6137387387387387, "grad_norm": 2.1261958298352837, "learning_rate": 9.670756030445819e-06, "loss": 0.5257, "step": 6540 }, { "epoch": 0.6138325825825826, "grad_norm": 1.2111974262346037, "learning_rate": 9.670561159943946e-06, "loss": 0.4944, "step": 6541 }, { "epoch": 0.6139264264264265, "grad_norm": 1.1378297419205625, "learning_rate": 9.670366233754422e-06, "loss": 0.5146, "step": 6542 }, { "epoch": 0.6140202702702703, "grad_norm": 0.9978822532120852, "learning_rate": 9.670171251879572e-06, "loss": 0.494, "step": 6543 }, { "epoch": 0.6141141141141141, "grad_norm": 0.9196902607133398, "learning_rate": 9.669976214321723e-06, "loss": 0.4856, "step": 6544 }, { "epoch": 0.6142079579579579, "grad_norm": 1.474125837261228, "learning_rate": 9.669781121083195e-06, "loss": 0.4806, "step": 6545 }, { "epoch": 0.6143018018018018, "grad_norm": 1.6942221270831288, "learning_rate": 9.66958597216632e-06, "loss": 0.4834, "step": 6546 }, { "epoch": 0.6143956456456456, "grad_norm": 1.1732020817381217, "learning_rate": 9.669390767573419e-06, "loss": 0.5119, "step": 6547 }, { "epoch": 0.6144894894894894, "grad_norm": 1.169206289003332, "learning_rate": 9.669195507306823e-06, "loss": 0.4852, "step": 6548 }, { "epoch": 0.6145833333333334, "grad_norm": 1.2483163342705597, "learning_rate": 9.66900019136886e-06, "loss": 0.5241, "step": 6549 }, { "epoch": 0.6146771771771772, "grad_norm": 1.1041520125357704, "learning_rate": 9.668804819761858e-06, "loss": 0.503, "step": 6550 }, { "epoch": 0.614771021021021, "grad_norm": 1.4912996357421824, "learning_rate": 9.668609392488148e-06, "loss": 0.4881, "step": 6551 }, { "epoch": 0.6148648648648649, "grad_norm": 1.1192259725456053, "learning_rate": 9.668413909550058e-06, "loss": 0.4521, "step": 6552 }, { "epoch": 0.6149587087087087, "grad_norm": 0.9708981498742343, "learning_rate": 9.668218370949918e-06, "loss": 0.4663, "step": 6553 }, { "epoch": 0.6150525525525525, "grad_norm": 0.9801515671991609, "learning_rate": 9.668022776690063e-06, "loss": 0.4031, "step": 6554 }, { "epoch": 0.6151463963963963, "grad_norm": 0.9441396742863926, "learning_rate": 9.667827126772822e-06, "loss": 0.4524, "step": 6555 }, { "epoch": 0.6152402402402403, "grad_norm": 1.059699054089694, "learning_rate": 9.66763142120053e-06, "loss": 0.4751, "step": 6556 }, { "epoch": 0.6153340840840841, "grad_norm": 1.1526928601311066, "learning_rate": 9.667435659975517e-06, "loss": 0.5282, "step": 6557 }, { "epoch": 0.6154279279279279, "grad_norm": 1.0774835958673676, "learning_rate": 9.66723984310012e-06, "loss": 0.4638, "step": 6558 }, { "epoch": 0.6155217717717718, "grad_norm": 1.0020330805336544, "learning_rate": 9.667043970576675e-06, "loss": 0.4418, "step": 6559 }, { "epoch": 0.6156156156156156, "grad_norm": 1.3996098180390895, "learning_rate": 9.666848042407513e-06, "loss": 0.4488, "step": 6560 }, { "epoch": 0.6157094594594594, "grad_norm": 1.0583548569249566, "learning_rate": 9.666652058594973e-06, "loss": 0.5067, "step": 6561 }, { "epoch": 0.6158033033033034, "grad_norm": 1.0904316285978195, "learning_rate": 9.666456019141392e-06, "loss": 0.4697, "step": 6562 }, { "epoch": 0.6158971471471472, "grad_norm": 1.0703098788952499, "learning_rate": 9.666259924049107e-06, "loss": 0.5051, "step": 6563 }, { "epoch": 0.615990990990991, "grad_norm": 1.32308159476202, "learning_rate": 9.666063773320455e-06, "loss": 0.4478, "step": 6564 }, { "epoch": 0.6160848348348348, "grad_norm": 1.087585483500024, "learning_rate": 9.665867566957776e-06, "loss": 0.4643, "step": 6565 }, { "epoch": 0.6161786786786787, "grad_norm": 0.8874782877016956, "learning_rate": 9.665671304963408e-06, "loss": 0.4487, "step": 6566 }, { "epoch": 0.6162725225225225, "grad_norm": 1.5795745635585268, "learning_rate": 9.665474987339692e-06, "loss": 0.5035, "step": 6567 }, { "epoch": 0.6163663663663663, "grad_norm": 1.4512829195366497, "learning_rate": 9.66527861408897e-06, "loss": 0.5499, "step": 6568 }, { "epoch": 0.6164602102102102, "grad_norm": 1.0740131172665097, "learning_rate": 9.665082185213582e-06, "loss": 0.5149, "step": 6569 }, { "epoch": 0.6165540540540541, "grad_norm": 1.0215130572964832, "learning_rate": 9.664885700715866e-06, "loss": 0.4436, "step": 6570 }, { "epoch": 0.6166478978978979, "grad_norm": 1.2012092261250364, "learning_rate": 9.664689160598173e-06, "loss": 0.4722, "step": 6571 }, { "epoch": 0.6167417417417418, "grad_norm": 1.0959274060131767, "learning_rate": 9.664492564862842e-06, "loss": 0.4631, "step": 6572 }, { "epoch": 0.6168355855855856, "grad_norm": 3.2937227375194134, "learning_rate": 9.664295913512216e-06, "loss": 0.5148, "step": 6573 }, { "epoch": 0.6169294294294294, "grad_norm": 1.659289363769264, "learning_rate": 9.66409920654864e-06, "loss": 0.4695, "step": 6574 }, { "epoch": 0.6170232732732732, "grad_norm": 1.0940832964790188, "learning_rate": 9.663902443974462e-06, "loss": 0.4977, "step": 6575 }, { "epoch": 0.6171171171171171, "grad_norm": 1.2040684102557107, "learning_rate": 9.663705625792027e-06, "loss": 0.4795, "step": 6576 }, { "epoch": 0.617210960960961, "grad_norm": 1.1799388177102499, "learning_rate": 9.663508752003679e-06, "loss": 0.4587, "step": 6577 }, { "epoch": 0.6173048048048048, "grad_norm": 1.0619412297115853, "learning_rate": 9.663311822611767e-06, "loss": 0.4226, "step": 6578 }, { "epoch": 0.6173986486486487, "grad_norm": 1.401522519951129, "learning_rate": 9.66311483761864e-06, "loss": 0.4802, "step": 6579 }, { "epoch": 0.6174924924924925, "grad_norm": 2.3755836358731157, "learning_rate": 9.662917797026645e-06, "loss": 0.4936, "step": 6580 }, { "epoch": 0.6175863363363363, "grad_norm": 1.273357752851007, "learning_rate": 9.662720700838135e-06, "loss": 0.5055, "step": 6581 }, { "epoch": 0.6176801801801802, "grad_norm": 1.0281357454532536, "learning_rate": 9.662523549055454e-06, "loss": 0.4567, "step": 6582 }, { "epoch": 0.617774024024024, "grad_norm": 0.9421124046978612, "learning_rate": 9.662326341680958e-06, "loss": 0.4734, "step": 6583 }, { "epoch": 0.6178678678678678, "grad_norm": 1.2164190942118742, "learning_rate": 9.662129078716996e-06, "loss": 0.4146, "step": 6584 }, { "epoch": 0.6179617117117117, "grad_norm": 1.2589416502342305, "learning_rate": 9.66193176016592e-06, "loss": 0.4843, "step": 6585 }, { "epoch": 0.6180555555555556, "grad_norm": 1.007508830096717, "learning_rate": 9.661734386030084e-06, "loss": 0.4364, "step": 6586 }, { "epoch": 0.6181493993993994, "grad_norm": 1.5869308396605488, "learning_rate": 9.661536956311838e-06, "loss": 0.4484, "step": 6587 }, { "epoch": 0.6182432432432432, "grad_norm": 2.903672676588255, "learning_rate": 9.66133947101354e-06, "loss": 0.5109, "step": 6588 }, { "epoch": 0.6183370870870871, "grad_norm": 1.3397838185280568, "learning_rate": 9.661141930137544e-06, "loss": 0.4484, "step": 6589 }, { "epoch": 0.6184309309309309, "grad_norm": 1.020246079762199, "learning_rate": 9.660944333686202e-06, "loss": 0.4414, "step": 6590 }, { "epoch": 0.6185247747747747, "grad_norm": 1.5296917075453982, "learning_rate": 9.660746681661871e-06, "loss": 0.5081, "step": 6591 }, { "epoch": 0.6186186186186187, "grad_norm": 1.2148424701095728, "learning_rate": 9.660548974066911e-06, "loss": 0.5103, "step": 6592 }, { "epoch": 0.6187124624624625, "grad_norm": 1.0077095333691402, "learning_rate": 9.660351210903678e-06, "loss": 0.5211, "step": 6593 }, { "epoch": 0.6188063063063063, "grad_norm": 1.186370169784287, "learning_rate": 9.660153392174528e-06, "loss": 0.4621, "step": 6594 }, { "epoch": 0.6189001501501501, "grad_norm": 1.0762679080589144, "learning_rate": 9.65995551788182e-06, "loss": 0.4772, "step": 6595 }, { "epoch": 0.618993993993994, "grad_norm": 1.3773556340399509, "learning_rate": 9.659757588027913e-06, "loss": 0.5503, "step": 6596 }, { "epoch": 0.6190878378378378, "grad_norm": 1.2417053062873045, "learning_rate": 9.659559602615169e-06, "loss": 0.4542, "step": 6597 }, { "epoch": 0.6191816816816816, "grad_norm": 1.9692525719007132, "learning_rate": 9.659361561645947e-06, "loss": 0.4432, "step": 6598 }, { "epoch": 0.6192755255255256, "grad_norm": 0.9924225996785085, "learning_rate": 9.659163465122611e-06, "loss": 0.4487, "step": 6599 }, { "epoch": 0.6193693693693694, "grad_norm": 1.0600993410983806, "learning_rate": 9.658965313047519e-06, "loss": 0.4494, "step": 6600 }, { "epoch": 0.6194632132132132, "grad_norm": 0.9603820507970939, "learning_rate": 9.658767105423035e-06, "loss": 0.5036, "step": 6601 }, { "epoch": 0.6195570570570571, "grad_norm": 1.0664818640091156, "learning_rate": 9.658568842251523e-06, "loss": 0.4364, "step": 6602 }, { "epoch": 0.6196509009009009, "grad_norm": 1.2675386916931852, "learning_rate": 9.658370523535346e-06, "loss": 0.5043, "step": 6603 }, { "epoch": 0.6197447447447447, "grad_norm": 0.9977395732028079, "learning_rate": 9.658172149276869e-06, "loss": 0.4467, "step": 6604 }, { "epoch": 0.6198385885885885, "grad_norm": 1.1366326252952978, "learning_rate": 9.657973719478456e-06, "loss": 0.4443, "step": 6605 }, { "epoch": 0.6199324324324325, "grad_norm": 1.5144599295873016, "learning_rate": 9.657775234142478e-06, "loss": 0.4452, "step": 6606 }, { "epoch": 0.6200262762762763, "grad_norm": 1.015631445917436, "learning_rate": 9.657576693271293e-06, "loss": 0.4617, "step": 6607 }, { "epoch": 0.6201201201201201, "grad_norm": 1.100247809079172, "learning_rate": 9.657378096867276e-06, "loss": 0.4075, "step": 6608 }, { "epoch": 0.620213963963964, "grad_norm": 1.0488994261316713, "learning_rate": 9.65717944493279e-06, "loss": 0.491, "step": 6609 }, { "epoch": 0.6203078078078078, "grad_norm": 0.9780182211843477, "learning_rate": 9.656980737470207e-06, "loss": 0.4835, "step": 6610 }, { "epoch": 0.6204016516516516, "grad_norm": 1.123791764046968, "learning_rate": 9.656781974481894e-06, "loss": 0.5074, "step": 6611 }, { "epoch": 0.6204954954954955, "grad_norm": 1.0147857634402448, "learning_rate": 9.656583155970221e-06, "loss": 0.4766, "step": 6612 }, { "epoch": 0.6205893393393394, "grad_norm": 1.216354344235712, "learning_rate": 9.656384281937558e-06, "loss": 0.4673, "step": 6613 }, { "epoch": 0.6206831831831832, "grad_norm": 1.2340124843694713, "learning_rate": 9.656185352386279e-06, "loss": 0.4539, "step": 6614 }, { "epoch": 0.620777027027027, "grad_norm": 1.0704458668854195, "learning_rate": 9.655986367318753e-06, "loss": 0.4978, "step": 6615 }, { "epoch": 0.6208708708708709, "grad_norm": 1.7725050408151342, "learning_rate": 9.655787326737355e-06, "loss": 0.4925, "step": 6616 }, { "epoch": 0.6209647147147147, "grad_norm": 1.0501206178419682, "learning_rate": 9.655588230644454e-06, "loss": 0.4781, "step": 6617 }, { "epoch": 0.6210585585585585, "grad_norm": 1.0414307274025605, "learning_rate": 9.65538907904243e-06, "loss": 0.4502, "step": 6618 }, { "epoch": 0.6211524024024024, "grad_norm": 1.3811522271571808, "learning_rate": 9.655189871933653e-06, "loss": 0.4397, "step": 6619 }, { "epoch": 0.6212462462462462, "grad_norm": 1.2127581810995358, "learning_rate": 9.654990609320499e-06, "loss": 0.5055, "step": 6620 }, { "epoch": 0.6213400900900901, "grad_norm": 1.1153362972935714, "learning_rate": 9.654791291205343e-06, "loss": 0.461, "step": 6621 }, { "epoch": 0.621433933933934, "grad_norm": 1.0321738498093997, "learning_rate": 9.654591917590564e-06, "loss": 0.4913, "step": 6622 }, { "epoch": 0.6215277777777778, "grad_norm": 0.8991850925196132, "learning_rate": 9.654392488478537e-06, "loss": 0.462, "step": 6623 }, { "epoch": 0.6216216216216216, "grad_norm": 1.2375880734650562, "learning_rate": 9.65419300387164e-06, "loss": 0.4338, "step": 6624 }, { "epoch": 0.6217154654654654, "grad_norm": 1.0120226267238654, "learning_rate": 9.653993463772253e-06, "loss": 0.4541, "step": 6625 }, { "epoch": 0.6218093093093093, "grad_norm": 1.0279288848093209, "learning_rate": 9.653793868182753e-06, "loss": 0.5178, "step": 6626 }, { "epoch": 0.6219031531531531, "grad_norm": 1.035566751055339, "learning_rate": 9.653594217105522e-06, "loss": 0.5107, "step": 6627 }, { "epoch": 0.621996996996997, "grad_norm": 1.2635921131997727, "learning_rate": 9.65339451054294e-06, "loss": 0.4723, "step": 6628 }, { "epoch": 0.6220908408408409, "grad_norm": 2.2756810653323676, "learning_rate": 9.653194748497387e-06, "loss": 0.499, "step": 6629 }, { "epoch": 0.6221846846846847, "grad_norm": 1.4286252035064486, "learning_rate": 9.652994930971245e-06, "loss": 0.448, "step": 6630 }, { "epoch": 0.6222785285285285, "grad_norm": 1.0921009989578745, "learning_rate": 9.652795057966898e-06, "loss": 0.4663, "step": 6631 }, { "epoch": 0.6223723723723724, "grad_norm": 1.893035698332795, "learning_rate": 9.652595129486726e-06, "loss": 0.4935, "step": 6632 }, { "epoch": 0.6224662162162162, "grad_norm": 0.9686565514757451, "learning_rate": 9.652395145533116e-06, "loss": 0.4446, "step": 6633 }, { "epoch": 0.62256006006006, "grad_norm": 1.0750033496739222, "learning_rate": 9.652195106108451e-06, "loss": 0.465, "step": 6634 }, { "epoch": 0.6226539039039038, "grad_norm": 1.087286771123714, "learning_rate": 9.651995011215116e-06, "loss": 0.4914, "step": 6635 }, { "epoch": 0.6227477477477478, "grad_norm": 1.0803747403580386, "learning_rate": 9.651794860855497e-06, "loss": 0.5149, "step": 6636 }, { "epoch": 0.6228415915915916, "grad_norm": 1.0852696483917843, "learning_rate": 9.65159465503198e-06, "loss": 0.4567, "step": 6637 }, { "epoch": 0.6229354354354354, "grad_norm": 1.4002873768954507, "learning_rate": 9.651394393746954e-06, "loss": 0.4849, "step": 6638 }, { "epoch": 0.6230292792792793, "grad_norm": 0.993004856784738, "learning_rate": 9.651194077002803e-06, "loss": 0.465, "step": 6639 }, { "epoch": 0.6231231231231231, "grad_norm": 5.846693374930462, "learning_rate": 9.650993704801918e-06, "loss": 0.4419, "step": 6640 }, { "epoch": 0.6232169669669669, "grad_norm": 1.3233144955678438, "learning_rate": 9.65079327714669e-06, "loss": 0.4806, "step": 6641 }, { "epoch": 0.6233108108108109, "grad_norm": 1.0221957290377235, "learning_rate": 9.650592794039503e-06, "loss": 0.5362, "step": 6642 }, { "epoch": 0.6234046546546547, "grad_norm": 0.986100324403759, "learning_rate": 9.650392255482755e-06, "loss": 0.4888, "step": 6643 }, { "epoch": 0.6234984984984985, "grad_norm": 1.081375432158748, "learning_rate": 9.65019166147883e-06, "loss": 0.4629, "step": 6644 }, { "epoch": 0.6235923423423423, "grad_norm": 1.0876902119800307, "learning_rate": 9.649991012030122e-06, "loss": 0.5137, "step": 6645 }, { "epoch": 0.6236861861861862, "grad_norm": 1.4614340760583922, "learning_rate": 9.649790307139024e-06, "loss": 0.4964, "step": 6646 }, { "epoch": 0.62378003003003, "grad_norm": 1.466254644813794, "learning_rate": 9.649589546807931e-06, "loss": 0.4822, "step": 6647 }, { "epoch": 0.6238738738738738, "grad_norm": 1.0429519299284156, "learning_rate": 9.649388731039234e-06, "loss": 0.4426, "step": 6648 }, { "epoch": 0.6239677177177178, "grad_norm": 1.0389982488245577, "learning_rate": 9.649187859835327e-06, "loss": 0.519, "step": 6649 }, { "epoch": 0.6240615615615616, "grad_norm": 1.111841051400247, "learning_rate": 9.648986933198606e-06, "loss": 0.4557, "step": 6650 }, { "epoch": 0.6241554054054054, "grad_norm": 1.168080245385933, "learning_rate": 9.648785951131466e-06, "loss": 0.4157, "step": 6651 }, { "epoch": 0.6242492492492493, "grad_norm": 1.048370539222616, "learning_rate": 9.648584913636306e-06, "loss": 0.4797, "step": 6652 }, { "epoch": 0.6243430930930931, "grad_norm": 1.0018522184251177, "learning_rate": 9.64838382071552e-06, "loss": 0.5182, "step": 6653 }, { "epoch": 0.6244369369369369, "grad_norm": 1.2093176061376458, "learning_rate": 9.648182672371505e-06, "loss": 0.4414, "step": 6654 }, { "epoch": 0.6245307807807807, "grad_norm": 1.5812510351123152, "learning_rate": 9.647981468606663e-06, "loss": 0.5139, "step": 6655 }, { "epoch": 0.6246246246246246, "grad_norm": 1.0128906259986354, "learning_rate": 9.64778020942339e-06, "loss": 0.4991, "step": 6656 }, { "epoch": 0.6247184684684685, "grad_norm": 1.6528237464692708, "learning_rate": 9.647578894824086e-06, "loss": 0.4607, "step": 6657 }, { "epoch": 0.6248123123123123, "grad_norm": 2.759752129138026, "learning_rate": 9.647377524811154e-06, "loss": 0.4755, "step": 6658 }, { "epoch": 0.6249061561561562, "grad_norm": 1.053160630810374, "learning_rate": 9.647176099386991e-06, "loss": 0.4873, "step": 6659 }, { "epoch": 0.625, "grad_norm": 0.9845845961151952, "learning_rate": 9.646974618554002e-06, "loss": 0.4506, "step": 6660 }, { "epoch": 0.6250938438438438, "grad_norm": 1.1003403553290947, "learning_rate": 9.646773082314586e-06, "loss": 0.5096, "step": 6661 }, { "epoch": 0.6251876876876877, "grad_norm": 1.0897478165431165, "learning_rate": 9.646571490671149e-06, "loss": 0.5061, "step": 6662 }, { "epoch": 0.6252815315315315, "grad_norm": 1.0968891548604018, "learning_rate": 9.64636984362609e-06, "loss": 0.4883, "step": 6663 }, { "epoch": 0.6253753753753754, "grad_norm": 0.9747227857357624, "learning_rate": 9.64616814118182e-06, "loss": 0.4578, "step": 6664 }, { "epoch": 0.6254692192192193, "grad_norm": 1.0773210636756436, "learning_rate": 9.64596638334074e-06, "loss": 0.4944, "step": 6665 }, { "epoch": 0.6255630630630631, "grad_norm": 1.2608979177551314, "learning_rate": 9.645764570105254e-06, "loss": 0.487, "step": 6666 }, { "epoch": 0.6256569069069069, "grad_norm": 1.125294647605045, "learning_rate": 9.645562701477773e-06, "loss": 0.535, "step": 6667 }, { "epoch": 0.6257507507507507, "grad_norm": 1.0693605953164278, "learning_rate": 9.6453607774607e-06, "loss": 0.4876, "step": 6668 }, { "epoch": 0.6258445945945946, "grad_norm": 1.0774030838579736, "learning_rate": 9.645158798056444e-06, "loss": 0.4859, "step": 6669 }, { "epoch": 0.6259384384384384, "grad_norm": 1.0263586251312793, "learning_rate": 9.644956763267412e-06, "loss": 0.4666, "step": 6670 }, { "epoch": 0.6260322822822822, "grad_norm": 1.14188212775892, "learning_rate": 9.644754673096014e-06, "loss": 0.513, "step": 6671 }, { "epoch": 0.6261261261261262, "grad_norm": 1.2916277235623377, "learning_rate": 9.64455252754466e-06, "loss": 0.4915, "step": 6672 }, { "epoch": 0.62621996996997, "grad_norm": 1.1676526120807307, "learning_rate": 9.644350326615759e-06, "loss": 0.4597, "step": 6673 }, { "epoch": 0.6263138138138138, "grad_norm": 1.0639143096249897, "learning_rate": 9.644148070311723e-06, "loss": 0.4613, "step": 6674 }, { "epoch": 0.6264076576576577, "grad_norm": 0.8925270176667411, "learning_rate": 9.643945758634963e-06, "loss": 0.4241, "step": 6675 }, { "epoch": 0.6265015015015015, "grad_norm": 1.1199491312359577, "learning_rate": 9.64374339158789e-06, "loss": 0.5038, "step": 6676 }, { "epoch": 0.6265953453453453, "grad_norm": 1.024818286352991, "learning_rate": 9.64354096917292e-06, "loss": 0.4788, "step": 6677 }, { "epoch": 0.6266891891891891, "grad_norm": 1.6098521482328578, "learning_rate": 9.643338491392461e-06, "loss": 0.4758, "step": 6678 }, { "epoch": 0.6267830330330331, "grad_norm": 1.5703518642370011, "learning_rate": 9.643135958248936e-06, "loss": 0.4478, "step": 6679 }, { "epoch": 0.6268768768768769, "grad_norm": 3.497187308387443, "learning_rate": 9.642933369744751e-06, "loss": 0.4951, "step": 6680 }, { "epoch": 0.6269707207207207, "grad_norm": 1.0785416801671703, "learning_rate": 9.642730725882325e-06, "loss": 0.4485, "step": 6681 }, { "epoch": 0.6270645645645646, "grad_norm": 1.2138274770779913, "learning_rate": 9.642528026664075e-06, "loss": 0.4792, "step": 6682 }, { "epoch": 0.6271584084084084, "grad_norm": 1.0091364514070502, "learning_rate": 9.642325272092417e-06, "loss": 0.4581, "step": 6683 }, { "epoch": 0.6272522522522522, "grad_norm": 3.144555408802731, "learning_rate": 9.642122462169767e-06, "loss": 0.4685, "step": 6684 }, { "epoch": 0.6273460960960962, "grad_norm": 1.2605094858906765, "learning_rate": 9.641919596898546e-06, "loss": 0.4735, "step": 6685 }, { "epoch": 0.62743993993994, "grad_norm": 1.1231265652100257, "learning_rate": 9.64171667628117e-06, "loss": 0.4743, "step": 6686 }, { "epoch": 0.6275337837837838, "grad_norm": 1.32922682277549, "learning_rate": 9.641513700320062e-06, "loss": 0.5103, "step": 6687 }, { "epoch": 0.6276276276276276, "grad_norm": 0.9077247520691365, "learning_rate": 9.641310669017638e-06, "loss": 0.4233, "step": 6688 }, { "epoch": 0.6277214714714715, "grad_norm": 1.223382971370585, "learning_rate": 9.641107582376321e-06, "loss": 0.4905, "step": 6689 }, { "epoch": 0.6278153153153153, "grad_norm": 0.9816739814207244, "learning_rate": 9.640904440398534e-06, "loss": 0.4767, "step": 6690 }, { "epoch": 0.6279091591591591, "grad_norm": 0.9175376243431826, "learning_rate": 9.640701243086694e-06, "loss": 0.4911, "step": 6691 }, { "epoch": 0.628003003003003, "grad_norm": 1.0688566703193636, "learning_rate": 9.640497990443227e-06, "loss": 0.4643, "step": 6692 }, { "epoch": 0.6280968468468469, "grad_norm": 1.0721891197819708, "learning_rate": 9.64029468247056e-06, "loss": 0.4935, "step": 6693 }, { "epoch": 0.6281906906906907, "grad_norm": 1.2090978363333995, "learning_rate": 9.64009131917111e-06, "loss": 0.526, "step": 6694 }, { "epoch": 0.6282845345345346, "grad_norm": 1.2661996534703337, "learning_rate": 9.639887900547305e-06, "loss": 0.467, "step": 6695 }, { "epoch": 0.6283783783783784, "grad_norm": 1.229073098784614, "learning_rate": 9.639684426601572e-06, "loss": 0.4791, "step": 6696 }, { "epoch": 0.6284722222222222, "grad_norm": 1.196269437823469, "learning_rate": 9.639480897336335e-06, "loss": 0.5436, "step": 6697 }, { "epoch": 0.628566066066066, "grad_norm": 1.0300846572625295, "learning_rate": 9.63927731275402e-06, "loss": 0.4579, "step": 6698 }, { "epoch": 0.6286599099099099, "grad_norm": 1.0252975776050282, "learning_rate": 9.639073672857058e-06, "loss": 0.4721, "step": 6699 }, { "epoch": 0.6287537537537538, "grad_norm": 1.2781678840782305, "learning_rate": 9.638869977647873e-06, "loss": 0.4555, "step": 6700 }, { "epoch": 0.6288475975975976, "grad_norm": 1.5850934626450106, "learning_rate": 9.638666227128897e-06, "loss": 0.4388, "step": 6701 }, { "epoch": 0.6289414414414415, "grad_norm": 1.9193070400874737, "learning_rate": 9.638462421302557e-06, "loss": 0.4851, "step": 6702 }, { "epoch": 0.6290352852852853, "grad_norm": 1.5402128846942595, "learning_rate": 9.638258560171281e-06, "loss": 0.4857, "step": 6703 }, { "epoch": 0.6291291291291291, "grad_norm": 1.1521002641138343, "learning_rate": 9.638054643737506e-06, "loss": 0.4739, "step": 6704 }, { "epoch": 0.629222972972973, "grad_norm": 2.1744520195312087, "learning_rate": 9.637850672003656e-06, "loss": 0.4826, "step": 6705 }, { "epoch": 0.6293168168168168, "grad_norm": 1.032482362789409, "learning_rate": 9.63764664497217e-06, "loss": 0.469, "step": 6706 }, { "epoch": 0.6294106606606606, "grad_norm": 0.8859146962231578, "learning_rate": 9.637442562645476e-06, "loss": 0.4318, "step": 6707 }, { "epoch": 0.6295045045045045, "grad_norm": 1.2325613514538616, "learning_rate": 9.637238425026007e-06, "loss": 0.4913, "step": 6708 }, { "epoch": 0.6295983483483484, "grad_norm": 0.9601915660911243, "learning_rate": 9.6370342321162e-06, "loss": 0.4211, "step": 6709 }, { "epoch": 0.6296921921921922, "grad_norm": 1.1320091635273781, "learning_rate": 9.63682998391849e-06, "loss": 0.4825, "step": 6710 }, { "epoch": 0.629786036036036, "grad_norm": 1.4239482257644278, "learning_rate": 9.636625680435307e-06, "loss": 0.4574, "step": 6711 }, { "epoch": 0.6298798798798799, "grad_norm": 1.0439686144447204, "learning_rate": 9.636421321669092e-06, "loss": 0.4766, "step": 6712 }, { "epoch": 0.6299737237237237, "grad_norm": 1.0387207826579747, "learning_rate": 9.63621690762228e-06, "loss": 0.4728, "step": 6713 }, { "epoch": 0.6300675675675675, "grad_norm": 1.0913854350316279, "learning_rate": 9.636012438297307e-06, "loss": 0.5138, "step": 6714 }, { "epoch": 0.6301614114114115, "grad_norm": 1.2065031021237662, "learning_rate": 9.635807913696613e-06, "loss": 0.5299, "step": 6715 }, { "epoch": 0.6302552552552553, "grad_norm": 1.1148923467000416, "learning_rate": 9.635603333822635e-06, "loss": 0.4875, "step": 6716 }, { "epoch": 0.6303490990990991, "grad_norm": 1.2284520498579514, "learning_rate": 9.635398698677814e-06, "loss": 0.4722, "step": 6717 }, { "epoch": 0.6304429429429429, "grad_norm": 0.8779381792673093, "learning_rate": 9.635194008264586e-06, "loss": 0.4243, "step": 6718 }, { "epoch": 0.6305367867867868, "grad_norm": 0.9803797557359916, "learning_rate": 9.634989262585397e-06, "loss": 0.4958, "step": 6719 }, { "epoch": 0.6306306306306306, "grad_norm": 1.2452469161827282, "learning_rate": 9.634784461642686e-06, "loss": 0.5027, "step": 6720 }, { "epoch": 0.6307244744744744, "grad_norm": 0.9937579178650967, "learning_rate": 9.634579605438891e-06, "loss": 0.4506, "step": 6721 }, { "epoch": 0.6308183183183184, "grad_norm": 1.642119987659194, "learning_rate": 9.634374693976461e-06, "loss": 0.4879, "step": 6722 }, { "epoch": 0.6309121621621622, "grad_norm": 1.2221194862188764, "learning_rate": 9.634169727257837e-06, "loss": 0.5387, "step": 6723 }, { "epoch": 0.631006006006006, "grad_norm": 3.45506676509603, "learning_rate": 9.63396470528546e-06, "loss": 0.4385, "step": 6724 }, { "epoch": 0.6310998498498499, "grad_norm": 1.4985722757058104, "learning_rate": 9.633759628061775e-06, "loss": 0.5356, "step": 6725 }, { "epoch": 0.6311936936936937, "grad_norm": 5.220130517997928, "learning_rate": 9.63355449558923e-06, "loss": 0.5065, "step": 6726 }, { "epoch": 0.6312875375375375, "grad_norm": 1.0510895069468085, "learning_rate": 9.63334930787027e-06, "loss": 0.5218, "step": 6727 }, { "epoch": 0.6313813813813813, "grad_norm": 7.350787895219577, "learning_rate": 9.63314406490734e-06, "loss": 0.4597, "step": 6728 }, { "epoch": 0.6314752252252253, "grad_norm": 1.270450250182165, "learning_rate": 9.632938766702888e-06, "loss": 0.4137, "step": 6729 }, { "epoch": 0.6315690690690691, "grad_norm": 1.0563672853474169, "learning_rate": 9.632733413259362e-06, "loss": 0.4949, "step": 6730 }, { "epoch": 0.6316629129129129, "grad_norm": 0.9970472636188428, "learning_rate": 9.63252800457921e-06, "loss": 0.4315, "step": 6731 }, { "epoch": 0.6317567567567568, "grad_norm": 1.6240421602813986, "learning_rate": 9.632322540664884e-06, "loss": 0.556, "step": 6732 }, { "epoch": 0.6318506006006006, "grad_norm": 0.976936529769997, "learning_rate": 9.632117021518828e-06, "loss": 0.4758, "step": 6733 }, { "epoch": 0.6319444444444444, "grad_norm": 1.275451836909706, "learning_rate": 9.631911447143497e-06, "loss": 0.4323, "step": 6734 }, { "epoch": 0.6320382882882883, "grad_norm": 1.0790548657779637, "learning_rate": 9.63170581754134e-06, "loss": 0.5065, "step": 6735 }, { "epoch": 0.6321321321321322, "grad_norm": 0.9090030208367389, "learning_rate": 9.63150013271481e-06, "loss": 0.4746, "step": 6736 }, { "epoch": 0.632225975975976, "grad_norm": 1.1061082142190652, "learning_rate": 9.631294392666359e-06, "loss": 0.5033, "step": 6737 }, { "epoch": 0.6323198198198198, "grad_norm": 1.1051399491169682, "learning_rate": 9.631088597398438e-06, "loss": 0.4493, "step": 6738 }, { "epoch": 0.6324136636636637, "grad_norm": 1.2694597825195326, "learning_rate": 9.630882746913506e-06, "loss": 0.4918, "step": 6739 }, { "epoch": 0.6325075075075075, "grad_norm": 1.05657820957965, "learning_rate": 9.63067684121401e-06, "loss": 0.4701, "step": 6740 }, { "epoch": 0.6326013513513513, "grad_norm": 0.9576214435286102, "learning_rate": 9.630470880302411e-06, "loss": 0.4391, "step": 6741 }, { "epoch": 0.6326951951951952, "grad_norm": 0.9627959400727281, "learning_rate": 9.630264864181165e-06, "loss": 0.4486, "step": 6742 }, { "epoch": 0.632789039039039, "grad_norm": 1.069175222300375, "learning_rate": 9.630058792852723e-06, "loss": 0.4422, "step": 6743 }, { "epoch": 0.6328828828828829, "grad_norm": 0.899075354942853, "learning_rate": 9.629852666319547e-06, "loss": 0.5057, "step": 6744 }, { "epoch": 0.6329767267267268, "grad_norm": 1.285102850263563, "learning_rate": 9.629646484584089e-06, "loss": 0.4449, "step": 6745 }, { "epoch": 0.6330705705705706, "grad_norm": 1.196239504957403, "learning_rate": 9.629440247648816e-06, "loss": 0.5566, "step": 6746 }, { "epoch": 0.6331644144144144, "grad_norm": 1.073070320516737, "learning_rate": 9.62923395551618e-06, "loss": 0.4929, "step": 6747 }, { "epoch": 0.6332582582582582, "grad_norm": 1.046484094072481, "learning_rate": 9.62902760818864e-06, "loss": 0.4598, "step": 6748 }, { "epoch": 0.6333521021021021, "grad_norm": 1.048742857767201, "learning_rate": 9.628821205668664e-06, "loss": 0.4219, "step": 6749 }, { "epoch": 0.6334459459459459, "grad_norm": 1.0645025348672674, "learning_rate": 9.628614747958705e-06, "loss": 0.4731, "step": 6750 }, { "epoch": 0.6335397897897898, "grad_norm": 1.160438957587309, "learning_rate": 9.628408235061229e-06, "loss": 0.4932, "step": 6751 }, { "epoch": 0.6336336336336337, "grad_norm": 0.9849424992394711, "learning_rate": 9.628201666978694e-06, "loss": 0.4485, "step": 6752 }, { "epoch": 0.6337274774774775, "grad_norm": 1.040944006181882, "learning_rate": 9.62799504371357e-06, "loss": 0.4706, "step": 6753 }, { "epoch": 0.6338213213213213, "grad_norm": 1.3998422537832202, "learning_rate": 9.627788365268313e-06, "loss": 0.5178, "step": 6754 }, { "epoch": 0.6339151651651652, "grad_norm": 0.9732385260581509, "learning_rate": 9.627581631645392e-06, "loss": 0.4685, "step": 6755 }, { "epoch": 0.634009009009009, "grad_norm": 1.0729517500885744, "learning_rate": 9.62737484284727e-06, "loss": 0.4259, "step": 6756 }, { "epoch": 0.6341028528528528, "grad_norm": 5.218543652950311, "learning_rate": 9.627167998876415e-06, "loss": 0.469, "step": 6757 }, { "epoch": 0.6341966966966966, "grad_norm": 1.0767993955380688, "learning_rate": 9.62696109973529e-06, "loss": 0.521, "step": 6758 }, { "epoch": 0.6342905405405406, "grad_norm": 1.0404667956377507, "learning_rate": 9.626754145426363e-06, "loss": 0.4439, "step": 6759 }, { "epoch": 0.6343843843843844, "grad_norm": 0.9993938914848652, "learning_rate": 9.626547135952102e-06, "loss": 0.4826, "step": 6760 }, { "epoch": 0.6344782282282282, "grad_norm": 1.7791938751470997, "learning_rate": 9.626340071314974e-06, "loss": 0.5022, "step": 6761 }, { "epoch": 0.6345720720720721, "grad_norm": 1.4241003332431408, "learning_rate": 9.626132951517451e-06, "loss": 0.4683, "step": 6762 }, { "epoch": 0.6346659159159159, "grad_norm": 0.9106122025542575, "learning_rate": 9.625925776561999e-06, "loss": 0.4625, "step": 6763 }, { "epoch": 0.6347597597597597, "grad_norm": 1.0325177456954697, "learning_rate": 9.625718546451088e-06, "loss": 0.496, "step": 6764 }, { "epoch": 0.6348536036036037, "grad_norm": 0.8835880465805172, "learning_rate": 9.625511261187193e-06, "loss": 0.4822, "step": 6765 }, { "epoch": 0.6349474474474475, "grad_norm": 1.3948618652717424, "learning_rate": 9.625303920772782e-06, "loss": 0.4538, "step": 6766 }, { "epoch": 0.6350412912912913, "grad_norm": 1.1495767340452694, "learning_rate": 9.625096525210327e-06, "loss": 0.5016, "step": 6767 }, { "epoch": 0.6351351351351351, "grad_norm": 1.022596182694193, "learning_rate": 9.624889074502304e-06, "loss": 0.4915, "step": 6768 }, { "epoch": 0.635228978978979, "grad_norm": 1.1902166881954799, "learning_rate": 9.624681568651182e-06, "loss": 0.4917, "step": 6769 }, { "epoch": 0.6353228228228228, "grad_norm": 1.6445732628141203, "learning_rate": 9.624474007659439e-06, "loss": 0.4186, "step": 6770 }, { "epoch": 0.6354166666666666, "grad_norm": 1.0211202913428343, "learning_rate": 9.624266391529547e-06, "loss": 0.4767, "step": 6771 }, { "epoch": 0.6355105105105106, "grad_norm": 1.2419207373303767, "learning_rate": 9.624058720263982e-06, "loss": 0.5205, "step": 6772 }, { "epoch": 0.6356043543543544, "grad_norm": 2.8895277899062064, "learning_rate": 9.623850993865222e-06, "loss": 0.4729, "step": 6773 }, { "epoch": 0.6356981981981982, "grad_norm": 1.2719909402431107, "learning_rate": 9.623643212335742e-06, "loss": 0.4632, "step": 6774 }, { "epoch": 0.6357920420420421, "grad_norm": 1.0451360888872354, "learning_rate": 9.62343537567802e-06, "loss": 0.4899, "step": 6775 }, { "epoch": 0.6358858858858859, "grad_norm": 1.5130040547252483, "learning_rate": 9.623227483894536e-06, "loss": 0.5268, "step": 6776 }, { "epoch": 0.6359797297297297, "grad_norm": 1.3568112453635783, "learning_rate": 9.623019536987763e-06, "loss": 0.4507, "step": 6777 }, { "epoch": 0.6360735735735735, "grad_norm": 1.0017377674445886, "learning_rate": 9.622811534960184e-06, "loss": 0.4744, "step": 6778 }, { "epoch": 0.6361674174174174, "grad_norm": 1.2259971173870952, "learning_rate": 9.62260347781428e-06, "loss": 0.5048, "step": 6779 }, { "epoch": 0.6362612612612613, "grad_norm": 1.221365312030392, "learning_rate": 9.622395365552533e-06, "loss": 0.4607, "step": 6780 }, { "epoch": 0.6363551051051051, "grad_norm": 1.1679366871568404, "learning_rate": 9.622187198177419e-06, "loss": 0.4996, "step": 6781 }, { "epoch": 0.636448948948949, "grad_norm": 1.128323188826949, "learning_rate": 9.621978975691425e-06, "loss": 0.4646, "step": 6782 }, { "epoch": 0.6365427927927928, "grad_norm": 1.0459393921409201, "learning_rate": 9.621770698097031e-06, "loss": 0.4427, "step": 6783 }, { "epoch": 0.6366366366366366, "grad_norm": 1.2395294995326283, "learning_rate": 9.621562365396719e-06, "loss": 0.4702, "step": 6784 }, { "epoch": 0.6367304804804805, "grad_norm": 0.9971233617290295, "learning_rate": 9.621353977592978e-06, "loss": 0.4914, "step": 6785 }, { "epoch": 0.6368243243243243, "grad_norm": 1.1400677914369308, "learning_rate": 9.621145534688287e-06, "loss": 0.4736, "step": 6786 }, { "epoch": 0.6369181681681682, "grad_norm": 2.7933098724242824, "learning_rate": 9.620937036685137e-06, "loss": 0.4481, "step": 6787 }, { "epoch": 0.637012012012012, "grad_norm": 1.037463405583286, "learning_rate": 9.620728483586008e-06, "loss": 0.4756, "step": 6788 }, { "epoch": 0.6371058558558559, "grad_norm": 1.2302893130774726, "learning_rate": 9.62051987539339e-06, "loss": 0.5058, "step": 6789 }, { "epoch": 0.6371996996996997, "grad_norm": 1.2312802637979183, "learning_rate": 9.620311212109771e-06, "loss": 0.4875, "step": 6790 }, { "epoch": 0.6372935435435435, "grad_norm": 1.1378722116452633, "learning_rate": 9.620102493737638e-06, "loss": 0.4998, "step": 6791 }, { "epoch": 0.6373873873873874, "grad_norm": 1.3318425882793898, "learning_rate": 9.619893720279476e-06, "loss": 0.4878, "step": 6792 }, { "epoch": 0.6374812312312312, "grad_norm": 1.1680660594569927, "learning_rate": 9.619684891737781e-06, "loss": 0.5137, "step": 6793 }, { "epoch": 0.637575075075075, "grad_norm": 1.4989162760007253, "learning_rate": 9.619476008115037e-06, "loss": 0.4629, "step": 6794 }, { "epoch": 0.637668918918919, "grad_norm": 2.4448901651007957, "learning_rate": 9.619267069413738e-06, "loss": 0.4215, "step": 6795 }, { "epoch": 0.6377627627627628, "grad_norm": 1.241095683730024, "learning_rate": 9.619058075636374e-06, "loss": 0.4831, "step": 6796 }, { "epoch": 0.6378566066066066, "grad_norm": 1.1440745892396755, "learning_rate": 9.618849026785437e-06, "loss": 0.5053, "step": 6797 }, { "epoch": 0.6379504504504504, "grad_norm": 1.7421591492755937, "learning_rate": 9.618639922863419e-06, "loss": 0.555, "step": 6798 }, { "epoch": 0.6380442942942943, "grad_norm": 1.089865113613833, "learning_rate": 9.618430763872814e-06, "loss": 0.4713, "step": 6799 }, { "epoch": 0.6381381381381381, "grad_norm": 1.24657438390369, "learning_rate": 9.618221549816116e-06, "loss": 0.4888, "step": 6800 }, { "epoch": 0.6382319819819819, "grad_norm": 1.2727771137949315, "learning_rate": 9.618012280695817e-06, "loss": 0.494, "step": 6801 }, { "epoch": 0.6383258258258259, "grad_norm": 1.7267914515461047, "learning_rate": 9.617802956514416e-06, "loss": 0.483, "step": 6802 }, { "epoch": 0.6384196696696697, "grad_norm": 1.0100787642205469, "learning_rate": 9.617593577274406e-06, "loss": 0.4812, "step": 6803 }, { "epoch": 0.6385135135135135, "grad_norm": 1.237548797728622, "learning_rate": 9.617384142978284e-06, "loss": 0.4142, "step": 6804 }, { "epoch": 0.6386073573573574, "grad_norm": 1.0722411720300355, "learning_rate": 9.617174653628548e-06, "loss": 0.4458, "step": 6805 }, { "epoch": 0.6387012012012012, "grad_norm": 1.090347219387518, "learning_rate": 9.616965109227694e-06, "loss": 0.4592, "step": 6806 }, { "epoch": 0.638795045045045, "grad_norm": 2.4015893499047154, "learning_rate": 9.616755509778224e-06, "loss": 0.4914, "step": 6807 }, { "epoch": 0.6388888888888888, "grad_norm": 1.2687766743258295, "learning_rate": 9.616545855282633e-06, "loss": 0.4889, "step": 6808 }, { "epoch": 0.6389827327327328, "grad_norm": 1.5141897893965568, "learning_rate": 9.616336145743423e-06, "loss": 0.5011, "step": 6809 }, { "epoch": 0.6390765765765766, "grad_norm": 1.1490503177565452, "learning_rate": 9.616126381163094e-06, "loss": 0.4723, "step": 6810 }, { "epoch": 0.6391704204204204, "grad_norm": 1.1685424774346183, "learning_rate": 9.615916561544146e-06, "loss": 0.4647, "step": 6811 }, { "epoch": 0.6392642642642643, "grad_norm": 2.518527495168087, "learning_rate": 9.615706686889081e-06, "loss": 0.4958, "step": 6812 }, { "epoch": 0.6393581081081081, "grad_norm": 1.1989291780912537, "learning_rate": 9.615496757200404e-06, "loss": 0.5438, "step": 6813 }, { "epoch": 0.6394519519519519, "grad_norm": 1.2804389680362378, "learning_rate": 9.615286772480615e-06, "loss": 0.48, "step": 6814 }, { "epoch": 0.6395457957957958, "grad_norm": 1.2005635402336805, "learning_rate": 9.615076732732219e-06, "loss": 0.4823, "step": 6815 }, { "epoch": 0.6396396396396397, "grad_norm": 1.6169129724029299, "learning_rate": 9.61486663795772e-06, "loss": 0.4481, "step": 6816 }, { "epoch": 0.6397334834834835, "grad_norm": 1.1952487676249142, "learning_rate": 9.614656488159622e-06, "loss": 0.4307, "step": 6817 }, { "epoch": 0.6398273273273273, "grad_norm": 1.0208258421756646, "learning_rate": 9.614446283340432e-06, "loss": 0.4509, "step": 6818 }, { "epoch": 0.6399211711711712, "grad_norm": 1.2137026901773464, "learning_rate": 9.614236023502656e-06, "loss": 0.4749, "step": 6819 }, { "epoch": 0.640015015015015, "grad_norm": 1.6410568625259647, "learning_rate": 9.6140257086488e-06, "loss": 0.4753, "step": 6820 }, { "epoch": 0.6401088588588588, "grad_norm": 1.1501316925479965, "learning_rate": 9.613815338781373e-06, "loss": 0.4979, "step": 6821 }, { "epoch": 0.6402027027027027, "grad_norm": 1.1708710147511752, "learning_rate": 9.613604913902883e-06, "loss": 0.4881, "step": 6822 }, { "epoch": 0.6402965465465466, "grad_norm": 1.121739360333602, "learning_rate": 9.613394434015839e-06, "loss": 0.4649, "step": 6823 }, { "epoch": 0.6403903903903904, "grad_norm": 1.281231373815044, "learning_rate": 9.61318389912275e-06, "loss": 0.4944, "step": 6824 }, { "epoch": 0.6404842342342343, "grad_norm": 1.1112352871373015, "learning_rate": 9.612973309226125e-06, "loss": 0.4833, "step": 6825 }, { "epoch": 0.6405780780780781, "grad_norm": 1.0072170662366615, "learning_rate": 9.612762664328479e-06, "loss": 0.4775, "step": 6826 }, { "epoch": 0.6406719219219219, "grad_norm": 1.7579927239465238, "learning_rate": 9.612551964432318e-06, "loss": 0.4181, "step": 6827 }, { "epoch": 0.6407657657657657, "grad_norm": 1.0057612920618328, "learning_rate": 9.612341209540159e-06, "loss": 0.4602, "step": 6828 }, { "epoch": 0.6408596096096096, "grad_norm": 1.0862557694105355, "learning_rate": 9.612130399654513e-06, "loss": 0.493, "step": 6829 }, { "epoch": 0.6409534534534534, "grad_norm": 1.0195867464302533, "learning_rate": 9.611919534777892e-06, "loss": 0.4892, "step": 6830 }, { "epoch": 0.6410472972972973, "grad_norm": 1.3698014761570831, "learning_rate": 9.611708614912811e-06, "loss": 0.4808, "step": 6831 }, { "epoch": 0.6411411411411412, "grad_norm": 1.0359107490387007, "learning_rate": 9.611497640061788e-06, "loss": 0.468, "step": 6832 }, { "epoch": 0.641234984984985, "grad_norm": 1.1511271214565912, "learning_rate": 9.611286610227335e-06, "loss": 0.497, "step": 6833 }, { "epoch": 0.6413288288288288, "grad_norm": 1.031608834101164, "learning_rate": 9.611075525411967e-06, "loss": 0.4443, "step": 6834 }, { "epoch": 0.6414226726726727, "grad_norm": 1.058011139349707, "learning_rate": 9.610864385618207e-06, "loss": 0.5207, "step": 6835 }, { "epoch": 0.6415165165165165, "grad_norm": 1.180563628509399, "learning_rate": 9.610653190848565e-06, "loss": 0.4803, "step": 6836 }, { "epoch": 0.6416103603603603, "grad_norm": 1.029092284479469, "learning_rate": 9.610441941105562e-06, "loss": 0.4443, "step": 6837 }, { "epoch": 0.6417042042042042, "grad_norm": 1.371215609544871, "learning_rate": 9.610230636391719e-06, "loss": 0.4383, "step": 6838 }, { "epoch": 0.6417980480480481, "grad_norm": 1.0115766648169089, "learning_rate": 9.610019276709553e-06, "loss": 0.5294, "step": 6839 }, { "epoch": 0.6418918918918919, "grad_norm": 1.1508591658333183, "learning_rate": 9.609807862061585e-06, "loss": 0.4632, "step": 6840 }, { "epoch": 0.6419857357357357, "grad_norm": 1.165842037239778, "learning_rate": 9.609596392450333e-06, "loss": 0.5269, "step": 6841 }, { "epoch": 0.6420795795795796, "grad_norm": 1.0935111093899519, "learning_rate": 9.609384867878322e-06, "loss": 0.4414, "step": 6842 }, { "epoch": 0.6421734234234234, "grad_norm": 0.9868334628624492, "learning_rate": 9.609173288348075e-06, "loss": 0.4906, "step": 6843 }, { "epoch": 0.6422672672672672, "grad_norm": 1.1824771197795565, "learning_rate": 9.608961653862109e-06, "loss": 0.489, "step": 6844 }, { "epoch": 0.6423611111111112, "grad_norm": 1.0393221403820665, "learning_rate": 9.608749964422953e-06, "loss": 0.4906, "step": 6845 }, { "epoch": 0.642454954954955, "grad_norm": 1.1507210607061638, "learning_rate": 9.608538220033127e-06, "loss": 0.5395, "step": 6846 }, { "epoch": 0.6425487987987988, "grad_norm": 1.2155285883048934, "learning_rate": 9.608326420695158e-06, "loss": 0.4681, "step": 6847 }, { "epoch": 0.6426426426426426, "grad_norm": 1.8671088673519427, "learning_rate": 9.60811456641157e-06, "loss": 0.4602, "step": 6848 }, { "epoch": 0.6427364864864865, "grad_norm": 1.258337923916182, "learning_rate": 9.607902657184893e-06, "loss": 0.4598, "step": 6849 }, { "epoch": 0.6428303303303303, "grad_norm": 1.0860092624540214, "learning_rate": 9.607690693017648e-06, "loss": 0.4465, "step": 6850 }, { "epoch": 0.6429241741741741, "grad_norm": 0.9255891598798079, "learning_rate": 9.607478673912365e-06, "loss": 0.4859, "step": 6851 }, { "epoch": 0.6430180180180181, "grad_norm": 1.060449606343284, "learning_rate": 9.60726659987157e-06, "loss": 0.4682, "step": 6852 }, { "epoch": 0.6431118618618619, "grad_norm": 1.0437278512202068, "learning_rate": 9.607054470897795e-06, "loss": 0.4309, "step": 6853 }, { "epoch": 0.6432057057057057, "grad_norm": 1.5106926319847225, "learning_rate": 9.606842286993566e-06, "loss": 0.4249, "step": 6854 }, { "epoch": 0.6432995495495496, "grad_norm": 0.9795213782534425, "learning_rate": 9.606630048161415e-06, "loss": 0.5042, "step": 6855 }, { "epoch": 0.6433933933933934, "grad_norm": 0.9228893639688589, "learning_rate": 9.606417754403872e-06, "loss": 0.4597, "step": 6856 }, { "epoch": 0.6434872372372372, "grad_norm": 1.5288719585454775, "learning_rate": 9.606205405723467e-06, "loss": 0.4548, "step": 6857 }, { "epoch": 0.643581081081081, "grad_norm": 1.0594265678952375, "learning_rate": 9.605993002122734e-06, "loss": 0.4595, "step": 6858 }, { "epoch": 0.643674924924925, "grad_norm": 1.3946730822627955, "learning_rate": 9.605780543604203e-06, "loss": 0.5043, "step": 6859 }, { "epoch": 0.6437687687687688, "grad_norm": 1.220604258826021, "learning_rate": 9.60556803017041e-06, "loss": 0.5441, "step": 6860 }, { "epoch": 0.6438626126126126, "grad_norm": 1.602086664472236, "learning_rate": 9.605355461823886e-06, "loss": 0.4757, "step": 6861 }, { "epoch": 0.6439564564564565, "grad_norm": 1.3998629544314087, "learning_rate": 9.605142838567167e-06, "loss": 0.5232, "step": 6862 }, { "epoch": 0.6440503003003003, "grad_norm": 1.1138695031234387, "learning_rate": 9.604930160402787e-06, "loss": 0.4933, "step": 6863 }, { "epoch": 0.6441441441441441, "grad_norm": 1.0772754435341918, "learning_rate": 9.604717427333285e-06, "loss": 0.4965, "step": 6864 }, { "epoch": 0.644237987987988, "grad_norm": 1.423377112508665, "learning_rate": 9.604504639361194e-06, "loss": 0.5367, "step": 6865 }, { "epoch": 0.6443318318318318, "grad_norm": 1.1473633263582232, "learning_rate": 9.604291796489052e-06, "loss": 0.4784, "step": 6866 }, { "epoch": 0.6444256756756757, "grad_norm": 4.212655326197741, "learning_rate": 9.604078898719397e-06, "loss": 0.4662, "step": 6867 }, { "epoch": 0.6445195195195195, "grad_norm": 1.3760355564344524, "learning_rate": 9.603865946054769e-06, "loss": 0.476, "step": 6868 }, { "epoch": 0.6446133633633634, "grad_norm": 1.2668391603352338, "learning_rate": 9.603652938497706e-06, "loss": 0.4911, "step": 6869 }, { "epoch": 0.6447072072072072, "grad_norm": 0.8887948769573877, "learning_rate": 9.603439876050745e-06, "loss": 0.3784, "step": 6870 }, { "epoch": 0.644801051051051, "grad_norm": 1.1282321841060838, "learning_rate": 9.603226758716428e-06, "loss": 0.5126, "step": 6871 }, { "epoch": 0.6448948948948949, "grad_norm": 1.4545307839071202, "learning_rate": 9.603013586497297e-06, "loss": 0.5034, "step": 6872 }, { "epoch": 0.6449887387387387, "grad_norm": 3.076004342086485, "learning_rate": 9.602800359395894e-06, "loss": 0.4783, "step": 6873 }, { "epoch": 0.6450825825825826, "grad_norm": 0.9511864523059064, "learning_rate": 9.60258707741476e-06, "loss": 0.4488, "step": 6874 }, { "epoch": 0.6451764264264265, "grad_norm": 1.2974204787354024, "learning_rate": 9.602373740556441e-06, "loss": 0.4917, "step": 6875 }, { "epoch": 0.6452702702702703, "grad_norm": 0.9763382332279169, "learning_rate": 9.602160348823475e-06, "loss": 0.4756, "step": 6876 }, { "epoch": 0.6453641141141141, "grad_norm": 1.0083746190871894, "learning_rate": 9.60194690221841e-06, "loss": 0.5298, "step": 6877 }, { "epoch": 0.6454579579579579, "grad_norm": 0.8418352481492544, "learning_rate": 9.601733400743793e-06, "loss": 0.4094, "step": 6878 }, { "epoch": 0.6455518018018018, "grad_norm": 0.9345906932231137, "learning_rate": 9.601519844402164e-06, "loss": 0.4579, "step": 6879 }, { "epoch": 0.6456456456456456, "grad_norm": 1.1915159489592315, "learning_rate": 9.601306233196075e-06, "loss": 0.5079, "step": 6880 }, { "epoch": 0.6457394894894894, "grad_norm": 1.0381895094891618, "learning_rate": 9.601092567128069e-06, "loss": 0.5051, "step": 6881 }, { "epoch": 0.6458333333333334, "grad_norm": 1.4342518431371565, "learning_rate": 9.600878846200695e-06, "loss": 0.4839, "step": 6882 }, { "epoch": 0.6459271771771772, "grad_norm": 2.5386721426372754, "learning_rate": 9.6006650704165e-06, "loss": 0.4293, "step": 6883 }, { "epoch": 0.646021021021021, "grad_norm": 1.0892595782580634, "learning_rate": 9.600451239778034e-06, "loss": 0.5365, "step": 6884 }, { "epoch": 0.6461148648648649, "grad_norm": 1.1171577714658787, "learning_rate": 9.600237354287849e-06, "loss": 0.5005, "step": 6885 }, { "epoch": 0.6462087087087087, "grad_norm": 1.6589928522936475, "learning_rate": 9.600023413948489e-06, "loss": 0.498, "step": 6886 }, { "epoch": 0.6463025525525525, "grad_norm": 2.803400973455335, "learning_rate": 9.599809418762512e-06, "loss": 0.4717, "step": 6887 }, { "epoch": 0.6463963963963963, "grad_norm": 1.080510056725195, "learning_rate": 9.599595368732463e-06, "loss": 0.4529, "step": 6888 }, { "epoch": 0.6464902402402403, "grad_norm": 1.5041903264660892, "learning_rate": 9.599381263860898e-06, "loss": 0.5015, "step": 6889 }, { "epoch": 0.6465840840840841, "grad_norm": 1.2628573574253832, "learning_rate": 9.599167104150369e-06, "loss": 0.451, "step": 6890 }, { "epoch": 0.6466779279279279, "grad_norm": 1.4191983407275048, "learning_rate": 9.59895288960343e-06, "loss": 0.4867, "step": 6891 }, { "epoch": 0.6467717717717718, "grad_norm": 1.1828046800883696, "learning_rate": 9.598738620222634e-06, "loss": 0.4841, "step": 6892 }, { "epoch": 0.6468656156156156, "grad_norm": 1.0657067297524128, "learning_rate": 9.598524296010536e-06, "loss": 0.4654, "step": 6893 }, { "epoch": 0.6469594594594594, "grad_norm": 1.3370279239585863, "learning_rate": 9.59830991696969e-06, "loss": 0.4302, "step": 6894 }, { "epoch": 0.6470533033033034, "grad_norm": 0.9787791685919479, "learning_rate": 9.598095483102656e-06, "loss": 0.4433, "step": 6895 }, { "epoch": 0.6471471471471472, "grad_norm": 1.0707835647318407, "learning_rate": 9.597880994411988e-06, "loss": 0.4681, "step": 6896 }, { "epoch": 0.647240990990991, "grad_norm": 1.167325008455406, "learning_rate": 9.597666450900242e-06, "loss": 0.4828, "step": 6897 }, { "epoch": 0.6473348348348348, "grad_norm": 1.2128841284048155, "learning_rate": 9.59745185256998e-06, "loss": 0.4653, "step": 6898 }, { "epoch": 0.6474286786786787, "grad_norm": 0.9664849537237211, "learning_rate": 9.597237199423758e-06, "loss": 0.4737, "step": 6899 }, { "epoch": 0.6475225225225225, "grad_norm": 1.0453481466766652, "learning_rate": 9.597022491464135e-06, "loss": 0.45, "step": 6900 }, { "epoch": 0.6476163663663663, "grad_norm": 5.435114758548354, "learning_rate": 9.596807728693673e-06, "loss": 0.4471, "step": 6901 }, { "epoch": 0.6477102102102102, "grad_norm": 1.1336578685270708, "learning_rate": 9.59659291111493e-06, "loss": 0.4932, "step": 6902 }, { "epoch": 0.6478040540540541, "grad_norm": 1.5686356420410472, "learning_rate": 9.59637803873047e-06, "loss": 0.4612, "step": 6903 }, { "epoch": 0.6478978978978979, "grad_norm": 1.0621000878235007, "learning_rate": 9.596163111542854e-06, "loss": 0.4743, "step": 6904 }, { "epoch": 0.6479917417417418, "grad_norm": 1.0107336144837464, "learning_rate": 9.595948129554642e-06, "loss": 0.4928, "step": 6905 }, { "epoch": 0.6480855855855856, "grad_norm": 1.094253038542454, "learning_rate": 9.5957330927684e-06, "loss": 0.4988, "step": 6906 }, { "epoch": 0.6481794294294294, "grad_norm": 1.1220214554792798, "learning_rate": 9.595518001186694e-06, "loss": 0.4673, "step": 6907 }, { "epoch": 0.6482732732732732, "grad_norm": 1.1525507682260752, "learning_rate": 9.595302854812082e-06, "loss": 0.5072, "step": 6908 }, { "epoch": 0.6483671171171171, "grad_norm": 1.2172994481064248, "learning_rate": 9.595087653647136e-06, "loss": 0.4519, "step": 6909 }, { "epoch": 0.648460960960961, "grad_norm": 1.1445072542790689, "learning_rate": 9.59487239769442e-06, "loss": 0.4878, "step": 6910 }, { "epoch": 0.6485548048048048, "grad_norm": 1.10997633793607, "learning_rate": 9.594657086956498e-06, "loss": 0.4792, "step": 6911 }, { "epoch": 0.6486486486486487, "grad_norm": 1.5936985936029533, "learning_rate": 9.59444172143594e-06, "loss": 0.496, "step": 6912 }, { "epoch": 0.6487424924924925, "grad_norm": 0.9806300578148848, "learning_rate": 9.594226301135312e-06, "loss": 0.5012, "step": 6913 }, { "epoch": 0.6488363363363363, "grad_norm": 1.2701626567784636, "learning_rate": 9.594010826057182e-06, "loss": 0.4604, "step": 6914 }, { "epoch": 0.6489301801801802, "grad_norm": 1.9932268921847274, "learning_rate": 9.59379529620412e-06, "loss": 0.4587, "step": 6915 }, { "epoch": 0.649024024024024, "grad_norm": 0.998248488978464, "learning_rate": 9.593579711578698e-06, "loss": 0.4595, "step": 6916 }, { "epoch": 0.6491178678678678, "grad_norm": 1.453449503275171, "learning_rate": 9.593364072183484e-06, "loss": 0.4641, "step": 6917 }, { "epoch": 0.6492117117117117, "grad_norm": 2.834241248857518, "learning_rate": 9.59314837802105e-06, "loss": 0.4716, "step": 6918 }, { "epoch": 0.6493055555555556, "grad_norm": 1.181737726968045, "learning_rate": 9.592932629093965e-06, "loss": 0.4788, "step": 6919 }, { "epoch": 0.6493993993993994, "grad_norm": 1.190109760479822, "learning_rate": 9.592716825404806e-06, "loss": 0.4868, "step": 6920 }, { "epoch": 0.6494932432432432, "grad_norm": 1.0258566344362372, "learning_rate": 9.592500966956141e-06, "loss": 0.4819, "step": 6921 }, { "epoch": 0.6495870870870871, "grad_norm": 0.9929203553055014, "learning_rate": 9.59228505375055e-06, "loss": 0.4776, "step": 6922 }, { "epoch": 0.6496809309309309, "grad_norm": 1.8213712200656742, "learning_rate": 9.592069085790602e-06, "loss": 0.5013, "step": 6923 }, { "epoch": 0.6497747747747747, "grad_norm": 1.0398790003522385, "learning_rate": 9.591853063078872e-06, "loss": 0.422, "step": 6924 }, { "epoch": 0.6498686186186187, "grad_norm": 1.672397652856654, "learning_rate": 9.59163698561794e-06, "loss": 0.4436, "step": 6925 }, { "epoch": 0.6499624624624625, "grad_norm": 1.1784847209395701, "learning_rate": 9.591420853410381e-06, "loss": 0.4675, "step": 6926 }, { "epoch": 0.6500563063063063, "grad_norm": 1.3000245005547595, "learning_rate": 9.591204666458769e-06, "loss": 0.5086, "step": 6927 }, { "epoch": 0.6501501501501501, "grad_norm": 1.2591643209095564, "learning_rate": 9.590988424765684e-06, "loss": 0.4909, "step": 6928 }, { "epoch": 0.650243993993994, "grad_norm": 1.8571591621893333, "learning_rate": 9.590772128333702e-06, "loss": 0.4558, "step": 6929 }, { "epoch": 0.6503378378378378, "grad_norm": 1.4084420698902407, "learning_rate": 9.590555777165405e-06, "loss": 0.4199, "step": 6930 }, { "epoch": 0.6504316816816816, "grad_norm": 1.7936393770937011, "learning_rate": 9.59033937126337e-06, "loss": 0.4535, "step": 6931 }, { "epoch": 0.6505255255255256, "grad_norm": 1.1138788611589916, "learning_rate": 9.59012291063018e-06, "loss": 0.4688, "step": 6932 }, { "epoch": 0.6506193693693694, "grad_norm": 1.1310109434943363, "learning_rate": 9.589906395268414e-06, "loss": 0.5266, "step": 6933 }, { "epoch": 0.6507132132132132, "grad_norm": 1.168202543130537, "learning_rate": 9.589689825180653e-06, "loss": 0.4963, "step": 6934 }, { "epoch": 0.6508070570570571, "grad_norm": 1.3797109829326735, "learning_rate": 9.58947320036948e-06, "loss": 0.4832, "step": 6935 }, { "epoch": 0.6509009009009009, "grad_norm": 1.008160528075423, "learning_rate": 9.58925652083748e-06, "loss": 0.4321, "step": 6936 }, { "epoch": 0.6509947447447447, "grad_norm": 1.2324485751742995, "learning_rate": 9.589039786587234e-06, "loss": 0.4994, "step": 6937 }, { "epoch": 0.6510885885885885, "grad_norm": 1.7449587307960412, "learning_rate": 9.588822997621325e-06, "loss": 0.4768, "step": 6938 }, { "epoch": 0.6511824324324325, "grad_norm": 0.8882913012558671, "learning_rate": 9.588606153942339e-06, "loss": 0.4614, "step": 6939 }, { "epoch": 0.6512762762762763, "grad_norm": 1.4388994213691892, "learning_rate": 9.588389255552862e-06, "loss": 0.4568, "step": 6940 }, { "epoch": 0.6513701201201201, "grad_norm": 1.0181528094614385, "learning_rate": 9.588172302455481e-06, "loss": 0.4995, "step": 6941 }, { "epoch": 0.651463963963964, "grad_norm": 0.9839200021807477, "learning_rate": 9.58795529465278e-06, "loss": 0.4589, "step": 6942 }, { "epoch": 0.6515578078078078, "grad_norm": 1.0496166913698937, "learning_rate": 9.58773823214735e-06, "loss": 0.512, "step": 6943 }, { "epoch": 0.6516516516516516, "grad_norm": 1.165076464068859, "learning_rate": 9.587521114941776e-06, "loss": 0.4438, "step": 6944 }, { "epoch": 0.6517454954954955, "grad_norm": 0.982537436798748, "learning_rate": 9.587303943038649e-06, "loss": 0.3994, "step": 6945 }, { "epoch": 0.6518393393393394, "grad_norm": 1.1534114944338472, "learning_rate": 9.587086716440555e-06, "loss": 0.4629, "step": 6946 }, { "epoch": 0.6519331831831832, "grad_norm": 0.9161293386366326, "learning_rate": 9.586869435150088e-06, "loss": 0.4092, "step": 6947 }, { "epoch": 0.652027027027027, "grad_norm": 1.082124165475133, "learning_rate": 9.586652099169836e-06, "loss": 0.5195, "step": 6948 }, { "epoch": 0.6521208708708709, "grad_norm": 1.499794366416806, "learning_rate": 9.58643470850239e-06, "loss": 0.4841, "step": 6949 }, { "epoch": 0.6522147147147147, "grad_norm": 2.6876635322074964, "learning_rate": 9.586217263150346e-06, "loss": 0.4737, "step": 6950 }, { "epoch": 0.6523085585585585, "grad_norm": 1.1593142599794288, "learning_rate": 9.585999763116292e-06, "loss": 0.4541, "step": 6951 }, { "epoch": 0.6524024024024024, "grad_norm": 1.2860135191479718, "learning_rate": 9.585782208402822e-06, "loss": 0.4929, "step": 6952 }, { "epoch": 0.6524962462462462, "grad_norm": 1.3136374166615887, "learning_rate": 9.585564599012534e-06, "loss": 0.4896, "step": 6953 }, { "epoch": 0.6525900900900901, "grad_norm": 1.6420179230643404, "learning_rate": 9.585346934948018e-06, "loss": 0.4602, "step": 6954 }, { "epoch": 0.652683933933934, "grad_norm": 1.4677352678761122, "learning_rate": 9.585129216211872e-06, "loss": 0.4519, "step": 6955 }, { "epoch": 0.6527777777777778, "grad_norm": 1.0826148853161959, "learning_rate": 9.584911442806689e-06, "loss": 0.4759, "step": 6956 }, { "epoch": 0.6528716216216216, "grad_norm": 0.9565788768805735, "learning_rate": 9.584693614735069e-06, "loss": 0.4926, "step": 6957 }, { "epoch": 0.6529654654654654, "grad_norm": 1.1571424230375915, "learning_rate": 9.584475731999604e-06, "loss": 0.5447, "step": 6958 }, { "epoch": 0.6530593093093093, "grad_norm": 1.0583735047262752, "learning_rate": 9.584257794602898e-06, "loss": 0.5031, "step": 6959 }, { "epoch": 0.6531531531531531, "grad_norm": 1.0189785232760795, "learning_rate": 9.584039802547546e-06, "loss": 0.4688, "step": 6960 }, { "epoch": 0.653246996996997, "grad_norm": 1.0182130149013902, "learning_rate": 9.58382175583615e-06, "loss": 0.5013, "step": 6961 }, { "epoch": 0.6533408408408409, "grad_norm": 1.0330147960072182, "learning_rate": 9.583603654471305e-06, "loss": 0.4751, "step": 6962 }, { "epoch": 0.6534346846846847, "grad_norm": 0.9814650885462688, "learning_rate": 9.583385498455616e-06, "loss": 0.4785, "step": 6963 }, { "epoch": 0.6535285285285285, "grad_norm": 1.0508868329935874, "learning_rate": 9.583167287791681e-06, "loss": 0.4969, "step": 6964 }, { "epoch": 0.6536223723723724, "grad_norm": 1.073843735086686, "learning_rate": 9.582949022482104e-06, "loss": 0.47, "step": 6965 }, { "epoch": 0.6537162162162162, "grad_norm": 1.2657334294196245, "learning_rate": 9.582730702529487e-06, "loss": 0.4393, "step": 6966 }, { "epoch": 0.65381006006006, "grad_norm": 0.9678517149806475, "learning_rate": 9.58251232793643e-06, "loss": 0.4305, "step": 6967 }, { "epoch": 0.6539039039039038, "grad_norm": 1.1424820365405255, "learning_rate": 9.58229389870554e-06, "loss": 0.5085, "step": 6968 }, { "epoch": 0.6539977477477478, "grad_norm": 1.0310881736087893, "learning_rate": 9.582075414839422e-06, "loss": 0.461, "step": 6969 }, { "epoch": 0.6540915915915916, "grad_norm": 0.9339343665210085, "learning_rate": 9.581856876340679e-06, "loss": 0.4575, "step": 6970 }, { "epoch": 0.6541854354354354, "grad_norm": 2.853864918050169, "learning_rate": 9.581638283211916e-06, "loss": 0.5103, "step": 6971 }, { "epoch": 0.6542792792792793, "grad_norm": 1.042647309431357, "learning_rate": 9.581419635455741e-06, "loss": 0.45, "step": 6972 }, { "epoch": 0.6543731231231231, "grad_norm": 1.3134648007668754, "learning_rate": 9.581200933074761e-06, "loss": 0.4297, "step": 6973 }, { "epoch": 0.6544669669669669, "grad_norm": 1.7079997466556058, "learning_rate": 9.580982176071584e-06, "loss": 0.4667, "step": 6974 }, { "epoch": 0.6545608108108109, "grad_norm": 1.031342815834025, "learning_rate": 9.580763364448816e-06, "loss": 0.5186, "step": 6975 }, { "epoch": 0.6546546546546547, "grad_norm": 0.9696484164385694, "learning_rate": 9.580544498209068e-06, "loss": 0.4683, "step": 6976 }, { "epoch": 0.6547484984984985, "grad_norm": 1.1445959067602924, "learning_rate": 9.580325577354949e-06, "loss": 0.5231, "step": 6977 }, { "epoch": 0.6548423423423423, "grad_norm": 1.1960349260052976, "learning_rate": 9.580106601889067e-06, "loss": 0.5041, "step": 6978 }, { "epoch": 0.6549361861861862, "grad_norm": 1.0380191164160912, "learning_rate": 9.579887571814037e-06, "loss": 0.4751, "step": 6979 }, { "epoch": 0.65503003003003, "grad_norm": 1.0991299969848443, "learning_rate": 9.579668487132468e-06, "loss": 0.483, "step": 6980 }, { "epoch": 0.6551238738738738, "grad_norm": 1.0520280164775193, "learning_rate": 9.579449347846972e-06, "loss": 0.5164, "step": 6981 }, { "epoch": 0.6552177177177178, "grad_norm": 1.9807278886674295, "learning_rate": 9.579230153960164e-06, "loss": 0.522, "step": 6982 }, { "epoch": 0.6553115615615616, "grad_norm": 1.2162924050637385, "learning_rate": 9.579010905474655e-06, "loss": 0.4699, "step": 6983 }, { "epoch": 0.6554054054054054, "grad_norm": 2.867678841292611, "learning_rate": 9.57879160239306e-06, "loss": 0.4953, "step": 6984 }, { "epoch": 0.6554992492492493, "grad_norm": 2.378079453760236, "learning_rate": 9.578572244717995e-06, "loss": 0.4696, "step": 6985 }, { "epoch": 0.6555930930930931, "grad_norm": 1.1727889725126608, "learning_rate": 9.578352832452071e-06, "loss": 0.474, "step": 6986 }, { "epoch": 0.6556869369369369, "grad_norm": 1.036415928817618, "learning_rate": 9.57813336559791e-06, "loss": 0.5044, "step": 6987 }, { "epoch": 0.6557807807807807, "grad_norm": 1.0432697183432453, "learning_rate": 9.577913844158127e-06, "loss": 0.4982, "step": 6988 }, { "epoch": 0.6558746246246246, "grad_norm": 1.2597350356975152, "learning_rate": 9.577694268135335e-06, "loss": 0.4814, "step": 6989 }, { "epoch": 0.6559684684684685, "grad_norm": 1.0117940468873028, "learning_rate": 9.577474637532158e-06, "loss": 0.5026, "step": 6990 }, { "epoch": 0.6560623123123123, "grad_norm": 1.6738087536367312, "learning_rate": 9.577254952351211e-06, "loss": 0.489, "step": 6991 }, { "epoch": 0.6561561561561562, "grad_norm": 1.084005477758286, "learning_rate": 9.577035212595116e-06, "loss": 0.4992, "step": 6992 }, { "epoch": 0.65625, "grad_norm": 1.3334779418882337, "learning_rate": 9.576815418266491e-06, "loss": 0.4413, "step": 6993 }, { "epoch": 0.6563438438438438, "grad_norm": 1.0251299813207744, "learning_rate": 9.576595569367956e-06, "loss": 0.4801, "step": 6994 }, { "epoch": 0.6564376876876877, "grad_norm": 0.9947780123716726, "learning_rate": 9.576375665902135e-06, "loss": 0.5022, "step": 6995 }, { "epoch": 0.6565315315315315, "grad_norm": 1.2176893422535047, "learning_rate": 9.576155707871646e-06, "loss": 0.4812, "step": 6996 }, { "epoch": 0.6566253753753754, "grad_norm": 1.220857910559171, "learning_rate": 9.575935695279116e-06, "loss": 0.5479, "step": 6997 }, { "epoch": 0.6567192192192193, "grad_norm": 1.2453693338816056, "learning_rate": 9.575715628127165e-06, "loss": 0.4906, "step": 6998 }, { "epoch": 0.6568130630630631, "grad_norm": 1.0290312747589787, "learning_rate": 9.57549550641842e-06, "loss": 0.4963, "step": 6999 }, { "epoch": 0.6569069069069069, "grad_norm": 1.0085126460342244, "learning_rate": 9.5752753301555e-06, "loss": 0.4933, "step": 7000 }, { "epoch": 0.6570007507507507, "grad_norm": 1.601839062936704, "learning_rate": 9.575055099341038e-06, "loss": 0.4911, "step": 7001 }, { "epoch": 0.6570945945945946, "grad_norm": 0.9754117799838202, "learning_rate": 9.57483481397765e-06, "loss": 0.4944, "step": 7002 }, { "epoch": 0.6571884384384384, "grad_norm": 1.095835758856493, "learning_rate": 9.574614474067974e-06, "loss": 0.4521, "step": 7003 }, { "epoch": 0.6572822822822822, "grad_norm": 1.0270362207540025, "learning_rate": 9.574394079614629e-06, "loss": 0.4704, "step": 7004 }, { "epoch": 0.6573761261261262, "grad_norm": 0.9353707355154672, "learning_rate": 9.574173630620244e-06, "loss": 0.4541, "step": 7005 }, { "epoch": 0.65746996996997, "grad_norm": 1.55606038764139, "learning_rate": 9.57395312708745e-06, "loss": 0.5009, "step": 7006 }, { "epoch": 0.6575638138138138, "grad_norm": 1.112777269381445, "learning_rate": 9.573732569018872e-06, "loss": 0.4914, "step": 7007 }, { "epoch": 0.6576576576576577, "grad_norm": 1.0875710611560516, "learning_rate": 9.573511956417145e-06, "loss": 0.4867, "step": 7008 }, { "epoch": 0.6577515015015015, "grad_norm": 0.9904683346547478, "learning_rate": 9.573291289284897e-06, "loss": 0.5041, "step": 7009 }, { "epoch": 0.6578453453453453, "grad_norm": 1.2743289290616253, "learning_rate": 9.573070567624757e-06, "loss": 0.5002, "step": 7010 }, { "epoch": 0.6579391891891891, "grad_norm": 3.4230281882601052, "learning_rate": 9.572849791439358e-06, "loss": 0.5525, "step": 7011 }, { "epoch": 0.6580330330330331, "grad_norm": 0.9952949928527476, "learning_rate": 9.572628960731334e-06, "loss": 0.4715, "step": 7012 }, { "epoch": 0.6581268768768769, "grad_norm": 0.9689409744962153, "learning_rate": 9.572408075503314e-06, "loss": 0.4746, "step": 7013 }, { "epoch": 0.6582207207207207, "grad_norm": 1.2766622557772234, "learning_rate": 9.572187135757937e-06, "loss": 0.459, "step": 7014 }, { "epoch": 0.6583145645645646, "grad_norm": 0.9118316662762249, "learning_rate": 9.571966141497834e-06, "loss": 0.4401, "step": 7015 }, { "epoch": 0.6584084084084084, "grad_norm": 1.035390147145257, "learning_rate": 9.571745092725642e-06, "loss": 0.4655, "step": 7016 }, { "epoch": 0.6585022522522522, "grad_norm": 0.8886121407682153, "learning_rate": 9.571523989443993e-06, "loss": 0.4396, "step": 7017 }, { "epoch": 0.6585960960960962, "grad_norm": 1.2959701271022988, "learning_rate": 9.571302831655528e-06, "loss": 0.5105, "step": 7018 }, { "epoch": 0.65868993993994, "grad_norm": 1.0168395420312257, "learning_rate": 9.571081619362878e-06, "loss": 0.4465, "step": 7019 }, { "epoch": 0.6587837837837838, "grad_norm": 0.9109501439985151, "learning_rate": 9.570860352568686e-06, "loss": 0.5288, "step": 7020 }, { "epoch": 0.6588776276276276, "grad_norm": 1.0276500911670101, "learning_rate": 9.570639031275588e-06, "loss": 0.3993, "step": 7021 }, { "epoch": 0.6589714714714715, "grad_norm": 1.0595108838324647, "learning_rate": 9.570417655486223e-06, "loss": 0.4997, "step": 7022 }, { "epoch": 0.6590653153153153, "grad_norm": 1.4209747873656755, "learning_rate": 9.570196225203229e-06, "loss": 0.4445, "step": 7023 }, { "epoch": 0.6591591591591591, "grad_norm": 0.9993158797712499, "learning_rate": 9.56997474042925e-06, "loss": 0.5085, "step": 7024 }, { "epoch": 0.659253003003003, "grad_norm": 0.953705108147008, "learning_rate": 9.569753201166922e-06, "loss": 0.4723, "step": 7025 }, { "epoch": 0.6593468468468469, "grad_norm": 1.0099373540562797, "learning_rate": 9.56953160741889e-06, "loss": 0.4853, "step": 7026 }, { "epoch": 0.6594406906906907, "grad_norm": 1.0609483075981285, "learning_rate": 9.569309959187795e-06, "loss": 0.4439, "step": 7027 }, { "epoch": 0.6595345345345346, "grad_norm": 0.8709705884337293, "learning_rate": 9.56908825647628e-06, "loss": 0.4996, "step": 7028 }, { "epoch": 0.6596283783783784, "grad_norm": 1.0713047703419223, "learning_rate": 9.568866499286985e-06, "loss": 0.4886, "step": 7029 }, { "epoch": 0.6597222222222222, "grad_norm": 0.9558876714283375, "learning_rate": 9.56864468762256e-06, "loss": 0.4243, "step": 7030 }, { "epoch": 0.659816066066066, "grad_norm": 2.888620451105933, "learning_rate": 9.568422821485646e-06, "loss": 0.5469, "step": 7031 }, { "epoch": 0.6599099099099099, "grad_norm": 1.1690257702607716, "learning_rate": 9.568200900878889e-06, "loss": 0.5334, "step": 7032 }, { "epoch": 0.6600037537537538, "grad_norm": 1.716193279817029, "learning_rate": 9.567978925804935e-06, "loss": 0.5255, "step": 7033 }, { "epoch": 0.6600975975975976, "grad_norm": 1.1278471513373476, "learning_rate": 9.567756896266429e-06, "loss": 0.4934, "step": 7034 }, { "epoch": 0.6601914414414415, "grad_norm": 1.010624463410376, "learning_rate": 9.567534812266022e-06, "loss": 0.4995, "step": 7035 }, { "epoch": 0.6602852852852853, "grad_norm": 1.026518850569341, "learning_rate": 9.56731267380636e-06, "loss": 0.4848, "step": 7036 }, { "epoch": 0.6603791291291291, "grad_norm": 1.0710483799679955, "learning_rate": 9.56709048089009e-06, "loss": 0.4943, "step": 7037 }, { "epoch": 0.660472972972973, "grad_norm": 1.2026644097542358, "learning_rate": 9.566868233519864e-06, "loss": 0.4902, "step": 7038 }, { "epoch": 0.6605668168168168, "grad_norm": 1.149950316971631, "learning_rate": 9.566645931698328e-06, "loss": 0.5319, "step": 7039 }, { "epoch": 0.6606606606606606, "grad_norm": 0.8891029238840912, "learning_rate": 9.566423575428138e-06, "loss": 0.4606, "step": 7040 }, { "epoch": 0.6607545045045045, "grad_norm": 1.0264834139627175, "learning_rate": 9.566201164711941e-06, "loss": 0.5035, "step": 7041 }, { "epoch": 0.6608483483483484, "grad_norm": 1.0296424897547827, "learning_rate": 9.565978699552391e-06, "loss": 0.4596, "step": 7042 }, { "epoch": 0.6609421921921922, "grad_norm": 0.98206187966836, "learning_rate": 9.565756179952138e-06, "loss": 0.4738, "step": 7043 }, { "epoch": 0.661036036036036, "grad_norm": 3.5712609780203897, "learning_rate": 9.565533605913838e-06, "loss": 0.4987, "step": 7044 }, { "epoch": 0.6611298798798799, "grad_norm": 1.0537757663102805, "learning_rate": 9.565310977440143e-06, "loss": 0.5183, "step": 7045 }, { "epoch": 0.6612237237237237, "grad_norm": 1.8486323583266695, "learning_rate": 9.565088294533708e-06, "loss": 0.4935, "step": 7046 }, { "epoch": 0.6613175675675675, "grad_norm": 1.310521171995038, "learning_rate": 9.564865557197188e-06, "loss": 0.5144, "step": 7047 }, { "epoch": 0.6614114114114115, "grad_norm": 1.1565248249200406, "learning_rate": 9.564642765433239e-06, "loss": 0.5039, "step": 7048 }, { "epoch": 0.6615052552552553, "grad_norm": 1.0671723816561636, "learning_rate": 9.564419919244517e-06, "loss": 0.4537, "step": 7049 }, { "epoch": 0.6615990990990991, "grad_norm": 1.1083233974614533, "learning_rate": 9.564197018633679e-06, "loss": 0.5257, "step": 7050 }, { "epoch": 0.6616929429429429, "grad_norm": 1.4560931941952036, "learning_rate": 9.563974063603383e-06, "loss": 0.4727, "step": 7051 }, { "epoch": 0.6617867867867868, "grad_norm": 1.1333926597248418, "learning_rate": 9.563751054156286e-06, "loss": 0.4803, "step": 7052 }, { "epoch": 0.6618806306306306, "grad_norm": 4.191951229913533, "learning_rate": 9.563527990295048e-06, "loss": 0.4744, "step": 7053 }, { "epoch": 0.6619744744744744, "grad_norm": 0.9323262460649483, "learning_rate": 9.56330487202233e-06, "loss": 0.5033, "step": 7054 }, { "epoch": 0.6620683183183184, "grad_norm": 1.1026343320123366, "learning_rate": 9.56308169934079e-06, "loss": 0.5102, "step": 7055 }, { "epoch": 0.6621621621621622, "grad_norm": 1.0431772817141323, "learning_rate": 9.562858472253092e-06, "loss": 0.4498, "step": 7056 }, { "epoch": 0.662256006006006, "grad_norm": 1.143459050676223, "learning_rate": 9.562635190761892e-06, "loss": 0.4629, "step": 7057 }, { "epoch": 0.6623498498498499, "grad_norm": 0.9907812122584315, "learning_rate": 9.562411854869857e-06, "loss": 0.4547, "step": 7058 }, { "epoch": 0.6624436936936937, "grad_norm": 1.171945385412737, "learning_rate": 9.562188464579649e-06, "loss": 0.4573, "step": 7059 }, { "epoch": 0.6625375375375375, "grad_norm": 1.123773893097964, "learning_rate": 9.561965019893931e-06, "loss": 0.4627, "step": 7060 }, { "epoch": 0.6626313813813813, "grad_norm": 1.1309865350732033, "learning_rate": 9.561741520815368e-06, "loss": 0.4771, "step": 7061 }, { "epoch": 0.6627252252252253, "grad_norm": 1.1340325648096412, "learning_rate": 9.561517967346621e-06, "loss": 0.415, "step": 7062 }, { "epoch": 0.6628190690690691, "grad_norm": 1.526280398109955, "learning_rate": 9.561294359490362e-06, "loss": 0.4751, "step": 7063 }, { "epoch": 0.6629129129129129, "grad_norm": 2.997444915840771, "learning_rate": 9.56107069724925e-06, "loss": 0.4788, "step": 7064 }, { "epoch": 0.6630067567567568, "grad_norm": 1.4144399080203784, "learning_rate": 9.560846980625958e-06, "loss": 0.4795, "step": 7065 }, { "epoch": 0.6631006006006006, "grad_norm": 1.0406153236944284, "learning_rate": 9.56062320962315e-06, "loss": 0.4641, "step": 7066 }, { "epoch": 0.6631944444444444, "grad_norm": 1.1188186512872833, "learning_rate": 9.560399384243494e-06, "loss": 0.4858, "step": 7067 }, { "epoch": 0.6632882882882883, "grad_norm": 0.984025065645647, "learning_rate": 9.560175504489661e-06, "loss": 0.4787, "step": 7068 }, { "epoch": 0.6633821321321322, "grad_norm": 1.0832831781537138, "learning_rate": 9.559951570364317e-06, "loss": 0.4623, "step": 7069 }, { "epoch": 0.663475975975976, "grad_norm": 1.0349097790740527, "learning_rate": 9.559727581870135e-06, "loss": 0.4161, "step": 7070 }, { "epoch": 0.6635698198198198, "grad_norm": 0.9255677941028292, "learning_rate": 9.559503539009784e-06, "loss": 0.4525, "step": 7071 }, { "epoch": 0.6636636636636637, "grad_norm": 1.4867401319244378, "learning_rate": 9.559279441785935e-06, "loss": 0.4119, "step": 7072 }, { "epoch": 0.6637575075075075, "grad_norm": 1.9685010469331141, "learning_rate": 9.55905529020126e-06, "loss": 0.4636, "step": 7073 }, { "epoch": 0.6638513513513513, "grad_norm": 1.1105438362955118, "learning_rate": 9.558831084258434e-06, "loss": 0.5311, "step": 7074 }, { "epoch": 0.6639451951951952, "grad_norm": 1.5145428925817208, "learning_rate": 9.558606823960129e-06, "loss": 0.4284, "step": 7075 }, { "epoch": 0.664039039039039, "grad_norm": 1.1647896841119942, "learning_rate": 9.558382509309016e-06, "loss": 0.4361, "step": 7076 }, { "epoch": 0.6641328828828829, "grad_norm": 1.2829076561185393, "learning_rate": 9.558158140307773e-06, "loss": 0.4643, "step": 7077 }, { "epoch": 0.6642267267267268, "grad_norm": 1.1137939960358267, "learning_rate": 9.557933716959074e-06, "loss": 0.4902, "step": 7078 }, { "epoch": 0.6643205705705706, "grad_norm": 1.379832370896894, "learning_rate": 9.557709239265594e-06, "loss": 0.4908, "step": 7079 }, { "epoch": 0.6644144144144144, "grad_norm": 0.9700047715313554, "learning_rate": 9.557484707230011e-06, "loss": 0.4417, "step": 7080 }, { "epoch": 0.6645082582582582, "grad_norm": 1.0324962781444993, "learning_rate": 9.557260120855003e-06, "loss": 0.5158, "step": 7081 }, { "epoch": 0.6646021021021021, "grad_norm": 0.9564936444184483, "learning_rate": 9.557035480143244e-06, "loss": 0.4553, "step": 7082 }, { "epoch": 0.6646959459459459, "grad_norm": 1.2717703028971228, "learning_rate": 9.556810785097416e-06, "loss": 0.4595, "step": 7083 }, { "epoch": 0.6647897897897898, "grad_norm": 1.1033021762271666, "learning_rate": 9.556586035720195e-06, "loss": 0.4764, "step": 7084 }, { "epoch": 0.6648836336336337, "grad_norm": 1.0145860744574873, "learning_rate": 9.556361232014265e-06, "loss": 0.3948, "step": 7085 }, { "epoch": 0.6649774774774775, "grad_norm": 2.366244971724387, "learning_rate": 9.5561363739823e-06, "loss": 0.4384, "step": 7086 }, { "epoch": 0.6650713213213213, "grad_norm": 1.114278374542072, "learning_rate": 9.555911461626988e-06, "loss": 0.4485, "step": 7087 }, { "epoch": 0.6651651651651652, "grad_norm": 1.0042512226698084, "learning_rate": 9.555686494951006e-06, "loss": 0.5103, "step": 7088 }, { "epoch": 0.665259009009009, "grad_norm": 1.544509092259202, "learning_rate": 9.55546147395704e-06, "loss": 0.5178, "step": 7089 }, { "epoch": 0.6653528528528528, "grad_norm": 2.27318684472168, "learning_rate": 9.555236398647767e-06, "loss": 0.4627, "step": 7090 }, { "epoch": 0.6654466966966966, "grad_norm": 1.0348349030624493, "learning_rate": 9.555011269025878e-06, "loss": 0.477, "step": 7091 }, { "epoch": 0.6655405405405406, "grad_norm": 1.097887719376332, "learning_rate": 9.554786085094052e-06, "loss": 0.4841, "step": 7092 }, { "epoch": 0.6656343843843844, "grad_norm": 11.078040713910797, "learning_rate": 9.554560846854974e-06, "loss": 0.5044, "step": 7093 }, { "epoch": 0.6657282282282282, "grad_norm": 1.1634849419993254, "learning_rate": 9.554335554311332e-06, "loss": 0.5258, "step": 7094 }, { "epoch": 0.6658220720720721, "grad_norm": 1.101171164613045, "learning_rate": 9.554110207465812e-06, "loss": 0.4861, "step": 7095 }, { "epoch": 0.6659159159159159, "grad_norm": 1.075882902234048, "learning_rate": 9.5538848063211e-06, "loss": 0.4697, "step": 7096 }, { "epoch": 0.6660097597597597, "grad_norm": 1.0169799783066373, "learning_rate": 9.553659350879882e-06, "loss": 0.4435, "step": 7097 }, { "epoch": 0.6661036036036037, "grad_norm": 1.2670616627913633, "learning_rate": 9.55343384114485e-06, "loss": 0.4681, "step": 7098 }, { "epoch": 0.6661974474474475, "grad_norm": 1.1158969954711093, "learning_rate": 9.553208277118688e-06, "loss": 0.4351, "step": 7099 }, { "epoch": 0.6662912912912913, "grad_norm": 2.136396328841844, "learning_rate": 9.552982658804089e-06, "loss": 0.4293, "step": 7100 }, { "epoch": 0.6663851351351351, "grad_norm": 1.5171561907622095, "learning_rate": 9.552756986203741e-06, "loss": 0.519, "step": 7101 }, { "epoch": 0.666478978978979, "grad_norm": 1.0473375860723053, "learning_rate": 9.552531259320336e-06, "loss": 0.4473, "step": 7102 }, { "epoch": 0.6665728228228228, "grad_norm": 2.168800524525808, "learning_rate": 9.552305478156565e-06, "loss": 0.4898, "step": 7103 }, { "epoch": 0.6666666666666666, "grad_norm": 1.158873285953223, "learning_rate": 9.552079642715121e-06, "loss": 0.5146, "step": 7104 }, { "epoch": 0.6667605105105106, "grad_norm": 1.7005837741317744, "learning_rate": 9.551853752998696e-06, "loss": 0.5125, "step": 7105 }, { "epoch": 0.6668543543543544, "grad_norm": 1.0240125294556892, "learning_rate": 9.551627809009981e-06, "loss": 0.462, "step": 7106 }, { "epoch": 0.6669481981981982, "grad_norm": 1.176531321841944, "learning_rate": 9.551401810751672e-06, "loss": 0.462, "step": 7107 }, { "epoch": 0.6670420420420421, "grad_norm": 1.1565757419917013, "learning_rate": 9.551175758226465e-06, "loss": 0.4657, "step": 7108 }, { "epoch": 0.6671358858858859, "grad_norm": 0.9389128696094627, "learning_rate": 9.550949651437054e-06, "loss": 0.4838, "step": 7109 }, { "epoch": 0.6672297297297297, "grad_norm": 1.485647994378768, "learning_rate": 9.550723490386133e-06, "loss": 0.5431, "step": 7110 }, { "epoch": 0.6673235735735735, "grad_norm": 0.9616662837669805, "learning_rate": 9.550497275076402e-06, "loss": 0.4314, "step": 7111 }, { "epoch": 0.6674174174174174, "grad_norm": 3.293325164222176, "learning_rate": 9.550271005510555e-06, "loss": 0.4563, "step": 7112 }, { "epoch": 0.6675112612612613, "grad_norm": 1.332350626626113, "learning_rate": 9.550044681691291e-06, "loss": 0.4607, "step": 7113 }, { "epoch": 0.6676051051051051, "grad_norm": 1.244236288267864, "learning_rate": 9.54981830362131e-06, "loss": 0.4884, "step": 7114 }, { "epoch": 0.667698948948949, "grad_norm": 1.1264345209441329, "learning_rate": 9.54959187130331e-06, "loss": 0.4419, "step": 7115 }, { "epoch": 0.6677927927927928, "grad_norm": 2.0450377367993164, "learning_rate": 9.549365384739991e-06, "loss": 0.5152, "step": 7116 }, { "epoch": 0.6678866366366366, "grad_norm": 0.9373649426683881, "learning_rate": 9.549138843934052e-06, "loss": 0.4859, "step": 7117 }, { "epoch": 0.6679804804804805, "grad_norm": 1.5969851114003186, "learning_rate": 9.548912248888194e-06, "loss": 0.5216, "step": 7118 }, { "epoch": 0.6680743243243243, "grad_norm": 10.431731866983526, "learning_rate": 9.548685599605123e-06, "loss": 0.5048, "step": 7119 }, { "epoch": 0.6681681681681682, "grad_norm": 1.544023330150709, "learning_rate": 9.548458896087536e-06, "loss": 0.4575, "step": 7120 }, { "epoch": 0.668262012012012, "grad_norm": 1.1846429466909554, "learning_rate": 9.548232138338138e-06, "loss": 0.4618, "step": 7121 }, { "epoch": 0.6683558558558559, "grad_norm": 1.6101409664059838, "learning_rate": 9.548005326359632e-06, "loss": 0.4217, "step": 7122 }, { "epoch": 0.6684496996996997, "grad_norm": 1.1858228033523919, "learning_rate": 9.547778460154726e-06, "loss": 0.4945, "step": 7123 }, { "epoch": 0.6685435435435435, "grad_norm": 3.7968745734608342, "learning_rate": 9.54755153972612e-06, "loss": 0.4563, "step": 7124 }, { "epoch": 0.6686373873873874, "grad_norm": 1.0701434590352117, "learning_rate": 9.547324565076524e-06, "loss": 0.5045, "step": 7125 }, { "epoch": 0.6687312312312312, "grad_norm": 1.3113159227510047, "learning_rate": 9.547097536208641e-06, "loss": 0.4737, "step": 7126 }, { "epoch": 0.668825075075075, "grad_norm": 0.9619549432785482, "learning_rate": 9.546870453125179e-06, "loss": 0.4669, "step": 7127 }, { "epoch": 0.668918918918919, "grad_norm": 1.1126456754092726, "learning_rate": 9.546643315828845e-06, "loss": 0.4565, "step": 7128 }, { "epoch": 0.6690127627627628, "grad_norm": 0.9580536220130069, "learning_rate": 9.546416124322349e-06, "loss": 0.4926, "step": 7129 }, { "epoch": 0.6691066066066066, "grad_norm": 1.051241760107797, "learning_rate": 9.546188878608395e-06, "loss": 0.4475, "step": 7130 }, { "epoch": 0.6692004504504504, "grad_norm": 3.676472199826614, "learning_rate": 9.545961578689699e-06, "loss": 0.4718, "step": 7131 }, { "epoch": 0.6692942942942943, "grad_norm": 1.5701023882922656, "learning_rate": 9.545734224568969e-06, "loss": 0.4738, "step": 7132 }, { "epoch": 0.6693881381381381, "grad_norm": 2.1416628803312223, "learning_rate": 9.545506816248912e-06, "loss": 0.4919, "step": 7133 }, { "epoch": 0.6694819819819819, "grad_norm": 1.1009047824707372, "learning_rate": 9.545279353732244e-06, "loss": 0.5239, "step": 7134 }, { "epoch": 0.6695758258258259, "grad_norm": 1.1376566684766023, "learning_rate": 9.545051837021676e-06, "loss": 0.4588, "step": 7135 }, { "epoch": 0.6696696696696697, "grad_norm": 0.9737022252559757, "learning_rate": 9.544824266119916e-06, "loss": 0.4849, "step": 7136 }, { "epoch": 0.6697635135135135, "grad_norm": 1.0689132012022373, "learning_rate": 9.544596641029683e-06, "loss": 0.5037, "step": 7137 }, { "epoch": 0.6698573573573574, "grad_norm": 1.1832606643360521, "learning_rate": 9.544368961753692e-06, "loss": 0.4532, "step": 7138 }, { "epoch": 0.6699512012012012, "grad_norm": 1.9686669864884765, "learning_rate": 9.544141228294654e-06, "loss": 0.5095, "step": 7139 }, { "epoch": 0.670045045045045, "grad_norm": 0.9310161123709525, "learning_rate": 9.543913440655285e-06, "loss": 0.4312, "step": 7140 }, { "epoch": 0.6701388888888888, "grad_norm": 1.081966428442988, "learning_rate": 9.543685598838301e-06, "loss": 0.4614, "step": 7141 }, { "epoch": 0.6702327327327328, "grad_norm": 0.9062401077096238, "learning_rate": 9.54345770284642e-06, "loss": 0.459, "step": 7142 }, { "epoch": 0.6703265765765766, "grad_norm": 1.0443623277796372, "learning_rate": 9.543229752682355e-06, "loss": 0.5286, "step": 7143 }, { "epoch": 0.6704204204204204, "grad_norm": 0.9370705169084097, "learning_rate": 9.543001748348831e-06, "loss": 0.4688, "step": 7144 }, { "epoch": 0.6705142642642643, "grad_norm": 3.867859560814446, "learning_rate": 9.54277368984856e-06, "loss": 0.5068, "step": 7145 }, { "epoch": 0.6706081081081081, "grad_norm": 1.0479451179463086, "learning_rate": 9.542545577184263e-06, "loss": 0.4982, "step": 7146 }, { "epoch": 0.6707019519519519, "grad_norm": 1.3915683876602842, "learning_rate": 9.542317410358662e-06, "loss": 0.456, "step": 7147 }, { "epoch": 0.6707957957957958, "grad_norm": 1.7712580262420188, "learning_rate": 9.542089189374474e-06, "loss": 0.5052, "step": 7148 }, { "epoch": 0.6708896396396397, "grad_norm": 1.064711635199233, "learning_rate": 9.541860914234424e-06, "loss": 0.4653, "step": 7149 }, { "epoch": 0.6709834834834835, "grad_norm": 1.1669573891753675, "learning_rate": 9.541632584941232e-06, "loss": 0.4949, "step": 7150 }, { "epoch": 0.6710773273273273, "grad_norm": 0.9277371135571276, "learning_rate": 9.54140420149762e-06, "loss": 0.445, "step": 7151 }, { "epoch": 0.6711711711711712, "grad_norm": 0.9559585925409494, "learning_rate": 9.541175763906309e-06, "loss": 0.4722, "step": 7152 }, { "epoch": 0.671265015015015, "grad_norm": 0.872379965361642, "learning_rate": 9.540947272170027e-06, "loss": 0.4382, "step": 7153 }, { "epoch": 0.6713588588588588, "grad_norm": 2.3948685359331088, "learning_rate": 9.540718726291497e-06, "loss": 0.4811, "step": 7154 }, { "epoch": 0.6714527027027027, "grad_norm": 1.7720030160975577, "learning_rate": 9.540490126273441e-06, "loss": 0.4319, "step": 7155 }, { "epoch": 0.6715465465465466, "grad_norm": 1.277547518950422, "learning_rate": 9.540261472118588e-06, "loss": 0.4737, "step": 7156 }, { "epoch": 0.6716403903903904, "grad_norm": 1.22508722332183, "learning_rate": 9.540032763829663e-06, "loss": 0.4533, "step": 7157 }, { "epoch": 0.6717342342342343, "grad_norm": 1.1307296062058203, "learning_rate": 9.539804001409392e-06, "loss": 0.4956, "step": 7158 }, { "epoch": 0.6718280780780781, "grad_norm": 1.064510570836569, "learning_rate": 9.539575184860505e-06, "loss": 0.4793, "step": 7159 }, { "epoch": 0.6719219219219219, "grad_norm": 1.0504573673028252, "learning_rate": 9.53934631418573e-06, "loss": 0.4888, "step": 7160 }, { "epoch": 0.6720157657657657, "grad_norm": 1.1409934080100113, "learning_rate": 9.539117389387791e-06, "loss": 0.5066, "step": 7161 }, { "epoch": 0.6721096096096096, "grad_norm": 0.9319724308393621, "learning_rate": 9.538888410469425e-06, "loss": 0.3978, "step": 7162 }, { "epoch": 0.6722034534534534, "grad_norm": 1.0312951508030113, "learning_rate": 9.538659377433355e-06, "loss": 0.4934, "step": 7163 }, { "epoch": 0.6722972972972973, "grad_norm": 1.078778148535111, "learning_rate": 9.538430290282316e-06, "loss": 0.4935, "step": 7164 }, { "epoch": 0.6723911411411412, "grad_norm": 1.0456524718374616, "learning_rate": 9.538201149019039e-06, "loss": 0.5196, "step": 7165 }, { "epoch": 0.672484984984985, "grad_norm": 1.2514706657451666, "learning_rate": 9.537971953646257e-06, "loss": 0.5037, "step": 7166 }, { "epoch": 0.6725788288288288, "grad_norm": 0.9842934892250543, "learning_rate": 9.537742704166699e-06, "loss": 0.4903, "step": 7167 }, { "epoch": 0.6726726726726727, "grad_norm": 1.0318896031549227, "learning_rate": 9.537513400583101e-06, "loss": 0.434, "step": 7168 }, { "epoch": 0.6727665165165165, "grad_norm": 1.048016418585452, "learning_rate": 9.537284042898198e-06, "loss": 0.4745, "step": 7169 }, { "epoch": 0.6728603603603603, "grad_norm": 1.0944982827826852, "learning_rate": 9.537054631114721e-06, "loss": 0.4761, "step": 7170 }, { "epoch": 0.6729542042042042, "grad_norm": 1.1683165849070485, "learning_rate": 9.536825165235411e-06, "loss": 0.4659, "step": 7171 }, { "epoch": 0.6730480480480481, "grad_norm": 1.097011722117846, "learning_rate": 9.536595645262998e-06, "loss": 0.4873, "step": 7172 }, { "epoch": 0.6731418918918919, "grad_norm": 3.6782818405525064, "learning_rate": 9.536366071200222e-06, "loss": 0.4468, "step": 7173 }, { "epoch": 0.6732357357357357, "grad_norm": 1.1332818836767795, "learning_rate": 9.53613644304982e-06, "loss": 0.4268, "step": 7174 }, { "epoch": 0.6733295795795796, "grad_norm": 1.6659678632442558, "learning_rate": 9.535906760814528e-06, "loss": 0.4761, "step": 7175 }, { "epoch": 0.6734234234234234, "grad_norm": 1.2832543908298928, "learning_rate": 9.535677024497087e-06, "loss": 0.4956, "step": 7176 }, { "epoch": 0.6735172672672672, "grad_norm": 1.14326501618072, "learning_rate": 9.535447234100234e-06, "loss": 0.518, "step": 7177 }, { "epoch": 0.6736111111111112, "grad_norm": 1.4253895078397185, "learning_rate": 9.535217389626712e-06, "loss": 0.4638, "step": 7178 }, { "epoch": 0.673704954954955, "grad_norm": 1.0650077546840382, "learning_rate": 9.534987491079258e-06, "loss": 0.4377, "step": 7179 }, { "epoch": 0.6737987987987988, "grad_norm": 1.1241091494329398, "learning_rate": 9.534757538460613e-06, "loss": 0.4823, "step": 7180 }, { "epoch": 0.6738926426426426, "grad_norm": 1.1904301510287987, "learning_rate": 9.53452753177352e-06, "loss": 0.5036, "step": 7181 }, { "epoch": 0.6739864864864865, "grad_norm": 1.0334612412974769, "learning_rate": 9.534297471020724e-06, "loss": 0.5039, "step": 7182 }, { "epoch": 0.6740803303303303, "grad_norm": 1.0506758023404024, "learning_rate": 9.534067356204965e-06, "loss": 0.453, "step": 7183 }, { "epoch": 0.6741741741741741, "grad_norm": 1.2269050802807002, "learning_rate": 9.533837187328986e-06, "loss": 0.4307, "step": 7184 }, { "epoch": 0.6742680180180181, "grad_norm": 1.8069777690829534, "learning_rate": 9.533606964395535e-06, "loss": 0.4763, "step": 7185 }, { "epoch": 0.6743618618618619, "grad_norm": 0.934566405956699, "learning_rate": 9.533376687407352e-06, "loss": 0.3796, "step": 7186 }, { "epoch": 0.6744557057057057, "grad_norm": 1.0347101428714267, "learning_rate": 9.533146356367187e-06, "loss": 0.5074, "step": 7187 }, { "epoch": 0.6745495495495496, "grad_norm": 1.0833231166610988, "learning_rate": 9.532915971277782e-06, "loss": 0.4981, "step": 7188 }, { "epoch": 0.6746433933933934, "grad_norm": 1.0658584958392803, "learning_rate": 9.532685532141888e-06, "loss": 0.474, "step": 7189 }, { "epoch": 0.6747372372372372, "grad_norm": 1.0395083140425037, "learning_rate": 9.532455038962251e-06, "loss": 0.4526, "step": 7190 }, { "epoch": 0.674831081081081, "grad_norm": 1.0286584245653625, "learning_rate": 9.532224491741619e-06, "loss": 0.4711, "step": 7191 }, { "epoch": 0.674924924924925, "grad_norm": 2.5364063504512275, "learning_rate": 9.531993890482743e-06, "loss": 0.4775, "step": 7192 }, { "epoch": 0.6750187687687688, "grad_norm": 1.1248519923939446, "learning_rate": 9.531763235188367e-06, "loss": 0.5271, "step": 7193 }, { "epoch": 0.6751126126126126, "grad_norm": 1.0456852652735336, "learning_rate": 9.531532525861246e-06, "loss": 0.4879, "step": 7194 }, { "epoch": 0.6752064564564565, "grad_norm": 1.1736508895236069, "learning_rate": 9.53130176250413e-06, "loss": 0.4439, "step": 7195 }, { "epoch": 0.6753003003003003, "grad_norm": 1.2154087268194653, "learning_rate": 9.53107094511977e-06, "loss": 0.4832, "step": 7196 }, { "epoch": 0.6753941441441441, "grad_norm": 1.0880174237172953, "learning_rate": 9.530840073710916e-06, "loss": 0.5181, "step": 7197 }, { "epoch": 0.675487987987988, "grad_norm": 0.9705529099388992, "learning_rate": 9.530609148280323e-06, "loss": 0.471, "step": 7198 }, { "epoch": 0.6755818318318318, "grad_norm": 1.0245976051021413, "learning_rate": 9.530378168830745e-06, "loss": 0.4929, "step": 7199 }, { "epoch": 0.6756756756756757, "grad_norm": 0.954061321707435, "learning_rate": 9.530147135364934e-06, "loss": 0.473, "step": 7200 }, { "epoch": 0.6757695195195195, "grad_norm": 1.0402925305786526, "learning_rate": 9.529916047885646e-06, "loss": 0.467, "step": 7201 }, { "epoch": 0.6758633633633634, "grad_norm": 0.9221365442114691, "learning_rate": 9.529684906395636e-06, "loss": 0.4839, "step": 7202 }, { "epoch": 0.6759572072072072, "grad_norm": 1.020343263102347, "learning_rate": 9.52945371089766e-06, "loss": 0.4996, "step": 7203 }, { "epoch": 0.676051051051051, "grad_norm": 0.9782164062570202, "learning_rate": 9.529222461394474e-06, "loss": 0.5233, "step": 7204 }, { "epoch": 0.6761448948948949, "grad_norm": 1.2545711207527044, "learning_rate": 9.528991157888835e-06, "loss": 0.4648, "step": 7205 }, { "epoch": 0.6762387387387387, "grad_norm": 1.2247427123031334, "learning_rate": 9.528759800383503e-06, "loss": 0.4314, "step": 7206 }, { "epoch": 0.6763325825825826, "grad_norm": 4.440000990555987, "learning_rate": 9.528528388881232e-06, "loss": 0.4555, "step": 7207 }, { "epoch": 0.6764264264264265, "grad_norm": 0.955559503699658, "learning_rate": 9.528296923384786e-06, "loss": 0.5063, "step": 7208 }, { "epoch": 0.6765202702702703, "grad_norm": 0.9600873232999676, "learning_rate": 9.528065403896923e-06, "loss": 0.4037, "step": 7209 }, { "epoch": 0.6766141141141141, "grad_norm": 1.0144568504864335, "learning_rate": 9.527833830420403e-06, "loss": 0.4702, "step": 7210 }, { "epoch": 0.6767079579579579, "grad_norm": 1.043362654644595, "learning_rate": 9.527602202957987e-06, "loss": 0.4568, "step": 7211 }, { "epoch": 0.6768018018018018, "grad_norm": 0.8740887297043791, "learning_rate": 9.527370521512437e-06, "loss": 0.4574, "step": 7212 }, { "epoch": 0.6768956456456456, "grad_norm": 0.9674662006308672, "learning_rate": 9.527138786086516e-06, "loss": 0.4611, "step": 7213 }, { "epoch": 0.6769894894894894, "grad_norm": 0.9737185149482335, "learning_rate": 9.526906996682984e-06, "loss": 0.4464, "step": 7214 }, { "epoch": 0.6770833333333334, "grad_norm": 1.059130586483864, "learning_rate": 9.52667515330461e-06, "loss": 0.4749, "step": 7215 }, { "epoch": 0.6771771771771772, "grad_norm": 1.0747334864000901, "learning_rate": 9.526443255954155e-06, "loss": 0.4871, "step": 7216 }, { "epoch": 0.677271021021021, "grad_norm": 1.439431845552351, "learning_rate": 9.526211304634384e-06, "loss": 0.4603, "step": 7217 }, { "epoch": 0.6773648648648649, "grad_norm": 0.9613284240815749, "learning_rate": 9.525979299348063e-06, "loss": 0.4788, "step": 7218 }, { "epoch": 0.6774587087087087, "grad_norm": 1.1172737975635156, "learning_rate": 9.525747240097958e-06, "loss": 0.4567, "step": 7219 }, { "epoch": 0.6775525525525525, "grad_norm": 0.9263331016433891, "learning_rate": 9.525515126886835e-06, "loss": 0.4958, "step": 7220 }, { "epoch": 0.6776463963963963, "grad_norm": 0.9223927461055126, "learning_rate": 9.525282959717463e-06, "loss": 0.4655, "step": 7221 }, { "epoch": 0.6777402402402403, "grad_norm": 1.0008782175018394, "learning_rate": 9.52505073859261e-06, "loss": 0.4608, "step": 7222 }, { "epoch": 0.6778340840840841, "grad_norm": 1.0606767891672069, "learning_rate": 9.524818463515045e-06, "loss": 0.4795, "step": 7223 }, { "epoch": 0.6779279279279279, "grad_norm": 1.1310977245908762, "learning_rate": 9.524586134487536e-06, "loss": 0.489, "step": 7224 }, { "epoch": 0.6780217717717718, "grad_norm": 0.9224265635532742, "learning_rate": 9.524353751512857e-06, "loss": 0.4817, "step": 7225 }, { "epoch": 0.6781156156156156, "grad_norm": 0.9629898451185372, "learning_rate": 9.524121314593771e-06, "loss": 0.4359, "step": 7226 }, { "epoch": 0.6782094594594594, "grad_norm": 1.2907522132304279, "learning_rate": 9.523888823733057e-06, "loss": 0.4835, "step": 7227 }, { "epoch": 0.6783033033033034, "grad_norm": 1.0701220498996167, "learning_rate": 9.523656278933484e-06, "loss": 0.4618, "step": 7228 }, { "epoch": 0.6783971471471472, "grad_norm": 0.9596506896557468, "learning_rate": 9.523423680197823e-06, "loss": 0.4411, "step": 7229 }, { "epoch": 0.678490990990991, "grad_norm": 0.9447984180889906, "learning_rate": 9.523191027528851e-06, "loss": 0.5346, "step": 7230 }, { "epoch": 0.6785848348348348, "grad_norm": 1.3017135700194535, "learning_rate": 9.522958320929339e-06, "loss": 0.487, "step": 7231 }, { "epoch": 0.6786786786786787, "grad_norm": 1.0004018188987363, "learning_rate": 9.522725560402064e-06, "loss": 0.425, "step": 7232 }, { "epoch": 0.6787725225225225, "grad_norm": 0.8585317347897677, "learning_rate": 9.522492745949798e-06, "loss": 0.4583, "step": 7233 }, { "epoch": 0.6788663663663663, "grad_norm": 1.3987669462999013, "learning_rate": 9.522259877575319e-06, "loss": 0.5281, "step": 7234 }, { "epoch": 0.6789602102102102, "grad_norm": 1.0788682887986034, "learning_rate": 9.522026955281406e-06, "loss": 0.4834, "step": 7235 }, { "epoch": 0.6790540540540541, "grad_norm": 1.2576112907114732, "learning_rate": 9.52179397907083e-06, "loss": 0.4734, "step": 7236 }, { "epoch": 0.6791478978978979, "grad_norm": 1.595572642083267, "learning_rate": 9.521560948946374e-06, "loss": 0.5125, "step": 7237 }, { "epoch": 0.6792417417417418, "grad_norm": 1.6790905782539836, "learning_rate": 9.521327864910813e-06, "loss": 0.4865, "step": 7238 }, { "epoch": 0.6793355855855856, "grad_norm": 1.032725958103422, "learning_rate": 9.521094726966928e-06, "loss": 0.4841, "step": 7239 }, { "epoch": 0.6794294294294294, "grad_norm": 1.8855489943149446, "learning_rate": 9.5208615351175e-06, "loss": 0.53, "step": 7240 }, { "epoch": 0.6795232732732732, "grad_norm": 1.1771239327583616, "learning_rate": 9.520628289365308e-06, "loss": 0.5266, "step": 7241 }, { "epoch": 0.6796171171171171, "grad_norm": 1.2713081090073963, "learning_rate": 9.520394989713131e-06, "loss": 0.4858, "step": 7242 }, { "epoch": 0.679710960960961, "grad_norm": 0.9665728157324521, "learning_rate": 9.520161636163754e-06, "loss": 0.4938, "step": 7243 }, { "epoch": 0.6798048048048048, "grad_norm": 0.9072179470151049, "learning_rate": 9.519928228719956e-06, "loss": 0.4849, "step": 7244 }, { "epoch": 0.6798986486486487, "grad_norm": 1.0568258815397686, "learning_rate": 9.519694767384525e-06, "loss": 0.4702, "step": 7245 }, { "epoch": 0.6799924924924925, "grad_norm": 1.0153615314858844, "learning_rate": 9.519461252160239e-06, "loss": 0.4072, "step": 7246 }, { "epoch": 0.6800863363363363, "grad_norm": 0.9014866166280868, "learning_rate": 9.519227683049883e-06, "loss": 0.4772, "step": 7247 }, { "epoch": 0.6801801801801802, "grad_norm": 1.0759795407009092, "learning_rate": 9.518994060056247e-06, "loss": 0.4836, "step": 7248 }, { "epoch": 0.680274024024024, "grad_norm": 1.1315143910309557, "learning_rate": 9.518760383182111e-06, "loss": 0.4708, "step": 7249 }, { "epoch": 0.6803678678678678, "grad_norm": 1.006060527035753, "learning_rate": 9.518526652430265e-06, "loss": 0.4858, "step": 7250 }, { "epoch": 0.6804617117117117, "grad_norm": 0.9536754401696532, "learning_rate": 9.518292867803491e-06, "loss": 0.5062, "step": 7251 }, { "epoch": 0.6805555555555556, "grad_norm": 1.0160345715479338, "learning_rate": 9.518059029304581e-06, "loss": 0.4706, "step": 7252 }, { "epoch": 0.6806493993993994, "grad_norm": 2.191494764451925, "learning_rate": 9.51782513693632e-06, "loss": 0.4616, "step": 7253 }, { "epoch": 0.6807432432432432, "grad_norm": 1.209812876209309, "learning_rate": 9.517591190701501e-06, "loss": 0.4313, "step": 7254 }, { "epoch": 0.6808370870870871, "grad_norm": 1.022274115248444, "learning_rate": 9.517357190602908e-06, "loss": 0.4716, "step": 7255 }, { "epoch": 0.6809309309309309, "grad_norm": 1.2550052916942238, "learning_rate": 9.517123136643335e-06, "loss": 0.468, "step": 7256 }, { "epoch": 0.6810247747747747, "grad_norm": 1.0998911884685334, "learning_rate": 9.516889028825572e-06, "loss": 0.4605, "step": 7257 }, { "epoch": 0.6811186186186187, "grad_norm": 1.0432647254346512, "learning_rate": 9.516654867152406e-06, "loss": 0.4914, "step": 7258 }, { "epoch": 0.6812124624624625, "grad_norm": 1.0915220328496253, "learning_rate": 9.516420651626636e-06, "loss": 0.5101, "step": 7259 }, { "epoch": 0.6813063063063063, "grad_norm": 1.07461071005976, "learning_rate": 9.51618638225105e-06, "loss": 0.4993, "step": 7260 }, { "epoch": 0.6814001501501501, "grad_norm": 1.3534972900635418, "learning_rate": 9.515952059028442e-06, "loss": 0.4805, "step": 7261 }, { "epoch": 0.681493993993994, "grad_norm": 1.1212983858510515, "learning_rate": 9.515717681961605e-06, "loss": 0.5111, "step": 7262 }, { "epoch": 0.6815878378378378, "grad_norm": 1.2679617810089803, "learning_rate": 9.515483251053336e-06, "loss": 0.5274, "step": 7263 }, { "epoch": 0.6816816816816816, "grad_norm": 1.1448836584394382, "learning_rate": 9.515248766306428e-06, "loss": 0.4741, "step": 7264 }, { "epoch": 0.6817755255255256, "grad_norm": 1.0262359068181852, "learning_rate": 9.515014227723677e-06, "loss": 0.4997, "step": 7265 }, { "epoch": 0.6818693693693694, "grad_norm": 0.9944492470209632, "learning_rate": 9.514779635307882e-06, "loss": 0.4749, "step": 7266 }, { "epoch": 0.6819632132132132, "grad_norm": 0.8766152935635144, "learning_rate": 9.514544989061837e-06, "loss": 0.4798, "step": 7267 }, { "epoch": 0.6820570570570571, "grad_norm": 0.9592738378946569, "learning_rate": 9.51431028898834e-06, "loss": 0.4538, "step": 7268 }, { "epoch": 0.6821509009009009, "grad_norm": 1.11347819008774, "learning_rate": 9.514075535090192e-06, "loss": 0.4609, "step": 7269 }, { "epoch": 0.6822447447447447, "grad_norm": 1.3501016748839412, "learning_rate": 9.513840727370188e-06, "loss": 0.5469, "step": 7270 }, { "epoch": 0.6823385885885885, "grad_norm": 1.0875344726867133, "learning_rate": 9.513605865831131e-06, "loss": 0.4961, "step": 7271 }, { "epoch": 0.6824324324324325, "grad_norm": 1.1701808262626021, "learning_rate": 9.51337095047582e-06, "loss": 0.4733, "step": 7272 }, { "epoch": 0.6825262762762763, "grad_norm": 0.9446943658999067, "learning_rate": 9.513135981307056e-06, "loss": 0.486, "step": 7273 }, { "epoch": 0.6826201201201201, "grad_norm": 1.0553115359379683, "learning_rate": 9.51290095832764e-06, "loss": 0.4378, "step": 7274 }, { "epoch": 0.682713963963964, "grad_norm": 1.0721869267637258, "learning_rate": 9.512665881540375e-06, "loss": 0.4695, "step": 7275 }, { "epoch": 0.6828078078078078, "grad_norm": 0.9715870326759853, "learning_rate": 9.512430750948063e-06, "loss": 0.4892, "step": 7276 }, { "epoch": 0.6829016516516516, "grad_norm": 1.416702183832126, "learning_rate": 9.512195566553508e-06, "loss": 0.4342, "step": 7277 }, { "epoch": 0.6829954954954955, "grad_norm": 1.0114540766257856, "learning_rate": 9.511960328359515e-06, "loss": 0.4905, "step": 7278 }, { "epoch": 0.6830893393393394, "grad_norm": 0.9119814083874516, "learning_rate": 9.511725036368888e-06, "loss": 0.4684, "step": 7279 }, { "epoch": 0.6831831831831832, "grad_norm": 1.1373865247485149, "learning_rate": 9.511489690584432e-06, "loss": 0.4824, "step": 7280 }, { "epoch": 0.683277027027027, "grad_norm": 1.4496549424660399, "learning_rate": 9.511254291008954e-06, "loss": 0.4767, "step": 7281 }, { "epoch": 0.6833708708708709, "grad_norm": 0.9537683482179995, "learning_rate": 9.51101883764526e-06, "loss": 0.4601, "step": 7282 }, { "epoch": 0.6834647147147147, "grad_norm": 0.9612634181921377, "learning_rate": 9.510783330496156e-06, "loss": 0.4917, "step": 7283 }, { "epoch": 0.6835585585585585, "grad_norm": 1.0600686701387394, "learning_rate": 9.510547769564454e-06, "loss": 0.5087, "step": 7284 }, { "epoch": 0.6836524024024024, "grad_norm": 0.9378213615375373, "learning_rate": 9.510312154852958e-06, "loss": 0.4296, "step": 7285 }, { "epoch": 0.6837462462462462, "grad_norm": 1.2020618896226694, "learning_rate": 9.510076486364482e-06, "loss": 0.5229, "step": 7286 }, { "epoch": 0.6838400900900901, "grad_norm": 2.018909036453165, "learning_rate": 9.50984076410183e-06, "loss": 0.4778, "step": 7287 }, { "epoch": 0.683933933933934, "grad_norm": 1.4563959220946512, "learning_rate": 9.509604988067818e-06, "loss": 0.4976, "step": 7288 }, { "epoch": 0.6840277777777778, "grad_norm": 1.7128017722898166, "learning_rate": 9.509369158265254e-06, "loss": 0.5815, "step": 7289 }, { "epoch": 0.6841216216216216, "grad_norm": 1.1358287460759886, "learning_rate": 9.50913327469695e-06, "loss": 0.4823, "step": 7290 }, { "epoch": 0.6842154654654654, "grad_norm": 0.9755877591293326, "learning_rate": 9.508897337365721e-06, "loss": 0.4227, "step": 7291 }, { "epoch": 0.6843093093093093, "grad_norm": 0.9647209344233285, "learning_rate": 9.508661346274376e-06, "loss": 0.5085, "step": 7292 }, { "epoch": 0.6844031531531531, "grad_norm": 2.2329490894441655, "learning_rate": 9.508425301425735e-06, "loss": 0.5101, "step": 7293 }, { "epoch": 0.684496996996997, "grad_norm": 1.060337025519728, "learning_rate": 9.508189202822605e-06, "loss": 0.4495, "step": 7294 }, { "epoch": 0.6845908408408409, "grad_norm": 1.0904322039711556, "learning_rate": 9.507953050467806e-06, "loss": 0.5066, "step": 7295 }, { "epoch": 0.6846846846846847, "grad_norm": 1.1645100786499882, "learning_rate": 9.507716844364152e-06, "loss": 0.4798, "step": 7296 }, { "epoch": 0.6847785285285285, "grad_norm": 1.2232319481873148, "learning_rate": 9.507480584514459e-06, "loss": 0.4982, "step": 7297 }, { "epoch": 0.6848723723723724, "grad_norm": 2.9197232555752235, "learning_rate": 9.507244270921545e-06, "loss": 0.4601, "step": 7298 }, { "epoch": 0.6849662162162162, "grad_norm": 1.2506728689171795, "learning_rate": 9.507007903588227e-06, "loss": 0.397, "step": 7299 }, { "epoch": 0.68506006006006, "grad_norm": 0.9241199711799487, "learning_rate": 9.506771482517324e-06, "loss": 0.4341, "step": 7300 }, { "epoch": 0.6851539039039038, "grad_norm": 1.1140605813739106, "learning_rate": 9.506535007711654e-06, "loss": 0.4654, "step": 7301 }, { "epoch": 0.6852477477477478, "grad_norm": 2.901150031864446, "learning_rate": 9.506298479174037e-06, "loss": 0.4897, "step": 7302 }, { "epoch": 0.6853415915915916, "grad_norm": 0.9871000415981649, "learning_rate": 9.506061896907291e-06, "loss": 0.4355, "step": 7303 }, { "epoch": 0.6854354354354354, "grad_norm": 1.0770974910368505, "learning_rate": 9.50582526091424e-06, "loss": 0.4763, "step": 7304 }, { "epoch": 0.6855292792792793, "grad_norm": 1.1503390769407305, "learning_rate": 9.505588571197703e-06, "loss": 0.5442, "step": 7305 }, { "epoch": 0.6856231231231231, "grad_norm": 1.0489121593659008, "learning_rate": 9.505351827760505e-06, "loss": 0.5128, "step": 7306 }, { "epoch": 0.6857169669669669, "grad_norm": 1.1058364069174804, "learning_rate": 9.505115030605466e-06, "loss": 0.5161, "step": 7307 }, { "epoch": 0.6858108108108109, "grad_norm": 1.7614895036841611, "learning_rate": 9.50487817973541e-06, "loss": 0.4933, "step": 7308 }, { "epoch": 0.6859046546546547, "grad_norm": 1.2856473808287454, "learning_rate": 9.504641275153162e-06, "loss": 0.5357, "step": 7309 }, { "epoch": 0.6859984984984985, "grad_norm": 0.9031669031078065, "learning_rate": 9.504404316861547e-06, "loss": 0.4689, "step": 7310 }, { "epoch": 0.6860923423423423, "grad_norm": 0.9540955984522811, "learning_rate": 9.504167304863386e-06, "loss": 0.4628, "step": 7311 }, { "epoch": 0.6861861861861862, "grad_norm": 1.2047502092557165, "learning_rate": 9.50393023916151e-06, "loss": 0.5123, "step": 7312 }, { "epoch": 0.68628003003003, "grad_norm": 1.008573776818613, "learning_rate": 9.503693119758744e-06, "loss": 0.4728, "step": 7313 }, { "epoch": 0.6863738738738738, "grad_norm": 0.9667819493213085, "learning_rate": 9.503455946657913e-06, "loss": 0.4868, "step": 7314 }, { "epoch": 0.6864677177177178, "grad_norm": 0.999486979392248, "learning_rate": 9.503218719861848e-06, "loss": 0.504, "step": 7315 }, { "epoch": 0.6865615615615616, "grad_norm": 1.1541463198537247, "learning_rate": 9.502981439373378e-06, "loss": 0.4232, "step": 7316 }, { "epoch": 0.6866554054054054, "grad_norm": 1.0989906041012152, "learning_rate": 9.502744105195329e-06, "loss": 0.4787, "step": 7317 }, { "epoch": 0.6867492492492493, "grad_norm": 1.0903497445621892, "learning_rate": 9.502506717330532e-06, "loss": 0.5184, "step": 7318 }, { "epoch": 0.6868430930930931, "grad_norm": 1.0083542789732505, "learning_rate": 9.502269275781817e-06, "loss": 0.4704, "step": 7319 }, { "epoch": 0.6869369369369369, "grad_norm": 1.0152216124855595, "learning_rate": 9.502031780552014e-06, "loss": 0.4969, "step": 7320 }, { "epoch": 0.6870307807807807, "grad_norm": 0.9611248499041858, "learning_rate": 9.50179423164396e-06, "loss": 0.4583, "step": 7321 }, { "epoch": 0.6871246246246246, "grad_norm": 1.2106567565884856, "learning_rate": 9.50155662906048e-06, "loss": 0.5211, "step": 7322 }, { "epoch": 0.6872184684684685, "grad_norm": 1.114589642702133, "learning_rate": 9.501318972804412e-06, "loss": 0.4161, "step": 7323 }, { "epoch": 0.6873123123123123, "grad_norm": 0.9642654481559925, "learning_rate": 9.501081262878589e-06, "loss": 0.487, "step": 7324 }, { "epoch": 0.6874061561561562, "grad_norm": 1.130254102426383, "learning_rate": 9.500843499285844e-06, "loss": 0.4568, "step": 7325 }, { "epoch": 0.6875, "grad_norm": 0.9292416178225917, "learning_rate": 9.500605682029012e-06, "loss": 0.4673, "step": 7326 }, { "epoch": 0.6875938438438438, "grad_norm": 1.153277482403825, "learning_rate": 9.500367811110927e-06, "loss": 0.4463, "step": 7327 }, { "epoch": 0.6876876876876877, "grad_norm": 1.2994813049296907, "learning_rate": 9.500129886534429e-06, "loss": 0.5599, "step": 7328 }, { "epoch": 0.6877815315315315, "grad_norm": 0.9966670626893365, "learning_rate": 9.499891908302353e-06, "loss": 0.5119, "step": 7329 }, { "epoch": 0.6878753753753754, "grad_norm": 0.9602073924487365, "learning_rate": 9.499653876417535e-06, "loss": 0.4765, "step": 7330 }, { "epoch": 0.6879692192192193, "grad_norm": 1.1054785843781936, "learning_rate": 9.499415790882813e-06, "loss": 0.477, "step": 7331 }, { "epoch": 0.6880630630630631, "grad_norm": 0.9956248385669262, "learning_rate": 9.49917765170103e-06, "loss": 0.4614, "step": 7332 }, { "epoch": 0.6881569069069069, "grad_norm": 0.9485975616584273, "learning_rate": 9.49893945887502e-06, "loss": 0.4837, "step": 7333 }, { "epoch": 0.6882507507507507, "grad_norm": 1.028262991470534, "learning_rate": 9.498701212407627e-06, "loss": 0.4555, "step": 7334 }, { "epoch": 0.6883445945945946, "grad_norm": 0.8999222999233403, "learning_rate": 9.49846291230169e-06, "loss": 0.456, "step": 7335 }, { "epoch": 0.6884384384384384, "grad_norm": 1.2911887014541459, "learning_rate": 9.498224558560047e-06, "loss": 0.4991, "step": 7336 }, { "epoch": 0.6885322822822822, "grad_norm": 1.051138285284646, "learning_rate": 9.497986151185547e-06, "loss": 0.4166, "step": 7337 }, { "epoch": 0.6886261261261262, "grad_norm": 1.067934783624586, "learning_rate": 9.497747690181027e-06, "loss": 0.5042, "step": 7338 }, { "epoch": 0.68871996996997, "grad_norm": 1.2099659843872406, "learning_rate": 9.497509175549331e-06, "loss": 0.4821, "step": 7339 }, { "epoch": 0.6888138138138138, "grad_norm": 2.9712391233383437, "learning_rate": 9.497270607293307e-06, "loss": 0.5037, "step": 7340 }, { "epoch": 0.6889076576576577, "grad_norm": 10.686664976761376, "learning_rate": 9.497031985415795e-06, "loss": 0.488, "step": 7341 }, { "epoch": 0.6890015015015015, "grad_norm": 1.1892570664033077, "learning_rate": 9.49679330991964e-06, "loss": 0.4585, "step": 7342 }, { "epoch": 0.6890953453453453, "grad_norm": 1.2294125837117302, "learning_rate": 9.496554580807689e-06, "loss": 0.5158, "step": 7343 }, { "epoch": 0.6891891891891891, "grad_norm": 1.3105062622661956, "learning_rate": 9.49631579808279e-06, "loss": 0.5254, "step": 7344 }, { "epoch": 0.6892830330330331, "grad_norm": 1.971766975004567, "learning_rate": 9.496076961747786e-06, "loss": 0.4917, "step": 7345 }, { "epoch": 0.6893768768768769, "grad_norm": 1.32302733099586, "learning_rate": 9.49583807180553e-06, "loss": 0.4964, "step": 7346 }, { "epoch": 0.6894707207207207, "grad_norm": 1.4711684041061968, "learning_rate": 9.495599128258867e-06, "loss": 0.5057, "step": 7347 }, { "epoch": 0.6895645645645646, "grad_norm": 1.0495300683655364, "learning_rate": 9.495360131110647e-06, "loss": 0.4948, "step": 7348 }, { "epoch": 0.6896584084084084, "grad_norm": 1.0778120266088316, "learning_rate": 9.495121080363717e-06, "loss": 0.4858, "step": 7349 }, { "epoch": 0.6897522522522522, "grad_norm": 1.754054935738683, "learning_rate": 9.494881976020931e-06, "loss": 0.4937, "step": 7350 }, { "epoch": 0.6898460960960962, "grad_norm": 1.0984510231178795, "learning_rate": 9.494642818085138e-06, "loss": 0.5097, "step": 7351 }, { "epoch": 0.68993993993994, "grad_norm": 0.9369886601513258, "learning_rate": 9.494403606559189e-06, "loss": 0.3994, "step": 7352 }, { "epoch": 0.6900337837837838, "grad_norm": 1.0430912717521814, "learning_rate": 9.494164341445938e-06, "loss": 0.4466, "step": 7353 }, { "epoch": 0.6901276276276276, "grad_norm": 1.140232451949843, "learning_rate": 9.493925022748234e-06, "loss": 0.4186, "step": 7354 }, { "epoch": 0.6902214714714715, "grad_norm": 1.750426761462707, "learning_rate": 9.493685650468936e-06, "loss": 0.4809, "step": 7355 }, { "epoch": 0.6903153153153153, "grad_norm": 0.9605572631075981, "learning_rate": 9.493446224610893e-06, "loss": 0.4815, "step": 7356 }, { "epoch": 0.6904091591591591, "grad_norm": 1.252337748793639, "learning_rate": 9.493206745176963e-06, "loss": 0.4935, "step": 7357 }, { "epoch": 0.690503003003003, "grad_norm": 1.237246635448386, "learning_rate": 9.492967212170002e-06, "loss": 0.4645, "step": 7358 }, { "epoch": 0.6905968468468469, "grad_norm": 1.4561041047201968, "learning_rate": 9.492727625592862e-06, "loss": 0.4999, "step": 7359 }, { "epoch": 0.6906906906906907, "grad_norm": 1.2039866988479686, "learning_rate": 9.4924879854484e-06, "loss": 0.4987, "step": 7360 }, { "epoch": 0.6907845345345346, "grad_norm": 2.3998509512396957, "learning_rate": 9.492248291739477e-06, "loss": 0.4677, "step": 7361 }, { "epoch": 0.6908783783783784, "grad_norm": 1.0208098351264272, "learning_rate": 9.492008544468948e-06, "loss": 0.4758, "step": 7362 }, { "epoch": 0.6909722222222222, "grad_norm": 1.362341918425775, "learning_rate": 9.491768743639674e-06, "loss": 0.4704, "step": 7363 }, { "epoch": 0.691066066066066, "grad_norm": 0.9581232920687912, "learning_rate": 9.491528889254513e-06, "loss": 0.4326, "step": 7364 }, { "epoch": 0.6911599099099099, "grad_norm": 1.0848198823190038, "learning_rate": 9.491288981316322e-06, "loss": 0.4826, "step": 7365 }, { "epoch": 0.6912537537537538, "grad_norm": 1.2083867486185071, "learning_rate": 9.491049019827966e-06, "loss": 0.4417, "step": 7366 }, { "epoch": 0.6913475975975976, "grad_norm": 0.9163226265584259, "learning_rate": 9.490809004792303e-06, "loss": 0.4117, "step": 7367 }, { "epoch": 0.6914414414414415, "grad_norm": 1.0014225547367372, "learning_rate": 9.490568936212196e-06, "loss": 0.5008, "step": 7368 }, { "epoch": 0.6915352852852853, "grad_norm": 1.596016855010711, "learning_rate": 9.490328814090506e-06, "loss": 0.4334, "step": 7369 }, { "epoch": 0.6916291291291291, "grad_norm": 1.4610267151840186, "learning_rate": 9.490088638430098e-06, "loss": 0.5148, "step": 7370 }, { "epoch": 0.691722972972973, "grad_norm": 1.4455040013621059, "learning_rate": 9.489848409233834e-06, "loss": 0.4621, "step": 7371 }, { "epoch": 0.6918168168168168, "grad_norm": 1.1721086995512708, "learning_rate": 9.48960812650458e-06, "loss": 0.5279, "step": 7372 }, { "epoch": 0.6919106606606606, "grad_norm": 1.0001468163432232, "learning_rate": 9.489367790245201e-06, "loss": 0.4606, "step": 7373 }, { "epoch": 0.6920045045045045, "grad_norm": 1.2328254150754654, "learning_rate": 9.48912740045856e-06, "loss": 0.487, "step": 7374 }, { "epoch": 0.6920983483483484, "grad_norm": 7.179280238502966, "learning_rate": 9.488886957147525e-06, "loss": 0.4274, "step": 7375 }, { "epoch": 0.6921921921921922, "grad_norm": 1.0610549996140897, "learning_rate": 9.488646460314961e-06, "loss": 0.4803, "step": 7376 }, { "epoch": 0.692286036036036, "grad_norm": 0.9159156492199584, "learning_rate": 9.488405909963739e-06, "loss": 0.43, "step": 7377 }, { "epoch": 0.6923798798798799, "grad_norm": 1.273347478569237, "learning_rate": 9.488165306096724e-06, "loss": 0.4467, "step": 7378 }, { "epoch": 0.6924737237237237, "grad_norm": 1.0435944785187377, "learning_rate": 9.487924648716786e-06, "loss": 0.4912, "step": 7379 }, { "epoch": 0.6925675675675675, "grad_norm": 0.9749763077470612, "learning_rate": 9.487683937826794e-06, "loss": 0.4378, "step": 7380 }, { "epoch": 0.6926614114114115, "grad_norm": 0.9832430227969295, "learning_rate": 9.487443173429617e-06, "loss": 0.4954, "step": 7381 }, { "epoch": 0.6927552552552553, "grad_norm": 1.0262387617631357, "learning_rate": 9.48720235552813e-06, "loss": 0.5022, "step": 7382 }, { "epoch": 0.6928490990990991, "grad_norm": 0.9348746969636923, "learning_rate": 9.486961484125199e-06, "loss": 0.5025, "step": 7383 }, { "epoch": 0.6929429429429429, "grad_norm": 1.1461020075342787, "learning_rate": 9.4867205592237e-06, "loss": 0.4865, "step": 7384 }, { "epoch": 0.6930367867867868, "grad_norm": 0.9753786152065946, "learning_rate": 9.486479580826501e-06, "loss": 0.4886, "step": 7385 }, { "epoch": 0.6931306306306306, "grad_norm": 2.337893221563542, "learning_rate": 9.486238548936481e-06, "loss": 0.464, "step": 7386 }, { "epoch": 0.6932244744744744, "grad_norm": 1.313563958665589, "learning_rate": 9.48599746355651e-06, "loss": 0.4874, "step": 7387 }, { "epoch": 0.6933183183183184, "grad_norm": 1.2864030352745617, "learning_rate": 9.485756324689461e-06, "loss": 0.5053, "step": 7388 }, { "epoch": 0.6934121621621622, "grad_norm": 1.127512748638937, "learning_rate": 9.485515132338212e-06, "loss": 0.4946, "step": 7389 }, { "epoch": 0.693506006006006, "grad_norm": 1.5854846752867253, "learning_rate": 9.48527388650564e-06, "loss": 0.5278, "step": 7390 }, { "epoch": 0.6935998498498499, "grad_norm": 0.9500473608371607, "learning_rate": 9.485032587194617e-06, "loss": 0.4831, "step": 7391 }, { "epoch": 0.6936936936936937, "grad_norm": 1.223242449745434, "learning_rate": 9.484791234408026e-06, "loss": 0.453, "step": 7392 }, { "epoch": 0.6937875375375375, "grad_norm": 1.1862147115980592, "learning_rate": 9.484549828148738e-06, "loss": 0.5266, "step": 7393 }, { "epoch": 0.6938813813813813, "grad_norm": 0.9647069324154844, "learning_rate": 9.484308368419637e-06, "loss": 0.4862, "step": 7394 }, { "epoch": 0.6939752252252253, "grad_norm": 0.9597870275618514, "learning_rate": 9.4840668552236e-06, "loss": 0.4572, "step": 7395 }, { "epoch": 0.6940690690690691, "grad_norm": 1.190552793102925, "learning_rate": 9.483825288563504e-06, "loss": 0.4673, "step": 7396 }, { "epoch": 0.6941629129129129, "grad_norm": 3.6864845670803987, "learning_rate": 9.48358366844223e-06, "loss": 0.48, "step": 7397 }, { "epoch": 0.6942567567567568, "grad_norm": 1.2246259537107205, "learning_rate": 9.483341994862663e-06, "loss": 0.4573, "step": 7398 }, { "epoch": 0.6943506006006006, "grad_norm": 1.1479701590510976, "learning_rate": 9.483100267827682e-06, "loss": 0.4671, "step": 7399 }, { "epoch": 0.6944444444444444, "grad_norm": 1.0536871044117695, "learning_rate": 9.482858487340168e-06, "loss": 0.4916, "step": 7400 }, { "epoch": 0.6945382882882883, "grad_norm": 1.2272193617488631, "learning_rate": 9.482616653403005e-06, "loss": 0.4874, "step": 7401 }, { "epoch": 0.6946321321321322, "grad_norm": 1.729613369950147, "learning_rate": 9.482374766019075e-06, "loss": 0.5131, "step": 7402 }, { "epoch": 0.694725975975976, "grad_norm": 1.6300486316739298, "learning_rate": 9.482132825191264e-06, "loss": 0.4612, "step": 7403 }, { "epoch": 0.6948198198198198, "grad_norm": 1.1688836664777438, "learning_rate": 9.481890830922456e-06, "loss": 0.5086, "step": 7404 }, { "epoch": 0.6949136636636637, "grad_norm": 1.347118665183783, "learning_rate": 9.481648783215536e-06, "loss": 0.5149, "step": 7405 }, { "epoch": 0.6950075075075075, "grad_norm": 0.9822901884715823, "learning_rate": 9.48140668207339e-06, "loss": 0.43, "step": 7406 }, { "epoch": 0.6951013513513513, "grad_norm": 1.0827068674152711, "learning_rate": 9.481164527498902e-06, "loss": 0.4333, "step": 7407 }, { "epoch": 0.6951951951951952, "grad_norm": 0.9417340187439396, "learning_rate": 9.480922319494966e-06, "loss": 0.4739, "step": 7408 }, { "epoch": 0.695289039039039, "grad_norm": 1.0467401716154492, "learning_rate": 9.480680058064463e-06, "loss": 0.5158, "step": 7409 }, { "epoch": 0.6953828828828829, "grad_norm": 1.0460932836880679, "learning_rate": 9.480437743210287e-06, "loss": 0.5188, "step": 7410 }, { "epoch": 0.6954767267267268, "grad_norm": 1.0433963286217505, "learning_rate": 9.480195374935323e-06, "loss": 0.5021, "step": 7411 }, { "epoch": 0.6955705705705706, "grad_norm": 1.2423830720598517, "learning_rate": 9.479952953242462e-06, "loss": 0.453, "step": 7412 }, { "epoch": 0.6956644144144144, "grad_norm": 0.9439713307398377, "learning_rate": 9.479710478134595e-06, "loss": 0.4952, "step": 7413 }, { "epoch": 0.6957582582582582, "grad_norm": 1.1218178195495894, "learning_rate": 9.479467949614613e-06, "loss": 0.4618, "step": 7414 }, { "epoch": 0.6958521021021021, "grad_norm": 2.8160645697617555, "learning_rate": 9.479225367685407e-06, "loss": 0.4851, "step": 7415 }, { "epoch": 0.6959459459459459, "grad_norm": 0.9571899170262873, "learning_rate": 9.478982732349871e-06, "loss": 0.4668, "step": 7416 }, { "epoch": 0.6960397897897898, "grad_norm": 1.1256426643503665, "learning_rate": 9.478740043610895e-06, "loss": 0.4781, "step": 7417 }, { "epoch": 0.6961336336336337, "grad_norm": 0.9656184420541255, "learning_rate": 9.478497301471375e-06, "loss": 0.44, "step": 7418 }, { "epoch": 0.6962274774774775, "grad_norm": 1.3341390375165305, "learning_rate": 9.478254505934204e-06, "loss": 0.4435, "step": 7419 }, { "epoch": 0.6963213213213213, "grad_norm": 0.9333153620793854, "learning_rate": 9.478011657002278e-06, "loss": 0.4405, "step": 7420 }, { "epoch": 0.6964151651651652, "grad_norm": 1.120688568053593, "learning_rate": 9.477768754678492e-06, "loss": 0.5297, "step": 7421 }, { "epoch": 0.696509009009009, "grad_norm": 0.9805642684493756, "learning_rate": 9.477525798965742e-06, "loss": 0.4699, "step": 7422 }, { "epoch": 0.6966028528528528, "grad_norm": 0.9410963265795352, "learning_rate": 9.477282789866926e-06, "loss": 0.4668, "step": 7423 }, { "epoch": 0.6966966966966966, "grad_norm": 1.1234339561110582, "learning_rate": 9.477039727384938e-06, "loss": 0.4491, "step": 7424 }, { "epoch": 0.6967905405405406, "grad_norm": 1.0366368758790527, "learning_rate": 9.476796611522681e-06, "loss": 0.4906, "step": 7425 }, { "epoch": 0.6968843843843844, "grad_norm": 0.9718699791675479, "learning_rate": 9.476553442283049e-06, "loss": 0.4586, "step": 7426 }, { "epoch": 0.6969782282282282, "grad_norm": 1.129217699512649, "learning_rate": 9.476310219668945e-06, "loss": 0.4735, "step": 7427 }, { "epoch": 0.6970720720720721, "grad_norm": 1.1791465321127805, "learning_rate": 9.476066943683267e-06, "loss": 0.4998, "step": 7428 }, { "epoch": 0.6971659159159159, "grad_norm": 1.3331863451107258, "learning_rate": 9.475823614328917e-06, "loss": 0.4879, "step": 7429 }, { "epoch": 0.6972597597597597, "grad_norm": 1.1161933713662497, "learning_rate": 9.475580231608793e-06, "loss": 0.4472, "step": 7430 }, { "epoch": 0.6973536036036037, "grad_norm": 2.172256751066315, "learning_rate": 9.4753367955258e-06, "loss": 0.488, "step": 7431 }, { "epoch": 0.6974474474474475, "grad_norm": 0.9656230393397913, "learning_rate": 9.47509330608284e-06, "loss": 0.4732, "step": 7432 }, { "epoch": 0.6975412912912913, "grad_norm": 0.962194687059288, "learning_rate": 9.474849763282816e-06, "loss": 0.4234, "step": 7433 }, { "epoch": 0.6976351351351351, "grad_norm": 1.160993483558362, "learning_rate": 9.47460616712863e-06, "loss": 0.5089, "step": 7434 }, { "epoch": 0.697728978978979, "grad_norm": 1.514255237110983, "learning_rate": 9.47436251762319e-06, "loss": 0.4923, "step": 7435 }, { "epoch": 0.6978228228228228, "grad_norm": 1.3637171191002426, "learning_rate": 9.4741188147694e-06, "loss": 0.4692, "step": 7436 }, { "epoch": 0.6979166666666666, "grad_norm": 1.0275180214334292, "learning_rate": 9.473875058570162e-06, "loss": 0.4898, "step": 7437 }, { "epoch": 0.6980105105105106, "grad_norm": 1.0196125964170393, "learning_rate": 9.473631249028386e-06, "loss": 0.4662, "step": 7438 }, { "epoch": 0.6981043543543544, "grad_norm": 1.0379802221096153, "learning_rate": 9.47338738614698e-06, "loss": 0.487, "step": 7439 }, { "epoch": 0.6981981981981982, "grad_norm": 0.9577316567023658, "learning_rate": 9.473143469928848e-06, "loss": 0.4972, "step": 7440 }, { "epoch": 0.6982920420420421, "grad_norm": 1.3283173945716387, "learning_rate": 9.4728995003769e-06, "loss": 0.4216, "step": 7441 }, { "epoch": 0.6983858858858859, "grad_norm": 1.5076626106951594, "learning_rate": 9.472655477494045e-06, "loss": 0.4381, "step": 7442 }, { "epoch": 0.6984797297297297, "grad_norm": 1.2000454339994329, "learning_rate": 9.472411401283193e-06, "loss": 0.4866, "step": 7443 }, { "epoch": 0.6985735735735735, "grad_norm": 1.349535444884671, "learning_rate": 9.472167271747252e-06, "loss": 0.5344, "step": 7444 }, { "epoch": 0.6986674174174174, "grad_norm": 1.1148335911780862, "learning_rate": 9.471923088889135e-06, "loss": 0.5047, "step": 7445 }, { "epoch": 0.6987612612612613, "grad_norm": 0.9103737524834797, "learning_rate": 9.471678852711752e-06, "loss": 0.4559, "step": 7446 }, { "epoch": 0.6988551051051051, "grad_norm": 1.640757142775031, "learning_rate": 9.471434563218018e-06, "loss": 0.4627, "step": 7447 }, { "epoch": 0.698948948948949, "grad_norm": 1.256777315174564, "learning_rate": 9.471190220410842e-06, "loss": 0.4319, "step": 7448 }, { "epoch": 0.6990427927927928, "grad_norm": 1.1762536103484829, "learning_rate": 9.470945824293139e-06, "loss": 0.4806, "step": 7449 }, { "epoch": 0.6991366366366366, "grad_norm": 1.1183435076580894, "learning_rate": 9.47070137486782e-06, "loss": 0.4932, "step": 7450 }, { "epoch": 0.6992304804804805, "grad_norm": 1.2710304798238605, "learning_rate": 9.470456872137806e-06, "loss": 0.447, "step": 7451 }, { "epoch": 0.6993243243243243, "grad_norm": 0.9699515283323835, "learning_rate": 9.470212316106008e-06, "loss": 0.4933, "step": 7452 }, { "epoch": 0.6994181681681682, "grad_norm": 1.2856071377915124, "learning_rate": 9.469967706775342e-06, "loss": 0.5063, "step": 7453 }, { "epoch": 0.699512012012012, "grad_norm": 1.3636620918421614, "learning_rate": 9.469723044148724e-06, "loss": 0.5077, "step": 7454 }, { "epoch": 0.6996058558558559, "grad_norm": 1.206593460883559, "learning_rate": 9.469478328229073e-06, "loss": 0.4885, "step": 7455 }, { "epoch": 0.6996996996996997, "grad_norm": 1.1805570165224128, "learning_rate": 9.469233559019307e-06, "loss": 0.504, "step": 7456 }, { "epoch": 0.6997935435435435, "grad_norm": 1.2382617799591318, "learning_rate": 9.468988736522342e-06, "loss": 0.4849, "step": 7457 }, { "epoch": 0.6998873873873874, "grad_norm": 1.3516038149534677, "learning_rate": 9.468743860741097e-06, "loss": 0.5003, "step": 7458 }, { "epoch": 0.6999812312312312, "grad_norm": 2.7305010192771184, "learning_rate": 9.468498931678495e-06, "loss": 0.5002, "step": 7459 }, { "epoch": 0.700075075075075, "grad_norm": 0.9374910372482977, "learning_rate": 9.468253949337453e-06, "loss": 0.4532, "step": 7460 }, { "epoch": 0.700168918918919, "grad_norm": 0.9487741950664426, "learning_rate": 9.468008913720895e-06, "loss": 0.4714, "step": 7461 }, { "epoch": 0.7002627627627628, "grad_norm": 1.038441069202391, "learning_rate": 9.46776382483174e-06, "loss": 0.4311, "step": 7462 }, { "epoch": 0.7003566066066066, "grad_norm": 0.9870089256736637, "learning_rate": 9.467518682672909e-06, "loss": 0.4262, "step": 7463 }, { "epoch": 0.7004504504504504, "grad_norm": 1.0051813959144567, "learning_rate": 9.46727348724733e-06, "loss": 0.4844, "step": 7464 }, { "epoch": 0.7005442942942943, "grad_norm": 1.2989457977932044, "learning_rate": 9.467028238557923e-06, "loss": 0.5185, "step": 7465 }, { "epoch": 0.7006381381381381, "grad_norm": 0.8386818559912356, "learning_rate": 9.466782936607611e-06, "loss": 0.4993, "step": 7466 }, { "epoch": 0.7007319819819819, "grad_norm": 1.3967684925058002, "learning_rate": 9.466537581399322e-06, "loss": 0.4515, "step": 7467 }, { "epoch": 0.7008258258258259, "grad_norm": 1.1281421787649337, "learning_rate": 9.466292172935978e-06, "loss": 0.4635, "step": 7468 }, { "epoch": 0.7009196696696697, "grad_norm": 0.9947435838293985, "learning_rate": 9.466046711220507e-06, "loss": 0.4552, "step": 7469 }, { "epoch": 0.7010135135135135, "grad_norm": 1.094089694778799, "learning_rate": 9.465801196255838e-06, "loss": 0.5275, "step": 7470 }, { "epoch": 0.7011073573573574, "grad_norm": 1.0397781540460151, "learning_rate": 9.465555628044893e-06, "loss": 0.4839, "step": 7471 }, { "epoch": 0.7012012012012012, "grad_norm": 0.8604734211544981, "learning_rate": 9.465310006590604e-06, "loss": 0.4725, "step": 7472 }, { "epoch": 0.701295045045045, "grad_norm": 1.0460659783240374, "learning_rate": 9.465064331895896e-06, "loss": 0.4222, "step": 7473 }, { "epoch": 0.7013888888888888, "grad_norm": 1.1452502967565732, "learning_rate": 9.464818603963704e-06, "loss": 0.5355, "step": 7474 }, { "epoch": 0.7014827327327328, "grad_norm": 1.1089395285825465, "learning_rate": 9.464572822796952e-06, "loss": 0.4783, "step": 7475 }, { "epoch": 0.7015765765765766, "grad_norm": 1.03094864861227, "learning_rate": 9.464326988398573e-06, "loss": 0.4986, "step": 7476 }, { "epoch": 0.7016704204204204, "grad_norm": 1.3284570702245062, "learning_rate": 9.464081100771499e-06, "loss": 0.4932, "step": 7477 }, { "epoch": 0.7017642642642643, "grad_norm": 1.2407163244351236, "learning_rate": 9.46383515991866e-06, "loss": 0.4697, "step": 7478 }, { "epoch": 0.7018581081081081, "grad_norm": 1.0278319280143964, "learning_rate": 9.463589165842988e-06, "loss": 0.4554, "step": 7479 }, { "epoch": 0.7019519519519519, "grad_norm": 1.028231673008087, "learning_rate": 9.463343118547418e-06, "loss": 0.4528, "step": 7480 }, { "epoch": 0.7020457957957958, "grad_norm": 1.414382657174636, "learning_rate": 9.463097018034884e-06, "loss": 0.4943, "step": 7481 }, { "epoch": 0.7021396396396397, "grad_norm": 2.004662230655706, "learning_rate": 9.462850864308317e-06, "loss": 0.4657, "step": 7482 }, { "epoch": 0.7022334834834835, "grad_norm": 1.0061475105018653, "learning_rate": 9.462604657370655e-06, "loss": 0.4788, "step": 7483 }, { "epoch": 0.7023273273273273, "grad_norm": 1.12474512502992, "learning_rate": 9.462358397224831e-06, "loss": 0.5038, "step": 7484 }, { "epoch": 0.7024211711711712, "grad_norm": 1.1231105623398294, "learning_rate": 9.462112083873785e-06, "loss": 0.4818, "step": 7485 }, { "epoch": 0.702515015015015, "grad_norm": 1.0619751841076763, "learning_rate": 9.461865717320449e-06, "loss": 0.4404, "step": 7486 }, { "epoch": 0.7026088588588588, "grad_norm": 1.218497081523199, "learning_rate": 9.461619297567763e-06, "loss": 0.4821, "step": 7487 }, { "epoch": 0.7027027027027027, "grad_norm": 1.6963320179083594, "learning_rate": 9.461372824618667e-06, "loss": 0.4764, "step": 7488 }, { "epoch": 0.7027965465465466, "grad_norm": 1.135277766234526, "learning_rate": 9.461126298476097e-06, "loss": 0.4652, "step": 7489 }, { "epoch": 0.7028903903903904, "grad_norm": 1.0906678214465761, "learning_rate": 9.460879719142992e-06, "loss": 0.4842, "step": 7490 }, { "epoch": 0.7029842342342343, "grad_norm": 0.7887026908342979, "learning_rate": 9.460633086622294e-06, "loss": 0.4237, "step": 7491 }, { "epoch": 0.7030780780780781, "grad_norm": 1.04968876283401, "learning_rate": 9.460386400916944e-06, "loss": 0.4859, "step": 7492 }, { "epoch": 0.7031719219219219, "grad_norm": 1.219930759709305, "learning_rate": 9.46013966202988e-06, "loss": 0.4126, "step": 7493 }, { "epoch": 0.7032657657657657, "grad_norm": 1.0160369461477956, "learning_rate": 9.459892869964047e-06, "loss": 0.482, "step": 7494 }, { "epoch": 0.7033596096096096, "grad_norm": 1.6060560017667345, "learning_rate": 9.459646024722387e-06, "loss": 0.4616, "step": 7495 }, { "epoch": 0.7034534534534534, "grad_norm": 1.1741708783885714, "learning_rate": 9.45939912630784e-06, "loss": 0.4833, "step": 7496 }, { "epoch": 0.7035472972972973, "grad_norm": 1.0738904087150039, "learning_rate": 9.459152174723354e-06, "loss": 0.4786, "step": 7497 }, { "epoch": 0.7036411411411412, "grad_norm": 0.9563799591485006, "learning_rate": 9.458905169971873e-06, "loss": 0.4401, "step": 7498 }, { "epoch": 0.703734984984985, "grad_norm": 1.0715815078790585, "learning_rate": 9.458658112056338e-06, "loss": 0.4991, "step": 7499 }, { "epoch": 0.7038288288288288, "grad_norm": 0.9847003366953833, "learning_rate": 9.4584110009797e-06, "loss": 0.3936, "step": 7500 }, { "epoch": 0.7039226726726727, "grad_norm": 0.9672150945810966, "learning_rate": 9.458163836744901e-06, "loss": 0.4761, "step": 7501 }, { "epoch": 0.7040165165165165, "grad_norm": 1.1889000773916063, "learning_rate": 9.457916619354892e-06, "loss": 0.5034, "step": 7502 }, { "epoch": 0.7041103603603603, "grad_norm": 3.049871397704237, "learning_rate": 9.457669348812617e-06, "loss": 0.4337, "step": 7503 }, { "epoch": 0.7042042042042042, "grad_norm": 1.1797553331904878, "learning_rate": 9.457422025121026e-06, "loss": 0.4999, "step": 7504 }, { "epoch": 0.7042980480480481, "grad_norm": 1.2173665947988364, "learning_rate": 9.457174648283067e-06, "loss": 0.5023, "step": 7505 }, { "epoch": 0.7043918918918919, "grad_norm": 0.928874746966759, "learning_rate": 9.456927218301691e-06, "loss": 0.4342, "step": 7506 }, { "epoch": 0.7044857357357357, "grad_norm": 0.9626207712321108, "learning_rate": 9.456679735179847e-06, "loss": 0.4552, "step": 7507 }, { "epoch": 0.7045795795795796, "grad_norm": 1.1047466978241656, "learning_rate": 9.456432198920486e-06, "loss": 0.5088, "step": 7508 }, { "epoch": 0.7046734234234234, "grad_norm": 1.214847565572969, "learning_rate": 9.45618460952656e-06, "loss": 0.4677, "step": 7509 }, { "epoch": 0.7047672672672672, "grad_norm": 1.052111200785131, "learning_rate": 9.455936967001021e-06, "loss": 0.4442, "step": 7510 }, { "epoch": 0.7048611111111112, "grad_norm": 1.2311913921671713, "learning_rate": 9.455689271346818e-06, "loss": 0.4464, "step": 7511 }, { "epoch": 0.704954954954955, "grad_norm": 1.026236479308806, "learning_rate": 9.45544152256691e-06, "loss": 0.4967, "step": 7512 }, { "epoch": 0.7050487987987988, "grad_norm": 1.1331419470075055, "learning_rate": 9.455193720664249e-06, "loss": 0.486, "step": 7513 }, { "epoch": 0.7051426426426426, "grad_norm": 1.1048845121252813, "learning_rate": 9.454945865641786e-06, "loss": 0.4991, "step": 7514 }, { "epoch": 0.7052364864864865, "grad_norm": 1.1506568686516025, "learning_rate": 9.45469795750248e-06, "loss": 0.4482, "step": 7515 }, { "epoch": 0.7053303303303303, "grad_norm": 1.3225851229935843, "learning_rate": 9.454449996249288e-06, "loss": 0.5031, "step": 7516 }, { "epoch": 0.7054241741741741, "grad_norm": 0.910174672777309, "learning_rate": 9.454201981885162e-06, "loss": 0.5279, "step": 7517 }, { "epoch": 0.7055180180180181, "grad_norm": 1.1774828277330087, "learning_rate": 9.453953914413064e-06, "loss": 0.481, "step": 7518 }, { "epoch": 0.7056118618618619, "grad_norm": 1.0616751429770044, "learning_rate": 9.453705793835947e-06, "loss": 0.4822, "step": 7519 }, { "epoch": 0.7057057057057057, "grad_norm": 1.0824481013058571, "learning_rate": 9.453457620156774e-06, "loss": 0.4798, "step": 7520 }, { "epoch": 0.7057995495495496, "grad_norm": 0.9945031279434476, "learning_rate": 9.4532093933785e-06, "loss": 0.466, "step": 7521 }, { "epoch": 0.7058933933933934, "grad_norm": 1.1047212124111687, "learning_rate": 9.452961113504086e-06, "loss": 0.4955, "step": 7522 }, { "epoch": 0.7059872372372372, "grad_norm": 1.058316890845494, "learning_rate": 9.452712780536493e-06, "loss": 0.455, "step": 7523 }, { "epoch": 0.706081081081081, "grad_norm": 1.2998442006744735, "learning_rate": 9.452464394478683e-06, "loss": 0.5151, "step": 7524 }, { "epoch": 0.706174924924925, "grad_norm": 1.060373016348313, "learning_rate": 9.452215955333613e-06, "loss": 0.4581, "step": 7525 }, { "epoch": 0.7062687687687688, "grad_norm": 1.0744801017871284, "learning_rate": 9.45196746310425e-06, "loss": 0.4676, "step": 7526 }, { "epoch": 0.7063626126126126, "grad_norm": 0.9361247330860271, "learning_rate": 9.451718917793555e-06, "loss": 0.4478, "step": 7527 }, { "epoch": 0.7064564564564565, "grad_norm": 1.0422715438329206, "learning_rate": 9.451470319404492e-06, "loss": 0.5382, "step": 7528 }, { "epoch": 0.7065503003003003, "grad_norm": 1.2723480719259923, "learning_rate": 9.451221667940022e-06, "loss": 0.5009, "step": 7529 }, { "epoch": 0.7066441441441441, "grad_norm": 1.0202274146560548, "learning_rate": 9.450972963403114e-06, "loss": 0.4642, "step": 7530 }, { "epoch": 0.706737987987988, "grad_norm": 0.9834922337381917, "learning_rate": 9.450724205796732e-06, "loss": 0.4628, "step": 7531 }, { "epoch": 0.7068318318318318, "grad_norm": 1.1220671409283272, "learning_rate": 9.45047539512384e-06, "loss": 0.525, "step": 7532 }, { "epoch": 0.7069256756756757, "grad_norm": 0.9449871946643376, "learning_rate": 9.450226531387408e-06, "loss": 0.4519, "step": 7533 }, { "epoch": 0.7070195195195195, "grad_norm": 1.1204019921560107, "learning_rate": 9.4499776145904e-06, "loss": 0.4734, "step": 7534 }, { "epoch": 0.7071133633633634, "grad_norm": 1.0758601940684018, "learning_rate": 9.449728644735786e-06, "loss": 0.4786, "step": 7535 }, { "epoch": 0.7072072072072072, "grad_norm": 1.066397356232623, "learning_rate": 9.449479621826532e-06, "loss": 0.4524, "step": 7536 }, { "epoch": 0.707301051051051, "grad_norm": 0.9041881510018469, "learning_rate": 9.44923054586561e-06, "loss": 0.4905, "step": 7537 }, { "epoch": 0.7073948948948949, "grad_norm": 0.998625652131662, "learning_rate": 9.448981416855988e-06, "loss": 0.457, "step": 7538 }, { "epoch": 0.7074887387387387, "grad_norm": 0.9587758983526367, "learning_rate": 9.448732234800637e-06, "loss": 0.468, "step": 7539 }, { "epoch": 0.7075825825825826, "grad_norm": 1.2200995594879787, "learning_rate": 9.448482999702528e-06, "loss": 0.4894, "step": 7540 }, { "epoch": 0.7076764264264265, "grad_norm": 1.152410004068615, "learning_rate": 9.448233711564634e-06, "loss": 0.519, "step": 7541 }, { "epoch": 0.7077702702702703, "grad_norm": 1.2190926608997086, "learning_rate": 9.447984370389925e-06, "loss": 0.4853, "step": 7542 }, { "epoch": 0.7078641141141141, "grad_norm": 1.1030064951227134, "learning_rate": 9.447734976181373e-06, "loss": 0.4767, "step": 7543 }, { "epoch": 0.7079579579579579, "grad_norm": 1.0730924292038821, "learning_rate": 9.447485528941955e-06, "loss": 0.4201, "step": 7544 }, { "epoch": 0.7080518018018018, "grad_norm": 0.9775657058330282, "learning_rate": 9.447236028674644e-06, "loss": 0.4523, "step": 7545 }, { "epoch": 0.7081456456456456, "grad_norm": 0.9506964257887215, "learning_rate": 9.446986475382415e-06, "loss": 0.4717, "step": 7546 }, { "epoch": 0.7082394894894894, "grad_norm": 1.0009569047963012, "learning_rate": 9.44673686906824e-06, "loss": 0.4901, "step": 7547 }, { "epoch": 0.7083333333333334, "grad_norm": 1.3744633883130035, "learning_rate": 9.446487209735102e-06, "loss": 0.565, "step": 7548 }, { "epoch": 0.7084271771771772, "grad_norm": 1.0299792035323523, "learning_rate": 9.44623749738597e-06, "loss": 0.4687, "step": 7549 }, { "epoch": 0.708521021021021, "grad_norm": 1.1232410639211743, "learning_rate": 9.445987732023827e-06, "loss": 0.4238, "step": 7550 }, { "epoch": 0.7086148648648649, "grad_norm": 1.5519723887545442, "learning_rate": 9.44573791365165e-06, "loss": 0.4458, "step": 7551 }, { "epoch": 0.7087087087087087, "grad_norm": 1.3714932265847575, "learning_rate": 9.445488042272414e-06, "loss": 0.4695, "step": 7552 }, { "epoch": 0.7088025525525525, "grad_norm": 1.2132363257647867, "learning_rate": 9.445238117889102e-06, "loss": 0.5128, "step": 7553 }, { "epoch": 0.7088963963963963, "grad_norm": 1.0100291725325394, "learning_rate": 9.444988140504694e-06, "loss": 0.4334, "step": 7554 }, { "epoch": 0.7089902402402403, "grad_norm": 0.9160193581295938, "learning_rate": 9.444738110122167e-06, "loss": 0.4518, "step": 7555 }, { "epoch": 0.7090840840840841, "grad_norm": 1.11151615314062, "learning_rate": 9.444488026744506e-06, "loss": 0.4339, "step": 7556 }, { "epoch": 0.7091779279279279, "grad_norm": 1.0623327254779509, "learning_rate": 9.44423789037469e-06, "loss": 0.4757, "step": 7557 }, { "epoch": 0.7092717717717718, "grad_norm": 1.4643174356459707, "learning_rate": 9.443987701015705e-06, "loss": 0.4547, "step": 7558 }, { "epoch": 0.7093656156156156, "grad_norm": 1.3861933682188932, "learning_rate": 9.443737458670528e-06, "loss": 0.4466, "step": 7559 }, { "epoch": 0.7094594594594594, "grad_norm": 1.162297922365562, "learning_rate": 9.443487163342148e-06, "loss": 0.4502, "step": 7560 }, { "epoch": 0.7095533033033034, "grad_norm": 0.8823970012858897, "learning_rate": 9.443236815033548e-06, "loss": 0.4667, "step": 7561 }, { "epoch": 0.7096471471471472, "grad_norm": 0.9995667421240109, "learning_rate": 9.442986413747713e-06, "loss": 0.4565, "step": 7562 }, { "epoch": 0.709740990990991, "grad_norm": 1.0522938333814524, "learning_rate": 9.442735959487627e-06, "loss": 0.4791, "step": 7563 }, { "epoch": 0.7098348348348348, "grad_norm": 1.1248763240620896, "learning_rate": 9.442485452256278e-06, "loss": 0.4878, "step": 7564 }, { "epoch": 0.7099286786786787, "grad_norm": 0.9614862715628316, "learning_rate": 9.442234892056652e-06, "loss": 0.4974, "step": 7565 }, { "epoch": 0.7100225225225225, "grad_norm": 0.977762011323573, "learning_rate": 9.441984278891735e-06, "loss": 0.4626, "step": 7566 }, { "epoch": 0.7101163663663663, "grad_norm": 0.976314680547339, "learning_rate": 9.441733612764519e-06, "loss": 0.4584, "step": 7567 }, { "epoch": 0.7102102102102102, "grad_norm": 1.501994709892753, "learning_rate": 9.44148289367799e-06, "loss": 0.4642, "step": 7568 }, { "epoch": 0.7103040540540541, "grad_norm": 1.4182476834953996, "learning_rate": 9.441232121635135e-06, "loss": 0.449, "step": 7569 }, { "epoch": 0.7103978978978979, "grad_norm": 1.048937672999556, "learning_rate": 9.440981296638949e-06, "loss": 0.4815, "step": 7570 }, { "epoch": 0.7104917417417418, "grad_norm": 1.3562330233647855, "learning_rate": 9.44073041869242e-06, "loss": 0.488, "step": 7571 }, { "epoch": 0.7105855855855856, "grad_norm": 1.02322095368929, "learning_rate": 9.440479487798539e-06, "loss": 0.482, "step": 7572 }, { "epoch": 0.7106794294294294, "grad_norm": 1.0115150832754138, "learning_rate": 9.4402285039603e-06, "loss": 0.4821, "step": 7573 }, { "epoch": 0.7107732732732732, "grad_norm": 1.0589572114308967, "learning_rate": 9.439977467180693e-06, "loss": 0.46, "step": 7574 }, { "epoch": 0.7108671171171171, "grad_norm": 2.538970935080435, "learning_rate": 9.439726377462713e-06, "loss": 0.4732, "step": 7575 }, { "epoch": 0.710960960960961, "grad_norm": 1.090006712770427, "learning_rate": 9.439475234809352e-06, "loss": 0.4955, "step": 7576 }, { "epoch": 0.7110548048048048, "grad_norm": 1.041236424239507, "learning_rate": 9.439224039223604e-06, "loss": 0.5238, "step": 7577 }, { "epoch": 0.7111486486486487, "grad_norm": 1.2960969218400582, "learning_rate": 9.438972790708467e-06, "loss": 0.4554, "step": 7578 }, { "epoch": 0.7112424924924925, "grad_norm": 0.9084186227684405, "learning_rate": 9.438721489266936e-06, "loss": 0.4763, "step": 7579 }, { "epoch": 0.7113363363363363, "grad_norm": 1.2400328705809696, "learning_rate": 9.438470134902003e-06, "loss": 0.485, "step": 7580 }, { "epoch": 0.7114301801801802, "grad_norm": 1.603542034273512, "learning_rate": 9.43821872761667e-06, "loss": 0.4613, "step": 7581 }, { "epoch": 0.711524024024024, "grad_norm": 1.0255875311449625, "learning_rate": 9.437967267413934e-06, "loss": 0.5125, "step": 7582 }, { "epoch": 0.7116178678678678, "grad_norm": 1.0362998972073023, "learning_rate": 9.43771575429679e-06, "loss": 0.3823, "step": 7583 }, { "epoch": 0.7117117117117117, "grad_norm": 0.8702457137659317, "learning_rate": 9.43746418826824e-06, "loss": 0.4556, "step": 7584 }, { "epoch": 0.7118055555555556, "grad_norm": 0.9864361064811287, "learning_rate": 9.437212569331282e-06, "loss": 0.4887, "step": 7585 }, { "epoch": 0.7118993993993994, "grad_norm": 1.1091953109774326, "learning_rate": 9.436960897488916e-06, "loss": 0.5066, "step": 7586 }, { "epoch": 0.7119932432432432, "grad_norm": 1.0922210047431837, "learning_rate": 9.436709172744143e-06, "loss": 0.444, "step": 7587 }, { "epoch": 0.7120870870870871, "grad_norm": 1.0270980459566597, "learning_rate": 9.436457395099964e-06, "loss": 0.4488, "step": 7588 }, { "epoch": 0.7121809309309309, "grad_norm": 1.0001905431031595, "learning_rate": 9.436205564559382e-06, "loss": 0.4665, "step": 7589 }, { "epoch": 0.7122747747747747, "grad_norm": 1.034405433905705, "learning_rate": 9.435953681125399e-06, "loss": 0.4506, "step": 7590 }, { "epoch": 0.7123686186186187, "grad_norm": 1.889478718831051, "learning_rate": 9.435701744801018e-06, "loss": 0.5003, "step": 7591 }, { "epoch": 0.7124624624624625, "grad_norm": 1.0061695322794662, "learning_rate": 9.435449755589242e-06, "loss": 0.4922, "step": 7592 }, { "epoch": 0.7125563063063063, "grad_norm": 1.438165833475379, "learning_rate": 9.435197713493078e-06, "loss": 0.4786, "step": 7593 }, { "epoch": 0.7126501501501501, "grad_norm": 2.4877476356157473, "learning_rate": 9.434945618515528e-06, "loss": 0.4673, "step": 7594 }, { "epoch": 0.712743993993994, "grad_norm": 1.2089216634714361, "learning_rate": 9.434693470659602e-06, "loss": 0.4581, "step": 7595 }, { "epoch": 0.7128378378378378, "grad_norm": 1.2269577223349342, "learning_rate": 9.434441269928302e-06, "loss": 0.4864, "step": 7596 }, { "epoch": 0.7129316816816816, "grad_norm": 0.9902459070193579, "learning_rate": 9.434189016324637e-06, "loss": 0.4834, "step": 7597 }, { "epoch": 0.7130255255255256, "grad_norm": 0.9454974311161434, "learning_rate": 9.433936709851614e-06, "loss": 0.4629, "step": 7598 }, { "epoch": 0.7131193693693694, "grad_norm": 0.9719297839192742, "learning_rate": 9.433684350512243e-06, "loss": 0.414, "step": 7599 }, { "epoch": 0.7132132132132132, "grad_norm": 0.9394098407052527, "learning_rate": 9.433431938309531e-06, "loss": 0.4251, "step": 7600 }, { "epoch": 0.7133070570570571, "grad_norm": 1.049697290721, "learning_rate": 9.433179473246488e-06, "loss": 0.4538, "step": 7601 }, { "epoch": 0.7134009009009009, "grad_norm": 1.2079857873554807, "learning_rate": 9.432926955326125e-06, "loss": 0.5015, "step": 7602 }, { "epoch": 0.7134947447447447, "grad_norm": 0.9717105995756915, "learning_rate": 9.432674384551453e-06, "loss": 0.4332, "step": 7603 }, { "epoch": 0.7135885885885885, "grad_norm": 1.0321587429411476, "learning_rate": 9.43242176092548e-06, "loss": 0.4583, "step": 7604 }, { "epoch": 0.7136824324324325, "grad_norm": 1.7586541171325223, "learning_rate": 9.432169084451222e-06, "loss": 0.4767, "step": 7605 }, { "epoch": 0.7137762762762763, "grad_norm": 1.461551689372119, "learning_rate": 9.431916355131691e-06, "loss": 0.4801, "step": 7606 }, { "epoch": 0.7138701201201201, "grad_norm": 1.3267804750034111, "learning_rate": 9.4316635729699e-06, "loss": 0.4682, "step": 7607 }, { "epoch": 0.713963963963964, "grad_norm": 1.2150664473426, "learning_rate": 9.431410737968861e-06, "loss": 0.4902, "step": 7608 }, { "epoch": 0.7140578078078078, "grad_norm": 1.2063170170076196, "learning_rate": 9.431157850131591e-06, "loss": 0.5006, "step": 7609 }, { "epoch": 0.7141516516516516, "grad_norm": 1.0669287654562578, "learning_rate": 9.430904909461104e-06, "loss": 0.4773, "step": 7610 }, { "epoch": 0.7142454954954955, "grad_norm": 1.2394786091142087, "learning_rate": 9.430651915960418e-06, "loss": 0.4227, "step": 7611 }, { "epoch": 0.7143393393393394, "grad_norm": 1.0341685269017142, "learning_rate": 9.430398869632547e-06, "loss": 0.5187, "step": 7612 }, { "epoch": 0.7144331831831832, "grad_norm": 0.9532086233861985, "learning_rate": 9.430145770480508e-06, "loss": 0.4581, "step": 7613 }, { "epoch": 0.714527027027027, "grad_norm": 0.9837998389191368, "learning_rate": 9.42989261850732e-06, "loss": 0.4421, "step": 7614 }, { "epoch": 0.7146208708708709, "grad_norm": 0.8406039512835046, "learning_rate": 9.429639413716001e-06, "loss": 0.4208, "step": 7615 }, { "epoch": 0.7147147147147147, "grad_norm": 0.8775994204804567, "learning_rate": 9.42938615610957e-06, "loss": 0.4266, "step": 7616 }, { "epoch": 0.7148085585585585, "grad_norm": 1.0296489669371065, "learning_rate": 9.429132845691047e-06, "loss": 0.4872, "step": 7617 }, { "epoch": 0.7149024024024024, "grad_norm": 1.1848965395275808, "learning_rate": 9.428879482463451e-06, "loss": 0.455, "step": 7618 }, { "epoch": 0.7149962462462462, "grad_norm": 1.4229860494489885, "learning_rate": 9.428626066429803e-06, "loss": 0.4805, "step": 7619 }, { "epoch": 0.7150900900900901, "grad_norm": 1.2010684736703634, "learning_rate": 9.428372597593128e-06, "loss": 0.5111, "step": 7620 }, { "epoch": 0.715183933933934, "grad_norm": 1.0993989628305334, "learning_rate": 9.428119075956442e-06, "loss": 0.4416, "step": 7621 }, { "epoch": 0.7152777777777778, "grad_norm": 0.9775611833220061, "learning_rate": 9.427865501522773e-06, "loss": 0.4545, "step": 7622 }, { "epoch": 0.7153716216216216, "grad_norm": 1.1729318761223915, "learning_rate": 9.427611874295143e-06, "loss": 0.4877, "step": 7623 }, { "epoch": 0.7154654654654654, "grad_norm": 1.0424832426880923, "learning_rate": 9.427358194276572e-06, "loss": 0.4302, "step": 7624 }, { "epoch": 0.7155593093093093, "grad_norm": 1.309171064408815, "learning_rate": 9.427104461470091e-06, "loss": 0.4824, "step": 7625 }, { "epoch": 0.7156531531531531, "grad_norm": 1.2708052831466756, "learning_rate": 9.426850675878723e-06, "loss": 0.4781, "step": 7626 }, { "epoch": 0.715746996996997, "grad_norm": 0.9472430452437055, "learning_rate": 9.426596837505493e-06, "loss": 0.4471, "step": 7627 }, { "epoch": 0.7158408408408409, "grad_norm": 1.046513364822369, "learning_rate": 9.426342946353427e-06, "loss": 0.4959, "step": 7628 }, { "epoch": 0.7159346846846847, "grad_norm": 1.931531501214976, "learning_rate": 9.426089002425554e-06, "loss": 0.4944, "step": 7629 }, { "epoch": 0.7160285285285285, "grad_norm": 1.660548970355618, "learning_rate": 9.425835005724899e-06, "loss": 0.4747, "step": 7630 }, { "epoch": 0.7161223723723724, "grad_norm": 0.887775107608126, "learning_rate": 9.425580956254494e-06, "loss": 0.4533, "step": 7631 }, { "epoch": 0.7162162162162162, "grad_norm": 0.9887130131144363, "learning_rate": 9.425326854017366e-06, "loss": 0.4902, "step": 7632 }, { "epoch": 0.71631006006006, "grad_norm": 1.1864752535353682, "learning_rate": 9.425072699016546e-06, "loss": 0.4806, "step": 7633 }, { "epoch": 0.7164039039039038, "grad_norm": 1.3305120349223676, "learning_rate": 9.424818491255063e-06, "loss": 0.5046, "step": 7634 }, { "epoch": 0.7164977477477478, "grad_norm": 1.162063062832944, "learning_rate": 9.424564230735948e-06, "loss": 0.5223, "step": 7635 }, { "epoch": 0.7165915915915916, "grad_norm": 1.0310001601345629, "learning_rate": 9.424309917462233e-06, "loss": 0.4859, "step": 7636 }, { "epoch": 0.7166854354354354, "grad_norm": 1.5809896582735206, "learning_rate": 9.42405555143695e-06, "loss": 0.4374, "step": 7637 }, { "epoch": 0.7167792792792793, "grad_norm": 1.535282323492383, "learning_rate": 9.423801132663133e-06, "loss": 0.4462, "step": 7638 }, { "epoch": 0.7168731231231231, "grad_norm": 3.081963376533829, "learning_rate": 9.423546661143813e-06, "loss": 0.4627, "step": 7639 }, { "epoch": 0.7169669669669669, "grad_norm": 0.98395035808563, "learning_rate": 9.423292136882025e-06, "loss": 0.4531, "step": 7640 }, { "epoch": 0.7170608108108109, "grad_norm": 1.1396882074745656, "learning_rate": 9.423037559880805e-06, "loss": 0.4495, "step": 7641 }, { "epoch": 0.7171546546546547, "grad_norm": 1.034126990743551, "learning_rate": 9.42278293014319e-06, "loss": 0.4484, "step": 7642 }, { "epoch": 0.7172484984984985, "grad_norm": 1.060766985148105, "learning_rate": 9.422528247672212e-06, "loss": 0.4579, "step": 7643 }, { "epoch": 0.7173423423423423, "grad_norm": 1.4139009834425322, "learning_rate": 9.422273512470907e-06, "loss": 0.4902, "step": 7644 }, { "epoch": 0.7174361861861862, "grad_norm": 1.0815158023487825, "learning_rate": 9.422018724542317e-06, "loss": 0.457, "step": 7645 }, { "epoch": 0.71753003003003, "grad_norm": 1.2466547414175426, "learning_rate": 9.421763883889476e-06, "loss": 0.4953, "step": 7646 }, { "epoch": 0.7176238738738738, "grad_norm": 1.5294956773742845, "learning_rate": 9.421508990515424e-06, "loss": 0.4661, "step": 7647 }, { "epoch": 0.7177177177177178, "grad_norm": 0.8946989802870374, "learning_rate": 9.4212540444232e-06, "loss": 0.4353, "step": 7648 }, { "epoch": 0.7178115615615616, "grad_norm": 1.6554643761755397, "learning_rate": 9.420999045615844e-06, "loss": 0.4515, "step": 7649 }, { "epoch": 0.7179054054054054, "grad_norm": 1.1178919901453406, "learning_rate": 9.420743994096397e-06, "loss": 0.4625, "step": 7650 }, { "epoch": 0.7179992492492493, "grad_norm": 1.1977386107305596, "learning_rate": 9.420488889867898e-06, "loss": 0.3945, "step": 7651 }, { "epoch": 0.7180930930930931, "grad_norm": 1.070805684815543, "learning_rate": 9.42023373293339e-06, "loss": 0.492, "step": 7652 }, { "epoch": 0.7181869369369369, "grad_norm": 1.526931462820398, "learning_rate": 9.419978523295914e-06, "loss": 0.4748, "step": 7653 }, { "epoch": 0.7182807807807807, "grad_norm": 1.269947896803661, "learning_rate": 9.419723260958515e-06, "loss": 0.4871, "step": 7654 }, { "epoch": 0.7183746246246246, "grad_norm": 1.0299918720660108, "learning_rate": 9.419467945924235e-06, "loss": 0.4378, "step": 7655 }, { "epoch": 0.7184684684684685, "grad_norm": 1.0494329398225333, "learning_rate": 9.419212578196117e-06, "loss": 0.4535, "step": 7656 }, { "epoch": 0.7185623123123123, "grad_norm": 1.3261235374806608, "learning_rate": 9.41895715777721e-06, "loss": 0.4826, "step": 7657 }, { "epoch": 0.7186561561561562, "grad_norm": 1.7359275102586298, "learning_rate": 9.418701684670554e-06, "loss": 0.4817, "step": 7658 }, { "epoch": 0.71875, "grad_norm": 1.25737497828516, "learning_rate": 9.4184461588792e-06, "loss": 0.491, "step": 7659 }, { "epoch": 0.7188438438438438, "grad_norm": 1.2122913887542113, "learning_rate": 9.418190580406193e-06, "loss": 0.5405, "step": 7660 }, { "epoch": 0.7189376876876877, "grad_norm": 0.9808742126828413, "learning_rate": 9.417934949254577e-06, "loss": 0.4759, "step": 7661 }, { "epoch": 0.7190315315315315, "grad_norm": 1.2600268076301175, "learning_rate": 9.417679265427404e-06, "loss": 0.5148, "step": 7662 }, { "epoch": 0.7191253753753754, "grad_norm": 1.0153993767716052, "learning_rate": 9.417423528927723e-06, "loss": 0.4668, "step": 7663 }, { "epoch": 0.7192192192192193, "grad_norm": 1.0300873212477262, "learning_rate": 9.41716773975858e-06, "loss": 0.5072, "step": 7664 }, { "epoch": 0.7193130630630631, "grad_norm": 1.593805146206616, "learning_rate": 9.416911897923025e-06, "loss": 0.4438, "step": 7665 }, { "epoch": 0.7194069069069069, "grad_norm": 0.9785717569323443, "learning_rate": 9.416656003424109e-06, "loss": 0.427, "step": 7666 }, { "epoch": 0.7195007507507507, "grad_norm": 1.2348353771664258, "learning_rate": 9.416400056264885e-06, "loss": 0.5106, "step": 7667 }, { "epoch": 0.7195945945945946, "grad_norm": 0.9534504142108896, "learning_rate": 9.416144056448405e-06, "loss": 0.4662, "step": 7668 }, { "epoch": 0.7196884384384384, "grad_norm": 1.0563375247957498, "learning_rate": 9.415888003977717e-06, "loss": 0.4915, "step": 7669 }, { "epoch": 0.7197822822822822, "grad_norm": 1.3933430469597166, "learning_rate": 9.415631898855876e-06, "loss": 0.4955, "step": 7670 }, { "epoch": 0.7198761261261262, "grad_norm": 1.2666213038914336, "learning_rate": 9.415375741085939e-06, "loss": 0.526, "step": 7671 }, { "epoch": 0.71996996996997, "grad_norm": 1.0525000371532887, "learning_rate": 9.415119530670957e-06, "loss": 0.4331, "step": 7672 }, { "epoch": 0.7200638138138138, "grad_norm": 1.1964730423011951, "learning_rate": 9.414863267613984e-06, "loss": 0.5565, "step": 7673 }, { "epoch": 0.7201576576576577, "grad_norm": 1.125899842738129, "learning_rate": 9.414606951918077e-06, "loss": 0.466, "step": 7674 }, { "epoch": 0.7202515015015015, "grad_norm": 1.949473477033539, "learning_rate": 9.414350583586293e-06, "loss": 0.4401, "step": 7675 }, { "epoch": 0.7203453453453453, "grad_norm": 0.9755115320506597, "learning_rate": 9.414094162621685e-06, "loss": 0.4665, "step": 7676 }, { "epoch": 0.7204391891891891, "grad_norm": 0.9823930362625539, "learning_rate": 9.413837689027314e-06, "loss": 0.5421, "step": 7677 }, { "epoch": 0.7205330330330331, "grad_norm": 0.9613280345826302, "learning_rate": 9.413581162806238e-06, "loss": 0.4766, "step": 7678 }, { "epoch": 0.7206268768768769, "grad_norm": 0.9735634606058042, "learning_rate": 9.413324583961511e-06, "loss": 0.5077, "step": 7679 }, { "epoch": 0.7207207207207207, "grad_norm": 0.9035244182678106, "learning_rate": 9.413067952496199e-06, "loss": 0.4733, "step": 7680 }, { "epoch": 0.7208145645645646, "grad_norm": 1.1915048087228406, "learning_rate": 9.412811268413358e-06, "loss": 0.4775, "step": 7681 }, { "epoch": 0.7209084084084084, "grad_norm": 1.0430691021302994, "learning_rate": 9.412554531716047e-06, "loss": 0.4787, "step": 7682 }, { "epoch": 0.7210022522522522, "grad_norm": 1.490000997014921, "learning_rate": 9.41229774240733e-06, "loss": 0.4742, "step": 7683 }, { "epoch": 0.7210960960960962, "grad_norm": 1.4649089889355085, "learning_rate": 9.412040900490267e-06, "loss": 0.4729, "step": 7684 }, { "epoch": 0.72118993993994, "grad_norm": 1.1022673048249136, "learning_rate": 9.411784005967923e-06, "loss": 0.4427, "step": 7685 }, { "epoch": 0.7212837837837838, "grad_norm": 1.2399482702536933, "learning_rate": 9.411527058843357e-06, "loss": 0.488, "step": 7686 }, { "epoch": 0.7213776276276276, "grad_norm": 1.051951438969695, "learning_rate": 9.411270059119637e-06, "loss": 0.5569, "step": 7687 }, { "epoch": 0.7214714714714715, "grad_norm": 0.9606817532687735, "learning_rate": 9.411013006799824e-06, "loss": 0.4658, "step": 7688 }, { "epoch": 0.7215653153153153, "grad_norm": 1.0772636841335501, "learning_rate": 9.410755901886982e-06, "loss": 0.4907, "step": 7689 }, { "epoch": 0.7216591591591591, "grad_norm": 1.1504705466206164, "learning_rate": 9.41049874438418e-06, "loss": 0.4896, "step": 7690 }, { "epoch": 0.721753003003003, "grad_norm": 3.7561756145310965, "learning_rate": 9.410241534294482e-06, "loss": 0.467, "step": 7691 }, { "epoch": 0.7218468468468469, "grad_norm": 0.9989128826221937, "learning_rate": 9.409984271620953e-06, "loss": 0.4711, "step": 7692 }, { "epoch": 0.7219406906906907, "grad_norm": 0.9674203688245442, "learning_rate": 9.409726956366666e-06, "loss": 0.4776, "step": 7693 }, { "epoch": 0.7220345345345346, "grad_norm": 1.2553065096751879, "learning_rate": 9.409469588534682e-06, "loss": 0.4073, "step": 7694 }, { "epoch": 0.7221283783783784, "grad_norm": 1.2741664160816961, "learning_rate": 9.409212168128077e-06, "loss": 0.4754, "step": 7695 }, { "epoch": 0.7222222222222222, "grad_norm": 0.9975200209939734, "learning_rate": 9.408954695149914e-06, "loss": 0.4609, "step": 7696 }, { "epoch": 0.722316066066066, "grad_norm": 3.313449073560004, "learning_rate": 9.408697169603266e-06, "loss": 0.4762, "step": 7697 }, { "epoch": 0.7224099099099099, "grad_norm": 0.9067899240507132, "learning_rate": 9.408439591491203e-06, "loss": 0.439, "step": 7698 }, { "epoch": 0.7225037537537538, "grad_norm": 1.6755079392024042, "learning_rate": 9.408181960816796e-06, "loss": 0.4542, "step": 7699 }, { "epoch": 0.7225975975975976, "grad_norm": 1.1057849715532457, "learning_rate": 9.407924277583114e-06, "loss": 0.4647, "step": 7700 }, { "epoch": 0.7226914414414415, "grad_norm": 0.9821635024685046, "learning_rate": 9.407666541793234e-06, "loss": 0.4741, "step": 7701 }, { "epoch": 0.7227852852852853, "grad_norm": 1.030476840314552, "learning_rate": 9.407408753450229e-06, "loss": 0.5069, "step": 7702 }, { "epoch": 0.7228791291291291, "grad_norm": 1.0122523717210319, "learning_rate": 9.407150912557168e-06, "loss": 0.4374, "step": 7703 }, { "epoch": 0.722972972972973, "grad_norm": 1.9253482837653846, "learning_rate": 9.40689301911713e-06, "loss": 0.4818, "step": 7704 }, { "epoch": 0.7230668168168168, "grad_norm": 1.0967610495596047, "learning_rate": 9.406635073133185e-06, "loss": 0.4671, "step": 7705 }, { "epoch": 0.7231606606606606, "grad_norm": 1.0890636622171965, "learning_rate": 9.406377074608414e-06, "loss": 0.4531, "step": 7706 }, { "epoch": 0.7232545045045045, "grad_norm": 1.1880088859443507, "learning_rate": 9.40611902354589e-06, "loss": 0.5272, "step": 7707 }, { "epoch": 0.7233483483483484, "grad_norm": 0.9414661813025347, "learning_rate": 9.40586091994869e-06, "loss": 0.4339, "step": 7708 }, { "epoch": 0.7234421921921922, "grad_norm": 1.6711027422952438, "learning_rate": 9.405602763819892e-06, "loss": 0.4799, "step": 7709 }, { "epoch": 0.723536036036036, "grad_norm": 0.9425276730545109, "learning_rate": 9.405344555162573e-06, "loss": 0.5016, "step": 7710 }, { "epoch": 0.7236298798798799, "grad_norm": 2.411985592826562, "learning_rate": 9.405086293979812e-06, "loss": 0.4196, "step": 7711 }, { "epoch": 0.7237237237237237, "grad_norm": 1.0545078717569798, "learning_rate": 9.404827980274692e-06, "loss": 0.4866, "step": 7712 }, { "epoch": 0.7238175675675675, "grad_norm": 1.6509276920234746, "learning_rate": 9.404569614050286e-06, "loss": 0.493, "step": 7713 }, { "epoch": 0.7239114114114115, "grad_norm": 1.275905682585229, "learning_rate": 9.40431119530968e-06, "loss": 0.4937, "step": 7714 }, { "epoch": 0.7240052552552553, "grad_norm": 0.9577377604550756, "learning_rate": 9.404052724055953e-06, "loss": 0.4653, "step": 7715 }, { "epoch": 0.7240990990990991, "grad_norm": 1.0724830239023382, "learning_rate": 9.403794200292184e-06, "loss": 0.4499, "step": 7716 }, { "epoch": 0.7241929429429429, "grad_norm": 1.554900129870184, "learning_rate": 9.403535624021462e-06, "loss": 0.5035, "step": 7717 }, { "epoch": 0.7242867867867868, "grad_norm": 1.4003332052562756, "learning_rate": 9.403276995246865e-06, "loss": 0.4491, "step": 7718 }, { "epoch": 0.7243806306306306, "grad_norm": 1.1890403552640318, "learning_rate": 9.403018313971478e-06, "loss": 0.5206, "step": 7719 }, { "epoch": 0.7244744744744744, "grad_norm": 1.0256938298425502, "learning_rate": 9.402759580198386e-06, "loss": 0.4723, "step": 7720 }, { "epoch": 0.7245683183183184, "grad_norm": 1.3147009972516364, "learning_rate": 9.402500793930673e-06, "loss": 0.4807, "step": 7721 }, { "epoch": 0.7246621621621622, "grad_norm": 1.0845623076973157, "learning_rate": 9.402241955171428e-06, "loss": 0.49, "step": 7722 }, { "epoch": 0.724756006006006, "grad_norm": 1.2857747593122444, "learning_rate": 9.40198306392373e-06, "loss": 0.546, "step": 7723 }, { "epoch": 0.7248498498498499, "grad_norm": 1.1692889926231242, "learning_rate": 9.401724120190671e-06, "loss": 0.4586, "step": 7724 }, { "epoch": 0.7249436936936937, "grad_norm": 1.4742344533660574, "learning_rate": 9.401465123975338e-06, "loss": 0.4817, "step": 7725 }, { "epoch": 0.7250375375375375, "grad_norm": 1.438548921677734, "learning_rate": 9.401206075280818e-06, "loss": 0.5381, "step": 7726 }, { "epoch": 0.7251313813813813, "grad_norm": 0.9467481542619447, "learning_rate": 9.4009469741102e-06, "loss": 0.4283, "step": 7727 }, { "epoch": 0.7252252252252253, "grad_norm": 1.0027232531974335, "learning_rate": 9.400687820466571e-06, "loss": 0.4655, "step": 7728 }, { "epoch": 0.7253190690690691, "grad_norm": 1.1975265597472609, "learning_rate": 9.400428614353026e-06, "loss": 0.4358, "step": 7729 }, { "epoch": 0.7254129129129129, "grad_norm": 0.9948870117126222, "learning_rate": 9.400169355772653e-06, "loss": 0.4594, "step": 7730 }, { "epoch": 0.7255067567567568, "grad_norm": 1.2435729236200752, "learning_rate": 9.39991004472854e-06, "loss": 0.4938, "step": 7731 }, { "epoch": 0.7256006006006006, "grad_norm": 1.6749962262697018, "learning_rate": 9.399650681223784e-06, "loss": 0.5208, "step": 7732 }, { "epoch": 0.7256944444444444, "grad_norm": 10.190330654111682, "learning_rate": 9.399391265261475e-06, "loss": 0.4723, "step": 7733 }, { "epoch": 0.7257882882882883, "grad_norm": 1.0500922965552288, "learning_rate": 9.399131796844705e-06, "loss": 0.4834, "step": 7734 }, { "epoch": 0.7258821321321322, "grad_norm": 1.4286859499348927, "learning_rate": 9.398872275976569e-06, "loss": 0.4522, "step": 7735 }, { "epoch": 0.725975975975976, "grad_norm": 1.1501332405694855, "learning_rate": 9.398612702660162e-06, "loss": 0.4811, "step": 7736 }, { "epoch": 0.7260698198198198, "grad_norm": 1.0087084254183116, "learning_rate": 9.398353076898578e-06, "loss": 0.4594, "step": 7737 }, { "epoch": 0.7261636636636637, "grad_norm": 1.0183553773417915, "learning_rate": 9.398093398694913e-06, "loss": 0.4572, "step": 7738 }, { "epoch": 0.7262575075075075, "grad_norm": 1.2036545834343106, "learning_rate": 9.397833668052262e-06, "loss": 0.5052, "step": 7739 }, { "epoch": 0.7263513513513513, "grad_norm": 1.2932100973958964, "learning_rate": 9.397573884973724e-06, "loss": 0.4464, "step": 7740 }, { "epoch": 0.7264451951951952, "grad_norm": 0.9998409658648671, "learning_rate": 9.397314049462393e-06, "loss": 0.4427, "step": 7741 }, { "epoch": 0.726539039039039, "grad_norm": 1.2220916837396274, "learning_rate": 9.397054161521368e-06, "loss": 0.4381, "step": 7742 }, { "epoch": 0.7266328828828829, "grad_norm": 1.4306855864659667, "learning_rate": 9.396794221153751e-06, "loss": 0.4977, "step": 7743 }, { "epoch": 0.7267267267267268, "grad_norm": 0.9786441251841181, "learning_rate": 9.39653422836264e-06, "loss": 0.4511, "step": 7744 }, { "epoch": 0.7268205705705706, "grad_norm": 0.9742085769233471, "learning_rate": 9.396274183151131e-06, "loss": 0.4719, "step": 7745 }, { "epoch": 0.7269144144144144, "grad_norm": 1.5078015357580814, "learning_rate": 9.39601408552233e-06, "loss": 0.4456, "step": 7746 }, { "epoch": 0.7270082582582582, "grad_norm": 1.49696140471578, "learning_rate": 9.395753935479335e-06, "loss": 0.4633, "step": 7747 }, { "epoch": 0.7271021021021021, "grad_norm": 0.8640300509525894, "learning_rate": 9.395493733025248e-06, "loss": 0.4215, "step": 7748 }, { "epoch": 0.7271959459459459, "grad_norm": 1.2883882813712981, "learning_rate": 9.395233478163172e-06, "loss": 0.5316, "step": 7749 }, { "epoch": 0.7272897897897898, "grad_norm": 5.462093130308759, "learning_rate": 9.39497317089621e-06, "loss": 0.4855, "step": 7750 }, { "epoch": 0.7273836336336337, "grad_norm": 1.0186247302108449, "learning_rate": 9.394712811227465e-06, "loss": 0.4711, "step": 7751 }, { "epoch": 0.7274774774774775, "grad_norm": 1.3480223950149102, "learning_rate": 9.394452399160043e-06, "loss": 0.4783, "step": 7752 }, { "epoch": 0.7275713213213213, "grad_norm": 1.184454775010807, "learning_rate": 9.394191934697048e-06, "loss": 0.4853, "step": 7753 }, { "epoch": 0.7276651651651652, "grad_norm": 1.4291801415029741, "learning_rate": 9.393931417841585e-06, "loss": 0.4468, "step": 7754 }, { "epoch": 0.727759009009009, "grad_norm": 1.3425994133460173, "learning_rate": 9.393670848596759e-06, "loss": 0.4997, "step": 7755 }, { "epoch": 0.7278528528528528, "grad_norm": 1.2942623503910855, "learning_rate": 9.39341022696568e-06, "loss": 0.4754, "step": 7756 }, { "epoch": 0.7279466966966966, "grad_norm": 1.2538062849910987, "learning_rate": 9.393149552951454e-06, "loss": 0.4781, "step": 7757 }, { "epoch": 0.7280405405405406, "grad_norm": 1.1161131976582783, "learning_rate": 9.392888826557188e-06, "loss": 0.4262, "step": 7758 }, { "epoch": 0.7281343843843844, "grad_norm": 1.2995606215001176, "learning_rate": 9.392628047785993e-06, "loss": 0.4774, "step": 7759 }, { "epoch": 0.7282282282282282, "grad_norm": 1.4884313512074432, "learning_rate": 9.392367216640973e-06, "loss": 0.4672, "step": 7760 }, { "epoch": 0.7283220720720721, "grad_norm": 1.3107652158606284, "learning_rate": 9.392106333125247e-06, "loss": 0.4772, "step": 7761 }, { "epoch": 0.7284159159159159, "grad_norm": 2.1448589608888975, "learning_rate": 9.391845397241917e-06, "loss": 0.4267, "step": 7762 }, { "epoch": 0.7285097597597597, "grad_norm": 0.9657275456488648, "learning_rate": 9.391584408994098e-06, "loss": 0.4199, "step": 7763 }, { "epoch": 0.7286036036036037, "grad_norm": 1.055802885337691, "learning_rate": 9.391323368384902e-06, "loss": 0.4737, "step": 7764 }, { "epoch": 0.7286974474474475, "grad_norm": 2.5416149694803423, "learning_rate": 9.39106227541744e-06, "loss": 0.4593, "step": 7765 }, { "epoch": 0.7287912912912913, "grad_norm": 1.102133878466055, "learning_rate": 9.390801130094824e-06, "loss": 0.4794, "step": 7766 }, { "epoch": 0.7288851351351351, "grad_norm": 1.1181009292970865, "learning_rate": 9.390539932420171e-06, "loss": 0.4901, "step": 7767 }, { "epoch": 0.728978978978979, "grad_norm": 1.069960610338079, "learning_rate": 9.390278682396594e-06, "loss": 0.4829, "step": 7768 }, { "epoch": 0.7290728228228228, "grad_norm": 0.990224511167144, "learning_rate": 9.390017380027208e-06, "loss": 0.4702, "step": 7769 }, { "epoch": 0.7291666666666666, "grad_norm": 1.043557434490784, "learning_rate": 9.389756025315127e-06, "loss": 0.4391, "step": 7770 }, { "epoch": 0.7292605105105106, "grad_norm": 1.475465272107532, "learning_rate": 9.389494618263469e-06, "loss": 0.5183, "step": 7771 }, { "epoch": 0.7293543543543544, "grad_norm": 1.1722170011671034, "learning_rate": 9.389233158875348e-06, "loss": 0.5007, "step": 7772 }, { "epoch": 0.7294481981981982, "grad_norm": 1.0074117156484375, "learning_rate": 9.388971647153887e-06, "loss": 0.4941, "step": 7773 }, { "epoch": 0.7295420420420421, "grad_norm": 1.0414935865243682, "learning_rate": 9.388710083102197e-06, "loss": 0.481, "step": 7774 }, { "epoch": 0.7296358858858859, "grad_norm": 1.2016139961791703, "learning_rate": 9.388448466723402e-06, "loss": 0.4505, "step": 7775 }, { "epoch": 0.7297297297297297, "grad_norm": 0.9415052516201597, "learning_rate": 9.38818679802062e-06, "loss": 0.4768, "step": 7776 }, { "epoch": 0.7298235735735735, "grad_norm": 1.486519695170668, "learning_rate": 9.387925076996969e-06, "loss": 0.4975, "step": 7777 }, { "epoch": 0.7299174174174174, "grad_norm": 1.3679432629754784, "learning_rate": 9.387663303655572e-06, "loss": 0.4694, "step": 7778 }, { "epoch": 0.7300112612612613, "grad_norm": 1.038524426478728, "learning_rate": 9.387401477999547e-06, "loss": 0.4715, "step": 7779 }, { "epoch": 0.7301051051051051, "grad_norm": 1.0990639416207852, "learning_rate": 9.38713960003202e-06, "loss": 0.4866, "step": 7780 }, { "epoch": 0.730198948948949, "grad_norm": 1.0844524840748493, "learning_rate": 9.38687766975611e-06, "loss": 0.5162, "step": 7781 }, { "epoch": 0.7302927927927928, "grad_norm": 0.9875024700395307, "learning_rate": 9.386615687174942e-06, "loss": 0.4423, "step": 7782 }, { "epoch": 0.7303866366366366, "grad_norm": 1.3847139593368798, "learning_rate": 9.38635365229164e-06, "loss": 0.4559, "step": 7783 }, { "epoch": 0.7304804804804805, "grad_norm": 1.0483755562023644, "learning_rate": 9.386091565109325e-06, "loss": 0.471, "step": 7784 }, { "epoch": 0.7305743243243243, "grad_norm": 1.3287539510698338, "learning_rate": 9.385829425631124e-06, "loss": 0.5343, "step": 7785 }, { "epoch": 0.7306681681681682, "grad_norm": 0.9172499818853221, "learning_rate": 9.385567233860164e-06, "loss": 0.4777, "step": 7786 }, { "epoch": 0.730762012012012, "grad_norm": 1.14577165561547, "learning_rate": 9.385304989799569e-06, "loss": 0.4766, "step": 7787 }, { "epoch": 0.7308558558558559, "grad_norm": 1.1242409420074921, "learning_rate": 9.385042693452466e-06, "loss": 0.4892, "step": 7788 }, { "epoch": 0.7309496996996997, "grad_norm": 1.8529423602348383, "learning_rate": 9.384780344821984e-06, "loss": 0.457, "step": 7789 }, { "epoch": 0.7310435435435435, "grad_norm": 1.0135211999470501, "learning_rate": 9.384517943911249e-06, "loss": 0.4668, "step": 7790 }, { "epoch": 0.7311373873873874, "grad_norm": 1.3096424033928242, "learning_rate": 9.384255490723391e-06, "loss": 0.4873, "step": 7791 }, { "epoch": 0.7312312312312312, "grad_norm": 1.3801364805271215, "learning_rate": 9.383992985261538e-06, "loss": 0.4454, "step": 7792 }, { "epoch": 0.731325075075075, "grad_norm": 1.330367836055487, "learning_rate": 9.383730427528821e-06, "loss": 0.4674, "step": 7793 }, { "epoch": 0.731418918918919, "grad_norm": 3.5980584233017963, "learning_rate": 9.383467817528371e-06, "loss": 0.4664, "step": 7794 }, { "epoch": 0.7315127627627628, "grad_norm": 0.9259697166915467, "learning_rate": 9.383205155263318e-06, "loss": 0.4444, "step": 7795 }, { "epoch": 0.7316066066066066, "grad_norm": 1.0724896352133295, "learning_rate": 9.382942440736794e-06, "loss": 0.4169, "step": 7796 }, { "epoch": 0.7317004504504504, "grad_norm": 2.0574460323461725, "learning_rate": 9.38267967395193e-06, "loss": 0.4995, "step": 7797 }, { "epoch": 0.7317942942942943, "grad_norm": 1.0293651028045925, "learning_rate": 9.382416854911864e-06, "loss": 0.4712, "step": 7798 }, { "epoch": 0.7318881381381381, "grad_norm": 1.0127957505851808, "learning_rate": 9.382153983619723e-06, "loss": 0.4257, "step": 7799 }, { "epoch": 0.7319819819819819, "grad_norm": 0.9651723444561486, "learning_rate": 9.381891060078645e-06, "loss": 0.4433, "step": 7800 }, { "epoch": 0.7320758258258259, "grad_norm": 0.9759177081965673, "learning_rate": 9.381628084291765e-06, "loss": 0.4016, "step": 7801 }, { "epoch": 0.7321696696696697, "grad_norm": 1.1708281217516199, "learning_rate": 9.381365056262218e-06, "loss": 0.5034, "step": 7802 }, { "epoch": 0.7322635135135135, "grad_norm": 0.9185582074255643, "learning_rate": 9.381101975993136e-06, "loss": 0.405, "step": 7803 }, { "epoch": 0.7323573573573574, "grad_norm": 1.352703563108539, "learning_rate": 9.380838843487663e-06, "loss": 0.5051, "step": 7804 }, { "epoch": 0.7324512012012012, "grad_norm": 0.9645780668395297, "learning_rate": 9.380575658748932e-06, "loss": 0.4306, "step": 7805 }, { "epoch": 0.732545045045045, "grad_norm": 1.557628266972599, "learning_rate": 9.380312421780082e-06, "loss": 0.4335, "step": 7806 }, { "epoch": 0.7326388888888888, "grad_norm": 1.1375262134923652, "learning_rate": 9.380049132584252e-06, "loss": 0.4732, "step": 7807 }, { "epoch": 0.7327327327327328, "grad_norm": 1.0955994187055613, "learning_rate": 9.37978579116458e-06, "loss": 0.4766, "step": 7808 }, { "epoch": 0.7328265765765766, "grad_norm": 1.3009221236170811, "learning_rate": 9.379522397524206e-06, "loss": 0.515, "step": 7809 }, { "epoch": 0.7329204204204204, "grad_norm": 1.678613376940158, "learning_rate": 9.379258951666272e-06, "loss": 0.4971, "step": 7810 }, { "epoch": 0.7330142642642643, "grad_norm": 1.4617665607688206, "learning_rate": 9.378995453593918e-06, "loss": 0.4621, "step": 7811 }, { "epoch": 0.7331081081081081, "grad_norm": 7.69896005529156, "learning_rate": 9.378731903310286e-06, "loss": 0.4854, "step": 7812 }, { "epoch": 0.7332019519519519, "grad_norm": 1.1050636900375923, "learning_rate": 9.378468300818517e-06, "loss": 0.4942, "step": 7813 }, { "epoch": 0.7332957957957958, "grad_norm": 0.9942129767863359, "learning_rate": 9.378204646121755e-06, "loss": 0.4733, "step": 7814 }, { "epoch": 0.7333896396396397, "grad_norm": 1.3120757123225912, "learning_rate": 9.377940939223146e-06, "loss": 0.4651, "step": 7815 }, { "epoch": 0.7334834834834835, "grad_norm": 1.7500626421272383, "learning_rate": 9.37767718012583e-06, "loss": 0.4992, "step": 7816 }, { "epoch": 0.7335773273273273, "grad_norm": 0.9703997794237881, "learning_rate": 9.377413368832955e-06, "loss": 0.458, "step": 7817 }, { "epoch": 0.7336711711711712, "grad_norm": 1.2185978005808007, "learning_rate": 9.377149505347665e-06, "loss": 0.4678, "step": 7818 }, { "epoch": 0.733765015015015, "grad_norm": 3.0334289088199853, "learning_rate": 9.376885589673105e-06, "loss": 0.4978, "step": 7819 }, { "epoch": 0.7338588588588588, "grad_norm": 1.3668645465118057, "learning_rate": 9.376621621812423e-06, "loss": 0.5093, "step": 7820 }, { "epoch": 0.7339527027027027, "grad_norm": 1.0978047752343665, "learning_rate": 9.376357601768767e-06, "loss": 0.4881, "step": 7821 }, { "epoch": 0.7340465465465466, "grad_norm": 1.2770288603470725, "learning_rate": 9.376093529545286e-06, "loss": 0.4607, "step": 7822 }, { "epoch": 0.7341403903903904, "grad_norm": 1.3841318726933258, "learning_rate": 9.375829405145124e-06, "loss": 0.4663, "step": 7823 }, { "epoch": 0.7342342342342343, "grad_norm": 1.1049553582543987, "learning_rate": 9.375565228571434e-06, "loss": 0.5124, "step": 7824 }, { "epoch": 0.7343280780780781, "grad_norm": 2.3824182167659047, "learning_rate": 9.375300999827365e-06, "loss": 0.4947, "step": 7825 }, { "epoch": 0.7344219219219219, "grad_norm": 2.0206612028985114, "learning_rate": 9.375036718916067e-06, "loss": 0.5213, "step": 7826 }, { "epoch": 0.7345157657657657, "grad_norm": 1.253895837103616, "learning_rate": 9.374772385840691e-06, "loss": 0.4851, "step": 7827 }, { "epoch": 0.7346096096096096, "grad_norm": 1.1950534009930274, "learning_rate": 9.37450800060439e-06, "loss": 0.4747, "step": 7828 }, { "epoch": 0.7347034534534534, "grad_norm": 1.3607012706696544, "learning_rate": 9.374243563210315e-06, "loss": 0.4025, "step": 7829 }, { "epoch": 0.7347972972972973, "grad_norm": 1.1119631513169013, "learning_rate": 9.373979073661618e-06, "loss": 0.4649, "step": 7830 }, { "epoch": 0.7348911411411412, "grad_norm": 3.67848028494125, "learning_rate": 9.373714531961456e-06, "loss": 0.4641, "step": 7831 }, { "epoch": 0.734984984984985, "grad_norm": 1.183612960447219, "learning_rate": 9.373449938112979e-06, "loss": 0.4749, "step": 7832 }, { "epoch": 0.7350788288288288, "grad_norm": 1.3901805529808375, "learning_rate": 9.373185292119343e-06, "loss": 0.4551, "step": 7833 }, { "epoch": 0.7351726726726727, "grad_norm": 1.1313311381575102, "learning_rate": 9.372920593983705e-06, "loss": 0.4871, "step": 7834 }, { "epoch": 0.7352665165165165, "grad_norm": 1.0029102229065743, "learning_rate": 9.372655843709222e-06, "loss": 0.4829, "step": 7835 }, { "epoch": 0.7353603603603603, "grad_norm": 1.0068014847711173, "learning_rate": 9.372391041299045e-06, "loss": 0.4824, "step": 7836 }, { "epoch": 0.7354542042042042, "grad_norm": 1.2609126717754924, "learning_rate": 9.372126186756339e-06, "loss": 0.5293, "step": 7837 }, { "epoch": 0.7355480480480481, "grad_norm": 1.210481782039517, "learning_rate": 9.371861280084255e-06, "loss": 0.5264, "step": 7838 }, { "epoch": 0.7356418918918919, "grad_norm": 1.0386250666795553, "learning_rate": 9.371596321285956e-06, "loss": 0.4137, "step": 7839 }, { "epoch": 0.7357357357357357, "grad_norm": 1.026977387238867, "learning_rate": 9.371331310364598e-06, "loss": 0.4653, "step": 7840 }, { "epoch": 0.7358295795795796, "grad_norm": 0.8799820469996587, "learning_rate": 9.371066247323344e-06, "loss": 0.4016, "step": 7841 }, { "epoch": 0.7359234234234234, "grad_norm": 2.221403963703451, "learning_rate": 9.37080113216535e-06, "loss": 0.5041, "step": 7842 }, { "epoch": 0.7360172672672672, "grad_norm": 0.9341576427696122, "learning_rate": 9.370535964893784e-06, "loss": 0.4192, "step": 7843 }, { "epoch": 0.7361111111111112, "grad_norm": 1.4019489900809552, "learning_rate": 9.370270745511798e-06, "loss": 0.4474, "step": 7844 }, { "epoch": 0.736204954954955, "grad_norm": 1.0669731119347885, "learning_rate": 9.370005474022564e-06, "loss": 0.4508, "step": 7845 }, { "epoch": 0.7362987987987988, "grad_norm": 1.1576771655428013, "learning_rate": 9.369740150429239e-06, "loss": 0.4247, "step": 7846 }, { "epoch": 0.7363926426426426, "grad_norm": 0.9604053290989178, "learning_rate": 9.369474774734988e-06, "loss": 0.4162, "step": 7847 }, { "epoch": 0.7364864864864865, "grad_norm": 0.9781508172157166, "learning_rate": 9.369209346942974e-06, "loss": 0.4458, "step": 7848 }, { "epoch": 0.7365803303303303, "grad_norm": 0.920304783375725, "learning_rate": 9.368943867056364e-06, "loss": 0.4306, "step": 7849 }, { "epoch": 0.7366741741741741, "grad_norm": 1.0330353398381118, "learning_rate": 9.368678335078321e-06, "loss": 0.4815, "step": 7850 }, { "epoch": 0.7367680180180181, "grad_norm": 1.2862419017146443, "learning_rate": 9.368412751012013e-06, "loss": 0.5277, "step": 7851 }, { "epoch": 0.7368618618618619, "grad_norm": 1.9766502381297435, "learning_rate": 9.368147114860605e-06, "loss": 0.4767, "step": 7852 }, { "epoch": 0.7369557057057057, "grad_norm": 1.2539384390748367, "learning_rate": 9.367881426627265e-06, "loss": 0.4927, "step": 7853 }, { "epoch": 0.7370495495495496, "grad_norm": 0.9996297954938673, "learning_rate": 9.367615686315162e-06, "loss": 0.476, "step": 7854 }, { "epoch": 0.7371433933933934, "grad_norm": 1.3899780111176847, "learning_rate": 9.36734989392746e-06, "loss": 0.4913, "step": 7855 }, { "epoch": 0.7372372372372372, "grad_norm": 1.8321388610461529, "learning_rate": 9.367084049467332e-06, "loss": 0.4938, "step": 7856 }, { "epoch": 0.737331081081081, "grad_norm": 0.8673586327485802, "learning_rate": 9.366818152937949e-06, "loss": 0.4435, "step": 7857 }, { "epoch": 0.737424924924925, "grad_norm": 1.2316356146950096, "learning_rate": 9.366552204342478e-06, "loss": 0.4762, "step": 7858 }, { "epoch": 0.7375187687687688, "grad_norm": 1.29599951323886, "learning_rate": 9.366286203684092e-06, "loss": 0.5059, "step": 7859 }, { "epoch": 0.7376126126126126, "grad_norm": 1.0227542778074896, "learning_rate": 9.366020150965962e-06, "loss": 0.5066, "step": 7860 }, { "epoch": 0.7377064564564565, "grad_norm": 1.046008253978986, "learning_rate": 9.365754046191258e-06, "loss": 0.5323, "step": 7861 }, { "epoch": 0.7378003003003003, "grad_norm": 0.9781072168542897, "learning_rate": 9.365487889363154e-06, "loss": 0.4871, "step": 7862 }, { "epoch": 0.7378941441441441, "grad_norm": 1.1352213340213257, "learning_rate": 9.365221680484827e-06, "loss": 0.4697, "step": 7863 }, { "epoch": 0.737987987987988, "grad_norm": 1.1647303643532771, "learning_rate": 9.364955419559447e-06, "loss": 0.4722, "step": 7864 }, { "epoch": 0.7380818318318318, "grad_norm": 1.4121215212585416, "learning_rate": 9.364689106590191e-06, "loss": 0.4674, "step": 7865 }, { "epoch": 0.7381756756756757, "grad_norm": 1.297238908277134, "learning_rate": 9.36442274158023e-06, "loss": 0.4082, "step": 7866 }, { "epoch": 0.7382695195195195, "grad_norm": 1.1971482510847264, "learning_rate": 9.364156324532744e-06, "loss": 0.4861, "step": 7867 }, { "epoch": 0.7383633633633634, "grad_norm": 2.127534304717527, "learning_rate": 9.36388985545091e-06, "loss": 0.4761, "step": 7868 }, { "epoch": 0.7384572072072072, "grad_norm": 0.9829755649886242, "learning_rate": 9.363623334337903e-06, "loss": 0.4754, "step": 7869 }, { "epoch": 0.738551051051051, "grad_norm": 1.1085118338886693, "learning_rate": 9.363356761196902e-06, "loss": 0.4348, "step": 7870 }, { "epoch": 0.7386448948948949, "grad_norm": 1.0456048367867463, "learning_rate": 9.363090136031084e-06, "loss": 0.4826, "step": 7871 }, { "epoch": 0.7387387387387387, "grad_norm": 1.0408472828400388, "learning_rate": 9.362823458843628e-06, "loss": 0.5128, "step": 7872 }, { "epoch": 0.7388325825825826, "grad_norm": 0.9443650898323535, "learning_rate": 9.362556729637717e-06, "loss": 0.4327, "step": 7873 }, { "epoch": 0.7389264264264265, "grad_norm": 0.9516688856165948, "learning_rate": 9.362289948416525e-06, "loss": 0.4572, "step": 7874 }, { "epoch": 0.7390202702702703, "grad_norm": 1.0875876105756628, "learning_rate": 9.362023115183239e-06, "loss": 0.4328, "step": 7875 }, { "epoch": 0.7391141141141141, "grad_norm": 0.9915566843640353, "learning_rate": 9.36175622994104e-06, "loss": 0.4576, "step": 7876 }, { "epoch": 0.7392079579579579, "grad_norm": 1.049775111875859, "learning_rate": 9.361489292693104e-06, "loss": 0.5362, "step": 7877 }, { "epoch": 0.7393018018018018, "grad_norm": 1.1422603687230266, "learning_rate": 9.361222303442619e-06, "loss": 0.462, "step": 7878 }, { "epoch": 0.7393956456456456, "grad_norm": 1.0764622440101372, "learning_rate": 9.360955262192768e-06, "loss": 0.4584, "step": 7879 }, { "epoch": 0.7394894894894894, "grad_norm": 0.9216784142169467, "learning_rate": 9.360688168946734e-06, "loss": 0.491, "step": 7880 }, { "epoch": 0.7395833333333334, "grad_norm": 1.0612640287562205, "learning_rate": 9.360421023707702e-06, "loss": 0.5034, "step": 7881 }, { "epoch": 0.7396771771771772, "grad_norm": 1.119798892051791, "learning_rate": 9.360153826478856e-06, "loss": 0.5094, "step": 7882 }, { "epoch": 0.739771021021021, "grad_norm": 1.2183414739170986, "learning_rate": 9.359886577263384e-06, "loss": 0.4817, "step": 7883 }, { "epoch": 0.7398648648648649, "grad_norm": 0.9666213743350045, "learning_rate": 9.35961927606447e-06, "loss": 0.4898, "step": 7884 }, { "epoch": 0.7399587087087087, "grad_norm": 1.7891746949607443, "learning_rate": 9.359351922885303e-06, "loss": 0.4326, "step": 7885 }, { "epoch": 0.7400525525525525, "grad_norm": 1.1938375853161451, "learning_rate": 9.35908451772907e-06, "loss": 0.4673, "step": 7886 }, { "epoch": 0.7401463963963963, "grad_norm": 1.169978990778442, "learning_rate": 9.358817060598959e-06, "loss": 0.466, "step": 7887 }, { "epoch": 0.7402402402402403, "grad_norm": 0.9133787681616701, "learning_rate": 9.358549551498159e-06, "loss": 0.4767, "step": 7888 }, { "epoch": 0.7403340840840841, "grad_norm": 1.02663576717058, "learning_rate": 9.35828199042986e-06, "loss": 0.5317, "step": 7889 }, { "epoch": 0.7404279279279279, "grad_norm": 3.514605126297979, "learning_rate": 9.358014377397252e-06, "loss": 0.4989, "step": 7890 }, { "epoch": 0.7405217717717718, "grad_norm": 1.0576824039646044, "learning_rate": 9.357746712403524e-06, "loss": 0.476, "step": 7891 }, { "epoch": 0.7406156156156156, "grad_norm": 1.1062447772048811, "learning_rate": 9.357478995451873e-06, "loss": 0.4885, "step": 7892 }, { "epoch": 0.7407094594594594, "grad_norm": 1.051797471271336, "learning_rate": 9.357211226545483e-06, "loss": 0.4718, "step": 7893 }, { "epoch": 0.7408033033033034, "grad_norm": 1.1546405100679649, "learning_rate": 9.356943405687553e-06, "loss": 0.5004, "step": 7894 }, { "epoch": 0.7408971471471472, "grad_norm": 1.128564554276637, "learning_rate": 9.356675532881271e-06, "loss": 0.4457, "step": 7895 }, { "epoch": 0.740990990990991, "grad_norm": 0.8978938047651512, "learning_rate": 9.356407608129839e-06, "loss": 0.4635, "step": 7896 }, { "epoch": 0.7410848348348348, "grad_norm": 2.0771739621577123, "learning_rate": 9.356139631436443e-06, "loss": 0.4743, "step": 7897 }, { "epoch": 0.7411786786786787, "grad_norm": 1.0772007156015677, "learning_rate": 9.355871602804282e-06, "loss": 0.4409, "step": 7898 }, { "epoch": 0.7412725225225225, "grad_norm": 1.1688621946632642, "learning_rate": 9.355603522236551e-06, "loss": 0.4424, "step": 7899 }, { "epoch": 0.7413663663663663, "grad_norm": 0.9928392463149153, "learning_rate": 9.355335389736446e-06, "loss": 0.4796, "step": 7900 }, { "epoch": 0.7414602102102102, "grad_norm": 0.9618954004497657, "learning_rate": 9.355067205307167e-06, "loss": 0.4402, "step": 7901 }, { "epoch": 0.7415540540540541, "grad_norm": 1.035619658547026, "learning_rate": 9.354798968951905e-06, "loss": 0.4315, "step": 7902 }, { "epoch": 0.7416478978978979, "grad_norm": 0.8609518281261288, "learning_rate": 9.354530680673868e-06, "loss": 0.4384, "step": 7903 }, { "epoch": 0.7417417417417418, "grad_norm": 0.8947466910361563, "learning_rate": 9.354262340476244e-06, "loss": 0.4515, "step": 7904 }, { "epoch": 0.7418355855855856, "grad_norm": 1.1550295692340078, "learning_rate": 9.353993948362241e-06, "loss": 0.4728, "step": 7905 }, { "epoch": 0.7419294294294294, "grad_norm": 0.9862600662208953, "learning_rate": 9.353725504335053e-06, "loss": 0.4394, "step": 7906 }, { "epoch": 0.7420232732732732, "grad_norm": 1.0443542122866787, "learning_rate": 9.353457008397886e-06, "loss": 0.5148, "step": 7907 }, { "epoch": 0.7421171171171171, "grad_norm": 1.2605947259573733, "learning_rate": 9.353188460553937e-06, "loss": 0.4956, "step": 7908 }, { "epoch": 0.742210960960961, "grad_norm": 1.223304371014831, "learning_rate": 9.352919860806411e-06, "loss": 0.5008, "step": 7909 }, { "epoch": 0.7423048048048048, "grad_norm": 1.0357121474625612, "learning_rate": 9.352651209158507e-06, "loss": 0.4754, "step": 7910 }, { "epoch": 0.7423986486486487, "grad_norm": 0.9431571458790267, "learning_rate": 9.352382505613434e-06, "loss": 0.5045, "step": 7911 }, { "epoch": 0.7424924924924925, "grad_norm": 1.080294678245337, "learning_rate": 9.35211375017439e-06, "loss": 0.4915, "step": 7912 }, { "epoch": 0.7425863363363363, "grad_norm": 1.0053059407204015, "learning_rate": 9.35184494284458e-06, "loss": 0.4344, "step": 7913 }, { "epoch": 0.7426801801801802, "grad_norm": 1.1703646473316272, "learning_rate": 9.351576083627212e-06, "loss": 0.5087, "step": 7914 }, { "epoch": 0.742774024024024, "grad_norm": 0.9241422436596693, "learning_rate": 9.35130717252549e-06, "loss": 0.4826, "step": 7915 }, { "epoch": 0.7428678678678678, "grad_norm": 1.4069783182893332, "learning_rate": 9.351038209542622e-06, "loss": 0.4584, "step": 7916 }, { "epoch": 0.7429617117117117, "grad_norm": 1.0749467989737538, "learning_rate": 9.350769194681811e-06, "loss": 0.4424, "step": 7917 }, { "epoch": 0.7430555555555556, "grad_norm": 0.95301884018538, "learning_rate": 9.350500127946268e-06, "loss": 0.4596, "step": 7918 }, { "epoch": 0.7431493993993994, "grad_norm": 1.089369333830196, "learning_rate": 9.3502310093392e-06, "loss": 0.4818, "step": 7919 }, { "epoch": 0.7432432432432432, "grad_norm": 1.2618186562562304, "learning_rate": 9.349961838863814e-06, "loss": 0.473, "step": 7920 }, { "epoch": 0.7433370870870871, "grad_norm": 1.2416852972663797, "learning_rate": 9.349692616523325e-06, "loss": 0.4622, "step": 7921 }, { "epoch": 0.7434309309309309, "grad_norm": 1.2177033769919243, "learning_rate": 9.349423342320936e-06, "loss": 0.4249, "step": 7922 }, { "epoch": 0.7435247747747747, "grad_norm": 1.156095011417481, "learning_rate": 9.34915401625986e-06, "loss": 0.4062, "step": 7923 }, { "epoch": 0.7436186186186187, "grad_norm": 1.1387814100057032, "learning_rate": 9.34888463834331e-06, "loss": 0.4905, "step": 7924 }, { "epoch": 0.7437124624624625, "grad_norm": 3.7788846468861266, "learning_rate": 9.348615208574497e-06, "loss": 0.4431, "step": 7925 }, { "epoch": 0.7438063063063063, "grad_norm": 1.051892842718092, "learning_rate": 9.348345726956633e-06, "loss": 0.4691, "step": 7926 }, { "epoch": 0.7439001501501501, "grad_norm": 0.9420347453215016, "learning_rate": 9.34807619349293e-06, "loss": 0.4498, "step": 7927 }, { "epoch": 0.743993993993994, "grad_norm": 1.1571443419932386, "learning_rate": 9.347806608186606e-06, "loss": 0.4781, "step": 7928 }, { "epoch": 0.7440878378378378, "grad_norm": 1.186034285466963, "learning_rate": 9.347536971040866e-06, "loss": 0.5092, "step": 7929 }, { "epoch": 0.7441816816816816, "grad_norm": 1.5518573517895908, "learning_rate": 9.347267282058937e-06, "loss": 0.4661, "step": 7930 }, { "epoch": 0.7442755255255256, "grad_norm": 1.2835324735917109, "learning_rate": 9.346997541244025e-06, "loss": 0.5309, "step": 7931 }, { "epoch": 0.7443693693693694, "grad_norm": 1.0387267012102355, "learning_rate": 9.34672774859935e-06, "loss": 0.4272, "step": 7932 }, { "epoch": 0.7444632132132132, "grad_norm": 1.0056615102384907, "learning_rate": 9.346457904128131e-06, "loss": 0.4712, "step": 7933 }, { "epoch": 0.7445570570570571, "grad_norm": 1.414151531344503, "learning_rate": 9.34618800783358e-06, "loss": 0.543, "step": 7934 }, { "epoch": 0.7446509009009009, "grad_norm": 1.3058675000988642, "learning_rate": 9.34591805971892e-06, "loss": 0.4363, "step": 7935 }, { "epoch": 0.7447447447447447, "grad_norm": 1.1194239508049055, "learning_rate": 9.345648059787367e-06, "loss": 0.4736, "step": 7936 }, { "epoch": 0.7448385885885885, "grad_norm": 1.112627628244721, "learning_rate": 9.345378008042141e-06, "loss": 0.4426, "step": 7937 }, { "epoch": 0.7449324324324325, "grad_norm": 0.8853192403450968, "learning_rate": 9.34510790448646e-06, "loss": 0.4199, "step": 7938 }, { "epoch": 0.7450262762762763, "grad_norm": 1.1569618874975878, "learning_rate": 9.344837749123547e-06, "loss": 0.4208, "step": 7939 }, { "epoch": 0.7451201201201201, "grad_norm": 1.0167490976715448, "learning_rate": 9.344567541956621e-06, "loss": 0.45, "step": 7940 }, { "epoch": 0.745213963963964, "grad_norm": 0.9756904258289055, "learning_rate": 9.344297282988908e-06, "loss": 0.4947, "step": 7941 }, { "epoch": 0.7453078078078078, "grad_norm": 1.6428759017986152, "learning_rate": 9.344026972223624e-06, "loss": 0.461, "step": 7942 }, { "epoch": 0.7454016516516516, "grad_norm": 1.3003251943468728, "learning_rate": 9.343756609663997e-06, "loss": 0.4804, "step": 7943 }, { "epoch": 0.7454954954954955, "grad_norm": 0.9702911300131322, "learning_rate": 9.343486195313248e-06, "loss": 0.4794, "step": 7944 }, { "epoch": 0.7455893393393394, "grad_norm": 1.0491873351837397, "learning_rate": 9.343215729174601e-06, "loss": 0.4985, "step": 7945 }, { "epoch": 0.7456831831831832, "grad_norm": 1.3381218608911103, "learning_rate": 9.342945211251284e-06, "loss": 0.495, "step": 7946 }, { "epoch": 0.745777027027027, "grad_norm": 0.9733317520468551, "learning_rate": 9.342674641546519e-06, "loss": 0.439, "step": 7947 }, { "epoch": 0.7458708708708709, "grad_norm": 1.0071168162009023, "learning_rate": 9.342404020063532e-06, "loss": 0.477, "step": 7948 }, { "epoch": 0.7459647147147147, "grad_norm": 1.0360327030954903, "learning_rate": 9.342133346805552e-06, "loss": 0.4739, "step": 7949 }, { "epoch": 0.7460585585585585, "grad_norm": 0.9141916960323699, "learning_rate": 9.341862621775804e-06, "loss": 0.4506, "step": 7950 }, { "epoch": 0.7461524024024024, "grad_norm": 1.1628388720091718, "learning_rate": 9.341591844977518e-06, "loss": 0.4928, "step": 7951 }, { "epoch": 0.7462462462462462, "grad_norm": 1.1474338228111518, "learning_rate": 9.341321016413922e-06, "loss": 0.4877, "step": 7952 }, { "epoch": 0.7463400900900901, "grad_norm": 1.107163346398744, "learning_rate": 9.341050136088243e-06, "loss": 0.4834, "step": 7953 }, { "epoch": 0.746433933933934, "grad_norm": 0.9982697185400027, "learning_rate": 9.340779204003714e-06, "loss": 0.4398, "step": 7954 }, { "epoch": 0.7465277777777778, "grad_norm": 1.1315524220682416, "learning_rate": 9.340508220163562e-06, "loss": 0.4518, "step": 7955 }, { "epoch": 0.7466216216216216, "grad_norm": 0.8797926736379225, "learning_rate": 9.34023718457102e-06, "loss": 0.4232, "step": 7956 }, { "epoch": 0.7467154654654654, "grad_norm": 1.1112378478251388, "learning_rate": 9.33996609722932e-06, "loss": 0.5423, "step": 7957 }, { "epoch": 0.7468093093093093, "grad_norm": 1.2333121681144104, "learning_rate": 9.339694958141693e-06, "loss": 0.526, "step": 7958 }, { "epoch": 0.7469031531531531, "grad_norm": 1.1425927877371551, "learning_rate": 9.339423767311372e-06, "loss": 0.5348, "step": 7959 }, { "epoch": 0.746996996996997, "grad_norm": 0.9134057909561716, "learning_rate": 9.33915252474159e-06, "loss": 0.4407, "step": 7960 }, { "epoch": 0.7470908408408409, "grad_norm": 1.2713207805946198, "learning_rate": 9.338881230435584e-06, "loss": 0.4813, "step": 7961 }, { "epoch": 0.7471846846846847, "grad_norm": 1.4821049553932988, "learning_rate": 9.338609884396586e-06, "loss": 0.4671, "step": 7962 }, { "epoch": 0.7472785285285285, "grad_norm": 1.3047248109370044, "learning_rate": 9.338338486627829e-06, "loss": 0.4268, "step": 7963 }, { "epoch": 0.7473723723723724, "grad_norm": 1.2481540605744672, "learning_rate": 9.338067037132555e-06, "loss": 0.4458, "step": 7964 }, { "epoch": 0.7474662162162162, "grad_norm": 1.5664605804085074, "learning_rate": 9.337795535913995e-06, "loss": 0.4645, "step": 7965 }, { "epoch": 0.74756006006006, "grad_norm": 2.2045543738132474, "learning_rate": 9.337523982975389e-06, "loss": 0.5092, "step": 7966 }, { "epoch": 0.7476539039039038, "grad_norm": 0.8838372360284811, "learning_rate": 9.337252378319975e-06, "loss": 0.472, "step": 7967 }, { "epoch": 0.7477477477477478, "grad_norm": 1.1698228975567624, "learning_rate": 9.336980721950988e-06, "loss": 0.5071, "step": 7968 }, { "epoch": 0.7478415915915916, "grad_norm": 1.1627165878427468, "learning_rate": 9.336709013871672e-06, "loss": 0.4558, "step": 7969 }, { "epoch": 0.7479354354354354, "grad_norm": 1.2121228061170688, "learning_rate": 9.336437254085261e-06, "loss": 0.5119, "step": 7970 }, { "epoch": 0.7480292792792793, "grad_norm": 1.0529831804103336, "learning_rate": 9.336165442595003e-06, "loss": 0.4784, "step": 7971 }, { "epoch": 0.7481231231231231, "grad_norm": 1.1739375891736923, "learning_rate": 9.33589357940413e-06, "loss": 0.4575, "step": 7972 }, { "epoch": 0.7482169669669669, "grad_norm": 2.105670874960311, "learning_rate": 9.335621664515889e-06, "loss": 0.5218, "step": 7973 }, { "epoch": 0.7483108108108109, "grad_norm": 1.013378891040455, "learning_rate": 9.33534969793352e-06, "loss": 0.4556, "step": 7974 }, { "epoch": 0.7484046546546547, "grad_norm": 1.0984008199203015, "learning_rate": 9.335077679660267e-06, "loss": 0.4503, "step": 7975 }, { "epoch": 0.7484984984984985, "grad_norm": 1.4012539777295627, "learning_rate": 9.334805609699372e-06, "loss": 0.4562, "step": 7976 }, { "epoch": 0.7485923423423423, "grad_norm": 0.9453797666693848, "learning_rate": 9.334533488054081e-06, "loss": 0.4573, "step": 7977 }, { "epoch": 0.7486861861861862, "grad_norm": 1.0695788901104561, "learning_rate": 9.334261314727636e-06, "loss": 0.5039, "step": 7978 }, { "epoch": 0.74878003003003, "grad_norm": 1.115373433517403, "learning_rate": 9.33398908972328e-06, "loss": 0.5265, "step": 7979 }, { "epoch": 0.7488738738738738, "grad_norm": 1.2199451291127292, "learning_rate": 9.333716813044267e-06, "loss": 0.5119, "step": 7980 }, { "epoch": 0.7489677177177178, "grad_norm": 1.1975853154356901, "learning_rate": 9.333444484693836e-06, "loss": 0.4964, "step": 7981 }, { "epoch": 0.7490615615615616, "grad_norm": 0.9556383254879586, "learning_rate": 9.333172104675236e-06, "loss": 0.4862, "step": 7982 }, { "epoch": 0.7491554054054054, "grad_norm": 0.9622064034452091, "learning_rate": 9.332899672991718e-06, "loss": 0.4813, "step": 7983 }, { "epoch": 0.7492492492492493, "grad_norm": 0.9715616744081482, "learning_rate": 9.332627189646525e-06, "loss": 0.4144, "step": 7984 }, { "epoch": 0.7493430930930931, "grad_norm": 1.2126466148629869, "learning_rate": 9.332354654642908e-06, "loss": 0.4864, "step": 7985 }, { "epoch": 0.7494369369369369, "grad_norm": 1.0696300340927523, "learning_rate": 9.332082067984117e-06, "loss": 0.5003, "step": 7986 }, { "epoch": 0.7495307807807807, "grad_norm": 1.0358084980260733, "learning_rate": 9.331809429673403e-06, "loss": 0.493, "step": 7987 }, { "epoch": 0.7496246246246246, "grad_norm": 1.3543771904665691, "learning_rate": 9.331536739714012e-06, "loss": 0.4776, "step": 7988 }, { "epoch": 0.7497184684684685, "grad_norm": 1.3072645830909433, "learning_rate": 9.331263998109202e-06, "loss": 0.5107, "step": 7989 }, { "epoch": 0.7498123123123123, "grad_norm": 1.5410921606164376, "learning_rate": 9.33099120486222e-06, "loss": 0.4485, "step": 7990 }, { "epoch": 0.7499061561561562, "grad_norm": 1.085678158398988, "learning_rate": 9.330718359976319e-06, "loss": 0.4621, "step": 7991 }, { "epoch": 0.75, "grad_norm": 1.1020715396561906, "learning_rate": 9.330445463454754e-06, "loss": 0.4724, "step": 7992 }, { "epoch": 0.7500938438438438, "grad_norm": 1.1108074341785152, "learning_rate": 9.33017251530078e-06, "loss": 0.4348, "step": 7993 }, { "epoch": 0.7501876876876877, "grad_norm": 0.921987966542767, "learning_rate": 9.329899515517646e-06, "loss": 0.487, "step": 7994 }, { "epoch": 0.7502815315315315, "grad_norm": 0.9320671511147227, "learning_rate": 9.329626464108614e-06, "loss": 0.457, "step": 7995 }, { "epoch": 0.7503753753753754, "grad_norm": 1.1682323918102888, "learning_rate": 9.329353361076934e-06, "loss": 0.465, "step": 7996 }, { "epoch": 0.7504692192192193, "grad_norm": 0.8942446569002843, "learning_rate": 9.329080206425865e-06, "loss": 0.4339, "step": 7997 }, { "epoch": 0.7505630630630631, "grad_norm": 1.393839879367081, "learning_rate": 9.328807000158663e-06, "loss": 0.4771, "step": 7998 }, { "epoch": 0.7506569069069069, "grad_norm": 1.04506077621546, "learning_rate": 9.328533742278588e-06, "loss": 0.4789, "step": 7999 }, { "epoch": 0.7507507507507507, "grad_norm": 1.5048167879009164, "learning_rate": 9.328260432788891e-06, "loss": 0.5355, "step": 8000 }, { "epoch": 0.7508445945945946, "grad_norm": 1.0291915125126943, "learning_rate": 9.327987071692839e-06, "loss": 0.4935, "step": 8001 }, { "epoch": 0.7509384384384384, "grad_norm": 1.105516036875406, "learning_rate": 9.327713658993687e-06, "loss": 0.4561, "step": 8002 }, { "epoch": 0.7510322822822822, "grad_norm": 1.0219133892238212, "learning_rate": 9.327440194694695e-06, "loss": 0.5322, "step": 8003 }, { "epoch": 0.7511261261261262, "grad_norm": 2.2499944712867412, "learning_rate": 9.327166678799124e-06, "loss": 0.4549, "step": 8004 }, { "epoch": 0.75121996996997, "grad_norm": 1.2182910011553052, "learning_rate": 9.326893111310237e-06, "loss": 0.5191, "step": 8005 }, { "epoch": 0.7513138138138138, "grad_norm": 1.4419612148905057, "learning_rate": 9.326619492231293e-06, "loss": 0.4545, "step": 8006 }, { "epoch": 0.7514076576576577, "grad_norm": 0.9819838903765205, "learning_rate": 9.326345821565557e-06, "loss": 0.4387, "step": 8007 }, { "epoch": 0.7515015015015015, "grad_norm": 1.1105528977601509, "learning_rate": 9.326072099316289e-06, "loss": 0.4189, "step": 8008 }, { "epoch": 0.7515953453453453, "grad_norm": 1.056761393216077, "learning_rate": 9.325798325486755e-06, "loss": 0.5213, "step": 8009 }, { "epoch": 0.7516891891891891, "grad_norm": 1.130825769229291, "learning_rate": 9.325524500080219e-06, "loss": 0.4907, "step": 8010 }, { "epoch": 0.7517830330330331, "grad_norm": 1.2820241596767132, "learning_rate": 9.325250623099944e-06, "loss": 0.4445, "step": 8011 }, { "epoch": 0.7518768768768769, "grad_norm": 1.3875450640448002, "learning_rate": 9.324976694549198e-06, "loss": 0.4651, "step": 8012 }, { "epoch": 0.7519707207207207, "grad_norm": 3.2246517021521393, "learning_rate": 9.324702714431243e-06, "loss": 0.4951, "step": 8013 }, { "epoch": 0.7520645645645646, "grad_norm": 1.0069799448144097, "learning_rate": 9.324428682749352e-06, "loss": 0.4926, "step": 8014 }, { "epoch": 0.7521584084084084, "grad_norm": 1.2907692999543985, "learning_rate": 9.324154599506788e-06, "loss": 0.4618, "step": 8015 }, { "epoch": 0.7522522522522522, "grad_norm": 0.9286224796746856, "learning_rate": 9.323880464706818e-06, "loss": 0.4353, "step": 8016 }, { "epoch": 0.7523460960960962, "grad_norm": 0.951594881209659, "learning_rate": 9.323606278352714e-06, "loss": 0.4553, "step": 8017 }, { "epoch": 0.75243993993994, "grad_norm": 1.6420897930159062, "learning_rate": 9.323332040447743e-06, "loss": 0.4566, "step": 8018 }, { "epoch": 0.7525337837837838, "grad_norm": 0.8797302219865959, "learning_rate": 9.323057750995174e-06, "loss": 0.4213, "step": 8019 }, { "epoch": 0.7526276276276276, "grad_norm": 0.9814763446185938, "learning_rate": 9.322783409998282e-06, "loss": 0.4702, "step": 8020 }, { "epoch": 0.7527214714714715, "grad_norm": 0.87039530330807, "learning_rate": 9.322509017460331e-06, "loss": 0.4151, "step": 8021 }, { "epoch": 0.7528153153153153, "grad_norm": 1.6227389709587616, "learning_rate": 9.322234573384598e-06, "loss": 0.5055, "step": 8022 }, { "epoch": 0.7529091591591591, "grad_norm": 2.570368300796026, "learning_rate": 9.321960077774354e-06, "loss": 0.4553, "step": 8023 }, { "epoch": 0.753003003003003, "grad_norm": 1.3082614705155509, "learning_rate": 9.321685530632872e-06, "loss": 0.4354, "step": 8024 }, { "epoch": 0.7530968468468469, "grad_norm": 1.2149422081292083, "learning_rate": 9.321410931963422e-06, "loss": 0.4726, "step": 8025 }, { "epoch": 0.7531906906906907, "grad_norm": 1.1952560873946085, "learning_rate": 9.321136281769282e-06, "loss": 0.4484, "step": 8026 }, { "epoch": 0.7532845345345346, "grad_norm": 1.1679097966295446, "learning_rate": 9.320861580053726e-06, "loss": 0.4535, "step": 8027 }, { "epoch": 0.7533783783783784, "grad_norm": 1.4033418329198153, "learning_rate": 9.320586826820029e-06, "loss": 0.3774, "step": 8028 }, { "epoch": 0.7534722222222222, "grad_norm": 1.1538415989590056, "learning_rate": 9.320312022071467e-06, "loss": 0.4785, "step": 8029 }, { "epoch": 0.753566066066066, "grad_norm": 1.0795845458754028, "learning_rate": 9.320037165811316e-06, "loss": 0.5141, "step": 8030 }, { "epoch": 0.7536599099099099, "grad_norm": 1.4349663482180377, "learning_rate": 9.319762258042852e-06, "loss": 0.5097, "step": 8031 }, { "epoch": 0.7537537537537538, "grad_norm": 0.9383254503014343, "learning_rate": 9.319487298769356e-06, "loss": 0.5269, "step": 8032 }, { "epoch": 0.7538475975975976, "grad_norm": 1.1585640751796815, "learning_rate": 9.319212287994104e-06, "loss": 0.469, "step": 8033 }, { "epoch": 0.7539414414414415, "grad_norm": 1.0829929311738835, "learning_rate": 9.318937225720375e-06, "loss": 0.4774, "step": 8034 }, { "epoch": 0.7540352852852853, "grad_norm": 1.2048347103178232, "learning_rate": 9.31866211195145e-06, "loss": 0.4873, "step": 8035 }, { "epoch": 0.7541291291291291, "grad_norm": 1.8316196308940491, "learning_rate": 9.318386946690607e-06, "loss": 0.4799, "step": 8036 }, { "epoch": 0.754222972972973, "grad_norm": 1.133441441042, "learning_rate": 9.31811172994113e-06, "loss": 0.4701, "step": 8037 }, { "epoch": 0.7543168168168168, "grad_norm": 1.1148687937863515, "learning_rate": 9.317836461706298e-06, "loss": 0.5197, "step": 8038 }, { "epoch": 0.7544106606606606, "grad_norm": 1.6778669539488291, "learning_rate": 9.317561141989394e-06, "loss": 0.4917, "step": 8039 }, { "epoch": 0.7545045045045045, "grad_norm": 1.5719231689901139, "learning_rate": 9.317285770793699e-06, "loss": 0.4864, "step": 8040 }, { "epoch": 0.7545983483483484, "grad_norm": 1.194273701195267, "learning_rate": 9.3170103481225e-06, "loss": 0.4888, "step": 8041 }, { "epoch": 0.7546921921921922, "grad_norm": 3.646406482800393, "learning_rate": 9.316734873979076e-06, "loss": 0.4586, "step": 8042 }, { "epoch": 0.754786036036036, "grad_norm": 0.9942670668253374, "learning_rate": 9.316459348366715e-06, "loss": 0.4847, "step": 8043 }, { "epoch": 0.7548798798798799, "grad_norm": 1.514002383761968, "learning_rate": 9.3161837712887e-06, "loss": 0.4193, "step": 8044 }, { "epoch": 0.7549737237237237, "grad_norm": 1.2597713537243045, "learning_rate": 9.31590814274832e-06, "loss": 0.4842, "step": 8045 }, { "epoch": 0.7550675675675675, "grad_norm": 1.1004487300959847, "learning_rate": 9.315632462748858e-06, "loss": 0.4887, "step": 8046 }, { "epoch": 0.7551614114114115, "grad_norm": 1.0531636893308896, "learning_rate": 9.315356731293603e-06, "loss": 0.456, "step": 8047 }, { "epoch": 0.7552552552552553, "grad_norm": 1.0452036111706273, "learning_rate": 9.315080948385842e-06, "loss": 0.512, "step": 8048 }, { "epoch": 0.7553490990990991, "grad_norm": 1.0354778366933686, "learning_rate": 9.314805114028863e-06, "loss": 0.491, "step": 8049 }, { "epoch": 0.7554429429429429, "grad_norm": 3.7028292620242707, "learning_rate": 9.314529228225953e-06, "loss": 0.4472, "step": 8050 }, { "epoch": 0.7555367867867868, "grad_norm": 1.5177926175515815, "learning_rate": 9.314253290980405e-06, "loss": 0.4457, "step": 8051 }, { "epoch": 0.7556306306306306, "grad_norm": 1.034718218935438, "learning_rate": 9.313977302295508e-06, "loss": 0.4634, "step": 8052 }, { "epoch": 0.7557244744744744, "grad_norm": 1.0800560963535946, "learning_rate": 9.313701262174549e-06, "loss": 0.4659, "step": 8053 }, { "epoch": 0.7558183183183184, "grad_norm": 1.045455892672914, "learning_rate": 9.313425170620823e-06, "loss": 0.4611, "step": 8054 }, { "epoch": 0.7559121621621622, "grad_norm": 1.0770706633607776, "learning_rate": 9.313149027637623e-06, "loss": 0.4474, "step": 8055 }, { "epoch": 0.756006006006006, "grad_norm": 0.859190995857048, "learning_rate": 9.312872833228237e-06, "loss": 0.404, "step": 8056 }, { "epoch": 0.7560998498498499, "grad_norm": 1.1171177959422482, "learning_rate": 9.312596587395963e-06, "loss": 0.4977, "step": 8057 }, { "epoch": 0.7561936936936937, "grad_norm": 1.4874133153436018, "learning_rate": 9.31232029014409e-06, "loss": 0.4787, "step": 8058 }, { "epoch": 0.7562875375375375, "grad_norm": 1.2613454590693265, "learning_rate": 9.312043941475917e-06, "loss": 0.5072, "step": 8059 }, { "epoch": 0.7563813813813813, "grad_norm": 1.2815833383792807, "learning_rate": 9.311767541394735e-06, "loss": 0.5236, "step": 8060 }, { "epoch": 0.7564752252252253, "grad_norm": 1.0164322712292522, "learning_rate": 9.31149108990384e-06, "loss": 0.5008, "step": 8061 }, { "epoch": 0.7565690690690691, "grad_norm": 1.3292902692039998, "learning_rate": 9.311214587006533e-06, "loss": 0.4953, "step": 8062 }, { "epoch": 0.7566629129129129, "grad_norm": 1.0326905585365334, "learning_rate": 9.310938032706105e-06, "loss": 0.4737, "step": 8063 }, { "epoch": 0.7567567567567568, "grad_norm": 1.1116345814311595, "learning_rate": 9.310661427005855e-06, "loss": 0.4942, "step": 8064 }, { "epoch": 0.7568506006006006, "grad_norm": 0.9134916583037993, "learning_rate": 9.310384769909082e-06, "loss": 0.3793, "step": 8065 }, { "epoch": 0.7569444444444444, "grad_norm": 0.958335744197208, "learning_rate": 9.310108061419083e-06, "loss": 0.4756, "step": 8066 }, { "epoch": 0.7570382882882883, "grad_norm": 1.0631197492941793, "learning_rate": 9.30983130153916e-06, "loss": 0.5006, "step": 8067 }, { "epoch": 0.7571321321321322, "grad_norm": 1.304087052600686, "learning_rate": 9.309554490272611e-06, "loss": 0.4876, "step": 8068 }, { "epoch": 0.757225975975976, "grad_norm": 1.1725444239590161, "learning_rate": 9.309277627622736e-06, "loss": 0.526, "step": 8069 }, { "epoch": 0.7573198198198198, "grad_norm": 0.9719814452121474, "learning_rate": 9.309000713592837e-06, "loss": 0.51, "step": 8070 }, { "epoch": 0.7574136636636637, "grad_norm": 1.224659889698505, "learning_rate": 9.308723748186214e-06, "loss": 0.4912, "step": 8071 }, { "epoch": 0.7575075075075075, "grad_norm": 1.0743096693716703, "learning_rate": 9.308446731406173e-06, "loss": 0.5054, "step": 8072 }, { "epoch": 0.7576013513513513, "grad_norm": 1.5562090408795513, "learning_rate": 9.308169663256012e-06, "loss": 0.4571, "step": 8073 }, { "epoch": 0.7576951951951952, "grad_norm": 1.5092211959913626, "learning_rate": 9.307892543739039e-06, "loss": 0.4778, "step": 8074 }, { "epoch": 0.757789039039039, "grad_norm": 1.0275247764590134, "learning_rate": 9.307615372858557e-06, "loss": 0.4967, "step": 8075 }, { "epoch": 0.7578828828828829, "grad_norm": 1.0067749918071929, "learning_rate": 9.307338150617868e-06, "loss": 0.5282, "step": 8076 }, { "epoch": 0.7579767267267268, "grad_norm": 1.2261919351888326, "learning_rate": 9.307060877020278e-06, "loss": 0.5096, "step": 8077 }, { "epoch": 0.7580705705705706, "grad_norm": 1.149452916918234, "learning_rate": 9.306783552069096e-06, "loss": 0.5138, "step": 8078 }, { "epoch": 0.7581644144144144, "grad_norm": 2.50063937702297, "learning_rate": 9.306506175767628e-06, "loss": 0.479, "step": 8079 }, { "epoch": 0.7582582582582582, "grad_norm": 1.0796314678435015, "learning_rate": 9.306228748119178e-06, "loss": 0.4774, "step": 8080 }, { "epoch": 0.7583521021021021, "grad_norm": 0.9310485176084736, "learning_rate": 9.305951269127056e-06, "loss": 0.46, "step": 8081 }, { "epoch": 0.7584459459459459, "grad_norm": 1.1158651054072037, "learning_rate": 9.305673738794572e-06, "loss": 0.4858, "step": 8082 }, { "epoch": 0.7585397897897898, "grad_norm": 0.9891077944390534, "learning_rate": 9.305396157125033e-06, "loss": 0.4012, "step": 8083 }, { "epoch": 0.7586336336336337, "grad_norm": 1.0678972565670242, "learning_rate": 9.305118524121747e-06, "loss": 0.4799, "step": 8084 }, { "epoch": 0.7587274774774775, "grad_norm": 1.0173398115351366, "learning_rate": 9.304840839788028e-06, "loss": 0.4408, "step": 8085 }, { "epoch": 0.7588213213213213, "grad_norm": 2.6376548891300557, "learning_rate": 9.304563104127184e-06, "loss": 0.417, "step": 8086 }, { "epoch": 0.7589151651651652, "grad_norm": 1.022195035764458, "learning_rate": 9.304285317142528e-06, "loss": 0.4652, "step": 8087 }, { "epoch": 0.759009009009009, "grad_norm": 1.1481015186184984, "learning_rate": 9.304007478837371e-06, "loss": 0.4658, "step": 8088 }, { "epoch": 0.7591028528528528, "grad_norm": 1.032017931451134, "learning_rate": 9.303729589215027e-06, "loss": 0.4932, "step": 8089 }, { "epoch": 0.7591966966966966, "grad_norm": 1.0380480333737419, "learning_rate": 9.303451648278807e-06, "loss": 0.5158, "step": 8090 }, { "epoch": 0.7592905405405406, "grad_norm": 0.9860805315313821, "learning_rate": 9.303173656032027e-06, "loss": 0.4973, "step": 8091 }, { "epoch": 0.7593843843843844, "grad_norm": 2.437736288431163, "learning_rate": 9.302895612478003e-06, "loss": 0.502, "step": 8092 }, { "epoch": 0.7594782282282282, "grad_norm": 0.9842313138238485, "learning_rate": 9.302617517620048e-06, "loss": 0.4912, "step": 8093 }, { "epoch": 0.7595720720720721, "grad_norm": 1.304000110328926, "learning_rate": 9.302339371461475e-06, "loss": 0.486, "step": 8094 }, { "epoch": 0.7596659159159159, "grad_norm": 6.1030733943294795, "learning_rate": 9.302061174005606e-06, "loss": 0.4633, "step": 8095 }, { "epoch": 0.7597597597597597, "grad_norm": 3.450927567308565, "learning_rate": 9.301782925255754e-06, "loss": 0.4988, "step": 8096 }, { "epoch": 0.7598536036036037, "grad_norm": 1.135431908729149, "learning_rate": 9.301504625215239e-06, "loss": 0.4419, "step": 8097 }, { "epoch": 0.7599474474474475, "grad_norm": 1.088038325145328, "learning_rate": 9.301226273887378e-06, "loss": 0.4946, "step": 8098 }, { "epoch": 0.7600412912912913, "grad_norm": 1.2273008304594952, "learning_rate": 9.300947871275487e-06, "loss": 0.4802, "step": 8099 }, { "epoch": 0.7601351351351351, "grad_norm": 2.534362257292518, "learning_rate": 9.300669417382892e-06, "loss": 0.4372, "step": 8100 }, { "epoch": 0.760228978978979, "grad_norm": 0.9709947841513057, "learning_rate": 9.30039091221291e-06, "loss": 0.5052, "step": 8101 }, { "epoch": 0.7603228228228228, "grad_norm": 1.1143685068990659, "learning_rate": 9.300112355768856e-06, "loss": 0.5255, "step": 8102 }, { "epoch": 0.7604166666666666, "grad_norm": 1.4527552990888866, "learning_rate": 9.299833748054061e-06, "loss": 0.4854, "step": 8103 }, { "epoch": 0.7605105105105106, "grad_norm": 1.1690379023199196, "learning_rate": 9.29955508907184e-06, "loss": 0.4484, "step": 8104 }, { "epoch": 0.7606043543543544, "grad_norm": 1.4912997156132182, "learning_rate": 9.299276378825518e-06, "loss": 0.5382, "step": 8105 }, { "epoch": 0.7606981981981982, "grad_norm": 1.0096606993076098, "learning_rate": 9.298997617318419e-06, "loss": 0.4719, "step": 8106 }, { "epoch": 0.7607920420420421, "grad_norm": 1.1057514070023233, "learning_rate": 9.298718804553863e-06, "loss": 0.472, "step": 8107 }, { "epoch": 0.7608858858858859, "grad_norm": 1.0182786896934966, "learning_rate": 9.298439940535179e-06, "loss": 0.4811, "step": 8108 }, { "epoch": 0.7609797297297297, "grad_norm": 1.1995671203297098, "learning_rate": 9.298161025265689e-06, "loss": 0.4908, "step": 8109 }, { "epoch": 0.7610735735735735, "grad_norm": 1.2396945179825254, "learning_rate": 9.297882058748718e-06, "loss": 0.5055, "step": 8110 }, { "epoch": 0.7611674174174174, "grad_norm": 0.961340815590379, "learning_rate": 9.297603040987594e-06, "loss": 0.4807, "step": 8111 }, { "epoch": 0.7612612612612613, "grad_norm": 0.9562191960899198, "learning_rate": 9.297323971985645e-06, "loss": 0.4471, "step": 8112 }, { "epoch": 0.7613551051051051, "grad_norm": 0.9809280406320618, "learning_rate": 9.297044851746196e-06, "loss": 0.4475, "step": 8113 }, { "epoch": 0.761448948948949, "grad_norm": 1.041985121862621, "learning_rate": 9.296765680272573e-06, "loss": 0.4472, "step": 8114 }, { "epoch": 0.7615427927927928, "grad_norm": 0.995899749127536, "learning_rate": 9.29648645756811e-06, "loss": 0.3947, "step": 8115 }, { "epoch": 0.7616366366366366, "grad_norm": 1.2528557410841732, "learning_rate": 9.29620718363613e-06, "loss": 0.4753, "step": 8116 }, { "epoch": 0.7617304804804805, "grad_norm": 1.135283897231989, "learning_rate": 9.295927858479968e-06, "loss": 0.4947, "step": 8117 }, { "epoch": 0.7618243243243243, "grad_norm": 1.4674505407131415, "learning_rate": 9.295648482102953e-06, "loss": 0.4172, "step": 8118 }, { "epoch": 0.7619181681681682, "grad_norm": 1.046372831284651, "learning_rate": 9.295369054508416e-06, "loss": 0.4516, "step": 8119 }, { "epoch": 0.762012012012012, "grad_norm": 1.1976683317248662, "learning_rate": 9.295089575699687e-06, "loss": 0.462, "step": 8120 }, { "epoch": 0.7621058558558559, "grad_norm": 1.2529464225366604, "learning_rate": 9.2948100456801e-06, "loss": 0.4822, "step": 8121 }, { "epoch": 0.7621996996996997, "grad_norm": 1.2021247558660424, "learning_rate": 9.294530464452987e-06, "loss": 0.5136, "step": 8122 }, { "epoch": 0.7622935435435435, "grad_norm": 11.736809263613873, "learning_rate": 9.294250832021684e-06, "loss": 0.4777, "step": 8123 }, { "epoch": 0.7623873873873874, "grad_norm": 1.7787897900741372, "learning_rate": 9.29397114838952e-06, "loss": 0.437, "step": 8124 }, { "epoch": 0.7624812312312312, "grad_norm": 1.2984525493026209, "learning_rate": 9.293691413559833e-06, "loss": 0.4803, "step": 8125 }, { "epoch": 0.762575075075075, "grad_norm": 1.0904658583858027, "learning_rate": 9.293411627535958e-06, "loss": 0.504, "step": 8126 }, { "epoch": 0.762668918918919, "grad_norm": 0.9546072110657929, "learning_rate": 9.293131790321233e-06, "loss": 0.4419, "step": 8127 }, { "epoch": 0.7627627627627628, "grad_norm": 0.9776087769284705, "learning_rate": 9.29285190191899e-06, "loss": 0.4406, "step": 8128 }, { "epoch": 0.7628566066066066, "grad_norm": 1.002258708313098, "learning_rate": 9.292571962332568e-06, "loss": 0.4802, "step": 8129 }, { "epoch": 0.7629504504504504, "grad_norm": 0.9230248084390855, "learning_rate": 9.292291971565308e-06, "loss": 0.46, "step": 8130 }, { "epoch": 0.7630442942942943, "grad_norm": 1.0243120138932607, "learning_rate": 9.292011929620544e-06, "loss": 0.4791, "step": 8131 }, { "epoch": 0.7631381381381381, "grad_norm": 1.2282685029493485, "learning_rate": 9.291731836501616e-06, "loss": 0.4682, "step": 8132 }, { "epoch": 0.7632319819819819, "grad_norm": 1.0623089291963341, "learning_rate": 9.291451692211865e-06, "loss": 0.4758, "step": 8133 }, { "epoch": 0.7633258258258259, "grad_norm": 1.0200990288569227, "learning_rate": 9.291171496754631e-06, "loss": 0.4959, "step": 8134 }, { "epoch": 0.7634196696696697, "grad_norm": 0.8620138524298645, "learning_rate": 9.290891250133254e-06, "loss": 0.4579, "step": 8135 }, { "epoch": 0.7635135135135135, "grad_norm": 1.3325879342402378, "learning_rate": 9.290610952351075e-06, "loss": 0.4674, "step": 8136 }, { "epoch": 0.7636073573573574, "grad_norm": 1.0428337956838065, "learning_rate": 9.290330603411437e-06, "loss": 0.4584, "step": 8137 }, { "epoch": 0.7637012012012012, "grad_norm": 1.1056258388895617, "learning_rate": 9.290050203317681e-06, "loss": 0.4452, "step": 8138 }, { "epoch": 0.763795045045045, "grad_norm": 1.1974216351005254, "learning_rate": 9.289769752073155e-06, "loss": 0.4997, "step": 8139 }, { "epoch": 0.7638888888888888, "grad_norm": 1.3760977872804456, "learning_rate": 9.289489249681197e-06, "loss": 0.5068, "step": 8140 }, { "epoch": 0.7639827327327328, "grad_norm": 1.028297016268867, "learning_rate": 9.289208696145154e-06, "loss": 0.4969, "step": 8141 }, { "epoch": 0.7640765765765766, "grad_norm": 1.5330939254676252, "learning_rate": 9.28892809146837e-06, "loss": 0.4835, "step": 8142 }, { "epoch": 0.7641704204204204, "grad_norm": 0.9659191969538178, "learning_rate": 9.288647435654193e-06, "loss": 0.4384, "step": 8143 }, { "epoch": 0.7642642642642643, "grad_norm": 1.5545640252972932, "learning_rate": 9.288366728705968e-06, "loss": 0.4804, "step": 8144 }, { "epoch": 0.7643581081081081, "grad_norm": 0.978901108427413, "learning_rate": 9.28808597062704e-06, "loss": 0.461, "step": 8145 }, { "epoch": 0.7644519519519519, "grad_norm": 1.2551739288405785, "learning_rate": 9.28780516142076e-06, "loss": 0.4661, "step": 8146 }, { "epoch": 0.7645457957957958, "grad_norm": 1.039470857867863, "learning_rate": 9.287524301090473e-06, "loss": 0.492, "step": 8147 }, { "epoch": 0.7646396396396397, "grad_norm": 1.0552034354287083, "learning_rate": 9.28724338963953e-06, "loss": 0.4804, "step": 8148 }, { "epoch": 0.7647334834834835, "grad_norm": 0.9510245958635518, "learning_rate": 9.28696242707128e-06, "loss": 0.4515, "step": 8149 }, { "epoch": 0.7648273273273273, "grad_norm": 1.3553539676730215, "learning_rate": 9.286681413389072e-06, "loss": 0.5598, "step": 8150 }, { "epoch": 0.7649211711711712, "grad_norm": 1.2000769315781559, "learning_rate": 9.286400348596257e-06, "loss": 0.469, "step": 8151 }, { "epoch": 0.765015015015015, "grad_norm": 1.025547205923313, "learning_rate": 9.286119232696188e-06, "loss": 0.503, "step": 8152 }, { "epoch": 0.7651088588588588, "grad_norm": 1.3293842716701472, "learning_rate": 9.28583806569221e-06, "loss": 0.4809, "step": 8153 }, { "epoch": 0.7652027027027027, "grad_norm": 1.0107300323436106, "learning_rate": 9.285556847587684e-06, "loss": 0.4883, "step": 8154 }, { "epoch": 0.7652965465465466, "grad_norm": 1.121944209932577, "learning_rate": 9.285275578385959e-06, "loss": 0.4924, "step": 8155 }, { "epoch": 0.7653903903903904, "grad_norm": 0.873675734247701, "learning_rate": 9.284994258090389e-06, "loss": 0.4209, "step": 8156 }, { "epoch": 0.7654842342342343, "grad_norm": 1.662289600155058, "learning_rate": 9.284712886704327e-06, "loss": 0.5037, "step": 8157 }, { "epoch": 0.7655780780780781, "grad_norm": 1.2251749082561947, "learning_rate": 9.28443146423113e-06, "loss": 0.5238, "step": 8158 }, { "epoch": 0.7656719219219219, "grad_norm": 1.068899468753815, "learning_rate": 9.284149990674152e-06, "loss": 0.4792, "step": 8159 }, { "epoch": 0.7657657657657657, "grad_norm": 1.2558998154993173, "learning_rate": 9.28386846603675e-06, "loss": 0.4993, "step": 8160 }, { "epoch": 0.7658596096096096, "grad_norm": 0.9797684696424807, "learning_rate": 9.283586890322277e-06, "loss": 0.4557, "step": 8161 }, { "epoch": 0.7659534534534534, "grad_norm": 0.945833964905598, "learning_rate": 9.283305263534098e-06, "loss": 0.4987, "step": 8162 }, { "epoch": 0.7660472972972973, "grad_norm": 1.4461854870507274, "learning_rate": 9.283023585675563e-06, "loss": 0.4931, "step": 8163 }, { "epoch": 0.7661411411411412, "grad_norm": 1.2606548599592833, "learning_rate": 9.282741856750035e-06, "loss": 0.4608, "step": 8164 }, { "epoch": 0.766234984984985, "grad_norm": 1.1446408432030406, "learning_rate": 9.282460076760874e-06, "loss": 0.4636, "step": 8165 }, { "epoch": 0.7663288288288288, "grad_norm": 1.0029889128953582, "learning_rate": 9.282178245711434e-06, "loss": 0.4691, "step": 8166 }, { "epoch": 0.7664226726726727, "grad_norm": 0.996123665485289, "learning_rate": 9.28189636360508e-06, "loss": 0.4664, "step": 8167 }, { "epoch": 0.7665165165165165, "grad_norm": 1.1462043024850153, "learning_rate": 9.281614430445172e-06, "loss": 0.48, "step": 8168 }, { "epoch": 0.7666103603603603, "grad_norm": 1.0163070432336134, "learning_rate": 9.28133244623507e-06, "loss": 0.4724, "step": 8169 }, { "epoch": 0.7667042042042042, "grad_norm": 1.026685611503234, "learning_rate": 9.281050410978138e-06, "loss": 0.471, "step": 8170 }, { "epoch": 0.7667980480480481, "grad_norm": 1.2115798492386165, "learning_rate": 9.280768324677739e-06, "loss": 0.4835, "step": 8171 }, { "epoch": 0.7668918918918919, "grad_norm": 1.3581078182216055, "learning_rate": 9.280486187337234e-06, "loss": 0.44, "step": 8172 }, { "epoch": 0.7669857357357357, "grad_norm": 1.0043764417723549, "learning_rate": 9.28020399895999e-06, "loss": 0.4427, "step": 8173 }, { "epoch": 0.7670795795795796, "grad_norm": 1.3404089663299374, "learning_rate": 9.279921759549368e-06, "loss": 0.4748, "step": 8174 }, { "epoch": 0.7671734234234234, "grad_norm": 1.1549366418704667, "learning_rate": 9.279639469108737e-06, "loss": 0.4781, "step": 8175 }, { "epoch": 0.7672672672672672, "grad_norm": 1.057920018681016, "learning_rate": 9.27935712764146e-06, "loss": 0.4541, "step": 8176 }, { "epoch": 0.7673611111111112, "grad_norm": 1.1606610633599268, "learning_rate": 9.279074735150906e-06, "loss": 0.4866, "step": 8177 }, { "epoch": 0.767454954954955, "grad_norm": 1.3556436031277743, "learning_rate": 9.278792291640438e-06, "loss": 0.4874, "step": 8178 }, { "epoch": 0.7675487987987988, "grad_norm": 1.0005947899481817, "learning_rate": 9.278509797113427e-06, "loss": 0.4382, "step": 8179 }, { "epoch": 0.7676426426426426, "grad_norm": 1.2847782932436154, "learning_rate": 9.27822725157324e-06, "loss": 0.4668, "step": 8180 }, { "epoch": 0.7677364864864865, "grad_norm": 0.8963274396703287, "learning_rate": 9.277944655023245e-06, "loss": 0.4945, "step": 8181 }, { "epoch": 0.7678303303303303, "grad_norm": 1.164328831191517, "learning_rate": 9.277662007466814e-06, "loss": 0.4855, "step": 8182 }, { "epoch": 0.7679241741741741, "grad_norm": 1.0679507065663785, "learning_rate": 9.277379308907315e-06, "loss": 0.4308, "step": 8183 }, { "epoch": 0.7680180180180181, "grad_norm": 1.5630361914627604, "learning_rate": 9.277096559348117e-06, "loss": 0.4551, "step": 8184 }, { "epoch": 0.7681118618618619, "grad_norm": 1.2763349378905278, "learning_rate": 9.276813758792597e-06, "loss": 0.493, "step": 8185 }, { "epoch": 0.7682057057057057, "grad_norm": 1.1047841413360653, "learning_rate": 9.27653090724412e-06, "loss": 0.4788, "step": 8186 }, { "epoch": 0.7682995495495496, "grad_norm": 1.2686810906725867, "learning_rate": 9.276248004706063e-06, "loss": 0.4822, "step": 8187 }, { "epoch": 0.7683933933933934, "grad_norm": 0.8774018296664217, "learning_rate": 9.2759650511818e-06, "loss": 0.4479, "step": 8188 }, { "epoch": 0.7684872372372372, "grad_norm": 0.9011047052561191, "learning_rate": 9.2756820466747e-06, "loss": 0.4619, "step": 8189 }, { "epoch": 0.768581081081081, "grad_norm": 0.9154723962568143, "learning_rate": 9.275398991188138e-06, "loss": 0.4626, "step": 8190 }, { "epoch": 0.768674924924925, "grad_norm": 1.1011068866505611, "learning_rate": 9.275115884725495e-06, "loss": 0.4988, "step": 8191 }, { "epoch": 0.7687687687687688, "grad_norm": 1.0215015035667319, "learning_rate": 9.274832727290138e-06, "loss": 0.4637, "step": 8192 }, { "epoch": 0.7688626126126126, "grad_norm": 1.6814500120962796, "learning_rate": 9.27454951888545e-06, "loss": 0.4872, "step": 8193 }, { "epoch": 0.7689564564564565, "grad_norm": 1.018766635596236, "learning_rate": 9.274266259514804e-06, "loss": 0.484, "step": 8194 }, { "epoch": 0.7690503003003003, "grad_norm": 1.1241363016714383, "learning_rate": 9.273982949181577e-06, "loss": 0.4769, "step": 8195 }, { "epoch": 0.7691441441441441, "grad_norm": 0.9345945450165023, "learning_rate": 9.27369958788915e-06, "loss": 0.3922, "step": 8196 }, { "epoch": 0.769237987987988, "grad_norm": 2.4348013522132583, "learning_rate": 9.2734161756409e-06, "loss": 0.4448, "step": 8197 }, { "epoch": 0.7693318318318318, "grad_norm": 0.9509435230829044, "learning_rate": 9.273132712440205e-06, "loss": 0.4859, "step": 8198 }, { "epoch": 0.7694256756756757, "grad_norm": 1.2453894194547763, "learning_rate": 9.272849198290446e-06, "loss": 0.5036, "step": 8199 }, { "epoch": 0.7695195195195195, "grad_norm": 1.3260648652461324, "learning_rate": 9.272565633195003e-06, "loss": 0.4734, "step": 8200 }, { "epoch": 0.7696133633633634, "grad_norm": 0.9754333069949398, "learning_rate": 9.272282017157257e-06, "loss": 0.516, "step": 8201 }, { "epoch": 0.7697072072072072, "grad_norm": 0.9680546086912718, "learning_rate": 9.271998350180591e-06, "loss": 0.4159, "step": 8202 }, { "epoch": 0.769801051051051, "grad_norm": 1.02190491354061, "learning_rate": 9.271714632268385e-06, "loss": 0.4177, "step": 8203 }, { "epoch": 0.7698948948948949, "grad_norm": 1.4683849042819053, "learning_rate": 9.271430863424022e-06, "loss": 0.4571, "step": 8204 }, { "epoch": 0.7699887387387387, "grad_norm": 1.8048528139056044, "learning_rate": 9.271147043650888e-06, "loss": 0.4568, "step": 8205 }, { "epoch": 0.7700825825825826, "grad_norm": 0.9355802898889649, "learning_rate": 9.270863172952363e-06, "loss": 0.4209, "step": 8206 }, { "epoch": 0.7701764264264265, "grad_norm": 0.9674280429045399, "learning_rate": 9.270579251331835e-06, "loss": 0.4379, "step": 8207 }, { "epoch": 0.7702702702702703, "grad_norm": 1.1295721347010736, "learning_rate": 9.270295278792686e-06, "loss": 0.414, "step": 8208 }, { "epoch": 0.7703641141141141, "grad_norm": 0.96970960720107, "learning_rate": 9.270011255338307e-06, "loss": 0.4496, "step": 8209 }, { "epoch": 0.7704579579579579, "grad_norm": 1.1172532576543728, "learning_rate": 9.26972718097208e-06, "loss": 0.402, "step": 8210 }, { "epoch": 0.7705518018018018, "grad_norm": 0.9894368321372549, "learning_rate": 9.26944305569739e-06, "loss": 0.4906, "step": 8211 }, { "epoch": 0.7706456456456456, "grad_norm": 1.554484518235049, "learning_rate": 9.269158879517633e-06, "loss": 0.4433, "step": 8212 }, { "epoch": 0.7707394894894894, "grad_norm": 1.0749449709397307, "learning_rate": 9.26887465243619e-06, "loss": 0.4543, "step": 8213 }, { "epoch": 0.7708333333333334, "grad_norm": 2.4309566586903886, "learning_rate": 9.268590374456451e-06, "loss": 0.4812, "step": 8214 }, { "epoch": 0.7709271771771772, "grad_norm": 0.8989225498041538, "learning_rate": 9.268306045581808e-06, "loss": 0.491, "step": 8215 }, { "epoch": 0.771021021021021, "grad_norm": 1.4132394253568985, "learning_rate": 9.26802166581565e-06, "loss": 0.4449, "step": 8216 }, { "epoch": 0.7711148648648649, "grad_norm": 1.3501199864676063, "learning_rate": 9.267737235161366e-06, "loss": 0.4367, "step": 8217 }, { "epoch": 0.7712087087087087, "grad_norm": 1.2944648489009363, "learning_rate": 9.26745275362235e-06, "loss": 0.507, "step": 8218 }, { "epoch": 0.7713025525525525, "grad_norm": 1.1681511948535277, "learning_rate": 9.267168221201993e-06, "loss": 0.4928, "step": 8219 }, { "epoch": 0.7713963963963963, "grad_norm": 0.9165838617779672, "learning_rate": 9.266883637903685e-06, "loss": 0.462, "step": 8220 }, { "epoch": 0.7714902402402403, "grad_norm": 0.9595340519896771, "learning_rate": 9.266599003730824e-06, "loss": 0.498, "step": 8221 }, { "epoch": 0.7715840840840841, "grad_norm": 0.9696314712982081, "learning_rate": 9.266314318686798e-06, "loss": 0.4862, "step": 8222 }, { "epoch": 0.7716779279279279, "grad_norm": 1.0255815056617588, "learning_rate": 9.266029582775005e-06, "loss": 0.4757, "step": 8223 }, { "epoch": 0.7717717717717718, "grad_norm": 1.5558192689338874, "learning_rate": 9.265744795998842e-06, "loss": 0.5499, "step": 8224 }, { "epoch": 0.7718656156156156, "grad_norm": 1.0112955702845818, "learning_rate": 9.265459958361699e-06, "loss": 0.4779, "step": 8225 }, { "epoch": 0.7719594594594594, "grad_norm": 1.1646748449270365, "learning_rate": 9.265175069866975e-06, "loss": 0.4865, "step": 8226 }, { "epoch": 0.7720533033033034, "grad_norm": 2.260906188869945, "learning_rate": 9.264890130518066e-06, "loss": 0.4014, "step": 8227 }, { "epoch": 0.7721471471471472, "grad_norm": 1.1988999963089733, "learning_rate": 9.264605140318372e-06, "loss": 0.4207, "step": 8228 }, { "epoch": 0.772240990990991, "grad_norm": 3.257460541841065, "learning_rate": 9.264320099271287e-06, "loss": 0.4312, "step": 8229 }, { "epoch": 0.7723348348348348, "grad_norm": 1.0679817550822457, "learning_rate": 9.264035007380212e-06, "loss": 0.4646, "step": 8230 }, { "epoch": 0.7724286786786787, "grad_norm": 0.9865042166477546, "learning_rate": 9.263749864648547e-06, "loss": 0.4573, "step": 8231 }, { "epoch": 0.7725225225225225, "grad_norm": 1.3002793224934068, "learning_rate": 9.26346467107969e-06, "loss": 0.4835, "step": 8232 }, { "epoch": 0.7726163663663663, "grad_norm": 5.925441904166592, "learning_rate": 9.263179426677042e-06, "loss": 0.4455, "step": 8233 }, { "epoch": 0.7727102102102102, "grad_norm": 1.035273834768308, "learning_rate": 9.262894131444004e-06, "loss": 0.4514, "step": 8234 }, { "epoch": 0.7728040540540541, "grad_norm": 2.0452686222528444, "learning_rate": 9.262608785383977e-06, "loss": 0.4801, "step": 8235 }, { "epoch": 0.7728978978978979, "grad_norm": 1.0784924329659524, "learning_rate": 9.262323388500364e-06, "loss": 0.432, "step": 8236 }, { "epoch": 0.7729917417417418, "grad_norm": 1.065960699549841, "learning_rate": 9.262037940796567e-06, "loss": 0.4571, "step": 8237 }, { "epoch": 0.7730855855855856, "grad_norm": 1.2315305537281043, "learning_rate": 9.261752442275992e-06, "loss": 0.4719, "step": 8238 }, { "epoch": 0.7731794294294294, "grad_norm": 0.9439685808362874, "learning_rate": 9.26146689294204e-06, "loss": 0.4753, "step": 8239 }, { "epoch": 0.7732732732732732, "grad_norm": 1.2234444670214704, "learning_rate": 9.261181292798116e-06, "loss": 0.5341, "step": 8240 }, { "epoch": 0.7733671171171171, "grad_norm": 1.0311853340515433, "learning_rate": 9.260895641847626e-06, "loss": 0.4836, "step": 8241 }, { "epoch": 0.773460960960961, "grad_norm": 0.9309737450525436, "learning_rate": 9.260609940093976e-06, "loss": 0.4473, "step": 8242 }, { "epoch": 0.7735548048048048, "grad_norm": 1.0911539867903284, "learning_rate": 9.260324187540573e-06, "loss": 0.4727, "step": 8243 }, { "epoch": 0.7736486486486487, "grad_norm": 0.8362964749399412, "learning_rate": 9.260038384190823e-06, "loss": 0.439, "step": 8244 }, { "epoch": 0.7737424924924925, "grad_norm": 1.0051313909538018, "learning_rate": 9.259752530048134e-06, "loss": 0.496, "step": 8245 }, { "epoch": 0.7738363363363363, "grad_norm": 1.0610729388211844, "learning_rate": 9.259466625115914e-06, "loss": 0.5027, "step": 8246 }, { "epoch": 0.7739301801801802, "grad_norm": 1.150235064642682, "learning_rate": 9.259180669397571e-06, "loss": 0.4876, "step": 8247 }, { "epoch": 0.774024024024024, "grad_norm": 1.1556982414123471, "learning_rate": 9.258894662896517e-06, "loss": 0.492, "step": 8248 }, { "epoch": 0.7741178678678678, "grad_norm": 1.0506908049236012, "learning_rate": 9.25860860561616e-06, "loss": 0.457, "step": 8249 }, { "epoch": 0.7742117117117117, "grad_norm": 0.9029311972965425, "learning_rate": 9.258322497559912e-06, "loss": 0.4618, "step": 8250 }, { "epoch": 0.7743055555555556, "grad_norm": 1.1666267826713435, "learning_rate": 9.258036338731183e-06, "loss": 0.4796, "step": 8251 }, { "epoch": 0.7743993993993994, "grad_norm": 0.993358083579501, "learning_rate": 9.257750129133386e-06, "loss": 0.4617, "step": 8252 }, { "epoch": 0.7744932432432432, "grad_norm": 1.0365171714456374, "learning_rate": 9.257463868769933e-06, "loss": 0.4068, "step": 8253 }, { "epoch": 0.7745870870870871, "grad_norm": 1.1421472663903762, "learning_rate": 9.257177557644236e-06, "loss": 0.4359, "step": 8254 }, { "epoch": 0.7746809309309309, "grad_norm": 1.26069922236769, "learning_rate": 9.256891195759714e-06, "loss": 0.5009, "step": 8255 }, { "epoch": 0.7747747747747747, "grad_norm": 1.2304321918231438, "learning_rate": 9.256604783119774e-06, "loss": 0.5114, "step": 8256 }, { "epoch": 0.7748686186186187, "grad_norm": 1.0580743224448323, "learning_rate": 9.256318319727833e-06, "loss": 0.4977, "step": 8257 }, { "epoch": 0.7749624624624625, "grad_norm": 1.073575238836881, "learning_rate": 9.25603180558731e-06, "loss": 0.4617, "step": 8258 }, { "epoch": 0.7750563063063063, "grad_norm": 1.0991291539311265, "learning_rate": 9.255745240701619e-06, "loss": 0.4408, "step": 8259 }, { "epoch": 0.7751501501501501, "grad_norm": 2.2043919654326354, "learning_rate": 9.255458625074175e-06, "loss": 0.5208, "step": 8260 }, { "epoch": 0.775243993993994, "grad_norm": 1.1402327977360966, "learning_rate": 9.255171958708398e-06, "loss": 0.4407, "step": 8261 }, { "epoch": 0.7753378378378378, "grad_norm": 1.0225695373060686, "learning_rate": 9.254885241607706e-06, "loss": 0.4524, "step": 8262 }, { "epoch": 0.7754316816816816, "grad_norm": 0.992977928872561, "learning_rate": 9.254598473775515e-06, "loss": 0.442, "step": 8263 }, { "epoch": 0.7755255255255256, "grad_norm": 1.2780380554794903, "learning_rate": 9.254311655215246e-06, "loss": 0.5308, "step": 8264 }, { "epoch": 0.7756193693693694, "grad_norm": 1.0834130195048428, "learning_rate": 9.254024785930319e-06, "loss": 0.4332, "step": 8265 }, { "epoch": 0.7757132132132132, "grad_norm": 1.0161101119161984, "learning_rate": 9.253737865924152e-06, "loss": 0.4819, "step": 8266 }, { "epoch": 0.7758070570570571, "grad_norm": 1.112470667892284, "learning_rate": 9.253450895200169e-06, "loss": 0.4135, "step": 8267 }, { "epoch": 0.7759009009009009, "grad_norm": 0.9274959044021672, "learning_rate": 9.25316387376179e-06, "loss": 0.3719, "step": 8268 }, { "epoch": 0.7759947447447447, "grad_norm": 0.9171329873323877, "learning_rate": 9.252876801612439e-06, "loss": 0.3907, "step": 8269 }, { "epoch": 0.7760885885885885, "grad_norm": 1.1215996906143828, "learning_rate": 9.252589678755537e-06, "loss": 0.5162, "step": 8270 }, { "epoch": 0.7761824324324325, "grad_norm": 1.6098691663778153, "learning_rate": 9.252302505194505e-06, "loss": 0.4558, "step": 8271 }, { "epoch": 0.7762762762762763, "grad_norm": 0.997470272150115, "learning_rate": 9.252015280932773e-06, "loss": 0.4656, "step": 8272 }, { "epoch": 0.7763701201201201, "grad_norm": 1.0173123622108724, "learning_rate": 9.25172800597376e-06, "loss": 0.4833, "step": 8273 }, { "epoch": 0.776463963963964, "grad_norm": 0.9551654000387145, "learning_rate": 9.251440680320895e-06, "loss": 0.4166, "step": 8274 }, { "epoch": 0.7765578078078078, "grad_norm": 1.0102319387228091, "learning_rate": 9.251153303977602e-06, "loss": 0.4574, "step": 8275 }, { "epoch": 0.7766516516516516, "grad_norm": 2.028922378248487, "learning_rate": 9.250865876947309e-06, "loss": 0.4566, "step": 8276 }, { "epoch": 0.7767454954954955, "grad_norm": 1.067582340707393, "learning_rate": 9.25057839923344e-06, "loss": 0.4676, "step": 8277 }, { "epoch": 0.7768393393393394, "grad_norm": 0.9620841007178401, "learning_rate": 9.250290870839426e-06, "loss": 0.4831, "step": 8278 }, { "epoch": 0.7769331831831832, "grad_norm": 1.1587122290215255, "learning_rate": 9.250003291768694e-06, "loss": 0.4623, "step": 8279 }, { "epoch": 0.777027027027027, "grad_norm": 1.251423695230049, "learning_rate": 9.249715662024672e-06, "loss": 0.489, "step": 8280 }, { "epoch": 0.7771208708708709, "grad_norm": 0.970426367759877, "learning_rate": 9.249427981610787e-06, "loss": 0.5148, "step": 8281 }, { "epoch": 0.7772147147147147, "grad_norm": 1.004168422960949, "learning_rate": 9.249140250530476e-06, "loss": 0.4875, "step": 8282 }, { "epoch": 0.7773085585585585, "grad_norm": 2.6148283378268564, "learning_rate": 9.248852468787163e-06, "loss": 0.4654, "step": 8283 }, { "epoch": 0.7774024024024024, "grad_norm": 1.162422352752914, "learning_rate": 9.248564636384284e-06, "loss": 0.4399, "step": 8284 }, { "epoch": 0.7774962462462462, "grad_norm": 1.55510021061652, "learning_rate": 9.248276753325269e-06, "loss": 0.4782, "step": 8285 }, { "epoch": 0.7775900900900901, "grad_norm": 1.210279125107724, "learning_rate": 9.24798881961355e-06, "loss": 0.4539, "step": 8286 }, { "epoch": 0.777683933933934, "grad_norm": 5.456819299613358, "learning_rate": 9.24770083525256e-06, "loss": 0.4315, "step": 8287 }, { "epoch": 0.7777777777777778, "grad_norm": 0.9623725862203465, "learning_rate": 9.247412800245732e-06, "loss": 0.5111, "step": 8288 }, { "epoch": 0.7778716216216216, "grad_norm": 1.3124198282700517, "learning_rate": 9.247124714596502e-06, "loss": 0.436, "step": 8289 }, { "epoch": 0.7779654654654654, "grad_norm": 1.0247112864864323, "learning_rate": 9.246836578308305e-06, "loss": 0.4811, "step": 8290 }, { "epoch": 0.7780593093093093, "grad_norm": 1.0292176309393195, "learning_rate": 9.246548391384573e-06, "loss": 0.4217, "step": 8291 }, { "epoch": 0.7781531531531531, "grad_norm": 1.229793888236551, "learning_rate": 9.246260153828748e-06, "loss": 0.4571, "step": 8292 }, { "epoch": 0.778246996996997, "grad_norm": 1.136153162443449, "learning_rate": 9.245971865644262e-06, "loss": 0.4241, "step": 8293 }, { "epoch": 0.7783408408408409, "grad_norm": 1.4099231522786482, "learning_rate": 9.245683526834553e-06, "loss": 0.4545, "step": 8294 }, { "epoch": 0.7784346846846847, "grad_norm": 1.2093940963954128, "learning_rate": 9.245395137403062e-06, "loss": 0.4955, "step": 8295 }, { "epoch": 0.7785285285285285, "grad_norm": 0.8903484193239782, "learning_rate": 9.245106697353222e-06, "loss": 0.3976, "step": 8296 }, { "epoch": 0.7786223723723724, "grad_norm": 2.1449038458912026, "learning_rate": 9.244818206688478e-06, "loss": 0.421, "step": 8297 }, { "epoch": 0.7787162162162162, "grad_norm": 0.9648723419961438, "learning_rate": 9.244529665412265e-06, "loss": 0.4795, "step": 8298 }, { "epoch": 0.77881006006006, "grad_norm": 1.0499317877735244, "learning_rate": 9.244241073528026e-06, "loss": 0.4781, "step": 8299 }, { "epoch": 0.7789039039039038, "grad_norm": 1.1707567020939826, "learning_rate": 9.2439524310392e-06, "loss": 0.5055, "step": 8300 }, { "epoch": 0.7789977477477478, "grad_norm": 0.9782688761853727, "learning_rate": 9.243663737949232e-06, "loss": 0.5048, "step": 8301 }, { "epoch": 0.7790915915915916, "grad_norm": 1.9987261359438953, "learning_rate": 9.24337499426156e-06, "loss": 0.4815, "step": 8302 }, { "epoch": 0.7791854354354354, "grad_norm": 1.0842745043455222, "learning_rate": 9.24308619997963e-06, "loss": 0.5058, "step": 8303 }, { "epoch": 0.7792792792792793, "grad_norm": 1.0441552460752699, "learning_rate": 9.242797355106882e-06, "loss": 0.4991, "step": 8304 }, { "epoch": 0.7793731231231231, "grad_norm": 1.0269623781717219, "learning_rate": 9.242508459646762e-06, "loss": 0.5105, "step": 8305 }, { "epoch": 0.7794669669669669, "grad_norm": 1.5813808213290863, "learning_rate": 9.242219513602715e-06, "loss": 0.5108, "step": 8306 }, { "epoch": 0.7795608108108109, "grad_norm": 2.0095932611170766, "learning_rate": 9.241930516978186e-06, "loss": 0.5101, "step": 8307 }, { "epoch": 0.7796546546546547, "grad_norm": 1.0433262897401252, "learning_rate": 9.24164146977662e-06, "loss": 0.4879, "step": 8308 }, { "epoch": 0.7797484984984985, "grad_norm": 0.9883800215784279, "learning_rate": 9.241352372001463e-06, "loss": 0.4583, "step": 8309 }, { "epoch": 0.7798423423423423, "grad_norm": 1.6912348278596603, "learning_rate": 9.241063223656162e-06, "loss": 0.4522, "step": 8310 }, { "epoch": 0.7799361861861862, "grad_norm": 1.368782187065799, "learning_rate": 9.240774024744165e-06, "loss": 0.5361, "step": 8311 }, { "epoch": 0.78003003003003, "grad_norm": 1.2600146220276065, "learning_rate": 9.240484775268921e-06, "loss": 0.5311, "step": 8312 }, { "epoch": 0.7801238738738738, "grad_norm": 0.9840109256048815, "learning_rate": 9.24019547523388e-06, "loss": 0.4583, "step": 8313 }, { "epoch": 0.7802177177177178, "grad_norm": 1.1263869009192642, "learning_rate": 9.239906124642486e-06, "loss": 0.5216, "step": 8314 }, { "epoch": 0.7803115615615616, "grad_norm": 1.1083844095551236, "learning_rate": 9.239616723498194e-06, "loss": 0.4813, "step": 8315 }, { "epoch": 0.7804054054054054, "grad_norm": 1.0116125591220588, "learning_rate": 9.239327271804452e-06, "loss": 0.4492, "step": 8316 }, { "epoch": 0.7804992492492493, "grad_norm": 1.09283696061668, "learning_rate": 9.239037769564712e-06, "loss": 0.4254, "step": 8317 }, { "epoch": 0.7805930930930931, "grad_norm": 1.1144902930651424, "learning_rate": 9.238748216782426e-06, "loss": 0.4999, "step": 8318 }, { "epoch": 0.7806869369369369, "grad_norm": 0.9596524971194884, "learning_rate": 9.238458613461047e-06, "loss": 0.4556, "step": 8319 }, { "epoch": 0.7807807807807807, "grad_norm": 1.2035447247692588, "learning_rate": 9.238168959604026e-06, "loss": 0.4694, "step": 8320 }, { "epoch": 0.7808746246246246, "grad_norm": 1.0607442765502328, "learning_rate": 9.237879255214818e-06, "loss": 0.4834, "step": 8321 }, { "epoch": 0.7809684684684685, "grad_norm": 1.0307331906686095, "learning_rate": 9.237589500296876e-06, "loss": 0.5095, "step": 8322 }, { "epoch": 0.7810623123123123, "grad_norm": 1.0817548878290142, "learning_rate": 9.237299694853657e-06, "loss": 0.4557, "step": 8323 }, { "epoch": 0.7811561561561562, "grad_norm": 0.9723814000004476, "learning_rate": 9.237009838888615e-06, "loss": 0.4727, "step": 8324 }, { "epoch": 0.78125, "grad_norm": 0.9670534509743505, "learning_rate": 9.236719932405204e-06, "loss": 0.488, "step": 8325 }, { "epoch": 0.7813438438438438, "grad_norm": 0.9374116169405308, "learning_rate": 9.236429975406883e-06, "loss": 0.426, "step": 8326 }, { "epoch": 0.7814376876876877, "grad_norm": 1.074075121069895, "learning_rate": 9.236139967897111e-06, "loss": 0.4898, "step": 8327 }, { "epoch": 0.7815315315315315, "grad_norm": 0.9397787174076753, "learning_rate": 9.235849909879344e-06, "loss": 0.4907, "step": 8328 }, { "epoch": 0.7816253753753754, "grad_norm": 1.2166052259259548, "learning_rate": 9.235559801357037e-06, "loss": 0.5081, "step": 8329 }, { "epoch": 0.7817192192192193, "grad_norm": 1.0575608298656742, "learning_rate": 9.235269642333656e-06, "loss": 0.4884, "step": 8330 }, { "epoch": 0.7818130630630631, "grad_norm": 1.1146213384101744, "learning_rate": 9.234979432812653e-06, "loss": 0.5116, "step": 8331 }, { "epoch": 0.7819069069069069, "grad_norm": 0.9279066740014645, "learning_rate": 9.234689172797493e-06, "loss": 0.497, "step": 8332 }, { "epoch": 0.7820007507507507, "grad_norm": 1.0140956879125558, "learning_rate": 9.234398862291636e-06, "loss": 0.4769, "step": 8333 }, { "epoch": 0.7820945945945946, "grad_norm": 1.3511185348677175, "learning_rate": 9.234108501298542e-06, "loss": 0.4187, "step": 8334 }, { "epoch": 0.7821884384384384, "grad_norm": 2.450183297694661, "learning_rate": 9.233818089821675e-06, "loss": 0.4922, "step": 8335 }, { "epoch": 0.7822822822822822, "grad_norm": 1.0585154730448176, "learning_rate": 9.233527627864496e-06, "loss": 0.4908, "step": 8336 }, { "epoch": 0.7823761261261262, "grad_norm": 1.5418116308994787, "learning_rate": 9.23323711543047e-06, "loss": 0.4968, "step": 8337 }, { "epoch": 0.78246996996997, "grad_norm": 1.2605110044982923, "learning_rate": 9.232946552523056e-06, "loss": 0.4638, "step": 8338 }, { "epoch": 0.7825638138138138, "grad_norm": 1.1480480942263978, "learning_rate": 9.232655939145726e-06, "loss": 0.4434, "step": 8339 }, { "epoch": 0.7826576576576577, "grad_norm": 1.0017898572012405, "learning_rate": 9.23236527530194e-06, "loss": 0.4449, "step": 8340 }, { "epoch": 0.7827515015015015, "grad_norm": 1.2903254835373816, "learning_rate": 9.232074560995164e-06, "loss": 0.4782, "step": 8341 }, { "epoch": 0.7828453453453453, "grad_norm": 1.0994725279107147, "learning_rate": 9.231783796228865e-06, "loss": 0.4646, "step": 8342 }, { "epoch": 0.7829391891891891, "grad_norm": 4.438991402709424, "learning_rate": 9.231492981006511e-06, "loss": 0.4439, "step": 8343 }, { "epoch": 0.7830330330330331, "grad_norm": 1.1528729682340626, "learning_rate": 9.231202115331567e-06, "loss": 0.4357, "step": 8344 }, { "epoch": 0.7831268768768769, "grad_norm": 1.10727865857073, "learning_rate": 9.230911199207502e-06, "loss": 0.4381, "step": 8345 }, { "epoch": 0.7832207207207207, "grad_norm": 2.6063473193209457, "learning_rate": 9.230620232637786e-06, "loss": 0.476, "step": 8346 }, { "epoch": 0.7833145645645646, "grad_norm": 1.2319075631111789, "learning_rate": 9.230329215625886e-06, "loss": 0.4494, "step": 8347 }, { "epoch": 0.7834084084084084, "grad_norm": 1.0923198678822552, "learning_rate": 9.230038148175274e-06, "loss": 0.4686, "step": 8348 }, { "epoch": 0.7835022522522522, "grad_norm": 1.2550892886984084, "learning_rate": 9.229747030289418e-06, "loss": 0.4752, "step": 8349 }, { "epoch": 0.7835960960960962, "grad_norm": 1.0668514032002123, "learning_rate": 9.229455861971792e-06, "loss": 0.4768, "step": 8350 }, { "epoch": 0.78368993993994, "grad_norm": 0.9102109400357143, "learning_rate": 9.229164643225864e-06, "loss": 0.3736, "step": 8351 }, { "epoch": 0.7837837837837838, "grad_norm": 1.160874402872415, "learning_rate": 9.22887337405511e-06, "loss": 0.4263, "step": 8352 }, { "epoch": 0.7838776276276276, "grad_norm": 1.066035175043415, "learning_rate": 9.228582054463e-06, "loss": 0.4726, "step": 8353 }, { "epoch": 0.7839714714714715, "grad_norm": 1.3604404462335489, "learning_rate": 9.228290684453007e-06, "loss": 0.4921, "step": 8354 }, { "epoch": 0.7840653153153153, "grad_norm": 1.3927485169549751, "learning_rate": 9.227999264028609e-06, "loss": 0.4735, "step": 8355 }, { "epoch": 0.7841591591591591, "grad_norm": 1.495636186675825, "learning_rate": 9.227707793193277e-06, "loss": 0.4706, "step": 8356 }, { "epoch": 0.784253003003003, "grad_norm": 1.0040933154604739, "learning_rate": 9.22741627195049e-06, "loss": 0.471, "step": 8357 }, { "epoch": 0.7843468468468469, "grad_norm": 1.3410809054989128, "learning_rate": 9.227124700303718e-06, "loss": 0.4379, "step": 8358 }, { "epoch": 0.7844406906906907, "grad_norm": 0.9397912867738729, "learning_rate": 9.22683307825644e-06, "loss": 0.4613, "step": 8359 }, { "epoch": 0.7845345345345346, "grad_norm": 1.2309729334879436, "learning_rate": 9.226541405812136e-06, "loss": 0.436, "step": 8360 }, { "epoch": 0.7846283783783784, "grad_norm": 1.0409527029086878, "learning_rate": 9.226249682974282e-06, "loss": 0.4351, "step": 8361 }, { "epoch": 0.7847222222222222, "grad_norm": 0.9292792651487312, "learning_rate": 9.225957909746354e-06, "loss": 0.4317, "step": 8362 }, { "epoch": 0.784816066066066, "grad_norm": 0.9536864301658075, "learning_rate": 9.225666086131832e-06, "loss": 0.5002, "step": 8363 }, { "epoch": 0.7849099099099099, "grad_norm": 1.2344989130444348, "learning_rate": 9.225374212134198e-06, "loss": 0.4542, "step": 8364 }, { "epoch": 0.7850037537537538, "grad_norm": 1.1636490153897887, "learning_rate": 9.225082287756928e-06, "loss": 0.5144, "step": 8365 }, { "epoch": 0.7850975975975976, "grad_norm": 1.0121112779703008, "learning_rate": 9.224790313003505e-06, "loss": 0.5088, "step": 8366 }, { "epoch": 0.7851914414414415, "grad_norm": 1.0519723967426953, "learning_rate": 9.22449828787741e-06, "loss": 0.5072, "step": 8367 }, { "epoch": 0.7852852852852853, "grad_norm": 2.0208885575274924, "learning_rate": 9.224206212382126e-06, "loss": 0.4395, "step": 8368 }, { "epoch": 0.7853791291291291, "grad_norm": 0.9819955094868564, "learning_rate": 9.223914086521132e-06, "loss": 0.4506, "step": 8369 }, { "epoch": 0.785472972972973, "grad_norm": 0.9652600667389182, "learning_rate": 9.223621910297916e-06, "loss": 0.4758, "step": 8370 }, { "epoch": 0.7855668168168168, "grad_norm": 0.9571499062150414, "learning_rate": 9.223329683715957e-06, "loss": 0.4745, "step": 8371 }, { "epoch": 0.7856606606606606, "grad_norm": 0.9639391126971084, "learning_rate": 9.223037406778741e-06, "loss": 0.4651, "step": 8372 }, { "epoch": 0.7857545045045045, "grad_norm": 2.02387641487549, "learning_rate": 9.222745079489755e-06, "loss": 0.4304, "step": 8373 }, { "epoch": 0.7858483483483484, "grad_norm": 1.1122045210422384, "learning_rate": 9.22245270185248e-06, "loss": 0.4615, "step": 8374 }, { "epoch": 0.7859421921921922, "grad_norm": 1.1188158975464026, "learning_rate": 9.222160273870404e-06, "loss": 0.4202, "step": 8375 }, { "epoch": 0.786036036036036, "grad_norm": 1.008255862851544, "learning_rate": 9.221867795547017e-06, "loss": 0.5027, "step": 8376 }, { "epoch": 0.7861298798798799, "grad_norm": 1.1623652312856667, "learning_rate": 9.2215752668858e-06, "loss": 0.4697, "step": 8377 }, { "epoch": 0.7862237237237237, "grad_norm": 1.1270726632858943, "learning_rate": 9.221282687890247e-06, "loss": 0.4287, "step": 8378 }, { "epoch": 0.7863175675675675, "grad_norm": 1.0742398485586635, "learning_rate": 9.220990058563842e-06, "loss": 0.4649, "step": 8379 }, { "epoch": 0.7864114114114115, "grad_norm": 1.487983057644852, "learning_rate": 9.220697378910077e-06, "loss": 0.4641, "step": 8380 }, { "epoch": 0.7865052552552553, "grad_norm": 1.5953870043828033, "learning_rate": 9.220404648932439e-06, "loss": 0.4545, "step": 8381 }, { "epoch": 0.7865990990990991, "grad_norm": 0.9365635868941207, "learning_rate": 9.220111868634422e-06, "loss": 0.4688, "step": 8382 }, { "epoch": 0.7866929429429429, "grad_norm": 3.9712698539953335, "learning_rate": 9.219819038019512e-06, "loss": 0.5185, "step": 8383 }, { "epoch": 0.7867867867867868, "grad_norm": 0.9721460677953216, "learning_rate": 9.219526157091205e-06, "loss": 0.4573, "step": 8384 }, { "epoch": 0.7868806306306306, "grad_norm": 1.7121654205588532, "learning_rate": 9.219233225852989e-06, "loss": 0.4908, "step": 8385 }, { "epoch": 0.7869744744744744, "grad_norm": 1.0927848869282126, "learning_rate": 9.218940244308361e-06, "loss": 0.4703, "step": 8386 }, { "epoch": 0.7870683183183184, "grad_norm": 0.8832289189171496, "learning_rate": 9.21864721246081e-06, "loss": 0.4269, "step": 8387 }, { "epoch": 0.7871621621621622, "grad_norm": 1.4374732568818078, "learning_rate": 9.218354130313832e-06, "loss": 0.4933, "step": 8388 }, { "epoch": 0.787256006006006, "grad_norm": 1.2045533836624072, "learning_rate": 9.21806099787092e-06, "loss": 0.4917, "step": 8389 }, { "epoch": 0.7873498498498499, "grad_norm": 0.977342679661435, "learning_rate": 9.217767815135573e-06, "loss": 0.4646, "step": 8390 }, { "epoch": 0.7874436936936937, "grad_norm": 1.2901922732021824, "learning_rate": 9.217474582111284e-06, "loss": 0.4537, "step": 8391 }, { "epoch": 0.7875375375375375, "grad_norm": 1.0494367175838561, "learning_rate": 9.217181298801547e-06, "loss": 0.4901, "step": 8392 }, { "epoch": 0.7876313813813813, "grad_norm": 1.197570438541736, "learning_rate": 9.216887965209864e-06, "loss": 0.4637, "step": 8393 }, { "epoch": 0.7877252252252253, "grad_norm": 1.1095266749279242, "learning_rate": 9.216594581339726e-06, "loss": 0.4801, "step": 8394 }, { "epoch": 0.7878190690690691, "grad_norm": 1.0413559640892767, "learning_rate": 9.216301147194637e-06, "loss": 0.434, "step": 8395 }, { "epoch": 0.7879129129129129, "grad_norm": 0.9611466908954922, "learning_rate": 9.216007662778093e-06, "loss": 0.5261, "step": 8396 }, { "epoch": 0.7880067567567568, "grad_norm": 1.0716131820336232, "learning_rate": 9.215714128093591e-06, "loss": 0.4752, "step": 8397 }, { "epoch": 0.7881006006006006, "grad_norm": 0.8803456656852099, "learning_rate": 9.215420543144636e-06, "loss": 0.4874, "step": 8398 }, { "epoch": 0.7881944444444444, "grad_norm": 0.955636236317669, "learning_rate": 9.215126907934723e-06, "loss": 0.4992, "step": 8399 }, { "epoch": 0.7882882882882883, "grad_norm": 1.2096993407562304, "learning_rate": 9.214833222467358e-06, "loss": 0.3889, "step": 8400 }, { "epoch": 0.7883821321321322, "grad_norm": 1.0577319325379386, "learning_rate": 9.214539486746041e-06, "loss": 0.4336, "step": 8401 }, { "epoch": 0.788475975975976, "grad_norm": 1.1386067823737784, "learning_rate": 9.214245700774273e-06, "loss": 0.4925, "step": 8402 }, { "epoch": 0.7885698198198198, "grad_norm": 1.0354779208153224, "learning_rate": 9.213951864555556e-06, "loss": 0.4532, "step": 8403 }, { "epoch": 0.7886636636636637, "grad_norm": 1.1140228183373118, "learning_rate": 9.213657978093398e-06, "loss": 0.4334, "step": 8404 }, { "epoch": 0.7887575075075075, "grad_norm": 0.9494870274057969, "learning_rate": 9.213364041391297e-06, "loss": 0.498, "step": 8405 }, { "epoch": 0.7888513513513513, "grad_norm": 1.0672276138120615, "learning_rate": 9.213070054452761e-06, "loss": 0.5199, "step": 8406 }, { "epoch": 0.7889451951951952, "grad_norm": 1.0249865448632758, "learning_rate": 9.212776017281297e-06, "loss": 0.4604, "step": 8407 }, { "epoch": 0.789039039039039, "grad_norm": 1.125089251777866, "learning_rate": 9.212481929880406e-06, "loss": 0.4865, "step": 8408 }, { "epoch": 0.7891328828828829, "grad_norm": 1.3742405922111813, "learning_rate": 9.2121877922536e-06, "loss": 0.5187, "step": 8409 }, { "epoch": 0.7892267267267268, "grad_norm": 1.0538442289429717, "learning_rate": 9.211893604404381e-06, "loss": 0.4579, "step": 8410 }, { "epoch": 0.7893205705705706, "grad_norm": 1.2874213089018471, "learning_rate": 9.21159936633626e-06, "loss": 0.4456, "step": 8411 }, { "epoch": 0.7894144144144144, "grad_norm": 3.650450499343373, "learning_rate": 9.211305078052744e-06, "loss": 0.4582, "step": 8412 }, { "epoch": 0.7895082582582582, "grad_norm": 1.0976246928710793, "learning_rate": 9.21101073955734e-06, "loss": 0.4494, "step": 8413 }, { "epoch": 0.7896021021021021, "grad_norm": 1.0059960346049963, "learning_rate": 9.210716350853563e-06, "loss": 0.4394, "step": 8414 }, { "epoch": 0.7896959459459459, "grad_norm": 1.7695779136455323, "learning_rate": 9.210421911944917e-06, "loss": 0.5302, "step": 8415 }, { "epoch": 0.7897897897897898, "grad_norm": 1.5938286838281661, "learning_rate": 9.210127422834916e-06, "loss": 0.5, "step": 8416 }, { "epoch": 0.7898836336336337, "grad_norm": 1.1772510392874103, "learning_rate": 9.209832883527067e-06, "loss": 0.4967, "step": 8417 }, { "epoch": 0.7899774774774775, "grad_norm": 1.0121236348614846, "learning_rate": 9.209538294024888e-06, "loss": 0.4931, "step": 8418 }, { "epoch": 0.7900713213213213, "grad_norm": 1.506421412285974, "learning_rate": 9.20924365433189e-06, "loss": 0.4754, "step": 8419 }, { "epoch": 0.7901651651651652, "grad_norm": 1.1307115017605063, "learning_rate": 9.208948964451582e-06, "loss": 0.4642, "step": 8420 }, { "epoch": 0.790259009009009, "grad_norm": 1.4569405754786418, "learning_rate": 9.20865422438748e-06, "loss": 0.4879, "step": 8421 }, { "epoch": 0.7903528528528528, "grad_norm": 1.0600364028506422, "learning_rate": 9.2083594341431e-06, "loss": 0.4544, "step": 8422 }, { "epoch": 0.7904466966966966, "grad_norm": 1.2962808093524718, "learning_rate": 9.208064593721955e-06, "loss": 0.4233, "step": 8423 }, { "epoch": 0.7905405405405406, "grad_norm": 1.1989320262730034, "learning_rate": 9.207769703127559e-06, "loss": 0.4707, "step": 8424 }, { "epoch": 0.7906343843843844, "grad_norm": 1.214125314256804, "learning_rate": 9.20747476236343e-06, "loss": 0.5028, "step": 8425 }, { "epoch": 0.7907282282282282, "grad_norm": 1.9795060123395123, "learning_rate": 9.207179771433086e-06, "loss": 0.5156, "step": 8426 }, { "epoch": 0.7908220720720721, "grad_norm": 1.8253626323516898, "learning_rate": 9.20688473034004e-06, "loss": 0.4881, "step": 8427 }, { "epoch": 0.7909159159159159, "grad_norm": 1.099161483895328, "learning_rate": 9.206589639087813e-06, "loss": 0.4797, "step": 8428 }, { "epoch": 0.7910097597597597, "grad_norm": 1.0028570895533562, "learning_rate": 9.206294497679924e-06, "loss": 0.452, "step": 8429 }, { "epoch": 0.7911036036036037, "grad_norm": 1.0062499024166112, "learning_rate": 9.20599930611989e-06, "loss": 0.4782, "step": 8430 }, { "epoch": 0.7911974474474475, "grad_norm": 1.1244699983031123, "learning_rate": 9.20570406441123e-06, "loss": 0.4545, "step": 8431 }, { "epoch": 0.7912912912912913, "grad_norm": 0.9931352265496914, "learning_rate": 9.205408772557467e-06, "loss": 0.4706, "step": 8432 }, { "epoch": 0.7913851351351351, "grad_norm": 1.7136620285930404, "learning_rate": 9.205113430562119e-06, "loss": 0.4624, "step": 8433 }, { "epoch": 0.791478978978979, "grad_norm": 1.2385473348987874, "learning_rate": 9.204818038428709e-06, "loss": 0.4562, "step": 8434 }, { "epoch": 0.7915728228228228, "grad_norm": 0.8632098005325023, "learning_rate": 9.204522596160758e-06, "loss": 0.4553, "step": 8435 }, { "epoch": 0.7916666666666666, "grad_norm": 0.930836382470628, "learning_rate": 9.204227103761791e-06, "loss": 0.4821, "step": 8436 }, { "epoch": 0.7917605105105106, "grad_norm": 0.9799792730387216, "learning_rate": 9.203931561235328e-06, "loss": 0.4384, "step": 8437 }, { "epoch": 0.7918543543543544, "grad_norm": 1.068239404121673, "learning_rate": 9.203635968584896e-06, "loss": 0.4774, "step": 8438 }, { "epoch": 0.7919481981981982, "grad_norm": 1.0122885203216558, "learning_rate": 9.203340325814017e-06, "loss": 0.4622, "step": 8439 }, { "epoch": 0.7920420420420421, "grad_norm": 1.424347360785776, "learning_rate": 9.203044632926215e-06, "loss": 0.4692, "step": 8440 }, { "epoch": 0.7921358858858859, "grad_norm": 1.0022730700378293, "learning_rate": 9.202748889925019e-06, "loss": 0.4499, "step": 8441 }, { "epoch": 0.7922297297297297, "grad_norm": 1.0121337027497679, "learning_rate": 9.202453096813952e-06, "loss": 0.4487, "step": 8442 }, { "epoch": 0.7923235735735735, "grad_norm": 0.9838982055241846, "learning_rate": 9.202157253596542e-06, "loss": 0.4589, "step": 8443 }, { "epoch": 0.7924174174174174, "grad_norm": 1.0490885876719813, "learning_rate": 9.201861360276319e-06, "loss": 0.4596, "step": 8444 }, { "epoch": 0.7925112612612613, "grad_norm": 1.4752704141628616, "learning_rate": 9.201565416856805e-06, "loss": 0.4628, "step": 8445 }, { "epoch": 0.7926051051051051, "grad_norm": 1.1540829992095172, "learning_rate": 9.201269423341534e-06, "loss": 0.4824, "step": 8446 }, { "epoch": 0.792698948948949, "grad_norm": 0.9599333403628941, "learning_rate": 9.200973379734033e-06, "loss": 0.5134, "step": 8447 }, { "epoch": 0.7927927927927928, "grad_norm": 1.0234331291852383, "learning_rate": 9.200677286037831e-06, "loss": 0.4779, "step": 8448 }, { "epoch": 0.7928866366366366, "grad_norm": 0.9155005308701322, "learning_rate": 9.20038114225646e-06, "loss": 0.4548, "step": 8449 }, { "epoch": 0.7929804804804805, "grad_norm": 1.1308725800761898, "learning_rate": 9.200084948393451e-06, "loss": 0.4658, "step": 8450 }, { "epoch": 0.7930743243243243, "grad_norm": 1.653631832584905, "learning_rate": 9.199788704452335e-06, "loss": 0.4909, "step": 8451 }, { "epoch": 0.7931681681681682, "grad_norm": 1.0299499754116213, "learning_rate": 9.199492410436642e-06, "loss": 0.4708, "step": 8452 }, { "epoch": 0.793262012012012, "grad_norm": 1.3080729302982195, "learning_rate": 9.19919606634991e-06, "loss": 0.4972, "step": 8453 }, { "epoch": 0.7933558558558559, "grad_norm": 1.2361499457224503, "learning_rate": 9.198899672195665e-06, "loss": 0.5167, "step": 8454 }, { "epoch": 0.7934496996996997, "grad_norm": 1.046708533580346, "learning_rate": 9.198603227977448e-06, "loss": 0.4473, "step": 8455 }, { "epoch": 0.7935435435435435, "grad_norm": 0.8379815164752942, "learning_rate": 9.19830673369879e-06, "loss": 0.4382, "step": 8456 }, { "epoch": 0.7936373873873874, "grad_norm": 1.207351318816378, "learning_rate": 9.198010189363227e-06, "loss": 0.499, "step": 8457 }, { "epoch": 0.7937312312312312, "grad_norm": 2.3293795068971996, "learning_rate": 9.197713594974293e-06, "loss": 0.5242, "step": 8458 }, { "epoch": 0.793825075075075, "grad_norm": 1.0376944014475826, "learning_rate": 9.197416950535527e-06, "loss": 0.4619, "step": 8459 }, { "epoch": 0.793918918918919, "grad_norm": 1.084976796279464, "learning_rate": 9.197120256050464e-06, "loss": 0.4254, "step": 8460 }, { "epoch": 0.7940127627627628, "grad_norm": 0.9883812819760133, "learning_rate": 9.19682351152264e-06, "loss": 0.5083, "step": 8461 }, { "epoch": 0.7941066066066066, "grad_norm": 1.2858776247220343, "learning_rate": 9.1965267169556e-06, "loss": 0.4832, "step": 8462 }, { "epoch": 0.7942004504504504, "grad_norm": 1.1334307355948254, "learning_rate": 9.196229872352876e-06, "loss": 0.4555, "step": 8463 }, { "epoch": 0.7942942942942943, "grad_norm": 1.174231855726173, "learning_rate": 9.195932977718009e-06, "loss": 0.4889, "step": 8464 }, { "epoch": 0.7943881381381381, "grad_norm": 1.0237877777183233, "learning_rate": 9.19563603305454e-06, "loss": 0.4716, "step": 8465 }, { "epoch": 0.7944819819819819, "grad_norm": 1.1697665087135622, "learning_rate": 9.195339038366008e-06, "loss": 0.4742, "step": 8466 }, { "epoch": 0.7945758258258259, "grad_norm": 0.9881776446067884, "learning_rate": 9.195041993655956e-06, "loss": 0.5038, "step": 8467 }, { "epoch": 0.7946696696696697, "grad_norm": 0.9917964479786904, "learning_rate": 9.194744898927922e-06, "loss": 0.5246, "step": 8468 }, { "epoch": 0.7947635135135135, "grad_norm": 0.905741495547723, "learning_rate": 9.194447754185454e-06, "loss": 0.4544, "step": 8469 }, { "epoch": 0.7948573573573574, "grad_norm": 1.0800378965924304, "learning_rate": 9.19415055943209e-06, "loss": 0.4651, "step": 8470 }, { "epoch": 0.7949512012012012, "grad_norm": 1.1007359072021232, "learning_rate": 9.193853314671374e-06, "loss": 0.4866, "step": 8471 }, { "epoch": 0.795045045045045, "grad_norm": 1.1040560981240461, "learning_rate": 9.193556019906854e-06, "loss": 0.5083, "step": 8472 }, { "epoch": 0.7951388888888888, "grad_norm": 0.977536036525139, "learning_rate": 9.193258675142072e-06, "loss": 0.4738, "step": 8473 }, { "epoch": 0.7952327327327328, "grad_norm": 1.1423015240625698, "learning_rate": 9.192961280380572e-06, "loss": 0.4968, "step": 8474 }, { "epoch": 0.7953265765765766, "grad_norm": 1.2745410673699986, "learning_rate": 9.192663835625902e-06, "loss": 0.4907, "step": 8475 }, { "epoch": 0.7954204204204204, "grad_norm": 0.9087093832340737, "learning_rate": 9.192366340881605e-06, "loss": 0.4651, "step": 8476 }, { "epoch": 0.7955142642642643, "grad_norm": 0.9455283944231364, "learning_rate": 9.192068796151233e-06, "loss": 0.444, "step": 8477 }, { "epoch": 0.7956081081081081, "grad_norm": 1.2678275589682382, "learning_rate": 9.191771201438333e-06, "loss": 0.4802, "step": 8478 }, { "epoch": 0.7957019519519519, "grad_norm": 1.824177772681134, "learning_rate": 9.191473556746448e-06, "loss": 0.5021, "step": 8479 }, { "epoch": 0.7957957957957958, "grad_norm": 1.2176219350070998, "learning_rate": 9.191175862079133e-06, "loss": 0.5086, "step": 8480 }, { "epoch": 0.7958896396396397, "grad_norm": 0.9344417406487058, "learning_rate": 9.190878117439934e-06, "loss": 0.4852, "step": 8481 }, { "epoch": 0.7959834834834835, "grad_norm": 1.3037934499459585, "learning_rate": 9.190580322832401e-06, "loss": 0.4858, "step": 8482 }, { "epoch": 0.7960773273273273, "grad_norm": 1.1098647516998803, "learning_rate": 9.190282478260087e-06, "loss": 0.4483, "step": 8483 }, { "epoch": 0.7961711711711712, "grad_norm": 1.0944579630257942, "learning_rate": 9.189984583726543e-06, "loss": 0.4884, "step": 8484 }, { "epoch": 0.796265015015015, "grad_norm": 1.979748767681968, "learning_rate": 9.189686639235317e-06, "loss": 0.5269, "step": 8485 }, { "epoch": 0.7963588588588588, "grad_norm": 1.1368289323768885, "learning_rate": 9.189388644789966e-06, "loss": 0.5179, "step": 8486 }, { "epoch": 0.7964527027027027, "grad_norm": 1.3978050245798228, "learning_rate": 9.18909060039404e-06, "loss": 0.5028, "step": 8487 }, { "epoch": 0.7965465465465466, "grad_norm": 0.8807071287329937, "learning_rate": 9.188792506051094e-06, "loss": 0.454, "step": 8488 }, { "epoch": 0.7966403903903904, "grad_norm": 1.216624970927445, "learning_rate": 9.188494361764682e-06, "loss": 0.4311, "step": 8489 }, { "epoch": 0.7967342342342343, "grad_norm": 1.1542113647965049, "learning_rate": 9.18819616753836e-06, "loss": 0.4906, "step": 8490 }, { "epoch": 0.7968280780780781, "grad_norm": 0.9712520983617593, "learning_rate": 9.187897923375682e-06, "loss": 0.4623, "step": 8491 }, { "epoch": 0.7969219219219219, "grad_norm": 0.9123209924904444, "learning_rate": 9.187599629280201e-06, "loss": 0.4482, "step": 8492 }, { "epoch": 0.7970157657657657, "grad_norm": 1.5777779177821067, "learning_rate": 9.18730128525548e-06, "loss": 0.5326, "step": 8493 }, { "epoch": 0.7971096096096096, "grad_norm": 1.2727176851986697, "learning_rate": 9.187002891305073e-06, "loss": 0.4561, "step": 8494 }, { "epoch": 0.7972034534534534, "grad_norm": 0.9874118533295959, "learning_rate": 9.186704447432536e-06, "loss": 0.4757, "step": 8495 }, { "epoch": 0.7972972972972973, "grad_norm": 0.980265921211228, "learning_rate": 9.186405953641432e-06, "loss": 0.4694, "step": 8496 }, { "epoch": 0.7973911411411412, "grad_norm": 0.9341876380831503, "learning_rate": 9.186107409935313e-06, "loss": 0.5007, "step": 8497 }, { "epoch": 0.797484984984985, "grad_norm": 1.1025318401193525, "learning_rate": 9.185808816317745e-06, "loss": 0.5076, "step": 8498 }, { "epoch": 0.7975788288288288, "grad_norm": 1.8379266073786718, "learning_rate": 9.185510172792285e-06, "loss": 0.4893, "step": 8499 }, { "epoch": 0.7976726726726727, "grad_norm": 1.0941499234033412, "learning_rate": 9.185211479362494e-06, "loss": 0.4645, "step": 8500 }, { "epoch": 0.7977665165165165, "grad_norm": 1.576497597195925, "learning_rate": 9.184912736031936e-06, "loss": 0.4811, "step": 8501 }, { "epoch": 0.7978603603603603, "grad_norm": 2.6674972372137105, "learning_rate": 9.18461394280417e-06, "loss": 0.5038, "step": 8502 }, { "epoch": 0.7979542042042042, "grad_norm": 0.9450980742943076, "learning_rate": 9.18431509968276e-06, "loss": 0.456, "step": 8503 }, { "epoch": 0.7980480480480481, "grad_norm": 1.0374442327009128, "learning_rate": 9.184016206671266e-06, "loss": 0.4307, "step": 8504 }, { "epoch": 0.7981418918918919, "grad_norm": 1.1222100250142648, "learning_rate": 9.183717263773258e-06, "loss": 0.455, "step": 8505 }, { "epoch": 0.7982357357357357, "grad_norm": 1.565178621498376, "learning_rate": 9.183418270992294e-06, "loss": 0.5089, "step": 8506 }, { "epoch": 0.7983295795795796, "grad_norm": 2.0632643439495997, "learning_rate": 9.183119228331943e-06, "loss": 0.4417, "step": 8507 }, { "epoch": 0.7984234234234234, "grad_norm": 1.2708517053780144, "learning_rate": 9.182820135795769e-06, "loss": 0.4442, "step": 8508 }, { "epoch": 0.7985172672672672, "grad_norm": 0.9297091365909785, "learning_rate": 9.182520993387339e-06, "loss": 0.401, "step": 8509 }, { "epoch": 0.7986111111111112, "grad_norm": 1.0738721621687162, "learning_rate": 9.182221801110216e-06, "loss": 0.4348, "step": 8510 }, { "epoch": 0.798704954954955, "grad_norm": 0.980440606045692, "learning_rate": 9.181922558967973e-06, "loss": 0.496, "step": 8511 }, { "epoch": 0.7987987987987988, "grad_norm": 0.9198756847807469, "learning_rate": 9.181623266964175e-06, "loss": 0.398, "step": 8512 }, { "epoch": 0.7988926426426426, "grad_norm": 0.9833282336033187, "learning_rate": 9.18132392510239e-06, "loss": 0.4433, "step": 8513 }, { "epoch": 0.7989864864864865, "grad_norm": 1.200619516299495, "learning_rate": 9.181024533386188e-06, "loss": 0.4871, "step": 8514 }, { "epoch": 0.7990803303303303, "grad_norm": 0.9381045335020975, "learning_rate": 9.180725091819139e-06, "loss": 0.4376, "step": 8515 }, { "epoch": 0.7991741741741741, "grad_norm": 5.3648331827987, "learning_rate": 9.18042560040481e-06, "loss": 0.4255, "step": 8516 }, { "epoch": 0.7992680180180181, "grad_norm": 1.140230659221755, "learning_rate": 9.180126059146777e-06, "loss": 0.4768, "step": 8517 }, { "epoch": 0.7993618618618619, "grad_norm": 1.1548027972910269, "learning_rate": 9.179826468048607e-06, "loss": 0.5139, "step": 8518 }, { "epoch": 0.7994557057057057, "grad_norm": 1.1810095398364364, "learning_rate": 9.179526827113877e-06, "loss": 0.493, "step": 8519 }, { "epoch": 0.7995495495495496, "grad_norm": 1.0881532908912803, "learning_rate": 9.179227136346154e-06, "loss": 0.4299, "step": 8520 }, { "epoch": 0.7996433933933934, "grad_norm": 0.986983142008171, "learning_rate": 9.178927395749016e-06, "loss": 0.4515, "step": 8521 }, { "epoch": 0.7997372372372372, "grad_norm": 0.902278502303642, "learning_rate": 9.178627605326034e-06, "loss": 0.4786, "step": 8522 }, { "epoch": 0.799831081081081, "grad_norm": 1.0926536543564236, "learning_rate": 9.178327765080784e-06, "loss": 0.4823, "step": 8523 }, { "epoch": 0.799924924924925, "grad_norm": 1.162380096255303, "learning_rate": 9.178027875016839e-06, "loss": 0.4901, "step": 8524 }, { "epoch": 0.8000187687687688, "grad_norm": 1.6043284404758877, "learning_rate": 9.177727935137776e-06, "loss": 0.4808, "step": 8525 }, { "epoch": 0.8001126126126126, "grad_norm": 1.2003123688644632, "learning_rate": 9.177427945447171e-06, "loss": 0.4907, "step": 8526 }, { "epoch": 0.8002064564564565, "grad_norm": 0.9386412801468749, "learning_rate": 9.177127905948601e-06, "loss": 0.46, "step": 8527 }, { "epoch": 0.8003003003003003, "grad_norm": 1.6403133120074869, "learning_rate": 9.176827816645644e-06, "loss": 0.4861, "step": 8528 }, { "epoch": 0.8003941441441441, "grad_norm": 1.2553627206440998, "learning_rate": 9.176527677541878e-06, "loss": 0.4305, "step": 8529 }, { "epoch": 0.800487987987988, "grad_norm": 1.0466590087203838, "learning_rate": 9.17622748864088e-06, "loss": 0.5206, "step": 8530 }, { "epoch": 0.8005818318318318, "grad_norm": 0.9542511750786399, "learning_rate": 9.17592724994623e-06, "loss": 0.4612, "step": 8531 }, { "epoch": 0.8006756756756757, "grad_norm": 1.0042296546235645, "learning_rate": 9.175626961461508e-06, "loss": 0.5135, "step": 8532 }, { "epoch": 0.8007695195195195, "grad_norm": 1.0830719132982765, "learning_rate": 9.175326623190294e-06, "loss": 0.4939, "step": 8533 }, { "epoch": 0.8008633633633634, "grad_norm": 0.9592365890618717, "learning_rate": 9.17502623513617e-06, "loss": 0.4639, "step": 8534 }, { "epoch": 0.8009572072072072, "grad_norm": 1.2131050851201184, "learning_rate": 9.174725797302717e-06, "loss": 0.4836, "step": 8535 }, { "epoch": 0.801051051051051, "grad_norm": 1.0663628798807772, "learning_rate": 9.174425309693518e-06, "loss": 0.5075, "step": 8536 }, { "epoch": 0.8011448948948949, "grad_norm": 1.1475579362042132, "learning_rate": 9.174124772312152e-06, "loss": 0.4169, "step": 8537 }, { "epoch": 0.8012387387387387, "grad_norm": 0.9601895551990238, "learning_rate": 9.173824185162208e-06, "loss": 0.4628, "step": 8538 }, { "epoch": 0.8013325825825826, "grad_norm": 1.0044138915498075, "learning_rate": 9.173523548247265e-06, "loss": 0.4632, "step": 8539 }, { "epoch": 0.8014264264264265, "grad_norm": 0.9079462353116468, "learning_rate": 9.17322286157091e-06, "loss": 0.436, "step": 8540 }, { "epoch": 0.8015202702702703, "grad_norm": 1.3511988396071752, "learning_rate": 9.172922125136728e-06, "loss": 0.4683, "step": 8541 }, { "epoch": 0.8016141141141141, "grad_norm": 1.157874168040443, "learning_rate": 9.172621338948303e-06, "loss": 0.5086, "step": 8542 }, { "epoch": 0.8017079579579579, "grad_norm": 1.443652405501652, "learning_rate": 9.172320503009225e-06, "loss": 0.4899, "step": 8543 }, { "epoch": 0.8018018018018018, "grad_norm": 1.0803211654332237, "learning_rate": 9.172019617323075e-06, "loss": 0.442, "step": 8544 }, { "epoch": 0.8018956456456456, "grad_norm": 2.00396169145835, "learning_rate": 9.171718681893447e-06, "loss": 0.4692, "step": 8545 }, { "epoch": 0.8019894894894894, "grad_norm": 1.0947778148755891, "learning_rate": 9.171417696723925e-06, "loss": 0.4935, "step": 8546 }, { "epoch": 0.8020833333333334, "grad_norm": 1.2892307998641068, "learning_rate": 9.1711166618181e-06, "loss": 0.5241, "step": 8547 }, { "epoch": 0.8021771771771772, "grad_norm": 1.2986797701574941, "learning_rate": 9.17081557717956e-06, "loss": 0.4269, "step": 8548 }, { "epoch": 0.802271021021021, "grad_norm": 1.1694155444856786, "learning_rate": 9.170514442811896e-06, "loss": 0.5036, "step": 8549 }, { "epoch": 0.8023648648648649, "grad_norm": 0.9362774507027999, "learning_rate": 9.170213258718695e-06, "loss": 0.4559, "step": 8550 }, { "epoch": 0.8024587087087087, "grad_norm": 1.1065311373883397, "learning_rate": 9.169912024903551e-06, "loss": 0.4878, "step": 8551 }, { "epoch": 0.8025525525525525, "grad_norm": 0.9267259090741087, "learning_rate": 9.169610741370056e-06, "loss": 0.4698, "step": 8552 }, { "epoch": 0.8026463963963963, "grad_norm": 1.1711290294427454, "learning_rate": 9.1693094081218e-06, "loss": 0.4789, "step": 8553 }, { "epoch": 0.8027402402402403, "grad_norm": 0.9947075834013842, "learning_rate": 9.169008025162377e-06, "loss": 0.4361, "step": 8554 }, { "epoch": 0.8028340840840841, "grad_norm": 1.1728455131190765, "learning_rate": 9.168706592495385e-06, "loss": 0.491, "step": 8555 }, { "epoch": 0.8029279279279279, "grad_norm": 0.908831477749286, "learning_rate": 9.168405110124409e-06, "loss": 0.4705, "step": 8556 }, { "epoch": 0.8030217717717718, "grad_norm": 1.442092315265903, "learning_rate": 9.16810357805305e-06, "loss": 0.5, "step": 8557 }, { "epoch": 0.8031156156156156, "grad_norm": 0.9406410010407237, "learning_rate": 9.167801996284903e-06, "loss": 0.4629, "step": 8558 }, { "epoch": 0.8032094594594594, "grad_norm": 1.1419348609136148, "learning_rate": 9.167500364823558e-06, "loss": 0.4813, "step": 8559 }, { "epoch": 0.8033033033033034, "grad_norm": 1.1186889456682427, "learning_rate": 9.167198683672618e-06, "loss": 0.4297, "step": 8560 }, { "epoch": 0.8033971471471472, "grad_norm": 1.3366985089842527, "learning_rate": 9.16689695283568e-06, "loss": 0.4718, "step": 8561 }, { "epoch": 0.803490990990991, "grad_norm": 1.0755935861986594, "learning_rate": 9.166595172316337e-06, "loss": 0.5115, "step": 8562 }, { "epoch": 0.8035848348348348, "grad_norm": 1.0279918973620616, "learning_rate": 9.16629334211819e-06, "loss": 0.4846, "step": 8563 }, { "epoch": 0.8036786786786787, "grad_norm": 1.201632009424675, "learning_rate": 9.165991462244835e-06, "loss": 0.4959, "step": 8564 }, { "epoch": 0.8037725225225225, "grad_norm": 1.091231020326058, "learning_rate": 9.165689532699875e-06, "loss": 0.4731, "step": 8565 }, { "epoch": 0.8038663663663663, "grad_norm": 1.0545122285250235, "learning_rate": 9.165387553486908e-06, "loss": 0.4655, "step": 8566 }, { "epoch": 0.8039602102102102, "grad_norm": 1.229088692633925, "learning_rate": 9.165085524609536e-06, "loss": 0.5096, "step": 8567 }, { "epoch": 0.8040540540540541, "grad_norm": 1.705849780194243, "learning_rate": 9.16478344607136e-06, "loss": 0.5152, "step": 8568 }, { "epoch": 0.8041478978978979, "grad_norm": 1.142806407279716, "learning_rate": 9.164481317875978e-06, "loss": 0.4367, "step": 8569 }, { "epoch": 0.8042417417417418, "grad_norm": 0.8730775351201077, "learning_rate": 9.164179140026996e-06, "loss": 0.4106, "step": 8570 }, { "epoch": 0.8043355855855856, "grad_norm": 0.999503747905582, "learning_rate": 9.163876912528018e-06, "loss": 0.5054, "step": 8571 }, { "epoch": 0.8044294294294294, "grad_norm": 0.9602365685128883, "learning_rate": 9.163574635382643e-06, "loss": 0.4656, "step": 8572 }, { "epoch": 0.8045232732732732, "grad_norm": 1.6533695737001732, "learning_rate": 9.16327230859448e-06, "loss": 0.4822, "step": 8573 }, { "epoch": 0.8046171171171171, "grad_norm": 1.2042950902650782, "learning_rate": 9.16296993216713e-06, "loss": 0.4752, "step": 8574 }, { "epoch": 0.804710960960961, "grad_norm": 0.9820919437219302, "learning_rate": 9.1626675061042e-06, "loss": 0.4822, "step": 8575 }, { "epoch": 0.8048048048048048, "grad_norm": 1.164195016015774, "learning_rate": 9.162365030409294e-06, "loss": 0.4476, "step": 8576 }, { "epoch": 0.8048986486486487, "grad_norm": 1.065039432930313, "learning_rate": 9.16206250508602e-06, "loss": 0.5302, "step": 8577 }, { "epoch": 0.8049924924924925, "grad_norm": 0.9751119111605463, "learning_rate": 9.161759930137986e-06, "loss": 0.4136, "step": 8578 }, { "epoch": 0.8050863363363363, "grad_norm": 0.894507445160255, "learning_rate": 9.161457305568799e-06, "loss": 0.4509, "step": 8579 }, { "epoch": 0.8051801801801802, "grad_norm": 1.4632560841196436, "learning_rate": 9.161154631382066e-06, "loss": 0.4955, "step": 8580 }, { "epoch": 0.805274024024024, "grad_norm": 1.2381622132179233, "learning_rate": 9.160851907581395e-06, "loss": 0.436, "step": 8581 }, { "epoch": 0.8053678678678678, "grad_norm": 1.1496948049413358, "learning_rate": 9.160549134170399e-06, "loss": 0.4798, "step": 8582 }, { "epoch": 0.8054617117117117, "grad_norm": 1.164076430359805, "learning_rate": 9.160246311152684e-06, "loss": 0.4582, "step": 8583 }, { "epoch": 0.8055555555555556, "grad_norm": 0.9066945976007638, "learning_rate": 9.159943438531865e-06, "loss": 0.3708, "step": 8584 }, { "epoch": 0.8056493993993994, "grad_norm": 0.9924992355435567, "learning_rate": 9.15964051631155e-06, "loss": 0.5311, "step": 8585 }, { "epoch": 0.8057432432432432, "grad_norm": 1.3491226709573045, "learning_rate": 9.159337544495352e-06, "loss": 0.486, "step": 8586 }, { "epoch": 0.8058370870870871, "grad_norm": 1.2755697691042955, "learning_rate": 9.159034523086883e-06, "loss": 0.4453, "step": 8587 }, { "epoch": 0.8059309309309309, "grad_norm": 1.0432455769798932, "learning_rate": 9.158731452089755e-06, "loss": 0.5264, "step": 8588 }, { "epoch": 0.8060247747747747, "grad_norm": 1.0719107103325343, "learning_rate": 9.158428331507583e-06, "loss": 0.4352, "step": 8589 }, { "epoch": 0.8061186186186187, "grad_norm": 1.8966283989201027, "learning_rate": 9.158125161343979e-06, "loss": 0.475, "step": 8590 }, { "epoch": 0.8062124624624625, "grad_norm": 1.0556678576449985, "learning_rate": 9.157821941602561e-06, "loss": 0.471, "step": 8591 }, { "epoch": 0.8063063063063063, "grad_norm": 1.195845503416763, "learning_rate": 9.157518672286943e-06, "loss": 0.4398, "step": 8592 }, { "epoch": 0.8064001501501501, "grad_norm": 1.4090457853863358, "learning_rate": 9.157215353400738e-06, "loss": 0.4991, "step": 8593 }, { "epoch": 0.806493993993994, "grad_norm": 1.1717257000846575, "learning_rate": 9.156911984947567e-06, "loss": 0.4793, "step": 8594 }, { "epoch": 0.8065878378378378, "grad_norm": 0.9731723358866123, "learning_rate": 9.156608566931047e-06, "loss": 0.4425, "step": 8595 }, { "epoch": 0.8066816816816816, "grad_norm": 0.9749616233967066, "learning_rate": 9.15630509935479e-06, "loss": 0.5026, "step": 8596 }, { "epoch": 0.8067755255255256, "grad_norm": 1.1280454660436037, "learning_rate": 9.15600158222242e-06, "loss": 0.4928, "step": 8597 }, { "epoch": 0.8068693693693694, "grad_norm": 1.1259714429724677, "learning_rate": 9.155698015537555e-06, "loss": 0.5028, "step": 8598 }, { "epoch": 0.8069632132132132, "grad_norm": 1.646782457572077, "learning_rate": 9.155394399303812e-06, "loss": 0.5047, "step": 8599 }, { "epoch": 0.8070570570570571, "grad_norm": 1.1911334549050354, "learning_rate": 9.155090733524814e-06, "loss": 0.4643, "step": 8600 }, { "epoch": 0.8071509009009009, "grad_norm": 1.0485328517223873, "learning_rate": 9.154787018204178e-06, "loss": 0.5106, "step": 8601 }, { "epoch": 0.8072447447447447, "grad_norm": 0.9606428833263919, "learning_rate": 9.15448325334553e-06, "loss": 0.4682, "step": 8602 }, { "epoch": 0.8073385885885885, "grad_norm": 0.9927234508369372, "learning_rate": 9.154179438952486e-06, "loss": 0.4501, "step": 8603 }, { "epoch": 0.8074324324324325, "grad_norm": 0.9184672210249053, "learning_rate": 9.153875575028674e-06, "loss": 0.4641, "step": 8604 }, { "epoch": 0.8075262762762763, "grad_norm": 1.07349004369291, "learning_rate": 9.153571661577713e-06, "loss": 0.4194, "step": 8605 }, { "epoch": 0.8076201201201201, "grad_norm": 0.9457119153398033, "learning_rate": 9.153267698603231e-06, "loss": 0.4059, "step": 8606 }, { "epoch": 0.807713963963964, "grad_norm": 1.0542706150783951, "learning_rate": 9.152963686108848e-06, "loss": 0.4346, "step": 8607 }, { "epoch": 0.8078078078078078, "grad_norm": 1.5026359614337588, "learning_rate": 9.15265962409819e-06, "loss": 0.4708, "step": 8608 }, { "epoch": 0.8079016516516516, "grad_norm": 1.104087225273409, "learning_rate": 9.152355512574882e-06, "loss": 0.439, "step": 8609 }, { "epoch": 0.8079954954954955, "grad_norm": 0.9658530947279649, "learning_rate": 9.152051351542552e-06, "loss": 0.4907, "step": 8610 }, { "epoch": 0.8080893393393394, "grad_norm": 1.219871947065735, "learning_rate": 9.151747141004824e-06, "loss": 0.4528, "step": 8611 }, { "epoch": 0.8081831831831832, "grad_norm": 1.1318986409771192, "learning_rate": 9.151442880965324e-06, "loss": 0.3888, "step": 8612 }, { "epoch": 0.808277027027027, "grad_norm": 0.9926778258179484, "learning_rate": 9.151138571427686e-06, "loss": 0.5261, "step": 8613 }, { "epoch": 0.8083708708708709, "grad_norm": 1.009298675851382, "learning_rate": 9.150834212395531e-06, "loss": 0.4654, "step": 8614 }, { "epoch": 0.8084647147147147, "grad_norm": 1.0158944126287348, "learning_rate": 9.150529803872493e-06, "loss": 0.4757, "step": 8615 }, { "epoch": 0.8085585585585585, "grad_norm": 0.8984546491315788, "learning_rate": 9.1502253458622e-06, "loss": 0.4671, "step": 8616 }, { "epoch": 0.8086524024024024, "grad_norm": 0.9744251274037238, "learning_rate": 9.14992083836828e-06, "loss": 0.4984, "step": 8617 }, { "epoch": 0.8087462462462462, "grad_norm": 0.9436442713599668, "learning_rate": 9.149616281394367e-06, "loss": 0.4705, "step": 8618 }, { "epoch": 0.8088400900900901, "grad_norm": 1.228598262044436, "learning_rate": 9.14931167494409e-06, "loss": 0.4425, "step": 8619 }, { "epoch": 0.808933933933934, "grad_norm": 1.7908393308565944, "learning_rate": 9.149007019021081e-06, "loss": 0.4965, "step": 8620 }, { "epoch": 0.8090277777777778, "grad_norm": 1.1460854014268302, "learning_rate": 9.148702313628975e-06, "loss": 0.4428, "step": 8621 }, { "epoch": 0.8091216216216216, "grad_norm": 1.1086153724155123, "learning_rate": 9.1483975587714e-06, "loss": 0.4404, "step": 8622 }, { "epoch": 0.8092154654654654, "grad_norm": 1.2115448836313314, "learning_rate": 9.148092754451995e-06, "loss": 0.4586, "step": 8623 }, { "epoch": 0.8093093093093093, "grad_norm": 1.7519250820747216, "learning_rate": 9.147787900674392e-06, "loss": 0.5009, "step": 8624 }, { "epoch": 0.8094031531531531, "grad_norm": 1.0098695892421017, "learning_rate": 9.147482997442223e-06, "loss": 0.4449, "step": 8625 }, { "epoch": 0.809496996996997, "grad_norm": 0.9775770692173481, "learning_rate": 9.147178044759128e-06, "loss": 0.4583, "step": 8626 }, { "epoch": 0.8095908408408409, "grad_norm": 2.216947211353857, "learning_rate": 9.146873042628742e-06, "loss": 0.4728, "step": 8627 }, { "epoch": 0.8096846846846847, "grad_norm": 1.0124513582807666, "learning_rate": 9.1465679910547e-06, "loss": 0.434, "step": 8628 }, { "epoch": 0.8097785285285285, "grad_norm": 1.0644803326592964, "learning_rate": 9.14626289004064e-06, "loss": 0.4726, "step": 8629 }, { "epoch": 0.8098723723723724, "grad_norm": 1.0647983295985453, "learning_rate": 9.145957739590198e-06, "loss": 0.4601, "step": 8630 }, { "epoch": 0.8099662162162162, "grad_norm": 2.158833273808787, "learning_rate": 9.145652539707015e-06, "loss": 0.4247, "step": 8631 }, { "epoch": 0.81006006006006, "grad_norm": 1.2645875688134094, "learning_rate": 9.14534729039473e-06, "loss": 0.4845, "step": 8632 }, { "epoch": 0.8101539039039038, "grad_norm": 1.2984519579186051, "learning_rate": 9.14504199165698e-06, "loss": 0.4915, "step": 8633 }, { "epoch": 0.8102477477477478, "grad_norm": 0.9869481973701244, "learning_rate": 9.144736643497407e-06, "loss": 0.4438, "step": 8634 }, { "epoch": 0.8103415915915916, "grad_norm": 0.9604047230592926, "learning_rate": 9.144431245919651e-06, "loss": 0.441, "step": 8635 }, { "epoch": 0.8104354354354354, "grad_norm": 1.0529363945176016, "learning_rate": 9.144125798927352e-06, "loss": 0.5154, "step": 8636 }, { "epoch": 0.8105292792792793, "grad_norm": 1.0027256684713068, "learning_rate": 9.143820302524156e-06, "loss": 0.4593, "step": 8637 }, { "epoch": 0.8106231231231231, "grad_norm": 0.8726811883074886, "learning_rate": 9.143514756713702e-06, "loss": 0.4603, "step": 8638 }, { "epoch": 0.8107169669669669, "grad_norm": 1.276662190964, "learning_rate": 9.143209161499634e-06, "loss": 0.4616, "step": 8639 }, { "epoch": 0.8108108108108109, "grad_norm": 1.0728802187244624, "learning_rate": 9.142903516885595e-06, "loss": 0.4378, "step": 8640 }, { "epoch": 0.8109046546546547, "grad_norm": 1.0403156643486116, "learning_rate": 9.14259782287523e-06, "loss": 0.4822, "step": 8641 }, { "epoch": 0.8109984984984985, "grad_norm": 0.8235868904580561, "learning_rate": 9.142292079472182e-06, "loss": 0.4097, "step": 8642 }, { "epoch": 0.8110923423423423, "grad_norm": 1.0401570493528518, "learning_rate": 9.1419862866801e-06, "loss": 0.4873, "step": 8643 }, { "epoch": 0.8111861861861862, "grad_norm": 1.3113078143879788, "learning_rate": 9.141680444502628e-06, "loss": 0.4813, "step": 8644 }, { "epoch": 0.81128003003003, "grad_norm": 1.0570441021434496, "learning_rate": 9.141374552943412e-06, "loss": 0.4287, "step": 8645 }, { "epoch": 0.8113738738738738, "grad_norm": 0.9839808682056762, "learning_rate": 9.1410686120061e-06, "loss": 0.4403, "step": 8646 }, { "epoch": 0.8114677177177178, "grad_norm": 1.1083956132777495, "learning_rate": 9.14076262169434e-06, "loss": 0.4775, "step": 8647 }, { "epoch": 0.8115615615615616, "grad_norm": 0.975045532080071, "learning_rate": 9.140456582011779e-06, "loss": 0.4696, "step": 8648 }, { "epoch": 0.8116554054054054, "grad_norm": 1.0794978343812456, "learning_rate": 9.140150492962067e-06, "loss": 0.5214, "step": 8649 }, { "epoch": 0.8117492492492493, "grad_norm": 0.9941176704899398, "learning_rate": 9.139844354548853e-06, "loss": 0.4774, "step": 8650 }, { "epoch": 0.8118430930930931, "grad_norm": 1.288900538273332, "learning_rate": 9.139538166775788e-06, "loss": 0.4499, "step": 8651 }, { "epoch": 0.8119369369369369, "grad_norm": 1.0840936940871793, "learning_rate": 9.139231929646522e-06, "loss": 0.4977, "step": 8652 }, { "epoch": 0.8120307807807807, "grad_norm": 0.883650569615953, "learning_rate": 9.138925643164707e-06, "loss": 0.4075, "step": 8653 }, { "epoch": 0.8121246246246246, "grad_norm": 1.5987518808149856, "learning_rate": 9.138619307333995e-06, "loss": 0.477, "step": 8654 }, { "epoch": 0.8122184684684685, "grad_norm": 9.256641382109416, "learning_rate": 9.138312922158037e-06, "loss": 0.4613, "step": 8655 }, { "epoch": 0.8123123123123123, "grad_norm": 1.3577398530459188, "learning_rate": 9.138006487640487e-06, "loss": 0.488, "step": 8656 }, { "epoch": 0.8124061561561562, "grad_norm": 1.0030905558743004, "learning_rate": 9.137700003785e-06, "loss": 0.4968, "step": 8657 }, { "epoch": 0.8125, "grad_norm": 0.9883887090386486, "learning_rate": 9.137393470595226e-06, "loss": 0.4299, "step": 8658 }, { "epoch": 0.8125938438438438, "grad_norm": 1.2934495756702675, "learning_rate": 9.137086888074827e-06, "loss": 0.514, "step": 8659 }, { "epoch": 0.8126876876876877, "grad_norm": 1.3587130764357254, "learning_rate": 9.13678025622745e-06, "loss": 0.5589, "step": 8660 }, { "epoch": 0.8127815315315315, "grad_norm": 1.8892037432339737, "learning_rate": 9.136473575056757e-06, "loss": 0.4946, "step": 8661 }, { "epoch": 0.8128753753753754, "grad_norm": 1.3869624039347086, "learning_rate": 9.136166844566404e-06, "loss": 0.4529, "step": 8662 }, { "epoch": 0.8129692192192193, "grad_norm": 0.9764060624505155, "learning_rate": 9.135860064760044e-06, "loss": 0.4893, "step": 8663 }, { "epoch": 0.8130630630630631, "grad_norm": 1.0950564092254458, "learning_rate": 9.13555323564134e-06, "loss": 0.4529, "step": 8664 }, { "epoch": 0.8131569069069069, "grad_norm": 0.9703960026494665, "learning_rate": 9.135246357213946e-06, "loss": 0.4492, "step": 8665 }, { "epoch": 0.8132507507507507, "grad_norm": 0.8446455368545648, "learning_rate": 9.134939429481525e-06, "loss": 0.4555, "step": 8666 }, { "epoch": 0.8133445945945946, "grad_norm": 1.025001861172553, "learning_rate": 9.134632452447732e-06, "loss": 0.4085, "step": 8667 }, { "epoch": 0.8134384384384384, "grad_norm": 0.9270777207862438, "learning_rate": 9.134325426116233e-06, "loss": 0.4421, "step": 8668 }, { "epoch": 0.8135322822822822, "grad_norm": 1.145664027491686, "learning_rate": 9.134018350490684e-06, "loss": 0.4997, "step": 8669 }, { "epoch": 0.8136261261261262, "grad_norm": 0.9612344702263375, "learning_rate": 9.133711225574746e-06, "loss": 0.4327, "step": 8670 }, { "epoch": 0.81371996996997, "grad_norm": 0.930093044941974, "learning_rate": 9.133404051372083e-06, "loss": 0.3965, "step": 8671 }, { "epoch": 0.8138138138138138, "grad_norm": 1.0756719945644635, "learning_rate": 9.133096827886358e-06, "loss": 0.4578, "step": 8672 }, { "epoch": 0.8139076576576577, "grad_norm": 1.1133951191973763, "learning_rate": 9.132789555121233e-06, "loss": 0.4461, "step": 8673 }, { "epoch": 0.8140015015015015, "grad_norm": 1.0190097598698993, "learning_rate": 9.132482233080374e-06, "loss": 0.4947, "step": 8674 }, { "epoch": 0.8140953453453453, "grad_norm": 0.9270446309545313, "learning_rate": 9.132174861767439e-06, "loss": 0.4647, "step": 8675 }, { "epoch": 0.8141891891891891, "grad_norm": 1.3926837127705474, "learning_rate": 9.131867441186099e-06, "loss": 0.5016, "step": 8676 }, { "epoch": 0.8142830330330331, "grad_norm": 1.0046758129425335, "learning_rate": 9.131559971340017e-06, "loss": 0.524, "step": 8677 }, { "epoch": 0.8143768768768769, "grad_norm": 0.8631295919170059, "learning_rate": 9.131252452232858e-06, "loss": 0.4517, "step": 8678 }, { "epoch": 0.8144707207207207, "grad_norm": 1.0567517092070264, "learning_rate": 9.13094488386829e-06, "loss": 0.471, "step": 8679 }, { "epoch": 0.8145645645645646, "grad_norm": 1.3953826896983452, "learning_rate": 9.13063726624998e-06, "loss": 0.4413, "step": 8680 }, { "epoch": 0.8146584084084084, "grad_norm": 1.0450599059661516, "learning_rate": 9.130329599381598e-06, "loss": 0.4771, "step": 8681 }, { "epoch": 0.8147522522522522, "grad_norm": 1.2040029635205252, "learning_rate": 9.130021883266807e-06, "loss": 0.509, "step": 8682 }, { "epoch": 0.8148460960960962, "grad_norm": 1.0453621967339806, "learning_rate": 9.12971411790928e-06, "loss": 0.5008, "step": 8683 }, { "epoch": 0.81493993993994, "grad_norm": 0.889908913814573, "learning_rate": 9.129406303312687e-06, "loss": 0.4178, "step": 8684 }, { "epoch": 0.8150337837837838, "grad_norm": 1.1172943700257365, "learning_rate": 9.129098439480697e-06, "loss": 0.463, "step": 8685 }, { "epoch": 0.8151276276276276, "grad_norm": 1.4066010839676393, "learning_rate": 9.128790526416978e-06, "loss": 0.4696, "step": 8686 }, { "epoch": 0.8152214714714715, "grad_norm": 2.220969733347787, "learning_rate": 9.128482564125203e-06, "loss": 0.4634, "step": 8687 }, { "epoch": 0.8153153153153153, "grad_norm": 1.621754990680018, "learning_rate": 9.128174552609047e-06, "loss": 0.5303, "step": 8688 }, { "epoch": 0.8154091591591591, "grad_norm": 1.0026550344542235, "learning_rate": 9.12786649187218e-06, "loss": 0.4624, "step": 8689 }, { "epoch": 0.815503003003003, "grad_norm": 1.0149042947734126, "learning_rate": 9.127558381918273e-06, "loss": 0.4427, "step": 8690 }, { "epoch": 0.8155968468468469, "grad_norm": 3.5282038984604496, "learning_rate": 9.127250222751003e-06, "loss": 0.4567, "step": 8691 }, { "epoch": 0.8156906906906907, "grad_norm": 1.0293454081287303, "learning_rate": 9.126942014374041e-06, "loss": 0.4589, "step": 8692 }, { "epoch": 0.8157845345345346, "grad_norm": 4.712372397982322, "learning_rate": 9.126633756791065e-06, "loss": 0.4607, "step": 8693 }, { "epoch": 0.8158783783783784, "grad_norm": 1.1439556902816168, "learning_rate": 9.126325450005751e-06, "loss": 0.5108, "step": 8694 }, { "epoch": 0.8159722222222222, "grad_norm": 0.9864754111785418, "learning_rate": 9.12601709402177e-06, "loss": 0.4728, "step": 8695 }, { "epoch": 0.816066066066066, "grad_norm": 1.026593506605474, "learning_rate": 9.125708688842802e-06, "loss": 0.4732, "step": 8696 }, { "epoch": 0.8161599099099099, "grad_norm": 1.1437380107691986, "learning_rate": 9.125400234472525e-06, "loss": 0.4937, "step": 8697 }, { "epoch": 0.8162537537537538, "grad_norm": 1.2010888286216974, "learning_rate": 9.125091730914616e-06, "loss": 0.501, "step": 8698 }, { "epoch": 0.8163475975975976, "grad_norm": 1.379327752485369, "learning_rate": 9.124783178172752e-06, "loss": 0.5272, "step": 8699 }, { "epoch": 0.8164414414414415, "grad_norm": 1.0204543051167703, "learning_rate": 9.124474576250611e-06, "loss": 0.4889, "step": 8700 }, { "epoch": 0.8165352852852853, "grad_norm": 0.9342547018812681, "learning_rate": 9.124165925151876e-06, "loss": 0.4546, "step": 8701 }, { "epoch": 0.8166291291291291, "grad_norm": 1.108301713812135, "learning_rate": 9.123857224880225e-06, "loss": 0.4388, "step": 8702 }, { "epoch": 0.816722972972973, "grad_norm": 1.1649918657472673, "learning_rate": 9.123548475439339e-06, "loss": 0.5074, "step": 8703 }, { "epoch": 0.8168168168168168, "grad_norm": 0.978525009004452, "learning_rate": 9.123239676832899e-06, "loss": 0.4146, "step": 8704 }, { "epoch": 0.8169106606606606, "grad_norm": 1.2102284588892493, "learning_rate": 9.122930829064587e-06, "loss": 0.472, "step": 8705 }, { "epoch": 0.8170045045045045, "grad_norm": 2.3932091333879746, "learning_rate": 9.122621932138087e-06, "loss": 0.468, "step": 8706 }, { "epoch": 0.8170983483483484, "grad_norm": 1.015889613368057, "learning_rate": 9.122312986057078e-06, "loss": 0.4488, "step": 8707 }, { "epoch": 0.8171921921921922, "grad_norm": 1.177665038572559, "learning_rate": 9.122003990825248e-06, "loss": 0.5164, "step": 8708 }, { "epoch": 0.817286036036036, "grad_norm": 1.3806712895987185, "learning_rate": 9.121694946446278e-06, "loss": 0.4826, "step": 8709 }, { "epoch": 0.8173798798798799, "grad_norm": 1.0309600245857657, "learning_rate": 9.121385852923854e-06, "loss": 0.4769, "step": 8710 }, { "epoch": 0.8174737237237237, "grad_norm": 0.9149209276646234, "learning_rate": 9.121076710261662e-06, "loss": 0.4506, "step": 8711 }, { "epoch": 0.8175675675675675, "grad_norm": 1.1254486484266346, "learning_rate": 9.120767518463387e-06, "loss": 0.4578, "step": 8712 }, { "epoch": 0.8176614114114115, "grad_norm": 1.1617716879393203, "learning_rate": 9.120458277532717e-06, "loss": 0.4846, "step": 8713 }, { "epoch": 0.8177552552552553, "grad_norm": 0.9916921606582784, "learning_rate": 9.120148987473336e-06, "loss": 0.4831, "step": 8714 }, { "epoch": 0.8178490990990991, "grad_norm": 1.0177313113498987, "learning_rate": 9.119839648288937e-06, "loss": 0.4395, "step": 8715 }, { "epoch": 0.8179429429429429, "grad_norm": 1.5252619844718704, "learning_rate": 9.119530259983202e-06, "loss": 0.4381, "step": 8716 }, { "epoch": 0.8180367867867868, "grad_norm": 0.9313707714281787, "learning_rate": 9.119220822559823e-06, "loss": 0.5001, "step": 8717 }, { "epoch": 0.8181306306306306, "grad_norm": 1.772042437346765, "learning_rate": 9.118911336022492e-06, "loss": 0.4828, "step": 8718 }, { "epoch": 0.8182244744744744, "grad_norm": 0.831285835559189, "learning_rate": 9.118601800374893e-06, "loss": 0.4537, "step": 8719 }, { "epoch": 0.8183183183183184, "grad_norm": 0.9220789909520587, "learning_rate": 9.11829221562072e-06, "loss": 0.5154, "step": 8720 }, { "epoch": 0.8184121621621622, "grad_norm": 1.6052864100470425, "learning_rate": 9.117982581763669e-06, "loss": 0.4838, "step": 8721 }, { "epoch": 0.818506006006006, "grad_norm": 1.1606241738950833, "learning_rate": 9.117672898807422e-06, "loss": 0.4728, "step": 8722 }, { "epoch": 0.8185998498498499, "grad_norm": 0.8794357934413376, "learning_rate": 9.117363166755678e-06, "loss": 0.4594, "step": 8723 }, { "epoch": 0.8186936936936937, "grad_norm": 1.063822329416638, "learning_rate": 9.11705338561213e-06, "loss": 0.4883, "step": 8724 }, { "epoch": 0.8187875375375375, "grad_norm": 1.0781823200049927, "learning_rate": 9.116743555380467e-06, "loss": 0.4556, "step": 8725 }, { "epoch": 0.8188813813813813, "grad_norm": 0.946730818146405, "learning_rate": 9.116433676064388e-06, "loss": 0.448, "step": 8726 }, { "epoch": 0.8189752252252253, "grad_norm": 1.1372735160774698, "learning_rate": 9.116123747667585e-06, "loss": 0.496, "step": 8727 }, { "epoch": 0.8190690690690691, "grad_norm": 0.9672275303550196, "learning_rate": 9.115813770193754e-06, "loss": 0.4116, "step": 8728 }, { "epoch": 0.8191629129129129, "grad_norm": 1.700975774005909, "learning_rate": 9.115503743646592e-06, "loss": 0.4686, "step": 8729 }, { "epoch": 0.8192567567567568, "grad_norm": 0.9275242210289761, "learning_rate": 9.115193668029794e-06, "loss": 0.4342, "step": 8730 }, { "epoch": 0.8193506006006006, "grad_norm": 1.0380075955453896, "learning_rate": 9.114883543347058e-06, "loss": 0.5027, "step": 8731 }, { "epoch": 0.8194444444444444, "grad_norm": 1.0012282282157359, "learning_rate": 9.11457336960208e-06, "loss": 0.461, "step": 8732 }, { "epoch": 0.8195382882882883, "grad_norm": 1.2416133160742404, "learning_rate": 9.11426314679856e-06, "loss": 0.4729, "step": 8733 }, { "epoch": 0.8196321321321322, "grad_norm": 1.0891153714414092, "learning_rate": 9.113952874940198e-06, "loss": 0.4792, "step": 8734 }, { "epoch": 0.819725975975976, "grad_norm": 0.8691548489203069, "learning_rate": 9.113642554030688e-06, "loss": 0.4532, "step": 8735 }, { "epoch": 0.8198198198198198, "grad_norm": 1.089021630436698, "learning_rate": 9.113332184073738e-06, "loss": 0.4564, "step": 8736 }, { "epoch": 0.8199136636636637, "grad_norm": 1.0724467910949675, "learning_rate": 9.113021765073041e-06, "loss": 0.4108, "step": 8737 }, { "epoch": 0.8200075075075075, "grad_norm": 1.0541132270250744, "learning_rate": 9.112711297032301e-06, "loss": 0.4726, "step": 8738 }, { "epoch": 0.8201013513513513, "grad_norm": 1.567659628730566, "learning_rate": 9.112400779955223e-06, "loss": 0.4798, "step": 8739 }, { "epoch": 0.8201951951951952, "grad_norm": 1.1822281285897502, "learning_rate": 9.112090213845505e-06, "loss": 0.4088, "step": 8740 }, { "epoch": 0.820289039039039, "grad_norm": 1.1698520823539853, "learning_rate": 9.111779598706852e-06, "loss": 0.5209, "step": 8741 }, { "epoch": 0.8203828828828829, "grad_norm": 0.9478646949977956, "learning_rate": 9.111468934542967e-06, "loss": 0.5027, "step": 8742 }, { "epoch": 0.8204767267267268, "grad_norm": 1.046728571297367, "learning_rate": 9.111158221357553e-06, "loss": 0.5028, "step": 8743 }, { "epoch": 0.8205705705705706, "grad_norm": 1.0020987415829672, "learning_rate": 9.110847459154315e-06, "loss": 0.4635, "step": 8744 }, { "epoch": 0.8206644144144144, "grad_norm": 0.9925777273356226, "learning_rate": 9.110536647936962e-06, "loss": 0.4778, "step": 8745 }, { "epoch": 0.8207582582582582, "grad_norm": 0.9466529443567997, "learning_rate": 9.110225787709195e-06, "loss": 0.4925, "step": 8746 }, { "epoch": 0.8208521021021021, "grad_norm": 1.1283986444035654, "learning_rate": 9.109914878474721e-06, "loss": 0.545, "step": 8747 }, { "epoch": 0.8209459459459459, "grad_norm": 0.9426898953505825, "learning_rate": 9.10960392023725e-06, "loss": 0.4794, "step": 8748 }, { "epoch": 0.8210397897897898, "grad_norm": 1.060803352389399, "learning_rate": 9.109292913000487e-06, "loss": 0.4244, "step": 8749 }, { "epoch": 0.8211336336336337, "grad_norm": 1.2762880661117562, "learning_rate": 9.10898185676814e-06, "loss": 0.4518, "step": 8750 }, { "epoch": 0.8212274774774775, "grad_norm": 1.0505924135132778, "learning_rate": 9.10867075154392e-06, "loss": 0.4822, "step": 8751 }, { "epoch": 0.8213213213213213, "grad_norm": 0.9063033603847936, "learning_rate": 9.108359597331536e-06, "loss": 0.4513, "step": 8752 }, { "epoch": 0.8214151651651652, "grad_norm": 1.3213738297402307, "learning_rate": 9.108048394134694e-06, "loss": 0.508, "step": 8753 }, { "epoch": 0.821509009009009, "grad_norm": 0.9732174006251603, "learning_rate": 9.10773714195711e-06, "loss": 0.4838, "step": 8754 }, { "epoch": 0.8216028528528528, "grad_norm": 1.0263599074759813, "learning_rate": 9.107425840802492e-06, "loss": 0.4791, "step": 8755 }, { "epoch": 0.8216966966966966, "grad_norm": 0.9615550857556692, "learning_rate": 9.107114490674551e-06, "loss": 0.5159, "step": 8756 }, { "epoch": 0.8217905405405406, "grad_norm": 1.5711785984362698, "learning_rate": 9.106803091577003e-06, "loss": 0.4036, "step": 8757 }, { "epoch": 0.8218843843843844, "grad_norm": 5.557776595988932, "learning_rate": 9.106491643513558e-06, "loss": 0.4521, "step": 8758 }, { "epoch": 0.8219782282282282, "grad_norm": 1.0401187130571141, "learning_rate": 9.106180146487927e-06, "loss": 0.4531, "step": 8759 }, { "epoch": 0.8220720720720721, "grad_norm": 1.102764151656775, "learning_rate": 9.10586860050383e-06, "loss": 0.4002, "step": 8760 }, { "epoch": 0.8221659159159159, "grad_norm": 0.9028432802628091, "learning_rate": 9.105557005564976e-06, "loss": 0.4531, "step": 8761 }, { "epoch": 0.8222597597597597, "grad_norm": 1.089732087610743, "learning_rate": 9.105245361675085e-06, "loss": 0.4358, "step": 8762 }, { "epoch": 0.8223536036036037, "grad_norm": 1.4655550651534077, "learning_rate": 9.10493366883787e-06, "loss": 0.464, "step": 8763 }, { "epoch": 0.8224474474474475, "grad_norm": 1.2041062036980248, "learning_rate": 9.104621927057046e-06, "loss": 0.4913, "step": 8764 }, { "epoch": 0.8225412912912913, "grad_norm": 1.567566757784893, "learning_rate": 9.104310136336332e-06, "loss": 0.4307, "step": 8765 }, { "epoch": 0.8226351351351351, "grad_norm": 1.651121400152801, "learning_rate": 9.103998296679447e-06, "loss": 0.552, "step": 8766 }, { "epoch": 0.822728978978979, "grad_norm": 1.1215193179573608, "learning_rate": 9.103686408090105e-06, "loss": 0.463, "step": 8767 }, { "epoch": 0.8228228228228228, "grad_norm": 1.0748575250994081, "learning_rate": 9.10337447057203e-06, "loss": 0.4393, "step": 8768 }, { "epoch": 0.8229166666666666, "grad_norm": 0.8602974322038084, "learning_rate": 9.103062484128935e-06, "loss": 0.4609, "step": 8769 }, { "epoch": 0.8230105105105106, "grad_norm": 1.2240826862809155, "learning_rate": 9.102750448764544e-06, "loss": 0.4796, "step": 8770 }, { "epoch": 0.8231043543543544, "grad_norm": 1.7592934671469704, "learning_rate": 9.102438364482577e-06, "loss": 0.4785, "step": 8771 }, { "epoch": 0.8231981981981982, "grad_norm": 0.9346064219658371, "learning_rate": 9.102126231286754e-06, "loss": 0.4808, "step": 8772 }, { "epoch": 0.8232920420420421, "grad_norm": 0.955301332075491, "learning_rate": 9.101814049180798e-06, "loss": 0.4905, "step": 8773 }, { "epoch": 0.8233858858858859, "grad_norm": 0.8990425829465241, "learning_rate": 9.10150181816843e-06, "loss": 0.4617, "step": 8774 }, { "epoch": 0.8234797297297297, "grad_norm": 1.1424771552956952, "learning_rate": 9.101189538253373e-06, "loss": 0.49, "step": 8775 }, { "epoch": 0.8235735735735735, "grad_norm": 1.1432831932962124, "learning_rate": 9.100877209439352e-06, "loss": 0.4572, "step": 8776 }, { "epoch": 0.8236674174174174, "grad_norm": 1.351170454342579, "learning_rate": 9.100564831730088e-06, "loss": 0.4215, "step": 8777 }, { "epoch": 0.8237612612612613, "grad_norm": 1.2331295395189326, "learning_rate": 9.100252405129306e-06, "loss": 0.4833, "step": 8778 }, { "epoch": 0.8238551051051051, "grad_norm": 0.922950845054331, "learning_rate": 9.099939929640732e-06, "loss": 0.5332, "step": 8779 }, { "epoch": 0.823948948948949, "grad_norm": 1.0681715457551495, "learning_rate": 9.099627405268093e-06, "loss": 0.5104, "step": 8780 }, { "epoch": 0.8240427927927928, "grad_norm": 0.9881193042231883, "learning_rate": 9.099314832015113e-06, "loss": 0.4544, "step": 8781 }, { "epoch": 0.8241366366366366, "grad_norm": 1.2979406677527265, "learning_rate": 9.09900220988552e-06, "loss": 0.4176, "step": 8782 }, { "epoch": 0.8242304804804805, "grad_norm": 1.4306556675290776, "learning_rate": 9.098689538883043e-06, "loss": 0.4232, "step": 8783 }, { "epoch": 0.8243243243243243, "grad_norm": 1.0496449916671704, "learning_rate": 9.098376819011407e-06, "loss": 0.4463, "step": 8784 }, { "epoch": 0.8244181681681682, "grad_norm": 1.6415949707559239, "learning_rate": 9.098064050274341e-06, "loss": 0.4094, "step": 8785 }, { "epoch": 0.824512012012012, "grad_norm": 2.4686126220282163, "learning_rate": 9.097751232675575e-06, "loss": 0.4478, "step": 8786 }, { "epoch": 0.8246058558558559, "grad_norm": 1.4305365588612184, "learning_rate": 9.097438366218841e-06, "loss": 0.4784, "step": 8787 }, { "epoch": 0.8246996996996997, "grad_norm": 1.1948376353859638, "learning_rate": 9.097125450907864e-06, "loss": 0.505, "step": 8788 }, { "epoch": 0.8247935435435435, "grad_norm": 0.9445076981939348, "learning_rate": 9.09681248674638e-06, "loss": 0.458, "step": 8789 }, { "epoch": 0.8248873873873874, "grad_norm": 1.0380005246468331, "learning_rate": 9.096499473738118e-06, "loss": 0.4924, "step": 8790 }, { "epoch": 0.8249812312312312, "grad_norm": 1.533730236191676, "learning_rate": 9.096186411886812e-06, "loss": 0.4907, "step": 8791 }, { "epoch": 0.825075075075075, "grad_norm": 0.9969396598101599, "learning_rate": 9.095873301196192e-06, "loss": 0.4764, "step": 8792 }, { "epoch": 0.825168918918919, "grad_norm": 2.3234533256254317, "learning_rate": 9.095560141669991e-06, "loss": 0.4352, "step": 8793 }, { "epoch": 0.8252627627627628, "grad_norm": 1.0658271540681412, "learning_rate": 9.095246933311947e-06, "loss": 0.4801, "step": 8794 }, { "epoch": 0.8253566066066066, "grad_norm": 0.9813370688419037, "learning_rate": 9.09493367612579e-06, "loss": 0.4556, "step": 8795 }, { "epoch": 0.8254504504504504, "grad_norm": 0.962733511407431, "learning_rate": 9.094620370115258e-06, "loss": 0.4744, "step": 8796 }, { "epoch": 0.8255442942942943, "grad_norm": 1.102054564907996, "learning_rate": 9.094307015284086e-06, "loss": 0.4618, "step": 8797 }, { "epoch": 0.8256381381381381, "grad_norm": 1.2488064061747897, "learning_rate": 9.093993611636007e-06, "loss": 0.4678, "step": 8798 }, { "epoch": 0.8257319819819819, "grad_norm": 0.9768215352534532, "learning_rate": 9.093680159174763e-06, "loss": 0.453, "step": 8799 }, { "epoch": 0.8258258258258259, "grad_norm": 1.0022476644856415, "learning_rate": 9.093366657904089e-06, "loss": 0.486, "step": 8800 }, { "epoch": 0.8259196696696697, "grad_norm": 1.1222276743383899, "learning_rate": 9.09305310782772e-06, "loss": 0.4995, "step": 8801 }, { "epoch": 0.8260135135135135, "grad_norm": 0.9500902632435697, "learning_rate": 9.092739508949398e-06, "loss": 0.484, "step": 8802 }, { "epoch": 0.8261073573573574, "grad_norm": 1.7636232502722693, "learning_rate": 9.092425861272862e-06, "loss": 0.4554, "step": 8803 }, { "epoch": 0.8262012012012012, "grad_norm": 1.0352540741220289, "learning_rate": 9.092112164801849e-06, "loss": 0.4411, "step": 8804 }, { "epoch": 0.826295045045045, "grad_norm": 1.0257177952607786, "learning_rate": 9.091798419540104e-06, "loss": 0.5316, "step": 8805 }, { "epoch": 0.8263888888888888, "grad_norm": 1.0595767766078572, "learning_rate": 9.091484625491362e-06, "loss": 0.4438, "step": 8806 }, { "epoch": 0.8264827327327328, "grad_norm": 1.2637216910229707, "learning_rate": 9.091170782659368e-06, "loss": 0.4502, "step": 8807 }, { "epoch": 0.8265765765765766, "grad_norm": 0.8835000332200083, "learning_rate": 9.090856891047865e-06, "loss": 0.4338, "step": 8808 }, { "epoch": 0.8266704204204204, "grad_norm": 1.0871447125611198, "learning_rate": 9.090542950660593e-06, "loss": 0.5275, "step": 8809 }, { "epoch": 0.8267642642642643, "grad_norm": 1.1576574982668932, "learning_rate": 9.090228961501295e-06, "loss": 0.4558, "step": 8810 }, { "epoch": 0.8268581081081081, "grad_norm": 1.1004959890798145, "learning_rate": 9.089914923573715e-06, "loss": 0.4738, "step": 8811 }, { "epoch": 0.8269519519519519, "grad_norm": 1.49426690766775, "learning_rate": 9.0896008368816e-06, "loss": 0.4759, "step": 8812 }, { "epoch": 0.8270457957957958, "grad_norm": 0.9123553531042196, "learning_rate": 9.089286701428693e-06, "loss": 0.4043, "step": 8813 }, { "epoch": 0.8271396396396397, "grad_norm": 1.0689208645065724, "learning_rate": 9.088972517218737e-06, "loss": 0.4712, "step": 8814 }, { "epoch": 0.8272334834834835, "grad_norm": 1.06281030743038, "learning_rate": 9.088658284255482e-06, "loss": 0.4635, "step": 8815 }, { "epoch": 0.8273273273273273, "grad_norm": 2.2426125708876232, "learning_rate": 9.088344002542672e-06, "loss": 0.4353, "step": 8816 }, { "epoch": 0.8274211711711712, "grad_norm": 0.9132840190334058, "learning_rate": 9.088029672084058e-06, "loss": 0.4781, "step": 8817 }, { "epoch": 0.827515015015015, "grad_norm": 1.2333905280475956, "learning_rate": 9.087715292883384e-06, "loss": 0.476, "step": 8818 }, { "epoch": 0.8276088588588588, "grad_norm": 1.0163336662834597, "learning_rate": 9.087400864944398e-06, "loss": 0.4684, "step": 8819 }, { "epoch": 0.8277027027027027, "grad_norm": 0.9947162639958818, "learning_rate": 9.087086388270851e-06, "loss": 0.432, "step": 8820 }, { "epoch": 0.8277965465465466, "grad_norm": 1.3077113209824685, "learning_rate": 9.086771862866493e-06, "loss": 0.4804, "step": 8821 }, { "epoch": 0.8278903903903904, "grad_norm": 1.0097946451011948, "learning_rate": 9.08645728873507e-06, "loss": 0.4365, "step": 8822 }, { "epoch": 0.8279842342342343, "grad_norm": 1.0404104680556994, "learning_rate": 9.08614266588034e-06, "loss": 0.4431, "step": 8823 }, { "epoch": 0.8280780780780781, "grad_norm": 1.0035043225634805, "learning_rate": 9.085827994306048e-06, "loss": 0.473, "step": 8824 }, { "epoch": 0.8281719219219219, "grad_norm": 0.949004771769356, "learning_rate": 9.085513274015948e-06, "loss": 0.4722, "step": 8825 }, { "epoch": 0.8282657657657657, "grad_norm": 0.8861476410175996, "learning_rate": 9.085198505013793e-06, "loss": 0.4111, "step": 8826 }, { "epoch": 0.8283596096096096, "grad_norm": 0.9251720079785127, "learning_rate": 9.084883687303333e-06, "loss": 0.4755, "step": 8827 }, { "epoch": 0.8284534534534534, "grad_norm": 1.4525570504342868, "learning_rate": 9.084568820888327e-06, "loss": 0.4455, "step": 8828 }, { "epoch": 0.8285472972972973, "grad_norm": 1.1392006851049625, "learning_rate": 9.084253905772526e-06, "loss": 0.4852, "step": 8829 }, { "epoch": 0.8286411411411412, "grad_norm": 1.0452148314314262, "learning_rate": 9.083938941959684e-06, "loss": 0.4831, "step": 8830 }, { "epoch": 0.828734984984985, "grad_norm": 1.1838307771588013, "learning_rate": 9.083623929453558e-06, "loss": 0.443, "step": 8831 }, { "epoch": 0.8288288288288288, "grad_norm": 1.7367224384639826, "learning_rate": 9.083308868257902e-06, "loss": 0.4195, "step": 8832 }, { "epoch": 0.8289226726726727, "grad_norm": 1.0274045188683112, "learning_rate": 9.082993758376475e-06, "loss": 0.5071, "step": 8833 }, { "epoch": 0.8290165165165165, "grad_norm": 1.9879733679619511, "learning_rate": 9.082678599813032e-06, "loss": 0.4246, "step": 8834 }, { "epoch": 0.8291103603603603, "grad_norm": 0.9568832931783902, "learning_rate": 9.082363392571331e-06, "loss": 0.465, "step": 8835 }, { "epoch": 0.8292042042042042, "grad_norm": 1.1050113882842625, "learning_rate": 9.082048136655132e-06, "loss": 0.4419, "step": 8836 }, { "epoch": 0.8292980480480481, "grad_norm": 0.9214400214389213, "learning_rate": 9.081732832068193e-06, "loss": 0.4493, "step": 8837 }, { "epoch": 0.8293918918918919, "grad_norm": 0.9531994918214115, "learning_rate": 9.081417478814273e-06, "loss": 0.4374, "step": 8838 }, { "epoch": 0.8294857357357357, "grad_norm": 1.0957796411395169, "learning_rate": 9.081102076897131e-06, "loss": 0.4762, "step": 8839 }, { "epoch": 0.8295795795795796, "grad_norm": 1.076679646798699, "learning_rate": 9.08078662632053e-06, "loss": 0.4378, "step": 8840 }, { "epoch": 0.8296734234234234, "grad_norm": 1.3472666290826343, "learning_rate": 9.080471127088228e-06, "loss": 0.5222, "step": 8841 }, { "epoch": 0.8297672672672672, "grad_norm": 1.0931311508429806, "learning_rate": 9.08015557920399e-06, "loss": 0.4493, "step": 8842 }, { "epoch": 0.8298611111111112, "grad_norm": 1.3401894597595632, "learning_rate": 9.079839982671576e-06, "loss": 0.4948, "step": 8843 }, { "epoch": 0.829954954954955, "grad_norm": 1.0564329587970096, "learning_rate": 9.07952433749475e-06, "loss": 0.5007, "step": 8844 }, { "epoch": 0.8300487987987988, "grad_norm": 1.3556981054823725, "learning_rate": 9.079208643677275e-06, "loss": 0.4662, "step": 8845 }, { "epoch": 0.8301426426426426, "grad_norm": 1.0526005901212414, "learning_rate": 9.078892901222915e-06, "loss": 0.4906, "step": 8846 }, { "epoch": 0.8302364864864865, "grad_norm": 1.0860733679449353, "learning_rate": 9.078577110135435e-06, "loss": 0.4421, "step": 8847 }, { "epoch": 0.8303303303303303, "grad_norm": 0.9285667883946053, "learning_rate": 9.078261270418603e-06, "loss": 0.486, "step": 8848 }, { "epoch": 0.8304241741741741, "grad_norm": 1.3925408710197136, "learning_rate": 9.07794538207618e-06, "loss": 0.4598, "step": 8849 }, { "epoch": 0.8305180180180181, "grad_norm": 1.4606381433626996, "learning_rate": 9.077629445111931e-06, "loss": 0.5462, "step": 8850 }, { "epoch": 0.8306118618618619, "grad_norm": 1.1928779488421213, "learning_rate": 9.077313459529629e-06, "loss": 0.4963, "step": 8851 }, { "epoch": 0.8307057057057057, "grad_norm": 1.4655686149693876, "learning_rate": 9.076997425333039e-06, "loss": 0.497, "step": 8852 }, { "epoch": 0.8307995495495496, "grad_norm": 1.0012641006893825, "learning_rate": 9.076681342525928e-06, "loss": 0.4048, "step": 8853 }, { "epoch": 0.8308933933933934, "grad_norm": 0.9145584245468976, "learning_rate": 9.076365211112065e-06, "loss": 0.4138, "step": 8854 }, { "epoch": 0.8309872372372372, "grad_norm": 1.4197624887883877, "learning_rate": 9.076049031095221e-06, "loss": 0.4334, "step": 8855 }, { "epoch": 0.831081081081081, "grad_norm": 0.9239845097693974, "learning_rate": 9.075732802479166e-06, "loss": 0.4424, "step": 8856 }, { "epoch": 0.831174924924925, "grad_norm": 1.1409199587990508, "learning_rate": 9.075416525267667e-06, "loss": 0.4354, "step": 8857 }, { "epoch": 0.8312687687687688, "grad_norm": 1.0153535141119472, "learning_rate": 9.075100199464498e-06, "loss": 0.4769, "step": 8858 }, { "epoch": 0.8313626126126126, "grad_norm": 2.233671873035102, "learning_rate": 9.07478382507343e-06, "loss": 0.4441, "step": 8859 }, { "epoch": 0.8314564564564565, "grad_norm": 1.1249891476226057, "learning_rate": 9.074467402098233e-06, "loss": 0.4935, "step": 8860 }, { "epoch": 0.8315503003003003, "grad_norm": 1.2030550841828547, "learning_rate": 9.074150930542683e-06, "loss": 0.5128, "step": 8861 }, { "epoch": 0.8316441441441441, "grad_norm": 1.0229517173811034, "learning_rate": 9.073834410410552e-06, "loss": 0.491, "step": 8862 }, { "epoch": 0.831737987987988, "grad_norm": 1.709680904342997, "learning_rate": 9.073517841705613e-06, "loss": 0.519, "step": 8863 }, { "epoch": 0.8318318318318318, "grad_norm": 1.0423593955884158, "learning_rate": 9.073201224431643e-06, "loss": 0.4965, "step": 8864 }, { "epoch": 0.8319256756756757, "grad_norm": 1.2343895506536724, "learning_rate": 9.072884558592415e-06, "loss": 0.4487, "step": 8865 }, { "epoch": 0.8320195195195195, "grad_norm": 1.0417442159228063, "learning_rate": 9.072567844191704e-06, "loss": 0.4665, "step": 8866 }, { "epoch": 0.8321133633633634, "grad_norm": 1.011468643814433, "learning_rate": 9.072251081233288e-06, "loss": 0.4681, "step": 8867 }, { "epoch": 0.8322072072072072, "grad_norm": 1.046737818928647, "learning_rate": 9.071934269720942e-06, "loss": 0.475, "step": 8868 }, { "epoch": 0.832301051051051, "grad_norm": 1.0379863825466074, "learning_rate": 9.071617409658445e-06, "loss": 0.4554, "step": 8869 }, { "epoch": 0.8323948948948949, "grad_norm": 1.9385093087161491, "learning_rate": 9.071300501049576e-06, "loss": 0.483, "step": 8870 }, { "epoch": 0.8324887387387387, "grad_norm": 0.9011594280537366, "learning_rate": 9.070983543898111e-06, "loss": 0.4334, "step": 8871 }, { "epoch": 0.8325825825825826, "grad_norm": 1.0364104879811653, "learning_rate": 9.070666538207828e-06, "loss": 0.466, "step": 8872 }, { "epoch": 0.8326764264264265, "grad_norm": 1.0432771545793986, "learning_rate": 9.070349483982511e-06, "loss": 0.4537, "step": 8873 }, { "epoch": 0.8327702702702703, "grad_norm": 2.3151059622332824, "learning_rate": 9.070032381225937e-06, "loss": 0.4491, "step": 8874 }, { "epoch": 0.8328641141141141, "grad_norm": 1.3543465968577268, "learning_rate": 9.06971522994189e-06, "loss": 0.482, "step": 8875 }, { "epoch": 0.8329579579579579, "grad_norm": 1.0915082666938234, "learning_rate": 9.069398030134147e-06, "loss": 0.4755, "step": 8876 }, { "epoch": 0.8330518018018018, "grad_norm": 1.4070844106896159, "learning_rate": 9.069080781806494e-06, "loss": 0.4513, "step": 8877 }, { "epoch": 0.8331456456456456, "grad_norm": 1.1653275964661016, "learning_rate": 9.068763484962711e-06, "loss": 0.4958, "step": 8878 }, { "epoch": 0.8332394894894894, "grad_norm": 0.9360154607061256, "learning_rate": 9.068446139606582e-06, "loss": 0.4572, "step": 8879 }, { "epoch": 0.8333333333333334, "grad_norm": 1.3367594431482668, "learning_rate": 9.068128745741892e-06, "loss": 0.4636, "step": 8880 }, { "epoch": 0.8334271771771772, "grad_norm": 1.3306940151059945, "learning_rate": 9.067811303372422e-06, "loss": 0.4622, "step": 8881 }, { "epoch": 0.833521021021021, "grad_norm": 1.1338781746282793, "learning_rate": 9.06749381250196e-06, "loss": 0.5047, "step": 8882 }, { "epoch": 0.8336148648648649, "grad_norm": 1.132528746110628, "learning_rate": 9.067176273134291e-06, "loss": 0.4462, "step": 8883 }, { "epoch": 0.8337087087087087, "grad_norm": 1.8355682673897735, "learning_rate": 9.066858685273201e-06, "loss": 0.4674, "step": 8884 }, { "epoch": 0.8338025525525525, "grad_norm": 1.026475456110435, "learning_rate": 9.066541048922474e-06, "loss": 0.4725, "step": 8885 }, { "epoch": 0.8338963963963963, "grad_norm": 1.8486804625271291, "learning_rate": 9.066223364085902e-06, "loss": 0.4027, "step": 8886 }, { "epoch": 0.8339902402402403, "grad_norm": 1.29255955008423, "learning_rate": 9.06590563076727e-06, "loss": 0.5036, "step": 8887 }, { "epoch": 0.8340840840840841, "grad_norm": 1.5425290603755653, "learning_rate": 9.065587848970367e-06, "loss": 0.4855, "step": 8888 }, { "epoch": 0.8341779279279279, "grad_norm": 1.0121076036625778, "learning_rate": 9.06527001869898e-06, "loss": 0.4676, "step": 8889 }, { "epoch": 0.8342717717717718, "grad_norm": 1.337225882495268, "learning_rate": 9.064952139956901e-06, "loss": 0.4396, "step": 8890 }, { "epoch": 0.8343656156156156, "grad_norm": 1.3093773129350539, "learning_rate": 9.064634212747918e-06, "loss": 0.5249, "step": 8891 }, { "epoch": 0.8344594594594594, "grad_norm": 1.2117824701028772, "learning_rate": 9.064316237075826e-06, "loss": 0.4801, "step": 8892 }, { "epoch": 0.8345533033033034, "grad_norm": 1.4116406406206672, "learning_rate": 9.06399821294441e-06, "loss": 0.4389, "step": 8893 }, { "epoch": 0.8346471471471472, "grad_norm": 1.3323686659620357, "learning_rate": 9.063680140357465e-06, "loss": 0.4583, "step": 8894 }, { "epoch": 0.834740990990991, "grad_norm": 1.074259412195902, "learning_rate": 9.063362019318785e-06, "loss": 0.4521, "step": 8895 }, { "epoch": 0.8348348348348348, "grad_norm": 0.998364276320942, "learning_rate": 9.063043849832161e-06, "loss": 0.4581, "step": 8896 }, { "epoch": 0.8349286786786787, "grad_norm": 14.48128471368522, "learning_rate": 9.062725631901387e-06, "loss": 0.4495, "step": 8897 }, { "epoch": 0.8350225225225225, "grad_norm": 1.2194169682111173, "learning_rate": 9.062407365530257e-06, "loss": 0.441, "step": 8898 }, { "epoch": 0.8351163663663663, "grad_norm": 1.0428705718228215, "learning_rate": 9.062089050722566e-06, "loss": 0.4583, "step": 8899 }, { "epoch": 0.8352102102102102, "grad_norm": 0.9927584730057573, "learning_rate": 9.06177068748211e-06, "loss": 0.4832, "step": 8900 }, { "epoch": 0.8353040540540541, "grad_norm": 1.083454732752689, "learning_rate": 9.061452275812683e-06, "loss": 0.4509, "step": 8901 }, { "epoch": 0.8353978978978979, "grad_norm": 1.0823828852339836, "learning_rate": 9.061133815718082e-06, "loss": 0.4882, "step": 8902 }, { "epoch": 0.8354917417417418, "grad_norm": 0.8759210259436595, "learning_rate": 9.060815307202105e-06, "loss": 0.4884, "step": 8903 }, { "epoch": 0.8355855855855856, "grad_norm": 0.8931035078078678, "learning_rate": 9.060496750268549e-06, "loss": 0.386, "step": 8904 }, { "epoch": 0.8356794294294294, "grad_norm": 1.4758111515919503, "learning_rate": 9.060178144921211e-06, "loss": 0.4699, "step": 8905 }, { "epoch": 0.8357732732732732, "grad_norm": 1.051337860436888, "learning_rate": 9.059859491163893e-06, "loss": 0.542, "step": 8906 }, { "epoch": 0.8358671171171171, "grad_norm": 0.9946337518763583, "learning_rate": 9.059540789000394e-06, "loss": 0.4898, "step": 8907 }, { "epoch": 0.835960960960961, "grad_norm": 2.9023864476450556, "learning_rate": 9.05922203843451e-06, "loss": 0.5003, "step": 8908 }, { "epoch": 0.8360548048048048, "grad_norm": 1.182778491443981, "learning_rate": 9.058903239470045e-06, "loss": 0.4893, "step": 8909 }, { "epoch": 0.8361486486486487, "grad_norm": 1.5242759953907128, "learning_rate": 9.058584392110799e-06, "loss": 0.4454, "step": 8910 }, { "epoch": 0.8362424924924925, "grad_norm": 1.4139688994671873, "learning_rate": 9.058265496360572e-06, "loss": 0.4987, "step": 8911 }, { "epoch": 0.8363363363363363, "grad_norm": 1.010841158910668, "learning_rate": 9.05794655222317e-06, "loss": 0.5197, "step": 8912 }, { "epoch": 0.8364301801801802, "grad_norm": 0.848853985113769, "learning_rate": 9.057627559702392e-06, "loss": 0.4686, "step": 8913 }, { "epoch": 0.836524024024024, "grad_norm": 0.9227007786029606, "learning_rate": 9.057308518802043e-06, "loss": 0.4562, "step": 8914 }, { "epoch": 0.8366178678678678, "grad_norm": 0.9615230372512108, "learning_rate": 9.056989429525928e-06, "loss": 0.4842, "step": 8915 }, { "epoch": 0.8367117117117117, "grad_norm": 1.0013753558906, "learning_rate": 9.056670291877852e-06, "loss": 0.408, "step": 8916 }, { "epoch": 0.8368055555555556, "grad_norm": 0.9367733365719615, "learning_rate": 9.056351105861617e-06, "loss": 0.4947, "step": 8917 }, { "epoch": 0.8368993993993994, "grad_norm": 2.078774587284659, "learning_rate": 9.05603187148103e-06, "loss": 0.4757, "step": 8918 }, { "epoch": 0.8369932432432432, "grad_norm": 2.6831280615987136, "learning_rate": 9.055712588739898e-06, "loss": 0.5023, "step": 8919 }, { "epoch": 0.8370870870870871, "grad_norm": 0.8956858336677965, "learning_rate": 9.055393257642027e-06, "loss": 0.4566, "step": 8920 }, { "epoch": 0.8371809309309309, "grad_norm": 1.1028082446725729, "learning_rate": 9.055073878191226e-06, "loss": 0.4993, "step": 8921 }, { "epoch": 0.8372747747747747, "grad_norm": 0.9916334232664948, "learning_rate": 9.0547544503913e-06, "loss": 0.4669, "step": 8922 }, { "epoch": 0.8373686186186187, "grad_norm": 1.1465433773917308, "learning_rate": 9.05443497424606e-06, "loss": 0.4456, "step": 8923 }, { "epoch": 0.8374624624624625, "grad_norm": 1.212614431563353, "learning_rate": 9.054115449759316e-06, "loss": 0.4852, "step": 8924 }, { "epoch": 0.8375563063063063, "grad_norm": 1.235099227869413, "learning_rate": 9.053795876934875e-06, "loss": 0.5053, "step": 8925 }, { "epoch": 0.8376501501501501, "grad_norm": 1.024816920829302, "learning_rate": 9.05347625577655e-06, "loss": 0.5016, "step": 8926 }, { "epoch": 0.837743993993994, "grad_norm": 1.1589189241705846, "learning_rate": 9.05315658628815e-06, "loss": 0.3852, "step": 8927 }, { "epoch": 0.8378378378378378, "grad_norm": 1.325915539904268, "learning_rate": 9.052836868473487e-06, "loss": 0.4492, "step": 8928 }, { "epoch": 0.8379316816816816, "grad_norm": 1.2183661354053326, "learning_rate": 9.052517102336373e-06, "loss": 0.471, "step": 8929 }, { "epoch": 0.8380255255255256, "grad_norm": 0.8447169401150937, "learning_rate": 9.05219728788062e-06, "loss": 0.4848, "step": 8930 }, { "epoch": 0.8381193693693694, "grad_norm": 1.6522433236921747, "learning_rate": 9.051877425110042e-06, "loss": 0.5036, "step": 8931 }, { "epoch": 0.8382132132132132, "grad_norm": 0.9075021054637513, "learning_rate": 9.051557514028453e-06, "loss": 0.4429, "step": 8932 }, { "epoch": 0.8383070570570571, "grad_norm": 1.0070135615543736, "learning_rate": 9.051237554639668e-06, "loss": 0.4231, "step": 8933 }, { "epoch": 0.8384009009009009, "grad_norm": 1.0260550201007141, "learning_rate": 9.050917546947497e-06, "loss": 0.4876, "step": 8934 }, { "epoch": 0.8384947447447447, "grad_norm": 0.9027490314879248, "learning_rate": 9.050597490955763e-06, "loss": 0.5081, "step": 8935 }, { "epoch": 0.8385885885885885, "grad_norm": 1.2434458984775814, "learning_rate": 9.050277386668278e-06, "loss": 0.4686, "step": 8936 }, { "epoch": 0.8386824324324325, "grad_norm": 1.2105079732395008, "learning_rate": 9.049957234088858e-06, "loss": 0.4915, "step": 8937 }, { "epoch": 0.8387762762762763, "grad_norm": 1.0018687211624648, "learning_rate": 9.049637033221322e-06, "loss": 0.4824, "step": 8938 }, { "epoch": 0.8388701201201201, "grad_norm": 0.9392325686877606, "learning_rate": 9.049316784069487e-06, "loss": 0.4491, "step": 8939 }, { "epoch": 0.838963963963964, "grad_norm": 1.058155496202941, "learning_rate": 9.048996486637169e-06, "loss": 0.4794, "step": 8940 }, { "epoch": 0.8390578078078078, "grad_norm": 1.390892900415192, "learning_rate": 9.048676140928193e-06, "loss": 0.444, "step": 8941 }, { "epoch": 0.8391516516516516, "grad_norm": 1.5675216437536752, "learning_rate": 9.048355746946372e-06, "loss": 0.479, "step": 8942 }, { "epoch": 0.8392454954954955, "grad_norm": 1.1391221729464076, "learning_rate": 9.04803530469553e-06, "loss": 0.4898, "step": 8943 }, { "epoch": 0.8393393393393394, "grad_norm": 1.0753623531468866, "learning_rate": 9.047714814179487e-06, "loss": 0.4953, "step": 8944 }, { "epoch": 0.8394331831831832, "grad_norm": 1.2714622094960468, "learning_rate": 9.047394275402064e-06, "loss": 0.4619, "step": 8945 }, { "epoch": 0.839527027027027, "grad_norm": 1.0064450798033677, "learning_rate": 9.047073688367082e-06, "loss": 0.4499, "step": 8946 }, { "epoch": 0.8396208708708709, "grad_norm": 1.0151708160997182, "learning_rate": 9.046753053078365e-06, "loss": 0.4581, "step": 8947 }, { "epoch": 0.8397147147147147, "grad_norm": 5.858521055666843, "learning_rate": 9.046432369539736e-06, "loss": 0.4899, "step": 8948 }, { "epoch": 0.8398085585585585, "grad_norm": 1.7140028234815037, "learning_rate": 9.046111637755016e-06, "loss": 0.4683, "step": 8949 }, { "epoch": 0.8399024024024024, "grad_norm": 1.1856450358083892, "learning_rate": 9.045790857728029e-06, "loss": 0.4551, "step": 8950 }, { "epoch": 0.8399962462462462, "grad_norm": 1.3521839010048298, "learning_rate": 9.045470029462604e-06, "loss": 0.4683, "step": 8951 }, { "epoch": 0.8400900900900901, "grad_norm": 1.0201351865136492, "learning_rate": 9.045149152962563e-06, "loss": 0.4878, "step": 8952 }, { "epoch": 0.840183933933934, "grad_norm": 1.2065799839810896, "learning_rate": 9.044828228231732e-06, "loss": 0.5213, "step": 8953 }, { "epoch": 0.8402777777777778, "grad_norm": 1.1006559057755116, "learning_rate": 9.04450725527394e-06, "loss": 0.482, "step": 8954 }, { "epoch": 0.8403716216216216, "grad_norm": 1.107810710137985, "learning_rate": 9.04418623409301e-06, "loss": 0.4757, "step": 8955 }, { "epoch": 0.8404654654654654, "grad_norm": 1.227345051194715, "learning_rate": 9.043865164692772e-06, "loss": 0.4121, "step": 8956 }, { "epoch": 0.8405593093093093, "grad_norm": 1.03490340449032, "learning_rate": 9.043544047077056e-06, "loss": 0.4828, "step": 8957 }, { "epoch": 0.8406531531531531, "grad_norm": 1.3166910818969586, "learning_rate": 9.043222881249685e-06, "loss": 0.4507, "step": 8958 }, { "epoch": 0.840746996996997, "grad_norm": 0.9190429065379324, "learning_rate": 9.042901667214492e-06, "loss": 0.4876, "step": 8959 }, { "epoch": 0.8408408408408409, "grad_norm": 1.1791144512355556, "learning_rate": 9.042580404975308e-06, "loss": 0.4044, "step": 8960 }, { "epoch": 0.8409346846846847, "grad_norm": 1.0317762740556888, "learning_rate": 9.042259094535963e-06, "loss": 0.3931, "step": 8961 }, { "epoch": 0.8410285285285285, "grad_norm": 0.8891332020778114, "learning_rate": 9.041937735900285e-06, "loss": 0.4373, "step": 8962 }, { "epoch": 0.8411223723723724, "grad_norm": 1.150679924940874, "learning_rate": 9.041616329072109e-06, "loss": 0.4126, "step": 8963 }, { "epoch": 0.8412162162162162, "grad_norm": 1.2863420116248323, "learning_rate": 9.041294874055265e-06, "loss": 0.4273, "step": 8964 }, { "epoch": 0.84131006006006, "grad_norm": 1.0752860396762163, "learning_rate": 9.040973370853586e-06, "loss": 0.4503, "step": 8965 }, { "epoch": 0.8414039039039038, "grad_norm": 0.9408576448421326, "learning_rate": 9.040651819470907e-06, "loss": 0.486, "step": 8966 }, { "epoch": 0.8414977477477478, "grad_norm": 1.1070401798704252, "learning_rate": 9.04033021991106e-06, "loss": 0.4715, "step": 8967 }, { "epoch": 0.8415915915915916, "grad_norm": 1.364275457006437, "learning_rate": 9.040008572177881e-06, "loss": 0.4452, "step": 8968 }, { "epoch": 0.8416854354354354, "grad_norm": 0.9631131070970446, "learning_rate": 9.039686876275204e-06, "loss": 0.4366, "step": 8969 }, { "epoch": 0.8417792792792793, "grad_norm": 0.9583348083941032, "learning_rate": 9.039365132206865e-06, "loss": 0.5048, "step": 8970 }, { "epoch": 0.8418731231231231, "grad_norm": 1.0157241975995492, "learning_rate": 9.039043339976701e-06, "loss": 0.483, "step": 8971 }, { "epoch": 0.8419669669669669, "grad_norm": 0.8383218774132509, "learning_rate": 9.038721499588546e-06, "loss": 0.4304, "step": 8972 }, { "epoch": 0.8420608108108109, "grad_norm": 1.3369023075116484, "learning_rate": 9.038399611046241e-06, "loss": 0.4228, "step": 8973 }, { "epoch": 0.8421546546546547, "grad_norm": 1.0886746905545013, "learning_rate": 9.038077674353622e-06, "loss": 0.4827, "step": 8974 }, { "epoch": 0.8422484984984985, "grad_norm": 1.4009806440992414, "learning_rate": 9.037755689514527e-06, "loss": 0.4623, "step": 8975 }, { "epoch": 0.8423423423423423, "grad_norm": 1.3799872314833, "learning_rate": 9.037433656532794e-06, "loss": 0.4617, "step": 8976 }, { "epoch": 0.8424361861861862, "grad_norm": 1.325306311044924, "learning_rate": 9.037111575412267e-06, "loss": 0.5051, "step": 8977 }, { "epoch": 0.84253003003003, "grad_norm": 1.1607353788581123, "learning_rate": 9.036789446156783e-06, "loss": 0.484, "step": 8978 }, { "epoch": 0.8426238738738738, "grad_norm": 1.2359619974049723, "learning_rate": 9.036467268770183e-06, "loss": 0.4633, "step": 8979 }, { "epoch": 0.8427177177177178, "grad_norm": 1.1231209114992897, "learning_rate": 9.036145043256309e-06, "loss": 0.4424, "step": 8980 }, { "epoch": 0.8428115615615616, "grad_norm": 1.1266164454014536, "learning_rate": 9.035822769619002e-06, "loss": 0.4421, "step": 8981 }, { "epoch": 0.8429054054054054, "grad_norm": 1.109944230127791, "learning_rate": 9.035500447862105e-06, "loss": 0.4796, "step": 8982 }, { "epoch": 0.8429992492492493, "grad_norm": 1.1774557256021172, "learning_rate": 9.035178077989462e-06, "loss": 0.4362, "step": 8983 }, { "epoch": 0.8430930930930931, "grad_norm": 1.2190847636759223, "learning_rate": 9.034855660004916e-06, "loss": 0.4534, "step": 8984 }, { "epoch": 0.8431869369369369, "grad_norm": 0.922749609323144, "learning_rate": 9.03453319391231e-06, "loss": 0.4171, "step": 8985 }, { "epoch": 0.8432807807807807, "grad_norm": 1.7410956140000895, "learning_rate": 9.03421067971549e-06, "loss": 0.5005, "step": 8986 }, { "epoch": 0.8433746246246246, "grad_norm": 1.5205573003302526, "learning_rate": 9.033888117418301e-06, "loss": 0.4543, "step": 8987 }, { "epoch": 0.8434684684684685, "grad_norm": 0.9889701942663613, "learning_rate": 9.033565507024592e-06, "loss": 0.4391, "step": 8988 }, { "epoch": 0.8435623123123123, "grad_norm": 0.9869405860471596, "learning_rate": 9.033242848538204e-06, "loss": 0.4489, "step": 8989 }, { "epoch": 0.8436561561561562, "grad_norm": 1.0657195757607745, "learning_rate": 9.032920141962987e-06, "loss": 0.4026, "step": 8990 }, { "epoch": 0.84375, "grad_norm": 0.9577676143141427, "learning_rate": 9.03259738730279e-06, "loss": 0.4306, "step": 8991 }, { "epoch": 0.8438438438438438, "grad_norm": 1.3291229823469817, "learning_rate": 9.032274584561458e-06, "loss": 0.469, "step": 8992 }, { "epoch": 0.8439376876876877, "grad_norm": 0.9846273941305153, "learning_rate": 9.031951733742843e-06, "loss": 0.4923, "step": 8993 }, { "epoch": 0.8440315315315315, "grad_norm": 1.6580031881176163, "learning_rate": 9.031628834850792e-06, "loss": 0.4739, "step": 8994 }, { "epoch": 0.8441253753753754, "grad_norm": 1.746259681549821, "learning_rate": 9.031305887889156e-06, "loss": 0.4464, "step": 8995 }, { "epoch": 0.8442192192192193, "grad_norm": 0.88130898622773, "learning_rate": 9.030982892861786e-06, "loss": 0.4262, "step": 8996 }, { "epoch": 0.8443130630630631, "grad_norm": 1.076026205331965, "learning_rate": 9.030659849772532e-06, "loss": 0.457, "step": 8997 }, { "epoch": 0.8444069069069069, "grad_norm": 1.187377527809997, "learning_rate": 9.030336758625246e-06, "loss": 0.5034, "step": 8998 }, { "epoch": 0.8445007507507507, "grad_norm": 5.33724388656738, "learning_rate": 9.03001361942378e-06, "loss": 0.4288, "step": 8999 }, { "epoch": 0.8445945945945946, "grad_norm": 1.1539097478630997, "learning_rate": 9.029690432171988e-06, "loss": 0.4489, "step": 9000 }, { "epoch": 0.8446884384384384, "grad_norm": 1.1869498338865598, "learning_rate": 9.029367196873723e-06, "loss": 0.4635, "step": 9001 }, { "epoch": 0.8447822822822822, "grad_norm": 1.1920095963428805, "learning_rate": 9.029043913532839e-06, "loss": 0.4916, "step": 9002 }, { "epoch": 0.8448761261261262, "grad_norm": 1.0411039191511966, "learning_rate": 9.02872058215319e-06, "loss": 0.4344, "step": 9003 }, { "epoch": 0.84496996996997, "grad_norm": 1.3342547554323825, "learning_rate": 9.02839720273863e-06, "loss": 0.4153, "step": 9004 }, { "epoch": 0.8450638138138138, "grad_norm": 1.2224844783231967, "learning_rate": 9.028073775293018e-06, "loss": 0.4781, "step": 9005 }, { "epoch": 0.8451576576576577, "grad_norm": 1.9335757330229606, "learning_rate": 9.027750299820209e-06, "loss": 0.5127, "step": 9006 }, { "epoch": 0.8452515015015015, "grad_norm": 1.1604595273098928, "learning_rate": 9.027426776324057e-06, "loss": 0.432, "step": 9007 }, { "epoch": 0.8453453453453453, "grad_norm": 0.8892107688788731, "learning_rate": 9.02710320480842e-06, "loss": 0.4306, "step": 9008 }, { "epoch": 0.8454391891891891, "grad_norm": 0.918311172780182, "learning_rate": 9.026779585277162e-06, "loss": 0.4538, "step": 9009 }, { "epoch": 0.8455330330330331, "grad_norm": 1.4009741439492225, "learning_rate": 9.026455917734134e-06, "loss": 0.452, "step": 9010 }, { "epoch": 0.8456268768768769, "grad_norm": 1.334282139176856, "learning_rate": 9.0261322021832e-06, "loss": 0.4837, "step": 9011 }, { "epoch": 0.8457207207207207, "grad_norm": 1.1512940621393888, "learning_rate": 9.025808438628215e-06, "loss": 0.4918, "step": 9012 }, { "epoch": 0.8458145645645646, "grad_norm": 1.3618748819390198, "learning_rate": 9.025484627073044e-06, "loss": 0.4484, "step": 9013 }, { "epoch": 0.8459084084084084, "grad_norm": 2.546558548965616, "learning_rate": 9.025160767521546e-06, "loss": 0.4565, "step": 9014 }, { "epoch": 0.8460022522522522, "grad_norm": 1.51577202204321, "learning_rate": 9.024836859977583e-06, "loss": 0.5123, "step": 9015 }, { "epoch": 0.8460960960960962, "grad_norm": 1.3934803393027284, "learning_rate": 9.024512904445016e-06, "loss": 0.441, "step": 9016 }, { "epoch": 0.84618993993994, "grad_norm": 1.3233246979791755, "learning_rate": 9.024188900927708e-06, "loss": 0.5168, "step": 9017 }, { "epoch": 0.8462837837837838, "grad_norm": 1.6262120147038348, "learning_rate": 9.023864849429522e-06, "loss": 0.4492, "step": 9018 }, { "epoch": 0.8463776276276276, "grad_norm": 0.9497612951961466, "learning_rate": 9.023540749954322e-06, "loss": 0.4736, "step": 9019 }, { "epoch": 0.8464714714714715, "grad_norm": 1.1958373403881328, "learning_rate": 9.023216602505971e-06, "loss": 0.4357, "step": 9020 }, { "epoch": 0.8465653153153153, "grad_norm": 1.1816199149531994, "learning_rate": 9.022892407088337e-06, "loss": 0.495, "step": 9021 }, { "epoch": 0.8466591591591591, "grad_norm": 1.6718727838780068, "learning_rate": 9.022568163705282e-06, "loss": 0.483, "step": 9022 }, { "epoch": 0.846753003003003, "grad_norm": 1.0138858224940264, "learning_rate": 9.022243872360673e-06, "loss": 0.4573, "step": 9023 }, { "epoch": 0.8468468468468469, "grad_norm": 1.0350047085451048, "learning_rate": 9.021919533058377e-06, "loss": 0.4775, "step": 9024 }, { "epoch": 0.8469406906906907, "grad_norm": 1.0697345965935117, "learning_rate": 9.021595145802263e-06, "loss": 0.4786, "step": 9025 }, { "epoch": 0.8470345345345346, "grad_norm": 1.126444118723428, "learning_rate": 9.021270710596194e-06, "loss": 0.4351, "step": 9026 }, { "epoch": 0.8471283783783784, "grad_norm": 1.1487155914658977, "learning_rate": 9.020946227444043e-06, "loss": 0.4282, "step": 9027 }, { "epoch": 0.8472222222222222, "grad_norm": 8.147095494603743, "learning_rate": 9.020621696349676e-06, "loss": 0.4609, "step": 9028 }, { "epoch": 0.847316066066066, "grad_norm": 0.9347914028221029, "learning_rate": 9.020297117316963e-06, "loss": 0.4597, "step": 9029 }, { "epoch": 0.8474099099099099, "grad_norm": 1.2828698316690674, "learning_rate": 9.019972490349775e-06, "loss": 0.5036, "step": 9030 }, { "epoch": 0.8475037537537538, "grad_norm": 1.0655116474043882, "learning_rate": 9.019647815451982e-06, "loss": 0.4411, "step": 9031 }, { "epoch": 0.8475975975975976, "grad_norm": 1.1696148636316586, "learning_rate": 9.019323092627454e-06, "loss": 0.5027, "step": 9032 }, { "epoch": 0.8476914414414415, "grad_norm": 0.952991173954002, "learning_rate": 9.018998321880064e-06, "loss": 0.4702, "step": 9033 }, { "epoch": 0.8477852852852853, "grad_norm": 1.0945688129663378, "learning_rate": 9.018673503213685e-06, "loss": 0.4524, "step": 9034 }, { "epoch": 0.8478791291291291, "grad_norm": 1.0443114498838755, "learning_rate": 9.018348636632188e-06, "loss": 0.4531, "step": 9035 }, { "epoch": 0.847972972972973, "grad_norm": 1.0573478375587386, "learning_rate": 9.018023722139449e-06, "loss": 0.4575, "step": 9036 }, { "epoch": 0.8480668168168168, "grad_norm": 3.292460610997526, "learning_rate": 9.017698759739337e-06, "loss": 0.4357, "step": 9037 }, { "epoch": 0.8481606606606606, "grad_norm": 1.5221609096308557, "learning_rate": 9.017373749435733e-06, "loss": 0.4456, "step": 9038 }, { "epoch": 0.8482545045045045, "grad_norm": 1.0686615511947104, "learning_rate": 9.017048691232507e-06, "loss": 0.4487, "step": 9039 }, { "epoch": 0.8483483483483484, "grad_norm": 1.3991001523847086, "learning_rate": 9.016723585133537e-06, "loss": 0.5028, "step": 9040 }, { "epoch": 0.8484421921921922, "grad_norm": 0.9851076832495078, "learning_rate": 9.0163984311427e-06, "loss": 0.4608, "step": 9041 }, { "epoch": 0.848536036036036, "grad_norm": 1.083268996089749, "learning_rate": 9.016073229263871e-06, "loss": 0.5122, "step": 9042 }, { "epoch": 0.8486298798798799, "grad_norm": 1.2108346578237372, "learning_rate": 9.015747979500929e-06, "loss": 0.4939, "step": 9043 }, { "epoch": 0.8487237237237237, "grad_norm": 1.026428572149489, "learning_rate": 9.01542268185775e-06, "loss": 0.3949, "step": 9044 }, { "epoch": 0.8488175675675675, "grad_norm": 1.2571236993446882, "learning_rate": 9.015097336338214e-06, "loss": 0.4368, "step": 9045 }, { "epoch": 0.8489114114114115, "grad_norm": 1.0018735221274888, "learning_rate": 9.0147719429462e-06, "loss": 0.4549, "step": 9046 }, { "epoch": 0.8490052552552553, "grad_norm": 1.2378851164268099, "learning_rate": 9.014446501685587e-06, "loss": 0.4255, "step": 9047 }, { "epoch": 0.8490990990990991, "grad_norm": 1.1043191874957599, "learning_rate": 9.014121012560257e-06, "loss": 0.4861, "step": 9048 }, { "epoch": 0.8491929429429429, "grad_norm": 1.126871267674792, "learning_rate": 9.01379547557409e-06, "loss": 0.488, "step": 9049 }, { "epoch": 0.8492867867867868, "grad_norm": 1.4115000065541414, "learning_rate": 9.013469890730967e-06, "loss": 0.471, "step": 9050 }, { "epoch": 0.8493806306306306, "grad_norm": 2.0291754547692316, "learning_rate": 9.01314425803477e-06, "loss": 0.4846, "step": 9051 }, { "epoch": 0.8494744744744744, "grad_norm": 1.0278148219915784, "learning_rate": 9.012818577489381e-06, "loss": 0.4202, "step": 9052 }, { "epoch": 0.8495683183183184, "grad_norm": 2.982552685487179, "learning_rate": 9.012492849098685e-06, "loss": 0.4194, "step": 9053 }, { "epoch": 0.8496621621621622, "grad_norm": 1.0675340004768163, "learning_rate": 9.012167072866563e-06, "loss": 0.4505, "step": 9054 }, { "epoch": 0.849756006006006, "grad_norm": 0.9666186083895115, "learning_rate": 9.011841248796902e-06, "loss": 0.4298, "step": 9055 }, { "epoch": 0.8498498498498499, "grad_norm": 1.114498493560467, "learning_rate": 9.011515376893584e-06, "loss": 0.4603, "step": 9056 }, { "epoch": 0.8499436936936937, "grad_norm": 0.9756184940865845, "learning_rate": 9.011189457160496e-06, "loss": 0.4731, "step": 9057 }, { "epoch": 0.8500375375375375, "grad_norm": 1.389874172257771, "learning_rate": 9.010863489601526e-06, "loss": 0.4824, "step": 9058 }, { "epoch": 0.8501313813813813, "grad_norm": 1.0781796188522168, "learning_rate": 9.010537474220558e-06, "loss": 0.4443, "step": 9059 }, { "epoch": 0.8502252252252253, "grad_norm": 1.102472225202295, "learning_rate": 9.01021141102148e-06, "loss": 0.477, "step": 9060 }, { "epoch": 0.8503190690690691, "grad_norm": 0.9804790683211497, "learning_rate": 9.009885300008178e-06, "loss": 0.4222, "step": 9061 }, { "epoch": 0.8504129129129129, "grad_norm": 1.3064360951401217, "learning_rate": 9.009559141184543e-06, "loss": 0.4573, "step": 9062 }, { "epoch": 0.8505067567567568, "grad_norm": 1.1470033699124804, "learning_rate": 9.009232934554462e-06, "loss": 0.4642, "step": 9063 }, { "epoch": 0.8506006006006006, "grad_norm": 1.2261344506752023, "learning_rate": 9.008906680121823e-06, "loss": 0.445, "step": 9064 }, { "epoch": 0.8506944444444444, "grad_norm": 1.1613845106532272, "learning_rate": 9.00858037789052e-06, "loss": 0.4206, "step": 9065 }, { "epoch": 0.8507882882882883, "grad_norm": 1.4339404491529695, "learning_rate": 9.008254027864442e-06, "loss": 0.5169, "step": 9066 }, { "epoch": 0.8508821321321322, "grad_norm": 1.424015219372902, "learning_rate": 9.007927630047476e-06, "loss": 0.4604, "step": 9067 }, { "epoch": 0.850975975975976, "grad_norm": 1.098209495117159, "learning_rate": 9.007601184443523e-06, "loss": 0.4514, "step": 9068 }, { "epoch": 0.8510698198198198, "grad_norm": 0.9841608341179469, "learning_rate": 9.007274691056465e-06, "loss": 0.3845, "step": 9069 }, { "epoch": 0.8511636636636637, "grad_norm": 0.9055065498985895, "learning_rate": 9.0069481498902e-06, "loss": 0.4733, "step": 9070 }, { "epoch": 0.8512575075075075, "grad_norm": 1.1072027247664475, "learning_rate": 9.00662156094862e-06, "loss": 0.4719, "step": 9071 }, { "epoch": 0.8513513513513513, "grad_norm": 1.0667055322262724, "learning_rate": 9.006294924235622e-06, "loss": 0.5044, "step": 9072 }, { "epoch": 0.8514451951951952, "grad_norm": 1.2212146267725754, "learning_rate": 9.005968239755094e-06, "loss": 0.5233, "step": 9073 }, { "epoch": 0.851539039039039, "grad_norm": 1.691170156008365, "learning_rate": 9.005641507510938e-06, "loss": 0.4577, "step": 9074 }, { "epoch": 0.8516328828828829, "grad_norm": 1.017144746121899, "learning_rate": 9.005314727507047e-06, "loss": 0.4438, "step": 9075 }, { "epoch": 0.8517267267267268, "grad_norm": 1.2773165744478063, "learning_rate": 9.004987899747316e-06, "loss": 0.5087, "step": 9076 }, { "epoch": 0.8518205705705706, "grad_norm": 1.31710309451066, "learning_rate": 9.004661024235644e-06, "loss": 0.449, "step": 9077 }, { "epoch": 0.8519144144144144, "grad_norm": 1.7281103826676172, "learning_rate": 9.004334100975927e-06, "loss": 0.4438, "step": 9078 }, { "epoch": 0.8520082582582582, "grad_norm": 0.8688037145755081, "learning_rate": 9.004007129972064e-06, "loss": 0.4484, "step": 9079 }, { "epoch": 0.8521021021021021, "grad_norm": 1.470433455109023, "learning_rate": 9.00368011122795e-06, "loss": 0.431, "step": 9080 }, { "epoch": 0.8521959459459459, "grad_norm": 0.9901204375911252, "learning_rate": 9.00335304474749e-06, "loss": 0.4642, "step": 9081 }, { "epoch": 0.8522897897897898, "grad_norm": 1.004025563414456, "learning_rate": 9.003025930534578e-06, "loss": 0.4616, "step": 9082 }, { "epoch": 0.8523836336336337, "grad_norm": 3.91453182400924, "learning_rate": 9.00269876859312e-06, "loss": 0.5019, "step": 9083 }, { "epoch": 0.8524774774774775, "grad_norm": 0.9873151620380708, "learning_rate": 9.00237155892701e-06, "loss": 0.4644, "step": 9084 }, { "epoch": 0.8525713213213213, "grad_norm": 1.0651668097323364, "learning_rate": 9.002044301540153e-06, "loss": 0.4886, "step": 9085 }, { "epoch": 0.8526651651651652, "grad_norm": 1.7005787673276123, "learning_rate": 9.001716996436453e-06, "loss": 0.5263, "step": 9086 }, { "epoch": 0.852759009009009, "grad_norm": 1.5437587120816714, "learning_rate": 9.00138964361981e-06, "loss": 0.467, "step": 9087 }, { "epoch": 0.8528528528528528, "grad_norm": 1.0470224723833934, "learning_rate": 9.001062243094124e-06, "loss": 0.4748, "step": 9088 }, { "epoch": 0.8529466966966966, "grad_norm": 1.0066020075383357, "learning_rate": 9.000734794863307e-06, "loss": 0.451, "step": 9089 }, { "epoch": 0.8530405405405406, "grad_norm": 1.0649574879940187, "learning_rate": 9.000407298931255e-06, "loss": 0.4526, "step": 9090 }, { "epoch": 0.8531343843843844, "grad_norm": 0.9772534308148925, "learning_rate": 9.000079755301876e-06, "loss": 0.4377, "step": 9091 }, { "epoch": 0.8532282282282282, "grad_norm": 1.7600935725205635, "learning_rate": 8.999752163979077e-06, "loss": 0.4849, "step": 9092 }, { "epoch": 0.8533220720720721, "grad_norm": 0.8435117964069029, "learning_rate": 8.999424524966761e-06, "loss": 0.3857, "step": 9093 }, { "epoch": 0.8534159159159159, "grad_norm": 0.8777369062384187, "learning_rate": 8.999096838268835e-06, "loss": 0.4671, "step": 9094 }, { "epoch": 0.8535097597597597, "grad_norm": 1.1678666615189386, "learning_rate": 8.998769103889207e-06, "loss": 0.4515, "step": 9095 }, { "epoch": 0.8536036036036037, "grad_norm": 1.3472651111377247, "learning_rate": 8.998441321831785e-06, "loss": 0.572, "step": 9096 }, { "epoch": 0.8536974474474475, "grad_norm": 2.2370760690167018, "learning_rate": 8.998113492100477e-06, "loss": 0.4454, "step": 9097 }, { "epoch": 0.8537912912912913, "grad_norm": 1.161947810166297, "learning_rate": 8.99778561469919e-06, "loss": 0.4887, "step": 9098 }, { "epoch": 0.8538851351351351, "grad_norm": 1.178155308101094, "learning_rate": 8.997457689631837e-06, "loss": 0.4323, "step": 9099 }, { "epoch": 0.853978978978979, "grad_norm": 1.6232640626868817, "learning_rate": 8.997129716902323e-06, "loss": 0.4648, "step": 9100 }, { "epoch": 0.8540728228228228, "grad_norm": 1.259245968640247, "learning_rate": 8.996801696514562e-06, "loss": 0.4852, "step": 9101 }, { "epoch": 0.8541666666666666, "grad_norm": 1.111499669801138, "learning_rate": 8.996473628472464e-06, "loss": 0.5297, "step": 9102 }, { "epoch": 0.8542605105105106, "grad_norm": 1.1012676534176453, "learning_rate": 8.99614551277994e-06, "loss": 0.454, "step": 9103 }, { "epoch": 0.8543543543543544, "grad_norm": 0.9955508954399303, "learning_rate": 8.995817349440902e-06, "loss": 0.4754, "step": 9104 }, { "epoch": 0.8544481981981982, "grad_norm": 1.0323994933466818, "learning_rate": 8.995489138459265e-06, "loss": 0.4736, "step": 9105 }, { "epoch": 0.8545420420420421, "grad_norm": 0.9462524535519197, "learning_rate": 8.99516087983894e-06, "loss": 0.4896, "step": 9106 }, { "epoch": 0.8546358858858859, "grad_norm": 1.089516962915366, "learning_rate": 8.994832573583842e-06, "loss": 0.5137, "step": 9107 }, { "epoch": 0.8547297297297297, "grad_norm": 1.0528046318146722, "learning_rate": 8.994504219697886e-06, "loss": 0.4954, "step": 9108 }, { "epoch": 0.8548235735735735, "grad_norm": 1.1434705857598906, "learning_rate": 8.994175818184985e-06, "loss": 0.4215, "step": 9109 }, { "epoch": 0.8549174174174174, "grad_norm": 1.4509723148851619, "learning_rate": 8.993847369049055e-06, "loss": 0.4594, "step": 9110 }, { "epoch": 0.8550112612612613, "grad_norm": 1.167678685684146, "learning_rate": 8.993518872294013e-06, "loss": 0.4639, "step": 9111 }, { "epoch": 0.8551051051051051, "grad_norm": 0.9362346909754279, "learning_rate": 8.993190327923777e-06, "loss": 0.4609, "step": 9112 }, { "epoch": 0.855198948948949, "grad_norm": 1.0757970338407175, "learning_rate": 8.992861735942262e-06, "loss": 0.4506, "step": 9113 }, { "epoch": 0.8552927927927928, "grad_norm": 0.9458280578240132, "learning_rate": 8.992533096353387e-06, "loss": 0.4516, "step": 9114 }, { "epoch": 0.8553866366366366, "grad_norm": 0.9889146247119146, "learning_rate": 8.992204409161067e-06, "loss": 0.4756, "step": 9115 }, { "epoch": 0.8554804804804805, "grad_norm": 0.9779311133330999, "learning_rate": 8.991875674369227e-06, "loss": 0.4167, "step": 9116 }, { "epoch": 0.8555743243243243, "grad_norm": 1.6603023987499879, "learning_rate": 8.991546891981783e-06, "loss": 0.4873, "step": 9117 }, { "epoch": 0.8556681681681682, "grad_norm": 1.13390454606826, "learning_rate": 8.991218062002656e-06, "loss": 0.4785, "step": 9118 }, { "epoch": 0.855762012012012, "grad_norm": 1.1071565217529453, "learning_rate": 8.990889184435765e-06, "loss": 0.4287, "step": 9119 }, { "epoch": 0.8558558558558559, "grad_norm": 0.9802445262221282, "learning_rate": 8.990560259285032e-06, "loss": 0.4113, "step": 9120 }, { "epoch": 0.8559496996996997, "grad_norm": 0.9346799026113445, "learning_rate": 8.990231286554382e-06, "loss": 0.4069, "step": 9121 }, { "epoch": 0.8560435435435435, "grad_norm": 1.0168381155547017, "learning_rate": 8.989902266247731e-06, "loss": 0.4547, "step": 9122 }, { "epoch": 0.8561373873873874, "grad_norm": 1.1048368562785373, "learning_rate": 8.98957319836901e-06, "loss": 0.4719, "step": 9123 }, { "epoch": 0.8562312312312312, "grad_norm": 0.9177562300434333, "learning_rate": 8.989244082922136e-06, "loss": 0.4258, "step": 9124 }, { "epoch": 0.856325075075075, "grad_norm": 3.562434060497001, "learning_rate": 8.988914919911036e-06, "loss": 0.4167, "step": 9125 }, { "epoch": 0.856418918918919, "grad_norm": 1.525942268521972, "learning_rate": 8.988585709339632e-06, "loss": 0.4984, "step": 9126 }, { "epoch": 0.8565127627627628, "grad_norm": 1.0166783142299332, "learning_rate": 8.988256451211854e-06, "loss": 0.5145, "step": 9127 }, { "epoch": 0.8566066066066066, "grad_norm": 1.906859169685594, "learning_rate": 8.987927145531624e-06, "loss": 0.4477, "step": 9128 }, { "epoch": 0.8567004504504504, "grad_norm": 0.9161800627879629, "learning_rate": 8.987597792302867e-06, "loss": 0.5023, "step": 9129 }, { "epoch": 0.8567942942942943, "grad_norm": 0.9730874787804016, "learning_rate": 8.987268391529516e-06, "loss": 0.4711, "step": 9130 }, { "epoch": 0.8568881381381381, "grad_norm": 0.9857550638981682, "learning_rate": 8.986938943215493e-06, "loss": 0.4387, "step": 9131 }, { "epoch": 0.8569819819819819, "grad_norm": 1.0125770479055995, "learning_rate": 8.986609447364726e-06, "loss": 0.4236, "step": 9132 }, { "epoch": 0.8570758258258259, "grad_norm": 0.9961832411543667, "learning_rate": 8.986279903981147e-06, "loss": 0.4784, "step": 9133 }, { "epoch": 0.8571696696696697, "grad_norm": 1.3874853131046774, "learning_rate": 8.985950313068683e-06, "loss": 0.5188, "step": 9134 }, { "epoch": 0.8572635135135135, "grad_norm": 1.3496248481690958, "learning_rate": 8.985620674631265e-06, "loss": 0.4674, "step": 9135 }, { "epoch": 0.8573573573573574, "grad_norm": 1.0294713836739982, "learning_rate": 8.985290988672822e-06, "loss": 0.5213, "step": 9136 }, { "epoch": 0.8574512012012012, "grad_norm": 1.1734770688553282, "learning_rate": 8.984961255197286e-06, "loss": 0.4178, "step": 9137 }, { "epoch": 0.857545045045045, "grad_norm": 2.6962037152732776, "learning_rate": 8.984631474208587e-06, "loss": 0.4973, "step": 9138 }, { "epoch": 0.8576388888888888, "grad_norm": 1.1305778549859762, "learning_rate": 8.98430164571066e-06, "loss": 0.4848, "step": 9139 }, { "epoch": 0.8577327327327328, "grad_norm": 1.226418332135449, "learning_rate": 8.983971769707433e-06, "loss": 0.4568, "step": 9140 }, { "epoch": 0.8578265765765766, "grad_norm": 0.9638106425098578, "learning_rate": 8.983641846202843e-06, "loss": 0.4522, "step": 9141 }, { "epoch": 0.8579204204204204, "grad_norm": 1.0792108284757593, "learning_rate": 8.983311875200822e-06, "loss": 0.4781, "step": 9142 }, { "epoch": 0.8580142642642643, "grad_norm": 2.8515156380452087, "learning_rate": 8.982981856705306e-06, "loss": 0.4185, "step": 9143 }, { "epoch": 0.8581081081081081, "grad_norm": 0.9284409770415974, "learning_rate": 8.982651790720227e-06, "loss": 0.4523, "step": 9144 }, { "epoch": 0.8582019519519519, "grad_norm": 1.171685719275948, "learning_rate": 8.982321677249524e-06, "loss": 0.4477, "step": 9145 }, { "epoch": 0.8582957957957958, "grad_norm": 1.1014611010792144, "learning_rate": 8.981991516297129e-06, "loss": 0.4751, "step": 9146 }, { "epoch": 0.8583896396396397, "grad_norm": 1.170757823646207, "learning_rate": 8.981661307866982e-06, "loss": 0.4906, "step": 9147 }, { "epoch": 0.8584834834834835, "grad_norm": 0.9658416051455821, "learning_rate": 8.981331051963016e-06, "loss": 0.4084, "step": 9148 }, { "epoch": 0.8585773273273273, "grad_norm": 0.9633768793970441, "learning_rate": 8.981000748589173e-06, "loss": 0.4425, "step": 9149 }, { "epoch": 0.8586711711711712, "grad_norm": 1.0006093957119337, "learning_rate": 8.98067039774939e-06, "loss": 0.5217, "step": 9150 }, { "epoch": 0.858765015015015, "grad_norm": 1.0057175767246394, "learning_rate": 8.980339999447605e-06, "loss": 0.4738, "step": 9151 }, { "epoch": 0.8588588588588588, "grad_norm": 1.007784913587666, "learning_rate": 8.980009553687757e-06, "loss": 0.4567, "step": 9152 }, { "epoch": 0.8589527027027027, "grad_norm": 1.107198206974201, "learning_rate": 8.979679060473787e-06, "loss": 0.4805, "step": 9153 }, { "epoch": 0.8590465465465466, "grad_norm": 1.608563977608952, "learning_rate": 8.979348519809636e-06, "loss": 0.4156, "step": 9154 }, { "epoch": 0.8591403903903904, "grad_norm": 1.0619923298574057, "learning_rate": 8.979017931699243e-06, "loss": 0.4604, "step": 9155 }, { "epoch": 0.8592342342342343, "grad_norm": 1.5468250277132487, "learning_rate": 8.978687296146553e-06, "loss": 0.4401, "step": 9156 }, { "epoch": 0.8593280780780781, "grad_norm": 0.9537552951165845, "learning_rate": 8.978356613155504e-06, "loss": 0.4697, "step": 9157 }, { "epoch": 0.8594219219219219, "grad_norm": 1.2163412258650754, "learning_rate": 8.97802588273004e-06, "loss": 0.4734, "step": 9158 }, { "epoch": 0.8595157657657657, "grad_norm": 1.0350845541643083, "learning_rate": 8.977695104874109e-06, "loss": 0.4632, "step": 9159 }, { "epoch": 0.8596096096096096, "grad_norm": 1.0346690732409993, "learning_rate": 8.977364279591649e-06, "loss": 0.506, "step": 9160 }, { "epoch": 0.8597034534534534, "grad_norm": 1.9867692354524493, "learning_rate": 8.977033406886606e-06, "loss": 0.449, "step": 9161 }, { "epoch": 0.8597972972972973, "grad_norm": 1.0853366631121553, "learning_rate": 8.976702486762925e-06, "loss": 0.4537, "step": 9162 }, { "epoch": 0.8598911411411412, "grad_norm": 0.9980050135104396, "learning_rate": 8.976371519224554e-06, "loss": 0.4451, "step": 9163 }, { "epoch": 0.859984984984985, "grad_norm": 1.3134302414076349, "learning_rate": 8.976040504275437e-06, "loss": 0.4648, "step": 9164 }, { "epoch": 0.8600788288288288, "grad_norm": 1.654353674082686, "learning_rate": 8.97570944191952e-06, "loss": 0.448, "step": 9165 }, { "epoch": 0.8601726726726727, "grad_norm": 0.9362518309236021, "learning_rate": 8.975378332160752e-06, "loss": 0.4185, "step": 9166 }, { "epoch": 0.8602665165165165, "grad_norm": 1.0521463259155444, "learning_rate": 8.975047175003082e-06, "loss": 0.499, "step": 9167 }, { "epoch": 0.8603603603603603, "grad_norm": 1.141755056495878, "learning_rate": 8.974715970450454e-06, "loss": 0.4992, "step": 9168 }, { "epoch": 0.8604542042042042, "grad_norm": 1.242309937578375, "learning_rate": 8.97438471850682e-06, "loss": 0.487, "step": 9169 }, { "epoch": 0.8605480480480481, "grad_norm": 1.363928574522361, "learning_rate": 8.97405341917613e-06, "loss": 0.4382, "step": 9170 }, { "epoch": 0.8606418918918919, "grad_norm": 1.329155180806637, "learning_rate": 8.973722072462334e-06, "loss": 0.4481, "step": 9171 }, { "epoch": 0.8607357357357357, "grad_norm": 1.04804375352795, "learning_rate": 8.973390678369382e-06, "loss": 0.421, "step": 9172 }, { "epoch": 0.8608295795795796, "grad_norm": 1.062637915987918, "learning_rate": 8.973059236901224e-06, "loss": 0.49, "step": 9173 }, { "epoch": 0.8609234234234234, "grad_norm": 1.0108438678689768, "learning_rate": 8.972727748061815e-06, "loss": 0.4217, "step": 9174 }, { "epoch": 0.8610172672672672, "grad_norm": 1.366153584333397, "learning_rate": 8.972396211855103e-06, "loss": 0.4931, "step": 9175 }, { "epoch": 0.8611111111111112, "grad_norm": 1.042812019853423, "learning_rate": 8.972064628285047e-06, "loss": 0.4539, "step": 9176 }, { "epoch": 0.861204954954955, "grad_norm": 1.4272988787185064, "learning_rate": 8.971732997355593e-06, "loss": 0.4648, "step": 9177 }, { "epoch": 0.8612987987987988, "grad_norm": 0.9820222058281366, "learning_rate": 8.971401319070702e-06, "loss": 0.4768, "step": 9178 }, { "epoch": 0.8613926426426426, "grad_norm": 1.2340528305358531, "learning_rate": 8.971069593434323e-06, "loss": 0.4807, "step": 9179 }, { "epoch": 0.8614864864864865, "grad_norm": 1.0343667696234824, "learning_rate": 8.970737820450417e-06, "loss": 0.4229, "step": 9180 }, { "epoch": 0.8615803303303303, "grad_norm": 1.2052285770015148, "learning_rate": 8.970406000122933e-06, "loss": 0.4119, "step": 9181 }, { "epoch": 0.8616741741741741, "grad_norm": 1.0192439654615315, "learning_rate": 8.970074132455836e-06, "loss": 0.5043, "step": 9182 }, { "epoch": 0.8617680180180181, "grad_norm": 1.1272250255643739, "learning_rate": 8.969742217453073e-06, "loss": 0.5249, "step": 9183 }, { "epoch": 0.8618618618618619, "grad_norm": 1.1021972656153267, "learning_rate": 8.969410255118607e-06, "loss": 0.4625, "step": 9184 }, { "epoch": 0.8619557057057057, "grad_norm": 1.0385685366733377, "learning_rate": 8.969078245456397e-06, "loss": 0.4647, "step": 9185 }, { "epoch": 0.8620495495495496, "grad_norm": 1.1927709011451835, "learning_rate": 8.968746188470401e-06, "loss": 0.5032, "step": 9186 }, { "epoch": 0.8621433933933934, "grad_norm": 1.255198262277344, "learning_rate": 8.968414084164574e-06, "loss": 0.4999, "step": 9187 }, { "epoch": 0.8622372372372372, "grad_norm": 1.4269080197395143, "learning_rate": 8.968081932542881e-06, "loss": 0.4513, "step": 9188 }, { "epoch": 0.862331081081081, "grad_norm": 0.9663114362177744, "learning_rate": 8.967749733609279e-06, "loss": 0.5057, "step": 9189 }, { "epoch": 0.862424924924925, "grad_norm": 1.9019332000372677, "learning_rate": 8.967417487367731e-06, "loss": 0.468, "step": 9190 }, { "epoch": 0.8625187687687688, "grad_norm": 0.9939620330841793, "learning_rate": 8.967085193822195e-06, "loss": 0.4575, "step": 9191 }, { "epoch": 0.8626126126126126, "grad_norm": 1.1420923427701506, "learning_rate": 8.966752852976639e-06, "loss": 0.4419, "step": 9192 }, { "epoch": 0.8627064564564565, "grad_norm": 1.2021965137816688, "learning_rate": 8.96642046483502e-06, "loss": 0.4401, "step": 9193 }, { "epoch": 0.8628003003003003, "grad_norm": 0.9254653047557883, "learning_rate": 8.966088029401302e-06, "loss": 0.4323, "step": 9194 }, { "epoch": 0.8628941441441441, "grad_norm": 1.1772914807147268, "learning_rate": 8.965755546679452e-06, "loss": 0.4758, "step": 9195 }, { "epoch": 0.862987987987988, "grad_norm": 1.1582371508944298, "learning_rate": 8.965423016673429e-06, "loss": 0.5077, "step": 9196 }, { "epoch": 0.8630818318318318, "grad_norm": 1.079383685789928, "learning_rate": 8.965090439387202e-06, "loss": 0.4279, "step": 9197 }, { "epoch": 0.8631756756756757, "grad_norm": 1.2066207641704951, "learning_rate": 8.964757814824736e-06, "loss": 0.503, "step": 9198 }, { "epoch": 0.8632695195195195, "grad_norm": 1.1148340675560227, "learning_rate": 8.964425142989994e-06, "loss": 0.4615, "step": 9199 }, { "epoch": 0.8633633633633634, "grad_norm": 1.0853230676914585, "learning_rate": 8.964092423886946e-06, "loss": 0.4844, "step": 9200 }, { "epoch": 0.8634572072072072, "grad_norm": 1.2675140405217526, "learning_rate": 8.963759657519557e-06, "loss": 0.4693, "step": 9201 }, { "epoch": 0.863551051051051, "grad_norm": 0.9875084739734099, "learning_rate": 8.963426843891796e-06, "loss": 0.455, "step": 9202 }, { "epoch": 0.8636448948948949, "grad_norm": 1.241927130713439, "learning_rate": 8.963093983007629e-06, "loss": 0.4737, "step": 9203 }, { "epoch": 0.8637387387387387, "grad_norm": 0.9639175323565948, "learning_rate": 8.962761074871028e-06, "loss": 0.4311, "step": 9204 }, { "epoch": 0.8638325825825826, "grad_norm": 0.9483293919028479, "learning_rate": 8.962428119485958e-06, "loss": 0.4333, "step": 9205 }, { "epoch": 0.8639264264264265, "grad_norm": 1.2134881704382914, "learning_rate": 8.96209511685639e-06, "loss": 0.4696, "step": 9206 }, { "epoch": 0.8640202702702703, "grad_norm": 1.1159817606661007, "learning_rate": 8.961762066986298e-06, "loss": 0.5211, "step": 9207 }, { "epoch": 0.8641141141141141, "grad_norm": 1.0868874935450115, "learning_rate": 8.96142896987965e-06, "loss": 0.4922, "step": 9208 }, { "epoch": 0.8642079579579579, "grad_norm": 1.1007028218763948, "learning_rate": 8.961095825540419e-06, "loss": 0.4426, "step": 9209 }, { "epoch": 0.8643018018018018, "grad_norm": 0.9750947466203774, "learning_rate": 8.960762633972575e-06, "loss": 0.4522, "step": 9210 }, { "epoch": 0.8643956456456456, "grad_norm": 1.2039611417040492, "learning_rate": 8.960429395180092e-06, "loss": 0.4268, "step": 9211 }, { "epoch": 0.8644894894894894, "grad_norm": 1.214221351178427, "learning_rate": 8.960096109166944e-06, "loss": 0.4815, "step": 9212 }, { "epoch": 0.8645833333333334, "grad_norm": 0.9816572694249672, "learning_rate": 8.959762775937104e-06, "loss": 0.4587, "step": 9213 }, { "epoch": 0.8646771771771772, "grad_norm": 1.188593330449523, "learning_rate": 8.959429395494545e-06, "loss": 0.4935, "step": 9214 }, { "epoch": 0.864771021021021, "grad_norm": 0.9596536161332585, "learning_rate": 8.959095967843246e-06, "loss": 0.4512, "step": 9215 }, { "epoch": 0.8648648648648649, "grad_norm": 1.5591824822207918, "learning_rate": 8.95876249298718e-06, "loss": 0.4908, "step": 9216 }, { "epoch": 0.8649587087087087, "grad_norm": 1.0412395956935871, "learning_rate": 8.95842897093032e-06, "loss": 0.4846, "step": 9217 }, { "epoch": 0.8650525525525525, "grad_norm": 0.9281888312003369, "learning_rate": 8.958095401676645e-06, "loss": 0.4634, "step": 9218 }, { "epoch": 0.8651463963963963, "grad_norm": 1.0244549935743656, "learning_rate": 8.957761785230134e-06, "loss": 0.4656, "step": 9219 }, { "epoch": 0.8652402402402403, "grad_norm": 1.1549199415967153, "learning_rate": 8.957428121594764e-06, "loss": 0.4441, "step": 9220 }, { "epoch": 0.8653340840840841, "grad_norm": 1.0351508388900545, "learning_rate": 8.957094410774513e-06, "loss": 0.4295, "step": 9221 }, { "epoch": 0.8654279279279279, "grad_norm": 0.9214343684315448, "learning_rate": 8.95676065277336e-06, "loss": 0.4471, "step": 9222 }, { "epoch": 0.8655217717717718, "grad_norm": 1.3159582212456535, "learning_rate": 8.956426847595281e-06, "loss": 0.4608, "step": 9223 }, { "epoch": 0.8656156156156156, "grad_norm": 0.9819469158567737, "learning_rate": 8.956092995244262e-06, "loss": 0.3662, "step": 9224 }, { "epoch": 0.8657094594594594, "grad_norm": 1.5318616271379768, "learning_rate": 8.955759095724282e-06, "loss": 0.4937, "step": 9225 }, { "epoch": 0.8658033033033034, "grad_norm": 1.1686595050892272, "learning_rate": 8.955425149039317e-06, "loss": 0.5001, "step": 9226 }, { "epoch": 0.8658971471471472, "grad_norm": 1.6098614487839829, "learning_rate": 8.955091155193353e-06, "loss": 0.4798, "step": 9227 }, { "epoch": 0.865990990990991, "grad_norm": 1.178791409064039, "learning_rate": 8.954757114190373e-06, "loss": 0.441, "step": 9228 }, { "epoch": 0.8660848348348348, "grad_norm": 0.8755823136536829, "learning_rate": 8.954423026034359e-06, "loss": 0.4444, "step": 9229 }, { "epoch": 0.8661786786786787, "grad_norm": 0.9639401066941563, "learning_rate": 8.954088890729292e-06, "loss": 0.4865, "step": 9230 }, { "epoch": 0.8662725225225225, "grad_norm": 1.1084430510687713, "learning_rate": 8.953754708279159e-06, "loss": 0.5357, "step": 9231 }, { "epoch": 0.8663663663663663, "grad_norm": 1.3386253760132227, "learning_rate": 8.953420478687944e-06, "loss": 0.5083, "step": 9232 }, { "epoch": 0.8664602102102102, "grad_norm": 1.5739501718325954, "learning_rate": 8.953086201959628e-06, "loss": 0.5254, "step": 9233 }, { "epoch": 0.8665540540540541, "grad_norm": 1.5981322121877566, "learning_rate": 8.952751878098205e-06, "loss": 0.4011, "step": 9234 }, { "epoch": 0.8666478978978979, "grad_norm": 2.4769106606075386, "learning_rate": 8.952417507107652e-06, "loss": 0.4918, "step": 9235 }, { "epoch": 0.8667417417417418, "grad_norm": 1.0232514438137053, "learning_rate": 8.95208308899196e-06, "loss": 0.5239, "step": 9236 }, { "epoch": 0.8668355855855856, "grad_norm": 1.3265418288250186, "learning_rate": 8.951748623755119e-06, "loss": 0.4824, "step": 9237 }, { "epoch": 0.8669294294294294, "grad_norm": 1.1536882722280042, "learning_rate": 8.951414111401113e-06, "loss": 0.4738, "step": 9238 }, { "epoch": 0.8670232732732732, "grad_norm": 1.1044792783121593, "learning_rate": 8.95107955193393e-06, "loss": 0.4978, "step": 9239 }, { "epoch": 0.8671171171171171, "grad_norm": 0.9237051495171293, "learning_rate": 8.950744945357563e-06, "loss": 0.4717, "step": 9240 }, { "epoch": 0.867210960960961, "grad_norm": 1.090395073679457, "learning_rate": 8.950410291675998e-06, "loss": 0.4963, "step": 9241 }, { "epoch": 0.8673048048048048, "grad_norm": 1.3357355636410526, "learning_rate": 8.950075590893224e-06, "loss": 0.4626, "step": 9242 }, { "epoch": 0.8673986486486487, "grad_norm": 1.004502036243617, "learning_rate": 8.949740843013237e-06, "loss": 0.4417, "step": 9243 }, { "epoch": 0.8674924924924925, "grad_norm": 1.055124412925688, "learning_rate": 8.949406048040023e-06, "loss": 0.4375, "step": 9244 }, { "epoch": 0.8675863363363363, "grad_norm": 1.1215476040308858, "learning_rate": 8.949071205977577e-06, "loss": 0.487, "step": 9245 }, { "epoch": 0.8676801801801802, "grad_norm": 1.3016889662003577, "learning_rate": 8.948736316829889e-06, "loss": 0.4895, "step": 9246 }, { "epoch": 0.867774024024024, "grad_norm": 1.4790288875526425, "learning_rate": 8.948401380600955e-06, "loss": 0.4074, "step": 9247 }, { "epoch": 0.8678678678678678, "grad_norm": 1.4899085033819053, "learning_rate": 8.948066397294765e-06, "loss": 0.423, "step": 9248 }, { "epoch": 0.8679617117117117, "grad_norm": 1.1438837208297667, "learning_rate": 8.947731366915315e-06, "loss": 0.4578, "step": 9249 }, { "epoch": 0.8680555555555556, "grad_norm": 1.0334301450767447, "learning_rate": 8.947396289466599e-06, "loss": 0.4577, "step": 9250 }, { "epoch": 0.8681493993993994, "grad_norm": 1.429265282923678, "learning_rate": 8.947061164952613e-06, "loss": 0.4416, "step": 9251 }, { "epoch": 0.8682432432432432, "grad_norm": 0.9892602897553469, "learning_rate": 8.946725993377352e-06, "loss": 0.4915, "step": 9252 }, { "epoch": 0.8683370870870871, "grad_norm": 1.8080573713852368, "learning_rate": 8.946390774744813e-06, "loss": 0.4782, "step": 9253 }, { "epoch": 0.8684309309309309, "grad_norm": 0.9805972726838985, "learning_rate": 8.946055509058993e-06, "loss": 0.4391, "step": 9254 }, { "epoch": 0.8685247747747747, "grad_norm": 1.091206129205337, "learning_rate": 8.945720196323887e-06, "loss": 0.4788, "step": 9255 }, { "epoch": 0.8686186186186187, "grad_norm": 0.8762945752019694, "learning_rate": 8.945384836543497e-06, "loss": 0.4518, "step": 9256 }, { "epoch": 0.8687124624624625, "grad_norm": 0.9888667625787877, "learning_rate": 8.945049429721816e-06, "loss": 0.4233, "step": 9257 }, { "epoch": 0.8688063063063063, "grad_norm": 1.1499379381197103, "learning_rate": 8.944713975862848e-06, "loss": 0.4322, "step": 9258 }, { "epoch": 0.8689001501501501, "grad_norm": 1.3159693913144082, "learning_rate": 8.944378474970591e-06, "loss": 0.4541, "step": 9259 }, { "epoch": 0.868993993993994, "grad_norm": 0.8684028020716154, "learning_rate": 8.944042927049046e-06, "loss": 0.4608, "step": 9260 }, { "epoch": 0.8690878378378378, "grad_norm": 0.9135015912127834, "learning_rate": 8.943707332102211e-06, "loss": 0.4742, "step": 9261 }, { "epoch": 0.8691816816816816, "grad_norm": 1.1789783237250226, "learning_rate": 8.94337169013409e-06, "loss": 0.4788, "step": 9262 }, { "epoch": 0.8692755255255256, "grad_norm": 1.2065365974844677, "learning_rate": 8.943036001148686e-06, "loss": 0.4603, "step": 9263 }, { "epoch": 0.8693693693693694, "grad_norm": 1.0767687851858203, "learning_rate": 8.942700265149998e-06, "loss": 0.5067, "step": 9264 }, { "epoch": 0.8694632132132132, "grad_norm": 1.1471044554342111, "learning_rate": 8.942364482142032e-06, "loss": 0.5174, "step": 9265 }, { "epoch": 0.8695570570570571, "grad_norm": 1.1763772679774624, "learning_rate": 8.942028652128789e-06, "loss": 0.5073, "step": 9266 }, { "epoch": 0.8696509009009009, "grad_norm": 1.114666988037623, "learning_rate": 8.941692775114274e-06, "loss": 0.4959, "step": 9267 }, { "epoch": 0.8697447447447447, "grad_norm": 1.1364036242643956, "learning_rate": 8.941356851102493e-06, "loss": 0.4756, "step": 9268 }, { "epoch": 0.8698385885885885, "grad_norm": 0.9767781769854937, "learning_rate": 8.941020880097452e-06, "loss": 0.4532, "step": 9269 }, { "epoch": 0.8699324324324325, "grad_norm": 1.0416327834539854, "learning_rate": 8.940684862103153e-06, "loss": 0.4487, "step": 9270 }, { "epoch": 0.8700262762762763, "grad_norm": 1.1471289225869663, "learning_rate": 8.940348797123605e-06, "loss": 0.4657, "step": 9271 }, { "epoch": 0.8701201201201201, "grad_norm": 0.923522509840006, "learning_rate": 8.940012685162815e-06, "loss": 0.4833, "step": 9272 }, { "epoch": 0.870213963963964, "grad_norm": 1.093047825400146, "learning_rate": 8.93967652622479e-06, "loss": 0.5326, "step": 9273 }, { "epoch": 0.8703078078078078, "grad_norm": 3.836035357969739, "learning_rate": 8.93934032031354e-06, "loss": 0.4783, "step": 9274 }, { "epoch": 0.8704016516516516, "grad_norm": 0.9268758180451805, "learning_rate": 8.939004067433068e-06, "loss": 0.4976, "step": 9275 }, { "epoch": 0.8704954954954955, "grad_norm": 1.0709158226239872, "learning_rate": 8.938667767587389e-06, "loss": 0.4805, "step": 9276 }, { "epoch": 0.8705893393393394, "grad_norm": 1.0256920433116556, "learning_rate": 8.93833142078051e-06, "loss": 0.444, "step": 9277 }, { "epoch": 0.8706831831831832, "grad_norm": 0.8826005455184013, "learning_rate": 8.937995027016442e-06, "loss": 0.4714, "step": 9278 }, { "epoch": 0.870777027027027, "grad_norm": 1.5558409975361855, "learning_rate": 8.937658586299195e-06, "loss": 0.4599, "step": 9279 }, { "epoch": 0.8708708708708709, "grad_norm": 1.0984295621929026, "learning_rate": 8.937322098632784e-06, "loss": 0.5349, "step": 9280 }, { "epoch": 0.8709647147147147, "grad_norm": 0.9378291988561945, "learning_rate": 8.936985564021215e-06, "loss": 0.4383, "step": 9281 }, { "epoch": 0.8710585585585585, "grad_norm": 2.6306667640848205, "learning_rate": 8.936648982468506e-06, "loss": 0.4457, "step": 9282 }, { "epoch": 0.8711524024024024, "grad_norm": 0.9761117258844402, "learning_rate": 8.936312353978667e-06, "loss": 0.4683, "step": 9283 }, { "epoch": 0.8712462462462462, "grad_norm": 1.2391054190640467, "learning_rate": 8.935975678555713e-06, "loss": 0.4714, "step": 9284 }, { "epoch": 0.8713400900900901, "grad_norm": 1.0667473115720656, "learning_rate": 8.935638956203656e-06, "loss": 0.4646, "step": 9285 }, { "epoch": 0.871433933933934, "grad_norm": 1.1400570506246153, "learning_rate": 8.935302186926515e-06, "loss": 0.4949, "step": 9286 }, { "epoch": 0.8715277777777778, "grad_norm": 5.500443283871545, "learning_rate": 8.9349653707283e-06, "loss": 0.4986, "step": 9287 }, { "epoch": 0.8716216216216216, "grad_norm": 1.904872809928608, "learning_rate": 8.934628507613033e-06, "loss": 0.5008, "step": 9288 }, { "epoch": 0.8717154654654654, "grad_norm": 0.944992307970748, "learning_rate": 8.934291597584725e-06, "loss": 0.4493, "step": 9289 }, { "epoch": 0.8718093093093093, "grad_norm": 1.410612494487697, "learning_rate": 8.933954640647395e-06, "loss": 0.486, "step": 9290 }, { "epoch": 0.8719031531531531, "grad_norm": 1.008657206312121, "learning_rate": 8.933617636805064e-06, "loss": 0.4253, "step": 9291 }, { "epoch": 0.871996996996997, "grad_norm": 0.9026922644479238, "learning_rate": 8.933280586061744e-06, "loss": 0.4552, "step": 9292 }, { "epoch": 0.8720908408408409, "grad_norm": 1.4695691574034822, "learning_rate": 8.932943488421457e-06, "loss": 0.4581, "step": 9293 }, { "epoch": 0.8721846846846847, "grad_norm": 1.1883973357172641, "learning_rate": 8.932606343888222e-06, "loss": 0.4608, "step": 9294 }, { "epoch": 0.8722785285285285, "grad_norm": 1.1440942021088367, "learning_rate": 8.93226915246606e-06, "loss": 0.4506, "step": 9295 }, { "epoch": 0.8723723723723724, "grad_norm": 0.8582300431297728, "learning_rate": 8.931931914158988e-06, "loss": 0.4069, "step": 9296 }, { "epoch": 0.8724662162162162, "grad_norm": 1.3200856952636513, "learning_rate": 8.93159462897103e-06, "loss": 0.4727, "step": 9297 }, { "epoch": 0.87256006006006, "grad_norm": 2.294798088034055, "learning_rate": 8.931257296906208e-06, "loss": 0.515, "step": 9298 }, { "epoch": 0.8726539039039038, "grad_norm": 0.9801780424198022, "learning_rate": 8.93091991796854e-06, "loss": 0.4528, "step": 9299 }, { "epoch": 0.8727477477477478, "grad_norm": 0.9875742578341968, "learning_rate": 8.930582492162053e-06, "loss": 0.4866, "step": 9300 }, { "epoch": 0.8728415915915916, "grad_norm": 1.6910885058006786, "learning_rate": 8.930245019490768e-06, "loss": 0.4614, "step": 9301 }, { "epoch": 0.8729354354354354, "grad_norm": 1.1296915889545471, "learning_rate": 8.929907499958708e-06, "loss": 0.4794, "step": 9302 }, { "epoch": 0.8730292792792793, "grad_norm": 1.0688314363070017, "learning_rate": 8.929569933569899e-06, "loss": 0.4408, "step": 9303 }, { "epoch": 0.8731231231231231, "grad_norm": 1.03791436241363, "learning_rate": 8.929232320328365e-06, "loss": 0.469, "step": 9304 }, { "epoch": 0.8732169669669669, "grad_norm": 0.9658003459207869, "learning_rate": 8.928894660238132e-06, "loss": 0.4003, "step": 9305 }, { "epoch": 0.8733108108108109, "grad_norm": 1.0742018444421102, "learning_rate": 8.928556953303226e-06, "loss": 0.4926, "step": 9306 }, { "epoch": 0.8734046546546547, "grad_norm": 2.1027068088635743, "learning_rate": 8.928219199527673e-06, "loss": 0.4837, "step": 9307 }, { "epoch": 0.8734984984984985, "grad_norm": 1.2388551550823244, "learning_rate": 8.927881398915499e-06, "loss": 0.4482, "step": 9308 }, { "epoch": 0.8735923423423423, "grad_norm": 1.3991177073121122, "learning_rate": 8.927543551470734e-06, "loss": 0.5076, "step": 9309 }, { "epoch": 0.8736861861861862, "grad_norm": 2.6208354709195505, "learning_rate": 8.927205657197404e-06, "loss": 0.5085, "step": 9310 }, { "epoch": 0.87378003003003, "grad_norm": 1.4522851862862918, "learning_rate": 8.92686771609954e-06, "loss": 0.4631, "step": 9311 }, { "epoch": 0.8738738738738738, "grad_norm": 0.980974124236256, "learning_rate": 8.92652972818117e-06, "loss": 0.4655, "step": 9312 }, { "epoch": 0.8739677177177178, "grad_norm": 1.9010313018699019, "learning_rate": 8.926191693446322e-06, "loss": 0.4503, "step": 9313 }, { "epoch": 0.8740615615615616, "grad_norm": 0.9144999773886002, "learning_rate": 8.92585361189903e-06, "loss": 0.4556, "step": 9314 }, { "epoch": 0.8741554054054054, "grad_norm": 1.1454133898828271, "learning_rate": 8.925515483543324e-06, "loss": 0.4442, "step": 9315 }, { "epoch": 0.8742492492492493, "grad_norm": 1.0363917036067452, "learning_rate": 8.925177308383233e-06, "loss": 0.4536, "step": 9316 }, { "epoch": 0.8743430930930931, "grad_norm": 0.9080214738599806, "learning_rate": 8.924839086422792e-06, "loss": 0.4452, "step": 9317 }, { "epoch": 0.8744369369369369, "grad_norm": 1.8757739309011623, "learning_rate": 8.924500817666033e-06, "loss": 0.4978, "step": 9318 }, { "epoch": 0.8745307807807807, "grad_norm": 1.2640074372192653, "learning_rate": 8.924162502116987e-06, "loss": 0.5234, "step": 9319 }, { "epoch": 0.8746246246246246, "grad_norm": 0.964446049464268, "learning_rate": 8.923824139779691e-06, "loss": 0.4701, "step": 9320 }, { "epoch": 0.8747184684684685, "grad_norm": 1.282370134400342, "learning_rate": 8.923485730658178e-06, "loss": 0.4502, "step": 9321 }, { "epoch": 0.8748123123123123, "grad_norm": 1.4126373101083465, "learning_rate": 8.923147274756483e-06, "loss": 0.474, "step": 9322 }, { "epoch": 0.8749061561561562, "grad_norm": 0.9550187643084114, "learning_rate": 8.922808772078641e-06, "loss": 0.4467, "step": 9323 }, { "epoch": 0.875, "grad_norm": 1.2823428795767653, "learning_rate": 8.92247022262869e-06, "loss": 0.4859, "step": 9324 }, { "epoch": 0.8750938438438438, "grad_norm": 1.0287245739589472, "learning_rate": 8.922131626410662e-06, "loss": 0.478, "step": 9325 }, { "epoch": 0.8751876876876877, "grad_norm": 1.6168911338119099, "learning_rate": 8.921792983428599e-06, "loss": 0.438, "step": 9326 }, { "epoch": 0.8752815315315315, "grad_norm": 0.9915421859351252, "learning_rate": 8.921454293686535e-06, "loss": 0.4736, "step": 9327 }, { "epoch": 0.8753753753753754, "grad_norm": 0.9515675544045628, "learning_rate": 8.921115557188511e-06, "loss": 0.486, "step": 9328 }, { "epoch": 0.8754692192192193, "grad_norm": 0.9534564680558522, "learning_rate": 8.920776773938565e-06, "loss": 0.49, "step": 9329 }, { "epoch": 0.8755630630630631, "grad_norm": 0.8814439555227587, "learning_rate": 8.920437943940735e-06, "loss": 0.4386, "step": 9330 }, { "epoch": 0.8756569069069069, "grad_norm": 1.3386712764434632, "learning_rate": 8.920099067199063e-06, "loss": 0.4825, "step": 9331 }, { "epoch": 0.8757507507507507, "grad_norm": 1.1412614027170274, "learning_rate": 8.919760143717589e-06, "loss": 0.4446, "step": 9332 }, { "epoch": 0.8758445945945946, "grad_norm": 0.9716842787102543, "learning_rate": 8.919421173500351e-06, "loss": 0.4987, "step": 9333 }, { "epoch": 0.8759384384384384, "grad_norm": 1.0548073849962536, "learning_rate": 8.919082156551396e-06, "loss": 0.4622, "step": 9334 }, { "epoch": 0.8760322822822822, "grad_norm": 1.580875378627212, "learning_rate": 8.918743092874761e-06, "loss": 0.5219, "step": 9335 }, { "epoch": 0.8761261261261262, "grad_norm": 1.0410573372460887, "learning_rate": 8.918403982474492e-06, "loss": 0.4629, "step": 9336 }, { "epoch": 0.87621996996997, "grad_norm": 2.1862074647982026, "learning_rate": 8.918064825354632e-06, "loss": 0.4471, "step": 9337 }, { "epoch": 0.8763138138138138, "grad_norm": 1.270533443781829, "learning_rate": 8.917725621519222e-06, "loss": 0.4458, "step": 9338 }, { "epoch": 0.8764076576576577, "grad_norm": 1.0262642161516384, "learning_rate": 8.91738637097231e-06, "loss": 0.461, "step": 9339 }, { "epoch": 0.8765015015015015, "grad_norm": 1.1593317586469614, "learning_rate": 8.917047073717938e-06, "loss": 0.479, "step": 9340 }, { "epoch": 0.8765953453453453, "grad_norm": 0.9931129518421763, "learning_rate": 8.916707729760153e-06, "loss": 0.4739, "step": 9341 }, { "epoch": 0.8766891891891891, "grad_norm": 1.5224666903609196, "learning_rate": 8.916368339103e-06, "loss": 0.4158, "step": 9342 }, { "epoch": 0.8767830330330331, "grad_norm": 0.9080302144690878, "learning_rate": 8.916028901750527e-06, "loss": 0.459, "step": 9343 }, { "epoch": 0.8768768768768769, "grad_norm": 1.5071061381673934, "learning_rate": 8.915689417706781e-06, "loss": 0.4656, "step": 9344 }, { "epoch": 0.8769707207207207, "grad_norm": 1.013304838937522, "learning_rate": 8.91534988697581e-06, "loss": 0.4673, "step": 9345 }, { "epoch": 0.8770645645645646, "grad_norm": 0.972450131461336, "learning_rate": 8.915010309561661e-06, "loss": 0.4555, "step": 9346 }, { "epoch": 0.8771584084084084, "grad_norm": 0.99362002846841, "learning_rate": 8.914670685468381e-06, "loss": 0.5125, "step": 9347 }, { "epoch": 0.8772522522522522, "grad_norm": 1.0196546953544965, "learning_rate": 8.914331014700025e-06, "loss": 0.5143, "step": 9348 }, { "epoch": 0.8773460960960962, "grad_norm": 1.4655872708257185, "learning_rate": 8.913991297260638e-06, "loss": 0.4616, "step": 9349 }, { "epoch": 0.87743993993994, "grad_norm": 1.1404914928756054, "learning_rate": 8.913651533154272e-06, "loss": 0.4751, "step": 9350 }, { "epoch": 0.8775337837837838, "grad_norm": 1.17529344577132, "learning_rate": 8.913311722384978e-06, "loss": 0.432, "step": 9351 }, { "epoch": 0.8776276276276276, "grad_norm": 1.3817648375924272, "learning_rate": 8.912971864956808e-06, "loss": 0.4508, "step": 9352 }, { "epoch": 0.8777214714714715, "grad_norm": 0.9302055185250088, "learning_rate": 8.912631960873814e-06, "loss": 0.4931, "step": 9353 }, { "epoch": 0.8778153153153153, "grad_norm": 1.0817556659963228, "learning_rate": 8.912292010140049e-06, "loss": 0.4981, "step": 9354 }, { "epoch": 0.8779091591591591, "grad_norm": 1.0221208972347209, "learning_rate": 8.911952012759566e-06, "loss": 0.4892, "step": 9355 }, { "epoch": 0.878003003003003, "grad_norm": 1.779341019810774, "learning_rate": 8.911611968736418e-06, "loss": 0.4487, "step": 9356 }, { "epoch": 0.8780968468468469, "grad_norm": 1.1553758532416278, "learning_rate": 8.911271878074661e-06, "loss": 0.4537, "step": 9357 }, { "epoch": 0.8781906906906907, "grad_norm": 1.6383969886183827, "learning_rate": 8.910931740778349e-06, "loss": 0.4103, "step": 9358 }, { "epoch": 0.8782845345345346, "grad_norm": 2.6264091669309964, "learning_rate": 8.910591556851536e-06, "loss": 0.4134, "step": 9359 }, { "epoch": 0.8783783783783784, "grad_norm": 1.0806001331296768, "learning_rate": 8.910251326298282e-06, "loss": 0.4674, "step": 9360 }, { "epoch": 0.8784722222222222, "grad_norm": 1.3190268659814401, "learning_rate": 8.90991104912264e-06, "loss": 0.4897, "step": 9361 }, { "epoch": 0.878566066066066, "grad_norm": 1.0659156251747401, "learning_rate": 8.909570725328668e-06, "loss": 0.4977, "step": 9362 }, { "epoch": 0.8786599099099099, "grad_norm": 0.8803061354447608, "learning_rate": 8.909230354920423e-06, "loss": 0.4401, "step": 9363 }, { "epoch": 0.8787537537537538, "grad_norm": 0.9015439963114132, "learning_rate": 8.908889937901968e-06, "loss": 0.4632, "step": 9364 }, { "epoch": 0.8788475975975976, "grad_norm": 1.1487545906118029, "learning_rate": 8.908549474277354e-06, "loss": 0.4625, "step": 9365 }, { "epoch": 0.8789414414414415, "grad_norm": 1.1392259040533792, "learning_rate": 8.908208964050648e-06, "loss": 0.4746, "step": 9366 }, { "epoch": 0.8790352852852853, "grad_norm": 0.9302881255398833, "learning_rate": 8.907868407225906e-06, "loss": 0.4428, "step": 9367 }, { "epoch": 0.8791291291291291, "grad_norm": 1.2740164179315825, "learning_rate": 8.907527803807189e-06, "loss": 0.4486, "step": 9368 }, { "epoch": 0.879222972972973, "grad_norm": 1.4834747347724988, "learning_rate": 8.907187153798555e-06, "loss": 0.5087, "step": 9369 }, { "epoch": 0.8793168168168168, "grad_norm": 1.140700345815907, "learning_rate": 8.906846457204072e-06, "loss": 0.4355, "step": 9370 }, { "epoch": 0.8794106606606606, "grad_norm": 1.780001711834118, "learning_rate": 8.906505714027797e-06, "loss": 0.487, "step": 9371 }, { "epoch": 0.8795045045045045, "grad_norm": 0.9327825275132147, "learning_rate": 8.906164924273796e-06, "loss": 0.4755, "step": 9372 }, { "epoch": 0.8795983483483484, "grad_norm": 1.0007948718328556, "learning_rate": 8.905824087946131e-06, "loss": 0.4949, "step": 9373 }, { "epoch": 0.8796921921921922, "grad_norm": 1.034504517505206, "learning_rate": 8.905483205048867e-06, "loss": 0.4557, "step": 9374 }, { "epoch": 0.879786036036036, "grad_norm": 1.861429961838298, "learning_rate": 8.905142275586063e-06, "loss": 0.4951, "step": 9375 }, { "epoch": 0.8798798798798799, "grad_norm": 1.1084138198432694, "learning_rate": 8.90480129956179e-06, "loss": 0.4928, "step": 9376 }, { "epoch": 0.8799737237237237, "grad_norm": 0.9924930631785029, "learning_rate": 8.904460276980112e-06, "loss": 0.4743, "step": 9377 }, { "epoch": 0.8800675675675675, "grad_norm": 0.9051718800865536, "learning_rate": 8.904119207845094e-06, "loss": 0.4606, "step": 9378 }, { "epoch": 0.8801614114114115, "grad_norm": 1.590554016922417, "learning_rate": 8.903778092160804e-06, "loss": 0.5251, "step": 9379 }, { "epoch": 0.8802552552552553, "grad_norm": 0.9751506020768259, "learning_rate": 8.903436929931308e-06, "loss": 0.4371, "step": 9380 }, { "epoch": 0.8803490990990991, "grad_norm": 0.9450044619788639, "learning_rate": 8.903095721160676e-06, "loss": 0.3948, "step": 9381 }, { "epoch": 0.8804429429429429, "grad_norm": 1.0809855889499007, "learning_rate": 8.902754465852972e-06, "loss": 0.4832, "step": 9382 }, { "epoch": 0.8805367867867868, "grad_norm": 1.723004856527212, "learning_rate": 8.902413164012267e-06, "loss": 0.4857, "step": 9383 }, { "epoch": 0.8806306306306306, "grad_norm": 5.8188351399055955, "learning_rate": 8.902071815642631e-06, "loss": 0.4871, "step": 9384 }, { "epoch": 0.8807244744744744, "grad_norm": 0.9880458354363121, "learning_rate": 8.901730420748133e-06, "loss": 0.4109, "step": 9385 }, { "epoch": 0.8808183183183184, "grad_norm": 0.9984900432253256, "learning_rate": 8.901388979332846e-06, "loss": 0.415, "step": 9386 }, { "epoch": 0.8809121621621622, "grad_norm": 0.883232920870787, "learning_rate": 8.901047491400837e-06, "loss": 0.4282, "step": 9387 }, { "epoch": 0.881006006006006, "grad_norm": 1.1200383440039783, "learning_rate": 8.90070595695618e-06, "loss": 0.4991, "step": 9388 }, { "epoch": 0.8810998498498499, "grad_norm": 1.044168109443465, "learning_rate": 8.900364376002948e-06, "loss": 0.4911, "step": 9389 }, { "epoch": 0.8811936936936937, "grad_norm": 1.1359255394498509, "learning_rate": 8.90002274854521e-06, "loss": 0.4818, "step": 9390 }, { "epoch": 0.8812875375375375, "grad_norm": 0.994517356401, "learning_rate": 8.899681074587045e-06, "loss": 0.4409, "step": 9391 }, { "epoch": 0.8813813813813813, "grad_norm": 0.997773072631368, "learning_rate": 8.899339354132522e-06, "loss": 0.4389, "step": 9392 }, { "epoch": 0.8814752252252253, "grad_norm": 0.8912192246665017, "learning_rate": 8.898997587185717e-06, "loss": 0.3925, "step": 9393 }, { "epoch": 0.8815690690690691, "grad_norm": 1.122964097775382, "learning_rate": 8.898655773750705e-06, "loss": 0.4739, "step": 9394 }, { "epoch": 0.8816629129129129, "grad_norm": 0.9940956272079631, "learning_rate": 8.89831391383156e-06, "loss": 0.4972, "step": 9395 }, { "epoch": 0.8817567567567568, "grad_norm": 0.949803300502055, "learning_rate": 8.89797200743236e-06, "loss": 0.4959, "step": 9396 }, { "epoch": 0.8818506006006006, "grad_norm": 1.160354585751877, "learning_rate": 8.897630054557182e-06, "loss": 0.4643, "step": 9397 }, { "epoch": 0.8819444444444444, "grad_norm": 1.8931084860603615, "learning_rate": 8.897288055210101e-06, "loss": 0.5104, "step": 9398 }, { "epoch": 0.8820382882882883, "grad_norm": 1.0117174295705318, "learning_rate": 8.896946009395198e-06, "loss": 0.4763, "step": 9399 }, { "epoch": 0.8821321321321322, "grad_norm": 1.098783317823201, "learning_rate": 8.896603917116547e-06, "loss": 0.4394, "step": 9400 }, { "epoch": 0.882225975975976, "grad_norm": 0.9957204202333132, "learning_rate": 8.89626177837823e-06, "loss": 0.4288, "step": 9401 }, { "epoch": 0.8823198198198198, "grad_norm": 1.4592375688135035, "learning_rate": 8.895919593184323e-06, "loss": 0.4481, "step": 9402 }, { "epoch": 0.8824136636636637, "grad_norm": 1.1608457449445608, "learning_rate": 8.89557736153891e-06, "loss": 0.5107, "step": 9403 }, { "epoch": 0.8825075075075075, "grad_norm": 1.0755832497460436, "learning_rate": 8.89523508344607e-06, "loss": 0.477, "step": 9404 }, { "epoch": 0.8826013513513513, "grad_norm": 1.225632652251116, "learning_rate": 8.894892758909884e-06, "loss": 0.4667, "step": 9405 }, { "epoch": 0.8826951951951952, "grad_norm": 1.0614524786518231, "learning_rate": 8.894550387934432e-06, "loss": 0.4866, "step": 9406 }, { "epoch": 0.882789039039039, "grad_norm": 0.9480435539639528, "learning_rate": 8.894207970523798e-06, "loss": 0.4623, "step": 9407 }, { "epoch": 0.8828828828828829, "grad_norm": 0.9415138549978421, "learning_rate": 8.893865506682062e-06, "loss": 0.4486, "step": 9408 }, { "epoch": 0.8829767267267268, "grad_norm": 1.3462536890734864, "learning_rate": 8.893522996413314e-06, "loss": 0.4448, "step": 9409 }, { "epoch": 0.8830705705705706, "grad_norm": 1.1213509315807526, "learning_rate": 8.893180439721629e-06, "loss": 0.4287, "step": 9410 }, { "epoch": 0.8831644144144144, "grad_norm": 0.8724462729236827, "learning_rate": 8.892837836611096e-06, "loss": 0.4326, "step": 9411 }, { "epoch": 0.8832582582582582, "grad_norm": 1.1257574196377311, "learning_rate": 8.8924951870858e-06, "loss": 0.3979, "step": 9412 }, { "epoch": 0.8833521021021021, "grad_norm": 0.9410444700835757, "learning_rate": 8.892152491149826e-06, "loss": 0.5034, "step": 9413 }, { "epoch": 0.8834459459459459, "grad_norm": 1.0707643688958803, "learning_rate": 8.89180974880726e-06, "loss": 0.4515, "step": 9414 }, { "epoch": 0.8835397897897898, "grad_norm": 1.0057114224996633, "learning_rate": 8.891466960062187e-06, "loss": 0.4589, "step": 9415 }, { "epoch": 0.8836336336336337, "grad_norm": 1.0361388567655552, "learning_rate": 8.891124124918695e-06, "loss": 0.4498, "step": 9416 }, { "epoch": 0.8837274774774775, "grad_norm": 0.8867505694281269, "learning_rate": 8.890781243380875e-06, "loss": 0.5092, "step": 9417 }, { "epoch": 0.8838213213213213, "grad_norm": 0.9996248408970666, "learning_rate": 8.89043831545281e-06, "loss": 0.4327, "step": 9418 }, { "epoch": 0.8839151651651652, "grad_norm": 1.0126411788411636, "learning_rate": 8.890095341138592e-06, "loss": 0.4653, "step": 9419 }, { "epoch": 0.884009009009009, "grad_norm": 1.0987112980439204, "learning_rate": 8.88975232044231e-06, "loss": 0.4275, "step": 9420 }, { "epoch": 0.8841028528528528, "grad_norm": 1.0086834691685993, "learning_rate": 8.88940925336805e-06, "loss": 0.5068, "step": 9421 }, { "epoch": 0.8841966966966966, "grad_norm": 1.4461451030918913, "learning_rate": 8.889066139919909e-06, "loss": 0.5057, "step": 9422 }, { "epoch": 0.8842905405405406, "grad_norm": 0.8978969405209604, "learning_rate": 8.888722980101974e-06, "loss": 0.4517, "step": 9423 }, { "epoch": 0.8843843843843844, "grad_norm": 1.0286735261558615, "learning_rate": 8.888379773918336e-06, "loss": 0.4582, "step": 9424 }, { "epoch": 0.8844782282282282, "grad_norm": 1.090912021096975, "learning_rate": 8.88803652137309e-06, "loss": 0.4897, "step": 9425 }, { "epoch": 0.8845720720720721, "grad_norm": 0.8703654404245749, "learning_rate": 8.887693222470326e-06, "loss": 0.4454, "step": 9426 }, { "epoch": 0.8846659159159159, "grad_norm": 1.100163731889789, "learning_rate": 8.887349877214138e-06, "loss": 0.4777, "step": 9427 }, { "epoch": 0.8847597597597597, "grad_norm": 0.9302792531024608, "learning_rate": 8.88700648560862e-06, "loss": 0.4213, "step": 9428 }, { "epoch": 0.8848536036036037, "grad_norm": 1.1266110406578762, "learning_rate": 8.886663047657868e-06, "loss": 0.4688, "step": 9429 }, { "epoch": 0.8849474474474475, "grad_norm": 1.439006154145903, "learning_rate": 8.886319563365972e-06, "loss": 0.4601, "step": 9430 }, { "epoch": 0.8850412912912913, "grad_norm": 1.6154993317744462, "learning_rate": 8.885976032737033e-06, "loss": 0.44, "step": 9431 }, { "epoch": 0.8851351351351351, "grad_norm": 1.1288328435099295, "learning_rate": 8.885632455775143e-06, "loss": 0.5155, "step": 9432 }, { "epoch": 0.885228978978979, "grad_norm": 0.9009596725406397, "learning_rate": 8.8852888324844e-06, "loss": 0.4604, "step": 9433 }, { "epoch": 0.8853228228228228, "grad_norm": 0.9657549588707157, "learning_rate": 8.884945162868903e-06, "loss": 0.4904, "step": 9434 }, { "epoch": 0.8854166666666666, "grad_norm": 0.9868280458697248, "learning_rate": 8.884601446932744e-06, "loss": 0.4534, "step": 9435 }, { "epoch": 0.8855105105105106, "grad_norm": 0.8694860904408104, "learning_rate": 8.884257684680026e-06, "loss": 0.4668, "step": 9436 }, { "epoch": 0.8856043543543544, "grad_norm": 1.2075398327054654, "learning_rate": 8.88391387611485e-06, "loss": 0.4547, "step": 9437 }, { "epoch": 0.8856981981981982, "grad_norm": 1.8315231759184165, "learning_rate": 8.883570021241308e-06, "loss": 0.4465, "step": 9438 }, { "epoch": 0.8857920420420421, "grad_norm": 1.2206140861202113, "learning_rate": 8.883226120063504e-06, "loss": 0.4758, "step": 9439 }, { "epoch": 0.8858858858858859, "grad_norm": 1.2892723755776012, "learning_rate": 8.882882172585538e-06, "loss": 0.4493, "step": 9440 }, { "epoch": 0.8859797297297297, "grad_norm": 0.9340795773156341, "learning_rate": 8.882538178811512e-06, "loss": 0.4302, "step": 9441 }, { "epoch": 0.8860735735735735, "grad_norm": 1.0230784606518297, "learning_rate": 8.882194138745524e-06, "loss": 0.4396, "step": 9442 }, { "epoch": 0.8861674174174174, "grad_norm": 0.9450446379187676, "learning_rate": 8.88185005239168e-06, "loss": 0.4547, "step": 9443 }, { "epoch": 0.8862612612612613, "grad_norm": 1.1549090679614415, "learning_rate": 8.881505919754082e-06, "loss": 0.4892, "step": 9444 }, { "epoch": 0.8863551051051051, "grad_norm": 1.2626611064909588, "learning_rate": 8.88116174083683e-06, "loss": 0.4389, "step": 9445 }, { "epoch": 0.886448948948949, "grad_norm": 1.1067397558553287, "learning_rate": 8.880817515644032e-06, "loss": 0.4618, "step": 9446 }, { "epoch": 0.8865427927927928, "grad_norm": 1.0271131418524158, "learning_rate": 8.880473244179789e-06, "loss": 0.4645, "step": 9447 }, { "epoch": 0.8866366366366366, "grad_norm": 1.0063618998431432, "learning_rate": 8.880128926448207e-06, "loss": 0.4407, "step": 9448 }, { "epoch": 0.8867304804804805, "grad_norm": 0.8986246090653125, "learning_rate": 8.879784562453391e-06, "loss": 0.4729, "step": 9449 }, { "epoch": 0.8868243243243243, "grad_norm": 1.0836717761202614, "learning_rate": 8.879440152199445e-06, "loss": 0.443, "step": 9450 }, { "epoch": 0.8869181681681682, "grad_norm": 1.0073298082997744, "learning_rate": 8.87909569569048e-06, "loss": 0.4404, "step": 9451 }, { "epoch": 0.887012012012012, "grad_norm": 0.8848209672432443, "learning_rate": 8.8787511929306e-06, "loss": 0.441, "step": 9452 }, { "epoch": 0.8871058558558559, "grad_norm": 0.9712848811467445, "learning_rate": 8.878406643923912e-06, "loss": 0.4481, "step": 9453 }, { "epoch": 0.8871996996996997, "grad_norm": 1.0699135312563586, "learning_rate": 8.878062048674526e-06, "loss": 0.5097, "step": 9454 }, { "epoch": 0.8872935435435435, "grad_norm": 1.0362712758037953, "learning_rate": 8.87771740718655e-06, "loss": 0.4777, "step": 9455 }, { "epoch": 0.8873873873873874, "grad_norm": 1.1302308887207535, "learning_rate": 8.877372719464092e-06, "loss": 0.4608, "step": 9456 }, { "epoch": 0.8874812312312312, "grad_norm": 1.1258023125573116, "learning_rate": 8.877027985511264e-06, "loss": 0.4299, "step": 9457 }, { "epoch": 0.887575075075075, "grad_norm": 0.966009385134741, "learning_rate": 8.876683205332175e-06, "loss": 0.4338, "step": 9458 }, { "epoch": 0.887668918918919, "grad_norm": 1.2302739877069804, "learning_rate": 8.876338378930935e-06, "loss": 0.4492, "step": 9459 }, { "epoch": 0.8877627627627628, "grad_norm": 2.0233846374613664, "learning_rate": 8.875993506311656e-06, "loss": 0.4577, "step": 9460 }, { "epoch": 0.8878566066066066, "grad_norm": 1.0796448149431563, "learning_rate": 8.875648587478452e-06, "loss": 0.5048, "step": 9461 }, { "epoch": 0.8879504504504504, "grad_norm": 0.8813598440394768, "learning_rate": 8.875303622435432e-06, "loss": 0.4554, "step": 9462 }, { "epoch": 0.8880442942942943, "grad_norm": 0.9166522980264218, "learning_rate": 8.87495861118671e-06, "loss": 0.4376, "step": 9463 }, { "epoch": 0.8881381381381381, "grad_norm": 1.0379492006519342, "learning_rate": 8.874613553736402e-06, "loss": 0.4679, "step": 9464 }, { "epoch": 0.8882319819819819, "grad_norm": 1.013704759663944, "learning_rate": 8.87426845008862e-06, "loss": 0.4755, "step": 9465 }, { "epoch": 0.8883258258258259, "grad_norm": 1.1878215814140567, "learning_rate": 8.87392330024748e-06, "loss": 0.4915, "step": 9466 }, { "epoch": 0.8884196696696697, "grad_norm": 1.1798148630641028, "learning_rate": 8.873578104217096e-06, "loss": 0.5301, "step": 9467 }, { "epoch": 0.8885135135135135, "grad_norm": 0.9852359967325399, "learning_rate": 8.873232862001586e-06, "loss": 0.4498, "step": 9468 }, { "epoch": 0.8886073573573574, "grad_norm": 0.8670917927176425, "learning_rate": 8.872887573605063e-06, "loss": 0.4446, "step": 9469 }, { "epoch": 0.8887012012012012, "grad_norm": 1.130150991709734, "learning_rate": 8.872542239031646e-06, "loss": 0.4336, "step": 9470 }, { "epoch": 0.888795045045045, "grad_norm": 1.0341050105165575, "learning_rate": 8.872196858285452e-06, "loss": 0.4563, "step": 9471 }, { "epoch": 0.8888888888888888, "grad_norm": 1.6064812096004113, "learning_rate": 8.8718514313706e-06, "loss": 0.4526, "step": 9472 }, { "epoch": 0.8889827327327328, "grad_norm": 1.1395855338305791, "learning_rate": 8.871505958291206e-06, "loss": 0.4821, "step": 9473 }, { "epoch": 0.8890765765765766, "grad_norm": 1.5047435243792837, "learning_rate": 8.871160439051391e-06, "loss": 0.483, "step": 9474 }, { "epoch": 0.8891704204204204, "grad_norm": 1.0049926962759255, "learning_rate": 8.870814873655276e-06, "loss": 0.452, "step": 9475 }, { "epoch": 0.8892642642642643, "grad_norm": 0.8422743960528902, "learning_rate": 8.870469262106977e-06, "loss": 0.442, "step": 9476 }, { "epoch": 0.8893581081081081, "grad_norm": 0.9304653782258019, "learning_rate": 8.870123604410622e-06, "loss": 0.4675, "step": 9477 }, { "epoch": 0.8894519519519519, "grad_norm": 1.115764611294779, "learning_rate": 8.869777900570325e-06, "loss": 0.5159, "step": 9478 }, { "epoch": 0.8895457957957958, "grad_norm": 1.3439082516685192, "learning_rate": 8.86943215059021e-06, "loss": 0.5333, "step": 9479 }, { "epoch": 0.8896396396396397, "grad_norm": 0.9746241861067402, "learning_rate": 8.869086354474401e-06, "loss": 0.5001, "step": 9480 }, { "epoch": 0.8897334834834835, "grad_norm": 0.8659337133321386, "learning_rate": 8.868740512227018e-06, "loss": 0.4636, "step": 9481 }, { "epoch": 0.8898273273273273, "grad_norm": 1.098435279666761, "learning_rate": 8.86839462385219e-06, "loss": 0.4443, "step": 9482 }, { "epoch": 0.8899211711711712, "grad_norm": 2.0326142939309078, "learning_rate": 8.868048689354036e-06, "loss": 0.4604, "step": 9483 }, { "epoch": 0.890015015015015, "grad_norm": 0.9862053632662365, "learning_rate": 8.867702708736682e-06, "loss": 0.4811, "step": 9484 }, { "epoch": 0.8901088588588588, "grad_norm": 0.8903691384992736, "learning_rate": 8.867356682004253e-06, "loss": 0.4129, "step": 9485 }, { "epoch": 0.8902027027027027, "grad_norm": 1.1532736325156012, "learning_rate": 8.867010609160877e-06, "loss": 0.4673, "step": 9486 }, { "epoch": 0.8902965465465466, "grad_norm": 0.949957976718543, "learning_rate": 8.866664490210676e-06, "loss": 0.476, "step": 9487 }, { "epoch": 0.8903903903903904, "grad_norm": 0.8569260032102399, "learning_rate": 8.86631832515778e-06, "loss": 0.4567, "step": 9488 }, { "epoch": 0.8904842342342343, "grad_norm": 0.8508264478767976, "learning_rate": 8.865972114006316e-06, "loss": 0.4438, "step": 9489 }, { "epoch": 0.8905780780780781, "grad_norm": 1.3296832243830246, "learning_rate": 8.865625856760411e-06, "loss": 0.5049, "step": 9490 }, { "epoch": 0.8906719219219219, "grad_norm": 1.047714691088819, "learning_rate": 8.865279553424196e-06, "loss": 0.4923, "step": 9491 }, { "epoch": 0.8907657657657657, "grad_norm": 2.1282462040568673, "learning_rate": 8.864933204001796e-06, "loss": 0.4449, "step": 9492 }, { "epoch": 0.8908596096096096, "grad_norm": 0.9636634617509621, "learning_rate": 8.864586808497342e-06, "loss": 0.4125, "step": 9493 }, { "epoch": 0.8909534534534534, "grad_norm": 0.9296476801892074, "learning_rate": 8.864240366914966e-06, "loss": 0.454, "step": 9494 }, { "epoch": 0.8910472972972973, "grad_norm": 0.9942167949895216, "learning_rate": 8.863893879258796e-06, "loss": 0.4723, "step": 9495 }, { "epoch": 0.8911411411411412, "grad_norm": 0.934098062602909, "learning_rate": 8.863547345532965e-06, "loss": 0.4823, "step": 9496 }, { "epoch": 0.891234984984985, "grad_norm": 1.0457827620044295, "learning_rate": 8.863200765741605e-06, "loss": 0.4791, "step": 9497 }, { "epoch": 0.8913288288288288, "grad_norm": 1.1336492469829182, "learning_rate": 8.862854139888848e-06, "loss": 0.474, "step": 9498 }, { "epoch": 0.8914226726726727, "grad_norm": 0.9441950457257179, "learning_rate": 8.862507467978825e-06, "loss": 0.4239, "step": 9499 }, { "epoch": 0.8915165165165165, "grad_norm": 0.8357224621634148, "learning_rate": 8.86216075001567e-06, "loss": 0.495, "step": 9500 }, { "epoch": 0.8916103603603603, "grad_norm": 0.9495569531511143, "learning_rate": 8.861813986003521e-06, "loss": 0.4289, "step": 9501 }, { "epoch": 0.8917042042042042, "grad_norm": 0.9200734507988678, "learning_rate": 8.861467175946508e-06, "loss": 0.4255, "step": 9502 }, { "epoch": 0.8917980480480481, "grad_norm": 1.1355621520970165, "learning_rate": 8.861120319848765e-06, "loss": 0.4752, "step": 9503 }, { "epoch": 0.8918918918918919, "grad_norm": 1.1826241985047339, "learning_rate": 8.860773417714435e-06, "loss": 0.4475, "step": 9504 }, { "epoch": 0.8919857357357357, "grad_norm": 1.006228630842162, "learning_rate": 8.860426469547644e-06, "loss": 0.4971, "step": 9505 }, { "epoch": 0.8920795795795796, "grad_norm": 0.9058719654417176, "learning_rate": 8.860079475352536e-06, "loss": 0.451, "step": 9506 }, { "epoch": 0.8921734234234234, "grad_norm": 1.0683897922401824, "learning_rate": 8.859732435133248e-06, "loss": 0.4641, "step": 9507 }, { "epoch": 0.8922672672672672, "grad_norm": 1.0961730484294654, "learning_rate": 8.859385348893914e-06, "loss": 0.3842, "step": 9508 }, { "epoch": 0.8923611111111112, "grad_norm": 1.8117875718449892, "learning_rate": 8.859038216638673e-06, "loss": 0.4714, "step": 9509 }, { "epoch": 0.892454954954955, "grad_norm": 0.9550038243128327, "learning_rate": 8.858691038371667e-06, "loss": 0.4772, "step": 9510 }, { "epoch": 0.8925487987987988, "grad_norm": 1.2293565509293858, "learning_rate": 8.858343814097034e-06, "loss": 0.4355, "step": 9511 }, { "epoch": 0.8926426426426426, "grad_norm": 0.8628448707281141, "learning_rate": 8.857996543818913e-06, "loss": 0.4207, "step": 9512 }, { "epoch": 0.8927364864864865, "grad_norm": 1.0340481894970397, "learning_rate": 8.857649227541444e-06, "loss": 0.4619, "step": 9513 }, { "epoch": 0.8928303303303303, "grad_norm": 1.0286341256515292, "learning_rate": 8.85730186526877e-06, "loss": 0.4565, "step": 9514 }, { "epoch": 0.8929241741741741, "grad_norm": 1.1056478451774585, "learning_rate": 8.856954457005032e-06, "loss": 0.4694, "step": 9515 }, { "epoch": 0.8930180180180181, "grad_norm": 1.0210855598880768, "learning_rate": 8.856607002754372e-06, "loss": 0.4747, "step": 9516 }, { "epoch": 0.8931118618618619, "grad_norm": 1.2469786859034557, "learning_rate": 8.856259502520934e-06, "loss": 0.5114, "step": 9517 }, { "epoch": 0.8932057057057057, "grad_norm": 1.0360309744951228, "learning_rate": 8.855911956308859e-06, "loss": 0.4958, "step": 9518 }, { "epoch": 0.8932995495495496, "grad_norm": 0.9053430730197997, "learning_rate": 8.855564364122292e-06, "loss": 0.4702, "step": 9519 }, { "epoch": 0.8933933933933934, "grad_norm": 1.243739989725355, "learning_rate": 8.855216725965376e-06, "loss": 0.5055, "step": 9520 }, { "epoch": 0.8934872372372372, "grad_norm": 0.8989588739022526, "learning_rate": 8.854869041842258e-06, "loss": 0.4526, "step": 9521 }, { "epoch": 0.893581081081081, "grad_norm": 0.8287987378816212, "learning_rate": 8.854521311757084e-06, "loss": 0.4502, "step": 9522 }, { "epoch": 0.893674924924925, "grad_norm": 1.0409672459006503, "learning_rate": 8.854173535713997e-06, "loss": 0.4671, "step": 9523 }, { "epoch": 0.8937687687687688, "grad_norm": 1.3903722518591912, "learning_rate": 8.853825713717147e-06, "loss": 0.5034, "step": 9524 }, { "epoch": 0.8938626126126126, "grad_norm": 0.9519587170667991, "learning_rate": 8.853477845770678e-06, "loss": 0.5408, "step": 9525 }, { "epoch": 0.8939564564564565, "grad_norm": 0.8871140223210997, "learning_rate": 8.85312993187874e-06, "loss": 0.4386, "step": 9526 }, { "epoch": 0.8940503003003003, "grad_norm": 1.1001043503254861, "learning_rate": 8.852781972045482e-06, "loss": 0.4559, "step": 9527 }, { "epoch": 0.8941441441441441, "grad_norm": 0.9191339579930211, "learning_rate": 8.852433966275048e-06, "loss": 0.4715, "step": 9528 }, { "epoch": 0.894237987987988, "grad_norm": 0.9961359897204691, "learning_rate": 8.85208591457159e-06, "loss": 0.4431, "step": 9529 }, { "epoch": 0.8943318318318318, "grad_norm": 2.2630025231785553, "learning_rate": 8.85173781693926e-06, "loss": 0.4703, "step": 9530 }, { "epoch": 0.8944256756756757, "grad_norm": 1.0212849434142464, "learning_rate": 8.851389673382207e-06, "loss": 0.4106, "step": 9531 }, { "epoch": 0.8945195195195195, "grad_norm": 0.8537011264725151, "learning_rate": 8.851041483904582e-06, "loss": 0.4379, "step": 9532 }, { "epoch": 0.8946133633633634, "grad_norm": 1.0118006848772256, "learning_rate": 8.850693248510536e-06, "loss": 0.4388, "step": 9533 }, { "epoch": 0.8947072072072072, "grad_norm": 0.8274364273727398, "learning_rate": 8.85034496720422e-06, "loss": 0.4224, "step": 9534 }, { "epoch": 0.894801051051051, "grad_norm": 0.9686829041769739, "learning_rate": 8.849996639989788e-06, "loss": 0.434, "step": 9535 }, { "epoch": 0.8948948948948949, "grad_norm": 0.9937037157451871, "learning_rate": 8.849648266871393e-06, "loss": 0.433, "step": 9536 }, { "epoch": 0.8949887387387387, "grad_norm": 0.9544144404592396, "learning_rate": 8.84929984785319e-06, "loss": 0.4363, "step": 9537 }, { "epoch": 0.8950825825825826, "grad_norm": 1.4212914824887921, "learning_rate": 8.84895138293933e-06, "loss": 0.455, "step": 9538 }, { "epoch": 0.8951764264264265, "grad_norm": 0.9404245765441421, "learning_rate": 8.848602872133972e-06, "loss": 0.4649, "step": 9539 }, { "epoch": 0.8952702702702703, "grad_norm": 1.1189525133467417, "learning_rate": 8.848254315441267e-06, "loss": 0.452, "step": 9540 }, { "epoch": 0.8953641141141141, "grad_norm": 1.1755291290130594, "learning_rate": 8.847905712865372e-06, "loss": 0.4848, "step": 9541 }, { "epoch": 0.8954579579579579, "grad_norm": 1.0776601626769498, "learning_rate": 8.847557064410446e-06, "loss": 0.4928, "step": 9542 }, { "epoch": 0.8955518018018018, "grad_norm": 1.3282953271656126, "learning_rate": 8.847208370080644e-06, "loss": 0.4668, "step": 9543 }, { "epoch": 0.8956456456456456, "grad_norm": 0.9610245549771438, "learning_rate": 8.846859629880125e-06, "loss": 0.439, "step": 9544 }, { "epoch": 0.8957394894894894, "grad_norm": 1.0629570553101917, "learning_rate": 8.846510843813043e-06, "loss": 0.479, "step": 9545 }, { "epoch": 0.8958333333333334, "grad_norm": 0.8878088089391734, "learning_rate": 8.846162011883562e-06, "loss": 0.4452, "step": 9546 }, { "epoch": 0.8959271771771772, "grad_norm": 1.0199271996348618, "learning_rate": 8.845813134095837e-06, "loss": 0.4437, "step": 9547 }, { "epoch": 0.896021021021021, "grad_norm": 1.632817533883178, "learning_rate": 8.845464210454032e-06, "loss": 0.446, "step": 9548 }, { "epoch": 0.8961148648648649, "grad_norm": 1.382374034664377, "learning_rate": 8.845115240962302e-06, "loss": 0.394, "step": 9549 }, { "epoch": 0.8962087087087087, "grad_norm": 1.5613743455068052, "learning_rate": 8.84476622562481e-06, "loss": 0.4575, "step": 9550 }, { "epoch": 0.8963025525525525, "grad_norm": 1.194126732436387, "learning_rate": 8.84441716444572e-06, "loss": 0.4278, "step": 9551 }, { "epoch": 0.8963963963963963, "grad_norm": 1.037315381623247, "learning_rate": 8.844068057429191e-06, "loss": 0.4873, "step": 9552 }, { "epoch": 0.8964902402402403, "grad_norm": 1.754896430181632, "learning_rate": 8.843718904579385e-06, "loss": 0.4614, "step": 9553 }, { "epoch": 0.8965840840840841, "grad_norm": 0.9475434042769181, "learning_rate": 8.84336970590047e-06, "loss": 0.439, "step": 9554 }, { "epoch": 0.8966779279279279, "grad_norm": 1.0189058613324564, "learning_rate": 8.843020461396601e-06, "loss": 0.4261, "step": 9555 }, { "epoch": 0.8967717717717718, "grad_norm": 0.916808614076387, "learning_rate": 8.842671171071948e-06, "loss": 0.4544, "step": 9556 }, { "epoch": 0.8968656156156156, "grad_norm": 1.9660512938496102, "learning_rate": 8.842321834930673e-06, "loss": 0.4751, "step": 9557 }, { "epoch": 0.8969594594594594, "grad_norm": 0.8546371865935817, "learning_rate": 8.841972452976946e-06, "loss": 0.4144, "step": 9558 }, { "epoch": 0.8970533033033034, "grad_norm": 1.0726976394967949, "learning_rate": 8.841623025214927e-06, "loss": 0.5079, "step": 9559 }, { "epoch": 0.8971471471471472, "grad_norm": 0.8771901360798446, "learning_rate": 8.841273551648786e-06, "loss": 0.4275, "step": 9560 }, { "epoch": 0.897240990990991, "grad_norm": 0.8653354246788469, "learning_rate": 8.840924032282687e-06, "loss": 0.4935, "step": 9561 }, { "epoch": 0.8973348348348348, "grad_norm": 1.1836427051259504, "learning_rate": 8.8405744671208e-06, "loss": 0.5123, "step": 9562 }, { "epoch": 0.8974286786786787, "grad_norm": 1.0399370540322588, "learning_rate": 8.840224856167289e-06, "loss": 0.4431, "step": 9563 }, { "epoch": 0.8975225225225225, "grad_norm": 0.9434806997293157, "learning_rate": 8.839875199426328e-06, "loss": 0.4192, "step": 9564 }, { "epoch": 0.8976163663663663, "grad_norm": 1.1221351537958482, "learning_rate": 8.839525496902082e-06, "loss": 0.4542, "step": 9565 }, { "epoch": 0.8977102102102102, "grad_norm": 1.0478123583119467, "learning_rate": 8.83917574859872e-06, "loss": 0.4723, "step": 9566 }, { "epoch": 0.8978040540540541, "grad_norm": 1.3549708176391213, "learning_rate": 8.838825954520416e-06, "loss": 0.4702, "step": 9567 }, { "epoch": 0.8978978978978979, "grad_norm": 1.0939939660796956, "learning_rate": 8.838476114671337e-06, "loss": 0.4857, "step": 9568 }, { "epoch": 0.8979917417417418, "grad_norm": 1.3901984552252553, "learning_rate": 8.838126229055656e-06, "loss": 0.4969, "step": 9569 }, { "epoch": 0.8980855855855856, "grad_norm": 1.1524339564732577, "learning_rate": 8.837776297677545e-06, "loss": 0.4368, "step": 9570 }, { "epoch": 0.8981794294294294, "grad_norm": 0.9253741812253616, "learning_rate": 8.837426320541176e-06, "loss": 0.4753, "step": 9571 }, { "epoch": 0.8982732732732732, "grad_norm": 1.7266331446509045, "learning_rate": 8.83707629765072e-06, "loss": 0.445, "step": 9572 }, { "epoch": 0.8983671171171171, "grad_norm": 0.919527252839754, "learning_rate": 8.836726229010352e-06, "loss": 0.4216, "step": 9573 }, { "epoch": 0.898460960960961, "grad_norm": 1.0050019662909617, "learning_rate": 8.836376114624246e-06, "loss": 0.4332, "step": 9574 }, { "epoch": 0.8985548048048048, "grad_norm": 1.2298090110756077, "learning_rate": 8.836025954496574e-06, "loss": 0.438, "step": 9575 }, { "epoch": 0.8986486486486487, "grad_norm": 0.9086896376659188, "learning_rate": 8.835675748631516e-06, "loss": 0.4537, "step": 9576 }, { "epoch": 0.8987424924924925, "grad_norm": 1.0167340165310397, "learning_rate": 8.835325497033243e-06, "loss": 0.4424, "step": 9577 }, { "epoch": 0.8988363363363363, "grad_norm": 0.9657382998368695, "learning_rate": 8.834975199705933e-06, "loss": 0.4449, "step": 9578 }, { "epoch": 0.8989301801801802, "grad_norm": 0.9949560688473752, "learning_rate": 8.834624856653762e-06, "loss": 0.4624, "step": 9579 }, { "epoch": 0.899024024024024, "grad_norm": 1.0135035807626496, "learning_rate": 8.834274467880908e-06, "loss": 0.4479, "step": 9580 }, { "epoch": 0.8991178678678678, "grad_norm": 0.893209834665844, "learning_rate": 8.833924033391547e-06, "loss": 0.5121, "step": 9581 }, { "epoch": 0.8992117117117117, "grad_norm": 0.9102794609845787, "learning_rate": 8.83357355318986e-06, "loss": 0.4458, "step": 9582 }, { "epoch": 0.8993055555555556, "grad_norm": 1.63949005733132, "learning_rate": 8.833223027280024e-06, "loss": 0.394, "step": 9583 }, { "epoch": 0.8993993993993994, "grad_norm": 1.0639973664086029, "learning_rate": 8.832872455666219e-06, "loss": 0.458, "step": 9584 }, { "epoch": 0.8994932432432432, "grad_norm": 1.0461860773827654, "learning_rate": 8.832521838352624e-06, "loss": 0.4761, "step": 9585 }, { "epoch": 0.8995870870870871, "grad_norm": 1.0630280785978696, "learning_rate": 8.83217117534342e-06, "loss": 0.4705, "step": 9586 }, { "epoch": 0.8996809309309309, "grad_norm": 0.9850130005144261, "learning_rate": 8.831820466642789e-06, "loss": 0.4517, "step": 9587 }, { "epoch": 0.8997747747747747, "grad_norm": 0.8661144009430013, "learning_rate": 8.831469712254911e-06, "loss": 0.3968, "step": 9588 }, { "epoch": 0.8998686186186187, "grad_norm": 0.9532942351964235, "learning_rate": 8.831118912183969e-06, "loss": 0.4622, "step": 9589 }, { "epoch": 0.8999624624624625, "grad_norm": 1.3000185904895483, "learning_rate": 8.830768066434145e-06, "loss": 0.4612, "step": 9590 }, { "epoch": 0.9000563063063063, "grad_norm": 0.8990131096521625, "learning_rate": 8.830417175009622e-06, "loss": 0.4288, "step": 9591 }, { "epoch": 0.9001501501501501, "grad_norm": 1.1103992018128588, "learning_rate": 8.830066237914584e-06, "loss": 0.4615, "step": 9592 }, { "epoch": 0.900243993993994, "grad_norm": 0.8378072366850146, "learning_rate": 8.829715255153218e-06, "loss": 0.4681, "step": 9593 }, { "epoch": 0.9003378378378378, "grad_norm": 0.9192368524841641, "learning_rate": 8.829364226729704e-06, "loss": 0.4237, "step": 9594 }, { "epoch": 0.9004316816816816, "grad_norm": 1.0561312440505506, "learning_rate": 8.82901315264823e-06, "loss": 0.4544, "step": 9595 }, { "epoch": 0.9005255255255256, "grad_norm": 0.8672392238942254, "learning_rate": 8.82866203291298e-06, "loss": 0.477, "step": 9596 }, { "epoch": 0.9006193693693694, "grad_norm": 1.1310835003302246, "learning_rate": 8.828310867528145e-06, "loss": 0.4362, "step": 9597 }, { "epoch": 0.9007132132132132, "grad_norm": 0.821401347812248, "learning_rate": 8.827959656497906e-06, "loss": 0.3864, "step": 9598 }, { "epoch": 0.9008070570570571, "grad_norm": 1.015404148185895, "learning_rate": 8.827608399826456e-06, "loss": 0.4343, "step": 9599 }, { "epoch": 0.9009009009009009, "grad_norm": 0.8574235289819645, "learning_rate": 8.82725709751798e-06, "loss": 0.4183, "step": 9600 }, { "epoch": 0.9009947447447447, "grad_norm": 1.0420495558060556, "learning_rate": 8.826905749576666e-06, "loss": 0.4973, "step": 9601 }, { "epoch": 0.9010885885885885, "grad_norm": 1.0906462349840738, "learning_rate": 8.826554356006705e-06, "loss": 0.4571, "step": 9602 }, { "epoch": 0.9011824324324325, "grad_norm": 1.5302655625217825, "learning_rate": 8.826202916812288e-06, "loss": 0.4398, "step": 9603 }, { "epoch": 0.9012762762762763, "grad_norm": 1.0957063616676839, "learning_rate": 8.8258514319976e-06, "loss": 0.4894, "step": 9604 }, { "epoch": 0.9013701201201201, "grad_norm": 0.8927246756029494, "learning_rate": 8.825499901566837e-06, "loss": 0.4474, "step": 9605 }, { "epoch": 0.901463963963964, "grad_norm": 1.4428673532931833, "learning_rate": 8.825148325524188e-06, "loss": 0.4778, "step": 9606 }, { "epoch": 0.9015578078078078, "grad_norm": 1.037284251652723, "learning_rate": 8.824796703873845e-06, "loss": 0.4307, "step": 9607 }, { "epoch": 0.9016516516516516, "grad_norm": 1.1260188396781647, "learning_rate": 8.82444503662e-06, "loss": 0.4406, "step": 9608 }, { "epoch": 0.9017454954954955, "grad_norm": 0.9630711750392968, "learning_rate": 8.82409332376685e-06, "loss": 0.4861, "step": 9609 }, { "epoch": 0.9018393393393394, "grad_norm": 0.8791187222346258, "learning_rate": 8.823741565318582e-06, "loss": 0.481, "step": 9610 }, { "epoch": 0.9019331831831832, "grad_norm": 1.0973609440806207, "learning_rate": 8.823389761279394e-06, "loss": 0.4594, "step": 9611 }, { "epoch": 0.902027027027027, "grad_norm": 0.8630919674816108, "learning_rate": 8.82303791165348e-06, "loss": 0.4367, "step": 9612 }, { "epoch": 0.9021208708708709, "grad_norm": 0.9701096110397661, "learning_rate": 8.822686016445036e-06, "loss": 0.4847, "step": 9613 }, { "epoch": 0.9022147147147147, "grad_norm": 2.576086040512115, "learning_rate": 8.822334075658255e-06, "loss": 0.4872, "step": 9614 }, { "epoch": 0.9023085585585585, "grad_norm": 1.4139498792950966, "learning_rate": 8.821982089297336e-06, "loss": 0.451, "step": 9615 }, { "epoch": 0.9024024024024024, "grad_norm": 1.0371137078561883, "learning_rate": 8.821630057366474e-06, "loss": 0.4926, "step": 9616 }, { "epoch": 0.9024962462462462, "grad_norm": 0.8708901138142681, "learning_rate": 8.821277979869869e-06, "loss": 0.4638, "step": 9617 }, { "epoch": 0.9025900900900901, "grad_norm": 0.9728936039758798, "learning_rate": 8.820925856811715e-06, "loss": 0.4425, "step": 9618 }, { "epoch": 0.902683933933934, "grad_norm": 4.475947268346217, "learning_rate": 8.820573688196212e-06, "loss": 0.4654, "step": 9619 }, { "epoch": 0.9027777777777778, "grad_norm": 1.0027255485539828, "learning_rate": 8.820221474027562e-06, "loss": 0.4303, "step": 9620 }, { "epoch": 0.9028716216216216, "grad_norm": 1.2593009533957025, "learning_rate": 8.81986921430996e-06, "loss": 0.4694, "step": 9621 }, { "epoch": 0.9029654654654654, "grad_norm": 1.2649526010785028, "learning_rate": 8.819516909047607e-06, "loss": 0.444, "step": 9622 }, { "epoch": 0.9030593093093093, "grad_norm": 2.4267959566712825, "learning_rate": 8.819164558244704e-06, "loss": 0.3928, "step": 9623 }, { "epoch": 0.9031531531531531, "grad_norm": 0.9513708645546859, "learning_rate": 8.818812161905454e-06, "loss": 0.4693, "step": 9624 }, { "epoch": 0.903246996996997, "grad_norm": 0.9564185018298881, "learning_rate": 8.818459720034057e-06, "loss": 0.4874, "step": 9625 }, { "epoch": 0.9033408408408409, "grad_norm": 1.0402769429895122, "learning_rate": 8.818107232634714e-06, "loss": 0.4879, "step": 9626 }, { "epoch": 0.9034346846846847, "grad_norm": 0.9216900715928634, "learning_rate": 8.817754699711632e-06, "loss": 0.3973, "step": 9627 }, { "epoch": 0.9035285285285285, "grad_norm": 0.9582386486263709, "learning_rate": 8.81740212126901e-06, "loss": 0.4461, "step": 9628 }, { "epoch": 0.9036223723723724, "grad_norm": 0.9906602325180993, "learning_rate": 8.81704949731105e-06, "loss": 0.4757, "step": 9629 }, { "epoch": 0.9037162162162162, "grad_norm": 0.9146619127100015, "learning_rate": 8.816696827841965e-06, "loss": 0.4433, "step": 9630 }, { "epoch": 0.90381006006006, "grad_norm": 0.8948098413863254, "learning_rate": 8.81634411286595e-06, "loss": 0.425, "step": 9631 }, { "epoch": 0.9039039039039038, "grad_norm": 1.0063518320075853, "learning_rate": 8.815991352387216e-06, "loss": 0.4717, "step": 9632 }, { "epoch": 0.9039977477477478, "grad_norm": 0.9149112313274914, "learning_rate": 8.81563854640997e-06, "loss": 0.4286, "step": 9633 }, { "epoch": 0.9040915915915916, "grad_norm": 1.0208911736595765, "learning_rate": 8.815285694938416e-06, "loss": 0.4726, "step": 9634 }, { "epoch": 0.9041854354354354, "grad_norm": 1.0210107899932643, "learning_rate": 8.814932797976761e-06, "loss": 0.4571, "step": 9635 }, { "epoch": 0.9042792792792793, "grad_norm": 1.7834180765985341, "learning_rate": 8.814579855529212e-06, "loss": 0.4852, "step": 9636 }, { "epoch": 0.9043731231231231, "grad_norm": 1.3058845027510575, "learning_rate": 8.81422686759998e-06, "loss": 0.4393, "step": 9637 }, { "epoch": 0.9044669669669669, "grad_norm": 9.600501815256996, "learning_rate": 8.813873834193271e-06, "loss": 0.5407, "step": 9638 }, { "epoch": 0.9045608108108109, "grad_norm": 0.8839662733861847, "learning_rate": 8.813520755313297e-06, "loss": 0.4558, "step": 9639 }, { "epoch": 0.9046546546546547, "grad_norm": 2.2596985405063514, "learning_rate": 8.813167630964265e-06, "loss": 0.4431, "step": 9640 }, { "epoch": 0.9047484984984985, "grad_norm": 1.4786544100508878, "learning_rate": 8.812814461150388e-06, "loss": 0.4669, "step": 9641 }, { "epoch": 0.9048423423423423, "grad_norm": 0.894457939481283, "learning_rate": 8.812461245875874e-06, "loss": 0.456, "step": 9642 }, { "epoch": 0.9049361861861862, "grad_norm": 0.9743481415051941, "learning_rate": 8.812107985144936e-06, "loss": 0.4409, "step": 9643 }, { "epoch": 0.90503003003003, "grad_norm": 0.8905597448392644, "learning_rate": 8.811754678961787e-06, "loss": 0.4635, "step": 9644 }, { "epoch": 0.9051238738738738, "grad_norm": 0.9144164350798141, "learning_rate": 8.811401327330637e-06, "loss": 0.4875, "step": 9645 }, { "epoch": 0.9052177177177178, "grad_norm": 1.014773867796215, "learning_rate": 8.8110479302557e-06, "loss": 0.4375, "step": 9646 }, { "epoch": 0.9053115615615616, "grad_norm": 0.9170612567175341, "learning_rate": 8.810694487741192e-06, "loss": 0.4535, "step": 9647 }, { "epoch": 0.9054054054054054, "grad_norm": 0.9560761534915798, "learning_rate": 8.810340999791323e-06, "loss": 0.4473, "step": 9648 }, { "epoch": 0.9054992492492493, "grad_norm": 0.9510181011033566, "learning_rate": 8.809987466410312e-06, "loss": 0.5023, "step": 9649 }, { "epoch": 0.9055930930930931, "grad_norm": 1.1507762518733582, "learning_rate": 8.80963388760237e-06, "loss": 0.486, "step": 9650 }, { "epoch": 0.9056869369369369, "grad_norm": 1.1089274551273132, "learning_rate": 8.809280263371716e-06, "loss": 0.4293, "step": 9651 }, { "epoch": 0.9057807807807807, "grad_norm": 1.0546460891074998, "learning_rate": 8.808926593722565e-06, "loss": 0.495, "step": 9652 }, { "epoch": 0.9058746246246246, "grad_norm": 0.9085279032378143, "learning_rate": 8.808572878659132e-06, "loss": 0.4325, "step": 9653 }, { "epoch": 0.9059684684684685, "grad_norm": 1.0888414576205043, "learning_rate": 8.80821911818564e-06, "loss": 0.5284, "step": 9654 }, { "epoch": 0.9060623123123123, "grad_norm": 1.8534218433091674, "learning_rate": 8.8078653123063e-06, "loss": 0.5017, "step": 9655 }, { "epoch": 0.9061561561561562, "grad_norm": 1.0741986736648612, "learning_rate": 8.807511461025336e-06, "loss": 0.5097, "step": 9656 }, { "epoch": 0.90625, "grad_norm": 0.9825088014629839, "learning_rate": 8.807157564346962e-06, "loss": 0.4428, "step": 9657 }, { "epoch": 0.9063438438438438, "grad_norm": 1.079382074462816, "learning_rate": 8.8068036222754e-06, "loss": 0.4767, "step": 9658 }, { "epoch": 0.9064376876876877, "grad_norm": 1.1240314125587352, "learning_rate": 8.806449634814875e-06, "loss": 0.4186, "step": 9659 }, { "epoch": 0.9065315315315315, "grad_norm": 0.8851137629411965, "learning_rate": 8.8060956019696e-06, "loss": 0.42, "step": 9660 }, { "epoch": 0.9066253753753754, "grad_norm": 1.0269042706670524, "learning_rate": 8.8057415237438e-06, "loss": 0.4303, "step": 9661 }, { "epoch": 0.9067192192192193, "grad_norm": 1.0152675622756737, "learning_rate": 8.805387400141695e-06, "loss": 0.4955, "step": 9662 }, { "epoch": 0.9068130630630631, "grad_norm": 4.42471603402002, "learning_rate": 8.805033231167509e-06, "loss": 0.5282, "step": 9663 }, { "epoch": 0.9069069069069069, "grad_norm": 0.9707572394995323, "learning_rate": 8.804679016825464e-06, "loss": 0.529, "step": 9664 }, { "epoch": 0.9070007507507507, "grad_norm": 1.4238151833151744, "learning_rate": 8.804324757119782e-06, "loss": 0.4354, "step": 9665 }, { "epoch": 0.9070945945945946, "grad_norm": 0.9363392791227195, "learning_rate": 8.80397045205469e-06, "loss": 0.461, "step": 9666 }, { "epoch": 0.9071884384384384, "grad_norm": 1.2487347812871918, "learning_rate": 8.803616101634408e-06, "loss": 0.4628, "step": 9667 }, { "epoch": 0.9072822822822822, "grad_norm": 0.954358367579643, "learning_rate": 8.803261705863166e-06, "loss": 0.4623, "step": 9668 }, { "epoch": 0.9073761261261262, "grad_norm": 1.0265359458964005, "learning_rate": 8.802907264745187e-06, "loss": 0.4371, "step": 9669 }, { "epoch": 0.90746996996997, "grad_norm": 1.0528676795934246, "learning_rate": 8.802552778284694e-06, "loss": 0.4854, "step": 9670 }, { "epoch": 0.9075638138138138, "grad_norm": 1.0575107304251812, "learning_rate": 8.80219824648592e-06, "loss": 0.4166, "step": 9671 }, { "epoch": 0.9076576576576577, "grad_norm": 1.0295098042116138, "learning_rate": 8.801843669353087e-06, "loss": 0.5386, "step": 9672 }, { "epoch": 0.9077515015015015, "grad_norm": 1.0664479185401319, "learning_rate": 8.801489046890426e-06, "loss": 0.4776, "step": 9673 }, { "epoch": 0.9078453453453453, "grad_norm": 0.9552346692419786, "learning_rate": 8.801134379102163e-06, "loss": 0.4579, "step": 9674 }, { "epoch": 0.9079391891891891, "grad_norm": 1.4391695646305158, "learning_rate": 8.800779665992528e-06, "loss": 0.4582, "step": 9675 }, { "epoch": 0.9080330330330331, "grad_norm": 0.9573575127411342, "learning_rate": 8.800424907565747e-06, "loss": 0.4796, "step": 9676 }, { "epoch": 0.9081268768768769, "grad_norm": 1.0700930074217914, "learning_rate": 8.800070103826056e-06, "loss": 0.455, "step": 9677 }, { "epoch": 0.9082207207207207, "grad_norm": 0.9757950562841791, "learning_rate": 8.79971525477768e-06, "loss": 0.4143, "step": 9678 }, { "epoch": 0.9083145645645646, "grad_norm": 1.1273061213500677, "learning_rate": 8.799360360424852e-06, "loss": 0.4739, "step": 9679 }, { "epoch": 0.9084084084084084, "grad_norm": 1.2719908902409753, "learning_rate": 8.799005420771804e-06, "loss": 0.5042, "step": 9680 }, { "epoch": 0.9085022522522522, "grad_norm": 0.9029896571379091, "learning_rate": 8.798650435822767e-06, "loss": 0.4895, "step": 9681 }, { "epoch": 0.9085960960960962, "grad_norm": 0.8745199239413254, "learning_rate": 8.798295405581974e-06, "loss": 0.4695, "step": 9682 }, { "epoch": 0.90868993993994, "grad_norm": 1.1886140723524874, "learning_rate": 8.797940330053657e-06, "loss": 0.481, "step": 9683 }, { "epoch": 0.9087837837837838, "grad_norm": 1.136228863299578, "learning_rate": 8.797585209242052e-06, "loss": 0.4629, "step": 9684 }, { "epoch": 0.9088776276276276, "grad_norm": 1.0464796656738176, "learning_rate": 8.79723004315139e-06, "loss": 0.5142, "step": 9685 }, { "epoch": 0.9089714714714715, "grad_norm": 0.9078404963222739, "learning_rate": 8.796874831785908e-06, "loss": 0.4278, "step": 9686 }, { "epoch": 0.9090653153153153, "grad_norm": 1.0058606103225605, "learning_rate": 8.79651957514984e-06, "loss": 0.467, "step": 9687 }, { "epoch": 0.9091591591591591, "grad_norm": 0.9676720951150766, "learning_rate": 8.796164273247423e-06, "loss": 0.4497, "step": 9688 }, { "epoch": 0.909253003003003, "grad_norm": 1.001491850900156, "learning_rate": 8.795808926082892e-06, "loss": 0.4238, "step": 9689 }, { "epoch": 0.9093468468468469, "grad_norm": 1.0639009281505118, "learning_rate": 8.795453533660485e-06, "loss": 0.4787, "step": 9690 }, { "epoch": 0.9094406906906907, "grad_norm": 0.9432176348433913, "learning_rate": 8.795098095984438e-06, "loss": 0.4592, "step": 9691 }, { "epoch": 0.9095345345345346, "grad_norm": 1.1602338596842634, "learning_rate": 8.79474261305899e-06, "loss": 0.4791, "step": 9692 }, { "epoch": 0.9096283783783784, "grad_norm": 0.9736128421839937, "learning_rate": 8.79438708488838e-06, "loss": 0.4374, "step": 9693 }, { "epoch": 0.9097222222222222, "grad_norm": 0.8105716013457588, "learning_rate": 8.794031511476846e-06, "loss": 0.4271, "step": 9694 }, { "epoch": 0.909816066066066, "grad_norm": 2.2473323586401044, "learning_rate": 8.793675892828627e-06, "loss": 0.4374, "step": 9695 }, { "epoch": 0.9099099099099099, "grad_norm": 0.923678150450192, "learning_rate": 8.793320228947963e-06, "loss": 0.462, "step": 9696 }, { "epoch": 0.9100037537537538, "grad_norm": 1.1255823019878586, "learning_rate": 8.792964519839097e-06, "loss": 0.4655, "step": 9697 }, { "epoch": 0.9100975975975976, "grad_norm": 1.3237003414318929, "learning_rate": 8.79260876550627e-06, "loss": 0.4969, "step": 9698 }, { "epoch": 0.9101914414414415, "grad_norm": 0.8658844621153733, "learning_rate": 8.79225296595372e-06, "loss": 0.4123, "step": 9699 }, { "epoch": 0.9102852852852853, "grad_norm": 1.1317616700653401, "learning_rate": 8.791897121185691e-06, "loss": 0.4652, "step": 9700 }, { "epoch": 0.9103791291291291, "grad_norm": 1.056314235016024, "learning_rate": 8.79154123120643e-06, "loss": 0.4018, "step": 9701 }, { "epoch": 0.910472972972973, "grad_norm": 3.288388041606913, "learning_rate": 8.791185296020172e-06, "loss": 0.4803, "step": 9702 }, { "epoch": 0.9105668168168168, "grad_norm": 1.011510675111051, "learning_rate": 8.790829315631168e-06, "loss": 0.5263, "step": 9703 }, { "epoch": 0.9106606606606606, "grad_norm": 0.9507867965653253, "learning_rate": 8.790473290043659e-06, "loss": 0.4553, "step": 9704 }, { "epoch": 0.9107545045045045, "grad_norm": 0.8487788988938372, "learning_rate": 8.790117219261892e-06, "loss": 0.4149, "step": 9705 }, { "epoch": 0.9108483483483484, "grad_norm": 0.8453054997023189, "learning_rate": 8.789761103290111e-06, "loss": 0.4528, "step": 9706 }, { "epoch": 0.9109421921921922, "grad_norm": 0.9127377217760155, "learning_rate": 8.789404942132562e-06, "loss": 0.4744, "step": 9707 }, { "epoch": 0.911036036036036, "grad_norm": 0.8963026811719009, "learning_rate": 8.789048735793492e-06, "loss": 0.4621, "step": 9708 }, { "epoch": 0.9111298798798799, "grad_norm": 1.7462841845680401, "learning_rate": 8.788692484277148e-06, "loss": 0.4795, "step": 9709 }, { "epoch": 0.9112237237237237, "grad_norm": 0.9032612305526463, "learning_rate": 8.788336187587778e-06, "loss": 0.4697, "step": 9710 }, { "epoch": 0.9113175675675675, "grad_norm": 1.4081006061469514, "learning_rate": 8.78797984572963e-06, "loss": 0.4909, "step": 9711 }, { "epoch": 0.9114114114114115, "grad_norm": 0.9314881587387014, "learning_rate": 8.78762345870695e-06, "loss": 0.4849, "step": 9712 }, { "epoch": 0.9115052552552553, "grad_norm": 5.71204609712549, "learning_rate": 8.787267026523994e-06, "loss": 0.5133, "step": 9713 }, { "epoch": 0.9115990990990991, "grad_norm": 2.5310342024452215, "learning_rate": 8.786910549185004e-06, "loss": 0.4793, "step": 9714 }, { "epoch": 0.9116929429429429, "grad_norm": 0.93511711210001, "learning_rate": 8.786554026694234e-06, "loss": 0.4024, "step": 9715 }, { "epoch": 0.9117867867867868, "grad_norm": 1.6443380008845871, "learning_rate": 8.786197459055936e-06, "loss": 0.4613, "step": 9716 }, { "epoch": 0.9118806306306306, "grad_norm": 0.9211155972419239, "learning_rate": 8.78584084627436e-06, "loss": 0.4822, "step": 9717 }, { "epoch": 0.9119744744744744, "grad_norm": 0.9652560973509272, "learning_rate": 8.785484188353758e-06, "loss": 0.4848, "step": 9718 }, { "epoch": 0.9120683183183184, "grad_norm": 0.9813274618993924, "learning_rate": 8.785127485298381e-06, "loss": 0.472, "step": 9719 }, { "epoch": 0.9121621621621622, "grad_norm": 1.0890703630784888, "learning_rate": 8.784770737112486e-06, "loss": 0.4806, "step": 9720 }, { "epoch": 0.912256006006006, "grad_norm": 1.2893451078873166, "learning_rate": 8.784413943800323e-06, "loss": 0.4589, "step": 9721 }, { "epoch": 0.9123498498498499, "grad_norm": 0.9360116932317215, "learning_rate": 8.784057105366145e-06, "loss": 0.4971, "step": 9722 }, { "epoch": 0.9124436936936937, "grad_norm": 1.1074800932091142, "learning_rate": 8.783700221814212e-06, "loss": 0.4698, "step": 9723 }, { "epoch": 0.9125375375375375, "grad_norm": 1.0879699937179381, "learning_rate": 8.783343293148774e-06, "loss": 0.4655, "step": 9724 }, { "epoch": 0.9126313813813813, "grad_norm": 0.9029358746036418, "learning_rate": 8.78298631937409e-06, "loss": 0.4462, "step": 9725 }, { "epoch": 0.9127252252252253, "grad_norm": 1.0388270864203104, "learning_rate": 8.782629300494414e-06, "loss": 0.4486, "step": 9726 }, { "epoch": 0.9128190690690691, "grad_norm": 0.9624265810541165, "learning_rate": 8.782272236514005e-06, "loss": 0.4959, "step": 9727 }, { "epoch": 0.9129129129129129, "grad_norm": 0.9755221659169053, "learning_rate": 8.781915127437117e-06, "loss": 0.4245, "step": 9728 }, { "epoch": 0.9130067567567568, "grad_norm": 1.0029349941393, "learning_rate": 8.78155797326801e-06, "loss": 0.4866, "step": 9729 }, { "epoch": 0.9131006006006006, "grad_norm": 0.9762428876990533, "learning_rate": 8.781200774010943e-06, "loss": 0.464, "step": 9730 }, { "epoch": 0.9131944444444444, "grad_norm": 0.9302238443901468, "learning_rate": 8.780843529670173e-06, "loss": 0.485, "step": 9731 }, { "epoch": 0.9132882882882883, "grad_norm": 0.9240365824986844, "learning_rate": 8.780486240249962e-06, "loss": 0.4719, "step": 9732 }, { "epoch": 0.9133821321321322, "grad_norm": 1.1269942220941684, "learning_rate": 8.780128905754568e-06, "loss": 0.4823, "step": 9733 }, { "epoch": 0.913475975975976, "grad_norm": 0.9968219136580215, "learning_rate": 8.779771526188254e-06, "loss": 0.4156, "step": 9734 }, { "epoch": 0.9135698198198198, "grad_norm": 0.9249486807195354, "learning_rate": 8.779414101555277e-06, "loss": 0.3983, "step": 9735 }, { "epoch": 0.9136636636636637, "grad_norm": 0.9129602734951158, "learning_rate": 8.7790566318599e-06, "loss": 0.4906, "step": 9736 }, { "epoch": 0.9137575075075075, "grad_norm": 1.300073913406836, "learning_rate": 8.778699117106388e-06, "loss": 0.5485, "step": 9737 }, { "epoch": 0.9138513513513513, "grad_norm": 1.0757801240493265, "learning_rate": 8.778341557299001e-06, "loss": 0.4265, "step": 9738 }, { "epoch": 0.9139451951951952, "grad_norm": 1.4279613387562082, "learning_rate": 8.777983952442005e-06, "loss": 0.5038, "step": 9739 }, { "epoch": 0.914039039039039, "grad_norm": 0.9618724183656452, "learning_rate": 8.777626302539659e-06, "loss": 0.4156, "step": 9740 }, { "epoch": 0.9141328828828829, "grad_norm": 1.0061887546503279, "learning_rate": 8.77726860759623e-06, "loss": 0.4949, "step": 9741 }, { "epoch": 0.9142267267267268, "grad_norm": 1.2823170021610493, "learning_rate": 8.776910867615985e-06, "loss": 0.5054, "step": 9742 }, { "epoch": 0.9143205705705706, "grad_norm": 1.1483491232692378, "learning_rate": 8.776553082603185e-06, "loss": 0.4761, "step": 9743 }, { "epoch": 0.9144144144144144, "grad_norm": 0.8982718004523472, "learning_rate": 8.776195252562098e-06, "loss": 0.4923, "step": 9744 }, { "epoch": 0.9145082582582582, "grad_norm": 1.1404313009236668, "learning_rate": 8.775837377496992e-06, "loss": 0.4832, "step": 9745 }, { "epoch": 0.9146021021021021, "grad_norm": 1.0854079018885074, "learning_rate": 8.775479457412132e-06, "loss": 0.4683, "step": 9746 }, { "epoch": 0.9146959459459459, "grad_norm": 3.0931377983051505, "learning_rate": 8.775121492311787e-06, "loss": 0.4727, "step": 9747 }, { "epoch": 0.9147897897897898, "grad_norm": 1.2134310719135146, "learning_rate": 8.774763482200224e-06, "loss": 0.4741, "step": 9748 }, { "epoch": 0.9148836336336337, "grad_norm": 1.0196679956445858, "learning_rate": 8.774405427081709e-06, "loss": 0.4574, "step": 9749 }, { "epoch": 0.9149774774774775, "grad_norm": 1.8170575204871913, "learning_rate": 8.774047326960518e-06, "loss": 0.4531, "step": 9750 }, { "epoch": 0.9150713213213213, "grad_norm": 1.1784920033125084, "learning_rate": 8.773689181840913e-06, "loss": 0.437, "step": 9751 }, { "epoch": 0.9151651651651652, "grad_norm": 0.9343859259153022, "learning_rate": 8.773330991727169e-06, "loss": 0.4471, "step": 9752 }, { "epoch": 0.915259009009009, "grad_norm": 1.1411019607680428, "learning_rate": 8.772972756623556e-06, "loss": 0.4752, "step": 9753 }, { "epoch": 0.9153528528528528, "grad_norm": 0.9348973699196302, "learning_rate": 8.772614476534344e-06, "loss": 0.4133, "step": 9754 }, { "epoch": 0.9154466966966966, "grad_norm": 1.010902125288608, "learning_rate": 8.772256151463808e-06, "loss": 0.4149, "step": 9755 }, { "epoch": 0.9155405405405406, "grad_norm": 0.9962563299733023, "learning_rate": 8.771897781416213e-06, "loss": 0.4741, "step": 9756 }, { "epoch": 0.9156343843843844, "grad_norm": 1.042568894463898, "learning_rate": 8.771539366395842e-06, "loss": 0.5205, "step": 9757 }, { "epoch": 0.9157282282282282, "grad_norm": 1.010953833879944, "learning_rate": 8.77118090640696e-06, "loss": 0.4698, "step": 9758 }, { "epoch": 0.9158220720720721, "grad_norm": 1.335243624213916, "learning_rate": 8.770822401453846e-06, "loss": 0.4679, "step": 9759 }, { "epoch": 0.9159159159159159, "grad_norm": 1.1349817820124242, "learning_rate": 8.77046385154077e-06, "loss": 0.4921, "step": 9760 }, { "epoch": 0.9160097597597597, "grad_norm": 0.9392527854794636, "learning_rate": 8.770105256672013e-06, "loss": 0.5032, "step": 9761 }, { "epoch": 0.9161036036036037, "grad_norm": 1.04740148872561, "learning_rate": 8.769746616851846e-06, "loss": 0.5084, "step": 9762 }, { "epoch": 0.9161974474474475, "grad_norm": 1.0944101810751654, "learning_rate": 8.769387932084546e-06, "loss": 0.4778, "step": 9763 }, { "epoch": 0.9162912912912913, "grad_norm": 0.9945761848137598, "learning_rate": 8.76902920237439e-06, "loss": 0.4596, "step": 9764 }, { "epoch": 0.9163851351351351, "grad_norm": 1.0536823815939678, "learning_rate": 8.768670427725656e-06, "loss": 0.5037, "step": 9765 }, { "epoch": 0.916478978978979, "grad_norm": 1.0457060710764967, "learning_rate": 8.768311608142621e-06, "loss": 0.4817, "step": 9766 }, { "epoch": 0.9165728228228228, "grad_norm": 1.015301292654729, "learning_rate": 8.767952743629562e-06, "loss": 0.4538, "step": 9767 }, { "epoch": 0.9166666666666666, "grad_norm": 0.8615782255579896, "learning_rate": 8.76759383419076e-06, "loss": 0.4071, "step": 9768 }, { "epoch": 0.9167605105105106, "grad_norm": 1.0183645590385066, "learning_rate": 8.767234879830494e-06, "loss": 0.4602, "step": 9769 }, { "epoch": 0.9168543543543544, "grad_norm": 1.8648784693141305, "learning_rate": 8.766875880553044e-06, "loss": 0.4184, "step": 9770 }, { "epoch": 0.9169481981981982, "grad_norm": 0.9266128394344485, "learning_rate": 8.766516836362685e-06, "loss": 0.5089, "step": 9771 }, { "epoch": 0.9170420420420421, "grad_norm": 1.0307790157064305, "learning_rate": 8.766157747263706e-06, "loss": 0.471, "step": 9772 }, { "epoch": 0.9171358858858859, "grad_norm": 0.8391261770707377, "learning_rate": 8.765798613260385e-06, "loss": 0.4235, "step": 9773 }, { "epoch": 0.9172297297297297, "grad_norm": 1.099684533110511, "learning_rate": 8.765439434357002e-06, "loss": 0.4757, "step": 9774 }, { "epoch": 0.9173235735735735, "grad_norm": 1.0489715295765574, "learning_rate": 8.765080210557844e-06, "loss": 0.4397, "step": 9775 }, { "epoch": 0.9174174174174174, "grad_norm": 1.0665895659454505, "learning_rate": 8.764720941867189e-06, "loss": 0.472, "step": 9776 }, { "epoch": 0.9175112612612613, "grad_norm": 0.9288765226276139, "learning_rate": 8.764361628289323e-06, "loss": 0.4025, "step": 9777 }, { "epoch": 0.9176051051051051, "grad_norm": 1.0476911265256903, "learning_rate": 8.764002269828534e-06, "loss": 0.4657, "step": 9778 }, { "epoch": 0.917698948948949, "grad_norm": 0.9501422026483284, "learning_rate": 8.7636428664891e-06, "loss": 0.5101, "step": 9779 }, { "epoch": 0.9177927927927928, "grad_norm": 1.0186937181928586, "learning_rate": 8.763283418275308e-06, "loss": 0.5291, "step": 9780 }, { "epoch": 0.9178866366366366, "grad_norm": 1.0253247948746362, "learning_rate": 8.762923925191447e-06, "loss": 0.4979, "step": 9781 }, { "epoch": 0.9179804804804805, "grad_norm": 0.9371677426793391, "learning_rate": 8.762564387241801e-06, "loss": 0.4599, "step": 9782 }, { "epoch": 0.9180743243243243, "grad_norm": 3.0899242406559395, "learning_rate": 8.762204804430656e-06, "loss": 0.4694, "step": 9783 }, { "epoch": 0.9181681681681682, "grad_norm": 1.084466357872455, "learning_rate": 8.761845176762301e-06, "loss": 0.4323, "step": 9784 }, { "epoch": 0.918262012012012, "grad_norm": 0.9843416124303495, "learning_rate": 8.761485504241022e-06, "loss": 0.4926, "step": 9785 }, { "epoch": 0.9183558558558559, "grad_norm": 0.9349203676332037, "learning_rate": 8.76112578687111e-06, "loss": 0.466, "step": 9786 }, { "epoch": 0.9184496996996997, "grad_norm": 0.9715654934664647, "learning_rate": 8.760766024656854e-06, "loss": 0.4961, "step": 9787 }, { "epoch": 0.9185435435435435, "grad_norm": 0.9879471718415667, "learning_rate": 8.76040621760254e-06, "loss": 0.4667, "step": 9788 }, { "epoch": 0.9186373873873874, "grad_norm": 1.1159482982368936, "learning_rate": 8.76004636571246e-06, "loss": 0.4272, "step": 9789 }, { "epoch": 0.9187312312312312, "grad_norm": 0.9485571005837401, "learning_rate": 8.759686468990906e-06, "loss": 0.4278, "step": 9790 }, { "epoch": 0.918825075075075, "grad_norm": 1.0752101938617313, "learning_rate": 8.759326527442167e-06, "loss": 0.4626, "step": 9791 }, { "epoch": 0.918918918918919, "grad_norm": 1.1808172022420027, "learning_rate": 8.758966541070534e-06, "loss": 0.5229, "step": 9792 }, { "epoch": 0.9190127627627628, "grad_norm": 0.992470730806048, "learning_rate": 8.758606509880303e-06, "loss": 0.4621, "step": 9793 }, { "epoch": 0.9191066066066066, "grad_norm": 6.465144757378264, "learning_rate": 8.758246433875762e-06, "loss": 0.4628, "step": 9794 }, { "epoch": 0.9192004504504504, "grad_norm": 0.9733483015184355, "learning_rate": 8.757886313061209e-06, "loss": 0.4585, "step": 9795 }, { "epoch": 0.9192942942942943, "grad_norm": 1.333407564706814, "learning_rate": 8.757526147440933e-06, "loss": 0.494, "step": 9796 }, { "epoch": 0.9193881381381381, "grad_norm": 0.8940799072514788, "learning_rate": 8.757165937019231e-06, "loss": 0.4745, "step": 9797 }, { "epoch": 0.9194819819819819, "grad_norm": 0.9161229752531449, "learning_rate": 8.756805681800398e-06, "loss": 0.4482, "step": 9798 }, { "epoch": 0.9195758258258259, "grad_norm": 0.9615677859592704, "learning_rate": 8.756445381788728e-06, "loss": 0.5053, "step": 9799 }, { "epoch": 0.9196696696696697, "grad_norm": 0.9919254083120529, "learning_rate": 8.756085036988517e-06, "loss": 0.4163, "step": 9800 }, { "epoch": 0.9197635135135135, "grad_norm": 0.9747400387145254, "learning_rate": 8.755724647404062e-06, "loss": 0.4599, "step": 9801 }, { "epoch": 0.9198573573573574, "grad_norm": 1.0215608212441654, "learning_rate": 8.755364213039661e-06, "loss": 0.4667, "step": 9802 }, { "epoch": 0.9199512012012012, "grad_norm": 0.9869253634035916, "learning_rate": 8.755003733899612e-06, "loss": 0.4755, "step": 9803 }, { "epoch": 0.920045045045045, "grad_norm": 0.9569103645934364, "learning_rate": 8.754643209988209e-06, "loss": 0.4588, "step": 9804 }, { "epoch": 0.9201388888888888, "grad_norm": 1.45411234524004, "learning_rate": 8.754282641309753e-06, "loss": 0.48, "step": 9805 }, { "epoch": 0.9202327327327328, "grad_norm": 0.8672170343339717, "learning_rate": 8.753922027868546e-06, "loss": 0.4414, "step": 9806 }, { "epoch": 0.9203265765765766, "grad_norm": 1.0628289746711332, "learning_rate": 8.753561369668882e-06, "loss": 0.4274, "step": 9807 }, { "epoch": 0.9204204204204204, "grad_norm": 1.1209813323410238, "learning_rate": 8.753200666715065e-06, "loss": 0.4484, "step": 9808 }, { "epoch": 0.9205142642642643, "grad_norm": 0.9604862887523449, "learning_rate": 8.752839919011395e-06, "loss": 0.4781, "step": 9809 }, { "epoch": 0.9206081081081081, "grad_norm": 1.1479514061673421, "learning_rate": 8.752479126562173e-06, "loss": 0.4507, "step": 9810 }, { "epoch": 0.9207019519519519, "grad_norm": 1.1886918268796156, "learning_rate": 8.752118289371701e-06, "loss": 0.4942, "step": 9811 }, { "epoch": 0.9207957957957958, "grad_norm": 0.9626343476033509, "learning_rate": 8.75175740744428e-06, "loss": 0.5445, "step": 9812 }, { "epoch": 0.9208896396396397, "grad_norm": 0.9644321702020349, "learning_rate": 8.751396480784215e-06, "loss": 0.4279, "step": 9813 }, { "epoch": 0.9209834834834835, "grad_norm": 1.1746748014593376, "learning_rate": 8.751035509395806e-06, "loss": 0.4464, "step": 9814 }, { "epoch": 0.9210773273273273, "grad_norm": 1.0297494343068905, "learning_rate": 8.750674493283362e-06, "loss": 0.4599, "step": 9815 }, { "epoch": 0.9211711711711712, "grad_norm": 1.0974590898465928, "learning_rate": 8.750313432451183e-06, "loss": 0.4493, "step": 9816 }, { "epoch": 0.921265015015015, "grad_norm": 1.0257799265556287, "learning_rate": 8.749952326903577e-06, "loss": 0.4764, "step": 9817 }, { "epoch": 0.9213588588588588, "grad_norm": 1.050599578703018, "learning_rate": 8.749591176644847e-06, "loss": 0.4628, "step": 9818 }, { "epoch": 0.9214527027027027, "grad_norm": 0.9987018333886255, "learning_rate": 8.749229981679299e-06, "loss": 0.497, "step": 9819 }, { "epoch": 0.9215465465465466, "grad_norm": 0.937796862776611, "learning_rate": 8.748868742011242e-06, "loss": 0.4472, "step": 9820 }, { "epoch": 0.9216403903903904, "grad_norm": 1.2487839908479783, "learning_rate": 8.748507457644981e-06, "loss": 0.4705, "step": 9821 }, { "epoch": 0.9217342342342343, "grad_norm": 0.9953198747032297, "learning_rate": 8.748146128584825e-06, "loss": 0.4821, "step": 9822 }, { "epoch": 0.9218280780780781, "grad_norm": 1.5369231999302388, "learning_rate": 8.747784754835082e-06, "loss": 0.4573, "step": 9823 }, { "epoch": 0.9219219219219219, "grad_norm": 0.9572833985425399, "learning_rate": 8.74742333640006e-06, "loss": 0.4501, "step": 9824 }, { "epoch": 0.9220157657657657, "grad_norm": 0.8934859021238767, "learning_rate": 8.747061873284068e-06, "loss": 0.4119, "step": 9825 }, { "epoch": 0.9221096096096096, "grad_norm": 1.6326352277857943, "learning_rate": 8.746700365491415e-06, "loss": 0.4762, "step": 9826 }, { "epoch": 0.9222034534534534, "grad_norm": 1.1825222753158422, "learning_rate": 8.746338813026416e-06, "loss": 0.4643, "step": 9827 }, { "epoch": 0.9222972972972973, "grad_norm": 1.0207964968875072, "learning_rate": 8.745977215893376e-06, "loss": 0.524, "step": 9828 }, { "epoch": 0.9223911411411412, "grad_norm": 0.9318191838826418, "learning_rate": 8.74561557409661e-06, "loss": 0.4833, "step": 9829 }, { "epoch": 0.922484984984985, "grad_norm": 1.0526458278505244, "learning_rate": 8.745253887640427e-06, "loss": 0.4408, "step": 9830 }, { "epoch": 0.9225788288288288, "grad_norm": 0.9343671344090464, "learning_rate": 8.744892156529144e-06, "loss": 0.4701, "step": 9831 }, { "epoch": 0.9226726726726727, "grad_norm": 1.0075730648574177, "learning_rate": 8.74453038076707e-06, "loss": 0.4283, "step": 9832 }, { "epoch": 0.9227665165165165, "grad_norm": 1.101899083054864, "learning_rate": 8.744168560358516e-06, "loss": 0.4356, "step": 9833 }, { "epoch": 0.9228603603603603, "grad_norm": 1.1039203652733047, "learning_rate": 8.743806695307804e-06, "loss": 0.4429, "step": 9834 }, { "epoch": 0.9229542042042042, "grad_norm": 1.0892839572862252, "learning_rate": 8.743444785619242e-06, "loss": 0.4646, "step": 9835 }, { "epoch": 0.9230480480480481, "grad_norm": 1.0720533917730222, "learning_rate": 8.743082831297147e-06, "loss": 0.4808, "step": 9836 }, { "epoch": 0.9231418918918919, "grad_norm": 0.8913393825479544, "learning_rate": 8.742720832345836e-06, "loss": 0.4526, "step": 9837 }, { "epoch": 0.9232357357357357, "grad_norm": 1.3240680757169083, "learning_rate": 8.742358788769625e-06, "loss": 0.455, "step": 9838 }, { "epoch": 0.9233295795795796, "grad_norm": 2.3165485474087775, "learning_rate": 8.741996700572829e-06, "loss": 0.4549, "step": 9839 }, { "epoch": 0.9234234234234234, "grad_norm": 0.8366303872279838, "learning_rate": 8.741634567759763e-06, "loss": 0.3795, "step": 9840 }, { "epoch": 0.9235172672672672, "grad_norm": 1.178523958331055, "learning_rate": 8.741272390334751e-06, "loss": 0.5137, "step": 9841 }, { "epoch": 0.9236111111111112, "grad_norm": 1.5871383467393905, "learning_rate": 8.740910168302106e-06, "loss": 0.4538, "step": 9842 }, { "epoch": 0.923704954954955, "grad_norm": 0.9732203232018917, "learning_rate": 8.74054790166615e-06, "loss": 0.4994, "step": 9843 }, { "epoch": 0.9237987987987988, "grad_norm": 1.174986730216854, "learning_rate": 8.740185590431201e-06, "loss": 0.5026, "step": 9844 }, { "epoch": 0.9238926426426426, "grad_norm": 0.8590187045112099, "learning_rate": 8.739823234601579e-06, "loss": 0.4157, "step": 9845 }, { "epoch": 0.9239864864864865, "grad_norm": 0.981508383428277, "learning_rate": 8.739460834181605e-06, "loss": 0.5296, "step": 9846 }, { "epoch": 0.9240803303303303, "grad_norm": 0.9489645168875825, "learning_rate": 8.739098389175598e-06, "loss": 0.4898, "step": 9847 }, { "epoch": 0.9241741741741741, "grad_norm": 0.9295320163751071, "learning_rate": 8.738735899587881e-06, "loss": 0.4385, "step": 9848 }, { "epoch": 0.9242680180180181, "grad_norm": 0.9692291116921306, "learning_rate": 8.738373365422777e-06, "loss": 0.4423, "step": 9849 }, { "epoch": 0.9243618618618619, "grad_norm": 0.9880938827226572, "learning_rate": 8.738010786684607e-06, "loss": 0.5035, "step": 9850 }, { "epoch": 0.9244557057057057, "grad_norm": 1.327148229615307, "learning_rate": 8.737648163377694e-06, "loss": 0.4576, "step": 9851 }, { "epoch": 0.9245495495495496, "grad_norm": 0.9047755984160046, "learning_rate": 8.737285495506362e-06, "loss": 0.4579, "step": 9852 }, { "epoch": 0.9246433933933934, "grad_norm": 1.1977414840720124, "learning_rate": 8.736922783074936e-06, "loss": 0.4246, "step": 9853 }, { "epoch": 0.9247372372372372, "grad_norm": 0.94936922039794, "learning_rate": 8.73656002608774e-06, "loss": 0.467, "step": 9854 }, { "epoch": 0.924831081081081, "grad_norm": 0.804707726024785, "learning_rate": 8.736197224549096e-06, "loss": 0.4063, "step": 9855 }, { "epoch": 0.924924924924925, "grad_norm": 0.9813166782346935, "learning_rate": 8.735834378463337e-06, "loss": 0.4512, "step": 9856 }, { "epoch": 0.9250187687687688, "grad_norm": 0.9363941483251524, "learning_rate": 8.73547148783478e-06, "loss": 0.4842, "step": 9857 }, { "epoch": 0.9251126126126126, "grad_norm": 1.0313735485874977, "learning_rate": 8.735108552667762e-06, "loss": 0.5069, "step": 9858 }, { "epoch": 0.9252064564564565, "grad_norm": 1.1523721865744156, "learning_rate": 8.734745572966604e-06, "loss": 0.4306, "step": 9859 }, { "epoch": 0.9253003003003003, "grad_norm": 1.107223206995322, "learning_rate": 8.734382548735632e-06, "loss": 0.5126, "step": 9860 }, { "epoch": 0.9253941441441441, "grad_norm": 0.9848342340274449, "learning_rate": 8.73401947997918e-06, "loss": 0.4645, "step": 9861 }, { "epoch": 0.925487987987988, "grad_norm": 1.1043782530375934, "learning_rate": 8.733656366701573e-06, "loss": 0.4776, "step": 9862 }, { "epoch": 0.9255818318318318, "grad_norm": 0.9394177574214232, "learning_rate": 8.733293208907143e-06, "loss": 0.5018, "step": 9863 }, { "epoch": 0.9256756756756757, "grad_norm": 0.8984916521426447, "learning_rate": 8.732930006600217e-06, "loss": 0.4396, "step": 9864 }, { "epoch": 0.9257695195195195, "grad_norm": 0.937612231289947, "learning_rate": 8.732566759785129e-06, "loss": 0.453, "step": 9865 }, { "epoch": 0.9258633633633634, "grad_norm": 1.2245080515266673, "learning_rate": 8.732203468466207e-06, "loss": 0.5069, "step": 9866 }, { "epoch": 0.9259572072072072, "grad_norm": 0.9707045125146259, "learning_rate": 8.731840132647785e-06, "loss": 0.4765, "step": 9867 }, { "epoch": 0.926051051051051, "grad_norm": 1.2222860952401167, "learning_rate": 8.731476752334193e-06, "loss": 0.4427, "step": 9868 }, { "epoch": 0.9261448948948949, "grad_norm": 0.9893133748967963, "learning_rate": 8.731113327529766e-06, "loss": 0.5045, "step": 9869 }, { "epoch": 0.9262387387387387, "grad_norm": 1.308076980762692, "learning_rate": 8.730749858238832e-06, "loss": 0.4373, "step": 9870 }, { "epoch": 0.9263325825825826, "grad_norm": 1.0760656657982919, "learning_rate": 8.730386344465732e-06, "loss": 0.4248, "step": 9871 }, { "epoch": 0.9264264264264265, "grad_norm": 0.8729035651600189, "learning_rate": 8.730022786214795e-06, "loss": 0.4234, "step": 9872 }, { "epoch": 0.9265202702702703, "grad_norm": 1.1201492025150246, "learning_rate": 8.729659183490357e-06, "loss": 0.5029, "step": 9873 }, { "epoch": 0.9266141141141141, "grad_norm": 0.8558052282087039, "learning_rate": 8.729295536296755e-06, "loss": 0.4131, "step": 9874 }, { "epoch": 0.9267079579579579, "grad_norm": 0.9381319030772648, "learning_rate": 8.728931844638323e-06, "loss": 0.4882, "step": 9875 }, { "epoch": 0.9268018018018018, "grad_norm": 1.0544368974275062, "learning_rate": 8.728568108519397e-06, "loss": 0.475, "step": 9876 }, { "epoch": 0.9268956456456456, "grad_norm": 1.0199997957533555, "learning_rate": 8.728204327944315e-06, "loss": 0.4656, "step": 9877 }, { "epoch": 0.9269894894894894, "grad_norm": 0.9031119354773154, "learning_rate": 8.727840502917412e-06, "loss": 0.4419, "step": 9878 }, { "epoch": 0.9270833333333334, "grad_norm": 1.1953279478366474, "learning_rate": 8.72747663344303e-06, "loss": 0.4578, "step": 9879 }, { "epoch": 0.9271771771771772, "grad_norm": 0.8708325517171327, "learning_rate": 8.727112719525506e-06, "loss": 0.4514, "step": 9880 }, { "epoch": 0.927271021021021, "grad_norm": 1.7983761671358423, "learning_rate": 8.726748761169177e-06, "loss": 0.4891, "step": 9881 }, { "epoch": 0.9273648648648649, "grad_norm": 1.246458101169719, "learning_rate": 8.726384758378386e-06, "loss": 0.522, "step": 9882 }, { "epoch": 0.9274587087087087, "grad_norm": 1.0931716377125613, "learning_rate": 8.726020711157469e-06, "loss": 0.4153, "step": 9883 }, { "epoch": 0.9275525525525525, "grad_norm": 0.9773669980086049, "learning_rate": 8.72565661951077e-06, "loss": 0.4509, "step": 9884 }, { "epoch": 0.9276463963963963, "grad_norm": 0.9956092008175645, "learning_rate": 8.725292483442627e-06, "loss": 0.428, "step": 9885 }, { "epoch": 0.9277402402402403, "grad_norm": 0.9414045784313634, "learning_rate": 8.724928302957383e-06, "loss": 0.4127, "step": 9886 }, { "epoch": 0.9278340840840841, "grad_norm": 0.9198268743479151, "learning_rate": 8.724564078059383e-06, "loss": 0.4292, "step": 9887 }, { "epoch": 0.9279279279279279, "grad_norm": 0.9039998209526005, "learning_rate": 8.724199808752964e-06, "loss": 0.4698, "step": 9888 }, { "epoch": 0.9280217717717718, "grad_norm": 0.8572162689712256, "learning_rate": 8.723835495042474e-06, "loss": 0.4482, "step": 9889 }, { "epoch": 0.9281156156156156, "grad_norm": 0.9689672744507833, "learning_rate": 8.723471136932255e-06, "loss": 0.4637, "step": 9890 }, { "epoch": 0.9282094594594594, "grad_norm": 1.2793247478280154, "learning_rate": 8.723106734426652e-06, "loss": 0.4307, "step": 9891 }, { "epoch": 0.9283033033033034, "grad_norm": 0.914520820161761, "learning_rate": 8.722742287530008e-06, "loss": 0.4743, "step": 9892 }, { "epoch": 0.9283971471471472, "grad_norm": 1.0251333944325693, "learning_rate": 8.722377796246671e-06, "loss": 0.4669, "step": 9893 }, { "epoch": 0.928490990990991, "grad_norm": 0.9407667972369397, "learning_rate": 8.722013260580983e-06, "loss": 0.4215, "step": 9894 }, { "epoch": 0.9285848348348348, "grad_norm": 1.180715969496425, "learning_rate": 8.721648680537296e-06, "loss": 0.4658, "step": 9895 }, { "epoch": 0.9286786786786787, "grad_norm": 0.8974666214706036, "learning_rate": 8.721284056119951e-06, "loss": 0.4811, "step": 9896 }, { "epoch": 0.9287725225225225, "grad_norm": 1.0907048452830876, "learning_rate": 8.7209193873333e-06, "loss": 0.4684, "step": 9897 }, { "epoch": 0.9288663663663663, "grad_norm": 1.033079955305983, "learning_rate": 8.72055467418169e-06, "loss": 0.421, "step": 9898 }, { "epoch": 0.9289602102102102, "grad_norm": 1.3759258373680283, "learning_rate": 8.720189916669465e-06, "loss": 0.4375, "step": 9899 }, { "epoch": 0.9290540540540541, "grad_norm": 1.4185016663219716, "learning_rate": 8.719825114800982e-06, "loss": 0.4464, "step": 9900 }, { "epoch": 0.9291478978978979, "grad_norm": 1.067883446375819, "learning_rate": 8.719460268580584e-06, "loss": 0.4626, "step": 9901 }, { "epoch": 0.9292417417417418, "grad_norm": 1.2905483338418278, "learning_rate": 8.719095378012624e-06, "loss": 0.5283, "step": 9902 }, { "epoch": 0.9293355855855856, "grad_norm": 1.254468576395488, "learning_rate": 8.718730443101451e-06, "loss": 0.4353, "step": 9903 }, { "epoch": 0.9294294294294294, "grad_norm": 0.9471658378968277, "learning_rate": 8.718365463851419e-06, "loss": 0.4874, "step": 9904 }, { "epoch": 0.9295232732732732, "grad_norm": 0.9137493851788419, "learning_rate": 8.718000440266879e-06, "loss": 0.4446, "step": 9905 }, { "epoch": 0.9296171171171171, "grad_norm": 0.8983626942707874, "learning_rate": 8.717635372352179e-06, "loss": 0.4422, "step": 9906 }, { "epoch": 0.929710960960961, "grad_norm": 0.9267701808639485, "learning_rate": 8.717270260111677e-06, "loss": 0.4205, "step": 9907 }, { "epoch": 0.9298048048048048, "grad_norm": 1.1459771717726517, "learning_rate": 8.716905103549725e-06, "loss": 0.4731, "step": 9908 }, { "epoch": 0.9298986486486487, "grad_norm": 0.972594805606295, "learning_rate": 8.716539902670673e-06, "loss": 0.4856, "step": 9909 }, { "epoch": 0.9299924924924925, "grad_norm": 1.4101732540440275, "learning_rate": 8.71617465747888e-06, "loss": 0.4437, "step": 9910 }, { "epoch": 0.9300863363363363, "grad_norm": 0.8627845103059189, "learning_rate": 8.715809367978699e-06, "loss": 0.4669, "step": 9911 }, { "epoch": 0.9301801801801802, "grad_norm": 0.8679876534741444, "learning_rate": 8.715444034174484e-06, "loss": 0.4478, "step": 9912 }, { "epoch": 0.930274024024024, "grad_norm": 1.0401536705947927, "learning_rate": 8.715078656070596e-06, "loss": 0.4214, "step": 9913 }, { "epoch": 0.9303678678678678, "grad_norm": 0.8809210408942691, "learning_rate": 8.714713233671386e-06, "loss": 0.5133, "step": 9914 }, { "epoch": 0.9304617117117117, "grad_norm": 0.834587989861703, "learning_rate": 8.714347766981213e-06, "loss": 0.4754, "step": 9915 }, { "epoch": 0.9305555555555556, "grad_norm": 1.0282765877418294, "learning_rate": 8.713982256004433e-06, "loss": 0.4327, "step": 9916 }, { "epoch": 0.9306493993993994, "grad_norm": 0.9895216733775528, "learning_rate": 8.713616700745408e-06, "loss": 0.441, "step": 9917 }, { "epoch": 0.9307432432432432, "grad_norm": 0.960464551897958, "learning_rate": 8.713251101208493e-06, "loss": 0.4396, "step": 9918 }, { "epoch": 0.9308370870870871, "grad_norm": 0.8594452589100033, "learning_rate": 8.712885457398049e-06, "loss": 0.4182, "step": 9919 }, { "epoch": 0.9309309309309309, "grad_norm": 0.8942425720211526, "learning_rate": 8.712519769318432e-06, "loss": 0.4739, "step": 9920 }, { "epoch": 0.9310247747747747, "grad_norm": 1.0383552305874213, "learning_rate": 8.712154036974008e-06, "loss": 0.4738, "step": 9921 }, { "epoch": 0.9311186186186187, "grad_norm": 0.9450800221863832, "learning_rate": 8.711788260369133e-06, "loss": 0.4373, "step": 9922 }, { "epoch": 0.9312124624624625, "grad_norm": 1.0184864245429903, "learning_rate": 8.71142243950817e-06, "loss": 0.4424, "step": 9923 }, { "epoch": 0.9313063063063063, "grad_norm": 1.5828406411674885, "learning_rate": 8.71105657439548e-06, "loss": 0.4715, "step": 9924 }, { "epoch": 0.9314001501501501, "grad_norm": 1.1903446341447816, "learning_rate": 8.710690665035427e-06, "loss": 0.4732, "step": 9925 }, { "epoch": 0.931493993993994, "grad_norm": 1.0372844052024903, "learning_rate": 8.71032471143237e-06, "loss": 0.4489, "step": 9926 }, { "epoch": 0.9315878378378378, "grad_norm": 0.9705156689695572, "learning_rate": 8.709958713590677e-06, "loss": 0.4381, "step": 9927 }, { "epoch": 0.9316816816816816, "grad_norm": 1.1147190683039283, "learning_rate": 8.709592671514709e-06, "loss": 0.4903, "step": 9928 }, { "epoch": 0.9317755255255256, "grad_norm": 1.0848724944139436, "learning_rate": 8.70922658520883e-06, "loss": 0.4523, "step": 9929 }, { "epoch": 0.9318693693693694, "grad_norm": 1.3165041783473541, "learning_rate": 8.708860454677406e-06, "loss": 0.435, "step": 9930 }, { "epoch": 0.9319632132132132, "grad_norm": 1.1460071588292582, "learning_rate": 8.708494279924804e-06, "loss": 0.4646, "step": 9931 }, { "epoch": 0.9320570570570571, "grad_norm": 0.9070977366467655, "learning_rate": 8.708128060955388e-06, "loss": 0.4641, "step": 9932 }, { "epoch": 0.9321509009009009, "grad_norm": 0.9208382049835363, "learning_rate": 8.707761797773522e-06, "loss": 0.4622, "step": 9933 }, { "epoch": 0.9322447447447447, "grad_norm": 0.9940215191733041, "learning_rate": 8.707395490383579e-06, "loss": 0.4629, "step": 9934 }, { "epoch": 0.9323385885885885, "grad_norm": 1.1381467413242092, "learning_rate": 8.707029138789922e-06, "loss": 0.5167, "step": 9935 }, { "epoch": 0.9324324324324325, "grad_norm": 0.9882634810561873, "learning_rate": 8.706662742996919e-06, "loss": 0.4562, "step": 9936 }, { "epoch": 0.9325262762762763, "grad_norm": 1.1080963911787092, "learning_rate": 8.706296303008941e-06, "loss": 0.4908, "step": 9937 }, { "epoch": 0.9326201201201201, "grad_norm": 0.9401462708066356, "learning_rate": 8.705929818830357e-06, "loss": 0.4486, "step": 9938 }, { "epoch": 0.932713963963964, "grad_norm": 0.8829580166658431, "learning_rate": 8.705563290465533e-06, "loss": 0.4403, "step": 9939 }, { "epoch": 0.9328078078078078, "grad_norm": 1.17822041218228, "learning_rate": 8.705196717918844e-06, "loss": 0.5065, "step": 9940 }, { "epoch": 0.9329016516516516, "grad_norm": 1.0664042504277595, "learning_rate": 8.704830101194657e-06, "loss": 0.5028, "step": 9941 }, { "epoch": 0.9329954954954955, "grad_norm": 1.300368587309821, "learning_rate": 8.704463440297346e-06, "loss": 0.3975, "step": 9942 }, { "epoch": 0.9330893393393394, "grad_norm": 1.006809624686969, "learning_rate": 8.70409673523128e-06, "loss": 0.4643, "step": 9943 }, { "epoch": 0.9331831831831832, "grad_norm": 0.9747119470755797, "learning_rate": 8.703729986000836e-06, "loss": 0.4457, "step": 9944 }, { "epoch": 0.933277027027027, "grad_norm": 0.8697057710903685, "learning_rate": 8.703363192610379e-06, "loss": 0.4061, "step": 9945 }, { "epoch": 0.9333708708708709, "grad_norm": 1.0963828879095683, "learning_rate": 8.702996355064289e-06, "loss": 0.4442, "step": 9946 }, { "epoch": 0.9334647147147147, "grad_norm": 0.9208230744511053, "learning_rate": 8.702629473366939e-06, "loss": 0.4199, "step": 9947 }, { "epoch": 0.9335585585585585, "grad_norm": 1.0110337020622626, "learning_rate": 8.7022625475227e-06, "loss": 0.4774, "step": 9948 }, { "epoch": 0.9336524024024024, "grad_norm": 1.0230926939256435, "learning_rate": 8.701895577535947e-06, "loss": 0.5029, "step": 9949 }, { "epoch": 0.9337462462462462, "grad_norm": 1.0787510139571785, "learning_rate": 8.701528563411061e-06, "loss": 0.4833, "step": 9950 }, { "epoch": 0.9338400900900901, "grad_norm": 0.9770710775720071, "learning_rate": 8.701161505152414e-06, "loss": 0.4497, "step": 9951 }, { "epoch": 0.933933933933934, "grad_norm": 0.9944456041083157, "learning_rate": 8.700794402764382e-06, "loss": 0.528, "step": 9952 }, { "epoch": 0.9340277777777778, "grad_norm": 1.3047051462563126, "learning_rate": 8.700427256251342e-06, "loss": 0.4837, "step": 9953 }, { "epoch": 0.9341216216216216, "grad_norm": 0.9317102209222033, "learning_rate": 8.700060065617673e-06, "loss": 0.4415, "step": 9954 }, { "epoch": 0.9342154654654654, "grad_norm": 1.4955743606111043, "learning_rate": 8.699692830867753e-06, "loss": 0.4342, "step": 9955 }, { "epoch": 0.9343093093093093, "grad_norm": 1.1290365624914207, "learning_rate": 8.699325552005959e-06, "loss": 0.4951, "step": 9956 }, { "epoch": 0.9344031531531531, "grad_norm": 1.1537610006711216, "learning_rate": 8.69895822903667e-06, "loss": 0.4975, "step": 9957 }, { "epoch": 0.934496996996997, "grad_norm": 0.9028998273448939, "learning_rate": 8.698590861964269e-06, "loss": 0.4172, "step": 9958 }, { "epoch": 0.9345908408408409, "grad_norm": 2.8875256313195696, "learning_rate": 8.698223450793132e-06, "loss": 0.4911, "step": 9959 }, { "epoch": 0.9346846846846847, "grad_norm": 1.6066966758521384, "learning_rate": 8.697855995527642e-06, "loss": 0.4502, "step": 9960 }, { "epoch": 0.9347785285285285, "grad_norm": 0.8570800387921889, "learning_rate": 8.697488496172181e-06, "loss": 0.4504, "step": 9961 }, { "epoch": 0.9348723723723724, "grad_norm": 1.0860344610929038, "learning_rate": 8.697120952731129e-06, "loss": 0.4348, "step": 9962 }, { "epoch": 0.9349662162162162, "grad_norm": 0.896132463147938, "learning_rate": 8.696753365208868e-06, "loss": 0.4954, "step": 9963 }, { "epoch": 0.93506006006006, "grad_norm": 1.237478380253977, "learning_rate": 8.696385733609782e-06, "loss": 0.4992, "step": 9964 }, { "epoch": 0.9351539039039038, "grad_norm": 1.8628151624024283, "learning_rate": 8.696018057938253e-06, "loss": 0.4565, "step": 9965 }, { "epoch": 0.9352477477477478, "grad_norm": 1.0452628035192135, "learning_rate": 8.695650338198667e-06, "loss": 0.5058, "step": 9966 }, { "epoch": 0.9353415915915916, "grad_norm": 1.017557491968071, "learning_rate": 8.695282574395407e-06, "loss": 0.4613, "step": 9967 }, { "epoch": 0.9354354354354354, "grad_norm": 1.0330192976111328, "learning_rate": 8.694914766532856e-06, "loss": 0.4646, "step": 9968 }, { "epoch": 0.9355292792792793, "grad_norm": 1.0282439726778836, "learning_rate": 8.694546914615403e-06, "loss": 0.4751, "step": 9969 }, { "epoch": 0.9356231231231231, "grad_norm": 0.9126572811181297, "learning_rate": 8.694179018647433e-06, "loss": 0.4098, "step": 9970 }, { "epoch": 0.9357169669669669, "grad_norm": 1.1208202820653859, "learning_rate": 8.69381107863333e-06, "loss": 0.4751, "step": 9971 }, { "epoch": 0.9358108108108109, "grad_norm": 0.9032826736352282, "learning_rate": 8.693443094577485e-06, "loss": 0.4515, "step": 9972 }, { "epoch": 0.9359046546546547, "grad_norm": 1.032051602599403, "learning_rate": 8.69307506648428e-06, "loss": 0.4603, "step": 9973 }, { "epoch": 0.9359984984984985, "grad_norm": 0.8585127527458514, "learning_rate": 8.692706994358108e-06, "loss": 0.4175, "step": 9974 }, { "epoch": 0.9360923423423423, "grad_norm": 0.9801996239818728, "learning_rate": 8.692338878203357e-06, "loss": 0.4894, "step": 9975 }, { "epoch": 0.9361861861861862, "grad_norm": 1.0642744999277505, "learning_rate": 8.691970718024414e-06, "loss": 0.4484, "step": 9976 }, { "epoch": 0.93628003003003, "grad_norm": 1.0437606026718256, "learning_rate": 8.69160251382567e-06, "loss": 0.479, "step": 9977 }, { "epoch": 0.9363738738738738, "grad_norm": 1.000678411370873, "learning_rate": 8.691234265611515e-06, "loss": 0.478, "step": 9978 }, { "epoch": 0.9364677177177178, "grad_norm": 0.937155288408892, "learning_rate": 8.690865973386339e-06, "loss": 0.4494, "step": 9979 }, { "epoch": 0.9365615615615616, "grad_norm": 1.0796779580179419, "learning_rate": 8.690497637154533e-06, "loss": 0.482, "step": 9980 }, { "epoch": 0.9366554054054054, "grad_norm": 0.9820909415143345, "learning_rate": 8.69012925692049e-06, "loss": 0.4327, "step": 9981 }, { "epoch": 0.9367492492492493, "grad_norm": 0.9232957984573888, "learning_rate": 8.689760832688602e-06, "loss": 0.4848, "step": 9982 }, { "epoch": 0.9368430930930931, "grad_norm": 2.6752920885789337, "learning_rate": 8.689392364463261e-06, "loss": 0.4596, "step": 9983 }, { "epoch": 0.9369369369369369, "grad_norm": 0.9322431427283746, "learning_rate": 8.68902385224886e-06, "loss": 0.441, "step": 9984 }, { "epoch": 0.9370307807807807, "grad_norm": 0.964035461708114, "learning_rate": 8.688655296049794e-06, "loss": 0.4416, "step": 9985 }, { "epoch": 0.9371246246246246, "grad_norm": 1.0061327933086537, "learning_rate": 8.688286695870456e-06, "loss": 0.4712, "step": 9986 }, { "epoch": 0.9372184684684685, "grad_norm": 0.9221008144870216, "learning_rate": 8.687918051715242e-06, "loss": 0.4791, "step": 9987 }, { "epoch": 0.9373123123123123, "grad_norm": 1.1625919214136544, "learning_rate": 8.687549363588547e-06, "loss": 0.4681, "step": 9988 }, { "epoch": 0.9374061561561562, "grad_norm": 1.0602882999127594, "learning_rate": 8.687180631494767e-06, "loss": 0.4475, "step": 9989 }, { "epoch": 0.9375, "grad_norm": 1.3169516113723883, "learning_rate": 8.686811855438299e-06, "loss": 0.4793, "step": 9990 }, { "epoch": 0.9375938438438438, "grad_norm": 1.9179859305307807, "learning_rate": 8.68644303542354e-06, "loss": 0.4451, "step": 9991 }, { "epoch": 0.9376876876876877, "grad_norm": 0.9712200045928228, "learning_rate": 8.686074171454886e-06, "loss": 0.3779, "step": 9992 }, { "epoch": 0.9377815315315315, "grad_norm": 1.4687228886981585, "learning_rate": 8.685705263536735e-06, "loss": 0.4481, "step": 9993 }, { "epoch": 0.9378753753753754, "grad_norm": 1.0904742266848508, "learning_rate": 8.685336311673488e-06, "loss": 0.4704, "step": 9994 }, { "epoch": 0.9379692192192193, "grad_norm": 0.9576249308484491, "learning_rate": 8.68496731586954e-06, "loss": 0.5318, "step": 9995 }, { "epoch": 0.9380630630630631, "grad_norm": 1.0424589963266213, "learning_rate": 8.684598276129294e-06, "loss": 0.4931, "step": 9996 }, { "epoch": 0.9381569069069069, "grad_norm": 0.9952639779715311, "learning_rate": 8.68422919245715e-06, "loss": 0.4581, "step": 9997 }, { "epoch": 0.9382507507507507, "grad_norm": 0.9112986944439678, "learning_rate": 8.683860064857508e-06, "loss": 0.4294, "step": 9998 }, { "epoch": 0.9383445945945946, "grad_norm": 0.986358181334012, "learning_rate": 8.683490893334767e-06, "loss": 0.4838, "step": 9999 }, { "epoch": 0.9384384384384384, "grad_norm": 0.9105954823502301, "learning_rate": 8.683121677893331e-06, "loss": 0.4734, "step": 10000 }, { "epoch": 0.9385322822822822, "grad_norm": 0.970774707406384, "learning_rate": 8.682752418537603e-06, "loss": 0.4137, "step": 10001 }, { "epoch": 0.9386261261261262, "grad_norm": 1.0753554890480834, "learning_rate": 8.682383115271984e-06, "loss": 0.4692, "step": 10002 }, { "epoch": 0.93871996996997, "grad_norm": 1.1449137229809883, "learning_rate": 8.682013768100877e-06, "loss": 0.4725, "step": 10003 }, { "epoch": 0.9388138138138138, "grad_norm": 1.4005871615605585, "learning_rate": 8.681644377028687e-06, "loss": 0.4563, "step": 10004 }, { "epoch": 0.9389076576576577, "grad_norm": 0.9674716049103156, "learning_rate": 8.681274942059818e-06, "loss": 0.4808, "step": 10005 }, { "epoch": 0.9390015015015015, "grad_norm": 0.9213360089698239, "learning_rate": 8.680905463198675e-06, "loss": 0.4118, "step": 10006 }, { "epoch": 0.9390953453453453, "grad_norm": 1.1704171068429847, "learning_rate": 8.680535940449661e-06, "loss": 0.4613, "step": 10007 }, { "epoch": 0.9391891891891891, "grad_norm": 0.9961714926808947, "learning_rate": 8.680166373817187e-06, "loss": 0.4828, "step": 10008 }, { "epoch": 0.9392830330330331, "grad_norm": 1.8731520706990454, "learning_rate": 8.679796763305653e-06, "loss": 0.4338, "step": 10009 }, { "epoch": 0.9393768768768769, "grad_norm": 1.269917978858708, "learning_rate": 8.679427108919471e-06, "loss": 0.4792, "step": 10010 }, { "epoch": 0.9394707207207207, "grad_norm": 0.9631285827691871, "learning_rate": 8.679057410663045e-06, "loss": 0.4113, "step": 10011 }, { "epoch": 0.9395645645645646, "grad_norm": 1.23952540945513, "learning_rate": 8.678687668540786e-06, "loss": 0.4516, "step": 10012 }, { "epoch": 0.9396584084084084, "grad_norm": 1.1108658809156733, "learning_rate": 8.678317882557102e-06, "loss": 0.5012, "step": 10013 }, { "epoch": 0.9397522522522522, "grad_norm": 1.3861351190260274, "learning_rate": 8.677948052716398e-06, "loss": 0.4966, "step": 10014 }, { "epoch": 0.9398460960960962, "grad_norm": 0.8427886984743428, "learning_rate": 8.677578179023088e-06, "loss": 0.4311, "step": 10015 }, { "epoch": 0.93993993993994, "grad_norm": 0.9200535208552075, "learning_rate": 8.67720826148158e-06, "loss": 0.4311, "step": 10016 }, { "epoch": 0.9400337837837838, "grad_norm": 0.9072172918134893, "learning_rate": 8.676838300096286e-06, "loss": 0.413, "step": 10017 }, { "epoch": 0.9401276276276276, "grad_norm": 0.9803975442592024, "learning_rate": 8.676468294871617e-06, "loss": 0.4621, "step": 10018 }, { "epoch": 0.9402214714714715, "grad_norm": 1.033966039918863, "learning_rate": 8.676098245811981e-06, "loss": 0.4519, "step": 10019 }, { "epoch": 0.9403153153153153, "grad_norm": 0.9539850540183741, "learning_rate": 8.675728152921796e-06, "loss": 0.4368, "step": 10020 }, { "epoch": 0.9404091591591591, "grad_norm": 0.9949768617801314, "learning_rate": 8.67535801620547e-06, "loss": 0.4567, "step": 10021 }, { "epoch": 0.940503003003003, "grad_norm": 0.9918550654037662, "learning_rate": 8.674987835667419e-06, "loss": 0.4337, "step": 10022 }, { "epoch": 0.9405968468468469, "grad_norm": 1.27357472633291, "learning_rate": 8.674617611312053e-06, "loss": 0.4214, "step": 10023 }, { "epoch": 0.9406906906906907, "grad_norm": 1.6937056083151776, "learning_rate": 8.674247343143792e-06, "loss": 0.4953, "step": 10024 }, { "epoch": 0.9407845345345346, "grad_norm": 0.9980133697950602, "learning_rate": 8.673877031167044e-06, "loss": 0.4459, "step": 10025 }, { "epoch": 0.9408783783783784, "grad_norm": 1.0947080970607013, "learning_rate": 8.673506675386229e-06, "loss": 0.4753, "step": 10026 }, { "epoch": 0.9409722222222222, "grad_norm": 1.4690978288771468, "learning_rate": 8.673136275805764e-06, "loss": 0.4445, "step": 10027 }, { "epoch": 0.941066066066066, "grad_norm": 0.9018540316954106, "learning_rate": 8.67276583243006e-06, "loss": 0.4211, "step": 10028 }, { "epoch": 0.9411599099099099, "grad_norm": 0.9116072103018626, "learning_rate": 8.672395345263538e-06, "loss": 0.4324, "step": 10029 }, { "epoch": 0.9412537537537538, "grad_norm": 1.693690495142472, "learning_rate": 8.672024814310613e-06, "loss": 0.4495, "step": 10030 }, { "epoch": 0.9413475975975976, "grad_norm": 1.8401481439587797, "learning_rate": 8.671654239575705e-06, "loss": 0.5132, "step": 10031 }, { "epoch": 0.9414414414414415, "grad_norm": 0.8544114840094659, "learning_rate": 8.671283621063232e-06, "loss": 0.4795, "step": 10032 }, { "epoch": 0.9415352852852853, "grad_norm": 1.000743894237966, "learning_rate": 8.670912958777611e-06, "loss": 0.4398, "step": 10033 }, { "epoch": 0.9416291291291291, "grad_norm": 1.0410103726893676, "learning_rate": 8.670542252723263e-06, "loss": 0.5261, "step": 10034 }, { "epoch": 0.941722972972973, "grad_norm": 0.9293292946819792, "learning_rate": 8.670171502904607e-06, "loss": 0.4523, "step": 10035 }, { "epoch": 0.9418168168168168, "grad_norm": 1.0058418746081117, "learning_rate": 8.669800709326066e-06, "loss": 0.4832, "step": 10036 }, { "epoch": 0.9419106606606606, "grad_norm": 0.9839049799344918, "learning_rate": 8.669429871992059e-06, "loss": 0.4541, "step": 10037 }, { "epoch": 0.9420045045045045, "grad_norm": 1.4391022649613363, "learning_rate": 8.669058990907007e-06, "loss": 0.4839, "step": 10038 }, { "epoch": 0.9420983483483484, "grad_norm": 1.1081522866071245, "learning_rate": 8.668688066075332e-06, "loss": 0.4719, "step": 10039 }, { "epoch": 0.9421921921921922, "grad_norm": 1.3789377696101521, "learning_rate": 8.668317097501459e-06, "loss": 0.4929, "step": 10040 }, { "epoch": 0.942286036036036, "grad_norm": 0.9798217471764911, "learning_rate": 8.66794608518981e-06, "loss": 0.4632, "step": 10041 }, { "epoch": 0.9423798798798799, "grad_norm": 0.8228313183000959, "learning_rate": 8.667575029144806e-06, "loss": 0.431, "step": 10042 }, { "epoch": 0.9424737237237237, "grad_norm": 1.2781090668999047, "learning_rate": 8.667203929370874e-06, "loss": 0.4236, "step": 10043 }, { "epoch": 0.9425675675675675, "grad_norm": 1.0334085587594894, "learning_rate": 8.666832785872438e-06, "loss": 0.5291, "step": 10044 }, { "epoch": 0.9426614114114115, "grad_norm": 1.0089927033417232, "learning_rate": 8.666461598653923e-06, "loss": 0.4923, "step": 10045 }, { "epoch": 0.9427552552552553, "grad_norm": 2.649860961086565, "learning_rate": 8.666090367719756e-06, "loss": 0.4909, "step": 10046 }, { "epoch": 0.9428490990990991, "grad_norm": 1.0034685288313319, "learning_rate": 8.665719093074361e-06, "loss": 0.4433, "step": 10047 }, { "epoch": 0.9429429429429429, "grad_norm": 1.0029561962016682, "learning_rate": 8.665347774722166e-06, "loss": 0.5115, "step": 10048 }, { "epoch": 0.9430367867867868, "grad_norm": 1.2192116524004368, "learning_rate": 8.664976412667598e-06, "loss": 0.4535, "step": 10049 }, { "epoch": 0.9431306306306306, "grad_norm": 0.962459399716962, "learning_rate": 8.664605006915086e-06, "loss": 0.4731, "step": 10050 }, { "epoch": 0.9432244744744744, "grad_norm": 1.317414532387012, "learning_rate": 8.664233557469055e-06, "loss": 0.4523, "step": 10051 }, { "epoch": 0.9433183183183184, "grad_norm": 4.071706050428136, "learning_rate": 8.663862064333937e-06, "loss": 0.4267, "step": 10052 }, { "epoch": 0.9434121621621622, "grad_norm": 1.003976252042008, "learning_rate": 8.663490527514161e-06, "loss": 0.4566, "step": 10053 }, { "epoch": 0.943506006006006, "grad_norm": 1.0334428410169523, "learning_rate": 8.663118947014156e-06, "loss": 0.4884, "step": 10054 }, { "epoch": 0.9435998498498499, "grad_norm": 0.9670595881154673, "learning_rate": 8.662747322838352e-06, "loss": 0.4716, "step": 10055 }, { "epoch": 0.9436936936936937, "grad_norm": 1.1076971383277978, "learning_rate": 8.662375654991181e-06, "loss": 0.4673, "step": 10056 }, { "epoch": 0.9437875375375375, "grad_norm": 1.1546865914729791, "learning_rate": 8.662003943477075e-06, "loss": 0.5033, "step": 10057 }, { "epoch": 0.9438813813813813, "grad_norm": 0.9752386218932678, "learning_rate": 8.661632188300465e-06, "loss": 0.4871, "step": 10058 }, { "epoch": 0.9439752252252253, "grad_norm": 1.1503211943932445, "learning_rate": 8.661260389465781e-06, "loss": 0.4865, "step": 10059 }, { "epoch": 0.9440690690690691, "grad_norm": 1.1513380404230442, "learning_rate": 8.660888546977461e-06, "loss": 0.502, "step": 10060 }, { "epoch": 0.9441629129129129, "grad_norm": 1.1425088390979048, "learning_rate": 8.660516660839937e-06, "loss": 0.482, "step": 10061 }, { "epoch": 0.9442567567567568, "grad_norm": 1.1020713413432897, "learning_rate": 8.660144731057637e-06, "loss": 0.485, "step": 10062 }, { "epoch": 0.9443506006006006, "grad_norm": 1.456101509835834, "learning_rate": 8.659772757635006e-06, "loss": 0.4697, "step": 10063 }, { "epoch": 0.9444444444444444, "grad_norm": 1.4088430891134498, "learning_rate": 8.659400740576471e-06, "loss": 0.4803, "step": 10064 }, { "epoch": 0.9445382882882883, "grad_norm": 0.9917553848000104, "learning_rate": 8.659028679886472e-06, "loss": 0.4321, "step": 10065 }, { "epoch": 0.9446321321321322, "grad_norm": 0.9471499472504745, "learning_rate": 8.65865657556944e-06, "loss": 0.4099, "step": 10066 }, { "epoch": 0.944725975975976, "grad_norm": 0.9125763438630502, "learning_rate": 8.658284427629819e-06, "loss": 0.447, "step": 10067 }, { "epoch": 0.9448198198198198, "grad_norm": 0.9158920726520946, "learning_rate": 8.65791223607204e-06, "loss": 0.4819, "step": 10068 }, { "epoch": 0.9449136636636637, "grad_norm": 1.2709224007545266, "learning_rate": 8.657540000900543e-06, "loss": 0.4859, "step": 10069 }, { "epoch": 0.9450075075075075, "grad_norm": 0.9459901556920487, "learning_rate": 8.657167722119766e-06, "loss": 0.4486, "step": 10070 }, { "epoch": 0.9451013513513513, "grad_norm": 1.3142128123473946, "learning_rate": 8.656795399734148e-06, "loss": 0.3865, "step": 10071 }, { "epoch": 0.9451951951951952, "grad_norm": 0.9835064730051819, "learning_rate": 8.656423033748129e-06, "loss": 0.492, "step": 10072 }, { "epoch": 0.945289039039039, "grad_norm": 1.878079782606445, "learning_rate": 8.656050624166148e-06, "loss": 0.4735, "step": 10073 }, { "epoch": 0.9453828828828829, "grad_norm": 0.934447895566607, "learning_rate": 8.655678170992642e-06, "loss": 0.439, "step": 10074 }, { "epoch": 0.9454767267267268, "grad_norm": 1.0915197230544669, "learning_rate": 8.655305674232058e-06, "loss": 0.4692, "step": 10075 }, { "epoch": 0.9455705705705706, "grad_norm": 1.0378689460741715, "learning_rate": 8.654933133888834e-06, "loss": 0.5358, "step": 10076 }, { "epoch": 0.9456644144144144, "grad_norm": 0.9023675125104249, "learning_rate": 8.65456054996741e-06, "loss": 0.4318, "step": 10077 }, { "epoch": 0.9457582582582582, "grad_norm": 1.6522723756414182, "learning_rate": 8.654187922472233e-06, "loss": 0.3754, "step": 10078 }, { "epoch": 0.9458521021021021, "grad_norm": 1.0866626802498403, "learning_rate": 8.653815251407742e-06, "loss": 0.4602, "step": 10079 }, { "epoch": 0.9459459459459459, "grad_norm": 2.1462042225239637, "learning_rate": 8.653442536778383e-06, "loss": 0.4993, "step": 10080 }, { "epoch": 0.9460397897897898, "grad_norm": 0.9265588503759966, "learning_rate": 8.653069778588596e-06, "loss": 0.4682, "step": 10081 }, { "epoch": 0.9461336336336337, "grad_norm": 1.0662787968173426, "learning_rate": 8.652696976842832e-06, "loss": 0.469, "step": 10082 }, { "epoch": 0.9462274774774775, "grad_norm": 1.7897900156279476, "learning_rate": 8.652324131545529e-06, "loss": 0.4559, "step": 10083 }, { "epoch": 0.9463213213213213, "grad_norm": 0.9581875562002451, "learning_rate": 8.651951242701135e-06, "loss": 0.4204, "step": 10084 }, { "epoch": 0.9464151651651652, "grad_norm": 0.8188708666346141, "learning_rate": 8.6515783103141e-06, "loss": 0.4284, "step": 10085 }, { "epoch": 0.946509009009009, "grad_norm": 1.733480260536584, "learning_rate": 8.651205334388863e-06, "loss": 0.4688, "step": 10086 }, { "epoch": 0.9466028528528528, "grad_norm": 1.1207499874004407, "learning_rate": 8.650832314929878e-06, "loss": 0.4835, "step": 10087 }, { "epoch": 0.9466966966966966, "grad_norm": 1.496399671992353, "learning_rate": 8.650459251941587e-06, "loss": 0.4495, "step": 10088 }, { "epoch": 0.9467905405405406, "grad_norm": 2.010800582739948, "learning_rate": 8.650086145428444e-06, "loss": 0.4746, "step": 10089 }, { "epoch": 0.9468843843843844, "grad_norm": 0.845145729461237, "learning_rate": 8.649712995394892e-06, "loss": 0.4337, "step": 10090 }, { "epoch": 0.9469782282282282, "grad_norm": 1.397347086312872, "learning_rate": 8.649339801845381e-06, "loss": 0.4672, "step": 10091 }, { "epoch": 0.9470720720720721, "grad_norm": 0.9221995817579643, "learning_rate": 8.648966564784366e-06, "loss": 0.4616, "step": 10092 }, { "epoch": 0.9471659159159159, "grad_norm": 1.1027911746624723, "learning_rate": 8.648593284216291e-06, "loss": 0.4776, "step": 10093 }, { "epoch": 0.9472597597597597, "grad_norm": 1.5397384236091884, "learning_rate": 8.64821996014561e-06, "loss": 0.5008, "step": 10094 }, { "epoch": 0.9473536036036037, "grad_norm": 0.9451223712872925, "learning_rate": 8.647846592576772e-06, "loss": 0.5215, "step": 10095 }, { "epoch": 0.9474474474474475, "grad_norm": 0.9634670099118245, "learning_rate": 8.64747318151423e-06, "loss": 0.474, "step": 10096 }, { "epoch": 0.9475412912912913, "grad_norm": 0.8218185716944183, "learning_rate": 8.647099726962435e-06, "loss": 0.4704, "step": 10097 }, { "epoch": 0.9476351351351351, "grad_norm": 1.1144249876780774, "learning_rate": 8.646726228925842e-06, "loss": 0.4354, "step": 10098 }, { "epoch": 0.947728978978979, "grad_norm": 1.0458370830506052, "learning_rate": 8.646352687408902e-06, "loss": 0.4066, "step": 10099 }, { "epoch": 0.9478228228228228, "grad_norm": 0.8534478706076901, "learning_rate": 8.64597910241607e-06, "loss": 0.4698, "step": 10100 }, { "epoch": 0.9479166666666666, "grad_norm": 0.9377338716628587, "learning_rate": 8.645605473951801e-06, "loss": 0.4858, "step": 10101 }, { "epoch": 0.9480105105105106, "grad_norm": 1.0393681922831544, "learning_rate": 8.645231802020549e-06, "loss": 0.4749, "step": 10102 }, { "epoch": 0.9481043543543544, "grad_norm": 1.2076049877058634, "learning_rate": 8.644858086626767e-06, "loss": 0.4508, "step": 10103 }, { "epoch": 0.9481981981981982, "grad_norm": 1.3800300776333125, "learning_rate": 8.644484327774914e-06, "loss": 0.4609, "step": 10104 }, { "epoch": 0.9482920420420421, "grad_norm": 0.8994495516811669, "learning_rate": 8.644110525469447e-06, "loss": 0.4192, "step": 10105 }, { "epoch": 0.9483858858858859, "grad_norm": 0.937708594745528, "learning_rate": 8.643736679714821e-06, "loss": 0.4752, "step": 10106 }, { "epoch": 0.9484797297297297, "grad_norm": 1.2185451539649006, "learning_rate": 8.64336279051549e-06, "loss": 0.4501, "step": 10107 }, { "epoch": 0.9485735735735735, "grad_norm": 0.8637857124255535, "learning_rate": 8.64298885787592e-06, "loss": 0.4615, "step": 10108 }, { "epoch": 0.9486674174174174, "grad_norm": 2.210204687541722, "learning_rate": 8.642614881800564e-06, "loss": 0.4811, "step": 10109 }, { "epoch": 0.9487612612612613, "grad_norm": 0.9135047057707384, "learning_rate": 8.642240862293882e-06, "loss": 0.4495, "step": 10110 }, { "epoch": 0.9488551051051051, "grad_norm": 1.018849639652416, "learning_rate": 8.641866799360332e-06, "loss": 0.4166, "step": 10111 }, { "epoch": 0.948948948948949, "grad_norm": 1.2514958259378055, "learning_rate": 8.641492693004378e-06, "loss": 0.4492, "step": 10112 }, { "epoch": 0.9490427927927928, "grad_norm": 1.465921113630569, "learning_rate": 8.641118543230476e-06, "loss": 0.4418, "step": 10113 }, { "epoch": 0.9491366366366366, "grad_norm": 0.8925473538001609, "learning_rate": 8.640744350043091e-06, "loss": 0.48, "step": 10114 }, { "epoch": 0.9492304804804805, "grad_norm": 1.0789310879314584, "learning_rate": 8.640370113446681e-06, "loss": 0.4758, "step": 10115 }, { "epoch": 0.9493243243243243, "grad_norm": 1.1603266079644512, "learning_rate": 8.63999583344571e-06, "loss": 0.5113, "step": 10116 }, { "epoch": 0.9494181681681682, "grad_norm": 1.0066655515377834, "learning_rate": 8.63962151004464e-06, "loss": 0.3985, "step": 10117 }, { "epoch": 0.949512012012012, "grad_norm": 0.9544832509439879, "learning_rate": 8.639247143247936e-06, "loss": 0.4437, "step": 10118 }, { "epoch": 0.9496058558558559, "grad_norm": 1.1303421872647184, "learning_rate": 8.638872733060058e-06, "loss": 0.5219, "step": 10119 }, { "epoch": 0.9496996996996997, "grad_norm": 1.001460052414302, "learning_rate": 8.638498279485473e-06, "loss": 0.4846, "step": 10120 }, { "epoch": 0.9497935435435435, "grad_norm": 0.9302005048046539, "learning_rate": 8.638123782528645e-06, "loss": 0.4168, "step": 10121 }, { "epoch": 0.9498873873873874, "grad_norm": 0.9302275062238712, "learning_rate": 8.637749242194037e-06, "loss": 0.4727, "step": 10122 }, { "epoch": 0.9499812312312312, "grad_norm": 0.8945958145373826, "learning_rate": 8.637374658486119e-06, "loss": 0.4926, "step": 10123 }, { "epoch": 0.950075075075075, "grad_norm": 1.2041806754124393, "learning_rate": 8.637000031409354e-06, "loss": 0.4303, "step": 10124 }, { "epoch": 0.950168918918919, "grad_norm": 1.219395689726289, "learning_rate": 8.636625360968211e-06, "loss": 0.4269, "step": 10125 }, { "epoch": 0.9502627627627628, "grad_norm": 0.9887708566081018, "learning_rate": 8.636250647167155e-06, "loss": 0.4606, "step": 10126 }, { "epoch": 0.9503566066066066, "grad_norm": 1.1668128580958066, "learning_rate": 8.635875890010655e-06, "loss": 0.471, "step": 10127 }, { "epoch": 0.9504504504504504, "grad_norm": 1.0071821765032087, "learning_rate": 8.635501089503176e-06, "loss": 0.506, "step": 10128 }, { "epoch": 0.9505442942942943, "grad_norm": 0.948250504734064, "learning_rate": 8.635126245649193e-06, "loss": 0.451, "step": 10129 }, { "epoch": 0.9506381381381381, "grad_norm": 1.2335760835895366, "learning_rate": 8.634751358453171e-06, "loss": 0.4843, "step": 10130 }, { "epoch": 0.9507319819819819, "grad_norm": 1.0220739533673033, "learning_rate": 8.63437642791958e-06, "loss": 0.4951, "step": 10131 }, { "epoch": 0.9508258258258259, "grad_norm": 1.0136239415925794, "learning_rate": 8.63400145405289e-06, "loss": 0.5056, "step": 10132 }, { "epoch": 0.9509196696696697, "grad_norm": 2.328394750779266, "learning_rate": 8.633626436857576e-06, "loss": 0.4662, "step": 10133 }, { "epoch": 0.9510135135135135, "grad_norm": 2.2624419164875245, "learning_rate": 8.633251376338104e-06, "loss": 0.4669, "step": 10134 }, { "epoch": 0.9511073573573574, "grad_norm": 1.3591627780671598, "learning_rate": 8.632876272498948e-06, "loss": 0.5118, "step": 10135 }, { "epoch": 0.9512012012012012, "grad_norm": 1.1284829271211496, "learning_rate": 8.632501125344583e-06, "loss": 0.4771, "step": 10136 }, { "epoch": 0.951295045045045, "grad_norm": 1.137085621742049, "learning_rate": 8.632125934879478e-06, "loss": 0.4971, "step": 10137 }, { "epoch": 0.9513888888888888, "grad_norm": 0.9202829945311048, "learning_rate": 8.631750701108108e-06, "loss": 0.4754, "step": 10138 }, { "epoch": 0.9514827327327328, "grad_norm": 1.848310895487871, "learning_rate": 8.631375424034948e-06, "loss": 0.4482, "step": 10139 }, { "epoch": 0.9515765765765766, "grad_norm": 1.037820839954861, "learning_rate": 8.63100010366447e-06, "loss": 0.4851, "step": 10140 }, { "epoch": 0.9516704204204204, "grad_norm": 0.9409498733131916, "learning_rate": 8.63062474000115e-06, "loss": 0.4584, "step": 10141 }, { "epoch": 0.9517642642642643, "grad_norm": 0.8931244827552945, "learning_rate": 8.630249333049465e-06, "loss": 0.4651, "step": 10142 }, { "epoch": 0.9518581081081081, "grad_norm": 1.001178915462719, "learning_rate": 8.62987388281389e-06, "loss": 0.4823, "step": 10143 }, { "epoch": 0.9519519519519519, "grad_norm": 1.114219660589233, "learning_rate": 8.629498389298903e-06, "loss": 0.4787, "step": 10144 }, { "epoch": 0.9520457957957958, "grad_norm": 0.9169227494540555, "learning_rate": 8.629122852508977e-06, "loss": 0.4385, "step": 10145 }, { "epoch": 0.9521396396396397, "grad_norm": 0.881383763879871, "learning_rate": 8.628747272448592e-06, "loss": 0.4439, "step": 10146 }, { "epoch": 0.9522334834834835, "grad_norm": 0.9571145169069636, "learning_rate": 8.628371649122228e-06, "loss": 0.4925, "step": 10147 }, { "epoch": 0.9523273273273273, "grad_norm": 1.870477149043423, "learning_rate": 8.62799598253436e-06, "loss": 0.4763, "step": 10148 }, { "epoch": 0.9524211711711712, "grad_norm": 0.9410355687229921, "learning_rate": 8.62762027268947e-06, "loss": 0.4564, "step": 10149 }, { "epoch": 0.952515015015015, "grad_norm": 1.1991448224035015, "learning_rate": 8.627244519592037e-06, "loss": 0.471, "step": 10150 }, { "epoch": 0.9526088588588588, "grad_norm": 1.0457068094429178, "learning_rate": 8.62686872324654e-06, "loss": 0.4396, "step": 10151 }, { "epoch": 0.9527027027027027, "grad_norm": 0.8829588058351391, "learning_rate": 8.62649288365746e-06, "loss": 0.4723, "step": 10152 }, { "epoch": 0.9527965465465466, "grad_norm": 0.9401721747679871, "learning_rate": 8.62611700082928e-06, "loss": 0.5194, "step": 10153 }, { "epoch": 0.9528903903903904, "grad_norm": 1.6913451156389507, "learning_rate": 8.625741074766478e-06, "loss": 0.4687, "step": 10154 }, { "epoch": 0.9529842342342343, "grad_norm": 0.9306833832200335, "learning_rate": 8.62536510547354e-06, "loss": 0.497, "step": 10155 }, { "epoch": 0.9530780780780781, "grad_norm": 1.55021399130842, "learning_rate": 8.624989092954947e-06, "loss": 0.4405, "step": 10156 }, { "epoch": 0.9531719219219219, "grad_norm": 1.0635518622419164, "learning_rate": 8.624613037215183e-06, "loss": 0.4593, "step": 10157 }, { "epoch": 0.9532657657657657, "grad_norm": 1.1535127041562028, "learning_rate": 8.624236938258729e-06, "loss": 0.4961, "step": 10158 }, { "epoch": 0.9533596096096096, "grad_norm": 0.8886788355548235, "learning_rate": 8.623860796090073e-06, "loss": 0.4799, "step": 10159 }, { "epoch": 0.9534534534534534, "grad_norm": 0.9945344691407626, "learning_rate": 8.6234846107137e-06, "loss": 0.4518, "step": 10160 }, { "epoch": 0.9535472972972973, "grad_norm": 1.0744101983066818, "learning_rate": 8.623108382134092e-06, "loss": 0.4744, "step": 10161 }, { "epoch": 0.9536411411411412, "grad_norm": 0.99205141691537, "learning_rate": 8.622732110355735e-06, "loss": 0.4934, "step": 10162 }, { "epoch": 0.953734984984985, "grad_norm": 1.0488325931911324, "learning_rate": 8.622355795383117e-06, "loss": 0.4519, "step": 10163 }, { "epoch": 0.9538288288288288, "grad_norm": 0.9612146436106648, "learning_rate": 8.621979437220724e-06, "loss": 0.4474, "step": 10164 }, { "epoch": 0.9539226726726727, "grad_norm": 0.9094745501406217, "learning_rate": 8.621603035873047e-06, "loss": 0.4513, "step": 10165 }, { "epoch": 0.9540165165165165, "grad_norm": 0.9708780224282276, "learning_rate": 8.621226591344569e-06, "loss": 0.4964, "step": 10166 }, { "epoch": 0.9541103603603603, "grad_norm": 0.971651819483372, "learning_rate": 8.620850103639779e-06, "loss": 0.4408, "step": 10167 }, { "epoch": 0.9542042042042042, "grad_norm": 0.9140618874273766, "learning_rate": 8.620473572763168e-06, "loss": 0.4582, "step": 10168 }, { "epoch": 0.9542980480480481, "grad_norm": 0.9123148548685017, "learning_rate": 8.620096998719224e-06, "loss": 0.4246, "step": 10169 }, { "epoch": 0.9543918918918919, "grad_norm": 0.9043089949271269, "learning_rate": 8.619720381512438e-06, "loss": 0.4707, "step": 10170 }, { "epoch": 0.9544857357357357, "grad_norm": 1.2793691456126621, "learning_rate": 8.6193437211473e-06, "loss": 0.4683, "step": 10171 }, { "epoch": 0.9545795795795796, "grad_norm": 0.9537936984043295, "learning_rate": 8.618967017628302e-06, "loss": 0.4763, "step": 10172 }, { "epoch": 0.9546734234234234, "grad_norm": 1.0873248101167114, "learning_rate": 8.618590270959933e-06, "loss": 0.4977, "step": 10173 }, { "epoch": 0.9547672672672672, "grad_norm": 0.8878875992871238, "learning_rate": 8.618213481146685e-06, "loss": 0.452, "step": 10174 }, { "epoch": 0.9548611111111112, "grad_norm": 0.9694757812955114, "learning_rate": 8.617836648193053e-06, "loss": 0.488, "step": 10175 }, { "epoch": 0.954954954954955, "grad_norm": 0.9746854698748653, "learning_rate": 8.61745977210353e-06, "loss": 0.4478, "step": 10176 }, { "epoch": 0.9550487987987988, "grad_norm": 0.9819008396363728, "learning_rate": 8.617082852882606e-06, "loss": 0.4488, "step": 10177 }, { "epoch": 0.9551426426426426, "grad_norm": 0.9389203536447873, "learning_rate": 8.61670589053478e-06, "loss": 0.4531, "step": 10178 }, { "epoch": 0.9552364864864865, "grad_norm": 1.0209796965648976, "learning_rate": 8.616328885064543e-06, "loss": 0.4062, "step": 10179 }, { "epoch": 0.9553303303303303, "grad_norm": 1.676793876039895, "learning_rate": 8.615951836476389e-06, "loss": 0.5271, "step": 10180 }, { "epoch": 0.9554241741741741, "grad_norm": 0.9230957960162193, "learning_rate": 8.615574744774818e-06, "loss": 0.4658, "step": 10181 }, { "epoch": 0.9555180180180181, "grad_norm": 1.123092899737429, "learning_rate": 8.615197609964323e-06, "loss": 0.5067, "step": 10182 }, { "epoch": 0.9556118618618619, "grad_norm": 0.8997296118878656, "learning_rate": 8.614820432049403e-06, "loss": 0.4822, "step": 10183 }, { "epoch": 0.9557057057057057, "grad_norm": 1.080051300205635, "learning_rate": 8.614443211034552e-06, "loss": 0.4861, "step": 10184 }, { "epoch": 0.9557995495495496, "grad_norm": 0.9598752027383575, "learning_rate": 8.614065946924268e-06, "loss": 0.4806, "step": 10185 }, { "epoch": 0.9558933933933934, "grad_norm": 1.099560023933909, "learning_rate": 8.613688639723052e-06, "loss": 0.5211, "step": 10186 }, { "epoch": 0.9559872372372372, "grad_norm": 1.0874272196184498, "learning_rate": 8.613311289435403e-06, "loss": 0.483, "step": 10187 }, { "epoch": 0.956081081081081, "grad_norm": 1.083097847885302, "learning_rate": 8.612933896065813e-06, "loss": 0.3992, "step": 10188 }, { "epoch": 0.956174924924925, "grad_norm": 1.0906936066323303, "learning_rate": 8.612556459618792e-06, "loss": 0.4754, "step": 10189 }, { "epoch": 0.9562687687687688, "grad_norm": 1.2110580926937886, "learning_rate": 8.612178980098833e-06, "loss": 0.463, "step": 10190 }, { "epoch": 0.9563626126126126, "grad_norm": 1.2778821729850058, "learning_rate": 8.611801457510437e-06, "loss": 0.4818, "step": 10191 }, { "epoch": 0.9564564564564565, "grad_norm": 0.9270127948288525, "learning_rate": 8.61142389185811e-06, "loss": 0.4819, "step": 10192 }, { "epoch": 0.9565503003003003, "grad_norm": 0.9862641132157755, "learning_rate": 8.611046283146352e-06, "loss": 0.4254, "step": 10193 }, { "epoch": 0.9566441441441441, "grad_norm": 1.484565500672468, "learning_rate": 8.610668631379661e-06, "loss": 0.4904, "step": 10194 }, { "epoch": 0.956737987987988, "grad_norm": 0.9295349564913561, "learning_rate": 8.610290936562545e-06, "loss": 0.4143, "step": 10195 }, { "epoch": 0.9568318318318318, "grad_norm": 0.7869625641630066, "learning_rate": 8.609913198699504e-06, "loss": 0.4215, "step": 10196 }, { "epoch": 0.9569256756756757, "grad_norm": 0.9797855055512523, "learning_rate": 8.609535417795044e-06, "loss": 0.4506, "step": 10197 }, { "epoch": 0.9570195195195195, "grad_norm": 1.0860208944702934, "learning_rate": 8.609157593853667e-06, "loss": 0.4596, "step": 10198 }, { "epoch": 0.9571133633633634, "grad_norm": 0.9832240009837611, "learning_rate": 8.608779726879881e-06, "loss": 0.462, "step": 10199 }, { "epoch": 0.9572072072072072, "grad_norm": 1.524207757873346, "learning_rate": 8.60840181687819e-06, "loss": 0.4394, "step": 10200 }, { "epoch": 0.957301051051051, "grad_norm": 0.8880230233401829, "learning_rate": 8.608023863853096e-06, "loss": 0.4322, "step": 10201 }, { "epoch": 0.9573948948948949, "grad_norm": 1.0525746804702922, "learning_rate": 8.607645867809112e-06, "loss": 0.4519, "step": 10202 }, { "epoch": 0.9574887387387387, "grad_norm": 0.8746028593384967, "learning_rate": 8.60726782875074e-06, "loss": 0.4581, "step": 10203 }, { "epoch": 0.9575825825825826, "grad_norm": 1.0461552503187914, "learning_rate": 8.606889746682491e-06, "loss": 0.4946, "step": 10204 }, { "epoch": 0.9576764264264265, "grad_norm": 1.6545560684081748, "learning_rate": 8.60651162160887e-06, "loss": 0.4454, "step": 10205 }, { "epoch": 0.9577702702702703, "grad_norm": 0.9002353191391049, "learning_rate": 8.606133453534387e-06, "loss": 0.4589, "step": 10206 }, { "epoch": 0.9578641141141141, "grad_norm": 1.264022996313298, "learning_rate": 8.605755242463549e-06, "loss": 0.4179, "step": 10207 }, { "epoch": 0.9579579579579579, "grad_norm": 1.0367196412409634, "learning_rate": 8.605376988400867e-06, "loss": 0.473, "step": 10208 }, { "epoch": 0.9580518018018018, "grad_norm": 1.140149818699977, "learning_rate": 8.604998691350852e-06, "loss": 0.4848, "step": 10209 }, { "epoch": 0.9581456456456456, "grad_norm": 1.027303789706225, "learning_rate": 8.604620351318013e-06, "loss": 0.4833, "step": 10210 }, { "epoch": 0.9582394894894894, "grad_norm": 3.6637684053573305, "learning_rate": 8.604241968306861e-06, "loss": 0.4319, "step": 10211 }, { "epoch": 0.9583333333333334, "grad_norm": 0.9006234285219167, "learning_rate": 8.603863542321909e-06, "loss": 0.453, "step": 10212 }, { "epoch": 0.9584271771771772, "grad_norm": 2.1493884173955045, "learning_rate": 8.603485073367665e-06, "loss": 0.4674, "step": 10213 }, { "epoch": 0.958521021021021, "grad_norm": 1.3254026038329962, "learning_rate": 8.603106561448647e-06, "loss": 0.4235, "step": 10214 }, { "epoch": 0.9586148648648649, "grad_norm": 1.1258554922917035, "learning_rate": 8.602728006569365e-06, "loss": 0.502, "step": 10215 }, { "epoch": 0.9587087087087087, "grad_norm": 1.0104017558250575, "learning_rate": 8.602349408734332e-06, "loss": 0.4388, "step": 10216 }, { "epoch": 0.9588025525525525, "grad_norm": 1.0268929541169671, "learning_rate": 8.601970767948064e-06, "loss": 0.5005, "step": 10217 }, { "epoch": 0.9588963963963963, "grad_norm": 0.8398812999363451, "learning_rate": 8.601592084215074e-06, "loss": 0.461, "step": 10218 }, { "epoch": 0.9589902402402403, "grad_norm": 0.8801884470004078, "learning_rate": 8.601213357539878e-06, "loss": 0.4523, "step": 10219 }, { "epoch": 0.9590840840840841, "grad_norm": 1.201375197925103, "learning_rate": 8.600834587926992e-06, "loss": 0.422, "step": 10220 }, { "epoch": 0.9591779279279279, "grad_norm": 0.9402497323547047, "learning_rate": 8.60045577538093e-06, "loss": 0.4717, "step": 10221 }, { "epoch": 0.9592717717717718, "grad_norm": 1.1027132226891099, "learning_rate": 8.60007691990621e-06, "loss": 0.4868, "step": 10222 }, { "epoch": 0.9593656156156156, "grad_norm": 0.9414350283292799, "learning_rate": 8.59969802150735e-06, "loss": 0.4672, "step": 10223 }, { "epoch": 0.9594594594594594, "grad_norm": 1.3972538555671994, "learning_rate": 8.599319080188868e-06, "loss": 0.4398, "step": 10224 }, { "epoch": 0.9595533033033034, "grad_norm": 1.620178607564165, "learning_rate": 8.59894009595528e-06, "loss": 0.4093, "step": 10225 }, { "epoch": 0.9596471471471472, "grad_norm": 1.050918766573084, "learning_rate": 8.598561068811104e-06, "loss": 0.4678, "step": 10226 }, { "epoch": 0.959740990990991, "grad_norm": 1.0030751253081887, "learning_rate": 8.598181998760861e-06, "loss": 0.46, "step": 10227 }, { "epoch": 0.9598348348348348, "grad_norm": 1.0124848312500199, "learning_rate": 8.597802885809072e-06, "loss": 0.471, "step": 10228 }, { "epoch": 0.9599286786786787, "grad_norm": 1.83873293144537, "learning_rate": 8.597423729960258e-06, "loss": 0.457, "step": 10229 }, { "epoch": 0.9600225225225225, "grad_norm": 1.0750240799815287, "learning_rate": 8.597044531218933e-06, "loss": 0.4874, "step": 10230 }, { "epoch": 0.9601163663663663, "grad_norm": 1.0226379381616237, "learning_rate": 8.596665289589626e-06, "loss": 0.4482, "step": 10231 }, { "epoch": 0.9602102102102102, "grad_norm": 1.713379701645626, "learning_rate": 8.596286005076852e-06, "loss": 0.4467, "step": 10232 }, { "epoch": 0.9603040540540541, "grad_norm": 0.9868256982794205, "learning_rate": 8.59590667768514e-06, "loss": 0.4857, "step": 10233 }, { "epoch": 0.9603978978978979, "grad_norm": 0.978936371831327, "learning_rate": 8.595527307419007e-06, "loss": 0.4844, "step": 10234 }, { "epoch": 0.9604917417417418, "grad_norm": 1.0581913488660402, "learning_rate": 8.59514789428298e-06, "loss": 0.4524, "step": 10235 }, { "epoch": 0.9605855855855856, "grad_norm": 1.00909216701234, "learning_rate": 8.594768438281583e-06, "loss": 0.4433, "step": 10236 }, { "epoch": 0.9606794294294294, "grad_norm": 0.9303815348008002, "learning_rate": 8.594388939419336e-06, "loss": 0.4283, "step": 10237 }, { "epoch": 0.9607732732732732, "grad_norm": 1.0068056767978522, "learning_rate": 8.594009397700768e-06, "loss": 0.4784, "step": 10238 }, { "epoch": 0.9608671171171171, "grad_norm": 1.0952773564969884, "learning_rate": 8.593629813130404e-06, "loss": 0.5089, "step": 10239 }, { "epoch": 0.960960960960961, "grad_norm": 1.2288214911669624, "learning_rate": 8.593250185712768e-06, "loss": 0.4611, "step": 10240 }, { "epoch": 0.9610548048048048, "grad_norm": 0.9999429727480063, "learning_rate": 8.592870515452386e-06, "loss": 0.4422, "step": 10241 }, { "epoch": 0.9611486486486487, "grad_norm": 0.9530579540085663, "learning_rate": 8.592490802353788e-06, "loss": 0.4419, "step": 10242 }, { "epoch": 0.9612424924924925, "grad_norm": 0.9035027186548734, "learning_rate": 8.592111046421497e-06, "loss": 0.4398, "step": 10243 }, { "epoch": 0.9613363363363363, "grad_norm": 2.0657831395282265, "learning_rate": 8.591731247660044e-06, "loss": 0.5523, "step": 10244 }, { "epoch": 0.9614301801801802, "grad_norm": 0.9634367737591074, "learning_rate": 8.591351406073958e-06, "loss": 0.4712, "step": 10245 }, { "epoch": 0.961524024024024, "grad_norm": 2.5135661342407984, "learning_rate": 8.590971521667765e-06, "loss": 0.5197, "step": 10246 }, { "epoch": 0.9616178678678678, "grad_norm": 1.2720856202148632, "learning_rate": 8.590591594445997e-06, "loss": 0.5373, "step": 10247 }, { "epoch": 0.9617117117117117, "grad_norm": 1.003838670193948, "learning_rate": 8.590211624413183e-06, "loss": 0.4643, "step": 10248 }, { "epoch": 0.9618055555555556, "grad_norm": 0.9809337401582078, "learning_rate": 8.589831611573851e-06, "loss": 0.4158, "step": 10249 }, { "epoch": 0.9618993993993994, "grad_norm": 0.8336963889910967, "learning_rate": 8.589451555932537e-06, "loss": 0.4416, "step": 10250 }, { "epoch": 0.9619932432432432, "grad_norm": 1.0872948407485856, "learning_rate": 8.589071457493767e-06, "loss": 0.4602, "step": 10251 }, { "epoch": 0.9620870870870871, "grad_norm": 0.900236633497819, "learning_rate": 8.588691316262077e-06, "loss": 0.4441, "step": 10252 }, { "epoch": 0.9621809309309309, "grad_norm": 0.9917423845515997, "learning_rate": 8.588311132242e-06, "loss": 0.4768, "step": 10253 }, { "epoch": 0.9622747747747747, "grad_norm": 0.9176664411181161, "learning_rate": 8.587930905438064e-06, "loss": 0.479, "step": 10254 }, { "epoch": 0.9623686186186187, "grad_norm": 0.9880916913742027, "learning_rate": 8.587550635854807e-06, "loss": 0.4665, "step": 10255 }, { "epoch": 0.9624624624624625, "grad_norm": 0.8938709683774718, "learning_rate": 8.58717032349676e-06, "loss": 0.4968, "step": 10256 }, { "epoch": 0.9625563063063063, "grad_norm": 1.009824241969203, "learning_rate": 8.58678996836846e-06, "loss": 0.4622, "step": 10257 }, { "epoch": 0.9626501501501501, "grad_norm": 1.0664648095270932, "learning_rate": 8.586409570474443e-06, "loss": 0.459, "step": 10258 }, { "epoch": 0.962743993993994, "grad_norm": 0.9274860564435782, "learning_rate": 8.58602912981924e-06, "loss": 0.4872, "step": 10259 }, { "epoch": 0.9628378378378378, "grad_norm": 1.0647289841519312, "learning_rate": 8.58564864640739e-06, "loss": 0.5391, "step": 10260 }, { "epoch": 0.9629316816816816, "grad_norm": 1.0508118065147591, "learning_rate": 8.585268120243432e-06, "loss": 0.4147, "step": 10261 }, { "epoch": 0.9630255255255256, "grad_norm": 0.9963447250090885, "learning_rate": 8.584887551331896e-06, "loss": 0.5, "step": 10262 }, { "epoch": 0.9631193693693694, "grad_norm": 1.9073374960912775, "learning_rate": 8.584506939677327e-06, "loss": 0.4788, "step": 10263 }, { "epoch": 0.9632132132132132, "grad_norm": 1.1554899693706409, "learning_rate": 8.584126285284258e-06, "loss": 0.4066, "step": 10264 }, { "epoch": 0.9633070570570571, "grad_norm": 0.7970568434502258, "learning_rate": 8.58374558815723e-06, "loss": 0.4638, "step": 10265 }, { "epoch": 0.9634009009009009, "grad_norm": 1.0575554328057748, "learning_rate": 8.58336484830078e-06, "loss": 0.465, "step": 10266 }, { "epoch": 0.9634947447447447, "grad_norm": 0.9947394601819124, "learning_rate": 8.582984065719454e-06, "loss": 0.4541, "step": 10267 }, { "epoch": 0.9635885885885885, "grad_norm": 1.4474155010092264, "learning_rate": 8.582603240417783e-06, "loss": 0.5103, "step": 10268 }, { "epoch": 0.9636824324324325, "grad_norm": 0.8922897369338353, "learning_rate": 8.582222372400313e-06, "loss": 0.4378, "step": 10269 }, { "epoch": 0.9637762762762763, "grad_norm": 0.9731069319511526, "learning_rate": 8.581841461671585e-06, "loss": 0.4707, "step": 10270 }, { "epoch": 0.9638701201201201, "grad_norm": 0.8510467126111152, "learning_rate": 8.581460508236136e-06, "loss": 0.4467, "step": 10271 }, { "epoch": 0.963963963963964, "grad_norm": 0.9471929760529922, "learning_rate": 8.581079512098516e-06, "loss": 0.4776, "step": 10272 }, { "epoch": 0.9640578078078078, "grad_norm": 0.9423771248327152, "learning_rate": 8.580698473263263e-06, "loss": 0.4744, "step": 10273 }, { "epoch": 0.9641516516516516, "grad_norm": 0.9830631527738813, "learning_rate": 8.58031739173492e-06, "loss": 0.4813, "step": 10274 }, { "epoch": 0.9642454954954955, "grad_norm": 0.9068119062812023, "learning_rate": 8.57993626751803e-06, "loss": 0.4131, "step": 10275 }, { "epoch": 0.9643393393393394, "grad_norm": 1.199181802900695, "learning_rate": 8.57955510061714e-06, "loss": 0.4962, "step": 10276 }, { "epoch": 0.9644331831831832, "grad_norm": 0.8833136696397795, "learning_rate": 8.579173891036794e-06, "loss": 0.4239, "step": 10277 }, { "epoch": 0.964527027027027, "grad_norm": 1.5046233493300998, "learning_rate": 8.578792638781535e-06, "loss": 0.4617, "step": 10278 }, { "epoch": 0.9646208708708709, "grad_norm": 0.9832619218050859, "learning_rate": 8.57841134385591e-06, "loss": 0.4861, "step": 10279 }, { "epoch": 0.9647147147147147, "grad_norm": 1.090298147106633, "learning_rate": 8.578030006264466e-06, "loss": 0.4859, "step": 10280 }, { "epoch": 0.9648085585585585, "grad_norm": 1.6183403613705953, "learning_rate": 8.577648626011748e-06, "loss": 0.4993, "step": 10281 }, { "epoch": 0.9649024024024024, "grad_norm": 0.9370737774563872, "learning_rate": 8.577267203102307e-06, "loss": 0.449, "step": 10282 }, { "epoch": 0.9649962462462462, "grad_norm": 0.9705766569556421, "learning_rate": 8.576885737540686e-06, "loss": 0.4546, "step": 10283 }, { "epoch": 0.9650900900900901, "grad_norm": 0.8973058404718384, "learning_rate": 8.576504229331435e-06, "loss": 0.4746, "step": 10284 }, { "epoch": 0.965183933933934, "grad_norm": 1.195432114970191, "learning_rate": 8.576122678479106e-06, "loss": 0.4573, "step": 10285 }, { "epoch": 0.9652777777777778, "grad_norm": 1.029904805456165, "learning_rate": 8.575741084988242e-06, "loss": 0.4303, "step": 10286 }, { "epoch": 0.9653716216216216, "grad_norm": 6.8715665685600555, "learning_rate": 8.575359448863398e-06, "loss": 0.4725, "step": 10287 }, { "epoch": 0.9654654654654654, "grad_norm": 1.2392866931426096, "learning_rate": 8.574977770109124e-06, "loss": 0.4728, "step": 10288 }, { "epoch": 0.9655593093093093, "grad_norm": 0.9848192795747956, "learning_rate": 8.574596048729966e-06, "loss": 0.4394, "step": 10289 }, { "epoch": 0.9656531531531531, "grad_norm": 1.4153090441860996, "learning_rate": 8.57421428473048e-06, "loss": 0.4824, "step": 10290 }, { "epoch": 0.965746996996997, "grad_norm": 1.9153795528015212, "learning_rate": 8.573832478115218e-06, "loss": 0.4637, "step": 10291 }, { "epoch": 0.9658408408408409, "grad_norm": 0.9014074617392855, "learning_rate": 8.57345062888873e-06, "loss": 0.4211, "step": 10292 }, { "epoch": 0.9659346846846847, "grad_norm": 0.7793698235323115, "learning_rate": 8.573068737055567e-06, "loss": 0.4723, "step": 10293 }, { "epoch": 0.9660285285285285, "grad_norm": 1.024323912749849, "learning_rate": 8.572686802620288e-06, "loss": 0.4229, "step": 10294 }, { "epoch": 0.9661223723723724, "grad_norm": 1.0207567647115143, "learning_rate": 8.57230482558744e-06, "loss": 0.4644, "step": 10295 }, { "epoch": 0.9662162162162162, "grad_norm": 3.3439628956170915, "learning_rate": 8.571922805961585e-06, "loss": 0.427, "step": 10296 }, { "epoch": 0.96631006006006, "grad_norm": 1.42444527229416, "learning_rate": 8.571540743747273e-06, "loss": 0.5203, "step": 10297 }, { "epoch": 0.9664039039039038, "grad_norm": 0.9903778370789337, "learning_rate": 8.57115863894906e-06, "loss": 0.4984, "step": 10298 }, { "epoch": 0.9664977477477478, "grad_norm": 1.087627525204227, "learning_rate": 8.570776491571502e-06, "loss": 0.4675, "step": 10299 }, { "epoch": 0.9665915915915916, "grad_norm": 1.0412462285005137, "learning_rate": 8.570394301619156e-06, "loss": 0.4947, "step": 10300 }, { "epoch": 0.9666854354354354, "grad_norm": 1.1079558595035817, "learning_rate": 8.570012069096578e-06, "loss": 0.5192, "step": 10301 }, { "epoch": 0.9667792792792793, "grad_norm": 0.9162207288923118, "learning_rate": 8.569629794008327e-06, "loss": 0.4714, "step": 10302 }, { "epoch": 0.9668731231231231, "grad_norm": 1.2234533934106615, "learning_rate": 8.569247476358959e-06, "loss": 0.4674, "step": 10303 }, { "epoch": 0.9669669669669669, "grad_norm": 1.1114651799460118, "learning_rate": 8.568865116153033e-06, "loss": 0.4774, "step": 10304 }, { "epoch": 0.9670608108108109, "grad_norm": 0.907680746306339, "learning_rate": 8.56848271339511e-06, "loss": 0.4738, "step": 10305 }, { "epoch": 0.9671546546546547, "grad_norm": 0.8829382056717651, "learning_rate": 8.568100268089745e-06, "loss": 0.3914, "step": 10306 }, { "epoch": 0.9672484984984985, "grad_norm": 7.344242876567609, "learning_rate": 8.567717780241502e-06, "loss": 0.4661, "step": 10307 }, { "epoch": 0.9673423423423423, "grad_norm": 1.3700818836115471, "learning_rate": 8.56733524985494e-06, "loss": 0.4525, "step": 10308 }, { "epoch": 0.9674361861861862, "grad_norm": 0.9912992317274668, "learning_rate": 8.56695267693462e-06, "loss": 0.4589, "step": 10309 }, { "epoch": 0.96753003003003, "grad_norm": 0.9443843149251016, "learning_rate": 8.566570061485105e-06, "loss": 0.4877, "step": 10310 }, { "epoch": 0.9676238738738738, "grad_norm": 1.1454393832357577, "learning_rate": 8.566187403510952e-06, "loss": 0.4913, "step": 10311 }, { "epoch": 0.9677177177177178, "grad_norm": 0.8953047268553574, "learning_rate": 8.565804703016729e-06, "loss": 0.4521, "step": 10312 }, { "epoch": 0.9678115615615616, "grad_norm": 1.0217059968088105, "learning_rate": 8.565421960006997e-06, "loss": 0.5162, "step": 10313 }, { "epoch": 0.9679054054054054, "grad_norm": 0.9580721560085271, "learning_rate": 8.565039174486317e-06, "loss": 0.4627, "step": 10314 }, { "epoch": 0.9679992492492493, "grad_norm": 0.888511043436252, "learning_rate": 8.564656346459256e-06, "loss": 0.4537, "step": 10315 }, { "epoch": 0.9680930930930931, "grad_norm": 1.0200197681043277, "learning_rate": 8.564273475930381e-06, "loss": 0.4762, "step": 10316 }, { "epoch": 0.9681869369369369, "grad_norm": 1.0299380386302013, "learning_rate": 8.563890562904251e-06, "loss": 0.4894, "step": 10317 }, { "epoch": 0.9682807807807807, "grad_norm": 0.9860046994900948, "learning_rate": 8.563507607385435e-06, "loss": 0.4384, "step": 10318 }, { "epoch": 0.9683746246246246, "grad_norm": 1.0002594690677218, "learning_rate": 8.5631246093785e-06, "loss": 0.4789, "step": 10319 }, { "epoch": 0.9684684684684685, "grad_norm": 1.2405201407452342, "learning_rate": 8.56274156888801e-06, "loss": 0.4577, "step": 10320 }, { "epoch": 0.9685623123123123, "grad_norm": 0.9888535782886027, "learning_rate": 8.562358485918532e-06, "loss": 0.5326, "step": 10321 }, { "epoch": 0.9686561561561562, "grad_norm": 1.9813339680837958, "learning_rate": 8.561975360474634e-06, "loss": 0.515, "step": 10322 }, { "epoch": 0.96875, "grad_norm": 0.8638143336130584, "learning_rate": 8.561592192560887e-06, "loss": 0.4265, "step": 10323 }, { "epoch": 0.9688438438438438, "grad_norm": 0.8840355075367853, "learning_rate": 8.561208982181854e-06, "loss": 0.4558, "step": 10324 }, { "epoch": 0.9689376876876877, "grad_norm": 0.8637411220167431, "learning_rate": 8.560825729342108e-06, "loss": 0.4639, "step": 10325 }, { "epoch": 0.9690315315315315, "grad_norm": 3.2297056929090506, "learning_rate": 8.560442434046218e-06, "loss": 0.463, "step": 10326 }, { "epoch": 0.9691253753753754, "grad_norm": 1.0532461195209486, "learning_rate": 8.560059096298756e-06, "loss": 0.4873, "step": 10327 }, { "epoch": 0.9692192192192193, "grad_norm": 0.9179721800642677, "learning_rate": 8.559675716104288e-06, "loss": 0.4019, "step": 10328 }, { "epoch": 0.9693130630630631, "grad_norm": 0.9342750157878045, "learning_rate": 8.559292293467388e-06, "loss": 0.5101, "step": 10329 }, { "epoch": 0.9694069069069069, "grad_norm": 0.9807699930635809, "learning_rate": 8.558908828392627e-06, "loss": 0.4112, "step": 10330 }, { "epoch": 0.9695007507507507, "grad_norm": 0.9606281598000105, "learning_rate": 8.55852532088458e-06, "loss": 0.4507, "step": 10331 }, { "epoch": 0.9695945945945946, "grad_norm": 1.3793130610726911, "learning_rate": 8.558141770947812e-06, "loss": 0.4401, "step": 10332 }, { "epoch": 0.9696884384384384, "grad_norm": 0.9537433583071651, "learning_rate": 8.557758178586903e-06, "loss": 0.4227, "step": 10333 }, { "epoch": 0.9697822822822822, "grad_norm": 1.085214903106108, "learning_rate": 8.557374543806425e-06, "loss": 0.4963, "step": 10334 }, { "epoch": 0.9698761261261262, "grad_norm": 1.0104735948394716, "learning_rate": 8.55699086661095e-06, "loss": 0.4954, "step": 10335 }, { "epoch": 0.96996996996997, "grad_norm": 0.9827383859045469, "learning_rate": 8.556607147005058e-06, "loss": 0.463, "step": 10336 }, { "epoch": 0.9700638138138138, "grad_norm": 0.995632463091865, "learning_rate": 8.556223384993316e-06, "loss": 0.4287, "step": 10337 }, { "epoch": 0.9701576576576577, "grad_norm": 1.0266218236628284, "learning_rate": 8.555839580580308e-06, "loss": 0.4387, "step": 10338 }, { "epoch": 0.9702515015015015, "grad_norm": 0.9551934416946587, "learning_rate": 8.555455733770603e-06, "loss": 0.4214, "step": 10339 }, { "epoch": 0.9703453453453453, "grad_norm": 1.1671596416535803, "learning_rate": 8.555071844568782e-06, "loss": 0.4237, "step": 10340 }, { "epoch": 0.9704391891891891, "grad_norm": 0.8514257342055344, "learning_rate": 8.554687912979421e-06, "loss": 0.4474, "step": 10341 }, { "epoch": 0.9705330330330331, "grad_norm": 0.8816093036691542, "learning_rate": 8.554303939007099e-06, "loss": 0.4407, "step": 10342 }, { "epoch": 0.9706268768768769, "grad_norm": 0.9984134107192407, "learning_rate": 8.55391992265639e-06, "loss": 0.4281, "step": 10343 }, { "epoch": 0.9707207207207207, "grad_norm": 1.0446361272903413, "learning_rate": 8.553535863931877e-06, "loss": 0.479, "step": 10344 }, { "epoch": 0.9708145645645646, "grad_norm": 0.9291848727352221, "learning_rate": 8.553151762838137e-06, "loss": 0.4241, "step": 10345 }, { "epoch": 0.9709084084084084, "grad_norm": 1.052115060229734, "learning_rate": 8.552767619379751e-06, "loss": 0.4697, "step": 10346 }, { "epoch": 0.9710022522522522, "grad_norm": 0.9912570387598636, "learning_rate": 8.5523834335613e-06, "loss": 0.4158, "step": 10347 }, { "epoch": 0.9710960960960962, "grad_norm": 1.0290028458131282, "learning_rate": 8.551999205387361e-06, "loss": 0.4607, "step": 10348 }, { "epoch": 0.97118993993994, "grad_norm": 1.9429657907412, "learning_rate": 8.551614934862518e-06, "loss": 0.4407, "step": 10349 }, { "epoch": 0.9712837837837838, "grad_norm": 1.2139146153587714, "learning_rate": 8.551230621991353e-06, "loss": 0.4389, "step": 10350 }, { "epoch": 0.9713776276276276, "grad_norm": 0.9708304651155585, "learning_rate": 8.550846266778445e-06, "loss": 0.4507, "step": 10351 }, { "epoch": 0.9714714714714715, "grad_norm": 1.031328908446511, "learning_rate": 8.55046186922838e-06, "loss": 0.447, "step": 10352 }, { "epoch": 0.9715653153153153, "grad_norm": 1.135955347858977, "learning_rate": 8.550077429345743e-06, "loss": 0.4426, "step": 10353 }, { "epoch": 0.9716591591591591, "grad_norm": 1.0668883114533374, "learning_rate": 8.549692947135113e-06, "loss": 0.4505, "step": 10354 }, { "epoch": 0.971753003003003, "grad_norm": 1.0193183145660318, "learning_rate": 8.549308422601076e-06, "loss": 0.4824, "step": 10355 }, { "epoch": 0.9718468468468469, "grad_norm": 1.6211585744928207, "learning_rate": 8.548923855748217e-06, "loss": 0.4821, "step": 10356 }, { "epoch": 0.9719406906906907, "grad_norm": 0.9441475848977998, "learning_rate": 8.548539246581121e-06, "loss": 0.4968, "step": 10357 }, { "epoch": 0.9720345345345346, "grad_norm": 1.0148943808279955, "learning_rate": 8.548154595104374e-06, "loss": 0.4376, "step": 10358 }, { "epoch": 0.9721283783783784, "grad_norm": 1.0003520036340718, "learning_rate": 8.547769901322562e-06, "loss": 0.4342, "step": 10359 }, { "epoch": 0.9722222222222222, "grad_norm": 1.0033221524100107, "learning_rate": 8.547385165240273e-06, "loss": 0.4624, "step": 10360 }, { "epoch": 0.972316066066066, "grad_norm": 1.3619479519737143, "learning_rate": 8.547000386862091e-06, "loss": 0.505, "step": 10361 }, { "epoch": 0.9724099099099099, "grad_norm": 0.8478498570528398, "learning_rate": 8.546615566192609e-06, "loss": 0.441, "step": 10362 }, { "epoch": 0.9725037537537538, "grad_norm": 1.3064471478272408, "learning_rate": 8.54623070323641e-06, "loss": 0.4325, "step": 10363 }, { "epoch": 0.9725975975975976, "grad_norm": 1.101532921406659, "learning_rate": 8.545845797998086e-06, "loss": 0.5078, "step": 10364 }, { "epoch": 0.9726914414414415, "grad_norm": 1.253635288460638, "learning_rate": 8.545460850482225e-06, "loss": 0.4668, "step": 10365 }, { "epoch": 0.9727852852852853, "grad_norm": 0.9111682455529982, "learning_rate": 8.545075860693417e-06, "loss": 0.4694, "step": 10366 }, { "epoch": 0.9728791291291291, "grad_norm": 1.1106912140782845, "learning_rate": 8.544690828636251e-06, "loss": 0.4623, "step": 10367 }, { "epoch": 0.972972972972973, "grad_norm": 1.6491879385343986, "learning_rate": 8.54430575431532e-06, "loss": 0.4648, "step": 10368 }, { "epoch": 0.9730668168168168, "grad_norm": 0.9790766239826267, "learning_rate": 8.543920637735215e-06, "loss": 0.4806, "step": 10369 }, { "epoch": 0.9731606606606606, "grad_norm": 0.9446576968044816, "learning_rate": 8.543535478900527e-06, "loss": 0.4397, "step": 10370 }, { "epoch": 0.9732545045045045, "grad_norm": 0.9950864239143835, "learning_rate": 8.543150277815847e-06, "loss": 0.4539, "step": 10371 }, { "epoch": 0.9733483483483484, "grad_norm": 2.364638100076736, "learning_rate": 8.542765034485772e-06, "loss": 0.4862, "step": 10372 }, { "epoch": 0.9734421921921922, "grad_norm": 0.9202289570581758, "learning_rate": 8.542379748914889e-06, "loss": 0.4699, "step": 10373 }, { "epoch": 0.973536036036036, "grad_norm": 1.180495916700047, "learning_rate": 8.541994421107797e-06, "loss": 0.472, "step": 10374 }, { "epoch": 0.9736298798798799, "grad_norm": 1.2371344427848727, "learning_rate": 8.54160905106909e-06, "loss": 0.4232, "step": 10375 }, { "epoch": 0.9737237237237237, "grad_norm": 1.057912188736135, "learning_rate": 8.54122363880336e-06, "loss": 0.4777, "step": 10376 }, { "epoch": 0.9738175675675675, "grad_norm": 0.8693820891878375, "learning_rate": 8.540838184315203e-06, "loss": 0.4832, "step": 10377 }, { "epoch": 0.9739114114114115, "grad_norm": 0.9711031377064573, "learning_rate": 8.540452687609218e-06, "loss": 0.4486, "step": 10378 }, { "epoch": 0.9740052552552553, "grad_norm": 0.9376645432093457, "learning_rate": 8.540067148689996e-06, "loss": 0.4857, "step": 10379 }, { "epoch": 0.9740990990990991, "grad_norm": 0.7903778900853554, "learning_rate": 8.53968156756214e-06, "loss": 0.435, "step": 10380 }, { "epoch": 0.9741929429429429, "grad_norm": 1.2887072807856481, "learning_rate": 8.539295944230242e-06, "loss": 0.4526, "step": 10381 }, { "epoch": 0.9742867867867868, "grad_norm": 0.8001316140890361, "learning_rate": 8.538910278698901e-06, "loss": 0.4469, "step": 10382 }, { "epoch": 0.9743806306306306, "grad_norm": 1.0711794821388383, "learning_rate": 8.538524570972718e-06, "loss": 0.4818, "step": 10383 }, { "epoch": 0.9744744744744744, "grad_norm": 1.102275168372526, "learning_rate": 8.53813882105629e-06, "loss": 0.4884, "step": 10384 }, { "epoch": 0.9745683183183184, "grad_norm": 1.0010394263234033, "learning_rate": 8.537753028954215e-06, "loss": 0.4786, "step": 10385 }, { "epoch": 0.9746621621621622, "grad_norm": 2.1536208821767717, "learning_rate": 8.537367194671096e-06, "loss": 0.4203, "step": 10386 }, { "epoch": 0.974756006006006, "grad_norm": 1.1010537548776924, "learning_rate": 8.53698131821153e-06, "loss": 0.4362, "step": 10387 }, { "epoch": 0.9748498498498499, "grad_norm": 1.0351673913778945, "learning_rate": 8.53659539958012e-06, "loss": 0.4654, "step": 10388 }, { "epoch": 0.9749436936936937, "grad_norm": 0.9627645553916393, "learning_rate": 8.536209438781469e-06, "loss": 0.4473, "step": 10389 }, { "epoch": 0.9750375375375375, "grad_norm": 0.858989945246465, "learning_rate": 8.535823435820174e-06, "loss": 0.4886, "step": 10390 }, { "epoch": 0.9751313813813813, "grad_norm": 0.9177883466041704, "learning_rate": 8.535437390700841e-06, "loss": 0.4299, "step": 10391 }, { "epoch": 0.9752252252252253, "grad_norm": 1.1740121674039257, "learning_rate": 8.535051303428071e-06, "loss": 0.5172, "step": 10392 }, { "epoch": 0.9753190690690691, "grad_norm": 1.01573879667758, "learning_rate": 8.534665174006469e-06, "loss": 0.4793, "step": 10393 }, { "epoch": 0.9754129129129129, "grad_norm": 1.1818374163027552, "learning_rate": 8.534279002440639e-06, "loss": 0.4628, "step": 10394 }, { "epoch": 0.9755067567567568, "grad_norm": 0.8968088783993902, "learning_rate": 8.533892788735182e-06, "loss": 0.5064, "step": 10395 }, { "epoch": 0.9756006006006006, "grad_norm": 1.0649466690973497, "learning_rate": 8.533506532894707e-06, "loss": 0.432, "step": 10396 }, { "epoch": 0.9756944444444444, "grad_norm": 1.1486673410264754, "learning_rate": 8.53312023492382e-06, "loss": 0.4898, "step": 10397 }, { "epoch": 0.9757882882882883, "grad_norm": 1.3934035181047795, "learning_rate": 8.532733894827121e-06, "loss": 0.4359, "step": 10398 }, { "epoch": 0.9758821321321322, "grad_norm": 1.129523699855025, "learning_rate": 8.53234751260922e-06, "loss": 0.4628, "step": 10399 }, { "epoch": 0.975975975975976, "grad_norm": 0.8557915817931169, "learning_rate": 8.531961088274727e-06, "loss": 0.4271, "step": 10400 }, { "epoch": 0.9760698198198198, "grad_norm": 1.0200748393288366, "learning_rate": 8.531574621828244e-06, "loss": 0.4628, "step": 10401 }, { "epoch": 0.9761636636636637, "grad_norm": 1.0610967363175996, "learning_rate": 8.531188113274381e-06, "loss": 0.4788, "step": 10402 }, { "epoch": 0.9762575075075075, "grad_norm": 0.9410902835501682, "learning_rate": 8.530801562617748e-06, "loss": 0.4338, "step": 10403 }, { "epoch": 0.9763513513513513, "grad_norm": 1.0640577977113292, "learning_rate": 8.53041496986295e-06, "loss": 0.4911, "step": 10404 }, { "epoch": 0.9764451951951952, "grad_norm": 1.4655892195162827, "learning_rate": 8.530028335014602e-06, "loss": 0.4855, "step": 10405 }, { "epoch": 0.976539039039039, "grad_norm": 0.9412131369924195, "learning_rate": 8.52964165807731e-06, "loss": 0.4793, "step": 10406 }, { "epoch": 0.9766328828828829, "grad_norm": 1.2369698214294957, "learning_rate": 8.529254939055683e-06, "loss": 0.4892, "step": 10407 }, { "epoch": 0.9767267267267268, "grad_norm": 0.9008510324905971, "learning_rate": 8.528868177954334e-06, "loss": 0.4433, "step": 10408 }, { "epoch": 0.9768205705705706, "grad_norm": 1.045514946187295, "learning_rate": 8.528481374777877e-06, "loss": 0.4902, "step": 10409 }, { "epoch": 0.9769144144144144, "grad_norm": 0.9458273130451859, "learning_rate": 8.52809452953092e-06, "loss": 0.43, "step": 10410 }, { "epoch": 0.9770082582582582, "grad_norm": 0.9853551078330245, "learning_rate": 8.527707642218075e-06, "loss": 0.4906, "step": 10411 }, { "epoch": 0.9771021021021021, "grad_norm": 1.797404817260816, "learning_rate": 8.527320712843958e-06, "loss": 0.4764, "step": 10412 }, { "epoch": 0.9771959459459459, "grad_norm": 1.2076459428364463, "learning_rate": 8.526933741413182e-06, "loss": 0.4218, "step": 10413 }, { "epoch": 0.9772897897897898, "grad_norm": 1.3399328207838301, "learning_rate": 8.526546727930358e-06, "loss": 0.4548, "step": 10414 }, { "epoch": 0.9773836336336337, "grad_norm": 1.0263005842316189, "learning_rate": 8.526159672400105e-06, "loss": 0.482, "step": 10415 }, { "epoch": 0.9774774774774775, "grad_norm": 1.041271453714415, "learning_rate": 8.525772574827032e-06, "loss": 0.4543, "step": 10416 }, { "epoch": 0.9775713213213213, "grad_norm": 0.9433357461791575, "learning_rate": 8.525385435215759e-06, "loss": 0.4294, "step": 10417 }, { "epoch": 0.9776651651651652, "grad_norm": 0.8014201624925104, "learning_rate": 8.524998253570902e-06, "loss": 0.398, "step": 10418 }, { "epoch": 0.977759009009009, "grad_norm": 0.9953017647553458, "learning_rate": 8.524611029897075e-06, "loss": 0.4785, "step": 10419 }, { "epoch": 0.9778528528528528, "grad_norm": 0.9282045150491797, "learning_rate": 8.524223764198893e-06, "loss": 0.4401, "step": 10420 }, { "epoch": 0.9779466966966966, "grad_norm": 1.025864281082413, "learning_rate": 8.52383645648098e-06, "loss": 0.4425, "step": 10421 }, { "epoch": 0.9780405405405406, "grad_norm": 1.568640190427698, "learning_rate": 8.52344910674795e-06, "loss": 0.5018, "step": 10422 }, { "epoch": 0.9781343843843844, "grad_norm": 0.9748874210771843, "learning_rate": 8.523061715004419e-06, "loss": 0.511, "step": 10423 }, { "epoch": 0.9782282282282282, "grad_norm": 1.1606448094019606, "learning_rate": 8.522674281255011e-06, "loss": 0.4815, "step": 10424 }, { "epoch": 0.9783220720720721, "grad_norm": 1.1025310011956089, "learning_rate": 8.522286805504341e-06, "loss": 0.4844, "step": 10425 }, { "epoch": 0.9784159159159159, "grad_norm": 1.0085697213887859, "learning_rate": 8.521899287757032e-06, "loss": 0.5021, "step": 10426 }, { "epoch": 0.9785097597597597, "grad_norm": 1.0036254180184037, "learning_rate": 8.521511728017705e-06, "loss": 0.491, "step": 10427 }, { "epoch": 0.9786036036036037, "grad_norm": 1.7541237763928774, "learning_rate": 8.521124126290975e-06, "loss": 0.4099, "step": 10428 }, { "epoch": 0.9786974474474475, "grad_norm": 0.9892603058247984, "learning_rate": 8.520736482581472e-06, "loss": 0.5148, "step": 10429 }, { "epoch": 0.9787912912912913, "grad_norm": 0.8996367920490579, "learning_rate": 8.520348796893811e-06, "loss": 0.5109, "step": 10430 }, { "epoch": 0.9788851351351351, "grad_norm": 1.1456503075977427, "learning_rate": 8.519961069232617e-06, "loss": 0.4758, "step": 10431 }, { "epoch": 0.978978978978979, "grad_norm": 1.0105820884261747, "learning_rate": 8.519573299602514e-06, "loss": 0.4664, "step": 10432 }, { "epoch": 0.9790728228228228, "grad_norm": 1.242775435013388, "learning_rate": 8.519185488008124e-06, "loss": 0.4877, "step": 10433 }, { "epoch": 0.9791666666666666, "grad_norm": 1.118111157056693, "learning_rate": 8.51879763445407e-06, "loss": 0.4555, "step": 10434 }, { "epoch": 0.9792605105105106, "grad_norm": 1.0265614949081348, "learning_rate": 8.51840973894498e-06, "loss": 0.4497, "step": 10435 }, { "epoch": 0.9793543543543544, "grad_norm": 1.0185693063061316, "learning_rate": 8.518021801485475e-06, "loss": 0.4645, "step": 10436 }, { "epoch": 0.9794481981981982, "grad_norm": 0.9713528090545046, "learning_rate": 8.517633822080183e-06, "loss": 0.4442, "step": 10437 }, { "epoch": 0.9795420420420421, "grad_norm": 1.0002245994813277, "learning_rate": 8.517245800733727e-06, "loss": 0.4861, "step": 10438 }, { "epoch": 0.9796358858858859, "grad_norm": 1.5602318461083142, "learning_rate": 8.516857737450737e-06, "loss": 0.4838, "step": 10439 }, { "epoch": 0.9797297297297297, "grad_norm": 0.9076239179570609, "learning_rate": 8.516469632235838e-06, "loss": 0.5071, "step": 10440 }, { "epoch": 0.9798235735735735, "grad_norm": 1.2736312436201858, "learning_rate": 8.516081485093657e-06, "loss": 0.4824, "step": 10441 }, { "epoch": 0.9799174174174174, "grad_norm": 1.097528618133798, "learning_rate": 8.515693296028824e-06, "loss": 0.4791, "step": 10442 }, { "epoch": 0.9800112612612613, "grad_norm": 1.7341264040347657, "learning_rate": 8.515305065045964e-06, "loss": 0.4553, "step": 10443 }, { "epoch": 0.9801051051051051, "grad_norm": 1.013646557854098, "learning_rate": 8.51491679214971e-06, "loss": 0.4881, "step": 10444 }, { "epoch": 0.980198948948949, "grad_norm": 0.8852247557464219, "learning_rate": 8.514528477344688e-06, "loss": 0.4446, "step": 10445 }, { "epoch": 0.9802927927927928, "grad_norm": 0.8421534729998497, "learning_rate": 8.51414012063553e-06, "loss": 0.4383, "step": 10446 }, { "epoch": 0.9803866366366366, "grad_norm": 0.966382026956078, "learning_rate": 8.513751722026867e-06, "loss": 0.4586, "step": 10447 }, { "epoch": 0.9804804804804805, "grad_norm": 0.9554243687539191, "learning_rate": 8.513363281523327e-06, "loss": 0.4702, "step": 10448 }, { "epoch": 0.9805743243243243, "grad_norm": 0.9514232139784174, "learning_rate": 8.512974799129543e-06, "loss": 0.4935, "step": 10449 }, { "epoch": 0.9806681681681682, "grad_norm": 0.7953647212995077, "learning_rate": 8.512586274850147e-06, "loss": 0.4294, "step": 10450 }, { "epoch": 0.980762012012012, "grad_norm": 0.8572251355580044, "learning_rate": 8.512197708689773e-06, "loss": 0.4527, "step": 10451 }, { "epoch": 0.9808558558558559, "grad_norm": 0.9431954882407526, "learning_rate": 8.51180910065305e-06, "loss": 0.4582, "step": 10452 }, { "epoch": 0.9809496996996997, "grad_norm": 0.9381273866585552, "learning_rate": 8.511420450744616e-06, "loss": 0.4863, "step": 10453 }, { "epoch": 0.9810435435435435, "grad_norm": 0.8458872261572701, "learning_rate": 8.511031758969102e-06, "loss": 0.4392, "step": 10454 }, { "epoch": 0.9811373873873874, "grad_norm": 1.858917558851347, "learning_rate": 8.510643025331142e-06, "loss": 0.4101, "step": 10455 }, { "epoch": 0.9812312312312312, "grad_norm": 1.0915745657051799, "learning_rate": 8.510254249835373e-06, "loss": 0.3918, "step": 10456 }, { "epoch": 0.981325075075075, "grad_norm": 1.6189310653427673, "learning_rate": 8.509865432486431e-06, "loss": 0.4539, "step": 10457 }, { "epoch": 0.981418918918919, "grad_norm": 0.8403901491830926, "learning_rate": 8.509476573288948e-06, "loss": 0.4164, "step": 10458 }, { "epoch": 0.9815127627627628, "grad_norm": 1.124158144168105, "learning_rate": 8.509087672247562e-06, "loss": 0.4271, "step": 10459 }, { "epoch": 0.9816066066066066, "grad_norm": 1.5382693327567067, "learning_rate": 8.508698729366913e-06, "loss": 0.4885, "step": 10460 }, { "epoch": 0.9817004504504504, "grad_norm": 0.8639582699707031, "learning_rate": 8.508309744651635e-06, "loss": 0.413, "step": 10461 }, { "epoch": 0.9817942942942943, "grad_norm": 0.8681307779613193, "learning_rate": 8.507920718106369e-06, "loss": 0.4137, "step": 10462 }, { "epoch": 0.9818881381381381, "grad_norm": 1.2289759718412132, "learning_rate": 8.507531649735749e-06, "loss": 0.3997, "step": 10463 }, { "epoch": 0.9819819819819819, "grad_norm": 0.9537556594219307, "learning_rate": 8.507142539544417e-06, "loss": 0.5092, "step": 10464 }, { "epoch": 0.9820758258258259, "grad_norm": 1.116850105210359, "learning_rate": 8.506753387537012e-06, "loss": 0.4577, "step": 10465 }, { "epoch": 0.9821696696696697, "grad_norm": 1.142536602632319, "learning_rate": 8.506364193718173e-06, "loss": 0.4136, "step": 10466 }, { "epoch": 0.9822635135135135, "grad_norm": 0.8825228945424829, "learning_rate": 8.505974958092543e-06, "loss": 0.4388, "step": 10467 }, { "epoch": 0.9823573573573574, "grad_norm": 0.9550076241146913, "learning_rate": 8.50558568066476e-06, "loss": 0.4414, "step": 10468 }, { "epoch": 0.9824512012012012, "grad_norm": 1.1079435262423927, "learning_rate": 8.505196361439464e-06, "loss": 0.4853, "step": 10469 }, { "epoch": 0.982545045045045, "grad_norm": 0.949713138381967, "learning_rate": 8.5048070004213e-06, "loss": 0.4627, "step": 10470 }, { "epoch": 0.9826388888888888, "grad_norm": 1.0433695849135964, "learning_rate": 8.504417597614912e-06, "loss": 0.4455, "step": 10471 }, { "epoch": 0.9827327327327328, "grad_norm": 0.8623794887863793, "learning_rate": 8.504028153024939e-06, "loss": 0.4149, "step": 10472 }, { "epoch": 0.9828265765765766, "grad_norm": 0.9563920216097206, "learning_rate": 8.503638666656026e-06, "loss": 0.458, "step": 10473 }, { "epoch": 0.9829204204204204, "grad_norm": 1.355462217897551, "learning_rate": 8.503249138512817e-06, "loss": 0.4733, "step": 10474 }, { "epoch": 0.9830142642642643, "grad_norm": 0.9858756914326273, "learning_rate": 8.502859568599955e-06, "loss": 0.4631, "step": 10475 }, { "epoch": 0.9831081081081081, "grad_norm": 0.7031026070653907, "learning_rate": 8.502469956922088e-06, "loss": 0.3905, "step": 10476 }, { "epoch": 0.9832019519519519, "grad_norm": 0.9220824759872874, "learning_rate": 8.502080303483858e-06, "loss": 0.4374, "step": 10477 }, { "epoch": 0.9832957957957958, "grad_norm": 0.9872470095627196, "learning_rate": 8.501690608289914e-06, "loss": 0.4747, "step": 10478 }, { "epoch": 0.9833896396396397, "grad_norm": 1.0354071937228604, "learning_rate": 8.5013008713449e-06, "loss": 0.4686, "step": 10479 }, { "epoch": 0.9834834834834835, "grad_norm": 1.1130138845094644, "learning_rate": 8.500911092653463e-06, "loss": 0.4731, "step": 10480 }, { "epoch": 0.9835773273273273, "grad_norm": 1.2292349691907882, "learning_rate": 8.500521272220252e-06, "loss": 0.4672, "step": 10481 }, { "epoch": 0.9836711711711712, "grad_norm": 1.0160408255277638, "learning_rate": 8.500131410049913e-06, "loss": 0.4257, "step": 10482 }, { "epoch": 0.983765015015015, "grad_norm": 1.2123606966323108, "learning_rate": 8.499741506147094e-06, "loss": 0.5145, "step": 10483 }, { "epoch": 0.9838588588588588, "grad_norm": 1.1904013505850566, "learning_rate": 8.49935156051645e-06, "loss": 0.5179, "step": 10484 }, { "epoch": 0.9839527027027027, "grad_norm": 0.9699952154542655, "learning_rate": 8.49896157316262e-06, "loss": 0.4945, "step": 10485 }, { "epoch": 0.9840465465465466, "grad_norm": 0.9308226323014978, "learning_rate": 8.498571544090261e-06, "loss": 0.4403, "step": 10486 }, { "epoch": 0.9841403903903904, "grad_norm": 0.9824361085895985, "learning_rate": 8.498181473304024e-06, "loss": 0.4566, "step": 10487 }, { "epoch": 0.9842342342342343, "grad_norm": 0.882291103444615, "learning_rate": 8.497791360808557e-06, "loss": 0.4668, "step": 10488 }, { "epoch": 0.9843280780780781, "grad_norm": 0.8167902211826295, "learning_rate": 8.497401206608509e-06, "loss": 0.4686, "step": 10489 }, { "epoch": 0.9844219219219219, "grad_norm": 0.9813010441371678, "learning_rate": 8.497011010708536e-06, "loss": 0.4599, "step": 10490 }, { "epoch": 0.9845157657657657, "grad_norm": 1.049148712238779, "learning_rate": 8.49662077311329e-06, "loss": 0.416, "step": 10491 }, { "epoch": 0.9846096096096096, "grad_norm": 1.1460995794781228, "learning_rate": 8.496230493827424e-06, "loss": 0.4625, "step": 10492 }, { "epoch": 0.9847034534534534, "grad_norm": 1.3902752797660844, "learning_rate": 8.495840172855588e-06, "loss": 0.4842, "step": 10493 }, { "epoch": 0.9847972972972973, "grad_norm": 0.9585853392828002, "learning_rate": 8.49544981020244e-06, "loss": 0.4619, "step": 10494 }, { "epoch": 0.9848911411411412, "grad_norm": 1.0763709540793065, "learning_rate": 8.49505940587263e-06, "loss": 0.4761, "step": 10495 }, { "epoch": 0.984984984984985, "grad_norm": 0.8893578895618776, "learning_rate": 8.494668959870818e-06, "loss": 0.434, "step": 10496 }, { "epoch": 0.9850788288288288, "grad_norm": 0.8427910404342568, "learning_rate": 8.494278472201656e-06, "loss": 0.4556, "step": 10497 }, { "epoch": 0.9851726726726727, "grad_norm": 1.094485417174022, "learning_rate": 8.4938879428698e-06, "loss": 0.496, "step": 10498 }, { "epoch": 0.9852665165165165, "grad_norm": 1.0546769190183547, "learning_rate": 8.493497371879908e-06, "loss": 0.454, "step": 10499 }, { "epoch": 0.9853603603603603, "grad_norm": 0.8116548356712908, "learning_rate": 8.493106759236635e-06, "loss": 0.3913, "step": 10500 }, { "epoch": 0.9854542042042042, "grad_norm": 0.9441870020110777, "learning_rate": 8.492716104944639e-06, "loss": 0.4141, "step": 10501 }, { "epoch": 0.9855480480480481, "grad_norm": 0.9498084937665026, "learning_rate": 8.492325409008576e-06, "loss": 0.4358, "step": 10502 }, { "epoch": 0.9856418918918919, "grad_norm": 1.072629806410745, "learning_rate": 8.49193467143311e-06, "loss": 0.4857, "step": 10503 }, { "epoch": 0.9857357357357357, "grad_norm": 0.9092690662683014, "learning_rate": 8.491543892222893e-06, "loss": 0.4365, "step": 10504 }, { "epoch": 0.9858295795795796, "grad_norm": 1.0324938348966943, "learning_rate": 8.491153071382588e-06, "loss": 0.4465, "step": 10505 }, { "epoch": 0.9859234234234234, "grad_norm": 1.0388343213396012, "learning_rate": 8.490762208916854e-06, "loss": 0.4712, "step": 10506 }, { "epoch": 0.9860172672672672, "grad_norm": 0.9157189628126006, "learning_rate": 8.49037130483035e-06, "loss": 0.4426, "step": 10507 }, { "epoch": 0.9861111111111112, "grad_norm": 0.8259542308897773, "learning_rate": 8.489980359127742e-06, "loss": 0.3595, "step": 10508 }, { "epoch": 0.986204954954955, "grad_norm": 0.9708012426860282, "learning_rate": 8.489589371813685e-06, "loss": 0.4577, "step": 10509 }, { "epoch": 0.9862987987987988, "grad_norm": 1.1249897356321397, "learning_rate": 8.489198342892841e-06, "loss": 0.4391, "step": 10510 }, { "epoch": 0.9863926426426426, "grad_norm": 0.9321525713708821, "learning_rate": 8.488807272369878e-06, "loss": 0.4906, "step": 10511 }, { "epoch": 0.9864864864864865, "grad_norm": 0.9107103999853513, "learning_rate": 8.488416160249454e-06, "loss": 0.443, "step": 10512 }, { "epoch": 0.9865803303303303, "grad_norm": 0.8925907813600363, "learning_rate": 8.488025006536234e-06, "loss": 0.4749, "step": 10513 }, { "epoch": 0.9866741741741741, "grad_norm": 0.9225482913892226, "learning_rate": 8.48763381123488e-06, "loss": 0.4749, "step": 10514 }, { "epoch": 0.9867680180180181, "grad_norm": 1.9736631921251193, "learning_rate": 8.487242574350059e-06, "loss": 0.4701, "step": 10515 }, { "epoch": 0.9868618618618619, "grad_norm": 1.001942799517119, "learning_rate": 8.486851295886433e-06, "loss": 0.4808, "step": 10516 }, { "epoch": 0.9869557057057057, "grad_norm": 0.9232605253154081, "learning_rate": 8.48645997584867e-06, "loss": 0.4714, "step": 10517 }, { "epoch": 0.9870495495495496, "grad_norm": 1.6711325987493815, "learning_rate": 8.486068614241435e-06, "loss": 0.4845, "step": 10518 }, { "epoch": 0.9871433933933934, "grad_norm": 1.11773585574132, "learning_rate": 8.48567721106939e-06, "loss": 0.4625, "step": 10519 }, { "epoch": 0.9872372372372372, "grad_norm": 1.0742239881739473, "learning_rate": 8.485285766337208e-06, "loss": 0.4574, "step": 10520 }, { "epoch": 0.987331081081081, "grad_norm": 1.0512983319507798, "learning_rate": 8.484894280049554e-06, "loss": 0.4367, "step": 10521 }, { "epoch": 0.987424924924925, "grad_norm": 1.1389834606146034, "learning_rate": 8.484502752211094e-06, "loss": 0.5269, "step": 10522 }, { "epoch": 0.9875187687687688, "grad_norm": 0.9677490602582417, "learning_rate": 8.484111182826499e-06, "loss": 0.476, "step": 10523 }, { "epoch": 0.9876126126126126, "grad_norm": 1.0757919310359627, "learning_rate": 8.483719571900436e-06, "loss": 0.4465, "step": 10524 }, { "epoch": 0.9877064564564565, "grad_norm": 0.8711927583537636, "learning_rate": 8.483327919437574e-06, "loss": 0.4315, "step": 10525 }, { "epoch": 0.9878003003003003, "grad_norm": 0.8718117742031471, "learning_rate": 8.482936225442584e-06, "loss": 0.4669, "step": 10526 }, { "epoch": 0.9878941441441441, "grad_norm": 1.0206099390328462, "learning_rate": 8.482544489920134e-06, "loss": 0.4254, "step": 10527 }, { "epoch": 0.987987987987988, "grad_norm": 0.8267113442061679, "learning_rate": 8.482152712874897e-06, "loss": 0.4462, "step": 10528 }, { "epoch": 0.9880818318318318, "grad_norm": 1.1113658343321116, "learning_rate": 8.481760894311544e-06, "loss": 0.5014, "step": 10529 }, { "epoch": 0.9881756756756757, "grad_norm": 0.9597628899420193, "learning_rate": 8.481369034234746e-06, "loss": 0.4438, "step": 10530 }, { "epoch": 0.9882695195195195, "grad_norm": 0.9359701607056671, "learning_rate": 8.480977132649172e-06, "loss": 0.4688, "step": 10531 }, { "epoch": 0.9883633633633634, "grad_norm": 0.8785731884139334, "learning_rate": 8.480585189559503e-06, "loss": 0.4289, "step": 10532 }, { "epoch": 0.9884572072072072, "grad_norm": 0.9131924924667716, "learning_rate": 8.480193204970404e-06, "loss": 0.3658, "step": 10533 }, { "epoch": 0.988551051051051, "grad_norm": 0.9288236045534201, "learning_rate": 8.479801178886552e-06, "loss": 0.4616, "step": 10534 }, { "epoch": 0.9886448948948949, "grad_norm": 0.9200734570067025, "learning_rate": 8.479409111312623e-06, "loss": 0.472, "step": 10535 }, { "epoch": 0.9887387387387387, "grad_norm": 1.01057916041934, "learning_rate": 8.479017002253288e-06, "loss": 0.5051, "step": 10536 }, { "epoch": 0.9888325825825826, "grad_norm": 1.0342223476471244, "learning_rate": 8.478624851713224e-06, "loss": 0.422, "step": 10537 }, { "epoch": 0.9889264264264265, "grad_norm": 0.9806832754423515, "learning_rate": 8.478232659697107e-06, "loss": 0.5276, "step": 10538 }, { "epoch": 0.9890202702702703, "grad_norm": 2.2296161969043267, "learning_rate": 8.477840426209611e-06, "loss": 0.405, "step": 10539 }, { "epoch": 0.9891141141141141, "grad_norm": 0.9557085887995538, "learning_rate": 8.477448151255416e-06, "loss": 0.4644, "step": 10540 }, { "epoch": 0.9892079579579579, "grad_norm": 1.5215425008665515, "learning_rate": 8.477055834839199e-06, "loss": 0.4595, "step": 10541 }, { "epoch": 0.9893018018018018, "grad_norm": 1.0258243333494874, "learning_rate": 8.476663476965633e-06, "loss": 0.4964, "step": 10542 }, { "epoch": 0.9893956456456456, "grad_norm": 1.167347168402562, "learning_rate": 8.4762710776394e-06, "loss": 0.4783, "step": 10543 }, { "epoch": 0.9894894894894894, "grad_norm": 1.018888070761025, "learning_rate": 8.475878636865179e-06, "loss": 0.4783, "step": 10544 }, { "epoch": 0.9895833333333334, "grad_norm": 0.9943705085354588, "learning_rate": 8.475486154647646e-06, "loss": 0.4634, "step": 10545 }, { "epoch": 0.9896771771771772, "grad_norm": 2.2597000680275316, "learning_rate": 8.475093630991484e-06, "loss": 0.4959, "step": 10546 }, { "epoch": 0.989771021021021, "grad_norm": 1.398797280982838, "learning_rate": 8.474701065901372e-06, "loss": 0.4759, "step": 10547 }, { "epoch": 0.9898648648648649, "grad_norm": 1.1710850747912305, "learning_rate": 8.474308459381988e-06, "loss": 0.4112, "step": 10548 }, { "epoch": 0.9899587087087087, "grad_norm": 0.9620494068093638, "learning_rate": 8.473915811438017e-06, "loss": 0.4833, "step": 10549 }, { "epoch": 0.9900525525525525, "grad_norm": 1.1256357860799338, "learning_rate": 8.473523122074138e-06, "loss": 0.5147, "step": 10550 }, { "epoch": 0.9901463963963963, "grad_norm": 0.9878667281342101, "learning_rate": 8.473130391295034e-06, "loss": 0.4682, "step": 10551 }, { "epoch": 0.9902402402402403, "grad_norm": 1.0590399456878872, "learning_rate": 8.472737619105389e-06, "loss": 0.4738, "step": 10552 }, { "epoch": 0.9903340840840841, "grad_norm": 0.9159989612129474, "learning_rate": 8.472344805509885e-06, "loss": 0.448, "step": 10553 }, { "epoch": 0.9904279279279279, "grad_norm": 0.8854967535796805, "learning_rate": 8.471951950513203e-06, "loss": 0.4648, "step": 10554 }, { "epoch": 0.9905217717717718, "grad_norm": 0.866499284624091, "learning_rate": 8.471559054120032e-06, "loss": 0.3975, "step": 10555 }, { "epoch": 0.9906156156156156, "grad_norm": 0.9799714870746664, "learning_rate": 8.47116611633505e-06, "loss": 0.4544, "step": 10556 }, { "epoch": 0.9907094594594594, "grad_norm": 0.9078008752515386, "learning_rate": 8.470773137162949e-06, "loss": 0.4877, "step": 10557 }, { "epoch": 0.9908033033033034, "grad_norm": 0.8043183404419397, "learning_rate": 8.47038011660841e-06, "loss": 0.4379, "step": 10558 }, { "epoch": 0.9908971471471472, "grad_norm": 0.955196028103477, "learning_rate": 8.46998705467612e-06, "loss": 0.4805, "step": 10559 }, { "epoch": 0.990990990990991, "grad_norm": 1.0209455695438059, "learning_rate": 8.469593951370767e-06, "loss": 0.4621, "step": 10560 }, { "epoch": 0.9910848348348348, "grad_norm": 0.8090753847868262, "learning_rate": 8.469200806697037e-06, "loss": 0.4777, "step": 10561 }, { "epoch": 0.9911786786786787, "grad_norm": 1.1831385283150775, "learning_rate": 8.468807620659618e-06, "loss": 0.4414, "step": 10562 }, { "epoch": 0.9912725225225225, "grad_norm": 1.1353756592944506, "learning_rate": 8.468414393263196e-06, "loss": 0.4793, "step": 10563 }, { "epoch": 0.9913663663663663, "grad_norm": 0.8345016676577114, "learning_rate": 8.468021124512463e-06, "loss": 0.4755, "step": 10564 }, { "epoch": 0.9914602102102102, "grad_norm": 0.9532751486389508, "learning_rate": 8.467627814412105e-06, "loss": 0.4254, "step": 10565 }, { "epoch": 0.9915540540540541, "grad_norm": 1.6002181866738214, "learning_rate": 8.467234462966812e-06, "loss": 0.471, "step": 10566 }, { "epoch": 0.9916478978978979, "grad_norm": 1.0714393700680525, "learning_rate": 8.466841070181273e-06, "loss": 0.4526, "step": 10567 }, { "epoch": 0.9917417417417418, "grad_norm": 4.145145165837686, "learning_rate": 8.466447636060182e-06, "loss": 0.4328, "step": 10568 }, { "epoch": 0.9918355855855856, "grad_norm": 0.9074402158920505, "learning_rate": 8.466054160608229e-06, "loss": 0.4412, "step": 10569 }, { "epoch": 0.9919294294294294, "grad_norm": 0.960469166830771, "learning_rate": 8.465660643830102e-06, "loss": 0.4513, "step": 10570 }, { "epoch": 0.9920232732732732, "grad_norm": 1.087461460128394, "learning_rate": 8.465267085730495e-06, "loss": 0.4367, "step": 10571 }, { "epoch": 0.9921171171171171, "grad_norm": 0.9796958228520551, "learning_rate": 8.464873486314102e-06, "loss": 0.4859, "step": 10572 }, { "epoch": 0.992210960960961, "grad_norm": 1.3224101873060747, "learning_rate": 8.464479845585613e-06, "loss": 0.5029, "step": 10573 }, { "epoch": 0.9923048048048048, "grad_norm": 1.03122072567129, "learning_rate": 8.464086163549725e-06, "loss": 0.4635, "step": 10574 }, { "epoch": 0.9923986486486487, "grad_norm": 0.8583529954719056, "learning_rate": 8.463692440211129e-06, "loss": 0.4467, "step": 10575 }, { "epoch": 0.9924924924924925, "grad_norm": 1.1237474357753596, "learning_rate": 8.46329867557452e-06, "loss": 0.4805, "step": 10576 }, { "epoch": 0.9925863363363363, "grad_norm": 1.097355652367329, "learning_rate": 8.462904869644594e-06, "loss": 0.4615, "step": 10577 }, { "epoch": 0.9926801801801802, "grad_norm": 0.9285384998071623, "learning_rate": 8.462511022426047e-06, "loss": 0.4099, "step": 10578 }, { "epoch": 0.992774024024024, "grad_norm": 0.9205155944402047, "learning_rate": 8.462117133923571e-06, "loss": 0.4319, "step": 10579 }, { "epoch": 0.9928678678678678, "grad_norm": 1.0338076536443717, "learning_rate": 8.461723204141868e-06, "loss": 0.4544, "step": 10580 }, { "epoch": 0.9929617117117117, "grad_norm": 0.9718244126267755, "learning_rate": 8.46132923308563e-06, "loss": 0.464, "step": 10581 }, { "epoch": 0.9930555555555556, "grad_norm": 1.8135817204148852, "learning_rate": 8.460935220759554e-06, "loss": 0.5756, "step": 10582 }, { "epoch": 0.9931493993993994, "grad_norm": 0.9215993071968805, "learning_rate": 8.460541167168344e-06, "loss": 0.4747, "step": 10583 }, { "epoch": 0.9932432432432432, "grad_norm": 0.997942400890925, "learning_rate": 8.46014707231669e-06, "loss": 0.5025, "step": 10584 }, { "epoch": 0.9933370870870871, "grad_norm": 0.9976021457638524, "learning_rate": 8.4597529362093e-06, "loss": 0.4693, "step": 10585 }, { "epoch": 0.9934309309309309, "grad_norm": 0.9518790360890773, "learning_rate": 8.459358758850865e-06, "loss": 0.4489, "step": 10586 }, { "epoch": 0.9935247747747747, "grad_norm": 1.757705824804955, "learning_rate": 8.45896454024609e-06, "loss": 0.4324, "step": 10587 }, { "epoch": 0.9936186186186187, "grad_norm": 0.7788237711078904, "learning_rate": 8.458570280399673e-06, "loss": 0.4354, "step": 10588 }, { "epoch": 0.9937124624624625, "grad_norm": 1.052011900748884, "learning_rate": 8.458175979316317e-06, "loss": 0.4856, "step": 10589 }, { "epoch": 0.9938063063063063, "grad_norm": 0.9301129872520114, "learning_rate": 8.45778163700072e-06, "loss": 0.4622, "step": 10590 }, { "epoch": 0.9939001501501501, "grad_norm": 1.392910054351893, "learning_rate": 8.457387253457585e-06, "loss": 0.396, "step": 10591 }, { "epoch": 0.993993993993994, "grad_norm": 1.1921481049049825, "learning_rate": 8.456992828691616e-06, "loss": 0.4478, "step": 10592 }, { "epoch": 0.9940878378378378, "grad_norm": 1.0691083366077012, "learning_rate": 8.456598362707515e-06, "loss": 0.4312, "step": 10593 }, { "epoch": 0.9941816816816816, "grad_norm": 1.1692969961439155, "learning_rate": 8.456203855509986e-06, "loss": 0.4808, "step": 10594 }, { "epoch": 0.9942755255255256, "grad_norm": 0.998756181969033, "learning_rate": 8.455809307103729e-06, "loss": 0.5188, "step": 10595 }, { "epoch": 0.9943693693693694, "grad_norm": 1.0215316966630097, "learning_rate": 8.455414717493453e-06, "loss": 0.4592, "step": 10596 }, { "epoch": 0.9944632132132132, "grad_norm": 1.0297681389822047, "learning_rate": 8.45502008668386e-06, "loss": 0.4248, "step": 10597 }, { "epoch": 0.9945570570570571, "grad_norm": 1.2240787140229938, "learning_rate": 8.454625414679655e-06, "loss": 0.4661, "step": 10598 }, { "epoch": 0.9946509009009009, "grad_norm": 0.9500473658155312, "learning_rate": 8.454230701485546e-06, "loss": 0.3834, "step": 10599 }, { "epoch": 0.9947447447447447, "grad_norm": 0.8796124637945759, "learning_rate": 8.453835947106237e-06, "loss": 0.4517, "step": 10600 }, { "epoch": 0.9948385885885885, "grad_norm": 0.9743899942936471, "learning_rate": 8.453441151546435e-06, "loss": 0.4079, "step": 10601 }, { "epoch": 0.9949324324324325, "grad_norm": 0.8511969655943085, "learning_rate": 8.453046314810848e-06, "loss": 0.4334, "step": 10602 }, { "epoch": 0.9950262762762763, "grad_norm": 1.2650790933237093, "learning_rate": 8.452651436904184e-06, "loss": 0.4862, "step": 10603 }, { "epoch": 0.9951201201201201, "grad_norm": 1.1475156815803993, "learning_rate": 8.45225651783115e-06, "loss": 0.4437, "step": 10604 }, { "epoch": 0.995213963963964, "grad_norm": 0.9614654359076845, "learning_rate": 8.451861557596456e-06, "loss": 0.4644, "step": 10605 }, { "epoch": 0.9953078078078078, "grad_norm": 0.9599138287977659, "learning_rate": 8.451466556204809e-06, "loss": 0.4934, "step": 10606 }, { "epoch": 0.9954016516516516, "grad_norm": 1.1197678935387922, "learning_rate": 8.451071513660922e-06, "loss": 0.4689, "step": 10607 }, { "epoch": 0.9954954954954955, "grad_norm": 0.9852974635531785, "learning_rate": 8.450676429969502e-06, "loss": 0.515, "step": 10608 }, { "epoch": 0.9955893393393394, "grad_norm": 1.3607337048796466, "learning_rate": 8.45028130513526e-06, "loss": 0.4917, "step": 10609 }, { "epoch": 0.9956831831831832, "grad_norm": 0.9687778130478955, "learning_rate": 8.449886139162909e-06, "loss": 0.4419, "step": 10610 }, { "epoch": 0.995777027027027, "grad_norm": 1.6453817017588293, "learning_rate": 8.449490932057161e-06, "loss": 0.4669, "step": 10611 }, { "epoch": 0.9958708708708709, "grad_norm": 0.9169779664166511, "learning_rate": 8.449095683822724e-06, "loss": 0.481, "step": 10612 }, { "epoch": 0.9959647147147147, "grad_norm": 0.7860155172356939, "learning_rate": 8.448700394464313e-06, "loss": 0.4459, "step": 10613 }, { "epoch": 0.9960585585585585, "grad_norm": 0.8615994897076575, "learning_rate": 8.448305063986643e-06, "loss": 0.4431, "step": 10614 }, { "epoch": 0.9961524024024024, "grad_norm": 0.896111657232086, "learning_rate": 8.447909692394425e-06, "loss": 0.4384, "step": 10615 }, { "epoch": 0.9962462462462462, "grad_norm": 4.782412152641005, "learning_rate": 8.447514279692375e-06, "loss": 0.4789, "step": 10616 }, { "epoch": 0.9963400900900901, "grad_norm": 1.0351757337146763, "learning_rate": 8.447118825885206e-06, "loss": 0.4366, "step": 10617 }, { "epoch": 0.996433933933934, "grad_norm": 0.9191915465052974, "learning_rate": 8.446723330977632e-06, "loss": 0.4586, "step": 10618 }, { "epoch": 0.9965277777777778, "grad_norm": 1.1757403544937064, "learning_rate": 8.446327794974373e-06, "loss": 0.5091, "step": 10619 }, { "epoch": 0.9966216216216216, "grad_norm": 1.057375375642688, "learning_rate": 8.44593221788014e-06, "loss": 0.4114, "step": 10620 }, { "epoch": 0.9967154654654654, "grad_norm": 1.098303396588818, "learning_rate": 8.445536599699651e-06, "loss": 0.4794, "step": 10621 }, { "epoch": 0.9968093093093093, "grad_norm": 0.9196210085400448, "learning_rate": 8.445140940437626e-06, "loss": 0.5079, "step": 10622 }, { "epoch": 0.9969031531531531, "grad_norm": 1.4073220080897493, "learning_rate": 8.444745240098779e-06, "loss": 0.5009, "step": 10623 }, { "epoch": 0.996996996996997, "grad_norm": 0.9513704691490483, "learning_rate": 8.444349498687828e-06, "loss": 0.4801, "step": 10624 }, { "epoch": 0.9970908408408409, "grad_norm": 0.9393284954420288, "learning_rate": 8.443953716209493e-06, "loss": 0.4573, "step": 10625 }, { "epoch": 0.9971846846846847, "grad_norm": 1.925713459404075, "learning_rate": 8.443557892668495e-06, "loss": 0.4657, "step": 10626 }, { "epoch": 0.9972785285285285, "grad_norm": 1.0533801163311676, "learning_rate": 8.443162028069547e-06, "loss": 0.5434, "step": 10627 }, { "epoch": 0.9973723723723724, "grad_norm": 1.2848381232181592, "learning_rate": 8.442766122417376e-06, "loss": 0.4031, "step": 10628 }, { "epoch": 0.9974662162162162, "grad_norm": 1.0488206439047767, "learning_rate": 8.442370175716697e-06, "loss": 0.4417, "step": 10629 }, { "epoch": 0.99756006006006, "grad_norm": 1.0319296773991042, "learning_rate": 8.441974187972236e-06, "loss": 0.5167, "step": 10630 }, { "epoch": 0.9976539039039038, "grad_norm": 0.9554872616320059, "learning_rate": 8.44157815918871e-06, "loss": 0.453, "step": 10631 }, { "epoch": 0.9977477477477478, "grad_norm": 0.9018751916095562, "learning_rate": 8.441182089370842e-06, "loss": 0.4409, "step": 10632 }, { "epoch": 0.9978415915915916, "grad_norm": 0.9326334608375869, "learning_rate": 8.440785978523355e-06, "loss": 0.4435, "step": 10633 }, { "epoch": 0.9979354354354354, "grad_norm": 0.8565265662211519, "learning_rate": 8.440389826650973e-06, "loss": 0.4245, "step": 10634 }, { "epoch": 0.9980292792792793, "grad_norm": 1.066207220729887, "learning_rate": 8.439993633758416e-06, "loss": 0.4875, "step": 10635 }, { "epoch": 0.9981231231231231, "grad_norm": 1.4642229031034553, "learning_rate": 8.439597399850412e-06, "loss": 0.4922, "step": 10636 }, { "epoch": 0.9982169669669669, "grad_norm": 1.0178246058267402, "learning_rate": 8.439201124931684e-06, "loss": 0.4783, "step": 10637 }, { "epoch": 0.9983108108108109, "grad_norm": 0.7976297078742397, "learning_rate": 8.438804809006955e-06, "loss": 0.4734, "step": 10638 }, { "epoch": 0.9984046546546547, "grad_norm": 0.9048633151935486, "learning_rate": 8.438408452080951e-06, "loss": 0.4179, "step": 10639 }, { "epoch": 0.9984984984984985, "grad_norm": 1.2894606456176363, "learning_rate": 8.4380120541584e-06, "loss": 0.4634, "step": 10640 }, { "epoch": 0.9985923423423423, "grad_norm": 1.074932525644329, "learning_rate": 8.437615615244026e-06, "loss": 0.4536, "step": 10641 }, { "epoch": 0.9986861861861862, "grad_norm": 0.9023901470541644, "learning_rate": 8.437219135342556e-06, "loss": 0.4398, "step": 10642 }, { "epoch": 0.99878003003003, "grad_norm": 1.0187195971749732, "learning_rate": 8.436822614458717e-06, "loss": 0.4587, "step": 10643 }, { "epoch": 0.9988738738738738, "grad_norm": 2.943125940303811, "learning_rate": 8.436426052597238e-06, "loss": 0.4948, "step": 10644 }, { "epoch": 0.9989677177177178, "grad_norm": 1.0390579667039623, "learning_rate": 8.436029449762848e-06, "loss": 0.4249, "step": 10645 }, { "epoch": 0.9990615615615616, "grad_norm": 1.1670039306663704, "learning_rate": 8.435632805960274e-06, "loss": 0.4846, "step": 10646 }, { "epoch": 0.9991554054054054, "grad_norm": 0.8980516305613939, "learning_rate": 8.435236121194245e-06, "loss": 0.4032, "step": 10647 }, { "epoch": 0.9992492492492493, "grad_norm": 1.0577694416432564, "learning_rate": 8.434839395469493e-06, "loss": 0.4836, "step": 10648 }, { "epoch": 0.9993430930930931, "grad_norm": 0.9443943256846201, "learning_rate": 8.434442628790744e-06, "loss": 0.4785, "step": 10649 }, { "epoch": 0.9994369369369369, "grad_norm": 0.8942405023167258, "learning_rate": 8.434045821162732e-06, "loss": 0.4759, "step": 10650 }, { "epoch": 0.9995307807807807, "grad_norm": 1.033798924981024, "learning_rate": 8.433648972590189e-06, "loss": 0.4233, "step": 10651 }, { "epoch": 0.9996246246246246, "grad_norm": 0.8996734126763046, "learning_rate": 8.433252083077843e-06, "loss": 0.4163, "step": 10652 }, { "epoch": 0.9997184684684685, "grad_norm": 0.9332768620906091, "learning_rate": 8.43285515263043e-06, "loss": 0.4666, "step": 10653 }, { "epoch": 0.9998123123123123, "grad_norm": 0.9553909228466323, "learning_rate": 8.43245818125268e-06, "loss": 0.4713, "step": 10654 }, { "epoch": 0.9999061561561562, "grad_norm": 2.282871405865836, "learning_rate": 8.432061168949327e-06, "loss": 0.4654, "step": 10655 }, { "epoch": 1.0, "grad_norm": 1.1165715666144038, "learning_rate": 8.431664115725106e-06, "loss": 0.4823, "step": 10656 }, { "epoch": 1.0000938438438438, "grad_norm": 0.8639901814710955, "learning_rate": 8.43126702158475e-06, "loss": 0.4375, "step": 10657 }, { "epoch": 1.0001876876876876, "grad_norm": 0.8585939128784679, "learning_rate": 8.430869886532993e-06, "loss": 0.3873, "step": 10658 }, { "epoch": 1.0002815315315314, "grad_norm": 0.8482391129780753, "learning_rate": 8.43047271057457e-06, "loss": 0.4066, "step": 10659 }, { "epoch": 1.0003753753753755, "grad_norm": 0.8060615552585125, "learning_rate": 8.430075493714218e-06, "loss": 0.4461, "step": 10660 }, { "epoch": 1.0004692192192193, "grad_norm": 0.9312810088131305, "learning_rate": 8.429678235956674e-06, "loss": 0.415, "step": 10661 }, { "epoch": 1.000563063063063, "grad_norm": 1.2579615185836661, "learning_rate": 8.429280937306669e-06, "loss": 0.4093, "step": 10662 }, { "epoch": 1.000656906906907, "grad_norm": 1.1593647346811917, "learning_rate": 8.428883597768944e-06, "loss": 0.3765, "step": 10663 }, { "epoch": 1.0007507507507507, "grad_norm": 0.8822269606565888, "learning_rate": 8.428486217348238e-06, "loss": 0.4008, "step": 10664 }, { "epoch": 1.0008445945945945, "grad_norm": 0.8501782804798542, "learning_rate": 8.428088796049287e-06, "loss": 0.4046, "step": 10665 }, { "epoch": 1.0009384384384385, "grad_norm": 0.976601950490404, "learning_rate": 8.42769133387683e-06, "loss": 0.395, "step": 10666 }, { "epoch": 1.0010322822822824, "grad_norm": 0.9544223205309674, "learning_rate": 8.427293830835606e-06, "loss": 0.4128, "step": 10667 }, { "epoch": 1.0011261261261262, "grad_norm": 1.1220510982005367, "learning_rate": 8.426896286930352e-06, "loss": 0.3917, "step": 10668 }, { "epoch": 1.00121996996997, "grad_norm": 1.2973659606585408, "learning_rate": 8.426498702165813e-06, "loss": 0.439, "step": 10669 }, { "epoch": 1.0013138138138138, "grad_norm": 0.8452987895161984, "learning_rate": 8.426101076546726e-06, "loss": 0.3937, "step": 10670 }, { "epoch": 1.0014076576576576, "grad_norm": 0.9267489019064433, "learning_rate": 8.425703410077833e-06, "loss": 0.4382, "step": 10671 }, { "epoch": 1.0015015015015014, "grad_norm": 0.8815430336532118, "learning_rate": 8.425305702763872e-06, "loss": 0.3947, "step": 10672 }, { "epoch": 1.0015953453453454, "grad_norm": 1.0003845442629644, "learning_rate": 8.424907954609593e-06, "loss": 0.4367, "step": 10673 }, { "epoch": 1.0016891891891893, "grad_norm": 0.8576074773044822, "learning_rate": 8.42451016561973e-06, "loss": 0.3943, "step": 10674 }, { "epoch": 1.001783033033033, "grad_norm": 0.962736882530134, "learning_rate": 8.42411233579903e-06, "loss": 0.4116, "step": 10675 }, { "epoch": 1.0018768768768769, "grad_norm": 0.8876140534322587, "learning_rate": 8.423714465152234e-06, "loss": 0.3882, "step": 10676 }, { "epoch": 1.0019707207207207, "grad_norm": 1.6171702438186588, "learning_rate": 8.42331655368409e-06, "loss": 0.422, "step": 10677 }, { "epoch": 1.0020645645645645, "grad_norm": 1.4022923076155998, "learning_rate": 8.42291860139934e-06, "loss": 0.4075, "step": 10678 }, { "epoch": 1.0021584084084083, "grad_norm": 0.8246009196126003, "learning_rate": 8.422520608302728e-06, "loss": 0.4081, "step": 10679 }, { "epoch": 1.0022522522522523, "grad_norm": 0.9260169124168951, "learning_rate": 8.422122574398998e-06, "loss": 0.4275, "step": 10680 }, { "epoch": 1.0023460960960962, "grad_norm": 0.9871152981815402, "learning_rate": 8.421724499692901e-06, "loss": 0.4362, "step": 10681 }, { "epoch": 1.00243993993994, "grad_norm": 0.937620039023422, "learning_rate": 8.421326384189177e-06, "loss": 0.3836, "step": 10682 }, { "epoch": 1.0025337837837838, "grad_norm": 0.8851681057605946, "learning_rate": 8.420928227892577e-06, "loss": 0.4055, "step": 10683 }, { "epoch": 1.0026276276276276, "grad_norm": 0.9783176831063872, "learning_rate": 8.420530030807847e-06, "loss": 0.4032, "step": 10684 }, { "epoch": 1.0027214714714714, "grad_norm": 0.9777289085367012, "learning_rate": 8.420131792939737e-06, "loss": 0.4394, "step": 10685 }, { "epoch": 1.0028153153153154, "grad_norm": 0.8554521501570731, "learning_rate": 8.419733514292992e-06, "loss": 0.4121, "step": 10686 }, { "epoch": 1.0029091591591592, "grad_norm": 0.9177710207515842, "learning_rate": 8.41933519487236e-06, "loss": 0.3916, "step": 10687 }, { "epoch": 1.003003003003003, "grad_norm": 0.9186326753116315, "learning_rate": 8.418936834682594e-06, "loss": 0.3823, "step": 10688 }, { "epoch": 1.0030968468468469, "grad_norm": 1.1284528262922506, "learning_rate": 8.41853843372844e-06, "loss": 0.4343, "step": 10689 }, { "epoch": 1.0031906906906907, "grad_norm": 0.8310893647898907, "learning_rate": 8.41813999201465e-06, "loss": 0.4299, "step": 10690 }, { "epoch": 1.0032845345345345, "grad_norm": 0.9878162029711556, "learning_rate": 8.417741509545978e-06, "loss": 0.4046, "step": 10691 }, { "epoch": 1.0033783783783783, "grad_norm": 0.9487909275164763, "learning_rate": 8.417342986327168e-06, "loss": 0.4557, "step": 10692 }, { "epoch": 1.0034722222222223, "grad_norm": 0.9757218423980267, "learning_rate": 8.416944422362977e-06, "loss": 0.4541, "step": 10693 }, { "epoch": 1.0035660660660661, "grad_norm": 1.0339013836031774, "learning_rate": 8.416545817658154e-06, "loss": 0.4177, "step": 10694 }, { "epoch": 1.00365990990991, "grad_norm": 0.9149303484173773, "learning_rate": 8.416147172217456e-06, "loss": 0.389, "step": 10695 }, { "epoch": 1.0037537537537538, "grad_norm": 1.9598256387866946, "learning_rate": 8.41574848604563e-06, "loss": 0.4343, "step": 10696 }, { "epoch": 1.0038475975975976, "grad_norm": 0.9274889331280448, "learning_rate": 8.415349759147436e-06, "loss": 0.4217, "step": 10697 }, { "epoch": 1.0039414414414414, "grad_norm": 1.0431340094930788, "learning_rate": 8.414950991527622e-06, "loss": 0.3658, "step": 10698 }, { "epoch": 1.0040352852852852, "grad_norm": 1.3307614056987933, "learning_rate": 8.414552183190946e-06, "loss": 0.4281, "step": 10699 }, { "epoch": 1.0041291291291292, "grad_norm": 0.940174461220832, "learning_rate": 8.414153334142163e-06, "loss": 0.4126, "step": 10700 }, { "epoch": 1.004222972972973, "grad_norm": 0.9066973400813425, "learning_rate": 8.413754444386027e-06, "loss": 0.3663, "step": 10701 }, { "epoch": 1.0043168168168168, "grad_norm": 1.0767307352355395, "learning_rate": 8.413355513927297e-06, "loss": 0.4024, "step": 10702 }, { "epoch": 1.0044106606606606, "grad_norm": 1.153649838951716, "learning_rate": 8.412956542770725e-06, "loss": 0.4527, "step": 10703 }, { "epoch": 1.0045045045045045, "grad_norm": 0.9177813882760277, "learning_rate": 8.412557530921073e-06, "loss": 0.3744, "step": 10704 }, { "epoch": 1.0045983483483483, "grad_norm": 1.2327247871468068, "learning_rate": 8.412158478383094e-06, "loss": 0.4281, "step": 10705 }, { "epoch": 1.0046921921921923, "grad_norm": 1.8690633340517682, "learning_rate": 8.411759385161549e-06, "loss": 0.3677, "step": 10706 }, { "epoch": 1.0047860360360361, "grad_norm": 1.1234094122196743, "learning_rate": 8.411360251261195e-06, "loss": 0.4368, "step": 10707 }, { "epoch": 1.00487987987988, "grad_norm": 0.9932682640239766, "learning_rate": 8.41096107668679e-06, "loss": 0.4485, "step": 10708 }, { "epoch": 1.0049737237237237, "grad_norm": 0.9267652921229418, "learning_rate": 8.410561861443096e-06, "loss": 0.4387, "step": 10709 }, { "epoch": 1.0050675675675675, "grad_norm": 1.1217124466621442, "learning_rate": 8.410162605534873e-06, "loss": 0.4341, "step": 10710 }, { "epoch": 1.0051614114114114, "grad_norm": 0.9392428807384969, "learning_rate": 8.409763308966876e-06, "loss": 0.4348, "step": 10711 }, { "epoch": 1.0052552552552552, "grad_norm": 0.9427788676364574, "learning_rate": 8.409363971743873e-06, "loss": 0.4324, "step": 10712 }, { "epoch": 1.0053490990990992, "grad_norm": 1.3934055799995744, "learning_rate": 8.40896459387062e-06, "loss": 0.4061, "step": 10713 }, { "epoch": 1.005442942942943, "grad_norm": 0.9906776854173877, "learning_rate": 8.408565175351882e-06, "loss": 0.419, "step": 10714 }, { "epoch": 1.0055367867867868, "grad_norm": 1.0219232224972292, "learning_rate": 8.408165716192423e-06, "loss": 0.3899, "step": 10715 }, { "epoch": 1.0056306306306306, "grad_norm": 0.8537227615331433, "learning_rate": 8.407766216396999e-06, "loss": 0.3675, "step": 10716 }, { "epoch": 1.0057244744744744, "grad_norm": 1.0695858576097212, "learning_rate": 8.407366675970378e-06, "loss": 0.3954, "step": 10717 }, { "epoch": 1.0058183183183182, "grad_norm": 0.8732769438730018, "learning_rate": 8.406967094917324e-06, "loss": 0.3737, "step": 10718 }, { "epoch": 1.005912162162162, "grad_norm": 0.902294453815538, "learning_rate": 8.4065674732426e-06, "loss": 0.4011, "step": 10719 }, { "epoch": 1.006006006006006, "grad_norm": 1.9285574672131658, "learning_rate": 8.40616781095097e-06, "loss": 0.4149, "step": 10720 }, { "epoch": 1.00609984984985, "grad_norm": 1.0830254279325637, "learning_rate": 8.405768108047203e-06, "loss": 0.4489, "step": 10721 }, { "epoch": 1.0061936936936937, "grad_norm": 1.0374461951732439, "learning_rate": 8.40536836453606e-06, "loss": 0.4208, "step": 10722 }, { "epoch": 1.0062875375375375, "grad_norm": 1.1124201203437867, "learning_rate": 8.40496858042231e-06, "loss": 0.4442, "step": 10723 }, { "epoch": 1.0063813813813813, "grad_norm": 0.8827576656668821, "learning_rate": 8.404568755710718e-06, "loss": 0.4329, "step": 10724 }, { "epoch": 1.0064752252252251, "grad_norm": 1.2635701809530828, "learning_rate": 8.404168890406053e-06, "loss": 0.3812, "step": 10725 }, { "epoch": 1.0065690690690692, "grad_norm": 1.2122758856812998, "learning_rate": 8.403768984513081e-06, "loss": 0.4486, "step": 10726 }, { "epoch": 1.006662912912913, "grad_norm": 0.8829504660204908, "learning_rate": 8.40336903803657e-06, "loss": 0.414, "step": 10727 }, { "epoch": 1.0067567567567568, "grad_norm": 0.9490653321833104, "learning_rate": 8.402969050981291e-06, "loss": 0.4501, "step": 10728 }, { "epoch": 1.0068506006006006, "grad_norm": 1.0471950409554518, "learning_rate": 8.402569023352012e-06, "loss": 0.4324, "step": 10729 }, { "epoch": 1.0069444444444444, "grad_norm": 0.9301073573152441, "learning_rate": 8.402168955153501e-06, "loss": 0.4177, "step": 10730 }, { "epoch": 1.0070382882882882, "grad_norm": 1.331032202290344, "learning_rate": 8.40176884639053e-06, "loss": 0.4286, "step": 10731 }, { "epoch": 1.007132132132132, "grad_norm": 1.6024495892836488, "learning_rate": 8.401368697067866e-06, "loss": 0.4021, "step": 10732 }, { "epoch": 1.007225975975976, "grad_norm": 1.031879245025259, "learning_rate": 8.400968507190285e-06, "loss": 0.4837, "step": 10733 }, { "epoch": 1.0073198198198199, "grad_norm": 0.9704003791451598, "learning_rate": 8.400568276762557e-06, "loss": 0.4057, "step": 10734 }, { "epoch": 1.0074136636636637, "grad_norm": 0.991769790011865, "learning_rate": 8.40016800578945e-06, "loss": 0.4161, "step": 10735 }, { "epoch": 1.0075075075075075, "grad_norm": 1.100837074915538, "learning_rate": 8.399767694275743e-06, "loss": 0.442, "step": 10736 }, { "epoch": 1.0076013513513513, "grad_norm": 0.8722205864067124, "learning_rate": 8.399367342226203e-06, "loss": 0.4169, "step": 10737 }, { "epoch": 1.0076951951951951, "grad_norm": 0.9003652330507349, "learning_rate": 8.398966949645607e-06, "loss": 0.4244, "step": 10738 }, { "epoch": 1.007789039039039, "grad_norm": 0.8530183760856828, "learning_rate": 8.39856651653873e-06, "loss": 0.4467, "step": 10739 }, { "epoch": 1.007882882882883, "grad_norm": 1.0088420325217244, "learning_rate": 8.398166042910342e-06, "loss": 0.4244, "step": 10740 }, { "epoch": 1.0079767267267268, "grad_norm": 0.9823165186128804, "learning_rate": 8.397765528765222e-06, "loss": 0.3509, "step": 10741 }, { "epoch": 1.0080705705705706, "grad_norm": 1.0868226600936934, "learning_rate": 8.397364974108142e-06, "loss": 0.3822, "step": 10742 }, { "epoch": 1.0081644144144144, "grad_norm": 1.1902865332094907, "learning_rate": 8.396964378943881e-06, "loss": 0.437, "step": 10743 }, { "epoch": 1.0082582582582582, "grad_norm": 0.8946354460295245, "learning_rate": 8.396563743277212e-06, "loss": 0.3725, "step": 10744 }, { "epoch": 1.008352102102102, "grad_norm": 1.4202050692723194, "learning_rate": 8.396163067112917e-06, "loss": 0.4162, "step": 10745 }, { "epoch": 1.008445945945946, "grad_norm": 0.8847308314125696, "learning_rate": 8.395762350455767e-06, "loss": 0.4079, "step": 10746 }, { "epoch": 1.0085397897897899, "grad_norm": 0.958761096671838, "learning_rate": 8.395361593310544e-06, "loss": 0.4503, "step": 10747 }, { "epoch": 1.0086336336336337, "grad_norm": 1.1726327214810848, "learning_rate": 8.394960795682026e-06, "loss": 0.4387, "step": 10748 }, { "epoch": 1.0087274774774775, "grad_norm": 0.9600828717265949, "learning_rate": 8.394559957574991e-06, "loss": 0.3665, "step": 10749 }, { "epoch": 1.0088213213213213, "grad_norm": 1.106252822590316, "learning_rate": 8.394159078994216e-06, "loss": 0.4387, "step": 10750 }, { "epoch": 1.008915165165165, "grad_norm": 2.3633111726512395, "learning_rate": 8.393758159944484e-06, "loss": 0.4013, "step": 10751 }, { "epoch": 1.009009009009009, "grad_norm": 1.0716551989088399, "learning_rate": 8.393357200430575e-06, "loss": 0.3891, "step": 10752 }, { "epoch": 1.009102852852853, "grad_norm": 0.9589157801677062, "learning_rate": 8.392956200457266e-06, "loss": 0.3871, "step": 10753 }, { "epoch": 1.0091966966966968, "grad_norm": 1.299528572105759, "learning_rate": 8.392555160029344e-06, "loss": 0.3997, "step": 10754 }, { "epoch": 1.0092905405405406, "grad_norm": 1.3291049189846103, "learning_rate": 8.392154079151585e-06, "loss": 0.4213, "step": 10755 }, { "epoch": 1.0093843843843844, "grad_norm": 1.1215401608105728, "learning_rate": 8.391752957828774e-06, "loss": 0.4111, "step": 10756 }, { "epoch": 1.0094782282282282, "grad_norm": 1.0008132461123047, "learning_rate": 8.391351796065694e-06, "loss": 0.3465, "step": 10757 }, { "epoch": 1.009572072072072, "grad_norm": 0.9995437481743475, "learning_rate": 8.390950593867128e-06, "loss": 0.4357, "step": 10758 }, { "epoch": 1.0096659159159158, "grad_norm": 1.0680836923623807, "learning_rate": 8.390549351237857e-06, "loss": 0.4235, "step": 10759 }, { "epoch": 1.0097597597597598, "grad_norm": 1.000397026065015, "learning_rate": 8.39014806818267e-06, "loss": 0.4516, "step": 10760 }, { "epoch": 1.0098536036036037, "grad_norm": 1.1776743883529248, "learning_rate": 8.389746744706347e-06, "loss": 0.3728, "step": 10761 }, { "epoch": 1.0099474474474475, "grad_norm": 0.9645026361331025, "learning_rate": 8.389345380813674e-06, "loss": 0.3921, "step": 10762 }, { "epoch": 1.0100412912912913, "grad_norm": 1.0233663927736543, "learning_rate": 8.388943976509438e-06, "loss": 0.4129, "step": 10763 }, { "epoch": 1.010135135135135, "grad_norm": 0.9844362400075448, "learning_rate": 8.388542531798426e-06, "loss": 0.4052, "step": 10764 }, { "epoch": 1.010228978978979, "grad_norm": 0.8734739680095416, "learning_rate": 8.38814104668542e-06, "loss": 0.4433, "step": 10765 }, { "epoch": 1.010322822822823, "grad_norm": 1.2320403101638486, "learning_rate": 8.38773952117521e-06, "loss": 0.4559, "step": 10766 }, { "epoch": 1.0104166666666667, "grad_norm": 0.9710325706398116, "learning_rate": 8.387337955272582e-06, "loss": 0.4211, "step": 10767 }, { "epoch": 1.0105105105105106, "grad_norm": 2.0806131743877474, "learning_rate": 8.386936348982326e-06, "loss": 0.4147, "step": 10768 }, { "epoch": 1.0106043543543544, "grad_norm": 0.9198446191316797, "learning_rate": 8.38653470230923e-06, "loss": 0.3836, "step": 10769 }, { "epoch": 1.0106981981981982, "grad_norm": 1.064349427643394, "learning_rate": 8.38613301525808e-06, "loss": 0.4108, "step": 10770 }, { "epoch": 1.010792042042042, "grad_norm": 1.0688246512113018, "learning_rate": 8.385731287833669e-06, "loss": 0.4055, "step": 10771 }, { "epoch": 1.0108858858858858, "grad_norm": 1.0621661887090925, "learning_rate": 8.385329520040786e-06, "loss": 0.4722, "step": 10772 }, { "epoch": 1.0109797297297298, "grad_norm": 0.9745442283838441, "learning_rate": 8.384927711884218e-06, "loss": 0.4456, "step": 10773 }, { "epoch": 1.0110735735735736, "grad_norm": 0.8627046824266386, "learning_rate": 8.384525863368763e-06, "loss": 0.3989, "step": 10774 }, { "epoch": 1.0111674174174174, "grad_norm": 1.154009436193299, "learning_rate": 8.384123974499205e-06, "loss": 0.3669, "step": 10775 }, { "epoch": 1.0112612612612613, "grad_norm": 1.0664515125543839, "learning_rate": 8.383722045280338e-06, "loss": 0.4205, "step": 10776 }, { "epoch": 1.011355105105105, "grad_norm": 1.0499702819645438, "learning_rate": 8.383320075716956e-06, "loss": 0.45, "step": 10777 }, { "epoch": 1.0114489489489489, "grad_norm": 0.9686530594835074, "learning_rate": 8.38291806581385e-06, "loss": 0.4176, "step": 10778 }, { "epoch": 1.0115427927927927, "grad_norm": 1.0626205334590433, "learning_rate": 8.382516015575815e-06, "loss": 0.4596, "step": 10779 }, { "epoch": 1.0116366366366367, "grad_norm": 0.9754891616858631, "learning_rate": 8.38211392500764e-06, "loss": 0.4169, "step": 10780 }, { "epoch": 1.0117304804804805, "grad_norm": 2.222133438498136, "learning_rate": 8.381711794114127e-06, "loss": 0.4117, "step": 10781 }, { "epoch": 1.0118243243243243, "grad_norm": 0.8742532396890721, "learning_rate": 8.381309622900064e-06, "loss": 0.3882, "step": 10782 }, { "epoch": 1.0119181681681682, "grad_norm": 1.3318505861841863, "learning_rate": 8.380907411370249e-06, "loss": 0.3925, "step": 10783 }, { "epoch": 1.012012012012012, "grad_norm": 0.9043044035799019, "learning_rate": 8.380505159529476e-06, "loss": 0.4077, "step": 10784 }, { "epoch": 1.0121058558558558, "grad_norm": 1.048154937325163, "learning_rate": 8.380102867382543e-06, "loss": 0.4249, "step": 10785 }, { "epoch": 1.0121996996996998, "grad_norm": 0.9040950093191025, "learning_rate": 8.379700534934246e-06, "loss": 0.4026, "step": 10786 }, { "epoch": 1.0122935435435436, "grad_norm": 0.8170970890625384, "learning_rate": 8.379298162189382e-06, "loss": 0.3954, "step": 10787 }, { "epoch": 1.0123873873873874, "grad_norm": 0.9424907351478335, "learning_rate": 8.378895749152747e-06, "loss": 0.3785, "step": 10788 }, { "epoch": 1.0124812312312312, "grad_norm": 0.9971522696711239, "learning_rate": 8.37849329582914e-06, "loss": 0.4307, "step": 10789 }, { "epoch": 1.012575075075075, "grad_norm": 1.2045266000799706, "learning_rate": 8.37809080222336e-06, "loss": 0.3847, "step": 10790 }, { "epoch": 1.0126689189189189, "grad_norm": 1.1463482841513832, "learning_rate": 8.377688268340207e-06, "loss": 0.4016, "step": 10791 }, { "epoch": 1.0127627627627627, "grad_norm": 1.1455334585971744, "learning_rate": 8.377285694184479e-06, "loss": 0.4266, "step": 10792 }, { "epoch": 1.0128566066066067, "grad_norm": 1.094266953154539, "learning_rate": 8.376883079760976e-06, "loss": 0.4359, "step": 10793 }, { "epoch": 1.0129504504504505, "grad_norm": 1.2685188526166447, "learning_rate": 8.376480425074498e-06, "loss": 0.4251, "step": 10794 }, { "epoch": 1.0130442942942943, "grad_norm": 0.9749212704488857, "learning_rate": 8.376077730129847e-06, "loss": 0.4011, "step": 10795 }, { "epoch": 1.0131381381381381, "grad_norm": 0.8985702525509279, "learning_rate": 8.375674994931824e-06, "loss": 0.3867, "step": 10796 }, { "epoch": 1.013231981981982, "grad_norm": 1.2060565772130956, "learning_rate": 8.375272219485232e-06, "loss": 0.3701, "step": 10797 }, { "epoch": 1.0133258258258258, "grad_norm": 1.5077965929389265, "learning_rate": 8.37486940379487e-06, "loss": 0.4408, "step": 10798 }, { "epoch": 1.0134196696696696, "grad_norm": 0.8566458416694328, "learning_rate": 8.374466547865543e-06, "loss": 0.4008, "step": 10799 }, { "epoch": 1.0135135135135136, "grad_norm": 0.9370907180128116, "learning_rate": 8.374063651702056e-06, "loss": 0.3748, "step": 10800 }, { "epoch": 1.0136073573573574, "grad_norm": 0.9711164138874705, "learning_rate": 8.373660715309208e-06, "loss": 0.4189, "step": 10801 }, { "epoch": 1.0137012012012012, "grad_norm": 1.2671589427933967, "learning_rate": 8.37325773869181e-06, "loss": 0.4707, "step": 10802 }, { "epoch": 1.013795045045045, "grad_norm": 0.9180773778109582, "learning_rate": 8.372854721854659e-06, "loss": 0.4056, "step": 10803 }, { "epoch": 1.0138888888888888, "grad_norm": 0.9977749442584773, "learning_rate": 8.372451664802566e-06, "loss": 0.3968, "step": 10804 }, { "epoch": 1.0139827327327327, "grad_norm": 0.9581331727319548, "learning_rate": 8.372048567540337e-06, "loss": 0.4012, "step": 10805 }, { "epoch": 1.0140765765765767, "grad_norm": 1.1159879988494132, "learning_rate": 8.371645430072772e-06, "loss": 0.4552, "step": 10806 }, { "epoch": 1.0141704204204205, "grad_norm": 1.1838245790195514, "learning_rate": 8.371242252404685e-06, "loss": 0.4387, "step": 10807 }, { "epoch": 1.0142642642642643, "grad_norm": 0.8692047615646034, "learning_rate": 8.370839034540878e-06, "loss": 0.4126, "step": 10808 }, { "epoch": 1.0143581081081081, "grad_norm": 1.2150981717190423, "learning_rate": 8.370435776486161e-06, "loss": 0.4335, "step": 10809 }, { "epoch": 1.014451951951952, "grad_norm": 1.0349483509008957, "learning_rate": 8.370032478245343e-06, "loss": 0.4106, "step": 10810 }, { "epoch": 1.0145457957957957, "grad_norm": 1.056083080236073, "learning_rate": 8.36962913982323e-06, "loss": 0.406, "step": 10811 }, { "epoch": 1.0146396396396395, "grad_norm": 0.9697346211587953, "learning_rate": 8.369225761224631e-06, "loss": 0.4488, "step": 10812 }, { "epoch": 1.0147334834834836, "grad_norm": 0.9898778456799795, "learning_rate": 8.368822342454359e-06, "loss": 0.4229, "step": 10813 }, { "epoch": 1.0148273273273274, "grad_norm": 1.1194028541041952, "learning_rate": 8.36841888351722e-06, "loss": 0.3716, "step": 10814 }, { "epoch": 1.0149211711711712, "grad_norm": 1.1087159929713601, "learning_rate": 8.368015384418027e-06, "loss": 0.454, "step": 10815 }, { "epoch": 1.015015015015015, "grad_norm": 1.073315522194894, "learning_rate": 8.36761184516159e-06, "loss": 0.481, "step": 10816 }, { "epoch": 1.0151088588588588, "grad_norm": 0.8475175575541924, "learning_rate": 8.36720826575272e-06, "loss": 0.3968, "step": 10817 }, { "epoch": 1.0152027027027026, "grad_norm": 1.0649057063237355, "learning_rate": 8.36680464619623e-06, "loss": 0.409, "step": 10818 }, { "epoch": 1.0152965465465464, "grad_norm": 0.9360482271778071, "learning_rate": 8.366400986496932e-06, "loss": 0.3992, "step": 10819 }, { "epoch": 1.0153903903903905, "grad_norm": 1.1099173248136747, "learning_rate": 8.365997286659638e-06, "loss": 0.4202, "step": 10820 }, { "epoch": 1.0154842342342343, "grad_norm": 1.1046011409264225, "learning_rate": 8.365593546689163e-06, "loss": 0.3964, "step": 10821 }, { "epoch": 1.015578078078078, "grad_norm": 1.1786574845552193, "learning_rate": 8.36518976659032e-06, "loss": 0.402, "step": 10822 }, { "epoch": 1.015671921921922, "grad_norm": 1.0470515536417462, "learning_rate": 8.364785946367924e-06, "loss": 0.4059, "step": 10823 }, { "epoch": 1.0157657657657657, "grad_norm": 1.027399893458921, "learning_rate": 8.364382086026787e-06, "loss": 0.3815, "step": 10824 }, { "epoch": 1.0158596096096095, "grad_norm": 1.0958982456537831, "learning_rate": 8.363978185571728e-06, "loss": 0.4546, "step": 10825 }, { "epoch": 1.0159534534534536, "grad_norm": 1.1801389051307787, "learning_rate": 8.363574245007563e-06, "loss": 0.4231, "step": 10826 }, { "epoch": 1.0160472972972974, "grad_norm": 0.9834636624147978, "learning_rate": 8.363170264339104e-06, "loss": 0.4113, "step": 10827 }, { "epoch": 1.0161411411411412, "grad_norm": 1.1192092873200856, "learning_rate": 8.36276624357117e-06, "loss": 0.4269, "step": 10828 }, { "epoch": 1.016234984984985, "grad_norm": 1.370586945248142, "learning_rate": 8.362362182708579e-06, "loss": 0.396, "step": 10829 }, { "epoch": 1.0163288288288288, "grad_norm": 0.95292779029187, "learning_rate": 8.361958081756148e-06, "loss": 0.433, "step": 10830 }, { "epoch": 1.0164226726726726, "grad_norm": 1.012421833163177, "learning_rate": 8.361553940718694e-06, "loss": 0.4252, "step": 10831 }, { "epoch": 1.0165165165165164, "grad_norm": 1.0208875711255871, "learning_rate": 8.361149759601039e-06, "loss": 0.4274, "step": 10832 }, { "epoch": 1.0166103603603605, "grad_norm": 0.9542143806561273, "learning_rate": 8.360745538407998e-06, "loss": 0.3887, "step": 10833 }, { "epoch": 1.0167042042042043, "grad_norm": 0.8740407467866941, "learning_rate": 8.360341277144392e-06, "loss": 0.404, "step": 10834 }, { "epoch": 1.016798048048048, "grad_norm": 0.8734882260997404, "learning_rate": 8.359936975815043e-06, "loss": 0.4747, "step": 10835 }, { "epoch": 1.0168918918918919, "grad_norm": 1.082883338500959, "learning_rate": 8.359532634424772e-06, "loss": 0.4308, "step": 10836 }, { "epoch": 1.0169857357357357, "grad_norm": 0.9840686720099188, "learning_rate": 8.359128252978393e-06, "loss": 0.3948, "step": 10837 }, { "epoch": 1.0170795795795795, "grad_norm": 0.9237818095353941, "learning_rate": 8.358723831480734e-06, "loss": 0.3937, "step": 10838 }, { "epoch": 1.0171734234234233, "grad_norm": 0.8237175959836551, "learning_rate": 8.358319369936618e-06, "loss": 0.3723, "step": 10839 }, { "epoch": 1.0172672672672673, "grad_norm": 0.8982249650380018, "learning_rate": 8.357914868350862e-06, "loss": 0.3966, "step": 10840 }, { "epoch": 1.0173611111111112, "grad_norm": 0.9584675738860783, "learning_rate": 8.357510326728295e-06, "loss": 0.3942, "step": 10841 }, { "epoch": 1.017454954954955, "grad_norm": 0.8620112387683936, "learning_rate": 8.357105745073735e-06, "loss": 0.373, "step": 10842 }, { "epoch": 1.0175487987987988, "grad_norm": 0.859376645864686, "learning_rate": 8.356701123392008e-06, "loss": 0.3789, "step": 10843 }, { "epoch": 1.0176426426426426, "grad_norm": 1.0074879801573005, "learning_rate": 8.35629646168794e-06, "loss": 0.4304, "step": 10844 }, { "epoch": 1.0177364864864864, "grad_norm": 1.1938551238783361, "learning_rate": 8.355891759966352e-06, "loss": 0.4083, "step": 10845 }, { "epoch": 1.0178303303303304, "grad_norm": 0.910592814453758, "learning_rate": 8.355487018232073e-06, "loss": 0.4048, "step": 10846 }, { "epoch": 1.0179241741741742, "grad_norm": 0.8463061161740315, "learning_rate": 8.355082236489928e-06, "loss": 0.3833, "step": 10847 }, { "epoch": 1.018018018018018, "grad_norm": 1.0162560885228185, "learning_rate": 8.354677414744741e-06, "loss": 0.3841, "step": 10848 }, { "epoch": 1.0181118618618619, "grad_norm": 1.057625197228492, "learning_rate": 8.35427255300134e-06, "loss": 0.4274, "step": 10849 }, { "epoch": 1.0182057057057057, "grad_norm": 1.0068864264825748, "learning_rate": 8.353867651264554e-06, "loss": 0.3915, "step": 10850 }, { "epoch": 1.0182995495495495, "grad_norm": 0.9335509794763639, "learning_rate": 8.35346270953921e-06, "loss": 0.4469, "step": 10851 }, { "epoch": 1.0183933933933933, "grad_norm": 0.983149586383089, "learning_rate": 8.353057727830133e-06, "loss": 0.3936, "step": 10852 }, { "epoch": 1.0184872372372373, "grad_norm": 0.975984448619659, "learning_rate": 8.352652706142157e-06, "loss": 0.432, "step": 10853 }, { "epoch": 1.0185810810810811, "grad_norm": 0.9106013925945775, "learning_rate": 8.352247644480107e-06, "loss": 0.4118, "step": 10854 }, { "epoch": 1.018674924924925, "grad_norm": 0.9091046300875677, "learning_rate": 8.351842542848811e-06, "loss": 0.4079, "step": 10855 }, { "epoch": 1.0187687687687688, "grad_norm": 0.950619725344242, "learning_rate": 8.351437401253106e-06, "loss": 0.3983, "step": 10856 }, { "epoch": 1.0188626126126126, "grad_norm": 1.3834873737276294, "learning_rate": 8.351032219697816e-06, "loss": 0.4023, "step": 10857 }, { "epoch": 1.0189564564564564, "grad_norm": 1.0989676565091766, "learning_rate": 8.350626998187775e-06, "loss": 0.3896, "step": 10858 }, { "epoch": 1.0190503003003002, "grad_norm": 3.2196792816055813, "learning_rate": 8.350221736727815e-06, "loss": 0.4386, "step": 10859 }, { "epoch": 1.0191441441441442, "grad_norm": 1.0390555203113976, "learning_rate": 8.349816435322766e-06, "loss": 0.4114, "step": 10860 }, { "epoch": 1.019237987987988, "grad_norm": 0.9404678859356246, "learning_rate": 8.34941109397746e-06, "loss": 0.3699, "step": 10861 }, { "epoch": 1.0193318318318318, "grad_norm": 1.0619427622175137, "learning_rate": 8.349005712696733e-06, "loss": 0.4221, "step": 10862 }, { "epoch": 1.0194256756756757, "grad_norm": 1.1942394283313809, "learning_rate": 8.348600291485418e-06, "loss": 0.4113, "step": 10863 }, { "epoch": 1.0195195195195195, "grad_norm": 1.0689936554326633, "learning_rate": 8.348194830348346e-06, "loss": 0.4106, "step": 10864 }, { "epoch": 1.0196133633633633, "grad_norm": 0.9411979908131185, "learning_rate": 8.347789329290352e-06, "loss": 0.3965, "step": 10865 }, { "epoch": 1.0197072072072073, "grad_norm": 0.9342496741720617, "learning_rate": 8.347383788316274e-06, "loss": 0.4609, "step": 10866 }, { "epoch": 1.0198010510510511, "grad_norm": 0.9392081552242787, "learning_rate": 8.346978207430942e-06, "loss": 0.3699, "step": 10867 }, { "epoch": 1.019894894894895, "grad_norm": 0.8641696893652392, "learning_rate": 8.346572586639199e-06, "loss": 0.3948, "step": 10868 }, { "epoch": 1.0199887387387387, "grad_norm": 1.1044322356098288, "learning_rate": 8.346166925945876e-06, "loss": 0.4225, "step": 10869 }, { "epoch": 1.0200825825825826, "grad_norm": 0.9065850286652957, "learning_rate": 8.345761225355808e-06, "loss": 0.441, "step": 10870 }, { "epoch": 1.0201764264264264, "grad_norm": 0.8946922007907139, "learning_rate": 8.345355484873838e-06, "loss": 0.4157, "step": 10871 }, { "epoch": 1.0202702702702702, "grad_norm": 0.9662500860790878, "learning_rate": 8.3449497045048e-06, "loss": 0.3944, "step": 10872 }, { "epoch": 1.0203641141141142, "grad_norm": 1.1185890389348914, "learning_rate": 8.344543884253534e-06, "loss": 0.3672, "step": 10873 }, { "epoch": 1.020457957957958, "grad_norm": 0.924046109741631, "learning_rate": 8.344138024124875e-06, "loss": 0.4377, "step": 10874 }, { "epoch": 1.0205518018018018, "grad_norm": 1.0287983535679999, "learning_rate": 8.343732124123666e-06, "loss": 0.4107, "step": 10875 }, { "epoch": 1.0206456456456456, "grad_norm": 1.2098234956534948, "learning_rate": 8.343326184254746e-06, "loss": 0.4533, "step": 10876 }, { "epoch": 1.0207394894894894, "grad_norm": 1.076891413963047, "learning_rate": 8.342920204522955e-06, "loss": 0.4389, "step": 10877 }, { "epoch": 1.0208333333333333, "grad_norm": 1.1052756746610257, "learning_rate": 8.342514184933132e-06, "loss": 0.4453, "step": 10878 }, { "epoch": 1.020927177177177, "grad_norm": 1.0290719779769488, "learning_rate": 8.342108125490118e-06, "loss": 0.4412, "step": 10879 }, { "epoch": 1.021021021021021, "grad_norm": 0.9373692901618437, "learning_rate": 8.341702026198757e-06, "loss": 0.3793, "step": 10880 }, { "epoch": 1.021114864864865, "grad_norm": 0.9457116740803042, "learning_rate": 8.34129588706389e-06, "loss": 0.3771, "step": 10881 }, { "epoch": 1.0212087087087087, "grad_norm": 1.4230781683086646, "learning_rate": 8.340889708090357e-06, "loss": 0.4204, "step": 10882 }, { "epoch": 1.0213025525525525, "grad_norm": 1.0442220789463643, "learning_rate": 8.340483489283004e-06, "loss": 0.3907, "step": 10883 }, { "epoch": 1.0213963963963963, "grad_norm": 0.9894589527808884, "learning_rate": 8.340077230646672e-06, "loss": 0.3632, "step": 10884 }, { "epoch": 1.0214902402402402, "grad_norm": 1.2194832045700172, "learning_rate": 8.339670932186208e-06, "loss": 0.3862, "step": 10885 }, { "epoch": 1.0215840840840842, "grad_norm": 1.1924054015977932, "learning_rate": 8.339264593906453e-06, "loss": 0.3776, "step": 10886 }, { "epoch": 1.021677927927928, "grad_norm": 0.8411703156752697, "learning_rate": 8.338858215812253e-06, "loss": 0.4118, "step": 10887 }, { "epoch": 1.0217717717717718, "grad_norm": 0.9917859274865148, "learning_rate": 8.338451797908456e-06, "loss": 0.4491, "step": 10888 }, { "epoch": 1.0218656156156156, "grad_norm": 1.0254016646409563, "learning_rate": 8.338045340199903e-06, "loss": 0.4403, "step": 10889 }, { "epoch": 1.0219594594594594, "grad_norm": 0.864227253938007, "learning_rate": 8.337638842691443e-06, "loss": 0.398, "step": 10890 }, { "epoch": 1.0220533033033032, "grad_norm": 1.033433619447498, "learning_rate": 8.337232305387922e-06, "loss": 0.4119, "step": 10891 }, { "epoch": 1.022147147147147, "grad_norm": 0.8798264457339888, "learning_rate": 8.336825728294188e-06, "loss": 0.4264, "step": 10892 }, { "epoch": 1.022240990990991, "grad_norm": 0.9743545547448831, "learning_rate": 8.336419111415088e-06, "loss": 0.3983, "step": 10893 }, { "epoch": 1.022334834834835, "grad_norm": 1.041735833845558, "learning_rate": 8.33601245475547e-06, "loss": 0.3738, "step": 10894 }, { "epoch": 1.0224286786786787, "grad_norm": 0.9690741555105896, "learning_rate": 8.335605758320182e-06, "loss": 0.3915, "step": 10895 }, { "epoch": 1.0225225225225225, "grad_norm": 0.996578227758468, "learning_rate": 8.335199022114075e-06, "loss": 0.4192, "step": 10896 }, { "epoch": 1.0226163663663663, "grad_norm": 1.0983252858680126, "learning_rate": 8.334792246141998e-06, "loss": 0.4043, "step": 10897 }, { "epoch": 1.0227102102102101, "grad_norm": 0.9742855760134024, "learning_rate": 8.3343854304088e-06, "loss": 0.4121, "step": 10898 }, { "epoch": 1.022804054054054, "grad_norm": 1.061580782139924, "learning_rate": 8.333978574919332e-06, "loss": 0.4216, "step": 10899 }, { "epoch": 1.022897897897898, "grad_norm": 1.0209649299660957, "learning_rate": 8.333571679678444e-06, "loss": 0.4181, "step": 10900 }, { "epoch": 1.0229917417417418, "grad_norm": 1.2049314903444661, "learning_rate": 8.333164744690991e-06, "loss": 0.4294, "step": 10901 }, { "epoch": 1.0230855855855856, "grad_norm": 1.067074016782658, "learning_rate": 8.33275776996182e-06, "loss": 0.3891, "step": 10902 }, { "epoch": 1.0231794294294294, "grad_norm": 0.8900675874046691, "learning_rate": 8.332350755495789e-06, "loss": 0.3766, "step": 10903 }, { "epoch": 1.0232732732732732, "grad_norm": 1.1928386984645443, "learning_rate": 8.331943701297745e-06, "loss": 0.3708, "step": 10904 }, { "epoch": 1.023367117117117, "grad_norm": 0.8221703470700208, "learning_rate": 8.331536607372546e-06, "loss": 0.4254, "step": 10905 }, { "epoch": 1.023460960960961, "grad_norm": 1.8603456737287252, "learning_rate": 8.331129473725041e-06, "loss": 0.4497, "step": 10906 }, { "epoch": 1.0235548048048049, "grad_norm": 0.9935482159101121, "learning_rate": 8.33072230036009e-06, "loss": 0.399, "step": 10907 }, { "epoch": 1.0236486486486487, "grad_norm": 0.9508421234123748, "learning_rate": 8.330315087282544e-06, "loss": 0.461, "step": 10908 }, { "epoch": 1.0237424924924925, "grad_norm": 0.9587151552426837, "learning_rate": 8.32990783449726e-06, "loss": 0.4327, "step": 10909 }, { "epoch": 1.0238363363363363, "grad_norm": 1.0144559310542292, "learning_rate": 8.329500542009092e-06, "loss": 0.3669, "step": 10910 }, { "epoch": 1.0239301801801801, "grad_norm": 1.3163681583442386, "learning_rate": 8.329093209822896e-06, "loss": 0.4377, "step": 10911 }, { "epoch": 1.024024024024024, "grad_norm": 1.157847541128814, "learning_rate": 8.328685837943533e-06, "loss": 0.3991, "step": 10912 }, { "epoch": 1.024117867867868, "grad_norm": 1.2294614718868646, "learning_rate": 8.328278426375855e-06, "loss": 0.4234, "step": 10913 }, { "epoch": 1.0242117117117118, "grad_norm": 0.9750814341171717, "learning_rate": 8.32787097512472e-06, "loss": 0.4488, "step": 10914 }, { "epoch": 1.0243055555555556, "grad_norm": 0.8796633569193559, "learning_rate": 8.327463484194992e-06, "loss": 0.4139, "step": 10915 }, { "epoch": 1.0243993993993994, "grad_norm": 2.197507330556978, "learning_rate": 8.327055953591522e-06, "loss": 0.3755, "step": 10916 }, { "epoch": 1.0244932432432432, "grad_norm": 1.2331154454560116, "learning_rate": 8.326648383319174e-06, "loss": 0.4351, "step": 10917 }, { "epoch": 1.024587087087087, "grad_norm": 1.509714043698803, "learning_rate": 8.326240773382803e-06, "loss": 0.3514, "step": 10918 }, { "epoch": 1.0246809309309308, "grad_norm": 0.8792584227392285, "learning_rate": 8.325833123787275e-06, "loss": 0.4564, "step": 10919 }, { "epoch": 1.0247747747747749, "grad_norm": 1.2905311748386992, "learning_rate": 8.325425434537446e-06, "loss": 0.3869, "step": 10920 }, { "epoch": 1.0248686186186187, "grad_norm": 1.1234265610589735, "learning_rate": 8.325017705638177e-06, "loss": 0.3769, "step": 10921 }, { "epoch": 1.0249624624624625, "grad_norm": 0.9080553681210316, "learning_rate": 8.324609937094333e-06, "loss": 0.4115, "step": 10922 }, { "epoch": 1.0250563063063063, "grad_norm": 1.1443563538806372, "learning_rate": 8.324202128910772e-06, "loss": 0.4493, "step": 10923 }, { "epoch": 1.02515015015015, "grad_norm": 0.8982832372302494, "learning_rate": 8.323794281092356e-06, "loss": 0.4009, "step": 10924 }, { "epoch": 1.025243993993994, "grad_norm": 0.9750789551997201, "learning_rate": 8.323386393643953e-06, "loss": 0.3968, "step": 10925 }, { "epoch": 1.025337837837838, "grad_norm": 0.9412306740672152, "learning_rate": 8.322978466570421e-06, "loss": 0.4083, "step": 10926 }, { "epoch": 1.0254316816816818, "grad_norm": 1.076950169387436, "learning_rate": 8.322570499876626e-06, "loss": 0.3745, "step": 10927 }, { "epoch": 1.0255255255255256, "grad_norm": 0.9753243088866786, "learning_rate": 8.322162493567432e-06, "loss": 0.4049, "step": 10928 }, { "epoch": 1.0256193693693694, "grad_norm": 1.023429107242618, "learning_rate": 8.321754447647703e-06, "loss": 0.3967, "step": 10929 }, { "epoch": 1.0257132132132132, "grad_norm": 0.9453550869665099, "learning_rate": 8.321346362122306e-06, "loss": 0.4179, "step": 10930 }, { "epoch": 1.025807057057057, "grad_norm": 1.195689656275919, "learning_rate": 8.320938236996106e-06, "loss": 0.4293, "step": 10931 }, { "epoch": 1.0259009009009008, "grad_norm": 0.8040695800210257, "learning_rate": 8.320530072273967e-06, "loss": 0.4133, "step": 10932 }, { "epoch": 1.0259947447447448, "grad_norm": 1.4110157192392647, "learning_rate": 8.320121867960757e-06, "loss": 0.4248, "step": 10933 }, { "epoch": 1.0260885885885886, "grad_norm": 0.8542856250955937, "learning_rate": 8.319713624061343e-06, "loss": 0.3669, "step": 10934 }, { "epoch": 1.0261824324324325, "grad_norm": 1.017423903386344, "learning_rate": 8.319305340580593e-06, "loss": 0.3912, "step": 10935 }, { "epoch": 1.0262762762762763, "grad_norm": 1.1871626947887648, "learning_rate": 8.318897017523375e-06, "loss": 0.4116, "step": 10936 }, { "epoch": 1.02637012012012, "grad_norm": 1.0387309785535608, "learning_rate": 8.318488654894559e-06, "loss": 0.4114, "step": 10937 }, { "epoch": 1.0264639639639639, "grad_norm": 1.042260633367692, "learning_rate": 8.31808025269901e-06, "loss": 0.4385, "step": 10938 }, { "epoch": 1.0265578078078077, "grad_norm": 1.5735389318899273, "learning_rate": 8.317671810941599e-06, "loss": 0.3987, "step": 10939 }, { "epoch": 1.0266516516516517, "grad_norm": 1.0541950121279993, "learning_rate": 8.3172633296272e-06, "loss": 0.4167, "step": 10940 }, { "epoch": 1.0267454954954955, "grad_norm": 0.9876807162043328, "learning_rate": 8.316854808760676e-06, "loss": 0.3936, "step": 10941 }, { "epoch": 1.0268393393393394, "grad_norm": 1.0780017458526991, "learning_rate": 8.316446248346904e-06, "loss": 0.3804, "step": 10942 }, { "epoch": 1.0269331831831832, "grad_norm": 1.0360372572709082, "learning_rate": 8.316037648390752e-06, "loss": 0.4516, "step": 10943 }, { "epoch": 1.027027027027027, "grad_norm": 0.9022595263442436, "learning_rate": 8.315629008897093e-06, "loss": 0.4083, "step": 10944 }, { "epoch": 1.0271208708708708, "grad_norm": 0.8994083387633288, "learning_rate": 8.315220329870799e-06, "loss": 0.4321, "step": 10945 }, { "epoch": 1.0272147147147148, "grad_norm": 1.077465071078616, "learning_rate": 8.314811611316745e-06, "loss": 0.442, "step": 10946 }, { "epoch": 1.0273085585585586, "grad_norm": 1.023138063710147, "learning_rate": 8.3144028532398e-06, "loss": 0.426, "step": 10947 }, { "epoch": 1.0274024024024024, "grad_norm": 0.9134335735400002, "learning_rate": 8.313994055644839e-06, "loss": 0.3359, "step": 10948 }, { "epoch": 1.0274962462462462, "grad_norm": 0.8852467464796467, "learning_rate": 8.313585218536738e-06, "loss": 0.4009, "step": 10949 }, { "epoch": 1.02759009009009, "grad_norm": 0.8749653972971788, "learning_rate": 8.31317634192037e-06, "loss": 0.4125, "step": 10950 }, { "epoch": 1.0276839339339339, "grad_norm": 1.2195479100657167, "learning_rate": 8.312767425800609e-06, "loss": 0.3875, "step": 10951 }, { "epoch": 1.0277777777777777, "grad_norm": 1.0257476616280377, "learning_rate": 8.312358470182336e-06, "loss": 0.4475, "step": 10952 }, { "epoch": 1.0278716216216217, "grad_norm": 1.7738807021058092, "learning_rate": 8.31194947507042e-06, "loss": 0.4665, "step": 10953 }, { "epoch": 1.0279654654654655, "grad_norm": 0.8191453664693717, "learning_rate": 8.311540440469743e-06, "loss": 0.324, "step": 10954 }, { "epoch": 1.0280593093093093, "grad_norm": 0.9487547243071371, "learning_rate": 8.311131366385176e-06, "loss": 0.4215, "step": 10955 }, { "epoch": 1.0281531531531531, "grad_norm": 0.9472278491792886, "learning_rate": 8.310722252821603e-06, "loss": 0.4161, "step": 10956 }, { "epoch": 1.028246996996997, "grad_norm": 0.9898981582441961, "learning_rate": 8.310313099783898e-06, "loss": 0.4125, "step": 10957 }, { "epoch": 1.0283408408408408, "grad_norm": 0.9861888529453945, "learning_rate": 8.309903907276942e-06, "loss": 0.378, "step": 10958 }, { "epoch": 1.0284346846846848, "grad_norm": 0.9295535785823659, "learning_rate": 8.309494675305609e-06, "loss": 0.4047, "step": 10959 }, { "epoch": 1.0285285285285286, "grad_norm": 1.0248230162916663, "learning_rate": 8.309085403874783e-06, "loss": 0.4739, "step": 10960 }, { "epoch": 1.0286223723723724, "grad_norm": 0.9212276852560294, "learning_rate": 8.308676092989344e-06, "loss": 0.422, "step": 10961 }, { "epoch": 1.0287162162162162, "grad_norm": 0.936304007603953, "learning_rate": 8.308266742654168e-06, "loss": 0.4532, "step": 10962 }, { "epoch": 1.02881006006006, "grad_norm": 0.8604984277885767, "learning_rate": 8.307857352874137e-06, "loss": 0.3787, "step": 10963 }, { "epoch": 1.0289039039039038, "grad_norm": 0.7507396638267849, "learning_rate": 8.307447923654138e-06, "loss": 0.361, "step": 10964 }, { "epoch": 1.0289977477477477, "grad_norm": 0.9113089259637449, "learning_rate": 8.307038454999046e-06, "loss": 0.3751, "step": 10965 }, { "epoch": 1.0290915915915917, "grad_norm": 0.7959881116703996, "learning_rate": 8.306628946913744e-06, "loss": 0.4218, "step": 10966 }, { "epoch": 1.0291854354354355, "grad_norm": 1.0424828844321448, "learning_rate": 8.306219399403116e-06, "loss": 0.4152, "step": 10967 }, { "epoch": 1.0292792792792793, "grad_norm": 0.8789265944358537, "learning_rate": 8.305809812472047e-06, "loss": 0.4415, "step": 10968 }, { "epoch": 1.0293731231231231, "grad_norm": 0.8920218874199753, "learning_rate": 8.305400186125417e-06, "loss": 0.4441, "step": 10969 }, { "epoch": 1.029466966966967, "grad_norm": 1.1840254292940693, "learning_rate": 8.304990520368112e-06, "loss": 0.4096, "step": 10970 }, { "epoch": 1.0295608108108107, "grad_norm": 0.9452377557653853, "learning_rate": 8.304580815205017e-06, "loss": 0.395, "step": 10971 }, { "epoch": 1.0296546546546546, "grad_norm": 0.9087565935873901, "learning_rate": 8.304171070641015e-06, "loss": 0.3798, "step": 10972 }, { "epoch": 1.0297484984984986, "grad_norm": 0.9791388148308093, "learning_rate": 8.303761286680993e-06, "loss": 0.3952, "step": 10973 }, { "epoch": 1.0298423423423424, "grad_norm": 1.0579449359036541, "learning_rate": 8.303351463329834e-06, "loss": 0.4131, "step": 10974 }, { "epoch": 1.0299361861861862, "grad_norm": 1.1726658546100852, "learning_rate": 8.302941600592428e-06, "loss": 0.4357, "step": 10975 }, { "epoch": 1.03003003003003, "grad_norm": 1.4448908519198034, "learning_rate": 8.302531698473662e-06, "loss": 0.4251, "step": 10976 }, { "epoch": 1.0301238738738738, "grad_norm": 1.2410416408872367, "learning_rate": 8.302121756978422e-06, "loss": 0.3655, "step": 10977 }, { "epoch": 1.0302177177177176, "grad_norm": 0.9060928233697562, "learning_rate": 8.301711776111594e-06, "loss": 0.4033, "step": 10978 }, { "epoch": 1.0303115615615615, "grad_norm": 0.9617636664913614, "learning_rate": 8.30130175587807e-06, "loss": 0.4031, "step": 10979 }, { "epoch": 1.0304054054054055, "grad_norm": 0.9399859710880901, "learning_rate": 8.300891696282734e-06, "loss": 0.4163, "step": 10980 }, { "epoch": 1.0304992492492493, "grad_norm": 1.0614699632750306, "learning_rate": 8.300481597330478e-06, "loss": 0.4092, "step": 10981 }, { "epoch": 1.030593093093093, "grad_norm": 1.0179548832601617, "learning_rate": 8.300071459026193e-06, "loss": 0.4621, "step": 10982 }, { "epoch": 1.030686936936937, "grad_norm": 0.9741399424935588, "learning_rate": 8.299661281374768e-06, "loss": 0.4219, "step": 10983 }, { "epoch": 1.0307807807807807, "grad_norm": 1.1933761774534606, "learning_rate": 8.29925106438109e-06, "loss": 0.3947, "step": 10984 }, { "epoch": 1.0308746246246245, "grad_norm": 1.1346441417576252, "learning_rate": 8.298840808050057e-06, "loss": 0.4275, "step": 10985 }, { "epoch": 1.0309684684684686, "grad_norm": 1.5801556131403507, "learning_rate": 8.298430512386554e-06, "loss": 0.395, "step": 10986 }, { "epoch": 1.0310623123123124, "grad_norm": 0.9291501568620569, "learning_rate": 8.298020177395478e-06, "loss": 0.3956, "step": 10987 }, { "epoch": 1.0311561561561562, "grad_norm": 0.8875940953039723, "learning_rate": 8.297609803081718e-06, "loss": 0.4088, "step": 10988 }, { "epoch": 1.03125, "grad_norm": 0.9630545732862166, "learning_rate": 8.29719938945017e-06, "loss": 0.4608, "step": 10989 }, { "epoch": 1.0313438438438438, "grad_norm": 0.9854517032371783, "learning_rate": 8.296788936505724e-06, "loss": 0.379, "step": 10990 }, { "epoch": 1.0314376876876876, "grad_norm": 1.061425066892324, "learning_rate": 8.296378444253276e-06, "loss": 0.415, "step": 10991 }, { "epoch": 1.0315315315315314, "grad_norm": 0.9461521691268144, "learning_rate": 8.29596791269772e-06, "loss": 0.3858, "step": 10992 }, { "epoch": 1.0316253753753755, "grad_norm": 0.9705073274037621, "learning_rate": 8.295557341843949e-06, "loss": 0.4159, "step": 10993 }, { "epoch": 1.0317192192192193, "grad_norm": 0.900132601078773, "learning_rate": 8.295146731696862e-06, "loss": 0.3677, "step": 10994 }, { "epoch": 1.031813063063063, "grad_norm": 0.9042417212870402, "learning_rate": 8.294736082261352e-06, "loss": 0.3822, "step": 10995 }, { "epoch": 1.031906906906907, "grad_norm": 0.8880604031609178, "learning_rate": 8.294325393542315e-06, "loss": 0.4109, "step": 10996 }, { "epoch": 1.0320007507507507, "grad_norm": 1.0413607388939397, "learning_rate": 8.29391466554465e-06, "loss": 0.3844, "step": 10997 }, { "epoch": 1.0320945945945945, "grad_norm": 1.1892876624036008, "learning_rate": 8.293503898273253e-06, "loss": 0.3773, "step": 10998 }, { "epoch": 1.0321884384384385, "grad_norm": 0.9054314626115672, "learning_rate": 8.293093091733019e-06, "loss": 0.3753, "step": 10999 }, { "epoch": 1.0322822822822824, "grad_norm": 1.0781430170084625, "learning_rate": 8.29268224592885e-06, "loss": 0.4113, "step": 11000 }, { "epoch": 1.0323761261261262, "grad_norm": 0.9182229316400243, "learning_rate": 8.292271360865644e-06, "loss": 0.4364, "step": 11001 }, { "epoch": 1.03246996996997, "grad_norm": 1.3249851012137044, "learning_rate": 8.291860436548297e-06, "loss": 0.4474, "step": 11002 }, { "epoch": 1.0325638138138138, "grad_norm": 0.9540571901034842, "learning_rate": 8.291449472981713e-06, "loss": 0.4051, "step": 11003 }, { "epoch": 1.0326576576576576, "grad_norm": 0.9561073883598451, "learning_rate": 8.291038470170787e-06, "loss": 0.4274, "step": 11004 }, { "epoch": 1.0327515015015014, "grad_norm": 1.1643172363469072, "learning_rate": 8.290627428120423e-06, "loss": 0.4241, "step": 11005 }, { "epoch": 1.0328453453453454, "grad_norm": 1.4999699573039598, "learning_rate": 8.290216346835522e-06, "loss": 0.4093, "step": 11006 }, { "epoch": 1.0329391891891893, "grad_norm": 0.9831407445912497, "learning_rate": 8.289805226320983e-06, "loss": 0.4379, "step": 11007 }, { "epoch": 1.033033033033033, "grad_norm": 0.8494426162330231, "learning_rate": 8.28939406658171e-06, "loss": 0.3929, "step": 11008 }, { "epoch": 1.0331268768768769, "grad_norm": 0.851153830627062, "learning_rate": 8.288982867622604e-06, "loss": 0.3648, "step": 11009 }, { "epoch": 1.0332207207207207, "grad_norm": 1.0283096461290715, "learning_rate": 8.288571629448567e-06, "loss": 0.4017, "step": 11010 }, { "epoch": 1.0333145645645645, "grad_norm": 0.9649876940785084, "learning_rate": 8.288160352064505e-06, "loss": 0.4302, "step": 11011 }, { "epoch": 1.0334084084084083, "grad_norm": 1.0128010288758897, "learning_rate": 8.287749035475321e-06, "loss": 0.4156, "step": 11012 }, { "epoch": 1.0335022522522523, "grad_norm": 0.9658499061382326, "learning_rate": 8.287337679685916e-06, "loss": 0.4317, "step": 11013 }, { "epoch": 1.0335960960960962, "grad_norm": 0.9711665956667174, "learning_rate": 8.286926284701197e-06, "loss": 0.4391, "step": 11014 }, { "epoch": 1.03368993993994, "grad_norm": 0.971639026927574, "learning_rate": 8.28651485052607e-06, "loss": 0.4121, "step": 11015 }, { "epoch": 1.0337837837837838, "grad_norm": 0.9782333911267219, "learning_rate": 8.286103377165442e-06, "loss": 0.3688, "step": 11016 }, { "epoch": 1.0338776276276276, "grad_norm": 0.8950594879133015, "learning_rate": 8.285691864624212e-06, "loss": 0.3939, "step": 11017 }, { "epoch": 1.0339714714714714, "grad_norm": 0.9450761834657958, "learning_rate": 8.285280312907295e-06, "loss": 0.4203, "step": 11018 }, { "epoch": 1.0340653153153152, "grad_norm": 1.2652701322075788, "learning_rate": 8.284868722019592e-06, "loss": 0.4259, "step": 11019 }, { "epoch": 1.0341591591591592, "grad_norm": 1.4404343945684346, "learning_rate": 8.284457091966013e-06, "loss": 0.4137, "step": 11020 }, { "epoch": 1.034253003003003, "grad_norm": 0.8450840406922958, "learning_rate": 8.284045422751466e-06, "loss": 0.3559, "step": 11021 }, { "epoch": 1.0343468468468469, "grad_norm": 1.3648455615522934, "learning_rate": 8.283633714380859e-06, "loss": 0.4303, "step": 11022 }, { "epoch": 1.0344406906906907, "grad_norm": 0.969050112883974, "learning_rate": 8.283221966859101e-06, "loss": 0.4304, "step": 11023 }, { "epoch": 1.0345345345345345, "grad_norm": 1.157344759460917, "learning_rate": 8.282810180191101e-06, "loss": 0.4058, "step": 11024 }, { "epoch": 1.0346283783783783, "grad_norm": 1.0445248282713988, "learning_rate": 8.282398354381768e-06, "loss": 0.3881, "step": 11025 }, { "epoch": 1.0347222222222223, "grad_norm": 1.2447261374496164, "learning_rate": 8.281986489436013e-06, "loss": 0.3952, "step": 11026 }, { "epoch": 1.0348160660660661, "grad_norm": 0.9421446670950716, "learning_rate": 8.281574585358746e-06, "loss": 0.403, "step": 11027 }, { "epoch": 1.03490990990991, "grad_norm": 0.9441577104666428, "learning_rate": 8.281162642154882e-06, "loss": 0.4156, "step": 11028 }, { "epoch": 1.0350037537537538, "grad_norm": 1.0126600810281348, "learning_rate": 8.280750659829327e-06, "loss": 0.4629, "step": 11029 }, { "epoch": 1.0350975975975976, "grad_norm": 1.0148250457465806, "learning_rate": 8.280338638386997e-06, "loss": 0.3631, "step": 11030 }, { "epoch": 1.0351914414414414, "grad_norm": 0.8274458455815707, "learning_rate": 8.279926577832805e-06, "loss": 0.3758, "step": 11031 }, { "epoch": 1.0352852852852852, "grad_norm": 1.0415123743572947, "learning_rate": 8.27951447817166e-06, "loss": 0.3959, "step": 11032 }, { "epoch": 1.0353791291291292, "grad_norm": 0.9766281064803184, "learning_rate": 8.279102339408478e-06, "loss": 0.3992, "step": 11033 }, { "epoch": 1.035472972972973, "grad_norm": 1.059650004362789, "learning_rate": 8.278690161548174e-06, "loss": 0.4233, "step": 11034 }, { "epoch": 1.0355668168168168, "grad_norm": 1.0430109973845032, "learning_rate": 8.278277944595658e-06, "loss": 0.432, "step": 11035 }, { "epoch": 1.0356606606606606, "grad_norm": 1.018836612092426, "learning_rate": 8.277865688555853e-06, "loss": 0.4111, "step": 11036 }, { "epoch": 1.0357545045045045, "grad_norm": 0.9463531916962145, "learning_rate": 8.277453393433666e-06, "loss": 0.4531, "step": 11037 }, { "epoch": 1.0358483483483483, "grad_norm": 0.9832647479826025, "learning_rate": 8.27704105923402e-06, "loss": 0.4044, "step": 11038 }, { "epoch": 1.0359421921921923, "grad_norm": 0.8444492363489007, "learning_rate": 8.276628685961826e-06, "loss": 0.3411, "step": 11039 }, { "epoch": 1.0360360360360361, "grad_norm": 1.025116230238262, "learning_rate": 8.276216273622e-06, "loss": 0.3667, "step": 11040 }, { "epoch": 1.03612987987988, "grad_norm": 0.8356466362298289, "learning_rate": 8.275803822219464e-06, "loss": 0.3906, "step": 11041 }, { "epoch": 1.0362237237237237, "grad_norm": 0.8466307583162094, "learning_rate": 8.275391331759133e-06, "loss": 0.3868, "step": 11042 }, { "epoch": 1.0363175675675675, "grad_norm": 1.040827627138033, "learning_rate": 8.274978802245927e-06, "loss": 0.4275, "step": 11043 }, { "epoch": 1.0364114114114114, "grad_norm": 0.9199613048997084, "learning_rate": 8.274566233684763e-06, "loss": 0.373, "step": 11044 }, { "epoch": 1.0365052552552552, "grad_norm": 0.959993155167964, "learning_rate": 8.274153626080558e-06, "loss": 0.4048, "step": 11045 }, { "epoch": 1.0365990990990992, "grad_norm": 0.9445623755000023, "learning_rate": 8.273740979438237e-06, "loss": 0.4011, "step": 11046 }, { "epoch": 1.036692942942943, "grad_norm": 1.0510406230723577, "learning_rate": 8.273328293762716e-06, "loss": 0.4305, "step": 11047 }, { "epoch": 1.0367867867867868, "grad_norm": 0.8840354823437896, "learning_rate": 8.272915569058917e-06, "loss": 0.4226, "step": 11048 }, { "epoch": 1.0368806306306306, "grad_norm": 1.075213447339264, "learning_rate": 8.27250280533176e-06, "loss": 0.4041, "step": 11049 }, { "epoch": 1.0369744744744744, "grad_norm": 0.9338761629980331, "learning_rate": 8.272090002586165e-06, "loss": 0.3956, "step": 11050 }, { "epoch": 1.0370683183183182, "grad_norm": 1.2347044026710463, "learning_rate": 8.271677160827058e-06, "loss": 0.3738, "step": 11051 }, { "epoch": 1.037162162162162, "grad_norm": 1.9334670354066479, "learning_rate": 8.271264280059358e-06, "loss": 0.3882, "step": 11052 }, { "epoch": 1.037256006006006, "grad_norm": 1.0761911446225587, "learning_rate": 8.27085136028799e-06, "loss": 0.4423, "step": 11053 }, { "epoch": 1.03734984984985, "grad_norm": 0.9723062384700087, "learning_rate": 8.270438401517876e-06, "loss": 0.3925, "step": 11054 }, { "epoch": 1.0374436936936937, "grad_norm": 1.1084756826187765, "learning_rate": 8.27002540375394e-06, "loss": 0.4037, "step": 11055 }, { "epoch": 1.0375375375375375, "grad_norm": 1.125061696511475, "learning_rate": 8.269612367001105e-06, "loss": 0.4488, "step": 11056 }, { "epoch": 1.0376313813813813, "grad_norm": 0.9082368202884181, "learning_rate": 8.2691992912643e-06, "loss": 0.3933, "step": 11057 }, { "epoch": 1.0377252252252251, "grad_norm": 0.9223664621062349, "learning_rate": 8.268786176548444e-06, "loss": 0.4482, "step": 11058 }, { "epoch": 1.037819069069069, "grad_norm": 1.7700807258278017, "learning_rate": 8.268373022858466e-06, "loss": 0.4235, "step": 11059 }, { "epoch": 1.037912912912913, "grad_norm": 0.9761551902313258, "learning_rate": 8.267959830199293e-06, "loss": 0.3679, "step": 11060 }, { "epoch": 1.0380067567567568, "grad_norm": 0.9218486236432496, "learning_rate": 8.26754659857585e-06, "loss": 0.3722, "step": 11061 }, { "epoch": 1.0381006006006006, "grad_norm": 0.8960806262836524, "learning_rate": 8.267133327993064e-06, "loss": 0.4137, "step": 11062 }, { "epoch": 1.0381944444444444, "grad_norm": 3.0051559835153347, "learning_rate": 8.266720018455865e-06, "loss": 0.3864, "step": 11063 }, { "epoch": 1.0382882882882882, "grad_norm": 0.8754920271640272, "learning_rate": 8.266306669969175e-06, "loss": 0.4182, "step": 11064 }, { "epoch": 1.038382132132132, "grad_norm": 0.9207772082596323, "learning_rate": 8.265893282537929e-06, "loss": 0.4093, "step": 11065 }, { "epoch": 1.038475975975976, "grad_norm": 1.1227321088462991, "learning_rate": 8.265479856167052e-06, "loss": 0.407, "step": 11066 }, { "epoch": 1.0385698198198199, "grad_norm": 0.9443337346221953, "learning_rate": 8.265066390861473e-06, "loss": 0.392, "step": 11067 }, { "epoch": 1.0386636636636637, "grad_norm": 1.0766702626046176, "learning_rate": 8.264652886626126e-06, "loss": 0.4775, "step": 11068 }, { "epoch": 1.0387575075075075, "grad_norm": 0.9052750997938298, "learning_rate": 8.264239343465936e-06, "loss": 0.3918, "step": 11069 }, { "epoch": 1.0388513513513513, "grad_norm": 1.0365864433457053, "learning_rate": 8.263825761385838e-06, "loss": 0.441, "step": 11070 }, { "epoch": 1.0389451951951951, "grad_norm": 1.2278970382242487, "learning_rate": 8.26341214039076e-06, "loss": 0.4457, "step": 11071 }, { "epoch": 1.039039039039039, "grad_norm": 0.9067806044518266, "learning_rate": 8.262998480485636e-06, "loss": 0.3785, "step": 11072 }, { "epoch": 1.039132882882883, "grad_norm": 1.1266288418043149, "learning_rate": 8.262584781675396e-06, "loss": 0.3979, "step": 11073 }, { "epoch": 1.0392267267267268, "grad_norm": 0.8801453055142764, "learning_rate": 8.262171043964973e-06, "loss": 0.3938, "step": 11074 }, { "epoch": 1.0393205705705706, "grad_norm": 0.9357011347780675, "learning_rate": 8.261757267359302e-06, "loss": 0.4051, "step": 11075 }, { "epoch": 1.0394144144144144, "grad_norm": 1.0196389624080364, "learning_rate": 8.261343451863315e-06, "loss": 0.3969, "step": 11076 }, { "epoch": 1.0395082582582582, "grad_norm": 1.0073465202144152, "learning_rate": 8.260929597481945e-06, "loss": 0.42, "step": 11077 }, { "epoch": 1.039602102102102, "grad_norm": 0.9448352924424266, "learning_rate": 8.26051570422013e-06, "loss": 0.3694, "step": 11078 }, { "epoch": 1.039695945945946, "grad_norm": 0.9629152463757732, "learning_rate": 8.260101772082802e-06, "loss": 0.4254, "step": 11079 }, { "epoch": 1.0397897897897899, "grad_norm": 1.4941446610714615, "learning_rate": 8.259687801074895e-06, "loss": 0.4455, "step": 11080 }, { "epoch": 1.0398836336336337, "grad_norm": 0.8891022298625816, "learning_rate": 8.25927379120135e-06, "loss": 0.3991, "step": 11081 }, { "epoch": 1.0399774774774775, "grad_norm": 0.9986758904639818, "learning_rate": 8.258859742467097e-06, "loss": 0.4228, "step": 11082 }, { "epoch": 1.0400713213213213, "grad_norm": 1.0503230305900702, "learning_rate": 8.258445654877076e-06, "loss": 0.4055, "step": 11083 }, { "epoch": 1.040165165165165, "grad_norm": 0.7978383005171978, "learning_rate": 8.258031528436225e-06, "loss": 0.3945, "step": 11084 }, { "epoch": 1.040259009009009, "grad_norm": 1.0456451144366283, "learning_rate": 8.257617363149481e-06, "loss": 0.4198, "step": 11085 }, { "epoch": 1.040352852852853, "grad_norm": 0.941486712877791, "learning_rate": 8.25720315902178e-06, "loss": 0.3696, "step": 11086 }, { "epoch": 1.0404466966966968, "grad_norm": 0.8689883146563712, "learning_rate": 8.256788916058064e-06, "loss": 0.366, "step": 11087 }, { "epoch": 1.0405405405405406, "grad_norm": 0.9188681722613141, "learning_rate": 8.25637463426327e-06, "loss": 0.3629, "step": 11088 }, { "epoch": 1.0406343843843844, "grad_norm": 1.0170031729964344, "learning_rate": 8.255960313642337e-06, "loss": 0.4147, "step": 11089 }, { "epoch": 1.0407282282282282, "grad_norm": 0.9431461098975434, "learning_rate": 8.255545954200209e-06, "loss": 0.3989, "step": 11090 }, { "epoch": 1.040822072072072, "grad_norm": 0.9950627954600334, "learning_rate": 8.25513155594182e-06, "loss": 0.3846, "step": 11091 }, { "epoch": 1.0409159159159158, "grad_norm": 1.006317801485515, "learning_rate": 8.254717118872115e-06, "loss": 0.3858, "step": 11092 }, { "epoch": 1.0410097597597598, "grad_norm": 1.1043673500823588, "learning_rate": 8.254302642996034e-06, "loss": 0.419, "step": 11093 }, { "epoch": 1.0411036036036037, "grad_norm": 0.951588067079828, "learning_rate": 8.253888128318522e-06, "loss": 0.3788, "step": 11094 }, { "epoch": 1.0411974474474475, "grad_norm": 0.7952612639124705, "learning_rate": 8.253473574844518e-06, "loss": 0.3567, "step": 11095 }, { "epoch": 1.0412912912912913, "grad_norm": 0.8268192679061628, "learning_rate": 8.253058982578965e-06, "loss": 0.3655, "step": 11096 }, { "epoch": 1.041385135135135, "grad_norm": 0.9942232281725129, "learning_rate": 8.252644351526808e-06, "loss": 0.4215, "step": 11097 }, { "epoch": 1.041478978978979, "grad_norm": 0.8152109575298373, "learning_rate": 8.25222968169299e-06, "loss": 0.3668, "step": 11098 }, { "epoch": 1.041572822822823, "grad_norm": 0.9709109581411611, "learning_rate": 8.251814973082452e-06, "loss": 0.4068, "step": 11099 }, { "epoch": 1.0416666666666667, "grad_norm": 0.9505206131923449, "learning_rate": 8.251400225700144e-06, "loss": 0.3621, "step": 11100 }, { "epoch": 1.0417605105105106, "grad_norm": 1.0770718193433624, "learning_rate": 8.250985439551007e-06, "loss": 0.3688, "step": 11101 }, { "epoch": 1.0418543543543544, "grad_norm": 1.2823663644513905, "learning_rate": 8.250570614639988e-06, "loss": 0.3864, "step": 11102 }, { "epoch": 1.0419481981981982, "grad_norm": 1.552760388826484, "learning_rate": 8.250155750972035e-06, "loss": 0.415, "step": 11103 }, { "epoch": 1.042042042042042, "grad_norm": 0.8166525005580378, "learning_rate": 8.24974084855209e-06, "loss": 0.4262, "step": 11104 }, { "epoch": 1.0421358858858858, "grad_norm": 0.9428724008686014, "learning_rate": 8.249325907385104e-06, "loss": 0.3856, "step": 11105 }, { "epoch": 1.0422297297297298, "grad_norm": 1.0269647594296838, "learning_rate": 8.248910927476021e-06, "loss": 0.4183, "step": 11106 }, { "epoch": 1.0423235735735736, "grad_norm": 0.8553947185431829, "learning_rate": 8.248495908829791e-06, "loss": 0.4043, "step": 11107 }, { "epoch": 1.0424174174174174, "grad_norm": 0.9893651135524439, "learning_rate": 8.248080851451363e-06, "loss": 0.4351, "step": 11108 }, { "epoch": 1.0425112612612613, "grad_norm": 1.0871314704427748, "learning_rate": 8.247665755345685e-06, "loss": 0.4038, "step": 11109 }, { "epoch": 1.042605105105105, "grad_norm": 0.9236062232558601, "learning_rate": 8.247250620517706e-06, "loss": 0.376, "step": 11110 }, { "epoch": 1.0426989489489489, "grad_norm": 0.9693035406219694, "learning_rate": 8.246835446972374e-06, "loss": 0.395, "step": 11111 }, { "epoch": 1.0427927927927927, "grad_norm": 0.8702095378865049, "learning_rate": 8.246420234714643e-06, "loss": 0.3813, "step": 11112 }, { "epoch": 1.0428866366366367, "grad_norm": 1.004581980822857, "learning_rate": 8.246004983749458e-06, "loss": 0.4162, "step": 11113 }, { "epoch": 1.0429804804804805, "grad_norm": 0.9243518570343093, "learning_rate": 8.245589694081776e-06, "loss": 0.4223, "step": 11114 }, { "epoch": 1.0430743243243243, "grad_norm": 1.1965625665781445, "learning_rate": 8.245174365716547e-06, "loss": 0.3737, "step": 11115 }, { "epoch": 1.0431681681681682, "grad_norm": 0.9719193902660328, "learning_rate": 8.24475899865872e-06, "loss": 0.4163, "step": 11116 }, { "epoch": 1.043262012012012, "grad_norm": 0.9401193510511264, "learning_rate": 8.244343592913252e-06, "loss": 0.4219, "step": 11117 }, { "epoch": 1.0433558558558558, "grad_norm": 0.9234499038051374, "learning_rate": 8.243928148485092e-06, "loss": 0.4132, "step": 11118 }, { "epoch": 1.0434496996996998, "grad_norm": 1.0974413608405966, "learning_rate": 8.243512665379194e-06, "loss": 0.4212, "step": 11119 }, { "epoch": 1.0435435435435436, "grad_norm": 0.9911180978335699, "learning_rate": 8.243097143600515e-06, "loss": 0.4425, "step": 11120 }, { "epoch": 1.0436373873873874, "grad_norm": 0.996016178439464, "learning_rate": 8.242681583154006e-06, "loss": 0.4042, "step": 11121 }, { "epoch": 1.0437312312312312, "grad_norm": 0.9033804111707592, "learning_rate": 8.242265984044621e-06, "loss": 0.4068, "step": 11122 }, { "epoch": 1.043825075075075, "grad_norm": 0.8740615227156003, "learning_rate": 8.241850346277321e-06, "loss": 0.3877, "step": 11123 }, { "epoch": 1.0439189189189189, "grad_norm": 0.925898491431984, "learning_rate": 8.241434669857055e-06, "loss": 0.3956, "step": 11124 }, { "epoch": 1.0440127627627627, "grad_norm": 0.9742266110831526, "learning_rate": 8.241018954788783e-06, "loss": 0.4731, "step": 11125 }, { "epoch": 1.0441066066066067, "grad_norm": 1.0608305591266796, "learning_rate": 8.24060320107746e-06, "loss": 0.4641, "step": 11126 }, { "epoch": 1.0442004504504505, "grad_norm": 1.7025435416588202, "learning_rate": 8.240187408728041e-06, "loss": 0.4094, "step": 11127 }, { "epoch": 1.0442942942942943, "grad_norm": 0.9344380428091834, "learning_rate": 8.23977157774549e-06, "loss": 0.4199, "step": 11128 }, { "epoch": 1.0443881381381381, "grad_norm": 0.8508550017871827, "learning_rate": 8.239355708134761e-06, "loss": 0.3801, "step": 11129 }, { "epoch": 1.044481981981982, "grad_norm": 0.8394864207403556, "learning_rate": 8.238939799900812e-06, "loss": 0.3946, "step": 11130 }, { "epoch": 1.0445758258258258, "grad_norm": 0.9629518131638503, "learning_rate": 8.238523853048601e-06, "loss": 0.3908, "step": 11131 }, { "epoch": 1.0446696696696696, "grad_norm": 0.9333786461813939, "learning_rate": 8.23810786758309e-06, "loss": 0.4273, "step": 11132 }, { "epoch": 1.0447635135135136, "grad_norm": 1.0896742336327478, "learning_rate": 8.237691843509239e-06, "loss": 0.4152, "step": 11133 }, { "epoch": 1.0448573573573574, "grad_norm": 0.9449674038995054, "learning_rate": 8.237275780832006e-06, "loss": 0.4164, "step": 11134 }, { "epoch": 1.0449512012012012, "grad_norm": 0.8491317874708024, "learning_rate": 8.236859679556353e-06, "loss": 0.3749, "step": 11135 }, { "epoch": 1.045045045045045, "grad_norm": 0.9561692284933881, "learning_rate": 8.23644353968724e-06, "loss": 0.4281, "step": 11136 }, { "epoch": 1.0451388888888888, "grad_norm": 0.8510345498955086, "learning_rate": 8.23602736122963e-06, "loss": 0.384, "step": 11137 }, { "epoch": 1.0452327327327327, "grad_norm": 0.9666235297136524, "learning_rate": 8.235611144188487e-06, "loss": 0.422, "step": 11138 }, { "epoch": 1.0453265765765767, "grad_norm": 1.0509074337299233, "learning_rate": 8.235194888568769e-06, "loss": 0.4072, "step": 11139 }, { "epoch": 1.0454204204204205, "grad_norm": 0.910385257434888, "learning_rate": 8.234778594375443e-06, "loss": 0.4139, "step": 11140 }, { "epoch": 1.0455142642642643, "grad_norm": 1.0343813392095627, "learning_rate": 8.234362261613471e-06, "loss": 0.39, "step": 11141 }, { "epoch": 1.0456081081081081, "grad_norm": 0.910895705895224, "learning_rate": 8.233945890287817e-06, "loss": 0.4052, "step": 11142 }, { "epoch": 1.045701951951952, "grad_norm": 3.0128606924702255, "learning_rate": 8.233529480403446e-06, "loss": 0.4171, "step": 11143 }, { "epoch": 1.0457957957957957, "grad_norm": 1.0034922527201318, "learning_rate": 8.233113031965322e-06, "loss": 0.4476, "step": 11144 }, { "epoch": 1.0458896396396395, "grad_norm": 1.0597416237471597, "learning_rate": 8.23269654497841e-06, "loss": 0.4085, "step": 11145 }, { "epoch": 1.0459834834834836, "grad_norm": 0.8800671684883541, "learning_rate": 8.232280019447678e-06, "loss": 0.4086, "step": 11146 }, { "epoch": 1.0460773273273274, "grad_norm": 0.8774942761211061, "learning_rate": 8.231863455378091e-06, "loss": 0.3903, "step": 11147 }, { "epoch": 1.0461711711711712, "grad_norm": 1.0356161234626842, "learning_rate": 8.231446852774614e-06, "loss": 0.4284, "step": 11148 }, { "epoch": 1.046265015015015, "grad_norm": 0.8979935025252339, "learning_rate": 8.231030211642217e-06, "loss": 0.3923, "step": 11149 }, { "epoch": 1.0463588588588588, "grad_norm": 1.0540735544687385, "learning_rate": 8.230613531985867e-06, "loss": 0.4416, "step": 11150 }, { "epoch": 1.0464527027027026, "grad_norm": 0.9047878541707636, "learning_rate": 8.23019681381053e-06, "loss": 0.399, "step": 11151 }, { "epoch": 1.0465465465465464, "grad_norm": 0.8615758193616025, "learning_rate": 8.229780057121177e-06, "loss": 0.3831, "step": 11152 }, { "epoch": 1.0466403903903905, "grad_norm": 0.9644223924331968, "learning_rate": 8.229363261922777e-06, "loss": 0.4194, "step": 11153 }, { "epoch": 1.0467342342342343, "grad_norm": 0.9361320409780347, "learning_rate": 8.228946428220298e-06, "loss": 0.4151, "step": 11154 }, { "epoch": 1.046828078078078, "grad_norm": 1.9553346557848332, "learning_rate": 8.22852955601871e-06, "loss": 0.4414, "step": 11155 }, { "epoch": 1.046921921921922, "grad_norm": 0.9923095068083313, "learning_rate": 8.228112645322986e-06, "loss": 0.4408, "step": 11156 }, { "epoch": 1.0470157657657657, "grad_norm": 0.8759715462405503, "learning_rate": 8.227695696138094e-06, "loss": 0.388, "step": 11157 }, { "epoch": 1.0471096096096095, "grad_norm": 1.3375207020259832, "learning_rate": 8.227278708469006e-06, "loss": 0.4339, "step": 11158 }, { "epoch": 1.0472034534534536, "grad_norm": 0.8866730451318426, "learning_rate": 8.226861682320695e-06, "loss": 0.4072, "step": 11159 }, { "epoch": 1.0472972972972974, "grad_norm": 1.0846832956769978, "learning_rate": 8.22644461769813e-06, "loss": 0.3762, "step": 11160 }, { "epoch": 1.0473911411411412, "grad_norm": 0.9270211345608378, "learning_rate": 8.22602751460629e-06, "loss": 0.4006, "step": 11161 }, { "epoch": 1.047484984984985, "grad_norm": 1.088807981351188, "learning_rate": 8.225610373050141e-06, "loss": 0.3907, "step": 11162 }, { "epoch": 1.0475788288288288, "grad_norm": 1.0745592655029685, "learning_rate": 8.225193193034661e-06, "loss": 0.4184, "step": 11163 }, { "epoch": 1.0476726726726726, "grad_norm": 0.8698410477168583, "learning_rate": 8.224775974564824e-06, "loss": 0.3759, "step": 11164 }, { "epoch": 1.0477665165165164, "grad_norm": 0.9870630578390863, "learning_rate": 8.224358717645602e-06, "loss": 0.4318, "step": 11165 }, { "epoch": 1.0478603603603605, "grad_norm": 0.9470521189393463, "learning_rate": 8.223941422281972e-06, "loss": 0.4029, "step": 11166 }, { "epoch": 1.0479542042042043, "grad_norm": 1.083555829937987, "learning_rate": 8.22352408847891e-06, "loss": 0.3753, "step": 11167 }, { "epoch": 1.048048048048048, "grad_norm": 1.0314494355407335, "learning_rate": 8.22310671624139e-06, "loss": 0.4126, "step": 11168 }, { "epoch": 1.0481418918918919, "grad_norm": 1.0593086607309976, "learning_rate": 8.222689305574388e-06, "loss": 0.4146, "step": 11169 }, { "epoch": 1.0482357357357357, "grad_norm": 1.0128210320592081, "learning_rate": 8.222271856482884e-06, "loss": 0.4724, "step": 11170 }, { "epoch": 1.0483295795795795, "grad_norm": 0.9172520047982385, "learning_rate": 8.221854368971854e-06, "loss": 0.3836, "step": 11171 }, { "epoch": 1.0484234234234233, "grad_norm": 1.2283024346600435, "learning_rate": 8.221436843046274e-06, "loss": 0.3949, "step": 11172 }, { "epoch": 1.0485172672672673, "grad_norm": 0.9122972647482808, "learning_rate": 8.221019278711123e-06, "loss": 0.386, "step": 11173 }, { "epoch": 1.0486111111111112, "grad_norm": 0.8190798831177817, "learning_rate": 8.22060167597138e-06, "loss": 0.4099, "step": 11174 }, { "epoch": 1.048704954954955, "grad_norm": 0.9883493356050224, "learning_rate": 8.220184034832024e-06, "loss": 0.4394, "step": 11175 }, { "epoch": 1.0487987987987988, "grad_norm": 0.9393343123745221, "learning_rate": 8.219766355298035e-06, "loss": 0.4443, "step": 11176 }, { "epoch": 1.0488926426426426, "grad_norm": 0.929443743478927, "learning_rate": 8.219348637374394e-06, "loss": 0.4168, "step": 11177 }, { "epoch": 1.0489864864864864, "grad_norm": 0.9025650549182017, "learning_rate": 8.218930881066078e-06, "loss": 0.4164, "step": 11178 }, { "epoch": 1.0490803303303304, "grad_norm": 1.3319653550112895, "learning_rate": 8.218513086378072e-06, "loss": 0.4085, "step": 11179 }, { "epoch": 1.0491741741741742, "grad_norm": 1.0372984274370982, "learning_rate": 8.218095253315357e-06, "loss": 0.4293, "step": 11180 }, { "epoch": 1.049268018018018, "grad_norm": 1.0842551143314543, "learning_rate": 8.217677381882912e-06, "loss": 0.4248, "step": 11181 }, { "epoch": 1.0493618618618619, "grad_norm": 0.9882055480158399, "learning_rate": 8.217259472085718e-06, "loss": 0.4291, "step": 11182 }, { "epoch": 1.0494557057057057, "grad_norm": 0.9774129348459711, "learning_rate": 8.216841523928766e-06, "loss": 0.3884, "step": 11183 }, { "epoch": 1.0495495495495495, "grad_norm": 0.8498797178513965, "learning_rate": 8.21642353741703e-06, "loss": 0.3746, "step": 11184 }, { "epoch": 1.0496433933933933, "grad_norm": 0.9575664237598819, "learning_rate": 8.216005512555499e-06, "loss": 0.3773, "step": 11185 }, { "epoch": 1.0497372372372373, "grad_norm": 1.2931687160953032, "learning_rate": 8.215587449349157e-06, "loss": 0.4636, "step": 11186 }, { "epoch": 1.0498310810810811, "grad_norm": 0.9533387079851933, "learning_rate": 8.215169347802984e-06, "loss": 0.3733, "step": 11187 }, { "epoch": 1.049924924924925, "grad_norm": 0.871745985497964, "learning_rate": 8.214751207921971e-06, "loss": 0.4145, "step": 11188 }, { "epoch": 1.0500187687687688, "grad_norm": 1.0772801491838104, "learning_rate": 8.2143330297111e-06, "loss": 0.432, "step": 11189 }, { "epoch": 1.0501126126126126, "grad_norm": 0.8717043930615558, "learning_rate": 8.21391481317536e-06, "loss": 0.4289, "step": 11190 }, { "epoch": 1.0502064564564564, "grad_norm": 0.974010463724415, "learning_rate": 8.213496558319734e-06, "loss": 0.3936, "step": 11191 }, { "epoch": 1.0503003003003002, "grad_norm": 0.8723586578868646, "learning_rate": 8.21307826514921e-06, "loss": 0.3917, "step": 11192 }, { "epoch": 1.0503941441441442, "grad_norm": 1.339629615418757, "learning_rate": 8.212659933668776e-06, "loss": 0.4016, "step": 11193 }, { "epoch": 1.050487987987988, "grad_norm": 1.0417511416963965, "learning_rate": 8.212241563883419e-06, "loss": 0.3897, "step": 11194 }, { "epoch": 1.0505818318318318, "grad_norm": 0.8972852312694349, "learning_rate": 8.21182315579813e-06, "loss": 0.4396, "step": 11195 }, { "epoch": 1.0506756756756757, "grad_norm": 0.9079940743467428, "learning_rate": 8.211404709417893e-06, "loss": 0.3804, "step": 11196 }, { "epoch": 1.0507695195195195, "grad_norm": 1.0182839913389325, "learning_rate": 8.2109862247477e-06, "loss": 0.4329, "step": 11197 }, { "epoch": 1.0508633633633633, "grad_norm": 0.8561633016889764, "learning_rate": 8.210567701792542e-06, "loss": 0.4314, "step": 11198 }, { "epoch": 1.0509572072072073, "grad_norm": 0.9454891652018044, "learning_rate": 8.210149140557405e-06, "loss": 0.4054, "step": 11199 }, { "epoch": 1.0510510510510511, "grad_norm": 1.7244915542824408, "learning_rate": 8.209730541047283e-06, "loss": 0.3873, "step": 11200 }, { "epoch": 1.051144894894895, "grad_norm": 1.083614928787267, "learning_rate": 8.209311903267168e-06, "loss": 0.4196, "step": 11201 }, { "epoch": 1.0512387387387387, "grad_norm": 0.982257510542375, "learning_rate": 8.208893227222048e-06, "loss": 0.4363, "step": 11202 }, { "epoch": 1.0513325825825826, "grad_norm": 0.9388610320052138, "learning_rate": 8.208474512916913e-06, "loss": 0.4026, "step": 11203 }, { "epoch": 1.0514264264264264, "grad_norm": 1.0215420700014914, "learning_rate": 8.208055760356764e-06, "loss": 0.4428, "step": 11204 }, { "epoch": 1.0515202702702702, "grad_norm": 0.9635032900239452, "learning_rate": 8.207636969546586e-06, "loss": 0.4194, "step": 11205 }, { "epoch": 1.0516141141141142, "grad_norm": 0.987647588784953, "learning_rate": 8.207218140491376e-06, "loss": 0.4302, "step": 11206 }, { "epoch": 1.051707957957958, "grad_norm": 0.9541171447878365, "learning_rate": 8.206799273196125e-06, "loss": 0.4287, "step": 11207 }, { "epoch": 1.0518018018018018, "grad_norm": 1.0304615734182967, "learning_rate": 8.20638036766583e-06, "loss": 0.4126, "step": 11208 }, { "epoch": 1.0518956456456456, "grad_norm": 1.0689289582810424, "learning_rate": 8.205961423905484e-06, "loss": 0.4005, "step": 11209 }, { "epoch": 1.0519894894894894, "grad_norm": 1.3302982727931252, "learning_rate": 8.205542441920082e-06, "loss": 0.4583, "step": 11210 }, { "epoch": 1.0520833333333333, "grad_norm": 0.8844416028886576, "learning_rate": 8.205123421714623e-06, "loss": 0.3997, "step": 11211 }, { "epoch": 1.052177177177177, "grad_norm": 0.8538126994413456, "learning_rate": 8.204704363294097e-06, "loss": 0.3833, "step": 11212 }, { "epoch": 1.052271021021021, "grad_norm": 1.0482546650830145, "learning_rate": 8.204285266663505e-06, "loss": 0.3821, "step": 11213 }, { "epoch": 1.052364864864865, "grad_norm": 0.9671022862901635, "learning_rate": 8.203866131827844e-06, "loss": 0.361, "step": 11214 }, { "epoch": 1.0524587087087087, "grad_norm": 2.8484109230939105, "learning_rate": 8.203446958792107e-06, "loss": 0.4403, "step": 11215 }, { "epoch": 1.0525525525525525, "grad_norm": 0.998045071728487, "learning_rate": 8.203027747561297e-06, "loss": 0.4093, "step": 11216 }, { "epoch": 1.0526463963963963, "grad_norm": 1.0721774029660902, "learning_rate": 8.20260849814041e-06, "loss": 0.4258, "step": 11217 }, { "epoch": 1.0527402402402402, "grad_norm": 1.0753173999054748, "learning_rate": 8.202189210534444e-06, "loss": 0.3861, "step": 11218 }, { "epoch": 1.0528340840840842, "grad_norm": 1.3085910575829691, "learning_rate": 8.2017698847484e-06, "loss": 0.4335, "step": 11219 }, { "epoch": 1.052927927927928, "grad_norm": 0.9247272964678264, "learning_rate": 8.201350520787278e-06, "loss": 0.3769, "step": 11220 }, { "epoch": 1.0530217717717718, "grad_norm": 1.006901828173314, "learning_rate": 8.200931118656075e-06, "loss": 0.4342, "step": 11221 }, { "epoch": 1.0531156156156156, "grad_norm": 0.9149965311503707, "learning_rate": 8.200511678359794e-06, "loss": 0.3946, "step": 11222 }, { "epoch": 1.0532094594594594, "grad_norm": 0.8738007937869128, "learning_rate": 8.200092199903436e-06, "loss": 0.373, "step": 11223 }, { "epoch": 1.0533033033033032, "grad_norm": 0.7724030810664053, "learning_rate": 8.199672683292e-06, "loss": 0.3771, "step": 11224 }, { "epoch": 1.053397147147147, "grad_norm": 0.892396840056332, "learning_rate": 8.199253128530495e-06, "loss": 0.3772, "step": 11225 }, { "epoch": 1.053490990990991, "grad_norm": 1.0187714228239562, "learning_rate": 8.198833535623914e-06, "loss": 0.432, "step": 11226 }, { "epoch": 1.053584834834835, "grad_norm": 0.9035697712619496, "learning_rate": 8.198413904577265e-06, "loss": 0.3517, "step": 11227 }, { "epoch": 1.0536786786786787, "grad_norm": 0.9446347473161201, "learning_rate": 8.197994235395552e-06, "loss": 0.3816, "step": 11228 }, { "epoch": 1.0537725225225225, "grad_norm": 1.1698128714308622, "learning_rate": 8.197574528083777e-06, "loss": 0.4134, "step": 11229 }, { "epoch": 1.0538663663663663, "grad_norm": 1.0927524114235239, "learning_rate": 8.197154782646943e-06, "loss": 0.4094, "step": 11230 }, { "epoch": 1.0539602102102101, "grad_norm": 1.0545201912542777, "learning_rate": 8.19673499909006e-06, "loss": 0.4266, "step": 11231 }, { "epoch": 1.054054054054054, "grad_norm": 0.9910077964623474, "learning_rate": 8.196315177418126e-06, "loss": 0.4343, "step": 11232 }, { "epoch": 1.054147897897898, "grad_norm": 1.0238931967012106, "learning_rate": 8.19589531763615e-06, "loss": 0.3834, "step": 11233 }, { "epoch": 1.0542417417417418, "grad_norm": 0.9554322540731094, "learning_rate": 8.19547541974914e-06, "loss": 0.3738, "step": 11234 }, { "epoch": 1.0543355855855856, "grad_norm": 1.4557849834605443, "learning_rate": 8.195055483762098e-06, "loss": 0.4352, "step": 11235 }, { "epoch": 1.0544294294294294, "grad_norm": 1.6013210924800358, "learning_rate": 8.194635509680033e-06, "loss": 0.4096, "step": 11236 }, { "epoch": 1.0545232732732732, "grad_norm": 1.043992383256312, "learning_rate": 8.194215497507956e-06, "loss": 0.4044, "step": 11237 }, { "epoch": 1.054617117117117, "grad_norm": 1.023032692316505, "learning_rate": 8.19379544725087e-06, "loss": 0.3941, "step": 11238 }, { "epoch": 1.054710960960961, "grad_norm": 1.0474861837180554, "learning_rate": 8.193375358913784e-06, "loss": 0.4493, "step": 11239 }, { "epoch": 1.0548048048048049, "grad_norm": 0.9854069241081083, "learning_rate": 8.192955232501709e-06, "loss": 0.4106, "step": 11240 }, { "epoch": 1.0548986486486487, "grad_norm": 2.836926982569328, "learning_rate": 8.192535068019653e-06, "loss": 0.3914, "step": 11241 }, { "epoch": 1.0549924924924925, "grad_norm": 1.0506169514214576, "learning_rate": 8.192114865472627e-06, "loss": 0.4267, "step": 11242 }, { "epoch": 1.0550863363363363, "grad_norm": 0.9030395669117535, "learning_rate": 8.191694624865638e-06, "loss": 0.4346, "step": 11243 }, { "epoch": 1.0551801801801801, "grad_norm": 0.8338418643382426, "learning_rate": 8.191274346203698e-06, "loss": 0.3899, "step": 11244 }, { "epoch": 1.055274024024024, "grad_norm": 0.9211371561886931, "learning_rate": 8.190854029491818e-06, "loss": 0.3794, "step": 11245 }, { "epoch": 1.055367867867868, "grad_norm": 1.300840012625091, "learning_rate": 8.190433674735012e-06, "loss": 0.3767, "step": 11246 }, { "epoch": 1.0554617117117118, "grad_norm": 0.9599773573378622, "learning_rate": 8.190013281938288e-06, "loss": 0.4102, "step": 11247 }, { "epoch": 1.0555555555555556, "grad_norm": 0.8827482506768562, "learning_rate": 8.18959285110666e-06, "loss": 0.4565, "step": 11248 }, { "epoch": 1.0556493993993994, "grad_norm": 0.9095851424552676, "learning_rate": 8.189172382245143e-06, "loss": 0.3905, "step": 11249 }, { "epoch": 1.0557432432432432, "grad_norm": 0.8812806053141541, "learning_rate": 8.188751875358747e-06, "loss": 0.3972, "step": 11250 }, { "epoch": 1.055837087087087, "grad_norm": 1.2602202837472023, "learning_rate": 8.188331330452487e-06, "loss": 0.4354, "step": 11251 }, { "epoch": 1.055930930930931, "grad_norm": 1.2209992652639678, "learning_rate": 8.187910747531378e-06, "loss": 0.411, "step": 11252 }, { "epoch": 1.0560247747747749, "grad_norm": 0.9133086665720362, "learning_rate": 8.187490126600433e-06, "loss": 0.3477, "step": 11253 }, { "epoch": 1.0561186186186187, "grad_norm": 0.8988364684087723, "learning_rate": 8.18706946766467e-06, "loss": 0.3995, "step": 11254 }, { "epoch": 1.0562124624624625, "grad_norm": 0.863008158796543, "learning_rate": 8.186648770729099e-06, "loss": 0.3747, "step": 11255 }, { "epoch": 1.0563063063063063, "grad_norm": 0.8953635255922027, "learning_rate": 8.186228035798743e-06, "loss": 0.3772, "step": 11256 }, { "epoch": 1.05640015015015, "grad_norm": 0.9772321448196218, "learning_rate": 8.185807262878613e-06, "loss": 0.3976, "step": 11257 }, { "epoch": 1.056493993993994, "grad_norm": 1.0147903235904046, "learning_rate": 8.185386451973731e-06, "loss": 0.4326, "step": 11258 }, { "epoch": 1.056587837837838, "grad_norm": 0.8598451622271165, "learning_rate": 8.184965603089108e-06, "loss": 0.4103, "step": 11259 }, { "epoch": 1.0566816816816818, "grad_norm": 0.9190681997587019, "learning_rate": 8.184544716229767e-06, "loss": 0.4003, "step": 11260 }, { "epoch": 1.0567755255255256, "grad_norm": 0.9863016507152197, "learning_rate": 8.184123791400726e-06, "loss": 0.4002, "step": 11261 }, { "epoch": 1.0568693693693694, "grad_norm": 0.9744120485976722, "learning_rate": 8.183702828607e-06, "loss": 0.4006, "step": 11262 }, { "epoch": 1.0569632132132132, "grad_norm": 1.0479216573778591, "learning_rate": 8.18328182785361e-06, "loss": 0.4288, "step": 11263 }, { "epoch": 1.057057057057057, "grad_norm": 0.8839319880107634, "learning_rate": 8.182860789145579e-06, "loss": 0.4344, "step": 11264 }, { "epoch": 1.0571509009009008, "grad_norm": 1.1800523723030283, "learning_rate": 8.182439712487921e-06, "loss": 0.432, "step": 11265 }, { "epoch": 1.0572447447447448, "grad_norm": 1.2451752929670123, "learning_rate": 8.182018597885661e-06, "loss": 0.4303, "step": 11266 }, { "epoch": 1.0573385885885886, "grad_norm": 0.9315370867200817, "learning_rate": 8.181597445343821e-06, "loss": 0.409, "step": 11267 }, { "epoch": 1.0574324324324325, "grad_norm": 1.1140591755521845, "learning_rate": 8.181176254867416e-06, "loss": 0.4166, "step": 11268 }, { "epoch": 1.0575262762762763, "grad_norm": 0.9668749269680911, "learning_rate": 8.180755026461475e-06, "loss": 0.3845, "step": 11269 }, { "epoch": 1.05762012012012, "grad_norm": 0.8937184019783915, "learning_rate": 8.180333760131016e-06, "loss": 0.4398, "step": 11270 }, { "epoch": 1.0577139639639639, "grad_norm": 0.8877003006863112, "learning_rate": 8.179912455881064e-06, "loss": 0.3802, "step": 11271 }, { "epoch": 1.0578078078078077, "grad_norm": 1.17957337626406, "learning_rate": 8.179491113716642e-06, "loss": 0.4461, "step": 11272 }, { "epoch": 1.0579016516516517, "grad_norm": 1.6517376683746217, "learning_rate": 8.179069733642772e-06, "loss": 0.4203, "step": 11273 }, { "epoch": 1.0579954954954955, "grad_norm": 0.9590266832291078, "learning_rate": 8.178648315664481e-06, "loss": 0.3675, "step": 11274 }, { "epoch": 1.0580893393393394, "grad_norm": 0.9157347867746864, "learning_rate": 8.17822685978679e-06, "loss": 0.4286, "step": 11275 }, { "epoch": 1.0581831831831832, "grad_norm": 0.9466932961267547, "learning_rate": 8.177805366014729e-06, "loss": 0.4324, "step": 11276 }, { "epoch": 1.058277027027027, "grad_norm": 1.093053662499271, "learning_rate": 8.177383834353319e-06, "loss": 0.4135, "step": 11277 }, { "epoch": 1.0583708708708708, "grad_norm": 1.26418343478099, "learning_rate": 8.176962264807586e-06, "loss": 0.4125, "step": 11278 }, { "epoch": 1.0584647147147148, "grad_norm": 0.8784050207268199, "learning_rate": 8.176540657382562e-06, "loss": 0.3832, "step": 11279 }, { "epoch": 1.0585585585585586, "grad_norm": 0.779052662082092, "learning_rate": 8.176119012083265e-06, "loss": 0.4019, "step": 11280 }, { "epoch": 1.0586524024024024, "grad_norm": 1.0278200233574035, "learning_rate": 8.175697328914732e-06, "loss": 0.3907, "step": 11281 }, { "epoch": 1.0587462462462462, "grad_norm": 0.9283818627987677, "learning_rate": 8.175275607881983e-06, "loss": 0.4217, "step": 11282 }, { "epoch": 1.05884009009009, "grad_norm": 0.9592851979234385, "learning_rate": 8.174853848990049e-06, "loss": 0.3977, "step": 11283 }, { "epoch": 1.0589339339339339, "grad_norm": 0.9817007970538575, "learning_rate": 8.174432052243959e-06, "loss": 0.4211, "step": 11284 }, { "epoch": 1.0590277777777777, "grad_norm": 1.0521874283012, "learning_rate": 8.174010217648745e-06, "loss": 0.4414, "step": 11285 }, { "epoch": 1.0591216216216217, "grad_norm": 1.018019176234821, "learning_rate": 8.17358834520943e-06, "loss": 0.4058, "step": 11286 }, { "epoch": 1.0592154654654655, "grad_norm": 0.7956781084866578, "learning_rate": 8.17316643493105e-06, "loss": 0.3826, "step": 11287 }, { "epoch": 1.0593093093093093, "grad_norm": 1.2006961370965465, "learning_rate": 8.172744486818631e-06, "loss": 0.3942, "step": 11288 }, { "epoch": 1.0594031531531531, "grad_norm": 0.9860226074403655, "learning_rate": 8.172322500877207e-06, "loss": 0.3906, "step": 11289 }, { "epoch": 1.059496996996997, "grad_norm": 1.1536498173353096, "learning_rate": 8.17190047711181e-06, "loss": 0.4194, "step": 11290 }, { "epoch": 1.0595908408408408, "grad_norm": 1.0230293453326968, "learning_rate": 8.171478415527468e-06, "loss": 0.4037, "step": 11291 }, { "epoch": 1.0596846846846848, "grad_norm": 0.931250201049485, "learning_rate": 8.171056316129217e-06, "loss": 0.4312, "step": 11292 }, { "epoch": 1.0597785285285286, "grad_norm": 0.8821816553494134, "learning_rate": 8.170634178922087e-06, "loss": 0.4124, "step": 11293 }, { "epoch": 1.0598723723723724, "grad_norm": 1.0353868219493534, "learning_rate": 8.170212003911115e-06, "loss": 0.397, "step": 11294 }, { "epoch": 1.0599662162162162, "grad_norm": 0.9835519637218689, "learning_rate": 8.16978979110133e-06, "loss": 0.426, "step": 11295 }, { "epoch": 1.06006006006006, "grad_norm": 1.0908455205805034, "learning_rate": 8.169367540497768e-06, "loss": 0.3966, "step": 11296 }, { "epoch": 1.0601539039039038, "grad_norm": 0.8769064336228579, "learning_rate": 8.168945252105464e-06, "loss": 0.4058, "step": 11297 }, { "epoch": 1.0602477477477477, "grad_norm": 0.887473116147611, "learning_rate": 8.168522925929453e-06, "loss": 0.3655, "step": 11298 }, { "epoch": 1.0603415915915917, "grad_norm": 0.9268013315311666, "learning_rate": 8.168100561974772e-06, "loss": 0.4497, "step": 11299 }, { "epoch": 1.0604354354354355, "grad_norm": 0.9142588053192793, "learning_rate": 8.167678160246455e-06, "loss": 0.3766, "step": 11300 }, { "epoch": 1.0605292792792793, "grad_norm": 0.864994816212784, "learning_rate": 8.167255720749536e-06, "loss": 0.4209, "step": 11301 }, { "epoch": 1.0606231231231231, "grad_norm": 1.0611420699218053, "learning_rate": 8.166833243489056e-06, "loss": 0.4536, "step": 11302 }, { "epoch": 1.060716966966967, "grad_norm": 1.1028678568774295, "learning_rate": 8.16641072847005e-06, "loss": 0.4269, "step": 11303 }, { "epoch": 1.0608108108108107, "grad_norm": 0.887426935724251, "learning_rate": 8.165988175697557e-06, "loss": 0.3943, "step": 11304 }, { "epoch": 1.0609046546546546, "grad_norm": 1.370390008151914, "learning_rate": 8.165565585176613e-06, "loss": 0.4071, "step": 11305 }, { "epoch": 1.0609984984984986, "grad_norm": 1.0139673158763136, "learning_rate": 8.165142956912259e-06, "loss": 0.4126, "step": 11306 }, { "epoch": 1.0610923423423424, "grad_norm": 0.957271475659111, "learning_rate": 8.164720290909532e-06, "loss": 0.4135, "step": 11307 }, { "epoch": 1.0611861861861862, "grad_norm": 0.940863706480762, "learning_rate": 8.164297587173474e-06, "loss": 0.427, "step": 11308 }, { "epoch": 1.06128003003003, "grad_norm": 0.9000313164574376, "learning_rate": 8.163874845709122e-06, "loss": 0.4353, "step": 11309 }, { "epoch": 1.0613738738738738, "grad_norm": 0.8011920445199204, "learning_rate": 8.163452066521518e-06, "loss": 0.3542, "step": 11310 }, { "epoch": 1.0614677177177176, "grad_norm": 1.208119136045689, "learning_rate": 8.163029249615704e-06, "loss": 0.4185, "step": 11311 }, { "epoch": 1.0615615615615615, "grad_norm": 1.1347577423714295, "learning_rate": 8.162606394996719e-06, "loss": 0.4061, "step": 11312 }, { "epoch": 1.0616554054054055, "grad_norm": 0.9031464774374776, "learning_rate": 8.162183502669605e-06, "loss": 0.4247, "step": 11313 }, { "epoch": 1.0617492492492493, "grad_norm": 0.9866585776900113, "learning_rate": 8.161760572639405e-06, "loss": 0.4019, "step": 11314 }, { "epoch": 1.061843093093093, "grad_norm": 0.8888364305715293, "learning_rate": 8.161337604911163e-06, "loss": 0.4144, "step": 11315 }, { "epoch": 1.061936936936937, "grad_norm": 0.906394518577851, "learning_rate": 8.16091459948992e-06, "loss": 0.44, "step": 11316 }, { "epoch": 1.0620307807807807, "grad_norm": 0.9260797008416123, "learning_rate": 8.160491556380721e-06, "loss": 0.429, "step": 11317 }, { "epoch": 1.0621246246246245, "grad_norm": 8.098494662662562, "learning_rate": 8.160068475588608e-06, "loss": 0.4439, "step": 11318 }, { "epoch": 1.0622184684684686, "grad_norm": 1.3127933936712088, "learning_rate": 8.159645357118627e-06, "loss": 0.393, "step": 11319 }, { "epoch": 1.0623123123123124, "grad_norm": 1.024851677449295, "learning_rate": 8.159222200975822e-06, "loss": 0.3969, "step": 11320 }, { "epoch": 1.0624061561561562, "grad_norm": 1.0129691979608746, "learning_rate": 8.158799007165241e-06, "loss": 0.3987, "step": 11321 }, { "epoch": 1.0625, "grad_norm": 0.8393463232375946, "learning_rate": 8.158375775691926e-06, "loss": 0.3508, "step": 11322 }, { "epoch": 1.0625938438438438, "grad_norm": 0.9412227692088017, "learning_rate": 8.157952506560925e-06, "loss": 0.4551, "step": 11323 }, { "epoch": 1.0626876876876876, "grad_norm": 1.172722992172507, "learning_rate": 8.157529199777285e-06, "loss": 0.3831, "step": 11324 }, { "epoch": 1.0627815315315314, "grad_norm": 1.5680607038313727, "learning_rate": 8.157105855346053e-06, "loss": 0.3773, "step": 11325 }, { "epoch": 1.0628753753753755, "grad_norm": 1.0745715932172084, "learning_rate": 8.156682473272273e-06, "loss": 0.4024, "step": 11326 }, { "epoch": 1.0629692192192193, "grad_norm": 1.0495827992562892, "learning_rate": 8.156259053561e-06, "loss": 0.429, "step": 11327 }, { "epoch": 1.063063063063063, "grad_norm": 0.9095361966552007, "learning_rate": 8.155835596217279e-06, "loss": 0.393, "step": 11328 }, { "epoch": 1.063156906906907, "grad_norm": 0.8120780623313313, "learning_rate": 8.155412101246156e-06, "loss": 0.4046, "step": 11329 }, { "epoch": 1.0632507507507507, "grad_norm": 0.9886615280875021, "learning_rate": 8.154988568652687e-06, "loss": 0.4433, "step": 11330 }, { "epoch": 1.0633445945945945, "grad_norm": 0.9194199951513696, "learning_rate": 8.154564998441914e-06, "loss": 0.3868, "step": 11331 }, { "epoch": 1.0634384384384385, "grad_norm": 0.9702885955823243, "learning_rate": 8.154141390618892e-06, "loss": 0.3903, "step": 11332 }, { "epoch": 1.0635322822822824, "grad_norm": 0.9961354456558118, "learning_rate": 8.153717745188675e-06, "loss": 0.4069, "step": 11333 }, { "epoch": 1.0636261261261262, "grad_norm": 0.9755076517008578, "learning_rate": 8.153294062156305e-06, "loss": 0.3667, "step": 11334 }, { "epoch": 1.06371996996997, "grad_norm": 1.1518213393001675, "learning_rate": 8.152870341526841e-06, "loss": 0.4253, "step": 11335 }, { "epoch": 1.0638138138138138, "grad_norm": 0.9423780214877169, "learning_rate": 8.152446583305334e-06, "loss": 0.3703, "step": 11336 }, { "epoch": 1.0639076576576576, "grad_norm": 1.0074470978776648, "learning_rate": 8.152022787496834e-06, "loss": 0.4462, "step": 11337 }, { "epoch": 1.0640015015015014, "grad_norm": 1.0007020393312052, "learning_rate": 8.151598954106395e-06, "loss": 0.4303, "step": 11338 }, { "epoch": 1.0640953453453454, "grad_norm": 0.8631856696557243, "learning_rate": 8.151175083139071e-06, "loss": 0.3842, "step": 11339 }, { "epoch": 1.0641891891891893, "grad_norm": 0.9391356172459728, "learning_rate": 8.150751174599915e-06, "loss": 0.4142, "step": 11340 }, { "epoch": 1.064283033033033, "grad_norm": 2.1388159791859334, "learning_rate": 8.150327228493984e-06, "loss": 0.433, "step": 11341 }, { "epoch": 1.0643768768768769, "grad_norm": 1.0097403741401358, "learning_rate": 8.14990324482633e-06, "loss": 0.3635, "step": 11342 }, { "epoch": 1.0644707207207207, "grad_norm": 1.0298955332080597, "learning_rate": 8.149479223602009e-06, "loss": 0.4235, "step": 11343 }, { "epoch": 1.0645645645645645, "grad_norm": 0.8507932989221745, "learning_rate": 8.149055164826074e-06, "loss": 0.4068, "step": 11344 }, { "epoch": 1.0646584084084083, "grad_norm": 1.1347499942175219, "learning_rate": 8.148631068503586e-06, "loss": 0.4214, "step": 11345 }, { "epoch": 1.0647522522522523, "grad_norm": 0.9601200989107657, "learning_rate": 8.148206934639599e-06, "loss": 0.4096, "step": 11346 }, { "epoch": 1.0648460960960962, "grad_norm": 0.9531937153129839, "learning_rate": 8.147782763239169e-06, "loss": 0.4539, "step": 11347 }, { "epoch": 1.06493993993994, "grad_norm": 0.8827376627838001, "learning_rate": 8.147358554307356e-06, "loss": 0.4019, "step": 11348 }, { "epoch": 1.0650337837837838, "grad_norm": 1.0473676834590377, "learning_rate": 8.146934307849217e-06, "loss": 0.4362, "step": 11349 }, { "epoch": 1.0651276276276276, "grad_norm": 1.3475732842542614, "learning_rate": 8.146510023869808e-06, "loss": 0.4164, "step": 11350 }, { "epoch": 1.0652214714714714, "grad_norm": 0.982581175284777, "learning_rate": 8.146085702374191e-06, "loss": 0.3637, "step": 11351 }, { "epoch": 1.0653153153153152, "grad_norm": 0.9253703874705234, "learning_rate": 8.145661343367423e-06, "loss": 0.3937, "step": 11352 }, { "epoch": 1.0654091591591592, "grad_norm": 1.4554458972607631, "learning_rate": 8.145236946854565e-06, "loss": 0.4002, "step": 11353 }, { "epoch": 1.065503003003003, "grad_norm": 1.0833769052361444, "learning_rate": 8.144812512840677e-06, "loss": 0.3998, "step": 11354 }, { "epoch": 1.0655968468468469, "grad_norm": 0.9050883892760737, "learning_rate": 8.14438804133082e-06, "loss": 0.4067, "step": 11355 }, { "epoch": 1.0656906906906907, "grad_norm": 0.8618162451971635, "learning_rate": 8.143963532330052e-06, "loss": 0.4141, "step": 11356 }, { "epoch": 1.0657845345345345, "grad_norm": 1.842434984566356, "learning_rate": 8.14353898584344e-06, "loss": 0.3706, "step": 11357 }, { "epoch": 1.0658783783783783, "grad_norm": 0.970436445648158, "learning_rate": 8.14311440187604e-06, "loss": 0.3548, "step": 11358 }, { "epoch": 1.0659722222222223, "grad_norm": 0.9699201742161323, "learning_rate": 8.142689780432917e-06, "loss": 0.4031, "step": 11359 }, { "epoch": 1.0660660660660661, "grad_norm": 0.8778109326555992, "learning_rate": 8.142265121519135e-06, "loss": 0.4132, "step": 11360 }, { "epoch": 1.06615990990991, "grad_norm": 0.9846600432676526, "learning_rate": 8.141840425139756e-06, "loss": 0.4394, "step": 11361 }, { "epoch": 1.0662537537537538, "grad_norm": 0.9963060969842114, "learning_rate": 8.141415691299844e-06, "loss": 0.4108, "step": 11362 }, { "epoch": 1.0663475975975976, "grad_norm": 0.9498262406312378, "learning_rate": 8.140990920004463e-06, "loss": 0.4054, "step": 11363 }, { "epoch": 1.0664414414414414, "grad_norm": 1.018306797669556, "learning_rate": 8.140566111258677e-06, "loss": 0.3856, "step": 11364 }, { "epoch": 1.0665352852852852, "grad_norm": 0.9759503854256258, "learning_rate": 8.140141265067552e-06, "loss": 0.4413, "step": 11365 }, { "epoch": 1.0666291291291292, "grad_norm": 0.8898956356426353, "learning_rate": 8.139716381436153e-06, "loss": 0.3897, "step": 11366 }, { "epoch": 1.066722972972973, "grad_norm": 1.446783823365048, "learning_rate": 8.139291460369548e-06, "loss": 0.4488, "step": 11367 }, { "epoch": 1.0668168168168168, "grad_norm": 1.6707190162171, "learning_rate": 8.138866501872798e-06, "loss": 0.3732, "step": 11368 }, { "epoch": 1.0669106606606606, "grad_norm": 1.0812210574066947, "learning_rate": 8.138441505950976e-06, "loss": 0.4358, "step": 11369 }, { "epoch": 1.0670045045045045, "grad_norm": 2.145231025790134, "learning_rate": 8.138016472609144e-06, "loss": 0.4382, "step": 11370 }, { "epoch": 1.0670983483483483, "grad_norm": 1.0249620957763483, "learning_rate": 8.137591401852375e-06, "loss": 0.4145, "step": 11371 }, { "epoch": 1.0671921921921923, "grad_norm": 1.0553750810463849, "learning_rate": 8.137166293685733e-06, "loss": 0.4182, "step": 11372 }, { "epoch": 1.0672860360360361, "grad_norm": 1.41615010693106, "learning_rate": 8.136741148114287e-06, "loss": 0.4395, "step": 11373 }, { "epoch": 1.06737987987988, "grad_norm": 1.6740464682046317, "learning_rate": 8.13631596514311e-06, "loss": 0.4356, "step": 11374 }, { "epoch": 1.0674737237237237, "grad_norm": 1.2600035387650723, "learning_rate": 8.135890744777267e-06, "loss": 0.3986, "step": 11375 }, { "epoch": 1.0675675675675675, "grad_norm": 0.9510965609059285, "learning_rate": 8.135465487021828e-06, "loss": 0.3961, "step": 11376 }, { "epoch": 1.0676614114114114, "grad_norm": 0.8798178982661781, "learning_rate": 8.135040191881867e-06, "loss": 0.3692, "step": 11377 }, { "epoch": 1.0677552552552552, "grad_norm": 1.2009581624426386, "learning_rate": 8.134614859362454e-06, "loss": 0.4368, "step": 11378 }, { "epoch": 1.0678490990990992, "grad_norm": 0.9538940408953722, "learning_rate": 8.134189489468655e-06, "loss": 0.3794, "step": 11379 }, { "epoch": 1.067942942942943, "grad_norm": 1.068474505584924, "learning_rate": 8.133764082205549e-06, "loss": 0.4317, "step": 11380 }, { "epoch": 1.0680367867867868, "grad_norm": 0.8406772042175393, "learning_rate": 8.133338637578204e-06, "loss": 0.4262, "step": 11381 }, { "epoch": 1.0681306306306306, "grad_norm": 1.0672364182925564, "learning_rate": 8.132913155591696e-06, "loss": 0.411, "step": 11382 }, { "epoch": 1.0682244744744744, "grad_norm": 0.8929537100312277, "learning_rate": 8.132487636251092e-06, "loss": 0.3873, "step": 11383 }, { "epoch": 1.0683183183183182, "grad_norm": 1.1449260149310254, "learning_rate": 8.132062079561472e-06, "loss": 0.4626, "step": 11384 }, { "epoch": 1.068412162162162, "grad_norm": 0.9515732894647015, "learning_rate": 8.131636485527905e-06, "loss": 0.3844, "step": 11385 }, { "epoch": 1.068506006006006, "grad_norm": 0.9718322847550651, "learning_rate": 8.13121085415547e-06, "loss": 0.388, "step": 11386 }, { "epoch": 1.06859984984985, "grad_norm": 0.9751317669798061, "learning_rate": 8.130785185449237e-06, "loss": 0.4136, "step": 11387 }, { "epoch": 1.0686936936936937, "grad_norm": 1.545606086935098, "learning_rate": 8.130359479414284e-06, "loss": 0.3892, "step": 11388 }, { "epoch": 1.0687875375375375, "grad_norm": 1.0146018162493098, "learning_rate": 8.129933736055688e-06, "loss": 0.4087, "step": 11389 }, { "epoch": 1.0688813813813813, "grad_norm": 0.898935597223203, "learning_rate": 8.129507955378524e-06, "loss": 0.4336, "step": 11390 }, { "epoch": 1.0689752252252251, "grad_norm": 0.9282713728568598, "learning_rate": 8.129082137387867e-06, "loss": 0.3463, "step": 11391 }, { "epoch": 1.069069069069069, "grad_norm": 0.8572093760323342, "learning_rate": 8.128656282088796e-06, "loss": 0.3612, "step": 11392 }, { "epoch": 1.069162912912913, "grad_norm": 0.9216279381295288, "learning_rate": 8.128230389486386e-06, "loss": 0.4191, "step": 11393 }, { "epoch": 1.0692567567567568, "grad_norm": 0.942622415551681, "learning_rate": 8.12780445958572e-06, "loss": 0.387, "step": 11394 }, { "epoch": 1.0693506006006006, "grad_norm": 0.9760221902733406, "learning_rate": 8.127378492391871e-06, "loss": 0.4253, "step": 11395 }, { "epoch": 1.0694444444444444, "grad_norm": 0.8246874491488907, "learning_rate": 8.126952487909923e-06, "loss": 0.4119, "step": 11396 }, { "epoch": 1.0695382882882882, "grad_norm": 0.9244501905428212, "learning_rate": 8.126526446144949e-06, "loss": 0.3886, "step": 11397 }, { "epoch": 1.069632132132132, "grad_norm": 0.9878298345907561, "learning_rate": 8.126100367102035e-06, "loss": 0.3478, "step": 11398 }, { "epoch": 1.069725975975976, "grad_norm": 0.8362838430173117, "learning_rate": 8.125674250786257e-06, "loss": 0.3818, "step": 11399 }, { "epoch": 1.0698198198198199, "grad_norm": 0.9533190070854761, "learning_rate": 8.125248097202697e-06, "loss": 0.3924, "step": 11400 }, { "epoch": 1.0699136636636637, "grad_norm": 0.9659887399012766, "learning_rate": 8.124821906356438e-06, "loss": 0.4018, "step": 11401 }, { "epoch": 1.0700075075075075, "grad_norm": 0.9067169265176817, "learning_rate": 8.124395678252557e-06, "loss": 0.4248, "step": 11402 }, { "epoch": 1.0701013513513513, "grad_norm": 1.1545711523488953, "learning_rate": 8.123969412896138e-06, "loss": 0.4755, "step": 11403 }, { "epoch": 1.0701951951951951, "grad_norm": 0.8902851942122832, "learning_rate": 8.123543110292267e-06, "loss": 0.3867, "step": 11404 }, { "epoch": 1.070289039039039, "grad_norm": 1.0286697984483906, "learning_rate": 8.123116770446023e-06, "loss": 0.4046, "step": 11405 }, { "epoch": 1.070382882882883, "grad_norm": 0.9494974790138813, "learning_rate": 8.122690393362489e-06, "loss": 0.4086, "step": 11406 }, { "epoch": 1.0704767267267268, "grad_norm": 1.0320267767253881, "learning_rate": 8.122263979046751e-06, "loss": 0.3941, "step": 11407 }, { "epoch": 1.0705705705705706, "grad_norm": 0.885986899266523, "learning_rate": 8.12183752750389e-06, "loss": 0.3901, "step": 11408 }, { "epoch": 1.0706644144144144, "grad_norm": 1.043678082505698, "learning_rate": 8.121411038738996e-06, "loss": 0.3804, "step": 11409 }, { "epoch": 1.0707582582582582, "grad_norm": 0.9558875030153513, "learning_rate": 8.120984512757148e-06, "loss": 0.4303, "step": 11410 }, { "epoch": 1.070852102102102, "grad_norm": 0.914065872927034, "learning_rate": 8.120557949563437e-06, "loss": 0.3856, "step": 11411 }, { "epoch": 1.070945945945946, "grad_norm": 1.0113229316262087, "learning_rate": 8.120131349162944e-06, "loss": 0.43, "step": 11412 }, { "epoch": 1.0710397897897899, "grad_norm": 1.3340923459598741, "learning_rate": 8.119704711560758e-06, "loss": 0.5043, "step": 11413 }, { "epoch": 1.0711336336336337, "grad_norm": 0.8758606087611744, "learning_rate": 8.119278036761969e-06, "loss": 0.4203, "step": 11414 }, { "epoch": 1.0712274774774775, "grad_norm": 0.9139298335237523, "learning_rate": 8.118851324771657e-06, "loss": 0.4498, "step": 11415 }, { "epoch": 1.0713213213213213, "grad_norm": 0.9756490842216543, "learning_rate": 8.118424575594914e-06, "loss": 0.3982, "step": 11416 }, { "epoch": 1.071415165165165, "grad_norm": 0.9532152001900641, "learning_rate": 8.117997789236829e-06, "loss": 0.4413, "step": 11417 }, { "epoch": 1.071509009009009, "grad_norm": 0.9390507103189265, "learning_rate": 8.117570965702488e-06, "loss": 0.4038, "step": 11418 }, { "epoch": 1.071602852852853, "grad_norm": 1.0213195063908393, "learning_rate": 8.117144104996982e-06, "loss": 0.4648, "step": 11419 }, { "epoch": 1.0716966966966968, "grad_norm": 0.996045375884885, "learning_rate": 8.116717207125402e-06, "loss": 0.3806, "step": 11420 }, { "epoch": 1.0717905405405406, "grad_norm": 1.1140090374824343, "learning_rate": 8.116290272092832e-06, "loss": 0.4495, "step": 11421 }, { "epoch": 1.0718843843843844, "grad_norm": 0.9197600372536348, "learning_rate": 8.11586329990437e-06, "loss": 0.4091, "step": 11422 }, { "epoch": 1.0719782282282282, "grad_norm": 1.0984622058308409, "learning_rate": 8.1154362905651e-06, "loss": 0.4313, "step": 11423 }, { "epoch": 1.072072072072072, "grad_norm": 1.2905841128713949, "learning_rate": 8.115009244080119e-06, "loss": 0.4219, "step": 11424 }, { "epoch": 1.0721659159159158, "grad_norm": 0.8746583056256045, "learning_rate": 8.114582160454514e-06, "loss": 0.4135, "step": 11425 }, { "epoch": 1.0722597597597598, "grad_norm": 0.9107726468098148, "learning_rate": 8.114155039693381e-06, "loss": 0.3909, "step": 11426 }, { "epoch": 1.0723536036036037, "grad_norm": 0.96931648839104, "learning_rate": 8.11372788180181e-06, "loss": 0.406, "step": 11427 }, { "epoch": 1.0724474474474475, "grad_norm": 1.2172631594034942, "learning_rate": 8.113300686784896e-06, "loss": 0.4192, "step": 11428 }, { "epoch": 1.0725412912912913, "grad_norm": 0.9285030332279475, "learning_rate": 8.112873454647732e-06, "loss": 0.3884, "step": 11429 }, { "epoch": 1.072635135135135, "grad_norm": 1.154179180623566, "learning_rate": 8.11244618539541e-06, "loss": 0.4425, "step": 11430 }, { "epoch": 1.072728978978979, "grad_norm": 0.9895322733182909, "learning_rate": 8.112018879033026e-06, "loss": 0.3974, "step": 11431 }, { "epoch": 1.0728228228228227, "grad_norm": 1.3199114108657528, "learning_rate": 8.111591535565676e-06, "loss": 0.4333, "step": 11432 }, { "epoch": 1.0729166666666667, "grad_norm": 0.8096560701510724, "learning_rate": 8.111164154998452e-06, "loss": 0.3577, "step": 11433 }, { "epoch": 1.0730105105105106, "grad_norm": 0.8653499604753355, "learning_rate": 8.110736737336452e-06, "loss": 0.3942, "step": 11434 }, { "epoch": 1.0731043543543544, "grad_norm": 1.0529262939291635, "learning_rate": 8.110309282584773e-06, "loss": 0.4676, "step": 11435 }, { "epoch": 1.0731981981981982, "grad_norm": 1.3125001201429898, "learning_rate": 8.109881790748509e-06, "loss": 0.4198, "step": 11436 }, { "epoch": 1.073292042042042, "grad_norm": 0.9456896215312474, "learning_rate": 8.109454261832758e-06, "loss": 0.4356, "step": 11437 }, { "epoch": 1.0733858858858858, "grad_norm": 1.1108740980443383, "learning_rate": 8.109026695842619e-06, "loss": 0.4281, "step": 11438 }, { "epoch": 1.0734797297297298, "grad_norm": 0.9201392877750786, "learning_rate": 8.108599092783189e-06, "loss": 0.353, "step": 11439 }, { "epoch": 1.0735735735735736, "grad_norm": 0.9146921452736593, "learning_rate": 8.108171452659566e-06, "loss": 0.3802, "step": 11440 }, { "epoch": 1.0736674174174174, "grad_norm": 1.1651879706631174, "learning_rate": 8.107743775476849e-06, "loss": 0.4777, "step": 11441 }, { "epoch": 1.0737612612612613, "grad_norm": 1.3569144891950708, "learning_rate": 8.107316061240136e-06, "loss": 0.3778, "step": 11442 }, { "epoch": 1.073855105105105, "grad_norm": 1.026658988181688, "learning_rate": 8.106888309954528e-06, "loss": 0.3898, "step": 11443 }, { "epoch": 1.0739489489489489, "grad_norm": 0.9871907853092814, "learning_rate": 8.106460521625127e-06, "loss": 0.4207, "step": 11444 }, { "epoch": 1.0740427927927927, "grad_norm": 0.9835027901757045, "learning_rate": 8.10603269625703e-06, "loss": 0.399, "step": 11445 }, { "epoch": 1.0741366366366367, "grad_norm": 0.9724385694469146, "learning_rate": 8.105604833855339e-06, "loss": 0.4445, "step": 11446 }, { "epoch": 1.0742304804804805, "grad_norm": 1.0030569359567976, "learning_rate": 8.10517693442516e-06, "loss": 0.384, "step": 11447 }, { "epoch": 1.0743243243243243, "grad_norm": 0.9650206389922964, "learning_rate": 8.104748997971586e-06, "loss": 0.4314, "step": 11448 }, { "epoch": 1.0744181681681682, "grad_norm": 1.963233825772325, "learning_rate": 8.104321024499725e-06, "loss": 0.4008, "step": 11449 }, { "epoch": 1.074512012012012, "grad_norm": 1.0104369568385592, "learning_rate": 8.103893014014682e-06, "loss": 0.4398, "step": 11450 }, { "epoch": 1.0746058558558558, "grad_norm": 0.8903591301204009, "learning_rate": 8.103464966521555e-06, "loss": 0.3934, "step": 11451 }, { "epoch": 1.0746996996996998, "grad_norm": 0.9065896848561084, "learning_rate": 8.10303688202545e-06, "loss": 0.415, "step": 11452 }, { "epoch": 1.0747935435435436, "grad_norm": 0.8520851516056528, "learning_rate": 8.102608760531472e-06, "loss": 0.3774, "step": 11453 }, { "epoch": 1.0748873873873874, "grad_norm": 1.1196831223326702, "learning_rate": 8.102180602044724e-06, "loss": 0.3732, "step": 11454 }, { "epoch": 1.0749812312312312, "grad_norm": 0.8911005275734957, "learning_rate": 8.101752406570312e-06, "loss": 0.3741, "step": 11455 }, { "epoch": 1.075075075075075, "grad_norm": 0.9336771543892606, "learning_rate": 8.10132417411334e-06, "loss": 0.4034, "step": 11456 }, { "epoch": 1.0751689189189189, "grad_norm": 1.0338653681655587, "learning_rate": 8.100895904678917e-06, "loss": 0.4534, "step": 11457 }, { "epoch": 1.0752627627627627, "grad_norm": 1.0547608031043871, "learning_rate": 8.100467598272145e-06, "loss": 0.3742, "step": 11458 }, { "epoch": 1.0753566066066067, "grad_norm": 1.136298672429936, "learning_rate": 8.100039254898135e-06, "loss": 0.4476, "step": 11459 }, { "epoch": 1.0754504504504505, "grad_norm": 0.8506222187463321, "learning_rate": 8.099610874561991e-06, "loss": 0.4387, "step": 11460 }, { "epoch": 1.0755442942942943, "grad_norm": 1.0642387092792946, "learning_rate": 8.09918245726882e-06, "loss": 0.4874, "step": 11461 }, { "epoch": 1.0756381381381381, "grad_norm": 0.964533995961294, "learning_rate": 8.098754003023735e-06, "loss": 0.4589, "step": 11462 }, { "epoch": 1.075731981981982, "grad_norm": 1.436807992105152, "learning_rate": 8.098325511831839e-06, "loss": 0.4558, "step": 11463 }, { "epoch": 1.0758258258258258, "grad_norm": 1.037888219934933, "learning_rate": 8.097896983698244e-06, "loss": 0.4136, "step": 11464 }, { "epoch": 1.0759196696696698, "grad_norm": 0.954373705535192, "learning_rate": 8.097468418628058e-06, "loss": 0.4399, "step": 11465 }, { "epoch": 1.0760135135135136, "grad_norm": 1.021976473549036, "learning_rate": 8.097039816626393e-06, "loss": 0.3935, "step": 11466 }, { "epoch": 1.0761073573573574, "grad_norm": 1.1054692858193282, "learning_rate": 8.096611177698357e-06, "loss": 0.4448, "step": 11467 }, { "epoch": 1.0762012012012012, "grad_norm": 0.9011552818525455, "learning_rate": 8.096182501849062e-06, "loss": 0.3695, "step": 11468 }, { "epoch": 1.076295045045045, "grad_norm": 0.9595753200620852, "learning_rate": 8.095753789083618e-06, "loss": 0.3862, "step": 11469 }, { "epoch": 1.0763888888888888, "grad_norm": 1.1575632317289277, "learning_rate": 8.095325039407136e-06, "loss": 0.4157, "step": 11470 }, { "epoch": 1.0764827327327327, "grad_norm": 0.8538593632461552, "learning_rate": 8.09489625282473e-06, "loss": 0.3596, "step": 11471 }, { "epoch": 1.0765765765765765, "grad_norm": 0.992518866502795, "learning_rate": 8.094467429341512e-06, "loss": 0.3951, "step": 11472 }, { "epoch": 1.0766704204204205, "grad_norm": 1.0657219747401219, "learning_rate": 8.094038568962596e-06, "loss": 0.4135, "step": 11473 }, { "epoch": 1.0767642642642643, "grad_norm": 1.0528228049852746, "learning_rate": 8.093609671693093e-06, "loss": 0.4584, "step": 11474 }, { "epoch": 1.0768581081081081, "grad_norm": 1.0018348801546932, "learning_rate": 8.093180737538117e-06, "loss": 0.4181, "step": 11475 }, { "epoch": 1.076951951951952, "grad_norm": 0.9474431410698002, "learning_rate": 8.092751766502785e-06, "loss": 0.4215, "step": 11476 }, { "epoch": 1.0770457957957957, "grad_norm": 1.1124571164318482, "learning_rate": 8.092322758592208e-06, "loss": 0.3897, "step": 11477 }, { "epoch": 1.0771396396396395, "grad_norm": 2.439371538219028, "learning_rate": 8.091893713811502e-06, "loss": 0.4358, "step": 11478 }, { "epoch": 1.0772334834834836, "grad_norm": 1.028767973478279, "learning_rate": 8.091464632165785e-06, "loss": 0.3925, "step": 11479 }, { "epoch": 1.0773273273273274, "grad_norm": 0.8776206297189627, "learning_rate": 8.091035513660171e-06, "loss": 0.3628, "step": 11480 }, { "epoch": 1.0774211711711712, "grad_norm": 1.1291121376754052, "learning_rate": 8.090606358299778e-06, "loss": 0.4358, "step": 11481 }, { "epoch": 1.077515015015015, "grad_norm": 0.9193583608462685, "learning_rate": 8.090177166089718e-06, "loss": 0.3895, "step": 11482 }, { "epoch": 1.0776088588588588, "grad_norm": 1.006091379233978, "learning_rate": 8.089747937035116e-06, "loss": 0.3791, "step": 11483 }, { "epoch": 1.0777027027027026, "grad_norm": 0.9450666272715009, "learning_rate": 8.089318671141084e-06, "loss": 0.3488, "step": 11484 }, { "epoch": 1.0777965465465464, "grad_norm": 0.9370943768342562, "learning_rate": 8.088889368412742e-06, "loss": 0.4147, "step": 11485 }, { "epoch": 1.0778903903903905, "grad_norm": 0.8559745137901846, "learning_rate": 8.088460028855207e-06, "loss": 0.3556, "step": 11486 }, { "epoch": 1.0779842342342343, "grad_norm": 0.9453140916290977, "learning_rate": 8.088030652473603e-06, "loss": 0.4062, "step": 11487 }, { "epoch": 1.078078078078078, "grad_norm": 0.9021485807928904, "learning_rate": 8.087601239273041e-06, "loss": 0.426, "step": 11488 }, { "epoch": 1.078171921921922, "grad_norm": 0.9601816588987309, "learning_rate": 8.087171789258651e-06, "loss": 0.3357, "step": 11489 }, { "epoch": 1.0782657657657657, "grad_norm": 1.104642788424109, "learning_rate": 8.086742302435546e-06, "loss": 0.4101, "step": 11490 }, { "epoch": 1.0783596096096095, "grad_norm": 0.8829770328221169, "learning_rate": 8.08631277880885e-06, "loss": 0.3816, "step": 11491 }, { "epoch": 1.0784534534534536, "grad_norm": 0.8319402795836001, "learning_rate": 8.085883218383683e-06, "loss": 0.3796, "step": 11492 }, { "epoch": 1.0785472972972974, "grad_norm": 1.0111877379373364, "learning_rate": 8.085453621165168e-06, "loss": 0.3993, "step": 11493 }, { "epoch": 1.0786411411411412, "grad_norm": 1.0602332443824805, "learning_rate": 8.085023987158425e-06, "loss": 0.4119, "step": 11494 }, { "epoch": 1.078734984984985, "grad_norm": 1.7163313780426221, "learning_rate": 8.084594316368579e-06, "loss": 0.4023, "step": 11495 }, { "epoch": 1.0788288288288288, "grad_norm": 1.0527237405132972, "learning_rate": 8.084164608800753e-06, "loss": 0.3659, "step": 11496 }, { "epoch": 1.0789226726726726, "grad_norm": 2.19760501671367, "learning_rate": 8.083734864460067e-06, "loss": 0.4044, "step": 11497 }, { "epoch": 1.0790165165165164, "grad_norm": 0.9724933110232609, "learning_rate": 8.083305083351648e-06, "loss": 0.396, "step": 11498 }, { "epoch": 1.0791103603603605, "grad_norm": 1.050491529661582, "learning_rate": 8.08287526548062e-06, "loss": 0.4058, "step": 11499 }, { "epoch": 1.0792042042042043, "grad_norm": 1.0978649659691704, "learning_rate": 8.082445410852107e-06, "loss": 0.3823, "step": 11500 }, { "epoch": 1.079298048048048, "grad_norm": 0.9612486597836906, "learning_rate": 8.082015519471235e-06, "loss": 0.3623, "step": 11501 }, { "epoch": 1.0793918918918919, "grad_norm": 1.036210812283356, "learning_rate": 8.081585591343128e-06, "loss": 0.4664, "step": 11502 }, { "epoch": 1.0794857357357357, "grad_norm": 0.8747341663249553, "learning_rate": 8.081155626472913e-06, "loss": 0.3709, "step": 11503 }, { "epoch": 1.0795795795795795, "grad_norm": 1.191454144459779, "learning_rate": 8.080725624865721e-06, "loss": 0.4329, "step": 11504 }, { "epoch": 1.0796734234234235, "grad_norm": 0.9666085555619648, "learning_rate": 8.080295586526673e-06, "loss": 0.3451, "step": 11505 }, { "epoch": 1.0797672672672673, "grad_norm": 0.933628210552631, "learning_rate": 8.079865511460895e-06, "loss": 0.4183, "step": 11506 }, { "epoch": 1.0798611111111112, "grad_norm": 0.9355417664409493, "learning_rate": 8.079435399673521e-06, "loss": 0.4194, "step": 11507 }, { "epoch": 1.079954954954955, "grad_norm": 1.578628070823247, "learning_rate": 8.079005251169674e-06, "loss": 0.3855, "step": 11508 }, { "epoch": 1.0800487987987988, "grad_norm": 0.9036502439653231, "learning_rate": 8.078575065954489e-06, "loss": 0.4279, "step": 11509 }, { "epoch": 1.0801426426426426, "grad_norm": 0.9473843658281175, "learning_rate": 8.078144844033087e-06, "loss": 0.3856, "step": 11510 }, { "epoch": 1.0802364864864864, "grad_norm": 0.883490419229325, "learning_rate": 8.077714585410603e-06, "loss": 0.4332, "step": 11511 }, { "epoch": 1.0803303303303302, "grad_norm": 0.938550827552579, "learning_rate": 8.077284290092167e-06, "loss": 0.4235, "step": 11512 }, { "epoch": 1.0804241741741742, "grad_norm": 1.034967325727582, "learning_rate": 8.076853958082907e-06, "loss": 0.4104, "step": 11513 }, { "epoch": 1.080518018018018, "grad_norm": 1.5103766075616474, "learning_rate": 8.076423589387957e-06, "loss": 0.3594, "step": 11514 }, { "epoch": 1.0806118618618619, "grad_norm": 1.1218931256460893, "learning_rate": 8.075993184012444e-06, "loss": 0.404, "step": 11515 }, { "epoch": 1.0807057057057057, "grad_norm": 0.9737907840064837, "learning_rate": 8.075562741961504e-06, "loss": 0.4651, "step": 11516 }, { "epoch": 1.0807995495495495, "grad_norm": 0.9371772960181812, "learning_rate": 8.075132263240267e-06, "loss": 0.3983, "step": 11517 }, { "epoch": 1.0808933933933933, "grad_norm": 1.0451827488578287, "learning_rate": 8.074701747853865e-06, "loss": 0.3972, "step": 11518 }, { "epoch": 1.0809872372372373, "grad_norm": 0.9314031929762815, "learning_rate": 8.074271195807433e-06, "loss": 0.4311, "step": 11519 }, { "epoch": 1.0810810810810811, "grad_norm": 1.0296596558479325, "learning_rate": 8.073840607106103e-06, "loss": 0.4187, "step": 11520 }, { "epoch": 1.081174924924925, "grad_norm": 0.9058731552895549, "learning_rate": 8.073409981755012e-06, "loss": 0.3386, "step": 11521 }, { "epoch": 1.0812687687687688, "grad_norm": 0.9610261354379832, "learning_rate": 8.07297931975929e-06, "loss": 0.4651, "step": 11522 }, { "epoch": 1.0813626126126126, "grad_norm": 1.4318457333367929, "learning_rate": 8.072548621124074e-06, "loss": 0.3963, "step": 11523 }, { "epoch": 1.0814564564564564, "grad_norm": 1.0014488847050638, "learning_rate": 8.072117885854498e-06, "loss": 0.3771, "step": 11524 }, { "epoch": 1.0815503003003002, "grad_norm": 1.11722237579324, "learning_rate": 8.071687113955702e-06, "loss": 0.4663, "step": 11525 }, { "epoch": 1.0816441441441442, "grad_norm": 0.9721212181614115, "learning_rate": 8.071256305432818e-06, "loss": 0.4184, "step": 11526 }, { "epoch": 1.081737987987988, "grad_norm": 0.9465992010048403, "learning_rate": 8.070825460290982e-06, "loss": 0.3843, "step": 11527 }, { "epoch": 1.0818318318318318, "grad_norm": 0.9718471286395417, "learning_rate": 8.070394578535334e-06, "loss": 0.3891, "step": 11528 }, { "epoch": 1.0819256756756757, "grad_norm": 1.033653172819308, "learning_rate": 8.069963660171009e-06, "loss": 0.4371, "step": 11529 }, { "epoch": 1.0820195195195195, "grad_norm": 1.069797477210977, "learning_rate": 8.069532705203147e-06, "loss": 0.4235, "step": 11530 }, { "epoch": 1.0821133633633633, "grad_norm": 0.9763979626934748, "learning_rate": 8.069101713636886e-06, "loss": 0.4436, "step": 11531 }, { "epoch": 1.0822072072072073, "grad_norm": 0.9033851492403431, "learning_rate": 8.068670685477363e-06, "loss": 0.4079, "step": 11532 }, { "epoch": 1.0823010510510511, "grad_norm": 0.8697235279698514, "learning_rate": 8.068239620729718e-06, "loss": 0.4156, "step": 11533 }, { "epoch": 1.082394894894895, "grad_norm": 0.8280654722463475, "learning_rate": 8.067808519399092e-06, "loss": 0.3778, "step": 11534 }, { "epoch": 1.0824887387387387, "grad_norm": 1.3319808766614003, "learning_rate": 8.067377381490623e-06, "loss": 0.4484, "step": 11535 }, { "epoch": 1.0825825825825826, "grad_norm": 0.9880741791785165, "learning_rate": 8.066946207009451e-06, "loss": 0.4271, "step": 11536 }, { "epoch": 1.0826764264264264, "grad_norm": 2.794374107222468, "learning_rate": 8.066514995960721e-06, "loss": 0.3779, "step": 11537 }, { "epoch": 1.0827702702702702, "grad_norm": 0.7893777169515228, "learning_rate": 8.066083748349571e-06, "loss": 0.3822, "step": 11538 }, { "epoch": 1.0828641141141142, "grad_norm": 0.9097062345700146, "learning_rate": 8.065652464181143e-06, "loss": 0.3988, "step": 11539 }, { "epoch": 1.082957957957958, "grad_norm": 0.9908178421251459, "learning_rate": 8.06522114346058e-06, "loss": 0.451, "step": 11540 }, { "epoch": 1.0830518018018018, "grad_norm": 0.8944607392096183, "learning_rate": 8.064789786193025e-06, "loss": 0.4029, "step": 11541 }, { "epoch": 1.0831456456456456, "grad_norm": 1.0772414680578366, "learning_rate": 8.06435839238362e-06, "loss": 0.4276, "step": 11542 }, { "epoch": 1.0832394894894894, "grad_norm": 0.8655502236326308, "learning_rate": 8.063926962037511e-06, "loss": 0.4231, "step": 11543 }, { "epoch": 1.0833333333333333, "grad_norm": 1.1157376655524371, "learning_rate": 8.06349549515984e-06, "loss": 0.4024, "step": 11544 }, { "epoch": 1.0834271771771773, "grad_norm": 1.0524011756800682, "learning_rate": 8.06306399175575e-06, "loss": 0.3956, "step": 11545 }, { "epoch": 1.083521021021021, "grad_norm": 0.9380140510554875, "learning_rate": 8.062632451830389e-06, "loss": 0.3955, "step": 11546 }, { "epoch": 1.083614864864865, "grad_norm": 0.9217396630774852, "learning_rate": 8.062200875388899e-06, "loss": 0.427, "step": 11547 }, { "epoch": 1.0837087087087087, "grad_norm": 1.0338779251618522, "learning_rate": 8.061769262436428e-06, "loss": 0.4701, "step": 11548 }, { "epoch": 1.0838025525525525, "grad_norm": 0.9467888382830848, "learning_rate": 8.061337612978121e-06, "loss": 0.4281, "step": 11549 }, { "epoch": 1.0838963963963963, "grad_norm": 1.4375809781443822, "learning_rate": 8.060905927019127e-06, "loss": 0.3825, "step": 11550 }, { "epoch": 1.0839902402402402, "grad_norm": 0.9517627084290068, "learning_rate": 8.060474204564592e-06, "loss": 0.4169, "step": 11551 }, { "epoch": 1.0840840840840842, "grad_norm": 1.1012299027556707, "learning_rate": 8.060042445619662e-06, "loss": 0.3723, "step": 11552 }, { "epoch": 1.084177927927928, "grad_norm": 0.8513548987525064, "learning_rate": 8.059610650189484e-06, "loss": 0.4103, "step": 11553 }, { "epoch": 1.0842717717717718, "grad_norm": 1.1969131261187391, "learning_rate": 8.059178818279212e-06, "loss": 0.4146, "step": 11554 }, { "epoch": 1.0843656156156156, "grad_norm": 1.0628548517648662, "learning_rate": 8.058746949893988e-06, "loss": 0.3609, "step": 11555 }, { "epoch": 1.0844594594594594, "grad_norm": 0.8620836418769724, "learning_rate": 8.058315045038964e-06, "loss": 0.4308, "step": 11556 }, { "epoch": 1.0845533033033032, "grad_norm": 2.421560092805895, "learning_rate": 8.05788310371929e-06, "loss": 0.4421, "step": 11557 }, { "epoch": 1.084647147147147, "grad_norm": 1.0608122494604506, "learning_rate": 8.057451125940116e-06, "loss": 0.3777, "step": 11558 }, { "epoch": 1.084740990990991, "grad_norm": 0.9849261359700126, "learning_rate": 8.057019111706595e-06, "loss": 0.4175, "step": 11559 }, { "epoch": 1.084834834834835, "grad_norm": 0.8760895728984033, "learning_rate": 8.056587061023873e-06, "loss": 0.4128, "step": 11560 }, { "epoch": 1.0849286786786787, "grad_norm": 1.0228451614563574, "learning_rate": 8.056154973897104e-06, "loss": 0.4181, "step": 11561 }, { "epoch": 1.0850225225225225, "grad_norm": 1.1511594845472486, "learning_rate": 8.055722850331438e-06, "loss": 0.4124, "step": 11562 }, { "epoch": 1.0851163663663663, "grad_norm": 1.2305623864589574, "learning_rate": 8.05529069033203e-06, "loss": 0.4234, "step": 11563 }, { "epoch": 1.0852102102102101, "grad_norm": 0.9985675932043804, "learning_rate": 8.054858493904034e-06, "loss": 0.3742, "step": 11564 }, { "epoch": 1.085304054054054, "grad_norm": 0.9698631450015516, "learning_rate": 8.054426261052596e-06, "loss": 0.3783, "step": 11565 }, { "epoch": 1.085397897897898, "grad_norm": 2.5858710547102484, "learning_rate": 8.053993991782877e-06, "loss": 0.394, "step": 11566 }, { "epoch": 1.0854917417417418, "grad_norm": 0.9691440400261999, "learning_rate": 8.053561686100028e-06, "loss": 0.4165, "step": 11567 }, { "epoch": 1.0855855855855856, "grad_norm": 1.2208331245453603, "learning_rate": 8.053129344009203e-06, "loss": 0.4074, "step": 11568 }, { "epoch": 1.0856794294294294, "grad_norm": 1.8715446147106063, "learning_rate": 8.05269696551556e-06, "loss": 0.3872, "step": 11569 }, { "epoch": 1.0857732732732732, "grad_norm": 1.3325944530827727, "learning_rate": 8.05226455062425e-06, "loss": 0.441, "step": 11570 }, { "epoch": 1.085867117117117, "grad_norm": 1.0560313091901226, "learning_rate": 8.051832099340428e-06, "loss": 0.3925, "step": 11571 }, { "epoch": 1.085960960960961, "grad_norm": 0.9723593451118719, "learning_rate": 8.051399611669255e-06, "loss": 0.4204, "step": 11572 }, { "epoch": 1.0860548048048049, "grad_norm": 1.2829912278505768, "learning_rate": 8.050967087615886e-06, "loss": 0.417, "step": 11573 }, { "epoch": 1.0861486486486487, "grad_norm": 0.9579755238503935, "learning_rate": 8.050534527185476e-06, "loss": 0.4264, "step": 11574 }, { "epoch": 1.0862424924924925, "grad_norm": 2.107919317541195, "learning_rate": 8.050101930383185e-06, "loss": 0.4211, "step": 11575 }, { "epoch": 1.0863363363363363, "grad_norm": 0.9320182614576239, "learning_rate": 8.049669297214168e-06, "loss": 0.4413, "step": 11576 }, { "epoch": 1.0864301801801801, "grad_norm": 0.9313562367753174, "learning_rate": 8.049236627683587e-06, "loss": 0.3756, "step": 11577 }, { "epoch": 1.086524024024024, "grad_norm": 0.9156244511508568, "learning_rate": 8.048803921796598e-06, "loss": 0.3983, "step": 11578 }, { "epoch": 1.086617867867868, "grad_norm": 0.965693843720902, "learning_rate": 8.04837117955836e-06, "loss": 0.4104, "step": 11579 }, { "epoch": 1.0867117117117118, "grad_norm": 0.9318635068352714, "learning_rate": 8.047938400974035e-06, "loss": 0.3947, "step": 11580 }, { "epoch": 1.0868055555555556, "grad_norm": 1.2153205291337343, "learning_rate": 8.047505586048782e-06, "loss": 0.453, "step": 11581 }, { "epoch": 1.0868993993993994, "grad_norm": 0.8360092209092366, "learning_rate": 8.047072734787761e-06, "loss": 0.3861, "step": 11582 }, { "epoch": 1.0869932432432432, "grad_norm": 1.071828901849068, "learning_rate": 8.046639847196132e-06, "loss": 0.4058, "step": 11583 }, { "epoch": 1.087087087087087, "grad_norm": 1.0383922525186247, "learning_rate": 8.04620692327906e-06, "loss": 0.3685, "step": 11584 }, { "epoch": 1.087180930930931, "grad_norm": 0.9443990377247292, "learning_rate": 8.0457739630417e-06, "loss": 0.3862, "step": 11585 }, { "epoch": 1.0872747747747749, "grad_norm": 0.9685410977327855, "learning_rate": 8.045340966489223e-06, "loss": 0.4075, "step": 11586 }, { "epoch": 1.0873686186186187, "grad_norm": 0.8923529140591452, "learning_rate": 8.044907933626785e-06, "loss": 0.3954, "step": 11587 }, { "epoch": 1.0874624624624625, "grad_norm": 0.8327159415719453, "learning_rate": 8.044474864459552e-06, "loss": 0.3866, "step": 11588 }, { "epoch": 1.0875563063063063, "grad_norm": 1.052046906675092, "learning_rate": 8.044041758992687e-06, "loss": 0.4694, "step": 11589 }, { "epoch": 1.08765015015015, "grad_norm": 1.0769836455786705, "learning_rate": 8.043608617231352e-06, "loss": 0.3846, "step": 11590 }, { "epoch": 1.087743993993994, "grad_norm": 0.9218088885723212, "learning_rate": 8.043175439180714e-06, "loss": 0.4053, "step": 11591 }, { "epoch": 1.087837837837838, "grad_norm": 0.9687485329172734, "learning_rate": 8.04274222484594e-06, "loss": 0.4329, "step": 11592 }, { "epoch": 1.0879316816816818, "grad_norm": 0.8790708013916212, "learning_rate": 8.04230897423219e-06, "loss": 0.3964, "step": 11593 }, { "epoch": 1.0880255255255256, "grad_norm": 1.1011290941954834, "learning_rate": 8.041875687344631e-06, "loss": 0.3733, "step": 11594 }, { "epoch": 1.0881193693693694, "grad_norm": 0.9267012095432278, "learning_rate": 8.041442364188432e-06, "loss": 0.4001, "step": 11595 }, { "epoch": 1.0882132132132132, "grad_norm": 0.9764485795012536, "learning_rate": 8.041009004768756e-06, "loss": 0.4177, "step": 11596 }, { "epoch": 1.088307057057057, "grad_norm": 1.341057004574266, "learning_rate": 8.040575609090774e-06, "loss": 0.4095, "step": 11597 }, { "epoch": 1.0884009009009008, "grad_norm": 1.0000007316742674, "learning_rate": 8.04014217715965e-06, "loss": 0.4112, "step": 11598 }, { "epoch": 1.0884947447447448, "grad_norm": 1.549292173066901, "learning_rate": 8.039708708980552e-06, "loss": 0.4157, "step": 11599 }, { "epoch": 1.0885885885885886, "grad_norm": 0.9346810084094208, "learning_rate": 8.039275204558651e-06, "loss": 0.3794, "step": 11600 }, { "epoch": 1.0886824324324325, "grad_norm": 0.9287927474073986, "learning_rate": 8.038841663899114e-06, "loss": 0.4361, "step": 11601 }, { "epoch": 1.0887762762762763, "grad_norm": 1.100347746047553, "learning_rate": 8.038408087007109e-06, "loss": 0.4404, "step": 11602 }, { "epoch": 1.08887012012012, "grad_norm": 0.9561019910232371, "learning_rate": 8.037974473887807e-06, "loss": 0.3802, "step": 11603 }, { "epoch": 1.0889639639639639, "grad_norm": 0.9990928408190249, "learning_rate": 8.037540824546379e-06, "loss": 0.4059, "step": 11604 }, { "epoch": 1.0890578078078077, "grad_norm": 0.9102993334680721, "learning_rate": 8.037107138987992e-06, "loss": 0.3719, "step": 11605 }, { "epoch": 1.0891516516516517, "grad_norm": 1.1581295232272721, "learning_rate": 8.036673417217821e-06, "loss": 0.4163, "step": 11606 }, { "epoch": 1.0892454954954955, "grad_norm": 0.9715917271421122, "learning_rate": 8.036239659241034e-06, "loss": 0.4268, "step": 11607 }, { "epoch": 1.0893393393393394, "grad_norm": 0.9137070942230825, "learning_rate": 8.035805865062806e-06, "loss": 0.4166, "step": 11608 }, { "epoch": 1.0894331831831832, "grad_norm": 0.9444303331043906, "learning_rate": 8.035372034688308e-06, "loss": 0.4139, "step": 11609 }, { "epoch": 1.089527027027027, "grad_norm": 0.8773131191389588, "learning_rate": 8.034938168122708e-06, "loss": 0.3842, "step": 11610 }, { "epoch": 1.0896208708708708, "grad_norm": 0.851330904480613, "learning_rate": 8.034504265371186e-06, "loss": 0.4286, "step": 11611 }, { "epoch": 1.0897147147147148, "grad_norm": 1.1139334311909679, "learning_rate": 8.034070326438913e-06, "loss": 0.4168, "step": 11612 }, { "epoch": 1.0898085585585586, "grad_norm": 0.9131892414120011, "learning_rate": 8.033636351331061e-06, "loss": 0.359, "step": 11613 }, { "epoch": 1.0899024024024024, "grad_norm": 1.4049367595888074, "learning_rate": 8.033202340052807e-06, "loss": 0.4133, "step": 11614 }, { "epoch": 1.0899962462462462, "grad_norm": 1.0708024741963398, "learning_rate": 8.032768292609323e-06, "loss": 0.4245, "step": 11615 }, { "epoch": 1.09009009009009, "grad_norm": 1.000173283564074, "learning_rate": 8.032334209005787e-06, "loss": 0.3925, "step": 11616 }, { "epoch": 1.0901839339339339, "grad_norm": 0.9069421100798871, "learning_rate": 8.031900089247373e-06, "loss": 0.3873, "step": 11617 }, { "epoch": 1.0902777777777777, "grad_norm": 1.0105830090146493, "learning_rate": 8.031465933339257e-06, "loss": 0.4067, "step": 11618 }, { "epoch": 1.0903716216216217, "grad_norm": 1.0128183750645225, "learning_rate": 8.031031741286614e-06, "loss": 0.4155, "step": 11619 }, { "epoch": 1.0904654654654655, "grad_norm": 1.0111536656363775, "learning_rate": 8.030597513094625e-06, "loss": 0.4443, "step": 11620 }, { "epoch": 1.0905593093093093, "grad_norm": 0.8400598571031174, "learning_rate": 8.030163248768466e-06, "loss": 0.4459, "step": 11621 }, { "epoch": 1.0906531531531531, "grad_norm": 1.1090071303643816, "learning_rate": 8.029728948313314e-06, "loss": 0.4517, "step": 11622 }, { "epoch": 1.090746996996997, "grad_norm": 1.1018854304879466, "learning_rate": 8.029294611734345e-06, "loss": 0.4352, "step": 11623 }, { "epoch": 1.0908408408408408, "grad_norm": 1.268034919102685, "learning_rate": 8.028860239036742e-06, "loss": 0.4094, "step": 11624 }, { "epoch": 1.0909346846846848, "grad_norm": 1.0995933265488986, "learning_rate": 8.02842583022568e-06, "loss": 0.4181, "step": 11625 }, { "epoch": 1.0910285285285286, "grad_norm": 1.0113870920317174, "learning_rate": 8.02799138530634e-06, "loss": 0.4412, "step": 11626 }, { "epoch": 1.0911223723723724, "grad_norm": 1.1134970444036345, "learning_rate": 8.027556904283905e-06, "loss": 0.3855, "step": 11627 }, { "epoch": 1.0912162162162162, "grad_norm": 0.8985679074536636, "learning_rate": 8.02712238716355e-06, "loss": 0.4036, "step": 11628 }, { "epoch": 1.09131006006006, "grad_norm": 0.8797560270453747, "learning_rate": 8.02668783395046e-06, "loss": 0.3923, "step": 11629 }, { "epoch": 1.0914039039039038, "grad_norm": 1.0357982289732774, "learning_rate": 8.026253244649815e-06, "loss": 0.4228, "step": 11630 }, { "epoch": 1.0914977477477477, "grad_norm": 0.8408537065802627, "learning_rate": 8.025818619266797e-06, "loss": 0.4101, "step": 11631 }, { "epoch": 1.0915915915915917, "grad_norm": 0.9167566479474916, "learning_rate": 8.025383957806586e-06, "loss": 0.3882, "step": 11632 }, { "epoch": 1.0916854354354355, "grad_norm": 1.4186621739240621, "learning_rate": 8.024949260274365e-06, "loss": 0.3566, "step": 11633 }, { "epoch": 1.0917792792792793, "grad_norm": 1.2158595760103663, "learning_rate": 8.02451452667532e-06, "loss": 0.4257, "step": 11634 }, { "epoch": 1.0918731231231231, "grad_norm": 0.8389479247424811, "learning_rate": 8.024079757014632e-06, "loss": 0.3737, "step": 11635 }, { "epoch": 1.091966966966967, "grad_norm": 0.9921990570762342, "learning_rate": 8.023644951297485e-06, "loss": 0.4085, "step": 11636 }, { "epoch": 1.0920608108108107, "grad_norm": 0.929588110138845, "learning_rate": 8.023210109529063e-06, "loss": 0.3756, "step": 11637 }, { "epoch": 1.0921546546546546, "grad_norm": 0.9409776528766102, "learning_rate": 8.022775231714552e-06, "loss": 0.4276, "step": 11638 }, { "epoch": 1.0922484984984986, "grad_norm": 1.1313532027346624, "learning_rate": 8.022340317859136e-06, "loss": 0.3941, "step": 11639 }, { "epoch": 1.0923423423423424, "grad_norm": 0.9744350126608796, "learning_rate": 8.021905367968e-06, "loss": 0.3937, "step": 11640 }, { "epoch": 1.0924361861861862, "grad_norm": 0.9196979889128182, "learning_rate": 8.02147038204633e-06, "loss": 0.4211, "step": 11641 }, { "epoch": 1.09253003003003, "grad_norm": 1.0667164012414554, "learning_rate": 8.021035360099314e-06, "loss": 0.3768, "step": 11642 }, { "epoch": 1.0926238738738738, "grad_norm": 0.9442320496257136, "learning_rate": 8.020600302132138e-06, "loss": 0.3891, "step": 11643 }, { "epoch": 1.0927177177177176, "grad_norm": 0.9761123650635761, "learning_rate": 8.020165208149987e-06, "loss": 0.4389, "step": 11644 }, { "epoch": 1.0928115615615615, "grad_norm": 0.9792644652665373, "learning_rate": 8.019730078158053e-06, "loss": 0.3965, "step": 11645 }, { "epoch": 1.0929054054054055, "grad_norm": 0.8954998833509972, "learning_rate": 8.019294912161522e-06, "loss": 0.39, "step": 11646 }, { "epoch": 1.0929992492492493, "grad_norm": 1.0362352691501202, "learning_rate": 8.01885971016558e-06, "loss": 0.3328, "step": 11647 }, { "epoch": 1.093093093093093, "grad_norm": 1.0033799436462785, "learning_rate": 8.018424472175421e-06, "loss": 0.3706, "step": 11648 }, { "epoch": 1.093186936936937, "grad_norm": 0.9277113513039236, "learning_rate": 8.017989198196231e-06, "loss": 0.3658, "step": 11649 }, { "epoch": 1.0932807807807807, "grad_norm": 1.0572118031123634, "learning_rate": 8.0175538882332e-06, "loss": 0.4404, "step": 11650 }, { "epoch": 1.0933746246246245, "grad_norm": 0.9070599265765699, "learning_rate": 8.01711854229152e-06, "loss": 0.3734, "step": 11651 }, { "epoch": 1.0934684684684686, "grad_norm": 0.8809586094073838, "learning_rate": 8.016683160376379e-06, "loss": 0.41, "step": 11652 }, { "epoch": 1.0935623123123124, "grad_norm": 0.9170384707235951, "learning_rate": 8.01624774249297e-06, "loss": 0.3838, "step": 11653 }, { "epoch": 1.0936561561561562, "grad_norm": 0.9723087128780464, "learning_rate": 8.015812288646487e-06, "loss": 0.3944, "step": 11654 }, { "epoch": 1.09375, "grad_norm": 1.0844602444224511, "learning_rate": 8.015376798842116e-06, "loss": 0.3918, "step": 11655 }, { "epoch": 1.0938438438438438, "grad_norm": 0.9405333723962455, "learning_rate": 8.014941273085053e-06, "loss": 0.4234, "step": 11656 }, { "epoch": 1.0939376876876876, "grad_norm": 1.6934504930578977, "learning_rate": 8.014505711380492e-06, "loss": 0.4366, "step": 11657 }, { "epoch": 1.0940315315315314, "grad_norm": 5.710883802276894, "learning_rate": 8.014070113733622e-06, "loss": 0.4107, "step": 11658 }, { "epoch": 1.0941253753753755, "grad_norm": 1.0144989023061322, "learning_rate": 8.01363448014964e-06, "loss": 0.4408, "step": 11659 }, { "epoch": 1.0942192192192193, "grad_norm": 1.0168329710287682, "learning_rate": 8.013198810633742e-06, "loss": 0.3689, "step": 11660 }, { "epoch": 1.094313063063063, "grad_norm": 0.9555728058218064, "learning_rate": 8.012763105191117e-06, "loss": 0.4072, "step": 11661 }, { "epoch": 1.094406906906907, "grad_norm": 1.4320327728122977, "learning_rate": 8.012327363826963e-06, "loss": 0.4546, "step": 11662 }, { "epoch": 1.0945007507507507, "grad_norm": 0.784191099310378, "learning_rate": 8.011891586546478e-06, "loss": 0.3639, "step": 11663 }, { "epoch": 1.0945945945945945, "grad_norm": 1.102106318070646, "learning_rate": 8.011455773354853e-06, "loss": 0.3468, "step": 11664 }, { "epoch": 1.0946884384384385, "grad_norm": 0.9921666062384811, "learning_rate": 8.011019924257285e-06, "loss": 0.4263, "step": 11665 }, { "epoch": 1.0947822822822824, "grad_norm": 1.003588139265263, "learning_rate": 8.010584039258975e-06, "loss": 0.4354, "step": 11666 }, { "epoch": 1.0948761261261262, "grad_norm": 1.0230664051078118, "learning_rate": 8.010148118365115e-06, "loss": 0.3662, "step": 11667 }, { "epoch": 1.09496996996997, "grad_norm": 1.0309768909553851, "learning_rate": 8.009712161580906e-06, "loss": 0.414, "step": 11668 }, { "epoch": 1.0950638138138138, "grad_norm": 1.01983391008643, "learning_rate": 8.009276168911547e-06, "loss": 0.3779, "step": 11669 }, { "epoch": 1.0951576576576576, "grad_norm": 0.839379215643345, "learning_rate": 8.00884014036223e-06, "loss": 0.3868, "step": 11670 }, { "epoch": 1.0952515015015014, "grad_norm": 1.0984797194950817, "learning_rate": 8.00840407593816e-06, "loss": 0.4145, "step": 11671 }, { "epoch": 1.0953453453453454, "grad_norm": 0.9579319454652465, "learning_rate": 8.007967975644533e-06, "loss": 0.3981, "step": 11672 }, { "epoch": 1.0954391891891893, "grad_norm": 0.8864740419059458, "learning_rate": 8.007531839486548e-06, "loss": 0.3855, "step": 11673 }, { "epoch": 1.095533033033033, "grad_norm": 0.9542360268535391, "learning_rate": 8.007095667469411e-06, "loss": 0.4101, "step": 11674 }, { "epoch": 1.0956268768768769, "grad_norm": 0.8034414272688795, "learning_rate": 8.006659459598316e-06, "loss": 0.3671, "step": 11675 }, { "epoch": 1.0957207207207207, "grad_norm": 0.9152718581211852, "learning_rate": 8.006223215878467e-06, "loss": 0.4451, "step": 11676 }, { "epoch": 1.0958145645645645, "grad_norm": 0.9126219530468607, "learning_rate": 8.005786936315065e-06, "loss": 0.3914, "step": 11677 }, { "epoch": 1.0959084084084083, "grad_norm": 0.9669899658344431, "learning_rate": 8.005350620913312e-06, "loss": 0.3636, "step": 11678 }, { "epoch": 1.0960022522522523, "grad_norm": 0.9658088821464239, "learning_rate": 8.004914269678408e-06, "loss": 0.3959, "step": 11679 }, { "epoch": 1.0960960960960962, "grad_norm": 0.9858542950185907, "learning_rate": 8.00447788261556e-06, "loss": 0.3589, "step": 11680 }, { "epoch": 1.09618993993994, "grad_norm": 0.9112515810242081, "learning_rate": 8.004041459729968e-06, "loss": 0.3718, "step": 11681 }, { "epoch": 1.0962837837837838, "grad_norm": 0.9965342366543415, "learning_rate": 8.003605001026834e-06, "loss": 0.4399, "step": 11682 }, { "epoch": 1.0963776276276276, "grad_norm": 0.9438492565705681, "learning_rate": 8.003168506511365e-06, "loss": 0.414, "step": 11683 }, { "epoch": 1.0964714714714714, "grad_norm": 1.0035591720551518, "learning_rate": 8.002731976188765e-06, "loss": 0.3512, "step": 11684 }, { "epoch": 1.0965653153153152, "grad_norm": 1.257338224081724, "learning_rate": 8.002295410064238e-06, "loss": 0.4591, "step": 11685 }, { "epoch": 1.0966591591591592, "grad_norm": 1.3832736342652658, "learning_rate": 8.00185880814299e-06, "loss": 0.3857, "step": 11686 }, { "epoch": 1.096753003003003, "grad_norm": 0.9424420183884459, "learning_rate": 8.001422170430227e-06, "loss": 0.3804, "step": 11687 }, { "epoch": 1.0968468468468469, "grad_norm": 0.9807627285091549, "learning_rate": 8.000985496931151e-06, "loss": 0.4143, "step": 11688 }, { "epoch": 1.0969406906906907, "grad_norm": 0.9373045036747972, "learning_rate": 8.000548787650974e-06, "loss": 0.3655, "step": 11689 }, { "epoch": 1.0970345345345345, "grad_norm": 1.2458606966616257, "learning_rate": 8.000112042594901e-06, "loss": 0.4209, "step": 11690 }, { "epoch": 1.0971283783783783, "grad_norm": 1.1402756183313996, "learning_rate": 7.999675261768138e-06, "loss": 0.4172, "step": 11691 }, { "epoch": 1.0972222222222223, "grad_norm": 1.4585280949295012, "learning_rate": 7.999238445175894e-06, "loss": 0.3515, "step": 11692 }, { "epoch": 1.0973160660660661, "grad_norm": 0.9959975429179182, "learning_rate": 7.998801592823379e-06, "loss": 0.3645, "step": 11693 }, { "epoch": 1.09740990990991, "grad_norm": 1.0463961308969376, "learning_rate": 7.998364704715797e-06, "loss": 0.404, "step": 11694 }, { "epoch": 1.0975037537537538, "grad_norm": 0.9613265920751752, "learning_rate": 7.99792778085836e-06, "loss": 0.3769, "step": 11695 }, { "epoch": 1.0975975975975976, "grad_norm": 0.925662249226603, "learning_rate": 7.997490821256278e-06, "loss": 0.392, "step": 11696 }, { "epoch": 1.0976914414414414, "grad_norm": 1.1766094791583812, "learning_rate": 7.99705382591476e-06, "loss": 0.4217, "step": 11697 }, { "epoch": 1.0977852852852852, "grad_norm": 1.4692499285654714, "learning_rate": 7.996616794839015e-06, "loss": 0.3931, "step": 11698 }, { "epoch": 1.0978791291291292, "grad_norm": 0.8280718113359824, "learning_rate": 7.996179728034258e-06, "loss": 0.3956, "step": 11699 }, { "epoch": 1.097972972972973, "grad_norm": 0.8951768457490709, "learning_rate": 7.995742625505697e-06, "loss": 0.367, "step": 11700 }, { "epoch": 1.0980668168168168, "grad_norm": 0.945334744703742, "learning_rate": 7.995305487258542e-06, "loss": 0.401, "step": 11701 }, { "epoch": 1.0981606606606606, "grad_norm": 1.0202362313488327, "learning_rate": 7.99486831329801e-06, "loss": 0.4526, "step": 11702 }, { "epoch": 1.0982545045045045, "grad_norm": 0.9021106400002596, "learning_rate": 7.99443110362931e-06, "loss": 0.4006, "step": 11703 }, { "epoch": 1.0983483483483483, "grad_norm": 0.8951611014198888, "learning_rate": 7.993993858257654e-06, "loss": 0.3891, "step": 11704 }, { "epoch": 1.0984421921921923, "grad_norm": 1.1223314083438038, "learning_rate": 7.993556577188258e-06, "loss": 0.3676, "step": 11705 }, { "epoch": 1.0985360360360361, "grad_norm": 1.1640902444661037, "learning_rate": 7.993119260426334e-06, "loss": 0.4197, "step": 11706 }, { "epoch": 1.09862987987988, "grad_norm": 0.9662943523665166, "learning_rate": 7.992681907977095e-06, "loss": 0.4195, "step": 11707 }, { "epoch": 1.0987237237237237, "grad_norm": 1.188574642783856, "learning_rate": 7.992244519845761e-06, "loss": 0.4084, "step": 11708 }, { "epoch": 1.0988175675675675, "grad_norm": 1.4854621742055978, "learning_rate": 7.991807096037542e-06, "loss": 0.3766, "step": 11709 }, { "epoch": 1.0989114114114114, "grad_norm": 2.5359278164485475, "learning_rate": 7.991369636557653e-06, "loss": 0.4454, "step": 11710 }, { "epoch": 1.0990052552552552, "grad_norm": 1.1167711494664336, "learning_rate": 7.990932141411315e-06, "loss": 0.4299, "step": 11711 }, { "epoch": 1.0990990990990992, "grad_norm": 1.0926777334845088, "learning_rate": 7.990494610603739e-06, "loss": 0.4298, "step": 11712 }, { "epoch": 1.099192942942943, "grad_norm": 0.9398911434380812, "learning_rate": 7.990057044140145e-06, "loss": 0.3803, "step": 11713 }, { "epoch": 1.0992867867867868, "grad_norm": 0.9625095897047159, "learning_rate": 7.989619442025747e-06, "loss": 0.3924, "step": 11714 }, { "epoch": 1.0993806306306306, "grad_norm": 1.0256789653254177, "learning_rate": 7.989181804265767e-06, "loss": 0.3896, "step": 11715 }, { "epoch": 1.0994744744744744, "grad_norm": 1.088297527747778, "learning_rate": 7.988744130865419e-06, "loss": 0.389, "step": 11716 }, { "epoch": 1.0995683183183182, "grad_norm": 0.8777753497067375, "learning_rate": 7.988306421829923e-06, "loss": 0.3811, "step": 11717 }, { "epoch": 1.099662162162162, "grad_norm": 0.8577835041710685, "learning_rate": 7.987868677164497e-06, "loss": 0.3894, "step": 11718 }, { "epoch": 1.099756006006006, "grad_norm": 0.8054887221308672, "learning_rate": 7.987430896874363e-06, "loss": 0.3936, "step": 11719 }, { "epoch": 1.09984984984985, "grad_norm": 1.2059000117417238, "learning_rate": 7.986993080964738e-06, "loss": 0.4089, "step": 11720 }, { "epoch": 1.0999436936936937, "grad_norm": 0.9137612886815996, "learning_rate": 7.986555229440842e-06, "loss": 0.4238, "step": 11721 }, { "epoch": 1.1000375375375375, "grad_norm": 0.9700609775955608, "learning_rate": 7.986117342307897e-06, "loss": 0.416, "step": 11722 }, { "epoch": 1.1001313813813813, "grad_norm": 0.9014138784732607, "learning_rate": 7.985679419571126e-06, "loss": 0.4203, "step": 11723 }, { "epoch": 1.1002252252252251, "grad_norm": 0.9747544666724095, "learning_rate": 7.985241461235745e-06, "loss": 0.3925, "step": 11724 }, { "epoch": 1.100319069069069, "grad_norm": 1.783825898465733, "learning_rate": 7.98480346730698e-06, "loss": 0.3842, "step": 11725 }, { "epoch": 1.100412912912913, "grad_norm": 1.0517729760018935, "learning_rate": 7.984365437790051e-06, "loss": 0.4218, "step": 11726 }, { "epoch": 1.1005067567567568, "grad_norm": 0.9371910704412271, "learning_rate": 7.983927372690182e-06, "loss": 0.4302, "step": 11727 }, { "epoch": 1.1006006006006006, "grad_norm": 1.1004985600871053, "learning_rate": 7.983489272012595e-06, "loss": 0.3909, "step": 11728 }, { "epoch": 1.1006944444444444, "grad_norm": 0.8976697532404967, "learning_rate": 7.983051135762516e-06, "loss": 0.4377, "step": 11729 }, { "epoch": 1.1007882882882882, "grad_norm": 0.8411506243395398, "learning_rate": 7.982612963945166e-06, "loss": 0.3795, "step": 11730 }, { "epoch": 1.100882132132132, "grad_norm": 0.9475676920256211, "learning_rate": 7.98217475656577e-06, "loss": 0.4028, "step": 11731 }, { "epoch": 1.100975975975976, "grad_norm": 0.9733822449778825, "learning_rate": 7.981736513629555e-06, "loss": 0.4396, "step": 11732 }, { "epoch": 1.1010698198198199, "grad_norm": 0.8637878848807379, "learning_rate": 7.981298235141744e-06, "loss": 0.3769, "step": 11733 }, { "epoch": 1.1011636636636637, "grad_norm": 1.1089431239461698, "learning_rate": 7.980859921107564e-06, "loss": 0.4058, "step": 11734 }, { "epoch": 1.1012575075075075, "grad_norm": 0.9152167440256398, "learning_rate": 7.98042157153224e-06, "loss": 0.3383, "step": 11735 }, { "epoch": 1.1013513513513513, "grad_norm": 0.9961409022531822, "learning_rate": 7.979983186420998e-06, "loss": 0.4019, "step": 11736 }, { "epoch": 1.1014451951951951, "grad_norm": 1.0393345377559708, "learning_rate": 7.979544765779066e-06, "loss": 0.4083, "step": 11737 }, { "epoch": 1.101539039039039, "grad_norm": 0.9459316839417794, "learning_rate": 7.97910630961167e-06, "loss": 0.4205, "step": 11738 }, { "epoch": 1.101632882882883, "grad_norm": 0.929098788615128, "learning_rate": 7.978667817924042e-06, "loss": 0.3936, "step": 11739 }, { "epoch": 1.1017267267267268, "grad_norm": 0.9648022973095242, "learning_rate": 7.978229290721405e-06, "loss": 0.3854, "step": 11740 }, { "epoch": 1.1018205705705706, "grad_norm": 0.9671613453876076, "learning_rate": 7.97779072800899e-06, "loss": 0.3938, "step": 11741 }, { "epoch": 1.1019144144144144, "grad_norm": 0.93079602545398, "learning_rate": 7.977352129792024e-06, "loss": 0.4361, "step": 11742 }, { "epoch": 1.1020082582582582, "grad_norm": 1.0989997501255593, "learning_rate": 7.97691349607574e-06, "loss": 0.4097, "step": 11743 }, { "epoch": 1.102102102102102, "grad_norm": 0.945449901586257, "learning_rate": 7.976474826865365e-06, "loss": 0.4211, "step": 11744 }, { "epoch": 1.102195945945946, "grad_norm": 0.8438997423945552, "learning_rate": 7.976036122166132e-06, "loss": 0.3667, "step": 11745 }, { "epoch": 1.1022897897897899, "grad_norm": 1.0390548761215503, "learning_rate": 7.975597381983269e-06, "loss": 0.4312, "step": 11746 }, { "epoch": 1.1023836336336337, "grad_norm": 0.9752570014978222, "learning_rate": 7.97515860632201e-06, "loss": 0.4181, "step": 11747 }, { "epoch": 1.1024774774774775, "grad_norm": 0.9234223847809597, "learning_rate": 7.97471979518758e-06, "loss": 0.4066, "step": 11748 }, { "epoch": 1.1025713213213213, "grad_norm": 0.92016378557074, "learning_rate": 7.97428094858522e-06, "loss": 0.3709, "step": 11749 }, { "epoch": 1.102665165165165, "grad_norm": 0.856223799129895, "learning_rate": 7.973842066520157e-06, "loss": 0.3518, "step": 11750 }, { "epoch": 1.102759009009009, "grad_norm": 0.9005855924910137, "learning_rate": 7.973403148997624e-06, "loss": 0.3881, "step": 11751 }, { "epoch": 1.102852852852853, "grad_norm": 0.8629340627536677, "learning_rate": 7.972964196022856e-06, "loss": 0.3808, "step": 11752 }, { "epoch": 1.1029466966966968, "grad_norm": 0.8746167295554428, "learning_rate": 7.972525207601088e-06, "loss": 0.3886, "step": 11753 }, { "epoch": 1.1030405405405406, "grad_norm": 2.031988126888772, "learning_rate": 7.97208618373755e-06, "loss": 0.4318, "step": 11754 }, { "epoch": 1.1031343843843844, "grad_norm": 1.442881438728214, "learning_rate": 7.971647124437478e-06, "loss": 0.3467, "step": 11755 }, { "epoch": 1.1032282282282282, "grad_norm": 0.9462002869037192, "learning_rate": 7.97120802970611e-06, "loss": 0.4374, "step": 11756 }, { "epoch": 1.103322072072072, "grad_norm": 1.0565081199514912, "learning_rate": 7.970768899548676e-06, "loss": 0.4137, "step": 11757 }, { "epoch": 1.1034159159159158, "grad_norm": 1.0344484181161897, "learning_rate": 7.970329733970417e-06, "loss": 0.3753, "step": 11758 }, { "epoch": 1.1035097597597598, "grad_norm": 0.9405382875901943, "learning_rate": 7.969890532976567e-06, "loss": 0.4025, "step": 11759 }, { "epoch": 1.1036036036036037, "grad_norm": 0.9548678819220572, "learning_rate": 7.969451296572362e-06, "loss": 0.3973, "step": 11760 }, { "epoch": 1.1036974474474475, "grad_norm": 0.9833121741398827, "learning_rate": 7.96901202476304e-06, "loss": 0.3758, "step": 11761 }, { "epoch": 1.1037912912912913, "grad_norm": 0.991031803829447, "learning_rate": 7.968572717553838e-06, "loss": 0.4183, "step": 11762 }, { "epoch": 1.103885135135135, "grad_norm": 0.957382317883431, "learning_rate": 7.968133374949994e-06, "loss": 0.3926, "step": 11763 }, { "epoch": 1.103978978978979, "grad_norm": 1.1357154729357755, "learning_rate": 7.967693996956745e-06, "loss": 0.4038, "step": 11764 }, { "epoch": 1.1040728228228227, "grad_norm": 1.3399285562161358, "learning_rate": 7.967254583579334e-06, "loss": 0.4898, "step": 11765 }, { "epoch": 1.1041666666666667, "grad_norm": 0.8548220141798281, "learning_rate": 7.966815134822996e-06, "loss": 0.3994, "step": 11766 }, { "epoch": 1.1042605105105106, "grad_norm": 1.2432430313548464, "learning_rate": 7.96637565069297e-06, "loss": 0.3994, "step": 11767 }, { "epoch": 1.1043543543543544, "grad_norm": 1.5927819213309926, "learning_rate": 7.9659361311945e-06, "loss": 0.4309, "step": 11768 }, { "epoch": 1.1044481981981982, "grad_norm": 0.9038084494833305, "learning_rate": 7.965496576332824e-06, "loss": 0.3956, "step": 11769 }, { "epoch": 1.104542042042042, "grad_norm": 1.5585159526949486, "learning_rate": 7.965056986113185e-06, "loss": 0.4062, "step": 11770 }, { "epoch": 1.1046358858858858, "grad_norm": 0.8836810782317399, "learning_rate": 7.964617360540822e-06, "loss": 0.4105, "step": 11771 }, { "epoch": 1.1047297297297298, "grad_norm": 1.3215970782835906, "learning_rate": 7.964177699620976e-06, "loss": 0.4211, "step": 11772 }, { "epoch": 1.1048235735735736, "grad_norm": 0.8685300580054374, "learning_rate": 7.96373800335889e-06, "loss": 0.426, "step": 11773 }, { "epoch": 1.1049174174174174, "grad_norm": 1.1066582023962834, "learning_rate": 7.96329827175981e-06, "loss": 0.4329, "step": 11774 }, { "epoch": 1.1050112612612613, "grad_norm": 0.8929772225471684, "learning_rate": 7.962858504828972e-06, "loss": 0.3911, "step": 11775 }, { "epoch": 1.105105105105105, "grad_norm": 0.9643946419923641, "learning_rate": 7.962418702571627e-06, "loss": 0.3942, "step": 11776 }, { "epoch": 1.1051989489489489, "grad_norm": 1.298732919398286, "learning_rate": 7.961978864993013e-06, "loss": 0.4165, "step": 11777 }, { "epoch": 1.1052927927927927, "grad_norm": 0.8669345437719895, "learning_rate": 7.961538992098377e-06, "loss": 0.3889, "step": 11778 }, { "epoch": 1.1053866366366367, "grad_norm": 1.1327704592027013, "learning_rate": 7.961099083892962e-06, "loss": 0.4327, "step": 11779 }, { "epoch": 1.1054804804804805, "grad_norm": 1.0337886228352682, "learning_rate": 7.960659140382016e-06, "loss": 0.3982, "step": 11780 }, { "epoch": 1.1055743243243243, "grad_norm": 0.8472406892103709, "learning_rate": 7.96021916157078e-06, "loss": 0.3868, "step": 11781 }, { "epoch": 1.1056681681681682, "grad_norm": 0.9038068396108097, "learning_rate": 7.959779147464505e-06, "loss": 0.3966, "step": 11782 }, { "epoch": 1.105762012012012, "grad_norm": 0.960250206291908, "learning_rate": 7.959339098068435e-06, "loss": 0.3976, "step": 11783 }, { "epoch": 1.1058558558558558, "grad_norm": 1.1274637733926012, "learning_rate": 7.958899013387815e-06, "loss": 0.3699, "step": 11784 }, { "epoch": 1.1059496996996998, "grad_norm": 1.1557144174273, "learning_rate": 7.958458893427893e-06, "loss": 0.4065, "step": 11785 }, { "epoch": 1.1060435435435436, "grad_norm": 0.9556411319551532, "learning_rate": 7.95801873819392e-06, "loss": 0.4134, "step": 11786 }, { "epoch": 1.1061373873873874, "grad_norm": 1.1158978182612875, "learning_rate": 7.95757854769114e-06, "loss": 0.4201, "step": 11787 }, { "epoch": 1.1062312312312312, "grad_norm": 0.8407681068193579, "learning_rate": 7.957138321924803e-06, "loss": 0.4068, "step": 11788 }, { "epoch": 1.106325075075075, "grad_norm": 0.9756720844498008, "learning_rate": 7.956698060900159e-06, "loss": 0.3772, "step": 11789 }, { "epoch": 1.1064189189189189, "grad_norm": 0.9856897589371886, "learning_rate": 7.956257764622455e-06, "loss": 0.3617, "step": 11790 }, { "epoch": 1.1065127627627627, "grad_norm": 0.9513906056100658, "learning_rate": 7.95581743309694e-06, "loss": 0.4343, "step": 11791 }, { "epoch": 1.1066066066066067, "grad_norm": 0.9732750423692664, "learning_rate": 7.955377066328869e-06, "loss": 0.3807, "step": 11792 }, { "epoch": 1.1067004504504505, "grad_norm": 1.003274453932983, "learning_rate": 7.954936664323488e-06, "loss": 0.4238, "step": 11793 }, { "epoch": 1.1067942942942943, "grad_norm": 1.0968815065002337, "learning_rate": 7.954496227086049e-06, "loss": 0.3546, "step": 11794 }, { "epoch": 1.1068881381381381, "grad_norm": 1.8359889130383473, "learning_rate": 7.954055754621804e-06, "loss": 0.3826, "step": 11795 }, { "epoch": 1.106981981981982, "grad_norm": 1.1740803223012801, "learning_rate": 7.953615246936004e-06, "loss": 0.4064, "step": 11796 }, { "epoch": 1.1070758258258258, "grad_norm": 1.0429096365057087, "learning_rate": 7.953174704033903e-06, "loss": 0.4194, "step": 11797 }, { "epoch": 1.1071696696696698, "grad_norm": 1.017690374378416, "learning_rate": 7.952734125920752e-06, "loss": 0.4114, "step": 11798 }, { "epoch": 1.1072635135135136, "grad_norm": 1.1299692109123372, "learning_rate": 7.952293512601804e-06, "loss": 0.4162, "step": 11799 }, { "epoch": 1.1073573573573574, "grad_norm": 0.9511101146003085, "learning_rate": 7.951852864082314e-06, "loss": 0.414, "step": 11800 }, { "epoch": 1.1074512012012012, "grad_norm": 1.1548571289712932, "learning_rate": 7.951412180367536e-06, "loss": 0.3995, "step": 11801 }, { "epoch": 1.107545045045045, "grad_norm": 0.8709455263987468, "learning_rate": 7.95097146146272e-06, "loss": 0.4058, "step": 11802 }, { "epoch": 1.1076388888888888, "grad_norm": 0.8390003105042415, "learning_rate": 7.950530707373125e-06, "loss": 0.429, "step": 11803 }, { "epoch": 1.1077327327327327, "grad_norm": 0.9266411992397782, "learning_rate": 7.950089918104008e-06, "loss": 0.3982, "step": 11804 }, { "epoch": 1.1078265765765765, "grad_norm": 1.1876511706839539, "learning_rate": 7.949649093660617e-06, "loss": 0.4047, "step": 11805 }, { "epoch": 1.1079204204204205, "grad_norm": 0.9914618029230251, "learning_rate": 7.949208234048216e-06, "loss": 0.4795, "step": 11806 }, { "epoch": 1.1080142642642643, "grad_norm": 0.9336362762217544, "learning_rate": 7.948767339272056e-06, "loss": 0.4114, "step": 11807 }, { "epoch": 1.1081081081081081, "grad_norm": 1.117825408846036, "learning_rate": 7.948326409337397e-06, "loss": 0.4076, "step": 11808 }, { "epoch": 1.108201951951952, "grad_norm": 1.224333546801821, "learning_rate": 7.947885444249495e-06, "loss": 0.4556, "step": 11809 }, { "epoch": 1.1082957957957957, "grad_norm": 1.0224475742605863, "learning_rate": 7.94744444401361e-06, "loss": 0.3966, "step": 11810 }, { "epoch": 1.1083896396396395, "grad_norm": 0.8284017361501549, "learning_rate": 7.947003408634994e-06, "loss": 0.3875, "step": 11811 }, { "epoch": 1.1084834834834836, "grad_norm": 1.0432136430697856, "learning_rate": 7.946562338118912e-06, "loss": 0.4357, "step": 11812 }, { "epoch": 1.1085773273273274, "grad_norm": 1.1144596273441207, "learning_rate": 7.94612123247062e-06, "loss": 0.3762, "step": 11813 }, { "epoch": 1.1086711711711712, "grad_norm": 1.4950156970389512, "learning_rate": 7.945680091695378e-06, "loss": 0.3959, "step": 11814 }, { "epoch": 1.108765015015015, "grad_norm": 2.58316421682557, "learning_rate": 7.945238915798445e-06, "loss": 0.4516, "step": 11815 }, { "epoch": 1.1088588588588588, "grad_norm": 0.9323660826275701, "learning_rate": 7.944797704785084e-06, "loss": 0.4239, "step": 11816 }, { "epoch": 1.1089527027027026, "grad_norm": 0.9824382542236613, "learning_rate": 7.944356458660551e-06, "loss": 0.4293, "step": 11817 }, { "epoch": 1.1090465465465464, "grad_norm": 1.1549815358538265, "learning_rate": 7.94391517743011e-06, "loss": 0.4285, "step": 11818 }, { "epoch": 1.1091403903903905, "grad_norm": 1.077434032787556, "learning_rate": 7.943473861099023e-06, "loss": 0.4251, "step": 11819 }, { "epoch": 1.1092342342342343, "grad_norm": 1.0154432274316891, "learning_rate": 7.94303250967255e-06, "loss": 0.4292, "step": 11820 }, { "epoch": 1.109328078078078, "grad_norm": 1.3094495133522914, "learning_rate": 7.942591123155952e-06, "loss": 0.4439, "step": 11821 }, { "epoch": 1.109421921921922, "grad_norm": 0.9954789315559074, "learning_rate": 7.942149701554495e-06, "loss": 0.422, "step": 11822 }, { "epoch": 1.1095157657657657, "grad_norm": 1.0702170697851907, "learning_rate": 7.941708244873443e-06, "loss": 0.4705, "step": 11823 }, { "epoch": 1.1096096096096095, "grad_norm": 0.9356836429034989, "learning_rate": 7.941266753118056e-06, "loss": 0.4441, "step": 11824 }, { "epoch": 1.1097034534534536, "grad_norm": 0.918857693197135, "learning_rate": 7.940825226293599e-06, "loss": 0.4215, "step": 11825 }, { "epoch": 1.1097972972972974, "grad_norm": 1.0721863597591623, "learning_rate": 7.940383664405335e-06, "loss": 0.429, "step": 11826 }, { "epoch": 1.1098911411411412, "grad_norm": 0.9042480397374484, "learning_rate": 7.939942067458532e-06, "loss": 0.4498, "step": 11827 }, { "epoch": 1.109984984984985, "grad_norm": 1.085235228653939, "learning_rate": 7.939500435458457e-06, "loss": 0.3998, "step": 11828 }, { "epoch": 1.1100788288288288, "grad_norm": 0.9492877558763102, "learning_rate": 7.939058768410368e-06, "loss": 0.3814, "step": 11829 }, { "epoch": 1.1101726726726726, "grad_norm": 0.8518183217976851, "learning_rate": 7.938617066319536e-06, "loss": 0.3623, "step": 11830 }, { "epoch": 1.1102665165165164, "grad_norm": 1.104441350113756, "learning_rate": 7.938175329191228e-06, "loss": 0.4157, "step": 11831 }, { "epoch": 1.1103603603603605, "grad_norm": 0.8681894021088703, "learning_rate": 7.937733557030708e-06, "loss": 0.3481, "step": 11832 }, { "epoch": 1.1104542042042043, "grad_norm": 0.9660303001468955, "learning_rate": 7.937291749843247e-06, "loss": 0.4156, "step": 11833 }, { "epoch": 1.110548048048048, "grad_norm": 3.519208947245547, "learning_rate": 7.93684990763411e-06, "loss": 0.4115, "step": 11834 }, { "epoch": 1.1106418918918919, "grad_norm": 1.11211556245158, "learning_rate": 7.936408030408567e-06, "loss": 0.4467, "step": 11835 }, { "epoch": 1.1107357357357357, "grad_norm": 0.9939130336688619, "learning_rate": 7.935966118171883e-06, "loss": 0.4063, "step": 11836 }, { "epoch": 1.1108295795795795, "grad_norm": 0.9804951574078883, "learning_rate": 7.935524170929332e-06, "loss": 0.3951, "step": 11837 }, { "epoch": 1.1109234234234235, "grad_norm": 0.9400509608482404, "learning_rate": 7.935082188686178e-06, "loss": 0.455, "step": 11838 }, { "epoch": 1.1110172672672673, "grad_norm": 0.9176567027661419, "learning_rate": 7.934640171447696e-06, "loss": 0.4171, "step": 11839 }, { "epoch": 1.1111111111111112, "grad_norm": 1.1649774147754366, "learning_rate": 7.934198119219154e-06, "loss": 0.4007, "step": 11840 }, { "epoch": 1.111204954954955, "grad_norm": 1.1547064919292294, "learning_rate": 7.93375603200582e-06, "loss": 0.3392, "step": 11841 }, { "epoch": 1.1112987987987988, "grad_norm": 0.9033958540592104, "learning_rate": 7.933313909812969e-06, "loss": 0.3739, "step": 11842 }, { "epoch": 1.1113926426426426, "grad_norm": 1.432838052590726, "learning_rate": 7.932871752645873e-06, "loss": 0.4012, "step": 11843 }, { "epoch": 1.1114864864864864, "grad_norm": 0.9291594376242012, "learning_rate": 7.9324295605098e-06, "loss": 0.3795, "step": 11844 }, { "epoch": 1.1115803303303302, "grad_norm": 0.8953168761910798, "learning_rate": 7.931987333410024e-06, "loss": 0.3712, "step": 11845 }, { "epoch": 1.1116741741741742, "grad_norm": 1.1144416072018144, "learning_rate": 7.931545071351818e-06, "loss": 0.4143, "step": 11846 }, { "epoch": 1.111768018018018, "grad_norm": 0.8745800144750359, "learning_rate": 7.931102774340456e-06, "loss": 0.3603, "step": 11847 }, { "epoch": 1.1118618618618619, "grad_norm": 0.9125260060251689, "learning_rate": 7.93066044238121e-06, "loss": 0.425, "step": 11848 }, { "epoch": 1.1119557057057057, "grad_norm": 0.9557006568515153, "learning_rate": 7.930218075479355e-06, "loss": 0.4353, "step": 11849 }, { "epoch": 1.1120495495495495, "grad_norm": 1.0384072279303245, "learning_rate": 7.929775673640165e-06, "loss": 0.4344, "step": 11850 }, { "epoch": 1.1121433933933933, "grad_norm": 0.8554741796567592, "learning_rate": 7.929333236868914e-06, "loss": 0.3669, "step": 11851 }, { "epoch": 1.1122372372372373, "grad_norm": 0.8988503694163411, "learning_rate": 7.928890765170878e-06, "loss": 0.3833, "step": 11852 }, { "epoch": 1.1123310810810811, "grad_norm": 0.8928526339573695, "learning_rate": 7.928448258551333e-06, "loss": 0.4174, "step": 11853 }, { "epoch": 1.112424924924925, "grad_norm": 0.9151124492108262, "learning_rate": 7.928005717015555e-06, "loss": 0.4358, "step": 11854 }, { "epoch": 1.1125187687687688, "grad_norm": 0.8752092340947388, "learning_rate": 7.927563140568821e-06, "loss": 0.3645, "step": 11855 }, { "epoch": 1.1126126126126126, "grad_norm": 0.8545904226954717, "learning_rate": 7.927120529216406e-06, "loss": 0.4356, "step": 11856 }, { "epoch": 1.1127064564564564, "grad_norm": 0.8244614204495797, "learning_rate": 7.926677882963589e-06, "loss": 0.369, "step": 11857 }, { "epoch": 1.1128003003003002, "grad_norm": 1.0190749744913576, "learning_rate": 7.926235201815647e-06, "loss": 0.3968, "step": 11858 }, { "epoch": 1.1128941441441442, "grad_norm": 0.873063530110219, "learning_rate": 7.925792485777858e-06, "loss": 0.3832, "step": 11859 }, { "epoch": 1.112987987987988, "grad_norm": 0.9467457567767666, "learning_rate": 7.925349734855501e-06, "loss": 0.4347, "step": 11860 }, { "epoch": 1.1130818318318318, "grad_norm": 1.0076542523984295, "learning_rate": 7.924906949053855e-06, "loss": 0.3657, "step": 11861 }, { "epoch": 1.1131756756756757, "grad_norm": 1.050329322117031, "learning_rate": 7.924464128378199e-06, "loss": 0.4413, "step": 11862 }, { "epoch": 1.1132695195195195, "grad_norm": 0.8764420904549824, "learning_rate": 7.924021272833813e-06, "loss": 0.3694, "step": 11863 }, { "epoch": 1.1133633633633633, "grad_norm": 0.8997168321415925, "learning_rate": 7.923578382425978e-06, "loss": 0.3853, "step": 11864 }, { "epoch": 1.1134572072072073, "grad_norm": 0.8808353512991414, "learning_rate": 7.923135457159973e-06, "loss": 0.3741, "step": 11865 }, { "epoch": 1.1135510510510511, "grad_norm": 1.0376966638593579, "learning_rate": 7.922692497041081e-06, "loss": 0.4668, "step": 11866 }, { "epoch": 1.113644894894895, "grad_norm": 0.9943880479089274, "learning_rate": 7.92224950207458e-06, "loss": 0.3875, "step": 11867 }, { "epoch": 1.1137387387387387, "grad_norm": 1.1208216232256283, "learning_rate": 7.921806472265756e-06, "loss": 0.3911, "step": 11868 }, { "epoch": 1.1138325825825826, "grad_norm": 1.221199215262475, "learning_rate": 7.921363407619888e-06, "loss": 0.4598, "step": 11869 }, { "epoch": 1.1139264264264264, "grad_norm": 1.1133372218923916, "learning_rate": 7.920920308142262e-06, "loss": 0.3704, "step": 11870 }, { "epoch": 1.1140202702702702, "grad_norm": 0.8446512830588516, "learning_rate": 7.92047717383816e-06, "loss": 0.3937, "step": 11871 }, { "epoch": 1.1141141141141142, "grad_norm": 0.8000562942739459, "learning_rate": 7.920034004712863e-06, "loss": 0.3983, "step": 11872 }, { "epoch": 1.114207957957958, "grad_norm": 1.1161628284833947, "learning_rate": 7.919590800771659e-06, "loss": 0.389, "step": 11873 }, { "epoch": 1.1143018018018018, "grad_norm": 0.9654919509064754, "learning_rate": 7.919147562019829e-06, "loss": 0.399, "step": 11874 }, { "epoch": 1.1143956456456456, "grad_norm": 0.9560427939497866, "learning_rate": 7.918704288462657e-06, "loss": 0.3869, "step": 11875 }, { "epoch": 1.1144894894894894, "grad_norm": 1.0383011668599158, "learning_rate": 7.918260980105434e-06, "loss": 0.4107, "step": 11876 }, { "epoch": 1.1145833333333333, "grad_norm": 0.9227502319073506, "learning_rate": 7.91781763695344e-06, "loss": 0.4164, "step": 11877 }, { "epoch": 1.1146771771771773, "grad_norm": 0.8509853395559965, "learning_rate": 7.917374259011962e-06, "loss": 0.3834, "step": 11878 }, { "epoch": 1.114771021021021, "grad_norm": 0.9285652281340852, "learning_rate": 7.91693084628629e-06, "loss": 0.418, "step": 11879 }, { "epoch": 1.114864864864865, "grad_norm": 0.9931550297625784, "learning_rate": 7.916487398781706e-06, "loss": 0.3897, "step": 11880 }, { "epoch": 1.1149587087087087, "grad_norm": 1.04189903220557, "learning_rate": 7.916043916503499e-06, "loss": 0.3619, "step": 11881 }, { "epoch": 1.1150525525525525, "grad_norm": 0.9457529038114147, "learning_rate": 7.91560039945696e-06, "loss": 0.3846, "step": 11882 }, { "epoch": 1.1151463963963963, "grad_norm": 1.3537513987727012, "learning_rate": 7.915156847647371e-06, "loss": 0.4027, "step": 11883 }, { "epoch": 1.1152402402402402, "grad_norm": 1.0037864195885355, "learning_rate": 7.914713261080026e-06, "loss": 0.3978, "step": 11884 }, { "epoch": 1.1153340840840842, "grad_norm": 0.9215788477446795, "learning_rate": 7.914269639760212e-06, "loss": 0.3518, "step": 11885 }, { "epoch": 1.115427927927928, "grad_norm": 4.086700369326675, "learning_rate": 7.913825983693218e-06, "loss": 0.4512, "step": 11886 }, { "epoch": 1.1155217717717718, "grad_norm": 1.0650304234773251, "learning_rate": 7.913382292884332e-06, "loss": 0.4519, "step": 11887 }, { "epoch": 1.1156156156156156, "grad_norm": 0.9231876360029873, "learning_rate": 7.912938567338849e-06, "loss": 0.455, "step": 11888 }, { "epoch": 1.1157094594594594, "grad_norm": 1.3505158080014372, "learning_rate": 7.912494807062053e-06, "loss": 0.4623, "step": 11889 }, { "epoch": 1.1158033033033032, "grad_norm": 0.9309381312003976, "learning_rate": 7.912051012059241e-06, "loss": 0.4118, "step": 11890 }, { "epoch": 1.115897147147147, "grad_norm": 1.0632915167382178, "learning_rate": 7.911607182335702e-06, "loss": 0.4322, "step": 11891 }, { "epoch": 1.115990990990991, "grad_norm": 1.1134245668782825, "learning_rate": 7.911163317896725e-06, "loss": 0.4362, "step": 11892 }, { "epoch": 1.116084834834835, "grad_norm": 0.8292983001922959, "learning_rate": 7.91071941874761e-06, "loss": 0.3622, "step": 11893 }, { "epoch": 1.1161786786786787, "grad_norm": 5.075172856735273, "learning_rate": 7.91027548489364e-06, "loss": 0.4337, "step": 11894 }, { "epoch": 1.1162725225225225, "grad_norm": 0.8822478736730082, "learning_rate": 7.909831516340116e-06, "loss": 0.4345, "step": 11895 }, { "epoch": 1.1163663663663663, "grad_norm": 0.9682205141984659, "learning_rate": 7.909387513092327e-06, "loss": 0.4052, "step": 11896 }, { "epoch": 1.1164602102102101, "grad_norm": 0.8755433543130654, "learning_rate": 7.90894347515557e-06, "loss": 0.3611, "step": 11897 }, { "epoch": 1.116554054054054, "grad_norm": 0.9986841264887542, "learning_rate": 7.908499402535137e-06, "loss": 0.4564, "step": 11898 }, { "epoch": 1.116647897897898, "grad_norm": 1.053523851407902, "learning_rate": 7.908055295236322e-06, "loss": 0.4256, "step": 11899 }, { "epoch": 1.1167417417417418, "grad_norm": 1.212676260683052, "learning_rate": 7.907611153264422e-06, "loss": 0.385, "step": 11900 }, { "epoch": 1.1168355855855856, "grad_norm": 0.9149757787435504, "learning_rate": 7.907166976624732e-06, "loss": 0.3995, "step": 11901 }, { "epoch": 1.1169294294294294, "grad_norm": 1.1378903721922597, "learning_rate": 7.906722765322549e-06, "loss": 0.4352, "step": 11902 }, { "epoch": 1.1170232732732732, "grad_norm": 0.8812908310631623, "learning_rate": 7.906278519363167e-06, "loss": 0.3988, "step": 11903 }, { "epoch": 1.117117117117117, "grad_norm": 1.0091464784503785, "learning_rate": 7.905834238751885e-06, "loss": 0.4889, "step": 11904 }, { "epoch": 1.117210960960961, "grad_norm": 0.9409443331272053, "learning_rate": 7.905389923493999e-06, "loss": 0.4209, "step": 11905 }, { "epoch": 1.1173048048048049, "grad_norm": 3.5585272345544245, "learning_rate": 7.904945573594807e-06, "loss": 0.4383, "step": 11906 }, { "epoch": 1.1173986486486487, "grad_norm": 0.8881868903469724, "learning_rate": 7.904501189059609e-06, "loss": 0.3992, "step": 11907 }, { "epoch": 1.1174924924924925, "grad_norm": 1.198444083357947, "learning_rate": 7.904056769893698e-06, "loss": 0.3968, "step": 11908 }, { "epoch": 1.1175863363363363, "grad_norm": 1.0150275011641743, "learning_rate": 7.903612316102379e-06, "loss": 0.3995, "step": 11909 }, { "epoch": 1.1176801801801801, "grad_norm": 1.0100359516143507, "learning_rate": 7.903167827690947e-06, "loss": 0.3921, "step": 11910 }, { "epoch": 1.117774024024024, "grad_norm": 7.561433844163536, "learning_rate": 7.902723304664703e-06, "loss": 0.4429, "step": 11911 }, { "epoch": 1.117867867867868, "grad_norm": 0.8443656020699427, "learning_rate": 7.902278747028948e-06, "loss": 0.3085, "step": 11912 }, { "epoch": 1.1179617117117118, "grad_norm": 0.9394426308161997, "learning_rate": 7.901834154788984e-06, "loss": 0.3907, "step": 11913 }, { "epoch": 1.1180555555555556, "grad_norm": 0.8342928234568294, "learning_rate": 7.901389527950107e-06, "loss": 0.3758, "step": 11914 }, { "epoch": 1.1181493993993994, "grad_norm": 1.0334497698585154, "learning_rate": 7.900944866517622e-06, "loss": 0.4459, "step": 11915 }, { "epoch": 1.1182432432432432, "grad_norm": 0.9354156571498685, "learning_rate": 7.90050017049683e-06, "loss": 0.4234, "step": 11916 }, { "epoch": 1.118337087087087, "grad_norm": 1.0941608980329405, "learning_rate": 7.900055439893033e-06, "loss": 0.4098, "step": 11917 }, { "epoch": 1.118430930930931, "grad_norm": 0.9448424023778247, "learning_rate": 7.899610674711535e-06, "loss": 0.3944, "step": 11918 }, { "epoch": 1.1185247747747749, "grad_norm": 0.9464124052733259, "learning_rate": 7.899165874957634e-06, "loss": 0.4078, "step": 11919 }, { "epoch": 1.1186186186186187, "grad_norm": 0.882418859781104, "learning_rate": 7.898721040636641e-06, "loss": 0.4183, "step": 11920 }, { "epoch": 1.1187124624624625, "grad_norm": 0.8622056618520388, "learning_rate": 7.898276171753856e-06, "loss": 0.3948, "step": 11921 }, { "epoch": 1.1188063063063063, "grad_norm": 0.9769517918974292, "learning_rate": 7.89783126831458e-06, "loss": 0.4202, "step": 11922 }, { "epoch": 1.11890015015015, "grad_norm": 0.9338457450574873, "learning_rate": 7.897386330324123e-06, "loss": 0.4086, "step": 11923 }, { "epoch": 1.118993993993994, "grad_norm": 1.2111445008371546, "learning_rate": 7.896941357787787e-06, "loss": 0.4225, "step": 11924 }, { "epoch": 1.119087837837838, "grad_norm": 0.9129804234205671, "learning_rate": 7.896496350710879e-06, "loss": 0.4118, "step": 11925 }, { "epoch": 1.1191816816816818, "grad_norm": 0.9475476947903729, "learning_rate": 7.896051309098702e-06, "loss": 0.4451, "step": 11926 }, { "epoch": 1.1192755255255256, "grad_norm": 0.9252073988378144, "learning_rate": 7.895606232956567e-06, "loss": 0.401, "step": 11927 }, { "epoch": 1.1193693693693694, "grad_norm": 0.9059427999640504, "learning_rate": 7.895161122289776e-06, "loss": 0.3801, "step": 11928 }, { "epoch": 1.1194632132132132, "grad_norm": 0.936054461247592, "learning_rate": 7.89471597710364e-06, "loss": 0.4088, "step": 11929 }, { "epoch": 1.119557057057057, "grad_norm": 1.0114636394608378, "learning_rate": 7.894270797403464e-06, "loss": 0.4502, "step": 11930 }, { "epoch": 1.1196509009009008, "grad_norm": 1.1087263116029955, "learning_rate": 7.893825583194557e-06, "loss": 0.462, "step": 11931 }, { "epoch": 1.1197447447447448, "grad_norm": 0.9438543909010452, "learning_rate": 7.893380334482227e-06, "loss": 0.4089, "step": 11932 }, { "epoch": 1.1198385885885886, "grad_norm": 1.092617993177172, "learning_rate": 7.892935051271781e-06, "loss": 0.3981, "step": 11933 }, { "epoch": 1.1199324324324325, "grad_norm": 0.7444609232690103, "learning_rate": 7.892489733568531e-06, "loss": 0.3516, "step": 11934 }, { "epoch": 1.1200262762762763, "grad_norm": 0.9301931182686312, "learning_rate": 7.892044381377786e-06, "loss": 0.4184, "step": 11935 }, { "epoch": 1.12012012012012, "grad_norm": 0.8953567192259825, "learning_rate": 7.891598994704855e-06, "loss": 0.4053, "step": 11936 }, { "epoch": 1.1202139639639639, "grad_norm": 1.092749810285053, "learning_rate": 7.891153573555048e-06, "loss": 0.4267, "step": 11937 }, { "epoch": 1.1203078078078077, "grad_norm": 0.9517403866351899, "learning_rate": 7.890708117933679e-06, "loss": 0.4533, "step": 11938 }, { "epoch": 1.1204016516516517, "grad_norm": 0.8545523410665953, "learning_rate": 7.890262627846054e-06, "loss": 0.4072, "step": 11939 }, { "epoch": 1.1204954954954955, "grad_norm": 1.2013946950912384, "learning_rate": 7.88981710329749e-06, "loss": 0.4154, "step": 11940 }, { "epoch": 1.1205893393393394, "grad_norm": 0.933430094248729, "learning_rate": 7.889371544293296e-06, "loss": 0.4328, "step": 11941 }, { "epoch": 1.1206831831831832, "grad_norm": 0.908985516335857, "learning_rate": 7.888925950838783e-06, "loss": 0.3678, "step": 11942 }, { "epoch": 1.120777027027027, "grad_norm": 0.9197671415073143, "learning_rate": 7.88848032293927e-06, "loss": 0.4251, "step": 11943 }, { "epoch": 1.1208708708708708, "grad_norm": 0.8331700143246952, "learning_rate": 7.888034660600061e-06, "loss": 0.3608, "step": 11944 }, { "epoch": 1.1209647147147148, "grad_norm": 0.9341692964471249, "learning_rate": 7.887588963826478e-06, "loss": 0.4088, "step": 11945 }, { "epoch": 1.1210585585585586, "grad_norm": 0.9435498800983436, "learning_rate": 7.887143232623833e-06, "loss": 0.4285, "step": 11946 }, { "epoch": 1.1211524024024024, "grad_norm": 1.7104906762804084, "learning_rate": 7.886697466997436e-06, "loss": 0.4521, "step": 11947 }, { "epoch": 1.1212462462462462, "grad_norm": 0.9355088939439156, "learning_rate": 7.886251666952608e-06, "loss": 0.3799, "step": 11948 }, { "epoch": 1.12134009009009, "grad_norm": 0.8886774654570598, "learning_rate": 7.885805832494662e-06, "loss": 0.4093, "step": 11949 }, { "epoch": 1.1214339339339339, "grad_norm": 0.9512418219640233, "learning_rate": 7.885359963628911e-06, "loss": 0.4255, "step": 11950 }, { "epoch": 1.1215277777777777, "grad_norm": 0.9277089773766409, "learning_rate": 7.884914060360675e-06, "loss": 0.4339, "step": 11951 }, { "epoch": 1.1216216216216217, "grad_norm": 0.9126482545642844, "learning_rate": 7.88446812269527e-06, "loss": 0.4302, "step": 11952 }, { "epoch": 1.1217154654654655, "grad_norm": 1.1188615396656565, "learning_rate": 7.88402215063801e-06, "loss": 0.3791, "step": 11953 }, { "epoch": 1.1218093093093093, "grad_norm": 0.9652218685240229, "learning_rate": 7.883576144194216e-06, "loss": 0.4066, "step": 11954 }, { "epoch": 1.1219031531531531, "grad_norm": 0.9209632009008457, "learning_rate": 7.883130103369203e-06, "loss": 0.4262, "step": 11955 }, { "epoch": 1.121996996996997, "grad_norm": 0.9906471044161483, "learning_rate": 7.882684028168292e-06, "loss": 0.3867, "step": 11956 }, { "epoch": 1.1220908408408408, "grad_norm": 1.0461385548218407, "learning_rate": 7.8822379185968e-06, "loss": 0.3979, "step": 11957 }, { "epoch": 1.1221846846846848, "grad_norm": 1.2228571244231865, "learning_rate": 7.881791774660045e-06, "loss": 0.4259, "step": 11958 }, { "epoch": 1.1222785285285286, "grad_norm": 1.2193547220034784, "learning_rate": 7.881345596363348e-06, "loss": 0.3917, "step": 11959 }, { "epoch": 1.1223723723723724, "grad_norm": 0.994064340765141, "learning_rate": 7.880899383712029e-06, "loss": 0.3948, "step": 11960 }, { "epoch": 1.1224662162162162, "grad_norm": 0.9661944992366382, "learning_rate": 7.880453136711406e-06, "loss": 0.381, "step": 11961 }, { "epoch": 1.12256006006006, "grad_norm": 1.0479832083993916, "learning_rate": 7.880006855366802e-06, "loss": 0.4069, "step": 11962 }, { "epoch": 1.1226539039039038, "grad_norm": 1.0606262963267632, "learning_rate": 7.879560539683538e-06, "loss": 0.3861, "step": 11963 }, { "epoch": 1.1227477477477477, "grad_norm": 0.9533284422477248, "learning_rate": 7.879114189666934e-06, "loss": 0.4285, "step": 11964 }, { "epoch": 1.1228415915915917, "grad_norm": 0.971828969387559, "learning_rate": 7.87866780532231e-06, "loss": 0.4053, "step": 11965 }, { "epoch": 1.1229354354354355, "grad_norm": 0.9593991460689223, "learning_rate": 7.878221386654997e-06, "loss": 0.4517, "step": 11966 }, { "epoch": 1.1230292792792793, "grad_norm": 0.8694986809402884, "learning_rate": 7.877774933670308e-06, "loss": 0.4218, "step": 11967 }, { "epoch": 1.1231231231231231, "grad_norm": 0.9170871998023936, "learning_rate": 7.87732844637357e-06, "loss": 0.3704, "step": 11968 }, { "epoch": 1.123216966966967, "grad_norm": 0.9279959830480645, "learning_rate": 7.876881924770106e-06, "loss": 0.4104, "step": 11969 }, { "epoch": 1.1233108108108107, "grad_norm": 1.016910123324945, "learning_rate": 7.87643536886524e-06, "loss": 0.4211, "step": 11970 }, { "epoch": 1.1234046546546546, "grad_norm": 3.248878552200393, "learning_rate": 7.875988778664298e-06, "loss": 0.3937, "step": 11971 }, { "epoch": 1.1234984984984986, "grad_norm": 0.8758740583508567, "learning_rate": 7.875542154172603e-06, "loss": 0.4084, "step": 11972 }, { "epoch": 1.1235923423423424, "grad_norm": 0.8211469471327798, "learning_rate": 7.87509549539548e-06, "loss": 0.3874, "step": 11973 }, { "epoch": 1.1236861861861862, "grad_norm": 0.9052655504184012, "learning_rate": 7.874648802338255e-06, "loss": 0.3898, "step": 11974 }, { "epoch": 1.12378003003003, "grad_norm": 0.9138417181739645, "learning_rate": 7.874202075006254e-06, "loss": 0.434, "step": 11975 }, { "epoch": 1.1238738738738738, "grad_norm": 0.8138478396150229, "learning_rate": 7.873755313404802e-06, "loss": 0.363, "step": 11976 }, { "epoch": 1.1239677177177176, "grad_norm": 0.9080625407336842, "learning_rate": 7.87330851753923e-06, "loss": 0.4219, "step": 11977 }, { "epoch": 1.1240615615615615, "grad_norm": 0.9496667893343781, "learning_rate": 7.87286168741486e-06, "loss": 0.3999, "step": 11978 }, { "epoch": 1.1241554054054055, "grad_norm": 0.8937200197958077, "learning_rate": 7.872414823037025e-06, "loss": 0.3958, "step": 11979 }, { "epoch": 1.1242492492492493, "grad_norm": 0.9327854374394956, "learning_rate": 7.871967924411045e-06, "loss": 0.3416, "step": 11980 }, { "epoch": 1.124343093093093, "grad_norm": 1.3851319194405258, "learning_rate": 7.871520991542259e-06, "loss": 0.409, "step": 11981 }, { "epoch": 1.124436936936937, "grad_norm": 0.9042616509661534, "learning_rate": 7.871074024435988e-06, "loss": 0.4198, "step": 11982 }, { "epoch": 1.1245307807807807, "grad_norm": 0.912476379241512, "learning_rate": 7.870627023097561e-06, "loss": 0.3898, "step": 11983 }, { "epoch": 1.1246246246246245, "grad_norm": 1.146382080200529, "learning_rate": 7.870179987532314e-06, "loss": 0.3881, "step": 11984 }, { "epoch": 1.1247184684684686, "grad_norm": 0.9708546639646488, "learning_rate": 7.869732917745571e-06, "loss": 0.4079, "step": 11985 }, { "epoch": 1.1248123123123124, "grad_norm": 0.9516874792687859, "learning_rate": 7.869285813742664e-06, "loss": 0.4253, "step": 11986 }, { "epoch": 1.1249061561561562, "grad_norm": 0.9189109221043108, "learning_rate": 7.868838675528927e-06, "loss": 0.3729, "step": 11987 }, { "epoch": 1.125, "grad_norm": 0.986038380499453, "learning_rate": 7.868391503109687e-06, "loss": 0.3793, "step": 11988 }, { "epoch": 1.1250938438438438, "grad_norm": 1.0711006140486838, "learning_rate": 7.867944296490278e-06, "loss": 0.4312, "step": 11989 }, { "epoch": 1.1251876876876876, "grad_norm": 0.9258434896909374, "learning_rate": 7.867497055676032e-06, "loss": 0.3264, "step": 11990 }, { "epoch": 1.1252815315315314, "grad_norm": 0.8984805021107227, "learning_rate": 7.867049780672282e-06, "loss": 0.4059, "step": 11991 }, { "epoch": 1.1253753753753755, "grad_norm": 0.9094078854851354, "learning_rate": 7.866602471484357e-06, "loss": 0.4198, "step": 11992 }, { "epoch": 1.1254692192192193, "grad_norm": 0.9704632447925131, "learning_rate": 7.866155128117597e-06, "loss": 0.3888, "step": 11993 }, { "epoch": 1.125563063063063, "grad_norm": 0.9591959724393854, "learning_rate": 7.865707750577329e-06, "loss": 0.4124, "step": 11994 }, { "epoch": 1.125656906906907, "grad_norm": 1.0386849147328694, "learning_rate": 7.865260338868891e-06, "loss": 0.4133, "step": 11995 }, { "epoch": 1.1257507507507507, "grad_norm": 0.9630591791872548, "learning_rate": 7.864812892997618e-06, "loss": 0.4244, "step": 11996 }, { "epoch": 1.1258445945945945, "grad_norm": 0.8765163364307819, "learning_rate": 7.864365412968845e-06, "loss": 0.389, "step": 11997 }, { "epoch": 1.1259384384384385, "grad_norm": 0.9364557385137183, "learning_rate": 7.863917898787903e-06, "loss": 0.4281, "step": 11998 }, { "epoch": 1.1260322822822824, "grad_norm": 2.8830412653801822, "learning_rate": 7.863470350460132e-06, "loss": 0.4087, "step": 11999 }, { "epoch": 1.1261261261261262, "grad_norm": 0.9377288192765666, "learning_rate": 7.863022767990866e-06, "loss": 0.3927, "step": 12000 }, { "epoch": 1.12621996996997, "grad_norm": 1.0135692949382922, "learning_rate": 7.862575151385443e-06, "loss": 0.4281, "step": 12001 }, { "epoch": 1.1263138138138138, "grad_norm": 0.965078397922511, "learning_rate": 7.8621275006492e-06, "loss": 0.3595, "step": 12002 }, { "epoch": 1.1264076576576576, "grad_norm": 1.157673621894932, "learning_rate": 7.861679815787474e-06, "loss": 0.4159, "step": 12003 }, { "epoch": 1.1265015015015014, "grad_norm": 0.8846803458492422, "learning_rate": 7.861232096805603e-06, "loss": 0.3807, "step": 12004 }, { "epoch": 1.1265953453453452, "grad_norm": 1.025501927581415, "learning_rate": 7.860784343708925e-06, "loss": 0.4214, "step": 12005 }, { "epoch": 1.1266891891891893, "grad_norm": 0.8823796248858367, "learning_rate": 7.860336556502778e-06, "loss": 0.3792, "step": 12006 }, { "epoch": 1.126783033033033, "grad_norm": 1.116526637434319, "learning_rate": 7.859888735192502e-06, "loss": 0.3862, "step": 12007 }, { "epoch": 1.1268768768768769, "grad_norm": 1.6398417130804144, "learning_rate": 7.859440879783435e-06, "loss": 0.4171, "step": 12008 }, { "epoch": 1.1269707207207207, "grad_norm": 0.9512018848912208, "learning_rate": 7.85899299028092e-06, "loss": 0.4423, "step": 12009 }, { "epoch": 1.1270645645645645, "grad_norm": 1.0265959122989892, "learning_rate": 7.858545066690293e-06, "loss": 0.4106, "step": 12010 }, { "epoch": 1.1271584084084085, "grad_norm": 1.3399226808241829, "learning_rate": 7.858097109016898e-06, "loss": 0.4121, "step": 12011 }, { "epoch": 1.1272522522522523, "grad_norm": 0.9046028775647558, "learning_rate": 7.857649117266074e-06, "loss": 0.4074, "step": 12012 }, { "epoch": 1.1273460960960962, "grad_norm": 0.9801281738770744, "learning_rate": 7.857201091443162e-06, "loss": 0.4294, "step": 12013 }, { "epoch": 1.12743993993994, "grad_norm": 1.1531764739019514, "learning_rate": 7.85675303155351e-06, "loss": 0.3823, "step": 12014 }, { "epoch": 1.1275337837837838, "grad_norm": 0.9627514639719658, "learning_rate": 7.85630493760245e-06, "loss": 0.4042, "step": 12015 }, { "epoch": 1.1276276276276276, "grad_norm": 0.9513000800111125, "learning_rate": 7.855856809595333e-06, "loss": 0.4039, "step": 12016 }, { "epoch": 1.1277214714714714, "grad_norm": 0.9203555077237369, "learning_rate": 7.855408647537499e-06, "loss": 0.4074, "step": 12017 }, { "epoch": 1.1278153153153152, "grad_norm": 1.0363943861891913, "learning_rate": 7.85496045143429e-06, "loss": 0.4214, "step": 12018 }, { "epoch": 1.1279091591591592, "grad_norm": 0.8587218073757704, "learning_rate": 7.854512221291052e-06, "loss": 0.4005, "step": 12019 }, { "epoch": 1.128003003003003, "grad_norm": 1.0123854043508649, "learning_rate": 7.85406395711313e-06, "loss": 0.3915, "step": 12020 }, { "epoch": 1.1280968468468469, "grad_norm": 0.8951037148067803, "learning_rate": 7.853615658905867e-06, "loss": 0.3488, "step": 12021 }, { "epoch": 1.1281906906906907, "grad_norm": 0.8034222769910981, "learning_rate": 7.853167326674608e-06, "loss": 0.3526, "step": 12022 }, { "epoch": 1.1282845345345345, "grad_norm": 0.9440934092332219, "learning_rate": 7.8527189604247e-06, "loss": 0.3912, "step": 12023 }, { "epoch": 1.1283783783783783, "grad_norm": 1.2673318164255436, "learning_rate": 7.852270560161486e-06, "loss": 0.4536, "step": 12024 }, { "epoch": 1.1284722222222223, "grad_norm": 0.9822585493196091, "learning_rate": 7.851822125890318e-06, "loss": 0.4101, "step": 12025 }, { "epoch": 1.1285660660660661, "grad_norm": 0.9672671363189258, "learning_rate": 7.851373657616536e-06, "loss": 0.4021, "step": 12026 }, { "epoch": 1.12865990990991, "grad_norm": 0.9685696145893695, "learning_rate": 7.850925155345492e-06, "loss": 0.3913, "step": 12027 }, { "epoch": 1.1287537537537538, "grad_norm": 0.9274683835993595, "learning_rate": 7.850476619082532e-06, "loss": 0.4191, "step": 12028 }, { "epoch": 1.1288475975975976, "grad_norm": 0.9219824308978938, "learning_rate": 7.850028048833002e-06, "loss": 0.4213, "step": 12029 }, { "epoch": 1.1289414414414414, "grad_norm": 1.0590196871563935, "learning_rate": 7.849579444602254e-06, "loss": 0.426, "step": 12030 }, { "epoch": 1.1290352852852852, "grad_norm": 0.9134633932823374, "learning_rate": 7.849130806395635e-06, "loss": 0.427, "step": 12031 }, { "epoch": 1.1291291291291292, "grad_norm": 1.0012816228981278, "learning_rate": 7.848682134218492e-06, "loss": 0.3864, "step": 12032 }, { "epoch": 1.129222972972973, "grad_norm": 0.9645002767606435, "learning_rate": 7.848233428076178e-06, "loss": 0.3576, "step": 12033 }, { "epoch": 1.1293168168168168, "grad_norm": 1.078291808344637, "learning_rate": 7.847784687974044e-06, "loss": 0.3977, "step": 12034 }, { "epoch": 1.1294106606606606, "grad_norm": 1.0347204627042395, "learning_rate": 7.847335913917434e-06, "loss": 0.3918, "step": 12035 }, { "epoch": 1.1295045045045045, "grad_norm": 1.1579441557537622, "learning_rate": 7.846887105911704e-06, "loss": 0.4079, "step": 12036 }, { "epoch": 1.1295983483483483, "grad_norm": 0.8745568943171315, "learning_rate": 7.846438263962204e-06, "loss": 0.4027, "step": 12037 }, { "epoch": 1.1296921921921923, "grad_norm": 1.020193112031396, "learning_rate": 7.845989388074286e-06, "loss": 0.4032, "step": 12038 }, { "epoch": 1.1297860360360361, "grad_norm": 0.850086773031674, "learning_rate": 7.845540478253302e-06, "loss": 0.435, "step": 12039 }, { "epoch": 1.12987987987988, "grad_norm": 0.9553092252497924, "learning_rate": 7.845091534504603e-06, "loss": 0.4188, "step": 12040 }, { "epoch": 1.1299737237237237, "grad_norm": 1.107458662927123, "learning_rate": 7.844642556833543e-06, "loss": 0.4103, "step": 12041 }, { "epoch": 1.1300675675675675, "grad_norm": 0.9212827362819597, "learning_rate": 7.844193545245475e-06, "loss": 0.38, "step": 12042 }, { "epoch": 1.1301614114114114, "grad_norm": 0.8793555655692021, "learning_rate": 7.843744499745754e-06, "loss": 0.3516, "step": 12043 }, { "epoch": 1.1302552552552552, "grad_norm": 1.091836555544273, "learning_rate": 7.843295420339732e-06, "loss": 0.3851, "step": 12044 }, { "epoch": 1.130349099099099, "grad_norm": 0.8981693288586589, "learning_rate": 7.842846307032763e-06, "loss": 0.4168, "step": 12045 }, { "epoch": 1.130442942942943, "grad_norm": 1.13274931672193, "learning_rate": 7.842397159830202e-06, "loss": 0.4295, "step": 12046 }, { "epoch": 1.1305367867867868, "grad_norm": 1.0179624261651883, "learning_rate": 7.841947978737408e-06, "loss": 0.3798, "step": 12047 }, { "epoch": 1.1306306306306306, "grad_norm": 1.1013048880113767, "learning_rate": 7.841498763759731e-06, "loss": 0.4049, "step": 12048 }, { "epoch": 1.1307244744744744, "grad_norm": 0.9004535333812094, "learning_rate": 7.841049514902532e-06, "loss": 0.4107, "step": 12049 }, { "epoch": 1.1308183183183182, "grad_norm": 1.349657182611182, "learning_rate": 7.840600232171164e-06, "loss": 0.3952, "step": 12050 }, { "epoch": 1.1309121621621623, "grad_norm": 0.9689938861844432, "learning_rate": 7.840150915570986e-06, "loss": 0.4051, "step": 12051 }, { "epoch": 1.131006006006006, "grad_norm": 0.9279485771997387, "learning_rate": 7.839701565107353e-06, "loss": 0.4213, "step": 12052 }, { "epoch": 1.13109984984985, "grad_norm": 1.073915252834661, "learning_rate": 7.839252180785627e-06, "loss": 0.4636, "step": 12053 }, { "epoch": 1.1311936936936937, "grad_norm": 0.8873095321911868, "learning_rate": 7.83880276261116e-06, "loss": 0.3847, "step": 12054 }, { "epoch": 1.1312875375375375, "grad_norm": 1.0233865964375422, "learning_rate": 7.838353310589315e-06, "loss": 0.4096, "step": 12055 }, { "epoch": 1.1313813813813813, "grad_norm": 1.108167733325631, "learning_rate": 7.837903824725448e-06, "loss": 0.4339, "step": 12056 }, { "epoch": 1.1314752252252251, "grad_norm": 1.2993922627939167, "learning_rate": 7.837454305024921e-06, "loss": 0.4292, "step": 12057 }, { "epoch": 1.131569069069069, "grad_norm": 2.279542493059298, "learning_rate": 7.837004751493093e-06, "loss": 0.4207, "step": 12058 }, { "epoch": 1.131662912912913, "grad_norm": 0.9951134902217305, "learning_rate": 7.836555164135323e-06, "loss": 0.3933, "step": 12059 }, { "epoch": 1.1317567567567568, "grad_norm": 0.8539701630913379, "learning_rate": 7.83610554295697e-06, "loss": 0.3873, "step": 12060 }, { "epoch": 1.1318506006006006, "grad_norm": 0.8358170102052525, "learning_rate": 7.835655887963397e-06, "loss": 0.3601, "step": 12061 }, { "epoch": 1.1319444444444444, "grad_norm": 0.936571715194871, "learning_rate": 7.835206199159967e-06, "loss": 0.3924, "step": 12062 }, { "epoch": 1.1320382882882882, "grad_norm": 0.9004508783253645, "learning_rate": 7.83475647655204e-06, "loss": 0.3734, "step": 12063 }, { "epoch": 1.132132132132132, "grad_norm": 1.3855236713653716, "learning_rate": 7.834306720144978e-06, "loss": 0.4212, "step": 12064 }, { "epoch": 1.132225975975976, "grad_norm": 0.9696588453111854, "learning_rate": 7.833856929944141e-06, "loss": 0.4025, "step": 12065 }, { "epoch": 1.1323198198198199, "grad_norm": 1.3728721507892174, "learning_rate": 7.833407105954897e-06, "loss": 0.4648, "step": 12066 }, { "epoch": 1.1324136636636637, "grad_norm": 1.0675517281759046, "learning_rate": 7.832957248182605e-06, "loss": 0.4461, "step": 12067 }, { "epoch": 1.1325075075075075, "grad_norm": 0.9017247046131508, "learning_rate": 7.83250735663263e-06, "loss": 0.3755, "step": 12068 }, { "epoch": 1.1326013513513513, "grad_norm": 0.9791421566763786, "learning_rate": 7.832057431310339e-06, "loss": 0.3764, "step": 12069 }, { "epoch": 1.1326951951951951, "grad_norm": 0.951200880089313, "learning_rate": 7.83160747222109e-06, "loss": 0.3465, "step": 12070 }, { "epoch": 1.132789039039039, "grad_norm": 1.0779739233567867, "learning_rate": 7.831157479370254e-06, "loss": 0.3978, "step": 12071 }, { "epoch": 1.132882882882883, "grad_norm": 0.9080500099642066, "learning_rate": 7.830707452763195e-06, "loss": 0.409, "step": 12072 }, { "epoch": 1.1329767267267268, "grad_norm": 0.8949384052397933, "learning_rate": 7.830257392405276e-06, "loss": 0.4255, "step": 12073 }, { "epoch": 1.1330705705705706, "grad_norm": 0.9978661162939316, "learning_rate": 7.829807298301866e-06, "loss": 0.4353, "step": 12074 }, { "epoch": 1.1331644144144144, "grad_norm": 0.9609397365446848, "learning_rate": 7.829357170458332e-06, "loss": 0.4392, "step": 12075 }, { "epoch": 1.1332582582582582, "grad_norm": 1.0517951409425974, "learning_rate": 7.828907008880036e-06, "loss": 0.4142, "step": 12076 }, { "epoch": 1.133352102102102, "grad_norm": 1.133432775571596, "learning_rate": 7.828456813572352e-06, "loss": 0.4154, "step": 12077 }, { "epoch": 1.133445945945946, "grad_norm": 0.9002307322596226, "learning_rate": 7.828006584540642e-06, "loss": 0.3861, "step": 12078 }, { "epoch": 1.1335397897897899, "grad_norm": 1.0187274650073737, "learning_rate": 7.827556321790279e-06, "loss": 0.3779, "step": 12079 }, { "epoch": 1.1336336336336337, "grad_norm": 0.879216918192842, "learning_rate": 7.827106025326627e-06, "loss": 0.3758, "step": 12080 }, { "epoch": 1.1337274774774775, "grad_norm": 1.009913639671921, "learning_rate": 7.826655695155059e-06, "loss": 0.4143, "step": 12081 }, { "epoch": 1.1338213213213213, "grad_norm": 2.742348946848178, "learning_rate": 7.82620533128094e-06, "loss": 0.4118, "step": 12082 }, { "epoch": 1.133915165165165, "grad_norm": 0.935667330601961, "learning_rate": 7.825754933709645e-06, "loss": 0.3641, "step": 12083 }, { "epoch": 1.134009009009009, "grad_norm": 0.9139974259876937, "learning_rate": 7.82530450244654e-06, "loss": 0.3762, "step": 12084 }, { "epoch": 1.134102852852853, "grad_norm": 0.9636912014880185, "learning_rate": 7.824854037496998e-06, "loss": 0.3909, "step": 12085 }, { "epoch": 1.1341966966966968, "grad_norm": 1.0255039372996875, "learning_rate": 7.824403538866387e-06, "loss": 0.4399, "step": 12086 }, { "epoch": 1.1342905405405406, "grad_norm": 1.274408909381563, "learning_rate": 7.823953006560082e-06, "loss": 0.462, "step": 12087 }, { "epoch": 1.1343843843843844, "grad_norm": 0.8795061598237475, "learning_rate": 7.823502440583452e-06, "loss": 0.3801, "step": 12088 }, { "epoch": 1.1344782282282282, "grad_norm": 1.4917317160605317, "learning_rate": 7.823051840941871e-06, "loss": 0.4283, "step": 12089 }, { "epoch": 1.134572072072072, "grad_norm": 1.0639235525404447, "learning_rate": 7.82260120764071e-06, "loss": 0.4136, "step": 12090 }, { "epoch": 1.134665915915916, "grad_norm": 0.9713151909051748, "learning_rate": 7.822150540685341e-06, "loss": 0.3893, "step": 12091 }, { "epoch": 1.1347597597597598, "grad_norm": 0.8566142046696186, "learning_rate": 7.82169984008114e-06, "loss": 0.4155, "step": 12092 }, { "epoch": 1.1348536036036037, "grad_norm": 0.9085405229550442, "learning_rate": 7.821249105833482e-06, "loss": 0.3985, "step": 12093 }, { "epoch": 1.1349474474474475, "grad_norm": 0.9963892565385875, "learning_rate": 7.820798337947736e-06, "loss": 0.4124, "step": 12094 }, { "epoch": 1.1350412912912913, "grad_norm": 0.8979322465033441, "learning_rate": 7.82034753642928e-06, "loss": 0.3456, "step": 12095 }, { "epoch": 1.135135135135135, "grad_norm": 0.9674373276971261, "learning_rate": 7.819896701283488e-06, "loss": 0.4068, "step": 12096 }, { "epoch": 1.135228978978979, "grad_norm": 1.2893853104109083, "learning_rate": 7.819445832515737e-06, "loss": 0.3786, "step": 12097 }, { "epoch": 1.1353228228228227, "grad_norm": 1.112794562638009, "learning_rate": 7.818994930131402e-06, "loss": 0.4161, "step": 12098 }, { "epoch": 1.1354166666666667, "grad_norm": 0.992671050586177, "learning_rate": 7.818543994135857e-06, "loss": 0.3885, "step": 12099 }, { "epoch": 1.1355105105105106, "grad_norm": 0.7497197913105849, "learning_rate": 7.818093024534482e-06, "loss": 0.3708, "step": 12100 }, { "epoch": 1.1356043543543544, "grad_norm": 0.8670339841074474, "learning_rate": 7.817642021332651e-06, "loss": 0.3419, "step": 12101 }, { "epoch": 1.1356981981981982, "grad_norm": 0.9644533699460205, "learning_rate": 7.817190984535742e-06, "loss": 0.3768, "step": 12102 }, { "epoch": 1.135792042042042, "grad_norm": 1.3078305185776058, "learning_rate": 7.816739914149133e-06, "loss": 0.4015, "step": 12103 }, { "epoch": 1.1358858858858858, "grad_norm": 0.887398524204699, "learning_rate": 7.816288810178205e-06, "loss": 0.4326, "step": 12104 }, { "epoch": 1.1359797297297298, "grad_norm": 1.0520408331792663, "learning_rate": 7.815837672628332e-06, "loss": 0.414, "step": 12105 }, { "epoch": 1.1360735735735736, "grad_norm": 2.739687459256792, "learning_rate": 7.815386501504896e-06, "loss": 0.4139, "step": 12106 }, { "epoch": 1.1361674174174174, "grad_norm": 1.0010433518282176, "learning_rate": 7.814935296813275e-06, "loss": 0.4011, "step": 12107 }, { "epoch": 1.1362612612612613, "grad_norm": 1.0284041525065768, "learning_rate": 7.81448405855885e-06, "loss": 0.3813, "step": 12108 }, { "epoch": 1.136355105105105, "grad_norm": 0.9802258618260864, "learning_rate": 7.814032786747e-06, "loss": 0.398, "step": 12109 }, { "epoch": 1.1364489489489489, "grad_norm": 0.9169226156887508, "learning_rate": 7.813581481383104e-06, "loss": 0.3877, "step": 12110 }, { "epoch": 1.1365427927927927, "grad_norm": 0.9126442089393356, "learning_rate": 7.813130142472546e-06, "loss": 0.419, "step": 12111 }, { "epoch": 1.1366366366366367, "grad_norm": 1.0102846831951318, "learning_rate": 7.812678770020707e-06, "loss": 0.4057, "step": 12112 }, { "epoch": 1.1367304804804805, "grad_norm": 1.2706801711632651, "learning_rate": 7.812227364032967e-06, "loss": 0.3708, "step": 12113 }, { "epoch": 1.1368243243243243, "grad_norm": 0.9465700574676185, "learning_rate": 7.81177592451471e-06, "loss": 0.4401, "step": 12114 }, { "epoch": 1.1369181681681682, "grad_norm": 0.9381493262501112, "learning_rate": 7.811324451471317e-06, "loss": 0.3748, "step": 12115 }, { "epoch": 1.137012012012012, "grad_norm": 1.3486298294234658, "learning_rate": 7.810872944908172e-06, "loss": 0.4161, "step": 12116 }, { "epoch": 1.1371058558558558, "grad_norm": 1.0140409177991707, "learning_rate": 7.810421404830659e-06, "loss": 0.4323, "step": 12117 }, { "epoch": 1.1371996996996998, "grad_norm": 1.0334077457465656, "learning_rate": 7.80996983124416e-06, "loss": 0.4099, "step": 12118 }, { "epoch": 1.1372935435435436, "grad_norm": 0.9571892677812097, "learning_rate": 7.809518224154059e-06, "loss": 0.4203, "step": 12119 }, { "epoch": 1.1373873873873874, "grad_norm": 1.124215061534555, "learning_rate": 7.809066583565743e-06, "loss": 0.4413, "step": 12120 }, { "epoch": 1.1374812312312312, "grad_norm": 0.8863431775722498, "learning_rate": 7.808614909484595e-06, "loss": 0.4135, "step": 12121 }, { "epoch": 1.137575075075075, "grad_norm": 1.0992653566041855, "learning_rate": 7.808163201916e-06, "loss": 0.4227, "step": 12122 }, { "epoch": 1.1376689189189189, "grad_norm": 0.8106994628604934, "learning_rate": 7.807711460865346e-06, "loss": 0.3815, "step": 12123 }, { "epoch": 1.1377627627627627, "grad_norm": 1.1753164661366746, "learning_rate": 7.807259686338017e-06, "loss": 0.445, "step": 12124 }, { "epoch": 1.1378566066066067, "grad_norm": 1.0100427628174375, "learning_rate": 7.8068078783394e-06, "loss": 0.4074, "step": 12125 }, { "epoch": 1.1379504504504505, "grad_norm": 1.432721578675263, "learning_rate": 7.806356036874883e-06, "loss": 0.3924, "step": 12126 }, { "epoch": 1.1380442942942943, "grad_norm": 0.8687536841769904, "learning_rate": 7.805904161949854e-06, "loss": 0.4305, "step": 12127 }, { "epoch": 1.1381381381381381, "grad_norm": 0.9815048647689232, "learning_rate": 7.805452253569698e-06, "loss": 0.4291, "step": 12128 }, { "epoch": 1.138231981981982, "grad_norm": 0.9093435484457141, "learning_rate": 7.805000311739803e-06, "loss": 0.4018, "step": 12129 }, { "epoch": 1.1383258258258258, "grad_norm": 1.0161289420619022, "learning_rate": 7.804548336465562e-06, "loss": 0.4479, "step": 12130 }, { "epoch": 1.1384196696696698, "grad_norm": 1.0966868979068485, "learning_rate": 7.80409632775236e-06, "loss": 0.4422, "step": 12131 }, { "epoch": 1.1385135135135136, "grad_norm": 0.8839760628611669, "learning_rate": 7.803644285605588e-06, "loss": 0.4413, "step": 12132 }, { "epoch": 1.1386073573573574, "grad_norm": 0.9010757526490257, "learning_rate": 7.803192210030633e-06, "loss": 0.3962, "step": 12133 }, { "epoch": 1.1387012012012012, "grad_norm": 0.9471565642807177, "learning_rate": 7.80274010103289e-06, "loss": 0.3667, "step": 12134 }, { "epoch": 1.138795045045045, "grad_norm": 1.034250409772537, "learning_rate": 7.802287958617745e-06, "loss": 0.3857, "step": 12135 }, { "epoch": 1.1388888888888888, "grad_norm": 0.9314970202664308, "learning_rate": 7.801835782790592e-06, "loss": 0.4134, "step": 12136 }, { "epoch": 1.1389827327327327, "grad_norm": 0.86842938870106, "learning_rate": 7.801383573556822e-06, "loss": 0.3705, "step": 12137 }, { "epoch": 1.1390765765765765, "grad_norm": 0.8774730580225409, "learning_rate": 7.800931330921823e-06, "loss": 0.4013, "step": 12138 }, { "epoch": 1.1391704204204205, "grad_norm": 1.0012755215746731, "learning_rate": 7.800479054890993e-06, "loss": 0.4005, "step": 12139 }, { "epoch": 1.1392642642642643, "grad_norm": 0.9286542230679027, "learning_rate": 7.80002674546972e-06, "loss": 0.3796, "step": 12140 }, { "epoch": 1.1393581081081081, "grad_norm": 0.9836436096074676, "learning_rate": 7.7995744026634e-06, "loss": 0.3814, "step": 12141 }, { "epoch": 1.139451951951952, "grad_norm": 1.051729265127138, "learning_rate": 7.799122026477423e-06, "loss": 0.4371, "step": 12142 }, { "epoch": 1.1395457957957957, "grad_norm": 1.3143014548572356, "learning_rate": 7.798669616917188e-06, "loss": 0.435, "step": 12143 }, { "epoch": 1.1396396396396395, "grad_norm": 1.1573964981206102, "learning_rate": 7.798217173988081e-06, "loss": 0.4319, "step": 12144 }, { "epoch": 1.1397334834834836, "grad_norm": 1.366405415128759, "learning_rate": 7.797764697695504e-06, "loss": 0.4093, "step": 12145 }, { "epoch": 1.1398273273273274, "grad_norm": 1.228390788053453, "learning_rate": 7.79731218804485e-06, "loss": 0.4007, "step": 12146 }, { "epoch": 1.1399211711711712, "grad_norm": 0.8689831840153518, "learning_rate": 7.796859645041513e-06, "loss": 0.3502, "step": 12147 }, { "epoch": 1.140015015015015, "grad_norm": 1.439049722492193, "learning_rate": 7.796407068690887e-06, "loss": 0.4051, "step": 12148 }, { "epoch": 1.1401088588588588, "grad_norm": 0.9759906863722149, "learning_rate": 7.795954458998374e-06, "loss": 0.4223, "step": 12149 }, { "epoch": 1.1402027027027026, "grad_norm": 0.9138536347906765, "learning_rate": 7.795501815969364e-06, "loss": 0.393, "step": 12150 }, { "epoch": 1.1402965465465464, "grad_norm": 0.9518236238001055, "learning_rate": 7.79504913960926e-06, "loss": 0.426, "step": 12151 }, { "epoch": 1.1403903903903905, "grad_norm": 1.030751939147803, "learning_rate": 7.794596429923453e-06, "loss": 0.3845, "step": 12152 }, { "epoch": 1.1404842342342343, "grad_norm": 0.9731969006248381, "learning_rate": 7.794143686917345e-06, "loss": 0.4757, "step": 12153 }, { "epoch": 1.140578078078078, "grad_norm": 1.0135058091443878, "learning_rate": 7.793690910596333e-06, "loss": 0.4112, "step": 12154 }, { "epoch": 1.140671921921922, "grad_norm": 0.957045602047907, "learning_rate": 7.793238100965817e-06, "loss": 0.4552, "step": 12155 }, { "epoch": 1.1407657657657657, "grad_norm": 1.1083418077492104, "learning_rate": 7.792785258031191e-06, "loss": 0.4263, "step": 12156 }, { "epoch": 1.1408596096096095, "grad_norm": 1.2550513955755032, "learning_rate": 7.79233238179786e-06, "loss": 0.3988, "step": 12157 }, { "epoch": 1.1409534534534536, "grad_norm": 1.0092975430788618, "learning_rate": 7.79187947227122e-06, "loss": 0.3825, "step": 12158 }, { "epoch": 1.1410472972972974, "grad_norm": 0.9329847364752862, "learning_rate": 7.791426529456674e-06, "loss": 0.4163, "step": 12159 }, { "epoch": 1.1411411411411412, "grad_norm": 0.8735902988751532, "learning_rate": 7.79097355335962e-06, "loss": 0.3903, "step": 12160 }, { "epoch": 1.141234984984985, "grad_norm": 1.1972882661462851, "learning_rate": 7.790520543985461e-06, "loss": 0.4045, "step": 12161 }, { "epoch": 1.1413288288288288, "grad_norm": 1.1548371501686476, "learning_rate": 7.790067501339596e-06, "loss": 0.4431, "step": 12162 }, { "epoch": 1.1414226726726726, "grad_norm": 1.64778201586845, "learning_rate": 7.789614425427428e-06, "loss": 0.4058, "step": 12163 }, { "epoch": 1.1415165165165164, "grad_norm": 2.8333175086516964, "learning_rate": 7.789161316254359e-06, "loss": 0.4359, "step": 12164 }, { "epoch": 1.1416103603603605, "grad_norm": 0.8542383512341839, "learning_rate": 7.788708173825791e-06, "loss": 0.3874, "step": 12165 }, { "epoch": 1.1417042042042043, "grad_norm": 1.2197282211335132, "learning_rate": 7.788254998147127e-06, "loss": 0.3849, "step": 12166 }, { "epoch": 1.141798048048048, "grad_norm": 1.5427901827499972, "learning_rate": 7.787801789223772e-06, "loss": 0.4369, "step": 12167 }, { "epoch": 1.1418918918918919, "grad_norm": 0.7376517019671445, "learning_rate": 7.787348547061127e-06, "loss": 0.3576, "step": 12168 }, { "epoch": 1.1419857357357357, "grad_norm": 0.847437894381949, "learning_rate": 7.7868952716646e-06, "loss": 0.4101, "step": 12169 }, { "epoch": 1.1420795795795795, "grad_norm": 1.0260688127625446, "learning_rate": 7.786441963039589e-06, "loss": 0.438, "step": 12170 }, { "epoch": 1.1421734234234235, "grad_norm": 1.0252577169427919, "learning_rate": 7.785988621191502e-06, "loss": 0.4271, "step": 12171 }, { "epoch": 1.1422672672672673, "grad_norm": 0.8944194428731784, "learning_rate": 7.785535246125748e-06, "loss": 0.3879, "step": 12172 }, { "epoch": 1.1423611111111112, "grad_norm": 1.2504784554170623, "learning_rate": 7.785081837847728e-06, "loss": 0.4277, "step": 12173 }, { "epoch": 1.142454954954955, "grad_norm": 0.8619854683885868, "learning_rate": 7.784628396362847e-06, "loss": 0.4155, "step": 12174 }, { "epoch": 1.1425487987987988, "grad_norm": 0.9797765025972828, "learning_rate": 7.784174921676518e-06, "loss": 0.4136, "step": 12175 }, { "epoch": 1.1426426426426426, "grad_norm": 0.9275046527250563, "learning_rate": 7.783721413794142e-06, "loss": 0.4267, "step": 12176 }, { "epoch": 1.1427364864864864, "grad_norm": 1.1449060023125825, "learning_rate": 7.783267872721127e-06, "loss": 0.4876, "step": 12177 }, { "epoch": 1.1428303303303302, "grad_norm": 0.9334582549573318, "learning_rate": 7.782814298462882e-06, "loss": 0.4171, "step": 12178 }, { "epoch": 1.1429241741741742, "grad_norm": 0.7686924807192269, "learning_rate": 7.782360691024816e-06, "loss": 0.3429, "step": 12179 }, { "epoch": 1.143018018018018, "grad_norm": 0.9580475046343527, "learning_rate": 7.781907050412335e-06, "loss": 0.4219, "step": 12180 }, { "epoch": 1.1431118618618619, "grad_norm": 1.007584688214502, "learning_rate": 7.781453376630848e-06, "loss": 0.4349, "step": 12181 }, { "epoch": 1.1432057057057057, "grad_norm": 0.9574529260358622, "learning_rate": 7.780999669685767e-06, "loss": 0.3897, "step": 12182 }, { "epoch": 1.1432995495495495, "grad_norm": 0.9635739186545839, "learning_rate": 7.780545929582497e-06, "loss": 0.446, "step": 12183 }, { "epoch": 1.1433933933933933, "grad_norm": 0.8927749418626558, "learning_rate": 7.780092156326453e-06, "loss": 0.4294, "step": 12184 }, { "epoch": 1.1434872372372373, "grad_norm": 0.8288112134589888, "learning_rate": 7.779638349923042e-06, "loss": 0.3248, "step": 12185 }, { "epoch": 1.1435810810810811, "grad_norm": 0.9388888397934495, "learning_rate": 7.779184510377678e-06, "loss": 0.3931, "step": 12186 }, { "epoch": 1.143674924924925, "grad_norm": 1.211968705829177, "learning_rate": 7.778730637695765e-06, "loss": 0.4281, "step": 12187 }, { "epoch": 1.1437687687687688, "grad_norm": 0.9811177415085304, "learning_rate": 7.778276731882725e-06, "loss": 0.4418, "step": 12188 }, { "epoch": 1.1438626126126126, "grad_norm": 1.0993208266179622, "learning_rate": 7.777822792943961e-06, "loss": 0.4276, "step": 12189 }, { "epoch": 1.1439564564564564, "grad_norm": 0.9121348575045407, "learning_rate": 7.77736882088489e-06, "loss": 0.4027, "step": 12190 }, { "epoch": 1.1440503003003002, "grad_norm": 1.0344330235647812, "learning_rate": 7.776914815710926e-06, "loss": 0.4548, "step": 12191 }, { "epoch": 1.1441441441441442, "grad_norm": 0.9735938917692465, "learning_rate": 7.776460777427476e-06, "loss": 0.4103, "step": 12192 }, { "epoch": 1.144237987987988, "grad_norm": 1.0812767956969818, "learning_rate": 7.776006706039957e-06, "loss": 0.3899, "step": 12193 }, { "epoch": 1.1443318318318318, "grad_norm": 1.015593344754061, "learning_rate": 7.775552601553786e-06, "loss": 0.4396, "step": 12194 }, { "epoch": 1.1444256756756757, "grad_norm": 1.2317045984843156, "learning_rate": 7.775098463974376e-06, "loss": 0.3518, "step": 12195 }, { "epoch": 1.1445195195195195, "grad_norm": 1.0496260478075015, "learning_rate": 7.774644293307137e-06, "loss": 0.4044, "step": 12196 }, { "epoch": 1.1446133633633633, "grad_norm": 1.0029024217923252, "learning_rate": 7.77419008955749e-06, "loss": 0.4066, "step": 12197 }, { "epoch": 1.1447072072072073, "grad_norm": 1.094896388690591, "learning_rate": 7.773735852730845e-06, "loss": 0.3423, "step": 12198 }, { "epoch": 1.1448010510510511, "grad_norm": 4.56527136417072, "learning_rate": 7.773281582832624e-06, "loss": 0.3967, "step": 12199 }, { "epoch": 1.144894894894895, "grad_norm": 1.0091008528171428, "learning_rate": 7.77282727986824e-06, "loss": 0.4449, "step": 12200 }, { "epoch": 1.1449887387387387, "grad_norm": 9.423001564299254, "learning_rate": 7.772372943843108e-06, "loss": 0.4199, "step": 12201 }, { "epoch": 1.1450825825825826, "grad_norm": 1.0387258156864971, "learning_rate": 7.771918574762648e-06, "loss": 0.4332, "step": 12202 }, { "epoch": 1.1451764264264264, "grad_norm": 2.2146819393013626, "learning_rate": 7.771464172632277e-06, "loss": 0.4477, "step": 12203 }, { "epoch": 1.1452702702702702, "grad_norm": 0.9560647044755904, "learning_rate": 7.771009737457413e-06, "loss": 0.3818, "step": 12204 }, { "epoch": 1.1453641141141142, "grad_norm": 0.8884367226782434, "learning_rate": 7.770555269243473e-06, "loss": 0.3873, "step": 12205 }, { "epoch": 1.145457957957958, "grad_norm": 0.8891322653186163, "learning_rate": 7.770100767995876e-06, "loss": 0.3751, "step": 12206 }, { "epoch": 1.1455518018018018, "grad_norm": 0.8755886003075003, "learning_rate": 7.769646233720043e-06, "loss": 0.4078, "step": 12207 }, { "epoch": 1.1456456456456456, "grad_norm": 0.8368053112560385, "learning_rate": 7.769191666421391e-06, "loss": 0.3437, "step": 12208 }, { "epoch": 1.1457394894894894, "grad_norm": 0.9481951061156028, "learning_rate": 7.768737066105342e-06, "loss": 0.3645, "step": 12209 }, { "epoch": 1.1458333333333333, "grad_norm": 1.0043587505583826, "learning_rate": 7.768282432777312e-06, "loss": 0.4494, "step": 12210 }, { "epoch": 1.1459271771771773, "grad_norm": 1.8231438873998669, "learning_rate": 7.767827766442727e-06, "loss": 0.4112, "step": 12211 }, { "epoch": 1.146021021021021, "grad_norm": 1.0192941445635482, "learning_rate": 7.767373067107008e-06, "loss": 0.3982, "step": 12212 }, { "epoch": 1.146114864864865, "grad_norm": 1.0896367094061683, "learning_rate": 7.76691833477557e-06, "loss": 0.4235, "step": 12213 }, { "epoch": 1.1462087087087087, "grad_norm": 0.959829451380792, "learning_rate": 7.76646356945384e-06, "loss": 0.4127, "step": 12214 }, { "epoch": 1.1463025525525525, "grad_norm": 1.0830764098190129, "learning_rate": 7.76600877114724e-06, "loss": 0.4025, "step": 12215 }, { "epoch": 1.1463963963963963, "grad_norm": 0.9253517646368563, "learning_rate": 7.765553939861192e-06, "loss": 0.3729, "step": 12216 }, { "epoch": 1.1464902402402402, "grad_norm": 0.9080479525256094, "learning_rate": 7.76509907560112e-06, "loss": 0.4025, "step": 12217 }, { "epoch": 1.146584084084084, "grad_norm": 1.1973260410931104, "learning_rate": 7.764644178372444e-06, "loss": 0.4041, "step": 12218 }, { "epoch": 1.146677927927928, "grad_norm": 1.0124817813762823, "learning_rate": 7.764189248180592e-06, "loss": 0.484, "step": 12219 }, { "epoch": 1.1467717717717718, "grad_norm": 0.9218804930221163, "learning_rate": 7.763734285030984e-06, "loss": 0.4194, "step": 12220 }, { "epoch": 1.1468656156156156, "grad_norm": 1.009601503885237, "learning_rate": 7.76327928892905e-06, "loss": 0.3844, "step": 12221 }, { "epoch": 1.1469594594594594, "grad_norm": 0.9677578808822428, "learning_rate": 7.76282425988021e-06, "loss": 0.3951, "step": 12222 }, { "epoch": 1.1470533033033032, "grad_norm": 0.8298468327055399, "learning_rate": 7.76236919788989e-06, "loss": 0.3413, "step": 12223 }, { "epoch": 1.147147147147147, "grad_norm": 1.470850510870248, "learning_rate": 7.761914102963518e-06, "loss": 0.4014, "step": 12224 }, { "epoch": 1.147240990990991, "grad_norm": 0.9425562300796968, "learning_rate": 7.761458975106521e-06, "loss": 0.4539, "step": 12225 }, { "epoch": 1.147334834834835, "grad_norm": 0.942864437880964, "learning_rate": 7.761003814324322e-06, "loss": 0.4412, "step": 12226 }, { "epoch": 1.1474286786786787, "grad_norm": 1.109634989242705, "learning_rate": 7.760548620622351e-06, "loss": 0.4151, "step": 12227 }, { "epoch": 1.1475225225225225, "grad_norm": 0.8620861416957605, "learning_rate": 7.760093394006032e-06, "loss": 0.3204, "step": 12228 }, { "epoch": 1.1476163663663663, "grad_norm": 0.8612063480069807, "learning_rate": 7.759638134480794e-06, "loss": 0.3812, "step": 12229 }, { "epoch": 1.1477102102102101, "grad_norm": 0.9709215158630836, "learning_rate": 7.759182842052066e-06, "loss": 0.3817, "step": 12230 }, { "epoch": 1.147804054054054, "grad_norm": 1.0968713189105035, "learning_rate": 7.758727516725278e-06, "loss": 0.4418, "step": 12231 }, { "epoch": 1.147897897897898, "grad_norm": 0.9954211227288435, "learning_rate": 7.758272158505855e-06, "loss": 0.3786, "step": 12232 }, { "epoch": 1.1479917417417418, "grad_norm": 1.0638687384333496, "learning_rate": 7.75781676739923e-06, "loss": 0.4388, "step": 12233 }, { "epoch": 1.1480855855855856, "grad_norm": 1.220790452729989, "learning_rate": 7.757361343410833e-06, "loss": 0.4373, "step": 12234 }, { "epoch": 1.1481794294294294, "grad_norm": 0.9258334146739232, "learning_rate": 7.75690588654609e-06, "loss": 0.405, "step": 12235 }, { "epoch": 1.1482732732732732, "grad_norm": 1.203843150645318, "learning_rate": 7.756450396810432e-06, "loss": 0.3524, "step": 12236 }, { "epoch": 1.148367117117117, "grad_norm": 0.8721973961380388, "learning_rate": 7.755994874209293e-06, "loss": 0.3969, "step": 12237 }, { "epoch": 1.148460960960961, "grad_norm": 1.0402483838420578, "learning_rate": 7.755539318748102e-06, "loss": 0.4305, "step": 12238 }, { "epoch": 1.1485548048048049, "grad_norm": 1.0302168157398481, "learning_rate": 7.755083730432294e-06, "loss": 0.4208, "step": 12239 }, { "epoch": 1.1486486486486487, "grad_norm": 0.8554391782680687, "learning_rate": 7.754628109267296e-06, "loss": 0.3844, "step": 12240 }, { "epoch": 1.1487424924924925, "grad_norm": 1.1748776241891592, "learning_rate": 7.754172455258543e-06, "loss": 0.4372, "step": 12241 }, { "epoch": 1.1488363363363363, "grad_norm": 1.335808063750583, "learning_rate": 7.753716768411468e-06, "loss": 0.4175, "step": 12242 }, { "epoch": 1.1489301801801801, "grad_norm": 0.9820543593586021, "learning_rate": 7.753261048731504e-06, "loss": 0.4781, "step": 12243 }, { "epoch": 1.149024024024024, "grad_norm": 1.6246372193336385, "learning_rate": 7.752805296224085e-06, "loss": 0.4183, "step": 12244 }, { "epoch": 1.149117867867868, "grad_norm": 1.0970392676265945, "learning_rate": 7.752349510894644e-06, "loss": 0.4654, "step": 12245 }, { "epoch": 1.1492117117117118, "grad_norm": 0.949446902133244, "learning_rate": 7.751893692748615e-06, "loss": 0.4056, "step": 12246 }, { "epoch": 1.1493055555555556, "grad_norm": 1.067634470018408, "learning_rate": 7.751437841791434e-06, "loss": 0.3676, "step": 12247 }, { "epoch": 1.1493993993993994, "grad_norm": 0.884218239054875, "learning_rate": 7.750981958028537e-06, "loss": 0.3832, "step": 12248 }, { "epoch": 1.1494932432432432, "grad_norm": 1.0949490272373814, "learning_rate": 7.750526041465359e-06, "loss": 0.4148, "step": 12249 }, { "epoch": 1.149587087087087, "grad_norm": 1.123082471777516, "learning_rate": 7.750070092107332e-06, "loss": 0.4249, "step": 12250 }, { "epoch": 1.149680930930931, "grad_norm": 1.1623831754932965, "learning_rate": 7.749614109959897e-06, "loss": 0.3561, "step": 12251 }, { "epoch": 1.1497747747747749, "grad_norm": 0.8988883176529395, "learning_rate": 7.749158095028491e-06, "loss": 0.3652, "step": 12252 }, { "epoch": 1.1498686186186187, "grad_norm": 0.9478712877217123, "learning_rate": 7.748702047318548e-06, "loss": 0.3964, "step": 12253 }, { "epoch": 1.1499624624624625, "grad_norm": 0.8967252247317715, "learning_rate": 7.748245966835508e-06, "loss": 0.3605, "step": 12254 }, { "epoch": 1.1500563063063063, "grad_norm": 0.9120901582859616, "learning_rate": 7.747789853584807e-06, "loss": 0.406, "step": 12255 }, { "epoch": 1.15015015015015, "grad_norm": 1.024154042529523, "learning_rate": 7.747333707571884e-06, "loss": 0.4351, "step": 12256 }, { "epoch": 1.150243993993994, "grad_norm": 0.845714058207271, "learning_rate": 7.746877528802178e-06, "loss": 0.3566, "step": 12257 }, { "epoch": 1.1503378378378377, "grad_norm": 0.8816226501482445, "learning_rate": 7.746421317281127e-06, "loss": 0.3872, "step": 12258 }, { "epoch": 1.1504316816816818, "grad_norm": 2.298544193203337, "learning_rate": 7.745965073014173e-06, "loss": 0.3738, "step": 12259 }, { "epoch": 1.1505255255255256, "grad_norm": 1.5859530342803814, "learning_rate": 7.745508796006755e-06, "loss": 0.4347, "step": 12260 }, { "epoch": 1.1506193693693694, "grad_norm": 1.0494564786121876, "learning_rate": 7.74505248626431e-06, "loss": 0.4029, "step": 12261 }, { "epoch": 1.1507132132132132, "grad_norm": 0.9697606514699816, "learning_rate": 7.744596143792282e-06, "loss": 0.4215, "step": 12262 }, { "epoch": 1.150807057057057, "grad_norm": 1.1058671039206316, "learning_rate": 7.744139768596113e-06, "loss": 0.4372, "step": 12263 }, { "epoch": 1.150900900900901, "grad_norm": 1.0993913425646058, "learning_rate": 7.74368336068124e-06, "loss": 0.3819, "step": 12264 }, { "epoch": 1.1509947447447448, "grad_norm": 1.038786375051246, "learning_rate": 7.74322692005311e-06, "loss": 0.4034, "step": 12265 }, { "epoch": 1.1510885885885886, "grad_norm": 0.9505318361107336, "learning_rate": 7.742770446717161e-06, "loss": 0.3937, "step": 12266 }, { "epoch": 1.1511824324324325, "grad_norm": 0.8368524500953103, "learning_rate": 7.742313940678836e-06, "loss": 0.3876, "step": 12267 }, { "epoch": 1.1512762762762763, "grad_norm": 1.1556722846927168, "learning_rate": 7.741857401943581e-06, "loss": 0.3813, "step": 12268 }, { "epoch": 1.15137012012012, "grad_norm": 3.723863426576972, "learning_rate": 7.741400830516838e-06, "loss": 0.4221, "step": 12269 }, { "epoch": 1.1514639639639639, "grad_norm": 0.9361463726657768, "learning_rate": 7.740944226404047e-06, "loss": 0.4474, "step": 12270 }, { "epoch": 1.1515578078078077, "grad_norm": 1.0687246135895896, "learning_rate": 7.740487589610659e-06, "loss": 0.3939, "step": 12271 }, { "epoch": 1.1516516516516517, "grad_norm": 0.9389279917888614, "learning_rate": 7.740030920142113e-06, "loss": 0.3994, "step": 12272 }, { "epoch": 1.1517454954954955, "grad_norm": 1.1154699627703946, "learning_rate": 7.739574218003856e-06, "loss": 0.4096, "step": 12273 }, { "epoch": 1.1518393393393394, "grad_norm": 0.97082051522988, "learning_rate": 7.739117483201334e-06, "loss": 0.4493, "step": 12274 }, { "epoch": 1.1519331831831832, "grad_norm": 1.3316654486192905, "learning_rate": 7.738660715739993e-06, "loss": 0.3861, "step": 12275 }, { "epoch": 1.152027027027027, "grad_norm": 1.0244055586530454, "learning_rate": 7.738203915625275e-06, "loss": 0.3973, "step": 12276 }, { "epoch": 1.1521208708708708, "grad_norm": 0.8518457606200581, "learning_rate": 7.73774708286263e-06, "loss": 0.3995, "step": 12277 }, { "epoch": 1.1522147147147148, "grad_norm": 1.001256712107556, "learning_rate": 7.737290217457505e-06, "loss": 0.4361, "step": 12278 }, { "epoch": 1.1523085585585586, "grad_norm": 1.0465807788364023, "learning_rate": 7.736833319415347e-06, "loss": 0.4014, "step": 12279 }, { "epoch": 1.1524024024024024, "grad_norm": 1.0295552417469314, "learning_rate": 7.736376388741602e-06, "loss": 0.4733, "step": 12280 }, { "epoch": 1.1524962462462462, "grad_norm": 0.9044325237943078, "learning_rate": 7.735919425441722e-06, "loss": 0.3882, "step": 12281 }, { "epoch": 1.15259009009009, "grad_norm": 0.8799752573178489, "learning_rate": 7.73546242952115e-06, "loss": 0.4458, "step": 12282 }, { "epoch": 1.1526839339339339, "grad_norm": 0.8474317585722485, "learning_rate": 7.735005400985338e-06, "loss": 0.3743, "step": 12283 }, { "epoch": 1.1527777777777777, "grad_norm": 0.9993227281439642, "learning_rate": 7.734548339839735e-06, "loss": 0.3465, "step": 12284 }, { "epoch": 1.1528716216216217, "grad_norm": 1.6803863164239667, "learning_rate": 7.734091246089791e-06, "loss": 0.4054, "step": 12285 }, { "epoch": 1.1529654654654655, "grad_norm": 0.8678979977482061, "learning_rate": 7.733634119740955e-06, "loss": 0.3654, "step": 12286 }, { "epoch": 1.1530593093093093, "grad_norm": 1.006336739874337, "learning_rate": 7.733176960798678e-06, "loss": 0.4424, "step": 12287 }, { "epoch": 1.1531531531531531, "grad_norm": 0.9831459361690514, "learning_rate": 7.73271976926841e-06, "loss": 0.3732, "step": 12288 }, { "epoch": 1.153246996996997, "grad_norm": 1.0478322320250713, "learning_rate": 7.7322625451556e-06, "loss": 0.3945, "step": 12289 }, { "epoch": 1.1533408408408408, "grad_norm": 1.1464452479019482, "learning_rate": 7.731805288465706e-06, "loss": 0.4899, "step": 12290 }, { "epoch": 1.1534346846846848, "grad_norm": 3.0267470657479847, "learning_rate": 7.731347999204173e-06, "loss": 0.3787, "step": 12291 }, { "epoch": 1.1535285285285286, "grad_norm": 1.1413570888335869, "learning_rate": 7.73089067737646e-06, "loss": 0.4082, "step": 12292 }, { "epoch": 1.1536223723723724, "grad_norm": 1.187852327124805, "learning_rate": 7.730433322988013e-06, "loss": 0.3615, "step": 12293 }, { "epoch": 1.1537162162162162, "grad_norm": 1.1143313167757523, "learning_rate": 7.729975936044288e-06, "loss": 0.3961, "step": 12294 }, { "epoch": 1.15381006006006, "grad_norm": 0.9345776020237484, "learning_rate": 7.729518516550741e-06, "loss": 0.3865, "step": 12295 }, { "epoch": 1.1539039039039038, "grad_norm": 0.9502611116871217, "learning_rate": 7.729061064512821e-06, "loss": 0.4069, "step": 12296 }, { "epoch": 1.1539977477477477, "grad_norm": 0.9109402936848864, "learning_rate": 7.728603579935985e-06, "loss": 0.3826, "step": 12297 }, { "epoch": 1.1540915915915915, "grad_norm": 1.2716932032800812, "learning_rate": 7.72814606282569e-06, "loss": 0.3754, "step": 12298 }, { "epoch": 1.1541854354354355, "grad_norm": 0.9954344666295508, "learning_rate": 7.727688513187386e-06, "loss": 0.3769, "step": 12299 }, { "epoch": 1.1542792792792793, "grad_norm": 1.0884328676404234, "learning_rate": 7.72723093102653e-06, "loss": 0.3978, "step": 12300 }, { "epoch": 1.1543731231231231, "grad_norm": 1.0042100382745425, "learning_rate": 7.72677331634858e-06, "loss": 0.407, "step": 12301 }, { "epoch": 1.154466966966967, "grad_norm": 1.113111700539253, "learning_rate": 7.726315669158993e-06, "loss": 0.421, "step": 12302 }, { "epoch": 1.1545608108108107, "grad_norm": 0.9218418368699182, "learning_rate": 7.72585798946322e-06, "loss": 0.3882, "step": 12303 }, { "epoch": 1.1546546546546548, "grad_norm": 0.9252751755089051, "learning_rate": 7.725400277266722e-06, "loss": 0.4209, "step": 12304 }, { "epoch": 1.1547484984984986, "grad_norm": 0.8998628355413002, "learning_rate": 7.724942532574957e-06, "loss": 0.3799, "step": 12305 }, { "epoch": 1.1548423423423424, "grad_norm": 1.010275584476218, "learning_rate": 7.724484755393381e-06, "loss": 0.34, "step": 12306 }, { "epoch": 1.1549361861861862, "grad_norm": 0.8746950280863061, "learning_rate": 7.724026945727451e-06, "loss": 0.4244, "step": 12307 }, { "epoch": 1.15503003003003, "grad_norm": 0.958136372402332, "learning_rate": 7.723569103582631e-06, "loss": 0.3991, "step": 12308 }, { "epoch": 1.1551238738738738, "grad_norm": 0.8507966357249224, "learning_rate": 7.723111228964374e-06, "loss": 0.3629, "step": 12309 }, { "epoch": 1.1552177177177176, "grad_norm": 0.9756379259355784, "learning_rate": 7.72265332187814e-06, "loss": 0.4671, "step": 12310 }, { "epoch": 1.1553115615615615, "grad_norm": 1.0207743620679524, "learning_rate": 7.722195382329392e-06, "loss": 0.4787, "step": 12311 }, { "epoch": 1.1554054054054055, "grad_norm": 0.8957869289465946, "learning_rate": 7.721737410323585e-06, "loss": 0.4085, "step": 12312 }, { "epoch": 1.1554992492492493, "grad_norm": 1.7405134281864216, "learning_rate": 7.721279405866185e-06, "loss": 0.4061, "step": 12313 }, { "epoch": 1.155593093093093, "grad_norm": 0.9607035943920079, "learning_rate": 7.72082136896265e-06, "loss": 0.4169, "step": 12314 }, { "epoch": 1.155686936936937, "grad_norm": 1.2541776431507536, "learning_rate": 7.72036329961844e-06, "loss": 0.3914, "step": 12315 }, { "epoch": 1.1557807807807807, "grad_norm": 0.9188263531089526, "learning_rate": 7.71990519783902e-06, "loss": 0.4433, "step": 12316 }, { "epoch": 1.1558746246246245, "grad_norm": 0.9097064195019486, "learning_rate": 7.71944706362985e-06, "loss": 0.4115, "step": 12317 }, { "epoch": 1.1559684684684686, "grad_norm": 0.9837249283692784, "learning_rate": 7.718988896996392e-06, "loss": 0.4295, "step": 12318 }, { "epoch": 1.1560623123123124, "grad_norm": 2.0641223060704226, "learning_rate": 7.718530697944111e-06, "loss": 0.4052, "step": 12319 }, { "epoch": 1.1561561561561562, "grad_norm": 0.9368995987555245, "learning_rate": 7.718072466478467e-06, "loss": 0.4118, "step": 12320 }, { "epoch": 1.15625, "grad_norm": 1.1833101562587414, "learning_rate": 7.717614202604926e-06, "loss": 0.3971, "step": 12321 }, { "epoch": 1.1563438438438438, "grad_norm": 1.224337845872369, "learning_rate": 7.71715590632895e-06, "loss": 0.3778, "step": 12322 }, { "epoch": 1.1564376876876876, "grad_norm": 0.8879959301989475, "learning_rate": 7.716697577656004e-06, "loss": 0.3742, "step": 12323 }, { "epoch": 1.1565315315315314, "grad_norm": 4.539401941257649, "learning_rate": 7.716239216591555e-06, "loss": 0.3887, "step": 12324 }, { "epoch": 1.1566253753753755, "grad_norm": 1.1786204370277111, "learning_rate": 7.715780823141065e-06, "loss": 0.4449, "step": 12325 }, { "epoch": 1.1567192192192193, "grad_norm": 1.0689243623071711, "learning_rate": 7.715322397310002e-06, "loss": 0.4045, "step": 12326 }, { "epoch": 1.156813063063063, "grad_norm": 0.9374979997085374, "learning_rate": 7.714863939103828e-06, "loss": 0.4107, "step": 12327 }, { "epoch": 1.156906906906907, "grad_norm": 0.8209948746609341, "learning_rate": 7.714405448528014e-06, "loss": 0.3895, "step": 12328 }, { "epoch": 1.1570007507507507, "grad_norm": 1.0990119748617078, "learning_rate": 7.713946925588023e-06, "loss": 0.4115, "step": 12329 }, { "epoch": 1.1570945945945945, "grad_norm": 0.9923655446827041, "learning_rate": 7.713488370289323e-06, "loss": 0.3966, "step": 12330 }, { "epoch": 1.1571884384384385, "grad_norm": 0.9731143860760499, "learning_rate": 7.713029782637382e-06, "loss": 0.3727, "step": 12331 }, { "epoch": 1.1572822822822824, "grad_norm": 1.1558411511527027, "learning_rate": 7.712571162637668e-06, "loss": 0.439, "step": 12332 }, { "epoch": 1.1573761261261262, "grad_norm": 0.9398651348904566, "learning_rate": 7.71211251029565e-06, "loss": 0.3838, "step": 12333 }, { "epoch": 1.15746996996997, "grad_norm": 1.1893143603927478, "learning_rate": 7.711653825616794e-06, "loss": 0.4334, "step": 12334 }, { "epoch": 1.1575638138138138, "grad_norm": 1.0042012385750978, "learning_rate": 7.711195108606571e-06, "loss": 0.3829, "step": 12335 }, { "epoch": 1.1576576576576576, "grad_norm": 0.9155480842501359, "learning_rate": 7.710736359270448e-06, "loss": 0.4309, "step": 12336 }, { "epoch": 1.1577515015015014, "grad_norm": 0.9827594624727767, "learning_rate": 7.710277577613896e-06, "loss": 0.4208, "step": 12337 }, { "epoch": 1.1578453453453452, "grad_norm": 1.4717192729285367, "learning_rate": 7.709818763642389e-06, "loss": 0.4186, "step": 12338 }, { "epoch": 1.1579391891891893, "grad_norm": 1.0110453073430647, "learning_rate": 7.709359917361392e-06, "loss": 0.4188, "step": 12339 }, { "epoch": 1.158033033033033, "grad_norm": 1.1901590994493692, "learning_rate": 7.708901038776377e-06, "loss": 0.3941, "step": 12340 }, { "epoch": 1.1581268768768769, "grad_norm": 1.0125390409455166, "learning_rate": 7.708442127892817e-06, "loss": 0.4085, "step": 12341 }, { "epoch": 1.1582207207207207, "grad_norm": 1.1008417028085826, "learning_rate": 7.707983184716183e-06, "loss": 0.4218, "step": 12342 }, { "epoch": 1.1583145645645645, "grad_norm": 1.15608898712385, "learning_rate": 7.707524209251944e-06, "loss": 0.4243, "step": 12343 }, { "epoch": 1.1584084084084085, "grad_norm": 0.9711723192518625, "learning_rate": 7.70706520150558e-06, "loss": 0.4122, "step": 12344 }, { "epoch": 1.1585022522522523, "grad_norm": 1.1274455507016539, "learning_rate": 7.706606161482555e-06, "loss": 0.3698, "step": 12345 }, { "epoch": 1.1585960960960962, "grad_norm": 0.9862680059230061, "learning_rate": 7.706147089188347e-06, "loss": 0.3618, "step": 12346 }, { "epoch": 1.15868993993994, "grad_norm": 0.9112116475104409, "learning_rate": 7.705687984628431e-06, "loss": 0.4247, "step": 12347 }, { "epoch": 1.1587837837837838, "grad_norm": 0.9814386999754874, "learning_rate": 7.705228847808277e-06, "loss": 0.434, "step": 12348 }, { "epoch": 1.1588776276276276, "grad_norm": 1.0414447368371957, "learning_rate": 7.70476967873336e-06, "loss": 0.3853, "step": 12349 }, { "epoch": 1.1589714714714714, "grad_norm": 0.9124997970965716, "learning_rate": 7.704310477409157e-06, "loss": 0.4532, "step": 12350 }, { "epoch": 1.1590653153153152, "grad_norm": 0.8578710464736482, "learning_rate": 7.703851243841143e-06, "loss": 0.4437, "step": 12351 }, { "epoch": 1.1591591591591592, "grad_norm": 0.9209422913970037, "learning_rate": 7.70339197803479e-06, "loss": 0.4036, "step": 12352 }, { "epoch": 1.159253003003003, "grad_norm": 1.235412767372145, "learning_rate": 7.702932679995578e-06, "loss": 0.3867, "step": 12353 }, { "epoch": 1.1593468468468469, "grad_norm": 0.9879328522443429, "learning_rate": 7.702473349728981e-06, "loss": 0.4469, "step": 12354 }, { "epoch": 1.1594406906906907, "grad_norm": 0.849009535342187, "learning_rate": 7.702013987240477e-06, "loss": 0.3651, "step": 12355 }, { "epoch": 1.1595345345345345, "grad_norm": 1.148111198910274, "learning_rate": 7.70155459253554e-06, "loss": 0.4212, "step": 12356 }, { "epoch": 1.1596283783783783, "grad_norm": 1.6267435746981749, "learning_rate": 7.701095165619652e-06, "loss": 0.4014, "step": 12357 }, { "epoch": 1.1597222222222223, "grad_norm": 0.8645091156541894, "learning_rate": 7.700635706498287e-06, "loss": 0.4077, "step": 12358 }, { "epoch": 1.1598160660660661, "grad_norm": 1.1367242527915873, "learning_rate": 7.700176215176927e-06, "loss": 0.4311, "step": 12359 }, { "epoch": 1.15990990990991, "grad_norm": 0.9087110918221091, "learning_rate": 7.699716691661046e-06, "loss": 0.4122, "step": 12360 }, { "epoch": 1.1600037537537538, "grad_norm": 0.9115878852875756, "learning_rate": 7.699257135956125e-06, "loss": 0.4866, "step": 12361 }, { "epoch": 1.1600975975975976, "grad_norm": 1.0850668748656442, "learning_rate": 7.698797548067644e-06, "loss": 0.3963, "step": 12362 }, { "epoch": 1.1601914414414414, "grad_norm": 0.8774297219213785, "learning_rate": 7.698337928001083e-06, "loss": 0.3899, "step": 12363 }, { "epoch": 1.1602852852852852, "grad_norm": 1.3224692926989685, "learning_rate": 7.69787827576192e-06, "loss": 0.3769, "step": 12364 }, { "epoch": 1.1603791291291292, "grad_norm": 0.9354020859809488, "learning_rate": 7.697418591355637e-06, "loss": 0.3844, "step": 12365 }, { "epoch": 1.160472972972973, "grad_norm": 1.0306590811125043, "learning_rate": 7.696958874787714e-06, "loss": 0.3864, "step": 12366 }, { "epoch": 1.1605668168168168, "grad_norm": 1.304753178378889, "learning_rate": 7.696499126063635e-06, "loss": 0.388, "step": 12367 }, { "epoch": 1.1606606606606606, "grad_norm": 2.4692772550945508, "learning_rate": 7.69603934518888e-06, "loss": 0.4103, "step": 12368 }, { "epoch": 1.1607545045045045, "grad_norm": 0.920228452637589, "learning_rate": 7.695579532168928e-06, "loss": 0.4058, "step": 12369 }, { "epoch": 1.1608483483483483, "grad_norm": 0.8163754368514261, "learning_rate": 7.695119687009264e-06, "loss": 0.3818, "step": 12370 }, { "epoch": 1.1609421921921923, "grad_norm": 0.9551937330242677, "learning_rate": 7.694659809715373e-06, "loss": 0.3795, "step": 12371 }, { "epoch": 1.1610360360360361, "grad_norm": 0.8945735094564197, "learning_rate": 7.694199900292733e-06, "loss": 0.4034, "step": 12372 }, { "epoch": 1.16112987987988, "grad_norm": 0.87135725737932, "learning_rate": 7.693739958746832e-06, "loss": 0.3665, "step": 12373 }, { "epoch": 1.1612237237237237, "grad_norm": 1.0479620517200459, "learning_rate": 7.693279985083155e-06, "loss": 0.4481, "step": 12374 }, { "epoch": 1.1613175675675675, "grad_norm": 0.9259748423134417, "learning_rate": 7.69281997930718e-06, "loss": 0.3816, "step": 12375 }, { "epoch": 1.1614114114114114, "grad_norm": 0.8261370399730851, "learning_rate": 7.692359941424397e-06, "loss": 0.3827, "step": 12376 }, { "epoch": 1.1615052552552552, "grad_norm": 1.056286600655177, "learning_rate": 7.691899871440289e-06, "loss": 0.422, "step": 12377 }, { "epoch": 1.161599099099099, "grad_norm": 0.9723650549569408, "learning_rate": 7.691439769360343e-06, "loss": 0.4073, "step": 12378 }, { "epoch": 1.161692942942943, "grad_norm": 1.131026130026069, "learning_rate": 7.690979635190043e-06, "loss": 0.4543, "step": 12379 }, { "epoch": 1.1617867867867868, "grad_norm": 1.0061048084713422, "learning_rate": 7.690519468934876e-06, "loss": 0.4007, "step": 12380 }, { "epoch": 1.1618806306306306, "grad_norm": 0.9377033771859958, "learning_rate": 7.690059270600329e-06, "loss": 0.3709, "step": 12381 }, { "epoch": 1.1619744744744744, "grad_norm": 0.9482889459821129, "learning_rate": 7.68959904019189e-06, "loss": 0.4099, "step": 12382 }, { "epoch": 1.1620683183183182, "grad_norm": 0.9915886514875885, "learning_rate": 7.689138777715043e-06, "loss": 0.4193, "step": 12383 }, { "epoch": 1.1621621621621623, "grad_norm": 0.9658913351298636, "learning_rate": 7.68867848317528e-06, "loss": 0.3926, "step": 12384 }, { "epoch": 1.162256006006006, "grad_norm": 0.8925536090100835, "learning_rate": 7.688218156578084e-06, "loss": 0.3985, "step": 12385 }, { "epoch": 1.16234984984985, "grad_norm": 0.8370454654421907, "learning_rate": 7.68775779792895e-06, "loss": 0.3699, "step": 12386 }, { "epoch": 1.1624436936936937, "grad_norm": 1.0556998318516075, "learning_rate": 7.687297407233363e-06, "loss": 0.378, "step": 12387 }, { "epoch": 1.1625375375375375, "grad_norm": 1.1715679802582224, "learning_rate": 7.68683698449681e-06, "loss": 0.4346, "step": 12388 }, { "epoch": 1.1626313813813813, "grad_norm": 0.8939216753248138, "learning_rate": 7.686376529724785e-06, "loss": 0.3397, "step": 12389 }, { "epoch": 1.1627252252252251, "grad_norm": 0.8200807428457852, "learning_rate": 7.685916042922777e-06, "loss": 0.383, "step": 12390 }, { "epoch": 1.162819069069069, "grad_norm": 1.1079085838928269, "learning_rate": 7.685455524096274e-06, "loss": 0.4016, "step": 12391 }, { "epoch": 1.162912912912913, "grad_norm": 0.9351733181078892, "learning_rate": 7.68499497325077e-06, "loss": 0.4025, "step": 12392 }, { "epoch": 1.1630067567567568, "grad_norm": 1.0092805619611964, "learning_rate": 7.684534390391756e-06, "loss": 0.419, "step": 12393 }, { "epoch": 1.1631006006006006, "grad_norm": 0.901303016591015, "learning_rate": 7.684073775524721e-06, "loss": 0.4106, "step": 12394 }, { "epoch": 1.1631944444444444, "grad_norm": 0.8495141086485914, "learning_rate": 7.68361312865516e-06, "loss": 0.4072, "step": 12395 }, { "epoch": 1.1632882882882882, "grad_norm": 1.0820043455335422, "learning_rate": 7.683152449788562e-06, "loss": 0.4025, "step": 12396 }, { "epoch": 1.163382132132132, "grad_norm": 1.081035875527371, "learning_rate": 7.68269173893042e-06, "loss": 0.3985, "step": 12397 }, { "epoch": 1.163475975975976, "grad_norm": 0.9864143076355856, "learning_rate": 7.682230996086232e-06, "loss": 0.4384, "step": 12398 }, { "epoch": 1.1635698198198199, "grad_norm": 0.8565896909969797, "learning_rate": 7.681770221261489e-06, "loss": 0.4101, "step": 12399 }, { "epoch": 1.1636636636636637, "grad_norm": 0.944889395974154, "learning_rate": 7.681309414461681e-06, "loss": 0.3803, "step": 12400 }, { "epoch": 1.1637575075075075, "grad_norm": 1.155754981740081, "learning_rate": 7.680848575692308e-06, "loss": 0.4411, "step": 12401 }, { "epoch": 1.1638513513513513, "grad_norm": 1.38965969643471, "learning_rate": 7.680387704958859e-06, "loss": 0.3503, "step": 12402 }, { "epoch": 1.1639451951951951, "grad_norm": 0.9192428006050496, "learning_rate": 7.679926802266833e-06, "loss": 0.4058, "step": 12403 }, { "epoch": 1.164039039039039, "grad_norm": 1.081087197247162, "learning_rate": 7.679465867621725e-06, "loss": 0.3952, "step": 12404 }, { "epoch": 1.164132882882883, "grad_norm": 0.9031208028571192, "learning_rate": 7.67900490102903e-06, "loss": 0.4017, "step": 12405 }, { "epoch": 1.1642267267267268, "grad_norm": 0.8935334072664097, "learning_rate": 7.678543902494244e-06, "loss": 0.3654, "step": 12406 }, { "epoch": 1.1643205705705706, "grad_norm": 0.9877297110309801, "learning_rate": 7.678082872022865e-06, "loss": 0.3702, "step": 12407 }, { "epoch": 1.1644144144144144, "grad_norm": 0.9279628852387726, "learning_rate": 7.677621809620386e-06, "loss": 0.394, "step": 12408 }, { "epoch": 1.1645082582582582, "grad_norm": 0.9848308835241467, "learning_rate": 7.67716071529231e-06, "loss": 0.3993, "step": 12409 }, { "epoch": 1.164602102102102, "grad_norm": 0.8974212530132891, "learning_rate": 7.67669958904413e-06, "loss": 0.3728, "step": 12410 }, { "epoch": 1.164695945945946, "grad_norm": 1.3462867179291143, "learning_rate": 7.676238430881346e-06, "loss": 0.4234, "step": 12411 }, { "epoch": 1.1647897897897899, "grad_norm": 1.0744938350332338, "learning_rate": 7.675777240809455e-06, "loss": 0.3998, "step": 12412 }, { "epoch": 1.1648836336336337, "grad_norm": 0.9508780747914398, "learning_rate": 7.675316018833958e-06, "loss": 0.4166, "step": 12413 }, { "epoch": 1.1649774774774775, "grad_norm": 0.8409399964467, "learning_rate": 7.674854764960353e-06, "loss": 0.3471, "step": 12414 }, { "epoch": 1.1650713213213213, "grad_norm": 0.9718404476672826, "learning_rate": 7.674393479194141e-06, "loss": 0.3681, "step": 12415 }, { "epoch": 1.165165165165165, "grad_norm": 0.7958270436771927, "learning_rate": 7.67393216154082e-06, "loss": 0.4063, "step": 12416 }, { "epoch": 1.165259009009009, "grad_norm": 0.8117521421273332, "learning_rate": 7.673470812005892e-06, "loss": 0.3598, "step": 12417 }, { "epoch": 1.165352852852853, "grad_norm": 0.9252851375103109, "learning_rate": 7.673009430594856e-06, "loss": 0.384, "step": 12418 }, { "epoch": 1.1654466966966968, "grad_norm": 0.8378699912300066, "learning_rate": 7.672548017313216e-06, "loss": 0.3987, "step": 12419 }, { "epoch": 1.1655405405405406, "grad_norm": 0.8411957853129527, "learning_rate": 7.672086572166467e-06, "loss": 0.3803, "step": 12420 }, { "epoch": 1.1656343843843844, "grad_norm": 0.907509464628287, "learning_rate": 7.671625095160118e-06, "loss": 0.419, "step": 12421 }, { "epoch": 1.1657282282282282, "grad_norm": 1.0909453729178507, "learning_rate": 7.671163586299668e-06, "loss": 0.3783, "step": 12422 }, { "epoch": 1.165822072072072, "grad_norm": 0.9810062691776218, "learning_rate": 7.670702045590621e-06, "loss": 0.4656, "step": 12423 }, { "epoch": 1.165915915915916, "grad_norm": 0.8416705313531567, "learning_rate": 7.67024047303848e-06, "loss": 0.4269, "step": 12424 }, { "epoch": 1.1660097597597598, "grad_norm": 0.9266613833875866, "learning_rate": 7.669778868648746e-06, "loss": 0.3868, "step": 12425 }, { "epoch": 1.1661036036036037, "grad_norm": 0.8897320074573009, "learning_rate": 7.669317232426924e-06, "loss": 0.3806, "step": 12426 }, { "epoch": 1.1661974474474475, "grad_norm": 1.1161427425595112, "learning_rate": 7.66885556437852e-06, "loss": 0.4501, "step": 12427 }, { "epoch": 1.1662912912912913, "grad_norm": 1.0114164456328676, "learning_rate": 7.668393864509035e-06, "loss": 0.366, "step": 12428 }, { "epoch": 1.166385135135135, "grad_norm": 1.2396169087673972, "learning_rate": 7.667932132823978e-06, "loss": 0.4082, "step": 12429 }, { "epoch": 1.166478978978979, "grad_norm": 0.8868449011200592, "learning_rate": 7.66747036932885e-06, "loss": 0.4362, "step": 12430 }, { "epoch": 1.1665728228228227, "grad_norm": 0.8913342656301277, "learning_rate": 7.667008574029161e-06, "loss": 0.4068, "step": 12431 }, { "epoch": 1.1666666666666667, "grad_norm": 0.9797629810692939, "learning_rate": 7.666546746930413e-06, "loss": 0.4061, "step": 12432 }, { "epoch": 1.1667605105105106, "grad_norm": 1.2332921973026436, "learning_rate": 7.666084888038114e-06, "loss": 0.3973, "step": 12433 }, { "epoch": 1.1668543543543544, "grad_norm": 1.1850341472978365, "learning_rate": 7.665622997357773e-06, "loss": 0.4335, "step": 12434 }, { "epoch": 1.1669481981981982, "grad_norm": 0.9349124303988692, "learning_rate": 7.665161074894896e-06, "loss": 0.4195, "step": 12435 }, { "epoch": 1.167042042042042, "grad_norm": 0.8345856901188081, "learning_rate": 7.664699120654988e-06, "loss": 0.3399, "step": 12436 }, { "epoch": 1.1671358858858858, "grad_norm": 1.4613597007919543, "learning_rate": 7.66423713464356e-06, "loss": 0.4161, "step": 12437 }, { "epoch": 1.1672297297297298, "grad_norm": 0.9046241441115414, "learning_rate": 7.663775116866116e-06, "loss": 0.3997, "step": 12438 }, { "epoch": 1.1673235735735736, "grad_norm": 0.7428379350495389, "learning_rate": 7.663313067328171e-06, "loss": 0.3818, "step": 12439 }, { "epoch": 1.1674174174174174, "grad_norm": 1.0265806156884256, "learning_rate": 7.66285098603523e-06, "loss": 0.4154, "step": 12440 }, { "epoch": 1.1675112612612613, "grad_norm": 1.2132635364687387, "learning_rate": 7.662388872992803e-06, "loss": 0.4113, "step": 12441 }, { "epoch": 1.167605105105105, "grad_norm": 0.9424717383149618, "learning_rate": 7.661926728206401e-06, "loss": 0.422, "step": 12442 }, { "epoch": 1.1676989489489489, "grad_norm": 0.8344679192983102, "learning_rate": 7.661464551681533e-06, "loss": 0.3577, "step": 12443 }, { "epoch": 1.1677927927927927, "grad_norm": 1.0176686591532351, "learning_rate": 7.661002343423708e-06, "loss": 0.4227, "step": 12444 }, { "epoch": 1.1678866366366367, "grad_norm": 0.9484314591867388, "learning_rate": 7.66054010343844e-06, "loss": 0.4184, "step": 12445 }, { "epoch": 1.1679804804804805, "grad_norm": 9.110257929472505, "learning_rate": 7.660077831731241e-06, "loss": 0.4185, "step": 12446 }, { "epoch": 1.1680743243243243, "grad_norm": 0.8349562433714987, "learning_rate": 7.65961552830762e-06, "loss": 0.4181, "step": 12447 }, { "epoch": 1.1681681681681682, "grad_norm": 1.0625511084248467, "learning_rate": 7.65915319317309e-06, "loss": 0.3766, "step": 12448 }, { "epoch": 1.168262012012012, "grad_norm": 0.9106660810143334, "learning_rate": 7.658690826333164e-06, "loss": 0.4203, "step": 12449 }, { "epoch": 1.1683558558558558, "grad_norm": 1.4179657078250032, "learning_rate": 7.658228427793352e-06, "loss": 0.4113, "step": 12450 }, { "epoch": 1.1684496996996998, "grad_norm": 0.9825063392114318, "learning_rate": 7.657765997559171e-06, "loss": 0.3726, "step": 12451 }, { "epoch": 1.1685435435435436, "grad_norm": 1.049209142901573, "learning_rate": 7.657303535636133e-06, "loss": 0.4087, "step": 12452 }, { "epoch": 1.1686373873873874, "grad_norm": 0.9557063355503612, "learning_rate": 7.656841042029753e-06, "loss": 0.4237, "step": 12453 }, { "epoch": 1.1687312312312312, "grad_norm": 0.9036502754164873, "learning_rate": 7.656378516745544e-06, "loss": 0.4259, "step": 12454 }, { "epoch": 1.168825075075075, "grad_norm": 1.1423748731609364, "learning_rate": 7.655915959789023e-06, "loss": 0.3883, "step": 12455 }, { "epoch": 1.1689189189189189, "grad_norm": 1.9224045753734167, "learning_rate": 7.6554533711657e-06, "loss": 0.4166, "step": 12456 }, { "epoch": 1.1690127627627627, "grad_norm": 1.031130469463658, "learning_rate": 7.654990750881096e-06, "loss": 0.4149, "step": 12457 }, { "epoch": 1.1691066066066067, "grad_norm": 0.9501434868351689, "learning_rate": 7.654528098940725e-06, "loss": 0.3843, "step": 12458 }, { "epoch": 1.1692004504504505, "grad_norm": 1.0209228795249372, "learning_rate": 7.654065415350103e-06, "loss": 0.4151, "step": 12459 }, { "epoch": 1.1692942942942943, "grad_norm": 0.9902636755245985, "learning_rate": 7.653602700114747e-06, "loss": 0.3929, "step": 12460 }, { "epoch": 1.1693881381381381, "grad_norm": 0.9776639255208536, "learning_rate": 7.653139953240172e-06, "loss": 0.4336, "step": 12461 }, { "epoch": 1.169481981981982, "grad_norm": 0.8850952288109685, "learning_rate": 7.652677174731897e-06, "loss": 0.4198, "step": 12462 }, { "epoch": 1.1695758258258258, "grad_norm": 0.898868785910645, "learning_rate": 7.65221436459544e-06, "loss": 0.4069, "step": 12463 }, { "epoch": 1.1696696696696698, "grad_norm": 0.9176919794197207, "learning_rate": 7.65175152283632e-06, "loss": 0.3694, "step": 12464 }, { "epoch": 1.1697635135135136, "grad_norm": 1.4670938615807914, "learning_rate": 7.651288649460055e-06, "loss": 0.3858, "step": 12465 }, { "epoch": 1.1698573573573574, "grad_norm": 0.8944827279030332, "learning_rate": 7.650825744472163e-06, "loss": 0.3902, "step": 12466 }, { "epoch": 1.1699512012012012, "grad_norm": 0.898083278516934, "learning_rate": 7.650362807878165e-06, "loss": 0.411, "step": 12467 }, { "epoch": 1.170045045045045, "grad_norm": 0.8339460248123199, "learning_rate": 7.649899839683576e-06, "loss": 0.3831, "step": 12468 }, { "epoch": 1.1701388888888888, "grad_norm": 1.3384103581532891, "learning_rate": 7.649436839893922e-06, "loss": 0.4236, "step": 12469 }, { "epoch": 1.1702327327327327, "grad_norm": 0.8709475539704794, "learning_rate": 7.648973808514719e-06, "loss": 0.4056, "step": 12470 }, { "epoch": 1.1703265765765765, "grad_norm": 0.9384223048796712, "learning_rate": 7.64851074555149e-06, "loss": 0.4242, "step": 12471 }, { "epoch": 1.1704204204204205, "grad_norm": 1.0783181814303522, "learning_rate": 7.648047651009757e-06, "loss": 0.3897, "step": 12472 }, { "epoch": 1.1705142642642643, "grad_norm": 0.8644303903062706, "learning_rate": 7.647584524895039e-06, "loss": 0.42, "step": 12473 }, { "epoch": 1.1706081081081081, "grad_norm": 1.0176234133340945, "learning_rate": 7.64712136721286e-06, "loss": 0.3926, "step": 12474 }, { "epoch": 1.170701951951952, "grad_norm": 0.9182722338431145, "learning_rate": 7.64665817796874e-06, "loss": 0.4256, "step": 12475 }, { "epoch": 1.1707957957957957, "grad_norm": 2.138634577397713, "learning_rate": 7.646194957168205e-06, "loss": 0.4008, "step": 12476 }, { "epoch": 1.1708896396396395, "grad_norm": 1.0100360458702413, "learning_rate": 7.645731704816773e-06, "loss": 0.3959, "step": 12477 }, { "epoch": 1.1709834834834836, "grad_norm": 1.0966041895689569, "learning_rate": 7.645268420919974e-06, "loss": 0.4142, "step": 12478 }, { "epoch": 1.1710773273273274, "grad_norm": 1.0082709027288728, "learning_rate": 7.644805105483326e-06, "loss": 0.3892, "step": 12479 }, { "epoch": 1.1711711711711712, "grad_norm": 0.8801356990885832, "learning_rate": 7.644341758512356e-06, "loss": 0.4009, "step": 12480 }, { "epoch": 1.171265015015015, "grad_norm": 0.9627979153250146, "learning_rate": 7.643878380012588e-06, "loss": 0.3959, "step": 12481 }, { "epoch": 1.1713588588588588, "grad_norm": 1.0482814294218294, "learning_rate": 7.643414969989546e-06, "loss": 0.3959, "step": 12482 }, { "epoch": 1.1714527027027026, "grad_norm": 1.1352491424205584, "learning_rate": 7.642951528448757e-06, "loss": 0.3983, "step": 12483 }, { "epoch": 1.1715465465465464, "grad_norm": 0.9506644680529382, "learning_rate": 7.642488055395747e-06, "loss": 0.4457, "step": 12484 }, { "epoch": 1.1716403903903905, "grad_norm": 1.012723514715533, "learning_rate": 7.64202455083604e-06, "loss": 0.4384, "step": 12485 }, { "epoch": 1.1717342342342343, "grad_norm": 0.994781740243225, "learning_rate": 7.641561014775163e-06, "loss": 0.4024, "step": 12486 }, { "epoch": 1.171828078078078, "grad_norm": 0.9103265351066931, "learning_rate": 7.641097447218643e-06, "loss": 0.4054, "step": 12487 }, { "epoch": 1.171921921921922, "grad_norm": 0.9306234568053022, "learning_rate": 7.640633848172008e-06, "loss": 0.429, "step": 12488 }, { "epoch": 1.1720157657657657, "grad_norm": 0.9533755218555854, "learning_rate": 7.640170217640784e-06, "loss": 0.468, "step": 12489 }, { "epoch": 1.1721096096096095, "grad_norm": 0.9275232132778828, "learning_rate": 7.639706555630502e-06, "loss": 0.3632, "step": 12490 }, { "epoch": 1.1722034534534536, "grad_norm": 1.0082907723929717, "learning_rate": 7.639242862146686e-06, "loss": 0.4035, "step": 12491 }, { "epoch": 1.1722972972972974, "grad_norm": 1.0237069803228314, "learning_rate": 7.638779137194868e-06, "loss": 0.436, "step": 12492 }, { "epoch": 1.1723911411411412, "grad_norm": 0.9237902947268899, "learning_rate": 7.638315380780574e-06, "loss": 0.4013, "step": 12493 }, { "epoch": 1.172484984984985, "grad_norm": 1.2154313023327559, "learning_rate": 7.637851592909336e-06, "loss": 0.4274, "step": 12494 }, { "epoch": 1.1725788288288288, "grad_norm": 1.1194538679955153, "learning_rate": 7.637387773586682e-06, "loss": 0.4549, "step": 12495 }, { "epoch": 1.1726726726726726, "grad_norm": 0.9280914651854604, "learning_rate": 7.636923922818145e-06, "loss": 0.3563, "step": 12496 }, { "epoch": 1.1727665165165164, "grad_norm": 1.0484127633199647, "learning_rate": 7.636460040609254e-06, "loss": 0.4229, "step": 12497 }, { "epoch": 1.1728603603603605, "grad_norm": 0.8974991377187315, "learning_rate": 7.635996126965536e-06, "loss": 0.4089, "step": 12498 }, { "epoch": 1.1729542042042043, "grad_norm": 0.9497400167935277, "learning_rate": 7.63553218189253e-06, "loss": 0.419, "step": 12499 }, { "epoch": 1.173048048048048, "grad_norm": 1.3653891487027974, "learning_rate": 7.635068205395762e-06, "loss": 0.4862, "step": 12500 }, { "epoch": 1.1731418918918919, "grad_norm": 0.9595397677884888, "learning_rate": 7.634604197480765e-06, "loss": 0.4297, "step": 12501 }, { "epoch": 1.1732357357357357, "grad_norm": 0.9697349654575745, "learning_rate": 7.634140158153074e-06, "loss": 0.4065, "step": 12502 }, { "epoch": 1.1733295795795795, "grad_norm": 2.0723557083898694, "learning_rate": 7.633676087418219e-06, "loss": 0.4163, "step": 12503 }, { "epoch": 1.1734234234234235, "grad_norm": 1.0536012893439473, "learning_rate": 7.633211985281735e-06, "loss": 0.4551, "step": 12504 }, { "epoch": 1.1735172672672673, "grad_norm": 0.9264623646743662, "learning_rate": 7.632747851749151e-06, "loss": 0.4381, "step": 12505 }, { "epoch": 1.1736111111111112, "grad_norm": 1.035503475097587, "learning_rate": 7.632283686826008e-06, "loss": 0.4135, "step": 12506 }, { "epoch": 1.173704954954955, "grad_norm": 1.0010166124973137, "learning_rate": 7.631819490517836e-06, "loss": 0.4312, "step": 12507 }, { "epoch": 1.1737987987987988, "grad_norm": 2.523939731513988, "learning_rate": 7.63135526283017e-06, "loss": 0.4211, "step": 12508 }, { "epoch": 1.1738926426426426, "grad_norm": 0.8741318038294734, "learning_rate": 7.630891003768545e-06, "loss": 0.4109, "step": 12509 }, { "epoch": 1.1739864864864864, "grad_norm": 1.3493672769805702, "learning_rate": 7.630426713338499e-06, "loss": 0.4127, "step": 12510 }, { "epoch": 1.1740803303303302, "grad_norm": 0.9888754775385491, "learning_rate": 7.629962391545561e-06, "loss": 0.3913, "step": 12511 }, { "epoch": 1.1741741741741742, "grad_norm": 0.8998897339458128, "learning_rate": 7.629498038395278e-06, "loss": 0.3837, "step": 12512 }, { "epoch": 1.174268018018018, "grad_norm": 1.1915228966547047, "learning_rate": 7.629033653893176e-06, "loss": 0.4523, "step": 12513 }, { "epoch": 1.1743618618618619, "grad_norm": 1.01308899169118, "learning_rate": 7.628569238044798e-06, "loss": 0.4042, "step": 12514 }, { "epoch": 1.1744557057057057, "grad_norm": 1.053353803486893, "learning_rate": 7.628104790855681e-06, "loss": 0.4389, "step": 12515 }, { "epoch": 1.1745495495495495, "grad_norm": 0.8997207045782438, "learning_rate": 7.6276403123313594e-06, "loss": 0.4377, "step": 12516 }, { "epoch": 1.1746433933933933, "grad_norm": 0.8818139531603236, "learning_rate": 7.627175802477373e-06, "loss": 0.3825, "step": 12517 }, { "epoch": 1.1747372372372373, "grad_norm": 0.9670784138240965, "learning_rate": 7.626711261299262e-06, "loss": 0.3453, "step": 12518 }, { "epoch": 1.1748310810810811, "grad_norm": 0.9808722258644635, "learning_rate": 7.626246688802561e-06, "loss": 0.4045, "step": 12519 }, { "epoch": 1.174924924924925, "grad_norm": 0.9488951810379195, "learning_rate": 7.625782084992812e-06, "loss": 0.3879, "step": 12520 }, { "epoch": 1.1750187687687688, "grad_norm": 1.0328449270634725, "learning_rate": 7.6253174498755565e-06, "loss": 0.4334, "step": 12521 }, { "epoch": 1.1751126126126126, "grad_norm": 1.006666393322453, "learning_rate": 7.624852783456331e-06, "loss": 0.3839, "step": 12522 }, { "epoch": 1.1752064564564564, "grad_norm": 0.9121391222090774, "learning_rate": 7.624388085740676e-06, "loss": 0.3909, "step": 12523 }, { "epoch": 1.1753003003003002, "grad_norm": 1.0588593717237462, "learning_rate": 7.623923356734134e-06, "loss": 0.4418, "step": 12524 }, { "epoch": 1.1753941441441442, "grad_norm": 0.8537939691602039, "learning_rate": 7.623458596442244e-06, "loss": 0.3548, "step": 12525 }, { "epoch": 1.175487987987988, "grad_norm": 0.95786837170931, "learning_rate": 7.622993804870549e-06, "loss": 0.3994, "step": 12526 }, { "epoch": 1.1755818318318318, "grad_norm": 0.9007785532086439, "learning_rate": 7.622528982024591e-06, "loss": 0.4135, "step": 12527 }, { "epoch": 1.1756756756756757, "grad_norm": 1.1806955163682282, "learning_rate": 7.622064127909909e-06, "loss": 0.458, "step": 12528 }, { "epoch": 1.1757695195195195, "grad_norm": 1.0135569708881174, "learning_rate": 7.62159924253205e-06, "loss": 0.3877, "step": 12529 }, { "epoch": 1.1758633633633633, "grad_norm": 1.2791459142466044, "learning_rate": 7.621134325896553e-06, "loss": 0.4308, "step": 12530 }, { "epoch": 1.1759572072072073, "grad_norm": 0.9346874535733442, "learning_rate": 7.620669378008964e-06, "loss": 0.3926, "step": 12531 }, { "epoch": 1.1760510510510511, "grad_norm": 0.9206486533009313, "learning_rate": 7.6202043988748245e-06, "loss": 0.381, "step": 12532 }, { "epoch": 1.176144894894895, "grad_norm": 0.9469790296119511, "learning_rate": 7.619739388499681e-06, "loss": 0.445, "step": 12533 }, { "epoch": 1.1762387387387387, "grad_norm": 0.9120906079448783, "learning_rate": 7.619274346889076e-06, "loss": 0.3639, "step": 12534 }, { "epoch": 1.1763325825825826, "grad_norm": 0.8045709507882707, "learning_rate": 7.618809274048554e-06, "loss": 0.4329, "step": 12535 }, { "epoch": 1.1764264264264264, "grad_norm": 0.9820858075365645, "learning_rate": 7.618344169983663e-06, "loss": 0.4251, "step": 12536 }, { "epoch": 1.1765202702702702, "grad_norm": 0.812092458594256, "learning_rate": 7.6178790346999435e-06, "loss": 0.3988, "step": 12537 }, { "epoch": 1.1766141141141142, "grad_norm": 0.9503843535335262, "learning_rate": 7.617413868202945e-06, "loss": 0.4188, "step": 12538 }, { "epoch": 1.176707957957958, "grad_norm": 0.9427663836575179, "learning_rate": 7.616948670498213e-06, "loss": 0.4159, "step": 12539 }, { "epoch": 1.1768018018018018, "grad_norm": 0.8996216653074265, "learning_rate": 7.616483441591296e-06, "loss": 0.3895, "step": 12540 }, { "epoch": 1.1768956456456456, "grad_norm": 0.861854244882709, "learning_rate": 7.616018181487736e-06, "loss": 0.4115, "step": 12541 }, { "epoch": 1.1769894894894894, "grad_norm": 1.1473302660455995, "learning_rate": 7.615552890193085e-06, "loss": 0.4075, "step": 12542 }, { "epoch": 1.1770833333333333, "grad_norm": 0.9064532480580839, "learning_rate": 7.615087567712888e-06, "loss": 0.396, "step": 12543 }, { "epoch": 1.1771771771771773, "grad_norm": 0.9848725808766531, "learning_rate": 7.614622214052694e-06, "loss": 0.3908, "step": 12544 }, { "epoch": 1.177271021021021, "grad_norm": 1.0729154155616885, "learning_rate": 7.614156829218053e-06, "loss": 0.4326, "step": 12545 }, { "epoch": 1.177364864864865, "grad_norm": 0.881942458076581, "learning_rate": 7.6136914132145125e-06, "loss": 0.4193, "step": 12546 }, { "epoch": 1.1774587087087087, "grad_norm": 0.8793879517390635, "learning_rate": 7.613225966047619e-06, "loss": 0.3736, "step": 12547 }, { "epoch": 1.1775525525525525, "grad_norm": 0.899220323513135, "learning_rate": 7.612760487722928e-06, "loss": 0.4, "step": 12548 }, { "epoch": 1.1776463963963963, "grad_norm": 0.7908574916495884, "learning_rate": 7.6122949782459844e-06, "loss": 0.3778, "step": 12549 }, { "epoch": 1.1777402402402402, "grad_norm": 1.019540151461237, "learning_rate": 7.61182943762234e-06, "loss": 0.4038, "step": 12550 }, { "epoch": 1.177834084084084, "grad_norm": 1.0932379933934733, "learning_rate": 7.611363865857548e-06, "loss": 0.4691, "step": 12551 }, { "epoch": 1.177927927927928, "grad_norm": 0.8675478719584184, "learning_rate": 7.610898262957156e-06, "loss": 0.3777, "step": 12552 }, { "epoch": 1.1780217717717718, "grad_norm": 1.046524851771641, "learning_rate": 7.610432628926716e-06, "loss": 0.36, "step": 12553 }, { "epoch": 1.1781156156156156, "grad_norm": 0.9094943549422028, "learning_rate": 7.609966963771782e-06, "loss": 0.3946, "step": 12554 }, { "epoch": 1.1782094594594594, "grad_norm": 1.024796380996686, "learning_rate": 7.609501267497903e-06, "loss": 0.3916, "step": 12555 }, { "epoch": 1.1783033033033032, "grad_norm": 0.9552416378107147, "learning_rate": 7.6090355401106344e-06, "loss": 0.4159, "step": 12556 }, { "epoch": 1.178397147147147, "grad_norm": 0.9019266098608858, "learning_rate": 7.608569781615528e-06, "loss": 0.3821, "step": 12557 }, { "epoch": 1.178490990990991, "grad_norm": 1.0553287120032981, "learning_rate": 7.608103992018137e-06, "loss": 0.4153, "step": 12558 }, { "epoch": 1.178584834834835, "grad_norm": 1.0197401611897983, "learning_rate": 7.607638171324014e-06, "loss": 0.4101, "step": 12559 }, { "epoch": 1.1786786786786787, "grad_norm": 1.4013974214618994, "learning_rate": 7.607172319538716e-06, "loss": 0.3911, "step": 12560 }, { "epoch": 1.1787725225225225, "grad_norm": 0.9747514193838368, "learning_rate": 7.6067064366677944e-06, "loss": 0.4273, "step": 12561 }, { "epoch": 1.1788663663663663, "grad_norm": 1.0818340537352142, "learning_rate": 7.606240522716805e-06, "loss": 0.421, "step": 12562 }, { "epoch": 1.1789602102102101, "grad_norm": 1.0083893029075732, "learning_rate": 7.605774577691305e-06, "loss": 0.404, "step": 12563 }, { "epoch": 1.179054054054054, "grad_norm": 1.051775343443288, "learning_rate": 7.605308601596846e-06, "loss": 0.4389, "step": 12564 }, { "epoch": 1.179147897897898, "grad_norm": 1.1213668572252773, "learning_rate": 7.6048425944389856e-06, "loss": 0.3981, "step": 12565 }, { "epoch": 1.1792417417417418, "grad_norm": 1.2029445715906109, "learning_rate": 7.604376556223283e-06, "loss": 0.3736, "step": 12566 }, { "epoch": 1.1793355855855856, "grad_norm": 1.04386904606991, "learning_rate": 7.603910486955291e-06, "loss": 0.4447, "step": 12567 }, { "epoch": 1.1794294294294294, "grad_norm": 0.8997479963322043, "learning_rate": 7.603444386640566e-06, "loss": 0.3842, "step": 12568 }, { "epoch": 1.1795232732732732, "grad_norm": 0.9666017097659295, "learning_rate": 7.602978255284669e-06, "loss": 0.3735, "step": 12569 }, { "epoch": 1.179617117117117, "grad_norm": 0.858387189003076, "learning_rate": 7.6025120928931555e-06, "loss": 0.3705, "step": 12570 }, { "epoch": 1.179710960960961, "grad_norm": 1.6880611266479277, "learning_rate": 7.602045899471584e-06, "loss": 0.4215, "step": 12571 }, { "epoch": 1.1798048048048049, "grad_norm": 0.9386398328936928, "learning_rate": 7.601579675025513e-06, "loss": 0.4219, "step": 12572 }, { "epoch": 1.1798986486486487, "grad_norm": 0.9045989964772342, "learning_rate": 7.601113419560501e-06, "loss": 0.4304, "step": 12573 }, { "epoch": 1.1799924924924925, "grad_norm": 0.7987113856119271, "learning_rate": 7.6006471330821075e-06, "loss": 0.4008, "step": 12574 }, { "epoch": 1.1800863363363363, "grad_norm": 0.9646375670401657, "learning_rate": 7.600180815595892e-06, "loss": 0.4328, "step": 12575 }, { "epoch": 1.1801801801801801, "grad_norm": 0.9756672249175441, "learning_rate": 7.599714467107415e-06, "loss": 0.4345, "step": 12576 }, { "epoch": 1.180274024024024, "grad_norm": 0.9003341842518234, "learning_rate": 7.599248087622238e-06, "loss": 0.4578, "step": 12577 }, { "epoch": 1.180367867867868, "grad_norm": 0.9912465755270392, "learning_rate": 7.598781677145919e-06, "loss": 0.4557, "step": 12578 }, { "epoch": 1.1804617117117118, "grad_norm": 0.9121646394025151, "learning_rate": 7.59831523568402e-06, "loss": 0.3815, "step": 12579 }, { "epoch": 1.1805555555555556, "grad_norm": 0.9868918183737747, "learning_rate": 7.597848763242102e-06, "loss": 0.4433, "step": 12580 }, { "epoch": 1.1806493993993994, "grad_norm": 1.0314960119163326, "learning_rate": 7.597382259825727e-06, "loss": 0.4068, "step": 12581 }, { "epoch": 1.1807432432432432, "grad_norm": 0.8825242413494994, "learning_rate": 7.596915725440459e-06, "loss": 0.4132, "step": 12582 }, { "epoch": 1.180837087087087, "grad_norm": 1.0359913222408146, "learning_rate": 7.5964491600918584e-06, "loss": 0.4364, "step": 12583 }, { "epoch": 1.180930930930931, "grad_norm": 0.958903574105536, "learning_rate": 7.59598256378549e-06, "loss": 0.3858, "step": 12584 }, { "epoch": 1.1810247747747749, "grad_norm": 0.8463072821071015, "learning_rate": 7.595515936526915e-06, "loss": 0.4016, "step": 12585 }, { "epoch": 1.1811186186186187, "grad_norm": 0.9196377667176645, "learning_rate": 7.5950492783216976e-06, "loss": 0.386, "step": 12586 }, { "epoch": 1.1812124624624625, "grad_norm": 1.1241166189415928, "learning_rate": 7.594582589175403e-06, "loss": 0.4152, "step": 12587 }, { "epoch": 1.1813063063063063, "grad_norm": 1.1209577901506127, "learning_rate": 7.594115869093594e-06, "loss": 0.4052, "step": 12588 }, { "epoch": 1.18140015015015, "grad_norm": 0.9352239674389871, "learning_rate": 7.593649118081839e-06, "loss": 0.4457, "step": 12589 }, { "epoch": 1.181493993993994, "grad_norm": 0.9125463967947325, "learning_rate": 7.593182336145697e-06, "loss": 0.4221, "step": 12590 }, { "epoch": 1.1815878378378377, "grad_norm": 1.0107782029311325, "learning_rate": 7.592715523290738e-06, "loss": 0.4308, "step": 12591 }, { "epoch": 1.1816816816816818, "grad_norm": 0.8790424486091957, "learning_rate": 7.592248679522527e-06, "loss": 0.3304, "step": 12592 }, { "epoch": 1.1817755255255256, "grad_norm": 0.9125091652372312, "learning_rate": 7.591781804846629e-06, "loss": 0.4114, "step": 12593 }, { "epoch": 1.1818693693693694, "grad_norm": 0.924664838763851, "learning_rate": 7.5913148992686115e-06, "loss": 0.3902, "step": 12594 }, { "epoch": 1.1819632132132132, "grad_norm": 1.7200035509743816, "learning_rate": 7.590847962794044e-06, "loss": 0.3978, "step": 12595 }, { "epoch": 1.182057057057057, "grad_norm": 1.0573184004071516, "learning_rate": 7.590380995428488e-06, "loss": 0.3891, "step": 12596 }, { "epoch": 1.182150900900901, "grad_norm": 0.949916620386552, "learning_rate": 7.589913997177516e-06, "loss": 0.3861, "step": 12597 }, { "epoch": 1.1822447447447448, "grad_norm": 0.9071794874919663, "learning_rate": 7.589446968046694e-06, "loss": 0.3733, "step": 12598 }, { "epoch": 1.1823385885885886, "grad_norm": 0.8742027883915784, "learning_rate": 7.588979908041592e-06, "loss": 0.4247, "step": 12599 }, { "epoch": 1.1824324324324325, "grad_norm": 4.425474410616562, "learning_rate": 7.5885128171677765e-06, "loss": 0.4555, "step": 12600 }, { "epoch": 1.1825262762762763, "grad_norm": 1.7139617873214612, "learning_rate": 7.588045695430819e-06, "loss": 0.3675, "step": 12601 }, { "epoch": 1.18262012012012, "grad_norm": 0.9294076517275297, "learning_rate": 7.5875785428362895e-06, "loss": 0.4363, "step": 12602 }, { "epoch": 1.1827139639639639, "grad_norm": 1.0380287767212932, "learning_rate": 7.587111359389754e-06, "loss": 0.4463, "step": 12603 }, { "epoch": 1.1828078078078077, "grad_norm": 1.4379037797193455, "learning_rate": 7.586644145096786e-06, "loss": 0.3948, "step": 12604 }, { "epoch": 1.1829016516516517, "grad_norm": 1.0451646150414553, "learning_rate": 7.586176899962956e-06, "loss": 0.4577, "step": 12605 }, { "epoch": 1.1829954954954955, "grad_norm": 1.1301136735722004, "learning_rate": 7.585709623993833e-06, "loss": 0.4261, "step": 12606 }, { "epoch": 1.1830893393393394, "grad_norm": 1.1447273875715687, "learning_rate": 7.585242317194992e-06, "loss": 0.3879, "step": 12607 }, { "epoch": 1.1831831831831832, "grad_norm": 0.8323118478460193, "learning_rate": 7.584774979572003e-06, "loss": 0.3695, "step": 12608 }, { "epoch": 1.183277027027027, "grad_norm": 0.9623345400253709, "learning_rate": 7.584307611130435e-06, "loss": 0.3922, "step": 12609 }, { "epoch": 1.1833708708708708, "grad_norm": 0.9154532960950552, "learning_rate": 7.583840211875864e-06, "loss": 0.4053, "step": 12610 }, { "epoch": 1.1834647147147148, "grad_norm": 0.9366798252984604, "learning_rate": 7.583372781813863e-06, "loss": 0.4181, "step": 12611 }, { "epoch": 1.1835585585585586, "grad_norm": 0.9002373347961188, "learning_rate": 7.582905320950002e-06, "loss": 0.3854, "step": 12612 }, { "epoch": 1.1836524024024024, "grad_norm": 0.9126042949230527, "learning_rate": 7.582437829289859e-06, "loss": 0.3636, "step": 12613 }, { "epoch": 1.1837462462462462, "grad_norm": 0.9390059842101713, "learning_rate": 7.581970306839006e-06, "loss": 0.4462, "step": 12614 }, { "epoch": 1.18384009009009, "grad_norm": 0.9134879621988903, "learning_rate": 7.581502753603016e-06, "loss": 0.4167, "step": 12615 }, { "epoch": 1.1839339339339339, "grad_norm": 0.9420973026482605, "learning_rate": 7.581035169587465e-06, "loss": 0.37, "step": 12616 }, { "epoch": 1.1840277777777777, "grad_norm": 0.9694467728028706, "learning_rate": 7.580567554797929e-06, "loss": 0.4499, "step": 12617 }, { "epoch": 1.1841216216216217, "grad_norm": 1.030574540807265, "learning_rate": 7.5800999092399814e-06, "loss": 0.375, "step": 12618 }, { "epoch": 1.1842154654654655, "grad_norm": 1.0010511952249852, "learning_rate": 7.5796322329192004e-06, "loss": 0.4146, "step": 12619 }, { "epoch": 1.1843093093093093, "grad_norm": 0.8772079304312057, "learning_rate": 7.57916452584116e-06, "loss": 0.3899, "step": 12620 }, { "epoch": 1.1844031531531531, "grad_norm": 1.8099239938547707, "learning_rate": 7.5786967880114355e-06, "loss": 0.4111, "step": 12621 }, { "epoch": 1.184496996996997, "grad_norm": 0.9761942984641567, "learning_rate": 7.578229019435608e-06, "loss": 0.4559, "step": 12622 }, { "epoch": 1.1845908408408408, "grad_norm": 0.8504750805372119, "learning_rate": 7.57776122011925e-06, "loss": 0.4569, "step": 12623 }, { "epoch": 1.1846846846846848, "grad_norm": 1.251687112387361, "learning_rate": 7.577293390067943e-06, "loss": 0.4235, "step": 12624 }, { "epoch": 1.1847785285285286, "grad_norm": 0.9862110551497536, "learning_rate": 7.576825529287264e-06, "loss": 0.3712, "step": 12625 }, { "epoch": 1.1848723723723724, "grad_norm": 1.03681200288813, "learning_rate": 7.576357637782792e-06, "loss": 0.343, "step": 12626 }, { "epoch": 1.1849662162162162, "grad_norm": 0.8242096908878905, "learning_rate": 7.575889715560103e-06, "loss": 0.4083, "step": 12627 }, { "epoch": 1.18506006006006, "grad_norm": 1.2787945280000321, "learning_rate": 7.575421762624777e-06, "loss": 0.4096, "step": 12628 }, { "epoch": 1.1851539039039038, "grad_norm": 1.132008330359979, "learning_rate": 7.574953778982397e-06, "loss": 0.4033, "step": 12629 }, { "epoch": 1.1852477477477477, "grad_norm": 0.9946850217260992, "learning_rate": 7.574485764638537e-06, "loss": 0.3528, "step": 12630 }, { "epoch": 1.1853415915915915, "grad_norm": 1.146004117246741, "learning_rate": 7.5740177195987805e-06, "loss": 0.4295, "step": 12631 }, { "epoch": 1.1854354354354355, "grad_norm": 0.958599576206153, "learning_rate": 7.5735496438687096e-06, "loss": 0.4265, "step": 12632 }, { "epoch": 1.1855292792792793, "grad_norm": 0.9154241234437465, "learning_rate": 7.573081537453904e-06, "loss": 0.3717, "step": 12633 }, { "epoch": 1.1856231231231231, "grad_norm": 0.799759753635456, "learning_rate": 7.572613400359942e-06, "loss": 0.4075, "step": 12634 }, { "epoch": 1.185716966966967, "grad_norm": 1.0638398421051942, "learning_rate": 7.572145232592409e-06, "loss": 0.3678, "step": 12635 }, { "epoch": 1.1858108108108107, "grad_norm": 5.174327885937696, "learning_rate": 7.571677034156884e-06, "loss": 0.4194, "step": 12636 }, { "epoch": 1.1859046546546548, "grad_norm": 0.8747889836124172, "learning_rate": 7.571208805058953e-06, "loss": 0.447, "step": 12637 }, { "epoch": 1.1859984984984986, "grad_norm": 0.9377478610251936, "learning_rate": 7.570740545304196e-06, "loss": 0.3832, "step": 12638 }, { "epoch": 1.1860923423423424, "grad_norm": 0.9095957593113214, "learning_rate": 7.570272254898198e-06, "loss": 0.4473, "step": 12639 }, { "epoch": 1.1861861861861862, "grad_norm": 0.9164242993080547, "learning_rate": 7.5698039338465405e-06, "loss": 0.4074, "step": 12640 }, { "epoch": 1.18628003003003, "grad_norm": 1.0198291953209555, "learning_rate": 7.569335582154809e-06, "loss": 0.3491, "step": 12641 }, { "epoch": 1.1863738738738738, "grad_norm": 0.8814663947848247, "learning_rate": 7.568867199828584e-06, "loss": 0.4298, "step": 12642 }, { "epoch": 1.1864677177177176, "grad_norm": 0.7917923742184971, "learning_rate": 7.568398786873455e-06, "loss": 0.3821, "step": 12643 }, { "epoch": 1.1865615615615615, "grad_norm": 0.9217842654419331, "learning_rate": 7.567930343295007e-06, "loss": 0.3961, "step": 12644 }, { "epoch": 1.1866554054054055, "grad_norm": 0.9858798819208177, "learning_rate": 7.567461869098822e-06, "loss": 0.4275, "step": 12645 }, { "epoch": 1.1867492492492493, "grad_norm": 1.3444746407734496, "learning_rate": 7.566993364290485e-06, "loss": 0.4174, "step": 12646 }, { "epoch": 1.186843093093093, "grad_norm": 1.0102999897361258, "learning_rate": 7.566524828875587e-06, "loss": 0.4444, "step": 12647 }, { "epoch": 1.186936936936937, "grad_norm": 0.9815177297775768, "learning_rate": 7.566056262859709e-06, "loss": 0.39, "step": 12648 }, { "epoch": 1.1870307807807807, "grad_norm": 1.0281707626040983, "learning_rate": 7.56558766624844e-06, "loss": 0.3981, "step": 12649 }, { "epoch": 1.1871246246246245, "grad_norm": 2.222793639496535, "learning_rate": 7.5651190390473685e-06, "loss": 0.412, "step": 12650 }, { "epoch": 1.1872184684684686, "grad_norm": 0.8837531664999808, "learning_rate": 7.564650381262082e-06, "loss": 0.4325, "step": 12651 }, { "epoch": 1.1873123123123124, "grad_norm": 0.94590217741639, "learning_rate": 7.5641816928981645e-06, "loss": 0.3977, "step": 12652 }, { "epoch": 1.1874061561561562, "grad_norm": 0.9235119427954821, "learning_rate": 7.563712973961209e-06, "loss": 0.3908, "step": 12653 }, { "epoch": 1.1875, "grad_norm": 0.987945772739143, "learning_rate": 7.563244224456801e-06, "loss": 0.4313, "step": 12654 }, { "epoch": 1.1875938438438438, "grad_norm": 1.0621044457332247, "learning_rate": 7.56277544439053e-06, "loss": 0.4194, "step": 12655 }, { "epoch": 1.1876876876876876, "grad_norm": 0.9754034785511273, "learning_rate": 7.562306633767986e-06, "loss": 0.4274, "step": 12656 }, { "epoch": 1.1877815315315314, "grad_norm": 1.6564499662474503, "learning_rate": 7.5618377925947595e-06, "loss": 0.4574, "step": 12657 }, { "epoch": 1.1878753753753755, "grad_norm": 0.9810351179703299, "learning_rate": 7.5613689208764394e-06, "loss": 0.3823, "step": 12658 }, { "epoch": 1.1879692192192193, "grad_norm": 1.1396679405037105, "learning_rate": 7.560900018618616e-06, "loss": 0.384, "step": 12659 }, { "epoch": 1.188063063063063, "grad_norm": 1.1644697352449929, "learning_rate": 7.560431085826879e-06, "loss": 0.4153, "step": 12660 }, { "epoch": 1.188156906906907, "grad_norm": 0.9019466173573044, "learning_rate": 7.5599621225068205e-06, "loss": 0.4437, "step": 12661 }, { "epoch": 1.1882507507507507, "grad_norm": 1.9730103560462073, "learning_rate": 7.559493128664035e-06, "loss": 0.4115, "step": 12662 }, { "epoch": 1.1883445945945945, "grad_norm": 1.5759940305060716, "learning_rate": 7.55902410430411e-06, "loss": 0.412, "step": 12663 }, { "epoch": 1.1884384384384385, "grad_norm": 0.9207303772093293, "learning_rate": 7.558555049432639e-06, "loss": 0.3955, "step": 12664 }, { "epoch": 1.1885322822822824, "grad_norm": 0.9646335475848121, "learning_rate": 7.558085964055217e-06, "loss": 0.3872, "step": 12665 }, { "epoch": 1.1886261261261262, "grad_norm": 1.197719712896579, "learning_rate": 7.557616848177433e-06, "loss": 0.4051, "step": 12666 }, { "epoch": 1.18871996996997, "grad_norm": 1.0800651602294888, "learning_rate": 7.557147701804882e-06, "loss": 0.3981, "step": 12667 }, { "epoch": 1.1888138138138138, "grad_norm": 0.9561473882605284, "learning_rate": 7.55667852494316e-06, "loss": 0.3931, "step": 12668 }, { "epoch": 1.1889076576576576, "grad_norm": 1.6184835847408243, "learning_rate": 7.556209317597858e-06, "loss": 0.4116, "step": 12669 }, { "epoch": 1.1890015015015014, "grad_norm": 2.6172239977909237, "learning_rate": 7.5557400797745716e-06, "loss": 0.3992, "step": 12670 }, { "epoch": 1.1890953453453452, "grad_norm": 0.8995329517337278, "learning_rate": 7.555270811478895e-06, "loss": 0.4136, "step": 12671 }, { "epoch": 1.1891891891891893, "grad_norm": 1.4237570519991507, "learning_rate": 7.5548015127164235e-06, "loss": 0.3991, "step": 12672 }, { "epoch": 1.189283033033033, "grad_norm": 0.9122272605084287, "learning_rate": 7.554332183492753e-06, "loss": 0.4243, "step": 12673 }, { "epoch": 1.1893768768768769, "grad_norm": 0.937327804777484, "learning_rate": 7.55386282381348e-06, "loss": 0.4332, "step": 12674 }, { "epoch": 1.1894707207207207, "grad_norm": 1.0740797796757635, "learning_rate": 7.5533934336842e-06, "loss": 0.3913, "step": 12675 }, { "epoch": 1.1895645645645645, "grad_norm": 2.6726340760858034, "learning_rate": 7.55292401311051e-06, "loss": 0.3844, "step": 12676 }, { "epoch": 1.1896584084084085, "grad_norm": 0.9418387926776493, "learning_rate": 7.552454562098006e-06, "loss": 0.3572, "step": 12677 }, { "epoch": 1.1897522522522523, "grad_norm": 1.0346811051733584, "learning_rate": 7.551985080652284e-06, "loss": 0.416, "step": 12678 }, { "epoch": 1.1898460960960962, "grad_norm": 1.0575169675185396, "learning_rate": 7.551515568778944e-06, "loss": 0.4039, "step": 12679 }, { "epoch": 1.18993993993994, "grad_norm": 1.3147894660731854, "learning_rate": 7.5510460264835864e-06, "loss": 0.4126, "step": 12680 }, { "epoch": 1.1900337837837838, "grad_norm": 0.9944394866833131, "learning_rate": 7.550576453771805e-06, "loss": 0.4072, "step": 12681 }, { "epoch": 1.1901276276276276, "grad_norm": 0.9409365631940565, "learning_rate": 7.5501068506492e-06, "loss": 0.4129, "step": 12682 }, { "epoch": 1.1902214714714714, "grad_norm": 0.9379127016709315, "learning_rate": 7.5496372171213725e-06, "loss": 0.3487, "step": 12683 }, { "epoch": 1.1903153153153152, "grad_norm": 1.1228681751767464, "learning_rate": 7.549167553193919e-06, "loss": 0.4121, "step": 12684 }, { "epoch": 1.1904091591591592, "grad_norm": 0.9172692989432878, "learning_rate": 7.5486978588724404e-06, "loss": 0.4181, "step": 12685 }, { "epoch": 1.190503003003003, "grad_norm": 0.9203285102475713, "learning_rate": 7.548228134162539e-06, "loss": 0.3562, "step": 12686 }, { "epoch": 1.1905968468468469, "grad_norm": 1.0152016138053055, "learning_rate": 7.547758379069811e-06, "loss": 0.4168, "step": 12687 }, { "epoch": 1.1906906906906907, "grad_norm": 1.0681749586174691, "learning_rate": 7.547288593599862e-06, "loss": 0.4476, "step": 12688 }, { "epoch": 1.1907845345345345, "grad_norm": 0.9558637465737857, "learning_rate": 7.546818777758291e-06, "loss": 0.4123, "step": 12689 }, { "epoch": 1.1908783783783783, "grad_norm": 0.9347437336995874, "learning_rate": 7.546348931550699e-06, "loss": 0.4387, "step": 12690 }, { "epoch": 1.1909722222222223, "grad_norm": 0.8679782138255988, "learning_rate": 7.54587905498269e-06, "loss": 0.3917, "step": 12691 }, { "epoch": 1.1910660660660661, "grad_norm": 1.2808236023529542, "learning_rate": 7.545409148059865e-06, "loss": 0.4074, "step": 12692 }, { "epoch": 1.19115990990991, "grad_norm": 0.8928577015398619, "learning_rate": 7.544939210787827e-06, "loss": 0.3949, "step": 12693 }, { "epoch": 1.1912537537537538, "grad_norm": 0.9629332914921361, "learning_rate": 7.544469243172179e-06, "loss": 0.3842, "step": 12694 }, { "epoch": 1.1913475975975976, "grad_norm": 0.8494886198682412, "learning_rate": 7.543999245218526e-06, "loss": 0.3688, "step": 12695 }, { "epoch": 1.1914414414414414, "grad_norm": 0.918577672644866, "learning_rate": 7.543529216932471e-06, "loss": 0.4071, "step": 12696 }, { "epoch": 1.1915352852852852, "grad_norm": 0.8429490461833371, "learning_rate": 7.5430591583196144e-06, "loss": 0.4173, "step": 12697 }, { "epoch": 1.1916291291291292, "grad_norm": 0.9735792522872707, "learning_rate": 7.542589069385567e-06, "loss": 0.3937, "step": 12698 }, { "epoch": 1.191722972972973, "grad_norm": 1.0194013525544916, "learning_rate": 7.542118950135931e-06, "loss": 0.4198, "step": 12699 }, { "epoch": 1.1918168168168168, "grad_norm": 0.927322522920999, "learning_rate": 7.541648800576311e-06, "loss": 0.4058, "step": 12700 }, { "epoch": 1.1919106606606606, "grad_norm": 0.9056140888997496, "learning_rate": 7.541178620712313e-06, "loss": 0.4313, "step": 12701 }, { "epoch": 1.1920045045045045, "grad_norm": 0.8797080524421413, "learning_rate": 7.540708410549543e-06, "loss": 0.362, "step": 12702 }, { "epoch": 1.1920983483483483, "grad_norm": 1.7044398824116083, "learning_rate": 7.540238170093608e-06, "loss": 0.3891, "step": 12703 }, { "epoch": 1.1921921921921923, "grad_norm": 1.2859398658453702, "learning_rate": 7.539767899350114e-06, "loss": 0.4394, "step": 12704 }, { "epoch": 1.1922860360360361, "grad_norm": 0.9669842700190304, "learning_rate": 7.5392975983246685e-06, "loss": 0.3891, "step": 12705 }, { "epoch": 1.19237987987988, "grad_norm": 2.9614399432845735, "learning_rate": 7.538827267022879e-06, "loss": 0.3856, "step": 12706 }, { "epoch": 1.1924737237237237, "grad_norm": 0.8905633199225451, "learning_rate": 7.538356905450353e-06, "loss": 0.3882, "step": 12707 }, { "epoch": 1.1925675675675675, "grad_norm": 0.8589054403890245, "learning_rate": 7.5378865136126995e-06, "loss": 0.3552, "step": 12708 }, { "epoch": 1.1926614114114114, "grad_norm": 0.8611610057343987, "learning_rate": 7.5374160915155255e-06, "loss": 0.3681, "step": 12709 }, { "epoch": 1.1927552552552552, "grad_norm": 1.066307653196581, "learning_rate": 7.536945639164441e-06, "loss": 0.4157, "step": 12710 }, { "epoch": 1.192849099099099, "grad_norm": 1.126771014272479, "learning_rate": 7.5364751565650555e-06, "loss": 0.3646, "step": 12711 }, { "epoch": 1.192942942942943, "grad_norm": 0.9178356371509202, "learning_rate": 7.5360046437229786e-06, "loss": 0.352, "step": 12712 }, { "epoch": 1.1930367867867868, "grad_norm": 1.0835322297808798, "learning_rate": 7.53553410064382e-06, "loss": 0.4181, "step": 12713 }, { "epoch": 1.1931306306306306, "grad_norm": 1.1670948998938753, "learning_rate": 7.5350635273331885e-06, "loss": 0.3625, "step": 12714 }, { "epoch": 1.1932244744744744, "grad_norm": 1.156617531246411, "learning_rate": 7.534592923796696e-06, "loss": 0.3537, "step": 12715 }, { "epoch": 1.1933183183183182, "grad_norm": 1.0101647206570712, "learning_rate": 7.534122290039956e-06, "loss": 0.4355, "step": 12716 }, { "epoch": 1.1934121621621623, "grad_norm": 1.1159789999296468, "learning_rate": 7.533651626068576e-06, "loss": 0.4363, "step": 12717 }, { "epoch": 1.193506006006006, "grad_norm": 1.1163400338095943, "learning_rate": 7.533180931888169e-06, "loss": 0.4096, "step": 12718 }, { "epoch": 1.19359984984985, "grad_norm": 0.921830585866795, "learning_rate": 7.53271020750435e-06, "loss": 0.3872, "step": 12719 }, { "epoch": 1.1936936936936937, "grad_norm": 1.163943293244869, "learning_rate": 7.532239452922727e-06, "loss": 0.398, "step": 12720 }, { "epoch": 1.1937875375375375, "grad_norm": 1.007790147294886, "learning_rate": 7.531768668148914e-06, "loss": 0.4203, "step": 12721 }, { "epoch": 1.1938813813813813, "grad_norm": 1.1175442268907896, "learning_rate": 7.531297853188528e-06, "loss": 0.4156, "step": 12722 }, { "epoch": 1.1939752252252251, "grad_norm": 0.9678451434254682, "learning_rate": 7.530827008047178e-06, "loss": 0.4485, "step": 12723 }, { "epoch": 1.194069069069069, "grad_norm": 0.9179175914313137, "learning_rate": 7.530356132730481e-06, "loss": 0.4162, "step": 12724 }, { "epoch": 1.194162912912913, "grad_norm": 0.9776443082858594, "learning_rate": 7.529885227244049e-06, "loss": 0.4245, "step": 12725 }, { "epoch": 1.1942567567567568, "grad_norm": 1.0980112618492297, "learning_rate": 7.529414291593498e-06, "loss": 0.3997, "step": 12726 }, { "epoch": 1.1943506006006006, "grad_norm": 0.9592536246639527, "learning_rate": 7.528943325784441e-06, "loss": 0.4268, "step": 12727 }, { "epoch": 1.1944444444444444, "grad_norm": 1.0150920164640902, "learning_rate": 7.5284723298224985e-06, "loss": 0.445, "step": 12728 }, { "epoch": 1.1945382882882882, "grad_norm": 0.9318329540198473, "learning_rate": 7.52800130371328e-06, "loss": 0.4167, "step": 12729 }, { "epoch": 1.194632132132132, "grad_norm": 1.027325676324435, "learning_rate": 7.527530247462406e-06, "loss": 0.3612, "step": 12730 }, { "epoch": 1.194725975975976, "grad_norm": 1.07193288638976, "learning_rate": 7.527059161075492e-06, "loss": 0.4422, "step": 12731 }, { "epoch": 1.1948198198198199, "grad_norm": 1.1156171838280402, "learning_rate": 7.526588044558152e-06, "loss": 0.4341, "step": 12732 }, { "epoch": 1.1949136636636637, "grad_norm": 0.9753636552002007, "learning_rate": 7.526116897916005e-06, "loss": 0.3779, "step": 12733 }, { "epoch": 1.1950075075075075, "grad_norm": 1.0879343799666552, "learning_rate": 7.5256457211546705e-06, "loss": 0.4421, "step": 12734 }, { "epoch": 1.1951013513513513, "grad_norm": 0.9224007735513002, "learning_rate": 7.525174514279765e-06, "loss": 0.4072, "step": 12735 }, { "epoch": 1.1951951951951951, "grad_norm": 1.113875111818416, "learning_rate": 7.524703277296907e-06, "loss": 0.4392, "step": 12736 }, { "epoch": 1.195289039039039, "grad_norm": 0.9265814880308614, "learning_rate": 7.524232010211714e-06, "loss": 0.396, "step": 12737 }, { "epoch": 1.195382882882883, "grad_norm": 1.1970371526773713, "learning_rate": 7.523760713029805e-06, "loss": 0.3747, "step": 12738 }, { "epoch": 1.1954767267267268, "grad_norm": 1.1875430708177632, "learning_rate": 7.5232893857568e-06, "loss": 0.4463, "step": 12739 }, { "epoch": 1.1955705705705706, "grad_norm": 0.9147659351020241, "learning_rate": 7.52281802839832e-06, "loss": 0.3842, "step": 12740 }, { "epoch": 1.1956644144144144, "grad_norm": 0.927901047573939, "learning_rate": 7.522346640959983e-06, "loss": 0.3866, "step": 12741 }, { "epoch": 1.1957582582582582, "grad_norm": 0.9575589647042589, "learning_rate": 7.52187522344741e-06, "loss": 0.4121, "step": 12742 }, { "epoch": 1.195852102102102, "grad_norm": 0.889342000840766, "learning_rate": 7.521403775866223e-06, "loss": 0.409, "step": 12743 }, { "epoch": 1.195945945945946, "grad_norm": 0.9908478549423454, "learning_rate": 7.52093229822204e-06, "loss": 0.3915, "step": 12744 }, { "epoch": 1.1960397897897899, "grad_norm": 1.0391416311900963, "learning_rate": 7.520460790520485e-06, "loss": 0.3945, "step": 12745 }, { "epoch": 1.1961336336336337, "grad_norm": 0.7934865017923047, "learning_rate": 7.519989252767181e-06, "loss": 0.4006, "step": 12746 }, { "epoch": 1.1962274774774775, "grad_norm": 0.8810385690419865, "learning_rate": 7.519517684967746e-06, "loss": 0.3564, "step": 12747 }, { "epoch": 1.1963213213213213, "grad_norm": 1.001068163838479, "learning_rate": 7.519046087127806e-06, "loss": 0.4177, "step": 12748 }, { "epoch": 1.196415165165165, "grad_norm": 0.961831442354952, "learning_rate": 7.518574459252985e-06, "loss": 0.3933, "step": 12749 }, { "epoch": 1.196509009009009, "grad_norm": 1.1650229343867495, "learning_rate": 7.518102801348903e-06, "loss": 0.4082, "step": 12750 }, { "epoch": 1.196602852852853, "grad_norm": 1.090386480943676, "learning_rate": 7.517631113421184e-06, "loss": 0.4177, "step": 12751 }, { "epoch": 1.1966966966966968, "grad_norm": 0.9375899696868056, "learning_rate": 7.517159395475455e-06, "loss": 0.3892, "step": 12752 }, { "epoch": 1.1967905405405406, "grad_norm": 0.9670419777391263, "learning_rate": 7.516687647517336e-06, "loss": 0.4123, "step": 12753 }, { "epoch": 1.1968843843843844, "grad_norm": 0.9877041908807428, "learning_rate": 7.516215869552454e-06, "loss": 0.4434, "step": 12754 }, { "epoch": 1.1969782282282282, "grad_norm": 0.9232348569107127, "learning_rate": 7.515744061586436e-06, "loss": 0.3961, "step": 12755 }, { "epoch": 1.197072072072072, "grad_norm": 0.9163716533652058, "learning_rate": 7.515272223624906e-06, "loss": 0.4603, "step": 12756 }, { "epoch": 1.197165915915916, "grad_norm": 0.9580037336132237, "learning_rate": 7.514800355673486e-06, "loss": 0.386, "step": 12757 }, { "epoch": 1.1972597597597598, "grad_norm": 0.9250274406858869, "learning_rate": 7.5143284577378085e-06, "loss": 0.4428, "step": 12758 }, { "epoch": 1.1973536036036037, "grad_norm": 0.9668004264638779, "learning_rate": 7.513856529823494e-06, "loss": 0.4097, "step": 12759 }, { "epoch": 1.1974474474474475, "grad_norm": 1.0731477243139644, "learning_rate": 7.513384571936174e-06, "loss": 0.4051, "step": 12760 }, { "epoch": 1.1975412912912913, "grad_norm": 1.5701741714206316, "learning_rate": 7.512912584081474e-06, "loss": 0.4159, "step": 12761 }, { "epoch": 1.197635135135135, "grad_norm": 0.8080468353600555, "learning_rate": 7.5124405662650225e-06, "loss": 0.3593, "step": 12762 }, { "epoch": 1.197728978978979, "grad_norm": 0.969899634848611, "learning_rate": 7.511968518492444e-06, "loss": 0.4403, "step": 12763 }, { "epoch": 1.1978228228228227, "grad_norm": 3.3369269012429417, "learning_rate": 7.51149644076937e-06, "loss": 0.4051, "step": 12764 }, { "epoch": 1.1979166666666667, "grad_norm": 0.8887480378387512, "learning_rate": 7.511024333101429e-06, "loss": 0.4229, "step": 12765 }, { "epoch": 1.1980105105105106, "grad_norm": 1.8821913930769258, "learning_rate": 7.510552195494248e-06, "loss": 0.3936, "step": 12766 }, { "epoch": 1.1981043543543544, "grad_norm": 1.0424784213831078, "learning_rate": 7.510080027953459e-06, "loss": 0.4299, "step": 12767 }, { "epoch": 1.1981981981981982, "grad_norm": 1.2001728122165018, "learning_rate": 7.509607830484691e-06, "loss": 0.4069, "step": 12768 }, { "epoch": 1.198292042042042, "grad_norm": 0.93796188636777, "learning_rate": 7.509135603093573e-06, "loss": 0.4464, "step": 12769 }, { "epoch": 1.1983858858858858, "grad_norm": 1.1215116757178738, "learning_rate": 7.508663345785736e-06, "loss": 0.3968, "step": 12770 }, { "epoch": 1.1984797297297298, "grad_norm": 0.9145843900336336, "learning_rate": 7.50819105856681e-06, "loss": 0.358, "step": 12771 }, { "epoch": 1.1985735735735736, "grad_norm": 0.9532135037707367, "learning_rate": 7.507718741442426e-06, "loss": 0.3774, "step": 12772 }, { "epoch": 1.1986674174174174, "grad_norm": 0.8735485898265454, "learning_rate": 7.507246394418217e-06, "loss": 0.4328, "step": 12773 }, { "epoch": 1.1987612612612613, "grad_norm": 0.7874725671597652, "learning_rate": 7.506774017499816e-06, "loss": 0.4421, "step": 12774 }, { "epoch": 1.198855105105105, "grad_norm": 1.2023990072298265, "learning_rate": 7.506301610692851e-06, "loss": 0.3932, "step": 12775 }, { "epoch": 1.1989489489489489, "grad_norm": 0.9877009950673882, "learning_rate": 7.505829174002959e-06, "loss": 0.4081, "step": 12776 }, { "epoch": 1.1990427927927927, "grad_norm": 0.8910785978908485, "learning_rate": 7.505356707435768e-06, "loss": 0.4235, "step": 12777 }, { "epoch": 1.1991366366366367, "grad_norm": 1.0652197409500286, "learning_rate": 7.504884210996915e-06, "loss": 0.4157, "step": 12778 }, { "epoch": 1.1992304804804805, "grad_norm": 1.0302428497814204, "learning_rate": 7.5044116846920345e-06, "loss": 0.3942, "step": 12779 }, { "epoch": 1.1993243243243243, "grad_norm": 1.1034451999096835, "learning_rate": 7.503939128526758e-06, "loss": 0.3982, "step": 12780 }, { "epoch": 1.1994181681681682, "grad_norm": 1.2291702563887565, "learning_rate": 7.503466542506719e-06, "loss": 0.4126, "step": 12781 }, { "epoch": 1.199512012012012, "grad_norm": 1.0093024214875363, "learning_rate": 7.502993926637556e-06, "loss": 0.4293, "step": 12782 }, { "epoch": 1.1996058558558558, "grad_norm": 1.0132185212144649, "learning_rate": 7.5025212809249005e-06, "loss": 0.3754, "step": 12783 }, { "epoch": 1.1996996996996998, "grad_norm": 1.933972778396687, "learning_rate": 7.502048605374389e-06, "loss": 0.3839, "step": 12784 }, { "epoch": 1.1997935435435436, "grad_norm": 0.9303367848215374, "learning_rate": 7.501575899991657e-06, "loss": 0.4149, "step": 12785 }, { "epoch": 1.1998873873873874, "grad_norm": 1.188994491557487, "learning_rate": 7.501103164782344e-06, "loss": 0.3916, "step": 12786 }, { "epoch": 1.1999812312312312, "grad_norm": 0.9230490475259593, "learning_rate": 7.500630399752081e-06, "loss": 0.3755, "step": 12787 }, { "epoch": 1.200075075075075, "grad_norm": 0.862800790940105, "learning_rate": 7.500157604906508e-06, "loss": 0.3774, "step": 12788 }, { "epoch": 1.2001689189189189, "grad_norm": 0.8023899398746304, "learning_rate": 7.499684780251263e-06, "loss": 0.367, "step": 12789 }, { "epoch": 1.2002627627627627, "grad_norm": 1.0130256876154125, "learning_rate": 7.49921192579198e-06, "loss": 0.3899, "step": 12790 }, { "epoch": 1.2003566066066067, "grad_norm": 1.0232130837104931, "learning_rate": 7.498739041534301e-06, "loss": 0.4498, "step": 12791 }, { "epoch": 1.2004504504504505, "grad_norm": 1.1907403385013617, "learning_rate": 7.498266127483862e-06, "loss": 0.369, "step": 12792 }, { "epoch": 1.2005442942942943, "grad_norm": 1.023069878848398, "learning_rate": 7.4977931836463005e-06, "loss": 0.38, "step": 12793 }, { "epoch": 1.2006381381381381, "grad_norm": 1.1061679628497771, "learning_rate": 7.497320210027258e-06, "loss": 0.4666, "step": 12794 }, { "epoch": 1.200731981981982, "grad_norm": 1.3241287607023327, "learning_rate": 7.496847206632371e-06, "loss": 0.3624, "step": 12795 }, { "epoch": 1.2008258258258258, "grad_norm": 0.9455809340402862, "learning_rate": 7.496374173467282e-06, "loss": 0.4263, "step": 12796 }, { "epoch": 1.2009196696696698, "grad_norm": 0.8314026121791566, "learning_rate": 7.495901110537631e-06, "loss": 0.4042, "step": 12797 }, { "epoch": 1.2010135135135136, "grad_norm": 2.7414287119622847, "learning_rate": 7.495428017849058e-06, "loss": 0.402, "step": 12798 }, { "epoch": 1.2011073573573574, "grad_norm": 0.9281214390006616, "learning_rate": 7.494954895407201e-06, "loss": 0.3982, "step": 12799 }, { "epoch": 1.2012012012012012, "grad_norm": 1.3502417104973914, "learning_rate": 7.494481743217704e-06, "loss": 0.388, "step": 12800 }, { "epoch": 1.201295045045045, "grad_norm": 0.9818647926321846, "learning_rate": 7.494008561286207e-06, "loss": 0.3974, "step": 12801 }, { "epoch": 1.2013888888888888, "grad_norm": 0.9175521535870526, "learning_rate": 7.493535349618352e-06, "loss": 0.412, "step": 12802 }, { "epoch": 1.2014827327327327, "grad_norm": 0.9546606761039488, "learning_rate": 7.493062108219783e-06, "loss": 0.4366, "step": 12803 }, { "epoch": 1.2015765765765765, "grad_norm": 0.9177413517454578, "learning_rate": 7.492588837096141e-06, "loss": 0.4091, "step": 12804 }, { "epoch": 1.2016704204204205, "grad_norm": 0.9608895917124747, "learning_rate": 7.4921155362530674e-06, "loss": 0.4159, "step": 12805 }, { "epoch": 1.2017642642642643, "grad_norm": 0.8461306305216891, "learning_rate": 7.491642205696209e-06, "loss": 0.3457, "step": 12806 }, { "epoch": 1.2018581081081081, "grad_norm": 0.9816312530680801, "learning_rate": 7.491168845431205e-06, "loss": 0.3971, "step": 12807 }, { "epoch": 1.201951951951952, "grad_norm": 0.8900338604980693, "learning_rate": 7.4906954554637025e-06, "loss": 0.3759, "step": 12808 }, { "epoch": 1.2020457957957957, "grad_norm": 0.8938802198451318, "learning_rate": 7.490222035799346e-06, "loss": 0.3587, "step": 12809 }, { "epoch": 1.2021396396396395, "grad_norm": 0.9622835472162536, "learning_rate": 7.489748586443778e-06, "loss": 0.4545, "step": 12810 }, { "epoch": 1.2022334834834836, "grad_norm": 0.8009138290263133, "learning_rate": 7.489275107402645e-06, "loss": 0.3911, "step": 12811 }, { "epoch": 1.2023273273273274, "grad_norm": 0.9145669775532902, "learning_rate": 7.4888015986815924e-06, "loss": 0.3853, "step": 12812 }, { "epoch": 1.2024211711711712, "grad_norm": 0.9051615817335891, "learning_rate": 7.4883280602862655e-06, "loss": 0.4461, "step": 12813 }, { "epoch": 1.202515015015015, "grad_norm": 1.093479121081415, "learning_rate": 7.4878544922223085e-06, "loss": 0.4505, "step": 12814 }, { "epoch": 1.2026088588588588, "grad_norm": 1.073975461661665, "learning_rate": 7.487380894495371e-06, "loss": 0.4511, "step": 12815 }, { "epoch": 1.2027027027027026, "grad_norm": 0.8295401523025575, "learning_rate": 7.486907267111097e-06, "loss": 0.3962, "step": 12816 }, { "epoch": 1.2027965465465464, "grad_norm": 0.9183679820468428, "learning_rate": 7.486433610075137e-06, "loss": 0.3877, "step": 12817 }, { "epoch": 1.2028903903903905, "grad_norm": 0.8947628778889264, "learning_rate": 7.4859599233931355e-06, "loss": 0.4174, "step": 12818 }, { "epoch": 1.2029842342342343, "grad_norm": 1.2847158709348876, "learning_rate": 7.485486207070741e-06, "loss": 0.4456, "step": 12819 }, { "epoch": 1.203078078078078, "grad_norm": 1.0285929728738865, "learning_rate": 7.485012461113602e-06, "loss": 0.3706, "step": 12820 }, { "epoch": 1.203171921921922, "grad_norm": 0.8940616086103297, "learning_rate": 7.484538685527368e-06, "loss": 0.4048, "step": 12821 }, { "epoch": 1.2032657657657657, "grad_norm": 1.0586592631379252, "learning_rate": 7.484064880317685e-06, "loss": 0.4267, "step": 12822 }, { "epoch": 1.2033596096096095, "grad_norm": 0.9096849914550587, "learning_rate": 7.483591045490206e-06, "loss": 0.3848, "step": 12823 }, { "epoch": 1.2034534534534536, "grad_norm": 0.8610034849942535, "learning_rate": 7.483117181050579e-06, "loss": 0.3837, "step": 12824 }, { "epoch": 1.2035472972972974, "grad_norm": 0.9489345098271904, "learning_rate": 7.4826432870044505e-06, "loss": 0.3979, "step": 12825 }, { "epoch": 1.2036411411411412, "grad_norm": 1.033651064274011, "learning_rate": 7.482169363357475e-06, "loss": 0.4229, "step": 12826 }, { "epoch": 1.203734984984985, "grad_norm": 2.575736140377282, "learning_rate": 7.481695410115304e-06, "loss": 0.4276, "step": 12827 }, { "epoch": 1.2038288288288288, "grad_norm": 0.9661401706057104, "learning_rate": 7.4812214272835845e-06, "loss": 0.4255, "step": 12828 }, { "epoch": 1.2039226726726726, "grad_norm": 1.122011527911782, "learning_rate": 7.480747414867971e-06, "loss": 0.4192, "step": 12829 }, { "epoch": 1.2040165165165164, "grad_norm": 1.2641885060087112, "learning_rate": 7.480273372874115e-06, "loss": 0.4143, "step": 12830 }, { "epoch": 1.2041103603603605, "grad_norm": 0.8658150438197807, "learning_rate": 7.479799301307667e-06, "loss": 0.4104, "step": 12831 }, { "epoch": 1.2042042042042043, "grad_norm": 1.3487387269607725, "learning_rate": 7.4793252001742775e-06, "loss": 0.4024, "step": 12832 }, { "epoch": 1.204298048048048, "grad_norm": 0.9911799622074314, "learning_rate": 7.478851069479604e-06, "loss": 0.378, "step": 12833 }, { "epoch": 1.2043918918918919, "grad_norm": 1.0263485605815108, "learning_rate": 7.478376909229298e-06, "loss": 0.4343, "step": 12834 }, { "epoch": 1.2044857357357357, "grad_norm": 0.9688840893756689, "learning_rate": 7.477902719429012e-06, "loss": 0.4226, "step": 12835 }, { "epoch": 1.2045795795795795, "grad_norm": 1.0140565813892577, "learning_rate": 7.4774285000844e-06, "loss": 0.419, "step": 12836 }, { "epoch": 1.2046734234234235, "grad_norm": 1.186075587825469, "learning_rate": 7.476954251201116e-06, "loss": 0.4071, "step": 12837 }, { "epoch": 1.2047672672672673, "grad_norm": 0.9307964799824273, "learning_rate": 7.476479972784815e-06, "loss": 0.3935, "step": 12838 }, { "epoch": 1.2048611111111112, "grad_norm": 0.9768345765581744, "learning_rate": 7.476005664841153e-06, "loss": 0.4399, "step": 12839 }, { "epoch": 1.204954954954955, "grad_norm": 0.9025156723156998, "learning_rate": 7.4755313273757824e-06, "loss": 0.4069, "step": 12840 }, { "epoch": 1.2050487987987988, "grad_norm": 0.8268296936744116, "learning_rate": 7.475056960394362e-06, "loss": 0.3778, "step": 12841 }, { "epoch": 1.2051426426426426, "grad_norm": 0.8844490878907864, "learning_rate": 7.474582563902546e-06, "loss": 0.3693, "step": 12842 }, { "epoch": 1.2052364864864864, "grad_norm": 1.2606448246664799, "learning_rate": 7.47410813790599e-06, "loss": 0.4305, "step": 12843 }, { "epoch": 1.2053303303303302, "grad_norm": 1.0961146183804127, "learning_rate": 7.473633682410351e-06, "loss": 0.4635, "step": 12844 }, { "epoch": 1.2054241741741742, "grad_norm": 0.9216126792707985, "learning_rate": 7.473159197421287e-06, "loss": 0.4409, "step": 12845 }, { "epoch": 1.205518018018018, "grad_norm": 1.1356831900197615, "learning_rate": 7.472684682944456e-06, "loss": 0.4253, "step": 12846 }, { "epoch": 1.2056118618618619, "grad_norm": 0.9455559873746584, "learning_rate": 7.472210138985513e-06, "loss": 0.4086, "step": 12847 }, { "epoch": 1.2057057057057057, "grad_norm": 1.4432905435465766, "learning_rate": 7.471735565550119e-06, "loss": 0.4631, "step": 12848 }, { "epoch": 1.2057995495495495, "grad_norm": 1.0604136551752104, "learning_rate": 7.471260962643929e-06, "loss": 0.4371, "step": 12849 }, { "epoch": 1.2058933933933933, "grad_norm": 1.1777189718236865, "learning_rate": 7.470786330272605e-06, "loss": 0.4563, "step": 12850 }, { "epoch": 1.2059872372372373, "grad_norm": 0.9773510799124469, "learning_rate": 7.470311668441805e-06, "loss": 0.4084, "step": 12851 }, { "epoch": 1.2060810810810811, "grad_norm": 0.9540102795024952, "learning_rate": 7.469836977157186e-06, "loss": 0.4158, "step": 12852 }, { "epoch": 1.206174924924925, "grad_norm": 0.9335972844898526, "learning_rate": 7.469362256424413e-06, "loss": 0.4544, "step": 12853 }, { "epoch": 1.2062687687687688, "grad_norm": 1.0909756827771717, "learning_rate": 7.468887506249143e-06, "loss": 0.4332, "step": 12854 }, { "epoch": 1.2063626126126126, "grad_norm": 0.804455401257099, "learning_rate": 7.468412726637036e-06, "loss": 0.4083, "step": 12855 }, { "epoch": 1.2064564564564564, "grad_norm": 0.7956368354528603, "learning_rate": 7.467937917593752e-06, "loss": 0.3958, "step": 12856 }, { "epoch": 1.2065503003003002, "grad_norm": 0.809888621044878, "learning_rate": 7.467463079124955e-06, "loss": 0.3926, "step": 12857 }, { "epoch": 1.2066441441441442, "grad_norm": 0.8948008521869516, "learning_rate": 7.466988211236305e-06, "loss": 0.4144, "step": 12858 }, { "epoch": 1.206737987987988, "grad_norm": 1.1844061289331267, "learning_rate": 7.466513313933464e-06, "loss": 0.3769, "step": 12859 }, { "epoch": 1.2068318318318318, "grad_norm": 1.006216198669996, "learning_rate": 7.466038387222097e-06, "loss": 0.4079, "step": 12860 }, { "epoch": 1.2069256756756757, "grad_norm": 0.861227351898646, "learning_rate": 7.4655634311078605e-06, "loss": 0.402, "step": 12861 }, { "epoch": 1.2070195195195195, "grad_norm": 0.9205187915052043, "learning_rate": 7.465088445596422e-06, "loss": 0.3682, "step": 12862 }, { "epoch": 1.2071133633633633, "grad_norm": 0.8799608539195197, "learning_rate": 7.464613430693445e-06, "loss": 0.4137, "step": 12863 }, { "epoch": 1.2072072072072073, "grad_norm": 0.8622828779614393, "learning_rate": 7.464138386404592e-06, "loss": 0.3825, "step": 12864 }, { "epoch": 1.2073010510510511, "grad_norm": 0.9408667889090713, "learning_rate": 7.463663312735526e-06, "loss": 0.406, "step": 12865 }, { "epoch": 1.207394894894895, "grad_norm": 0.9542216436931004, "learning_rate": 7.463188209691913e-06, "loss": 0.4022, "step": 12866 }, { "epoch": 1.2074887387387387, "grad_norm": 1.2367740562686103, "learning_rate": 7.462713077279416e-06, "loss": 0.385, "step": 12867 }, { "epoch": 1.2075825825825826, "grad_norm": 3.39094696891649, "learning_rate": 7.462237915503702e-06, "loss": 0.4329, "step": 12868 }, { "epoch": 1.2076764264264264, "grad_norm": 0.9358745048103169, "learning_rate": 7.461762724370436e-06, "loss": 0.4076, "step": 12869 }, { "epoch": 1.2077702702702702, "grad_norm": 1.2855926307291252, "learning_rate": 7.461287503885282e-06, "loss": 0.4363, "step": 12870 }, { "epoch": 1.2078641141141142, "grad_norm": 0.947745869543551, "learning_rate": 7.460812254053908e-06, "loss": 0.3881, "step": 12871 }, { "epoch": 1.207957957957958, "grad_norm": 1.169771827821219, "learning_rate": 7.460336974881981e-06, "loss": 0.4069, "step": 12872 }, { "epoch": 1.2080518018018018, "grad_norm": 0.9620371694304267, "learning_rate": 7.459861666375165e-06, "loss": 0.3914, "step": 12873 }, { "epoch": 1.2081456456456456, "grad_norm": 1.1897450938010532, "learning_rate": 7.45938632853913e-06, "loss": 0.3751, "step": 12874 }, { "epoch": 1.2082394894894894, "grad_norm": 1.070999064115467, "learning_rate": 7.458910961379542e-06, "loss": 0.4271, "step": 12875 }, { "epoch": 1.2083333333333333, "grad_norm": 1.0876016815480452, "learning_rate": 7.458435564902069e-06, "loss": 0.451, "step": 12876 }, { "epoch": 1.2084271771771773, "grad_norm": 0.9272212931983285, "learning_rate": 7.457960139112379e-06, "loss": 0.3667, "step": 12877 }, { "epoch": 1.208521021021021, "grad_norm": 1.1156360453274445, "learning_rate": 7.457484684016143e-06, "loss": 0.3831, "step": 12878 }, { "epoch": 1.208614864864865, "grad_norm": 0.876883143601328, "learning_rate": 7.457009199619026e-06, "loss": 0.3694, "step": 12879 }, { "epoch": 1.2087087087087087, "grad_norm": 0.8389998322942699, "learning_rate": 7.456533685926699e-06, "loss": 0.3645, "step": 12880 }, { "epoch": 1.2088025525525525, "grad_norm": 1.021196099617957, "learning_rate": 7.456058142944833e-06, "loss": 0.4278, "step": 12881 }, { "epoch": 1.2088963963963963, "grad_norm": 1.0303864745827025, "learning_rate": 7.455582570679096e-06, "loss": 0.4029, "step": 12882 }, { "epoch": 1.2089902402402402, "grad_norm": 1.0093476253868676, "learning_rate": 7.455106969135159e-06, "loss": 0.4145, "step": 12883 }, { "epoch": 1.209084084084084, "grad_norm": 0.9337756653730774, "learning_rate": 7.454631338318692e-06, "loss": 0.4308, "step": 12884 }, { "epoch": 1.209177927927928, "grad_norm": 0.9573283012330263, "learning_rate": 7.4541556782353685e-06, "loss": 0.4293, "step": 12885 }, { "epoch": 1.2092717717717718, "grad_norm": 1.0303624930605049, "learning_rate": 7.453679988890856e-06, "loss": 0.3809, "step": 12886 }, { "epoch": 1.2093656156156156, "grad_norm": 1.0127700152572363, "learning_rate": 7.45320427029083e-06, "loss": 0.4194, "step": 12887 }, { "epoch": 1.2094594594594594, "grad_norm": 0.9439140546964502, "learning_rate": 7.452728522440959e-06, "loss": 0.3835, "step": 12888 }, { "epoch": 1.2095533033033032, "grad_norm": 1.3516045979785352, "learning_rate": 7.452252745346917e-06, "loss": 0.4062, "step": 12889 }, { "epoch": 1.209647147147147, "grad_norm": 0.91293110938224, "learning_rate": 7.451776939014379e-06, "loss": 0.3743, "step": 12890 }, { "epoch": 1.209740990990991, "grad_norm": 0.9313734337414316, "learning_rate": 7.451301103449016e-06, "loss": 0.4331, "step": 12891 }, { "epoch": 1.209834834834835, "grad_norm": 0.9726756051810287, "learning_rate": 7.4508252386565e-06, "loss": 0.3384, "step": 12892 }, { "epoch": 1.2099286786786787, "grad_norm": 1.118924159280578, "learning_rate": 7.450349344642507e-06, "loss": 0.4352, "step": 12893 }, { "epoch": 1.2100225225225225, "grad_norm": 0.9724700379072967, "learning_rate": 7.449873421412709e-06, "loss": 0.4249, "step": 12894 }, { "epoch": 1.2101163663663663, "grad_norm": 0.8322195310113668, "learning_rate": 7.4493974689727825e-06, "loss": 0.3903, "step": 12895 }, { "epoch": 1.2102102102102101, "grad_norm": 1.4985068466625855, "learning_rate": 7.448921487328402e-06, "loss": 0.4206, "step": 12896 }, { "epoch": 1.210304054054054, "grad_norm": 0.8953575667228533, "learning_rate": 7.448445476485243e-06, "loss": 0.3728, "step": 12897 }, { "epoch": 1.210397897897898, "grad_norm": 1.0697550659423394, "learning_rate": 7.447969436448979e-06, "loss": 0.4083, "step": 12898 }, { "epoch": 1.2104917417417418, "grad_norm": 1.1018719483246724, "learning_rate": 7.447493367225289e-06, "loss": 0.3914, "step": 12899 }, { "epoch": 1.2105855855855856, "grad_norm": 1.082952022393551, "learning_rate": 7.447017268819846e-06, "loss": 0.3883, "step": 12900 }, { "epoch": 1.2106794294294294, "grad_norm": 0.9564192352887011, "learning_rate": 7.446541141238328e-06, "loss": 0.4034, "step": 12901 }, { "epoch": 1.2107732732732732, "grad_norm": 0.9848834747399677, "learning_rate": 7.446064984486414e-06, "loss": 0.3796, "step": 12902 }, { "epoch": 1.210867117117117, "grad_norm": 0.9987700723781896, "learning_rate": 7.4455887985697785e-06, "loss": 0.3916, "step": 12903 }, { "epoch": 1.210960960960961, "grad_norm": 0.936563751496479, "learning_rate": 7.4451125834940986e-06, "loss": 0.3765, "step": 12904 }, { "epoch": 1.2110548048048049, "grad_norm": 0.9610059405933266, "learning_rate": 7.444636339265055e-06, "loss": 0.3771, "step": 12905 }, { "epoch": 1.2111486486486487, "grad_norm": 0.9288519846962794, "learning_rate": 7.444160065888324e-06, "loss": 0.4258, "step": 12906 }, { "epoch": 1.2112424924924925, "grad_norm": 1.049412287456603, "learning_rate": 7.443683763369584e-06, "loss": 0.3666, "step": 12907 }, { "epoch": 1.2113363363363363, "grad_norm": 1.287682056217721, "learning_rate": 7.443207431714516e-06, "loss": 0.3758, "step": 12908 }, { "epoch": 1.2114301801801801, "grad_norm": 0.9965303705140804, "learning_rate": 7.442731070928799e-06, "loss": 0.4278, "step": 12909 }, { "epoch": 1.211524024024024, "grad_norm": 1.0819095948587132, "learning_rate": 7.4422546810181105e-06, "loss": 0.362, "step": 12910 }, { "epoch": 1.211617867867868, "grad_norm": 0.9080918021636639, "learning_rate": 7.441778261988133e-06, "loss": 0.4137, "step": 12911 }, { "epoch": 1.2117117117117118, "grad_norm": 1.037050601830424, "learning_rate": 7.441301813844546e-06, "loss": 0.407, "step": 12912 }, { "epoch": 1.2118055555555556, "grad_norm": 0.9660813262668282, "learning_rate": 7.4408253365930285e-06, "loss": 0.4129, "step": 12913 }, { "epoch": 1.2118993993993994, "grad_norm": 0.9268972909645391, "learning_rate": 7.440348830239265e-06, "loss": 0.399, "step": 12914 }, { "epoch": 1.2119932432432432, "grad_norm": 1.0710628018140742, "learning_rate": 7.439872294788934e-06, "loss": 0.3853, "step": 12915 }, { "epoch": 1.212087087087087, "grad_norm": 0.9846621978702114, "learning_rate": 7.43939573024772e-06, "loss": 0.3747, "step": 12916 }, { "epoch": 1.212180930930931, "grad_norm": 0.8965685614704406, "learning_rate": 7.438919136621302e-06, "loss": 0.4124, "step": 12917 }, { "epoch": 1.2122747747747749, "grad_norm": 1.094973456956656, "learning_rate": 7.438442513915365e-06, "loss": 0.3599, "step": 12918 }, { "epoch": 1.2123686186186187, "grad_norm": 0.8862174460500345, "learning_rate": 7.43796586213559e-06, "loss": 0.356, "step": 12919 }, { "epoch": 1.2124624624624625, "grad_norm": 1.367373025449721, "learning_rate": 7.437489181287662e-06, "loss": 0.4129, "step": 12920 }, { "epoch": 1.2125563063063063, "grad_norm": 0.9324570552965181, "learning_rate": 7.437012471377263e-06, "loss": 0.368, "step": 12921 }, { "epoch": 1.21265015015015, "grad_norm": 0.989952864411141, "learning_rate": 7.436535732410077e-06, "loss": 0.3923, "step": 12922 }, { "epoch": 1.212743993993994, "grad_norm": 1.0995078692221532, "learning_rate": 7.4360589643917905e-06, "loss": 0.4001, "step": 12923 }, { "epoch": 1.2128378378378377, "grad_norm": 0.957371369530655, "learning_rate": 7.4355821673280845e-06, "loss": 0.4535, "step": 12924 }, { "epoch": 1.2129316816816818, "grad_norm": 1.0630979109780279, "learning_rate": 7.435105341224647e-06, "loss": 0.4145, "step": 12925 }, { "epoch": 1.2130255255255256, "grad_norm": 0.8320126016008248, "learning_rate": 7.434628486087161e-06, "loss": 0.445, "step": 12926 }, { "epoch": 1.2131193693693694, "grad_norm": 0.8495012625989218, "learning_rate": 7.434151601921314e-06, "loss": 0.4185, "step": 12927 }, { "epoch": 1.2132132132132132, "grad_norm": 1.0061740415111744, "learning_rate": 7.43367468873279e-06, "loss": 0.4072, "step": 12928 }, { "epoch": 1.213307057057057, "grad_norm": 1.1225951222269956, "learning_rate": 7.4331977465272775e-06, "loss": 0.4157, "step": 12929 }, { "epoch": 1.213400900900901, "grad_norm": 1.0455999238732046, "learning_rate": 7.43272077531046e-06, "loss": 0.4385, "step": 12930 }, { "epoch": 1.2134947447447448, "grad_norm": 1.154301554218716, "learning_rate": 7.432243775088027e-06, "loss": 0.3801, "step": 12931 }, { "epoch": 1.2135885885885886, "grad_norm": 1.0050432654660622, "learning_rate": 7.4317667458656665e-06, "loss": 0.3599, "step": 12932 }, { "epoch": 1.2136824324324325, "grad_norm": 0.8538346776743039, "learning_rate": 7.4312896876490634e-06, "loss": 0.4096, "step": 12933 }, { "epoch": 1.2137762762762763, "grad_norm": 1.0916260594043998, "learning_rate": 7.430812600443909e-06, "loss": 0.3779, "step": 12934 }, { "epoch": 1.21387012012012, "grad_norm": 0.8877600183942336, "learning_rate": 7.430335484255888e-06, "loss": 0.3898, "step": 12935 }, { "epoch": 1.2139639639639639, "grad_norm": 2.3742550678089605, "learning_rate": 7.4298583390906905e-06, "loss": 0.4427, "step": 12936 }, { "epoch": 1.2140578078078077, "grad_norm": 0.9442746798919033, "learning_rate": 7.429381164954007e-06, "loss": 0.4318, "step": 12937 }, { "epoch": 1.2141516516516517, "grad_norm": 0.9812030017317976, "learning_rate": 7.428903961851527e-06, "loss": 0.4293, "step": 12938 }, { "epoch": 1.2142454954954955, "grad_norm": 0.9462098631576367, "learning_rate": 7.428426729788938e-06, "loss": 0.3951, "step": 12939 }, { "epoch": 1.2143393393393394, "grad_norm": 0.9297401822435066, "learning_rate": 7.427949468771932e-06, "loss": 0.4306, "step": 12940 }, { "epoch": 1.2144331831831832, "grad_norm": 0.9609117387912639, "learning_rate": 7.4274721788061985e-06, "loss": 0.3941, "step": 12941 }, { "epoch": 1.214527027027027, "grad_norm": 0.8631980019041452, "learning_rate": 7.426994859897428e-06, "loss": 0.4017, "step": 12942 }, { "epoch": 1.2146208708708708, "grad_norm": 1.517388648392151, "learning_rate": 7.426517512051312e-06, "loss": 0.3986, "step": 12943 }, { "epoch": 1.2147147147147148, "grad_norm": 2.9555963058567674, "learning_rate": 7.4260401352735435e-06, "loss": 0.368, "step": 12944 }, { "epoch": 1.2148085585585586, "grad_norm": 0.7966003151053641, "learning_rate": 7.42556272956981e-06, "loss": 0.4102, "step": 12945 }, { "epoch": 1.2149024024024024, "grad_norm": 0.92711023583271, "learning_rate": 7.425085294945811e-06, "loss": 0.4211, "step": 12946 }, { "epoch": 1.2149962462462462, "grad_norm": 0.9475562271817539, "learning_rate": 7.4246078314072324e-06, "loss": 0.4362, "step": 12947 }, { "epoch": 1.21509009009009, "grad_norm": 1.1329276909947124, "learning_rate": 7.424130338959768e-06, "loss": 0.4034, "step": 12948 }, { "epoch": 1.2151839339339339, "grad_norm": 1.0368885313452858, "learning_rate": 7.423652817609112e-06, "loss": 0.3646, "step": 12949 }, { "epoch": 1.2152777777777777, "grad_norm": 1.2267817628989712, "learning_rate": 7.423175267360961e-06, "loss": 0.3764, "step": 12950 }, { "epoch": 1.2153716216216217, "grad_norm": 0.8066717608836848, "learning_rate": 7.422697688221005e-06, "loss": 0.3515, "step": 12951 }, { "epoch": 1.2154654654654655, "grad_norm": 0.8728751052720791, "learning_rate": 7.422220080194939e-06, "loss": 0.4148, "step": 12952 }, { "epoch": 1.2155593093093093, "grad_norm": 0.9276995581620415, "learning_rate": 7.421742443288459e-06, "loss": 0.4236, "step": 12953 }, { "epoch": 1.2156531531531531, "grad_norm": 1.093142063544826, "learning_rate": 7.421264777507257e-06, "loss": 0.3692, "step": 12954 }, { "epoch": 1.215746996996997, "grad_norm": 0.9332548257472743, "learning_rate": 7.42078708285703e-06, "loss": 0.3925, "step": 12955 }, { "epoch": 1.2158408408408408, "grad_norm": 1.2592084584860062, "learning_rate": 7.420309359343475e-06, "loss": 0.4403, "step": 12956 }, { "epoch": 1.2159346846846848, "grad_norm": 1.3012842672733005, "learning_rate": 7.4198316069722864e-06, "loss": 0.4229, "step": 12957 }, { "epoch": 1.2160285285285286, "grad_norm": 1.0291254063784623, "learning_rate": 7.41935382574916e-06, "loss": 0.4161, "step": 12958 }, { "epoch": 1.2161223723723724, "grad_norm": 0.918654456873056, "learning_rate": 7.418876015679795e-06, "loss": 0.3906, "step": 12959 }, { "epoch": 1.2162162162162162, "grad_norm": 0.9946252138096496, "learning_rate": 7.418398176769885e-06, "loss": 0.4002, "step": 12960 }, { "epoch": 1.21631006006006, "grad_norm": 0.9892108727977978, "learning_rate": 7.4179203090251285e-06, "loss": 0.4242, "step": 12961 }, { "epoch": 1.2164039039039038, "grad_norm": 1.498087481495156, "learning_rate": 7.417442412451224e-06, "loss": 0.4055, "step": 12962 }, { "epoch": 1.2164977477477477, "grad_norm": 2.0309388709591736, "learning_rate": 7.41696448705387e-06, "loss": 0.4079, "step": 12963 }, { "epoch": 1.2165915915915915, "grad_norm": 1.0362329946742594, "learning_rate": 7.416486532838763e-06, "loss": 0.3801, "step": 12964 }, { "epoch": 1.2166854354354355, "grad_norm": 1.0339963537774395, "learning_rate": 7.416008549811604e-06, "loss": 0.4244, "step": 12965 }, { "epoch": 1.2167792792792793, "grad_norm": 0.9078890814504239, "learning_rate": 7.41553053797809e-06, "loss": 0.3914, "step": 12966 }, { "epoch": 1.2168731231231231, "grad_norm": 1.2793406024154452, "learning_rate": 7.415052497343919e-06, "loss": 0.3901, "step": 12967 }, { "epoch": 1.216966966966967, "grad_norm": 1.153323690818515, "learning_rate": 7.414574427914795e-06, "loss": 0.4389, "step": 12968 }, { "epoch": 1.2170608108108107, "grad_norm": 0.944444874578199, "learning_rate": 7.4140963296964165e-06, "loss": 0.421, "step": 12969 }, { "epoch": 1.2171546546546548, "grad_norm": 1.0735570596651918, "learning_rate": 7.4136182026944825e-06, "loss": 0.4007, "step": 12970 }, { "epoch": 1.2172484984984986, "grad_norm": 0.9964245000376366, "learning_rate": 7.413140046914696e-06, "loss": 0.4217, "step": 12971 }, { "epoch": 1.2173423423423424, "grad_norm": 0.9153229416159137, "learning_rate": 7.412661862362753e-06, "loss": 0.3976, "step": 12972 }, { "epoch": 1.2174361861861862, "grad_norm": 1.100094621757048, "learning_rate": 7.412183649044361e-06, "loss": 0.3965, "step": 12973 }, { "epoch": 1.21753003003003, "grad_norm": 0.88975926035055, "learning_rate": 7.4117054069652195e-06, "loss": 0.3874, "step": 12974 }, { "epoch": 1.2176238738738738, "grad_norm": 1.2132471345959812, "learning_rate": 7.4112271361310305e-06, "loss": 0.4711, "step": 12975 }, { "epoch": 1.2177177177177176, "grad_norm": 0.8692497734025199, "learning_rate": 7.4107488365474964e-06, "loss": 0.3613, "step": 12976 }, { "epoch": 1.2178115615615615, "grad_norm": 0.8965836314231909, "learning_rate": 7.410270508220321e-06, "loss": 0.3795, "step": 12977 }, { "epoch": 1.2179054054054055, "grad_norm": 1.0204613522641481, "learning_rate": 7.409792151155206e-06, "loss": 0.3918, "step": 12978 }, { "epoch": 1.2179992492492493, "grad_norm": 0.9241370936857971, "learning_rate": 7.409313765357855e-06, "loss": 0.3996, "step": 12979 }, { "epoch": 1.218093093093093, "grad_norm": 1.0382249761085502, "learning_rate": 7.408835350833974e-06, "loss": 0.3736, "step": 12980 }, { "epoch": 1.218186936936937, "grad_norm": 0.9218101942786859, "learning_rate": 7.408356907589264e-06, "loss": 0.3859, "step": 12981 }, { "epoch": 1.2182807807807807, "grad_norm": 0.8896056838638181, "learning_rate": 7.407878435629432e-06, "loss": 0.4275, "step": 12982 }, { "epoch": 1.2183746246246245, "grad_norm": 0.8583657813709649, "learning_rate": 7.4073999349601826e-06, "loss": 0.3663, "step": 12983 }, { "epoch": 1.2184684684684686, "grad_norm": 1.0322019586912639, "learning_rate": 7.406921405587218e-06, "loss": 0.4388, "step": 12984 }, { "epoch": 1.2185623123123124, "grad_norm": 0.9002301499528064, "learning_rate": 7.406442847516247e-06, "loss": 0.4054, "step": 12985 }, { "epoch": 1.2186561561561562, "grad_norm": 1.0343289978651151, "learning_rate": 7.405964260752977e-06, "loss": 0.4179, "step": 12986 }, { "epoch": 1.21875, "grad_norm": 0.9607803636605461, "learning_rate": 7.40548564530311e-06, "loss": 0.3778, "step": 12987 }, { "epoch": 1.2188438438438438, "grad_norm": 1.0681196528382444, "learning_rate": 7.405007001172355e-06, "loss": 0.3856, "step": 12988 }, { "epoch": 1.2189376876876876, "grad_norm": 0.9277793523141761, "learning_rate": 7.404528328366419e-06, "loss": 0.4003, "step": 12989 }, { "epoch": 1.2190315315315314, "grad_norm": 1.0750823716102205, "learning_rate": 7.4040496268910075e-06, "loss": 0.417, "step": 12990 }, { "epoch": 1.2191253753753755, "grad_norm": 1.0266882568167155, "learning_rate": 7.403570896751829e-06, "loss": 0.4499, "step": 12991 }, { "epoch": 1.2192192192192193, "grad_norm": 0.9483153491787676, "learning_rate": 7.4030921379545935e-06, "loss": 0.4093, "step": 12992 }, { "epoch": 1.219313063063063, "grad_norm": 1.0445584088194164, "learning_rate": 7.4026133505050066e-06, "loss": 0.4173, "step": 12993 }, { "epoch": 1.219406906906907, "grad_norm": 1.2089830860070598, "learning_rate": 7.402134534408778e-06, "loss": 0.4256, "step": 12994 }, { "epoch": 1.2195007507507507, "grad_norm": 0.8488811397951633, "learning_rate": 7.401655689671619e-06, "loss": 0.4336, "step": 12995 }, { "epoch": 1.2195945945945945, "grad_norm": 0.9495700436980755, "learning_rate": 7.401176816299234e-06, "loss": 0.3767, "step": 12996 }, { "epoch": 1.2196884384384385, "grad_norm": 0.9832068190056661, "learning_rate": 7.400697914297335e-06, "loss": 0.3994, "step": 12997 }, { "epoch": 1.2197822822822824, "grad_norm": 1.100979073011258, "learning_rate": 7.4002189836716334e-06, "loss": 0.3764, "step": 12998 }, { "epoch": 1.2198761261261262, "grad_norm": 1.2493751534453061, "learning_rate": 7.399740024427837e-06, "loss": 0.3736, "step": 12999 }, { "epoch": 1.21996996996997, "grad_norm": 0.9573338332502142, "learning_rate": 7.3992610365716584e-06, "loss": 0.4304, "step": 13000 }, { "epoch": 1.2200638138138138, "grad_norm": 0.8787326454881565, "learning_rate": 7.398782020108809e-06, "loss": 0.3874, "step": 13001 }, { "epoch": 1.2201576576576576, "grad_norm": 0.8792598480334062, "learning_rate": 7.398302975045e-06, "loss": 0.4056, "step": 13002 }, { "epoch": 1.2202515015015014, "grad_norm": 0.9042904823623537, "learning_rate": 7.3978239013859395e-06, "loss": 0.3671, "step": 13003 }, { "epoch": 1.2203453453453452, "grad_norm": 1.0149799132466857, "learning_rate": 7.3973447991373446e-06, "loss": 0.4383, "step": 13004 }, { "epoch": 1.2204391891891893, "grad_norm": 1.0009042784648645, "learning_rate": 7.396865668304924e-06, "loss": 0.4108, "step": 13005 }, { "epoch": 1.220533033033033, "grad_norm": 1.0062331566811318, "learning_rate": 7.396386508894393e-06, "loss": 0.415, "step": 13006 }, { "epoch": 1.2206268768768769, "grad_norm": 0.859283190893352, "learning_rate": 7.395907320911464e-06, "loss": 0.4198, "step": 13007 }, { "epoch": 1.2207207207207207, "grad_norm": 1.7352026563934961, "learning_rate": 7.395428104361851e-06, "loss": 0.3971, "step": 13008 }, { "epoch": 1.2208145645645645, "grad_norm": 0.9984771151721364, "learning_rate": 7.394948859251264e-06, "loss": 0.4435, "step": 13009 }, { "epoch": 1.2209084084084085, "grad_norm": 0.8981956265725491, "learning_rate": 7.394469585585422e-06, "loss": 0.3812, "step": 13010 }, { "epoch": 1.2210022522522523, "grad_norm": 1.1848098266740104, "learning_rate": 7.393990283370035e-06, "loss": 0.4208, "step": 13011 }, { "epoch": 1.2210960960960962, "grad_norm": 1.138046130206089, "learning_rate": 7.393510952610822e-06, "loss": 0.4117, "step": 13012 }, { "epoch": 1.22118993993994, "grad_norm": 1.1506836134243892, "learning_rate": 7.393031593313495e-06, "loss": 0.4696, "step": 13013 }, { "epoch": 1.2212837837837838, "grad_norm": 0.9661978161071956, "learning_rate": 7.392552205483773e-06, "loss": 0.3652, "step": 13014 }, { "epoch": 1.2213776276276276, "grad_norm": 1.151614467716753, "learning_rate": 7.392072789127367e-06, "loss": 0.429, "step": 13015 }, { "epoch": 1.2214714714714714, "grad_norm": 0.8699287294846051, "learning_rate": 7.391593344249997e-06, "loss": 0.4081, "step": 13016 }, { "epoch": 1.2215653153153152, "grad_norm": 0.9544025548362601, "learning_rate": 7.391113870857377e-06, "loss": 0.3834, "step": 13017 }, { "epoch": 1.2216591591591592, "grad_norm": 0.9888730371242791, "learning_rate": 7.3906343689552255e-06, "loss": 0.3607, "step": 13018 }, { "epoch": 1.221753003003003, "grad_norm": 0.8474446624145276, "learning_rate": 7.390154838549259e-06, "loss": 0.4119, "step": 13019 }, { "epoch": 1.2218468468468469, "grad_norm": 0.978579657286118, "learning_rate": 7.3896752796451955e-06, "loss": 0.4214, "step": 13020 }, { "epoch": 1.2219406906906907, "grad_norm": 0.9750953007026694, "learning_rate": 7.389195692248752e-06, "loss": 0.413, "step": 13021 }, { "epoch": 1.2220345345345345, "grad_norm": 1.1132409851871448, "learning_rate": 7.3887160763656476e-06, "loss": 0.3769, "step": 13022 }, { "epoch": 1.2221283783783783, "grad_norm": 1.5960949833043556, "learning_rate": 7.388236432001599e-06, "loss": 0.3993, "step": 13023 }, { "epoch": 1.2222222222222223, "grad_norm": 1.1770262313398232, "learning_rate": 7.387756759162326e-06, "loss": 0.4198, "step": 13024 }, { "epoch": 1.2223160660660661, "grad_norm": 1.2451520770021212, "learning_rate": 7.3872770578535505e-06, "loss": 0.4243, "step": 13025 }, { "epoch": 1.22240990990991, "grad_norm": 0.9284105190224802, "learning_rate": 7.386797328080988e-06, "loss": 0.3664, "step": 13026 }, { "epoch": 1.2225037537537538, "grad_norm": 1.143707878752884, "learning_rate": 7.3863175698503605e-06, "loss": 0.3865, "step": 13027 }, { "epoch": 1.2225975975975976, "grad_norm": 1.0542877664219115, "learning_rate": 7.385837783167388e-06, "loss": 0.3927, "step": 13028 }, { "epoch": 1.2226914414414414, "grad_norm": 1.0262037297029643, "learning_rate": 7.38535796803779e-06, "loss": 0.4038, "step": 13029 }, { "epoch": 1.2227852852852852, "grad_norm": 0.9174390739584862, "learning_rate": 7.384878124467289e-06, "loss": 0.4071, "step": 13030 }, { "epoch": 1.2228791291291292, "grad_norm": 0.8820009292925188, "learning_rate": 7.384398252461605e-06, "loss": 0.4301, "step": 13031 }, { "epoch": 1.222972972972973, "grad_norm": 0.8821127572552458, "learning_rate": 7.38391835202646e-06, "loss": 0.3816, "step": 13032 }, { "epoch": 1.2230668168168168, "grad_norm": 0.9996743276062583, "learning_rate": 7.383438423167576e-06, "loss": 0.3531, "step": 13033 }, { "epoch": 1.2231606606606606, "grad_norm": 0.8536101638596343, "learning_rate": 7.382958465890676e-06, "loss": 0.3921, "step": 13034 }, { "epoch": 1.2232545045045045, "grad_norm": 1.0137278077963763, "learning_rate": 7.38247848020148e-06, "loss": 0.4492, "step": 13035 }, { "epoch": 1.2233483483483483, "grad_norm": 0.9727520080474324, "learning_rate": 7.381998466105713e-06, "loss": 0.4132, "step": 13036 }, { "epoch": 1.2234421921921923, "grad_norm": 0.9038478566592694, "learning_rate": 7.3815184236091e-06, "loss": 0.3807, "step": 13037 }, { "epoch": 1.2235360360360361, "grad_norm": 0.9180779151818788, "learning_rate": 7.381038352717361e-06, "loss": 0.4042, "step": 13038 }, { "epoch": 1.22362987987988, "grad_norm": 1.0479659802338432, "learning_rate": 7.380558253436222e-06, "loss": 0.403, "step": 13039 }, { "epoch": 1.2237237237237237, "grad_norm": 0.936087204723574, "learning_rate": 7.380078125771406e-06, "loss": 0.403, "step": 13040 }, { "epoch": 1.2238175675675675, "grad_norm": 0.9219878162707816, "learning_rate": 7.379597969728639e-06, "loss": 0.4135, "step": 13041 }, { "epoch": 1.2239114114114114, "grad_norm": 0.8838453232414802, "learning_rate": 7.379117785313644e-06, "loss": 0.3395, "step": 13042 }, { "epoch": 1.2240052552552552, "grad_norm": 0.9977673744347907, "learning_rate": 7.3786375725321504e-06, "loss": 0.3872, "step": 13043 }, { "epoch": 1.224099099099099, "grad_norm": 0.9633900654232613, "learning_rate": 7.378157331389879e-06, "loss": 0.4353, "step": 13044 }, { "epoch": 1.224192942942943, "grad_norm": 1.0472111925522583, "learning_rate": 7.377677061892559e-06, "loss": 0.3757, "step": 13045 }, { "epoch": 1.2242867867867868, "grad_norm": 0.9630655119532728, "learning_rate": 7.377196764045914e-06, "loss": 0.3986, "step": 13046 }, { "epoch": 1.2243806306306306, "grad_norm": 0.8852199932793059, "learning_rate": 7.376716437855673e-06, "loss": 0.38, "step": 13047 }, { "epoch": 1.2244744744744744, "grad_norm": 0.9200098010523159, "learning_rate": 7.3762360833275615e-06, "loss": 0.4084, "step": 13048 }, { "epoch": 1.2245683183183182, "grad_norm": 0.9638544981830864, "learning_rate": 7.3757557004673085e-06, "loss": 0.3942, "step": 13049 }, { "epoch": 1.2246621621621623, "grad_norm": 0.961992917533021, "learning_rate": 7.3752752892806415e-06, "loss": 0.4117, "step": 13050 }, { "epoch": 1.224756006006006, "grad_norm": 1.06540904793313, "learning_rate": 7.3747948497732855e-06, "loss": 0.4204, "step": 13051 }, { "epoch": 1.22484984984985, "grad_norm": 0.9355955329679057, "learning_rate": 7.3743143819509735e-06, "loss": 0.3712, "step": 13052 }, { "epoch": 1.2249436936936937, "grad_norm": 15.261090014880352, "learning_rate": 7.3738338858194294e-06, "loss": 0.3542, "step": 13053 }, { "epoch": 1.2250375375375375, "grad_norm": 1.3977220364441345, "learning_rate": 7.373353361384385e-06, "loss": 0.4128, "step": 13054 }, { "epoch": 1.2251313813813813, "grad_norm": 0.9165805531980119, "learning_rate": 7.37287280865157e-06, "loss": 0.4538, "step": 13055 }, { "epoch": 1.2252252252252251, "grad_norm": 0.9543023606174761, "learning_rate": 7.3723922276267135e-06, "loss": 0.4425, "step": 13056 }, { "epoch": 1.225319069069069, "grad_norm": 1.8136929092862952, "learning_rate": 7.371911618315544e-06, "loss": 0.4003, "step": 13057 }, { "epoch": 1.225412912912913, "grad_norm": 0.8837550072800477, "learning_rate": 7.3714309807237935e-06, "loss": 0.3668, "step": 13058 }, { "epoch": 1.2255067567567568, "grad_norm": 1.3856256380986849, "learning_rate": 7.3709503148571925e-06, "loss": 0.39, "step": 13059 }, { "epoch": 1.2256006006006006, "grad_norm": 0.9217837237650269, "learning_rate": 7.370469620721471e-06, "loss": 0.4256, "step": 13060 }, { "epoch": 1.2256944444444444, "grad_norm": 0.9294936234256836, "learning_rate": 7.369988898322362e-06, "loss": 0.4385, "step": 13061 }, { "epoch": 1.2257882882882882, "grad_norm": 0.9625053257855785, "learning_rate": 7.369508147665596e-06, "loss": 0.3906, "step": 13062 }, { "epoch": 1.225882132132132, "grad_norm": 0.9949422999901443, "learning_rate": 7.369027368756907e-06, "loss": 0.3949, "step": 13063 }, { "epoch": 1.225975975975976, "grad_norm": 1.8351109886906602, "learning_rate": 7.368546561602026e-06, "loss": 0.4565, "step": 13064 }, { "epoch": 1.2260698198198199, "grad_norm": 0.9101776564225438, "learning_rate": 7.368065726206684e-06, "loss": 0.3904, "step": 13065 }, { "epoch": 1.2261636636636637, "grad_norm": 0.9284469279377909, "learning_rate": 7.367584862576614e-06, "loss": 0.3429, "step": 13066 }, { "epoch": 1.2262575075075075, "grad_norm": 0.8957780783626044, "learning_rate": 7.367103970717554e-06, "loss": 0.3922, "step": 13067 }, { "epoch": 1.2263513513513513, "grad_norm": 1.0120906013641044, "learning_rate": 7.3666230506352335e-06, "loss": 0.443, "step": 13068 }, { "epoch": 1.2264451951951951, "grad_norm": 0.933244777181331, "learning_rate": 7.366142102335389e-06, "loss": 0.3939, "step": 13069 }, { "epoch": 1.226539039039039, "grad_norm": 0.9814468136995468, "learning_rate": 7.365661125823753e-06, "loss": 0.427, "step": 13070 }, { "epoch": 1.226632882882883, "grad_norm": 0.9037062009403367, "learning_rate": 7.36518012110606e-06, "loss": 0.4533, "step": 13071 }, { "epoch": 1.2267267267267268, "grad_norm": 1.0676507558476378, "learning_rate": 7.3646990881880465e-06, "loss": 0.367, "step": 13072 }, { "epoch": 1.2268205705705706, "grad_norm": 0.9549119260931191, "learning_rate": 7.364218027075449e-06, "loss": 0.4135, "step": 13073 }, { "epoch": 1.2269144144144144, "grad_norm": 0.8280972219025492, "learning_rate": 7.363736937774e-06, "loss": 0.4201, "step": 13074 }, { "epoch": 1.2270082582582582, "grad_norm": 0.9303061102797414, "learning_rate": 7.363255820289438e-06, "loss": 0.4107, "step": 13075 }, { "epoch": 1.227102102102102, "grad_norm": 0.8511242123049848, "learning_rate": 7.3627746746275e-06, "loss": 0.392, "step": 13076 }, { "epoch": 1.227195945945946, "grad_norm": 0.8469667703240098, "learning_rate": 7.362293500793919e-06, "loss": 0.3922, "step": 13077 }, { "epoch": 1.2272897897897899, "grad_norm": 0.8414110845851701, "learning_rate": 7.3618122987944355e-06, "loss": 0.3701, "step": 13078 }, { "epoch": 1.2273836336336337, "grad_norm": 0.9273122642307563, "learning_rate": 7.361331068634786e-06, "loss": 0.393, "step": 13079 }, { "epoch": 1.2274774774774775, "grad_norm": 1.003402152963911, "learning_rate": 7.360849810320707e-06, "loss": 0.3928, "step": 13080 }, { "epoch": 1.2275713213213213, "grad_norm": 0.9247594023847647, "learning_rate": 7.36036852385794e-06, "loss": 0.4025, "step": 13081 }, { "epoch": 1.227665165165165, "grad_norm": 0.9311903014231598, "learning_rate": 7.359887209252221e-06, "loss": 0.3964, "step": 13082 }, { "epoch": 1.227759009009009, "grad_norm": 1.113619578753903, "learning_rate": 7.359405866509288e-06, "loss": 0.4045, "step": 13083 }, { "epoch": 1.227852852852853, "grad_norm": 0.890446491493524, "learning_rate": 7.358924495634881e-06, "loss": 0.3743, "step": 13084 }, { "epoch": 1.2279466966966968, "grad_norm": 1.0905574603585035, "learning_rate": 7.358443096634742e-06, "loss": 0.4029, "step": 13085 }, { "epoch": 1.2280405405405406, "grad_norm": 0.9561348187802174, "learning_rate": 7.357961669514606e-06, "loss": 0.3751, "step": 13086 }, { "epoch": 1.2281343843843844, "grad_norm": 0.9052330238864691, "learning_rate": 7.357480214280217e-06, "loss": 0.3931, "step": 13087 }, { "epoch": 1.2282282282282282, "grad_norm": 1.0488468629301342, "learning_rate": 7.356998730937313e-06, "loss": 0.457, "step": 13088 }, { "epoch": 1.228322072072072, "grad_norm": 1.823217380621613, "learning_rate": 7.356517219491635e-06, "loss": 0.4169, "step": 13089 }, { "epoch": 1.228415915915916, "grad_norm": 0.8900434932596708, "learning_rate": 7.356035679948927e-06, "loss": 0.4312, "step": 13090 }, { "epoch": 1.2285097597597598, "grad_norm": 0.9964276398903529, "learning_rate": 7.355554112314927e-06, "loss": 0.3988, "step": 13091 }, { "epoch": 1.2286036036036037, "grad_norm": 0.9183367634430404, "learning_rate": 7.355072516595378e-06, "loss": 0.4251, "step": 13092 }, { "epoch": 1.2286974474474475, "grad_norm": 1.1709682764893294, "learning_rate": 7.3545908927960215e-06, "loss": 0.3973, "step": 13093 }, { "epoch": 1.2287912912912913, "grad_norm": 0.9637080981874552, "learning_rate": 7.354109240922602e-06, "loss": 0.4264, "step": 13094 }, { "epoch": 1.228885135135135, "grad_norm": 0.9949300604410846, "learning_rate": 7.35362756098086e-06, "loss": 0.4044, "step": 13095 }, { "epoch": 1.228978978978979, "grad_norm": 1.0884590686415816, "learning_rate": 7.35314585297654e-06, "loss": 0.4677, "step": 13096 }, { "epoch": 1.2290728228228227, "grad_norm": 1.0204776409442384, "learning_rate": 7.352664116915386e-06, "loss": 0.3541, "step": 13097 }, { "epoch": 1.2291666666666667, "grad_norm": 0.8741057494018305, "learning_rate": 7.352182352803139e-06, "loss": 0.3916, "step": 13098 }, { "epoch": 1.2292605105105106, "grad_norm": 0.9640457486697426, "learning_rate": 7.3517005606455464e-06, "loss": 0.4134, "step": 13099 }, { "epoch": 1.2293543543543544, "grad_norm": 1.123431183157714, "learning_rate": 7.351218740448351e-06, "loss": 0.4363, "step": 13100 }, { "epoch": 1.2294481981981982, "grad_norm": 0.8848475521796356, "learning_rate": 7.350736892217297e-06, "loss": 0.3923, "step": 13101 }, { "epoch": 1.229542042042042, "grad_norm": 1.1280152144995839, "learning_rate": 7.350255015958131e-06, "loss": 0.4467, "step": 13102 }, { "epoch": 1.2296358858858858, "grad_norm": 0.9589381372839733, "learning_rate": 7.349773111676598e-06, "loss": 0.4191, "step": 13103 }, { "epoch": 1.2297297297297298, "grad_norm": 0.9772558171329001, "learning_rate": 7.349291179378443e-06, "loss": 0.3701, "step": 13104 }, { "epoch": 1.2298235735735736, "grad_norm": 1.1037474524299358, "learning_rate": 7.348809219069414e-06, "loss": 0.4254, "step": 13105 }, { "epoch": 1.2299174174174174, "grad_norm": 0.8251596088536515, "learning_rate": 7.3483272307552556e-06, "loss": 0.3415, "step": 13106 }, { "epoch": 1.2300112612612613, "grad_norm": 1.017506812304659, "learning_rate": 7.347845214441714e-06, "loss": 0.4267, "step": 13107 }, { "epoch": 1.230105105105105, "grad_norm": 1.4116143558105585, "learning_rate": 7.347363170134539e-06, "loss": 0.4145, "step": 13108 }, { "epoch": 1.2301989489489489, "grad_norm": 1.0311768677993212, "learning_rate": 7.346881097839477e-06, "loss": 0.4264, "step": 13109 }, { "epoch": 1.2302927927927927, "grad_norm": 0.9726222236470085, "learning_rate": 7.346398997562274e-06, "loss": 0.3871, "step": 13110 }, { "epoch": 1.2303866366366367, "grad_norm": 0.8630326933349627, "learning_rate": 7.34591686930868e-06, "loss": 0.3876, "step": 13111 }, { "epoch": 1.2304804804804805, "grad_norm": 0.8097071489799174, "learning_rate": 7.345434713084445e-06, "loss": 0.4443, "step": 13112 }, { "epoch": 1.2305743243243243, "grad_norm": 0.9112904640873931, "learning_rate": 7.344952528895313e-06, "loss": 0.389, "step": 13113 }, { "epoch": 1.2306681681681682, "grad_norm": 0.9512605349822382, "learning_rate": 7.3444703167470365e-06, "loss": 0.3798, "step": 13114 }, { "epoch": 1.230762012012012, "grad_norm": 1.0626266636975403, "learning_rate": 7.343988076645366e-06, "loss": 0.4239, "step": 13115 }, { "epoch": 1.2308558558558558, "grad_norm": 0.9541552673042757, "learning_rate": 7.343505808596049e-06, "loss": 0.4187, "step": 13116 }, { "epoch": 1.2309496996996998, "grad_norm": 1.1108127826186873, "learning_rate": 7.343023512604837e-06, "loss": 0.4111, "step": 13117 }, { "epoch": 1.2310435435435436, "grad_norm": 0.9906738426627087, "learning_rate": 7.34254118867748e-06, "loss": 0.384, "step": 13118 }, { "epoch": 1.2311373873873874, "grad_norm": 2.0010744226083905, "learning_rate": 7.342058836819728e-06, "loss": 0.4551, "step": 13119 }, { "epoch": 1.2312312312312312, "grad_norm": 1.1097535876553422, "learning_rate": 7.341576457037332e-06, "loss": 0.4255, "step": 13120 }, { "epoch": 1.231325075075075, "grad_norm": 0.8972503491204206, "learning_rate": 7.341094049336046e-06, "loss": 0.3685, "step": 13121 }, { "epoch": 1.2314189189189189, "grad_norm": 0.9227158351183596, "learning_rate": 7.340611613721619e-06, "loss": 0.3874, "step": 13122 }, { "epoch": 1.2315127627627627, "grad_norm": 0.9470794408955863, "learning_rate": 7.340129150199804e-06, "loss": 0.4152, "step": 13123 }, { "epoch": 1.2316066066066067, "grad_norm": 1.0502618281417384, "learning_rate": 7.3396466587763545e-06, "loss": 0.3958, "step": 13124 }, { "epoch": 1.2317004504504505, "grad_norm": 1.2534064758851662, "learning_rate": 7.339164139457022e-06, "loss": 0.435, "step": 13125 }, { "epoch": 1.2317942942942943, "grad_norm": 0.9235712301135677, "learning_rate": 7.3386815922475605e-06, "loss": 0.3785, "step": 13126 }, { "epoch": 1.2318881381381381, "grad_norm": 0.9480926962205853, "learning_rate": 7.338199017153724e-06, "loss": 0.3812, "step": 13127 }, { "epoch": 1.231981981981982, "grad_norm": 0.8671129483132685, "learning_rate": 7.337716414181264e-06, "loss": 0.404, "step": 13128 }, { "epoch": 1.2320758258258258, "grad_norm": 0.9268230752602205, "learning_rate": 7.337233783335937e-06, "loss": 0.4143, "step": 13129 }, { "epoch": 1.2321696696696698, "grad_norm": 1.1810677389135569, "learning_rate": 7.336751124623496e-06, "loss": 0.4247, "step": 13130 }, { "epoch": 1.2322635135135136, "grad_norm": 1.0011045437821484, "learning_rate": 7.3362684380496976e-06, "loss": 0.4231, "step": 13131 }, { "epoch": 1.2323573573573574, "grad_norm": 1.3759536508297392, "learning_rate": 7.335785723620293e-06, "loss": 0.4239, "step": 13132 }, { "epoch": 1.2324512012012012, "grad_norm": 0.926021612912234, "learning_rate": 7.335302981341042e-06, "loss": 0.378, "step": 13133 }, { "epoch": 1.232545045045045, "grad_norm": 0.9133936980172495, "learning_rate": 7.3348202112176995e-06, "loss": 0.4058, "step": 13134 }, { "epoch": 1.2326388888888888, "grad_norm": 0.8566405827792368, "learning_rate": 7.3343374132560186e-06, "loss": 0.3687, "step": 13135 }, { "epoch": 1.2327327327327327, "grad_norm": 0.9186261533336215, "learning_rate": 7.33385458746176e-06, "loss": 0.4057, "step": 13136 }, { "epoch": 1.2328265765765765, "grad_norm": 1.182281194335571, "learning_rate": 7.333371733840678e-06, "loss": 0.4178, "step": 13137 }, { "epoch": 1.2329204204204205, "grad_norm": 1.8966184445988032, "learning_rate": 7.332888852398529e-06, "loss": 0.4412, "step": 13138 }, { "epoch": 1.2330142642642643, "grad_norm": 1.0286609197341323, "learning_rate": 7.3324059431410746e-06, "loss": 0.4353, "step": 13139 }, { "epoch": 1.2331081081081081, "grad_norm": 0.8579759372272399, "learning_rate": 7.331923006074066e-06, "loss": 0.4038, "step": 13140 }, { "epoch": 1.233201951951952, "grad_norm": 0.9555136961471324, "learning_rate": 7.331440041203266e-06, "loss": 0.3577, "step": 13141 }, { "epoch": 1.2332957957957957, "grad_norm": 1.1613095183157633, "learning_rate": 7.3309570485344336e-06, "loss": 0.4909, "step": 13142 }, { "epoch": 1.2333896396396395, "grad_norm": 0.9097069281422484, "learning_rate": 7.330474028073326e-06, "loss": 0.4374, "step": 13143 }, { "epoch": 1.2334834834834836, "grad_norm": 0.9422945662020664, "learning_rate": 7.329990979825701e-06, "loss": 0.4452, "step": 13144 }, { "epoch": 1.2335773273273274, "grad_norm": 2.6567664813617857, "learning_rate": 7.329507903797319e-06, "loss": 0.4236, "step": 13145 }, { "epoch": 1.2336711711711712, "grad_norm": 0.9252747998129766, "learning_rate": 7.32902479999394e-06, "loss": 0.4487, "step": 13146 }, { "epoch": 1.233765015015015, "grad_norm": 0.8501287744372664, "learning_rate": 7.328541668421325e-06, "loss": 0.4102, "step": 13147 }, { "epoch": 1.2338588588588588, "grad_norm": 0.9673664321122425, "learning_rate": 7.328058509085234e-06, "loss": 0.374, "step": 13148 }, { "epoch": 1.2339527027027026, "grad_norm": 1.002979141089552, "learning_rate": 7.327575321991427e-06, "loss": 0.3838, "step": 13149 }, { "epoch": 1.2340465465465464, "grad_norm": 0.9438849726559835, "learning_rate": 7.327092107145665e-06, "loss": 0.3908, "step": 13150 }, { "epoch": 1.2341403903903905, "grad_norm": 0.9495247398195042, "learning_rate": 7.32660886455371e-06, "loss": 0.4482, "step": 13151 }, { "epoch": 1.2342342342342343, "grad_norm": 0.9348669795728111, "learning_rate": 7.326125594221325e-06, "loss": 0.4113, "step": 13152 }, { "epoch": 1.234328078078078, "grad_norm": 1.134633318114307, "learning_rate": 7.325642296154268e-06, "loss": 0.4281, "step": 13153 }, { "epoch": 1.234421921921922, "grad_norm": 0.9823544876026262, "learning_rate": 7.3251589703583074e-06, "loss": 0.3963, "step": 13154 }, { "epoch": 1.2345157657657657, "grad_norm": 0.8328981950117174, "learning_rate": 7.3246756168392004e-06, "loss": 0.3954, "step": 13155 }, { "epoch": 1.2346096096096095, "grad_norm": 0.8196224308728314, "learning_rate": 7.324192235602714e-06, "loss": 0.4237, "step": 13156 }, { "epoch": 1.2347034534534536, "grad_norm": 1.2469128804073875, "learning_rate": 7.323708826654609e-06, "loss": 0.407, "step": 13157 }, { "epoch": 1.2347972972972974, "grad_norm": 1.0019462855661108, "learning_rate": 7.32322539000065e-06, "loss": 0.3923, "step": 13158 }, { "epoch": 1.2348911411411412, "grad_norm": 0.9405543836434079, "learning_rate": 7.322741925646602e-06, "loss": 0.4024, "step": 13159 }, { "epoch": 1.234984984984985, "grad_norm": 0.8959355203681203, "learning_rate": 7.3222584335982285e-06, "loss": 0.3741, "step": 13160 }, { "epoch": 1.2350788288288288, "grad_norm": 0.9252237621376451, "learning_rate": 7.321774913861295e-06, "loss": 0.4294, "step": 13161 }, { "epoch": 1.2351726726726726, "grad_norm": 1.2498202030935412, "learning_rate": 7.321291366441565e-06, "loss": 0.4316, "step": 13162 }, { "epoch": 1.2352665165165164, "grad_norm": 0.9866522044232716, "learning_rate": 7.320807791344805e-06, "loss": 0.3631, "step": 13163 }, { "epoch": 1.2353603603603605, "grad_norm": 1.4532390818015735, "learning_rate": 7.32032418857678e-06, "loss": 0.3844, "step": 13164 }, { "epoch": 1.2354542042042043, "grad_norm": 1.5148629131963884, "learning_rate": 7.3198405581432576e-06, "loss": 0.3602, "step": 13165 }, { "epoch": 1.235548048048048, "grad_norm": 1.5019676167297291, "learning_rate": 7.3193569000500034e-06, "loss": 0.4062, "step": 13166 }, { "epoch": 1.2356418918918919, "grad_norm": 0.9987272118768619, "learning_rate": 7.318873214302784e-06, "loss": 0.3783, "step": 13167 }, { "epoch": 1.2357357357357357, "grad_norm": 0.9053439202065215, "learning_rate": 7.318389500907364e-06, "loss": 0.3932, "step": 13168 }, { "epoch": 1.2358295795795795, "grad_norm": 0.8747757263512315, "learning_rate": 7.317905759869516e-06, "loss": 0.3774, "step": 13169 }, { "epoch": 1.2359234234234235, "grad_norm": 1.1803365717257344, "learning_rate": 7.3174219911950025e-06, "loss": 0.3993, "step": 13170 }, { "epoch": 1.2360172672672673, "grad_norm": 0.991846578511554, "learning_rate": 7.316938194889596e-06, "loss": 0.4319, "step": 13171 }, { "epoch": 1.2361111111111112, "grad_norm": 1.0015319145009487, "learning_rate": 7.316454370959061e-06, "loss": 0.3982, "step": 13172 }, { "epoch": 1.236204954954955, "grad_norm": 0.8991632330200936, "learning_rate": 7.315970519409169e-06, "loss": 0.4121, "step": 13173 }, { "epoch": 1.2362987987987988, "grad_norm": 1.4146823690015764, "learning_rate": 7.315486640245687e-06, "loss": 0.4002, "step": 13174 }, { "epoch": 1.2363926426426426, "grad_norm": 0.9983061482024038, "learning_rate": 7.315002733474387e-06, "loss": 0.4039, "step": 13175 }, { "epoch": 1.2364864864864864, "grad_norm": 1.016291933701111, "learning_rate": 7.314518799101035e-06, "loss": 0.3936, "step": 13176 }, { "epoch": 1.2365803303303302, "grad_norm": 1.3364508992953474, "learning_rate": 7.314034837131403e-06, "loss": 0.4383, "step": 13177 }, { "epoch": 1.2366741741741742, "grad_norm": 0.9494878359596445, "learning_rate": 7.313550847571263e-06, "loss": 0.3931, "step": 13178 }, { "epoch": 1.236768018018018, "grad_norm": 0.928118211532983, "learning_rate": 7.313066830426383e-06, "loss": 0.4153, "step": 13179 }, { "epoch": 1.2368618618618619, "grad_norm": 1.007452654924597, "learning_rate": 7.312582785702535e-06, "loss": 0.406, "step": 13180 }, { "epoch": 1.2369557057057057, "grad_norm": 1.456256073437357, "learning_rate": 7.31209871340549e-06, "loss": 0.3905, "step": 13181 }, { "epoch": 1.2370495495495495, "grad_norm": 0.9079599579275459, "learning_rate": 7.311614613541019e-06, "loss": 0.4046, "step": 13182 }, { "epoch": 1.2371433933933933, "grad_norm": 1.3229894148774421, "learning_rate": 7.311130486114896e-06, "loss": 0.4063, "step": 13183 }, { "epoch": 1.2372372372372373, "grad_norm": 0.8372162126189413, "learning_rate": 7.310646331132893e-06, "loss": 0.3705, "step": 13184 }, { "epoch": 1.2373310810810811, "grad_norm": 0.7999735681861928, "learning_rate": 7.31016214860078e-06, "loss": 0.3892, "step": 13185 }, { "epoch": 1.237424924924925, "grad_norm": 0.9095283718285966, "learning_rate": 7.309677938524334e-06, "loss": 0.4247, "step": 13186 }, { "epoch": 1.2375187687687688, "grad_norm": 4.438596428859991, "learning_rate": 7.309193700909325e-06, "loss": 0.4404, "step": 13187 }, { "epoch": 1.2376126126126126, "grad_norm": 0.9139011514251346, "learning_rate": 7.308709435761527e-06, "loss": 0.3805, "step": 13188 }, { "epoch": 1.2377064564564564, "grad_norm": 0.9655343329457591, "learning_rate": 7.308225143086716e-06, "loss": 0.3995, "step": 13189 }, { "epoch": 1.2378003003003002, "grad_norm": 0.9628048531560973, "learning_rate": 7.307740822890664e-06, "loss": 0.43, "step": 13190 }, { "epoch": 1.2378941441441442, "grad_norm": 1.0590688119530256, "learning_rate": 7.307256475179147e-06, "loss": 0.4205, "step": 13191 }, { "epoch": 1.237987987987988, "grad_norm": 0.9816827019535387, "learning_rate": 7.30677209995794e-06, "loss": 0.4047, "step": 13192 }, { "epoch": 1.2380818318318318, "grad_norm": 0.8717518002816658, "learning_rate": 7.306287697232817e-06, "loss": 0.378, "step": 13193 }, { "epoch": 1.2381756756756757, "grad_norm": 0.8064942937259988, "learning_rate": 7.305803267009553e-06, "loss": 0.3705, "step": 13194 }, { "epoch": 1.2382695195195195, "grad_norm": 0.9198645168011216, "learning_rate": 7.3053188092939265e-06, "loss": 0.4114, "step": 13195 }, { "epoch": 1.2383633633633633, "grad_norm": 0.9519631211941659, "learning_rate": 7.3048343240917145e-06, "loss": 0.4387, "step": 13196 }, { "epoch": 1.2384572072072073, "grad_norm": 1.1250014657388694, "learning_rate": 7.304349811408688e-06, "loss": 0.3587, "step": 13197 }, { "epoch": 1.2385510510510511, "grad_norm": 1.0773815356840988, "learning_rate": 7.303865271250631e-06, "loss": 0.4476, "step": 13198 }, { "epoch": 1.238644894894895, "grad_norm": 0.9706490542319963, "learning_rate": 7.303380703623315e-06, "loss": 0.391, "step": 13199 }, { "epoch": 1.2387387387387387, "grad_norm": 1.2621264481005192, "learning_rate": 7.30289610853252e-06, "loss": 0.3792, "step": 13200 }, { "epoch": 1.2388325825825826, "grad_norm": 1.0606456662576795, "learning_rate": 7.302411485984024e-06, "loss": 0.4253, "step": 13201 }, { "epoch": 1.2389264264264264, "grad_norm": 1.0123924152806292, "learning_rate": 7.301926835983606e-06, "loss": 0.3857, "step": 13202 }, { "epoch": 1.2390202702702702, "grad_norm": 1.1885745870935867, "learning_rate": 7.301442158537041e-06, "loss": 0.4004, "step": 13203 }, { "epoch": 1.2391141141141142, "grad_norm": 1.1378182373998253, "learning_rate": 7.300957453650113e-06, "loss": 0.4464, "step": 13204 }, { "epoch": 1.239207957957958, "grad_norm": 1.2242762844008186, "learning_rate": 7.3004727213285965e-06, "loss": 0.4046, "step": 13205 }, { "epoch": 1.2393018018018018, "grad_norm": 0.8163713762553688, "learning_rate": 7.299987961578273e-06, "loss": 0.4038, "step": 13206 }, { "epoch": 1.2393956456456456, "grad_norm": 0.905442627552891, "learning_rate": 7.2995031744049224e-06, "loss": 0.3858, "step": 13207 }, { "epoch": 1.2394894894894894, "grad_norm": 0.9951731145628923, "learning_rate": 7.299018359814326e-06, "loss": 0.4156, "step": 13208 }, { "epoch": 1.2395833333333333, "grad_norm": 0.9587842321044707, "learning_rate": 7.298533517812261e-06, "loss": 0.3806, "step": 13209 }, { "epoch": 1.2396771771771773, "grad_norm": 0.9317768855810235, "learning_rate": 7.298048648404513e-06, "loss": 0.4082, "step": 13210 }, { "epoch": 1.239771021021021, "grad_norm": 0.8019742824794204, "learning_rate": 7.29756375159686e-06, "loss": 0.4262, "step": 13211 }, { "epoch": 1.239864864864865, "grad_norm": 2.0808897490444442, "learning_rate": 7.297078827395084e-06, "loss": 0.4183, "step": 13212 }, { "epoch": 1.2399587087087087, "grad_norm": 1.1674200752594277, "learning_rate": 7.296593875804965e-06, "loss": 0.3706, "step": 13213 }, { "epoch": 1.2400525525525525, "grad_norm": 1.2639690701208033, "learning_rate": 7.296108896832288e-06, "loss": 0.3829, "step": 13214 }, { "epoch": 1.2401463963963963, "grad_norm": 0.8999879644346216, "learning_rate": 7.2956238904828335e-06, "loss": 0.4118, "step": 13215 }, { "epoch": 1.2402402402402402, "grad_norm": 0.9929974329283656, "learning_rate": 7.2951388567623874e-06, "loss": 0.4088, "step": 13216 }, { "epoch": 1.240334084084084, "grad_norm": 1.0386448821912846, "learning_rate": 7.294653795676729e-06, "loss": 0.3697, "step": 13217 }, { "epoch": 1.240427927927928, "grad_norm": 1.364742619481149, "learning_rate": 7.294168707231643e-06, "loss": 0.4333, "step": 13218 }, { "epoch": 1.2405217717717718, "grad_norm": 1.0068193084694113, "learning_rate": 7.293683591432914e-06, "loss": 0.4215, "step": 13219 }, { "epoch": 1.2406156156156156, "grad_norm": 1.0110971183795228, "learning_rate": 7.293198448286325e-06, "loss": 0.4146, "step": 13220 }, { "epoch": 1.2407094594594594, "grad_norm": 1.5443909267661287, "learning_rate": 7.292713277797661e-06, "loss": 0.3964, "step": 13221 }, { "epoch": 1.2408033033033032, "grad_norm": 0.9179616390139074, "learning_rate": 7.292228079972708e-06, "loss": 0.4207, "step": 13222 }, { "epoch": 1.240897147147147, "grad_norm": 0.9774893966210586, "learning_rate": 7.291742854817249e-06, "loss": 0.3895, "step": 13223 }, { "epoch": 1.240990990990991, "grad_norm": 0.7973337484271169, "learning_rate": 7.291257602337071e-06, "loss": 0.3488, "step": 13224 }, { "epoch": 1.241084834834835, "grad_norm": 0.9292624970029231, "learning_rate": 7.290772322537958e-06, "loss": 0.4266, "step": 13225 }, { "epoch": 1.2411786786786787, "grad_norm": 2.215672840451863, "learning_rate": 7.290287015425698e-06, "loss": 0.399, "step": 13226 }, { "epoch": 1.2412725225225225, "grad_norm": 1.0256666484736672, "learning_rate": 7.289801681006075e-06, "loss": 0.3922, "step": 13227 }, { "epoch": 1.2413663663663663, "grad_norm": 0.9203892018333827, "learning_rate": 7.289316319284878e-06, "loss": 0.3898, "step": 13228 }, { "epoch": 1.2414602102102101, "grad_norm": 0.8911356189663112, "learning_rate": 7.288830930267894e-06, "loss": 0.432, "step": 13229 }, { "epoch": 1.241554054054054, "grad_norm": 0.826523470042552, "learning_rate": 7.288345513960907e-06, "loss": 0.3697, "step": 13230 }, { "epoch": 1.241647897897898, "grad_norm": 0.9633528312671072, "learning_rate": 7.287860070369707e-06, "loss": 0.3975, "step": 13231 }, { "epoch": 1.2417417417417418, "grad_norm": 0.9280661371320279, "learning_rate": 7.2873745995000855e-06, "loss": 0.4463, "step": 13232 }, { "epoch": 1.2418355855855856, "grad_norm": 0.8909587059711289, "learning_rate": 7.286889101357825e-06, "loss": 0.4064, "step": 13233 }, { "epoch": 1.2419294294294294, "grad_norm": 1.079429476455104, "learning_rate": 7.286403575948717e-06, "loss": 0.4326, "step": 13234 }, { "epoch": 1.2420232732732732, "grad_norm": 1.3074246360423745, "learning_rate": 7.285918023278551e-06, "loss": 0.4318, "step": 13235 }, { "epoch": 1.242117117117117, "grad_norm": 0.9980791620251299, "learning_rate": 7.285432443353114e-06, "loss": 0.3764, "step": 13236 }, { "epoch": 1.242210960960961, "grad_norm": 0.9569536484832489, "learning_rate": 7.2849468361781975e-06, "loss": 0.429, "step": 13237 }, { "epoch": 1.2423048048048049, "grad_norm": 0.9222804915466102, "learning_rate": 7.284461201759591e-06, "loss": 0.3973, "step": 13238 }, { "epoch": 1.2423986486486487, "grad_norm": 0.7915743469470197, "learning_rate": 7.283975540103085e-06, "loss": 0.4156, "step": 13239 }, { "epoch": 1.2424924924924925, "grad_norm": 0.9092371943383176, "learning_rate": 7.283489851214469e-06, "loss": 0.3519, "step": 13240 }, { "epoch": 1.2425863363363363, "grad_norm": 0.872484746388259, "learning_rate": 7.283004135099536e-06, "loss": 0.429, "step": 13241 }, { "epoch": 1.2426801801801801, "grad_norm": 1.1198170638425198, "learning_rate": 7.282518391764074e-06, "loss": 0.4449, "step": 13242 }, { "epoch": 1.242774024024024, "grad_norm": 0.9037003453241045, "learning_rate": 7.282032621213878e-06, "loss": 0.401, "step": 13243 }, { "epoch": 1.242867867867868, "grad_norm": 0.9415540645478417, "learning_rate": 7.281546823454738e-06, "loss": 0.3663, "step": 13244 }, { "epoch": 1.2429617117117118, "grad_norm": 1.047453455381771, "learning_rate": 7.281060998492448e-06, "loss": 0.4349, "step": 13245 }, { "epoch": 1.2430555555555556, "grad_norm": 1.4963456860387034, "learning_rate": 7.280575146332796e-06, "loss": 0.3866, "step": 13246 }, { "epoch": 1.2431493993993994, "grad_norm": 0.9990493918338733, "learning_rate": 7.280089266981582e-06, "loss": 0.3992, "step": 13247 }, { "epoch": 1.2432432432432432, "grad_norm": 0.9219942974261482, "learning_rate": 7.279603360444594e-06, "loss": 0.4271, "step": 13248 }, { "epoch": 1.243337087087087, "grad_norm": 0.9506498782523736, "learning_rate": 7.279117426727626e-06, "loss": 0.396, "step": 13249 }, { "epoch": 1.243430930930931, "grad_norm": 0.94230676543034, "learning_rate": 7.278631465836473e-06, "loss": 0.3805, "step": 13250 }, { "epoch": 1.2435247747747749, "grad_norm": 1.030529133203939, "learning_rate": 7.278145477776929e-06, "loss": 0.3762, "step": 13251 }, { "epoch": 1.2436186186186187, "grad_norm": 0.9033330955667045, "learning_rate": 7.277659462554788e-06, "loss": 0.3761, "step": 13252 }, { "epoch": 1.2437124624624625, "grad_norm": 0.9028234201264729, "learning_rate": 7.277173420175846e-06, "loss": 0.4406, "step": 13253 }, { "epoch": 1.2438063063063063, "grad_norm": 0.8450797378312411, "learning_rate": 7.276687350645897e-06, "loss": 0.3965, "step": 13254 }, { "epoch": 1.24390015015015, "grad_norm": 0.9003240694024595, "learning_rate": 7.276201253970736e-06, "loss": 0.3893, "step": 13255 }, { "epoch": 1.243993993993994, "grad_norm": 0.9839725411551016, "learning_rate": 7.275715130156162e-06, "loss": 0.4171, "step": 13256 }, { "epoch": 1.2440878378378377, "grad_norm": 0.8389277531102787, "learning_rate": 7.275228979207966e-06, "loss": 0.4368, "step": 13257 }, { "epoch": 1.2441816816816818, "grad_norm": 0.8765077834081251, "learning_rate": 7.274742801131946e-06, "loss": 0.4503, "step": 13258 }, { "epoch": 1.2442755255255256, "grad_norm": 2.356086115726995, "learning_rate": 7.274256595933904e-06, "loss": 0.4044, "step": 13259 }, { "epoch": 1.2443693693693694, "grad_norm": 0.8373420130299049, "learning_rate": 7.27377036361963e-06, "loss": 0.4252, "step": 13260 }, { "epoch": 1.2444632132132132, "grad_norm": 0.9781442442554031, "learning_rate": 7.273284104194925e-06, "loss": 0.4371, "step": 13261 }, { "epoch": 1.244557057057057, "grad_norm": 0.9392438964931678, "learning_rate": 7.272797817665587e-06, "loss": 0.3902, "step": 13262 }, { "epoch": 1.244650900900901, "grad_norm": 0.905555525888417, "learning_rate": 7.272311504037412e-06, "loss": 0.3773, "step": 13263 }, { "epoch": 1.2447447447447448, "grad_norm": 1.025433512938086, "learning_rate": 7.271825163316199e-06, "loss": 0.4406, "step": 13264 }, { "epoch": 1.2448385885885886, "grad_norm": 1.0500914847585736, "learning_rate": 7.271338795507748e-06, "loss": 0.429, "step": 13265 }, { "epoch": 1.2449324324324325, "grad_norm": 0.9251132817656155, "learning_rate": 7.270852400617858e-06, "loss": 0.3973, "step": 13266 }, { "epoch": 1.2450262762762763, "grad_norm": 1.0059814314421927, "learning_rate": 7.270365978652326e-06, "loss": 0.4299, "step": 13267 }, { "epoch": 1.24512012012012, "grad_norm": 1.1561154510810145, "learning_rate": 7.2698795296169545e-06, "loss": 0.4281, "step": 13268 }, { "epoch": 1.2452139639639639, "grad_norm": 1.0047777192289804, "learning_rate": 7.269393053517542e-06, "loss": 0.3997, "step": 13269 }, { "epoch": 1.2453078078078077, "grad_norm": 0.9368328849972694, "learning_rate": 7.268906550359888e-06, "loss": 0.4087, "step": 13270 }, { "epoch": 1.2454016516516517, "grad_norm": 1.2793846421941122, "learning_rate": 7.268420020149796e-06, "loss": 0.431, "step": 13271 }, { "epoch": 1.2454954954954955, "grad_norm": 0.86613890556674, "learning_rate": 7.267933462893064e-06, "loss": 0.3299, "step": 13272 }, { "epoch": 1.2455893393393394, "grad_norm": 1.1019224720281924, "learning_rate": 7.267446878595495e-06, "loss": 0.4043, "step": 13273 }, { "epoch": 1.2456831831831832, "grad_norm": 0.9323987258923292, "learning_rate": 7.266960267262888e-06, "loss": 0.3749, "step": 13274 }, { "epoch": 1.245777027027027, "grad_norm": 1.0615239645800711, "learning_rate": 7.2664736289010485e-06, "loss": 0.3745, "step": 13275 }, { "epoch": 1.2458708708708708, "grad_norm": 0.8984089574475959, "learning_rate": 7.265986963515777e-06, "loss": 0.4461, "step": 13276 }, { "epoch": 1.2459647147147148, "grad_norm": 1.06662380257679, "learning_rate": 7.2655002711128764e-06, "loss": 0.398, "step": 13277 }, { "epoch": 1.2460585585585586, "grad_norm": 1.236040743479191, "learning_rate": 7.26501355169815e-06, "loss": 0.3364, "step": 13278 }, { "epoch": 1.2461524024024024, "grad_norm": 0.963596404822075, "learning_rate": 7.2645268052773984e-06, "loss": 0.4104, "step": 13279 }, { "epoch": 1.2462462462462462, "grad_norm": 1.1159445279578402, "learning_rate": 7.264040031856429e-06, "loss": 0.4567, "step": 13280 }, { "epoch": 1.24634009009009, "grad_norm": 1.5656424934655095, "learning_rate": 7.263553231441044e-06, "loss": 0.4019, "step": 13281 }, { "epoch": 1.2464339339339339, "grad_norm": 1.1141865757060943, "learning_rate": 7.263066404037047e-06, "loss": 0.3891, "step": 13282 }, { "epoch": 1.2465277777777777, "grad_norm": 1.1276751529877325, "learning_rate": 7.262579549650243e-06, "loss": 0.4155, "step": 13283 }, { "epoch": 1.2466216216216217, "grad_norm": 0.9709462968547001, "learning_rate": 7.2620926682864365e-06, "loss": 0.3725, "step": 13284 }, { "epoch": 1.2467154654654655, "grad_norm": 1.113777053723828, "learning_rate": 7.261605759951433e-06, "loss": 0.4442, "step": 13285 }, { "epoch": 1.2468093093093093, "grad_norm": 0.9107888948603804, "learning_rate": 7.261118824651037e-06, "loss": 0.3708, "step": 13286 }, { "epoch": 1.2469031531531531, "grad_norm": 0.948526425680422, "learning_rate": 7.260631862391056e-06, "loss": 0.4024, "step": 13287 }, { "epoch": 1.246996996996997, "grad_norm": 1.1976555553725778, "learning_rate": 7.260144873177295e-06, "loss": 0.4387, "step": 13288 }, { "epoch": 1.2470908408408408, "grad_norm": 0.9615179337652777, "learning_rate": 7.259657857015561e-06, "loss": 0.3919, "step": 13289 }, { "epoch": 1.2471846846846848, "grad_norm": 0.9923290662204631, "learning_rate": 7.2591708139116605e-06, "loss": 0.4208, "step": 13290 }, { "epoch": 1.2472785285285286, "grad_norm": 1.1540429798937066, "learning_rate": 7.2586837438714e-06, "loss": 0.4102, "step": 13291 }, { "epoch": 1.2473723723723724, "grad_norm": 0.9513177904464055, "learning_rate": 7.258196646900586e-06, "loss": 0.3633, "step": 13292 }, { "epoch": 1.2474662162162162, "grad_norm": 1.129089361097782, "learning_rate": 7.25770952300503e-06, "loss": 0.4075, "step": 13293 }, { "epoch": 1.24756006006006, "grad_norm": 0.9042654400162743, "learning_rate": 7.257222372190537e-06, "loss": 0.4097, "step": 13294 }, { "epoch": 1.2476539039039038, "grad_norm": 0.8711485046989039, "learning_rate": 7.256735194462916e-06, "loss": 0.4371, "step": 13295 }, { "epoch": 1.2477477477477477, "grad_norm": 0.9002672894278376, "learning_rate": 7.256247989827976e-06, "loss": 0.3795, "step": 13296 }, { "epoch": 1.2478415915915915, "grad_norm": 0.9774589510202263, "learning_rate": 7.255760758291524e-06, "loss": 0.4644, "step": 13297 }, { "epoch": 1.2479354354354355, "grad_norm": 0.8527020628897495, "learning_rate": 7.255273499859372e-06, "loss": 0.369, "step": 13298 }, { "epoch": 1.2480292792792793, "grad_norm": 0.9275614104214609, "learning_rate": 7.2547862145373295e-06, "loss": 0.3711, "step": 13299 }, { "epoch": 1.2481231231231231, "grad_norm": 0.8917019414392064, "learning_rate": 7.254298902331204e-06, "loss": 0.3707, "step": 13300 }, { "epoch": 1.248216966966967, "grad_norm": 0.8517979191393988, "learning_rate": 7.253811563246807e-06, "loss": 0.4159, "step": 13301 }, { "epoch": 1.2483108108108107, "grad_norm": 0.8913145192913949, "learning_rate": 7.253324197289953e-06, "loss": 0.4158, "step": 13302 }, { "epoch": 1.2484046546546548, "grad_norm": 0.8792257515169076, "learning_rate": 7.252836804466446e-06, "loss": 0.3946, "step": 13303 }, { "epoch": 1.2484984984984986, "grad_norm": 0.9343840595917541, "learning_rate": 7.2523493847821015e-06, "loss": 0.4036, "step": 13304 }, { "epoch": 1.2485923423423424, "grad_norm": 0.9771979058222829, "learning_rate": 7.2518619382427314e-06, "loss": 0.3838, "step": 13305 }, { "epoch": 1.2486861861861862, "grad_norm": 0.888040788436625, "learning_rate": 7.251374464854144e-06, "loss": 0.3653, "step": 13306 }, { "epoch": 1.24878003003003, "grad_norm": 0.8727324680375871, "learning_rate": 7.250886964622156e-06, "loss": 0.3823, "step": 13307 }, { "epoch": 1.2488738738738738, "grad_norm": 1.1355694947067276, "learning_rate": 7.250399437552578e-06, "loss": 0.4277, "step": 13308 }, { "epoch": 1.2489677177177176, "grad_norm": 0.8527502057561249, "learning_rate": 7.249911883651222e-06, "loss": 0.3472, "step": 13309 }, { "epoch": 1.2490615615615615, "grad_norm": 0.9595052081735673, "learning_rate": 7.249424302923899e-06, "loss": 0.3895, "step": 13310 }, { "epoch": 1.2491554054054055, "grad_norm": 1.346813826543631, "learning_rate": 7.24893669537643e-06, "loss": 0.4109, "step": 13311 }, { "epoch": 1.2492492492492493, "grad_norm": 1.062006103176069, "learning_rate": 7.2484490610146205e-06, "loss": 0.429, "step": 13312 }, { "epoch": 1.249343093093093, "grad_norm": 1.091562757905817, "learning_rate": 7.2479613998442896e-06, "loss": 0.3779, "step": 13313 }, { "epoch": 1.249436936936937, "grad_norm": 0.914384188467733, "learning_rate": 7.247473711871251e-06, "loss": 0.4264, "step": 13314 }, { "epoch": 1.2495307807807807, "grad_norm": 0.9172259246237683, "learning_rate": 7.246985997101319e-06, "loss": 0.348, "step": 13315 }, { "epoch": 1.2496246246246245, "grad_norm": 1.1382203816432845, "learning_rate": 7.246498255540307e-06, "loss": 0.3798, "step": 13316 }, { "epoch": 1.2497184684684686, "grad_norm": 0.9703922335665932, "learning_rate": 7.2460104871940326e-06, "loss": 0.3692, "step": 13317 }, { "epoch": 1.2498123123123124, "grad_norm": 0.9205329138899534, "learning_rate": 7.245522692068311e-06, "loss": 0.4545, "step": 13318 }, { "epoch": 1.2499061561561562, "grad_norm": 0.881118540154718, "learning_rate": 7.245034870168957e-06, "loss": 0.3828, "step": 13319 }, { "epoch": 1.25, "grad_norm": 0.9760297726181132, "learning_rate": 7.24454702150179e-06, "loss": 0.4145, "step": 13320 }, { "epoch": 1.2500938438438438, "grad_norm": 1.1202183097446754, "learning_rate": 7.244059146072624e-06, "loss": 0.3757, "step": 13321 }, { "epoch": 1.2501876876876876, "grad_norm": 1.576720310624913, "learning_rate": 7.243571243887275e-06, "loss": 0.427, "step": 13322 }, { "epoch": 1.2502815315315314, "grad_norm": 0.9468960917276188, "learning_rate": 7.243083314951565e-06, "loss": 0.4398, "step": 13323 }, { "epoch": 1.2503753753753752, "grad_norm": 1.054384254975099, "learning_rate": 7.242595359271306e-06, "loss": 0.3846, "step": 13324 }, { "epoch": 1.2504692192192193, "grad_norm": 0.9329735564319575, "learning_rate": 7.242107376852319e-06, "loss": 0.3894, "step": 13325 }, { "epoch": 1.250563063063063, "grad_norm": 0.9951900918818493, "learning_rate": 7.241619367700423e-06, "loss": 0.4287, "step": 13326 }, { "epoch": 1.250656906906907, "grad_norm": 0.9772268367080329, "learning_rate": 7.241131331821435e-06, "loss": 0.4238, "step": 13327 }, { "epoch": 1.2507507507507507, "grad_norm": 0.9033932421577834, "learning_rate": 7.240643269221174e-06, "loss": 0.4176, "step": 13328 }, { "epoch": 1.2508445945945945, "grad_norm": 0.8229864533323598, "learning_rate": 7.24015517990546e-06, "loss": 0.3782, "step": 13329 }, { "epoch": 1.2509384384384385, "grad_norm": 0.9423385591097384, "learning_rate": 7.239667063880111e-06, "loss": 0.4346, "step": 13330 }, { "epoch": 1.2510322822822824, "grad_norm": 0.9989496573281575, "learning_rate": 7.239178921150949e-06, "loss": 0.4343, "step": 13331 }, { "epoch": 1.2511261261261262, "grad_norm": 1.0185731019147461, "learning_rate": 7.238690751723794e-06, "loss": 0.4294, "step": 13332 }, { "epoch": 1.25121996996997, "grad_norm": 0.9157544170751766, "learning_rate": 7.238202555604465e-06, "loss": 0.3801, "step": 13333 }, { "epoch": 1.2513138138138138, "grad_norm": 0.8962487338386085, "learning_rate": 7.2377143327987835e-06, "loss": 0.3937, "step": 13334 }, { "epoch": 1.2514076576576576, "grad_norm": 1.002839132940721, "learning_rate": 7.237226083312571e-06, "loss": 0.4224, "step": 13335 }, { "epoch": 1.2515015015015014, "grad_norm": 1.0636228819409894, "learning_rate": 7.236737807151647e-06, "loss": 0.3986, "step": 13336 }, { "epoch": 1.2515953453453452, "grad_norm": 1.2325526179371005, "learning_rate": 7.236249504321835e-06, "loss": 0.3894, "step": 13337 }, { "epoch": 1.2516891891891893, "grad_norm": 1.3071481325983232, "learning_rate": 7.235761174828959e-06, "loss": 0.399, "step": 13338 }, { "epoch": 1.251783033033033, "grad_norm": 1.029875983428373, "learning_rate": 7.235272818678839e-06, "loss": 0.4202, "step": 13339 }, { "epoch": 1.2518768768768769, "grad_norm": 0.9880806912259761, "learning_rate": 7.234784435877297e-06, "loss": 0.4035, "step": 13340 }, { "epoch": 1.2519707207207207, "grad_norm": 0.9317877232265697, "learning_rate": 7.234296026430157e-06, "loss": 0.4118, "step": 13341 }, { "epoch": 1.2520645645645645, "grad_norm": 1.0731575536322733, "learning_rate": 7.233807590343244e-06, "loss": 0.4332, "step": 13342 }, { "epoch": 1.2521584084084085, "grad_norm": 1.1896064378281266, "learning_rate": 7.233319127622378e-06, "loss": 0.4232, "step": 13343 }, { "epoch": 1.2522522522522523, "grad_norm": 0.8190174259309566, "learning_rate": 7.232830638273387e-06, "loss": 0.3766, "step": 13344 }, { "epoch": 1.2523460960960962, "grad_norm": 1.005105252552367, "learning_rate": 7.232342122302094e-06, "loss": 0.4188, "step": 13345 }, { "epoch": 1.25243993993994, "grad_norm": 0.9476476470477945, "learning_rate": 7.231853579714321e-06, "loss": 0.4362, "step": 13346 }, { "epoch": 1.2525337837837838, "grad_norm": 0.9591174489242242, "learning_rate": 7.231365010515897e-06, "loss": 0.4275, "step": 13347 }, { "epoch": 1.2526276276276276, "grad_norm": 0.8630864407391174, "learning_rate": 7.230876414712644e-06, "loss": 0.4274, "step": 13348 }, { "epoch": 1.2527214714714714, "grad_norm": 0.8577685964945392, "learning_rate": 7.230387792310389e-06, "loss": 0.4022, "step": 13349 }, { "epoch": 1.2528153153153152, "grad_norm": 0.8963308628046399, "learning_rate": 7.229899143314959e-06, "loss": 0.3618, "step": 13350 }, { "epoch": 1.2529091591591592, "grad_norm": 0.9076994840715628, "learning_rate": 7.229410467732178e-06, "loss": 0.4386, "step": 13351 }, { "epoch": 1.253003003003003, "grad_norm": 1.1875160678933607, "learning_rate": 7.228921765567874e-06, "loss": 0.4327, "step": 13352 }, { "epoch": 1.2530968468468469, "grad_norm": 1.111838056257318, "learning_rate": 7.228433036827874e-06, "loss": 0.4389, "step": 13353 }, { "epoch": 1.2531906906906907, "grad_norm": 1.0922019540397145, "learning_rate": 7.2279442815180046e-06, "loss": 0.3984, "step": 13354 }, { "epoch": 1.2532845345345345, "grad_norm": 0.8413042401913052, "learning_rate": 7.227455499644092e-06, "loss": 0.4117, "step": 13355 }, { "epoch": 1.2533783783783785, "grad_norm": 0.919925892888272, "learning_rate": 7.2269666912119674e-06, "loss": 0.3929, "step": 13356 }, { "epoch": 1.2534722222222223, "grad_norm": 1.2299617715744446, "learning_rate": 7.226477856227457e-06, "loss": 0.4231, "step": 13357 }, { "epoch": 1.2535660660660661, "grad_norm": 0.9383680877093983, "learning_rate": 7.225988994696386e-06, "loss": 0.4155, "step": 13358 }, { "epoch": 1.25365990990991, "grad_norm": 1.7557566504912636, "learning_rate": 7.2255001066245876e-06, "loss": 0.4088, "step": 13359 }, { "epoch": 1.2537537537537538, "grad_norm": 0.8532929558405041, "learning_rate": 7.22501119201789e-06, "loss": 0.4007, "step": 13360 }, { "epoch": 1.2538475975975976, "grad_norm": 0.9118865565852357, "learning_rate": 7.224522250882121e-06, "loss": 0.4418, "step": 13361 }, { "epoch": 1.2539414414414414, "grad_norm": 0.9026592587862616, "learning_rate": 7.224033283223113e-06, "loss": 0.4105, "step": 13362 }, { "epoch": 1.2540352852852852, "grad_norm": 0.8996947166593635, "learning_rate": 7.223544289046695e-06, "loss": 0.3776, "step": 13363 }, { "epoch": 1.254129129129129, "grad_norm": 1.0646175758131502, "learning_rate": 7.223055268358694e-06, "loss": 0.4237, "step": 13364 }, { "epoch": 1.254222972972973, "grad_norm": 0.9087451920498344, "learning_rate": 7.222566221164946e-06, "loss": 0.3706, "step": 13365 }, { "epoch": 1.2543168168168168, "grad_norm": 0.8682058989931667, "learning_rate": 7.222077147471278e-06, "loss": 0.3779, "step": 13366 }, { "epoch": 1.2544106606606606, "grad_norm": 0.8563145545775229, "learning_rate": 7.221588047283522e-06, "loss": 0.3742, "step": 13367 }, { "epoch": 1.2545045045045045, "grad_norm": 0.9920535434414945, "learning_rate": 7.221098920607512e-06, "loss": 0.4116, "step": 13368 }, { "epoch": 1.2545983483483483, "grad_norm": 0.8433855774945416, "learning_rate": 7.220609767449076e-06, "loss": 0.4063, "step": 13369 }, { "epoch": 1.2546921921921923, "grad_norm": 0.9866364137916798, "learning_rate": 7.22012058781405e-06, "loss": 0.4005, "step": 13370 }, { "epoch": 1.2547860360360361, "grad_norm": 4.203780557774808, "learning_rate": 7.219631381708265e-06, "loss": 0.3919, "step": 13371 }, { "epoch": 1.25487987987988, "grad_norm": 0.9922720549945963, "learning_rate": 7.2191421491375534e-06, "loss": 0.4016, "step": 13372 }, { "epoch": 1.2549737237237237, "grad_norm": 0.8152070676706538, "learning_rate": 7.218652890107749e-06, "loss": 0.419, "step": 13373 }, { "epoch": 1.2550675675675675, "grad_norm": 1.0749205901795886, "learning_rate": 7.218163604624685e-06, "loss": 0.3877, "step": 13374 }, { "epoch": 1.2551614114114114, "grad_norm": 0.956150895863004, "learning_rate": 7.217674292694194e-06, "loss": 0.4056, "step": 13375 }, { "epoch": 1.2552552552552552, "grad_norm": 1.3003218258767033, "learning_rate": 7.217184954322114e-06, "loss": 0.3853, "step": 13376 }, { "epoch": 1.255349099099099, "grad_norm": 0.8576807984962471, "learning_rate": 7.216695589514275e-06, "loss": 0.4202, "step": 13377 }, { "epoch": 1.255442942942943, "grad_norm": 0.9992302803722095, "learning_rate": 7.216206198276514e-06, "loss": 0.371, "step": 13378 }, { "epoch": 1.2555367867867868, "grad_norm": 1.155867989786159, "learning_rate": 7.215716780614665e-06, "loss": 0.3966, "step": 13379 }, { "epoch": 1.2556306306306306, "grad_norm": 0.9098613397337233, "learning_rate": 7.215227336534565e-06, "loss": 0.397, "step": 13380 }, { "epoch": 1.2557244744744744, "grad_norm": 0.8542987821979354, "learning_rate": 7.214737866042047e-06, "loss": 0.3526, "step": 13381 }, { "epoch": 1.2558183183183182, "grad_norm": 0.9117298539968418, "learning_rate": 7.214248369142952e-06, "loss": 0.4055, "step": 13382 }, { "epoch": 1.2559121621621623, "grad_norm": 0.8004639810637459, "learning_rate": 7.213758845843112e-06, "loss": 0.3633, "step": 13383 }, { "epoch": 1.256006006006006, "grad_norm": 16.804829929922043, "learning_rate": 7.213269296148363e-06, "loss": 0.3719, "step": 13384 }, { "epoch": 1.25609984984985, "grad_norm": 0.9956673661369161, "learning_rate": 7.212779720064545e-06, "loss": 0.386, "step": 13385 }, { "epoch": 1.2561936936936937, "grad_norm": 0.9938367570128406, "learning_rate": 7.212290117597494e-06, "loss": 0.4132, "step": 13386 }, { "epoch": 1.2562875375375375, "grad_norm": 1.0840796277935845, "learning_rate": 7.211800488753047e-06, "loss": 0.4527, "step": 13387 }, { "epoch": 1.2563813813813813, "grad_norm": 0.991011460378367, "learning_rate": 7.2113108335370415e-06, "loss": 0.375, "step": 13388 }, { "epoch": 1.2564752252252251, "grad_norm": 1.1341811614455368, "learning_rate": 7.210821151955319e-06, "loss": 0.4229, "step": 13389 }, { "epoch": 1.256569069069069, "grad_norm": 0.9496319405622088, "learning_rate": 7.210331444013714e-06, "loss": 0.4078, "step": 13390 }, { "epoch": 1.256662912912913, "grad_norm": 0.8112828240601689, "learning_rate": 7.2098417097180675e-06, "loss": 0.3553, "step": 13391 }, { "epoch": 1.2567567567567568, "grad_norm": 0.8887356754347375, "learning_rate": 7.20935194907422e-06, "loss": 0.3856, "step": 13392 }, { "epoch": 1.2568506006006006, "grad_norm": 0.8375316082113384, "learning_rate": 7.208862162088006e-06, "loss": 0.3888, "step": 13393 }, { "epoch": 1.2569444444444444, "grad_norm": 1.0000379412585194, "learning_rate": 7.208372348765271e-06, "loss": 0.4242, "step": 13394 }, { "epoch": 1.2570382882882882, "grad_norm": 0.8952693745619649, "learning_rate": 7.2078825091118524e-06, "loss": 0.4021, "step": 13395 }, { "epoch": 1.2571321321321323, "grad_norm": 0.9878571800662941, "learning_rate": 7.2073926431335884e-06, "loss": 0.3658, "step": 13396 }, { "epoch": 1.257225975975976, "grad_norm": 0.8614524026115937, "learning_rate": 7.2069027508363235e-06, "loss": 0.4454, "step": 13397 }, { "epoch": 1.2573198198198199, "grad_norm": 0.8624817148359553, "learning_rate": 7.206412832225898e-06, "loss": 0.3885, "step": 13398 }, { "epoch": 1.2574136636636637, "grad_norm": 0.9166658172479959, "learning_rate": 7.205922887308152e-06, "loss": 0.4051, "step": 13399 }, { "epoch": 1.2575075075075075, "grad_norm": 1.0139849205055202, "learning_rate": 7.205432916088928e-06, "loss": 0.4003, "step": 13400 }, { "epoch": 1.2576013513513513, "grad_norm": 0.9821927711637775, "learning_rate": 7.204942918574068e-06, "loss": 0.4291, "step": 13401 }, { "epoch": 1.2576951951951951, "grad_norm": 1.201150088257677, "learning_rate": 7.204452894769414e-06, "loss": 0.3949, "step": 13402 }, { "epoch": 1.257789039039039, "grad_norm": 1.4066992204877466, "learning_rate": 7.203962844680807e-06, "loss": 0.3622, "step": 13403 }, { "epoch": 1.2578828828828827, "grad_norm": 0.7627161618010093, "learning_rate": 7.203472768314095e-06, "loss": 0.3783, "step": 13404 }, { "epoch": 1.2579767267267268, "grad_norm": 1.0113322136819025, "learning_rate": 7.2029826656751145e-06, "loss": 0.4003, "step": 13405 }, { "epoch": 1.2580705705705706, "grad_norm": 0.848474583800883, "learning_rate": 7.202492536769715e-06, "loss": 0.3723, "step": 13406 }, { "epoch": 1.2581644144144144, "grad_norm": 0.8298078986715192, "learning_rate": 7.202002381603736e-06, "loss": 0.3543, "step": 13407 }, { "epoch": 1.2582582582582582, "grad_norm": 0.9142139589134182, "learning_rate": 7.201512200183025e-06, "loss": 0.366, "step": 13408 }, { "epoch": 1.258352102102102, "grad_norm": 0.9383033606375888, "learning_rate": 7.2010219925134226e-06, "loss": 0.4032, "step": 13409 }, { "epoch": 1.258445945945946, "grad_norm": 0.9611534525169027, "learning_rate": 7.200531758600779e-06, "loss": 0.4429, "step": 13410 }, { "epoch": 1.2585397897897899, "grad_norm": 1.0166853037269903, "learning_rate": 7.200041498450933e-06, "loss": 0.4113, "step": 13411 }, { "epoch": 1.2586336336336337, "grad_norm": 0.891469933231065, "learning_rate": 7.1995512120697355e-06, "loss": 0.4051, "step": 13412 }, { "epoch": 1.2587274774774775, "grad_norm": 1.3149493353131942, "learning_rate": 7.1990608994630305e-06, "loss": 0.4104, "step": 13413 }, { "epoch": 1.2588213213213213, "grad_norm": 0.9542385557092236, "learning_rate": 7.198570560636662e-06, "loss": 0.4672, "step": 13414 }, { "epoch": 1.258915165165165, "grad_norm": 0.8326284432321818, "learning_rate": 7.198080195596477e-06, "loss": 0.358, "step": 13415 }, { "epoch": 1.259009009009009, "grad_norm": 0.922043911027596, "learning_rate": 7.197589804348326e-06, "loss": 0.4243, "step": 13416 }, { "epoch": 1.2591028528528527, "grad_norm": 2.7744382705658874, "learning_rate": 7.197099386898051e-06, "loss": 0.442, "step": 13417 }, { "epoch": 1.2591966966966968, "grad_norm": 1.445708014230761, "learning_rate": 7.196608943251502e-06, "loss": 0.3795, "step": 13418 }, { "epoch": 1.2592905405405406, "grad_norm": 0.9159274143959445, "learning_rate": 7.196118473414527e-06, "loss": 0.4312, "step": 13419 }, { "epoch": 1.2593843843843844, "grad_norm": 1.135711751004293, "learning_rate": 7.195627977392972e-06, "loss": 0.3701, "step": 13420 }, { "epoch": 1.2594782282282282, "grad_norm": 0.9134823486885005, "learning_rate": 7.1951374551926856e-06, "loss": 0.4034, "step": 13421 }, { "epoch": 1.259572072072072, "grad_norm": 1.0411674940254754, "learning_rate": 7.194646906819518e-06, "loss": 0.3852, "step": 13422 }, { "epoch": 1.259665915915916, "grad_norm": 0.8853235309942312, "learning_rate": 7.194156332279316e-06, "loss": 0.3806, "step": 13423 }, { "epoch": 1.2597597597597598, "grad_norm": 0.9451876643146181, "learning_rate": 7.193665731577931e-06, "loss": 0.4361, "step": 13424 }, { "epoch": 1.2598536036036037, "grad_norm": 2.0120150956869836, "learning_rate": 7.193175104721212e-06, "loss": 0.4375, "step": 13425 }, { "epoch": 1.2599474474474475, "grad_norm": 1.0052445863057955, "learning_rate": 7.192684451715006e-06, "loss": 0.4264, "step": 13426 }, { "epoch": 1.2600412912912913, "grad_norm": 0.846142447375126, "learning_rate": 7.192193772565166e-06, "loss": 0.3928, "step": 13427 }, { "epoch": 1.260135135135135, "grad_norm": 1.4770806993862966, "learning_rate": 7.1917030672775425e-06, "loss": 0.4494, "step": 13428 }, { "epoch": 1.260228978978979, "grad_norm": 0.8975220220821, "learning_rate": 7.1912123358579845e-06, "loss": 0.4109, "step": 13429 }, { "epoch": 1.2603228228228227, "grad_norm": 0.8290213014069742, "learning_rate": 7.190721578312343e-06, "loss": 0.4192, "step": 13430 }, { "epoch": 1.2604166666666667, "grad_norm": 1.019148639420539, "learning_rate": 7.190230794646472e-06, "loss": 0.402, "step": 13431 }, { "epoch": 1.2605105105105106, "grad_norm": 0.8633484361012127, "learning_rate": 7.189739984866222e-06, "loss": 0.432, "step": 13432 }, { "epoch": 1.2606043543543544, "grad_norm": 0.8618370066330221, "learning_rate": 7.189249148977442e-06, "loss": 0.4172, "step": 13433 }, { "epoch": 1.2606981981981982, "grad_norm": 0.8315937804227708, "learning_rate": 7.188758286985988e-06, "loss": 0.3922, "step": 13434 }, { "epoch": 1.260792042042042, "grad_norm": 0.9658212174937709, "learning_rate": 7.188267398897712e-06, "loss": 0.3891, "step": 13435 }, { "epoch": 1.260885885885886, "grad_norm": 0.9876001217485205, "learning_rate": 7.187776484718465e-06, "loss": 0.4367, "step": 13436 }, { "epoch": 1.2609797297297298, "grad_norm": 0.9664341960129321, "learning_rate": 7.187285544454103e-06, "loss": 0.368, "step": 13437 }, { "epoch": 1.2610735735735736, "grad_norm": 1.4141899399131526, "learning_rate": 7.186794578110477e-06, "loss": 0.4385, "step": 13438 }, { "epoch": 1.2611674174174174, "grad_norm": 0.9676265400690411, "learning_rate": 7.186303585693441e-06, "loss": 0.3582, "step": 13439 }, { "epoch": 1.2612612612612613, "grad_norm": 0.9727194478031858, "learning_rate": 7.1858125672088515e-06, "loss": 0.3968, "step": 13440 }, { "epoch": 1.261355105105105, "grad_norm": 0.8873236174605222, "learning_rate": 7.1853215226625614e-06, "loss": 0.411, "step": 13441 }, { "epoch": 1.2614489489489489, "grad_norm": 0.8972464983119032, "learning_rate": 7.184830452060425e-06, "loss": 0.4139, "step": 13442 }, { "epoch": 1.2615427927927927, "grad_norm": 0.9183944316252953, "learning_rate": 7.184339355408298e-06, "loss": 0.3838, "step": 13443 }, { "epoch": 1.2616366366366365, "grad_norm": 0.9081379170249314, "learning_rate": 7.1838482327120364e-06, "loss": 0.3992, "step": 13444 }, { "epoch": 1.2617304804804805, "grad_norm": 0.9466622603589878, "learning_rate": 7.183357083977495e-06, "loss": 0.4177, "step": 13445 }, { "epoch": 1.2618243243243243, "grad_norm": 1.0086524498629708, "learning_rate": 7.18286590921053e-06, "loss": 0.3928, "step": 13446 }, { "epoch": 1.2619181681681682, "grad_norm": 1.031698078980531, "learning_rate": 7.182374708416998e-06, "loss": 0.3842, "step": 13447 }, { "epoch": 1.262012012012012, "grad_norm": 0.9452602626325719, "learning_rate": 7.181883481602755e-06, "loss": 0.4481, "step": 13448 }, { "epoch": 1.2621058558558558, "grad_norm": 1.1609342795285726, "learning_rate": 7.181392228773659e-06, "loss": 0.3813, "step": 13449 }, { "epoch": 1.2621996996996998, "grad_norm": 0.9048391564516882, "learning_rate": 7.1809009499355675e-06, "loss": 0.3884, "step": 13450 }, { "epoch": 1.2622935435435436, "grad_norm": 1.0856280213895941, "learning_rate": 7.180409645094335e-06, "loss": 0.441, "step": 13451 }, { "epoch": 1.2623873873873874, "grad_norm": 1.0032761461685784, "learning_rate": 7.179918314255824e-06, "loss": 0.4513, "step": 13452 }, { "epoch": 1.2624812312312312, "grad_norm": 0.9443786403835833, "learning_rate": 7.179426957425889e-06, "loss": 0.4366, "step": 13453 }, { "epoch": 1.262575075075075, "grad_norm": 1.053508985196892, "learning_rate": 7.178935574610389e-06, "loss": 0.4117, "step": 13454 }, { "epoch": 1.2626689189189189, "grad_norm": 0.8476129114299981, "learning_rate": 7.178444165815186e-06, "loss": 0.4227, "step": 13455 }, { "epoch": 1.2627627627627627, "grad_norm": 0.9613568063354502, "learning_rate": 7.1779527310461365e-06, "loss": 0.3906, "step": 13456 }, { "epoch": 1.2628566066066065, "grad_norm": 0.7866670074145773, "learning_rate": 7.1774612703091e-06, "loss": 0.3515, "step": 13457 }, { "epoch": 1.2629504504504505, "grad_norm": 0.9445868472113622, "learning_rate": 7.176969783609936e-06, "loss": 0.4124, "step": 13458 }, { "epoch": 1.2630442942942943, "grad_norm": 1.216209446839825, "learning_rate": 7.1764782709545045e-06, "loss": 0.3892, "step": 13459 }, { "epoch": 1.2631381381381381, "grad_norm": 0.9998424636008094, "learning_rate": 7.175986732348667e-06, "loss": 0.4322, "step": 13460 }, { "epoch": 1.263231981981982, "grad_norm": 1.0863075118827263, "learning_rate": 7.175495167798283e-06, "loss": 0.4174, "step": 13461 }, { "epoch": 1.2633258258258258, "grad_norm": 0.8906112379130767, "learning_rate": 7.175003577309217e-06, "loss": 0.4328, "step": 13462 }, { "epoch": 1.2634196696696698, "grad_norm": 0.9990000609963154, "learning_rate": 7.174511960887324e-06, "loss": 0.3522, "step": 13463 }, { "epoch": 1.2635135135135136, "grad_norm": 0.9319562507686818, "learning_rate": 7.17402031853847e-06, "loss": 0.368, "step": 13464 }, { "epoch": 1.2636073573573574, "grad_norm": 0.9167554289730453, "learning_rate": 7.173528650268515e-06, "loss": 0.4122, "step": 13465 }, { "epoch": 1.2637012012012012, "grad_norm": 1.4027932013837063, "learning_rate": 7.173036956083324e-06, "loss": 0.4161, "step": 13466 }, { "epoch": 1.263795045045045, "grad_norm": 1.096057553136262, "learning_rate": 7.172545235988756e-06, "loss": 0.3855, "step": 13467 }, { "epoch": 1.2638888888888888, "grad_norm": 0.866029892783469, "learning_rate": 7.172053489990677e-06, "loss": 0.3865, "step": 13468 }, { "epoch": 1.2639827327327327, "grad_norm": 1.1027020307010165, "learning_rate": 7.171561718094947e-06, "loss": 0.3745, "step": 13469 }, { "epoch": 1.2640765765765765, "grad_norm": 0.8825878921491935, "learning_rate": 7.171069920307432e-06, "loss": 0.3944, "step": 13470 }, { "epoch": 1.2641704204204205, "grad_norm": 1.295626360672086, "learning_rate": 7.170578096633995e-06, "loss": 0.4403, "step": 13471 }, { "epoch": 1.2642642642642643, "grad_norm": 1.1240775407127093, "learning_rate": 7.1700862470804986e-06, "loss": 0.3954, "step": 13472 }, { "epoch": 1.2643581081081081, "grad_norm": 0.8872763231640912, "learning_rate": 7.169594371652809e-06, "loss": 0.3879, "step": 13473 }, { "epoch": 1.264451951951952, "grad_norm": 1.0044338562560367, "learning_rate": 7.169102470356791e-06, "loss": 0.3863, "step": 13474 }, { "epoch": 1.2645457957957957, "grad_norm": 0.8293483690979754, "learning_rate": 7.168610543198309e-06, "loss": 0.3884, "step": 13475 }, { "epoch": 1.2646396396396398, "grad_norm": 0.9924511950616279, "learning_rate": 7.168118590183228e-06, "loss": 0.3907, "step": 13476 }, { "epoch": 1.2647334834834836, "grad_norm": 0.9419954074225129, "learning_rate": 7.167626611317413e-06, "loss": 0.3491, "step": 13477 }, { "epoch": 1.2648273273273274, "grad_norm": 1.1400772082779114, "learning_rate": 7.167134606606731e-06, "loss": 0.4081, "step": 13478 }, { "epoch": 1.2649211711711712, "grad_norm": 0.9269890156887752, "learning_rate": 7.16664257605705e-06, "loss": 0.4361, "step": 13479 }, { "epoch": 1.265015015015015, "grad_norm": 1.15424561709658, "learning_rate": 7.166150519674233e-06, "loss": 0.4403, "step": 13480 }, { "epoch": 1.2651088588588588, "grad_norm": 1.0175920804977554, "learning_rate": 7.165658437464149e-06, "loss": 0.359, "step": 13481 }, { "epoch": 1.2652027027027026, "grad_norm": 0.992807676450769, "learning_rate": 7.165166329432664e-06, "loss": 0.36, "step": 13482 }, { "epoch": 1.2652965465465464, "grad_norm": 0.939285895987274, "learning_rate": 7.164674195585646e-06, "loss": 0.4246, "step": 13483 }, { "epoch": 1.2653903903903903, "grad_norm": 0.9059641258024675, "learning_rate": 7.164182035928962e-06, "loss": 0.4007, "step": 13484 }, { "epoch": 1.2654842342342343, "grad_norm": 0.9344000682870995, "learning_rate": 7.163689850468483e-06, "loss": 0.3951, "step": 13485 }, { "epoch": 1.265578078078078, "grad_norm": 1.0276284235779827, "learning_rate": 7.163197639210074e-06, "loss": 0.417, "step": 13486 }, { "epoch": 1.265671921921922, "grad_norm": 0.8574779637914755, "learning_rate": 7.162705402159605e-06, "loss": 0.3874, "step": 13487 }, { "epoch": 1.2657657657657657, "grad_norm": 0.8340268128575907, "learning_rate": 7.162213139322946e-06, "loss": 0.4056, "step": 13488 }, { "epoch": 1.2658596096096097, "grad_norm": 0.928586641115735, "learning_rate": 7.1617208507059634e-06, "loss": 0.377, "step": 13489 }, { "epoch": 1.2659534534534536, "grad_norm": 0.9351045300151883, "learning_rate": 7.161228536314528e-06, "loss": 0.4179, "step": 13490 }, { "epoch": 1.2660472972972974, "grad_norm": 0.9398769893836734, "learning_rate": 7.160736196154513e-06, "loss": 0.414, "step": 13491 }, { "epoch": 1.2661411411411412, "grad_norm": 0.9546820128202272, "learning_rate": 7.160243830231784e-06, "loss": 0.4387, "step": 13492 }, { "epoch": 1.266234984984985, "grad_norm": 0.9634258455863266, "learning_rate": 7.159751438552216e-06, "loss": 0.4298, "step": 13493 }, { "epoch": 1.2663288288288288, "grad_norm": 0.9723287614456475, "learning_rate": 7.159259021121676e-06, "loss": 0.4525, "step": 13494 }, { "epoch": 1.2664226726726726, "grad_norm": 1.4806713449202584, "learning_rate": 7.158766577946037e-06, "loss": 0.3785, "step": 13495 }, { "epoch": 1.2665165165165164, "grad_norm": 0.8549215247220582, "learning_rate": 7.158274109031168e-06, "loss": 0.4469, "step": 13496 }, { "epoch": 1.2666103603603602, "grad_norm": 0.9207445506596132, "learning_rate": 7.157781614382945e-06, "loss": 0.3951, "step": 13497 }, { "epoch": 1.2667042042042043, "grad_norm": 1.3321153128478262, "learning_rate": 7.157289094007237e-06, "loss": 0.4417, "step": 13498 }, { "epoch": 1.266798048048048, "grad_norm": 1.0913479223265683, "learning_rate": 7.156796547909918e-06, "loss": 0.4376, "step": 13499 }, { "epoch": 1.2668918918918919, "grad_norm": 0.8207207429700752, "learning_rate": 7.15630397609686e-06, "loss": 0.3778, "step": 13500 }, { "epoch": 1.2669857357357357, "grad_norm": 0.9253584365914966, "learning_rate": 7.155811378573935e-06, "loss": 0.433, "step": 13501 }, { "epoch": 1.2670795795795795, "grad_norm": 1.092578249589396, "learning_rate": 7.155318755347016e-06, "loss": 0.375, "step": 13502 }, { "epoch": 1.2671734234234235, "grad_norm": 1.4671529902083613, "learning_rate": 7.15482610642198e-06, "loss": 0.4357, "step": 13503 }, { "epoch": 1.2672672672672673, "grad_norm": 0.9245424364108352, "learning_rate": 7.1543334318046975e-06, "loss": 0.4077, "step": 13504 }, { "epoch": 1.2673611111111112, "grad_norm": 0.9162423005295441, "learning_rate": 7.153840731501045e-06, "loss": 0.3604, "step": 13505 }, { "epoch": 1.267454954954955, "grad_norm": 0.7993980734090211, "learning_rate": 7.153348005516896e-06, "loss": 0.3796, "step": 13506 }, { "epoch": 1.2675487987987988, "grad_norm": 1.10091291569295, "learning_rate": 7.152855253858124e-06, "loss": 0.4276, "step": 13507 }, { "epoch": 1.2676426426426426, "grad_norm": 0.9863584698515795, "learning_rate": 7.152362476530605e-06, "loss": 0.429, "step": 13508 }, { "epoch": 1.2677364864864864, "grad_norm": 0.7864088351166233, "learning_rate": 7.151869673540217e-06, "loss": 0.4073, "step": 13509 }, { "epoch": 1.2678303303303302, "grad_norm": 0.9849275534458333, "learning_rate": 7.151376844892832e-06, "loss": 0.4151, "step": 13510 }, { "epoch": 1.2679241741741742, "grad_norm": 0.8265233981021901, "learning_rate": 7.150883990594329e-06, "loss": 0.3805, "step": 13511 }, { "epoch": 1.268018018018018, "grad_norm": 0.8532976057655626, "learning_rate": 7.150391110650583e-06, "loss": 0.3991, "step": 13512 }, { "epoch": 1.2681118618618619, "grad_norm": 0.9699970987461354, "learning_rate": 7.14989820506747e-06, "loss": 0.3703, "step": 13513 }, { "epoch": 1.2682057057057057, "grad_norm": 0.9723769647174247, "learning_rate": 7.149405273850867e-06, "loss": 0.4042, "step": 13514 }, { "epoch": 1.2682995495495495, "grad_norm": 0.9057180047783495, "learning_rate": 7.148912317006653e-06, "loss": 0.4398, "step": 13515 }, { "epoch": 1.2683933933933935, "grad_norm": 0.9697510105948333, "learning_rate": 7.1484193345407036e-06, "loss": 0.4575, "step": 13516 }, { "epoch": 1.2684872372372373, "grad_norm": 1.4883090583438805, "learning_rate": 7.1479263264588985e-06, "loss": 0.4308, "step": 13517 }, { "epoch": 1.2685810810810811, "grad_norm": 0.8533099206967596, "learning_rate": 7.147433292767115e-06, "loss": 0.3961, "step": 13518 }, { "epoch": 1.268674924924925, "grad_norm": 0.9028082813450771, "learning_rate": 7.146940233471231e-06, "loss": 0.4542, "step": 13519 }, { "epoch": 1.2687687687687688, "grad_norm": 0.9713055373257106, "learning_rate": 7.146447148577126e-06, "loss": 0.4102, "step": 13520 }, { "epoch": 1.2688626126126126, "grad_norm": 1.109827221909832, "learning_rate": 7.1459540380906785e-06, "loss": 0.4543, "step": 13521 }, { "epoch": 1.2689564564564564, "grad_norm": 1.069592883503295, "learning_rate": 7.14546090201777e-06, "loss": 0.3712, "step": 13522 }, { "epoch": 1.2690503003003002, "grad_norm": 0.9856821218970603, "learning_rate": 7.144967740364278e-06, "loss": 0.4198, "step": 13523 }, { "epoch": 1.2691441441441442, "grad_norm": 0.8769775218998396, "learning_rate": 7.144474553136084e-06, "loss": 0.3523, "step": 13524 }, { "epoch": 1.269237987987988, "grad_norm": 0.9378468823678587, "learning_rate": 7.143981340339066e-06, "loss": 0.4599, "step": 13525 }, { "epoch": 1.2693318318318318, "grad_norm": 0.96270845426132, "learning_rate": 7.143488101979106e-06, "loss": 0.4077, "step": 13526 }, { "epoch": 1.2694256756756757, "grad_norm": 1.029510472131998, "learning_rate": 7.142994838062086e-06, "loss": 0.4539, "step": 13527 }, { "epoch": 1.2695195195195195, "grad_norm": 2.6800404140069882, "learning_rate": 7.142501548593885e-06, "loss": 0.4222, "step": 13528 }, { "epoch": 1.2696133633633635, "grad_norm": 1.4091016632231312, "learning_rate": 7.142008233580389e-06, "loss": 0.4453, "step": 13529 }, { "epoch": 1.2697072072072073, "grad_norm": 0.9104568161952277, "learning_rate": 7.141514893027475e-06, "loss": 0.433, "step": 13530 }, { "epoch": 1.2698010510510511, "grad_norm": 1.046995640864622, "learning_rate": 7.141021526941025e-06, "loss": 0.4155, "step": 13531 }, { "epoch": 1.269894894894895, "grad_norm": 0.9038303464620733, "learning_rate": 7.140528135326925e-06, "loss": 0.3795, "step": 13532 }, { "epoch": 1.2699887387387387, "grad_norm": 0.9876724512307798, "learning_rate": 7.140034718191057e-06, "loss": 0.3793, "step": 13533 }, { "epoch": 1.2700825825825826, "grad_norm": 0.9216855221303932, "learning_rate": 7.1395412755393015e-06, "loss": 0.4085, "step": 13534 }, { "epoch": 1.2701764264264264, "grad_norm": 0.9727994447524845, "learning_rate": 7.139047807377545e-06, "loss": 0.4002, "step": 13535 }, { "epoch": 1.2702702702702702, "grad_norm": 0.914343538234024, "learning_rate": 7.13855431371167e-06, "loss": 0.3877, "step": 13536 }, { "epoch": 1.270364114114114, "grad_norm": 1.2257151498859324, "learning_rate": 7.138060794547558e-06, "loss": 0.3854, "step": 13537 }, { "epoch": 1.270457957957958, "grad_norm": 1.2709366881226998, "learning_rate": 7.137567249891097e-06, "loss": 0.3965, "step": 13538 }, { "epoch": 1.2705518018018018, "grad_norm": 1.1321582539998756, "learning_rate": 7.1370736797481695e-06, "loss": 0.4607, "step": 13539 }, { "epoch": 1.2706456456456456, "grad_norm": 0.8075766054491795, "learning_rate": 7.136580084124661e-06, "loss": 0.4384, "step": 13540 }, { "epoch": 1.2707394894894894, "grad_norm": 0.9470941196825804, "learning_rate": 7.136086463026458e-06, "loss": 0.4248, "step": 13541 }, { "epoch": 1.2708333333333333, "grad_norm": 1.0907236884864508, "learning_rate": 7.135592816459444e-06, "loss": 0.3842, "step": 13542 }, { "epoch": 1.2709271771771773, "grad_norm": 1.069577563369565, "learning_rate": 7.135099144429505e-06, "loss": 0.4098, "step": 13543 }, { "epoch": 1.271021021021021, "grad_norm": 0.9476671953421867, "learning_rate": 7.134605446942527e-06, "loss": 0.4025, "step": 13544 }, { "epoch": 1.271114864864865, "grad_norm": 0.9691806362849111, "learning_rate": 7.134111724004399e-06, "loss": 0.392, "step": 13545 }, { "epoch": 1.2712087087087087, "grad_norm": 0.8645565137866219, "learning_rate": 7.133617975621003e-06, "loss": 0.3891, "step": 13546 }, { "epoch": 1.2713025525525525, "grad_norm": 1.0011170185580915, "learning_rate": 7.1331242017982315e-06, "loss": 0.3937, "step": 13547 }, { "epoch": 1.2713963963963963, "grad_norm": 0.8877633869831223, "learning_rate": 7.132630402541969e-06, "loss": 0.3682, "step": 13548 }, { "epoch": 1.2714902402402402, "grad_norm": 0.9411994940250327, "learning_rate": 7.132136577858102e-06, "loss": 0.4306, "step": 13549 }, { "epoch": 1.271584084084084, "grad_norm": 1.0178854536994855, "learning_rate": 7.1316427277525194e-06, "loss": 0.4246, "step": 13550 }, { "epoch": 1.271677927927928, "grad_norm": 0.9269779737598225, "learning_rate": 7.13114885223111e-06, "loss": 0.3803, "step": 13551 }, { "epoch": 1.2717717717717718, "grad_norm": 1.0521231639368067, "learning_rate": 7.130654951299762e-06, "loss": 0.407, "step": 13552 }, { "epoch": 1.2718656156156156, "grad_norm": 0.9376438287079804, "learning_rate": 7.130161024964364e-06, "loss": 0.3968, "step": 13553 }, { "epoch": 1.2719594594594594, "grad_norm": 0.9046871589445543, "learning_rate": 7.129667073230807e-06, "loss": 0.4067, "step": 13554 }, { "epoch": 1.2720533033033032, "grad_norm": 2.387188256859159, "learning_rate": 7.129173096104978e-06, "loss": 0.3736, "step": 13555 }, { "epoch": 1.2721471471471473, "grad_norm": 1.0500537507983114, "learning_rate": 7.128679093592767e-06, "loss": 0.4597, "step": 13556 }, { "epoch": 1.272240990990991, "grad_norm": 0.8385158741103311, "learning_rate": 7.1281850657000636e-06, "loss": 0.4, "step": 13557 }, { "epoch": 1.272334834834835, "grad_norm": 1.0992238666721668, "learning_rate": 7.1276910124327605e-06, "loss": 0.3937, "step": 13558 }, { "epoch": 1.2724286786786787, "grad_norm": 0.9819108766099056, "learning_rate": 7.127196933796747e-06, "loss": 0.4388, "step": 13559 }, { "epoch": 1.2725225225225225, "grad_norm": 0.8970287539621764, "learning_rate": 7.126702829797914e-06, "loss": 0.4257, "step": 13560 }, { "epoch": 1.2726163663663663, "grad_norm": 1.0357552397345504, "learning_rate": 7.126208700442154e-06, "loss": 0.4365, "step": 13561 }, { "epoch": 1.2727102102102101, "grad_norm": 1.2208223004755239, "learning_rate": 7.125714545735356e-06, "loss": 0.4066, "step": 13562 }, { "epoch": 1.272804054054054, "grad_norm": 0.9677161194788371, "learning_rate": 7.125220365683414e-06, "loss": 0.4176, "step": 13563 }, { "epoch": 1.272897897897898, "grad_norm": 0.9080178495134931, "learning_rate": 7.124726160292219e-06, "loss": 0.3825, "step": 13564 }, { "epoch": 1.2729917417417418, "grad_norm": 1.0657267231596983, "learning_rate": 7.124231929567663e-06, "loss": 0.3711, "step": 13565 }, { "epoch": 1.2730855855855856, "grad_norm": 0.9501457437252151, "learning_rate": 7.123737673515641e-06, "loss": 0.3874, "step": 13566 }, { "epoch": 1.2731794294294294, "grad_norm": 1.1671354256302677, "learning_rate": 7.123243392142045e-06, "loss": 0.4463, "step": 13567 }, { "epoch": 1.2732732732732732, "grad_norm": 0.9643165405625068, "learning_rate": 7.122749085452767e-06, "loss": 0.3987, "step": 13568 }, { "epoch": 1.2733671171171173, "grad_norm": 1.9631560970031465, "learning_rate": 7.122254753453703e-06, "loss": 0.4087, "step": 13569 }, { "epoch": 1.273460960960961, "grad_norm": 1.025251077467881, "learning_rate": 7.121760396150746e-06, "loss": 0.4513, "step": 13570 }, { "epoch": 1.2735548048048049, "grad_norm": 1.1771535121542784, "learning_rate": 7.121266013549788e-06, "loss": 0.4177, "step": 13571 }, { "epoch": 1.2736486486486487, "grad_norm": 1.0180445867718988, "learning_rate": 7.120771605656727e-06, "loss": 0.398, "step": 13572 }, { "epoch": 1.2737424924924925, "grad_norm": 0.9614708314077962, "learning_rate": 7.120277172477458e-06, "loss": 0.416, "step": 13573 }, { "epoch": 1.2738363363363363, "grad_norm": 1.0099494238912579, "learning_rate": 7.119782714017873e-06, "loss": 0.3999, "step": 13574 }, { "epoch": 1.2739301801801801, "grad_norm": 1.0984578055307532, "learning_rate": 7.119288230283869e-06, "loss": 0.3789, "step": 13575 }, { "epoch": 1.274024024024024, "grad_norm": 0.8746066334379671, "learning_rate": 7.118793721281343e-06, "loss": 0.3664, "step": 13576 }, { "epoch": 1.2741178678678677, "grad_norm": 0.8573830561652729, "learning_rate": 7.118299187016189e-06, "loss": 0.4213, "step": 13577 }, { "epoch": 1.2742117117117118, "grad_norm": 0.8588202207014763, "learning_rate": 7.117804627494306e-06, "loss": 0.3641, "step": 13578 }, { "epoch": 1.2743055555555556, "grad_norm": 0.8675426999562659, "learning_rate": 7.117310042721588e-06, "loss": 0.3724, "step": 13579 }, { "epoch": 1.2743993993993994, "grad_norm": 0.8841444437901649, "learning_rate": 7.116815432703933e-06, "loss": 0.383, "step": 13580 }, { "epoch": 1.2744932432432432, "grad_norm": 1.0817152470122, "learning_rate": 7.116320797447239e-06, "loss": 0.3982, "step": 13581 }, { "epoch": 1.274587087087087, "grad_norm": 1.0500398851100583, "learning_rate": 7.1158261369574035e-06, "loss": 0.4027, "step": 13582 }, { "epoch": 1.274680930930931, "grad_norm": 0.9453863179821637, "learning_rate": 7.115331451240323e-06, "loss": 0.4193, "step": 13583 }, { "epoch": 1.2747747747747749, "grad_norm": 1.2939446872579163, "learning_rate": 7.114836740301897e-06, "loss": 0.4324, "step": 13584 }, { "epoch": 1.2748686186186187, "grad_norm": 0.8697413179455245, "learning_rate": 7.114342004148023e-06, "loss": 0.3933, "step": 13585 }, { "epoch": 1.2749624624624625, "grad_norm": 0.9843492968574111, "learning_rate": 7.113847242784602e-06, "loss": 0.4571, "step": 13586 }, { "epoch": 1.2750563063063063, "grad_norm": 0.9557875432154546, "learning_rate": 7.113352456217531e-06, "loss": 0.3834, "step": 13587 }, { "epoch": 1.27515015015015, "grad_norm": 1.0059182741837234, "learning_rate": 7.112857644452709e-06, "loss": 0.4086, "step": 13588 }, { "epoch": 1.275243993993994, "grad_norm": 0.9839855045727237, "learning_rate": 7.112362807496037e-06, "loss": 0.3933, "step": 13589 }, { "epoch": 1.2753378378378377, "grad_norm": 0.7491225110920524, "learning_rate": 7.111867945353415e-06, "loss": 0.3499, "step": 13590 }, { "epoch": 1.2754316816816818, "grad_norm": 0.9592882310932904, "learning_rate": 7.111373058030744e-06, "loss": 0.3812, "step": 13591 }, { "epoch": 1.2755255255255256, "grad_norm": 0.9410265944382532, "learning_rate": 7.110878145533922e-06, "loss": 0.3946, "step": 13592 }, { "epoch": 1.2756193693693694, "grad_norm": 0.9889824877936175, "learning_rate": 7.110383207868852e-06, "loss": 0.4268, "step": 13593 }, { "epoch": 1.2757132132132132, "grad_norm": 0.8639484739709914, "learning_rate": 7.109888245041434e-06, "loss": 0.4186, "step": 13594 }, { "epoch": 1.275807057057057, "grad_norm": 1.0527036862857715, "learning_rate": 7.10939325705757e-06, "loss": 0.4074, "step": 13595 }, { "epoch": 1.275900900900901, "grad_norm": 0.9096632474328374, "learning_rate": 7.108898243923162e-06, "loss": 0.3981, "step": 13596 }, { "epoch": 1.2759947447447448, "grad_norm": 1.0527066235253233, "learning_rate": 7.108403205644114e-06, "loss": 0.4296, "step": 13597 }, { "epoch": 1.2760885885885886, "grad_norm": 1.0342476849520936, "learning_rate": 7.107908142226324e-06, "loss": 0.3895, "step": 13598 }, { "epoch": 1.2761824324324325, "grad_norm": 1.3168142342743245, "learning_rate": 7.107413053675698e-06, "loss": 0.3888, "step": 13599 }, { "epoch": 1.2762762762762763, "grad_norm": 1.0443580095592673, "learning_rate": 7.106917939998138e-06, "loss": 0.3826, "step": 13600 }, { "epoch": 1.27637012012012, "grad_norm": 0.9673012191319915, "learning_rate": 7.106422801199548e-06, "loss": 0.3851, "step": 13601 }, { "epoch": 1.2764639639639639, "grad_norm": 0.8912907679444265, "learning_rate": 7.105927637285831e-06, "loss": 0.38, "step": 13602 }, { "epoch": 1.2765578078078077, "grad_norm": 0.9284642659982034, "learning_rate": 7.105432448262891e-06, "loss": 0.4539, "step": 13603 }, { "epoch": 1.2766516516516517, "grad_norm": 0.9509318347607978, "learning_rate": 7.10493723413663e-06, "loss": 0.4291, "step": 13604 }, { "epoch": 1.2767454954954955, "grad_norm": 0.992220536742593, "learning_rate": 7.104441994912958e-06, "loss": 0.4284, "step": 13605 }, { "epoch": 1.2768393393393394, "grad_norm": 0.8778556545828687, "learning_rate": 7.103946730597774e-06, "loss": 0.4405, "step": 13606 }, { "epoch": 1.2769331831831832, "grad_norm": 0.9080111327445822, "learning_rate": 7.103451441196985e-06, "loss": 0.3602, "step": 13607 }, { "epoch": 1.277027027027027, "grad_norm": 1.0182972467427682, "learning_rate": 7.102956126716499e-06, "loss": 0.3924, "step": 13608 }, { "epoch": 1.277120870870871, "grad_norm": 0.8656483469436044, "learning_rate": 7.10246078716222e-06, "loss": 0.3638, "step": 13609 }, { "epoch": 1.2772147147147148, "grad_norm": 0.843210200879756, "learning_rate": 7.101965422540051e-06, "loss": 0.3746, "step": 13610 }, { "epoch": 1.2773085585585586, "grad_norm": 0.9479152123402255, "learning_rate": 7.101470032855904e-06, "loss": 0.4274, "step": 13611 }, { "epoch": 1.2774024024024024, "grad_norm": 8.996033656385299, "learning_rate": 7.100974618115679e-06, "loss": 0.4212, "step": 13612 }, { "epoch": 1.2774962462462462, "grad_norm": 0.9313081876178549, "learning_rate": 7.100479178325288e-06, "loss": 0.4013, "step": 13613 }, { "epoch": 1.27759009009009, "grad_norm": 0.9624062471228308, "learning_rate": 7.0999837134906365e-06, "loss": 0.3984, "step": 13614 }, { "epoch": 1.2776839339339339, "grad_norm": 1.0575813173703679, "learning_rate": 7.099488223617632e-06, "loss": 0.4052, "step": 13615 }, { "epoch": 1.2777777777777777, "grad_norm": 0.9640372357596232, "learning_rate": 7.098992708712183e-06, "loss": 0.3875, "step": 13616 }, { "epoch": 1.2778716216216215, "grad_norm": 1.0715897302471706, "learning_rate": 7.098497168780198e-06, "loss": 0.4183, "step": 13617 }, { "epoch": 1.2779654654654655, "grad_norm": 0.8035555990907552, "learning_rate": 7.098001603827581e-06, "loss": 0.3947, "step": 13618 }, { "epoch": 1.2780593093093093, "grad_norm": 0.9086357321559164, "learning_rate": 7.097506013860246e-06, "loss": 0.4044, "step": 13619 }, { "epoch": 1.2781531531531531, "grad_norm": 1.0740102018359865, "learning_rate": 7.097010398884099e-06, "loss": 0.4014, "step": 13620 }, { "epoch": 1.278246996996997, "grad_norm": 1.0312685617020587, "learning_rate": 7.096514758905051e-06, "loss": 0.4054, "step": 13621 }, { "epoch": 1.2783408408408408, "grad_norm": 0.9467150719633115, "learning_rate": 7.0960190939290105e-06, "loss": 0.413, "step": 13622 }, { "epoch": 1.2784346846846848, "grad_norm": 0.9390933976214728, "learning_rate": 7.0955234039618885e-06, "loss": 0.3894, "step": 13623 }, { "epoch": 1.2785285285285286, "grad_norm": 3.2028264514827134, "learning_rate": 7.0950276890095924e-06, "loss": 0.4395, "step": 13624 }, { "epoch": 1.2786223723723724, "grad_norm": 1.0662863546872035, "learning_rate": 7.0945319490780365e-06, "loss": 0.4313, "step": 13625 }, { "epoch": 1.2787162162162162, "grad_norm": 1.6252008452746178, "learning_rate": 7.094036184173129e-06, "loss": 0.4229, "step": 13626 }, { "epoch": 1.27881006006006, "grad_norm": 1.23953452146248, "learning_rate": 7.093540394300781e-06, "loss": 0.3773, "step": 13627 }, { "epoch": 1.2789039039039038, "grad_norm": 0.9468560061064467, "learning_rate": 7.093044579466905e-06, "loss": 0.4127, "step": 13628 }, { "epoch": 1.2789977477477477, "grad_norm": 0.9388260626177246, "learning_rate": 7.092548739677413e-06, "loss": 0.3991, "step": 13629 }, { "epoch": 1.2790915915915915, "grad_norm": 0.8382503343390082, "learning_rate": 7.0920528749382145e-06, "loss": 0.3545, "step": 13630 }, { "epoch": 1.2791854354354355, "grad_norm": 0.8123379766268544, "learning_rate": 7.091556985255224e-06, "loss": 0.3434, "step": 13631 }, { "epoch": 1.2792792792792793, "grad_norm": 1.2245203159511293, "learning_rate": 7.091061070634353e-06, "loss": 0.4169, "step": 13632 }, { "epoch": 1.2793731231231231, "grad_norm": 0.9984237455993598, "learning_rate": 7.090565131081515e-06, "loss": 0.433, "step": 13633 }, { "epoch": 1.279466966966967, "grad_norm": 0.9471009026231142, "learning_rate": 7.0900691666026236e-06, "loss": 0.4484, "step": 13634 }, { "epoch": 1.2795608108108107, "grad_norm": 0.8901829234848818, "learning_rate": 7.089573177203591e-06, "loss": 0.4294, "step": 13635 }, { "epoch": 1.2796546546546548, "grad_norm": 1.1820982916976799, "learning_rate": 7.089077162890331e-06, "loss": 0.4025, "step": 13636 }, { "epoch": 1.2797484984984986, "grad_norm": 0.9247481835497087, "learning_rate": 7.088581123668758e-06, "loss": 0.3916, "step": 13637 }, { "epoch": 1.2798423423423424, "grad_norm": 0.9146486245163554, "learning_rate": 7.088085059544788e-06, "loss": 0.4471, "step": 13638 }, { "epoch": 1.2799361861861862, "grad_norm": 0.8979897741962782, "learning_rate": 7.087588970524333e-06, "loss": 0.4284, "step": 13639 }, { "epoch": 1.28003003003003, "grad_norm": 0.8885601524265654, "learning_rate": 7.087092856613309e-06, "loss": 0.4182, "step": 13640 }, { "epoch": 1.2801238738738738, "grad_norm": 1.0573769830539754, "learning_rate": 7.086596717817632e-06, "loss": 0.4533, "step": 13641 }, { "epoch": 1.2802177177177176, "grad_norm": 0.8525652355757406, "learning_rate": 7.086100554143216e-06, "loss": 0.3922, "step": 13642 }, { "epoch": 1.2803115615615615, "grad_norm": 0.9498736973808795, "learning_rate": 7.0856043655959775e-06, "loss": 0.421, "step": 13643 }, { "epoch": 1.2804054054054055, "grad_norm": 0.8264640911004315, "learning_rate": 7.0851081521818335e-06, "loss": 0.4163, "step": 13644 }, { "epoch": 1.2804992492492493, "grad_norm": 1.6649368826701807, "learning_rate": 7.084611913906698e-06, "loss": 0.4244, "step": 13645 }, { "epoch": 1.280593093093093, "grad_norm": 0.9953712245248141, "learning_rate": 7.084115650776491e-06, "loss": 0.3945, "step": 13646 }, { "epoch": 1.280686936936937, "grad_norm": 1.0149814735512221, "learning_rate": 7.0836193627971274e-06, "loss": 0.4056, "step": 13647 }, { "epoch": 1.2807807807807807, "grad_norm": 1.011763018021323, "learning_rate": 7.083123049974524e-06, "loss": 0.4383, "step": 13648 }, { "epoch": 1.2808746246246248, "grad_norm": 1.2785706204748009, "learning_rate": 7.0826267123146e-06, "loss": 0.4049, "step": 13649 }, { "epoch": 1.2809684684684686, "grad_norm": 0.998295497979519, "learning_rate": 7.082130349823273e-06, "loss": 0.4117, "step": 13650 }, { "epoch": 1.2810623123123124, "grad_norm": 0.9778531604954975, "learning_rate": 7.08163396250646e-06, "loss": 0.4172, "step": 13651 }, { "epoch": 1.2811561561561562, "grad_norm": 0.9455921106636591, "learning_rate": 7.0811375503700805e-06, "loss": 0.4148, "step": 13652 }, { "epoch": 1.28125, "grad_norm": 0.8286087716219638, "learning_rate": 7.080641113420054e-06, "loss": 0.3773, "step": 13653 }, { "epoch": 1.2813438438438438, "grad_norm": 1.1016561755709955, "learning_rate": 7.080144651662297e-06, "loss": 0.4303, "step": 13654 }, { "epoch": 1.2814376876876876, "grad_norm": 0.9140612724690882, "learning_rate": 7.079648165102731e-06, "loss": 0.4275, "step": 13655 }, { "epoch": 1.2815315315315314, "grad_norm": 0.924813777106223, "learning_rate": 7.079151653747276e-06, "loss": 0.4204, "step": 13656 }, { "epoch": 1.2816253753753752, "grad_norm": 0.8687034258531631, "learning_rate": 7.078655117601849e-06, "loss": 0.391, "step": 13657 }, { "epoch": 1.2817192192192193, "grad_norm": 0.8830802798560939, "learning_rate": 7.078158556672375e-06, "loss": 0.3813, "step": 13658 }, { "epoch": 1.281813063063063, "grad_norm": 0.9343274389416899, "learning_rate": 7.077661970964771e-06, "loss": 0.3934, "step": 13659 }, { "epoch": 1.281906906906907, "grad_norm": 0.9472734974164024, "learning_rate": 7.077165360484959e-06, "loss": 0.4048, "step": 13660 }, { "epoch": 1.2820007507507507, "grad_norm": 1.7077258181265915, "learning_rate": 7.0766687252388585e-06, "loss": 0.3754, "step": 13661 }, { "epoch": 1.2820945945945945, "grad_norm": 0.9622835132413292, "learning_rate": 7.076172065232394e-06, "loss": 0.4303, "step": 13662 }, { "epoch": 1.2821884384384385, "grad_norm": 0.9420696179032864, "learning_rate": 7.075675380471485e-06, "loss": 0.3929, "step": 13663 }, { "epoch": 1.2822822822822824, "grad_norm": 0.9366233367961451, "learning_rate": 7.075178670962055e-06, "loss": 0.4051, "step": 13664 }, { "epoch": 1.2823761261261262, "grad_norm": 2.1784369291069368, "learning_rate": 7.074681936710025e-06, "loss": 0.3766, "step": 13665 }, { "epoch": 1.28246996996997, "grad_norm": 0.8977367354581607, "learning_rate": 7.074185177721318e-06, "loss": 0.3955, "step": 13666 }, { "epoch": 1.2825638138138138, "grad_norm": 0.9895313611656241, "learning_rate": 7.073688394001856e-06, "loss": 0.4699, "step": 13667 }, { "epoch": 1.2826576576576576, "grad_norm": 1.1202726518072121, "learning_rate": 7.073191585557564e-06, "loss": 0.3691, "step": 13668 }, { "epoch": 1.2827515015015014, "grad_norm": 1.0114519384371297, "learning_rate": 7.072694752394365e-06, "loss": 0.3675, "step": 13669 }, { "epoch": 1.2828453453453452, "grad_norm": 1.0799645755856326, "learning_rate": 7.072197894518183e-06, "loss": 0.4336, "step": 13670 }, { "epoch": 1.2829391891891893, "grad_norm": 0.8875136891837199, "learning_rate": 7.0717010119349405e-06, "loss": 0.4379, "step": 13671 }, { "epoch": 1.283033033033033, "grad_norm": 0.8768994979345696, "learning_rate": 7.0712041046505635e-06, "loss": 0.3547, "step": 13672 }, { "epoch": 1.2831268768768769, "grad_norm": 0.9314157817755131, "learning_rate": 7.0707071726709755e-06, "loss": 0.3424, "step": 13673 }, { "epoch": 1.2832207207207207, "grad_norm": 0.9970335042225612, "learning_rate": 7.070210216002103e-06, "loss": 0.4345, "step": 13674 }, { "epoch": 1.2833145645645645, "grad_norm": 1.200400329759212, "learning_rate": 7.06971323464987e-06, "loss": 0.3708, "step": 13675 }, { "epoch": 1.2834084084084085, "grad_norm": 0.8612806418905684, "learning_rate": 7.069216228620202e-06, "loss": 0.4045, "step": 13676 }, { "epoch": 1.2835022522522523, "grad_norm": 0.9775512219281716, "learning_rate": 7.068719197919027e-06, "loss": 0.4175, "step": 13677 }, { "epoch": 1.2835960960960962, "grad_norm": 0.9216282463474076, "learning_rate": 7.068222142552268e-06, "loss": 0.4414, "step": 13678 }, { "epoch": 1.28368993993994, "grad_norm": 0.899071965068159, "learning_rate": 7.067725062525854e-06, "loss": 0.3989, "step": 13679 }, { "epoch": 1.2837837837837838, "grad_norm": 0.8283088865030285, "learning_rate": 7.06722795784571e-06, "loss": 0.4, "step": 13680 }, { "epoch": 1.2838776276276276, "grad_norm": 0.9922186683871532, "learning_rate": 7.066730828517764e-06, "loss": 0.4066, "step": 13681 }, { "epoch": 1.2839714714714714, "grad_norm": 0.9080238525299817, "learning_rate": 7.066233674547942e-06, "loss": 0.3852, "step": 13682 }, { "epoch": 1.2840653153153152, "grad_norm": 1.0189998215757317, "learning_rate": 7.065736495942175e-06, "loss": 0.4124, "step": 13683 }, { "epoch": 1.2841591591591592, "grad_norm": 1.0349675543835066, "learning_rate": 7.0652392927063886e-06, "loss": 0.428, "step": 13684 }, { "epoch": 1.284253003003003, "grad_norm": 1.046871145213503, "learning_rate": 7.064742064846509e-06, "loss": 0.3879, "step": 13685 }, { "epoch": 1.2843468468468469, "grad_norm": 0.9254502734209011, "learning_rate": 7.064244812368468e-06, "loss": 0.3977, "step": 13686 }, { "epoch": 1.2844406906906907, "grad_norm": 1.079215823121627, "learning_rate": 7.063747535278193e-06, "loss": 0.3455, "step": 13687 }, { "epoch": 1.2845345345345345, "grad_norm": 1.140429026859345, "learning_rate": 7.063250233581613e-06, "loss": 0.4284, "step": 13688 }, { "epoch": 1.2846283783783785, "grad_norm": 1.0296830303228603, "learning_rate": 7.062752907284658e-06, "loss": 0.4134, "step": 13689 }, { "epoch": 1.2847222222222223, "grad_norm": 0.9034325569193516, "learning_rate": 7.062255556393257e-06, "loss": 0.4344, "step": 13690 }, { "epoch": 1.2848160660660661, "grad_norm": 0.9722736611879443, "learning_rate": 7.061758180913341e-06, "loss": 0.4322, "step": 13691 }, { "epoch": 1.28490990990991, "grad_norm": 0.8126333066065934, "learning_rate": 7.061260780850838e-06, "loss": 0.3793, "step": 13692 }, { "epoch": 1.2850037537537538, "grad_norm": 0.9105048501504198, "learning_rate": 7.060763356211681e-06, "loss": 0.3855, "step": 13693 }, { "epoch": 1.2850975975975976, "grad_norm": 0.9160607954112088, "learning_rate": 7.0602659070018e-06, "loss": 0.3718, "step": 13694 }, { "epoch": 1.2851914414414414, "grad_norm": 0.8928025317792657, "learning_rate": 7.059768433227127e-06, "loss": 0.3981, "step": 13695 }, { "epoch": 1.2852852852852852, "grad_norm": 0.829618096608837, "learning_rate": 7.059270934893592e-06, "loss": 0.4014, "step": 13696 }, { "epoch": 1.285379129129129, "grad_norm": 1.0104679070598273, "learning_rate": 7.058773412007126e-06, "loss": 0.4275, "step": 13697 }, { "epoch": 1.285472972972973, "grad_norm": 0.9687758293683048, "learning_rate": 7.058275864573664e-06, "loss": 0.4092, "step": 13698 }, { "epoch": 1.2855668168168168, "grad_norm": 0.9016402492147865, "learning_rate": 7.057778292599135e-06, "loss": 0.4092, "step": 13699 }, { "epoch": 1.2856606606606606, "grad_norm": 0.9613051112360681, "learning_rate": 7.057280696089473e-06, "loss": 0.4586, "step": 13700 }, { "epoch": 1.2857545045045045, "grad_norm": 1.1743821164529997, "learning_rate": 7.056783075050612e-06, "loss": 0.4226, "step": 13701 }, { "epoch": 1.2858483483483483, "grad_norm": 1.0760481727044282, "learning_rate": 7.0562854294884865e-06, "loss": 0.3593, "step": 13702 }, { "epoch": 1.2859421921921923, "grad_norm": 0.8772595633999785, "learning_rate": 7.055787759409025e-06, "loss": 0.3319, "step": 13703 }, { "epoch": 1.2860360360360361, "grad_norm": 0.8895886778658249, "learning_rate": 7.055290064818166e-06, "loss": 0.4237, "step": 13704 }, { "epoch": 1.28612987987988, "grad_norm": 0.8784960829208028, "learning_rate": 7.054792345721839e-06, "loss": 0.4147, "step": 13705 }, { "epoch": 1.2862237237237237, "grad_norm": 0.941258954311112, "learning_rate": 7.054294602125983e-06, "loss": 0.3841, "step": 13706 }, { "epoch": 1.2863175675675675, "grad_norm": 0.9717209619652077, "learning_rate": 7.05379683403653e-06, "loss": 0.3752, "step": 13707 }, { "epoch": 1.2864114114114114, "grad_norm": 1.006126475145131, "learning_rate": 7.053299041459417e-06, "loss": 0.4516, "step": 13708 }, { "epoch": 1.2865052552552552, "grad_norm": 1.0103275467972752, "learning_rate": 7.052801224400576e-06, "loss": 0.3878, "step": 13709 }, { "epoch": 1.286599099099099, "grad_norm": 2.8093684214811816, "learning_rate": 7.052303382865946e-06, "loss": 0.4661, "step": 13710 }, { "epoch": 1.286692942942943, "grad_norm": 1.0892709748201692, "learning_rate": 7.0518055168614605e-06, "loss": 0.4334, "step": 13711 }, { "epoch": 1.2867867867867868, "grad_norm": 0.9614514517427157, "learning_rate": 7.0513076263930555e-06, "loss": 0.4117, "step": 13712 }, { "epoch": 1.2868806306306306, "grad_norm": 1.2659806796108346, "learning_rate": 7.05080971146667e-06, "loss": 0.4157, "step": 13713 }, { "epoch": 1.2869744744744744, "grad_norm": 0.9456699163306673, "learning_rate": 7.050311772088239e-06, "loss": 0.456, "step": 13714 }, { "epoch": 1.2870683183183182, "grad_norm": 1.140567045805086, "learning_rate": 7.0498138082636965e-06, "loss": 0.3973, "step": 13715 }, { "epoch": 1.2871621621621623, "grad_norm": 0.8870161942678584, "learning_rate": 7.049315819998985e-06, "loss": 0.3735, "step": 13716 }, { "epoch": 1.287256006006006, "grad_norm": 0.8286280317409535, "learning_rate": 7.0488178073000395e-06, "loss": 0.3894, "step": 13717 }, { "epoch": 1.28734984984985, "grad_norm": 0.9632128633720464, "learning_rate": 7.0483197701727975e-06, "loss": 0.3891, "step": 13718 }, { "epoch": 1.2874436936936937, "grad_norm": 1.1856687398020782, "learning_rate": 7.0478217086231995e-06, "loss": 0.3807, "step": 13719 }, { "epoch": 1.2875375375375375, "grad_norm": 0.8254320189316033, "learning_rate": 7.0473236226571805e-06, "loss": 0.3836, "step": 13720 }, { "epoch": 1.2876313813813813, "grad_norm": 0.9296259454023584, "learning_rate": 7.0468255122806815e-06, "loss": 0.4164, "step": 13721 }, { "epoch": 1.2877252252252251, "grad_norm": 0.9598458958647517, "learning_rate": 7.046327377499642e-06, "loss": 0.4165, "step": 13722 }, { "epoch": 1.287819069069069, "grad_norm": 0.8443790095177776, "learning_rate": 7.045829218319999e-06, "loss": 0.4245, "step": 13723 }, { "epoch": 1.287912912912913, "grad_norm": 0.8569652908229476, "learning_rate": 7.045331034747694e-06, "loss": 0.3839, "step": 13724 }, { "epoch": 1.2880067567567568, "grad_norm": 0.9996522159253053, "learning_rate": 7.044832826788666e-06, "loss": 0.3619, "step": 13725 }, { "epoch": 1.2881006006006006, "grad_norm": 1.2094652290484964, "learning_rate": 7.044334594448856e-06, "loss": 0.3963, "step": 13726 }, { "epoch": 1.2881944444444444, "grad_norm": 0.9633802313743702, "learning_rate": 7.043836337734203e-06, "loss": 0.4363, "step": 13727 }, { "epoch": 1.2882882882882882, "grad_norm": 0.977358778483485, "learning_rate": 7.04333805665065e-06, "loss": 0.4034, "step": 13728 }, { "epoch": 1.2883821321321323, "grad_norm": 0.8489301348082406, "learning_rate": 7.042839751204137e-06, "loss": 0.3917, "step": 13729 }, { "epoch": 1.288475975975976, "grad_norm": 0.8942530279713994, "learning_rate": 7.042341421400605e-06, "loss": 0.3525, "step": 13730 }, { "epoch": 1.2885698198198199, "grad_norm": 0.9807441882138059, "learning_rate": 7.041843067245995e-06, "loss": 0.4126, "step": 13731 }, { "epoch": 1.2886636636636637, "grad_norm": 0.974904473033316, "learning_rate": 7.041344688746251e-06, "loss": 0.4118, "step": 13732 }, { "epoch": 1.2887575075075075, "grad_norm": 0.8561115175142201, "learning_rate": 7.0408462859073125e-06, "loss": 0.3826, "step": 13733 }, { "epoch": 1.2888513513513513, "grad_norm": 0.9067377492015046, "learning_rate": 7.040347858735126e-06, "loss": 0.3764, "step": 13734 }, { "epoch": 1.2889451951951951, "grad_norm": 1.0223372996145408, "learning_rate": 7.039849407235629e-06, "loss": 0.4411, "step": 13735 }, { "epoch": 1.289039039039039, "grad_norm": 1.0212234081091855, "learning_rate": 7.039350931414768e-06, "loss": 0.3907, "step": 13736 }, { "epoch": 1.2891328828828827, "grad_norm": 1.0402100261963414, "learning_rate": 7.038852431278488e-06, "loss": 0.3863, "step": 13737 }, { "epoch": 1.2892267267267268, "grad_norm": 1.0379209912967209, "learning_rate": 7.038353906832729e-06, "loss": 0.4128, "step": 13738 }, { "epoch": 1.2893205705705706, "grad_norm": 1.074913678274053, "learning_rate": 7.037855358083436e-06, "loss": 0.3873, "step": 13739 }, { "epoch": 1.2894144144144144, "grad_norm": 1.4359043775054972, "learning_rate": 7.037356785036555e-06, "loss": 0.415, "step": 13740 }, { "epoch": 1.2895082582582582, "grad_norm": 0.9014426933010273, "learning_rate": 7.036858187698028e-06, "loss": 0.4247, "step": 13741 }, { "epoch": 1.289602102102102, "grad_norm": 0.8534098827026386, "learning_rate": 7.0363595660738004e-06, "loss": 0.4197, "step": 13742 }, { "epoch": 1.289695945945946, "grad_norm": 1.06809962888517, "learning_rate": 7.03586092016982e-06, "loss": 0.4111, "step": 13743 }, { "epoch": 1.2897897897897899, "grad_norm": 0.911489639331839, "learning_rate": 7.035362249992029e-06, "loss": 0.4182, "step": 13744 }, { "epoch": 1.2898836336336337, "grad_norm": 2.7611052278143324, "learning_rate": 7.034863555546376e-06, "loss": 0.382, "step": 13745 }, { "epoch": 1.2899774774774775, "grad_norm": 0.8548043413391861, "learning_rate": 7.034364836838804e-06, "loss": 0.436, "step": 13746 }, { "epoch": 1.2900713213213213, "grad_norm": 0.9368620205183816, "learning_rate": 7.033866093875261e-06, "loss": 0.4462, "step": 13747 }, { "epoch": 1.290165165165165, "grad_norm": 1.0617678843431375, "learning_rate": 7.033367326661692e-06, "loss": 0.4007, "step": 13748 }, { "epoch": 1.290259009009009, "grad_norm": 1.1916571011836967, "learning_rate": 7.032868535204047e-06, "loss": 0.4365, "step": 13749 }, { "epoch": 1.2903528528528527, "grad_norm": 1.0837106680796575, "learning_rate": 7.0323697195082695e-06, "loss": 0.4067, "step": 13750 }, { "epoch": 1.2904466966966968, "grad_norm": 0.8346830350106946, "learning_rate": 7.031870879580309e-06, "loss": 0.3548, "step": 13751 }, { "epoch": 1.2905405405405406, "grad_norm": 1.0327986951432848, "learning_rate": 7.031372015426114e-06, "loss": 0.4387, "step": 13752 }, { "epoch": 1.2906343843843844, "grad_norm": 0.9781012157620076, "learning_rate": 7.03087312705163e-06, "loss": 0.3929, "step": 13753 }, { "epoch": 1.2907282282282282, "grad_norm": 0.9118259820282582, "learning_rate": 7.030374214462807e-06, "loss": 0.4161, "step": 13754 }, { "epoch": 1.290822072072072, "grad_norm": 1.0560259666663425, "learning_rate": 7.0298752776655945e-06, "loss": 0.4023, "step": 13755 }, { "epoch": 1.290915915915916, "grad_norm": 1.0396579611070185, "learning_rate": 7.029376316665939e-06, "loss": 0.3861, "step": 13756 }, { "epoch": 1.2910097597597598, "grad_norm": 1.0250765965054278, "learning_rate": 7.028877331469791e-06, "loss": 0.3737, "step": 13757 }, { "epoch": 1.2911036036036037, "grad_norm": 1.1276272219048689, "learning_rate": 7.0283783220831e-06, "loss": 0.3829, "step": 13758 }, { "epoch": 1.2911974474474475, "grad_norm": 0.9915416398657934, "learning_rate": 7.027879288511815e-06, "loss": 0.4366, "step": 13759 }, { "epoch": 1.2912912912912913, "grad_norm": 0.9428951536943965, "learning_rate": 7.027380230761887e-06, "loss": 0.3438, "step": 13760 }, { "epoch": 1.291385135135135, "grad_norm": 1.319566489913059, "learning_rate": 7.026881148839266e-06, "loss": 0.4567, "step": 13761 }, { "epoch": 1.291478978978979, "grad_norm": 1.0041859203658774, "learning_rate": 7.026382042749902e-06, "loss": 0.3932, "step": 13762 }, { "epoch": 1.2915728228228227, "grad_norm": 1.0430230456209182, "learning_rate": 7.0258829124997475e-06, "loss": 0.3812, "step": 13763 }, { "epoch": 1.2916666666666667, "grad_norm": 0.8771465513905713, "learning_rate": 7.025383758094753e-06, "loss": 0.3971, "step": 13764 }, { "epoch": 1.2917605105105106, "grad_norm": 1.0074506561699854, "learning_rate": 7.024884579540867e-06, "loss": 0.4008, "step": 13765 }, { "epoch": 1.2918543543543544, "grad_norm": 1.109201995294422, "learning_rate": 7.024385376844045e-06, "loss": 0.3573, "step": 13766 }, { "epoch": 1.2919481981981982, "grad_norm": 0.9319656853062397, "learning_rate": 7.0238861500102395e-06, "loss": 0.4004, "step": 13767 }, { "epoch": 1.292042042042042, "grad_norm": 1.0377813400893707, "learning_rate": 7.023386899045399e-06, "loss": 0.4105, "step": 13768 }, { "epoch": 1.292135885885886, "grad_norm": 0.9500502945778388, "learning_rate": 7.02288762395548e-06, "loss": 0.4026, "step": 13769 }, { "epoch": 1.2922297297297298, "grad_norm": 0.9043037125318659, "learning_rate": 7.022388324746433e-06, "loss": 0.3628, "step": 13770 }, { "epoch": 1.2923235735735736, "grad_norm": 1.0430314736510053, "learning_rate": 7.021889001424212e-06, "loss": 0.4125, "step": 13771 }, { "epoch": 1.2924174174174174, "grad_norm": 0.9323138270070831, "learning_rate": 7.02138965399477e-06, "loss": 0.4521, "step": 13772 }, { "epoch": 1.2925112612612613, "grad_norm": 0.915881613458352, "learning_rate": 7.020890282464062e-06, "loss": 0.3801, "step": 13773 }, { "epoch": 1.292605105105105, "grad_norm": 0.8963614480210501, "learning_rate": 7.020390886838041e-06, "loss": 0.4235, "step": 13774 }, { "epoch": 1.2926989489489489, "grad_norm": 1.2813865311043566, "learning_rate": 7.019891467122661e-06, "loss": 0.4172, "step": 13775 }, { "epoch": 1.2927927927927927, "grad_norm": 0.911419727445408, "learning_rate": 7.01939202332388e-06, "loss": 0.4129, "step": 13776 }, { "epoch": 1.2928866366366365, "grad_norm": 0.9107652791115186, "learning_rate": 7.018892555447648e-06, "loss": 0.3832, "step": 13777 }, { "epoch": 1.2929804804804805, "grad_norm": 1.0153758401101616, "learning_rate": 7.018393063499922e-06, "loss": 0.3843, "step": 13778 }, { "epoch": 1.2930743243243243, "grad_norm": 1.2604850597314916, "learning_rate": 7.017893547486658e-06, "loss": 0.4294, "step": 13779 }, { "epoch": 1.2931681681681682, "grad_norm": 1.2254049144003512, "learning_rate": 7.017394007413813e-06, "loss": 0.3806, "step": 13780 }, { "epoch": 1.293262012012012, "grad_norm": 3.6162315003189063, "learning_rate": 7.016894443287342e-06, "loss": 0.4488, "step": 13781 }, { "epoch": 1.2933558558558558, "grad_norm": 0.8715979931783173, "learning_rate": 7.016394855113201e-06, "loss": 0.4087, "step": 13782 }, { "epoch": 1.2934496996996998, "grad_norm": 1.0476726186017968, "learning_rate": 7.015895242897346e-06, "loss": 0.4049, "step": 13783 }, { "epoch": 1.2935435435435436, "grad_norm": 0.9327382543444533, "learning_rate": 7.015395606645734e-06, "loss": 0.3873, "step": 13784 }, { "epoch": 1.2936373873873874, "grad_norm": 0.8256275463071604, "learning_rate": 7.014895946364324e-06, "loss": 0.3859, "step": 13785 }, { "epoch": 1.2937312312312312, "grad_norm": 0.8935967991787482, "learning_rate": 7.014396262059072e-06, "loss": 0.3423, "step": 13786 }, { "epoch": 1.293825075075075, "grad_norm": 1.0373142714174546, "learning_rate": 7.013896553735938e-06, "loss": 0.4871, "step": 13787 }, { "epoch": 1.2939189189189189, "grad_norm": 0.8963149967824029, "learning_rate": 7.013396821400878e-06, "loss": 0.433, "step": 13788 }, { "epoch": 1.2940127627627627, "grad_norm": 0.9142824433023394, "learning_rate": 7.01289706505985e-06, "loss": 0.4294, "step": 13789 }, { "epoch": 1.2941066066066065, "grad_norm": 1.8041843247894809, "learning_rate": 7.0123972847188115e-06, "loss": 0.4191, "step": 13790 }, { "epoch": 1.2942004504504505, "grad_norm": 1.0000299948846856, "learning_rate": 7.011897480383726e-06, "loss": 0.4199, "step": 13791 }, { "epoch": 1.2942942942942943, "grad_norm": 0.8381087559662981, "learning_rate": 7.011397652060548e-06, "loss": 0.4134, "step": 13792 }, { "epoch": 1.2943881381381381, "grad_norm": 1.0142557231119707, "learning_rate": 7.010897799755241e-06, "loss": 0.4239, "step": 13793 }, { "epoch": 1.294481981981982, "grad_norm": 0.839461734324518, "learning_rate": 7.0103979234737625e-06, "loss": 0.3559, "step": 13794 }, { "epoch": 1.2945758258258258, "grad_norm": 1.6859460249666887, "learning_rate": 7.009898023222072e-06, "loss": 0.3822, "step": 13795 }, { "epoch": 1.2946696696696698, "grad_norm": 1.0972299717618712, "learning_rate": 7.009398099006132e-06, "loss": 0.4232, "step": 13796 }, { "epoch": 1.2947635135135136, "grad_norm": 0.9695877414673105, "learning_rate": 7.008898150831901e-06, "loss": 0.4352, "step": 13797 }, { "epoch": 1.2948573573573574, "grad_norm": 0.9029237951398661, "learning_rate": 7.008398178705341e-06, "loss": 0.3975, "step": 13798 }, { "epoch": 1.2949512012012012, "grad_norm": 0.9067519621244358, "learning_rate": 7.007898182632413e-06, "loss": 0.3773, "step": 13799 }, { "epoch": 1.295045045045045, "grad_norm": 1.0321068591542915, "learning_rate": 7.00739816261908e-06, "loss": 0.4225, "step": 13800 }, { "epoch": 1.2951388888888888, "grad_norm": 0.7973218857326285, "learning_rate": 7.0068981186713015e-06, "loss": 0.3125, "step": 13801 }, { "epoch": 1.2952327327327327, "grad_norm": 0.8651489470833076, "learning_rate": 7.00639805079504e-06, "loss": 0.3912, "step": 13802 }, { "epoch": 1.2953265765765765, "grad_norm": 1.0055654872699633, "learning_rate": 7.0058979589962595e-06, "loss": 0.3892, "step": 13803 }, { "epoch": 1.2954204204204205, "grad_norm": 0.9558478661090706, "learning_rate": 7.00539784328092e-06, "loss": 0.3663, "step": 13804 }, { "epoch": 1.2955142642642643, "grad_norm": 0.9259151977046302, "learning_rate": 7.004897703654987e-06, "loss": 0.4286, "step": 13805 }, { "epoch": 1.2956081081081081, "grad_norm": 0.9733611295070248, "learning_rate": 7.004397540124423e-06, "loss": 0.4199, "step": 13806 }, { "epoch": 1.295701951951952, "grad_norm": 1.0185838810865213, "learning_rate": 7.003897352695192e-06, "loss": 0.4033, "step": 13807 }, { "epoch": 1.2957957957957957, "grad_norm": 0.9798655943418086, "learning_rate": 7.003397141373255e-06, "loss": 0.4192, "step": 13808 }, { "epoch": 1.2958896396396398, "grad_norm": 1.7262411491029512, "learning_rate": 7.002896906164579e-06, "loss": 0.4129, "step": 13809 }, { "epoch": 1.2959834834834836, "grad_norm": 1.1541179415689795, "learning_rate": 7.002396647075127e-06, "loss": 0.3841, "step": 13810 }, { "epoch": 1.2960773273273274, "grad_norm": 1.0661778519053102, "learning_rate": 7.001896364110865e-06, "loss": 0.3607, "step": 13811 }, { "epoch": 1.2961711711711712, "grad_norm": 1.0755310540068541, "learning_rate": 7.0013960572777565e-06, "loss": 0.4183, "step": 13812 }, { "epoch": 1.296265015015015, "grad_norm": 1.2164387988055017, "learning_rate": 7.000895726581768e-06, "loss": 0.4154, "step": 13813 }, { "epoch": 1.2963588588588588, "grad_norm": 1.0681897477297972, "learning_rate": 7.000395372028863e-06, "loss": 0.4132, "step": 13814 }, { "epoch": 1.2964527027027026, "grad_norm": 0.8328134857717356, "learning_rate": 6.999894993625009e-06, "loss": 0.4001, "step": 13815 }, { "epoch": 1.2965465465465464, "grad_norm": 1.3013105226976702, "learning_rate": 6.999394591376172e-06, "loss": 0.4332, "step": 13816 }, { "epoch": 1.2966403903903903, "grad_norm": 0.8930632746598098, "learning_rate": 6.998894165288317e-06, "loss": 0.421, "step": 13817 }, { "epoch": 1.2967342342342343, "grad_norm": 0.9171668859460707, "learning_rate": 6.998393715367413e-06, "loss": 0.4194, "step": 13818 }, { "epoch": 1.296828078078078, "grad_norm": 1.1801451429173513, "learning_rate": 6.997893241619426e-06, "loss": 0.4147, "step": 13819 }, { "epoch": 1.296921921921922, "grad_norm": 0.9326090906443965, "learning_rate": 6.997392744050321e-06, "loss": 0.4069, "step": 13820 }, { "epoch": 1.2970157657657657, "grad_norm": 1.1138466650929482, "learning_rate": 6.996892222666068e-06, "loss": 0.4398, "step": 13821 }, { "epoch": 1.2971096096096097, "grad_norm": 0.8890697944481386, "learning_rate": 6.996391677472634e-06, "loss": 0.3991, "step": 13822 }, { "epoch": 1.2972034534534536, "grad_norm": 0.9674076483691283, "learning_rate": 6.9958911084759865e-06, "loss": 0.429, "step": 13823 }, { "epoch": 1.2972972972972974, "grad_norm": 1.1349134252763484, "learning_rate": 6.995390515682096e-06, "loss": 0.3942, "step": 13824 }, { "epoch": 1.2973911411411412, "grad_norm": 1.0681675629839045, "learning_rate": 6.994889899096928e-06, "loss": 0.4227, "step": 13825 }, { "epoch": 1.297484984984985, "grad_norm": 1.0702974039700937, "learning_rate": 6.994389258726453e-06, "loss": 0.4387, "step": 13826 }, { "epoch": 1.2975788288288288, "grad_norm": 0.9207323206160895, "learning_rate": 6.993888594576641e-06, "loss": 0.3993, "step": 13827 }, { "epoch": 1.2976726726726726, "grad_norm": 0.9454724267380248, "learning_rate": 6.9933879066534594e-06, "loss": 0.4245, "step": 13828 }, { "epoch": 1.2977665165165164, "grad_norm": 0.8100136972187783, "learning_rate": 6.9928871949628794e-06, "loss": 0.4067, "step": 13829 }, { "epoch": 1.2978603603603602, "grad_norm": 0.8595371928551778, "learning_rate": 6.992386459510871e-06, "loss": 0.4205, "step": 13830 }, { "epoch": 1.2979542042042043, "grad_norm": 1.0173632031777156, "learning_rate": 6.9918857003034065e-06, "loss": 0.4346, "step": 13831 }, { "epoch": 1.298048048048048, "grad_norm": 0.9337293413003717, "learning_rate": 6.9913849173464525e-06, "loss": 0.4052, "step": 13832 }, { "epoch": 1.2981418918918919, "grad_norm": 0.8464295655875416, "learning_rate": 6.990884110645982e-06, "loss": 0.3748, "step": 13833 }, { "epoch": 1.2982357357357357, "grad_norm": 1.7221729706029123, "learning_rate": 6.990383280207966e-06, "loss": 0.3861, "step": 13834 }, { "epoch": 1.2983295795795795, "grad_norm": 0.8746575028266006, "learning_rate": 6.989882426038375e-06, "loss": 0.3648, "step": 13835 }, { "epoch": 1.2984234234234235, "grad_norm": 0.9990936891213202, "learning_rate": 6.9893815481431815e-06, "loss": 0.4372, "step": 13836 }, { "epoch": 1.2985172672672673, "grad_norm": 0.8420313435948106, "learning_rate": 6.98888064652836e-06, "loss": 0.3738, "step": 13837 }, { "epoch": 1.2986111111111112, "grad_norm": 0.9408128314567916, "learning_rate": 6.988379721199878e-06, "loss": 0.4688, "step": 13838 }, { "epoch": 1.298704954954955, "grad_norm": 0.9512899766280125, "learning_rate": 6.987878772163712e-06, "loss": 0.3885, "step": 13839 }, { "epoch": 1.2987987987987988, "grad_norm": 0.9290142739198745, "learning_rate": 6.987377799425832e-06, "loss": 0.4076, "step": 13840 }, { "epoch": 1.2988926426426426, "grad_norm": 1.0679645606415396, "learning_rate": 6.986876802992213e-06, "loss": 0.449, "step": 13841 }, { "epoch": 1.2989864864864864, "grad_norm": 0.9531550335287652, "learning_rate": 6.98637578286883e-06, "loss": 0.4311, "step": 13842 }, { "epoch": 1.2990803303303302, "grad_norm": 1.024793514227537, "learning_rate": 6.985874739061653e-06, "loss": 0.4217, "step": 13843 }, { "epoch": 1.2991741741741742, "grad_norm": 0.983171437548441, "learning_rate": 6.9853736715766574e-06, "loss": 0.4061, "step": 13844 }, { "epoch": 1.299268018018018, "grad_norm": 1.3490032786786064, "learning_rate": 6.984872580419819e-06, "loss": 0.3914, "step": 13845 }, { "epoch": 1.2993618618618619, "grad_norm": 0.8956802686500941, "learning_rate": 6.9843714655971105e-06, "loss": 0.3878, "step": 13846 }, { "epoch": 1.2994557057057057, "grad_norm": 0.823082013783584, "learning_rate": 6.983870327114507e-06, "loss": 0.3955, "step": 13847 }, { "epoch": 1.2995495495495495, "grad_norm": 0.8869172837306688, "learning_rate": 6.983369164977985e-06, "loss": 0.401, "step": 13848 }, { "epoch": 1.2996433933933935, "grad_norm": 1.1660534447805753, "learning_rate": 6.98286797919352e-06, "loss": 0.3842, "step": 13849 }, { "epoch": 1.2997372372372373, "grad_norm": 0.9515478872153373, "learning_rate": 6.982366769767085e-06, "loss": 0.4359, "step": 13850 }, { "epoch": 1.2998310810810811, "grad_norm": 0.8811800827467251, "learning_rate": 6.981865536704658e-06, "loss": 0.4138, "step": 13851 }, { "epoch": 1.299924924924925, "grad_norm": 1.3890692446953339, "learning_rate": 6.981364280012215e-06, "loss": 0.3966, "step": 13852 }, { "epoch": 1.3000187687687688, "grad_norm": 0.9487311912941134, "learning_rate": 6.980862999695732e-06, "loss": 0.4055, "step": 13853 }, { "epoch": 1.3001126126126126, "grad_norm": 1.2739711522163069, "learning_rate": 6.9803616957611885e-06, "loss": 0.3979, "step": 13854 }, { "epoch": 1.3002064564564564, "grad_norm": 0.9967807577407858, "learning_rate": 6.979860368214559e-06, "loss": 0.3954, "step": 13855 }, { "epoch": 1.3003003003003002, "grad_norm": 0.7988287024986943, "learning_rate": 6.9793590170618185e-06, "loss": 0.3652, "step": 13856 }, { "epoch": 1.3003941441441442, "grad_norm": 0.8933161426792716, "learning_rate": 6.978857642308949e-06, "loss": 0.4217, "step": 13857 }, { "epoch": 1.300487987987988, "grad_norm": 1.113918957425275, "learning_rate": 6.978356243961928e-06, "loss": 0.4032, "step": 13858 }, { "epoch": 1.3005818318318318, "grad_norm": 0.9654268649361398, "learning_rate": 6.97785482202673e-06, "loss": 0.4155, "step": 13859 }, { "epoch": 1.3006756756756757, "grad_norm": 1.0375557107481614, "learning_rate": 6.977353376509339e-06, "loss": 0.4298, "step": 13860 }, { "epoch": 1.3007695195195195, "grad_norm": 1.0526435108817416, "learning_rate": 6.976851907415732e-06, "loss": 0.3867, "step": 13861 }, { "epoch": 1.3008633633633635, "grad_norm": 1.0234767775878681, "learning_rate": 6.976350414751884e-06, "loss": 0.4115, "step": 13862 }, { "epoch": 1.3009572072072073, "grad_norm": 0.8926475074768856, "learning_rate": 6.975848898523778e-06, "loss": 0.3919, "step": 13863 }, { "epoch": 1.3010510510510511, "grad_norm": 0.987531033567858, "learning_rate": 6.975347358737393e-06, "loss": 0.4252, "step": 13864 }, { "epoch": 1.301144894894895, "grad_norm": 0.842082655508649, "learning_rate": 6.974845795398709e-06, "loss": 0.4005, "step": 13865 }, { "epoch": 1.3012387387387387, "grad_norm": 0.9269597148040319, "learning_rate": 6.9743442085137066e-06, "loss": 0.3909, "step": 13866 }, { "epoch": 1.3013325825825826, "grad_norm": 0.8254503383273794, "learning_rate": 6.9738425980883665e-06, "loss": 0.4089, "step": 13867 }, { "epoch": 1.3014264264264264, "grad_norm": 1.319578878946255, "learning_rate": 6.9733409641286674e-06, "loss": 0.3799, "step": 13868 }, { "epoch": 1.3015202702702702, "grad_norm": 1.0845760222437597, "learning_rate": 6.972839306640593e-06, "loss": 0.399, "step": 13869 }, { "epoch": 1.301614114114114, "grad_norm": 0.9996188077289972, "learning_rate": 6.972337625630122e-06, "loss": 0.3688, "step": 13870 }, { "epoch": 1.301707957957958, "grad_norm": 1.063605647199594, "learning_rate": 6.971835921103238e-06, "loss": 0.476, "step": 13871 }, { "epoch": 1.3018018018018018, "grad_norm": 1.0470343135119582, "learning_rate": 6.971334193065923e-06, "loss": 0.3614, "step": 13872 }, { "epoch": 1.3018956456456456, "grad_norm": 0.9102354446066506, "learning_rate": 6.970832441524158e-06, "loss": 0.3835, "step": 13873 }, { "epoch": 1.3019894894894894, "grad_norm": 0.9546844176231122, "learning_rate": 6.970330666483925e-06, "loss": 0.3909, "step": 13874 }, { "epoch": 1.3020833333333333, "grad_norm": 0.8618362880056536, "learning_rate": 6.969828867951209e-06, "loss": 0.4241, "step": 13875 }, { "epoch": 1.3021771771771773, "grad_norm": 0.9304466392603696, "learning_rate": 6.9693270459319904e-06, "loss": 0.3804, "step": 13876 }, { "epoch": 1.302271021021021, "grad_norm": 1.0191663919067109, "learning_rate": 6.9688252004322545e-06, "loss": 0.4156, "step": 13877 }, { "epoch": 1.302364864864865, "grad_norm": 0.9481581453094058, "learning_rate": 6.968323331457984e-06, "loss": 0.4125, "step": 13878 }, { "epoch": 1.3024587087087087, "grad_norm": 0.9681365721715932, "learning_rate": 6.967821439015161e-06, "loss": 0.425, "step": 13879 }, { "epoch": 1.3025525525525525, "grad_norm": 1.2301236182895696, "learning_rate": 6.967319523109774e-06, "loss": 0.4123, "step": 13880 }, { "epoch": 1.3026463963963963, "grad_norm": 1.0619427244098134, "learning_rate": 6.966817583747802e-06, "loss": 0.3987, "step": 13881 }, { "epoch": 1.3027402402402402, "grad_norm": 1.1559839843128483, "learning_rate": 6.9663156209352345e-06, "loss": 0.4106, "step": 13882 }, { "epoch": 1.302834084084084, "grad_norm": 1.0473284197019754, "learning_rate": 6.965813634678054e-06, "loss": 0.401, "step": 13883 }, { "epoch": 1.302927927927928, "grad_norm": 0.9891732776516108, "learning_rate": 6.965311624982246e-06, "loss": 0.3855, "step": 13884 }, { "epoch": 1.3030217717717718, "grad_norm": 1.2344676728436084, "learning_rate": 6.964809591853796e-06, "loss": 0.3435, "step": 13885 }, { "epoch": 1.3031156156156156, "grad_norm": 1.054616622779798, "learning_rate": 6.964307535298691e-06, "loss": 0.3788, "step": 13886 }, { "epoch": 1.3032094594594594, "grad_norm": 0.9371788142004617, "learning_rate": 6.963805455322915e-06, "loss": 0.3655, "step": 13887 }, { "epoch": 1.3033033033033032, "grad_norm": 1.0105312640944855, "learning_rate": 6.963303351932455e-06, "loss": 0.3983, "step": 13888 }, { "epoch": 1.3033971471471473, "grad_norm": 0.952005407659623, "learning_rate": 6.962801225133299e-06, "loss": 0.3811, "step": 13889 }, { "epoch": 1.303490990990991, "grad_norm": 0.9251680983920314, "learning_rate": 6.962299074931433e-06, "loss": 0.4098, "step": 13890 }, { "epoch": 1.303584834834835, "grad_norm": 0.9379209560177028, "learning_rate": 6.961796901332844e-06, "loss": 0.3666, "step": 13891 }, { "epoch": 1.3036786786786787, "grad_norm": 0.9044901398024051, "learning_rate": 6.961294704343519e-06, "loss": 0.4105, "step": 13892 }, { "epoch": 1.3037725225225225, "grad_norm": 0.9843858075570149, "learning_rate": 6.960792483969447e-06, "loss": 0.395, "step": 13893 }, { "epoch": 1.3038663663663663, "grad_norm": 0.9315811899449259, "learning_rate": 6.960290240216616e-06, "loss": 0.3636, "step": 13894 }, { "epoch": 1.3039602102102101, "grad_norm": 0.9403358577314203, "learning_rate": 6.959787973091012e-06, "loss": 0.4459, "step": 13895 }, { "epoch": 1.304054054054054, "grad_norm": 1.013149890663673, "learning_rate": 6.959285682598628e-06, "loss": 0.4599, "step": 13896 }, { "epoch": 1.304147897897898, "grad_norm": 0.9759397357700605, "learning_rate": 6.9587833687454474e-06, "loss": 0.3926, "step": 13897 }, { "epoch": 1.3042417417417418, "grad_norm": 0.8567615914734446, "learning_rate": 6.958281031537464e-06, "loss": 0.4133, "step": 13898 }, { "epoch": 1.3043355855855856, "grad_norm": 1.237393480330714, "learning_rate": 6.957778670980666e-06, "loss": 0.3934, "step": 13899 }, { "epoch": 1.3044294294294294, "grad_norm": 0.8598509887902464, "learning_rate": 6.95727628708104e-06, "loss": 0.4082, "step": 13900 }, { "epoch": 1.3045232732732732, "grad_norm": 0.8293920270462913, "learning_rate": 6.956773879844581e-06, "loss": 0.4262, "step": 13901 }, { "epoch": 1.3046171171171173, "grad_norm": 1.0519407037124038, "learning_rate": 6.956271449277275e-06, "loss": 0.3933, "step": 13902 }, { "epoch": 1.304710960960961, "grad_norm": 0.9171788740373963, "learning_rate": 6.955768995385115e-06, "loss": 0.3576, "step": 13903 }, { "epoch": 1.3048048048048049, "grad_norm": 1.0091099420713432, "learning_rate": 6.955266518174091e-06, "loss": 0.4259, "step": 13904 }, { "epoch": 1.3048986486486487, "grad_norm": 0.8308660571925447, "learning_rate": 6.954764017650195e-06, "loss": 0.3745, "step": 13905 }, { "epoch": 1.3049924924924925, "grad_norm": 0.9501902201853378, "learning_rate": 6.954261493819417e-06, "loss": 0.4353, "step": 13906 }, { "epoch": 1.3050863363363363, "grad_norm": 0.8651355021550218, "learning_rate": 6.953758946687748e-06, "loss": 0.3911, "step": 13907 }, { "epoch": 1.3051801801801801, "grad_norm": 2.2324317717072812, "learning_rate": 6.953256376261182e-06, "loss": 0.3883, "step": 13908 }, { "epoch": 1.305274024024024, "grad_norm": 1.0243897714264116, "learning_rate": 6.9527537825457095e-06, "loss": 0.4293, "step": 13909 }, { "epoch": 1.3053678678678677, "grad_norm": 0.9121686935805073, "learning_rate": 6.952251165547326e-06, "loss": 0.4175, "step": 13910 }, { "epoch": 1.3054617117117118, "grad_norm": 1.0640772054691832, "learning_rate": 6.951748525272021e-06, "loss": 0.3967, "step": 13911 }, { "epoch": 1.3055555555555556, "grad_norm": 1.3338427461412732, "learning_rate": 6.951245861725787e-06, "loss": 0.4079, "step": 13912 }, { "epoch": 1.3056493993993994, "grad_norm": 1.0206151638112115, "learning_rate": 6.950743174914621e-06, "loss": 0.3981, "step": 13913 }, { "epoch": 1.3057432432432432, "grad_norm": 1.1635678162902403, "learning_rate": 6.950240464844513e-06, "loss": 0.4195, "step": 13914 }, { "epoch": 1.305837087087087, "grad_norm": 1.0171802796933942, "learning_rate": 6.94973773152146e-06, "loss": 0.3811, "step": 13915 }, { "epoch": 1.305930930930931, "grad_norm": 1.006165452002848, "learning_rate": 6.949234974951453e-06, "loss": 0.3804, "step": 13916 }, { "epoch": 1.3060247747747749, "grad_norm": 0.9829079950110169, "learning_rate": 6.94873219514049e-06, "loss": 0.3672, "step": 13917 }, { "epoch": 1.3061186186186187, "grad_norm": 0.9485207256679311, "learning_rate": 6.948229392094562e-06, "loss": 0.3783, "step": 13918 }, { "epoch": 1.3062124624624625, "grad_norm": 0.9036495451387748, "learning_rate": 6.947726565819665e-06, "loss": 0.4233, "step": 13919 }, { "epoch": 1.3063063063063063, "grad_norm": 1.049690013477767, "learning_rate": 6.947223716321797e-06, "loss": 0.4099, "step": 13920 }, { "epoch": 1.30640015015015, "grad_norm": 1.0744292705093519, "learning_rate": 6.94672084360695e-06, "loss": 0.401, "step": 13921 }, { "epoch": 1.306493993993994, "grad_norm": 1.3361739924120148, "learning_rate": 6.946217947681122e-06, "loss": 0.4266, "step": 13922 }, { "epoch": 1.3065878378378377, "grad_norm": 0.947897759307216, "learning_rate": 6.94571502855031e-06, "loss": 0.4239, "step": 13923 }, { "epoch": 1.3066816816816818, "grad_norm": 0.9508651440800756, "learning_rate": 6.9452120862205056e-06, "loss": 0.4094, "step": 13924 }, { "epoch": 1.3067755255255256, "grad_norm": 0.9038468072319951, "learning_rate": 6.944709120697709e-06, "loss": 0.4262, "step": 13925 }, { "epoch": 1.3068693693693694, "grad_norm": 1.1691198737444877, "learning_rate": 6.944206131987918e-06, "loss": 0.4004, "step": 13926 }, { "epoch": 1.3069632132132132, "grad_norm": 0.9962556279640656, "learning_rate": 6.943703120097127e-06, "loss": 0.3908, "step": 13927 }, { "epoch": 1.307057057057057, "grad_norm": 1.0197778240794881, "learning_rate": 6.943200085031336e-06, "loss": 0.3537, "step": 13928 }, { "epoch": 1.307150900900901, "grad_norm": 0.9446658535997308, "learning_rate": 6.942697026796542e-06, "loss": 0.4166, "step": 13929 }, { "epoch": 1.3072447447447448, "grad_norm": 1.0197967461366426, "learning_rate": 6.942193945398743e-06, "loss": 0.4267, "step": 13930 }, { "epoch": 1.3073385885885886, "grad_norm": 1.272431046672359, "learning_rate": 6.941690840843935e-06, "loss": 0.378, "step": 13931 }, { "epoch": 1.3074324324324325, "grad_norm": 1.0022245104315213, "learning_rate": 6.941187713138121e-06, "loss": 0.4229, "step": 13932 }, { "epoch": 1.3075262762762763, "grad_norm": 0.8906776488549918, "learning_rate": 6.940684562287297e-06, "loss": 0.4069, "step": 13933 }, { "epoch": 1.30762012012012, "grad_norm": 0.9156377116241959, "learning_rate": 6.94018138829746e-06, "loss": 0.4072, "step": 13934 }, { "epoch": 1.3077139639639639, "grad_norm": 0.8875899707922656, "learning_rate": 6.939678191174615e-06, "loss": 0.4297, "step": 13935 }, { "epoch": 1.3078078078078077, "grad_norm": 0.9519007648329181, "learning_rate": 6.939174970924758e-06, "loss": 0.4202, "step": 13936 }, { "epoch": 1.3079016516516517, "grad_norm": 0.8613151591678766, "learning_rate": 6.938671727553888e-06, "loss": 0.3976, "step": 13937 }, { "epoch": 1.3079954954954955, "grad_norm": 1.048442113216312, "learning_rate": 6.938168461068009e-06, "loss": 0.403, "step": 13938 }, { "epoch": 1.3080893393393394, "grad_norm": 1.0775382193024763, "learning_rate": 6.937665171473118e-06, "loss": 0.4183, "step": 13939 }, { "epoch": 1.3081831831831832, "grad_norm": 1.4243550639043707, "learning_rate": 6.937161858775216e-06, "loss": 0.3907, "step": 13940 }, { "epoch": 1.308277027027027, "grad_norm": 1.2430678308041072, "learning_rate": 6.936658522980308e-06, "loss": 0.4176, "step": 13941 }, { "epoch": 1.308370870870871, "grad_norm": 0.9787932828932246, "learning_rate": 6.93615516409439e-06, "loss": 0.3895, "step": 13942 }, { "epoch": 1.3084647147147148, "grad_norm": 0.908895958305871, "learning_rate": 6.935651782123467e-06, "loss": 0.3949, "step": 13943 }, { "epoch": 1.3085585585585586, "grad_norm": 1.2612859085407957, "learning_rate": 6.935148377073541e-06, "loss": 0.411, "step": 13944 }, { "epoch": 1.3086524024024024, "grad_norm": 0.9765388784481818, "learning_rate": 6.934644948950612e-06, "loss": 0.4002, "step": 13945 }, { "epoch": 1.3087462462462462, "grad_norm": 0.9840078210365341, "learning_rate": 6.9341414977606825e-06, "loss": 0.4255, "step": 13946 }, { "epoch": 1.30884009009009, "grad_norm": 0.9425121469192559, "learning_rate": 6.933638023509759e-06, "loss": 0.4114, "step": 13947 }, { "epoch": 1.3089339339339339, "grad_norm": 0.790404659085014, "learning_rate": 6.93313452620384e-06, "loss": 0.4057, "step": 13948 }, { "epoch": 1.3090277777777777, "grad_norm": 0.8675987083360629, "learning_rate": 6.932631005848931e-06, "loss": 0.3837, "step": 13949 }, { "epoch": 1.3091216216216215, "grad_norm": 2.55109942522381, "learning_rate": 6.932127462451035e-06, "loss": 0.4768, "step": 13950 }, { "epoch": 1.3092154654654655, "grad_norm": 0.9034473351685872, "learning_rate": 6.931623896016156e-06, "loss": 0.3646, "step": 13951 }, { "epoch": 1.3093093093093093, "grad_norm": 1.039756706646524, "learning_rate": 6.931120306550296e-06, "loss": 0.4274, "step": 13952 }, { "epoch": 1.3094031531531531, "grad_norm": 1.0750263395351645, "learning_rate": 6.930616694059464e-06, "loss": 0.407, "step": 13953 }, { "epoch": 1.309496996996997, "grad_norm": 0.9104614870126899, "learning_rate": 6.930113058549661e-06, "loss": 0.4263, "step": 13954 }, { "epoch": 1.3095908408408408, "grad_norm": 0.882242533113596, "learning_rate": 6.929609400026893e-06, "loss": 0.3768, "step": 13955 }, { "epoch": 1.3096846846846848, "grad_norm": 3.039059997066268, "learning_rate": 6.929105718497165e-06, "loss": 0.4111, "step": 13956 }, { "epoch": 1.3097785285285286, "grad_norm": 0.8012871076859746, "learning_rate": 6.928602013966482e-06, "loss": 0.3299, "step": 13957 }, { "epoch": 1.3098723723723724, "grad_norm": 1.1205255178521616, "learning_rate": 6.92809828644085e-06, "loss": 0.44, "step": 13958 }, { "epoch": 1.3099662162162162, "grad_norm": 0.868057799796245, "learning_rate": 6.927594535926276e-06, "loss": 0.3759, "step": 13959 }, { "epoch": 1.31006006006006, "grad_norm": 0.9776951628065909, "learning_rate": 6.927090762428767e-06, "loss": 0.4055, "step": 13960 }, { "epoch": 1.3101539039039038, "grad_norm": 0.910191208240845, "learning_rate": 6.926586965954325e-06, "loss": 0.4273, "step": 13961 }, { "epoch": 1.3102477477477477, "grad_norm": 1.7229051715458554, "learning_rate": 6.926083146508962e-06, "loss": 0.4245, "step": 13962 }, { "epoch": 1.3103415915915915, "grad_norm": 0.9256841399348031, "learning_rate": 6.925579304098682e-06, "loss": 0.4472, "step": 13963 }, { "epoch": 1.3104354354354355, "grad_norm": 0.9296224270777234, "learning_rate": 6.925075438729492e-06, "loss": 0.4471, "step": 13964 }, { "epoch": 1.3105292792792793, "grad_norm": 1.0027214416828183, "learning_rate": 6.924571550407403e-06, "loss": 0.3974, "step": 13965 }, { "epoch": 1.3106231231231231, "grad_norm": 1.668945141611164, "learning_rate": 6.924067639138421e-06, "loss": 0.4472, "step": 13966 }, { "epoch": 1.310716966966967, "grad_norm": 1.0273010215117426, "learning_rate": 6.923563704928553e-06, "loss": 0.4374, "step": 13967 }, { "epoch": 1.3108108108108107, "grad_norm": 0.9568262399472723, "learning_rate": 6.923059747783809e-06, "loss": 0.398, "step": 13968 }, { "epoch": 1.3109046546546548, "grad_norm": 1.169755746310373, "learning_rate": 6.922555767710197e-06, "loss": 0.4237, "step": 13969 }, { "epoch": 1.3109984984984986, "grad_norm": 1.8696332565149076, "learning_rate": 6.922051764713726e-06, "loss": 0.4174, "step": 13970 }, { "epoch": 1.3110923423423424, "grad_norm": 1.151355504304274, "learning_rate": 6.921547738800407e-06, "loss": 0.3892, "step": 13971 }, { "epoch": 1.3111861861861862, "grad_norm": 1.086746840200508, "learning_rate": 6.921043689976248e-06, "loss": 0.382, "step": 13972 }, { "epoch": 1.31128003003003, "grad_norm": 0.9708349002559569, "learning_rate": 6.920539618247257e-06, "loss": 0.4108, "step": 13973 }, { "epoch": 1.3113738738738738, "grad_norm": 1.0615644511513356, "learning_rate": 6.920035523619448e-06, "loss": 0.4031, "step": 13974 }, { "epoch": 1.3114677177177176, "grad_norm": 0.934271015871382, "learning_rate": 6.919531406098829e-06, "loss": 0.4451, "step": 13975 }, { "epoch": 1.3115615615615615, "grad_norm": 0.9577750926259934, "learning_rate": 6.9190272656914096e-06, "loss": 0.4519, "step": 13976 }, { "epoch": 1.3116554054054055, "grad_norm": 0.8757919168781949, "learning_rate": 6.918523102403204e-06, "loss": 0.3459, "step": 13977 }, { "epoch": 1.3117492492492493, "grad_norm": 0.983878454204106, "learning_rate": 6.918018916240222e-06, "loss": 0.3904, "step": 13978 }, { "epoch": 1.311843093093093, "grad_norm": 0.8654951478873659, "learning_rate": 6.917514707208474e-06, "loss": 0.3767, "step": 13979 }, { "epoch": 1.311936936936937, "grad_norm": 0.956393308265664, "learning_rate": 6.917010475313972e-06, "loss": 0.3733, "step": 13980 }, { "epoch": 1.3120307807807807, "grad_norm": 0.9956655456792469, "learning_rate": 6.916506220562729e-06, "loss": 0.4374, "step": 13981 }, { "epoch": 1.3121246246246248, "grad_norm": 1.135997980274605, "learning_rate": 6.9160019429607564e-06, "loss": 0.4391, "step": 13982 }, { "epoch": 1.3122184684684686, "grad_norm": 1.0043013809987218, "learning_rate": 6.9154976425140675e-06, "loss": 0.4241, "step": 13983 }, { "epoch": 1.3123123123123124, "grad_norm": 0.8792136212271086, "learning_rate": 6.914993319228676e-06, "loss": 0.3924, "step": 13984 }, { "epoch": 1.3124061561561562, "grad_norm": 1.0968542924098754, "learning_rate": 6.914488973110591e-06, "loss": 0.4246, "step": 13985 }, { "epoch": 1.3125, "grad_norm": 0.9034790081851916, "learning_rate": 6.913984604165831e-06, "loss": 0.3894, "step": 13986 }, { "epoch": 1.3125938438438438, "grad_norm": 1.028521705586748, "learning_rate": 6.913480212400407e-06, "loss": 0.461, "step": 13987 }, { "epoch": 1.3126876876876876, "grad_norm": 1.163584725011222, "learning_rate": 6.912975797820332e-06, "loss": 0.4423, "step": 13988 }, { "epoch": 1.3127815315315314, "grad_norm": 1.0001289829753486, "learning_rate": 6.9124713604316215e-06, "loss": 0.4228, "step": 13989 }, { "epoch": 1.3128753753753752, "grad_norm": 0.9126543576885876, "learning_rate": 6.91196690024029e-06, "loss": 0.4095, "step": 13990 }, { "epoch": 1.3129692192192193, "grad_norm": 1.1123863671221996, "learning_rate": 6.911462417252353e-06, "loss": 0.4112, "step": 13991 }, { "epoch": 1.313063063063063, "grad_norm": 0.9106681608382564, "learning_rate": 6.910957911473826e-06, "loss": 0.418, "step": 13992 }, { "epoch": 1.313156906906907, "grad_norm": 0.918234295712883, "learning_rate": 6.91045338291072e-06, "loss": 0.4056, "step": 13993 }, { "epoch": 1.3132507507507507, "grad_norm": 1.0806038248043934, "learning_rate": 6.909948831569053e-06, "loss": 0.3742, "step": 13994 }, { "epoch": 1.3133445945945945, "grad_norm": 0.9877070928426951, "learning_rate": 6.909444257454845e-06, "loss": 0.3855, "step": 13995 }, { "epoch": 1.3134384384384385, "grad_norm": 1.0700392267508803, "learning_rate": 6.908939660574106e-06, "loss": 0.3762, "step": 13996 }, { "epoch": 1.3135322822822824, "grad_norm": 0.8867786897986332, "learning_rate": 6.908435040932855e-06, "loss": 0.4392, "step": 13997 }, { "epoch": 1.3136261261261262, "grad_norm": 1.1341830846476555, "learning_rate": 6.907930398537109e-06, "loss": 0.4098, "step": 13998 }, { "epoch": 1.31371996996997, "grad_norm": 1.0225436025157213, "learning_rate": 6.907425733392883e-06, "loss": 0.4303, "step": 13999 }, { "epoch": 1.3138138138138138, "grad_norm": 0.8929025607699669, "learning_rate": 6.906921045506195e-06, "loss": 0.3849, "step": 14000 }, { "epoch": 1.3139076576576576, "grad_norm": 0.8609932601272619, "learning_rate": 6.906416334883065e-06, "loss": 0.3932, "step": 14001 }, { "epoch": 1.3140015015015014, "grad_norm": 0.9071865252696119, "learning_rate": 6.905911601529507e-06, "loss": 0.4188, "step": 14002 }, { "epoch": 1.3140953453453452, "grad_norm": 0.9065941564523365, "learning_rate": 6.905406845451542e-06, "loss": 0.3476, "step": 14003 }, { "epoch": 1.3141891891891893, "grad_norm": 1.060776903779275, "learning_rate": 6.904902066655188e-06, "loss": 0.4165, "step": 14004 }, { "epoch": 1.314283033033033, "grad_norm": 0.9493176062083015, "learning_rate": 6.90439726514646e-06, "loss": 0.3553, "step": 14005 }, { "epoch": 1.3143768768768769, "grad_norm": 1.130597902504483, "learning_rate": 6.903892440931379e-06, "loss": 0.3957, "step": 14006 }, { "epoch": 1.3144707207207207, "grad_norm": 0.950425385432602, "learning_rate": 6.903387594015967e-06, "loss": 0.3813, "step": 14007 }, { "epoch": 1.3145645645645645, "grad_norm": 0.9327564911904322, "learning_rate": 6.902882724406238e-06, "loss": 0.4002, "step": 14008 }, { "epoch": 1.3146584084084085, "grad_norm": 0.94521582788721, "learning_rate": 6.9023778321082155e-06, "loss": 0.4611, "step": 14009 }, { "epoch": 1.3147522522522523, "grad_norm": 1.1225190256804665, "learning_rate": 6.90187291712792e-06, "loss": 0.402, "step": 14010 }, { "epoch": 1.3148460960960962, "grad_norm": 0.9424871606464308, "learning_rate": 6.901367979471368e-06, "loss": 0.4248, "step": 14011 }, { "epoch": 1.31493993993994, "grad_norm": 0.9675133020241401, "learning_rate": 6.90086301914458e-06, "loss": 0.4237, "step": 14012 }, { "epoch": 1.3150337837837838, "grad_norm": 1.1010859341528567, "learning_rate": 6.900358036153581e-06, "loss": 0.4579, "step": 14013 }, { "epoch": 1.3151276276276276, "grad_norm": 0.8962142476109394, "learning_rate": 6.8998530305043885e-06, "loss": 0.3812, "step": 14014 }, { "epoch": 1.3152214714714714, "grad_norm": 0.7598085815502064, "learning_rate": 6.899348002203026e-06, "loss": 0.3551, "step": 14015 }, { "epoch": 1.3153153153153152, "grad_norm": 0.9308257942738061, "learning_rate": 6.898842951255513e-06, "loss": 0.4284, "step": 14016 }, { "epoch": 1.3154091591591592, "grad_norm": 1.1839443611166116, "learning_rate": 6.898337877667871e-06, "loss": 0.3882, "step": 14017 }, { "epoch": 1.315503003003003, "grad_norm": 0.982278903583391, "learning_rate": 6.897832781446124e-06, "loss": 0.3797, "step": 14018 }, { "epoch": 1.3155968468468469, "grad_norm": 1.0289147948877155, "learning_rate": 6.897327662596294e-06, "loss": 0.4233, "step": 14019 }, { "epoch": 1.3156906906906907, "grad_norm": 1.0128117574422237, "learning_rate": 6.8968225211244025e-06, "loss": 0.4075, "step": 14020 }, { "epoch": 1.3157845345345345, "grad_norm": 0.9813134370024206, "learning_rate": 6.896317357036473e-06, "loss": 0.3955, "step": 14021 }, { "epoch": 1.3158783783783785, "grad_norm": 0.9338689287839856, "learning_rate": 6.895812170338529e-06, "loss": 0.3763, "step": 14022 }, { "epoch": 1.3159722222222223, "grad_norm": 0.9586070042042381, "learning_rate": 6.895306961036591e-06, "loss": 0.4376, "step": 14023 }, { "epoch": 1.3160660660660661, "grad_norm": 1.0487672631084406, "learning_rate": 6.894801729136687e-06, "loss": 0.4252, "step": 14024 }, { "epoch": 1.31615990990991, "grad_norm": 0.9477415928924091, "learning_rate": 6.8942964746448385e-06, "loss": 0.3894, "step": 14025 }, { "epoch": 1.3162537537537538, "grad_norm": 0.9552302924969599, "learning_rate": 6.893791197567069e-06, "loss": 0.3597, "step": 14026 }, { "epoch": 1.3163475975975976, "grad_norm": 0.983077101576973, "learning_rate": 6.893285897909406e-06, "loss": 0.4015, "step": 14027 }, { "epoch": 1.3164414414414414, "grad_norm": 0.884702866452004, "learning_rate": 6.892780575677872e-06, "loss": 0.4214, "step": 14028 }, { "epoch": 1.3165352852852852, "grad_norm": 1.1050491961673563, "learning_rate": 6.892275230878491e-06, "loss": 0.3837, "step": 14029 }, { "epoch": 1.316629129129129, "grad_norm": 1.1401021705365306, "learning_rate": 6.891769863517292e-06, "loss": 0.3837, "step": 14030 }, { "epoch": 1.316722972972973, "grad_norm": 0.9122096303140103, "learning_rate": 6.891264473600296e-06, "loss": 0.4103, "step": 14031 }, { "epoch": 1.3168168168168168, "grad_norm": 1.0538909680082786, "learning_rate": 6.890759061133533e-06, "loss": 0.4259, "step": 14032 }, { "epoch": 1.3169106606606606, "grad_norm": 1.1813509568312694, "learning_rate": 6.8902536261230265e-06, "loss": 0.3797, "step": 14033 }, { "epoch": 1.3170045045045045, "grad_norm": 1.068420398825301, "learning_rate": 6.889748168574804e-06, "loss": 0.4229, "step": 14034 }, { "epoch": 1.3170983483483483, "grad_norm": 0.9028987041198363, "learning_rate": 6.889242688494891e-06, "loss": 0.3678, "step": 14035 }, { "epoch": 1.3171921921921923, "grad_norm": 1.0887374095332623, "learning_rate": 6.888737185889314e-06, "loss": 0.3561, "step": 14036 }, { "epoch": 1.3172860360360361, "grad_norm": 1.0997034222114686, "learning_rate": 6.888231660764103e-06, "loss": 0.4268, "step": 14037 }, { "epoch": 1.31737987987988, "grad_norm": 0.8464743854228649, "learning_rate": 6.887726113125282e-06, "loss": 0.3892, "step": 14038 }, { "epoch": 1.3174737237237237, "grad_norm": 0.9497622739390159, "learning_rate": 6.887220542978883e-06, "loss": 0.3733, "step": 14039 }, { "epoch": 1.3175675675675675, "grad_norm": 1.0858960785375533, "learning_rate": 6.8867149503309306e-06, "loss": 0.3972, "step": 14040 }, { "epoch": 1.3176614114114114, "grad_norm": 0.880788143108578, "learning_rate": 6.8862093351874525e-06, "loss": 0.369, "step": 14041 }, { "epoch": 1.3177552552552552, "grad_norm": 1.4102818549836307, "learning_rate": 6.885703697554478e-06, "loss": 0.466, "step": 14042 }, { "epoch": 1.317849099099099, "grad_norm": 1.002725749304475, "learning_rate": 6.885198037438038e-06, "loss": 0.3838, "step": 14043 }, { "epoch": 1.317942942942943, "grad_norm": 0.8538271465290599, "learning_rate": 6.884692354844159e-06, "loss": 0.4349, "step": 14044 }, { "epoch": 1.3180367867867868, "grad_norm": 1.1950332801291523, "learning_rate": 6.8841866497788725e-06, "loss": 0.3732, "step": 14045 }, { "epoch": 1.3181306306306306, "grad_norm": 0.9033308577443794, "learning_rate": 6.883680922248206e-06, "loss": 0.4149, "step": 14046 }, { "epoch": 1.3182244744744744, "grad_norm": 0.9023410091608264, "learning_rate": 6.88317517225819e-06, "loss": 0.4056, "step": 14047 }, { "epoch": 1.3183183183183182, "grad_norm": 0.9121795945168487, "learning_rate": 6.882669399814854e-06, "loss": 0.3906, "step": 14048 }, { "epoch": 1.3184121621621623, "grad_norm": 0.9881301797391584, "learning_rate": 6.88216360492423e-06, "loss": 0.4171, "step": 14049 }, { "epoch": 1.318506006006006, "grad_norm": 0.9958014441910426, "learning_rate": 6.881657787592349e-06, "loss": 0.3574, "step": 14050 }, { "epoch": 1.31859984984985, "grad_norm": 0.8376059305263929, "learning_rate": 6.881151947825238e-06, "loss": 0.3568, "step": 14051 }, { "epoch": 1.3186936936936937, "grad_norm": 0.808631508414001, "learning_rate": 6.880646085628932e-06, "loss": 0.3939, "step": 14052 }, { "epoch": 1.3187875375375375, "grad_norm": 1.3023368167785512, "learning_rate": 6.880140201009463e-06, "loss": 0.4588, "step": 14053 }, { "epoch": 1.3188813813813813, "grad_norm": 0.9018061037592617, "learning_rate": 6.87963429397286e-06, "loss": 0.4071, "step": 14054 }, { "epoch": 1.3189752252252251, "grad_norm": 0.9275469833900921, "learning_rate": 6.879128364525156e-06, "loss": 0.4107, "step": 14055 }, { "epoch": 1.319069069069069, "grad_norm": 1.062102802043962, "learning_rate": 6.878622412672383e-06, "loss": 0.4421, "step": 14056 }, { "epoch": 1.319162912912913, "grad_norm": 0.920362173613755, "learning_rate": 6.878116438420575e-06, "loss": 0.4185, "step": 14057 }, { "epoch": 1.3192567567567568, "grad_norm": 0.9138241290177972, "learning_rate": 6.877610441775763e-06, "loss": 0.3521, "step": 14058 }, { "epoch": 1.3193506006006006, "grad_norm": 1.178076786000346, "learning_rate": 6.877104422743981e-06, "loss": 0.3499, "step": 14059 }, { "epoch": 1.3194444444444444, "grad_norm": 1.0055837046023788, "learning_rate": 6.8765983813312605e-06, "loss": 0.3763, "step": 14060 }, { "epoch": 1.3195382882882882, "grad_norm": 0.994912098194488, "learning_rate": 6.8760923175436385e-06, "loss": 0.456, "step": 14061 }, { "epoch": 1.3196321321321323, "grad_norm": 0.8304160033523244, "learning_rate": 6.875586231387146e-06, "loss": 0.3724, "step": 14062 }, { "epoch": 1.319725975975976, "grad_norm": 1.0775477558868933, "learning_rate": 6.8750801228678184e-06, "loss": 0.397, "step": 14063 }, { "epoch": 1.3198198198198199, "grad_norm": 0.993191134677495, "learning_rate": 6.874573991991691e-06, "loss": 0.4731, "step": 14064 }, { "epoch": 1.3199136636636637, "grad_norm": 0.9848380574926835, "learning_rate": 6.874067838764797e-06, "loss": 0.4522, "step": 14065 }, { "epoch": 1.3200075075075075, "grad_norm": 0.9041924222195299, "learning_rate": 6.873561663193169e-06, "loss": 0.3988, "step": 14066 }, { "epoch": 1.3201013513513513, "grad_norm": 1.0263269265764308, "learning_rate": 6.873055465282847e-06, "loss": 0.3907, "step": 14067 }, { "epoch": 1.3201951951951951, "grad_norm": 1.0706276142975844, "learning_rate": 6.8725492450398636e-06, "loss": 0.4543, "step": 14068 }, { "epoch": 1.320289039039039, "grad_norm": 1.0259731558352219, "learning_rate": 6.8720430024702544e-06, "loss": 0.3787, "step": 14069 }, { "epoch": 1.3203828828828827, "grad_norm": 0.875741991265844, "learning_rate": 6.871536737580058e-06, "loss": 0.4105, "step": 14070 }, { "epoch": 1.3204767267267268, "grad_norm": 0.8932379671049734, "learning_rate": 6.8710304503753085e-06, "loss": 0.3829, "step": 14071 }, { "epoch": 1.3205705705705706, "grad_norm": 1.0118517798164057, "learning_rate": 6.870524140862042e-06, "loss": 0.4316, "step": 14072 }, { "epoch": 1.3206644144144144, "grad_norm": 0.9929782879574636, "learning_rate": 6.870017809046296e-06, "loss": 0.4217, "step": 14073 }, { "epoch": 1.3207582582582582, "grad_norm": 0.8346057551937327, "learning_rate": 6.869511454934108e-06, "loss": 0.3671, "step": 14074 }, { "epoch": 1.320852102102102, "grad_norm": 0.8429346987015174, "learning_rate": 6.869005078531515e-06, "loss": 0.3682, "step": 14075 }, { "epoch": 1.320945945945946, "grad_norm": 0.8204917381880357, "learning_rate": 6.868498679844553e-06, "loss": 0.3646, "step": 14076 }, { "epoch": 1.3210397897897899, "grad_norm": 0.9609571838224982, "learning_rate": 6.867992258879263e-06, "loss": 0.3699, "step": 14077 }, { "epoch": 1.3211336336336337, "grad_norm": 1.0279634727329316, "learning_rate": 6.8674858156416815e-06, "loss": 0.4111, "step": 14078 }, { "epoch": 1.3212274774774775, "grad_norm": 1.0253853792377197, "learning_rate": 6.866979350137846e-06, "loss": 0.4044, "step": 14079 }, { "epoch": 1.3213213213213213, "grad_norm": 0.9892463486481202, "learning_rate": 6.866472862373795e-06, "loss": 0.3928, "step": 14080 }, { "epoch": 1.321415165165165, "grad_norm": 0.8835090680183826, "learning_rate": 6.86596635235557e-06, "loss": 0.4325, "step": 14081 }, { "epoch": 1.321509009009009, "grad_norm": 0.8779178037149298, "learning_rate": 6.865459820089209e-06, "loss": 0.4523, "step": 14082 }, { "epoch": 1.3216028528528527, "grad_norm": 0.9524730370181552, "learning_rate": 6.864953265580749e-06, "loss": 0.3768, "step": 14083 }, { "epoch": 1.3216966966966968, "grad_norm": 0.964400463255909, "learning_rate": 6.864446688836234e-06, "loss": 0.407, "step": 14084 }, { "epoch": 1.3217905405405406, "grad_norm": 0.8590167253618907, "learning_rate": 6.863940089861701e-06, "loss": 0.4526, "step": 14085 }, { "epoch": 1.3218843843843844, "grad_norm": 1.31377990278454, "learning_rate": 6.86343346866319e-06, "loss": 0.3609, "step": 14086 }, { "epoch": 1.3219782282282282, "grad_norm": 0.8971271815795187, "learning_rate": 6.862926825246742e-06, "loss": 0.4117, "step": 14087 }, { "epoch": 1.322072072072072, "grad_norm": 0.9500783919487995, "learning_rate": 6.862420159618399e-06, "loss": 0.4442, "step": 14088 }, { "epoch": 1.322165915915916, "grad_norm": 0.8689099678068145, "learning_rate": 6.861913471784203e-06, "loss": 0.4268, "step": 14089 }, { "epoch": 1.3222597597597598, "grad_norm": 1.0115895405105375, "learning_rate": 6.861406761750191e-06, "loss": 0.3895, "step": 14090 }, { "epoch": 1.3223536036036037, "grad_norm": 1.0781267322910135, "learning_rate": 6.860900029522408e-06, "loss": 0.4154, "step": 14091 }, { "epoch": 1.3224474474474475, "grad_norm": 0.8840991191103003, "learning_rate": 6.8603932751068946e-06, "loss": 0.3695, "step": 14092 }, { "epoch": 1.3225412912912913, "grad_norm": 0.9064812438393698, "learning_rate": 6.859886498509694e-06, "loss": 0.3986, "step": 14093 }, { "epoch": 1.322635135135135, "grad_norm": 0.9764915580365623, "learning_rate": 6.859379699736848e-06, "loss": 0.3608, "step": 14094 }, { "epoch": 1.322728978978979, "grad_norm": 1.4351703697294933, "learning_rate": 6.858872878794398e-06, "loss": 0.4439, "step": 14095 }, { "epoch": 1.3228228228228227, "grad_norm": 0.9003504820336633, "learning_rate": 6.858366035688389e-06, "loss": 0.3907, "step": 14096 }, { "epoch": 1.3229166666666667, "grad_norm": 4.861229445917058, "learning_rate": 6.857859170424862e-06, "loss": 0.4051, "step": 14097 }, { "epoch": 1.3230105105105106, "grad_norm": 1.0734798667297494, "learning_rate": 6.857352283009862e-06, "loss": 0.4002, "step": 14098 }, { "epoch": 1.3231043543543544, "grad_norm": 0.881742163265705, "learning_rate": 6.85684537344943e-06, "loss": 0.3863, "step": 14099 }, { "epoch": 1.3231981981981982, "grad_norm": 0.9698076573915657, "learning_rate": 6.856338441749615e-06, "loss": 0.4376, "step": 14100 }, { "epoch": 1.323292042042042, "grad_norm": 0.8585961162184986, "learning_rate": 6.8558314879164575e-06, "loss": 0.3758, "step": 14101 }, { "epoch": 1.323385885885886, "grad_norm": 1.2931183087439608, "learning_rate": 6.855324511956003e-06, "loss": 0.3967, "step": 14102 }, { "epoch": 1.3234797297297298, "grad_norm": 0.8416144754461629, "learning_rate": 6.854817513874296e-06, "loss": 0.4188, "step": 14103 }, { "epoch": 1.3235735735735736, "grad_norm": 0.9223029694729615, "learning_rate": 6.8543104936773795e-06, "loss": 0.4299, "step": 14104 }, { "epoch": 1.3236674174174174, "grad_norm": 1.1342115325224402, "learning_rate": 6.853803451371301e-06, "loss": 0.4096, "step": 14105 }, { "epoch": 1.3237612612612613, "grad_norm": 1.4355156842792443, "learning_rate": 6.853296386962109e-06, "loss": 0.4312, "step": 14106 }, { "epoch": 1.323855105105105, "grad_norm": 0.9164301430395906, "learning_rate": 6.852789300455845e-06, "loss": 0.3748, "step": 14107 }, { "epoch": 1.3239489489489489, "grad_norm": 1.222558105434089, "learning_rate": 6.852282191858553e-06, "loss": 0.401, "step": 14108 }, { "epoch": 1.3240427927927927, "grad_norm": 0.8020895760158206, "learning_rate": 6.851775061176286e-06, "loss": 0.3367, "step": 14109 }, { "epoch": 1.3241366366366365, "grad_norm": 0.9073366190884001, "learning_rate": 6.851267908415084e-06, "loss": 0.405, "step": 14110 }, { "epoch": 1.3242304804804805, "grad_norm": 0.9049799316539949, "learning_rate": 6.850760733580999e-06, "loss": 0.3755, "step": 14111 }, { "epoch": 1.3243243243243243, "grad_norm": 1.336222462810401, "learning_rate": 6.850253536680076e-06, "loss": 0.4481, "step": 14112 }, { "epoch": 1.3244181681681682, "grad_norm": 1.0185172629087484, "learning_rate": 6.849746317718361e-06, "loss": 0.4536, "step": 14113 }, { "epoch": 1.324512012012012, "grad_norm": 1.057968294056853, "learning_rate": 6.8492390767019035e-06, "loss": 0.4246, "step": 14114 }, { "epoch": 1.3246058558558558, "grad_norm": 0.8839380192370802, "learning_rate": 6.848731813636752e-06, "loss": 0.4082, "step": 14115 }, { "epoch": 1.3246996996996998, "grad_norm": 1.127171633951691, "learning_rate": 6.848224528528952e-06, "loss": 0.4285, "step": 14116 }, { "epoch": 1.3247935435435436, "grad_norm": 0.884505444151962, "learning_rate": 6.847717221384553e-06, "loss": 0.3613, "step": 14117 }, { "epoch": 1.3248873873873874, "grad_norm": 1.429546198224016, "learning_rate": 6.847209892209605e-06, "loss": 0.3797, "step": 14118 }, { "epoch": 1.3249812312312312, "grad_norm": 0.9341231296441503, "learning_rate": 6.846702541010157e-06, "loss": 0.4013, "step": 14119 }, { "epoch": 1.325075075075075, "grad_norm": 1.1671820048228283, "learning_rate": 6.846195167792257e-06, "loss": 0.431, "step": 14120 }, { "epoch": 1.3251689189189189, "grad_norm": 0.9621571174585725, "learning_rate": 6.845687772561954e-06, "loss": 0.375, "step": 14121 }, { "epoch": 1.3252627627627627, "grad_norm": 0.8967165701963972, "learning_rate": 6.845180355325298e-06, "loss": 0.3713, "step": 14122 }, { "epoch": 1.3253566066066065, "grad_norm": 0.9521057165451909, "learning_rate": 6.84467291608834e-06, "loss": 0.3863, "step": 14123 }, { "epoch": 1.3254504504504505, "grad_norm": 0.8911859928626906, "learning_rate": 6.84416545485713e-06, "loss": 0.4747, "step": 14124 }, { "epoch": 1.3255442942942943, "grad_norm": 1.038422201866774, "learning_rate": 6.843657971637717e-06, "loss": 0.425, "step": 14125 }, { "epoch": 1.3256381381381381, "grad_norm": 0.9453033947127071, "learning_rate": 6.843150466436155e-06, "loss": 0.3925, "step": 14126 }, { "epoch": 1.325731981981982, "grad_norm": 0.9341521542760226, "learning_rate": 6.842642939258493e-06, "loss": 0.3426, "step": 14127 }, { "epoch": 1.3258258258258258, "grad_norm": 0.9263870331091923, "learning_rate": 6.84213539011078e-06, "loss": 0.3895, "step": 14128 }, { "epoch": 1.3259196696696698, "grad_norm": 0.9162795140635598, "learning_rate": 6.84162781899907e-06, "loss": 0.3441, "step": 14129 }, { "epoch": 1.3260135135135136, "grad_norm": 0.9266376096645735, "learning_rate": 6.8411202259294175e-06, "loss": 0.3948, "step": 14130 }, { "epoch": 1.3261073573573574, "grad_norm": 1.196793380741303, "learning_rate": 6.840612610907869e-06, "loss": 0.4118, "step": 14131 }, { "epoch": 1.3262012012012012, "grad_norm": 0.9805071862960254, "learning_rate": 6.840104973940482e-06, "loss": 0.3292, "step": 14132 }, { "epoch": 1.326295045045045, "grad_norm": 0.9988261131418076, "learning_rate": 6.839597315033307e-06, "loss": 0.4444, "step": 14133 }, { "epoch": 1.3263888888888888, "grad_norm": 0.9269756832773319, "learning_rate": 6.839089634192395e-06, "loss": 0.4325, "step": 14134 }, { "epoch": 1.3264827327327327, "grad_norm": 1.0503505845694863, "learning_rate": 6.838581931423801e-06, "loss": 0.4177, "step": 14135 }, { "epoch": 1.3265765765765765, "grad_norm": 1.4090195947771076, "learning_rate": 6.838074206733579e-06, "loss": 0.459, "step": 14136 }, { "epoch": 1.3266704204204205, "grad_norm": 0.8771798224961008, "learning_rate": 6.837566460127781e-06, "loss": 0.4403, "step": 14137 }, { "epoch": 1.3267642642642643, "grad_norm": 0.9643572900998824, "learning_rate": 6.837058691612464e-06, "loss": 0.3807, "step": 14138 }, { "epoch": 1.3268581081081081, "grad_norm": 0.995394640040829, "learning_rate": 6.83655090119368e-06, "loss": 0.4225, "step": 14139 }, { "epoch": 1.326951951951952, "grad_norm": 0.9527359274222242, "learning_rate": 6.8360430888774794e-06, "loss": 0.4124, "step": 14140 }, { "epoch": 1.3270457957957957, "grad_norm": 0.9299874529046805, "learning_rate": 6.835535254669924e-06, "loss": 0.391, "step": 14141 }, { "epoch": 1.3271396396396398, "grad_norm": 0.8550913037982472, "learning_rate": 6.835027398577065e-06, "loss": 0.3848, "step": 14142 }, { "epoch": 1.3272334834834836, "grad_norm": 1.0439090920752159, "learning_rate": 6.834519520604959e-06, "loss": 0.3954, "step": 14143 }, { "epoch": 1.3273273273273274, "grad_norm": 0.9308977328028439, "learning_rate": 6.834011620759661e-06, "loss": 0.3921, "step": 14144 }, { "epoch": 1.3274211711711712, "grad_norm": 0.9962036712713458, "learning_rate": 6.833503699047226e-06, "loss": 0.4415, "step": 14145 }, { "epoch": 1.327515015015015, "grad_norm": 0.8297194415387571, "learning_rate": 6.83299575547371e-06, "loss": 0.3658, "step": 14146 }, { "epoch": 1.3276088588588588, "grad_norm": 0.954130858379256, "learning_rate": 6.83248779004517e-06, "loss": 0.4038, "step": 14147 }, { "epoch": 1.3277027027027026, "grad_norm": 0.9643883784327882, "learning_rate": 6.831979802767663e-06, "loss": 0.4237, "step": 14148 }, { "epoch": 1.3277965465465464, "grad_norm": 1.0157851116662435, "learning_rate": 6.831471793647245e-06, "loss": 0.3744, "step": 14149 }, { "epoch": 1.3278903903903903, "grad_norm": 1.016927520801595, "learning_rate": 6.8309637626899735e-06, "loss": 0.3486, "step": 14150 }, { "epoch": 1.3279842342342343, "grad_norm": 1.1223580993393296, "learning_rate": 6.8304557099019065e-06, "loss": 0.4007, "step": 14151 }, { "epoch": 1.328078078078078, "grad_norm": 0.9857760233150703, "learning_rate": 6.829947635289099e-06, "loss": 0.3898, "step": 14152 }, { "epoch": 1.328171921921922, "grad_norm": 0.9052809972710364, "learning_rate": 6.829439538857611e-06, "loss": 0.4085, "step": 14153 }, { "epoch": 1.3282657657657657, "grad_norm": 0.8603556660969677, "learning_rate": 6.8289314206135e-06, "loss": 0.4121, "step": 14154 }, { "epoch": 1.3283596096096097, "grad_norm": 0.9451600263901181, "learning_rate": 6.828423280562825e-06, "loss": 0.3879, "step": 14155 }, { "epoch": 1.3284534534534536, "grad_norm": 0.9766548733201945, "learning_rate": 6.8279151187116435e-06, "loss": 0.3321, "step": 14156 }, { "epoch": 1.3285472972972974, "grad_norm": 0.899346121166296, "learning_rate": 6.8274069350660155e-06, "loss": 0.4143, "step": 14157 }, { "epoch": 1.3286411411411412, "grad_norm": 1.0197044359556817, "learning_rate": 6.826898729631999e-06, "loss": 0.3985, "step": 14158 }, { "epoch": 1.328734984984985, "grad_norm": 1.1929428490901752, "learning_rate": 6.826390502415653e-06, "loss": 0.3531, "step": 14159 }, { "epoch": 1.3288288288288288, "grad_norm": 0.8938584561949019, "learning_rate": 6.82588225342304e-06, "loss": 0.366, "step": 14160 }, { "epoch": 1.3289226726726726, "grad_norm": 0.8460156173191236, "learning_rate": 6.825373982660216e-06, "loss": 0.3606, "step": 14161 }, { "epoch": 1.3290165165165164, "grad_norm": 1.2214682994071406, "learning_rate": 6.8248656901332446e-06, "loss": 0.3499, "step": 14162 }, { "epoch": 1.3291103603603602, "grad_norm": 1.061121705373313, "learning_rate": 6.824357375848185e-06, "loss": 0.3718, "step": 14163 }, { "epoch": 1.3292042042042043, "grad_norm": 0.9983397812890002, "learning_rate": 6.823849039811097e-06, "loss": 0.3819, "step": 14164 }, { "epoch": 1.329298048048048, "grad_norm": 1.2588852937862436, "learning_rate": 6.823340682028041e-06, "loss": 0.4156, "step": 14165 }, { "epoch": 1.3293918918918919, "grad_norm": 2.0752620361294936, "learning_rate": 6.822832302505082e-06, "loss": 0.366, "step": 14166 }, { "epoch": 1.3294857357357357, "grad_norm": 1.1248322339559542, "learning_rate": 6.8223239012482765e-06, "loss": 0.4203, "step": 14167 }, { "epoch": 1.3295795795795795, "grad_norm": 1.2020408786883285, "learning_rate": 6.821815478263692e-06, "loss": 0.4153, "step": 14168 }, { "epoch": 1.3296734234234235, "grad_norm": 0.912759001017759, "learning_rate": 6.821307033557385e-06, "loss": 0.3654, "step": 14169 }, { "epoch": 1.3297672672672673, "grad_norm": 1.1292519445055682, "learning_rate": 6.820798567135418e-06, "loss": 0.4149, "step": 14170 }, { "epoch": 1.3298611111111112, "grad_norm": 1.3330417973495825, "learning_rate": 6.820290079003857e-06, "loss": 0.3975, "step": 14171 }, { "epoch": 1.329954954954955, "grad_norm": 0.9619716823391369, "learning_rate": 6.819781569168764e-06, "loss": 0.4051, "step": 14172 }, { "epoch": 1.3300487987987988, "grad_norm": 0.9799507371171879, "learning_rate": 6.8192730376362e-06, "loss": 0.4315, "step": 14173 }, { "epoch": 1.3301426426426426, "grad_norm": 1.0358417559547903, "learning_rate": 6.81876448441223e-06, "loss": 0.3553, "step": 14174 }, { "epoch": 1.3302364864864864, "grad_norm": 1.0223798635879007, "learning_rate": 6.818255909502919e-06, "loss": 0.3988, "step": 14175 }, { "epoch": 1.3303303303303302, "grad_norm": 0.8230923668102453, "learning_rate": 6.817747312914326e-06, "loss": 0.405, "step": 14176 }, { "epoch": 1.3304241741741742, "grad_norm": 1.0655786789332153, "learning_rate": 6.817238694652518e-06, "loss": 0.4371, "step": 14177 }, { "epoch": 1.330518018018018, "grad_norm": 1.0649861647989145, "learning_rate": 6.81673005472356e-06, "loss": 0.4447, "step": 14178 }, { "epoch": 1.3306118618618619, "grad_norm": 0.8512309199019968, "learning_rate": 6.816221393133515e-06, "loss": 0.4122, "step": 14179 }, { "epoch": 1.3307057057057057, "grad_norm": 1.0866217287973483, "learning_rate": 6.815712709888449e-06, "loss": 0.4603, "step": 14180 }, { "epoch": 1.3307995495495495, "grad_norm": 0.9781158203717221, "learning_rate": 6.815204004994428e-06, "loss": 0.4239, "step": 14181 }, { "epoch": 1.3308933933933935, "grad_norm": 1.1894352987857846, "learning_rate": 6.814695278457515e-06, "loss": 0.3714, "step": 14182 }, { "epoch": 1.3309872372372373, "grad_norm": 1.2646543805858181, "learning_rate": 6.814186530283776e-06, "loss": 0.4038, "step": 14183 }, { "epoch": 1.3310810810810811, "grad_norm": 0.9682013399252254, "learning_rate": 6.813677760479278e-06, "loss": 0.3853, "step": 14184 }, { "epoch": 1.331174924924925, "grad_norm": 0.9027772840729038, "learning_rate": 6.8131689690500866e-06, "loss": 0.4263, "step": 14185 }, { "epoch": 1.3312687687687688, "grad_norm": 0.9396667811845516, "learning_rate": 6.812660156002267e-06, "loss": 0.3878, "step": 14186 }, { "epoch": 1.3313626126126126, "grad_norm": 1.1830091170270165, "learning_rate": 6.8121513213418884e-06, "loss": 0.4191, "step": 14187 }, { "epoch": 1.3314564564564564, "grad_norm": 1.1720209197601255, "learning_rate": 6.811642465075017e-06, "loss": 0.429, "step": 14188 }, { "epoch": 1.3315503003003002, "grad_norm": 0.9941386370280473, "learning_rate": 6.811133587207719e-06, "loss": 0.436, "step": 14189 }, { "epoch": 1.3316441441441442, "grad_norm": 0.9255248276448769, "learning_rate": 6.8106246877460615e-06, "loss": 0.3664, "step": 14190 }, { "epoch": 1.331737987987988, "grad_norm": 1.0312070704709984, "learning_rate": 6.810115766696111e-06, "loss": 0.4307, "step": 14191 }, { "epoch": 1.3318318318318318, "grad_norm": 0.9614690663096082, "learning_rate": 6.809606824063939e-06, "loss": 0.4244, "step": 14192 }, { "epoch": 1.3319256756756757, "grad_norm": 0.8506917916155559, "learning_rate": 6.809097859855612e-06, "loss": 0.4015, "step": 14193 }, { "epoch": 1.3320195195195195, "grad_norm": 1.1812934517467593, "learning_rate": 6.808588874077199e-06, "loss": 0.3994, "step": 14194 }, { "epoch": 1.3321133633633635, "grad_norm": 0.8835673550058952, "learning_rate": 6.808079866734767e-06, "loss": 0.4189, "step": 14195 }, { "epoch": 1.3322072072072073, "grad_norm": 0.9630095336430399, "learning_rate": 6.807570837834385e-06, "loss": 0.4297, "step": 14196 }, { "epoch": 1.3323010510510511, "grad_norm": 0.9177958706209216, "learning_rate": 6.8070617873821245e-06, "loss": 0.3974, "step": 14197 }, { "epoch": 1.332394894894895, "grad_norm": 1.2475861521161553, "learning_rate": 6.806552715384053e-06, "loss": 0.3902, "step": 14198 }, { "epoch": 1.3324887387387387, "grad_norm": 0.8659905190248628, "learning_rate": 6.806043621846241e-06, "loss": 0.4284, "step": 14199 }, { "epoch": 1.3325825825825826, "grad_norm": 1.0368283081744922, "learning_rate": 6.805534506774759e-06, "loss": 0.4295, "step": 14200 }, { "epoch": 1.3326764264264264, "grad_norm": 1.066668801114169, "learning_rate": 6.805025370175675e-06, "loss": 0.4154, "step": 14201 }, { "epoch": 1.3327702702702702, "grad_norm": 1.0957237272041802, "learning_rate": 6.804516212055064e-06, "loss": 0.4519, "step": 14202 }, { "epoch": 1.332864114114114, "grad_norm": 1.0352021751577634, "learning_rate": 6.804007032418991e-06, "loss": 0.3936, "step": 14203 }, { "epoch": 1.332957957957958, "grad_norm": 0.9456936716640078, "learning_rate": 6.80349783127353e-06, "loss": 0.4248, "step": 14204 }, { "epoch": 1.3330518018018018, "grad_norm": 0.9589659276347681, "learning_rate": 6.802988608624753e-06, "loss": 0.3751, "step": 14205 }, { "epoch": 1.3331456456456456, "grad_norm": 0.8721033317737112, "learning_rate": 6.802479364478731e-06, "loss": 0.3759, "step": 14206 }, { "epoch": 1.3332394894894894, "grad_norm": 1.3770848955125967, "learning_rate": 6.801970098841535e-06, "loss": 0.385, "step": 14207 }, { "epoch": 1.3333333333333333, "grad_norm": 0.9559585454278815, "learning_rate": 6.801460811719237e-06, "loss": 0.4011, "step": 14208 }, { "epoch": 1.3334271771771773, "grad_norm": 1.2938539972976053, "learning_rate": 6.800951503117909e-06, "loss": 0.3839, "step": 14209 }, { "epoch": 1.333521021021021, "grad_norm": 0.9142330449923098, "learning_rate": 6.800442173043625e-06, "loss": 0.3617, "step": 14210 }, { "epoch": 1.333614864864865, "grad_norm": 0.9401561550071978, "learning_rate": 6.7999328215024585e-06, "loss": 0.3972, "step": 14211 }, { "epoch": 1.3337087087087087, "grad_norm": 1.1810613597060595, "learning_rate": 6.7994234485004804e-06, "loss": 0.3925, "step": 14212 }, { "epoch": 1.3338025525525525, "grad_norm": 1.00843572174452, "learning_rate": 6.798914054043763e-06, "loss": 0.492, "step": 14213 }, { "epoch": 1.3338963963963963, "grad_norm": 0.9582186629526781, "learning_rate": 6.798404638138385e-06, "loss": 0.4343, "step": 14214 }, { "epoch": 1.3339902402402402, "grad_norm": 1.6447307375310543, "learning_rate": 6.797895200790414e-06, "loss": 0.4353, "step": 14215 }, { "epoch": 1.334084084084084, "grad_norm": 0.9036001980028561, "learning_rate": 6.797385742005926e-06, "loss": 0.394, "step": 14216 }, { "epoch": 1.334177927927928, "grad_norm": 0.8689782982300601, "learning_rate": 6.796876261790999e-06, "loss": 0.4034, "step": 14217 }, { "epoch": 1.3342717717717718, "grad_norm": 1.074050535474057, "learning_rate": 6.796366760151705e-06, "loss": 0.4756, "step": 14218 }, { "epoch": 1.3343656156156156, "grad_norm": 1.0080897995851315, "learning_rate": 6.795857237094117e-06, "loss": 0.4139, "step": 14219 }, { "epoch": 1.3344594594594594, "grad_norm": 1.1714339936017053, "learning_rate": 6.795347692624312e-06, "loss": 0.424, "step": 14220 }, { "epoch": 1.3345533033033032, "grad_norm": 1.305454068172664, "learning_rate": 6.794838126748366e-06, "loss": 0.404, "step": 14221 }, { "epoch": 1.3346471471471473, "grad_norm": 1.1337868345413846, "learning_rate": 6.794328539472353e-06, "loss": 0.4011, "step": 14222 }, { "epoch": 1.334740990990991, "grad_norm": 0.8952316484377162, "learning_rate": 6.793818930802349e-06, "loss": 0.4294, "step": 14223 }, { "epoch": 1.334834834834835, "grad_norm": 1.4595754298881531, "learning_rate": 6.793309300744433e-06, "loss": 0.3985, "step": 14224 }, { "epoch": 1.3349286786786787, "grad_norm": 1.033644798665022, "learning_rate": 6.792799649304676e-06, "loss": 0.4011, "step": 14225 }, { "epoch": 1.3350225225225225, "grad_norm": 0.7923840929905627, "learning_rate": 6.792289976489162e-06, "loss": 0.3923, "step": 14226 }, { "epoch": 1.3351163663663663, "grad_norm": 0.878555814785897, "learning_rate": 6.791780282303959e-06, "loss": 0.3658, "step": 14227 }, { "epoch": 1.3352102102102101, "grad_norm": 0.9997896224389639, "learning_rate": 6.79127056675515e-06, "loss": 0.3924, "step": 14228 }, { "epoch": 1.335304054054054, "grad_norm": 0.9350537971344747, "learning_rate": 6.790760829848814e-06, "loss": 0.4196, "step": 14229 }, { "epoch": 1.335397897897898, "grad_norm": 0.8508524321997272, "learning_rate": 6.790251071591024e-06, "loss": 0.4074, "step": 14230 }, { "epoch": 1.3354917417417418, "grad_norm": 0.9699677912533329, "learning_rate": 6.78974129198786e-06, "loss": 0.4243, "step": 14231 }, { "epoch": 1.3355855855855856, "grad_norm": 0.8319002288047008, "learning_rate": 6.7892314910454e-06, "loss": 0.3575, "step": 14232 }, { "epoch": 1.3356794294294294, "grad_norm": 0.9231254812245072, "learning_rate": 6.788721668769722e-06, "loss": 0.3496, "step": 14233 }, { "epoch": 1.3357732732732732, "grad_norm": 1.0688152872531111, "learning_rate": 6.788211825166905e-06, "loss": 0.4057, "step": 14234 }, { "epoch": 1.3358671171171173, "grad_norm": 0.9498921286642459, "learning_rate": 6.787701960243028e-06, "loss": 0.4378, "step": 14235 }, { "epoch": 1.335960960960961, "grad_norm": 0.9036173601656059, "learning_rate": 6.78719207400417e-06, "loss": 0.3712, "step": 14236 }, { "epoch": 1.3360548048048049, "grad_norm": 1.2206872358527416, "learning_rate": 6.786682166456411e-06, "loss": 0.3346, "step": 14237 }, { "epoch": 1.3361486486486487, "grad_norm": 0.9709618309437646, "learning_rate": 6.7861722376058305e-06, "loss": 0.4182, "step": 14238 }, { "epoch": 1.3362424924924925, "grad_norm": 1.058883008969683, "learning_rate": 6.785662287458508e-06, "loss": 0.3784, "step": 14239 }, { "epoch": 1.3363363363363363, "grad_norm": 2.6939261641843486, "learning_rate": 6.785152316020522e-06, "loss": 0.3677, "step": 14240 }, { "epoch": 1.3364301801801801, "grad_norm": 1.0669443683518305, "learning_rate": 6.784642323297957e-06, "loss": 0.4354, "step": 14241 }, { "epoch": 1.336524024024024, "grad_norm": 0.9503390375110919, "learning_rate": 6.784132309296891e-06, "loss": 0.3907, "step": 14242 }, { "epoch": 1.3366178678678677, "grad_norm": 1.0576996633619924, "learning_rate": 6.783622274023407e-06, "loss": 0.4215, "step": 14243 }, { "epoch": 1.3367117117117118, "grad_norm": 0.8318144948634458, "learning_rate": 6.783112217483583e-06, "loss": 0.3506, "step": 14244 }, { "epoch": 1.3368055555555556, "grad_norm": 0.9641081418615438, "learning_rate": 6.782602139683502e-06, "loss": 0.3553, "step": 14245 }, { "epoch": 1.3368993993993994, "grad_norm": 1.2263328723745908, "learning_rate": 6.782092040629245e-06, "loss": 0.4416, "step": 14246 }, { "epoch": 1.3369932432432432, "grad_norm": 0.9982677577123747, "learning_rate": 6.781581920326897e-06, "loss": 0.4161, "step": 14247 }, { "epoch": 1.337087087087087, "grad_norm": 0.9504103214174581, "learning_rate": 6.781071778782537e-06, "loss": 0.3795, "step": 14248 }, { "epoch": 1.337180930930931, "grad_norm": 0.8787006814677412, "learning_rate": 6.780561616002249e-06, "loss": 0.3842, "step": 14249 }, { "epoch": 1.3372747747747749, "grad_norm": 1.182954698717593, "learning_rate": 6.780051431992116e-06, "loss": 0.4261, "step": 14250 }, { "epoch": 1.3373686186186187, "grad_norm": 0.9360375417149873, "learning_rate": 6.779541226758219e-06, "loss": 0.389, "step": 14251 }, { "epoch": 1.3374624624624625, "grad_norm": 1.2940510887943124, "learning_rate": 6.779031000306642e-06, "loss": 0.406, "step": 14252 }, { "epoch": 1.3375563063063063, "grad_norm": 0.8753274480681665, "learning_rate": 6.77852075264347e-06, "loss": 0.4063, "step": 14253 }, { "epoch": 1.33765015015015, "grad_norm": 1.0074567076965044, "learning_rate": 6.778010483774786e-06, "loss": 0.3867, "step": 14254 }, { "epoch": 1.337743993993994, "grad_norm": 0.9552645762730484, "learning_rate": 6.777500193706675e-06, "loss": 0.4062, "step": 14255 }, { "epoch": 1.3378378378378377, "grad_norm": 1.0015382425536905, "learning_rate": 6.7769898824452175e-06, "loss": 0.4014, "step": 14256 }, { "epoch": 1.3379316816816818, "grad_norm": 1.0013464810978414, "learning_rate": 6.776479549996501e-06, "loss": 0.4011, "step": 14257 }, { "epoch": 1.3380255255255256, "grad_norm": 0.9877405030021765, "learning_rate": 6.77596919636661e-06, "loss": 0.3862, "step": 14258 }, { "epoch": 1.3381193693693694, "grad_norm": 0.9347325358084704, "learning_rate": 6.775458821561629e-06, "loss": 0.4159, "step": 14259 }, { "epoch": 1.3382132132132132, "grad_norm": 0.9157352710935159, "learning_rate": 6.774948425587644e-06, "loss": 0.3379, "step": 14260 }, { "epoch": 1.338307057057057, "grad_norm": 0.9677275379695974, "learning_rate": 6.77443800845074e-06, "loss": 0.4251, "step": 14261 }, { "epoch": 1.338400900900901, "grad_norm": 0.9828504337383388, "learning_rate": 6.773927570157004e-06, "loss": 0.3515, "step": 14262 }, { "epoch": 1.3384947447447448, "grad_norm": 1.0144483683886667, "learning_rate": 6.773417110712518e-06, "loss": 0.4461, "step": 14263 }, { "epoch": 1.3385885885885886, "grad_norm": 1.0028799637500445, "learning_rate": 6.7729066301233726e-06, "loss": 0.4034, "step": 14264 }, { "epoch": 1.3386824324324325, "grad_norm": 0.8898827144837361, "learning_rate": 6.7723961283956526e-06, "loss": 0.4364, "step": 14265 }, { "epoch": 1.3387762762762763, "grad_norm": 1.2006533483855464, "learning_rate": 6.771885605535444e-06, "loss": 0.4155, "step": 14266 }, { "epoch": 1.33887012012012, "grad_norm": 0.8838164256983884, "learning_rate": 6.771375061548837e-06, "loss": 0.3893, "step": 14267 }, { "epoch": 1.3389639639639639, "grad_norm": 1.1161528783587837, "learning_rate": 6.7708644964419155e-06, "loss": 0.3866, "step": 14268 }, { "epoch": 1.3390578078078077, "grad_norm": 0.9950181420823756, "learning_rate": 6.7703539102207684e-06, "loss": 0.3753, "step": 14269 }, { "epoch": 1.3391516516516517, "grad_norm": 0.8017677064671779, "learning_rate": 6.769843302891483e-06, "loss": 0.3583, "step": 14270 }, { "epoch": 1.3392454954954955, "grad_norm": 0.9231955903405615, "learning_rate": 6.7693326744601476e-06, "loss": 0.4217, "step": 14271 }, { "epoch": 1.3393393393393394, "grad_norm": 0.9642160380155971, "learning_rate": 6.768822024932851e-06, "loss": 0.4101, "step": 14272 }, { "epoch": 1.3394331831831832, "grad_norm": 0.9979600292428175, "learning_rate": 6.768311354315681e-06, "loss": 0.397, "step": 14273 }, { "epoch": 1.339527027027027, "grad_norm": 1.145927334781145, "learning_rate": 6.7678006626147264e-06, "loss": 0.3889, "step": 14274 }, { "epoch": 1.339620870870871, "grad_norm": 0.8110380453971747, "learning_rate": 6.767289949836075e-06, "loss": 0.3791, "step": 14275 }, { "epoch": 1.3397147147147148, "grad_norm": 0.9721421362552545, "learning_rate": 6.766779215985819e-06, "loss": 0.4247, "step": 14276 }, { "epoch": 1.3398085585585586, "grad_norm": 1.5868538485138748, "learning_rate": 6.766268461070047e-06, "loss": 0.4148, "step": 14277 }, { "epoch": 1.3399024024024024, "grad_norm": 0.8780954680656946, "learning_rate": 6.7657576850948484e-06, "loss": 0.4433, "step": 14278 }, { "epoch": 1.3399962462462462, "grad_norm": 0.9660891716472438, "learning_rate": 6.765246888066312e-06, "loss": 0.4015, "step": 14279 }, { "epoch": 1.34009009009009, "grad_norm": 0.9764633427926431, "learning_rate": 6.764736069990531e-06, "loss": 0.3702, "step": 14280 }, { "epoch": 1.3401839339339339, "grad_norm": 1.3276145645911412, "learning_rate": 6.7642252308735915e-06, "loss": 0.4483, "step": 14281 }, { "epoch": 1.3402777777777777, "grad_norm": 1.082543092665482, "learning_rate": 6.763714370721588e-06, "loss": 0.414, "step": 14282 }, { "epoch": 1.3403716216216215, "grad_norm": 1.0625914616803067, "learning_rate": 6.7632034895406105e-06, "loss": 0.3915, "step": 14283 }, { "epoch": 1.3404654654654655, "grad_norm": 0.9132274005585694, "learning_rate": 6.76269258733675e-06, "loss": 0.393, "step": 14284 }, { "epoch": 1.3405593093093093, "grad_norm": 0.9562089961212756, "learning_rate": 6.7621816641161e-06, "loss": 0.3731, "step": 14285 }, { "epoch": 1.3406531531531531, "grad_norm": 0.8012879746124258, "learning_rate": 6.76167071988475e-06, "loss": 0.377, "step": 14286 }, { "epoch": 1.340746996996997, "grad_norm": 0.8854339780535952, "learning_rate": 6.761159754648791e-06, "loss": 0.3699, "step": 14287 }, { "epoch": 1.3408408408408408, "grad_norm": 0.8996012571491137, "learning_rate": 6.760648768414318e-06, "loss": 0.3725, "step": 14288 }, { "epoch": 1.3409346846846848, "grad_norm": 0.9393924207607732, "learning_rate": 6.760137761187422e-06, "loss": 0.3539, "step": 14289 }, { "epoch": 1.3410285285285286, "grad_norm": 0.8778006313081496, "learning_rate": 6.759626732974197e-06, "loss": 0.3809, "step": 14290 }, { "epoch": 1.3411223723723724, "grad_norm": 1.0355414316942209, "learning_rate": 6.759115683780736e-06, "loss": 0.4253, "step": 14291 }, { "epoch": 1.3412162162162162, "grad_norm": 0.8428473250174335, "learning_rate": 6.758604613613131e-06, "loss": 0.4321, "step": 14292 }, { "epoch": 1.34131006006006, "grad_norm": 1.0084958703341356, "learning_rate": 6.758093522477476e-06, "loss": 0.452, "step": 14293 }, { "epoch": 1.3414039039039038, "grad_norm": 0.8827617913288474, "learning_rate": 6.757582410379864e-06, "loss": 0.3733, "step": 14294 }, { "epoch": 1.3414977477477477, "grad_norm": 0.9276463603286453, "learning_rate": 6.757071277326392e-06, "loss": 0.4065, "step": 14295 }, { "epoch": 1.3415915915915915, "grad_norm": 1.0015943100006246, "learning_rate": 6.75656012332315e-06, "loss": 0.4066, "step": 14296 }, { "epoch": 1.3416854354354355, "grad_norm": 0.8600799690043416, "learning_rate": 6.756048948376237e-06, "loss": 0.4299, "step": 14297 }, { "epoch": 1.3417792792792793, "grad_norm": 0.8954231474448477, "learning_rate": 6.755537752491746e-06, "loss": 0.4459, "step": 14298 }, { "epoch": 1.3418731231231231, "grad_norm": 0.8903202497380691, "learning_rate": 6.7550265356757704e-06, "loss": 0.3902, "step": 14299 }, { "epoch": 1.341966966966967, "grad_norm": 3.950757310101976, "learning_rate": 6.754515297934407e-06, "loss": 0.4217, "step": 14300 }, { "epoch": 1.3420608108108107, "grad_norm": 0.9095359684375095, "learning_rate": 6.7540040392737526e-06, "loss": 0.3871, "step": 14301 }, { "epoch": 1.3421546546546548, "grad_norm": 0.9485750602350979, "learning_rate": 6.7534927596999e-06, "loss": 0.402, "step": 14302 }, { "epoch": 1.3422484984984986, "grad_norm": 1.5999171434907713, "learning_rate": 6.752981459218947e-06, "loss": 0.4021, "step": 14303 }, { "epoch": 1.3423423423423424, "grad_norm": 0.9668258483372113, "learning_rate": 6.75247013783699e-06, "loss": 0.4047, "step": 14304 }, { "epoch": 1.3424361861861862, "grad_norm": 1.7182883429921718, "learning_rate": 6.751958795560126e-06, "loss": 0.4293, "step": 14305 }, { "epoch": 1.34253003003003, "grad_norm": 0.9360481800563147, "learning_rate": 6.751447432394451e-06, "loss": 0.4049, "step": 14306 }, { "epoch": 1.3426238738738738, "grad_norm": 0.9075411932350641, "learning_rate": 6.750936048346062e-06, "loss": 0.4165, "step": 14307 }, { "epoch": 1.3427177177177176, "grad_norm": 0.8774940529498778, "learning_rate": 6.750424643421055e-06, "loss": 0.3916, "step": 14308 }, { "epoch": 1.3428115615615615, "grad_norm": 1.0784289879025664, "learning_rate": 6.74991321762553e-06, "loss": 0.3853, "step": 14309 }, { "epoch": 1.3429054054054055, "grad_norm": 0.9198781578470843, "learning_rate": 6.749401770965585e-06, "loss": 0.3627, "step": 14310 }, { "epoch": 1.3429992492492493, "grad_norm": 1.0900047860462667, "learning_rate": 6.7488903034473155e-06, "loss": 0.4019, "step": 14311 }, { "epoch": 1.343093093093093, "grad_norm": 1.0686236651734726, "learning_rate": 6.74837881507682e-06, "loss": 0.4156, "step": 14312 }, { "epoch": 1.343186936936937, "grad_norm": 0.8972137704010043, "learning_rate": 6.747867305860201e-06, "loss": 0.4324, "step": 14313 }, { "epoch": 1.3432807807807807, "grad_norm": 0.9902583996572141, "learning_rate": 6.7473557758035515e-06, "loss": 0.4107, "step": 14314 }, { "epoch": 1.3433746246246248, "grad_norm": 0.923858185986955, "learning_rate": 6.746844224912974e-06, "loss": 0.3691, "step": 14315 }, { "epoch": 1.3434684684684686, "grad_norm": 0.8288662847115582, "learning_rate": 6.746332653194568e-06, "loss": 0.3764, "step": 14316 }, { "epoch": 1.3435623123123124, "grad_norm": 0.981730629000691, "learning_rate": 6.745821060654434e-06, "loss": 0.4038, "step": 14317 }, { "epoch": 1.3436561561561562, "grad_norm": 0.9564382587422626, "learning_rate": 6.745309447298667e-06, "loss": 0.4335, "step": 14318 }, { "epoch": 1.34375, "grad_norm": 1.1865168664710881, "learning_rate": 6.744797813133371e-06, "loss": 0.3546, "step": 14319 }, { "epoch": 1.3438438438438438, "grad_norm": 1.2923731973005814, "learning_rate": 6.744286158164645e-06, "loss": 0.4301, "step": 14320 }, { "epoch": 1.3439376876876876, "grad_norm": 1.005185305108584, "learning_rate": 6.743774482398591e-06, "loss": 0.3644, "step": 14321 }, { "epoch": 1.3440315315315314, "grad_norm": 0.9086530186231074, "learning_rate": 6.743262785841308e-06, "loss": 0.3976, "step": 14322 }, { "epoch": 1.3441253753753752, "grad_norm": 1.6833916812097491, "learning_rate": 6.742751068498898e-06, "loss": 0.3819, "step": 14323 }, { "epoch": 1.3442192192192193, "grad_norm": 1.3286938499208962, "learning_rate": 6.742239330377461e-06, "loss": 0.3539, "step": 14324 }, { "epoch": 1.344313063063063, "grad_norm": 1.0782698028873337, "learning_rate": 6.741727571483101e-06, "loss": 0.4508, "step": 14325 }, { "epoch": 1.344406906906907, "grad_norm": 1.2201436953539884, "learning_rate": 6.741215791821916e-06, "loss": 0.4489, "step": 14326 }, { "epoch": 1.3445007507507507, "grad_norm": 0.7502827646321973, "learning_rate": 6.740703991400011e-06, "loss": 0.3238, "step": 14327 }, { "epoch": 1.3445945945945945, "grad_norm": 0.9700162857911176, "learning_rate": 6.7401921702234876e-06, "loss": 0.3905, "step": 14328 }, { "epoch": 1.3446884384384385, "grad_norm": 1.2166935350027344, "learning_rate": 6.739680328298449e-06, "loss": 0.3637, "step": 14329 }, { "epoch": 1.3447822822822824, "grad_norm": 1.0108728971062517, "learning_rate": 6.739168465630996e-06, "loss": 0.4214, "step": 14330 }, { "epoch": 1.3448761261261262, "grad_norm": 1.0480302159442456, "learning_rate": 6.738656582227234e-06, "loss": 0.3921, "step": 14331 }, { "epoch": 1.34496996996997, "grad_norm": 0.8360692466669488, "learning_rate": 6.738144678093264e-06, "loss": 0.3941, "step": 14332 }, { "epoch": 1.3450638138138138, "grad_norm": 1.470373637974495, "learning_rate": 6.737632753235189e-06, "loss": 0.4227, "step": 14333 }, { "epoch": 1.3451576576576576, "grad_norm": 1.1587186259281386, "learning_rate": 6.737120807659116e-06, "loss": 0.3929, "step": 14334 }, { "epoch": 1.3452515015015014, "grad_norm": 0.9407786961220852, "learning_rate": 6.736608841371147e-06, "loss": 0.3683, "step": 14335 }, { "epoch": 1.3453453453453452, "grad_norm": 0.9213328408461939, "learning_rate": 6.736096854377387e-06, "loss": 0.4686, "step": 14336 }, { "epoch": 1.3454391891891893, "grad_norm": 1.1145361128226892, "learning_rate": 6.735584846683938e-06, "loss": 0.403, "step": 14337 }, { "epoch": 1.345533033033033, "grad_norm": 1.03736613769703, "learning_rate": 6.735072818296908e-06, "loss": 0.4692, "step": 14338 }, { "epoch": 1.3456268768768769, "grad_norm": 0.9895872047999057, "learning_rate": 6.734560769222399e-06, "loss": 0.4117, "step": 14339 }, { "epoch": 1.3457207207207207, "grad_norm": 0.9166092403624904, "learning_rate": 6.73404869946652e-06, "loss": 0.4048, "step": 14340 }, { "epoch": 1.3458145645645645, "grad_norm": 1.095470234805429, "learning_rate": 6.733536609035374e-06, "loss": 0.409, "step": 14341 }, { "epoch": 1.3459084084084085, "grad_norm": 0.9189546636159308, "learning_rate": 6.733024497935066e-06, "loss": 0.3733, "step": 14342 }, { "epoch": 1.3460022522522523, "grad_norm": 0.9455092011394689, "learning_rate": 6.7325123661717026e-06, "loss": 0.4341, "step": 14343 }, { "epoch": 1.3460960960960962, "grad_norm": 0.8971586093305973, "learning_rate": 6.732000213751391e-06, "loss": 0.3827, "step": 14344 }, { "epoch": 1.34618993993994, "grad_norm": 1.0691541029831537, "learning_rate": 6.7314880406802366e-06, "loss": 0.3939, "step": 14345 }, { "epoch": 1.3462837837837838, "grad_norm": 0.9000147446366247, "learning_rate": 6.730975846964347e-06, "loss": 0.4181, "step": 14346 }, { "epoch": 1.3463776276276276, "grad_norm": 1.0172301478837715, "learning_rate": 6.730463632609829e-06, "loss": 0.3887, "step": 14347 }, { "epoch": 1.3464714714714714, "grad_norm": 0.7971120469647913, "learning_rate": 6.729951397622789e-06, "loss": 0.3548, "step": 14348 }, { "epoch": 1.3465653153153152, "grad_norm": 1.1891969492334955, "learning_rate": 6.729439142009335e-06, "loss": 0.4079, "step": 14349 }, { "epoch": 1.3466591591591592, "grad_norm": 0.9373766440320497, "learning_rate": 6.728926865775574e-06, "loss": 0.434, "step": 14350 }, { "epoch": 1.346753003003003, "grad_norm": 0.9254605101249941, "learning_rate": 6.728414568927615e-06, "loss": 0.4098, "step": 14351 }, { "epoch": 1.3468468468468469, "grad_norm": 0.9537985492839149, "learning_rate": 6.7279022514715655e-06, "loss": 0.4038, "step": 14352 }, { "epoch": 1.3469406906906907, "grad_norm": 0.9254996505020515, "learning_rate": 6.727389913413535e-06, "loss": 0.3642, "step": 14353 }, { "epoch": 1.3470345345345345, "grad_norm": 1.0721095423030556, "learning_rate": 6.72687755475963e-06, "loss": 0.4143, "step": 14354 }, { "epoch": 1.3471283783783785, "grad_norm": 0.8636316250065129, "learning_rate": 6.726365175515961e-06, "loss": 0.4093, "step": 14355 }, { "epoch": 1.3472222222222223, "grad_norm": 1.0573809084547785, "learning_rate": 6.7258527756886375e-06, "loss": 0.4186, "step": 14356 }, { "epoch": 1.3473160660660661, "grad_norm": 1.3103040469056633, "learning_rate": 6.725340355283767e-06, "loss": 0.4097, "step": 14357 }, { "epoch": 1.34740990990991, "grad_norm": 1.028003410550972, "learning_rate": 6.724827914307461e-06, "loss": 0.3731, "step": 14358 }, { "epoch": 1.3475037537537538, "grad_norm": 0.963410354773265, "learning_rate": 6.724315452765829e-06, "loss": 0.3891, "step": 14359 }, { "epoch": 1.3475975975975976, "grad_norm": 0.9124619841447809, "learning_rate": 6.723802970664981e-06, "loss": 0.3546, "step": 14360 }, { "epoch": 1.3476914414414414, "grad_norm": 0.9067317224685514, "learning_rate": 6.723290468011028e-06, "loss": 0.3791, "step": 14361 }, { "epoch": 1.3477852852852852, "grad_norm": 1.065012865490794, "learning_rate": 6.7227779448100785e-06, "loss": 0.3647, "step": 14362 }, { "epoch": 1.347879129129129, "grad_norm": 1.27332478208506, "learning_rate": 6.722265401068244e-06, "loss": 0.3998, "step": 14363 }, { "epoch": 1.347972972972973, "grad_norm": 0.8743210001850927, "learning_rate": 6.721752836791639e-06, "loss": 0.4097, "step": 14364 }, { "epoch": 1.3480668168168168, "grad_norm": 0.9780159577403917, "learning_rate": 6.7212402519863696e-06, "loss": 0.4202, "step": 14365 }, { "epoch": 1.3481606606606606, "grad_norm": 0.9393615987288222, "learning_rate": 6.7207276466585524e-06, "loss": 0.4194, "step": 14366 }, { "epoch": 1.3482545045045045, "grad_norm": 1.2754143650002647, "learning_rate": 6.720215020814297e-06, "loss": 0.4012, "step": 14367 }, { "epoch": 1.3483483483483483, "grad_norm": 0.8863527680073396, "learning_rate": 6.719702374459713e-06, "loss": 0.3922, "step": 14368 }, { "epoch": 1.3484421921921923, "grad_norm": 0.8858985858419411, "learning_rate": 6.719189707600917e-06, "loss": 0.3415, "step": 14369 }, { "epoch": 1.3485360360360361, "grad_norm": 0.9281024233425764, "learning_rate": 6.71867702024402e-06, "loss": 0.4251, "step": 14370 }, { "epoch": 1.34862987987988, "grad_norm": 1.0077020656974154, "learning_rate": 6.718164312395134e-06, "loss": 0.398, "step": 14371 }, { "epoch": 1.3487237237237237, "grad_norm": 1.0470033600577289, "learning_rate": 6.717651584060373e-06, "loss": 0.404, "step": 14372 }, { "epoch": 1.3488175675675675, "grad_norm": 0.9258699334647928, "learning_rate": 6.71713883524585e-06, "loss": 0.3998, "step": 14373 }, { "epoch": 1.3489114114114114, "grad_norm": 0.8395213668423811, "learning_rate": 6.716626065957678e-06, "loss": 0.3284, "step": 14374 }, { "epoch": 1.3490052552552552, "grad_norm": 1.6193752422268484, "learning_rate": 6.716113276201971e-06, "loss": 0.4208, "step": 14375 }, { "epoch": 1.349099099099099, "grad_norm": 0.9961721525587265, "learning_rate": 6.715600465984845e-06, "loss": 0.405, "step": 14376 }, { "epoch": 1.349192942942943, "grad_norm": 0.9936805184935731, "learning_rate": 6.715087635312412e-06, "loss": 0.4211, "step": 14377 }, { "epoch": 1.3492867867867868, "grad_norm": 1.034283952690027, "learning_rate": 6.714574784190786e-06, "loss": 0.399, "step": 14378 }, { "epoch": 1.3493806306306306, "grad_norm": 0.9760849215677878, "learning_rate": 6.714061912626085e-06, "loss": 0.3843, "step": 14379 }, { "epoch": 1.3494744744744744, "grad_norm": 0.8644866109376722, "learning_rate": 6.713549020624421e-06, "loss": 0.4088, "step": 14380 }, { "epoch": 1.3495683183183182, "grad_norm": 1.0614144744096432, "learning_rate": 6.713036108191911e-06, "loss": 0.3878, "step": 14381 }, { "epoch": 1.3496621621621623, "grad_norm": 1.1013689388607524, "learning_rate": 6.71252317533467e-06, "loss": 0.4254, "step": 14382 }, { "epoch": 1.349756006006006, "grad_norm": 0.9245787682720515, "learning_rate": 6.712010222058812e-06, "loss": 0.4075, "step": 14383 }, { "epoch": 1.34984984984985, "grad_norm": 1.9164294666857709, "learning_rate": 6.711497248370457e-06, "loss": 0.3935, "step": 14384 }, { "epoch": 1.3499436936936937, "grad_norm": 1.0002609848365975, "learning_rate": 6.710984254275717e-06, "loss": 0.4196, "step": 14385 }, { "epoch": 1.3500375375375375, "grad_norm": 0.8965749204518556, "learning_rate": 6.710471239780712e-06, "loss": 0.4235, "step": 14386 }, { "epoch": 1.3501313813813813, "grad_norm": 0.9230061890286076, "learning_rate": 6.709958204891554e-06, "loss": 0.3591, "step": 14387 }, { "epoch": 1.3502252252252251, "grad_norm": 0.8788434624770727, "learning_rate": 6.709445149614366e-06, "loss": 0.432, "step": 14388 }, { "epoch": 1.350319069069069, "grad_norm": 0.9659409251945349, "learning_rate": 6.708932073955261e-06, "loss": 0.3977, "step": 14389 }, { "epoch": 1.350412912912913, "grad_norm": 0.8972212744073172, "learning_rate": 6.708418977920359e-06, "loss": 0.4099, "step": 14390 }, { "epoch": 1.3505067567567568, "grad_norm": 0.9380546935048693, "learning_rate": 6.707905861515776e-06, "loss": 0.3777, "step": 14391 }, { "epoch": 1.3506006006006006, "grad_norm": 0.9148555172205843, "learning_rate": 6.70739272474763e-06, "loss": 0.377, "step": 14392 }, { "epoch": 1.3506944444444444, "grad_norm": 0.8413823409264464, "learning_rate": 6.706879567622039e-06, "loss": 0.4204, "step": 14393 }, { "epoch": 1.3507882882882882, "grad_norm": 1.8681323440159148, "learning_rate": 6.706366390145122e-06, "loss": 0.4237, "step": 14394 }, { "epoch": 1.3508821321321323, "grad_norm": 0.8520381431308658, "learning_rate": 6.705853192323e-06, "loss": 0.4162, "step": 14395 }, { "epoch": 1.350975975975976, "grad_norm": 0.8733875964859222, "learning_rate": 6.705339974161788e-06, "loss": 0.3973, "step": 14396 }, { "epoch": 1.3510698198198199, "grad_norm": 1.1328859839847853, "learning_rate": 6.704826735667607e-06, "loss": 0.4597, "step": 14397 }, { "epoch": 1.3511636636636637, "grad_norm": 0.910612798655954, "learning_rate": 6.704313476846575e-06, "loss": 0.4337, "step": 14398 }, { "epoch": 1.3512575075075075, "grad_norm": 1.6956730980828714, "learning_rate": 6.703800197704814e-06, "loss": 0.4139, "step": 14399 }, { "epoch": 1.3513513513513513, "grad_norm": 1.1982872938533065, "learning_rate": 6.703286898248442e-06, "loss": 0.4011, "step": 14400 }, { "epoch": 1.3514451951951951, "grad_norm": 5.627464220757992, "learning_rate": 6.702773578483581e-06, "loss": 0.407, "step": 14401 }, { "epoch": 1.351539039039039, "grad_norm": 1.2477055583879426, "learning_rate": 6.702260238416349e-06, "loss": 0.3761, "step": 14402 }, { "epoch": 1.3516328828828827, "grad_norm": 0.9620656508263257, "learning_rate": 6.701746878052869e-06, "loss": 0.4064, "step": 14403 }, { "epoch": 1.3517267267267268, "grad_norm": 1.0147730459410056, "learning_rate": 6.70123349739926e-06, "loss": 0.4488, "step": 14404 }, { "epoch": 1.3518205705705706, "grad_norm": 0.9276629404539426, "learning_rate": 6.700720096461644e-06, "loss": 0.4017, "step": 14405 }, { "epoch": 1.3519144144144144, "grad_norm": 1.002325528160758, "learning_rate": 6.700206675246142e-06, "loss": 0.418, "step": 14406 }, { "epoch": 1.3520082582582582, "grad_norm": 0.9672239576417876, "learning_rate": 6.6996932337588745e-06, "loss": 0.4268, "step": 14407 }, { "epoch": 1.352102102102102, "grad_norm": 0.8739554710396014, "learning_rate": 6.699179772005967e-06, "loss": 0.458, "step": 14408 }, { "epoch": 1.352195945945946, "grad_norm": 1.017973236288054, "learning_rate": 6.698666289993538e-06, "loss": 0.4526, "step": 14409 }, { "epoch": 1.3522897897897899, "grad_norm": 0.8474943520508179, "learning_rate": 6.698152787727711e-06, "loss": 0.3993, "step": 14410 }, { "epoch": 1.3523836336336337, "grad_norm": 1.0103051304639425, "learning_rate": 6.697639265214608e-06, "loss": 0.4069, "step": 14411 }, { "epoch": 1.3524774774774775, "grad_norm": 0.810940569769025, "learning_rate": 6.697125722460353e-06, "loss": 0.4028, "step": 14412 }, { "epoch": 1.3525713213213213, "grad_norm": 1.107936074168314, "learning_rate": 6.6966121594710665e-06, "loss": 0.4185, "step": 14413 }, { "epoch": 1.352665165165165, "grad_norm": 1.148232836743694, "learning_rate": 6.696098576252877e-06, "loss": 0.3812, "step": 14414 }, { "epoch": 1.352759009009009, "grad_norm": 0.9580028869156858, "learning_rate": 6.6955849728119014e-06, "loss": 0.4135, "step": 14415 }, { "epoch": 1.3528528528528527, "grad_norm": 0.8899754708200716, "learning_rate": 6.695071349154267e-06, "loss": 0.391, "step": 14416 }, { "epoch": 1.3529466966966968, "grad_norm": 0.937691585329114, "learning_rate": 6.694557705286098e-06, "loss": 0.4001, "step": 14417 }, { "epoch": 1.3530405405405406, "grad_norm": 0.8999075400436198, "learning_rate": 6.694044041213517e-06, "loss": 0.3803, "step": 14418 }, { "epoch": 1.3531343843843844, "grad_norm": 0.9595339706570802, "learning_rate": 6.693530356942651e-06, "loss": 0.4256, "step": 14419 }, { "epoch": 1.3532282282282282, "grad_norm": 0.9057925855119288, "learning_rate": 6.693016652479622e-06, "loss": 0.3676, "step": 14420 }, { "epoch": 1.353322072072072, "grad_norm": 1.1407970188283918, "learning_rate": 6.692502927830559e-06, "loss": 0.3924, "step": 14421 }, { "epoch": 1.353415915915916, "grad_norm": 0.8431062960521158, "learning_rate": 6.691989183001581e-06, "loss": 0.4125, "step": 14422 }, { "epoch": 1.3535097597597598, "grad_norm": 0.9917748075694885, "learning_rate": 6.691475417998818e-06, "loss": 0.3225, "step": 14423 }, { "epoch": 1.3536036036036037, "grad_norm": 0.9935384023135994, "learning_rate": 6.690961632828395e-06, "loss": 0.443, "step": 14424 }, { "epoch": 1.3536974474474475, "grad_norm": 0.9395989093136775, "learning_rate": 6.690447827496437e-06, "loss": 0.4108, "step": 14425 }, { "epoch": 1.3537912912912913, "grad_norm": 1.0466868915932297, "learning_rate": 6.6899340020090704e-06, "loss": 0.4032, "step": 14426 }, { "epoch": 1.353885135135135, "grad_norm": 1.2103668285876206, "learning_rate": 6.689420156372422e-06, "loss": 0.3725, "step": 14427 }, { "epoch": 1.353978978978979, "grad_norm": 0.8983150873298272, "learning_rate": 6.688906290592619e-06, "loss": 0.3671, "step": 14428 }, { "epoch": 1.3540728228228227, "grad_norm": 1.0342292221138532, "learning_rate": 6.688392404675787e-06, "loss": 0.4072, "step": 14429 }, { "epoch": 1.3541666666666667, "grad_norm": 0.9801691675115313, "learning_rate": 6.6878784986280534e-06, "loss": 0.4575, "step": 14430 }, { "epoch": 1.3542605105105106, "grad_norm": 2.1755685858084832, "learning_rate": 6.687364572455546e-06, "loss": 0.396, "step": 14431 }, { "epoch": 1.3543543543543544, "grad_norm": 0.8706838223646821, "learning_rate": 6.686850626164392e-06, "loss": 0.4011, "step": 14432 }, { "epoch": 1.3544481981981982, "grad_norm": 1.2002267554741657, "learning_rate": 6.686336659760719e-06, "loss": 0.3807, "step": 14433 }, { "epoch": 1.354542042042042, "grad_norm": 1.1742772169413003, "learning_rate": 6.685822673250657e-06, "loss": 0.4032, "step": 14434 }, { "epoch": 1.354635885885886, "grad_norm": 0.9010372993798562, "learning_rate": 6.685308666640331e-06, "loss": 0.3825, "step": 14435 }, { "epoch": 1.3547297297297298, "grad_norm": 0.9162634985410429, "learning_rate": 6.684794639935873e-06, "loss": 0.4046, "step": 14436 }, { "epoch": 1.3548235735735736, "grad_norm": 0.8709013913033371, "learning_rate": 6.684280593143409e-06, "loss": 0.3259, "step": 14437 }, { "epoch": 1.3549174174174174, "grad_norm": 0.8416409919562797, "learning_rate": 6.6837665262690695e-06, "loss": 0.3836, "step": 14438 }, { "epoch": 1.3550112612612613, "grad_norm": 0.9905042685157187, "learning_rate": 6.683252439318986e-06, "loss": 0.4108, "step": 14439 }, { "epoch": 1.355105105105105, "grad_norm": 1.2430869018243786, "learning_rate": 6.682738332299283e-06, "loss": 0.4245, "step": 14440 }, { "epoch": 1.3551989489489489, "grad_norm": 2.2139998773270237, "learning_rate": 6.682224205216092e-06, "loss": 0.3986, "step": 14441 }, { "epoch": 1.3552927927927927, "grad_norm": 0.9871149615038447, "learning_rate": 6.681710058075547e-06, "loss": 0.4251, "step": 14442 }, { "epoch": 1.3553866366366365, "grad_norm": 0.9317155656912693, "learning_rate": 6.681195890883772e-06, "loss": 0.3675, "step": 14443 }, { "epoch": 1.3554804804804805, "grad_norm": 0.9609706784292718, "learning_rate": 6.680681703646901e-06, "loss": 0.4105, "step": 14444 }, { "epoch": 1.3555743243243243, "grad_norm": 1.535367465798342, "learning_rate": 6.680167496371066e-06, "loss": 0.4059, "step": 14445 }, { "epoch": 1.3556681681681682, "grad_norm": 0.8771337083372454, "learning_rate": 6.679653269062395e-06, "loss": 0.3909, "step": 14446 }, { "epoch": 1.355762012012012, "grad_norm": 1.0545768299306253, "learning_rate": 6.67913902172702e-06, "loss": 0.3836, "step": 14447 }, { "epoch": 1.3558558558558558, "grad_norm": 2.262368770372585, "learning_rate": 6.678624754371075e-06, "loss": 0.3856, "step": 14448 }, { "epoch": 1.3559496996996998, "grad_norm": 2.1990065056684314, "learning_rate": 6.6781104670006865e-06, "loss": 0.42, "step": 14449 }, { "epoch": 1.3560435435435436, "grad_norm": 0.7982019967532449, "learning_rate": 6.677596159621991e-06, "loss": 0.3704, "step": 14450 }, { "epoch": 1.3561373873873874, "grad_norm": 1.1447483026380836, "learning_rate": 6.677081832241119e-06, "loss": 0.3662, "step": 14451 }, { "epoch": 1.3562312312312312, "grad_norm": 0.9346515406833737, "learning_rate": 6.676567484864202e-06, "loss": 0.4209, "step": 14452 }, { "epoch": 1.356325075075075, "grad_norm": 0.9022125743179535, "learning_rate": 6.676053117497374e-06, "loss": 0.4199, "step": 14453 }, { "epoch": 1.3564189189189189, "grad_norm": 1.0530082574693995, "learning_rate": 6.675538730146769e-06, "loss": 0.3565, "step": 14454 }, { "epoch": 1.3565127627627627, "grad_norm": 0.9011773006677098, "learning_rate": 6.675024322818516e-06, "loss": 0.3711, "step": 14455 }, { "epoch": 1.3566066066066065, "grad_norm": 0.8754273344943841, "learning_rate": 6.674509895518753e-06, "loss": 0.3824, "step": 14456 }, { "epoch": 1.3567004504504505, "grad_norm": 1.0248514285632806, "learning_rate": 6.67399544825361e-06, "loss": 0.4279, "step": 14457 }, { "epoch": 1.3567942942942943, "grad_norm": 0.8848198560407081, "learning_rate": 6.6734809810292236e-06, "loss": 0.4087, "step": 14458 }, { "epoch": 1.3568881381381381, "grad_norm": 1.5069223896984085, "learning_rate": 6.6729664938517245e-06, "loss": 0.3839, "step": 14459 }, { "epoch": 1.356981981981982, "grad_norm": 2.725008376506785, "learning_rate": 6.672451986727251e-06, "loss": 0.3959, "step": 14460 }, { "epoch": 1.3570758258258258, "grad_norm": 0.9722479424517125, "learning_rate": 6.671937459661934e-06, "loss": 0.3699, "step": 14461 }, { "epoch": 1.3571696696696698, "grad_norm": 1.026278511438548, "learning_rate": 6.67142291266191e-06, "loss": 0.4283, "step": 14462 }, { "epoch": 1.3572635135135136, "grad_norm": 0.9332197470827058, "learning_rate": 6.6709083457333154e-06, "loss": 0.4281, "step": 14463 }, { "epoch": 1.3573573573573574, "grad_norm": 0.9680009790259881, "learning_rate": 6.670393758882283e-06, "loss": 0.3938, "step": 14464 }, { "epoch": 1.3574512012012012, "grad_norm": 0.9399121514155211, "learning_rate": 6.66987915211495e-06, "loss": 0.3683, "step": 14465 }, { "epoch": 1.357545045045045, "grad_norm": 1.1193535680687539, "learning_rate": 6.669364525437451e-06, "loss": 0.3621, "step": 14466 }, { "epoch": 1.3576388888888888, "grad_norm": 0.9360354976379016, "learning_rate": 6.668849878855921e-06, "loss": 0.4038, "step": 14467 }, { "epoch": 1.3577327327327327, "grad_norm": 1.0326179310187784, "learning_rate": 6.668335212376498e-06, "loss": 0.4287, "step": 14468 }, { "epoch": 1.3578265765765765, "grad_norm": 1.0877011416037217, "learning_rate": 6.6678205260053186e-06, "loss": 0.3957, "step": 14469 }, { "epoch": 1.3579204204204205, "grad_norm": 0.9930325267863508, "learning_rate": 6.667305819748519e-06, "loss": 0.4122, "step": 14470 }, { "epoch": 1.3580142642642643, "grad_norm": 0.8731412944982895, "learning_rate": 6.666791093612235e-06, "loss": 0.4246, "step": 14471 }, { "epoch": 1.3581081081081081, "grad_norm": 1.3060697425402754, "learning_rate": 6.666276347602606e-06, "loss": 0.4064, "step": 14472 }, { "epoch": 1.358201951951952, "grad_norm": 0.9710921396964991, "learning_rate": 6.665761581725766e-06, "loss": 0.4338, "step": 14473 }, { "epoch": 1.3582957957957957, "grad_norm": 0.9413704289669448, "learning_rate": 6.665246795987855e-06, "loss": 0.3915, "step": 14474 }, { "epoch": 1.3583896396396398, "grad_norm": 1.1217874332048217, "learning_rate": 6.664731990395012e-06, "loss": 0.4116, "step": 14475 }, { "epoch": 1.3584834834834836, "grad_norm": 1.0350109562075094, "learning_rate": 6.6642171649533725e-06, "loss": 0.4271, "step": 14476 }, { "epoch": 1.3585773273273274, "grad_norm": 1.1244766637902046, "learning_rate": 6.663702319669076e-06, "loss": 0.3918, "step": 14477 }, { "epoch": 1.3586711711711712, "grad_norm": 1.0204184117854878, "learning_rate": 6.663187454548261e-06, "loss": 0.4213, "step": 14478 }, { "epoch": 1.358765015015015, "grad_norm": 0.9850995176606719, "learning_rate": 6.662672569597066e-06, "loss": 0.4438, "step": 14479 }, { "epoch": 1.3588588588588588, "grad_norm": 0.9525762608875139, "learning_rate": 6.6621576648216305e-06, "loss": 0.4082, "step": 14480 }, { "epoch": 1.3589527027027026, "grad_norm": 0.9568701296481814, "learning_rate": 6.661642740228094e-06, "loss": 0.4312, "step": 14481 }, { "epoch": 1.3590465465465464, "grad_norm": 1.0918334225632311, "learning_rate": 6.661127795822595e-06, "loss": 0.3722, "step": 14482 }, { "epoch": 1.3591403903903903, "grad_norm": 1.1089875609627933, "learning_rate": 6.660612831611274e-06, "loss": 0.397, "step": 14483 }, { "epoch": 1.3592342342342343, "grad_norm": 0.9473974364669222, "learning_rate": 6.6600978476002706e-06, "loss": 0.385, "step": 14484 }, { "epoch": 1.359328078078078, "grad_norm": 0.9763013702676574, "learning_rate": 6.659582843795724e-06, "loss": 0.3961, "step": 14485 }, { "epoch": 1.359421921921922, "grad_norm": 0.8791274244235779, "learning_rate": 6.6590678202037776e-06, "loss": 0.3909, "step": 14486 }, { "epoch": 1.3595157657657657, "grad_norm": 1.1345014156498245, "learning_rate": 6.65855277683057e-06, "loss": 0.4451, "step": 14487 }, { "epoch": 1.3596096096096097, "grad_norm": 0.9592701771086891, "learning_rate": 6.658037713682241e-06, "loss": 0.4086, "step": 14488 }, { "epoch": 1.3597034534534536, "grad_norm": 0.964163096666104, "learning_rate": 6.6575226307649355e-06, "loss": 0.3761, "step": 14489 }, { "epoch": 1.3597972972972974, "grad_norm": 1.025106820452859, "learning_rate": 6.6570075280847915e-06, "loss": 0.3679, "step": 14490 }, { "epoch": 1.3598911411411412, "grad_norm": 1.0162841926145514, "learning_rate": 6.656492405647952e-06, "loss": 0.411, "step": 14491 }, { "epoch": 1.359984984984985, "grad_norm": 0.9296506325475198, "learning_rate": 6.655977263460558e-06, "loss": 0.4124, "step": 14492 }, { "epoch": 1.3600788288288288, "grad_norm": 0.85062484211347, "learning_rate": 6.655462101528752e-06, "loss": 0.3805, "step": 14493 }, { "epoch": 1.3601726726726726, "grad_norm": 1.0875480930549606, "learning_rate": 6.654946919858677e-06, "loss": 0.4334, "step": 14494 }, { "epoch": 1.3602665165165164, "grad_norm": 0.8892083892496755, "learning_rate": 6.654431718456476e-06, "loss": 0.3821, "step": 14495 }, { "epoch": 1.3603603603603602, "grad_norm": 0.9294128673092036, "learning_rate": 6.65391649732829e-06, "loss": 0.3558, "step": 14496 }, { "epoch": 1.3604542042042043, "grad_norm": 0.9793249679027592, "learning_rate": 6.653401256480262e-06, "loss": 0.3851, "step": 14497 }, { "epoch": 1.360548048048048, "grad_norm": 0.9048742967708199, "learning_rate": 6.6528859959185365e-06, "loss": 0.3985, "step": 14498 }, { "epoch": 1.3606418918918919, "grad_norm": 1.0601007464857959, "learning_rate": 6.652370715649258e-06, "loss": 0.4335, "step": 14499 }, { "epoch": 1.3607357357357357, "grad_norm": 0.8508373218357481, "learning_rate": 6.651855415678568e-06, "loss": 0.3844, "step": 14500 }, { "epoch": 1.3608295795795795, "grad_norm": 0.9011791773314651, "learning_rate": 6.651340096012613e-06, "loss": 0.3758, "step": 14501 }, { "epoch": 1.3609234234234235, "grad_norm": 1.0581915567158875, "learning_rate": 6.650824756657534e-06, "loss": 0.435, "step": 14502 }, { "epoch": 1.3610172672672673, "grad_norm": 0.8825995978729106, "learning_rate": 6.650309397619477e-06, "loss": 0.3804, "step": 14503 }, { "epoch": 1.3611111111111112, "grad_norm": 0.9816231119243666, "learning_rate": 6.6497940189045875e-06, "loss": 0.4111, "step": 14504 }, { "epoch": 1.361204954954955, "grad_norm": 0.9090706912858356, "learning_rate": 6.649278620519011e-06, "loss": 0.4228, "step": 14505 }, { "epoch": 1.3612987987987988, "grad_norm": 1.1316859165291828, "learning_rate": 6.6487632024688885e-06, "loss": 0.4165, "step": 14506 }, { "epoch": 1.3613926426426426, "grad_norm": 0.8518280313285046, "learning_rate": 6.648247764760371e-06, "loss": 0.3898, "step": 14507 }, { "epoch": 1.3614864864864864, "grad_norm": 1.0240148433389333, "learning_rate": 6.647732307399602e-06, "loss": 0.425, "step": 14508 }, { "epoch": 1.3615803303303302, "grad_norm": 1.1450067522115672, "learning_rate": 6.647216830392723e-06, "loss": 0.4382, "step": 14509 }, { "epoch": 1.3616741741741742, "grad_norm": 1.0535837877732495, "learning_rate": 6.646701333745886e-06, "loss": 0.4162, "step": 14510 }, { "epoch": 1.361768018018018, "grad_norm": 1.6471196576423748, "learning_rate": 6.646185817465236e-06, "loss": 0.422, "step": 14511 }, { "epoch": 1.3618618618618619, "grad_norm": 0.9490633500141136, "learning_rate": 6.645670281556917e-06, "loss": 0.4292, "step": 14512 }, { "epoch": 1.3619557057057057, "grad_norm": 1.071351009399128, "learning_rate": 6.645154726027079e-06, "loss": 0.3609, "step": 14513 }, { "epoch": 1.3620495495495495, "grad_norm": 0.9671753021446142, "learning_rate": 6.644639150881868e-06, "loss": 0.398, "step": 14514 }, { "epoch": 1.3621433933933935, "grad_norm": 0.9489304884438752, "learning_rate": 6.6441235561274284e-06, "loss": 0.3683, "step": 14515 }, { "epoch": 1.3622372372372373, "grad_norm": 0.9356104037056057, "learning_rate": 6.6436079417699116e-06, "loss": 0.3882, "step": 14516 }, { "epoch": 1.3623310810810811, "grad_norm": 0.9041652127707943, "learning_rate": 6.643092307815464e-06, "loss": 0.3687, "step": 14517 }, { "epoch": 1.362424924924925, "grad_norm": 0.8833748457486311, "learning_rate": 6.642576654270233e-06, "loss": 0.4207, "step": 14518 }, { "epoch": 1.3625187687687688, "grad_norm": 1.2703104221224542, "learning_rate": 6.642060981140368e-06, "loss": 0.3757, "step": 14519 }, { "epoch": 1.3626126126126126, "grad_norm": 0.9016280996708861, "learning_rate": 6.641545288432016e-06, "loss": 0.3882, "step": 14520 }, { "epoch": 1.3627064564564564, "grad_norm": 0.9538634308817786, "learning_rate": 6.641029576151327e-06, "loss": 0.4055, "step": 14521 }, { "epoch": 1.3628003003003002, "grad_norm": 0.9178489556052292, "learning_rate": 6.640513844304448e-06, "loss": 0.3872, "step": 14522 }, { "epoch": 1.3628941441441442, "grad_norm": 1.6483185785118686, "learning_rate": 6.639998092897531e-06, "loss": 0.426, "step": 14523 }, { "epoch": 1.362987987987988, "grad_norm": 0.8630638464307482, "learning_rate": 6.639482321936722e-06, "loss": 0.4274, "step": 14524 }, { "epoch": 1.3630818318318318, "grad_norm": 1.1527876911576875, "learning_rate": 6.638966531428174e-06, "loss": 0.4401, "step": 14525 }, { "epoch": 1.3631756756756757, "grad_norm": 0.9781915687865619, "learning_rate": 6.6384507213780345e-06, "loss": 0.4593, "step": 14526 }, { "epoch": 1.3632695195195195, "grad_norm": 0.8855157622563756, "learning_rate": 6.637934891792454e-06, "loss": 0.3695, "step": 14527 }, { "epoch": 1.3633633633633635, "grad_norm": 1.2998082009511973, "learning_rate": 6.637419042677582e-06, "loss": 0.4164, "step": 14528 }, { "epoch": 1.3634572072072073, "grad_norm": 0.8485030627390341, "learning_rate": 6.6369031740395715e-06, "loss": 0.4033, "step": 14529 }, { "epoch": 1.3635510510510511, "grad_norm": 0.8822963443828084, "learning_rate": 6.636387285884571e-06, "loss": 0.43, "step": 14530 }, { "epoch": 1.363644894894895, "grad_norm": 1.7544891175510957, "learning_rate": 6.635871378218732e-06, "loss": 0.3908, "step": 14531 }, { "epoch": 1.3637387387387387, "grad_norm": 0.9858986850239844, "learning_rate": 6.635355451048208e-06, "loss": 0.3635, "step": 14532 }, { "epoch": 1.3638325825825826, "grad_norm": 0.9171666936283659, "learning_rate": 6.634839504379147e-06, "loss": 0.4221, "step": 14533 }, { "epoch": 1.3639264264264264, "grad_norm": 0.9927079390530772, "learning_rate": 6.634323538217702e-06, "loss": 0.4058, "step": 14534 }, { "epoch": 1.3640202702702702, "grad_norm": 1.0665480341947513, "learning_rate": 6.633807552570026e-06, "loss": 0.3868, "step": 14535 }, { "epoch": 1.364114114114114, "grad_norm": 0.8706382100548785, "learning_rate": 6.6332915474422685e-06, "loss": 0.4165, "step": 14536 }, { "epoch": 1.364207957957958, "grad_norm": 0.9619885149121269, "learning_rate": 6.6327755228405855e-06, "loss": 0.382, "step": 14537 }, { "epoch": 1.3643018018018018, "grad_norm": 1.1222912838638568, "learning_rate": 6.632259478771128e-06, "loss": 0.4187, "step": 14538 }, { "epoch": 1.3643956456456456, "grad_norm": 1.0271725121913466, "learning_rate": 6.6317434152400475e-06, "loss": 0.3597, "step": 14539 }, { "epoch": 1.3644894894894894, "grad_norm": 0.8604502366084057, "learning_rate": 6.631227332253498e-06, "loss": 0.4431, "step": 14540 }, { "epoch": 1.3645833333333333, "grad_norm": 0.9905898237092021, "learning_rate": 6.630711229817633e-06, "loss": 0.3911, "step": 14541 }, { "epoch": 1.3646771771771773, "grad_norm": 1.2800491312414963, "learning_rate": 6.630195107938606e-06, "loss": 0.4156, "step": 14542 }, { "epoch": 1.364771021021021, "grad_norm": 1.2888211866490478, "learning_rate": 6.629678966622571e-06, "loss": 0.4612, "step": 14543 }, { "epoch": 1.364864864864865, "grad_norm": 1.0211004643508426, "learning_rate": 6.629162805875684e-06, "loss": 0.4021, "step": 14544 }, { "epoch": 1.3649587087087087, "grad_norm": 0.8820132372069318, "learning_rate": 6.628646625704096e-06, "loss": 0.3858, "step": 14545 }, { "epoch": 1.3650525525525525, "grad_norm": 1.0076334847259818, "learning_rate": 6.628130426113961e-06, "loss": 0.428, "step": 14546 }, { "epoch": 1.3651463963963963, "grad_norm": 0.9351028608877328, "learning_rate": 6.627614207111437e-06, "loss": 0.4136, "step": 14547 }, { "epoch": 1.3652402402402402, "grad_norm": 0.9608338324545534, "learning_rate": 6.627097968702676e-06, "loss": 0.4455, "step": 14548 }, { "epoch": 1.365334084084084, "grad_norm": 0.9673436295824713, "learning_rate": 6.626581710893834e-06, "loss": 0.3992, "step": 14549 }, { "epoch": 1.365427927927928, "grad_norm": 1.1744871888198631, "learning_rate": 6.626065433691069e-06, "loss": 0.4608, "step": 14550 }, { "epoch": 1.3655217717717718, "grad_norm": 1.2353874951012007, "learning_rate": 6.6255491371005345e-06, "loss": 0.4027, "step": 14551 }, { "epoch": 1.3656156156156156, "grad_norm": 1.0584731915348986, "learning_rate": 6.625032821128384e-06, "loss": 0.4215, "step": 14552 }, { "epoch": 1.3657094594594594, "grad_norm": 0.7788673157250167, "learning_rate": 6.6245164857807774e-06, "loss": 0.35, "step": 14553 }, { "epoch": 1.3658033033033032, "grad_norm": 1.2091513188201637, "learning_rate": 6.624000131063869e-06, "loss": 0.424, "step": 14554 }, { "epoch": 1.3658971471471473, "grad_norm": 0.8160342186967031, "learning_rate": 6.623483756983815e-06, "loss": 0.3642, "step": 14555 }, { "epoch": 1.365990990990991, "grad_norm": 1.0026431194971341, "learning_rate": 6.622967363546775e-06, "loss": 0.4266, "step": 14556 }, { "epoch": 1.366084834834835, "grad_norm": 0.9547126649247956, "learning_rate": 6.622450950758904e-06, "loss": 0.374, "step": 14557 }, { "epoch": 1.3661786786786787, "grad_norm": 1.0034322320244615, "learning_rate": 6.621934518626358e-06, "loss": 0.3938, "step": 14558 }, { "epoch": 1.3662725225225225, "grad_norm": 0.9999570622822656, "learning_rate": 6.621418067155298e-06, "loss": 0.4235, "step": 14559 }, { "epoch": 1.3663663663663663, "grad_norm": 1.028686971881122, "learning_rate": 6.620901596351876e-06, "loss": 0.4012, "step": 14560 }, { "epoch": 1.3664602102102101, "grad_norm": 0.9471427193081964, "learning_rate": 6.620385106222256e-06, "loss": 0.2977, "step": 14561 }, { "epoch": 1.366554054054054, "grad_norm": 0.9736159068230194, "learning_rate": 6.619868596772594e-06, "loss": 0.4301, "step": 14562 }, { "epoch": 1.366647897897898, "grad_norm": 0.9433802190682296, "learning_rate": 6.619352068009047e-06, "loss": 0.4049, "step": 14563 }, { "epoch": 1.3667417417417418, "grad_norm": 1.1312346758874687, "learning_rate": 6.618835519937775e-06, "loss": 0.4184, "step": 14564 }, { "epoch": 1.3668355855855856, "grad_norm": 0.8772709453873093, "learning_rate": 6.618318952564937e-06, "loss": 0.3964, "step": 14565 }, { "epoch": 1.3669294294294294, "grad_norm": 1.0430169341892381, "learning_rate": 6.617802365896691e-06, "loss": 0.3978, "step": 14566 }, { "epoch": 1.3670232732732732, "grad_norm": 1.1258629353455851, "learning_rate": 6.617285759939196e-06, "loss": 0.418, "step": 14567 }, { "epoch": 1.3671171171171173, "grad_norm": 1.0962378859727047, "learning_rate": 6.616769134698615e-06, "loss": 0.396, "step": 14568 }, { "epoch": 1.367210960960961, "grad_norm": 0.9429524407877404, "learning_rate": 6.616252490181104e-06, "loss": 0.4034, "step": 14569 }, { "epoch": 1.3673048048048049, "grad_norm": 0.8412376342248794, "learning_rate": 6.6157358263928236e-06, "loss": 0.3655, "step": 14570 }, { "epoch": 1.3673986486486487, "grad_norm": 1.0388127345766394, "learning_rate": 6.6152191433399375e-06, "loss": 0.477, "step": 14571 }, { "epoch": 1.3674924924924925, "grad_norm": 1.484474512025356, "learning_rate": 6.614702441028601e-06, "loss": 0.4224, "step": 14572 }, { "epoch": 1.3675863363363363, "grad_norm": 0.9542983081284423, "learning_rate": 6.6141857194649776e-06, "loss": 0.3569, "step": 14573 }, { "epoch": 1.3676801801801801, "grad_norm": 1.0780794401506695, "learning_rate": 6.613668978655228e-06, "loss": 0.4201, "step": 14574 }, { "epoch": 1.367774024024024, "grad_norm": 2.0406913116348426, "learning_rate": 6.613152218605514e-06, "loss": 0.4211, "step": 14575 }, { "epoch": 1.3678678678678677, "grad_norm": 0.8219019118626422, "learning_rate": 6.612635439321995e-06, "loss": 0.3866, "step": 14576 }, { "epoch": 1.3679617117117118, "grad_norm": 0.8350351290995363, "learning_rate": 6.6121186408108355e-06, "loss": 0.384, "step": 14577 }, { "epoch": 1.3680555555555556, "grad_norm": 1.1143306612517387, "learning_rate": 6.611601823078193e-06, "loss": 0.3867, "step": 14578 }, { "epoch": 1.3681493993993994, "grad_norm": 1.1984115292698305, "learning_rate": 6.611084986130234e-06, "loss": 0.3765, "step": 14579 }, { "epoch": 1.3682432432432432, "grad_norm": 0.9782335819031321, "learning_rate": 6.6105681299731195e-06, "loss": 0.3922, "step": 14580 }, { "epoch": 1.368337087087087, "grad_norm": 0.9534996424241436, "learning_rate": 6.610051254613012e-06, "loss": 0.4589, "step": 14581 }, { "epoch": 1.368430930930931, "grad_norm": 0.9520451861188466, "learning_rate": 6.609534360056072e-06, "loss": 0.4186, "step": 14582 }, { "epoch": 1.3685247747747749, "grad_norm": 1.0623047979865459, "learning_rate": 6.609017446308466e-06, "loss": 0.4046, "step": 14583 }, { "epoch": 1.3686186186186187, "grad_norm": 1.6817012049939495, "learning_rate": 6.6085005133763545e-06, "loss": 0.4129, "step": 14584 }, { "epoch": 1.3687124624624625, "grad_norm": 0.8942851157627638, "learning_rate": 6.607983561265902e-06, "loss": 0.4157, "step": 14585 }, { "epoch": 1.3688063063063063, "grad_norm": 1.099179968198088, "learning_rate": 6.607466589983274e-06, "loss": 0.4276, "step": 14586 }, { "epoch": 1.36890015015015, "grad_norm": 1.0767060053875341, "learning_rate": 6.606949599534632e-06, "loss": 0.3482, "step": 14587 }, { "epoch": 1.368993993993994, "grad_norm": 1.072389749092333, "learning_rate": 6.606432589926139e-06, "loss": 0.4238, "step": 14588 }, { "epoch": 1.3690878378378377, "grad_norm": 1.1245889225140908, "learning_rate": 6.605915561163965e-06, "loss": 0.3397, "step": 14589 }, { "epoch": 1.3691816816816818, "grad_norm": 0.8829024866995829, "learning_rate": 6.605398513254266e-06, "loss": 0.3763, "step": 14590 }, { "epoch": 1.3692755255255256, "grad_norm": 1.0695870515428803, "learning_rate": 6.604881446203214e-06, "loss": 0.3888, "step": 14591 }, { "epoch": 1.3693693693693694, "grad_norm": 1.3864384512531764, "learning_rate": 6.604364360016973e-06, "loss": 0.4515, "step": 14592 }, { "epoch": 1.3694632132132132, "grad_norm": 1.2366675865774384, "learning_rate": 6.603847254701705e-06, "loss": 0.4205, "step": 14593 }, { "epoch": 1.369557057057057, "grad_norm": 0.9122257462542108, "learning_rate": 6.603330130263578e-06, "loss": 0.412, "step": 14594 }, { "epoch": 1.369650900900901, "grad_norm": 0.9785099074828651, "learning_rate": 6.602812986708757e-06, "loss": 0.4147, "step": 14595 }, { "epoch": 1.3697447447447448, "grad_norm": 0.9234902501236512, "learning_rate": 6.6022958240434074e-06, "loss": 0.4288, "step": 14596 }, { "epoch": 1.3698385885885886, "grad_norm": 0.9746459200454896, "learning_rate": 6.601778642273696e-06, "loss": 0.4335, "step": 14597 }, { "epoch": 1.3699324324324325, "grad_norm": 0.9281748589370502, "learning_rate": 6.601261441405791e-06, "loss": 0.3683, "step": 14598 }, { "epoch": 1.3700262762762763, "grad_norm": 1.1941686861273686, "learning_rate": 6.600744221445857e-06, "loss": 0.431, "step": 14599 }, { "epoch": 1.37012012012012, "grad_norm": 0.9834025572471211, "learning_rate": 6.600226982400061e-06, "loss": 0.3944, "step": 14600 }, { "epoch": 1.3702139639639639, "grad_norm": 0.9068118866257193, "learning_rate": 6.59970972427457e-06, "loss": 0.3782, "step": 14601 }, { "epoch": 1.3703078078078077, "grad_norm": 1.2333896560245614, "learning_rate": 6.599192447075552e-06, "loss": 0.3608, "step": 14602 }, { "epoch": 1.3704016516516517, "grad_norm": 1.87021208740032, "learning_rate": 6.598675150809173e-06, "loss": 0.386, "step": 14603 }, { "epoch": 1.3704954954954955, "grad_norm": 0.9073749036656057, "learning_rate": 6.598157835481604e-06, "loss": 0.3645, "step": 14604 }, { "epoch": 1.3705893393393394, "grad_norm": 1.4021249750131495, "learning_rate": 6.597640501099011e-06, "loss": 0.4252, "step": 14605 }, { "epoch": 1.3706831831831832, "grad_norm": 1.1092357073550507, "learning_rate": 6.59712314766756e-06, "loss": 0.4181, "step": 14606 }, { "epoch": 1.370777027027027, "grad_norm": 0.9467779867344529, "learning_rate": 6.596605775193424e-06, "loss": 0.4006, "step": 14607 }, { "epoch": 1.370870870870871, "grad_norm": 0.9738383773024606, "learning_rate": 6.596088383682768e-06, "loss": 0.3932, "step": 14608 }, { "epoch": 1.3709647147147148, "grad_norm": 1.4430528150660273, "learning_rate": 6.5955709731417625e-06, "loss": 0.4251, "step": 14609 }, { "epoch": 1.3710585585585586, "grad_norm": 0.9562978604397898, "learning_rate": 6.595053543576577e-06, "loss": 0.4671, "step": 14610 }, { "epoch": 1.3711524024024024, "grad_norm": 0.9700474585331113, "learning_rate": 6.59453609499338e-06, "loss": 0.4112, "step": 14611 }, { "epoch": 1.3712462462462462, "grad_norm": 1.1739395769408585, "learning_rate": 6.594018627398342e-06, "loss": 0.3798, "step": 14612 }, { "epoch": 1.37134009009009, "grad_norm": 1.042457277147527, "learning_rate": 6.593501140797633e-06, "loss": 0.4336, "step": 14613 }, { "epoch": 1.3714339339339339, "grad_norm": 1.0556387540733345, "learning_rate": 6.592983635197421e-06, "loss": 0.4153, "step": 14614 }, { "epoch": 1.3715277777777777, "grad_norm": 1.1065552920551094, "learning_rate": 6.592466110603877e-06, "loss": 0.398, "step": 14615 }, { "epoch": 1.3716216216216215, "grad_norm": 0.8615390465314335, "learning_rate": 6.591948567023174e-06, "loss": 0.3643, "step": 14616 }, { "epoch": 1.3717154654654655, "grad_norm": 0.898970915074023, "learning_rate": 6.591431004461479e-06, "loss": 0.4343, "step": 14617 }, { "epoch": 1.3718093093093093, "grad_norm": 0.9968566898805301, "learning_rate": 6.590913422924967e-06, "loss": 0.4034, "step": 14618 }, { "epoch": 1.3719031531531531, "grad_norm": 1.1299337335227617, "learning_rate": 6.590395822419806e-06, "loss": 0.3776, "step": 14619 }, { "epoch": 1.371996996996997, "grad_norm": 0.8779516675720451, "learning_rate": 6.589878202952168e-06, "loss": 0.4104, "step": 14620 }, { "epoch": 1.3720908408408408, "grad_norm": 0.8361357403254907, "learning_rate": 6.5893605645282255e-06, "loss": 0.3396, "step": 14621 }, { "epoch": 1.3721846846846848, "grad_norm": 0.9140366510377608, "learning_rate": 6.588842907154149e-06, "loss": 0.4127, "step": 14622 }, { "epoch": 1.3722785285285286, "grad_norm": 0.9150227713616537, "learning_rate": 6.5883252308361125e-06, "loss": 0.4172, "step": 14623 }, { "epoch": 1.3723723723723724, "grad_norm": 0.9376540889053978, "learning_rate": 6.5878075355802885e-06, "loss": 0.3977, "step": 14624 }, { "epoch": 1.3724662162162162, "grad_norm": 0.963668535638472, "learning_rate": 6.587289821392847e-06, "loss": 0.3741, "step": 14625 }, { "epoch": 1.37256006006006, "grad_norm": 1.2119364318106747, "learning_rate": 6.586772088279963e-06, "loss": 0.4332, "step": 14626 }, { "epoch": 1.3726539039039038, "grad_norm": 0.8701887426847197, "learning_rate": 6.586254336247806e-06, "loss": 0.3905, "step": 14627 }, { "epoch": 1.3727477477477477, "grad_norm": 0.9221205919865829, "learning_rate": 6.585736565302554e-06, "loss": 0.3896, "step": 14628 }, { "epoch": 1.3728415915915915, "grad_norm": 0.9831616943148952, "learning_rate": 6.5852187754503785e-06, "loss": 0.4106, "step": 14629 }, { "epoch": 1.3729354354354355, "grad_norm": 0.9736565231559642, "learning_rate": 6.584700966697453e-06, "loss": 0.3894, "step": 14630 }, { "epoch": 1.3730292792792793, "grad_norm": 1.0605093491937312, "learning_rate": 6.584183139049951e-06, "loss": 0.3997, "step": 14631 }, { "epoch": 1.3731231231231231, "grad_norm": 1.0359717550831713, "learning_rate": 6.5836652925140466e-06, "loss": 0.4392, "step": 14632 }, { "epoch": 1.373216966966967, "grad_norm": 0.948201215945112, "learning_rate": 6.583147427095914e-06, "loss": 0.3903, "step": 14633 }, { "epoch": 1.3733108108108107, "grad_norm": 0.9844584290442238, "learning_rate": 6.5826295428017295e-06, "loss": 0.4247, "step": 14634 }, { "epoch": 1.3734046546546548, "grad_norm": 0.9981818521213099, "learning_rate": 6.582111639637665e-06, "loss": 0.3982, "step": 14635 }, { "epoch": 1.3734984984984986, "grad_norm": 1.2617416193171769, "learning_rate": 6.5815937176099e-06, "loss": 0.4546, "step": 14636 }, { "epoch": 1.3735923423423424, "grad_norm": 1.1380173240066294, "learning_rate": 6.581075776724606e-06, "loss": 0.4066, "step": 14637 }, { "epoch": 1.3736861861861862, "grad_norm": 0.9259932612741347, "learning_rate": 6.580557816987958e-06, "loss": 0.4491, "step": 14638 }, { "epoch": 1.37378003003003, "grad_norm": 1.06904033734801, "learning_rate": 6.580039838406134e-06, "loss": 0.4087, "step": 14639 }, { "epoch": 1.3738738738738738, "grad_norm": 1.3164040638592285, "learning_rate": 6.579521840985309e-06, "loss": 0.4266, "step": 14640 }, { "epoch": 1.3739677177177176, "grad_norm": 0.8768219366839186, "learning_rate": 6.579003824731659e-06, "loss": 0.381, "step": 14641 }, { "epoch": 1.3740615615615615, "grad_norm": 0.8933860636757605, "learning_rate": 6.578485789651361e-06, "loss": 0.3746, "step": 14642 }, { "epoch": 1.3741554054054055, "grad_norm": 1.1378185500312996, "learning_rate": 6.577967735750592e-06, "loss": 0.4174, "step": 14643 }, { "epoch": 1.3742492492492493, "grad_norm": 0.9161176861085687, "learning_rate": 6.577449663035526e-06, "loss": 0.4203, "step": 14644 }, { "epoch": 1.374343093093093, "grad_norm": 0.9493219742846235, "learning_rate": 6.576931571512343e-06, "loss": 0.4358, "step": 14645 }, { "epoch": 1.374436936936937, "grad_norm": 0.9457308966336768, "learning_rate": 6.576413461187219e-06, "loss": 0.3628, "step": 14646 }, { "epoch": 1.3745307807807807, "grad_norm": 1.1471759958206478, "learning_rate": 6.575895332066331e-06, "loss": 0.4283, "step": 14647 }, { "epoch": 1.3746246246246248, "grad_norm": 0.9371412033295853, "learning_rate": 6.57537718415586e-06, "loss": 0.4321, "step": 14648 }, { "epoch": 1.3747184684684686, "grad_norm": 1.0534192445730004, "learning_rate": 6.57485901746198e-06, "loss": 0.4418, "step": 14649 }, { "epoch": 1.3748123123123124, "grad_norm": 0.9473253820897523, "learning_rate": 6.57434083199087e-06, "loss": 0.4103, "step": 14650 }, { "epoch": 1.3749061561561562, "grad_norm": 1.3107182185878716, "learning_rate": 6.573822627748709e-06, "loss": 0.3987, "step": 14651 }, { "epoch": 1.375, "grad_norm": 0.8756701841853783, "learning_rate": 6.573304404741676e-06, "loss": 0.3842, "step": 14652 }, { "epoch": 1.3750938438438438, "grad_norm": 1.0420528708463102, "learning_rate": 6.572786162975949e-06, "loss": 0.4117, "step": 14653 }, { "epoch": 1.3751876876876876, "grad_norm": 0.9810157051770828, "learning_rate": 6.572267902457708e-06, "loss": 0.4003, "step": 14654 }, { "epoch": 1.3752815315315314, "grad_norm": 0.8151964176978024, "learning_rate": 6.571749623193133e-06, "loss": 0.3746, "step": 14655 }, { "epoch": 1.3753753753753752, "grad_norm": 0.9478881055189132, "learning_rate": 6.571231325188401e-06, "loss": 0.426, "step": 14656 }, { "epoch": 1.3754692192192193, "grad_norm": 1.0338580259561112, "learning_rate": 6.570713008449692e-06, "loss": 0.4043, "step": 14657 }, { "epoch": 1.375563063063063, "grad_norm": 1.0362541561048824, "learning_rate": 6.570194672983188e-06, "loss": 0.385, "step": 14658 }, { "epoch": 1.375656906906907, "grad_norm": 1.2222928822611694, "learning_rate": 6.569676318795067e-06, "loss": 0.4119, "step": 14659 }, { "epoch": 1.3757507507507507, "grad_norm": 0.9337959007480672, "learning_rate": 6.569157945891512e-06, "loss": 0.3849, "step": 14660 }, { "epoch": 1.3758445945945945, "grad_norm": 1.591149895599351, "learning_rate": 6.568639554278703e-06, "loss": 0.4092, "step": 14661 }, { "epoch": 1.3759384384384385, "grad_norm": 0.8551731998338394, "learning_rate": 6.568121143962818e-06, "loss": 0.3522, "step": 14662 }, { "epoch": 1.3760322822822824, "grad_norm": 1.6325917800538132, "learning_rate": 6.56760271495004e-06, "loss": 0.3897, "step": 14663 }, { "epoch": 1.3761261261261262, "grad_norm": 0.8877017653254985, "learning_rate": 6.567084267246551e-06, "loss": 0.3728, "step": 14664 }, { "epoch": 1.37621996996997, "grad_norm": 1.0125807642467843, "learning_rate": 6.566565800858531e-06, "loss": 0.4319, "step": 14665 }, { "epoch": 1.3763138138138138, "grad_norm": 1.0559981345458738, "learning_rate": 6.566047315792165e-06, "loss": 0.3738, "step": 14666 }, { "epoch": 1.3764076576576576, "grad_norm": 1.1396313143459607, "learning_rate": 6.565528812053631e-06, "loss": 0.4153, "step": 14667 }, { "epoch": 1.3765015015015014, "grad_norm": 1.5176897494938129, "learning_rate": 6.565010289649112e-06, "loss": 0.407, "step": 14668 }, { "epoch": 1.3765953453453452, "grad_norm": 1.0525063041963325, "learning_rate": 6.564491748584791e-06, "loss": 0.3844, "step": 14669 }, { "epoch": 1.3766891891891893, "grad_norm": 1.0945258215422968, "learning_rate": 6.5639731888668526e-06, "loss": 0.4205, "step": 14670 }, { "epoch": 1.376783033033033, "grad_norm": 0.9322445825629844, "learning_rate": 6.563454610501476e-06, "loss": 0.3851, "step": 14671 }, { "epoch": 1.3768768768768769, "grad_norm": 0.840148497261706, "learning_rate": 6.562936013494846e-06, "loss": 0.3796, "step": 14672 }, { "epoch": 1.3769707207207207, "grad_norm": 0.9553470852736718, "learning_rate": 6.562417397853147e-06, "loss": 0.4484, "step": 14673 }, { "epoch": 1.3770645645645645, "grad_norm": 1.3770694179889889, "learning_rate": 6.561898763582562e-06, "loss": 0.4384, "step": 14674 }, { "epoch": 1.3771584084084085, "grad_norm": 0.9106014280850858, "learning_rate": 6.561380110689273e-06, "loss": 0.3895, "step": 14675 }, { "epoch": 1.3772522522522523, "grad_norm": 0.8274383510327953, "learning_rate": 6.5608614391794665e-06, "loss": 0.3681, "step": 14676 }, { "epoch": 1.3773460960960962, "grad_norm": 0.9389823018810934, "learning_rate": 6.560342749059323e-06, "loss": 0.3885, "step": 14677 }, { "epoch": 1.37743993993994, "grad_norm": 1.0799258149904645, "learning_rate": 6.559824040335031e-06, "loss": 0.4249, "step": 14678 }, { "epoch": 1.3775337837837838, "grad_norm": 1.2759313229963438, "learning_rate": 6.559305313012773e-06, "loss": 0.4045, "step": 14679 }, { "epoch": 1.3776276276276276, "grad_norm": 1.000468609327364, "learning_rate": 6.558786567098735e-06, "loss": 0.4528, "step": 14680 }, { "epoch": 1.3777214714714714, "grad_norm": 1.646134208504028, "learning_rate": 6.558267802599101e-06, "loss": 0.369, "step": 14681 }, { "epoch": 1.3778153153153152, "grad_norm": 0.8746465164263886, "learning_rate": 6.557749019520057e-06, "loss": 0.3529, "step": 14682 }, { "epoch": 1.3779091591591592, "grad_norm": 0.8499383115660596, "learning_rate": 6.5572302178677864e-06, "loss": 0.373, "step": 14683 }, { "epoch": 1.378003003003003, "grad_norm": 1.0894465616278892, "learning_rate": 6.5567113976484766e-06, "loss": 0.3975, "step": 14684 }, { "epoch": 1.3780968468468469, "grad_norm": 1.3951712487083696, "learning_rate": 6.556192558868315e-06, "loss": 0.4252, "step": 14685 }, { "epoch": 1.3781906906906907, "grad_norm": 1.1604509808466648, "learning_rate": 6.555673701533487e-06, "loss": 0.3918, "step": 14686 }, { "epoch": 1.3782845345345345, "grad_norm": 1.4251149829357224, "learning_rate": 6.555154825650176e-06, "loss": 0.4361, "step": 14687 }, { "epoch": 1.3783783783783785, "grad_norm": 1.040176557430659, "learning_rate": 6.554635931224573e-06, "loss": 0.4232, "step": 14688 }, { "epoch": 1.3784722222222223, "grad_norm": 0.8205744548864012, "learning_rate": 6.5541170182628625e-06, "loss": 0.3955, "step": 14689 }, { "epoch": 1.3785660660660661, "grad_norm": 1.199199277164772, "learning_rate": 6.55359808677123e-06, "loss": 0.4311, "step": 14690 }, { "epoch": 1.37865990990991, "grad_norm": 1.1103980981223254, "learning_rate": 6.5530791367558665e-06, "loss": 0.3781, "step": 14691 }, { "epoch": 1.3787537537537538, "grad_norm": 1.025312135151828, "learning_rate": 6.552560168222959e-06, "loss": 0.4119, "step": 14692 }, { "epoch": 1.3788475975975976, "grad_norm": 1.6474336494203572, "learning_rate": 6.552041181178691e-06, "loss": 0.4257, "step": 14693 }, { "epoch": 1.3789414414414414, "grad_norm": 0.9085869032672784, "learning_rate": 6.551522175629254e-06, "loss": 0.404, "step": 14694 }, { "epoch": 1.3790352852852852, "grad_norm": 1.1213143350908663, "learning_rate": 6.551003151580837e-06, "loss": 0.4241, "step": 14695 }, { "epoch": 1.379129129129129, "grad_norm": 1.1175355966620701, "learning_rate": 6.550484109039625e-06, "loss": 0.4204, "step": 14696 }, { "epoch": 1.379222972972973, "grad_norm": 1.0292330891105754, "learning_rate": 6.54996504801181e-06, "loss": 0.3303, "step": 14697 }, { "epoch": 1.3793168168168168, "grad_norm": 0.9424098371069372, "learning_rate": 6.549445968503579e-06, "loss": 0.4269, "step": 14698 }, { "epoch": 1.3794106606606606, "grad_norm": 1.0801310749488722, "learning_rate": 6.5489268705211205e-06, "loss": 0.3936, "step": 14699 }, { "epoch": 1.3795045045045045, "grad_norm": 1.549205690510237, "learning_rate": 6.548407754070626e-06, "loss": 0.4137, "step": 14700 }, { "epoch": 1.3795983483483483, "grad_norm": 0.9881349870621549, "learning_rate": 6.5478886191582824e-06, "loss": 0.4305, "step": 14701 }, { "epoch": 1.3796921921921923, "grad_norm": 1.4742512007162418, "learning_rate": 6.547369465790281e-06, "loss": 0.3959, "step": 14702 }, { "epoch": 1.3797860360360361, "grad_norm": 0.9384337032096334, "learning_rate": 6.5468502939728105e-06, "loss": 0.4161, "step": 14703 }, { "epoch": 1.37987987987988, "grad_norm": 2.635100360212026, "learning_rate": 6.5463311037120635e-06, "loss": 0.3929, "step": 14704 }, { "epoch": 1.3799737237237237, "grad_norm": 1.0473536824137075, "learning_rate": 6.545811895014228e-06, "loss": 0.3925, "step": 14705 }, { "epoch": 1.3800675675675675, "grad_norm": 0.7806288383937089, "learning_rate": 6.5452926678854955e-06, "loss": 0.3833, "step": 14706 }, { "epoch": 1.3801614114114114, "grad_norm": 0.9809886754838786, "learning_rate": 6.5447734223320555e-06, "loss": 0.4814, "step": 14707 }, { "epoch": 1.3802552552552552, "grad_norm": 0.9854778220923462, "learning_rate": 6.544254158360102e-06, "loss": 0.3548, "step": 14708 }, { "epoch": 1.380349099099099, "grad_norm": 0.9982118813511458, "learning_rate": 6.5437348759758236e-06, "loss": 0.4414, "step": 14709 }, { "epoch": 1.380442942942943, "grad_norm": 0.8891706277224893, "learning_rate": 6.5432155751854136e-06, "loss": 0.3623, "step": 14710 }, { "epoch": 1.3805367867867868, "grad_norm": 1.9815052386706913, "learning_rate": 6.54269625599506e-06, "loss": 0.413, "step": 14711 }, { "epoch": 1.3806306306306306, "grad_norm": 1.2378423782416328, "learning_rate": 6.542176918410961e-06, "loss": 0.4295, "step": 14712 }, { "epoch": 1.3807244744744744, "grad_norm": 1.0794213200099476, "learning_rate": 6.541657562439302e-06, "loss": 0.4292, "step": 14713 }, { "epoch": 1.3808183183183182, "grad_norm": 0.84751924688255, "learning_rate": 6.541138188086279e-06, "loss": 0.416, "step": 14714 }, { "epoch": 1.3809121621621623, "grad_norm": 1.0819037696204754, "learning_rate": 6.540618795358084e-06, "loss": 0.3718, "step": 14715 }, { "epoch": 1.381006006006006, "grad_norm": 1.2526430849059167, "learning_rate": 6.540099384260912e-06, "loss": 0.3983, "step": 14716 }, { "epoch": 1.38109984984985, "grad_norm": 1.0110329640540439, "learning_rate": 6.539579954800952e-06, "loss": 0.3897, "step": 14717 }, { "epoch": 1.3811936936936937, "grad_norm": 0.9612883357001424, "learning_rate": 6.539060506984399e-06, "loss": 0.416, "step": 14718 }, { "epoch": 1.3812875375375375, "grad_norm": 1.1964931723933159, "learning_rate": 6.538541040817447e-06, "loss": 0.4357, "step": 14719 }, { "epoch": 1.3813813813813813, "grad_norm": 1.0076853557346739, "learning_rate": 6.538021556306288e-06, "loss": 0.3946, "step": 14720 }, { "epoch": 1.3814752252252251, "grad_norm": 0.9787016052501548, "learning_rate": 6.537502053457118e-06, "loss": 0.4205, "step": 14721 }, { "epoch": 1.381569069069069, "grad_norm": 1.3003007683358119, "learning_rate": 6.53698253227613e-06, "loss": 0.4117, "step": 14722 }, { "epoch": 1.381662912912913, "grad_norm": 0.8195377462522139, "learning_rate": 6.536462992769516e-06, "loss": 0.3588, "step": 14723 }, { "epoch": 1.3817567567567568, "grad_norm": 1.0982054559724619, "learning_rate": 6.535943434943477e-06, "loss": 0.3923, "step": 14724 }, { "epoch": 1.3818506006006006, "grad_norm": 0.8665106280358061, "learning_rate": 6.535423858804201e-06, "loss": 0.3995, "step": 14725 }, { "epoch": 1.3819444444444444, "grad_norm": 0.8666822063287598, "learning_rate": 6.534904264357885e-06, "loss": 0.426, "step": 14726 }, { "epoch": 1.3820382882882882, "grad_norm": 0.9428103385194146, "learning_rate": 6.534384651610728e-06, "loss": 0.4175, "step": 14727 }, { "epoch": 1.3821321321321323, "grad_norm": 1.1289474331963598, "learning_rate": 6.53386502056892e-06, "loss": 0.4, "step": 14728 }, { "epoch": 1.382225975975976, "grad_norm": 0.899925276572581, "learning_rate": 6.5333453712386584e-06, "loss": 0.4332, "step": 14729 }, { "epoch": 1.3823198198198199, "grad_norm": 0.9553651741254958, "learning_rate": 6.532825703626141e-06, "loss": 0.422, "step": 14730 }, { "epoch": 1.3824136636636637, "grad_norm": 0.9919403103674536, "learning_rate": 6.532306017737561e-06, "loss": 0.3843, "step": 14731 }, { "epoch": 1.3825075075075075, "grad_norm": 1.3117154621032203, "learning_rate": 6.531786313579116e-06, "loss": 0.3394, "step": 14732 }, { "epoch": 1.3826013513513513, "grad_norm": 0.9881493506260164, "learning_rate": 6.5312665911570036e-06, "loss": 0.3913, "step": 14733 }, { "epoch": 1.3826951951951951, "grad_norm": 0.8729416420448374, "learning_rate": 6.530746850477419e-06, "loss": 0.3875, "step": 14734 }, { "epoch": 1.382789039039039, "grad_norm": 0.8009478285378594, "learning_rate": 6.530227091546559e-06, "loss": 0.373, "step": 14735 }, { "epoch": 1.3828828828828827, "grad_norm": 0.9035896221241151, "learning_rate": 6.529707314370622e-06, "loss": 0.3669, "step": 14736 }, { "epoch": 1.3829767267267268, "grad_norm": 1.057051068421304, "learning_rate": 6.529187518955804e-06, "loss": 0.3793, "step": 14737 }, { "epoch": 1.3830705705705706, "grad_norm": 1.240424921292594, "learning_rate": 6.528667705308302e-06, "loss": 0.4228, "step": 14738 }, { "epoch": 1.3831644144144144, "grad_norm": 1.7057079013139356, "learning_rate": 6.528147873434318e-06, "loss": 0.4169, "step": 14739 }, { "epoch": 1.3832582582582582, "grad_norm": 0.9512042649258555, "learning_rate": 6.527628023340046e-06, "loss": 0.3995, "step": 14740 }, { "epoch": 1.383352102102102, "grad_norm": 1.0065365676087008, "learning_rate": 6.527108155031685e-06, "loss": 0.3304, "step": 14741 }, { "epoch": 1.383445945945946, "grad_norm": 0.991483616768774, "learning_rate": 6.5265882685154345e-06, "loss": 0.4202, "step": 14742 }, { "epoch": 1.3835397897897899, "grad_norm": 0.8068747556308548, "learning_rate": 6.526068363797491e-06, "loss": 0.3913, "step": 14743 }, { "epoch": 1.3836336336336337, "grad_norm": 1.1067540968611966, "learning_rate": 6.525548440884054e-06, "loss": 0.3941, "step": 14744 }, { "epoch": 1.3837274774774775, "grad_norm": 1.0776073613801975, "learning_rate": 6.525028499781325e-06, "loss": 0.3939, "step": 14745 }, { "epoch": 1.3838213213213213, "grad_norm": 0.9869525916205529, "learning_rate": 6.5245085404955024e-06, "loss": 0.4091, "step": 14746 }, { "epoch": 1.383915165165165, "grad_norm": 1.3598109509275604, "learning_rate": 6.523988563032783e-06, "loss": 0.4352, "step": 14747 }, { "epoch": 1.384009009009009, "grad_norm": 0.8681997080219537, "learning_rate": 6.523468567399371e-06, "loss": 0.4373, "step": 14748 }, { "epoch": 1.3841028528528527, "grad_norm": 3.820087469302669, "learning_rate": 6.522948553601462e-06, "loss": 0.4186, "step": 14749 }, { "epoch": 1.3841966966966968, "grad_norm": 1.0331696004629984, "learning_rate": 6.522428521645259e-06, "loss": 0.4113, "step": 14750 }, { "epoch": 1.3842905405405406, "grad_norm": 1.0488338287963892, "learning_rate": 6.521908471536962e-06, "loss": 0.3641, "step": 14751 }, { "epoch": 1.3843843843843844, "grad_norm": 0.9907513421734506, "learning_rate": 6.521388403282769e-06, "loss": 0.3727, "step": 14752 }, { "epoch": 1.3844782282282282, "grad_norm": 1.9227722627625965, "learning_rate": 6.520868316888886e-06, "loss": 0.4378, "step": 14753 }, { "epoch": 1.384572072072072, "grad_norm": 1.0221900290160526, "learning_rate": 6.52034821236151e-06, "loss": 0.4402, "step": 14754 }, { "epoch": 1.384665915915916, "grad_norm": 0.9512093634528028, "learning_rate": 6.519828089706843e-06, "loss": 0.4104, "step": 14755 }, { "epoch": 1.3847597597597598, "grad_norm": 0.831722471146955, "learning_rate": 6.519307948931087e-06, "loss": 0.3749, "step": 14756 }, { "epoch": 1.3848536036036037, "grad_norm": 1.0148816515423642, "learning_rate": 6.5187877900404426e-06, "loss": 0.412, "step": 14757 }, { "epoch": 1.3849474474474475, "grad_norm": 0.9892523952016291, "learning_rate": 6.5182676130411125e-06, "loss": 0.3912, "step": 14758 }, { "epoch": 1.3850412912912913, "grad_norm": 1.28741130917951, "learning_rate": 6.517747417939301e-06, "loss": 0.4212, "step": 14759 }, { "epoch": 1.385135135135135, "grad_norm": 0.8984026761973402, "learning_rate": 6.517227204741208e-06, "loss": 0.4017, "step": 14760 }, { "epoch": 1.385228978978979, "grad_norm": 1.1100345419333788, "learning_rate": 6.516706973453034e-06, "loss": 0.4448, "step": 14761 }, { "epoch": 1.3853228228228227, "grad_norm": 1.031101295691307, "learning_rate": 6.516186724080986e-06, "loss": 0.4113, "step": 14762 }, { "epoch": 1.3854166666666667, "grad_norm": 0.8860226304693679, "learning_rate": 6.515666456631265e-06, "loss": 0.4163, "step": 14763 }, { "epoch": 1.3855105105105106, "grad_norm": 1.1983615204273304, "learning_rate": 6.515146171110074e-06, "loss": 0.3934, "step": 14764 }, { "epoch": 1.3856043543543544, "grad_norm": 0.9264270995382055, "learning_rate": 6.514625867523618e-06, "loss": 0.3663, "step": 14765 }, { "epoch": 1.3856981981981982, "grad_norm": 0.9117156854637587, "learning_rate": 6.5141055458781e-06, "loss": 0.4345, "step": 14766 }, { "epoch": 1.385792042042042, "grad_norm": 0.9943297129813512, "learning_rate": 6.513585206179722e-06, "loss": 0.3825, "step": 14767 }, { "epoch": 1.385885885885886, "grad_norm": 0.9127221326220663, "learning_rate": 6.51306484843469e-06, "loss": 0.4027, "step": 14768 }, { "epoch": 1.3859797297297298, "grad_norm": 0.9268116956340609, "learning_rate": 6.512544472649207e-06, "loss": 0.4087, "step": 14769 }, { "epoch": 1.3860735735735736, "grad_norm": 2.2941637251783162, "learning_rate": 6.512024078829479e-06, "loss": 0.4256, "step": 14770 }, { "epoch": 1.3861674174174174, "grad_norm": 0.8797154935333105, "learning_rate": 6.51150366698171e-06, "loss": 0.416, "step": 14771 }, { "epoch": 1.3862612612612613, "grad_norm": 0.9376667694277314, "learning_rate": 6.510983237112105e-06, "loss": 0.383, "step": 14772 }, { "epoch": 1.386355105105105, "grad_norm": 0.9233699197285482, "learning_rate": 6.510462789226868e-06, "loss": 0.4151, "step": 14773 }, { "epoch": 1.3864489489489489, "grad_norm": 0.9368230134604663, "learning_rate": 6.509942323332206e-06, "loss": 0.4526, "step": 14774 }, { "epoch": 1.3865427927927927, "grad_norm": 1.0757464532883718, "learning_rate": 6.509421839434325e-06, "loss": 0.4317, "step": 14775 }, { "epoch": 1.3866366366366365, "grad_norm": 0.879309830155327, "learning_rate": 6.5089013375394276e-06, "loss": 0.4255, "step": 14776 }, { "epoch": 1.3867304804804805, "grad_norm": 0.9244658552313582, "learning_rate": 6.5083808176537235e-06, "loss": 0.4018, "step": 14777 }, { "epoch": 1.3868243243243243, "grad_norm": 1.2490610070817059, "learning_rate": 6.507860279783418e-06, "loss": 0.3599, "step": 14778 }, { "epoch": 1.3869181681681682, "grad_norm": 0.9101105570617365, "learning_rate": 6.507339723934715e-06, "loss": 0.3943, "step": 14779 }, { "epoch": 1.387012012012012, "grad_norm": 1.0519681837998467, "learning_rate": 6.506819150113824e-06, "loss": 0.4169, "step": 14780 }, { "epoch": 1.3871058558558558, "grad_norm": 0.8683702392788573, "learning_rate": 6.506298558326951e-06, "loss": 0.4236, "step": 14781 }, { "epoch": 1.3871996996996998, "grad_norm": 0.8220760865836374, "learning_rate": 6.505777948580303e-06, "loss": 0.4241, "step": 14782 }, { "epoch": 1.3872935435435436, "grad_norm": 0.9444202374726317, "learning_rate": 6.505257320880087e-06, "loss": 0.4167, "step": 14783 }, { "epoch": 1.3873873873873874, "grad_norm": 0.8967229632440188, "learning_rate": 6.504736675232512e-06, "loss": 0.4093, "step": 14784 }, { "epoch": 1.3874812312312312, "grad_norm": 0.9507082586266722, "learning_rate": 6.504216011643782e-06, "loss": 0.3798, "step": 14785 }, { "epoch": 1.387575075075075, "grad_norm": 1.01968101709647, "learning_rate": 6.50369533012011e-06, "loss": 0.4273, "step": 14786 }, { "epoch": 1.3876689189189189, "grad_norm": 1.085878812463183, "learning_rate": 6.503174630667701e-06, "loss": 0.426, "step": 14787 }, { "epoch": 1.3877627627627627, "grad_norm": 0.8520400563726792, "learning_rate": 6.502653913292762e-06, "loss": 0.3722, "step": 14788 }, { "epoch": 1.3878566066066065, "grad_norm": 0.86687430721993, "learning_rate": 6.502133178001506e-06, "loss": 0.3967, "step": 14789 }, { "epoch": 1.3879504504504505, "grad_norm": 0.9631711777753813, "learning_rate": 6.5016124248001386e-06, "loss": 0.3806, "step": 14790 }, { "epoch": 1.3880442942942943, "grad_norm": 1.698761806067907, "learning_rate": 6.50109165369487e-06, "loss": 0.4209, "step": 14791 }, { "epoch": 1.3881381381381381, "grad_norm": 0.9295433730858249, "learning_rate": 6.500570864691908e-06, "loss": 0.3754, "step": 14792 }, { "epoch": 1.388231981981982, "grad_norm": 1.1172656154243412, "learning_rate": 6.500050057797463e-06, "loss": 0.3733, "step": 14793 }, { "epoch": 1.3883258258258258, "grad_norm": 1.0508843820926648, "learning_rate": 6.499529233017745e-06, "loss": 0.4098, "step": 14794 }, { "epoch": 1.3884196696696698, "grad_norm": 1.1147207654490234, "learning_rate": 6.4990083903589625e-06, "loss": 0.457, "step": 14795 }, { "epoch": 1.3885135135135136, "grad_norm": 1.0611967648762715, "learning_rate": 6.498487529827329e-06, "loss": 0.4619, "step": 14796 }, { "epoch": 1.3886073573573574, "grad_norm": 0.9898622266768098, "learning_rate": 6.497966651429051e-06, "loss": 0.388, "step": 14797 }, { "epoch": 1.3887012012012012, "grad_norm": 0.867549946062093, "learning_rate": 6.49744575517034e-06, "loss": 0.3657, "step": 14798 }, { "epoch": 1.388795045045045, "grad_norm": 1.0743607588769983, "learning_rate": 6.496924841057406e-06, "loss": 0.4339, "step": 14799 }, { "epoch": 1.3888888888888888, "grad_norm": 0.8514379006567462, "learning_rate": 6.4964039090964625e-06, "loss": 0.3967, "step": 14800 }, { "epoch": 1.3889827327327327, "grad_norm": 0.9381503515788285, "learning_rate": 6.495882959293718e-06, "loss": 0.3867, "step": 14801 }, { "epoch": 1.3890765765765765, "grad_norm": 0.8103934251423819, "learning_rate": 6.495361991655385e-06, "loss": 0.3738, "step": 14802 }, { "epoch": 1.3891704204204205, "grad_norm": 0.8846490560992617, "learning_rate": 6.494841006187676e-06, "loss": 0.3968, "step": 14803 }, { "epoch": 1.3892642642642643, "grad_norm": 0.9952570941467919, "learning_rate": 6.494320002896801e-06, "loss": 0.4513, "step": 14804 }, { "epoch": 1.3893581081081081, "grad_norm": 1.0121700084681515, "learning_rate": 6.493798981788975e-06, "loss": 0.4217, "step": 14805 }, { "epoch": 1.389451951951952, "grad_norm": 1.113705959448245, "learning_rate": 6.493277942870404e-06, "loss": 0.443, "step": 14806 }, { "epoch": 1.3895457957957957, "grad_norm": 1.0281073852223315, "learning_rate": 6.492756886147307e-06, "loss": 0.3857, "step": 14807 }, { "epoch": 1.3896396396396398, "grad_norm": 0.9282426366628963, "learning_rate": 6.492235811625893e-06, "loss": 0.4192, "step": 14808 }, { "epoch": 1.3897334834834836, "grad_norm": 1.1110000114349265, "learning_rate": 6.491714719312376e-06, "loss": 0.4215, "step": 14809 }, { "epoch": 1.3898273273273274, "grad_norm": 1.3800204395916116, "learning_rate": 6.491193609212969e-06, "loss": 0.396, "step": 14810 }, { "epoch": 1.3899211711711712, "grad_norm": 1.0205933278352362, "learning_rate": 6.490672481333885e-06, "loss": 0.3978, "step": 14811 }, { "epoch": 1.390015015015015, "grad_norm": 0.8698073184488406, "learning_rate": 6.4901513356813375e-06, "loss": 0.393, "step": 14812 }, { "epoch": 1.3901088588588588, "grad_norm": 0.9620545289613049, "learning_rate": 6.4896301722615395e-06, "loss": 0.3997, "step": 14813 }, { "epoch": 1.3902027027027026, "grad_norm": 1.3000331649782877, "learning_rate": 6.489108991080707e-06, "loss": 0.4482, "step": 14814 }, { "epoch": 1.3902965465465464, "grad_norm": 0.9769230710691311, "learning_rate": 6.4885877921450525e-06, "loss": 0.3712, "step": 14815 }, { "epoch": 1.3903903903903903, "grad_norm": 1.0242459827158892, "learning_rate": 6.4880665754607895e-06, "loss": 0.449, "step": 14816 }, { "epoch": 1.3904842342342343, "grad_norm": 2.005003001676607, "learning_rate": 6.487545341034135e-06, "loss": 0.4108, "step": 14817 }, { "epoch": 1.390578078078078, "grad_norm": 0.9970820697255064, "learning_rate": 6.487024088871301e-06, "loss": 0.3435, "step": 14818 }, { "epoch": 1.390671921921922, "grad_norm": 1.2019060193205262, "learning_rate": 6.486502818978504e-06, "loss": 0.4037, "step": 14819 }, { "epoch": 1.3907657657657657, "grad_norm": 0.939137671504853, "learning_rate": 6.48598153136196e-06, "loss": 0.4172, "step": 14820 }, { "epoch": 1.3908596096096097, "grad_norm": 1.0134529890039365, "learning_rate": 6.485460226027883e-06, "loss": 0.4157, "step": 14821 }, { "epoch": 1.3909534534534536, "grad_norm": 0.9170745766004665, "learning_rate": 6.484938902982488e-06, "loss": 0.4261, "step": 14822 }, { "epoch": 1.3910472972972974, "grad_norm": 1.3209174042369964, "learning_rate": 6.484417562231993e-06, "loss": 0.3764, "step": 14823 }, { "epoch": 1.3911411411411412, "grad_norm": 0.9531130903500238, "learning_rate": 6.483896203782612e-06, "loss": 0.3309, "step": 14824 }, { "epoch": 1.391234984984985, "grad_norm": 0.9600040882619766, "learning_rate": 6.483374827640561e-06, "loss": 0.3853, "step": 14825 }, { "epoch": 1.3913288288288288, "grad_norm": 1.139835622372163, "learning_rate": 6.4828534338120574e-06, "loss": 0.3769, "step": 14826 }, { "epoch": 1.3914226726726726, "grad_norm": 0.9297485376466296, "learning_rate": 6.482332022303319e-06, "loss": 0.3791, "step": 14827 }, { "epoch": 1.3915165165165164, "grad_norm": 0.8993153178218527, "learning_rate": 6.481810593120561e-06, "loss": 0.36, "step": 14828 }, { "epoch": 1.3916103603603602, "grad_norm": 0.9339132103555048, "learning_rate": 6.481289146270002e-06, "loss": 0.3748, "step": 14829 }, { "epoch": 1.3917042042042043, "grad_norm": 0.953950386054716, "learning_rate": 6.4807676817578554e-06, "loss": 0.4397, "step": 14830 }, { "epoch": 1.391798048048048, "grad_norm": 0.9141998650402209, "learning_rate": 6.480246199590343e-06, "loss": 0.3732, "step": 14831 }, { "epoch": 1.3918918918918919, "grad_norm": 1.0111063049867046, "learning_rate": 6.4797246997736815e-06, "loss": 0.4047, "step": 14832 }, { "epoch": 1.3919857357357357, "grad_norm": 0.895630907771306, "learning_rate": 6.479203182314089e-06, "loss": 0.3604, "step": 14833 }, { "epoch": 1.3920795795795795, "grad_norm": 1.9983381637835225, "learning_rate": 6.478681647217782e-06, "loss": 0.3577, "step": 14834 }, { "epoch": 1.3921734234234235, "grad_norm": 0.9877372794700829, "learning_rate": 6.47816009449098e-06, "loss": 0.394, "step": 14835 }, { "epoch": 1.3922672672672673, "grad_norm": 0.8226166016540677, "learning_rate": 6.4776385241398985e-06, "loss": 0.3816, "step": 14836 }, { "epoch": 1.3923611111111112, "grad_norm": 1.003362767800332, "learning_rate": 6.477116936170761e-06, "loss": 0.4223, "step": 14837 }, { "epoch": 1.392454954954955, "grad_norm": 0.8792058399768877, "learning_rate": 6.476595330589785e-06, "loss": 0.3715, "step": 14838 }, { "epoch": 1.3925487987987988, "grad_norm": 0.9878606040147027, "learning_rate": 6.476073707403189e-06, "loss": 0.3674, "step": 14839 }, { "epoch": 1.3926426426426426, "grad_norm": 1.2683454804832146, "learning_rate": 6.475552066617192e-06, "loss": 0.4206, "step": 14840 }, { "epoch": 1.3927364864864864, "grad_norm": 1.4660828283553966, "learning_rate": 6.475030408238015e-06, "loss": 0.3953, "step": 14841 }, { "epoch": 1.3928303303303302, "grad_norm": 0.9830589592907256, "learning_rate": 6.474508732271875e-06, "loss": 0.4513, "step": 14842 }, { "epoch": 1.3929241741741742, "grad_norm": 0.9914416188410611, "learning_rate": 6.473987038724994e-06, "loss": 0.4067, "step": 14843 }, { "epoch": 1.393018018018018, "grad_norm": 0.912472728117837, "learning_rate": 6.473465327603593e-06, "loss": 0.3965, "step": 14844 }, { "epoch": 1.3931118618618619, "grad_norm": 0.9600486243160533, "learning_rate": 6.472943598913892e-06, "loss": 0.432, "step": 14845 }, { "epoch": 1.3932057057057057, "grad_norm": 1.1012612039361895, "learning_rate": 6.47242185266211e-06, "loss": 0.3825, "step": 14846 }, { "epoch": 1.3932995495495495, "grad_norm": 0.9137293015586406, "learning_rate": 6.47190008885447e-06, "loss": 0.4179, "step": 14847 }, { "epoch": 1.3933933933933935, "grad_norm": 0.9098873255763201, "learning_rate": 6.471378307497191e-06, "loss": 0.3756, "step": 14848 }, { "epoch": 1.3934872372372373, "grad_norm": 1.0056608942565761, "learning_rate": 6.4708565085964945e-06, "loss": 0.4727, "step": 14849 }, { "epoch": 1.3935810810810811, "grad_norm": 1.0025549280126553, "learning_rate": 6.470334692158604e-06, "loss": 0.4289, "step": 14850 }, { "epoch": 1.393674924924925, "grad_norm": 0.9912358711648879, "learning_rate": 6.4698128581897414e-06, "loss": 0.4292, "step": 14851 }, { "epoch": 1.3937687687687688, "grad_norm": 0.9128667345926853, "learning_rate": 6.4692910066961245e-06, "loss": 0.3851, "step": 14852 }, { "epoch": 1.3938626126126126, "grad_norm": 0.905854562667116, "learning_rate": 6.46876913768398e-06, "loss": 0.4162, "step": 14853 }, { "epoch": 1.3939564564564564, "grad_norm": 2.828059121434727, "learning_rate": 6.4682472511595265e-06, "loss": 0.412, "step": 14854 }, { "epoch": 1.3940503003003002, "grad_norm": 1.3459182986620268, "learning_rate": 6.467725347128988e-06, "loss": 0.4422, "step": 14855 }, { "epoch": 1.3941441441441442, "grad_norm": 1.0193685811977204, "learning_rate": 6.4672034255985894e-06, "loss": 0.4011, "step": 14856 }, { "epoch": 1.394237987987988, "grad_norm": 2.0381942563765683, "learning_rate": 6.4666814865745495e-06, "loss": 0.3864, "step": 14857 }, { "epoch": 1.3943318318318318, "grad_norm": 1.0730517537250022, "learning_rate": 6.466159530063097e-06, "loss": 0.4117, "step": 14858 }, { "epoch": 1.3944256756756757, "grad_norm": 0.8394914052292937, "learning_rate": 6.465637556070448e-06, "loss": 0.3677, "step": 14859 }, { "epoch": 1.3945195195195195, "grad_norm": 0.8982273665089339, "learning_rate": 6.465115564602832e-06, "loss": 0.4054, "step": 14860 }, { "epoch": 1.3946133633633635, "grad_norm": 1.0490400182361763, "learning_rate": 6.46459355566647e-06, "loss": 0.3571, "step": 14861 }, { "epoch": 1.3947072072072073, "grad_norm": 1.1191263520153867, "learning_rate": 6.464071529267587e-06, "loss": 0.3857, "step": 14862 }, { "epoch": 1.3948010510510511, "grad_norm": 0.9240358286116287, "learning_rate": 6.463549485412406e-06, "loss": 0.4235, "step": 14863 }, { "epoch": 1.394894894894895, "grad_norm": 0.9114580806874872, "learning_rate": 6.463027424107153e-06, "loss": 0.4125, "step": 14864 }, { "epoch": 1.3949887387387387, "grad_norm": 0.9467560909367823, "learning_rate": 6.462505345358053e-06, "loss": 0.3776, "step": 14865 }, { "epoch": 1.3950825825825826, "grad_norm": 0.9747483225694262, "learning_rate": 6.461983249171328e-06, "loss": 0.4212, "step": 14866 }, { "epoch": 1.3951764264264264, "grad_norm": 0.8842403054114272, "learning_rate": 6.461461135553204e-06, "loss": 0.3834, "step": 14867 }, { "epoch": 1.3952702702702702, "grad_norm": 0.9305351043533576, "learning_rate": 6.4609390045099095e-06, "loss": 0.3788, "step": 14868 }, { "epoch": 1.395364114114114, "grad_norm": 0.9245880037982525, "learning_rate": 6.460416856047665e-06, "loss": 0.4096, "step": 14869 }, { "epoch": 1.395457957957958, "grad_norm": 0.9481114324462364, "learning_rate": 6.4598946901727e-06, "loss": 0.4336, "step": 14870 }, { "epoch": 1.3955518018018018, "grad_norm": 1.0929329017996872, "learning_rate": 6.459372506891238e-06, "loss": 0.363, "step": 14871 }, { "epoch": 1.3956456456456456, "grad_norm": 0.8909372428228175, "learning_rate": 6.458850306209505e-06, "loss": 0.3864, "step": 14872 }, { "epoch": 1.3957394894894894, "grad_norm": 0.9202163151097723, "learning_rate": 6.4583280881337295e-06, "loss": 0.3687, "step": 14873 }, { "epoch": 1.3958333333333333, "grad_norm": 1.024634310915948, "learning_rate": 6.457805852670136e-06, "loss": 0.4579, "step": 14874 }, { "epoch": 1.3959271771771773, "grad_norm": 0.887098734651467, "learning_rate": 6.457283599824952e-06, "loss": 0.3787, "step": 14875 }, { "epoch": 1.396021021021021, "grad_norm": 1.2085233916691265, "learning_rate": 6.456761329604404e-06, "loss": 0.3487, "step": 14876 }, { "epoch": 1.396114864864865, "grad_norm": 1.2384912055736836, "learning_rate": 6.456239042014721e-06, "loss": 0.426, "step": 14877 }, { "epoch": 1.3962087087087087, "grad_norm": 1.1266967976707627, "learning_rate": 6.4557167370621255e-06, "loss": 0.4398, "step": 14878 }, { "epoch": 1.3963025525525525, "grad_norm": 0.8920786185343007, "learning_rate": 6.455194414752848e-06, "loss": 0.3594, "step": 14879 }, { "epoch": 1.3963963963963963, "grad_norm": 1.0423351921953754, "learning_rate": 6.454672075093118e-06, "loss": 0.4227, "step": 14880 }, { "epoch": 1.3964902402402402, "grad_norm": 2.1000664529202155, "learning_rate": 6.45414971808916e-06, "loss": 0.3925, "step": 14881 }, { "epoch": 1.396584084084084, "grad_norm": 2.306416035216986, "learning_rate": 6.453627343747206e-06, "loss": 0.3691, "step": 14882 }, { "epoch": 1.396677927927928, "grad_norm": 1.1941750608315762, "learning_rate": 6.453104952073481e-06, "loss": 0.4144, "step": 14883 }, { "epoch": 1.3967717717717718, "grad_norm": 0.8430085745353373, "learning_rate": 6.452582543074214e-06, "loss": 0.3949, "step": 14884 }, { "epoch": 1.3968656156156156, "grad_norm": 1.00376990648027, "learning_rate": 6.452060116755634e-06, "loss": 0.4268, "step": 14885 }, { "epoch": 1.3969594594594594, "grad_norm": 0.9374517836033467, "learning_rate": 6.451537673123972e-06, "loss": 0.4103, "step": 14886 }, { "epoch": 1.3970533033033032, "grad_norm": 1.0435978494590903, "learning_rate": 6.451015212185454e-06, "loss": 0.4101, "step": 14887 }, { "epoch": 1.3971471471471473, "grad_norm": 1.1988500467543044, "learning_rate": 6.450492733946313e-06, "loss": 0.4336, "step": 14888 }, { "epoch": 1.397240990990991, "grad_norm": 0.8830497901151149, "learning_rate": 6.449970238412775e-06, "loss": 0.4069, "step": 14889 }, { "epoch": 1.397334834834835, "grad_norm": 0.9974773692209241, "learning_rate": 6.449447725591071e-06, "loss": 0.4527, "step": 14890 }, { "epoch": 1.3974286786786787, "grad_norm": 3.3362742253568, "learning_rate": 6.448925195487431e-06, "loss": 0.3717, "step": 14891 }, { "epoch": 1.3975225225225225, "grad_norm": 1.438736264071346, "learning_rate": 6.448402648108087e-06, "loss": 0.3858, "step": 14892 }, { "epoch": 1.3976163663663663, "grad_norm": 1.0549108779054444, "learning_rate": 6.447880083459265e-06, "loss": 0.4003, "step": 14893 }, { "epoch": 1.3977102102102101, "grad_norm": 1.2139611072564889, "learning_rate": 6.4473575015472e-06, "loss": 0.3884, "step": 14894 }, { "epoch": 1.397804054054054, "grad_norm": 1.0900282513001558, "learning_rate": 6.446834902378122e-06, "loss": 0.4205, "step": 14895 }, { "epoch": 1.397897897897898, "grad_norm": 0.9973560987355334, "learning_rate": 6.446312285958259e-06, "loss": 0.4209, "step": 14896 }, { "epoch": 1.3979917417417418, "grad_norm": 1.3323837037242316, "learning_rate": 6.445789652293845e-06, "loss": 0.4022, "step": 14897 }, { "epoch": 1.3980855855855856, "grad_norm": 1.3304306794301723, "learning_rate": 6.445267001391112e-06, "loss": 0.4064, "step": 14898 }, { "epoch": 1.3981794294294294, "grad_norm": 0.9693210379779569, "learning_rate": 6.444744333256289e-06, "loss": 0.3989, "step": 14899 }, { "epoch": 1.3982732732732732, "grad_norm": 0.9609598451596637, "learning_rate": 6.444221647895612e-06, "loss": 0.3861, "step": 14900 }, { "epoch": 1.3983671171171173, "grad_norm": 0.9711882977773555, "learning_rate": 6.4436989453153086e-06, "loss": 0.4113, "step": 14901 }, { "epoch": 1.398460960960961, "grad_norm": 1.1135419688433148, "learning_rate": 6.443176225521612e-06, "loss": 0.4255, "step": 14902 }, { "epoch": 1.3985548048048049, "grad_norm": 0.8917351248960906, "learning_rate": 6.442653488520756e-06, "loss": 0.4415, "step": 14903 }, { "epoch": 1.3986486486486487, "grad_norm": 0.9741024872092795, "learning_rate": 6.4421307343189736e-06, "loss": 0.4081, "step": 14904 }, { "epoch": 1.3987424924924925, "grad_norm": 0.9130659695232622, "learning_rate": 6.4416079629224944e-06, "loss": 0.3882, "step": 14905 }, { "epoch": 1.3988363363363363, "grad_norm": 1.8209681367628623, "learning_rate": 6.441085174337556e-06, "loss": 0.3793, "step": 14906 }, { "epoch": 1.3989301801801801, "grad_norm": 1.0127265734779605, "learning_rate": 6.440562368570389e-06, "loss": 0.4229, "step": 14907 }, { "epoch": 1.399024024024024, "grad_norm": 0.8331041689382377, "learning_rate": 6.440039545627226e-06, "loss": 0.3847, "step": 14908 }, { "epoch": 1.3991178678678677, "grad_norm": 0.7976909477043249, "learning_rate": 6.439516705514303e-06, "loss": 0.3916, "step": 14909 }, { "epoch": 1.3992117117117118, "grad_norm": 1.1800774874126376, "learning_rate": 6.438993848237853e-06, "loss": 0.4431, "step": 14910 }, { "epoch": 1.3993055555555556, "grad_norm": 0.930178319760216, "learning_rate": 6.4384709738041095e-06, "loss": 0.4031, "step": 14911 }, { "epoch": 1.3993993993993994, "grad_norm": 0.9167409715577283, "learning_rate": 6.437948082219308e-06, "loss": 0.3545, "step": 14912 }, { "epoch": 1.3994932432432432, "grad_norm": 0.9971634783617153, "learning_rate": 6.4374251734896834e-06, "loss": 0.3668, "step": 14913 }, { "epoch": 1.399587087087087, "grad_norm": 0.9295808826173376, "learning_rate": 6.436902247621467e-06, "loss": 0.3843, "step": 14914 }, { "epoch": 1.399680930930931, "grad_norm": 0.9685917080946164, "learning_rate": 6.4363793046208965e-06, "loss": 0.4331, "step": 14915 }, { "epoch": 1.3997747747747749, "grad_norm": 1.132706535895141, "learning_rate": 6.435856344494207e-06, "loss": 0.388, "step": 14916 }, { "epoch": 1.3998686186186187, "grad_norm": 2.769824936229751, "learning_rate": 6.435333367247632e-06, "loss": 0.3794, "step": 14917 }, { "epoch": 1.3999624624624625, "grad_norm": 0.9569859856399928, "learning_rate": 6.43481037288741e-06, "loss": 0.3744, "step": 14918 }, { "epoch": 1.4000563063063063, "grad_norm": 1.06337799574672, "learning_rate": 6.434287361419776e-06, "loss": 0.403, "step": 14919 }, { "epoch": 1.40015015015015, "grad_norm": 1.052194926361431, "learning_rate": 6.433764332850962e-06, "loss": 0.4116, "step": 14920 }, { "epoch": 1.400243993993994, "grad_norm": 1.1078413934935054, "learning_rate": 6.433241287187209e-06, "loss": 0.3952, "step": 14921 }, { "epoch": 1.4003378378378377, "grad_norm": 1.1203658854565888, "learning_rate": 6.432718224434751e-06, "loss": 0.3873, "step": 14922 }, { "epoch": 1.4004316816816818, "grad_norm": 1.1001852976228883, "learning_rate": 6.432195144599824e-06, "loss": 0.4007, "step": 14923 }, { "epoch": 1.4005255255255256, "grad_norm": 1.238563552422686, "learning_rate": 6.431672047688667e-06, "loss": 0.3559, "step": 14924 }, { "epoch": 1.4006193693693694, "grad_norm": 0.9049750262586057, "learning_rate": 6.431148933707516e-06, "loss": 0.3748, "step": 14925 }, { "epoch": 1.4007132132132132, "grad_norm": 1.1014296582071919, "learning_rate": 6.430625802662608e-06, "loss": 0.4087, "step": 14926 }, { "epoch": 1.400807057057057, "grad_norm": 0.9607301849133677, "learning_rate": 6.430102654560179e-06, "loss": 0.3938, "step": 14927 }, { "epoch": 1.400900900900901, "grad_norm": 1.1226750454913006, "learning_rate": 6.429579489406468e-06, "loss": 0.3831, "step": 14928 }, { "epoch": 1.4009947447447448, "grad_norm": 0.8100111710557725, "learning_rate": 6.429056307207713e-06, "loss": 0.3677, "step": 14929 }, { "epoch": 1.4010885885885886, "grad_norm": 0.93111496720973, "learning_rate": 6.4285331079701495e-06, "loss": 0.4045, "step": 14930 }, { "epoch": 1.4011824324324325, "grad_norm": 0.9243741870981674, "learning_rate": 6.42800989170002e-06, "loss": 0.4081, "step": 14931 }, { "epoch": 1.4012762762762763, "grad_norm": 1.2324590507237572, "learning_rate": 6.427486658403561e-06, "loss": 0.4101, "step": 14932 }, { "epoch": 1.40137012012012, "grad_norm": 1.0408750499350574, "learning_rate": 6.426963408087009e-06, "loss": 0.415, "step": 14933 }, { "epoch": 1.4014639639639639, "grad_norm": 1.0872551887526536, "learning_rate": 6.426440140756605e-06, "loss": 0.4364, "step": 14934 }, { "epoch": 1.4015578078078077, "grad_norm": 1.8249622169601971, "learning_rate": 6.425916856418586e-06, "loss": 0.4508, "step": 14935 }, { "epoch": 1.4016516516516517, "grad_norm": 0.9734815121625638, "learning_rate": 6.425393555079193e-06, "loss": 0.3795, "step": 14936 }, { "epoch": 1.4017454954954955, "grad_norm": 0.9917867533710522, "learning_rate": 6.424870236744667e-06, "loss": 0.3747, "step": 14937 }, { "epoch": 1.4018393393393394, "grad_norm": 1.0420106467942345, "learning_rate": 6.424346901421243e-06, "loss": 0.4413, "step": 14938 }, { "epoch": 1.4019331831831832, "grad_norm": 1.116014787584518, "learning_rate": 6.423823549115164e-06, "loss": 0.4367, "step": 14939 }, { "epoch": 1.402027027027027, "grad_norm": 1.0233022570958066, "learning_rate": 6.42330017983267e-06, "loss": 0.4269, "step": 14940 }, { "epoch": 1.402120870870871, "grad_norm": 1.0096589382831447, "learning_rate": 6.422776793579999e-06, "loss": 0.4219, "step": 14941 }, { "epoch": 1.4022147147147148, "grad_norm": 0.8358088434999147, "learning_rate": 6.422253390363393e-06, "loss": 0.3919, "step": 14942 }, { "epoch": 1.4023085585585586, "grad_norm": 0.922154470947597, "learning_rate": 6.421729970189094e-06, "loss": 0.3834, "step": 14943 }, { "epoch": 1.4024024024024024, "grad_norm": 1.0093215318936613, "learning_rate": 6.4212065330633415e-06, "loss": 0.4147, "step": 14944 }, { "epoch": 1.4024962462462462, "grad_norm": 0.9520328939491988, "learning_rate": 6.420683078992374e-06, "loss": 0.3931, "step": 14945 }, { "epoch": 1.40259009009009, "grad_norm": 0.9908722052288886, "learning_rate": 6.420159607982437e-06, "loss": 0.3928, "step": 14946 }, { "epoch": 1.4026839339339339, "grad_norm": 0.9341744928623927, "learning_rate": 6.419636120039767e-06, "loss": 0.3806, "step": 14947 }, { "epoch": 1.4027777777777777, "grad_norm": 1.084585416397635, "learning_rate": 6.419112615170611e-06, "loss": 0.4372, "step": 14948 }, { "epoch": 1.4028716216216215, "grad_norm": 0.9026529464195276, "learning_rate": 6.418589093381208e-06, "loss": 0.4154, "step": 14949 }, { "epoch": 1.4029654654654655, "grad_norm": 0.9966633246098404, "learning_rate": 6.418065554677801e-06, "loss": 0.4096, "step": 14950 }, { "epoch": 1.4030593093093093, "grad_norm": 0.8329986393320116, "learning_rate": 6.417541999066629e-06, "loss": 0.3655, "step": 14951 }, { "epoch": 1.4031531531531531, "grad_norm": 0.9178711599524191, "learning_rate": 6.417018426553938e-06, "loss": 0.3884, "step": 14952 }, { "epoch": 1.403246996996997, "grad_norm": 0.91555880370991, "learning_rate": 6.41649483714597e-06, "loss": 0.4124, "step": 14953 }, { "epoch": 1.4033408408408408, "grad_norm": 0.9233062580684649, "learning_rate": 6.415971230848966e-06, "loss": 0.4036, "step": 14954 }, { "epoch": 1.4034346846846848, "grad_norm": 0.8885641006321146, "learning_rate": 6.415447607669173e-06, "loss": 0.3869, "step": 14955 }, { "epoch": 1.4035285285285286, "grad_norm": 1.0807526718805418, "learning_rate": 6.4149239676128305e-06, "loss": 0.3874, "step": 14956 }, { "epoch": 1.4036223723723724, "grad_norm": 0.9635742013405623, "learning_rate": 6.414400310686182e-06, "loss": 0.4147, "step": 14957 }, { "epoch": 1.4037162162162162, "grad_norm": 0.8820354207540659, "learning_rate": 6.413876636895473e-06, "loss": 0.4047, "step": 14958 }, { "epoch": 1.40381006006006, "grad_norm": 0.9356173405288304, "learning_rate": 6.413352946246945e-06, "loss": 0.3985, "step": 14959 }, { "epoch": 1.4039039039039038, "grad_norm": 0.8204112029603547, "learning_rate": 6.412829238746845e-06, "loss": 0.3788, "step": 14960 }, { "epoch": 1.4039977477477477, "grad_norm": 0.8753506262775048, "learning_rate": 6.4123055144014156e-06, "loss": 0.3566, "step": 14961 }, { "epoch": 1.4040915915915915, "grad_norm": 0.9091486562825716, "learning_rate": 6.411781773216902e-06, "loss": 0.4404, "step": 14962 }, { "epoch": 1.4041854354354355, "grad_norm": 0.9297576741015681, "learning_rate": 6.411258015199546e-06, "loss": 0.3932, "step": 14963 }, { "epoch": 1.4042792792792793, "grad_norm": 1.1457780581611592, "learning_rate": 6.410734240355597e-06, "loss": 0.3563, "step": 14964 }, { "epoch": 1.4043731231231231, "grad_norm": 1.0665953090343734, "learning_rate": 6.4102104486912965e-06, "loss": 0.4146, "step": 14965 }, { "epoch": 1.404466966966967, "grad_norm": 0.9625956160997541, "learning_rate": 6.4096866402128895e-06, "loss": 0.4232, "step": 14966 }, { "epoch": 1.4045608108108107, "grad_norm": 1.1298017721382232, "learning_rate": 6.409162814926625e-06, "loss": 0.3936, "step": 14967 }, { "epoch": 1.4046546546546548, "grad_norm": 0.929414282811137, "learning_rate": 6.408638972838746e-06, "loss": 0.3607, "step": 14968 }, { "epoch": 1.4047484984984986, "grad_norm": 1.0093757520834519, "learning_rate": 6.408115113955498e-06, "loss": 0.3988, "step": 14969 }, { "epoch": 1.4048423423423424, "grad_norm": 1.385432131963138, "learning_rate": 6.4075912382831284e-06, "loss": 0.3808, "step": 14970 }, { "epoch": 1.4049361861861862, "grad_norm": 0.9756372747253962, "learning_rate": 6.407067345827882e-06, "loss": 0.407, "step": 14971 }, { "epoch": 1.40503003003003, "grad_norm": 2.8095443332998635, "learning_rate": 6.406543436596005e-06, "loss": 0.3992, "step": 14972 }, { "epoch": 1.4051238738738738, "grad_norm": 1.1713547924531413, "learning_rate": 6.406019510593747e-06, "loss": 0.3831, "step": 14973 }, { "epoch": 1.4052177177177176, "grad_norm": 0.8572246413187621, "learning_rate": 6.4054955678273534e-06, "loss": 0.4046, "step": 14974 }, { "epoch": 1.4053115615615615, "grad_norm": 0.9288724419855857, "learning_rate": 6.404971608303069e-06, "loss": 0.37, "step": 14975 }, { "epoch": 1.4054054054054055, "grad_norm": 1.096006605187237, "learning_rate": 6.404447632027144e-06, "loss": 0.4374, "step": 14976 }, { "epoch": 1.4054992492492493, "grad_norm": 0.8852438038675464, "learning_rate": 6.403923639005824e-06, "loss": 0.3828, "step": 14977 }, { "epoch": 1.405593093093093, "grad_norm": 0.8922738692749397, "learning_rate": 6.403399629245356e-06, "loss": 0.3691, "step": 14978 }, { "epoch": 1.405686936936937, "grad_norm": 1.1428377495740505, "learning_rate": 6.402875602751991e-06, "loss": 0.3947, "step": 14979 }, { "epoch": 1.4057807807807807, "grad_norm": 0.8914935030560754, "learning_rate": 6.402351559531974e-06, "loss": 0.4185, "step": 14980 }, { "epoch": 1.4058746246246248, "grad_norm": 1.4347141903772773, "learning_rate": 6.401827499591555e-06, "loss": 0.4273, "step": 14981 }, { "epoch": 1.4059684684684686, "grad_norm": 3.3593656819678626, "learning_rate": 6.401303422936982e-06, "loss": 0.4359, "step": 14982 }, { "epoch": 1.4060623123123124, "grad_norm": 0.9287074888393655, "learning_rate": 6.400779329574502e-06, "loss": 0.3887, "step": 14983 }, { "epoch": 1.4061561561561562, "grad_norm": 0.860979407179075, "learning_rate": 6.400255219510366e-06, "loss": 0.3612, "step": 14984 }, { "epoch": 1.40625, "grad_norm": 0.8669710789892372, "learning_rate": 6.399731092750822e-06, "loss": 0.4135, "step": 14985 }, { "epoch": 1.4063438438438438, "grad_norm": 0.9401496885856347, "learning_rate": 6.39920694930212e-06, "loss": 0.3585, "step": 14986 }, { "epoch": 1.4064376876876876, "grad_norm": 0.8835906361976893, "learning_rate": 6.39868278917051e-06, "loss": 0.4393, "step": 14987 }, { "epoch": 1.4065315315315314, "grad_norm": 0.9294137620235967, "learning_rate": 6.398158612362239e-06, "loss": 0.3864, "step": 14988 }, { "epoch": 1.4066253753753752, "grad_norm": 0.8519942486542562, "learning_rate": 6.397634418883557e-06, "loss": 0.3753, "step": 14989 }, { "epoch": 1.4067192192192193, "grad_norm": 0.8222034059070973, "learning_rate": 6.397110208740717e-06, "loss": 0.4044, "step": 14990 }, { "epoch": 1.406813063063063, "grad_norm": 0.828069933705741, "learning_rate": 6.396585981939967e-06, "loss": 0.3943, "step": 14991 }, { "epoch": 1.406906906906907, "grad_norm": 1.1172815268247918, "learning_rate": 6.396061738487558e-06, "loss": 0.4236, "step": 14992 }, { "epoch": 1.4070007507507507, "grad_norm": 0.9992031912812239, "learning_rate": 6.395537478389742e-06, "loss": 0.4301, "step": 14993 }, { "epoch": 1.4070945945945945, "grad_norm": 1.2112657418543347, "learning_rate": 6.3950132016527665e-06, "loss": 0.4095, "step": 14994 }, { "epoch": 1.4071884384384385, "grad_norm": 0.921166957764944, "learning_rate": 6.394488908282886e-06, "loss": 0.4034, "step": 14995 }, { "epoch": 1.4072822822822824, "grad_norm": 0.9864627794059612, "learning_rate": 6.393964598286348e-06, "loss": 0.4103, "step": 14996 }, { "epoch": 1.4073761261261262, "grad_norm": 0.8803042658284811, "learning_rate": 6.393440271669407e-06, "loss": 0.3807, "step": 14997 }, { "epoch": 1.40746996996997, "grad_norm": 1.1108854078219887, "learning_rate": 6.392915928438314e-06, "loss": 0.3968, "step": 14998 }, { "epoch": 1.4075638138138138, "grad_norm": 0.9023310523424547, "learning_rate": 6.392391568599321e-06, "loss": 0.4407, "step": 14999 }, { "epoch": 1.4076576576576576, "grad_norm": 0.941748073867862, "learning_rate": 6.391867192158679e-06, "loss": 0.3595, "step": 15000 }, { "epoch": 1.4077515015015014, "grad_norm": 1.0896820809556296, "learning_rate": 6.391342799122639e-06, "loss": 0.4315, "step": 15001 }, { "epoch": 1.4078453453453452, "grad_norm": 0.9066577590104504, "learning_rate": 6.390818389497458e-06, "loss": 0.4357, "step": 15002 }, { "epoch": 1.4079391891891893, "grad_norm": 0.922712696591666, "learning_rate": 6.3902939632893836e-06, "loss": 0.3826, "step": 15003 }, { "epoch": 1.408033033033033, "grad_norm": 0.8982091477901368, "learning_rate": 6.3897695205046705e-06, "loss": 0.3638, "step": 15004 }, { "epoch": 1.4081268768768769, "grad_norm": 1.5419076256023283, "learning_rate": 6.389245061149574e-06, "loss": 0.35, "step": 15005 }, { "epoch": 1.4082207207207207, "grad_norm": 0.937839377796999, "learning_rate": 6.388720585230344e-06, "loss": 0.3989, "step": 15006 }, { "epoch": 1.4083145645645645, "grad_norm": 1.4638030702113507, "learning_rate": 6.388196092753234e-06, "loss": 0.4303, "step": 15007 }, { "epoch": 1.4084084084084085, "grad_norm": 0.9044282748206944, "learning_rate": 6.387671583724498e-06, "loss": 0.4197, "step": 15008 }, { "epoch": 1.4085022522522523, "grad_norm": 0.9939446484790843, "learning_rate": 6.387147058150393e-06, "loss": 0.4187, "step": 15009 }, { "epoch": 1.4085960960960962, "grad_norm": 0.9135665280908036, "learning_rate": 6.3866225160371686e-06, "loss": 0.3938, "step": 15010 }, { "epoch": 1.40868993993994, "grad_norm": 0.9412893120392368, "learning_rate": 6.386097957391081e-06, "loss": 0.4392, "step": 15011 }, { "epoch": 1.4087837837837838, "grad_norm": 1.002877495164334, "learning_rate": 6.385573382218385e-06, "loss": 0.3998, "step": 15012 }, { "epoch": 1.4088776276276276, "grad_norm": 0.9219236793095386, "learning_rate": 6.385048790525333e-06, "loss": 0.4141, "step": 15013 }, { "epoch": 1.4089714714714714, "grad_norm": 0.8981915064727515, "learning_rate": 6.384524182318182e-06, "loss": 0.4026, "step": 15014 }, { "epoch": 1.4090653153153152, "grad_norm": 0.8929578276109004, "learning_rate": 6.383999557603188e-06, "loss": 0.4425, "step": 15015 }, { "epoch": 1.4091591591591592, "grad_norm": 1.0533198583219696, "learning_rate": 6.383474916386602e-06, "loss": 0.3817, "step": 15016 }, { "epoch": 1.409253003003003, "grad_norm": 0.8547432816028406, "learning_rate": 6.382950258674682e-06, "loss": 0.3604, "step": 15017 }, { "epoch": 1.4093468468468469, "grad_norm": 0.9726704512478173, "learning_rate": 6.382425584473686e-06, "loss": 0.382, "step": 15018 }, { "epoch": 1.4094406906906907, "grad_norm": 0.9074725195236096, "learning_rate": 6.381900893789864e-06, "loss": 0.4557, "step": 15019 }, { "epoch": 1.4095345345345345, "grad_norm": 3.4921784806026377, "learning_rate": 6.381376186629475e-06, "loss": 0.3636, "step": 15020 }, { "epoch": 1.4096283783783785, "grad_norm": 1.0333421970905539, "learning_rate": 6.380851462998777e-06, "loss": 0.4708, "step": 15021 }, { "epoch": 1.4097222222222223, "grad_norm": 0.9898050369290818, "learning_rate": 6.380326722904024e-06, "loss": 0.409, "step": 15022 }, { "epoch": 1.4098160660660661, "grad_norm": 1.1801810348016673, "learning_rate": 6.379801966351472e-06, "loss": 0.4144, "step": 15023 }, { "epoch": 1.40990990990991, "grad_norm": 1.253162189657872, "learning_rate": 6.37927719334738e-06, "loss": 0.4124, "step": 15024 }, { "epoch": 1.4100037537537538, "grad_norm": 0.8615521119167746, "learning_rate": 6.378752403898003e-06, "loss": 0.3547, "step": 15025 }, { "epoch": 1.4100975975975976, "grad_norm": 0.8557683736684518, "learning_rate": 6.378227598009598e-06, "loss": 0.3998, "step": 15026 }, { "epoch": 1.4101914414414414, "grad_norm": 0.9926071140091859, "learning_rate": 6.377702775688425e-06, "loss": 0.4461, "step": 15027 }, { "epoch": 1.4102852852852852, "grad_norm": 0.9041654794489714, "learning_rate": 6.377177936940737e-06, "loss": 0.469, "step": 15028 }, { "epoch": 1.410379129129129, "grad_norm": 1.2351195196834601, "learning_rate": 6.376653081772797e-06, "loss": 0.4154, "step": 15029 }, { "epoch": 1.410472972972973, "grad_norm": 0.9258074558332786, "learning_rate": 6.3761282101908605e-06, "loss": 0.4198, "step": 15030 }, { "epoch": 1.4105668168168168, "grad_norm": 0.8656125946977866, "learning_rate": 6.3756033222011834e-06, "loss": 0.3556, "step": 15031 }, { "epoch": 1.4106606606606606, "grad_norm": 0.9435834895432497, "learning_rate": 6.375078417810027e-06, "loss": 0.4082, "step": 15032 }, { "epoch": 1.4107545045045045, "grad_norm": 2.0915515940771576, "learning_rate": 6.374553497023649e-06, "loss": 0.4126, "step": 15033 }, { "epoch": 1.4108483483483483, "grad_norm": 0.8327051832805739, "learning_rate": 6.374028559848306e-06, "loss": 0.3982, "step": 15034 }, { "epoch": 1.4109421921921923, "grad_norm": 1.0921845280730673, "learning_rate": 6.373503606290261e-06, "loss": 0.383, "step": 15035 }, { "epoch": 1.4110360360360361, "grad_norm": 1.002329557544535, "learning_rate": 6.3729786363557714e-06, "loss": 0.4375, "step": 15036 }, { "epoch": 1.41112987987988, "grad_norm": 1.1072773547270225, "learning_rate": 6.372453650051094e-06, "loss": 0.4003, "step": 15037 }, { "epoch": 1.4112237237237237, "grad_norm": 1.025148079027583, "learning_rate": 6.371928647382491e-06, "loss": 0.413, "step": 15038 }, { "epoch": 1.4113175675675675, "grad_norm": 0.8593369840712227, "learning_rate": 6.371403628356221e-06, "loss": 0.3793, "step": 15039 }, { "epoch": 1.4114114114114114, "grad_norm": 0.8980471339354149, "learning_rate": 6.370878592978543e-06, "loss": 0.3818, "step": 15040 }, { "epoch": 1.4115052552552552, "grad_norm": 0.9496980834673224, "learning_rate": 6.370353541255719e-06, "loss": 0.3959, "step": 15041 }, { "epoch": 1.411599099099099, "grad_norm": 0.9229856075746817, "learning_rate": 6.369828473194011e-06, "loss": 0.4247, "step": 15042 }, { "epoch": 1.411692942942943, "grad_norm": 1.000478473391268, "learning_rate": 6.369303388799675e-06, "loss": 0.423, "step": 15043 }, { "epoch": 1.4117867867867868, "grad_norm": 0.8639265252674382, "learning_rate": 6.368778288078971e-06, "loss": 0.383, "step": 15044 }, { "epoch": 1.4118806306306306, "grad_norm": 0.8371455772742215, "learning_rate": 6.368253171038166e-06, "loss": 0.3902, "step": 15045 }, { "epoch": 1.4119744744744744, "grad_norm": 0.8843112264546412, "learning_rate": 6.367728037683516e-06, "loss": 0.4139, "step": 15046 }, { "epoch": 1.4120683183183182, "grad_norm": 1.2404617578232016, "learning_rate": 6.367202888021282e-06, "loss": 0.4178, "step": 15047 }, { "epoch": 1.4121621621621623, "grad_norm": 0.8856712402643883, "learning_rate": 6.36667772205773e-06, "loss": 0.4165, "step": 15048 }, { "epoch": 1.412256006006006, "grad_norm": 2.628510938658227, "learning_rate": 6.366152539799117e-06, "loss": 0.3749, "step": 15049 }, { "epoch": 1.41234984984985, "grad_norm": 1.2387986089042793, "learning_rate": 6.365627341251708e-06, "loss": 0.4814, "step": 15050 }, { "epoch": 1.4124436936936937, "grad_norm": 1.3226169108105694, "learning_rate": 6.365102126421763e-06, "loss": 0.3939, "step": 15051 }, { "epoch": 1.4125375375375375, "grad_norm": 0.980608894404151, "learning_rate": 6.364576895315543e-06, "loss": 0.4515, "step": 15052 }, { "epoch": 1.4126313813813813, "grad_norm": 0.9887823031879096, "learning_rate": 6.364051647939313e-06, "loss": 0.393, "step": 15053 }, { "epoch": 1.4127252252252251, "grad_norm": 0.9806284146447187, "learning_rate": 6.363526384299336e-06, "loss": 0.3678, "step": 15054 }, { "epoch": 1.412819069069069, "grad_norm": 1.1214075003147936, "learning_rate": 6.363001104401871e-06, "loss": 0.3861, "step": 15055 }, { "epoch": 1.412912912912913, "grad_norm": 1.1381695000222651, "learning_rate": 6.362475808253186e-06, "loss": 0.3958, "step": 15056 }, { "epoch": 1.4130067567567568, "grad_norm": 0.9734071828904421, "learning_rate": 6.36195049585954e-06, "loss": 0.3784, "step": 15057 }, { "epoch": 1.4131006006006006, "grad_norm": 1.2312552201471367, "learning_rate": 6.361425167227197e-06, "loss": 0.3888, "step": 15058 }, { "epoch": 1.4131944444444444, "grad_norm": 0.8926009720595706, "learning_rate": 6.360899822362423e-06, "loss": 0.3992, "step": 15059 }, { "epoch": 1.4132882882882882, "grad_norm": 0.9474076729543568, "learning_rate": 6.3603744612714805e-06, "loss": 0.3964, "step": 15060 }, { "epoch": 1.4133821321321323, "grad_norm": 1.0253886198872648, "learning_rate": 6.359849083960633e-06, "loss": 0.3818, "step": 15061 }, { "epoch": 1.413475975975976, "grad_norm": 1.7481954740712522, "learning_rate": 6.3593236904361445e-06, "loss": 0.3911, "step": 15062 }, { "epoch": 1.4135698198198199, "grad_norm": 1.0410674321613587, "learning_rate": 6.358798280704279e-06, "loss": 0.378, "step": 15063 }, { "epoch": 1.4136636636636637, "grad_norm": 1.857468239781735, "learning_rate": 6.358272854771302e-06, "loss": 0.4318, "step": 15064 }, { "epoch": 1.4137575075075075, "grad_norm": 1.022471159663774, "learning_rate": 6.357747412643478e-06, "loss": 0.3872, "step": 15065 }, { "epoch": 1.4138513513513513, "grad_norm": 1.262466708341581, "learning_rate": 6.357221954327073e-06, "loss": 0.3963, "step": 15066 }, { "epoch": 1.4139451951951951, "grad_norm": 1.588672544964568, "learning_rate": 6.35669647982835e-06, "loss": 0.4261, "step": 15067 }, { "epoch": 1.414039039039039, "grad_norm": 1.1106862908412638, "learning_rate": 6.356170989153575e-06, "loss": 0.4228, "step": 15068 }, { "epoch": 1.4141328828828827, "grad_norm": 1.0043493433110537, "learning_rate": 6.355645482309014e-06, "loss": 0.3992, "step": 15069 }, { "epoch": 1.4142267267267268, "grad_norm": 0.9935636847489778, "learning_rate": 6.355119959300932e-06, "loss": 0.4106, "step": 15070 }, { "epoch": 1.4143205705705706, "grad_norm": 1.2570476681369624, "learning_rate": 6.3545944201355945e-06, "loss": 0.4292, "step": 15071 }, { "epoch": 1.4144144144144144, "grad_norm": 0.9422088695904292, "learning_rate": 6.354068864819269e-06, "loss": 0.4007, "step": 15072 }, { "epoch": 1.4145082582582582, "grad_norm": 0.9949734807582254, "learning_rate": 6.353543293358223e-06, "loss": 0.4166, "step": 15073 }, { "epoch": 1.414602102102102, "grad_norm": 0.9158709165415011, "learning_rate": 6.353017705758718e-06, "loss": 0.337, "step": 15074 }, { "epoch": 1.414695945945946, "grad_norm": 1.0427071237064731, "learning_rate": 6.3524921020270255e-06, "loss": 0.415, "step": 15075 }, { "epoch": 1.4147897897897899, "grad_norm": 0.8689755333759431, "learning_rate": 6.35196648216941e-06, "loss": 0.4082, "step": 15076 }, { "epoch": 1.4148836336336337, "grad_norm": 0.9953768171200075, "learning_rate": 6.3514408461921384e-06, "loss": 0.4471, "step": 15077 }, { "epoch": 1.4149774774774775, "grad_norm": 0.9038333309681508, "learning_rate": 6.350915194101479e-06, "loss": 0.4603, "step": 15078 }, { "epoch": 1.4150713213213213, "grad_norm": 0.9043318473074394, "learning_rate": 6.3503895259037e-06, "loss": 0.3856, "step": 15079 }, { "epoch": 1.415165165165165, "grad_norm": 0.9118034845241079, "learning_rate": 6.3498638416050665e-06, "loss": 0.3673, "step": 15080 }, { "epoch": 1.415259009009009, "grad_norm": 0.9966966004378262, "learning_rate": 6.349338141211847e-06, "loss": 0.3846, "step": 15081 }, { "epoch": 1.4153528528528527, "grad_norm": 1.1069526842619972, "learning_rate": 6.34881242473031e-06, "loss": 0.4268, "step": 15082 }, { "epoch": 1.4154466966966968, "grad_norm": 1.2276028795854081, "learning_rate": 6.348286692166725e-06, "loss": 0.4156, "step": 15083 }, { "epoch": 1.4155405405405406, "grad_norm": 0.9252075939999909, "learning_rate": 6.347760943527359e-06, "loss": 0.4035, "step": 15084 }, { "epoch": 1.4156343843843844, "grad_norm": 1.3704716079405126, "learning_rate": 6.347235178818481e-06, "loss": 0.3834, "step": 15085 }, { "epoch": 1.4157282282282282, "grad_norm": 1.057257946871436, "learning_rate": 6.3467093980463566e-06, "loss": 0.4283, "step": 15086 }, { "epoch": 1.415822072072072, "grad_norm": 1.0108306662665796, "learning_rate": 6.346183601217261e-06, "loss": 0.3664, "step": 15087 }, { "epoch": 1.415915915915916, "grad_norm": 0.9031277412713392, "learning_rate": 6.345657788337457e-06, "loss": 0.3717, "step": 15088 }, { "epoch": 1.4160097597597598, "grad_norm": 0.9300035223927405, "learning_rate": 6.3451319594132174e-06, "loss": 0.4125, "step": 15089 }, { "epoch": 1.4161036036036037, "grad_norm": 1.006894346247108, "learning_rate": 6.344606114450813e-06, "loss": 0.426, "step": 15090 }, { "epoch": 1.4161974474474475, "grad_norm": 2.029283234309621, "learning_rate": 6.34408025345651e-06, "loss": 0.4243, "step": 15091 }, { "epoch": 1.4162912912912913, "grad_norm": 1.3648660443360858, "learning_rate": 6.3435543764365805e-06, "loss": 0.4111, "step": 15092 }, { "epoch": 1.416385135135135, "grad_norm": 0.9095662697068888, "learning_rate": 6.343028483397296e-06, "loss": 0.4222, "step": 15093 }, { "epoch": 1.416478978978979, "grad_norm": 0.9626133735759484, "learning_rate": 6.342502574344922e-06, "loss": 0.4396, "step": 15094 }, { "epoch": 1.4165728228228227, "grad_norm": 1.0622478850027608, "learning_rate": 6.341976649285731e-06, "loss": 0.3755, "step": 15095 }, { "epoch": 1.4166666666666667, "grad_norm": 1.2155791692457603, "learning_rate": 6.3414507082259965e-06, "loss": 0.4039, "step": 15096 }, { "epoch": 1.4167605105105106, "grad_norm": 1.4529937725514872, "learning_rate": 6.340924751171989e-06, "loss": 0.4329, "step": 15097 }, { "epoch": 1.4168543543543544, "grad_norm": 0.9176759418201433, "learning_rate": 6.3403987781299746e-06, "loss": 0.4066, "step": 15098 }, { "epoch": 1.4169481981981982, "grad_norm": 1.023950506747311, "learning_rate": 6.339872789106229e-06, "loss": 0.4115, "step": 15099 }, { "epoch": 1.417042042042042, "grad_norm": 0.8773988345868566, "learning_rate": 6.3393467841070225e-06, "loss": 0.4038, "step": 15100 }, { "epoch": 1.417135885885886, "grad_norm": 0.9702693308890273, "learning_rate": 6.338820763138626e-06, "loss": 0.4285, "step": 15101 }, { "epoch": 1.4172297297297298, "grad_norm": 1.0101033452995647, "learning_rate": 6.338294726207313e-06, "loss": 0.3644, "step": 15102 }, { "epoch": 1.4173235735735736, "grad_norm": 0.936495206685208, "learning_rate": 6.337768673319354e-06, "loss": 0.3785, "step": 15103 }, { "epoch": 1.4174174174174174, "grad_norm": 1.2074290371181713, "learning_rate": 6.3372426044810224e-06, "loss": 0.365, "step": 15104 }, { "epoch": 1.4175112612612613, "grad_norm": 1.1067570355950307, "learning_rate": 6.336716519698589e-06, "loss": 0.3337, "step": 15105 }, { "epoch": 1.417605105105105, "grad_norm": 0.9020767587888863, "learning_rate": 6.3361904189783275e-06, "loss": 0.3934, "step": 15106 }, { "epoch": 1.4176989489489489, "grad_norm": 0.9527162123321231, "learning_rate": 6.33566430232651e-06, "loss": 0.3935, "step": 15107 }, { "epoch": 1.4177927927927927, "grad_norm": 1.7356396679093582, "learning_rate": 6.3351381697494094e-06, "loss": 0.4543, "step": 15108 }, { "epoch": 1.4178866366366365, "grad_norm": 1.1904268452849873, "learning_rate": 6.3346120212533e-06, "loss": 0.3896, "step": 15109 }, { "epoch": 1.4179804804804805, "grad_norm": 0.9544964890514391, "learning_rate": 6.334085856844454e-06, "loss": 0.4069, "step": 15110 }, { "epoch": 1.4180743243243243, "grad_norm": 0.9978502430515911, "learning_rate": 6.333559676529146e-06, "loss": 0.3848, "step": 15111 }, { "epoch": 1.4181681681681682, "grad_norm": 0.9345204972759995, "learning_rate": 6.333033480313648e-06, "loss": 0.316, "step": 15112 }, { "epoch": 1.418262012012012, "grad_norm": 1.0877681155794552, "learning_rate": 6.332507268204235e-06, "loss": 0.4458, "step": 15113 }, { "epoch": 1.4183558558558558, "grad_norm": 0.9383576764102911, "learning_rate": 6.331981040207184e-06, "loss": 0.4404, "step": 15114 }, { "epoch": 1.4184496996996998, "grad_norm": 0.9014360681961866, "learning_rate": 6.331454796328762e-06, "loss": 0.4147, "step": 15115 }, { "epoch": 1.4185435435435436, "grad_norm": 0.8538744723892827, "learning_rate": 6.330928536575251e-06, "loss": 0.4384, "step": 15116 }, { "epoch": 1.4186373873873874, "grad_norm": 0.9514323417929726, "learning_rate": 6.330402260952921e-06, "loss": 0.3984, "step": 15117 }, { "epoch": 1.4187312312312312, "grad_norm": 1.2573094948345527, "learning_rate": 6.329875969468049e-06, "loss": 0.4023, "step": 15118 }, { "epoch": 1.418825075075075, "grad_norm": 1.0265610470870656, "learning_rate": 6.329349662126907e-06, "loss": 0.4336, "step": 15119 }, { "epoch": 1.4189189189189189, "grad_norm": 0.8686730078599595, "learning_rate": 6.328823338935774e-06, "loss": 0.4171, "step": 15120 }, { "epoch": 1.4190127627627627, "grad_norm": 1.0413433902762623, "learning_rate": 6.328296999900924e-06, "loss": 0.4377, "step": 15121 }, { "epoch": 1.4191066066066065, "grad_norm": 1.509846179528331, "learning_rate": 6.327770645028634e-06, "loss": 0.3867, "step": 15122 }, { "epoch": 1.4192004504504505, "grad_norm": 0.8818792949927041, "learning_rate": 6.327244274325177e-06, "loss": 0.374, "step": 15123 }, { "epoch": 1.4192942942942943, "grad_norm": 1.0406351527355868, "learning_rate": 6.32671788779683e-06, "loss": 0.4225, "step": 15124 }, { "epoch": 1.4193881381381381, "grad_norm": 0.9734076310393807, "learning_rate": 6.326191485449869e-06, "loss": 0.4411, "step": 15125 }, { "epoch": 1.419481981981982, "grad_norm": 0.9381843369533841, "learning_rate": 6.325665067290573e-06, "loss": 0.3987, "step": 15126 }, { "epoch": 1.4195758258258258, "grad_norm": 1.3025943053428921, "learning_rate": 6.325138633325214e-06, "loss": 0.437, "step": 15127 }, { "epoch": 1.4196696696696698, "grad_norm": 0.8548033930030322, "learning_rate": 6.324612183560073e-06, "loss": 0.3724, "step": 15128 }, { "epoch": 1.4197635135135136, "grad_norm": 1.0134842260293484, "learning_rate": 6.324085718001425e-06, "loss": 0.4023, "step": 15129 }, { "epoch": 1.4198573573573574, "grad_norm": 0.95157688870503, "learning_rate": 6.323559236655546e-06, "loss": 0.3835, "step": 15130 }, { "epoch": 1.4199512012012012, "grad_norm": 0.8949228422267386, "learning_rate": 6.323032739528714e-06, "loss": 0.3791, "step": 15131 }, { "epoch": 1.420045045045045, "grad_norm": 0.8770177512690733, "learning_rate": 6.322506226627207e-06, "loss": 0.3723, "step": 15132 }, { "epoch": 1.4201388888888888, "grad_norm": 1.1588388993009626, "learning_rate": 6.321979697957303e-06, "loss": 0.3958, "step": 15133 }, { "epoch": 1.4202327327327327, "grad_norm": 0.9460115027681902, "learning_rate": 6.321453153525281e-06, "loss": 0.4188, "step": 15134 }, { "epoch": 1.4203265765765765, "grad_norm": 0.9926903307700364, "learning_rate": 6.3209265933374166e-06, "loss": 0.3783, "step": 15135 }, { "epoch": 1.4204204204204205, "grad_norm": 0.9396021857390527, "learning_rate": 6.320400017399988e-06, "loss": 0.4036, "step": 15136 }, { "epoch": 1.4205142642642643, "grad_norm": 0.8736435286181076, "learning_rate": 6.319873425719274e-06, "loss": 0.4248, "step": 15137 }, { "epoch": 1.4206081081081081, "grad_norm": 1.3187035143639338, "learning_rate": 6.3193468183015564e-06, "loss": 0.3442, "step": 15138 }, { "epoch": 1.420701951951952, "grad_norm": 0.9753133451873467, "learning_rate": 6.318820195153109e-06, "loss": 0.3758, "step": 15139 }, { "epoch": 1.4207957957957957, "grad_norm": 0.8579419647051977, "learning_rate": 6.318293556280215e-06, "loss": 0.4319, "step": 15140 }, { "epoch": 1.4208896396396398, "grad_norm": 0.9070561784790594, "learning_rate": 6.317766901689151e-06, "loss": 0.3882, "step": 15141 }, { "epoch": 1.4209834834834836, "grad_norm": 1.073190005525759, "learning_rate": 6.317240231386197e-06, "loss": 0.4407, "step": 15142 }, { "epoch": 1.4210773273273274, "grad_norm": 0.9047090490515118, "learning_rate": 6.316713545377633e-06, "loss": 0.362, "step": 15143 }, { "epoch": 1.4211711711711712, "grad_norm": 0.8561129506858024, "learning_rate": 6.316186843669738e-06, "loss": 0.4088, "step": 15144 }, { "epoch": 1.421265015015015, "grad_norm": 0.9957649302895414, "learning_rate": 6.315660126268792e-06, "loss": 0.4204, "step": 15145 }, { "epoch": 1.4213588588588588, "grad_norm": 1.0483658241231555, "learning_rate": 6.315133393181076e-06, "loss": 0.431, "step": 15146 }, { "epoch": 1.4214527027027026, "grad_norm": 1.0567548040641983, "learning_rate": 6.3146066444128706e-06, "loss": 0.3945, "step": 15147 }, { "epoch": 1.4215465465465464, "grad_norm": 1.6713128618743103, "learning_rate": 6.314079879970455e-06, "loss": 0.4282, "step": 15148 }, { "epoch": 1.4216403903903903, "grad_norm": 0.9951367963502296, "learning_rate": 6.313553099860109e-06, "loss": 0.425, "step": 15149 }, { "epoch": 1.4217342342342343, "grad_norm": 0.9680865203039519, "learning_rate": 6.313026304088117e-06, "loss": 0.4333, "step": 15150 }, { "epoch": 1.421828078078078, "grad_norm": 0.9229497545613866, "learning_rate": 6.3124994926607556e-06, "loss": 0.3819, "step": 15151 }, { "epoch": 1.421921921921922, "grad_norm": 1.0545666037913881, "learning_rate": 6.3119726655843106e-06, "loss": 0.403, "step": 15152 }, { "epoch": 1.4220157657657657, "grad_norm": 1.2876044866206786, "learning_rate": 6.31144582286506e-06, "loss": 0.4118, "step": 15153 }, { "epoch": 1.4221096096096097, "grad_norm": 1.0027307140949522, "learning_rate": 6.310918964509286e-06, "loss": 0.4017, "step": 15154 }, { "epoch": 1.4222034534534536, "grad_norm": 0.8831195089457222, "learning_rate": 6.31039209052327e-06, "loss": 0.4125, "step": 15155 }, { "epoch": 1.4222972972972974, "grad_norm": 0.8913528291677312, "learning_rate": 6.309865200913297e-06, "loss": 0.4299, "step": 15156 }, { "epoch": 1.4223911411411412, "grad_norm": 0.8392688523118571, "learning_rate": 6.309338295685646e-06, "loss": 0.3884, "step": 15157 }, { "epoch": 1.422484984984985, "grad_norm": 0.8690966366857161, "learning_rate": 6.3088113748466006e-06, "loss": 0.3927, "step": 15158 }, { "epoch": 1.4225788288288288, "grad_norm": 0.9142169962331074, "learning_rate": 6.308284438402445e-06, "loss": 0.3555, "step": 15159 }, { "epoch": 1.4226726726726726, "grad_norm": 0.915020474447478, "learning_rate": 6.307757486359457e-06, "loss": 0.3986, "step": 15160 }, { "epoch": 1.4227665165165164, "grad_norm": 0.9078604171853341, "learning_rate": 6.307230518723924e-06, "loss": 0.3809, "step": 15161 }, { "epoch": 1.4228603603603602, "grad_norm": 1.1392250542051787, "learning_rate": 6.306703535502127e-06, "loss": 0.4335, "step": 15162 }, { "epoch": 1.4229542042042043, "grad_norm": 1.0959546449036457, "learning_rate": 6.30617653670035e-06, "loss": 0.395, "step": 15163 }, { "epoch": 1.423048048048048, "grad_norm": 0.9841279273724897, "learning_rate": 6.305649522324876e-06, "loss": 0.4204, "step": 15164 }, { "epoch": 1.4231418918918919, "grad_norm": 1.0548365150704102, "learning_rate": 6.305122492381992e-06, "loss": 0.4189, "step": 15165 }, { "epoch": 1.4232357357357357, "grad_norm": 0.8692205711520935, "learning_rate": 6.3045954468779754e-06, "loss": 0.4069, "step": 15166 }, { "epoch": 1.4233295795795795, "grad_norm": 0.8289238461671858, "learning_rate": 6.304068385819115e-06, "loss": 0.4257, "step": 15167 }, { "epoch": 1.4234234234234235, "grad_norm": 0.9149555755178926, "learning_rate": 6.303541309211695e-06, "loss": 0.3866, "step": 15168 }, { "epoch": 1.4235172672672673, "grad_norm": 1.0097413766276755, "learning_rate": 6.303014217061997e-06, "loss": 0.4029, "step": 15169 }, { "epoch": 1.4236111111111112, "grad_norm": 1.1707513265925298, "learning_rate": 6.302487109376306e-06, "loss": 0.4127, "step": 15170 }, { "epoch": 1.423704954954955, "grad_norm": 0.8861392231984254, "learning_rate": 6.30195998616091e-06, "loss": 0.3776, "step": 15171 }, { "epoch": 1.4237987987987988, "grad_norm": 0.9575962383529365, "learning_rate": 6.301432847422093e-06, "loss": 0.3846, "step": 15172 }, { "epoch": 1.4238926426426426, "grad_norm": 1.5677677032051884, "learning_rate": 6.3009056931661365e-06, "loss": 0.4131, "step": 15173 }, { "epoch": 1.4239864864864864, "grad_norm": 0.9412719437220803, "learning_rate": 6.3003785233993294e-06, "loss": 0.4135, "step": 15174 }, { "epoch": 1.4240803303303302, "grad_norm": 0.9710477262600654, "learning_rate": 6.299851338127956e-06, "loss": 0.3851, "step": 15175 }, { "epoch": 1.4241741741741742, "grad_norm": 1.0627630992827615, "learning_rate": 6.299324137358301e-06, "loss": 0.3754, "step": 15176 }, { "epoch": 1.424268018018018, "grad_norm": 1.0661117672388045, "learning_rate": 6.298796921096652e-06, "loss": 0.3821, "step": 15177 }, { "epoch": 1.4243618618618619, "grad_norm": 0.9864618396793468, "learning_rate": 6.298269689349297e-06, "loss": 0.4035, "step": 15178 }, { "epoch": 1.4244557057057057, "grad_norm": 0.9807583844502494, "learning_rate": 6.297742442122516e-06, "loss": 0.4345, "step": 15179 }, { "epoch": 1.4245495495495495, "grad_norm": 1.2008434249177982, "learning_rate": 6.297215179422601e-06, "loss": 0.4022, "step": 15180 }, { "epoch": 1.4246433933933935, "grad_norm": 1.531247006504583, "learning_rate": 6.2966879012558355e-06, "loss": 0.435, "step": 15181 }, { "epoch": 1.4247372372372373, "grad_norm": 1.1146999850188521, "learning_rate": 6.2961606076285085e-06, "loss": 0.4333, "step": 15182 }, { "epoch": 1.4248310810810811, "grad_norm": 1.0853024157245168, "learning_rate": 6.295633298546906e-06, "loss": 0.4217, "step": 15183 }, { "epoch": 1.424924924924925, "grad_norm": 0.9418580933072055, "learning_rate": 6.295105974017316e-06, "loss": 0.3986, "step": 15184 }, { "epoch": 1.4250187687687688, "grad_norm": 1.1176526198379335, "learning_rate": 6.294578634046024e-06, "loss": 0.4596, "step": 15185 }, { "epoch": 1.4251126126126126, "grad_norm": 0.9590595808114575, "learning_rate": 6.294051278639319e-06, "loss": 0.3965, "step": 15186 }, { "epoch": 1.4252064564564564, "grad_norm": 0.9211710517485551, "learning_rate": 6.293523907803488e-06, "loss": 0.435, "step": 15187 }, { "epoch": 1.4253003003003002, "grad_norm": 0.9441209138847744, "learning_rate": 6.2929965215448185e-06, "loss": 0.4015, "step": 15188 }, { "epoch": 1.4253941441441442, "grad_norm": 0.944614220827583, "learning_rate": 6.292469119869601e-06, "loss": 0.429, "step": 15189 }, { "epoch": 1.425487987987988, "grad_norm": 1.0607609587150002, "learning_rate": 6.291941702784121e-06, "loss": 0.4177, "step": 15190 }, { "epoch": 1.4255818318318318, "grad_norm": 0.8326992206070345, "learning_rate": 6.291414270294668e-06, "loss": 0.386, "step": 15191 }, { "epoch": 1.4256756756756757, "grad_norm": 1.3949801761622496, "learning_rate": 6.290886822407531e-06, "loss": 0.3839, "step": 15192 }, { "epoch": 1.4257695195195195, "grad_norm": 0.8589439521419985, "learning_rate": 6.290359359128998e-06, "loss": 0.3688, "step": 15193 }, { "epoch": 1.4258633633633635, "grad_norm": 0.9490113318141226, "learning_rate": 6.289831880465358e-06, "loss": 0.4206, "step": 15194 }, { "epoch": 1.4259572072072073, "grad_norm": 2.658000477196596, "learning_rate": 6.289304386422903e-06, "loss": 0.3798, "step": 15195 }, { "epoch": 1.4260510510510511, "grad_norm": 1.0257934824433352, "learning_rate": 6.288776877007919e-06, "loss": 0.3961, "step": 15196 }, { "epoch": 1.426144894894895, "grad_norm": 0.8902513776106832, "learning_rate": 6.288249352226695e-06, "loss": 0.3886, "step": 15197 }, { "epoch": 1.4262387387387387, "grad_norm": 0.9098462845662197, "learning_rate": 6.287721812085524e-06, "loss": 0.4091, "step": 15198 }, { "epoch": 1.4263325825825826, "grad_norm": 1.0383583544096118, "learning_rate": 6.287194256590694e-06, "loss": 0.3989, "step": 15199 }, { "epoch": 1.4264264264264264, "grad_norm": 1.1414626863735797, "learning_rate": 6.286666685748495e-06, "loss": 0.4318, "step": 15200 }, { "epoch": 1.4265202702702702, "grad_norm": 1.057057455577467, "learning_rate": 6.286139099565219e-06, "loss": 0.444, "step": 15201 }, { "epoch": 1.426614114114114, "grad_norm": 1.1187491397248566, "learning_rate": 6.285611498047153e-06, "loss": 0.3692, "step": 15202 }, { "epoch": 1.426707957957958, "grad_norm": 1.0103782985853622, "learning_rate": 6.285083881200591e-06, "loss": 0.3908, "step": 15203 }, { "epoch": 1.4268018018018018, "grad_norm": 1.3619846837241507, "learning_rate": 6.284556249031822e-06, "loss": 0.4329, "step": 15204 }, { "epoch": 1.4268956456456456, "grad_norm": 0.8573103943471478, "learning_rate": 6.284028601547139e-06, "loss": 0.4336, "step": 15205 }, { "epoch": 1.4269894894894894, "grad_norm": 1.115397798923188, "learning_rate": 6.283500938752829e-06, "loss": 0.4803, "step": 15206 }, { "epoch": 1.4270833333333333, "grad_norm": 1.14289962364065, "learning_rate": 6.28297326065519e-06, "loss": 0.4166, "step": 15207 }, { "epoch": 1.4271771771771773, "grad_norm": 1.208850703976478, "learning_rate": 6.2824455672605075e-06, "loss": 0.4055, "step": 15208 }, { "epoch": 1.427271021021021, "grad_norm": 0.9136449893004733, "learning_rate": 6.281917858575076e-06, "loss": 0.4012, "step": 15209 }, { "epoch": 1.427364864864865, "grad_norm": 0.9366931994334873, "learning_rate": 6.281390134605188e-06, "loss": 0.402, "step": 15210 }, { "epoch": 1.4274587087087087, "grad_norm": 1.0688518594894367, "learning_rate": 6.280862395357132e-06, "loss": 0.3974, "step": 15211 }, { "epoch": 1.4275525525525525, "grad_norm": 0.8021457508941512, "learning_rate": 6.280334640837205e-06, "loss": 0.3429, "step": 15212 }, { "epoch": 1.4276463963963963, "grad_norm": 1.5577618629975991, "learning_rate": 6.279806871051698e-06, "loss": 0.4312, "step": 15213 }, { "epoch": 1.4277402402402402, "grad_norm": 1.2088219362163495, "learning_rate": 6.2792790860069015e-06, "loss": 0.4213, "step": 15214 }, { "epoch": 1.427834084084084, "grad_norm": 1.0500613399855663, "learning_rate": 6.278751285709111e-06, "loss": 0.452, "step": 15215 }, { "epoch": 1.427927927927928, "grad_norm": 0.9358847724120576, "learning_rate": 6.278223470164618e-06, "loss": 0.3795, "step": 15216 }, { "epoch": 1.4280217717717718, "grad_norm": 1.0631741994879211, "learning_rate": 6.277695639379715e-06, "loss": 0.4249, "step": 15217 }, { "epoch": 1.4281156156156156, "grad_norm": 1.8200196901397547, "learning_rate": 6.277167793360698e-06, "loss": 0.3781, "step": 15218 }, { "epoch": 1.4282094594594594, "grad_norm": 0.9174980340930874, "learning_rate": 6.276639932113858e-06, "loss": 0.4192, "step": 15219 }, { "epoch": 1.4283033033033032, "grad_norm": 0.8859612798726724, "learning_rate": 6.276112055645491e-06, "loss": 0.3642, "step": 15220 }, { "epoch": 1.4283971471471473, "grad_norm": 0.9161772354925994, "learning_rate": 6.27558416396189e-06, "loss": 0.3645, "step": 15221 }, { "epoch": 1.428490990990991, "grad_norm": 0.9046931630521524, "learning_rate": 6.275056257069348e-06, "loss": 0.3685, "step": 15222 }, { "epoch": 1.428584834834835, "grad_norm": 0.9303792553634915, "learning_rate": 6.274528334974159e-06, "loss": 0.4005, "step": 15223 }, { "epoch": 1.4286786786786787, "grad_norm": 0.9031705845724233, "learning_rate": 6.2740003976826205e-06, "loss": 0.3851, "step": 15224 }, { "epoch": 1.4287725225225225, "grad_norm": 1.830629622413681, "learning_rate": 6.2734724452010255e-06, "loss": 0.4377, "step": 15225 }, { "epoch": 1.4288663663663663, "grad_norm": 1.05483981041609, "learning_rate": 6.272944477535667e-06, "loss": 0.3648, "step": 15226 }, { "epoch": 1.4289602102102101, "grad_norm": 0.9731650393277638, "learning_rate": 6.2724164946928435e-06, "loss": 0.4284, "step": 15227 }, { "epoch": 1.429054054054054, "grad_norm": 0.9060336521276063, "learning_rate": 6.271888496678848e-06, "loss": 0.3988, "step": 15228 }, { "epoch": 1.429147897897898, "grad_norm": 1.8528512118711506, "learning_rate": 6.271360483499975e-06, "loss": 0.4461, "step": 15229 }, { "epoch": 1.4292417417417418, "grad_norm": 0.8577345840158166, "learning_rate": 6.270832455162521e-06, "loss": 0.3849, "step": 15230 }, { "epoch": 1.4293355855855856, "grad_norm": 0.849208329692886, "learning_rate": 6.270304411672784e-06, "loss": 0.389, "step": 15231 }, { "epoch": 1.4294294294294294, "grad_norm": 0.9548249320955835, "learning_rate": 6.269776353037056e-06, "loss": 0.4246, "step": 15232 }, { "epoch": 1.4295232732732732, "grad_norm": 1.0575925666778065, "learning_rate": 6.269248279261637e-06, "loss": 0.4089, "step": 15233 }, { "epoch": 1.4296171171171173, "grad_norm": 1.1241856086395556, "learning_rate": 6.268720190352822e-06, "loss": 0.4102, "step": 15234 }, { "epoch": 1.429710960960961, "grad_norm": 2.0099118109643106, "learning_rate": 6.2681920863169045e-06, "loss": 0.4127, "step": 15235 }, { "epoch": 1.4298048048048049, "grad_norm": 0.9356376671213692, "learning_rate": 6.267663967160184e-06, "loss": 0.3849, "step": 15236 }, { "epoch": 1.4298986486486487, "grad_norm": 0.854683822933615, "learning_rate": 6.267135832888958e-06, "loss": 0.4177, "step": 15237 }, { "epoch": 1.4299924924924925, "grad_norm": 0.9741662047907675, "learning_rate": 6.26660768350952e-06, "loss": 0.4025, "step": 15238 }, { "epoch": 1.4300863363363363, "grad_norm": 1.0038688230067592, "learning_rate": 6.266079519028172e-06, "loss": 0.3983, "step": 15239 }, { "epoch": 1.4301801801801801, "grad_norm": 0.9993429182893511, "learning_rate": 6.265551339451209e-06, "loss": 0.4489, "step": 15240 }, { "epoch": 1.430274024024024, "grad_norm": 0.95801470282392, "learning_rate": 6.265023144784926e-06, "loss": 0.3806, "step": 15241 }, { "epoch": 1.4303678678678677, "grad_norm": 0.8575529609386204, "learning_rate": 6.264494935035625e-06, "loss": 0.397, "step": 15242 }, { "epoch": 1.4304617117117118, "grad_norm": 1.010204641241538, "learning_rate": 6.263966710209601e-06, "loss": 0.3808, "step": 15243 }, { "epoch": 1.4305555555555556, "grad_norm": 0.8763455041153945, "learning_rate": 6.263438470313155e-06, "loss": 0.4148, "step": 15244 }, { "epoch": 1.4306493993993994, "grad_norm": 0.9552895479348011, "learning_rate": 6.262910215352583e-06, "loss": 0.4172, "step": 15245 }, { "epoch": 1.4307432432432432, "grad_norm": 1.0148294161942168, "learning_rate": 6.262381945334183e-06, "loss": 0.4274, "step": 15246 }, { "epoch": 1.430837087087087, "grad_norm": 2.1879807997207577, "learning_rate": 6.261853660264254e-06, "loss": 0.4419, "step": 15247 }, { "epoch": 1.430930930930931, "grad_norm": 1.0057981263401008, "learning_rate": 6.261325360149095e-06, "loss": 0.3795, "step": 15248 }, { "epoch": 1.4310247747747749, "grad_norm": 0.977535032411754, "learning_rate": 6.260797044995006e-06, "loss": 0.4695, "step": 15249 }, { "epoch": 1.4311186186186187, "grad_norm": 1.0524923817252503, "learning_rate": 6.260268714808286e-06, "loss": 0.3757, "step": 15250 }, { "epoch": 1.4312124624624625, "grad_norm": 1.1904448910325904, "learning_rate": 6.259740369595234e-06, "loss": 0.416, "step": 15251 }, { "epoch": 1.4313063063063063, "grad_norm": 0.8600769630811202, "learning_rate": 6.259212009362149e-06, "loss": 0.3462, "step": 15252 }, { "epoch": 1.43140015015015, "grad_norm": 0.9419555609394016, "learning_rate": 6.25868363411533e-06, "loss": 0.409, "step": 15253 }, { "epoch": 1.431493993993994, "grad_norm": 1.1222157492267348, "learning_rate": 6.2581552438610785e-06, "loss": 0.3845, "step": 15254 }, { "epoch": 1.4315878378378377, "grad_norm": 1.170991602706387, "learning_rate": 6.257626838605695e-06, "loss": 0.4379, "step": 15255 }, { "epoch": 1.4316816816816818, "grad_norm": 0.9542889016079763, "learning_rate": 6.257098418355476e-06, "loss": 0.392, "step": 15256 }, { "epoch": 1.4317755255255256, "grad_norm": 1.2500924609060808, "learning_rate": 6.256569983116727e-06, "loss": 0.4047, "step": 15257 }, { "epoch": 1.4318693693693694, "grad_norm": 1.3270720887445548, "learning_rate": 6.256041532895747e-06, "loss": 0.4091, "step": 15258 }, { "epoch": 1.4319632132132132, "grad_norm": 1.1421478774661507, "learning_rate": 6.255513067698834e-06, "loss": 0.3898, "step": 15259 }, { "epoch": 1.432057057057057, "grad_norm": 1.1532265313991497, "learning_rate": 6.254984587532291e-06, "loss": 0.4431, "step": 15260 }, { "epoch": 1.432150900900901, "grad_norm": 1.178816015910176, "learning_rate": 6.25445609240242e-06, "loss": 0.3635, "step": 15261 }, { "epoch": 1.4322447447447448, "grad_norm": 1.1996599228310934, "learning_rate": 6.25392758231552e-06, "loss": 0.457, "step": 15262 }, { "epoch": 1.4323385885885886, "grad_norm": 1.0712908566538863, "learning_rate": 6.253399057277896e-06, "loss": 0.395, "step": 15263 }, { "epoch": 1.4324324324324325, "grad_norm": 0.8722114635331708, "learning_rate": 6.252870517295846e-06, "loss": 0.3721, "step": 15264 }, { "epoch": 1.4325262762762763, "grad_norm": 0.8589995389915748, "learning_rate": 6.252341962375673e-06, "loss": 0.3524, "step": 15265 }, { "epoch": 1.43262012012012, "grad_norm": 1.0145153775039983, "learning_rate": 6.25181339252368e-06, "loss": 0.4508, "step": 15266 }, { "epoch": 1.4327139639639639, "grad_norm": 1.2686312363572345, "learning_rate": 6.251284807746169e-06, "loss": 0.4332, "step": 15267 }, { "epoch": 1.4328078078078077, "grad_norm": 0.9798873832515445, "learning_rate": 6.250756208049441e-06, "loss": 0.4147, "step": 15268 }, { "epoch": 1.4329016516516517, "grad_norm": 0.9915009776987409, "learning_rate": 6.2502275934398e-06, "loss": 0.4178, "step": 15269 }, { "epoch": 1.4329954954954955, "grad_norm": 0.9773543024158639, "learning_rate": 6.24969896392355e-06, "loss": 0.4063, "step": 15270 }, { "epoch": 1.4330893393393394, "grad_norm": 1.046718573260979, "learning_rate": 6.24917031950699e-06, "loss": 0.4168, "step": 15271 }, { "epoch": 1.4331831831831832, "grad_norm": 0.9368108446642948, "learning_rate": 6.248641660196425e-06, "loss": 0.3759, "step": 15272 }, { "epoch": 1.433277027027027, "grad_norm": 1.0476635023274445, "learning_rate": 6.24811298599816e-06, "loss": 0.4172, "step": 15273 }, { "epoch": 1.433370870870871, "grad_norm": 1.0620718137757645, "learning_rate": 6.247584296918495e-06, "loss": 0.4231, "step": 15274 }, { "epoch": 1.4334647147147148, "grad_norm": 1.0158928255474304, "learning_rate": 6.247055592963738e-06, "loss": 0.357, "step": 15275 }, { "epoch": 1.4335585585585586, "grad_norm": 1.059752946103878, "learning_rate": 6.24652687414019e-06, "loss": 0.4073, "step": 15276 }, { "epoch": 1.4336524024024024, "grad_norm": 1.0061322784558286, "learning_rate": 6.245998140454153e-06, "loss": 0.4016, "step": 15277 }, { "epoch": 1.4337462462462462, "grad_norm": 1.1486695150220936, "learning_rate": 6.245469391911936e-06, "loss": 0.4139, "step": 15278 }, { "epoch": 1.43384009009009, "grad_norm": 0.9712483035234123, "learning_rate": 6.24494062851984e-06, "loss": 0.3626, "step": 15279 }, { "epoch": 1.4339339339339339, "grad_norm": 0.958399624722805, "learning_rate": 6.24441185028417e-06, "loss": 0.4274, "step": 15280 }, { "epoch": 1.4340277777777777, "grad_norm": 0.8815000875207484, "learning_rate": 6.243883057211232e-06, "loss": 0.4185, "step": 15281 }, { "epoch": 1.4341216216216215, "grad_norm": 0.9594792687506771, "learning_rate": 6.2433542493073296e-06, "loss": 0.4063, "step": 15282 }, { "epoch": 1.4342154654654655, "grad_norm": 0.9474591570327291, "learning_rate": 6.242825426578768e-06, "loss": 0.3917, "step": 15283 }, { "epoch": 1.4343093093093093, "grad_norm": 1.0881235807000242, "learning_rate": 6.242296589031852e-06, "loss": 0.41, "step": 15284 }, { "epoch": 1.4344031531531531, "grad_norm": 1.0331052961708127, "learning_rate": 6.241767736672889e-06, "loss": 0.4054, "step": 15285 }, { "epoch": 1.434496996996997, "grad_norm": 1.0268601334557874, "learning_rate": 6.2412388695081816e-06, "loss": 0.4807, "step": 15286 }, { "epoch": 1.4345908408408408, "grad_norm": 0.9245723789632347, "learning_rate": 6.240709987544037e-06, "loss": 0.3838, "step": 15287 }, { "epoch": 1.4346846846846848, "grad_norm": 1.2259862277753208, "learning_rate": 6.240181090786763e-06, "loss": 0.418, "step": 15288 }, { "epoch": 1.4347785285285286, "grad_norm": 1.0713218250802587, "learning_rate": 6.239652179242662e-06, "loss": 0.4156, "step": 15289 }, { "epoch": 1.4348723723723724, "grad_norm": 0.959320944360693, "learning_rate": 6.2391232529180424e-06, "loss": 0.3882, "step": 15290 }, { "epoch": 1.4349662162162162, "grad_norm": 0.915497849911392, "learning_rate": 6.238594311819211e-06, "loss": 0.4089, "step": 15291 }, { "epoch": 1.43506006006006, "grad_norm": 0.892411163231967, "learning_rate": 6.2380653559524736e-06, "loss": 0.3451, "step": 15292 }, { "epoch": 1.4351539039039038, "grad_norm": 1.0929944787841297, "learning_rate": 6.237536385324138e-06, "loss": 0.4085, "step": 15293 }, { "epoch": 1.4352477477477477, "grad_norm": 0.9521515153940382, "learning_rate": 6.23700739994051e-06, "loss": 0.4253, "step": 15294 }, { "epoch": 1.4353415915915915, "grad_norm": 0.905989014430742, "learning_rate": 6.236478399807897e-06, "loss": 0.3954, "step": 15295 }, { "epoch": 1.4354354354354355, "grad_norm": 1.0231983004644878, "learning_rate": 6.235949384932607e-06, "loss": 0.4158, "step": 15296 }, { "epoch": 1.4355292792792793, "grad_norm": 1.0925458334703193, "learning_rate": 6.235420355320947e-06, "loss": 0.4015, "step": 15297 }, { "epoch": 1.4356231231231231, "grad_norm": 0.992543606392224, "learning_rate": 6.234891310979223e-06, "loss": 0.4227, "step": 15298 }, { "epoch": 1.435716966966967, "grad_norm": 1.875494809450038, "learning_rate": 6.234362251913746e-06, "loss": 0.4207, "step": 15299 }, { "epoch": 1.4358108108108107, "grad_norm": 0.9166628285902425, "learning_rate": 6.233833178130823e-06, "loss": 0.3653, "step": 15300 }, { "epoch": 1.4359046546546548, "grad_norm": 0.950777628892848, "learning_rate": 6.233304089636761e-06, "loss": 0.4208, "step": 15301 }, { "epoch": 1.4359984984984986, "grad_norm": 0.8959336633480974, "learning_rate": 6.23277498643787e-06, "loss": 0.4185, "step": 15302 }, { "epoch": 1.4360923423423424, "grad_norm": 2.761515776016757, "learning_rate": 6.232245868540457e-06, "loss": 0.3666, "step": 15303 }, { "epoch": 1.4361861861861862, "grad_norm": 1.7108245230704922, "learning_rate": 6.231716735950831e-06, "loss": 0.4393, "step": 15304 }, { "epoch": 1.43628003003003, "grad_norm": 0.9377375792473217, "learning_rate": 6.231187588675301e-06, "loss": 0.4292, "step": 15305 }, { "epoch": 1.4363738738738738, "grad_norm": 0.8666729764245005, "learning_rate": 6.230658426720179e-06, "loss": 0.3672, "step": 15306 }, { "epoch": 1.4364677177177176, "grad_norm": 1.3334249267276632, "learning_rate": 6.23012925009177e-06, "loss": 0.3768, "step": 15307 }, { "epoch": 1.4365615615615615, "grad_norm": 0.897177266081483, "learning_rate": 6.229600058796386e-06, "loss": 0.3909, "step": 15308 }, { "epoch": 1.4366554054054055, "grad_norm": 0.9292998147164279, "learning_rate": 6.229070852840335e-06, "loss": 0.423, "step": 15309 }, { "epoch": 1.4367492492492493, "grad_norm": 0.9436408370478369, "learning_rate": 6.228541632229927e-06, "loss": 0.4023, "step": 15310 }, { "epoch": 1.436843093093093, "grad_norm": 0.9611912870138416, "learning_rate": 6.228012396971473e-06, "loss": 0.4126, "step": 15311 }, { "epoch": 1.436936936936937, "grad_norm": 1.1005616413389991, "learning_rate": 6.227483147071284e-06, "loss": 0.4289, "step": 15312 }, { "epoch": 1.4370307807807807, "grad_norm": 0.9131597411023136, "learning_rate": 6.226953882535668e-06, "loss": 0.4162, "step": 15313 }, { "epoch": 1.4371246246246248, "grad_norm": 0.8549863511505544, "learning_rate": 6.226424603370937e-06, "loss": 0.3819, "step": 15314 }, { "epoch": 1.4372184684684686, "grad_norm": 1.4935900004840994, "learning_rate": 6.2258953095834e-06, "loss": 0.4455, "step": 15315 }, { "epoch": 1.4373123123123124, "grad_norm": 1.7018578116986638, "learning_rate": 6.225366001179369e-06, "loss": 0.4462, "step": 15316 }, { "epoch": 1.4374061561561562, "grad_norm": 0.8594774058528711, "learning_rate": 6.224836678165155e-06, "loss": 0.4064, "step": 15317 }, { "epoch": 1.4375, "grad_norm": 0.9478366182419746, "learning_rate": 6.22430734054707e-06, "loss": 0.4074, "step": 15318 }, { "epoch": 1.4375938438438438, "grad_norm": 0.8172411899807182, "learning_rate": 6.223777988331424e-06, "loss": 0.3631, "step": 15319 }, { "epoch": 1.4376876876876876, "grad_norm": 1.060698534309918, "learning_rate": 6.2232486215245295e-06, "loss": 0.3803, "step": 15320 }, { "epoch": 1.4377815315315314, "grad_norm": 1.4097150156877314, "learning_rate": 6.222719240132697e-06, "loss": 0.3753, "step": 15321 }, { "epoch": 1.4378753753753752, "grad_norm": 1.0049142530160127, "learning_rate": 6.2221898441622385e-06, "loss": 0.3811, "step": 15322 }, { "epoch": 1.4379692192192193, "grad_norm": 1.371196954536145, "learning_rate": 6.221660433619467e-06, "loss": 0.4063, "step": 15323 }, { "epoch": 1.438063063063063, "grad_norm": 0.9895169504866574, "learning_rate": 6.221131008510696e-06, "loss": 0.3953, "step": 15324 }, { "epoch": 1.438156906906907, "grad_norm": 0.9047564643774916, "learning_rate": 6.220601568842235e-06, "loss": 0.4367, "step": 15325 }, { "epoch": 1.4382507507507507, "grad_norm": 0.9828439587273295, "learning_rate": 6.220072114620398e-06, "loss": 0.4449, "step": 15326 }, { "epoch": 1.4383445945945945, "grad_norm": 0.9404809916084447, "learning_rate": 6.2195426458514984e-06, "loss": 0.3978, "step": 15327 }, { "epoch": 1.4384384384384385, "grad_norm": 0.9442575267273495, "learning_rate": 6.219013162541847e-06, "loss": 0.3944, "step": 15328 }, { "epoch": 1.4385322822822824, "grad_norm": 0.9002687883421547, "learning_rate": 6.218483664697758e-06, "loss": 0.4144, "step": 15329 }, { "epoch": 1.4386261261261262, "grad_norm": 1.0946568806646748, "learning_rate": 6.217954152325546e-06, "loss": 0.3477, "step": 15330 }, { "epoch": 1.43871996996997, "grad_norm": 1.6542589673592747, "learning_rate": 6.217424625431523e-06, "loss": 0.3795, "step": 15331 }, { "epoch": 1.4388138138138138, "grad_norm": 1.4788186265764613, "learning_rate": 6.216895084022002e-06, "loss": 0.381, "step": 15332 }, { "epoch": 1.4389076576576576, "grad_norm": 0.8630101968658669, "learning_rate": 6.216365528103299e-06, "loss": 0.4061, "step": 15333 }, { "epoch": 1.4390015015015014, "grad_norm": 0.8830443637470711, "learning_rate": 6.215835957681727e-06, "loss": 0.3481, "step": 15334 }, { "epoch": 1.4390953453453452, "grad_norm": 0.8987114432326733, "learning_rate": 6.215306372763596e-06, "loss": 0.3993, "step": 15335 }, { "epoch": 1.4391891891891893, "grad_norm": 1.7473417679739205, "learning_rate": 6.214776773355229e-06, "loss": 0.3646, "step": 15336 }, { "epoch": 1.439283033033033, "grad_norm": 1.0261331211238502, "learning_rate": 6.214247159462934e-06, "loss": 0.4302, "step": 15337 }, { "epoch": 1.4393768768768769, "grad_norm": 0.9141468008242128, "learning_rate": 6.213717531093025e-06, "loss": 0.3856, "step": 15338 }, { "epoch": 1.4394707207207207, "grad_norm": 0.9261208624298148, "learning_rate": 6.213187888251821e-06, "loss": 0.39, "step": 15339 }, { "epoch": 1.4395645645645645, "grad_norm": 1.0836613377724433, "learning_rate": 6.212658230945635e-06, "loss": 0.4071, "step": 15340 }, { "epoch": 1.4396584084084085, "grad_norm": 0.9058297664877529, "learning_rate": 6.2121285591807815e-06, "loss": 0.3825, "step": 15341 }, { "epoch": 1.4397522522522523, "grad_norm": 0.9832616700287529, "learning_rate": 6.211598872963578e-06, "loss": 0.4193, "step": 15342 }, { "epoch": 1.4398460960960962, "grad_norm": 1.0665753723038018, "learning_rate": 6.211069172300337e-06, "loss": 0.3739, "step": 15343 }, { "epoch": 1.43993993993994, "grad_norm": 0.9389162596908972, "learning_rate": 6.210539457197377e-06, "loss": 0.4154, "step": 15344 }, { "epoch": 1.4400337837837838, "grad_norm": 0.9158270287423323, "learning_rate": 6.210009727661012e-06, "loss": 0.4261, "step": 15345 }, { "epoch": 1.4401276276276276, "grad_norm": 1.7445358271711353, "learning_rate": 6.209479983697557e-06, "loss": 0.3959, "step": 15346 }, { "epoch": 1.4402214714714714, "grad_norm": 0.8390445940200809, "learning_rate": 6.2089502253133306e-06, "loss": 0.3641, "step": 15347 }, { "epoch": 1.4403153153153152, "grad_norm": 0.9090923604841377, "learning_rate": 6.208420452514649e-06, "loss": 0.3769, "step": 15348 }, { "epoch": 1.4404091591591592, "grad_norm": 1.0718798128643339, "learning_rate": 6.207890665307829e-06, "loss": 0.4001, "step": 15349 }, { "epoch": 1.440503003003003, "grad_norm": 0.9880914551690132, "learning_rate": 6.207360863699186e-06, "loss": 0.3836, "step": 15350 }, { "epoch": 1.4405968468468469, "grad_norm": 1.3917342738584422, "learning_rate": 6.206831047695036e-06, "loss": 0.4027, "step": 15351 }, { "epoch": 1.4406906906906907, "grad_norm": 0.9507183788637983, "learning_rate": 6.206301217301699e-06, "loss": 0.4005, "step": 15352 }, { "epoch": 1.4407845345345345, "grad_norm": 0.828995043240449, "learning_rate": 6.205771372525489e-06, "loss": 0.3777, "step": 15353 }, { "epoch": 1.4408783783783785, "grad_norm": 1.0542935721401734, "learning_rate": 6.205241513372726e-06, "loss": 0.3742, "step": 15354 }, { "epoch": 1.4409722222222223, "grad_norm": 1.0001947766468715, "learning_rate": 6.204711639849726e-06, "loss": 0.376, "step": 15355 }, { "epoch": 1.4410660660660661, "grad_norm": 0.8674763663973665, "learning_rate": 6.204181751962809e-06, "loss": 0.3681, "step": 15356 }, { "epoch": 1.44115990990991, "grad_norm": 1.0048775986491199, "learning_rate": 6.2036518497182904e-06, "loss": 0.3958, "step": 15357 }, { "epoch": 1.4412537537537538, "grad_norm": 0.863038621504504, "learning_rate": 6.2031219331224875e-06, "loss": 0.3819, "step": 15358 }, { "epoch": 1.4413475975975976, "grad_norm": 1.0127252243256908, "learning_rate": 6.202592002181721e-06, "loss": 0.4151, "step": 15359 }, { "epoch": 1.4414414414414414, "grad_norm": 1.0443729888262632, "learning_rate": 6.20206205690231e-06, "loss": 0.448, "step": 15360 }, { "epoch": 1.4415352852852852, "grad_norm": 1.6592079691707078, "learning_rate": 6.20153209729057e-06, "loss": 0.4071, "step": 15361 }, { "epoch": 1.441629129129129, "grad_norm": 0.9187327527528736, "learning_rate": 6.201002123352823e-06, "loss": 0.3706, "step": 15362 }, { "epoch": 1.441722972972973, "grad_norm": 1.2853917405623794, "learning_rate": 6.200472135095386e-06, "loss": 0.4092, "step": 15363 }, { "epoch": 1.4418168168168168, "grad_norm": 2.436041533269523, "learning_rate": 6.1999421325245765e-06, "loss": 0.3736, "step": 15364 }, { "epoch": 1.4419106606606606, "grad_norm": 0.8728998344845245, "learning_rate": 6.199412115646717e-06, "loss": 0.3979, "step": 15365 }, { "epoch": 1.4420045045045045, "grad_norm": 0.9200689598129717, "learning_rate": 6.198882084468126e-06, "loss": 0.3876, "step": 15366 }, { "epoch": 1.4420983483483483, "grad_norm": 0.9704077108453061, "learning_rate": 6.198352038995121e-06, "loss": 0.4102, "step": 15367 }, { "epoch": 1.4421921921921923, "grad_norm": 2.5012555046366955, "learning_rate": 6.197821979234026e-06, "loss": 0.393, "step": 15368 }, { "epoch": 1.4422860360360361, "grad_norm": 0.9506643953987991, "learning_rate": 6.197291905191158e-06, "loss": 0.3651, "step": 15369 }, { "epoch": 1.44237987987988, "grad_norm": 1.1237848623383864, "learning_rate": 6.196761816872837e-06, "loss": 0.3822, "step": 15370 }, { "epoch": 1.4424737237237237, "grad_norm": 1.1283800184683879, "learning_rate": 6.196231714285385e-06, "loss": 0.4276, "step": 15371 }, { "epoch": 1.4425675675675675, "grad_norm": 0.8967286439608727, "learning_rate": 6.195701597435121e-06, "loss": 0.4034, "step": 15372 }, { "epoch": 1.4426614114114114, "grad_norm": 0.9322977574637834, "learning_rate": 6.195171466328364e-06, "loss": 0.4135, "step": 15373 }, { "epoch": 1.4427552552552552, "grad_norm": 0.9244358546825707, "learning_rate": 6.194641320971441e-06, "loss": 0.3871, "step": 15374 }, { "epoch": 1.442849099099099, "grad_norm": 0.8661810563252561, "learning_rate": 6.194111161370667e-06, "loss": 0.3384, "step": 15375 }, { "epoch": 1.442942942942943, "grad_norm": 0.9493409110094554, "learning_rate": 6.193580987532364e-06, "loss": 0.4105, "step": 15376 }, { "epoch": 1.4430367867867868, "grad_norm": 0.884321073522928, "learning_rate": 6.193050799462855e-06, "loss": 0.3627, "step": 15377 }, { "epoch": 1.4431306306306306, "grad_norm": 0.8849412564948038, "learning_rate": 6.192520597168462e-06, "loss": 0.3881, "step": 15378 }, { "epoch": 1.4432244744744744, "grad_norm": 0.9037941222523658, "learning_rate": 6.191990380655504e-06, "loss": 0.4029, "step": 15379 }, { "epoch": 1.4433183183183182, "grad_norm": 0.8464105599782737, "learning_rate": 6.1914601499303066e-06, "loss": 0.4128, "step": 15380 }, { "epoch": 1.4434121621621623, "grad_norm": 0.9045270749226847, "learning_rate": 6.1909299049991875e-06, "loss": 0.373, "step": 15381 }, { "epoch": 1.443506006006006, "grad_norm": 0.9983323491160507, "learning_rate": 6.1903996458684726e-06, "loss": 0.4164, "step": 15382 }, { "epoch": 1.44359984984985, "grad_norm": 2.119215957316546, "learning_rate": 6.1898693725444805e-06, "loss": 0.402, "step": 15383 }, { "epoch": 1.4436936936936937, "grad_norm": 0.9546057935548368, "learning_rate": 6.189339085033538e-06, "loss": 0.4373, "step": 15384 }, { "epoch": 1.4437875375375375, "grad_norm": 0.9427884003171143, "learning_rate": 6.188808783341964e-06, "loss": 0.3836, "step": 15385 }, { "epoch": 1.4438813813813813, "grad_norm": 1.1187535525722112, "learning_rate": 6.188278467476083e-06, "loss": 0.4223, "step": 15386 }, { "epoch": 1.4439752252252251, "grad_norm": 1.6315723870689791, "learning_rate": 6.18774813744222e-06, "loss": 0.4328, "step": 15387 }, { "epoch": 1.444069069069069, "grad_norm": 0.9617716106032775, "learning_rate": 6.1872177932466936e-06, "loss": 0.3812, "step": 15388 }, { "epoch": 1.444162912912913, "grad_norm": 1.1317813180793457, "learning_rate": 6.186687434895831e-06, "loss": 0.4428, "step": 15389 }, { "epoch": 1.4442567567567568, "grad_norm": 0.937185365397136, "learning_rate": 6.1861570623959545e-06, "loss": 0.4083, "step": 15390 }, { "epoch": 1.4443506006006006, "grad_norm": 0.7995244848623392, "learning_rate": 6.185626675753387e-06, "loss": 0.395, "step": 15391 }, { "epoch": 1.4444444444444444, "grad_norm": 0.9014807831374774, "learning_rate": 6.185096274974455e-06, "loss": 0.4001, "step": 15392 }, { "epoch": 1.4445382882882882, "grad_norm": 0.8782419544243606, "learning_rate": 6.184565860065479e-06, "loss": 0.3871, "step": 15393 }, { "epoch": 1.4446321321321323, "grad_norm": 0.9839058698341637, "learning_rate": 6.184035431032785e-06, "loss": 0.4107, "step": 15394 }, { "epoch": 1.444725975975976, "grad_norm": 0.8233719701727198, "learning_rate": 6.183504987882697e-06, "loss": 0.3823, "step": 15395 }, { "epoch": 1.4448198198198199, "grad_norm": 1.852542391002207, "learning_rate": 6.1829745306215406e-06, "loss": 0.3678, "step": 15396 }, { "epoch": 1.4449136636636637, "grad_norm": 1.205957285979585, "learning_rate": 6.182444059255639e-06, "loss": 0.4001, "step": 15397 }, { "epoch": 1.4450075075075075, "grad_norm": 0.8542662590890233, "learning_rate": 6.1819135737913186e-06, "loss": 0.3863, "step": 15398 }, { "epoch": 1.4451013513513513, "grad_norm": 0.8791493734200477, "learning_rate": 6.181383074234903e-06, "loss": 0.4056, "step": 15399 }, { "epoch": 1.4451951951951951, "grad_norm": 1.5428458719059062, "learning_rate": 6.1808525605927175e-06, "loss": 0.4208, "step": 15400 }, { "epoch": 1.445289039039039, "grad_norm": 0.931977899539317, "learning_rate": 6.180322032871086e-06, "loss": 0.3761, "step": 15401 }, { "epoch": 1.4453828828828827, "grad_norm": 1.0407805299670323, "learning_rate": 6.17979149107634e-06, "loss": 0.3832, "step": 15402 }, { "epoch": 1.4454767267267268, "grad_norm": 0.8414856816092096, "learning_rate": 6.179260935214799e-06, "loss": 0.3547, "step": 15403 }, { "epoch": 1.4455705705705706, "grad_norm": 1.176206741100014, "learning_rate": 6.178730365292792e-06, "loss": 0.4036, "step": 15404 }, { "epoch": 1.4456644144144144, "grad_norm": 0.8570559465912495, "learning_rate": 6.178199781316644e-06, "loss": 0.4109, "step": 15405 }, { "epoch": 1.4457582582582582, "grad_norm": 0.927475011442433, "learning_rate": 6.17766918329268e-06, "loss": 0.3866, "step": 15406 }, { "epoch": 1.445852102102102, "grad_norm": 1.6117279107779485, "learning_rate": 6.177138571227228e-06, "loss": 0.3683, "step": 15407 }, { "epoch": 1.445945945945946, "grad_norm": 0.9497062420891187, "learning_rate": 6.176607945126616e-06, "loss": 0.3856, "step": 15408 }, { "epoch": 1.4460397897897899, "grad_norm": 0.8731405620852758, "learning_rate": 6.176077304997167e-06, "loss": 0.3645, "step": 15409 }, { "epoch": 1.4461336336336337, "grad_norm": 0.8512118976954048, "learning_rate": 6.175546650845211e-06, "loss": 0.3288, "step": 15410 }, { "epoch": 1.4462274774774775, "grad_norm": 0.9970177246194013, "learning_rate": 6.175015982677075e-06, "loss": 0.391, "step": 15411 }, { "epoch": 1.4463213213213213, "grad_norm": 1.3603544253145896, "learning_rate": 6.174485300499083e-06, "loss": 0.4103, "step": 15412 }, { "epoch": 1.446415165165165, "grad_norm": 0.935323835314811, "learning_rate": 6.173954604317564e-06, "loss": 0.3599, "step": 15413 }, { "epoch": 1.446509009009009, "grad_norm": 1.8621805457156058, "learning_rate": 6.173423894138849e-06, "loss": 0.4394, "step": 15414 }, { "epoch": 1.4466028528528527, "grad_norm": 0.9669926161080901, "learning_rate": 6.1728931699692606e-06, "loss": 0.3886, "step": 15415 }, { "epoch": 1.4466966966966968, "grad_norm": 1.0930087637750519, "learning_rate": 6.172362431815129e-06, "loss": 0.395, "step": 15416 }, { "epoch": 1.4467905405405406, "grad_norm": 0.9016320077211675, "learning_rate": 6.171831679682784e-06, "loss": 0.3828, "step": 15417 }, { "epoch": 1.4468843843843844, "grad_norm": 0.978611377289483, "learning_rate": 6.1713009135785505e-06, "loss": 0.4275, "step": 15418 }, { "epoch": 1.4469782282282282, "grad_norm": 1.0640704772737481, "learning_rate": 6.170770133508759e-06, "loss": 0.3881, "step": 15419 }, { "epoch": 1.447072072072072, "grad_norm": 1.2457246363018533, "learning_rate": 6.170239339479737e-06, "loss": 0.3954, "step": 15420 }, { "epoch": 1.447165915915916, "grad_norm": 1.1620234825144427, "learning_rate": 6.169708531497813e-06, "loss": 0.447, "step": 15421 }, { "epoch": 1.4472597597597598, "grad_norm": 1.0081248917930024, "learning_rate": 6.169177709569316e-06, "loss": 0.4072, "step": 15422 }, { "epoch": 1.4473536036036037, "grad_norm": 0.861906758513076, "learning_rate": 6.168646873700578e-06, "loss": 0.351, "step": 15423 }, { "epoch": 1.4474474474474475, "grad_norm": 1.2729327381529099, "learning_rate": 6.168116023897924e-06, "loss": 0.3983, "step": 15424 }, { "epoch": 1.4475412912912913, "grad_norm": 0.9959557070916831, "learning_rate": 6.167585160167686e-06, "loss": 0.4306, "step": 15425 }, { "epoch": 1.447635135135135, "grad_norm": 1.1581744770161242, "learning_rate": 6.167054282516193e-06, "loss": 0.3966, "step": 15426 }, { "epoch": 1.447728978978979, "grad_norm": 1.0975002770548876, "learning_rate": 6.166523390949773e-06, "loss": 0.4081, "step": 15427 }, { "epoch": 1.4478228228228227, "grad_norm": 0.9054811937246343, "learning_rate": 6.165992485474757e-06, "loss": 0.3907, "step": 15428 }, { "epoch": 1.4479166666666667, "grad_norm": 0.8312136152602205, "learning_rate": 6.165461566097477e-06, "loss": 0.3953, "step": 15429 }, { "epoch": 1.4480105105105106, "grad_norm": 1.2256459351927482, "learning_rate": 6.164930632824262e-06, "loss": 0.3928, "step": 15430 }, { "epoch": 1.4481043543543544, "grad_norm": 0.9424114297334524, "learning_rate": 6.164399685661441e-06, "loss": 0.367, "step": 15431 }, { "epoch": 1.4481981981981982, "grad_norm": 0.9172545266567802, "learning_rate": 6.163868724615345e-06, "loss": 0.369, "step": 15432 }, { "epoch": 1.448292042042042, "grad_norm": 0.8978318511558061, "learning_rate": 6.163337749692307e-06, "loss": 0.4121, "step": 15433 }, { "epoch": 1.448385885885886, "grad_norm": 1.1070944971243026, "learning_rate": 6.162806760898653e-06, "loss": 0.4696, "step": 15434 }, { "epoch": 1.4484797297297298, "grad_norm": 0.881875321409477, "learning_rate": 6.16227575824072e-06, "loss": 0.3852, "step": 15435 }, { "epoch": 1.4485735735735736, "grad_norm": 0.8507208748796102, "learning_rate": 6.161744741724835e-06, "loss": 0.4173, "step": 15436 }, { "epoch": 1.4486674174174174, "grad_norm": 0.992068276902545, "learning_rate": 6.161213711357331e-06, "loss": 0.3976, "step": 15437 }, { "epoch": 1.4487612612612613, "grad_norm": 1.110129928940051, "learning_rate": 6.1606826671445395e-06, "loss": 0.3917, "step": 15438 }, { "epoch": 1.448855105105105, "grad_norm": 0.8924235786776301, "learning_rate": 6.160151609092792e-06, "loss": 0.4361, "step": 15439 }, { "epoch": 1.4489489489489489, "grad_norm": 0.9272072594645966, "learning_rate": 6.159620537208418e-06, "loss": 0.4501, "step": 15440 }, { "epoch": 1.4490427927927927, "grad_norm": 1.0948264907844343, "learning_rate": 6.159089451497755e-06, "loss": 0.4332, "step": 15441 }, { "epoch": 1.4491366366366365, "grad_norm": 0.9447170520310687, "learning_rate": 6.158558351967132e-06, "loss": 0.4003, "step": 15442 }, { "epoch": 1.4492304804804805, "grad_norm": 0.9523749289852476, "learning_rate": 6.158027238622879e-06, "loss": 0.3891, "step": 15443 }, { "epoch": 1.4493243243243243, "grad_norm": 0.9375192227019057, "learning_rate": 6.157496111471334e-06, "loss": 0.3952, "step": 15444 }, { "epoch": 1.4494181681681682, "grad_norm": 1.677770630494921, "learning_rate": 6.156964970518824e-06, "loss": 0.424, "step": 15445 }, { "epoch": 1.449512012012012, "grad_norm": 0.7948639564007569, "learning_rate": 6.1564338157716855e-06, "loss": 0.3759, "step": 15446 }, { "epoch": 1.4496058558558558, "grad_norm": 1.3931106001100015, "learning_rate": 6.155902647236251e-06, "loss": 0.3879, "step": 15447 }, { "epoch": 1.4496996996996998, "grad_norm": 1.4554188914705883, "learning_rate": 6.155371464918852e-06, "loss": 0.3893, "step": 15448 }, { "epoch": 1.4497935435435436, "grad_norm": 2.2066620052286368, "learning_rate": 6.154840268825823e-06, "loss": 0.3691, "step": 15449 }, { "epoch": 1.4498873873873874, "grad_norm": 0.9425723035324833, "learning_rate": 6.154309058963499e-06, "loss": 0.3894, "step": 15450 }, { "epoch": 1.4499812312312312, "grad_norm": 0.9683625200482207, "learning_rate": 6.153777835338211e-06, "loss": 0.4156, "step": 15451 }, { "epoch": 1.450075075075075, "grad_norm": 0.9965347554929194, "learning_rate": 6.153246597956294e-06, "loss": 0.4266, "step": 15452 }, { "epoch": 1.4501689189189189, "grad_norm": 0.9546963917137854, "learning_rate": 6.152715346824083e-06, "loss": 0.4182, "step": 15453 }, { "epoch": 1.4502627627627627, "grad_norm": 0.9711756426260625, "learning_rate": 6.1521840819479115e-06, "loss": 0.3936, "step": 15454 }, { "epoch": 1.4503566066066065, "grad_norm": 1.2778539720501803, "learning_rate": 6.151652803334113e-06, "loss": 0.3899, "step": 15455 }, { "epoch": 1.4504504504504505, "grad_norm": 0.9006650241463727, "learning_rate": 6.1511215109890225e-06, "loss": 0.3976, "step": 15456 }, { "epoch": 1.4505442942942943, "grad_norm": 0.9812616841871182, "learning_rate": 6.1505902049189745e-06, "loss": 0.3751, "step": 15457 }, { "epoch": 1.4506381381381381, "grad_norm": 0.8872514613609623, "learning_rate": 6.150058885130304e-06, "loss": 0.4192, "step": 15458 }, { "epoch": 1.450731981981982, "grad_norm": 0.8785855058795365, "learning_rate": 6.149527551629346e-06, "loss": 0.3975, "step": 15459 }, { "epoch": 1.4508258258258258, "grad_norm": 1.104469555146258, "learning_rate": 6.1489962044224375e-06, "loss": 0.4248, "step": 15460 }, { "epoch": 1.4509196696696698, "grad_norm": 0.8872790969678287, "learning_rate": 6.14846484351591e-06, "loss": 0.4144, "step": 15461 }, { "epoch": 1.4510135135135136, "grad_norm": 0.9468867967681783, "learning_rate": 6.147933468916102e-06, "loss": 0.39, "step": 15462 }, { "epoch": 1.4511073573573574, "grad_norm": 2.3801792332343386, "learning_rate": 6.147402080629348e-06, "loss": 0.4283, "step": 15463 }, { "epoch": 1.4512012012012012, "grad_norm": 0.8544042993984711, "learning_rate": 6.146870678661983e-06, "loss": 0.4143, "step": 15464 }, { "epoch": 1.451295045045045, "grad_norm": 0.8951207655284538, "learning_rate": 6.146339263020345e-06, "loss": 0.3758, "step": 15465 }, { "epoch": 1.4513888888888888, "grad_norm": 1.788397483030141, "learning_rate": 6.1458078337107695e-06, "loss": 0.4083, "step": 15466 }, { "epoch": 1.4514827327327327, "grad_norm": 4.264893970764977, "learning_rate": 6.145276390739591e-06, "loss": 0.4283, "step": 15467 }, { "epoch": 1.4515765765765765, "grad_norm": 1.1667622128036481, "learning_rate": 6.144744934113148e-06, "loss": 0.4249, "step": 15468 }, { "epoch": 1.4516704204204205, "grad_norm": 0.9520601319697448, "learning_rate": 6.144213463837776e-06, "loss": 0.4248, "step": 15469 }, { "epoch": 1.4517642642642643, "grad_norm": 0.942227895586337, "learning_rate": 6.143681979919812e-06, "loss": 0.3748, "step": 15470 }, { "epoch": 1.4518581081081081, "grad_norm": 1.150970973938702, "learning_rate": 6.143150482365594e-06, "loss": 0.3955, "step": 15471 }, { "epoch": 1.451951951951952, "grad_norm": 0.9798500046467147, "learning_rate": 6.142618971181459e-06, "loss": 0.3714, "step": 15472 }, { "epoch": 1.4520457957957957, "grad_norm": 0.9404885921122544, "learning_rate": 6.142087446373741e-06, "loss": 0.4384, "step": 15473 }, { "epoch": 1.4521396396396398, "grad_norm": 1.1069213740944497, "learning_rate": 6.141555907948782e-06, "loss": 0.4309, "step": 15474 }, { "epoch": 1.4522334834834836, "grad_norm": 1.1213796708584538, "learning_rate": 6.1410243559129165e-06, "loss": 0.4226, "step": 15475 }, { "epoch": 1.4523273273273274, "grad_norm": 0.9122396030423197, "learning_rate": 6.140492790272483e-06, "loss": 0.3662, "step": 15476 }, { "epoch": 1.4524211711711712, "grad_norm": 0.9334854661517834, "learning_rate": 6.139961211033822e-06, "loss": 0.4226, "step": 15477 }, { "epoch": 1.452515015015015, "grad_norm": 1.1599832281345481, "learning_rate": 6.139429618203266e-06, "loss": 0.3966, "step": 15478 }, { "epoch": 1.4526088588588588, "grad_norm": 0.8874188967173567, "learning_rate": 6.13889801178716e-06, "loss": 0.3992, "step": 15479 }, { "epoch": 1.4527027027027026, "grad_norm": 0.81873542463177, "learning_rate": 6.138366391791838e-06, "loss": 0.4341, "step": 15480 }, { "epoch": 1.4527965465465464, "grad_norm": 0.8397622643617527, "learning_rate": 6.137834758223638e-06, "loss": 0.3842, "step": 15481 }, { "epoch": 1.4528903903903903, "grad_norm": 1.260150839811176, "learning_rate": 6.1373031110889e-06, "loss": 0.4456, "step": 15482 }, { "epoch": 1.4529842342342343, "grad_norm": 0.9639732127119183, "learning_rate": 6.136771450393966e-06, "loss": 0.4287, "step": 15483 }, { "epoch": 1.453078078078078, "grad_norm": 1.0932197887590978, "learning_rate": 6.13623977614517e-06, "loss": 0.4142, "step": 15484 }, { "epoch": 1.453171921921922, "grad_norm": 1.0071607518931507, "learning_rate": 6.135708088348854e-06, "loss": 0.4153, "step": 15485 }, { "epoch": 1.4532657657657657, "grad_norm": 0.9881403663637277, "learning_rate": 6.135176387011357e-06, "loss": 0.4197, "step": 15486 }, { "epoch": 1.4533596096096097, "grad_norm": 0.8493601900325185, "learning_rate": 6.1346446721390175e-06, "loss": 0.3967, "step": 15487 }, { "epoch": 1.4534534534534536, "grad_norm": 0.971794921667306, "learning_rate": 6.134112943738176e-06, "loss": 0.4318, "step": 15488 }, { "epoch": 1.4535472972972974, "grad_norm": 1.0658118691816574, "learning_rate": 6.133581201815173e-06, "loss": 0.4294, "step": 15489 }, { "epoch": 1.4536411411411412, "grad_norm": 0.9324645872330789, "learning_rate": 6.133049446376348e-06, "loss": 0.3817, "step": 15490 }, { "epoch": 1.453734984984985, "grad_norm": 0.869982558328527, "learning_rate": 6.132517677428041e-06, "loss": 0.3989, "step": 15491 }, { "epoch": 1.4538288288288288, "grad_norm": 0.8448026135619944, "learning_rate": 6.131985894976593e-06, "loss": 0.4339, "step": 15492 }, { "epoch": 1.4539226726726726, "grad_norm": 0.7792318724190181, "learning_rate": 6.1314540990283435e-06, "loss": 0.3987, "step": 15493 }, { "epoch": 1.4540165165165164, "grad_norm": 0.9909548701986672, "learning_rate": 6.130922289589632e-06, "loss": 0.4025, "step": 15494 }, { "epoch": 1.4541103603603602, "grad_norm": 1.114474478809019, "learning_rate": 6.1303904666668035e-06, "loss": 0.3955, "step": 15495 }, { "epoch": 1.4542042042042043, "grad_norm": 0.9182147577282016, "learning_rate": 6.129858630266194e-06, "loss": 0.3734, "step": 15496 }, { "epoch": 1.454298048048048, "grad_norm": 0.9764141511379475, "learning_rate": 6.129326780394149e-06, "loss": 0.429, "step": 15497 }, { "epoch": 1.4543918918918919, "grad_norm": 0.8209085361552164, "learning_rate": 6.1287949170570085e-06, "loss": 0.4027, "step": 15498 }, { "epoch": 1.4544857357357357, "grad_norm": 1.113413395616332, "learning_rate": 6.128263040261112e-06, "loss": 0.4016, "step": 15499 }, { "epoch": 1.4545795795795795, "grad_norm": 0.94989072495438, "learning_rate": 6.1277311500128016e-06, "loss": 0.3917, "step": 15500 }, { "epoch": 1.4546734234234235, "grad_norm": 1.562570090888364, "learning_rate": 6.127199246318421e-06, "loss": 0.4198, "step": 15501 }, { "epoch": 1.4547672672672673, "grad_norm": 0.8927131797672929, "learning_rate": 6.1266673291843115e-06, "loss": 0.3829, "step": 15502 }, { "epoch": 1.4548611111111112, "grad_norm": 0.8871900669149786, "learning_rate": 6.126135398616815e-06, "loss": 0.4164, "step": 15503 }, { "epoch": 1.454954954954955, "grad_norm": 1.270824163872939, "learning_rate": 6.125603454622273e-06, "loss": 0.4232, "step": 15504 }, { "epoch": 1.4550487987987988, "grad_norm": 0.9883584640229821, "learning_rate": 6.125071497207028e-06, "loss": 0.4187, "step": 15505 }, { "epoch": 1.4551426426426426, "grad_norm": 1.457852702662003, "learning_rate": 6.1245395263774235e-06, "loss": 0.4089, "step": 15506 }, { "epoch": 1.4552364864864864, "grad_norm": 1.1083547141060837, "learning_rate": 6.1240075421398026e-06, "loss": 0.409, "step": 15507 }, { "epoch": 1.4553303303303302, "grad_norm": 1.0574354842920832, "learning_rate": 6.123475544500507e-06, "loss": 0.4737, "step": 15508 }, { "epoch": 1.4554241741741742, "grad_norm": 1.093845847193613, "learning_rate": 6.1229435334658805e-06, "loss": 0.3471, "step": 15509 }, { "epoch": 1.455518018018018, "grad_norm": 0.8960947470970395, "learning_rate": 6.122411509042266e-06, "loss": 0.3949, "step": 15510 }, { "epoch": 1.4556118618618619, "grad_norm": 1.4992315113072359, "learning_rate": 6.121879471236007e-06, "loss": 0.4335, "step": 15511 }, { "epoch": 1.4557057057057057, "grad_norm": 1.0046178201578586, "learning_rate": 6.121347420053446e-06, "loss": 0.4585, "step": 15512 }, { "epoch": 1.4557995495495495, "grad_norm": 0.9625820086112729, "learning_rate": 6.120815355500928e-06, "loss": 0.4094, "step": 15513 }, { "epoch": 1.4558933933933935, "grad_norm": 0.964618037892672, "learning_rate": 6.120283277584797e-06, "loss": 0.3739, "step": 15514 }, { "epoch": 1.4559872372372373, "grad_norm": 0.9846750826714368, "learning_rate": 6.119751186311398e-06, "loss": 0.4447, "step": 15515 }, { "epoch": 1.4560810810810811, "grad_norm": 1.6008654739857302, "learning_rate": 6.119219081687073e-06, "loss": 0.4612, "step": 15516 }, { "epoch": 1.456174924924925, "grad_norm": 1.1413077367403277, "learning_rate": 6.118686963718167e-06, "loss": 0.4051, "step": 15517 }, { "epoch": 1.4562687687687688, "grad_norm": 1.07613171581273, "learning_rate": 6.1181548324110225e-06, "loss": 0.4174, "step": 15518 }, { "epoch": 1.4563626126126126, "grad_norm": 1.3065506240017344, "learning_rate": 6.117622687771989e-06, "loss": 0.4163, "step": 15519 }, { "epoch": 1.4564564564564564, "grad_norm": 0.9727939331458082, "learning_rate": 6.117090529807408e-06, "loss": 0.4165, "step": 15520 }, { "epoch": 1.4565503003003002, "grad_norm": 1.0880955770935672, "learning_rate": 6.1165583585236255e-06, "loss": 0.4066, "step": 15521 }, { "epoch": 1.4566441441441442, "grad_norm": 0.8820628423972633, "learning_rate": 6.1160261739269864e-06, "loss": 0.3978, "step": 15522 }, { "epoch": 1.456737987987988, "grad_norm": 0.9907650771688086, "learning_rate": 6.115493976023835e-06, "loss": 0.4001, "step": 15523 }, { "epoch": 1.4568318318318318, "grad_norm": 1.1818576098678755, "learning_rate": 6.114961764820517e-06, "loss": 0.3942, "step": 15524 }, { "epoch": 1.4569256756756757, "grad_norm": 0.9269796975373645, "learning_rate": 6.114429540323381e-06, "loss": 0.4112, "step": 15525 }, { "epoch": 1.4570195195195195, "grad_norm": 0.8602062893368634, "learning_rate": 6.113897302538769e-06, "loss": 0.3925, "step": 15526 }, { "epoch": 1.4571133633633635, "grad_norm": 1.3166109826468506, "learning_rate": 6.113365051473028e-06, "loss": 0.4071, "step": 15527 }, { "epoch": 1.4572072072072073, "grad_norm": 0.9134354044353138, "learning_rate": 6.112832787132505e-06, "loss": 0.4401, "step": 15528 }, { "epoch": 1.4573010510510511, "grad_norm": 0.9106383063558388, "learning_rate": 6.112300509523545e-06, "loss": 0.4063, "step": 15529 }, { "epoch": 1.457394894894895, "grad_norm": 0.8420200500416293, "learning_rate": 6.111768218652496e-06, "loss": 0.3804, "step": 15530 }, { "epoch": 1.4574887387387387, "grad_norm": 0.944520071422861, "learning_rate": 6.111235914525704e-06, "loss": 0.3682, "step": 15531 }, { "epoch": 1.4575825825825826, "grad_norm": 2.0984208702602167, "learning_rate": 6.110703597149514e-06, "loss": 0.4134, "step": 15532 }, { "epoch": 1.4576764264264264, "grad_norm": 0.7993371299392155, "learning_rate": 6.110171266530276e-06, "loss": 0.3721, "step": 15533 }, { "epoch": 1.4577702702702702, "grad_norm": 0.9558651859915773, "learning_rate": 6.109638922674335e-06, "loss": 0.4069, "step": 15534 }, { "epoch": 1.457864114114114, "grad_norm": 0.8562418797695277, "learning_rate": 6.109106565588036e-06, "loss": 0.4255, "step": 15535 }, { "epoch": 1.457957957957958, "grad_norm": 0.9409636375827638, "learning_rate": 6.108574195277731e-06, "loss": 0.403, "step": 15536 }, { "epoch": 1.4580518018018018, "grad_norm": 0.9082760008711124, "learning_rate": 6.1080418117497656e-06, "loss": 0.411, "step": 15537 }, { "epoch": 1.4581456456456456, "grad_norm": 0.9577836707648908, "learning_rate": 6.107509415010487e-06, "loss": 0.4238, "step": 15538 }, { "epoch": 1.4582394894894894, "grad_norm": 1.0930330372961947, "learning_rate": 6.106977005066243e-06, "loss": 0.4415, "step": 15539 }, { "epoch": 1.4583333333333333, "grad_norm": 1.0097364824735047, "learning_rate": 6.106444581923383e-06, "loss": 0.4269, "step": 15540 }, { "epoch": 1.4584271771771773, "grad_norm": 0.9593233623607474, "learning_rate": 6.105912145588253e-06, "loss": 0.3966, "step": 15541 }, { "epoch": 1.458521021021021, "grad_norm": 0.9228901444899273, "learning_rate": 6.105379696067202e-06, "loss": 0.4324, "step": 15542 }, { "epoch": 1.458614864864865, "grad_norm": 1.1645619263684597, "learning_rate": 6.10484723336658e-06, "loss": 0.3918, "step": 15543 }, { "epoch": 1.4587087087087087, "grad_norm": 0.8346342486702282, "learning_rate": 6.104314757492732e-06, "loss": 0.3995, "step": 15544 }, { "epoch": 1.4588025525525525, "grad_norm": 1.0383244705498025, "learning_rate": 6.103782268452011e-06, "loss": 0.3781, "step": 15545 }, { "epoch": 1.4588963963963963, "grad_norm": 0.9405449801397524, "learning_rate": 6.103249766250764e-06, "loss": 0.3902, "step": 15546 }, { "epoch": 1.4589902402402402, "grad_norm": 0.935737237969625, "learning_rate": 6.102717250895341e-06, "loss": 0.3959, "step": 15547 }, { "epoch": 1.459084084084084, "grad_norm": 0.9973233746538782, "learning_rate": 6.102184722392088e-06, "loss": 0.3728, "step": 15548 }, { "epoch": 1.459177927927928, "grad_norm": 0.991276411859601, "learning_rate": 6.101652180747359e-06, "loss": 0.3781, "step": 15549 }, { "epoch": 1.4592717717717718, "grad_norm": 0.9188931639972876, "learning_rate": 6.101119625967501e-06, "loss": 0.395, "step": 15550 }, { "epoch": 1.4593656156156156, "grad_norm": 1.0360936208691964, "learning_rate": 6.100587058058864e-06, "loss": 0.4133, "step": 15551 }, { "epoch": 1.4594594594594594, "grad_norm": 1.1118046081648778, "learning_rate": 6.100054477027797e-06, "loss": 0.393, "step": 15552 }, { "epoch": 1.4595533033033032, "grad_norm": 0.9117333622557274, "learning_rate": 6.099521882880653e-06, "loss": 0.3739, "step": 15553 }, { "epoch": 1.4596471471471473, "grad_norm": 0.9547445306949573, "learning_rate": 6.098989275623778e-06, "loss": 0.395, "step": 15554 }, { "epoch": 1.459740990990991, "grad_norm": 0.8690680387256893, "learning_rate": 6.098456655263526e-06, "loss": 0.3767, "step": 15555 }, { "epoch": 1.459834834834835, "grad_norm": 2.274296671949389, "learning_rate": 6.097924021806245e-06, "loss": 0.4461, "step": 15556 }, { "epoch": 1.4599286786786787, "grad_norm": 1.0000955404461138, "learning_rate": 6.097391375258287e-06, "loss": 0.3537, "step": 15557 }, { "epoch": 1.4600225225225225, "grad_norm": 1.109016118217974, "learning_rate": 6.0968587156260025e-06, "loss": 0.4341, "step": 15558 }, { "epoch": 1.4601163663663663, "grad_norm": 0.9965639723471161, "learning_rate": 6.096326042915743e-06, "loss": 0.3946, "step": 15559 }, { "epoch": 1.4602102102102101, "grad_norm": 0.8743345399006077, "learning_rate": 6.095793357133859e-06, "loss": 0.3588, "step": 15560 }, { "epoch": 1.460304054054054, "grad_norm": 1.0298014412671113, "learning_rate": 6.095260658286702e-06, "loss": 0.4172, "step": 15561 }, { "epoch": 1.460397897897898, "grad_norm": 1.2850585673951986, "learning_rate": 6.094727946380623e-06, "loss": 0.4685, "step": 15562 }, { "epoch": 1.4604917417417418, "grad_norm": 1.1695902827211397, "learning_rate": 6.094195221421973e-06, "loss": 0.3923, "step": 15563 }, { "epoch": 1.4605855855855856, "grad_norm": 1.1385986392582141, "learning_rate": 6.0936624834171054e-06, "loss": 0.3904, "step": 15564 }, { "epoch": 1.4606794294294294, "grad_norm": 0.9517136620601145, "learning_rate": 6.093129732372372e-06, "loss": 0.3985, "step": 15565 }, { "epoch": 1.4607732732732732, "grad_norm": 1.2047455050088518, "learning_rate": 6.092596968294122e-06, "loss": 0.3839, "step": 15566 }, { "epoch": 1.4608671171171173, "grad_norm": 0.8545250552737468, "learning_rate": 6.092064191188712e-06, "loss": 0.3654, "step": 15567 }, { "epoch": 1.460960960960961, "grad_norm": 3.0047775053120365, "learning_rate": 6.091531401062491e-06, "loss": 0.4446, "step": 15568 }, { "epoch": 1.4610548048048049, "grad_norm": 1.683421452054809, "learning_rate": 6.090998597921812e-06, "loss": 0.3562, "step": 15569 }, { "epoch": 1.4611486486486487, "grad_norm": 0.9392030028233979, "learning_rate": 6.090465781773029e-06, "loss": 0.432, "step": 15570 }, { "epoch": 1.4612424924924925, "grad_norm": 0.8304706013024893, "learning_rate": 6.089932952622495e-06, "loss": 0.3761, "step": 15571 }, { "epoch": 1.4613363363363363, "grad_norm": 1.1439421106400438, "learning_rate": 6.089400110476561e-06, "loss": 0.4432, "step": 15572 }, { "epoch": 1.4614301801801801, "grad_norm": 1.0064735329124257, "learning_rate": 6.088867255341583e-06, "loss": 0.4286, "step": 15573 }, { "epoch": 1.461524024024024, "grad_norm": 0.8613250773640536, "learning_rate": 6.08833438722391e-06, "loss": 0.4081, "step": 15574 }, { "epoch": 1.4616178678678677, "grad_norm": 1.2217916178918926, "learning_rate": 6.087801506129899e-06, "loss": 0.4165, "step": 15575 }, { "epoch": 1.4617117117117118, "grad_norm": 0.9414488677465617, "learning_rate": 6.087268612065903e-06, "loss": 0.3971, "step": 15576 }, { "epoch": 1.4618055555555556, "grad_norm": 0.9187100640701132, "learning_rate": 6.086735705038276e-06, "loss": 0.3955, "step": 15577 }, { "epoch": 1.4618993993993994, "grad_norm": 1.1560243335732938, "learning_rate": 6.08620278505337e-06, "loss": 0.4018, "step": 15578 }, { "epoch": 1.4619932432432432, "grad_norm": 1.1852791388046942, "learning_rate": 6.085669852117541e-06, "loss": 0.4216, "step": 15579 }, { "epoch": 1.462087087087087, "grad_norm": 0.9760749280764404, "learning_rate": 6.085136906237142e-06, "loss": 0.3736, "step": 15580 }, { "epoch": 1.462180930930931, "grad_norm": 0.8620677902050237, "learning_rate": 6.0846039474185285e-06, "loss": 0.3419, "step": 15581 }, { "epoch": 1.4622747747747749, "grad_norm": 0.8840008127449622, "learning_rate": 6.084070975668055e-06, "loss": 0.371, "step": 15582 }, { "epoch": 1.4623686186186187, "grad_norm": 0.9248753030323995, "learning_rate": 6.083537990992076e-06, "loss": 0.373, "step": 15583 }, { "epoch": 1.4624624624624625, "grad_norm": 0.9696755185152133, "learning_rate": 6.083004993396944e-06, "loss": 0.4433, "step": 15584 }, { "epoch": 1.4625563063063063, "grad_norm": 0.7990094705082335, "learning_rate": 6.082471982889018e-06, "loss": 0.3947, "step": 15585 }, { "epoch": 1.46265015015015, "grad_norm": 0.9892663600944634, "learning_rate": 6.081938959474649e-06, "loss": 0.3925, "step": 15586 }, { "epoch": 1.462743993993994, "grad_norm": 6.047263013120371, "learning_rate": 6.0814059231601954e-06, "loss": 0.4137, "step": 15587 }, { "epoch": 1.4628378378378377, "grad_norm": 0.9674748721349045, "learning_rate": 6.0808728739520115e-06, "loss": 0.3906, "step": 15588 }, { "epoch": 1.4629316816816818, "grad_norm": 0.9537013500711647, "learning_rate": 6.080339811856455e-06, "loss": 0.4232, "step": 15589 }, { "epoch": 1.4630255255255256, "grad_norm": 1.0658956979551795, "learning_rate": 6.079806736879877e-06, "loss": 0.4016, "step": 15590 }, { "epoch": 1.4631193693693694, "grad_norm": 2.232855321106804, "learning_rate": 6.079273649028638e-06, "loss": 0.3584, "step": 15591 }, { "epoch": 1.4632132132132132, "grad_norm": 0.8882179919203945, "learning_rate": 6.0787405483090925e-06, "loss": 0.4064, "step": 15592 }, { "epoch": 1.463307057057057, "grad_norm": 0.8993873965205877, "learning_rate": 6.078207434727595e-06, "loss": 0.3869, "step": 15593 }, { "epoch": 1.463400900900901, "grad_norm": 1.168178274103902, "learning_rate": 6.077674308290504e-06, "loss": 0.4118, "step": 15594 }, { "epoch": 1.4634947447447448, "grad_norm": 0.871400678659146, "learning_rate": 6.077141169004176e-06, "loss": 0.3941, "step": 15595 }, { "epoch": 1.4635885885885886, "grad_norm": 0.8674528733150427, "learning_rate": 6.076608016874966e-06, "loss": 0.4123, "step": 15596 }, { "epoch": 1.4636824324324325, "grad_norm": 0.9018572404336294, "learning_rate": 6.076074851909234e-06, "loss": 0.4106, "step": 15597 }, { "epoch": 1.4637762762762763, "grad_norm": 0.9837895928147179, "learning_rate": 6.075541674113332e-06, "loss": 0.3854, "step": 15598 }, { "epoch": 1.46387012012012, "grad_norm": 1.0810485947318422, "learning_rate": 6.075008483493621e-06, "loss": 0.4238, "step": 15599 }, { "epoch": 1.4639639639639639, "grad_norm": 1.0820322729012424, "learning_rate": 6.074475280056459e-06, "loss": 0.432, "step": 15600 }, { "epoch": 1.4640578078078077, "grad_norm": 0.9542430006006034, "learning_rate": 6.073942063808199e-06, "loss": 0.4081, "step": 15601 }, { "epoch": 1.4641516516516517, "grad_norm": 1.1533515395270038, "learning_rate": 6.073408834755203e-06, "loss": 0.4129, "step": 15602 }, { "epoch": 1.4642454954954955, "grad_norm": 1.1731108064345537, "learning_rate": 6.0728755929038285e-06, "loss": 0.386, "step": 15603 }, { "epoch": 1.4643393393393394, "grad_norm": 0.9510894174274708, "learning_rate": 6.07234233826043e-06, "loss": 0.3737, "step": 15604 }, { "epoch": 1.4644331831831832, "grad_norm": 1.1481465372570654, "learning_rate": 6.071809070831367e-06, "loss": 0.4052, "step": 15605 }, { "epoch": 1.464527027027027, "grad_norm": 0.9568650593888547, "learning_rate": 6.071275790623001e-06, "loss": 0.384, "step": 15606 }, { "epoch": 1.464620870870871, "grad_norm": 1.089693657775922, "learning_rate": 6.070742497641685e-06, "loss": 0.3938, "step": 15607 }, { "epoch": 1.4647147147147148, "grad_norm": 0.9113337751289068, "learning_rate": 6.070209191893782e-06, "loss": 0.3943, "step": 15608 }, { "epoch": 1.4648085585585586, "grad_norm": 1.0688570311483736, "learning_rate": 6.069675873385649e-06, "loss": 0.3808, "step": 15609 }, { "epoch": 1.4649024024024024, "grad_norm": 0.8494894714696857, "learning_rate": 6.069142542123643e-06, "loss": 0.3977, "step": 15610 }, { "epoch": 1.4649962462462462, "grad_norm": 1.018735092335174, "learning_rate": 6.0686091981141245e-06, "loss": 0.4135, "step": 15611 }, { "epoch": 1.46509009009009, "grad_norm": 1.1407820340335804, "learning_rate": 6.0680758413634545e-06, "loss": 0.3532, "step": 15612 }, { "epoch": 1.4651839339339339, "grad_norm": 0.8866274416514178, "learning_rate": 6.06754247187799e-06, "loss": 0.4104, "step": 15613 }, { "epoch": 1.4652777777777777, "grad_norm": 0.9746582465081114, "learning_rate": 6.067009089664091e-06, "loss": 0.4244, "step": 15614 }, { "epoch": 1.4653716216216215, "grad_norm": 0.9194043109546455, "learning_rate": 6.066475694728117e-06, "loss": 0.4085, "step": 15615 }, { "epoch": 1.4654654654654655, "grad_norm": 0.9858509556047507, "learning_rate": 6.065942287076427e-06, "loss": 0.3999, "step": 15616 }, { "epoch": 1.4655593093093093, "grad_norm": 0.9734337251314428, "learning_rate": 6.065408866715382e-06, "loss": 0.3893, "step": 15617 }, { "epoch": 1.4656531531531531, "grad_norm": 0.9534039555198278, "learning_rate": 6.064875433651342e-06, "loss": 0.4132, "step": 15618 }, { "epoch": 1.465746996996997, "grad_norm": 0.8250706351521965, "learning_rate": 6.064341987890666e-06, "loss": 0.3953, "step": 15619 }, { "epoch": 1.4658408408408408, "grad_norm": 0.9311529190128421, "learning_rate": 6.063808529439716e-06, "loss": 0.4116, "step": 15620 }, { "epoch": 1.4659346846846848, "grad_norm": 1.1615378459854604, "learning_rate": 6.063275058304853e-06, "loss": 0.4096, "step": 15621 }, { "epoch": 1.4660285285285286, "grad_norm": 1.0134205581850328, "learning_rate": 6.062741574492434e-06, "loss": 0.4243, "step": 15622 }, { "epoch": 1.4661223723723724, "grad_norm": 0.9992291848877658, "learning_rate": 6.062208078008824e-06, "loss": 0.2998, "step": 15623 }, { "epoch": 1.4662162162162162, "grad_norm": 1.0448007462771087, "learning_rate": 6.0616745688603805e-06, "loss": 0.3802, "step": 15624 }, { "epoch": 1.46631006006006, "grad_norm": 0.8538372410580297, "learning_rate": 6.061141047053467e-06, "loss": 0.4045, "step": 15625 }, { "epoch": 1.4664039039039038, "grad_norm": 4.152856616868637, "learning_rate": 6.0606075125944445e-06, "loss": 0.3988, "step": 15626 }, { "epoch": 1.4664977477477477, "grad_norm": 0.9045685531945187, "learning_rate": 6.060073965489673e-06, "loss": 0.378, "step": 15627 }, { "epoch": 1.4665915915915915, "grad_norm": 1.049336589531636, "learning_rate": 6.0595404057455145e-06, "loss": 0.3731, "step": 15628 }, { "epoch": 1.4666854354354355, "grad_norm": 0.9296400543517942, "learning_rate": 6.059006833368332e-06, "loss": 0.3907, "step": 15629 }, { "epoch": 1.4667792792792793, "grad_norm": 1.1270822741507476, "learning_rate": 6.058473248364486e-06, "loss": 0.4207, "step": 15630 }, { "epoch": 1.4668731231231231, "grad_norm": 0.9229669783938708, "learning_rate": 6.057939650740338e-06, "loss": 0.4173, "step": 15631 }, { "epoch": 1.466966966966967, "grad_norm": 1.2719342157198144, "learning_rate": 6.057406040502251e-06, "loss": 0.4028, "step": 15632 }, { "epoch": 1.4670608108108107, "grad_norm": 1.0530298378125773, "learning_rate": 6.056872417656589e-06, "loss": 0.4261, "step": 15633 }, { "epoch": 1.4671546546546548, "grad_norm": 0.9289489214772375, "learning_rate": 6.056338782209712e-06, "loss": 0.4396, "step": 15634 }, { "epoch": 1.4672484984984986, "grad_norm": 0.9532530888106882, "learning_rate": 6.055805134167983e-06, "loss": 0.3797, "step": 15635 }, { "epoch": 1.4673423423423424, "grad_norm": 1.0481142819130018, "learning_rate": 6.0552714735377664e-06, "loss": 0.4432, "step": 15636 }, { "epoch": 1.4674361861861862, "grad_norm": 1.0593034249445745, "learning_rate": 6.054737800325422e-06, "loss": 0.4086, "step": 15637 }, { "epoch": 1.46753003003003, "grad_norm": 0.9388860196217913, "learning_rate": 6.054204114537315e-06, "loss": 0.3541, "step": 15638 }, { "epoch": 1.4676238738738738, "grad_norm": 1.0033177263601054, "learning_rate": 6.05367041617981e-06, "loss": 0.3968, "step": 15639 }, { "epoch": 1.4677177177177176, "grad_norm": 1.3127241850962144, "learning_rate": 6.053136705259267e-06, "loss": 0.4122, "step": 15640 }, { "epoch": 1.4678115615615615, "grad_norm": 1.067702138428609, "learning_rate": 6.052602981782051e-06, "loss": 0.4151, "step": 15641 }, { "epoch": 1.4679054054054055, "grad_norm": 0.9846768821246076, "learning_rate": 6.052069245754527e-06, "loss": 0.4227, "step": 15642 }, { "epoch": 1.4679992492492493, "grad_norm": 0.9786310846721389, "learning_rate": 6.0515354971830564e-06, "loss": 0.3575, "step": 15643 }, { "epoch": 1.468093093093093, "grad_norm": 1.1699662761789575, "learning_rate": 6.051001736074005e-06, "loss": 0.4206, "step": 15644 }, { "epoch": 1.468186936936937, "grad_norm": 0.9772824401575911, "learning_rate": 6.050467962433736e-06, "loss": 0.3953, "step": 15645 }, { "epoch": 1.4682807807807807, "grad_norm": 0.9304906990014631, "learning_rate": 6.049934176268613e-06, "loss": 0.4258, "step": 15646 }, { "epoch": 1.4683746246246248, "grad_norm": 1.0634638200477189, "learning_rate": 6.049400377585002e-06, "loss": 0.4263, "step": 15647 }, { "epoch": 1.4684684684684686, "grad_norm": 1.044160189749038, "learning_rate": 6.048866566389267e-06, "loss": 0.4445, "step": 15648 }, { "epoch": 1.4685623123123124, "grad_norm": 1.0216257865043281, "learning_rate": 6.048332742687773e-06, "loss": 0.4471, "step": 15649 }, { "epoch": 1.4686561561561562, "grad_norm": 0.8580296929126154, "learning_rate": 6.047798906486884e-06, "loss": 0.3775, "step": 15650 }, { "epoch": 1.46875, "grad_norm": 1.0430871286902144, "learning_rate": 6.047265057792965e-06, "loss": 0.4234, "step": 15651 }, { "epoch": 1.4688438438438438, "grad_norm": 0.9942131045563105, "learning_rate": 6.046731196612381e-06, "loss": 0.4217, "step": 15652 }, { "epoch": 1.4689376876876876, "grad_norm": 0.9102585917314848, "learning_rate": 6.046197322951497e-06, "loss": 0.3796, "step": 15653 }, { "epoch": 1.4690315315315314, "grad_norm": 1.8558883746980255, "learning_rate": 6.045663436816681e-06, "loss": 0.4022, "step": 15654 }, { "epoch": 1.4691253753753752, "grad_norm": 0.7414490636626964, "learning_rate": 6.045129538214296e-06, "loss": 0.3825, "step": 15655 }, { "epoch": 1.4692192192192193, "grad_norm": 1.0189502271689013, "learning_rate": 6.044595627150709e-06, "loss": 0.4234, "step": 15656 }, { "epoch": 1.469313063063063, "grad_norm": 0.8466490255721799, "learning_rate": 6.0440617036322855e-06, "loss": 0.4036, "step": 15657 }, { "epoch": 1.469406906906907, "grad_norm": 4.008185530863331, "learning_rate": 6.04352776766539e-06, "loss": 0.3661, "step": 15658 }, { "epoch": 1.4695007507507507, "grad_norm": 1.0028683438840944, "learning_rate": 6.042993819256391e-06, "loss": 0.4157, "step": 15659 }, { "epoch": 1.4695945945945945, "grad_norm": 0.9952485800121195, "learning_rate": 6.042459858411655e-06, "loss": 0.3717, "step": 15660 }, { "epoch": 1.4696884384384385, "grad_norm": 0.9125647665632007, "learning_rate": 6.041925885137545e-06, "loss": 0.3991, "step": 15661 }, { "epoch": 1.4697822822822824, "grad_norm": 0.8847451212276056, "learning_rate": 6.041391899440431e-06, "loss": 0.4323, "step": 15662 }, { "epoch": 1.4698761261261262, "grad_norm": 0.9382178048597929, "learning_rate": 6.040857901326681e-06, "loss": 0.3878, "step": 15663 }, { "epoch": 1.46996996996997, "grad_norm": 3.1155414965103696, "learning_rate": 6.040323890802657e-06, "loss": 0.4424, "step": 15664 }, { "epoch": 1.4700638138138138, "grad_norm": 1.5597909884022112, "learning_rate": 6.03978986787473e-06, "loss": 0.4161, "step": 15665 }, { "epoch": 1.4701576576576576, "grad_norm": 0.895726005986685, "learning_rate": 6.039255832549267e-06, "loss": 0.4217, "step": 15666 }, { "epoch": 1.4702515015015014, "grad_norm": 0.855446890603953, "learning_rate": 6.038721784832632e-06, "loss": 0.3936, "step": 15667 }, { "epoch": 1.4703453453453452, "grad_norm": 1.0251859252410982, "learning_rate": 6.038187724731196e-06, "loss": 0.4005, "step": 15668 }, { "epoch": 1.4704391891891893, "grad_norm": 1.0137931738696115, "learning_rate": 6.037653652251326e-06, "loss": 0.3917, "step": 15669 }, { "epoch": 1.470533033033033, "grad_norm": 1.1138195999821867, "learning_rate": 6.03711956739939e-06, "loss": 0.4451, "step": 15670 }, { "epoch": 1.4706268768768769, "grad_norm": 0.897735595169936, "learning_rate": 6.036585470181754e-06, "loss": 0.4397, "step": 15671 }, { "epoch": 1.4707207207207207, "grad_norm": 1.0038737886164981, "learning_rate": 6.036051360604788e-06, "loss": 0.4091, "step": 15672 }, { "epoch": 1.4708145645645645, "grad_norm": 1.2285215482144602, "learning_rate": 6.035517238674859e-06, "loss": 0.4442, "step": 15673 }, { "epoch": 1.4709084084084085, "grad_norm": 1.003166351134493, "learning_rate": 6.034983104398336e-06, "loss": 0.3695, "step": 15674 }, { "epoch": 1.4710022522522523, "grad_norm": 0.9845340999258861, "learning_rate": 6.034448957781589e-06, "loss": 0.3789, "step": 15675 }, { "epoch": 1.4710960960960962, "grad_norm": 0.932663103423752, "learning_rate": 6.0339147988309855e-06, "loss": 0.437, "step": 15676 }, { "epoch": 1.47118993993994, "grad_norm": 0.9062188730300014, "learning_rate": 6.033380627552893e-06, "loss": 0.4091, "step": 15677 }, { "epoch": 1.4712837837837838, "grad_norm": 1.032696538902803, "learning_rate": 6.032846443953681e-06, "loss": 0.4192, "step": 15678 }, { "epoch": 1.4713776276276276, "grad_norm": 1.0469650444896688, "learning_rate": 6.03231224803972e-06, "loss": 0.435, "step": 15679 }, { "epoch": 1.4714714714714714, "grad_norm": 1.068865003537982, "learning_rate": 6.031778039817378e-06, "loss": 0.3614, "step": 15680 }, { "epoch": 1.4715653153153152, "grad_norm": 0.9092364955825357, "learning_rate": 6.031243819293027e-06, "loss": 0.4043, "step": 15681 }, { "epoch": 1.4716591591591592, "grad_norm": 0.8710032329858268, "learning_rate": 6.0307095864730336e-06, "loss": 0.3797, "step": 15682 }, { "epoch": 1.471753003003003, "grad_norm": 1.1986313207523869, "learning_rate": 6.030175341363769e-06, "loss": 0.4338, "step": 15683 }, { "epoch": 1.4718468468468469, "grad_norm": 0.9079257676674782, "learning_rate": 6.029641083971601e-06, "loss": 0.3934, "step": 15684 }, { "epoch": 1.4719406906906907, "grad_norm": 0.957850765663015, "learning_rate": 6.029106814302902e-06, "loss": 0.4364, "step": 15685 }, { "epoch": 1.4720345345345345, "grad_norm": 0.9236155608448704, "learning_rate": 6.028572532364041e-06, "loss": 0.3987, "step": 15686 }, { "epoch": 1.4721283783783785, "grad_norm": 0.9120197598071184, "learning_rate": 6.028038238161389e-06, "loss": 0.3569, "step": 15687 }, { "epoch": 1.4722222222222223, "grad_norm": 1.0215350257671967, "learning_rate": 6.027503931701317e-06, "loss": 0.3801, "step": 15688 }, { "epoch": 1.4723160660660661, "grad_norm": 0.8729303406924851, "learning_rate": 6.026969612990194e-06, "loss": 0.4092, "step": 15689 }, { "epoch": 1.47240990990991, "grad_norm": 3.4340369581380275, "learning_rate": 6.026435282034391e-06, "loss": 0.3718, "step": 15690 }, { "epoch": 1.4725037537537538, "grad_norm": 0.9775921387217369, "learning_rate": 6.025900938840279e-06, "loss": 0.427, "step": 15691 }, { "epoch": 1.4725975975975976, "grad_norm": 1.1789724356643325, "learning_rate": 6.025366583414229e-06, "loss": 0.4206, "step": 15692 }, { "epoch": 1.4726914414414414, "grad_norm": 1.0897846276024834, "learning_rate": 6.024832215762615e-06, "loss": 0.3707, "step": 15693 }, { "epoch": 1.4727852852852852, "grad_norm": 0.8984601590344428, "learning_rate": 6.024297835891804e-06, "loss": 0.3493, "step": 15694 }, { "epoch": 1.472879129129129, "grad_norm": 0.9706767184622802, "learning_rate": 6.02376344380817e-06, "loss": 0.4451, "step": 15695 }, { "epoch": 1.472972972972973, "grad_norm": 0.8370721703086562, "learning_rate": 6.023229039518084e-06, "loss": 0.3884, "step": 15696 }, { "epoch": 1.4730668168168168, "grad_norm": 1.039449477769047, "learning_rate": 6.022694623027916e-06, "loss": 0.4241, "step": 15697 }, { "epoch": 1.4731606606606606, "grad_norm": 1.0618279496874106, "learning_rate": 6.022160194344041e-06, "loss": 0.4128, "step": 15698 }, { "epoch": 1.4732545045045045, "grad_norm": 0.9296992879011994, "learning_rate": 6.0216257534728305e-06, "loss": 0.4155, "step": 15699 }, { "epoch": 1.4733483483483483, "grad_norm": 0.9665901829556987, "learning_rate": 6.021091300420655e-06, "loss": 0.3855, "step": 15700 }, { "epoch": 1.4734421921921923, "grad_norm": 0.9284983924226514, "learning_rate": 6.020556835193887e-06, "loss": 0.3705, "step": 15701 }, { "epoch": 1.4735360360360361, "grad_norm": 1.1889385131130694, "learning_rate": 6.020022357798902e-06, "loss": 0.4269, "step": 15702 }, { "epoch": 1.47362987987988, "grad_norm": 0.8842545572500558, "learning_rate": 6.019487868242068e-06, "loss": 0.4162, "step": 15703 }, { "epoch": 1.4737237237237237, "grad_norm": 0.9699554165489944, "learning_rate": 6.018953366529761e-06, "loss": 0.3826, "step": 15704 }, { "epoch": 1.4738175675675675, "grad_norm": 2.1182300882990694, "learning_rate": 6.018418852668353e-06, "loss": 0.4403, "step": 15705 }, { "epoch": 1.4739114114114114, "grad_norm": 0.9166227685616468, "learning_rate": 6.017884326664218e-06, "loss": 0.3958, "step": 15706 }, { "epoch": 1.4740052552552552, "grad_norm": 0.914935810148512, "learning_rate": 6.017349788523728e-06, "loss": 0.394, "step": 15707 }, { "epoch": 1.474099099099099, "grad_norm": 0.9704653477079348, "learning_rate": 6.016815238253256e-06, "loss": 0.4026, "step": 15708 }, { "epoch": 1.474192942942943, "grad_norm": 1.1376607896868502, "learning_rate": 6.016280675859177e-06, "loss": 0.3823, "step": 15709 }, { "epoch": 1.4742867867867868, "grad_norm": 4.93861958885246, "learning_rate": 6.015746101347864e-06, "loss": 0.4047, "step": 15710 }, { "epoch": 1.4743806306306306, "grad_norm": 0.9306672712802959, "learning_rate": 6.015211514725689e-06, "loss": 0.4199, "step": 15711 }, { "epoch": 1.4744744744744744, "grad_norm": 1.0057317595312767, "learning_rate": 6.01467691599903e-06, "loss": 0.426, "step": 15712 }, { "epoch": 1.4745683183183182, "grad_norm": 1.047207423204269, "learning_rate": 6.014142305174258e-06, "loss": 0.3646, "step": 15713 }, { "epoch": 1.4746621621621623, "grad_norm": 1.1133500622239438, "learning_rate": 6.013607682257747e-06, "loss": 0.4064, "step": 15714 }, { "epoch": 1.474756006006006, "grad_norm": 0.9142081117582144, "learning_rate": 6.013073047255873e-06, "loss": 0.4018, "step": 15715 }, { "epoch": 1.47484984984985, "grad_norm": 0.9753618996506533, "learning_rate": 6.01253840017501e-06, "loss": 0.4363, "step": 15716 }, { "epoch": 1.4749436936936937, "grad_norm": 1.0081037689358625, "learning_rate": 6.012003741021532e-06, "loss": 0.3669, "step": 15717 }, { "epoch": 1.4750375375375375, "grad_norm": 0.9182593575027086, "learning_rate": 6.011469069801815e-06, "loss": 0.3988, "step": 15718 }, { "epoch": 1.4751313813813813, "grad_norm": 0.9502058489251973, "learning_rate": 6.010934386522233e-06, "loss": 0.412, "step": 15719 }, { "epoch": 1.4752252252252251, "grad_norm": 0.9027728531933492, "learning_rate": 6.01039969118916e-06, "loss": 0.4243, "step": 15720 }, { "epoch": 1.475319069069069, "grad_norm": 1.0047377920029117, "learning_rate": 6.009864983808974e-06, "loss": 0.419, "step": 15721 }, { "epoch": 1.475412912912913, "grad_norm": 0.9125508670944408, "learning_rate": 6.009330264388048e-06, "loss": 0.3763, "step": 15722 }, { "epoch": 1.4755067567567568, "grad_norm": 0.8952339922187817, "learning_rate": 6.00879553293276e-06, "loss": 0.3937, "step": 15723 }, { "epoch": 1.4756006006006006, "grad_norm": 1.2141177468692155, "learning_rate": 6.008260789449483e-06, "loss": 0.3505, "step": 15724 }, { "epoch": 1.4756944444444444, "grad_norm": 1.203395702142167, "learning_rate": 6.007726033944594e-06, "loss": 0.3892, "step": 15725 }, { "epoch": 1.4757882882882882, "grad_norm": 0.9209651835252869, "learning_rate": 6.0071912664244695e-06, "loss": 0.4199, "step": 15726 }, { "epoch": 1.4758821321321323, "grad_norm": 0.8806068106815953, "learning_rate": 6.006656486895484e-06, "loss": 0.3972, "step": 15727 }, { "epoch": 1.475975975975976, "grad_norm": 1.5300697961402556, "learning_rate": 6.0061216953640145e-06, "loss": 0.4, "step": 15728 }, { "epoch": 1.4760698198198199, "grad_norm": 0.9892785157646742, "learning_rate": 6.005586891836439e-06, "loss": 0.4107, "step": 15729 }, { "epoch": 1.4761636636636637, "grad_norm": 0.9652288955271245, "learning_rate": 6.005052076319131e-06, "loss": 0.3902, "step": 15730 }, { "epoch": 1.4762575075075075, "grad_norm": 0.8949409502850627, "learning_rate": 6.004517248818471e-06, "loss": 0.4035, "step": 15731 }, { "epoch": 1.4763513513513513, "grad_norm": 0.904408624602607, "learning_rate": 6.003982409340833e-06, "loss": 0.4019, "step": 15732 }, { "epoch": 1.4764451951951951, "grad_norm": 0.9326008404195839, "learning_rate": 6.003447557892594e-06, "loss": 0.4455, "step": 15733 }, { "epoch": 1.476539039039039, "grad_norm": 1.0872116410674517, "learning_rate": 6.00291269448013e-06, "loss": 0.3832, "step": 15734 }, { "epoch": 1.4766328828828827, "grad_norm": 1.0360002362287912, "learning_rate": 6.0023778191098225e-06, "loss": 0.437, "step": 15735 }, { "epoch": 1.4767267267267268, "grad_norm": 0.887858228946701, "learning_rate": 6.0018429317880446e-06, "loss": 0.3595, "step": 15736 }, { "epoch": 1.4768205705705706, "grad_norm": 1.0915381564617952, "learning_rate": 6.001308032521177e-06, "loss": 0.4166, "step": 15737 }, { "epoch": 1.4769144144144144, "grad_norm": 1.2267179046122698, "learning_rate": 6.000773121315595e-06, "loss": 0.3692, "step": 15738 }, { "epoch": 1.4770082582582582, "grad_norm": 2.1319236448932473, "learning_rate": 6.000238198177677e-06, "loss": 0.383, "step": 15739 }, { "epoch": 1.477102102102102, "grad_norm": 0.9124361537887216, "learning_rate": 5.999703263113802e-06, "loss": 0.4118, "step": 15740 }, { "epoch": 1.477195945945946, "grad_norm": 1.5100420041446239, "learning_rate": 5.999168316130347e-06, "loss": 0.3684, "step": 15741 }, { "epoch": 1.4772897897897899, "grad_norm": 1.210758853351212, "learning_rate": 5.99863335723369e-06, "loss": 0.4371, "step": 15742 }, { "epoch": 1.4773836336336337, "grad_norm": 1.1297529839991773, "learning_rate": 5.9980983864302115e-06, "loss": 0.46, "step": 15743 }, { "epoch": 1.4774774774774775, "grad_norm": 0.8133486659479133, "learning_rate": 5.997563403726288e-06, "loss": 0.35, "step": 15744 }, { "epoch": 1.4775713213213213, "grad_norm": 1.6194190848687888, "learning_rate": 5.997028409128297e-06, "loss": 0.3953, "step": 15745 }, { "epoch": 1.477665165165165, "grad_norm": 0.9198394348103553, "learning_rate": 5.99649340264262e-06, "loss": 0.4305, "step": 15746 }, { "epoch": 1.477759009009009, "grad_norm": 1.0700589628870778, "learning_rate": 5.995958384275635e-06, "loss": 0.3796, "step": 15747 }, { "epoch": 1.4778528528528527, "grad_norm": 0.86063449105427, "learning_rate": 5.99542335403372e-06, "loss": 0.388, "step": 15748 }, { "epoch": 1.4779466966966968, "grad_norm": 1.4222054807316598, "learning_rate": 5.994888311923257e-06, "loss": 0.3996, "step": 15749 }, { "epoch": 1.4780405405405406, "grad_norm": 0.9219560660020099, "learning_rate": 5.994353257950623e-06, "loss": 0.415, "step": 15750 }, { "epoch": 1.4781343843843844, "grad_norm": 0.8243945072820715, "learning_rate": 5.993818192122198e-06, "loss": 0.4031, "step": 15751 }, { "epoch": 1.4782282282282282, "grad_norm": 2.5290759443417796, "learning_rate": 5.9932831144443605e-06, "loss": 0.4394, "step": 15752 }, { "epoch": 1.478322072072072, "grad_norm": 0.9263942217052967, "learning_rate": 5.992748024923494e-06, "loss": 0.3644, "step": 15753 }, { "epoch": 1.478415915915916, "grad_norm": 0.8857490384788685, "learning_rate": 5.992212923565973e-06, "loss": 0.4239, "step": 15754 }, { "epoch": 1.4785097597597598, "grad_norm": 0.954635634100996, "learning_rate": 5.991677810378183e-06, "loss": 0.4085, "step": 15755 }, { "epoch": 1.4786036036036037, "grad_norm": 0.9255270886308619, "learning_rate": 5.991142685366501e-06, "loss": 0.429, "step": 15756 }, { "epoch": 1.4786974474474475, "grad_norm": 0.943742394831324, "learning_rate": 5.9906075485373074e-06, "loss": 0.3498, "step": 15757 }, { "epoch": 1.4787912912912913, "grad_norm": 0.9671214139799728, "learning_rate": 5.990072399896983e-06, "loss": 0.4, "step": 15758 }, { "epoch": 1.478885135135135, "grad_norm": 0.9761348038917315, "learning_rate": 5.98953723945191e-06, "loss": 0.4046, "step": 15759 }, { "epoch": 1.478978978978979, "grad_norm": 0.8187948555033825, "learning_rate": 5.989002067208467e-06, "loss": 0.3802, "step": 15760 }, { "epoch": 1.4790728228228227, "grad_norm": 0.9278646205929953, "learning_rate": 5.9884668831730376e-06, "loss": 0.4208, "step": 15761 }, { "epoch": 1.4791666666666667, "grad_norm": 0.9695447217088434, "learning_rate": 5.987931687352001e-06, "loss": 0.3818, "step": 15762 }, { "epoch": 1.4792605105105106, "grad_norm": 0.9102763986674078, "learning_rate": 5.987396479751737e-06, "loss": 0.3812, "step": 15763 }, { "epoch": 1.4793543543543544, "grad_norm": 0.9389689482296365, "learning_rate": 5.9868612603786295e-06, "loss": 0.374, "step": 15764 }, { "epoch": 1.4794481981981982, "grad_norm": 0.9169904718235126, "learning_rate": 5.98632602923906e-06, "loss": 0.3333, "step": 15765 }, { "epoch": 1.479542042042042, "grad_norm": 0.9108815072885051, "learning_rate": 5.9857907863394076e-06, "loss": 0.4387, "step": 15766 }, { "epoch": 1.479635885885886, "grad_norm": 0.8383048875578646, "learning_rate": 5.985255531686056e-06, "loss": 0.3943, "step": 15767 }, { "epoch": 1.4797297297297298, "grad_norm": 0.8975244491703872, "learning_rate": 5.984720265285387e-06, "loss": 0.3469, "step": 15768 }, { "epoch": 1.4798235735735736, "grad_norm": 0.9749495813996365, "learning_rate": 5.984184987143783e-06, "loss": 0.4106, "step": 15769 }, { "epoch": 1.4799174174174174, "grad_norm": 1.0496640055298891, "learning_rate": 5.983649697267624e-06, "loss": 0.4101, "step": 15770 }, { "epoch": 1.4800112612612613, "grad_norm": 0.9655919725897328, "learning_rate": 5.983114395663296e-06, "loss": 0.4081, "step": 15771 }, { "epoch": 1.480105105105105, "grad_norm": 0.8866085387191752, "learning_rate": 5.982579082337177e-06, "loss": 0.4153, "step": 15772 }, { "epoch": 1.4801989489489489, "grad_norm": 1.100258917389475, "learning_rate": 5.982043757295653e-06, "loss": 0.4326, "step": 15773 }, { "epoch": 1.4802927927927927, "grad_norm": 2.0649373088355203, "learning_rate": 5.981508420545107e-06, "loss": 0.3826, "step": 15774 }, { "epoch": 1.4803866366366365, "grad_norm": 0.9870493990783655, "learning_rate": 5.980973072091919e-06, "loss": 0.3756, "step": 15775 }, { "epoch": 1.4804804804804805, "grad_norm": 1.0852714939997243, "learning_rate": 5.980437711942474e-06, "loss": 0.4143, "step": 15776 }, { "epoch": 1.4805743243243243, "grad_norm": 0.9607035316218082, "learning_rate": 5.979902340103156e-06, "loss": 0.3946, "step": 15777 }, { "epoch": 1.4806681681681682, "grad_norm": 0.8205345163034364, "learning_rate": 5.979366956580344e-06, "loss": 0.3759, "step": 15778 }, { "epoch": 1.480762012012012, "grad_norm": 0.9557544100548165, "learning_rate": 5.978831561380427e-06, "loss": 0.4203, "step": 15779 }, { "epoch": 1.4808558558558558, "grad_norm": 0.9658547447817082, "learning_rate": 5.978296154509786e-06, "loss": 0.3486, "step": 15780 }, { "epoch": 1.4809496996996998, "grad_norm": 1.0684925814375326, "learning_rate": 5.977760735974804e-06, "loss": 0.4169, "step": 15781 }, { "epoch": 1.4810435435435436, "grad_norm": 1.0489416720905416, "learning_rate": 5.9772253057818655e-06, "loss": 0.4245, "step": 15782 }, { "epoch": 1.4811373873873874, "grad_norm": 0.9741419862396923, "learning_rate": 5.976689863937355e-06, "loss": 0.4228, "step": 15783 }, { "epoch": 1.4812312312312312, "grad_norm": 1.189292065552911, "learning_rate": 5.976154410447656e-06, "loss": 0.4162, "step": 15784 }, { "epoch": 1.481325075075075, "grad_norm": 1.0165709131274756, "learning_rate": 5.975618945319152e-06, "loss": 0.4074, "step": 15785 }, { "epoch": 1.4814189189189189, "grad_norm": 1.038809297647556, "learning_rate": 5.975083468558232e-06, "loss": 0.4013, "step": 15786 }, { "epoch": 1.4815127627627627, "grad_norm": 1.0162381995369272, "learning_rate": 5.974547980171276e-06, "loss": 0.4148, "step": 15787 }, { "epoch": 1.4816066066066065, "grad_norm": 0.9452539902837858, "learning_rate": 5.9740124801646686e-06, "loss": 0.4215, "step": 15788 }, { "epoch": 1.4817004504504505, "grad_norm": 1.031839131450953, "learning_rate": 5.9734769685447954e-06, "loss": 0.3877, "step": 15789 }, { "epoch": 1.4817942942942943, "grad_norm": 0.8530317765637092, "learning_rate": 5.972941445318041e-06, "loss": 0.3495, "step": 15790 }, { "epoch": 1.4818881381381381, "grad_norm": 1.0516024462956728, "learning_rate": 5.972405910490793e-06, "loss": 0.4425, "step": 15791 }, { "epoch": 1.481981981981982, "grad_norm": 1.0297791384903174, "learning_rate": 5.971870364069434e-06, "loss": 0.3979, "step": 15792 }, { "epoch": 1.4820758258258258, "grad_norm": 1.092380743188129, "learning_rate": 5.971334806060351e-06, "loss": 0.4022, "step": 15793 }, { "epoch": 1.4821696696696698, "grad_norm": 0.8401878655177598, "learning_rate": 5.970799236469929e-06, "loss": 0.4025, "step": 15794 }, { "epoch": 1.4822635135135136, "grad_norm": 0.924113756127345, "learning_rate": 5.9702636553045535e-06, "loss": 0.4139, "step": 15795 }, { "epoch": 1.4823573573573574, "grad_norm": 1.0392939243109987, "learning_rate": 5.969728062570609e-06, "loss": 0.4088, "step": 15796 }, { "epoch": 1.4824512012012012, "grad_norm": 0.8366625394635854, "learning_rate": 5.969192458274482e-06, "loss": 0.3826, "step": 15797 }, { "epoch": 1.482545045045045, "grad_norm": 1.1106165369761025, "learning_rate": 5.968656842422562e-06, "loss": 0.3929, "step": 15798 }, { "epoch": 1.4826388888888888, "grad_norm": 0.9108478685541098, "learning_rate": 5.968121215021231e-06, "loss": 0.3728, "step": 15799 }, { "epoch": 1.4827327327327327, "grad_norm": 0.8806288355196363, "learning_rate": 5.967585576076876e-06, "loss": 0.4185, "step": 15800 }, { "epoch": 1.4828265765765765, "grad_norm": 0.9357639567023717, "learning_rate": 5.9670499255958855e-06, "loss": 0.4164, "step": 15801 }, { "epoch": 1.4829204204204205, "grad_norm": 0.8755258272170631, "learning_rate": 5.966514263584644e-06, "loss": 0.3856, "step": 15802 }, { "epoch": 1.4830142642642643, "grad_norm": 0.8666902231186941, "learning_rate": 5.965978590049539e-06, "loss": 0.3652, "step": 15803 }, { "epoch": 1.4831081081081081, "grad_norm": 1.051578585687575, "learning_rate": 5.96544290499696e-06, "loss": 0.3632, "step": 15804 }, { "epoch": 1.483201951951952, "grad_norm": 1.1051105294926853, "learning_rate": 5.9649072084332905e-06, "loss": 0.4481, "step": 15805 }, { "epoch": 1.4832957957957957, "grad_norm": 0.9451677402846721, "learning_rate": 5.9643715003649174e-06, "loss": 0.3849, "step": 15806 }, { "epoch": 1.4833896396396398, "grad_norm": 0.8525792224324115, "learning_rate": 5.9638357807982305e-06, "loss": 0.3953, "step": 15807 }, { "epoch": 1.4834834834834836, "grad_norm": 0.9970204559561787, "learning_rate": 5.963300049739615e-06, "loss": 0.3582, "step": 15808 }, { "epoch": 1.4835773273273274, "grad_norm": 0.9936755135241536, "learning_rate": 5.9627643071954614e-06, "loss": 0.4348, "step": 15809 }, { "epoch": 1.4836711711711712, "grad_norm": 0.9312789764459187, "learning_rate": 5.962228553172154e-06, "loss": 0.3803, "step": 15810 }, { "epoch": 1.483765015015015, "grad_norm": 0.9641151752336838, "learning_rate": 5.961692787676084e-06, "loss": 0.3697, "step": 15811 }, { "epoch": 1.4838588588588588, "grad_norm": 1.0485698513032282, "learning_rate": 5.961157010713637e-06, "loss": 0.3791, "step": 15812 }, { "epoch": 1.4839527027027026, "grad_norm": 0.9140143612528272, "learning_rate": 5.960621222291201e-06, "loss": 0.4303, "step": 15813 }, { "epoch": 1.4840465465465464, "grad_norm": 1.0980842046069992, "learning_rate": 5.960085422415165e-06, "loss": 0.3695, "step": 15814 }, { "epoch": 1.4841403903903903, "grad_norm": 1.03716049626127, "learning_rate": 5.959549611091918e-06, "loss": 0.4077, "step": 15815 }, { "epoch": 1.4842342342342343, "grad_norm": 0.9485836295830332, "learning_rate": 5.959013788327849e-06, "loss": 0.411, "step": 15816 }, { "epoch": 1.484328078078078, "grad_norm": 1.0886750110329828, "learning_rate": 5.958477954129346e-06, "loss": 0.4497, "step": 15817 }, { "epoch": 1.484421921921922, "grad_norm": 0.9939926996935873, "learning_rate": 5.957942108502795e-06, "loss": 0.4249, "step": 15818 }, { "epoch": 1.4845157657657657, "grad_norm": 0.8511193713565073, "learning_rate": 5.9574062514545906e-06, "loss": 0.4243, "step": 15819 }, { "epoch": 1.4846096096096097, "grad_norm": 0.9615209816972244, "learning_rate": 5.956870382991117e-06, "loss": 0.4346, "step": 15820 }, { "epoch": 1.4847034534534536, "grad_norm": 0.9829616102649039, "learning_rate": 5.9563345031187646e-06, "loss": 0.4057, "step": 15821 }, { "epoch": 1.4847972972972974, "grad_norm": 2.395525977472519, "learning_rate": 5.955798611843925e-06, "loss": 0.4619, "step": 15822 }, { "epoch": 1.4848911411411412, "grad_norm": 1.2167466303545411, "learning_rate": 5.955262709172985e-06, "loss": 0.4114, "step": 15823 }, { "epoch": 1.484984984984985, "grad_norm": 0.9290665836956026, "learning_rate": 5.954726795112336e-06, "loss": 0.355, "step": 15824 }, { "epoch": 1.4850788288288288, "grad_norm": 7.2275423323475385, "learning_rate": 5.954190869668366e-06, "loss": 0.3864, "step": 15825 }, { "epoch": 1.4851726726726726, "grad_norm": 1.1303030210612497, "learning_rate": 5.9536549328474665e-06, "loss": 0.4341, "step": 15826 }, { "epoch": 1.4852665165165164, "grad_norm": 0.9930829246936721, "learning_rate": 5.953118984656027e-06, "loss": 0.3537, "step": 15827 }, { "epoch": 1.4853603603603602, "grad_norm": 0.9612308279542285, "learning_rate": 5.952583025100438e-06, "loss": 0.4177, "step": 15828 }, { "epoch": 1.4854542042042043, "grad_norm": 0.9862573257471802, "learning_rate": 5.95204705418709e-06, "loss": 0.4127, "step": 15829 }, { "epoch": 1.485548048048048, "grad_norm": 0.9522878832490624, "learning_rate": 5.951511071922371e-06, "loss": 0.4333, "step": 15830 }, { "epoch": 1.4856418918918919, "grad_norm": 0.928192063449058, "learning_rate": 5.950975078312675e-06, "loss": 0.3925, "step": 15831 }, { "epoch": 1.4857357357357357, "grad_norm": 1.7166287578279191, "learning_rate": 5.9504390733643914e-06, "loss": 0.4195, "step": 15832 }, { "epoch": 1.4858295795795795, "grad_norm": 1.2537056756922949, "learning_rate": 5.949903057083909e-06, "loss": 0.4084, "step": 15833 }, { "epoch": 1.4859234234234235, "grad_norm": 0.8522556263466602, "learning_rate": 5.949367029477623e-06, "loss": 0.4074, "step": 15834 }, { "epoch": 1.4860172672672673, "grad_norm": 1.1013155846992566, "learning_rate": 5.948830990551921e-06, "loss": 0.3732, "step": 15835 }, { "epoch": 1.4861111111111112, "grad_norm": 0.8583443023988321, "learning_rate": 5.948294940313195e-06, "loss": 0.3747, "step": 15836 }, { "epoch": 1.486204954954955, "grad_norm": 1.0045505731733084, "learning_rate": 5.947758878767839e-06, "loss": 0.4245, "step": 15837 }, { "epoch": 1.4862987987987988, "grad_norm": 0.883104362502382, "learning_rate": 5.947222805922239e-06, "loss": 0.3636, "step": 15838 }, { "epoch": 1.4863926426426426, "grad_norm": 1.065999012687426, "learning_rate": 5.946686721782792e-06, "loss": 0.4137, "step": 15839 }, { "epoch": 1.4864864864864864, "grad_norm": 0.9379847978199214, "learning_rate": 5.946150626355888e-06, "loss": 0.4424, "step": 15840 }, { "epoch": 1.4865803303303302, "grad_norm": 0.9217097644020502, "learning_rate": 5.9456145196479185e-06, "loss": 0.396, "step": 15841 }, { "epoch": 1.4866741741741742, "grad_norm": 1.236882147363838, "learning_rate": 5.945078401665275e-06, "loss": 0.3755, "step": 15842 }, { "epoch": 1.486768018018018, "grad_norm": 1.0436882863925716, "learning_rate": 5.944542272414352e-06, "loss": 0.4519, "step": 15843 }, { "epoch": 1.4868618618618619, "grad_norm": 0.8838741876407095, "learning_rate": 5.944006131901538e-06, "loss": 0.3933, "step": 15844 }, { "epoch": 1.4869557057057057, "grad_norm": 1.2425232641899573, "learning_rate": 5.94346998013323e-06, "loss": 0.4323, "step": 15845 }, { "epoch": 1.4870495495495495, "grad_norm": 0.9010473972499493, "learning_rate": 5.942933817115818e-06, "loss": 0.4236, "step": 15846 }, { "epoch": 1.4871433933933935, "grad_norm": 1.1593745020483806, "learning_rate": 5.942397642855695e-06, "loss": 0.3734, "step": 15847 }, { "epoch": 1.4872372372372373, "grad_norm": 1.1349330866084653, "learning_rate": 5.941861457359252e-06, "loss": 0.4011, "step": 15848 }, { "epoch": 1.4873310810810811, "grad_norm": 0.8888298184852076, "learning_rate": 5.941325260632888e-06, "loss": 0.3859, "step": 15849 }, { "epoch": 1.487424924924925, "grad_norm": 0.9255435106068625, "learning_rate": 5.940789052682988e-06, "loss": 0.3937, "step": 15850 }, { "epoch": 1.4875187687687688, "grad_norm": 0.9054692313837298, "learning_rate": 5.9402528335159515e-06, "loss": 0.3801, "step": 15851 }, { "epoch": 1.4876126126126126, "grad_norm": 0.8934828466836207, "learning_rate": 5.939716603138169e-06, "loss": 0.3466, "step": 15852 }, { "epoch": 1.4877064564564564, "grad_norm": 0.9016888446629676, "learning_rate": 5.939180361556035e-06, "loss": 0.4169, "step": 15853 }, { "epoch": 1.4878003003003002, "grad_norm": 0.825305355780497, "learning_rate": 5.938644108775944e-06, "loss": 0.4119, "step": 15854 }, { "epoch": 1.4878941441441442, "grad_norm": 0.9567111465931839, "learning_rate": 5.938107844804288e-06, "loss": 0.4566, "step": 15855 }, { "epoch": 1.487987987987988, "grad_norm": 1.212547817692445, "learning_rate": 5.937571569647462e-06, "loss": 0.4077, "step": 15856 }, { "epoch": 1.4880818318318318, "grad_norm": 1.556118139265343, "learning_rate": 5.937035283311859e-06, "loss": 0.4537, "step": 15857 }, { "epoch": 1.4881756756756757, "grad_norm": 0.9496374750578941, "learning_rate": 5.936498985803876e-06, "loss": 0.4067, "step": 15858 }, { "epoch": 1.4882695195195195, "grad_norm": 0.9095952359760759, "learning_rate": 5.935962677129904e-06, "loss": 0.4346, "step": 15859 }, { "epoch": 1.4883633633633635, "grad_norm": 0.8466995485242346, "learning_rate": 5.9354263572963385e-06, "loss": 0.3902, "step": 15860 }, { "epoch": 1.4884572072072073, "grad_norm": 0.9175347687596553, "learning_rate": 5.934890026309577e-06, "loss": 0.4346, "step": 15861 }, { "epoch": 1.4885510510510511, "grad_norm": 1.2550783638536738, "learning_rate": 5.934353684176009e-06, "loss": 0.3789, "step": 15862 }, { "epoch": 1.488644894894895, "grad_norm": 1.046759000736823, "learning_rate": 5.933817330902031e-06, "loss": 0.4309, "step": 15863 }, { "epoch": 1.4887387387387387, "grad_norm": 0.8973871999714751, "learning_rate": 5.9332809664940415e-06, "loss": 0.4541, "step": 15864 }, { "epoch": 1.4888325825825826, "grad_norm": 1.0587310573259392, "learning_rate": 5.932744590958432e-06, "loss": 0.4195, "step": 15865 }, { "epoch": 1.4889264264264264, "grad_norm": 1.1663972763359738, "learning_rate": 5.932208204301599e-06, "loss": 0.3824, "step": 15866 }, { "epoch": 1.4890202702702702, "grad_norm": 1.9661929725779126, "learning_rate": 5.931671806529939e-06, "loss": 0.3763, "step": 15867 }, { "epoch": 1.489114114114114, "grad_norm": 0.9548070084572804, "learning_rate": 5.931135397649846e-06, "loss": 0.386, "step": 15868 }, { "epoch": 1.489207957957958, "grad_norm": 0.8877162991726484, "learning_rate": 5.930598977667716e-06, "loss": 0.3564, "step": 15869 }, { "epoch": 1.4893018018018018, "grad_norm": 0.8436722234727517, "learning_rate": 5.9300625465899445e-06, "loss": 0.3666, "step": 15870 }, { "epoch": 1.4893956456456456, "grad_norm": 0.9156387971165665, "learning_rate": 5.9295261044229265e-06, "loss": 0.3912, "step": 15871 }, { "epoch": 1.4894894894894894, "grad_norm": 0.8453084915531817, "learning_rate": 5.928989651173062e-06, "loss": 0.4099, "step": 15872 }, { "epoch": 1.4895833333333333, "grad_norm": 2.526319471169031, "learning_rate": 5.9284531868467436e-06, "loss": 0.4013, "step": 15873 }, { "epoch": 1.4896771771771773, "grad_norm": 1.510922270847844, "learning_rate": 5.927916711450368e-06, "loss": 0.4317, "step": 15874 }, { "epoch": 1.489771021021021, "grad_norm": 0.9311878086872247, "learning_rate": 5.927380224990332e-06, "loss": 0.3787, "step": 15875 }, { "epoch": 1.489864864864865, "grad_norm": 1.0209654563728623, "learning_rate": 5.926843727473033e-06, "loss": 0.4099, "step": 15876 }, { "epoch": 1.4899587087087087, "grad_norm": 1.028884052909181, "learning_rate": 5.926307218904867e-06, "loss": 0.4338, "step": 15877 }, { "epoch": 1.4900525525525525, "grad_norm": 1.4286526789316518, "learning_rate": 5.925770699292231e-06, "loss": 0.4056, "step": 15878 }, { "epoch": 1.4901463963963963, "grad_norm": 0.8617329156980706, "learning_rate": 5.925234168641524e-06, "loss": 0.3704, "step": 15879 }, { "epoch": 1.4902402402402402, "grad_norm": 1.0302459170066378, "learning_rate": 5.9246976269591385e-06, "loss": 0.4186, "step": 15880 }, { "epoch": 1.490334084084084, "grad_norm": 0.9978508607429872, "learning_rate": 5.924161074251475e-06, "loss": 0.4014, "step": 15881 }, { "epoch": 1.490427927927928, "grad_norm": 0.9965235758767954, "learning_rate": 5.9236245105249306e-06, "loss": 0.38, "step": 15882 }, { "epoch": 1.4905217717717718, "grad_norm": 1.080706994627906, "learning_rate": 5.923087935785901e-06, "loss": 0.4611, "step": 15883 }, { "epoch": 1.4906156156156156, "grad_norm": 0.8883259964647054, "learning_rate": 5.922551350040787e-06, "loss": 0.4071, "step": 15884 }, { "epoch": 1.4907094594594594, "grad_norm": 1.169471271262736, "learning_rate": 5.9220147532959845e-06, "loss": 0.4301, "step": 15885 }, { "epoch": 1.4908033033033032, "grad_norm": 0.9362074749443582, "learning_rate": 5.92147814555789e-06, "loss": 0.4033, "step": 15886 }, { "epoch": 1.4908971471471473, "grad_norm": 0.9834102048807615, "learning_rate": 5.920941526832905e-06, "loss": 0.3636, "step": 15887 }, { "epoch": 1.490990990990991, "grad_norm": 2.5510204762356428, "learning_rate": 5.9204048971274255e-06, "loss": 0.3733, "step": 15888 }, { "epoch": 1.491084834834835, "grad_norm": 0.8750875225998458, "learning_rate": 5.919868256447849e-06, "loss": 0.4272, "step": 15889 }, { "epoch": 1.4911786786786787, "grad_norm": 0.7664942894507636, "learning_rate": 5.919331604800577e-06, "loss": 0.3559, "step": 15890 }, { "epoch": 1.4912725225225225, "grad_norm": 0.9742847029101367, "learning_rate": 5.918794942192005e-06, "loss": 0.4126, "step": 15891 }, { "epoch": 1.4913663663663663, "grad_norm": 0.9737587276604118, "learning_rate": 5.918258268628532e-06, "loss": 0.3792, "step": 15892 }, { "epoch": 1.4914602102102101, "grad_norm": 1.2007377045989365, "learning_rate": 5.917721584116558e-06, "loss": 0.3852, "step": 15893 }, { "epoch": 1.491554054054054, "grad_norm": 0.9546968233065272, "learning_rate": 5.9171848886624815e-06, "loss": 0.4237, "step": 15894 }, { "epoch": 1.491647897897898, "grad_norm": 0.8584418565307407, "learning_rate": 5.916648182272702e-06, "loss": 0.3875, "step": 15895 }, { "epoch": 1.4917417417417418, "grad_norm": 1.0036944870118938, "learning_rate": 5.9161114649536186e-06, "loss": 0.3859, "step": 15896 }, { "epoch": 1.4918355855855856, "grad_norm": 3.0379117493259753, "learning_rate": 5.91557473671163e-06, "loss": 0.4367, "step": 15897 }, { "epoch": 1.4919294294294294, "grad_norm": 1.0687922044817186, "learning_rate": 5.915037997553136e-06, "loss": 0.4098, "step": 15898 }, { "epoch": 1.4920232732732732, "grad_norm": 1.035507174172985, "learning_rate": 5.914501247484536e-06, "loss": 0.4058, "step": 15899 }, { "epoch": 1.4921171171171173, "grad_norm": 1.0116371671424298, "learning_rate": 5.91396448651223e-06, "loss": 0.4063, "step": 15900 }, { "epoch": 1.492210960960961, "grad_norm": 0.967150369777219, "learning_rate": 5.913427714642618e-06, "loss": 0.4161, "step": 15901 }, { "epoch": 1.4923048048048049, "grad_norm": 0.9764096740064248, "learning_rate": 5.9128909318821e-06, "loss": 0.418, "step": 15902 }, { "epoch": 1.4923986486486487, "grad_norm": 0.9761022218847643, "learning_rate": 5.9123541382370775e-06, "loss": 0.3702, "step": 15903 }, { "epoch": 1.4924924924924925, "grad_norm": 0.8812283653430228, "learning_rate": 5.911817333713947e-06, "loss": 0.3829, "step": 15904 }, { "epoch": 1.4925863363363363, "grad_norm": 0.8573800818553831, "learning_rate": 5.9112805183191115e-06, "loss": 0.3879, "step": 15905 }, { "epoch": 1.4926801801801801, "grad_norm": 0.9585846604602594, "learning_rate": 5.910743692058971e-06, "loss": 0.3986, "step": 15906 }, { "epoch": 1.492774024024024, "grad_norm": 0.9776286796569369, "learning_rate": 5.910206854939927e-06, "loss": 0.4248, "step": 15907 }, { "epoch": 1.4928678678678677, "grad_norm": 0.9816368445894016, "learning_rate": 5.909670006968379e-06, "loss": 0.3892, "step": 15908 }, { "epoch": 1.4929617117117118, "grad_norm": 0.9447836115582259, "learning_rate": 5.90913314815073e-06, "loss": 0.4116, "step": 15909 }, { "epoch": 1.4930555555555556, "grad_norm": 0.9640024215474142, "learning_rate": 5.908596278493377e-06, "loss": 0.4232, "step": 15910 }, { "epoch": 1.4931493993993994, "grad_norm": 1.322039588541065, "learning_rate": 5.908059398002725e-06, "loss": 0.3862, "step": 15911 }, { "epoch": 1.4932432432432432, "grad_norm": 1.9905521693520087, "learning_rate": 5.907522506685174e-06, "loss": 0.4086, "step": 15912 }, { "epoch": 1.493337087087087, "grad_norm": 0.9713234036799561, "learning_rate": 5.906985604547124e-06, "loss": 0.3842, "step": 15913 }, { "epoch": 1.493430930930931, "grad_norm": 1.6794972609001826, "learning_rate": 5.906448691594979e-06, "loss": 0.4279, "step": 15914 }, { "epoch": 1.4935247747747749, "grad_norm": 2.142684286391876, "learning_rate": 5.905911767835141e-06, "loss": 0.3864, "step": 15915 }, { "epoch": 1.4936186186186187, "grad_norm": 0.8965860006213963, "learning_rate": 5.905374833274009e-06, "loss": 0.3772, "step": 15916 }, { "epoch": 1.4937124624624625, "grad_norm": 0.9229416170121797, "learning_rate": 5.904837887917986e-06, "loss": 0.415, "step": 15917 }, { "epoch": 1.4938063063063063, "grad_norm": 0.8766536571863416, "learning_rate": 5.9043009317734745e-06, "loss": 0.3959, "step": 15918 }, { "epoch": 1.49390015015015, "grad_norm": 0.9339924182112261, "learning_rate": 5.903763964846876e-06, "loss": 0.401, "step": 15919 }, { "epoch": 1.493993993993994, "grad_norm": 1.0099436466693372, "learning_rate": 5.903226987144594e-06, "loss": 0.374, "step": 15920 }, { "epoch": 1.4940878378378377, "grad_norm": 0.8999302849584078, "learning_rate": 5.90268999867303e-06, "loss": 0.3649, "step": 15921 }, { "epoch": 1.4941816816816818, "grad_norm": 0.881846848208593, "learning_rate": 5.902152999438588e-06, "loss": 0.3853, "step": 15922 }, { "epoch": 1.4942755255255256, "grad_norm": 1.4112370221163175, "learning_rate": 5.901615989447669e-06, "loss": 0.3879, "step": 15923 }, { "epoch": 1.4943693693693694, "grad_norm": 0.9969017773260814, "learning_rate": 5.901078968706677e-06, "loss": 0.4253, "step": 15924 }, { "epoch": 1.4944632132132132, "grad_norm": 1.0417746366941052, "learning_rate": 5.900541937222013e-06, "loss": 0.4127, "step": 15925 }, { "epoch": 1.494557057057057, "grad_norm": 0.9168470525396333, "learning_rate": 5.900004895000082e-06, "loss": 0.3809, "step": 15926 }, { "epoch": 1.494650900900901, "grad_norm": 1.4237722070660184, "learning_rate": 5.899467842047287e-06, "loss": 0.4003, "step": 15927 }, { "epoch": 1.4947447447447448, "grad_norm": 1.307792269560407, "learning_rate": 5.8989307783700314e-06, "loss": 0.4353, "step": 15928 }, { "epoch": 1.4948385885885886, "grad_norm": 0.8893344755377457, "learning_rate": 5.898393703974717e-06, "loss": 0.4066, "step": 15929 }, { "epoch": 1.4949324324324325, "grad_norm": 0.8411524747465998, "learning_rate": 5.897856618867751e-06, "loss": 0.3603, "step": 15930 }, { "epoch": 1.4950262762762763, "grad_norm": 0.9134244019306098, "learning_rate": 5.897319523055532e-06, "loss": 0.3816, "step": 15931 }, { "epoch": 1.49512012012012, "grad_norm": 1.299201900605396, "learning_rate": 5.896782416544468e-06, "loss": 0.4025, "step": 15932 }, { "epoch": 1.4952139639639639, "grad_norm": 0.9961696110783691, "learning_rate": 5.8962452993409635e-06, "loss": 0.3446, "step": 15933 }, { "epoch": 1.4953078078078077, "grad_norm": 0.980390113551538, "learning_rate": 5.89570817145142e-06, "loss": 0.3809, "step": 15934 }, { "epoch": 1.4954016516516517, "grad_norm": 0.9109246388213171, "learning_rate": 5.8951710328822406e-06, "loss": 0.3804, "step": 15935 }, { "epoch": 1.4954954954954955, "grad_norm": 0.8933734612558663, "learning_rate": 5.894633883639833e-06, "loss": 0.3755, "step": 15936 }, { "epoch": 1.4955893393393394, "grad_norm": 1.170777261456016, "learning_rate": 5.8940967237306e-06, "loss": 0.4234, "step": 15937 }, { "epoch": 1.4956831831831832, "grad_norm": 1.0400039088969653, "learning_rate": 5.8935595531609455e-06, "loss": 0.4172, "step": 15938 }, { "epoch": 1.495777027027027, "grad_norm": 1.1076964223281434, "learning_rate": 5.8930223719372785e-06, "loss": 0.4078, "step": 15939 }, { "epoch": 1.495870870870871, "grad_norm": 1.181548614407458, "learning_rate": 5.892485180065999e-06, "loss": 0.4107, "step": 15940 }, { "epoch": 1.4959647147147148, "grad_norm": 0.9852044782622899, "learning_rate": 5.891947977553512e-06, "loss": 0.4187, "step": 15941 }, { "epoch": 1.4960585585585586, "grad_norm": 0.8850652057695121, "learning_rate": 5.891410764406226e-06, "loss": 0.3958, "step": 15942 }, { "epoch": 1.4961524024024024, "grad_norm": 1.1040705179446757, "learning_rate": 5.890873540630544e-06, "loss": 0.4562, "step": 15943 }, { "epoch": 1.4962462462462462, "grad_norm": 0.9840257061111319, "learning_rate": 5.890336306232871e-06, "loss": 0.3962, "step": 15944 }, { "epoch": 1.49634009009009, "grad_norm": 0.8872450762670956, "learning_rate": 5.889799061219615e-06, "loss": 0.371, "step": 15945 }, { "epoch": 1.4964339339339339, "grad_norm": 0.899867217553148, "learning_rate": 5.889261805597181e-06, "loss": 0.4088, "step": 15946 }, { "epoch": 1.4965277777777777, "grad_norm": 0.8881839788000196, "learning_rate": 5.8887245393719705e-06, "loss": 0.3479, "step": 15947 }, { "epoch": 1.4966216216216215, "grad_norm": 0.8720309107692388, "learning_rate": 5.888187262550396e-06, "loss": 0.392, "step": 15948 }, { "epoch": 1.4967154654654655, "grad_norm": 0.810478431837218, "learning_rate": 5.887649975138858e-06, "loss": 0.3807, "step": 15949 }, { "epoch": 1.4968093093093093, "grad_norm": 0.9443798890358033, "learning_rate": 5.8871126771437644e-06, "loss": 0.4097, "step": 15950 }, { "epoch": 1.4969031531531531, "grad_norm": 0.9948975913866338, "learning_rate": 5.886575368571525e-06, "loss": 0.4127, "step": 15951 }, { "epoch": 1.496996996996997, "grad_norm": 0.8810137258890031, "learning_rate": 5.886038049428542e-06, "loss": 0.3822, "step": 15952 }, { "epoch": 1.4970908408408408, "grad_norm": 1.0211874016096982, "learning_rate": 5.885500719721221e-06, "loss": 0.3726, "step": 15953 }, { "epoch": 1.4971846846846848, "grad_norm": 0.9361053367517617, "learning_rate": 5.884963379455972e-06, "loss": 0.3826, "step": 15954 }, { "epoch": 1.4972785285285286, "grad_norm": 0.9975149614565526, "learning_rate": 5.884426028639201e-06, "loss": 0.4217, "step": 15955 }, { "epoch": 1.4973723723723724, "grad_norm": 1.090497997901811, "learning_rate": 5.883888667277313e-06, "loss": 0.4282, "step": 15956 }, { "epoch": 1.4974662162162162, "grad_norm": 0.9443852969542232, "learning_rate": 5.883351295376718e-06, "loss": 0.3889, "step": 15957 }, { "epoch": 1.49756006006006, "grad_norm": 0.9462372004389333, "learning_rate": 5.882813912943822e-06, "loss": 0.4007, "step": 15958 }, { "epoch": 1.4976539039039038, "grad_norm": 0.9001060463966378, "learning_rate": 5.8822765199850295e-06, "loss": 0.3807, "step": 15959 }, { "epoch": 1.4977477477477477, "grad_norm": 0.922338593198886, "learning_rate": 5.881739116506753e-06, "loss": 0.4138, "step": 15960 }, { "epoch": 1.4978415915915915, "grad_norm": 1.0204201303862053, "learning_rate": 5.881201702515394e-06, "loss": 0.4006, "step": 15961 }, { "epoch": 1.4979354354354355, "grad_norm": 1.1011182064801117, "learning_rate": 5.880664278017365e-06, "loss": 0.3639, "step": 15962 }, { "epoch": 1.4980292792792793, "grad_norm": 1.4497675203259186, "learning_rate": 5.8801268430190725e-06, "loss": 0.4158, "step": 15963 }, { "epoch": 1.4981231231231231, "grad_norm": 0.9344415908211195, "learning_rate": 5.879589397526925e-06, "loss": 0.3702, "step": 15964 }, { "epoch": 1.498216966966967, "grad_norm": 0.8560179842489339, "learning_rate": 5.879051941547328e-06, "loss": 0.3921, "step": 15965 }, { "epoch": 1.4983108108108107, "grad_norm": 0.9157690724485981, "learning_rate": 5.878514475086692e-06, "loss": 0.3922, "step": 15966 }, { "epoch": 1.4984046546546548, "grad_norm": 1.0371048090644128, "learning_rate": 5.877976998151423e-06, "loss": 0.4301, "step": 15967 }, { "epoch": 1.4984984984984986, "grad_norm": 1.041813249523418, "learning_rate": 5.877439510747932e-06, "loss": 0.4054, "step": 15968 }, { "epoch": 1.4985923423423424, "grad_norm": 0.9163380151707186, "learning_rate": 5.876902012882627e-06, "loss": 0.3852, "step": 15969 }, { "epoch": 1.4986861861861862, "grad_norm": 1.0209727945301916, "learning_rate": 5.876364504561916e-06, "loss": 0.3792, "step": 15970 }, { "epoch": 1.49878003003003, "grad_norm": 1.009356262662103, "learning_rate": 5.875826985792207e-06, "loss": 0.3933, "step": 15971 }, { "epoch": 1.4988738738738738, "grad_norm": 0.948223614063981, "learning_rate": 5.875289456579911e-06, "loss": 0.4381, "step": 15972 }, { "epoch": 1.4989677177177176, "grad_norm": 1.0000235112331846, "learning_rate": 5.8747519169314336e-06, "loss": 0.3763, "step": 15973 }, { "epoch": 1.4990615615615615, "grad_norm": 0.8572675106443289, "learning_rate": 5.874214366853188e-06, "loss": 0.3806, "step": 15974 }, { "epoch": 1.4991554054054055, "grad_norm": 0.893553962623693, "learning_rate": 5.8736768063515815e-06, "loss": 0.3708, "step": 15975 }, { "epoch": 1.4992492492492493, "grad_norm": 1.2528983919182186, "learning_rate": 5.8731392354330234e-06, "loss": 0.4283, "step": 15976 }, { "epoch": 1.499343093093093, "grad_norm": 0.8597895312639222, "learning_rate": 5.872601654103923e-06, "loss": 0.3383, "step": 15977 }, { "epoch": 1.499436936936937, "grad_norm": 0.9050264296324263, "learning_rate": 5.872064062370692e-06, "loss": 0.4064, "step": 15978 }, { "epoch": 1.4995307807807807, "grad_norm": 1.5296457500351488, "learning_rate": 5.871526460239736e-06, "loss": 0.3778, "step": 15979 }, { "epoch": 1.4996246246246248, "grad_norm": 1.108794071399234, "learning_rate": 5.870988847717468e-06, "loss": 0.3756, "step": 15980 }, { "epoch": 1.4997184684684686, "grad_norm": 1.556172308062337, "learning_rate": 5.870451224810299e-06, "loss": 0.3985, "step": 15981 }, { "epoch": 1.4998123123123124, "grad_norm": 0.9858770135488688, "learning_rate": 5.869913591524636e-06, "loss": 0.3932, "step": 15982 }, { "epoch": 1.4999061561561562, "grad_norm": 1.0723379371029242, "learning_rate": 5.869375947866892e-06, "loss": 0.4168, "step": 15983 }, { "epoch": 1.5, "grad_norm": 1.0864259081533276, "learning_rate": 5.8688382938434775e-06, "loss": 0.4128, "step": 15984 }, { "epoch": 1.5000938438438438, "grad_norm": 1.560159344299401, "learning_rate": 5.868300629460799e-06, "loss": 0.4143, "step": 15985 }, { "epoch": 1.5001876876876876, "grad_norm": 0.9726663643043874, "learning_rate": 5.86776295472527e-06, "loss": 0.4296, "step": 15986 }, { "epoch": 1.5002815315315314, "grad_norm": 1.1057439273171816, "learning_rate": 5.867225269643303e-06, "loss": 0.3988, "step": 15987 }, { "epoch": 1.5003753753753752, "grad_norm": 1.0047139107241583, "learning_rate": 5.866687574221306e-06, "loss": 0.3701, "step": 15988 }, { "epoch": 1.5004692192192193, "grad_norm": 0.8164326687159064, "learning_rate": 5.866149868465692e-06, "loss": 0.3887, "step": 15989 }, { "epoch": 1.500563063063063, "grad_norm": 0.962070142794451, "learning_rate": 5.865612152382871e-06, "loss": 0.3899, "step": 15990 }, { "epoch": 1.500656906906907, "grad_norm": 0.9235075616270865, "learning_rate": 5.865074425979254e-06, "loss": 0.3874, "step": 15991 }, { "epoch": 1.5007507507507507, "grad_norm": 0.9675305655394848, "learning_rate": 5.864536689261251e-06, "loss": 0.384, "step": 15992 }, { "epoch": 1.5008445945945947, "grad_norm": 1.0469313511309473, "learning_rate": 5.8639989422352785e-06, "loss": 0.3774, "step": 15993 }, { "epoch": 1.5009384384384385, "grad_norm": 3.1284124415346195, "learning_rate": 5.863461184907743e-06, "loss": 0.3989, "step": 15994 }, { "epoch": 1.5010322822822824, "grad_norm": 0.869430421739439, "learning_rate": 5.862923417285058e-06, "loss": 0.3869, "step": 15995 }, { "epoch": 1.5011261261261262, "grad_norm": 0.9192609314541376, "learning_rate": 5.8623856393736365e-06, "loss": 0.4064, "step": 15996 }, { "epoch": 1.50121996996997, "grad_norm": 1.099849943657531, "learning_rate": 5.861847851179889e-06, "loss": 0.4284, "step": 15997 }, { "epoch": 1.5013138138138138, "grad_norm": 0.8568028757210782, "learning_rate": 5.861310052710228e-06, "loss": 0.4078, "step": 15998 }, { "epoch": 1.5014076576576576, "grad_norm": 0.8396523233719824, "learning_rate": 5.860772243971067e-06, "loss": 0.3817, "step": 15999 }, { "epoch": 1.5015015015015014, "grad_norm": 0.9696357884257205, "learning_rate": 5.860234424968816e-06, "loss": 0.4182, "step": 16000 }, { "epoch": 1.5015953453453452, "grad_norm": 1.0342626425245172, "learning_rate": 5.8596965957098894e-06, "loss": 0.3523, "step": 16001 }, { "epoch": 1.501689189189189, "grad_norm": 0.9336440880190554, "learning_rate": 5.859158756200699e-06, "loss": 0.4177, "step": 16002 }, { "epoch": 1.501783033033033, "grad_norm": 0.8773320535883673, "learning_rate": 5.858620906447657e-06, "loss": 0.3908, "step": 16003 }, { "epoch": 1.5018768768768769, "grad_norm": 1.0389994283407142, "learning_rate": 5.858083046457177e-06, "loss": 0.398, "step": 16004 }, { "epoch": 1.5019707207207207, "grad_norm": 0.9898256771210604, "learning_rate": 5.857545176235673e-06, "loss": 0.4203, "step": 16005 }, { "epoch": 1.5020645645645647, "grad_norm": 1.4328578312338611, "learning_rate": 5.857007295789555e-06, "loss": 0.391, "step": 16006 }, { "epoch": 1.5021584084084085, "grad_norm": 0.9738898211026826, "learning_rate": 5.85646940512524e-06, "loss": 0.3871, "step": 16007 }, { "epoch": 1.5022522522522523, "grad_norm": 1.1088121616479731, "learning_rate": 5.8559315042491396e-06, "loss": 0.36, "step": 16008 }, { "epoch": 1.5023460960960962, "grad_norm": 1.1104225919853778, "learning_rate": 5.855393593167665e-06, "loss": 0.4573, "step": 16009 }, { "epoch": 1.50243993993994, "grad_norm": 0.8360345058461304, "learning_rate": 5.8548556718872326e-06, "loss": 0.3846, "step": 16010 }, { "epoch": 1.5025337837837838, "grad_norm": 0.9503320609502268, "learning_rate": 5.854317740414257e-06, "loss": 0.3941, "step": 16011 }, { "epoch": 1.5026276276276276, "grad_norm": 1.0538183103290057, "learning_rate": 5.853779798755149e-06, "loss": 0.3238, "step": 16012 }, { "epoch": 1.5027214714714714, "grad_norm": 1.0455154394328083, "learning_rate": 5.853241846916324e-06, "loss": 0.3811, "step": 16013 }, { "epoch": 1.5028153153153152, "grad_norm": 1.1736115684653272, "learning_rate": 5.852703884904198e-06, "loss": 0.3939, "step": 16014 }, { "epoch": 1.502909159159159, "grad_norm": 0.8932298802728302, "learning_rate": 5.852165912725181e-06, "loss": 0.37, "step": 16015 }, { "epoch": 1.503003003003003, "grad_norm": 0.8611493269668288, "learning_rate": 5.851627930385689e-06, "loss": 0.3742, "step": 16016 }, { "epoch": 1.5030968468468469, "grad_norm": 0.9841193969758014, "learning_rate": 5.851089937892139e-06, "loss": 0.3893, "step": 16017 }, { "epoch": 1.5031906906906907, "grad_norm": 0.9684020663645108, "learning_rate": 5.850551935250942e-06, "loss": 0.4107, "step": 16018 }, { "epoch": 1.5032845345345347, "grad_norm": 0.8131638235393612, "learning_rate": 5.850013922468516e-06, "loss": 0.4012, "step": 16019 }, { "epoch": 1.5033783783783785, "grad_norm": 1.3715091014141993, "learning_rate": 5.849475899551273e-06, "loss": 0.4102, "step": 16020 }, { "epoch": 1.5034722222222223, "grad_norm": 0.9152955813892288, "learning_rate": 5.848937866505629e-06, "loss": 0.3754, "step": 16021 }, { "epoch": 1.5035660660660661, "grad_norm": 0.7444106663725445, "learning_rate": 5.848399823337998e-06, "loss": 0.3833, "step": 16022 }, { "epoch": 1.50365990990991, "grad_norm": 1.205325156543506, "learning_rate": 5.847861770054797e-06, "loss": 0.3839, "step": 16023 }, { "epoch": 1.5037537537537538, "grad_norm": 1.6011674188490677, "learning_rate": 5.847323706662439e-06, "loss": 0.3792, "step": 16024 }, { "epoch": 1.5038475975975976, "grad_norm": 1.156931050600928, "learning_rate": 5.846785633167342e-06, "loss": 0.4168, "step": 16025 }, { "epoch": 1.5039414414414414, "grad_norm": 0.9818293973727219, "learning_rate": 5.846247549575921e-06, "loss": 0.4087, "step": 16026 }, { "epoch": 1.5040352852852852, "grad_norm": 0.8413298026508449, "learning_rate": 5.8457094558945895e-06, "loss": 0.3778, "step": 16027 }, { "epoch": 1.504129129129129, "grad_norm": 0.9655990303282567, "learning_rate": 5.8451713521297636e-06, "loss": 0.4401, "step": 16028 }, { "epoch": 1.504222972972973, "grad_norm": 0.8370606803232505, "learning_rate": 5.844633238287862e-06, "loss": 0.3825, "step": 16029 }, { "epoch": 1.5043168168168168, "grad_norm": 0.998103828445313, "learning_rate": 5.844095114375298e-06, "loss": 0.4208, "step": 16030 }, { "epoch": 1.5044106606606606, "grad_norm": 0.8344351031626847, "learning_rate": 5.84355698039849e-06, "loss": 0.4027, "step": 16031 }, { "epoch": 1.5045045045045045, "grad_norm": 0.9850459789851291, "learning_rate": 5.843018836363851e-06, "loss": 0.4166, "step": 16032 }, { "epoch": 1.5045983483483485, "grad_norm": 1.3542627476334732, "learning_rate": 5.8424806822778e-06, "loss": 0.3893, "step": 16033 }, { "epoch": 1.5046921921921923, "grad_norm": 1.1127594867212163, "learning_rate": 5.841942518146753e-06, "loss": 0.413, "step": 16034 }, { "epoch": 1.5047860360360361, "grad_norm": 0.9404359834951894, "learning_rate": 5.841404343977126e-06, "loss": 0.3712, "step": 16035 }, { "epoch": 1.50487987987988, "grad_norm": 0.9075021312741071, "learning_rate": 5.840866159775336e-06, "loss": 0.3781, "step": 16036 }, { "epoch": 1.5049737237237237, "grad_norm": 1.0556552113028483, "learning_rate": 5.8403279655477986e-06, "loss": 0.4304, "step": 16037 }, { "epoch": 1.5050675675675675, "grad_norm": 0.9397684479956471, "learning_rate": 5.839789761300934e-06, "loss": 0.4142, "step": 16038 }, { "epoch": 1.5051614114114114, "grad_norm": 0.9455025062948607, "learning_rate": 5.839251547041157e-06, "loss": 0.3793, "step": 16039 }, { "epoch": 1.5052552552552552, "grad_norm": 1.419316937807369, "learning_rate": 5.838713322774884e-06, "loss": 0.3564, "step": 16040 }, { "epoch": 1.505349099099099, "grad_norm": 0.9492435595731517, "learning_rate": 5.8381750885085346e-06, "loss": 0.4099, "step": 16041 }, { "epoch": 1.5054429429429428, "grad_norm": 0.9224244386667971, "learning_rate": 5.837636844248523e-06, "loss": 0.3989, "step": 16042 }, { "epoch": 1.5055367867867868, "grad_norm": 0.9999191061589935, "learning_rate": 5.837098590001269e-06, "loss": 0.405, "step": 16043 }, { "epoch": 1.5056306306306306, "grad_norm": 0.8640378740308514, "learning_rate": 5.836560325773192e-06, "loss": 0.4189, "step": 16044 }, { "epoch": 1.5057244744744744, "grad_norm": 1.314297470380513, "learning_rate": 5.836022051570706e-06, "loss": 0.3895, "step": 16045 }, { "epoch": 1.5058183183183185, "grad_norm": 7.118608339794472, "learning_rate": 5.835483767400232e-06, "loss": 0.4077, "step": 16046 }, { "epoch": 1.5059121621621623, "grad_norm": 0.9814066523385439, "learning_rate": 5.834945473268186e-06, "loss": 0.3807, "step": 16047 }, { "epoch": 1.506006006006006, "grad_norm": 2.6184479884464733, "learning_rate": 5.834407169180987e-06, "loss": 0.4185, "step": 16048 }, { "epoch": 1.50609984984985, "grad_norm": 1.042217973125245, "learning_rate": 5.833868855145051e-06, "loss": 0.4211, "step": 16049 }, { "epoch": 1.5061936936936937, "grad_norm": 1.0039217125083895, "learning_rate": 5.833330531166802e-06, "loss": 0.4069, "step": 16050 }, { "epoch": 1.5062875375375375, "grad_norm": 0.8422917983353383, "learning_rate": 5.832792197252654e-06, "loss": 0.3627, "step": 16051 }, { "epoch": 1.5063813813813813, "grad_norm": 1.0417630401490823, "learning_rate": 5.832253853409026e-06, "loss": 0.4361, "step": 16052 }, { "epoch": 1.5064752252252251, "grad_norm": 1.624050197977065, "learning_rate": 5.831715499642337e-06, "loss": 0.4258, "step": 16053 }, { "epoch": 1.506569069069069, "grad_norm": 7.293128313018698, "learning_rate": 5.831177135959006e-06, "loss": 0.4126, "step": 16054 }, { "epoch": 1.5066629129129128, "grad_norm": 0.9257109317599759, "learning_rate": 5.830638762365453e-06, "loss": 0.4142, "step": 16055 }, { "epoch": 1.5067567567567568, "grad_norm": 1.2932495088777254, "learning_rate": 5.830100378868097e-06, "loss": 0.3987, "step": 16056 }, { "epoch": 1.5068506006006006, "grad_norm": 0.938175088035858, "learning_rate": 5.829561985473356e-06, "loss": 0.4088, "step": 16057 }, { "epoch": 1.5069444444444444, "grad_norm": 1.1162095485715045, "learning_rate": 5.829023582187648e-06, "loss": 0.4338, "step": 16058 }, { "epoch": 1.5070382882882885, "grad_norm": 0.9052015532455764, "learning_rate": 5.8284851690173964e-06, "loss": 0.4086, "step": 16059 }, { "epoch": 1.5071321321321323, "grad_norm": 1.074383378357082, "learning_rate": 5.827946745969018e-06, "loss": 0.41, "step": 16060 }, { "epoch": 1.507225975975976, "grad_norm": 0.9592198553068746, "learning_rate": 5.8274083130489325e-06, "loss": 0.3646, "step": 16061 }, { "epoch": 1.5073198198198199, "grad_norm": 1.251014417441557, "learning_rate": 5.8268698702635605e-06, "loss": 0.4154, "step": 16062 }, { "epoch": 1.5074136636636637, "grad_norm": 0.86485237135497, "learning_rate": 5.826331417619323e-06, "loss": 0.3651, "step": 16063 }, { "epoch": 1.5075075075075075, "grad_norm": 1.0226650547551515, "learning_rate": 5.825792955122637e-06, "loss": 0.3957, "step": 16064 }, { "epoch": 1.5076013513513513, "grad_norm": 0.8520087444778162, "learning_rate": 5.825254482779925e-06, "loss": 0.42, "step": 16065 }, { "epoch": 1.5076951951951951, "grad_norm": 0.9619889804254893, "learning_rate": 5.824716000597607e-06, "loss": 0.4347, "step": 16066 }, { "epoch": 1.507789039039039, "grad_norm": 2.1380654685750233, "learning_rate": 5.8241775085821024e-06, "loss": 0.3827, "step": 16067 }, { "epoch": 1.5078828828828827, "grad_norm": 0.9659372532614466, "learning_rate": 5.823639006739833e-06, "loss": 0.3778, "step": 16068 }, { "epoch": 1.5079767267267268, "grad_norm": 0.9443792717011742, "learning_rate": 5.82310049507722e-06, "loss": 0.4581, "step": 16069 }, { "epoch": 1.5080705705705706, "grad_norm": 1.2690018834544174, "learning_rate": 5.822561973600681e-06, "loss": 0.4158, "step": 16070 }, { "epoch": 1.5081644144144144, "grad_norm": 1.4125228930192744, "learning_rate": 5.82202344231664e-06, "loss": 0.3629, "step": 16071 }, { "epoch": 1.5082582582582582, "grad_norm": 1.005792391877243, "learning_rate": 5.8214849012315155e-06, "loss": 0.388, "step": 16072 }, { "epoch": 1.5083521021021022, "grad_norm": 0.965621319369643, "learning_rate": 5.820946350351729e-06, "loss": 0.3641, "step": 16073 }, { "epoch": 1.508445945945946, "grad_norm": 0.8521269424243507, "learning_rate": 5.820407789683705e-06, "loss": 0.3495, "step": 16074 }, { "epoch": 1.5085397897897899, "grad_norm": 0.8098851540337236, "learning_rate": 5.819869219233863e-06, "loss": 0.389, "step": 16075 }, { "epoch": 1.5086336336336337, "grad_norm": 0.9909121142015236, "learning_rate": 5.8193306390086205e-06, "loss": 0.408, "step": 16076 }, { "epoch": 1.5087274774774775, "grad_norm": 1.01901927917953, "learning_rate": 5.818792049014404e-06, "loss": 0.3463, "step": 16077 }, { "epoch": 1.5088213213213213, "grad_norm": 0.9478642322483471, "learning_rate": 5.818253449257633e-06, "loss": 0.3978, "step": 16078 }, { "epoch": 1.508915165165165, "grad_norm": 0.8988669475849337, "learning_rate": 5.81771483974473e-06, "loss": 0.4119, "step": 16079 }, { "epoch": 1.509009009009009, "grad_norm": 1.9182482738664066, "learning_rate": 5.817176220482118e-06, "loss": 0.3951, "step": 16080 }, { "epoch": 1.5091028528528527, "grad_norm": 1.006116683969061, "learning_rate": 5.816637591476217e-06, "loss": 0.3621, "step": 16081 }, { "epoch": 1.5091966966966965, "grad_norm": 0.8894426919983711, "learning_rate": 5.81609895273345e-06, "loss": 0.374, "step": 16082 }, { "epoch": 1.5092905405405406, "grad_norm": 0.9125921362556666, "learning_rate": 5.81556030426024e-06, "loss": 0.3914, "step": 16083 }, { "epoch": 1.5093843843843844, "grad_norm": 0.9599064931221558, "learning_rate": 5.815021646063006e-06, "loss": 0.4063, "step": 16084 }, { "epoch": 1.5094782282282282, "grad_norm": 0.9118754639864934, "learning_rate": 5.814482978148174e-06, "loss": 0.4114, "step": 16085 }, { "epoch": 1.5095720720720722, "grad_norm": 1.1163990795482692, "learning_rate": 5.813944300522167e-06, "loss": 0.3657, "step": 16086 }, { "epoch": 1.509665915915916, "grad_norm": 0.8782377979651704, "learning_rate": 5.813405613191405e-06, "loss": 0.4362, "step": 16087 }, { "epoch": 1.5097597597597598, "grad_norm": 0.8728184139304045, "learning_rate": 5.8128669161623115e-06, "loss": 0.4151, "step": 16088 }, { "epoch": 1.5098536036036037, "grad_norm": 1.0308880845766828, "learning_rate": 5.812328209441312e-06, "loss": 0.4104, "step": 16089 }, { "epoch": 1.5099474474474475, "grad_norm": 0.9139530864814708, "learning_rate": 5.8117894930348265e-06, "loss": 0.4271, "step": 16090 }, { "epoch": 1.5100412912912913, "grad_norm": 1.1509275466010616, "learning_rate": 5.811250766949279e-06, "loss": 0.4348, "step": 16091 }, { "epoch": 1.510135135135135, "grad_norm": 0.8575959822614996, "learning_rate": 5.810712031191093e-06, "loss": 0.3999, "step": 16092 }, { "epoch": 1.510228978978979, "grad_norm": 2.501187660141054, "learning_rate": 5.810173285766694e-06, "loss": 0.3949, "step": 16093 }, { "epoch": 1.5103228228228227, "grad_norm": 0.9539269527871519, "learning_rate": 5.809634530682502e-06, "loss": 0.3658, "step": 16094 }, { "epoch": 1.5104166666666665, "grad_norm": 1.0163559137139462, "learning_rate": 5.809095765944943e-06, "loss": 0.4484, "step": 16095 }, { "epoch": 1.5105105105105106, "grad_norm": 0.9434760618466036, "learning_rate": 5.808556991560439e-06, "loss": 0.43, "step": 16096 }, { "epoch": 1.5106043543543544, "grad_norm": 2.548625291852932, "learning_rate": 5.808018207535414e-06, "loss": 0.4276, "step": 16097 }, { "epoch": 1.5106981981981982, "grad_norm": 1.5539545983495067, "learning_rate": 5.807479413876295e-06, "loss": 0.3693, "step": 16098 }, { "epoch": 1.5107920420420422, "grad_norm": 1.2799106914679252, "learning_rate": 5.806940610589502e-06, "loss": 0.4301, "step": 16099 }, { "epoch": 1.510885885885886, "grad_norm": 0.9017558326141386, "learning_rate": 5.806401797681462e-06, "loss": 0.3713, "step": 16100 }, { "epoch": 1.5109797297297298, "grad_norm": 0.9518684038257608, "learning_rate": 5.805862975158601e-06, "loss": 0.412, "step": 16101 }, { "epoch": 1.5110735735735736, "grad_norm": 0.971989810824981, "learning_rate": 5.8053241430273364e-06, "loss": 0.3609, "step": 16102 }, { "epoch": 1.5111674174174174, "grad_norm": 0.8296335220251896, "learning_rate": 5.804785301294099e-06, "loss": 0.3938, "step": 16103 }, { "epoch": 1.5112612612612613, "grad_norm": 0.9425100850387651, "learning_rate": 5.804246449965313e-06, "loss": 0.4336, "step": 16104 }, { "epoch": 1.511355105105105, "grad_norm": 0.8475192824616445, "learning_rate": 5.8037075890474e-06, "loss": 0.3906, "step": 16105 }, { "epoch": 1.5114489489489489, "grad_norm": 1.1990525694024046, "learning_rate": 5.803168718546788e-06, "loss": 0.4423, "step": 16106 }, { "epoch": 1.5115427927927927, "grad_norm": 1.0267252143134202, "learning_rate": 5.802629838469902e-06, "loss": 0.4141, "step": 16107 }, { "epoch": 1.5116366366366365, "grad_norm": 0.9681964244573028, "learning_rate": 5.802090948823163e-06, "loss": 0.4107, "step": 16108 }, { "epoch": 1.5117304804804805, "grad_norm": 0.9417745391579333, "learning_rate": 5.801552049613001e-06, "loss": 0.4529, "step": 16109 }, { "epoch": 1.5118243243243243, "grad_norm": 0.8604376263292167, "learning_rate": 5.801013140845839e-06, "loss": 0.413, "step": 16110 }, { "epoch": 1.5119181681681682, "grad_norm": 1.7491819408621847, "learning_rate": 5.800474222528103e-06, "loss": 0.4063, "step": 16111 }, { "epoch": 1.512012012012012, "grad_norm": 0.8928158594263362, "learning_rate": 5.79993529466622e-06, "loss": 0.4235, "step": 16112 }, { "epoch": 1.512105855855856, "grad_norm": 0.9272694627413469, "learning_rate": 5.799396357266613e-06, "loss": 0.402, "step": 16113 }, { "epoch": 1.5121996996996998, "grad_norm": 1.0443770573777738, "learning_rate": 5.79885741033571e-06, "loss": 0.3943, "step": 16114 }, { "epoch": 1.5122935435435436, "grad_norm": 0.9013394888055106, "learning_rate": 5.798318453879935e-06, "loss": 0.419, "step": 16115 }, { "epoch": 1.5123873873873874, "grad_norm": 0.8860876237910741, "learning_rate": 5.797779487905716e-06, "loss": 0.3711, "step": 16116 }, { "epoch": 1.5124812312312312, "grad_norm": 0.9432196774722524, "learning_rate": 5.7972405124194776e-06, "loss": 0.4089, "step": 16117 }, { "epoch": 1.512575075075075, "grad_norm": 0.9029851834813244, "learning_rate": 5.796701527427648e-06, "loss": 0.3741, "step": 16118 }, { "epoch": 1.5126689189189189, "grad_norm": 0.9840188686468614, "learning_rate": 5.796162532936652e-06, "loss": 0.4162, "step": 16119 }, { "epoch": 1.5127627627627627, "grad_norm": 1.2047610542235327, "learning_rate": 5.795623528952915e-06, "loss": 0.3956, "step": 16120 }, { "epoch": 1.5128566066066065, "grad_norm": 0.8978471750311908, "learning_rate": 5.795084515482865e-06, "loss": 0.4103, "step": 16121 }, { "epoch": 1.5129504504504503, "grad_norm": 0.916570976080981, "learning_rate": 5.794545492532931e-06, "loss": 0.3809, "step": 16122 }, { "epoch": 1.5130442942942943, "grad_norm": 1.0666353476714239, "learning_rate": 5.794006460109536e-06, "loss": 0.3934, "step": 16123 }, { "epoch": 1.5131381381381381, "grad_norm": 1.0107658174854057, "learning_rate": 5.793467418219109e-06, "loss": 0.4322, "step": 16124 }, { "epoch": 1.513231981981982, "grad_norm": 1.362989087940248, "learning_rate": 5.792928366868077e-06, "loss": 0.3778, "step": 16125 }, { "epoch": 1.513325825825826, "grad_norm": 0.9685543863985063, "learning_rate": 5.792389306062866e-06, "loss": 0.4158, "step": 16126 }, { "epoch": 1.5134196696696698, "grad_norm": 0.9689598928105616, "learning_rate": 5.791850235809903e-06, "loss": 0.441, "step": 16127 }, { "epoch": 1.5135135135135136, "grad_norm": 0.9044925349367877, "learning_rate": 5.791311156115618e-06, "loss": 0.3905, "step": 16128 }, { "epoch": 1.5136073573573574, "grad_norm": 1.760494539041387, "learning_rate": 5.7907720669864366e-06, "loss": 0.3924, "step": 16129 }, { "epoch": 1.5137012012012012, "grad_norm": 0.94097753739005, "learning_rate": 5.790232968428786e-06, "loss": 0.4035, "step": 16130 }, { "epoch": 1.513795045045045, "grad_norm": 0.8816153046513684, "learning_rate": 5.789693860449097e-06, "loss": 0.3656, "step": 16131 }, { "epoch": 1.5138888888888888, "grad_norm": 1.0331023521969094, "learning_rate": 5.789154743053793e-06, "loss": 0.4242, "step": 16132 }, { "epoch": 1.5139827327327327, "grad_norm": 1.039185613421597, "learning_rate": 5.788615616249304e-06, "loss": 0.441, "step": 16133 }, { "epoch": 1.5140765765765765, "grad_norm": 0.8813299437167671, "learning_rate": 5.788076480042059e-06, "loss": 0.3812, "step": 16134 }, { "epoch": 1.5141704204204203, "grad_norm": 0.8834100763008382, "learning_rate": 5.787537334438485e-06, "loss": 0.3439, "step": 16135 }, { "epoch": 1.5142642642642643, "grad_norm": 0.8969394318035925, "learning_rate": 5.786998179445012e-06, "loss": 0.3692, "step": 16136 }, { "epoch": 1.5143581081081081, "grad_norm": 1.0107167202310023, "learning_rate": 5.7864590150680655e-06, "loss": 0.3925, "step": 16137 }, { "epoch": 1.514451951951952, "grad_norm": 0.9426617700688402, "learning_rate": 5.785919841314076e-06, "loss": 0.4029, "step": 16138 }, { "epoch": 1.514545795795796, "grad_norm": 1.019571036663512, "learning_rate": 5.785380658189471e-06, "loss": 0.3846, "step": 16139 }, { "epoch": 1.5146396396396398, "grad_norm": 0.8909623398311424, "learning_rate": 5.784841465700681e-06, "loss": 0.3753, "step": 16140 }, { "epoch": 1.5147334834834836, "grad_norm": 0.9585138053443923, "learning_rate": 5.784302263854134e-06, "loss": 0.3922, "step": 16141 }, { "epoch": 1.5148273273273274, "grad_norm": 0.944646142158368, "learning_rate": 5.78376305265626e-06, "loss": 0.4413, "step": 16142 }, { "epoch": 1.5149211711711712, "grad_norm": 1.0055903723015582, "learning_rate": 5.783223832113485e-06, "loss": 0.373, "step": 16143 }, { "epoch": 1.515015015015015, "grad_norm": 1.1926801724542238, "learning_rate": 5.78268460223224e-06, "loss": 0.4042, "step": 16144 }, { "epoch": 1.5151088588588588, "grad_norm": 0.9004771947404906, "learning_rate": 5.782145363018954e-06, "loss": 0.3968, "step": 16145 }, { "epoch": 1.5152027027027026, "grad_norm": 0.9653003849761546, "learning_rate": 5.781606114480057e-06, "loss": 0.3808, "step": 16146 }, { "epoch": 1.5152965465465464, "grad_norm": 0.8678157264970382, "learning_rate": 5.781066856621979e-06, "loss": 0.3831, "step": 16147 }, { "epoch": 1.5153903903903903, "grad_norm": 1.1602098921626574, "learning_rate": 5.78052758945115e-06, "loss": 0.3831, "step": 16148 }, { "epoch": 1.5154842342342343, "grad_norm": 0.8903848807907012, "learning_rate": 5.779988312973996e-06, "loss": 0.3932, "step": 16149 }, { "epoch": 1.515578078078078, "grad_norm": 0.7948382886622261, "learning_rate": 5.779449027196953e-06, "loss": 0.3865, "step": 16150 }, { "epoch": 1.515671921921922, "grad_norm": 0.8658353621811198, "learning_rate": 5.778909732126444e-06, "loss": 0.3756, "step": 16151 }, { "epoch": 1.5157657657657657, "grad_norm": 0.9874768269498747, "learning_rate": 5.778370427768904e-06, "loss": 0.4359, "step": 16152 }, { "epoch": 1.5158596096096097, "grad_norm": 0.8585007387134471, "learning_rate": 5.777831114130763e-06, "loss": 0.3591, "step": 16153 }, { "epoch": 1.5159534534534536, "grad_norm": 1.0531662913362643, "learning_rate": 5.777291791218449e-06, "loss": 0.3741, "step": 16154 }, { "epoch": 1.5160472972972974, "grad_norm": 0.9150226511169121, "learning_rate": 5.7767524590383925e-06, "loss": 0.3765, "step": 16155 }, { "epoch": 1.5161411411411412, "grad_norm": 0.8428800910641396, "learning_rate": 5.776213117597027e-06, "loss": 0.3706, "step": 16156 }, { "epoch": 1.516234984984985, "grad_norm": 2.9008273344422997, "learning_rate": 5.77567376690078e-06, "loss": 0.3655, "step": 16157 }, { "epoch": 1.5163288288288288, "grad_norm": 0.8805739523154584, "learning_rate": 5.775134406956082e-06, "loss": 0.3879, "step": 16158 }, { "epoch": 1.5164226726726726, "grad_norm": 1.88822406934323, "learning_rate": 5.774595037769368e-06, "loss": 0.4172, "step": 16159 }, { "epoch": 1.5165165165165164, "grad_norm": 0.8115845885350135, "learning_rate": 5.774055659347065e-06, "loss": 0.379, "step": 16160 }, { "epoch": 1.5166103603603602, "grad_norm": 1.1336831639930638, "learning_rate": 5.773516271695605e-06, "loss": 0.4079, "step": 16161 }, { "epoch": 1.516704204204204, "grad_norm": 1.314759632391769, "learning_rate": 5.772976874821421e-06, "loss": 0.4401, "step": 16162 }, { "epoch": 1.516798048048048, "grad_norm": 1.2062402341044176, "learning_rate": 5.77243746873094e-06, "loss": 0.4545, "step": 16163 }, { "epoch": 1.5168918918918919, "grad_norm": 0.9452805952431366, "learning_rate": 5.771898053430598e-06, "loss": 0.3954, "step": 16164 }, { "epoch": 1.5169857357357357, "grad_norm": 0.8633655329221751, "learning_rate": 5.771358628926824e-06, "loss": 0.4416, "step": 16165 }, { "epoch": 1.5170795795795797, "grad_norm": 1.1273712341796012, "learning_rate": 5.77081919522605e-06, "loss": 0.4142, "step": 16166 }, { "epoch": 1.5171734234234235, "grad_norm": 0.9926860610257401, "learning_rate": 5.77027975233471e-06, "loss": 0.4268, "step": 16167 }, { "epoch": 1.5172672672672673, "grad_norm": 0.8266966968226267, "learning_rate": 5.769740300259232e-06, "loss": 0.3771, "step": 16168 }, { "epoch": 1.5173611111111112, "grad_norm": 1.769879470595972, "learning_rate": 5.76920083900605e-06, "loss": 0.3582, "step": 16169 }, { "epoch": 1.517454954954955, "grad_norm": 1.002114354304312, "learning_rate": 5.7686613685815965e-06, "loss": 0.3729, "step": 16170 }, { "epoch": 1.5175487987987988, "grad_norm": 1.3644235315150994, "learning_rate": 5.768121888992303e-06, "loss": 0.4096, "step": 16171 }, { "epoch": 1.5176426426426426, "grad_norm": 1.021194290891556, "learning_rate": 5.7675824002446e-06, "loss": 0.4182, "step": 16172 }, { "epoch": 1.5177364864864864, "grad_norm": 1.089310503629235, "learning_rate": 5.767042902344924e-06, "loss": 0.4068, "step": 16173 }, { "epoch": 1.5178303303303302, "grad_norm": 0.9058235332630352, "learning_rate": 5.766503395299705e-06, "loss": 0.3902, "step": 16174 }, { "epoch": 1.517924174174174, "grad_norm": 0.9626696813232062, "learning_rate": 5.765963879115375e-06, "loss": 0.4102, "step": 16175 }, { "epoch": 1.518018018018018, "grad_norm": 0.9251248059470522, "learning_rate": 5.765424353798367e-06, "loss": 0.3872, "step": 16176 }, { "epoch": 1.5181118618618619, "grad_norm": 0.949990733808714, "learning_rate": 5.764884819355116e-06, "loss": 0.3721, "step": 16177 }, { "epoch": 1.5182057057057057, "grad_norm": 0.8899450746881618, "learning_rate": 5.764345275792051e-06, "loss": 0.3708, "step": 16178 }, { "epoch": 1.5182995495495497, "grad_norm": 1.811649254252886, "learning_rate": 5.763805723115609e-06, "loss": 0.412, "step": 16179 }, { "epoch": 1.5183933933933935, "grad_norm": 1.0574132610480276, "learning_rate": 5.7632661613322215e-06, "loss": 0.4271, "step": 16180 }, { "epoch": 1.5184872372372373, "grad_norm": 0.8697579158078543, "learning_rate": 5.762726590448321e-06, "loss": 0.3549, "step": 16181 }, { "epoch": 1.5185810810810811, "grad_norm": 1.0096610397650194, "learning_rate": 5.762187010470341e-06, "loss": 0.4291, "step": 16182 }, { "epoch": 1.518674924924925, "grad_norm": 0.9422990675258232, "learning_rate": 5.761647421404718e-06, "loss": 0.4023, "step": 16183 }, { "epoch": 1.5187687687687688, "grad_norm": 0.9224321433974273, "learning_rate": 5.761107823257881e-06, "loss": 0.4153, "step": 16184 }, { "epoch": 1.5188626126126126, "grad_norm": 1.1265169777578388, "learning_rate": 5.760568216036266e-06, "loss": 0.4266, "step": 16185 }, { "epoch": 1.5189564564564564, "grad_norm": 0.9070033636744277, "learning_rate": 5.760028599746309e-06, "loss": 0.4095, "step": 16186 }, { "epoch": 1.5190503003003002, "grad_norm": 0.9068865157159043, "learning_rate": 5.759488974394438e-06, "loss": 0.4287, "step": 16187 }, { "epoch": 1.519144144144144, "grad_norm": 0.8811226989709532, "learning_rate": 5.758949339987092e-06, "loss": 0.3793, "step": 16188 }, { "epoch": 1.519237987987988, "grad_norm": 0.9921791198831315, "learning_rate": 5.7584096965307045e-06, "loss": 0.378, "step": 16189 }, { "epoch": 1.5193318318318318, "grad_norm": 0.9283294841241301, "learning_rate": 5.757870044031708e-06, "loss": 0.4074, "step": 16190 }, { "epoch": 1.5194256756756757, "grad_norm": 1.3596188567584402, "learning_rate": 5.7573303824965386e-06, "loss": 0.3744, "step": 16191 }, { "epoch": 1.5195195195195195, "grad_norm": 1.0466611694641037, "learning_rate": 5.75679071193163e-06, "loss": 0.4418, "step": 16192 }, { "epoch": 1.5196133633633635, "grad_norm": 1.0144849635891544, "learning_rate": 5.7562510323434165e-06, "loss": 0.3826, "step": 16193 }, { "epoch": 1.5197072072072073, "grad_norm": 0.9343570805089659, "learning_rate": 5.7557113437383315e-06, "loss": 0.38, "step": 16194 }, { "epoch": 1.5198010510510511, "grad_norm": 1.0021021814872793, "learning_rate": 5.755171646122812e-06, "loss": 0.413, "step": 16195 }, { "epoch": 1.519894894894895, "grad_norm": 0.9235481877054534, "learning_rate": 5.7546319395032924e-06, "loss": 0.374, "step": 16196 }, { "epoch": 1.5199887387387387, "grad_norm": 0.9820454481436116, "learning_rate": 5.754092223886208e-06, "loss": 0.4078, "step": 16197 }, { "epoch": 1.5200825825825826, "grad_norm": 1.00507506684002, "learning_rate": 5.753552499277994e-06, "loss": 0.3636, "step": 16198 }, { "epoch": 1.5201764264264264, "grad_norm": 0.9510114428880643, "learning_rate": 5.753012765685082e-06, "loss": 0.4089, "step": 16199 }, { "epoch": 1.5202702702702702, "grad_norm": 1.0120633523714166, "learning_rate": 5.752473023113911e-06, "loss": 0.4098, "step": 16200 }, { "epoch": 1.520364114114114, "grad_norm": 0.9426145403500401, "learning_rate": 5.751933271570917e-06, "loss": 0.3632, "step": 16201 }, { "epoch": 1.5204579579579578, "grad_norm": 0.8851139658561111, "learning_rate": 5.751393511062533e-06, "loss": 0.4163, "step": 16202 }, { "epoch": 1.5205518018018018, "grad_norm": 1.2124265096776363, "learning_rate": 5.7508537415951974e-06, "loss": 0.4432, "step": 16203 }, { "epoch": 1.5206456456456456, "grad_norm": 1.2259741093081558, "learning_rate": 5.750313963175342e-06, "loss": 0.3835, "step": 16204 }, { "epoch": 1.5207394894894894, "grad_norm": 0.9507549971098395, "learning_rate": 5.7497741758094065e-06, "loss": 0.4116, "step": 16205 }, { "epoch": 1.5208333333333335, "grad_norm": 0.9420911634750245, "learning_rate": 5.749234379503823e-06, "loss": 0.3903, "step": 16206 }, { "epoch": 1.5209271771771773, "grad_norm": 0.9875262983578288, "learning_rate": 5.748694574265032e-06, "loss": 0.4049, "step": 16207 }, { "epoch": 1.521021021021021, "grad_norm": 1.0895226736059604, "learning_rate": 5.748154760099467e-06, "loss": 0.3918, "step": 16208 }, { "epoch": 1.521114864864865, "grad_norm": 0.893180707767704, "learning_rate": 5.7476149370135645e-06, "loss": 0.4294, "step": 16209 }, { "epoch": 1.5212087087087087, "grad_norm": 1.2036180175047368, "learning_rate": 5.747075105013762e-06, "loss": 0.4226, "step": 16210 }, { "epoch": 1.5213025525525525, "grad_norm": 0.9102586423464754, "learning_rate": 5.746535264106494e-06, "loss": 0.3555, "step": 16211 }, { "epoch": 1.5213963963963963, "grad_norm": 1.4559746544368721, "learning_rate": 5.745995414298197e-06, "loss": 0.4238, "step": 16212 }, { "epoch": 1.5214902402402402, "grad_norm": 1.5172245410689995, "learning_rate": 5.745455555595312e-06, "loss": 0.4562, "step": 16213 }, { "epoch": 1.521584084084084, "grad_norm": 0.8622382294216595, "learning_rate": 5.74491568800427e-06, "loss": 0.3605, "step": 16214 }, { "epoch": 1.5216779279279278, "grad_norm": 1.0167278293319375, "learning_rate": 5.744375811531513e-06, "loss": 0.4454, "step": 16215 }, { "epoch": 1.5217717717717718, "grad_norm": 0.8432503378668404, "learning_rate": 5.743835926183474e-06, "loss": 0.3695, "step": 16216 }, { "epoch": 1.5218656156156156, "grad_norm": 0.8400541700014983, "learning_rate": 5.743296031966592e-06, "loss": 0.3504, "step": 16217 }, { "epoch": 1.5219594594594594, "grad_norm": 0.9053683522895247, "learning_rate": 5.742756128887304e-06, "loss": 0.4087, "step": 16218 }, { "epoch": 1.5220533033033035, "grad_norm": 1.0133039255130052, "learning_rate": 5.742216216952047e-06, "loss": 0.4639, "step": 16219 }, { "epoch": 1.5221471471471473, "grad_norm": 0.904535955041424, "learning_rate": 5.741676296167258e-06, "loss": 0.447, "step": 16220 }, { "epoch": 1.522240990990991, "grad_norm": 0.8835701751094118, "learning_rate": 5.741136366539375e-06, "loss": 0.4028, "step": 16221 }, { "epoch": 1.522334834834835, "grad_norm": 0.8792819511919857, "learning_rate": 5.740596428074839e-06, "loss": 0.3696, "step": 16222 }, { "epoch": 1.5224286786786787, "grad_norm": 0.9520787744135484, "learning_rate": 5.740056480780082e-06, "loss": 0.4328, "step": 16223 }, { "epoch": 1.5225225225225225, "grad_norm": 0.8452180654304245, "learning_rate": 5.739516524661545e-06, "loss": 0.3562, "step": 16224 }, { "epoch": 1.5226163663663663, "grad_norm": 0.9458022781035453, "learning_rate": 5.738976559725666e-06, "loss": 0.4517, "step": 16225 }, { "epoch": 1.5227102102102101, "grad_norm": 0.9558631378963582, "learning_rate": 5.738436585978881e-06, "loss": 0.3986, "step": 16226 }, { "epoch": 1.522804054054054, "grad_norm": 0.8682926952086915, "learning_rate": 5.737896603427631e-06, "loss": 0.4114, "step": 16227 }, { "epoch": 1.5228978978978978, "grad_norm": 0.9515471578045034, "learning_rate": 5.737356612078352e-06, "loss": 0.4204, "step": 16228 }, { "epoch": 1.5229917417417418, "grad_norm": 1.012504036103623, "learning_rate": 5.736816611937484e-06, "loss": 0.38, "step": 16229 }, { "epoch": 1.5230855855855856, "grad_norm": 0.860520530117001, "learning_rate": 5.736276603011465e-06, "loss": 0.3881, "step": 16230 }, { "epoch": 1.5231794294294294, "grad_norm": 0.859456841319389, "learning_rate": 5.7357365853067335e-06, "loss": 0.4053, "step": 16231 }, { "epoch": 1.5232732732732732, "grad_norm": 0.9757249933009856, "learning_rate": 5.735196558829727e-06, "loss": 0.3773, "step": 16232 }, { "epoch": 1.5233671171171173, "grad_norm": 2.041572554436272, "learning_rate": 5.734656523586886e-06, "loss": 0.3996, "step": 16233 }, { "epoch": 1.523460960960961, "grad_norm": 1.136413812828875, "learning_rate": 5.734116479584649e-06, "loss": 0.4033, "step": 16234 }, { "epoch": 1.5235548048048049, "grad_norm": 0.9717879809098662, "learning_rate": 5.733576426829456e-06, "loss": 0.3958, "step": 16235 }, { "epoch": 1.5236486486486487, "grad_norm": 0.9773760575382262, "learning_rate": 5.733036365327743e-06, "loss": 0.4055, "step": 16236 }, { "epoch": 1.5237424924924925, "grad_norm": 0.8763782424238324, "learning_rate": 5.732496295085952e-06, "loss": 0.3908, "step": 16237 }, { "epoch": 1.5238363363363363, "grad_norm": 0.8742497260354397, "learning_rate": 5.731956216110522e-06, "loss": 0.392, "step": 16238 }, { "epoch": 1.5239301801801801, "grad_norm": 0.9157328411172335, "learning_rate": 5.7314161284078895e-06, "loss": 0.4259, "step": 16239 }, { "epoch": 1.524024024024024, "grad_norm": 1.045843552781893, "learning_rate": 5.730876031984499e-06, "loss": 0.3962, "step": 16240 }, { "epoch": 1.5241178678678677, "grad_norm": 0.8873091267858292, "learning_rate": 5.730335926846787e-06, "loss": 0.3851, "step": 16241 }, { "epoch": 1.5242117117117115, "grad_norm": 0.8839926926271711, "learning_rate": 5.729795813001193e-06, "loss": 0.4016, "step": 16242 }, { "epoch": 1.5243055555555556, "grad_norm": 1.5615494763835922, "learning_rate": 5.729255690454158e-06, "loss": 0.4032, "step": 16243 }, { "epoch": 1.5243993993993994, "grad_norm": 0.9172022358944, "learning_rate": 5.7287155592121226e-06, "loss": 0.4033, "step": 16244 }, { "epoch": 1.5244932432432432, "grad_norm": 0.87867857636605, "learning_rate": 5.7281754192815245e-06, "loss": 0.3587, "step": 16245 }, { "epoch": 1.5245870870870872, "grad_norm": 0.9074305110553298, "learning_rate": 5.727635270668806e-06, "loss": 0.4118, "step": 16246 }, { "epoch": 1.524680930930931, "grad_norm": 1.3621150989006139, "learning_rate": 5.727095113380407e-06, "loss": 0.4279, "step": 16247 }, { "epoch": 1.5247747747747749, "grad_norm": 0.945709020384488, "learning_rate": 5.726554947422765e-06, "loss": 0.4237, "step": 16248 }, { "epoch": 1.5248686186186187, "grad_norm": 0.9198769162968417, "learning_rate": 5.726014772802325e-06, "loss": 0.3855, "step": 16249 }, { "epoch": 1.5249624624624625, "grad_norm": 1.0109464740255154, "learning_rate": 5.7254745895255235e-06, "loss": 0.4027, "step": 16250 }, { "epoch": 1.5250563063063063, "grad_norm": 0.8992864723590716, "learning_rate": 5.724934397598805e-06, "loss": 0.4002, "step": 16251 }, { "epoch": 1.52515015015015, "grad_norm": 1.0935348466627062, "learning_rate": 5.7243941970286075e-06, "loss": 0.4252, "step": 16252 }, { "epoch": 1.525243993993994, "grad_norm": 1.1616265858193549, "learning_rate": 5.723853987821374e-06, "loss": 0.395, "step": 16253 }, { "epoch": 1.5253378378378377, "grad_norm": 0.9162950603970789, "learning_rate": 5.723313769983542e-06, "loss": 0.3629, "step": 16254 }, { "epoch": 1.5254316816816815, "grad_norm": 0.9790712848913893, "learning_rate": 5.722773543521557e-06, "loss": 0.3993, "step": 16255 }, { "epoch": 1.5255255255255256, "grad_norm": 0.8923292045634755, "learning_rate": 5.722233308441856e-06, "loss": 0.3817, "step": 16256 }, { "epoch": 1.5256193693693694, "grad_norm": 0.9720249854424869, "learning_rate": 5.721693064750883e-06, "loss": 0.3973, "step": 16257 }, { "epoch": 1.5257132132132132, "grad_norm": 0.9640646067790369, "learning_rate": 5.7211528124550785e-06, "loss": 0.4247, "step": 16258 }, { "epoch": 1.5258070570570572, "grad_norm": 1.2155998998302264, "learning_rate": 5.720612551560886e-06, "loss": 0.4606, "step": 16259 }, { "epoch": 1.525900900900901, "grad_norm": 1.0062242585791636, "learning_rate": 5.720072282074742e-06, "loss": 0.3888, "step": 16260 }, { "epoch": 1.5259947447447448, "grad_norm": 2.632303679234705, "learning_rate": 5.719532004003095e-06, "loss": 0.4155, "step": 16261 }, { "epoch": 1.5260885885885886, "grad_norm": 0.8596223767673751, "learning_rate": 5.71899171735238e-06, "loss": 0.4097, "step": 16262 }, { "epoch": 1.5261824324324325, "grad_norm": 0.9609671085841439, "learning_rate": 5.7184514221290435e-06, "loss": 0.3744, "step": 16263 }, { "epoch": 1.5262762762762763, "grad_norm": 1.1101596774021205, "learning_rate": 5.717911118339526e-06, "loss": 0.3686, "step": 16264 }, { "epoch": 1.52637012012012, "grad_norm": 0.9833695232600322, "learning_rate": 5.717370805990271e-06, "loss": 0.4701, "step": 16265 }, { "epoch": 1.5264639639639639, "grad_norm": 1.0959288016378042, "learning_rate": 5.7168304850877175e-06, "loss": 0.421, "step": 16266 }, { "epoch": 1.5265578078078077, "grad_norm": 1.0485530127137397, "learning_rate": 5.716290155638311e-06, "loss": 0.4066, "step": 16267 }, { "epoch": 1.5266516516516515, "grad_norm": 0.9621227556572777, "learning_rate": 5.715749817648492e-06, "loss": 0.3999, "step": 16268 }, { "epoch": 1.5267454954954955, "grad_norm": 1.01454890606342, "learning_rate": 5.715209471124703e-06, "loss": 0.413, "step": 16269 }, { "epoch": 1.5268393393393394, "grad_norm": 0.9702765551619525, "learning_rate": 5.714669116073389e-06, "loss": 0.3873, "step": 16270 }, { "epoch": 1.5269331831831832, "grad_norm": 0.9796178635862057, "learning_rate": 5.714128752500991e-06, "loss": 0.3804, "step": 16271 }, { "epoch": 1.527027027027027, "grad_norm": 1.0991301333718853, "learning_rate": 5.7135883804139505e-06, "loss": 0.4027, "step": 16272 }, { "epoch": 1.527120870870871, "grad_norm": 1.0184802815561198, "learning_rate": 5.713047999818713e-06, "loss": 0.4015, "step": 16273 }, { "epoch": 1.5272147147147148, "grad_norm": 1.0057336672185142, "learning_rate": 5.712507610721719e-06, "loss": 0.3512, "step": 16274 }, { "epoch": 1.5273085585585586, "grad_norm": 0.9457665894775527, "learning_rate": 5.711967213129412e-06, "loss": 0.3645, "step": 16275 }, { "epoch": 1.5274024024024024, "grad_norm": 0.9318235878040311, "learning_rate": 5.711426807048239e-06, "loss": 0.4258, "step": 16276 }, { "epoch": 1.5274962462462462, "grad_norm": 0.9100819882805895, "learning_rate": 5.7108863924846405e-06, "loss": 0.3409, "step": 16277 }, { "epoch": 1.52759009009009, "grad_norm": 0.9156624380772134, "learning_rate": 5.710345969445057e-06, "loss": 0.4231, "step": 16278 }, { "epoch": 1.5276839339339339, "grad_norm": 0.8954025405355411, "learning_rate": 5.7098055379359375e-06, "loss": 0.3841, "step": 16279 }, { "epoch": 1.5277777777777777, "grad_norm": 0.9540248294074554, "learning_rate": 5.709265097963721e-06, "loss": 0.3593, "step": 16280 }, { "epoch": 1.5278716216216215, "grad_norm": 0.8738873833748506, "learning_rate": 5.708724649534854e-06, "loss": 0.4068, "step": 16281 }, { "epoch": 1.5279654654654653, "grad_norm": 1.4196936027860239, "learning_rate": 5.70818419265578e-06, "loss": 0.4078, "step": 16282 }, { "epoch": 1.5280593093093093, "grad_norm": 0.8956947026025631, "learning_rate": 5.707643727332941e-06, "loss": 0.3968, "step": 16283 }, { "epoch": 1.5281531531531531, "grad_norm": 1.1164247810122252, "learning_rate": 5.7071032535727855e-06, "loss": 0.3243, "step": 16284 }, { "epoch": 1.528246996996997, "grad_norm": 0.9344930026341459, "learning_rate": 5.706562771381753e-06, "loss": 0.4028, "step": 16285 }, { "epoch": 1.528340840840841, "grad_norm": 0.9980474475478667, "learning_rate": 5.706022280766289e-06, "loss": 0.4225, "step": 16286 }, { "epoch": 1.5284346846846848, "grad_norm": 0.9601251268339895, "learning_rate": 5.705481781732838e-06, "loss": 0.3923, "step": 16287 }, { "epoch": 1.5285285285285286, "grad_norm": 1.1882171576829672, "learning_rate": 5.704941274287846e-06, "loss": 0.4254, "step": 16288 }, { "epoch": 1.5286223723723724, "grad_norm": 0.9003134668131348, "learning_rate": 5.704400758437755e-06, "loss": 0.4216, "step": 16289 }, { "epoch": 1.5287162162162162, "grad_norm": 0.9504304864847992, "learning_rate": 5.703860234189011e-06, "loss": 0.3918, "step": 16290 }, { "epoch": 1.52881006006006, "grad_norm": 0.8830105146810341, "learning_rate": 5.70331970154806e-06, "loss": 0.38, "step": 16291 }, { "epoch": 1.5289039039039038, "grad_norm": 0.9234571997936663, "learning_rate": 5.702779160521344e-06, "loss": 0.3705, "step": 16292 }, { "epoch": 1.5289977477477477, "grad_norm": 1.002617732185025, "learning_rate": 5.70223861111531e-06, "loss": 0.392, "step": 16293 }, { "epoch": 1.5290915915915915, "grad_norm": 1.2672681857752488, "learning_rate": 5.701698053336403e-06, "loss": 0.3827, "step": 16294 }, { "epoch": 1.5291854354354353, "grad_norm": 1.0228135845817132, "learning_rate": 5.701157487191065e-06, "loss": 0.4373, "step": 16295 }, { "epoch": 1.5292792792792793, "grad_norm": 1.0826773244562302, "learning_rate": 5.700616912685747e-06, "loss": 0.427, "step": 16296 }, { "epoch": 1.5293731231231231, "grad_norm": 0.8775768261031112, "learning_rate": 5.70007632982689e-06, "loss": 0.3967, "step": 16297 }, { "epoch": 1.529466966966967, "grad_norm": 0.788130823222851, "learning_rate": 5.69953573862094e-06, "loss": 0.3989, "step": 16298 }, { "epoch": 1.529560810810811, "grad_norm": 0.9192117171324066, "learning_rate": 5.698995139074343e-06, "loss": 0.3873, "step": 16299 }, { "epoch": 1.5296546546546548, "grad_norm": 1.0821863080081429, "learning_rate": 5.6984545311935446e-06, "loss": 0.4319, "step": 16300 }, { "epoch": 1.5297484984984986, "grad_norm": 1.105804767501351, "learning_rate": 5.6979139149849905e-06, "loss": 0.4285, "step": 16301 }, { "epoch": 1.5298423423423424, "grad_norm": 1.040577247216701, "learning_rate": 5.697373290455127e-06, "loss": 0.348, "step": 16302 }, { "epoch": 1.5299361861861862, "grad_norm": 0.867764702596185, "learning_rate": 5.6968326576104e-06, "loss": 0.377, "step": 16303 }, { "epoch": 1.53003003003003, "grad_norm": 0.836847116686697, "learning_rate": 5.6962920164572545e-06, "loss": 0.3681, "step": 16304 }, { "epoch": 1.5301238738738738, "grad_norm": 0.9111916428848301, "learning_rate": 5.695751367002138e-06, "loss": 0.3864, "step": 16305 }, { "epoch": 1.5302177177177176, "grad_norm": 0.9250052256396922, "learning_rate": 5.695210709251497e-06, "loss": 0.3941, "step": 16306 }, { "epoch": 1.5303115615615615, "grad_norm": 0.9031556195126017, "learning_rate": 5.694670043211774e-06, "loss": 0.3368, "step": 16307 }, { "epoch": 1.5304054054054053, "grad_norm": 1.0711668177711668, "learning_rate": 5.69412936888942e-06, "loss": 0.4427, "step": 16308 }, { "epoch": 1.5304992492492493, "grad_norm": 0.9636113157612893, "learning_rate": 5.69358868629088e-06, "loss": 0.3908, "step": 16309 }, { "epoch": 1.530593093093093, "grad_norm": 0.8416853929272554, "learning_rate": 5.693047995422602e-06, "loss": 0.3807, "step": 16310 }, { "epoch": 1.530686936936937, "grad_norm": 0.8805919126737889, "learning_rate": 5.692507296291027e-06, "loss": 0.3755, "step": 16311 }, { "epoch": 1.5307807807807807, "grad_norm": 0.9815037528832967, "learning_rate": 5.6919665889026095e-06, "loss": 0.3954, "step": 16312 }, { "epoch": 1.5308746246246248, "grad_norm": 1.0064102996008262, "learning_rate": 5.691425873263792e-06, "loss": 0.4355, "step": 16313 }, { "epoch": 1.5309684684684686, "grad_norm": 0.9140227265543566, "learning_rate": 5.690885149381023e-06, "loss": 0.3698, "step": 16314 }, { "epoch": 1.5310623123123124, "grad_norm": 1.0732845095625652, "learning_rate": 5.690344417260749e-06, "loss": 0.3747, "step": 16315 }, { "epoch": 1.5311561561561562, "grad_norm": 0.8582218275825771, "learning_rate": 5.6898036769094156e-06, "loss": 0.4113, "step": 16316 }, { "epoch": 1.53125, "grad_norm": 1.0059307831075142, "learning_rate": 5.689262928333472e-06, "loss": 0.4065, "step": 16317 }, { "epoch": 1.5313438438438438, "grad_norm": 0.8266085915119603, "learning_rate": 5.688722171539367e-06, "loss": 0.3816, "step": 16318 }, { "epoch": 1.5314376876876876, "grad_norm": 0.9325248589459537, "learning_rate": 5.688181406533546e-06, "loss": 0.4124, "step": 16319 }, { "epoch": 1.5315315315315314, "grad_norm": 1.000027963300366, "learning_rate": 5.687640633322458e-06, "loss": 0.3958, "step": 16320 }, { "epoch": 1.5316253753753752, "grad_norm": 1.009374835190399, "learning_rate": 5.687099851912549e-06, "loss": 0.428, "step": 16321 }, { "epoch": 1.5317192192192193, "grad_norm": 1.075481950434938, "learning_rate": 5.686559062310266e-06, "loss": 0.4198, "step": 16322 }, { "epoch": 1.531813063063063, "grad_norm": 1.0153657168150017, "learning_rate": 5.68601826452206e-06, "loss": 0.4267, "step": 16323 }, { "epoch": 1.531906906906907, "grad_norm": 1.6435938615982215, "learning_rate": 5.685477458554377e-06, "loss": 0.3499, "step": 16324 }, { "epoch": 1.5320007507507507, "grad_norm": 3.2053974963145415, "learning_rate": 5.684936644413665e-06, "loss": 0.3568, "step": 16325 }, { "epoch": 1.5320945945945947, "grad_norm": 0.936304400750075, "learning_rate": 5.684395822106374e-06, "loss": 0.4162, "step": 16326 }, { "epoch": 1.5321884384384385, "grad_norm": 0.9490796082699935, "learning_rate": 5.6838549916389515e-06, "loss": 0.4305, "step": 16327 }, { "epoch": 1.5322822822822824, "grad_norm": 0.9493680045752004, "learning_rate": 5.683314153017844e-06, "loss": 0.3987, "step": 16328 }, { "epoch": 1.5323761261261262, "grad_norm": 0.8886227656210424, "learning_rate": 5.682773306249502e-06, "loss": 0.4256, "step": 16329 }, { "epoch": 1.53246996996997, "grad_norm": 0.8035408512581778, "learning_rate": 5.682232451340374e-06, "loss": 0.4178, "step": 16330 }, { "epoch": 1.5325638138138138, "grad_norm": 1.4914431866325795, "learning_rate": 5.681691588296907e-06, "loss": 0.4477, "step": 16331 }, { "epoch": 1.5326576576576576, "grad_norm": 1.1674862333016618, "learning_rate": 5.681150717125552e-06, "loss": 0.3878, "step": 16332 }, { "epoch": 1.5327515015015014, "grad_norm": 0.8507227095140254, "learning_rate": 5.6806098378327565e-06, "loss": 0.3775, "step": 16333 }, { "epoch": 1.5328453453453452, "grad_norm": 0.9444602342311994, "learning_rate": 5.680068950424969e-06, "loss": 0.4153, "step": 16334 }, { "epoch": 1.532939189189189, "grad_norm": 1.0015552642731713, "learning_rate": 5.679528054908641e-06, "loss": 0.3974, "step": 16335 }, { "epoch": 1.533033033033033, "grad_norm": 0.854215182563485, "learning_rate": 5.67898715129022e-06, "loss": 0.3232, "step": 16336 }, { "epoch": 1.5331268768768769, "grad_norm": 0.9482372104540132, "learning_rate": 5.678446239576153e-06, "loss": 0.4083, "step": 16337 }, { "epoch": 1.5332207207207207, "grad_norm": 0.8433162461310456, "learning_rate": 5.677905319772894e-06, "loss": 0.3936, "step": 16338 }, { "epoch": 1.5333145645645647, "grad_norm": 0.9573640957880265, "learning_rate": 5.67736439188689e-06, "loss": 0.4191, "step": 16339 }, { "epoch": 1.5334084084084085, "grad_norm": 0.9577362155294677, "learning_rate": 5.6768234559245895e-06, "loss": 0.3788, "step": 16340 }, { "epoch": 1.5335022522522523, "grad_norm": 1.7220184679295998, "learning_rate": 5.676282511892442e-06, "loss": 0.3578, "step": 16341 }, { "epoch": 1.5335960960960962, "grad_norm": 0.9274010941057147, "learning_rate": 5.6757415597969015e-06, "loss": 0.3907, "step": 16342 }, { "epoch": 1.53368993993994, "grad_norm": 0.9977773988646866, "learning_rate": 5.675200599644413e-06, "loss": 0.3949, "step": 16343 }, { "epoch": 1.5337837837837838, "grad_norm": 0.9692532164820158, "learning_rate": 5.6746596314414285e-06, "loss": 0.3955, "step": 16344 }, { "epoch": 1.5338776276276276, "grad_norm": 0.8880162053788602, "learning_rate": 5.674118655194399e-06, "loss": 0.4389, "step": 16345 }, { "epoch": 1.5339714714714714, "grad_norm": 1.773069279587666, "learning_rate": 5.6735776709097725e-06, "loss": 0.4063, "step": 16346 }, { "epoch": 1.5340653153153152, "grad_norm": 0.9483822928760129, "learning_rate": 5.673036678594e-06, "loss": 0.3755, "step": 16347 }, { "epoch": 1.534159159159159, "grad_norm": 0.8411862490141409, "learning_rate": 5.672495678253532e-06, "loss": 0.4074, "step": 16348 }, { "epoch": 1.534253003003003, "grad_norm": 1.1884693784057767, "learning_rate": 5.671954669894819e-06, "loss": 0.3533, "step": 16349 }, { "epoch": 1.5343468468468469, "grad_norm": 1.0437623096916993, "learning_rate": 5.67141365352431e-06, "loss": 0.4089, "step": 16350 }, { "epoch": 1.5344406906906907, "grad_norm": 0.9888104014967495, "learning_rate": 5.670872629148459e-06, "loss": 0.4073, "step": 16351 }, { "epoch": 1.5345345345345347, "grad_norm": 0.9593675799792633, "learning_rate": 5.670331596773715e-06, "loss": 0.4086, "step": 16352 }, { "epoch": 1.5346283783783785, "grad_norm": 0.9765515250050273, "learning_rate": 5.669790556406527e-06, "loss": 0.3647, "step": 16353 }, { "epoch": 1.5347222222222223, "grad_norm": 0.9428373198092115, "learning_rate": 5.669249508053348e-06, "loss": 0.4015, "step": 16354 }, { "epoch": 1.5348160660660661, "grad_norm": 0.9257884036651596, "learning_rate": 5.668708451720628e-06, "loss": 0.392, "step": 16355 }, { "epoch": 1.53490990990991, "grad_norm": 1.7989547658969913, "learning_rate": 5.668167387414818e-06, "loss": 0.441, "step": 16356 }, { "epoch": 1.5350037537537538, "grad_norm": 0.9535289400325846, "learning_rate": 5.6676263151423714e-06, "loss": 0.394, "step": 16357 }, { "epoch": 1.5350975975975976, "grad_norm": 1.250132637722765, "learning_rate": 5.667085234909737e-06, "loss": 0.4378, "step": 16358 }, { "epoch": 1.5351914414414414, "grad_norm": 0.8193738379775095, "learning_rate": 5.666544146723366e-06, "loss": 0.3498, "step": 16359 }, { "epoch": 1.5352852852852852, "grad_norm": 0.933441933838739, "learning_rate": 5.666003050589712e-06, "loss": 0.4153, "step": 16360 }, { "epoch": 1.535379129129129, "grad_norm": 0.9462254797950655, "learning_rate": 5.665461946515224e-06, "loss": 0.4237, "step": 16361 }, { "epoch": 1.535472972972973, "grad_norm": 0.9622709099466928, "learning_rate": 5.664920834506354e-06, "loss": 0.4091, "step": 16362 }, { "epoch": 1.5355668168168168, "grad_norm": 0.8671095360654683, "learning_rate": 5.664379714569557e-06, "loss": 0.3828, "step": 16363 }, { "epoch": 1.5356606606606606, "grad_norm": 0.9344222694595782, "learning_rate": 5.663838586711281e-06, "loss": 0.3947, "step": 16364 }, { "epoch": 1.5357545045045045, "grad_norm": 0.8912799851377718, "learning_rate": 5.66329745093798e-06, "loss": 0.4105, "step": 16365 }, { "epoch": 1.5358483483483485, "grad_norm": 0.9904484917887536, "learning_rate": 5.662756307256106e-06, "loss": 0.3855, "step": 16366 }, { "epoch": 1.5359421921921923, "grad_norm": 0.7327620234849594, "learning_rate": 5.6622151556721086e-06, "loss": 0.3965, "step": 16367 }, { "epoch": 1.5360360360360361, "grad_norm": 0.970497800732488, "learning_rate": 5.661673996192443e-06, "loss": 0.4089, "step": 16368 }, { "epoch": 1.53612987987988, "grad_norm": 1.0497963583811698, "learning_rate": 5.661132828823561e-06, "loss": 0.4248, "step": 16369 }, { "epoch": 1.5362237237237237, "grad_norm": 1.15120120224298, "learning_rate": 5.660591653571914e-06, "loss": 0.3906, "step": 16370 }, { "epoch": 1.5363175675675675, "grad_norm": 1.3988561211671129, "learning_rate": 5.660050470443955e-06, "loss": 0.4027, "step": 16371 }, { "epoch": 1.5364114114114114, "grad_norm": 1.1734221663681597, "learning_rate": 5.659509279446137e-06, "loss": 0.3929, "step": 16372 }, { "epoch": 1.5365052552552552, "grad_norm": 0.9607307283428218, "learning_rate": 5.658968080584912e-06, "loss": 0.3868, "step": 16373 }, { "epoch": 1.536599099099099, "grad_norm": 0.8886564814400552, "learning_rate": 5.658426873866731e-06, "loss": 0.4032, "step": 16374 }, { "epoch": 1.5366929429429428, "grad_norm": 0.9117183672855528, "learning_rate": 5.657885659298052e-06, "loss": 0.4003, "step": 16375 }, { "epoch": 1.5367867867867868, "grad_norm": 1.070233408189907, "learning_rate": 5.657344436885324e-06, "loss": 0.4404, "step": 16376 }, { "epoch": 1.5368806306306306, "grad_norm": 1.0848566437188607, "learning_rate": 5.656803206635e-06, "loss": 0.3879, "step": 16377 }, { "epoch": 1.5369744744744744, "grad_norm": 0.8542492724124715, "learning_rate": 5.656261968553535e-06, "loss": 0.3694, "step": 16378 }, { "epoch": 1.5370683183183185, "grad_norm": 0.9470250968477723, "learning_rate": 5.65572072264738e-06, "loss": 0.4014, "step": 16379 }, { "epoch": 1.5371621621621623, "grad_norm": 1.4259455516522654, "learning_rate": 5.65517946892299e-06, "loss": 0.4156, "step": 16380 }, { "epoch": 1.537256006006006, "grad_norm": 1.4134979737146898, "learning_rate": 5.654638207386819e-06, "loss": 0.4203, "step": 16381 }, { "epoch": 1.53734984984985, "grad_norm": 0.9057435501253319, "learning_rate": 5.654096938045319e-06, "loss": 0.3715, "step": 16382 }, { "epoch": 1.5374436936936937, "grad_norm": 0.9202071443352643, "learning_rate": 5.653555660904945e-06, "loss": 0.4402, "step": 16383 }, { "epoch": 1.5375375375375375, "grad_norm": 1.098057077831615, "learning_rate": 5.65301437597215e-06, "loss": 0.3989, "step": 16384 }, { "epoch": 1.5376313813813813, "grad_norm": 0.9197375387300575, "learning_rate": 5.652473083253386e-06, "loss": 0.3975, "step": 16385 }, { "epoch": 1.5377252252252251, "grad_norm": 1.05222349556028, "learning_rate": 5.651931782755109e-06, "loss": 0.3978, "step": 16386 }, { "epoch": 1.537819069069069, "grad_norm": 1.118208541610784, "learning_rate": 5.651390474483774e-06, "loss": 0.3888, "step": 16387 }, { "epoch": 1.5379129129129128, "grad_norm": 0.8940975033988493, "learning_rate": 5.650849158445833e-06, "loss": 0.4045, "step": 16388 }, { "epoch": 1.5380067567567568, "grad_norm": 0.8985791415566412, "learning_rate": 5.6503078346477415e-06, "loss": 0.4044, "step": 16389 }, { "epoch": 1.5381006006006006, "grad_norm": 1.6143045183041917, "learning_rate": 5.6497665030959526e-06, "loss": 0.423, "step": 16390 }, { "epoch": 1.5381944444444444, "grad_norm": 0.924512526937164, "learning_rate": 5.649225163796921e-06, "loss": 0.3355, "step": 16391 }, { "epoch": 1.5382882882882885, "grad_norm": 1.0452391153963194, "learning_rate": 5.648683816757101e-06, "loss": 0.3486, "step": 16392 }, { "epoch": 1.5383821321321323, "grad_norm": 1.0124384226976912, "learning_rate": 5.648142461982949e-06, "loss": 0.3356, "step": 16393 }, { "epoch": 1.538475975975976, "grad_norm": 1.7165020973448217, "learning_rate": 5.647601099480917e-06, "loss": 0.4, "step": 16394 }, { "epoch": 1.5385698198198199, "grad_norm": 0.9533222365346823, "learning_rate": 5.64705972925746e-06, "loss": 0.3802, "step": 16395 }, { "epoch": 1.5386636636636637, "grad_norm": 1.0059033209096597, "learning_rate": 5.646518351319036e-06, "loss": 0.3651, "step": 16396 }, { "epoch": 1.5387575075075075, "grad_norm": 1.2009909530019058, "learning_rate": 5.645976965672095e-06, "loss": 0.3877, "step": 16397 }, { "epoch": 1.5388513513513513, "grad_norm": 1.2770937035393635, "learning_rate": 5.645435572323096e-06, "loss": 0.4004, "step": 16398 }, { "epoch": 1.5389451951951951, "grad_norm": 1.0257961998996286, "learning_rate": 5.6448941712784935e-06, "loss": 0.3617, "step": 16399 }, { "epoch": 1.539039039039039, "grad_norm": 0.9294197752842321, "learning_rate": 5.644352762544741e-06, "loss": 0.4637, "step": 16400 }, { "epoch": 1.5391328828828827, "grad_norm": 0.8215540357481348, "learning_rate": 5.643811346128295e-06, "loss": 0.3648, "step": 16401 }, { "epoch": 1.5392267267267268, "grad_norm": 0.8858924451054385, "learning_rate": 5.64326992203561e-06, "loss": 0.4164, "step": 16402 }, { "epoch": 1.5393205705705706, "grad_norm": 0.9654528446365915, "learning_rate": 5.642728490273143e-06, "loss": 0.4079, "step": 16403 }, { "epoch": 1.5394144144144144, "grad_norm": 0.9176673082310984, "learning_rate": 5.6421870508473465e-06, "loss": 0.3939, "step": 16404 }, { "epoch": 1.5395082582582582, "grad_norm": 0.9448111992072294, "learning_rate": 5.641645603764681e-06, "loss": 0.4061, "step": 16405 }, { "epoch": 1.5396021021021022, "grad_norm": 1.4575803667145857, "learning_rate": 5.6411041490315975e-06, "loss": 0.4069, "step": 16406 }, { "epoch": 1.539695945945946, "grad_norm": 1.4171890995795804, "learning_rate": 5.640562686654555e-06, "loss": 0.3749, "step": 16407 }, { "epoch": 1.5397897897897899, "grad_norm": 0.9246652977729614, "learning_rate": 5.640021216640007e-06, "loss": 0.3559, "step": 16408 }, { "epoch": 1.5398836336336337, "grad_norm": 0.8658011358368792, "learning_rate": 5.6394797389944115e-06, "loss": 0.406, "step": 16409 }, { "epoch": 1.5399774774774775, "grad_norm": 0.9635779994155595, "learning_rate": 5.638938253724222e-06, "loss": 0.3928, "step": 16410 }, { "epoch": 1.5400713213213213, "grad_norm": 1.027026562197769, "learning_rate": 5.638396760835899e-06, "loss": 0.4186, "step": 16411 }, { "epoch": 1.540165165165165, "grad_norm": 1.5323279732332629, "learning_rate": 5.637855260335895e-06, "loss": 0.4095, "step": 16412 }, { "epoch": 1.540259009009009, "grad_norm": 1.0891081325091974, "learning_rate": 5.637313752230668e-06, "loss": 0.4287, "step": 16413 }, { "epoch": 1.5403528528528527, "grad_norm": 0.8918216043299642, "learning_rate": 5.636772236526675e-06, "loss": 0.3843, "step": 16414 }, { "epoch": 1.5404466966966965, "grad_norm": 0.9570096800236968, "learning_rate": 5.63623071323037e-06, "loss": 0.4468, "step": 16415 }, { "epoch": 1.5405405405405406, "grad_norm": 0.953757244793847, "learning_rate": 5.635689182348212e-06, "loss": 0.3828, "step": 16416 }, { "epoch": 1.5406343843843844, "grad_norm": 0.9107829986195219, "learning_rate": 5.635147643886658e-06, "loss": 0.3881, "step": 16417 }, { "epoch": 1.5407282282282282, "grad_norm": 1.0098442965641987, "learning_rate": 5.634606097852162e-06, "loss": 0.3732, "step": 16418 }, { "epoch": 1.5408220720720722, "grad_norm": 1.6070639454161169, "learning_rate": 5.634064544251184e-06, "loss": 0.4387, "step": 16419 }, { "epoch": 1.540915915915916, "grad_norm": 0.9390152577795934, "learning_rate": 5.633522983090181e-06, "loss": 0.4108, "step": 16420 }, { "epoch": 1.5410097597597598, "grad_norm": 0.8647492891189693, "learning_rate": 5.632981414375606e-06, "loss": 0.4073, "step": 16421 }, { "epoch": 1.5411036036036037, "grad_norm": 0.9102638948499479, "learning_rate": 5.632439838113921e-06, "loss": 0.3778, "step": 16422 }, { "epoch": 1.5411974474474475, "grad_norm": 0.8771389605935757, "learning_rate": 5.631898254311582e-06, "loss": 0.3697, "step": 16423 }, { "epoch": 1.5412912912912913, "grad_norm": 0.85772212203736, "learning_rate": 5.631356662975044e-06, "loss": 0.3754, "step": 16424 }, { "epoch": 1.541385135135135, "grad_norm": 1.1153374370311047, "learning_rate": 5.630815064110767e-06, "loss": 0.4545, "step": 16425 }, { "epoch": 1.541478978978979, "grad_norm": 1.1961961328758848, "learning_rate": 5.630273457725209e-06, "loss": 0.4295, "step": 16426 }, { "epoch": 1.5415728228228227, "grad_norm": 1.0842150555639836, "learning_rate": 5.629731843824825e-06, "loss": 0.4521, "step": 16427 }, { "epoch": 1.5416666666666665, "grad_norm": 1.0395432059960639, "learning_rate": 5.629190222416073e-06, "loss": 0.382, "step": 16428 }, { "epoch": 1.5417605105105106, "grad_norm": 1.0671000065276717, "learning_rate": 5.628648593505415e-06, "loss": 0.4545, "step": 16429 }, { "epoch": 1.5418543543543544, "grad_norm": 1.0019830407801684, "learning_rate": 5.628106957099303e-06, "loss": 0.3941, "step": 16430 }, { "epoch": 1.5419481981981982, "grad_norm": 0.9983045075768638, "learning_rate": 5.6275653132041984e-06, "loss": 0.438, "step": 16431 }, { "epoch": 1.5420420420420422, "grad_norm": 0.7932693530113405, "learning_rate": 5.627023661826561e-06, "loss": 0.3713, "step": 16432 }, { "epoch": 1.542135885885886, "grad_norm": 0.930321132820745, "learning_rate": 5.6264820029728436e-06, "loss": 0.4098, "step": 16433 }, { "epoch": 1.5422297297297298, "grad_norm": 0.8875666577926309, "learning_rate": 5.625940336649508e-06, "loss": 0.3919, "step": 16434 }, { "epoch": 1.5423235735735736, "grad_norm": 0.956521927631925, "learning_rate": 5.625398662863014e-06, "loss": 0.3869, "step": 16435 }, { "epoch": 1.5424174174174174, "grad_norm": 0.9419366640522654, "learning_rate": 5.624856981619816e-06, "loss": 0.4139, "step": 16436 }, { "epoch": 1.5425112612612613, "grad_norm": 0.9084951337065148, "learning_rate": 5.624315292926377e-06, "loss": 0.4426, "step": 16437 }, { "epoch": 1.542605105105105, "grad_norm": 1.003251373770927, "learning_rate": 5.6237735967891516e-06, "loss": 0.3754, "step": 16438 }, { "epoch": 1.5426989489489489, "grad_norm": 0.8680904353190245, "learning_rate": 5.6232318932146e-06, "loss": 0.4197, "step": 16439 }, { "epoch": 1.5427927927927927, "grad_norm": 1.0412311393544949, "learning_rate": 5.622690182209182e-06, "loss": 0.4231, "step": 16440 }, { "epoch": 1.5428866366366365, "grad_norm": 1.0793034635012129, "learning_rate": 5.622148463779356e-06, "loss": 0.381, "step": 16441 }, { "epoch": 1.5429804804804805, "grad_norm": 1.0613176562470397, "learning_rate": 5.6216067379315786e-06, "loss": 0.3804, "step": 16442 }, { "epoch": 1.5430743243243243, "grad_norm": 0.8983495106112495, "learning_rate": 5.621065004672314e-06, "loss": 0.4029, "step": 16443 }, { "epoch": 1.5431681681681682, "grad_norm": 0.9324401820736835, "learning_rate": 5.620523264008017e-06, "loss": 0.3984, "step": 16444 }, { "epoch": 1.543262012012012, "grad_norm": 0.9627472536463563, "learning_rate": 5.619981515945147e-06, "loss": 0.4416, "step": 16445 }, { "epoch": 1.543355855855856, "grad_norm": 0.8513065258036484, "learning_rate": 5.619439760490164e-06, "loss": 0.4021, "step": 16446 }, { "epoch": 1.5434496996996998, "grad_norm": 1.2767193736305358, "learning_rate": 5.618897997649529e-06, "loss": 0.41, "step": 16447 }, { "epoch": 1.5435435435435436, "grad_norm": 0.9158418032841122, "learning_rate": 5.618356227429701e-06, "loss": 0.3454, "step": 16448 }, { "epoch": 1.5436373873873874, "grad_norm": 1.150358235049636, "learning_rate": 5.617814449837138e-06, "loss": 0.3897, "step": 16449 }, { "epoch": 1.5437312312312312, "grad_norm": 0.8255502890217642, "learning_rate": 5.6172726648783015e-06, "loss": 0.3708, "step": 16450 }, { "epoch": 1.543825075075075, "grad_norm": 0.9524492836432482, "learning_rate": 5.616730872559649e-06, "loss": 0.4014, "step": 16451 }, { "epoch": 1.5439189189189189, "grad_norm": 1.0331663532497477, "learning_rate": 5.616189072887642e-06, "loss": 0.3835, "step": 16452 }, { "epoch": 1.5440127627627627, "grad_norm": 1.0796070605012522, "learning_rate": 5.6156472658687414e-06, "loss": 0.4068, "step": 16453 }, { "epoch": 1.5441066066066065, "grad_norm": 0.9181988973439728, "learning_rate": 5.615105451509405e-06, "loss": 0.3867, "step": 16454 }, { "epoch": 1.5442004504504503, "grad_norm": 1.0788358582789788, "learning_rate": 5.614563629816095e-06, "loss": 0.4021, "step": 16455 }, { "epoch": 1.5442942942942943, "grad_norm": 1.1289312458536875, "learning_rate": 5.614021800795269e-06, "loss": 0.399, "step": 16456 }, { "epoch": 1.5443881381381381, "grad_norm": 8.695602755719744, "learning_rate": 5.613479964453389e-06, "loss": 0.4054, "step": 16457 }, { "epoch": 1.544481981981982, "grad_norm": 1.4509965058135317, "learning_rate": 5.612938120796916e-06, "loss": 0.4174, "step": 16458 }, { "epoch": 1.544575825825826, "grad_norm": 0.9622279411658953, "learning_rate": 5.612396269832309e-06, "loss": 0.4063, "step": 16459 }, { "epoch": 1.5446696696696698, "grad_norm": 0.9030946469268261, "learning_rate": 5.611854411566029e-06, "loss": 0.3553, "step": 16460 }, { "epoch": 1.5447635135135136, "grad_norm": 0.8430407466661295, "learning_rate": 5.611312546004538e-06, "loss": 0.3942, "step": 16461 }, { "epoch": 1.5448573573573574, "grad_norm": 0.9045876672946694, "learning_rate": 5.610770673154295e-06, "loss": 0.4398, "step": 16462 }, { "epoch": 1.5449512012012012, "grad_norm": 0.9857371314883541, "learning_rate": 5.61022879302176e-06, "loss": 0.377, "step": 16463 }, { "epoch": 1.545045045045045, "grad_norm": 2.2964856697663145, "learning_rate": 5.609686905613396e-06, "loss": 0.4513, "step": 16464 }, { "epoch": 1.5451388888888888, "grad_norm": 1.01875565452008, "learning_rate": 5.609145010935665e-06, "loss": 0.3628, "step": 16465 }, { "epoch": 1.5452327327327327, "grad_norm": 1.0120209842996803, "learning_rate": 5.608603108995025e-06, "loss": 0.4733, "step": 16466 }, { "epoch": 1.5453265765765765, "grad_norm": 0.8522076847878157, "learning_rate": 5.608061199797937e-06, "loss": 0.3522, "step": 16467 }, { "epoch": 1.5454204204204203, "grad_norm": 0.884259870843927, "learning_rate": 5.607519283350867e-06, "loss": 0.4132, "step": 16468 }, { "epoch": 1.5455142642642643, "grad_norm": 1.7557069166827195, "learning_rate": 5.606977359660272e-06, "loss": 0.3864, "step": 16469 }, { "epoch": 1.5456081081081081, "grad_norm": 0.9546244396558852, "learning_rate": 5.606435428732613e-06, "loss": 0.436, "step": 16470 }, { "epoch": 1.545701951951952, "grad_norm": 0.9710512717395641, "learning_rate": 5.605893490574354e-06, "loss": 0.4227, "step": 16471 }, { "epoch": 1.545795795795796, "grad_norm": 1.0284042042465518, "learning_rate": 5.605351545191955e-06, "loss": 0.4557, "step": 16472 }, { "epoch": 1.5458896396396398, "grad_norm": 0.8447715414570283, "learning_rate": 5.604809592591879e-06, "loss": 0.348, "step": 16473 }, { "epoch": 1.5459834834834836, "grad_norm": 1.8700046116290656, "learning_rate": 5.604267632780587e-06, "loss": 0.4294, "step": 16474 }, { "epoch": 1.5460773273273274, "grad_norm": 0.9287986840737047, "learning_rate": 5.603725665764541e-06, "loss": 0.4107, "step": 16475 }, { "epoch": 1.5461711711711712, "grad_norm": 0.9319728636873787, "learning_rate": 5.603183691550202e-06, "loss": 0.4276, "step": 16476 }, { "epoch": 1.546265015015015, "grad_norm": 0.8268926130275057, "learning_rate": 5.602641710144035e-06, "loss": 0.4283, "step": 16477 }, { "epoch": 1.5463588588588588, "grad_norm": 0.8416251176058956, "learning_rate": 5.602099721552498e-06, "loss": 0.3598, "step": 16478 }, { "epoch": 1.5464527027027026, "grad_norm": 0.9591723875639636, "learning_rate": 5.601557725782054e-06, "loss": 0.4521, "step": 16479 }, { "epoch": 1.5465465465465464, "grad_norm": 0.8834139541273056, "learning_rate": 5.601015722839169e-06, "loss": 0.3866, "step": 16480 }, { "epoch": 1.5466403903903903, "grad_norm": 0.8547465565544684, "learning_rate": 5.600473712730302e-06, "loss": 0.3484, "step": 16481 }, { "epoch": 1.5467342342342343, "grad_norm": 1.0651578460960123, "learning_rate": 5.599931695461916e-06, "loss": 0.4446, "step": 16482 }, { "epoch": 1.546828078078078, "grad_norm": 1.022125204947322, "learning_rate": 5.599389671040474e-06, "loss": 0.3955, "step": 16483 }, { "epoch": 1.546921921921922, "grad_norm": 1.0666249182932643, "learning_rate": 5.598847639472438e-06, "loss": 0.4216, "step": 16484 }, { "epoch": 1.5470157657657657, "grad_norm": 1.1002907713153944, "learning_rate": 5.598305600764271e-06, "loss": 0.4142, "step": 16485 }, { "epoch": 1.5471096096096097, "grad_norm": 0.9090085371956792, "learning_rate": 5.597763554922436e-06, "loss": 0.4293, "step": 16486 }, { "epoch": 1.5472034534534536, "grad_norm": 1.7113525649860961, "learning_rate": 5.597221501953397e-06, "loss": 0.3644, "step": 16487 }, { "epoch": 1.5472972972972974, "grad_norm": 0.9188827603784256, "learning_rate": 5.596679441863615e-06, "loss": 0.4533, "step": 16488 }, { "epoch": 1.5473911411411412, "grad_norm": 0.9512293296024388, "learning_rate": 5.596137374659554e-06, "loss": 0.3813, "step": 16489 }, { "epoch": 1.547484984984985, "grad_norm": 0.9295469596648892, "learning_rate": 5.595595300347676e-06, "loss": 0.3813, "step": 16490 }, { "epoch": 1.5475788288288288, "grad_norm": 0.8741440480596914, "learning_rate": 5.5950532189344446e-06, "loss": 0.3941, "step": 16491 }, { "epoch": 1.5476726726726726, "grad_norm": 1.0294594860737207, "learning_rate": 5.594511130426325e-06, "loss": 0.4244, "step": 16492 }, { "epoch": 1.5477665165165164, "grad_norm": 0.8154739736135471, "learning_rate": 5.593969034829779e-06, "loss": 0.3617, "step": 16493 }, { "epoch": 1.5478603603603602, "grad_norm": 1.144422981218636, "learning_rate": 5.59342693215127e-06, "loss": 0.3838, "step": 16494 }, { "epoch": 1.547954204204204, "grad_norm": 0.8562425642183812, "learning_rate": 5.592884822397263e-06, "loss": 0.3678, "step": 16495 }, { "epoch": 1.548048048048048, "grad_norm": 1.5481392869493857, "learning_rate": 5.592342705574219e-06, "loss": 0.4107, "step": 16496 }, { "epoch": 1.5481418918918919, "grad_norm": 1.1099205077506245, "learning_rate": 5.591800581688604e-06, "loss": 0.4602, "step": 16497 }, { "epoch": 1.5482357357357357, "grad_norm": 0.9987607955011494, "learning_rate": 5.591258450746881e-06, "loss": 0.4314, "step": 16498 }, { "epoch": 1.5483295795795797, "grad_norm": 1.3865140561123086, "learning_rate": 5.590716312755514e-06, "loss": 0.3911, "step": 16499 }, { "epoch": 1.5484234234234235, "grad_norm": 1.0527446646830312, "learning_rate": 5.590174167720965e-06, "loss": 0.3663, "step": 16500 }, { "epoch": 1.5485172672672673, "grad_norm": 1.0124670396524982, "learning_rate": 5.589632015649703e-06, "loss": 0.4633, "step": 16501 }, { "epoch": 1.5486111111111112, "grad_norm": 1.158136295460757, "learning_rate": 5.589089856548186e-06, "loss": 0.4137, "step": 16502 }, { "epoch": 1.548704954954955, "grad_norm": 1.0937263664851604, "learning_rate": 5.588547690422883e-06, "loss": 0.4263, "step": 16503 }, { "epoch": 1.5487987987987988, "grad_norm": 0.9648331024147793, "learning_rate": 5.588005517280257e-06, "loss": 0.3776, "step": 16504 }, { "epoch": 1.5488926426426426, "grad_norm": 0.9490020609989442, "learning_rate": 5.587463337126772e-06, "loss": 0.4485, "step": 16505 }, { "epoch": 1.5489864864864864, "grad_norm": 0.8275010661033996, "learning_rate": 5.5869211499688915e-06, "loss": 0.3622, "step": 16506 }, { "epoch": 1.5490803303303302, "grad_norm": 1.140423450529568, "learning_rate": 5.586378955813082e-06, "loss": 0.4053, "step": 16507 }, { "epoch": 1.549174174174174, "grad_norm": 0.9861489679931881, "learning_rate": 5.585836754665807e-06, "loss": 0.3903, "step": 16508 }, { "epoch": 1.549268018018018, "grad_norm": 0.8579005525952367, "learning_rate": 5.585294546533529e-06, "loss": 0.3911, "step": 16509 }, { "epoch": 1.5493618618618619, "grad_norm": 1.065931004641378, "learning_rate": 5.584752331422719e-06, "loss": 0.409, "step": 16510 }, { "epoch": 1.5494557057057057, "grad_norm": 1.2541174416040162, "learning_rate": 5.584210109339837e-06, "loss": 0.4289, "step": 16511 }, { "epoch": 1.5495495495495497, "grad_norm": 0.9657939862846567, "learning_rate": 5.583667880291348e-06, "loss": 0.3942, "step": 16512 }, { "epoch": 1.5496433933933935, "grad_norm": 0.8907853559857974, "learning_rate": 5.583125644283719e-06, "loss": 0.4056, "step": 16513 }, { "epoch": 1.5497372372372373, "grad_norm": 1.1758266037673557, "learning_rate": 5.582583401323414e-06, "loss": 0.4207, "step": 16514 }, { "epoch": 1.5498310810810811, "grad_norm": 0.8484388133963608, "learning_rate": 5.5820411514168986e-06, "loss": 0.3803, "step": 16515 }, { "epoch": 1.549924924924925, "grad_norm": 0.9654107197656396, "learning_rate": 5.581498894570638e-06, "loss": 0.3958, "step": 16516 }, { "epoch": 1.5500187687687688, "grad_norm": 1.9382568609516448, "learning_rate": 5.580956630791099e-06, "loss": 0.3582, "step": 16517 }, { "epoch": 1.5501126126126126, "grad_norm": 0.8993499830359942, "learning_rate": 5.5804143600847435e-06, "loss": 0.3896, "step": 16518 }, { "epoch": 1.5502064564564564, "grad_norm": 0.8822883537185582, "learning_rate": 5.579872082458041e-06, "loss": 0.3632, "step": 16519 }, { "epoch": 1.5503003003003002, "grad_norm": 1.3280315498225648, "learning_rate": 5.579329797917453e-06, "loss": 0.4064, "step": 16520 }, { "epoch": 1.550394144144144, "grad_norm": 5.506093912750599, "learning_rate": 5.5787875064694494e-06, "loss": 0.4022, "step": 16521 }, { "epoch": 1.550487987987988, "grad_norm": 1.0007128066857298, "learning_rate": 5.578245208120495e-06, "loss": 0.4544, "step": 16522 }, { "epoch": 1.5505818318318318, "grad_norm": 0.8448835153283, "learning_rate": 5.577702902877055e-06, "loss": 0.3525, "step": 16523 }, { "epoch": 1.5506756756756757, "grad_norm": 0.9793564218966831, "learning_rate": 5.577160590745592e-06, "loss": 0.3984, "step": 16524 }, { "epoch": 1.5507695195195195, "grad_norm": 0.9631143023654455, "learning_rate": 5.576618271732579e-06, "loss": 0.3678, "step": 16525 }, { "epoch": 1.5508633633633635, "grad_norm": 1.5719500598780791, "learning_rate": 5.576075945844477e-06, "loss": 0.4099, "step": 16526 }, { "epoch": 1.5509572072072073, "grad_norm": 1.11981223335816, "learning_rate": 5.5755336130877535e-06, "loss": 0.4256, "step": 16527 }, { "epoch": 1.5510510510510511, "grad_norm": 1.0571219815623656, "learning_rate": 5.574991273468876e-06, "loss": 0.3422, "step": 16528 }, { "epoch": 1.551144894894895, "grad_norm": 1.1518759389018762, "learning_rate": 5.5744489269943084e-06, "loss": 0.4266, "step": 16529 }, { "epoch": 1.5512387387387387, "grad_norm": 0.9051417618797162, "learning_rate": 5.57390657367052e-06, "loss": 0.3709, "step": 16530 }, { "epoch": 1.5513325825825826, "grad_norm": 2.263358326017999, "learning_rate": 5.573364213503976e-06, "loss": 0.3665, "step": 16531 }, { "epoch": 1.5514264264264264, "grad_norm": 1.15838325722835, "learning_rate": 5.572821846501142e-06, "loss": 0.4158, "step": 16532 }, { "epoch": 1.5515202702702702, "grad_norm": 0.9060863290184928, "learning_rate": 5.572279472668486e-06, "loss": 0.4106, "step": 16533 }, { "epoch": 1.551614114114114, "grad_norm": 1.472502324266367, "learning_rate": 5.571737092012475e-06, "loss": 0.4212, "step": 16534 }, { "epoch": 1.5517079579579578, "grad_norm": 0.9482543547157979, "learning_rate": 5.571194704539574e-06, "loss": 0.3879, "step": 16535 }, { "epoch": 1.5518018018018018, "grad_norm": 0.9938361321263365, "learning_rate": 5.570652310256254e-06, "loss": 0.4107, "step": 16536 }, { "epoch": 1.5518956456456456, "grad_norm": 1.0275477720645811, "learning_rate": 5.570109909168977e-06, "loss": 0.4194, "step": 16537 }, { "epoch": 1.5519894894894894, "grad_norm": 2.0634536410447155, "learning_rate": 5.569567501284213e-06, "loss": 0.4124, "step": 16538 }, { "epoch": 1.5520833333333335, "grad_norm": 0.8617454119750368, "learning_rate": 5.569025086608428e-06, "loss": 0.408, "step": 16539 }, { "epoch": 1.5521771771771773, "grad_norm": 0.9598443767401001, "learning_rate": 5.568482665148091e-06, "loss": 0.3859, "step": 16540 }, { "epoch": 1.552271021021021, "grad_norm": 2.396093621132855, "learning_rate": 5.567940236909668e-06, "loss": 0.4193, "step": 16541 }, { "epoch": 1.552364864864865, "grad_norm": 1.0288445065290441, "learning_rate": 5.567397801899628e-06, "loss": 0.4095, "step": 16542 }, { "epoch": 1.5524587087087087, "grad_norm": 0.9191775754512914, "learning_rate": 5.566855360124436e-06, "loss": 0.4115, "step": 16543 }, { "epoch": 1.5525525525525525, "grad_norm": 0.9373312513303734, "learning_rate": 5.56631291159056e-06, "loss": 0.395, "step": 16544 }, { "epoch": 1.5526463963963963, "grad_norm": 1.110632102226755, "learning_rate": 5.565770456304469e-06, "loss": 0.3887, "step": 16545 }, { "epoch": 1.5527402402402402, "grad_norm": 1.0159885083924658, "learning_rate": 5.5652279942726305e-06, "loss": 0.41, "step": 16546 }, { "epoch": 1.552834084084084, "grad_norm": 0.8116608669007696, "learning_rate": 5.564685525501512e-06, "loss": 0.3623, "step": 16547 }, { "epoch": 1.5529279279279278, "grad_norm": 0.9177109385682739, "learning_rate": 5.5641430499975825e-06, "loss": 0.3916, "step": 16548 }, { "epoch": 1.5530217717717718, "grad_norm": 0.8200689111296986, "learning_rate": 5.563600567767308e-06, "loss": 0.4117, "step": 16549 }, { "epoch": 1.5531156156156156, "grad_norm": 0.9963446601580873, "learning_rate": 5.563058078817157e-06, "loss": 0.4317, "step": 16550 }, { "epoch": 1.5532094594594594, "grad_norm": 0.9268668235257708, "learning_rate": 5.562515583153598e-06, "loss": 0.364, "step": 16551 }, { "epoch": 1.5533033033033035, "grad_norm": 0.8980684165336844, "learning_rate": 5.5619730807831015e-06, "loss": 0.3812, "step": 16552 }, { "epoch": 1.5533971471471473, "grad_norm": 0.9646395346540815, "learning_rate": 5.5614305717121335e-06, "loss": 0.4102, "step": 16553 }, { "epoch": 1.553490990990991, "grad_norm": 0.8869442273346481, "learning_rate": 5.5608880559471615e-06, "loss": 0.3781, "step": 16554 }, { "epoch": 1.553584834834835, "grad_norm": 1.06785324125311, "learning_rate": 5.560345533494657e-06, "loss": 0.3929, "step": 16555 }, { "epoch": 1.5536786786786787, "grad_norm": 0.8424194736680369, "learning_rate": 5.559803004361084e-06, "loss": 0.3535, "step": 16556 }, { "epoch": 1.5537725225225225, "grad_norm": 1.043984131895105, "learning_rate": 5.559260468552915e-06, "loss": 0.4561, "step": 16557 }, { "epoch": 1.5538663663663663, "grad_norm": 0.9917525903706739, "learning_rate": 5.558717926076619e-06, "loss": 0.3725, "step": 16558 }, { "epoch": 1.5539602102102101, "grad_norm": 0.9273441970072019, "learning_rate": 5.558175376938662e-06, "loss": 0.4048, "step": 16559 }, { "epoch": 1.554054054054054, "grad_norm": 0.8773198237522692, "learning_rate": 5.557632821145515e-06, "loss": 0.3464, "step": 16560 }, { "epoch": 1.5541478978978978, "grad_norm": 0.8958812089791651, "learning_rate": 5.557090258703647e-06, "loss": 0.4132, "step": 16561 }, { "epoch": 1.5542417417417418, "grad_norm": 0.9266037221998932, "learning_rate": 5.556547689619524e-06, "loss": 0.4004, "step": 16562 }, { "epoch": 1.5543355855855856, "grad_norm": 0.8259985239245985, "learning_rate": 5.5560051138996185e-06, "loss": 0.4083, "step": 16563 }, { "epoch": 1.5544294294294294, "grad_norm": 0.9074897891134306, "learning_rate": 5.555462531550399e-06, "loss": 0.3289, "step": 16564 }, { "epoch": 1.5545232732732732, "grad_norm": 0.9579974588278966, "learning_rate": 5.554919942578333e-06, "loss": 0.392, "step": 16565 }, { "epoch": 1.5546171171171173, "grad_norm": 1.3160909434192918, "learning_rate": 5.554377346989893e-06, "loss": 0.4485, "step": 16566 }, { "epoch": 1.554710960960961, "grad_norm": 1.0268201962538772, "learning_rate": 5.553834744791546e-06, "loss": 0.4331, "step": 16567 }, { "epoch": 1.5548048048048049, "grad_norm": 0.9061497853509388, "learning_rate": 5.553292135989762e-06, "loss": 0.399, "step": 16568 }, { "epoch": 1.5548986486486487, "grad_norm": 1.1921779319499821, "learning_rate": 5.552749520591011e-06, "loss": 0.3941, "step": 16569 }, { "epoch": 1.5549924924924925, "grad_norm": 4.862056199210747, "learning_rate": 5.552206898601762e-06, "loss": 0.3929, "step": 16570 }, { "epoch": 1.5550863363363363, "grad_norm": 1.1388281880298439, "learning_rate": 5.551664270028485e-06, "loss": 0.4039, "step": 16571 }, { "epoch": 1.5551801801801801, "grad_norm": 0.9640260136147307, "learning_rate": 5.551121634877651e-06, "loss": 0.4029, "step": 16572 }, { "epoch": 1.555274024024024, "grad_norm": 0.9750490410047732, "learning_rate": 5.550578993155729e-06, "loss": 0.4392, "step": 16573 }, { "epoch": 1.5553678678678677, "grad_norm": 0.9036826076510206, "learning_rate": 5.550036344869187e-06, "loss": 0.391, "step": 16574 }, { "epoch": 1.5554617117117115, "grad_norm": 0.8416470326125771, "learning_rate": 5.549493690024498e-06, "loss": 0.4037, "step": 16575 }, { "epoch": 1.5555555555555556, "grad_norm": 0.8995620141577735, "learning_rate": 5.548951028628131e-06, "loss": 0.412, "step": 16576 }, { "epoch": 1.5556493993993994, "grad_norm": 0.86281501825964, "learning_rate": 5.5484083606865566e-06, "loss": 0.3909, "step": 16577 }, { "epoch": 1.5557432432432432, "grad_norm": 0.8763716929832258, "learning_rate": 5.547865686206245e-06, "loss": 0.39, "step": 16578 }, { "epoch": 1.5558370870870872, "grad_norm": 1.128980274525928, "learning_rate": 5.547323005193667e-06, "loss": 0.3794, "step": 16579 }, { "epoch": 1.555930930930931, "grad_norm": 0.8781980528509866, "learning_rate": 5.5467803176552906e-06, "loss": 0.4099, "step": 16580 }, { "epoch": 1.5560247747747749, "grad_norm": 0.9971465091044797, "learning_rate": 5.546237623597588e-06, "loss": 0.3889, "step": 16581 }, { "epoch": 1.5561186186186187, "grad_norm": 0.8658379460421434, "learning_rate": 5.545694923027031e-06, "loss": 0.39, "step": 16582 }, { "epoch": 1.5562124624624625, "grad_norm": 0.9501108873407217, "learning_rate": 5.545152215950088e-06, "loss": 0.382, "step": 16583 }, { "epoch": 1.5563063063063063, "grad_norm": 0.850733596997642, "learning_rate": 5.5446095023732326e-06, "loss": 0.3543, "step": 16584 }, { "epoch": 1.55640015015015, "grad_norm": 1.3243828730784004, "learning_rate": 5.544066782302934e-06, "loss": 0.4325, "step": 16585 }, { "epoch": 1.556493993993994, "grad_norm": 1.0534448984635985, "learning_rate": 5.543524055745662e-06, "loss": 0.4099, "step": 16586 }, { "epoch": 1.5565878378378377, "grad_norm": 0.9551227469705824, "learning_rate": 5.542981322707889e-06, "loss": 0.3893, "step": 16587 }, { "epoch": 1.5566816816816815, "grad_norm": 1.317556159809331, "learning_rate": 5.542438583196086e-06, "loss": 0.4209, "step": 16588 }, { "epoch": 1.5567755255255256, "grad_norm": 2.695661132858126, "learning_rate": 5.541895837216723e-06, "loss": 0.4145, "step": 16589 }, { "epoch": 1.5568693693693694, "grad_norm": 0.9459508665145167, "learning_rate": 5.541353084776273e-06, "loss": 0.421, "step": 16590 }, { "epoch": 1.5569632132132132, "grad_norm": 0.8574578441694264, "learning_rate": 5.540810325881209e-06, "loss": 0.3968, "step": 16591 }, { "epoch": 1.5570570570570572, "grad_norm": 0.8822106059306486, "learning_rate": 5.5402675605379966e-06, "loss": 0.4368, "step": 16592 }, { "epoch": 1.557150900900901, "grad_norm": 0.9271410284443273, "learning_rate": 5.53972478875311e-06, "loss": 0.3906, "step": 16593 }, { "epoch": 1.5572447447447448, "grad_norm": 1.0646666451380902, "learning_rate": 5.539182010533023e-06, "loss": 0.3705, "step": 16594 }, { "epoch": 1.5573385885885886, "grad_norm": 0.9262433157450166, "learning_rate": 5.538639225884203e-06, "loss": 0.4106, "step": 16595 }, { "epoch": 1.5574324324324325, "grad_norm": 1.0190704216315465, "learning_rate": 5.538096434813126e-06, "loss": 0.3444, "step": 16596 }, { "epoch": 1.5575262762762763, "grad_norm": 1.0708213136273708, "learning_rate": 5.537553637326263e-06, "loss": 0.4196, "step": 16597 }, { "epoch": 1.55762012012012, "grad_norm": 1.0608863068175396, "learning_rate": 5.537010833430085e-06, "loss": 0.4152, "step": 16598 }, { "epoch": 1.5577139639639639, "grad_norm": 1.001544098811404, "learning_rate": 5.5364680231310606e-06, "loss": 0.3661, "step": 16599 }, { "epoch": 1.5578078078078077, "grad_norm": 1.2033816626522003, "learning_rate": 5.535925206435667e-06, "loss": 0.3974, "step": 16600 }, { "epoch": 1.5579016516516515, "grad_norm": 1.1997975139945305, "learning_rate": 5.535382383350373e-06, "loss": 0.422, "step": 16601 }, { "epoch": 1.5579954954954955, "grad_norm": 0.9141207205165766, "learning_rate": 5.534839553881653e-06, "loss": 0.3741, "step": 16602 }, { "epoch": 1.5580893393393394, "grad_norm": 0.9034534546246226, "learning_rate": 5.534296718035977e-06, "loss": 0.3774, "step": 16603 }, { "epoch": 1.5581831831831832, "grad_norm": 0.9065691478089675, "learning_rate": 5.533753875819819e-06, "loss": 0.3861, "step": 16604 }, { "epoch": 1.558277027027027, "grad_norm": 0.9556701118063332, "learning_rate": 5.53321102723965e-06, "loss": 0.367, "step": 16605 }, { "epoch": 1.558370870870871, "grad_norm": 0.939424762724688, "learning_rate": 5.532668172301944e-06, "loss": 0.3853, "step": 16606 }, { "epoch": 1.5584647147147148, "grad_norm": 1.3727384362761317, "learning_rate": 5.532125311013172e-06, "loss": 0.42, "step": 16607 }, { "epoch": 1.5585585585585586, "grad_norm": 0.7860610899568875, "learning_rate": 5.5315824433798075e-06, "loss": 0.4004, "step": 16608 }, { "epoch": 1.5586524024024024, "grad_norm": 0.9803289772451789, "learning_rate": 5.531039569408323e-06, "loss": 0.3947, "step": 16609 }, { "epoch": 1.5587462462462462, "grad_norm": 1.023351796962456, "learning_rate": 5.530496689105191e-06, "loss": 0.4297, "step": 16610 }, { "epoch": 1.55884009009009, "grad_norm": 0.9130417071922258, "learning_rate": 5.529953802476885e-06, "loss": 0.358, "step": 16611 }, { "epoch": 1.5589339339339339, "grad_norm": 1.0438617837720836, "learning_rate": 5.529410909529877e-06, "loss": 0.4524, "step": 16612 }, { "epoch": 1.5590277777777777, "grad_norm": 1.088029927717915, "learning_rate": 5.528868010270642e-06, "loss": 0.4232, "step": 16613 }, { "epoch": 1.5591216216216215, "grad_norm": 0.8893022551403106, "learning_rate": 5.528325104705648e-06, "loss": 0.4016, "step": 16614 }, { "epoch": 1.5592154654654653, "grad_norm": 0.8484528490125283, "learning_rate": 5.527782192841375e-06, "loss": 0.3717, "step": 16615 }, { "epoch": 1.5593093093093093, "grad_norm": 0.9656545786317969, "learning_rate": 5.527239274684292e-06, "loss": 0.355, "step": 16616 }, { "epoch": 1.5594031531531531, "grad_norm": 1.043924964762363, "learning_rate": 5.526696350240872e-06, "loss": 0.3959, "step": 16617 }, { "epoch": 1.559496996996997, "grad_norm": 0.8567417911531994, "learning_rate": 5.526153419517591e-06, "loss": 0.3601, "step": 16618 }, { "epoch": 1.559590840840841, "grad_norm": 1.1296680967851593, "learning_rate": 5.525610482520919e-06, "loss": 0.4205, "step": 16619 }, { "epoch": 1.5596846846846848, "grad_norm": 0.9167934757388259, "learning_rate": 5.525067539257332e-06, "loss": 0.4232, "step": 16620 }, { "epoch": 1.5597785285285286, "grad_norm": 0.9897137362061277, "learning_rate": 5.524524589733303e-06, "loss": 0.3774, "step": 16621 }, { "epoch": 1.5598723723723724, "grad_norm": 0.987601644888888, "learning_rate": 5.523981633955306e-06, "loss": 0.3681, "step": 16622 }, { "epoch": 1.5599662162162162, "grad_norm": 1.525312715641986, "learning_rate": 5.523438671929815e-06, "loss": 0.3637, "step": 16623 }, { "epoch": 1.56006006006006, "grad_norm": 0.8825949672802723, "learning_rate": 5.5228957036633025e-06, "loss": 0.4074, "step": 16624 }, { "epoch": 1.5601539039039038, "grad_norm": 0.869652216009566, "learning_rate": 5.522352729162241e-06, "loss": 0.4033, "step": 16625 }, { "epoch": 1.5602477477477477, "grad_norm": 1.5472052448665794, "learning_rate": 5.521809748433108e-06, "loss": 0.4251, "step": 16626 }, { "epoch": 1.5603415915915915, "grad_norm": 0.8534905625942399, "learning_rate": 5.521266761482377e-06, "loss": 0.3883, "step": 16627 }, { "epoch": 1.5604354354354353, "grad_norm": 3.362205583148125, "learning_rate": 5.52072376831652e-06, "loss": 0.3771, "step": 16628 }, { "epoch": 1.5605292792792793, "grad_norm": 0.9089363768138938, "learning_rate": 5.520180768942013e-06, "loss": 0.4454, "step": 16629 }, { "epoch": 1.5606231231231231, "grad_norm": 0.9477117427572266, "learning_rate": 5.519637763365329e-06, "loss": 0.4063, "step": 16630 }, { "epoch": 1.560716966966967, "grad_norm": 0.8569828140915132, "learning_rate": 5.519094751592941e-06, "loss": 0.3767, "step": 16631 }, { "epoch": 1.560810810810811, "grad_norm": 1.1772243211550752, "learning_rate": 5.518551733631327e-06, "loss": 0.4312, "step": 16632 }, { "epoch": 1.5609046546546548, "grad_norm": 1.0287481487829009, "learning_rate": 5.51800870948696e-06, "loss": 0.4429, "step": 16633 }, { "epoch": 1.5609984984984986, "grad_norm": 0.925668841673124, "learning_rate": 5.517465679166314e-06, "loss": 0.4038, "step": 16634 }, { "epoch": 1.5610923423423424, "grad_norm": 0.7889999797529631, "learning_rate": 5.516922642675862e-06, "loss": 0.3768, "step": 16635 }, { "epoch": 1.5611861861861862, "grad_norm": 1.3847154864667668, "learning_rate": 5.516379600022084e-06, "loss": 0.4263, "step": 16636 }, { "epoch": 1.56128003003003, "grad_norm": 0.9075664152700296, "learning_rate": 5.515836551211449e-06, "loss": 0.4193, "step": 16637 }, { "epoch": 1.5613738738738738, "grad_norm": 1.0236467143282506, "learning_rate": 5.515293496250433e-06, "loss": 0.3701, "step": 16638 }, { "epoch": 1.5614677177177176, "grad_norm": 1.0243613210095357, "learning_rate": 5.5147504351455135e-06, "loss": 0.4388, "step": 16639 }, { "epoch": 1.5615615615615615, "grad_norm": 0.9249833944180689, "learning_rate": 5.514207367903165e-06, "loss": 0.3725, "step": 16640 }, { "epoch": 1.5616554054054053, "grad_norm": 0.8956560339287795, "learning_rate": 5.5136642945298595e-06, "loss": 0.4114, "step": 16641 }, { "epoch": 1.5617492492492493, "grad_norm": 0.8830779604251245, "learning_rate": 5.513121215032076e-06, "loss": 0.4015, "step": 16642 }, { "epoch": 1.561843093093093, "grad_norm": 0.8991151618822926, "learning_rate": 5.512578129416285e-06, "loss": 0.4026, "step": 16643 }, { "epoch": 1.561936936936937, "grad_norm": 0.8554521165316853, "learning_rate": 5.512035037688966e-06, "loss": 0.3578, "step": 16644 }, { "epoch": 1.5620307807807807, "grad_norm": 1.9732189440396435, "learning_rate": 5.5114919398565944e-06, "loss": 0.4402, "step": 16645 }, { "epoch": 1.5621246246246248, "grad_norm": 1.1619359353663492, "learning_rate": 5.510948835925644e-06, "loss": 0.3705, "step": 16646 }, { "epoch": 1.5622184684684686, "grad_norm": 0.8729511151776822, "learning_rate": 5.510405725902589e-06, "loss": 0.3839, "step": 16647 }, { "epoch": 1.5623123123123124, "grad_norm": 0.9863606383522908, "learning_rate": 5.509862609793908e-06, "loss": 0.3666, "step": 16648 }, { "epoch": 1.5624061561561562, "grad_norm": 1.0299271155721323, "learning_rate": 5.509319487606073e-06, "loss": 0.376, "step": 16649 }, { "epoch": 1.5625, "grad_norm": 1.027285294720859, "learning_rate": 5.508776359345562e-06, "loss": 0.4477, "step": 16650 }, { "epoch": 1.5625938438438438, "grad_norm": 0.866626628215214, "learning_rate": 5.508233225018853e-06, "loss": 0.3913, "step": 16651 }, { "epoch": 1.5626876876876876, "grad_norm": 0.940084037692256, "learning_rate": 5.507690084632419e-06, "loss": 0.4119, "step": 16652 }, { "epoch": 1.5627815315315314, "grad_norm": 0.9701286412192957, "learning_rate": 5.507146938192735e-06, "loss": 0.3754, "step": 16653 }, { "epoch": 1.5628753753753752, "grad_norm": 0.8713400769709196, "learning_rate": 5.506603785706279e-06, "loss": 0.3588, "step": 16654 }, { "epoch": 1.5629692192192193, "grad_norm": 1.7721230391042813, "learning_rate": 5.506060627179526e-06, "loss": 0.4114, "step": 16655 }, { "epoch": 1.563063063063063, "grad_norm": 0.9439668146666335, "learning_rate": 5.5055174626189525e-06, "loss": 0.4175, "step": 16656 }, { "epoch": 1.563156906906907, "grad_norm": 0.9482786607156953, "learning_rate": 5.5049742920310354e-06, "loss": 0.3827, "step": 16657 }, { "epoch": 1.5632507507507507, "grad_norm": 1.1310293804156197, "learning_rate": 5.50443111542225e-06, "loss": 0.4049, "step": 16658 }, { "epoch": 1.5633445945945947, "grad_norm": 0.9303232029640779, "learning_rate": 5.503887932799074e-06, "loss": 0.4076, "step": 16659 }, { "epoch": 1.5634384384384385, "grad_norm": 1.027837298649791, "learning_rate": 5.503344744167982e-06, "loss": 0.4062, "step": 16660 }, { "epoch": 1.5635322822822824, "grad_norm": 0.9141921016676133, "learning_rate": 5.502801549535451e-06, "loss": 0.3989, "step": 16661 }, { "epoch": 1.5636261261261262, "grad_norm": 1.0605387087151195, "learning_rate": 5.502258348907957e-06, "loss": 0.3799, "step": 16662 }, { "epoch": 1.56371996996997, "grad_norm": 0.8953680727586409, "learning_rate": 5.50171514229198e-06, "loss": 0.4013, "step": 16663 }, { "epoch": 1.5638138138138138, "grad_norm": 1.6088390366293084, "learning_rate": 5.501171929693993e-06, "loss": 0.4334, "step": 16664 }, { "epoch": 1.5639076576576576, "grad_norm": 0.9349308678987539, "learning_rate": 5.500628711120475e-06, "loss": 0.3777, "step": 16665 }, { "epoch": 1.5640015015015014, "grad_norm": 0.9050580031056691, "learning_rate": 5.500085486577902e-06, "loss": 0.404, "step": 16666 }, { "epoch": 1.5640953453453452, "grad_norm": 1.1248535556728951, "learning_rate": 5.4995422560727495e-06, "loss": 0.4354, "step": 16667 }, { "epoch": 1.564189189189189, "grad_norm": 1.108622166098515, "learning_rate": 5.498999019611495e-06, "loss": 0.3807, "step": 16668 }, { "epoch": 1.564283033033033, "grad_norm": 1.700553571344407, "learning_rate": 5.49845577720062e-06, "loss": 0.3949, "step": 16669 }, { "epoch": 1.5643768768768769, "grad_norm": 0.8019160781589969, "learning_rate": 5.497912528846595e-06, "loss": 0.3761, "step": 16670 }, { "epoch": 1.5644707207207207, "grad_norm": 0.9216822880152086, "learning_rate": 5.497369274555901e-06, "loss": 0.3934, "step": 16671 }, { "epoch": 1.5645645645645647, "grad_norm": 0.89171019725413, "learning_rate": 5.496826014335016e-06, "loss": 0.4302, "step": 16672 }, { "epoch": 1.5646584084084085, "grad_norm": 0.9174697770732868, "learning_rate": 5.496282748190415e-06, "loss": 0.4118, "step": 16673 }, { "epoch": 1.5647522522522523, "grad_norm": 0.8419056263413393, "learning_rate": 5.4957394761285764e-06, "loss": 0.4234, "step": 16674 }, { "epoch": 1.5648460960960962, "grad_norm": 0.8332381310965838, "learning_rate": 5.495196198155977e-06, "loss": 0.3426, "step": 16675 }, { "epoch": 1.56493993993994, "grad_norm": 0.9606466749296372, "learning_rate": 5.494652914279096e-06, "loss": 0.3825, "step": 16676 }, { "epoch": 1.5650337837837838, "grad_norm": 0.947162271235819, "learning_rate": 5.4941096245044105e-06, "loss": 0.4118, "step": 16677 }, { "epoch": 1.5651276276276276, "grad_norm": 0.9152598019622074, "learning_rate": 5.4935663288383975e-06, "loss": 0.4296, "step": 16678 }, { "epoch": 1.5652214714714714, "grad_norm": 0.8720140226910073, "learning_rate": 5.493023027287534e-06, "loss": 0.4109, "step": 16679 }, { "epoch": 1.5653153153153152, "grad_norm": 0.9300303177335559, "learning_rate": 5.4924797198582995e-06, "loss": 0.3435, "step": 16680 }, { "epoch": 1.565409159159159, "grad_norm": 0.9151374150397632, "learning_rate": 5.491936406557172e-06, "loss": 0.3713, "step": 16681 }, { "epoch": 1.565503003003003, "grad_norm": 0.9516406296051706, "learning_rate": 5.4913930873906275e-06, "loss": 0.3641, "step": 16682 }, { "epoch": 1.5655968468468469, "grad_norm": 0.8735370395919764, "learning_rate": 5.4908497623651465e-06, "loss": 0.3857, "step": 16683 }, { "epoch": 1.5656906906906907, "grad_norm": 1.0516440902733586, "learning_rate": 5.4903064314872065e-06, "loss": 0.38, "step": 16684 }, { "epoch": 1.5657845345345347, "grad_norm": 0.9218455586584634, "learning_rate": 5.489763094763284e-06, "loss": 0.4199, "step": 16685 }, { "epoch": 1.5658783783783785, "grad_norm": 1.5561514607723657, "learning_rate": 5.489219752199858e-06, "loss": 0.4218, "step": 16686 }, { "epoch": 1.5659722222222223, "grad_norm": 1.020597841624042, "learning_rate": 5.488676403803409e-06, "loss": 0.3867, "step": 16687 }, { "epoch": 1.5660660660660661, "grad_norm": 0.9918432424136665, "learning_rate": 5.488133049580413e-06, "loss": 0.416, "step": 16688 }, { "epoch": 1.56615990990991, "grad_norm": 1.0473853312366528, "learning_rate": 5.487589689537349e-06, "loss": 0.3872, "step": 16689 }, { "epoch": 1.5662537537537538, "grad_norm": 1.019703954522955, "learning_rate": 5.487046323680697e-06, "loss": 0.4073, "step": 16690 }, { "epoch": 1.5663475975975976, "grad_norm": 1.068823365692636, "learning_rate": 5.486502952016932e-06, "loss": 0.4556, "step": 16691 }, { "epoch": 1.5664414414414414, "grad_norm": 0.9246856442237334, "learning_rate": 5.485959574552536e-06, "loss": 0.4184, "step": 16692 }, { "epoch": 1.5665352852852852, "grad_norm": 0.8571931327114125, "learning_rate": 5.485416191293987e-06, "loss": 0.3731, "step": 16693 }, { "epoch": 1.566629129129129, "grad_norm": 1.0603496768184564, "learning_rate": 5.484872802247764e-06, "loss": 0.3927, "step": 16694 }, { "epoch": 1.566722972972973, "grad_norm": 0.97090463910532, "learning_rate": 5.484329407420346e-06, "loss": 0.4387, "step": 16695 }, { "epoch": 1.5668168168168168, "grad_norm": 1.4060620214591348, "learning_rate": 5.48378600681821e-06, "loss": 0.3599, "step": 16696 }, { "epoch": 1.5669106606606606, "grad_norm": 0.9921171565316983, "learning_rate": 5.483242600447838e-06, "loss": 0.3786, "step": 16697 }, { "epoch": 1.5670045045045045, "grad_norm": 0.8680654363462975, "learning_rate": 5.482699188315705e-06, "loss": 0.4168, "step": 16698 }, { "epoch": 1.5670983483483485, "grad_norm": 0.8928151394350992, "learning_rate": 5.4821557704282955e-06, "loss": 0.3948, "step": 16699 }, { "epoch": 1.5671921921921923, "grad_norm": 1.1792850764647738, "learning_rate": 5.481612346792085e-06, "loss": 0.3766, "step": 16700 }, { "epoch": 1.5672860360360361, "grad_norm": 1.091658067942209, "learning_rate": 5.481068917413554e-06, "loss": 0.4148, "step": 16701 }, { "epoch": 1.56737987987988, "grad_norm": 0.8625308610529339, "learning_rate": 5.480525482299182e-06, "loss": 0.3466, "step": 16702 }, { "epoch": 1.5674737237237237, "grad_norm": 1.6159426326003774, "learning_rate": 5.4799820414554475e-06, "loss": 0.3768, "step": 16703 }, { "epoch": 1.5675675675675675, "grad_norm": 1.011360761140012, "learning_rate": 5.479438594888829e-06, "loss": 0.4155, "step": 16704 }, { "epoch": 1.5676614114114114, "grad_norm": 0.861441088277337, "learning_rate": 5.478895142605811e-06, "loss": 0.362, "step": 16705 }, { "epoch": 1.5677552552552552, "grad_norm": 0.8332389839091634, "learning_rate": 5.4783516846128686e-06, "loss": 0.3912, "step": 16706 }, { "epoch": 1.567849099099099, "grad_norm": 0.9365044271683733, "learning_rate": 5.477808220916483e-06, "loss": 0.3946, "step": 16707 }, { "epoch": 1.5679429429429428, "grad_norm": 1.247422590428669, "learning_rate": 5.477264751523133e-06, "loss": 0.436, "step": 16708 }, { "epoch": 1.5680367867867868, "grad_norm": 0.9692116967824269, "learning_rate": 5.476721276439299e-06, "loss": 0.4023, "step": 16709 }, { "epoch": 1.5681306306306306, "grad_norm": 0.9743298199574697, "learning_rate": 5.476177795671461e-06, "loss": 0.4186, "step": 16710 }, { "epoch": 1.5682244744744744, "grad_norm": 0.9027731108181122, "learning_rate": 5.4756343092261e-06, "loss": 0.3673, "step": 16711 }, { "epoch": 1.5683183183183185, "grad_norm": 0.8791703681290632, "learning_rate": 5.475090817109695e-06, "loss": 0.4184, "step": 16712 }, { "epoch": 1.5684121621621623, "grad_norm": 1.0741249976092222, "learning_rate": 5.474547319328724e-06, "loss": 0.4321, "step": 16713 }, { "epoch": 1.568506006006006, "grad_norm": 0.847830268833605, "learning_rate": 5.474003815889675e-06, "loss": 0.3352, "step": 16714 }, { "epoch": 1.56859984984985, "grad_norm": 0.889110853527786, "learning_rate": 5.473460306799018e-06, "loss": 0.4157, "step": 16715 }, { "epoch": 1.5686936936936937, "grad_norm": 0.9678010735061634, "learning_rate": 5.472916792063238e-06, "loss": 0.4102, "step": 16716 }, { "epoch": 1.5687875375375375, "grad_norm": 1.257562998758938, "learning_rate": 5.472373271688818e-06, "loss": 0.4086, "step": 16717 }, { "epoch": 1.5688813813813813, "grad_norm": 0.8705529894509101, "learning_rate": 5.471829745682235e-06, "loss": 0.3458, "step": 16718 }, { "epoch": 1.5689752252252251, "grad_norm": 0.934988959339307, "learning_rate": 5.471286214049969e-06, "loss": 0.3962, "step": 16719 }, { "epoch": 1.569069069069069, "grad_norm": 0.9850587144355469, "learning_rate": 5.470742676798502e-06, "loss": 0.4103, "step": 16720 }, { "epoch": 1.5691629129129128, "grad_norm": 1.1785686516653862, "learning_rate": 5.470199133934318e-06, "loss": 0.4355, "step": 16721 }, { "epoch": 1.5692567567567568, "grad_norm": 1.1289050256187574, "learning_rate": 5.469655585463891e-06, "loss": 0.4189, "step": 16722 }, { "epoch": 1.5693506006006006, "grad_norm": 0.872219962490306, "learning_rate": 5.469112031393708e-06, "loss": 0.43, "step": 16723 }, { "epoch": 1.5694444444444444, "grad_norm": 0.868712780007809, "learning_rate": 5.468568471730243e-06, "loss": 0.384, "step": 16724 }, { "epoch": 1.5695382882882885, "grad_norm": 0.9922466723918247, "learning_rate": 5.4680249064799826e-06, "loss": 0.3997, "step": 16725 }, { "epoch": 1.5696321321321323, "grad_norm": 0.9045551998262502, "learning_rate": 5.467481335649409e-06, "loss": 0.3957, "step": 16726 }, { "epoch": 1.569725975975976, "grad_norm": 1.0500318081800386, "learning_rate": 5.466937759244998e-06, "loss": 0.3989, "step": 16727 }, { "epoch": 1.5698198198198199, "grad_norm": 0.875955441753119, "learning_rate": 5.466394177273234e-06, "loss": 0.4033, "step": 16728 }, { "epoch": 1.5699136636636637, "grad_norm": 0.956394733512273, "learning_rate": 5.4658505897405955e-06, "loss": 0.3921, "step": 16729 }, { "epoch": 1.5700075075075075, "grad_norm": 0.9372492432734237, "learning_rate": 5.465306996653567e-06, "loss": 0.3884, "step": 16730 }, { "epoch": 1.5701013513513513, "grad_norm": 0.8896293752376373, "learning_rate": 5.464763398018627e-06, "loss": 0.4063, "step": 16731 }, { "epoch": 1.5701951951951951, "grad_norm": 0.9017815554260135, "learning_rate": 5.46421979384226e-06, "loss": 0.3756, "step": 16732 }, { "epoch": 1.570289039039039, "grad_norm": 0.8471801718796252, "learning_rate": 5.463676184130946e-06, "loss": 0.3931, "step": 16733 }, { "epoch": 1.5703828828828827, "grad_norm": 1.005172263737454, "learning_rate": 5.463132568891165e-06, "loss": 0.4512, "step": 16734 }, { "epoch": 1.5704767267267268, "grad_norm": 0.9616723344473108, "learning_rate": 5.462588948129401e-06, "loss": 0.3705, "step": 16735 }, { "epoch": 1.5705705705705706, "grad_norm": 0.9974375101585489, "learning_rate": 5.462045321852134e-06, "loss": 0.4128, "step": 16736 }, { "epoch": 1.5706644144144144, "grad_norm": 0.9146537914965613, "learning_rate": 5.461501690065846e-06, "loss": 0.4119, "step": 16737 }, { "epoch": 1.5707582582582582, "grad_norm": 1.67301430772371, "learning_rate": 5.460958052777019e-06, "loss": 0.4227, "step": 16738 }, { "epoch": 1.5708521021021022, "grad_norm": 1.0215875541247463, "learning_rate": 5.460414409992136e-06, "loss": 0.4169, "step": 16739 }, { "epoch": 1.570945945945946, "grad_norm": 1.73634403231446, "learning_rate": 5.459870761717676e-06, "loss": 0.3647, "step": 16740 }, { "epoch": 1.5710397897897899, "grad_norm": 1.0523908350935187, "learning_rate": 5.459327107960125e-06, "loss": 0.432, "step": 16741 }, { "epoch": 1.5711336336336337, "grad_norm": 1.157421322800823, "learning_rate": 5.458783448725961e-06, "loss": 0.4138, "step": 16742 }, { "epoch": 1.5712274774774775, "grad_norm": 0.922792742326342, "learning_rate": 5.458239784021667e-06, "loss": 0.3615, "step": 16743 }, { "epoch": 1.5713213213213213, "grad_norm": 1.0746957895330769, "learning_rate": 5.457696113853729e-06, "loss": 0.421, "step": 16744 }, { "epoch": 1.571415165165165, "grad_norm": 1.0400455567069369, "learning_rate": 5.457152438228626e-06, "loss": 0.4352, "step": 16745 }, { "epoch": 1.571509009009009, "grad_norm": 0.8881377093551026, "learning_rate": 5.4566087571528385e-06, "loss": 0.4069, "step": 16746 }, { "epoch": 1.5716028528528527, "grad_norm": 0.9277140103939121, "learning_rate": 5.456065070632852e-06, "loss": 0.3772, "step": 16747 }, { "epoch": 1.5716966966966965, "grad_norm": 1.0048570297822692, "learning_rate": 5.455521378675148e-06, "loss": 0.4058, "step": 16748 }, { "epoch": 1.5717905405405406, "grad_norm": 1.1136957981207196, "learning_rate": 5.454977681286209e-06, "loss": 0.3996, "step": 16749 }, { "epoch": 1.5718843843843844, "grad_norm": 0.8209620592451954, "learning_rate": 5.454433978472519e-06, "loss": 0.3586, "step": 16750 }, { "epoch": 1.5719782282282282, "grad_norm": 0.8565213309054696, "learning_rate": 5.453890270240558e-06, "loss": 0.3798, "step": 16751 }, { "epoch": 1.5720720720720722, "grad_norm": 0.9160664118702965, "learning_rate": 5.453346556596809e-06, "loss": 0.4027, "step": 16752 }, { "epoch": 1.572165915915916, "grad_norm": 0.9679560623876301, "learning_rate": 5.452802837547757e-06, "loss": 0.4041, "step": 16753 }, { "epoch": 1.5722597597597598, "grad_norm": 3.352287904519351, "learning_rate": 5.452259113099883e-06, "loss": 0.411, "step": 16754 }, { "epoch": 1.5723536036036037, "grad_norm": 0.851132938998774, "learning_rate": 5.451715383259669e-06, "loss": 0.3471, "step": 16755 }, { "epoch": 1.5724474474474475, "grad_norm": 0.9098934993048109, "learning_rate": 5.451171648033602e-06, "loss": 0.4019, "step": 16756 }, { "epoch": 1.5725412912912913, "grad_norm": 1.2040430714269224, "learning_rate": 5.450627907428161e-06, "loss": 0.4005, "step": 16757 }, { "epoch": 1.572635135135135, "grad_norm": 0.869396143352216, "learning_rate": 5.45008416144983e-06, "loss": 0.3398, "step": 16758 }, { "epoch": 1.572728978978979, "grad_norm": 1.0452416408697183, "learning_rate": 5.449540410105093e-06, "loss": 0.4074, "step": 16759 }, { "epoch": 1.5728228228228227, "grad_norm": 0.9253673759251888, "learning_rate": 5.448996653400433e-06, "loss": 0.4594, "step": 16760 }, { "epoch": 1.5729166666666665, "grad_norm": 0.7657832714373008, "learning_rate": 5.448452891342332e-06, "loss": 0.3935, "step": 16761 }, { "epoch": 1.5730105105105106, "grad_norm": 1.23278021164529, "learning_rate": 5.447909123937276e-06, "loss": 0.3831, "step": 16762 }, { "epoch": 1.5731043543543544, "grad_norm": 0.9184057551458771, "learning_rate": 5.447365351191747e-06, "loss": 0.4106, "step": 16763 }, { "epoch": 1.5731981981981982, "grad_norm": 1.0960312175614213, "learning_rate": 5.446821573112226e-06, "loss": 0.435, "step": 16764 }, { "epoch": 1.5732920420420422, "grad_norm": 0.917391422550571, "learning_rate": 5.446277789705201e-06, "loss": 0.3875, "step": 16765 }, { "epoch": 1.573385885885886, "grad_norm": 0.9227760114783595, "learning_rate": 5.445734000977152e-06, "loss": 0.3477, "step": 16766 }, { "epoch": 1.5734797297297298, "grad_norm": 0.9501654525268316, "learning_rate": 5.445190206934563e-06, "loss": 0.4292, "step": 16767 }, { "epoch": 1.5735735735735736, "grad_norm": 0.9442972852164038, "learning_rate": 5.4446464075839215e-06, "loss": 0.4018, "step": 16768 }, { "epoch": 1.5736674174174174, "grad_norm": 0.9020389781774152, "learning_rate": 5.444102602931708e-06, "loss": 0.3938, "step": 16769 }, { "epoch": 1.5737612612612613, "grad_norm": 0.9962150707574545, "learning_rate": 5.443558792984406e-06, "loss": 0.3477, "step": 16770 }, { "epoch": 1.573855105105105, "grad_norm": 1.0073129232573903, "learning_rate": 5.443014977748501e-06, "loss": 0.4125, "step": 16771 }, { "epoch": 1.5739489489489489, "grad_norm": 0.9266115083610034, "learning_rate": 5.4424711572304746e-06, "loss": 0.4057, "step": 16772 }, { "epoch": 1.5740427927927927, "grad_norm": 1.1432542149776015, "learning_rate": 5.441927331436814e-06, "loss": 0.4367, "step": 16773 }, { "epoch": 1.5741366366366365, "grad_norm": 0.9339261069857285, "learning_rate": 5.4413835003740014e-06, "loss": 0.3835, "step": 16774 }, { "epoch": 1.5742304804804805, "grad_norm": 1.015876785808745, "learning_rate": 5.440839664048522e-06, "loss": 0.342, "step": 16775 }, { "epoch": 1.5743243243243243, "grad_norm": 1.169033608095061, "learning_rate": 5.440295822466859e-06, "loss": 0.447, "step": 16776 }, { "epoch": 1.5744181681681682, "grad_norm": 0.9617877880307095, "learning_rate": 5.439751975635497e-06, "loss": 0.3922, "step": 16777 }, { "epoch": 1.574512012012012, "grad_norm": 1.0326582002279838, "learning_rate": 5.43920812356092e-06, "loss": 0.4402, "step": 16778 }, { "epoch": 1.574605855855856, "grad_norm": 0.7955650524580832, "learning_rate": 5.438664266249613e-06, "loss": 0.3577, "step": 16779 }, { "epoch": 1.5746996996996998, "grad_norm": 0.9458079389646981, "learning_rate": 5.43812040370806e-06, "loss": 0.3855, "step": 16780 }, { "epoch": 1.5747935435435436, "grad_norm": 1.2805923407488977, "learning_rate": 5.437576535942747e-06, "loss": 0.4353, "step": 16781 }, { "epoch": 1.5748873873873874, "grad_norm": 1.176855460565975, "learning_rate": 5.4370326629601575e-06, "loss": 0.4152, "step": 16782 }, { "epoch": 1.5749812312312312, "grad_norm": 0.9835577640382536, "learning_rate": 5.436488784766776e-06, "loss": 0.4116, "step": 16783 }, { "epoch": 1.575075075075075, "grad_norm": 0.8462788277229139, "learning_rate": 5.435944901369087e-06, "loss": 0.4515, "step": 16784 }, { "epoch": 1.5751689189189189, "grad_norm": 0.8489249427301866, "learning_rate": 5.435401012773574e-06, "loss": 0.4092, "step": 16785 }, { "epoch": 1.5752627627627627, "grad_norm": 0.8847728910157363, "learning_rate": 5.434857118986726e-06, "loss": 0.3968, "step": 16786 }, { "epoch": 1.5753566066066065, "grad_norm": 0.9506493708420571, "learning_rate": 5.434313220015024e-06, "loss": 0.4481, "step": 16787 }, { "epoch": 1.5754504504504503, "grad_norm": 1.0094218354762219, "learning_rate": 5.4337693158649555e-06, "loss": 0.4201, "step": 16788 }, { "epoch": 1.5755442942942943, "grad_norm": 0.9993408889359144, "learning_rate": 5.433225406543003e-06, "loss": 0.4133, "step": 16789 }, { "epoch": 1.5756381381381381, "grad_norm": 0.9193545164863397, "learning_rate": 5.432681492055653e-06, "loss": 0.4051, "step": 16790 }, { "epoch": 1.575731981981982, "grad_norm": 0.8286751132568184, "learning_rate": 5.432137572409392e-06, "loss": 0.3939, "step": 16791 }, { "epoch": 1.575825825825826, "grad_norm": 0.9828246448330317, "learning_rate": 5.431593647610703e-06, "loss": 0.4165, "step": 16792 }, { "epoch": 1.5759196696696698, "grad_norm": 0.972103516038212, "learning_rate": 5.431049717666073e-06, "loss": 0.3975, "step": 16793 }, { "epoch": 1.5760135135135136, "grad_norm": 1.001215186369412, "learning_rate": 5.430505782581985e-06, "loss": 0.4283, "step": 16794 }, { "epoch": 1.5761073573573574, "grad_norm": 0.9008104134371934, "learning_rate": 5.429961842364928e-06, "loss": 0.4225, "step": 16795 }, { "epoch": 1.5762012012012012, "grad_norm": 0.978636133154662, "learning_rate": 5.4294178970213826e-06, "loss": 0.3656, "step": 16796 }, { "epoch": 1.576295045045045, "grad_norm": 0.8571681654937414, "learning_rate": 5.428873946557839e-06, "loss": 0.3696, "step": 16797 }, { "epoch": 1.5763888888888888, "grad_norm": 0.9079422605989748, "learning_rate": 5.428329990980781e-06, "loss": 0.3739, "step": 16798 }, { "epoch": 1.5764827327327327, "grad_norm": 0.8893466998460218, "learning_rate": 5.427786030296692e-06, "loss": 0.3689, "step": 16799 }, { "epoch": 1.5765765765765765, "grad_norm": 2.6336928532494093, "learning_rate": 5.427242064512062e-06, "loss": 0.3793, "step": 16800 }, { "epoch": 1.5766704204204203, "grad_norm": 1.8644131321113282, "learning_rate": 5.426698093633375e-06, "loss": 0.3931, "step": 16801 }, { "epoch": 1.5767642642642643, "grad_norm": 0.9095595268650306, "learning_rate": 5.426154117667115e-06, "loss": 0.3971, "step": 16802 }, { "epoch": 1.5768581081081081, "grad_norm": 0.9218343746851021, "learning_rate": 5.425610136619769e-06, "loss": 0.4181, "step": 16803 }, { "epoch": 1.576951951951952, "grad_norm": 0.924559654681658, "learning_rate": 5.425066150497824e-06, "loss": 0.4027, "step": 16804 }, { "epoch": 1.577045795795796, "grad_norm": 0.9067699342913994, "learning_rate": 5.424522159307764e-06, "loss": 0.3725, "step": 16805 }, { "epoch": 1.5771396396396398, "grad_norm": 0.9843995896495483, "learning_rate": 5.423978163056078e-06, "loss": 0.3724, "step": 16806 }, { "epoch": 1.5772334834834836, "grad_norm": 0.9472162324083829, "learning_rate": 5.423434161749252e-06, "loss": 0.379, "step": 16807 }, { "epoch": 1.5773273273273274, "grad_norm": 1.1492414593156641, "learning_rate": 5.422890155393767e-06, "loss": 0.4453, "step": 16808 }, { "epoch": 1.5774211711711712, "grad_norm": 1.006145923146968, "learning_rate": 5.422346143996114e-06, "loss": 0.4073, "step": 16809 }, { "epoch": 1.577515015015015, "grad_norm": 0.9326208840428933, "learning_rate": 5.42180212756278e-06, "loss": 0.3735, "step": 16810 }, { "epoch": 1.5776088588588588, "grad_norm": 1.1733661165585987, "learning_rate": 5.421258106100247e-06, "loss": 0.3987, "step": 16811 }, { "epoch": 1.5777027027027026, "grad_norm": 1.0319873244718465, "learning_rate": 5.420714079615006e-06, "loss": 0.4013, "step": 16812 }, { "epoch": 1.5777965465465464, "grad_norm": 0.8968355327434497, "learning_rate": 5.420170048113541e-06, "loss": 0.36, "step": 16813 }, { "epoch": 1.5778903903903903, "grad_norm": 0.9018392408458915, "learning_rate": 5.419626011602338e-06, "loss": 0.421, "step": 16814 }, { "epoch": 1.5779842342342343, "grad_norm": 0.9908840466293594, "learning_rate": 5.419081970087885e-06, "loss": 0.3878, "step": 16815 }, { "epoch": 1.578078078078078, "grad_norm": 1.1413425790935443, "learning_rate": 5.41853792357667e-06, "loss": 0.4119, "step": 16816 }, { "epoch": 1.578171921921922, "grad_norm": 1.071982846505256, "learning_rate": 5.417993872075177e-06, "loss": 0.393, "step": 16817 }, { "epoch": 1.5782657657657657, "grad_norm": 0.8743981404533747, "learning_rate": 5.417449815589893e-06, "loss": 0.3747, "step": 16818 }, { "epoch": 1.5783596096096097, "grad_norm": 0.9127954960177704, "learning_rate": 5.4169057541273074e-06, "loss": 0.3668, "step": 16819 }, { "epoch": 1.5784534534534536, "grad_norm": 0.9891251583097084, "learning_rate": 5.416361687693904e-06, "loss": 0.3931, "step": 16820 }, { "epoch": 1.5785472972972974, "grad_norm": 0.9242608038163662, "learning_rate": 5.415817616296172e-06, "loss": 0.3595, "step": 16821 }, { "epoch": 1.5786411411411412, "grad_norm": 0.8823350457032426, "learning_rate": 5.415273539940597e-06, "loss": 0.418, "step": 16822 }, { "epoch": 1.578734984984985, "grad_norm": 2.7300543137719506, "learning_rate": 5.414729458633667e-06, "loss": 0.3913, "step": 16823 }, { "epoch": 1.5788288288288288, "grad_norm": 0.9175938956155766, "learning_rate": 5.414185372381869e-06, "loss": 0.3889, "step": 16824 }, { "epoch": 1.5789226726726726, "grad_norm": 0.9551793574197222, "learning_rate": 5.4136412811916915e-06, "loss": 0.3807, "step": 16825 }, { "epoch": 1.5790165165165164, "grad_norm": 0.9015485469677929, "learning_rate": 5.413097185069618e-06, "loss": 0.415, "step": 16826 }, { "epoch": 1.5791103603603602, "grad_norm": 0.8956438092938249, "learning_rate": 5.412553084022138e-06, "loss": 0.3815, "step": 16827 }, { "epoch": 1.579204204204204, "grad_norm": 0.9491781697413789, "learning_rate": 5.412008978055741e-06, "loss": 0.4089, "step": 16828 }, { "epoch": 1.579298048048048, "grad_norm": 1.1151299008459095, "learning_rate": 5.411464867176911e-06, "loss": 0.4377, "step": 16829 }, { "epoch": 1.5793918918918919, "grad_norm": 0.9799231499736354, "learning_rate": 5.410920751392138e-06, "loss": 0.3941, "step": 16830 }, { "epoch": 1.5794857357357357, "grad_norm": 0.8554526372475635, "learning_rate": 5.410376630707909e-06, "loss": 0.3849, "step": 16831 }, { "epoch": 1.5795795795795797, "grad_norm": 0.9810433972310839, "learning_rate": 5.409832505130709e-06, "loss": 0.3998, "step": 16832 }, { "epoch": 1.5796734234234235, "grad_norm": 0.8845779367712469, "learning_rate": 5.409288374667028e-06, "loss": 0.3957, "step": 16833 }, { "epoch": 1.5797672672672673, "grad_norm": 2.1271986848869413, "learning_rate": 5.4087442393233545e-06, "loss": 0.4011, "step": 16834 }, { "epoch": 1.5798611111111112, "grad_norm": 0.8614670987201427, "learning_rate": 5.408200099106175e-06, "loss": 0.4102, "step": 16835 }, { "epoch": 1.579954954954955, "grad_norm": 1.1046710167269824, "learning_rate": 5.407655954021977e-06, "loss": 0.4132, "step": 16836 }, { "epoch": 1.5800487987987988, "grad_norm": 0.9895181490560904, "learning_rate": 5.407111804077252e-06, "loss": 0.3951, "step": 16837 }, { "epoch": 1.5801426426426426, "grad_norm": 0.868134833980633, "learning_rate": 5.406567649278482e-06, "loss": 0.3387, "step": 16838 }, { "epoch": 1.5802364864864864, "grad_norm": 1.1611251228588415, "learning_rate": 5.40602348963216e-06, "loss": 0.4071, "step": 16839 }, { "epoch": 1.5803303303303302, "grad_norm": 0.9746888083084385, "learning_rate": 5.405479325144771e-06, "loss": 0.4035, "step": 16840 }, { "epoch": 1.580424174174174, "grad_norm": 0.855723656881317, "learning_rate": 5.404935155822805e-06, "loss": 0.3952, "step": 16841 }, { "epoch": 1.580518018018018, "grad_norm": 0.8670313260303196, "learning_rate": 5.404390981672748e-06, "loss": 0.3633, "step": 16842 }, { "epoch": 1.5806118618618619, "grad_norm": 0.861668992104719, "learning_rate": 5.403846802701092e-06, "loss": 0.3956, "step": 16843 }, { "epoch": 1.5807057057057057, "grad_norm": 0.936062409673447, "learning_rate": 5.403302618914321e-06, "loss": 0.3942, "step": 16844 }, { "epoch": 1.5807995495495497, "grad_norm": 0.8723417178030358, "learning_rate": 5.402758430318926e-06, "loss": 0.3409, "step": 16845 }, { "epoch": 1.5808933933933935, "grad_norm": 0.9180777863957215, "learning_rate": 5.402214236921396e-06, "loss": 0.3943, "step": 16846 }, { "epoch": 1.5809872372372373, "grad_norm": 0.9368349940353129, "learning_rate": 5.4016700387282165e-06, "loss": 0.3829, "step": 16847 }, { "epoch": 1.5810810810810811, "grad_norm": 0.8304175713493792, "learning_rate": 5.401125835745878e-06, "loss": 0.3691, "step": 16848 }, { "epoch": 1.581174924924925, "grad_norm": 1.046958282042618, "learning_rate": 5.400581627980871e-06, "loss": 0.4341, "step": 16849 }, { "epoch": 1.5812687687687688, "grad_norm": 0.9721298896756984, "learning_rate": 5.40003741543968e-06, "loss": 0.4124, "step": 16850 }, { "epoch": 1.5813626126126126, "grad_norm": 0.9764906202567524, "learning_rate": 5.399493198128797e-06, "loss": 0.3948, "step": 16851 }, { "epoch": 1.5814564564564564, "grad_norm": 0.9196141057271032, "learning_rate": 5.39894897605471e-06, "loss": 0.3915, "step": 16852 }, { "epoch": 1.5815503003003002, "grad_norm": 0.9916851680142309, "learning_rate": 5.3984047492239056e-06, "loss": 0.399, "step": 16853 }, { "epoch": 1.581644144144144, "grad_norm": 0.9563216035099276, "learning_rate": 5.3978605176428746e-06, "loss": 0.4096, "step": 16854 }, { "epoch": 1.581737987987988, "grad_norm": 0.9395928082382513, "learning_rate": 5.3973162813181075e-06, "loss": 0.3778, "step": 16855 }, { "epoch": 1.5818318318318318, "grad_norm": 1.5294623873207032, "learning_rate": 5.396772040256091e-06, "loss": 0.3952, "step": 16856 }, { "epoch": 1.5819256756756757, "grad_norm": 0.9849056110641161, "learning_rate": 5.396227794463314e-06, "loss": 0.4015, "step": 16857 }, { "epoch": 1.5820195195195195, "grad_norm": 3.215443342758648, "learning_rate": 5.395683543946267e-06, "loss": 0.3695, "step": 16858 }, { "epoch": 1.5821133633633635, "grad_norm": 0.9948390103553032, "learning_rate": 5.395139288711439e-06, "loss": 0.4132, "step": 16859 }, { "epoch": 1.5822072072072073, "grad_norm": 1.1630654973950891, "learning_rate": 5.394595028765317e-06, "loss": 0.4145, "step": 16860 }, { "epoch": 1.5823010510510511, "grad_norm": 0.9769322228654194, "learning_rate": 5.394050764114395e-06, "loss": 0.3842, "step": 16861 }, { "epoch": 1.582394894894895, "grad_norm": 0.8989331263721374, "learning_rate": 5.393506494765157e-06, "loss": 0.4092, "step": 16862 }, { "epoch": 1.5824887387387387, "grad_norm": 1.0042011162160627, "learning_rate": 5.392962220724095e-06, "loss": 0.4385, "step": 16863 }, { "epoch": 1.5825825825825826, "grad_norm": 0.8489412922247819, "learning_rate": 5.3924179419977e-06, "loss": 0.3727, "step": 16864 }, { "epoch": 1.5826764264264264, "grad_norm": 3.647193726644151, "learning_rate": 5.391873658592457e-06, "loss": 0.3885, "step": 16865 }, { "epoch": 1.5827702702702702, "grad_norm": 0.9108863096660398, "learning_rate": 5.39132937051486e-06, "loss": 0.441, "step": 16866 }, { "epoch": 1.582864114114114, "grad_norm": 0.9840079152765778, "learning_rate": 5.3907850777713955e-06, "loss": 0.4301, "step": 16867 }, { "epoch": 1.5829579579579578, "grad_norm": 1.109474042769615, "learning_rate": 5.3902407803685565e-06, "loss": 0.3722, "step": 16868 }, { "epoch": 1.5830518018018018, "grad_norm": 0.9035016457870413, "learning_rate": 5.389696478312829e-06, "loss": 0.4063, "step": 16869 }, { "epoch": 1.5831456456456456, "grad_norm": 0.9661632172811935, "learning_rate": 5.389152171610705e-06, "loss": 0.4186, "step": 16870 }, { "epoch": 1.5832394894894894, "grad_norm": 0.8508258601078434, "learning_rate": 5.388607860268674e-06, "loss": 0.3743, "step": 16871 }, { "epoch": 1.5833333333333335, "grad_norm": 0.7625886908337084, "learning_rate": 5.388063544293225e-06, "loss": 0.2944, "step": 16872 }, { "epoch": 1.5834271771771773, "grad_norm": 0.9825217806512702, "learning_rate": 5.38751922369085e-06, "loss": 0.4004, "step": 16873 }, { "epoch": 1.583521021021021, "grad_norm": 0.9224810961336686, "learning_rate": 5.386974898468038e-06, "loss": 0.3844, "step": 16874 }, { "epoch": 1.583614864864865, "grad_norm": 1.0313043014019248, "learning_rate": 5.386430568631278e-06, "loss": 0.3588, "step": 16875 }, { "epoch": 1.5837087087087087, "grad_norm": 0.9353367877589287, "learning_rate": 5.385886234187061e-06, "loss": 0.3984, "step": 16876 }, { "epoch": 1.5838025525525525, "grad_norm": 0.8685469311473994, "learning_rate": 5.3853418951418754e-06, "loss": 0.38, "step": 16877 }, { "epoch": 1.5838963963963963, "grad_norm": 0.9668136834545156, "learning_rate": 5.384797551502214e-06, "loss": 0.3493, "step": 16878 }, { "epoch": 1.5839902402402402, "grad_norm": 0.8195586325166524, "learning_rate": 5.3842532032745675e-06, "loss": 0.3183, "step": 16879 }, { "epoch": 1.584084084084084, "grad_norm": 1.001810607863189, "learning_rate": 5.383708850465425e-06, "loss": 0.4102, "step": 16880 }, { "epoch": 1.5841779279279278, "grad_norm": 1.0486623085954627, "learning_rate": 5.383164493081274e-06, "loss": 0.4109, "step": 16881 }, { "epoch": 1.5842717717717718, "grad_norm": 0.8378786314059348, "learning_rate": 5.3826201311286095e-06, "loss": 0.3793, "step": 16882 }, { "epoch": 1.5843656156156156, "grad_norm": 1.0804300906624689, "learning_rate": 5.38207576461392e-06, "loss": 0.3845, "step": 16883 }, { "epoch": 1.5844594594594594, "grad_norm": 0.8156836468908993, "learning_rate": 5.381531393543695e-06, "loss": 0.347, "step": 16884 }, { "epoch": 1.5845533033033035, "grad_norm": 0.9615802731160303, "learning_rate": 5.3809870179244285e-06, "loss": 0.3889, "step": 16885 }, { "epoch": 1.5846471471471473, "grad_norm": 0.889380093680945, "learning_rate": 5.380442637762607e-06, "loss": 0.3754, "step": 16886 }, { "epoch": 1.584740990990991, "grad_norm": 0.9157223159278156, "learning_rate": 5.379898253064724e-06, "loss": 0.4306, "step": 16887 }, { "epoch": 1.584834834834835, "grad_norm": 1.006615904604365, "learning_rate": 5.37935386383727e-06, "loss": 0.3972, "step": 16888 }, { "epoch": 1.5849286786786787, "grad_norm": 0.9709809966851646, "learning_rate": 5.378809470086734e-06, "loss": 0.4002, "step": 16889 }, { "epoch": 1.5850225225225225, "grad_norm": 1.083069770230636, "learning_rate": 5.378265071819608e-06, "loss": 0.4501, "step": 16890 }, { "epoch": 1.5851163663663663, "grad_norm": 0.875408247862126, "learning_rate": 5.377720669042384e-06, "loss": 0.4019, "step": 16891 }, { "epoch": 1.5852102102102101, "grad_norm": 0.9821017418849751, "learning_rate": 5.377176261761553e-06, "loss": 0.3873, "step": 16892 }, { "epoch": 1.585304054054054, "grad_norm": 0.9386197616473501, "learning_rate": 5.376631849983603e-06, "loss": 0.3749, "step": 16893 }, { "epoch": 1.5853978978978978, "grad_norm": 1.8660875906768346, "learning_rate": 5.3760874337150285e-06, "loss": 0.4107, "step": 16894 }, { "epoch": 1.5854917417417418, "grad_norm": 0.9156354273094426, "learning_rate": 5.375543012962318e-06, "loss": 0.4012, "step": 16895 }, { "epoch": 1.5855855855855856, "grad_norm": 0.9331190595174681, "learning_rate": 5.374998587731963e-06, "loss": 0.3879, "step": 16896 }, { "epoch": 1.5856794294294294, "grad_norm": 0.9608754952986992, "learning_rate": 5.374454158030459e-06, "loss": 0.407, "step": 16897 }, { "epoch": 1.5857732732732732, "grad_norm": 1.091651738278304, "learning_rate": 5.373909723864292e-06, "loss": 0.4081, "step": 16898 }, { "epoch": 1.5858671171171173, "grad_norm": 0.9476322847457108, "learning_rate": 5.373365285239955e-06, "loss": 0.3846, "step": 16899 }, { "epoch": 1.585960960960961, "grad_norm": 0.9955395148155518, "learning_rate": 5.372820842163942e-06, "loss": 0.4284, "step": 16900 }, { "epoch": 1.5860548048048049, "grad_norm": 0.8339446915838672, "learning_rate": 5.3722763946427395e-06, "loss": 0.364, "step": 16901 }, { "epoch": 1.5861486486486487, "grad_norm": 0.9074245785078118, "learning_rate": 5.371731942682843e-06, "loss": 0.4163, "step": 16902 }, { "epoch": 1.5862424924924925, "grad_norm": 1.0514863993911574, "learning_rate": 5.371187486290743e-06, "loss": 0.4198, "step": 16903 }, { "epoch": 1.5863363363363363, "grad_norm": 0.9246516870522014, "learning_rate": 5.37064302547293e-06, "loss": 0.3581, "step": 16904 }, { "epoch": 1.5864301801801801, "grad_norm": 1.1200649263758782, "learning_rate": 5.3700985602358965e-06, "loss": 0.4193, "step": 16905 }, { "epoch": 1.586524024024024, "grad_norm": 1.4536752798812764, "learning_rate": 5.369554090586136e-06, "loss": 0.3754, "step": 16906 }, { "epoch": 1.5866178678678677, "grad_norm": 1.0302164642880098, "learning_rate": 5.3690096165301375e-06, "loss": 0.3708, "step": 16907 }, { "epoch": 1.5867117117117115, "grad_norm": 1.3045647231122641, "learning_rate": 5.368465138074393e-06, "loss": 0.4315, "step": 16908 }, { "epoch": 1.5868055555555556, "grad_norm": 0.898986039007165, "learning_rate": 5.367920655225397e-06, "loss": 0.4367, "step": 16909 }, { "epoch": 1.5868993993993994, "grad_norm": 1.2015700014832744, "learning_rate": 5.367376167989639e-06, "loss": 0.3741, "step": 16910 }, { "epoch": 1.5869932432432432, "grad_norm": 0.9855203036441452, "learning_rate": 5.366831676373612e-06, "loss": 0.3616, "step": 16911 }, { "epoch": 1.5870870870870872, "grad_norm": 1.175808816780091, "learning_rate": 5.366287180383808e-06, "loss": 0.3755, "step": 16912 }, { "epoch": 1.587180930930931, "grad_norm": 1.0118202538040466, "learning_rate": 5.365742680026718e-06, "loss": 0.3413, "step": 16913 }, { "epoch": 1.5872747747747749, "grad_norm": 0.8974892497718289, "learning_rate": 5.365198175308835e-06, "loss": 0.3647, "step": 16914 }, { "epoch": 1.5873686186186187, "grad_norm": 1.0161599420318161, "learning_rate": 5.364653666236651e-06, "loss": 0.379, "step": 16915 }, { "epoch": 1.5874624624624625, "grad_norm": 1.2745143656588969, "learning_rate": 5.3641091528166585e-06, "loss": 0.4268, "step": 16916 }, { "epoch": 1.5875563063063063, "grad_norm": 1.0844989777358796, "learning_rate": 5.3635646350553506e-06, "loss": 0.4251, "step": 16917 }, { "epoch": 1.58765015015015, "grad_norm": 0.8667874301194112, "learning_rate": 5.3630201129592185e-06, "loss": 0.3783, "step": 16918 }, { "epoch": 1.587743993993994, "grad_norm": 0.8596754172468154, "learning_rate": 5.362475586534755e-06, "loss": 0.4096, "step": 16919 }, { "epoch": 1.5878378378378377, "grad_norm": 0.8947158178579673, "learning_rate": 5.36193105578845e-06, "loss": 0.372, "step": 16920 }, { "epoch": 1.5879316816816815, "grad_norm": 0.9568432254269249, "learning_rate": 5.361386520726802e-06, "loss": 0.3744, "step": 16921 }, { "epoch": 1.5880255255255256, "grad_norm": 0.9995204324721364, "learning_rate": 5.360841981356297e-06, "loss": 0.3917, "step": 16922 }, { "epoch": 1.5881193693693694, "grad_norm": 2.0309822253030707, "learning_rate": 5.360297437683434e-06, "loss": 0.3424, "step": 16923 }, { "epoch": 1.5882132132132132, "grad_norm": 1.3715933791634416, "learning_rate": 5.3597528897147e-06, "loss": 0.4231, "step": 16924 }, { "epoch": 1.5883070570570572, "grad_norm": 0.878668468796389, "learning_rate": 5.359208337456589e-06, "loss": 0.3746, "step": 16925 }, { "epoch": 1.588400900900901, "grad_norm": 0.9479616511857764, "learning_rate": 5.358663780915596e-06, "loss": 0.3819, "step": 16926 }, { "epoch": 1.5884947447447448, "grad_norm": 0.9059807906807242, "learning_rate": 5.358119220098214e-06, "loss": 0.4138, "step": 16927 }, { "epoch": 1.5885885885885886, "grad_norm": 1.731104849815284, "learning_rate": 5.357574655010932e-06, "loss": 0.3719, "step": 16928 }, { "epoch": 1.5886824324324325, "grad_norm": 0.9314236059036359, "learning_rate": 5.357030085660246e-06, "loss": 0.4194, "step": 16929 }, { "epoch": 1.5887762762762763, "grad_norm": 0.9384596729189085, "learning_rate": 5.356485512052649e-06, "loss": 0.3758, "step": 16930 }, { "epoch": 1.58887012012012, "grad_norm": 0.9043791748090674, "learning_rate": 5.355940934194632e-06, "loss": 0.3408, "step": 16931 }, { "epoch": 1.5889639639639639, "grad_norm": 1.0537728306103291, "learning_rate": 5.35539635209269e-06, "loss": 0.3718, "step": 16932 }, { "epoch": 1.5890578078078077, "grad_norm": 0.9620542622811907, "learning_rate": 5.354851765753316e-06, "loss": 0.4416, "step": 16933 }, { "epoch": 1.5891516516516515, "grad_norm": 1.0128242254524051, "learning_rate": 5.3543071751830015e-06, "loss": 0.4334, "step": 16934 }, { "epoch": 1.5892454954954955, "grad_norm": 0.8049582561054249, "learning_rate": 5.353762580388242e-06, "loss": 0.3358, "step": 16935 }, { "epoch": 1.5893393393393394, "grad_norm": 1.0092712477014567, "learning_rate": 5.35321798137553e-06, "loss": 0.3813, "step": 16936 }, { "epoch": 1.5894331831831832, "grad_norm": 1.0190888786373524, "learning_rate": 5.352673378151357e-06, "loss": 0.3966, "step": 16937 }, { "epoch": 1.589527027027027, "grad_norm": 1.0760323085730013, "learning_rate": 5.352128770722218e-06, "loss": 0.3921, "step": 16938 }, { "epoch": 1.589620870870871, "grad_norm": 0.9312427675401994, "learning_rate": 5.351584159094607e-06, "loss": 0.4309, "step": 16939 }, { "epoch": 1.5897147147147148, "grad_norm": 1.02040081910909, "learning_rate": 5.351039543275015e-06, "loss": 0.4419, "step": 16940 }, { "epoch": 1.5898085585585586, "grad_norm": 1.0098256126123881, "learning_rate": 5.35049492326994e-06, "loss": 0.4462, "step": 16941 }, { "epoch": 1.5899024024024024, "grad_norm": 1.0647693483267413, "learning_rate": 5.349950299085871e-06, "loss": 0.3772, "step": 16942 }, { "epoch": 1.5899962462462462, "grad_norm": 0.8893489759676179, "learning_rate": 5.349405670729304e-06, "loss": 0.3659, "step": 16943 }, { "epoch": 1.59009009009009, "grad_norm": 1.1769900781501799, "learning_rate": 5.348861038206731e-06, "loss": 0.3349, "step": 16944 }, { "epoch": 1.5901839339339339, "grad_norm": 0.8344038735562709, "learning_rate": 5.348316401524648e-06, "loss": 0.3746, "step": 16945 }, { "epoch": 1.5902777777777777, "grad_norm": 1.0771856019410002, "learning_rate": 5.3477717606895466e-06, "loss": 0.4041, "step": 16946 }, { "epoch": 1.5903716216216215, "grad_norm": 0.9538017374062666, "learning_rate": 5.347227115707923e-06, "loss": 0.4158, "step": 16947 }, { "epoch": 1.5904654654654653, "grad_norm": 0.898253855471823, "learning_rate": 5.3466824665862685e-06, "loss": 0.3552, "step": 16948 }, { "epoch": 1.5905593093093093, "grad_norm": 0.8176118422094358, "learning_rate": 5.346137813331078e-06, "loss": 0.3335, "step": 16949 }, { "epoch": 1.5906531531531531, "grad_norm": 0.8456773399286036, "learning_rate": 5.3455931559488446e-06, "loss": 0.4158, "step": 16950 }, { "epoch": 1.590746996996997, "grad_norm": 1.1159697605881413, "learning_rate": 5.345048494446066e-06, "loss": 0.3852, "step": 16951 }, { "epoch": 1.590840840840841, "grad_norm": 2.3231784707885117, "learning_rate": 5.344503828829232e-06, "loss": 0.4032, "step": 16952 }, { "epoch": 1.5909346846846848, "grad_norm": 0.9441456056445069, "learning_rate": 5.343959159104839e-06, "loss": 0.4053, "step": 16953 }, { "epoch": 1.5910285285285286, "grad_norm": 1.0420470529422847, "learning_rate": 5.34341448527938e-06, "loss": 0.4128, "step": 16954 }, { "epoch": 1.5911223723723724, "grad_norm": 0.8846106039239358, "learning_rate": 5.34286980735935e-06, "loss": 0.3887, "step": 16955 }, { "epoch": 1.5912162162162162, "grad_norm": 0.8622717904184356, "learning_rate": 5.342325125351242e-06, "loss": 0.4174, "step": 16956 }, { "epoch": 1.59131006006006, "grad_norm": 0.9703439903295039, "learning_rate": 5.341780439261552e-06, "loss": 0.4158, "step": 16957 }, { "epoch": 1.5914039039039038, "grad_norm": 0.9944484410349437, "learning_rate": 5.3412357490967724e-06, "loss": 0.4354, "step": 16958 }, { "epoch": 1.5914977477477477, "grad_norm": 1.278264775210303, "learning_rate": 5.3406910548634005e-06, "loss": 0.4301, "step": 16959 }, { "epoch": 1.5915915915915915, "grad_norm": 0.8936152681194666, "learning_rate": 5.340146356567929e-06, "loss": 0.4176, "step": 16960 }, { "epoch": 1.5916854354354353, "grad_norm": 0.8972814250290877, "learning_rate": 5.339601654216851e-06, "loss": 0.4112, "step": 16961 }, { "epoch": 1.5917792792792793, "grad_norm": 1.2065181016128874, "learning_rate": 5.3390569478166635e-06, "loss": 0.3811, "step": 16962 }, { "epoch": 1.5918731231231231, "grad_norm": 0.8651191949057053, "learning_rate": 5.338512237373861e-06, "loss": 0.395, "step": 16963 }, { "epoch": 1.591966966966967, "grad_norm": 0.8518953631595833, "learning_rate": 5.337967522894935e-06, "loss": 0.3929, "step": 16964 }, { "epoch": 1.592060810810811, "grad_norm": 0.9778323094557396, "learning_rate": 5.337422804386383e-06, "loss": 0.3702, "step": 16965 }, { "epoch": 1.5921546546546548, "grad_norm": 0.8372166499925126, "learning_rate": 5.3368780818547e-06, "loss": 0.4154, "step": 16966 }, { "epoch": 1.5922484984984986, "grad_norm": 0.9509906641341133, "learning_rate": 5.336333355306381e-06, "loss": 0.3609, "step": 16967 }, { "epoch": 1.5923423423423424, "grad_norm": 0.7975716379466726, "learning_rate": 5.335788624747918e-06, "loss": 0.3698, "step": 16968 }, { "epoch": 1.5924361861861862, "grad_norm": 1.3178024424205996, "learning_rate": 5.335243890185809e-06, "loss": 0.4, "step": 16969 }, { "epoch": 1.59253003003003, "grad_norm": 0.7958807857155457, "learning_rate": 5.334699151626547e-06, "loss": 0.3619, "step": 16970 }, { "epoch": 1.5926238738738738, "grad_norm": 0.926112934164559, "learning_rate": 5.3341544090766275e-06, "loss": 0.3957, "step": 16971 }, { "epoch": 1.5927177177177176, "grad_norm": 1.8233819560834539, "learning_rate": 5.333609662542546e-06, "loss": 0.406, "step": 16972 }, { "epoch": 1.5928115615615615, "grad_norm": 0.8582865282390898, "learning_rate": 5.333064912030798e-06, "loss": 0.3887, "step": 16973 }, { "epoch": 1.5929054054054053, "grad_norm": 0.8103965818053009, "learning_rate": 5.332520157547876e-06, "loss": 0.4104, "step": 16974 }, { "epoch": 1.5929992492492493, "grad_norm": 1.3063980512496025, "learning_rate": 5.331975399100278e-06, "loss": 0.4316, "step": 16975 }, { "epoch": 1.593093093093093, "grad_norm": 0.8358062535368964, "learning_rate": 5.331430636694498e-06, "loss": 0.3465, "step": 16976 }, { "epoch": 1.593186936936937, "grad_norm": 1.721368929924272, "learning_rate": 5.330885870337032e-06, "loss": 0.3913, "step": 16977 }, { "epoch": 1.5932807807807807, "grad_norm": 1.0116933971525446, "learning_rate": 5.330341100034374e-06, "loss": 0.3908, "step": 16978 }, { "epoch": 1.5933746246246248, "grad_norm": 0.9352527788505396, "learning_rate": 5.3297963257930205e-06, "loss": 0.4354, "step": 16979 }, { "epoch": 1.5934684684684686, "grad_norm": 0.952590823430766, "learning_rate": 5.329251547619467e-06, "loss": 0.4057, "step": 16980 }, { "epoch": 1.5935623123123124, "grad_norm": 0.894681823468359, "learning_rate": 5.328706765520209e-06, "loss": 0.338, "step": 16981 }, { "epoch": 1.5936561561561562, "grad_norm": 0.8461573072034714, "learning_rate": 5.32816197950174e-06, "loss": 0.3763, "step": 16982 }, { "epoch": 1.59375, "grad_norm": 0.9468969395906095, "learning_rate": 5.327617189570557e-06, "loss": 0.4055, "step": 16983 }, { "epoch": 1.5938438438438438, "grad_norm": 1.0044639848106305, "learning_rate": 5.327072395733158e-06, "loss": 0.3969, "step": 16984 }, { "epoch": 1.5939376876876876, "grad_norm": 0.8870828928290148, "learning_rate": 5.326527597996035e-06, "loss": 0.3891, "step": 16985 }, { "epoch": 1.5940315315315314, "grad_norm": 0.9763545836914426, "learning_rate": 5.3259827963656855e-06, "loss": 0.4101, "step": 16986 }, { "epoch": 1.5941253753753752, "grad_norm": 0.9732600243077026, "learning_rate": 5.325437990848604e-06, "loss": 0.3997, "step": 16987 }, { "epoch": 1.5942192192192193, "grad_norm": 0.9066166936149511, "learning_rate": 5.3248931814512875e-06, "loss": 0.435, "step": 16988 }, { "epoch": 1.594313063063063, "grad_norm": 0.9374671725779243, "learning_rate": 5.32434836818023e-06, "loss": 0.4079, "step": 16989 }, { "epoch": 1.594406906906907, "grad_norm": 1.0132820484515095, "learning_rate": 5.32380355104193e-06, "loss": 0.3711, "step": 16990 }, { "epoch": 1.5945007507507507, "grad_norm": 0.8492324496864395, "learning_rate": 5.323258730042883e-06, "loss": 0.4055, "step": 16991 }, { "epoch": 1.5945945945945947, "grad_norm": 1.2587546118390103, "learning_rate": 5.322713905189583e-06, "loss": 0.4312, "step": 16992 }, { "epoch": 1.5946884384384385, "grad_norm": 1.2181253644955368, "learning_rate": 5.322169076488527e-06, "loss": 0.4066, "step": 16993 }, { "epoch": 1.5947822822822824, "grad_norm": 1.0443702476296786, "learning_rate": 5.321624243946212e-06, "loss": 0.3981, "step": 16994 }, { "epoch": 1.5948761261261262, "grad_norm": 1.2672446772401957, "learning_rate": 5.321079407569132e-06, "loss": 0.456, "step": 16995 }, { "epoch": 1.59496996996997, "grad_norm": 1.0814731921953231, "learning_rate": 5.320534567363785e-06, "loss": 0.4378, "step": 16996 }, { "epoch": 1.5950638138138138, "grad_norm": 0.9392632561658336, "learning_rate": 5.3199897233366685e-06, "loss": 0.4347, "step": 16997 }, { "epoch": 1.5951576576576576, "grad_norm": 0.9839306826002028, "learning_rate": 5.319444875494274e-06, "loss": 0.3981, "step": 16998 }, { "epoch": 1.5952515015015014, "grad_norm": 1.0270617452061743, "learning_rate": 5.3189000238431025e-06, "loss": 0.4162, "step": 16999 }, { "epoch": 1.5953453453453452, "grad_norm": 1.064987343885063, "learning_rate": 5.318355168389648e-06, "loss": 0.3666, "step": 17000 }, { "epoch": 1.595439189189189, "grad_norm": 0.8034260626857168, "learning_rate": 5.317810309140405e-06, "loss": 0.359, "step": 17001 }, { "epoch": 1.595533033033033, "grad_norm": 0.8106085408182573, "learning_rate": 5.317265446101876e-06, "loss": 0.3683, "step": 17002 }, { "epoch": 1.5956268768768769, "grad_norm": 0.7990595044083705, "learning_rate": 5.316720579280552e-06, "loss": 0.3565, "step": 17003 }, { "epoch": 1.5957207207207207, "grad_norm": 1.0167969262852758, "learning_rate": 5.316175708682931e-06, "loss": 0.3942, "step": 17004 }, { "epoch": 1.5958145645645647, "grad_norm": 0.9091989163739356, "learning_rate": 5.3156308343155115e-06, "loss": 0.3674, "step": 17005 }, { "epoch": 1.5959084084084085, "grad_norm": 0.9525525473985257, "learning_rate": 5.315085956184787e-06, "loss": 0.3859, "step": 17006 }, { "epoch": 1.5960022522522523, "grad_norm": 0.9352885452608064, "learning_rate": 5.314541074297255e-06, "loss": 0.3626, "step": 17007 }, { "epoch": 1.5960960960960962, "grad_norm": 1.052052269126602, "learning_rate": 5.313996188659414e-06, "loss": 0.4031, "step": 17008 }, { "epoch": 1.59618993993994, "grad_norm": 1.0126132758518598, "learning_rate": 5.313451299277759e-06, "loss": 0.4016, "step": 17009 }, { "epoch": 1.5962837837837838, "grad_norm": 1.0462459940883118, "learning_rate": 5.312906406158787e-06, "loss": 0.4235, "step": 17010 }, { "epoch": 1.5963776276276276, "grad_norm": 1.3896634543349837, "learning_rate": 5.312361509308995e-06, "loss": 0.4408, "step": 17011 }, { "epoch": 1.5964714714714714, "grad_norm": 0.8572011579513465, "learning_rate": 5.311816608734881e-06, "loss": 0.3981, "step": 17012 }, { "epoch": 1.5965653153153152, "grad_norm": 1.0305951653398715, "learning_rate": 5.31127170444294e-06, "loss": 0.3805, "step": 17013 }, { "epoch": 1.596659159159159, "grad_norm": 1.006713071157746, "learning_rate": 5.31072679643967e-06, "loss": 0.4029, "step": 17014 }, { "epoch": 1.596753003003003, "grad_norm": 0.8117344799138337, "learning_rate": 5.3101818847315675e-06, "loss": 0.3707, "step": 17015 }, { "epoch": 1.5968468468468469, "grad_norm": 0.8965869374828814, "learning_rate": 5.309636969325131e-06, "loss": 0.3949, "step": 17016 }, { "epoch": 1.5969406906906907, "grad_norm": 1.084090147881006, "learning_rate": 5.309092050226856e-06, "loss": 0.3974, "step": 17017 }, { "epoch": 1.5970345345345347, "grad_norm": 0.8837451564524008, "learning_rate": 5.308547127443238e-06, "loss": 0.3845, "step": 17018 }, { "epoch": 1.5971283783783785, "grad_norm": 0.9384685184640763, "learning_rate": 5.308002200980779e-06, "loss": 0.4261, "step": 17019 }, { "epoch": 1.5972222222222223, "grad_norm": 0.9015620540128062, "learning_rate": 5.307457270845972e-06, "loss": 0.3623, "step": 17020 }, { "epoch": 1.5973160660660661, "grad_norm": 0.9927120656061752, "learning_rate": 5.306912337045318e-06, "loss": 0.3995, "step": 17021 }, { "epoch": 1.59740990990991, "grad_norm": 1.013752026399436, "learning_rate": 5.30636739958531e-06, "loss": 0.433, "step": 17022 }, { "epoch": 1.5975037537537538, "grad_norm": 0.913339646373362, "learning_rate": 5.305822458472448e-06, "loss": 0.396, "step": 17023 }, { "epoch": 1.5975975975975976, "grad_norm": 1.4257948405181111, "learning_rate": 5.305277513713229e-06, "loss": 0.4346, "step": 17024 }, { "epoch": 1.5976914414414414, "grad_norm": 1.326722012421823, "learning_rate": 5.304732565314149e-06, "loss": 0.4258, "step": 17025 }, { "epoch": 1.5977852852852852, "grad_norm": 1.0121662985090922, "learning_rate": 5.304187613281709e-06, "loss": 0.4009, "step": 17026 }, { "epoch": 1.597879129129129, "grad_norm": 0.9026887394656646, "learning_rate": 5.303642657622401e-06, "loss": 0.3957, "step": 17027 }, { "epoch": 1.597972972972973, "grad_norm": 0.9511038002615875, "learning_rate": 5.303097698342728e-06, "loss": 0.3679, "step": 17028 }, { "epoch": 1.5980668168168168, "grad_norm": 1.2886686952996254, "learning_rate": 5.302552735449187e-06, "loss": 0.3894, "step": 17029 }, { "epoch": 1.5981606606606606, "grad_norm": 0.9453620035641275, "learning_rate": 5.302007768948272e-06, "loss": 0.3978, "step": 17030 }, { "epoch": 1.5982545045045045, "grad_norm": 1.0241652245255042, "learning_rate": 5.301462798846483e-06, "loss": 0.3537, "step": 17031 }, { "epoch": 1.5983483483483485, "grad_norm": 0.9777450469802598, "learning_rate": 5.300917825150318e-06, "loss": 0.4129, "step": 17032 }, { "epoch": 1.5984421921921923, "grad_norm": 0.8631763317580369, "learning_rate": 5.3003728478662745e-06, "loss": 0.3846, "step": 17033 }, { "epoch": 1.5985360360360361, "grad_norm": 0.9706369054780273, "learning_rate": 5.29982786700085e-06, "loss": 0.4552, "step": 17034 }, { "epoch": 1.59862987987988, "grad_norm": 0.9550324052541558, "learning_rate": 5.2992828825605425e-06, "loss": 0.4486, "step": 17035 }, { "epoch": 1.5987237237237237, "grad_norm": 1.1681315494430802, "learning_rate": 5.29873789455185e-06, "loss": 0.3762, "step": 17036 }, { "epoch": 1.5988175675675675, "grad_norm": 0.9410350836520904, "learning_rate": 5.298192902981269e-06, "loss": 0.4108, "step": 17037 }, { "epoch": 1.5989114114114114, "grad_norm": 0.8988769483229968, "learning_rate": 5.297647907855301e-06, "loss": 0.3984, "step": 17038 }, { "epoch": 1.5990052552552552, "grad_norm": 0.9318982546400554, "learning_rate": 5.297102909180442e-06, "loss": 0.3468, "step": 17039 }, { "epoch": 1.599099099099099, "grad_norm": 0.9413613670500074, "learning_rate": 5.296557906963189e-06, "loss": 0.4266, "step": 17040 }, { "epoch": 1.5991929429429428, "grad_norm": 0.9391147495713226, "learning_rate": 5.296012901210043e-06, "loss": 0.3827, "step": 17041 }, { "epoch": 1.5992867867867868, "grad_norm": 0.9097485926142658, "learning_rate": 5.295467891927497e-06, "loss": 0.3549, "step": 17042 }, { "epoch": 1.5993806306306306, "grad_norm": 0.943846062903491, "learning_rate": 5.2949228791220545e-06, "loss": 0.3765, "step": 17043 }, { "epoch": 1.5994744744744744, "grad_norm": 0.875247138808518, "learning_rate": 5.2943778628002115e-06, "loss": 0.3909, "step": 17044 }, { "epoch": 1.5995683183183185, "grad_norm": 1.107370923924511, "learning_rate": 5.293832842968467e-06, "loss": 0.45, "step": 17045 }, { "epoch": 1.5996621621621623, "grad_norm": 1.0582495767602176, "learning_rate": 5.2932878196333195e-06, "loss": 0.4265, "step": 17046 }, { "epoch": 1.599756006006006, "grad_norm": 0.9400191469316379, "learning_rate": 5.2927427928012664e-06, "loss": 0.4036, "step": 17047 }, { "epoch": 1.59984984984985, "grad_norm": 0.9110451036296967, "learning_rate": 5.292197762478806e-06, "loss": 0.4421, "step": 17048 }, { "epoch": 1.5999436936936937, "grad_norm": 0.8414007517147298, "learning_rate": 5.291652728672436e-06, "loss": 0.3753, "step": 17049 }, { "epoch": 1.6000375375375375, "grad_norm": 0.8763109098032417, "learning_rate": 5.291107691388659e-06, "loss": 0.4014, "step": 17050 }, { "epoch": 1.6001313813813813, "grad_norm": 1.2469422655962674, "learning_rate": 5.290562650633968e-06, "loss": 0.4018, "step": 17051 }, { "epoch": 1.6002252252252251, "grad_norm": 0.9123957566359249, "learning_rate": 5.2900176064148664e-06, "loss": 0.4121, "step": 17052 }, { "epoch": 1.600319069069069, "grad_norm": 1.010601534503953, "learning_rate": 5.28947255873785e-06, "loss": 0.3921, "step": 17053 }, { "epoch": 1.6004129129129128, "grad_norm": 0.8755013389508496, "learning_rate": 5.288927507609418e-06, "loss": 0.3837, "step": 17054 }, { "epoch": 1.6005067567567568, "grad_norm": 0.8184197607387766, "learning_rate": 5.28838245303607e-06, "loss": 0.364, "step": 17055 }, { "epoch": 1.6006006006006006, "grad_norm": 1.089491934597503, "learning_rate": 5.287837395024304e-06, "loss": 0.443, "step": 17056 }, { "epoch": 1.6006944444444444, "grad_norm": 1.1092044210569487, "learning_rate": 5.287292333580617e-06, "loss": 0.3948, "step": 17057 }, { "epoch": 1.6007882882882885, "grad_norm": 0.8761834184013133, "learning_rate": 5.2867472687115116e-06, "loss": 0.3847, "step": 17058 }, { "epoch": 1.6008821321321323, "grad_norm": 1.0191360082110967, "learning_rate": 5.286202200423486e-06, "loss": 0.4264, "step": 17059 }, { "epoch": 1.600975975975976, "grad_norm": 0.8733567142348205, "learning_rate": 5.285657128723035e-06, "loss": 0.3939, "step": 17060 }, { "epoch": 1.6010698198198199, "grad_norm": 0.8189628436450579, "learning_rate": 5.285112053616661e-06, "loss": 0.4044, "step": 17061 }, { "epoch": 1.6011636636636637, "grad_norm": 0.9681216800610153, "learning_rate": 5.284566975110864e-06, "loss": 0.4409, "step": 17062 }, { "epoch": 1.6012575075075075, "grad_norm": 0.9088849649809079, "learning_rate": 5.2840218932121405e-06, "loss": 0.4232, "step": 17063 }, { "epoch": 1.6013513513513513, "grad_norm": 1.0111167606329612, "learning_rate": 5.283476807926991e-06, "loss": 0.409, "step": 17064 }, { "epoch": 1.6014451951951951, "grad_norm": 0.876351236576494, "learning_rate": 5.282931719261915e-06, "loss": 0.3857, "step": 17065 }, { "epoch": 1.601539039039039, "grad_norm": 1.1917183324624165, "learning_rate": 5.282386627223408e-06, "loss": 0.3959, "step": 17066 }, { "epoch": 1.6016328828828827, "grad_norm": 1.8999198912128543, "learning_rate": 5.281841531817973e-06, "loss": 0.3923, "step": 17067 }, { "epoch": 1.6017267267267268, "grad_norm": 0.9318078671766613, "learning_rate": 5.28129643305211e-06, "loss": 0.4006, "step": 17068 }, { "epoch": 1.6018205705705706, "grad_norm": 1.0005619851014276, "learning_rate": 5.2807513309323145e-06, "loss": 0.4138, "step": 17069 }, { "epoch": 1.6019144144144144, "grad_norm": 1.0062934959074459, "learning_rate": 5.280206225465089e-06, "loss": 0.3736, "step": 17070 }, { "epoch": 1.6020082582582582, "grad_norm": 0.8812867819551116, "learning_rate": 5.279661116656932e-06, "loss": 0.4051, "step": 17071 }, { "epoch": 1.6021021021021022, "grad_norm": 0.8676846386556567, "learning_rate": 5.279116004514342e-06, "loss": 0.4036, "step": 17072 }, { "epoch": 1.602195945945946, "grad_norm": 0.9569831941844166, "learning_rate": 5.278570889043818e-06, "loss": 0.4348, "step": 17073 }, { "epoch": 1.6022897897897899, "grad_norm": 1.923645953172435, "learning_rate": 5.278025770251862e-06, "loss": 0.3809, "step": 17074 }, { "epoch": 1.6023836336336337, "grad_norm": 1.0252426304401887, "learning_rate": 5.277480648144971e-06, "loss": 0.4217, "step": 17075 }, { "epoch": 1.6024774774774775, "grad_norm": 0.9185433493685766, "learning_rate": 5.276935522729647e-06, "loss": 0.3887, "step": 17076 }, { "epoch": 1.6025713213213213, "grad_norm": 0.8342220765213211, "learning_rate": 5.2763903940123875e-06, "loss": 0.4524, "step": 17077 }, { "epoch": 1.602665165165165, "grad_norm": 0.9438693021343878, "learning_rate": 5.275845261999691e-06, "loss": 0.3541, "step": 17078 }, { "epoch": 1.602759009009009, "grad_norm": 0.9002963548488, "learning_rate": 5.275300126698061e-06, "loss": 0.4055, "step": 17079 }, { "epoch": 1.6028528528528527, "grad_norm": 0.8291376454248023, "learning_rate": 5.274754988113995e-06, "loss": 0.4111, "step": 17080 }, { "epoch": 1.6029466966966965, "grad_norm": 1.2270382404362694, "learning_rate": 5.274209846253993e-06, "loss": 0.4002, "step": 17081 }, { "epoch": 1.6030405405405406, "grad_norm": 1.310317560840095, "learning_rate": 5.273664701124554e-06, "loss": 0.39, "step": 17082 }, { "epoch": 1.6031343843843844, "grad_norm": 0.9240087529261063, "learning_rate": 5.27311955273218e-06, "loss": 0.4228, "step": 17083 }, { "epoch": 1.6032282282282282, "grad_norm": 1.0109530983358554, "learning_rate": 5.272574401083368e-06, "loss": 0.3994, "step": 17084 }, { "epoch": 1.6033220720720722, "grad_norm": 0.9337765044324374, "learning_rate": 5.272029246184619e-06, "loss": 0.3667, "step": 17085 }, { "epoch": 1.603415915915916, "grad_norm": 0.8574429658171259, "learning_rate": 5.271484088042435e-06, "loss": 0.4499, "step": 17086 }, { "epoch": 1.6035097597597598, "grad_norm": 0.9205271178901977, "learning_rate": 5.270938926663313e-06, "loss": 0.4136, "step": 17087 }, { "epoch": 1.6036036036036037, "grad_norm": 1.0254395776973761, "learning_rate": 5.270393762053752e-06, "loss": 0.3557, "step": 17088 }, { "epoch": 1.6036974474474475, "grad_norm": 0.9194381649427457, "learning_rate": 5.26984859422026e-06, "loss": 0.3558, "step": 17089 }, { "epoch": 1.6037912912912913, "grad_norm": 0.8939583841446187, "learning_rate": 5.269303423169328e-06, "loss": 0.3873, "step": 17090 }, { "epoch": 1.603885135135135, "grad_norm": 0.9126111906558618, "learning_rate": 5.268758248907458e-06, "loss": 0.412, "step": 17091 }, { "epoch": 1.603978978978979, "grad_norm": 1.2879061044924982, "learning_rate": 5.268213071441155e-06, "loss": 0.377, "step": 17092 }, { "epoch": 1.6040728228228227, "grad_norm": 0.917530711282666, "learning_rate": 5.2676678907769145e-06, "loss": 0.3713, "step": 17093 }, { "epoch": 1.6041666666666665, "grad_norm": 1.1514939588556117, "learning_rate": 5.267122706921238e-06, "loss": 0.4039, "step": 17094 }, { "epoch": 1.6042605105105106, "grad_norm": 1.1611075325882658, "learning_rate": 5.266577519880627e-06, "loss": 0.4152, "step": 17095 }, { "epoch": 1.6043543543543544, "grad_norm": 0.8016267053221922, "learning_rate": 5.266032329661581e-06, "loss": 0.3703, "step": 17096 }, { "epoch": 1.6044481981981982, "grad_norm": 0.7905203980511206, "learning_rate": 5.265487136270598e-06, "loss": 0.3768, "step": 17097 }, { "epoch": 1.6045420420420422, "grad_norm": 0.8562734135033048, "learning_rate": 5.264941939714183e-06, "loss": 0.3703, "step": 17098 }, { "epoch": 1.604635885885886, "grad_norm": 0.9957578446151597, "learning_rate": 5.264396739998832e-06, "loss": 0.4394, "step": 17099 }, { "epoch": 1.6047297297297298, "grad_norm": 0.855450602808426, "learning_rate": 5.263851537131049e-06, "loss": 0.3872, "step": 17100 }, { "epoch": 1.6048235735735736, "grad_norm": 0.9952620752975642, "learning_rate": 5.263306331117334e-06, "loss": 0.4283, "step": 17101 }, { "epoch": 1.6049174174174174, "grad_norm": 0.8300364090906445, "learning_rate": 5.262761121964186e-06, "loss": 0.3694, "step": 17102 }, { "epoch": 1.6050112612612613, "grad_norm": 1.034570163313157, "learning_rate": 5.262215909678106e-06, "loss": 0.3975, "step": 17103 }, { "epoch": 1.605105105105105, "grad_norm": 0.9105430874587009, "learning_rate": 5.261670694265594e-06, "loss": 0.421, "step": 17104 }, { "epoch": 1.6051989489489489, "grad_norm": 0.9122779292819698, "learning_rate": 5.2611254757331524e-06, "loss": 0.3516, "step": 17105 }, { "epoch": 1.6052927927927927, "grad_norm": 1.0493799751878565, "learning_rate": 5.26058025408728e-06, "loss": 0.4139, "step": 17106 }, { "epoch": 1.6053866366366365, "grad_norm": 0.9825887976626824, "learning_rate": 5.260035029334479e-06, "loss": 0.4314, "step": 17107 }, { "epoch": 1.6054804804804805, "grad_norm": 0.936780238774687, "learning_rate": 5.259489801481251e-06, "loss": 0.3977, "step": 17108 }, { "epoch": 1.6055743243243243, "grad_norm": 0.8916224027040757, "learning_rate": 5.2589445705340935e-06, "loss": 0.352, "step": 17109 }, { "epoch": 1.6056681681681682, "grad_norm": 1.0075698354437541, "learning_rate": 5.258399336499511e-06, "loss": 0.3976, "step": 17110 }, { "epoch": 1.605762012012012, "grad_norm": 0.8807800216779332, "learning_rate": 5.257854099384002e-06, "loss": 0.3809, "step": 17111 }, { "epoch": 1.605855855855856, "grad_norm": 1.0208011780854864, "learning_rate": 5.257308859194068e-06, "loss": 0.3676, "step": 17112 }, { "epoch": 1.6059496996996998, "grad_norm": 0.927581413392329, "learning_rate": 5.256763615936211e-06, "loss": 0.4107, "step": 17113 }, { "epoch": 1.6060435435435436, "grad_norm": 0.8667380723713168, "learning_rate": 5.256218369616932e-06, "loss": 0.3582, "step": 17114 }, { "epoch": 1.6061373873873874, "grad_norm": 0.8335083746539842, "learning_rate": 5.255673120242729e-06, "loss": 0.3935, "step": 17115 }, { "epoch": 1.6062312312312312, "grad_norm": 1.2212274127718743, "learning_rate": 5.255127867820107e-06, "loss": 0.383, "step": 17116 }, { "epoch": 1.606325075075075, "grad_norm": 0.8269786434271484, "learning_rate": 5.254582612355565e-06, "loss": 0.3805, "step": 17117 }, { "epoch": 1.6064189189189189, "grad_norm": 0.9102525689758978, "learning_rate": 5.2540373538556035e-06, "loss": 0.3878, "step": 17118 }, { "epoch": 1.6065127627627627, "grad_norm": 2.198570621567735, "learning_rate": 5.253492092326726e-06, "loss": 0.3896, "step": 17119 }, { "epoch": 1.6066066066066065, "grad_norm": 1.0146148546945977, "learning_rate": 5.252946827775432e-06, "loss": 0.3881, "step": 17120 }, { "epoch": 1.6067004504504503, "grad_norm": 0.922176216611685, "learning_rate": 5.252401560208223e-06, "loss": 0.4097, "step": 17121 }, { "epoch": 1.6067942942942943, "grad_norm": 0.8784293969505252, "learning_rate": 5.251856289631602e-06, "loss": 0.3588, "step": 17122 }, { "epoch": 1.6068881381381381, "grad_norm": 0.9283748214145153, "learning_rate": 5.2513110160520664e-06, "loss": 0.365, "step": 17123 }, { "epoch": 1.606981981981982, "grad_norm": 1.0205032633118991, "learning_rate": 5.25076573947612e-06, "loss": 0.3855, "step": 17124 }, { "epoch": 1.607075825825826, "grad_norm": 1.3473094824800713, "learning_rate": 5.250220459910267e-06, "loss": 0.4135, "step": 17125 }, { "epoch": 1.6071696696696698, "grad_norm": 0.8894340517026839, "learning_rate": 5.2496751773610046e-06, "loss": 0.3869, "step": 17126 }, { "epoch": 1.6072635135135136, "grad_norm": 0.9770853121036396, "learning_rate": 5.249129891834834e-06, "loss": 0.408, "step": 17127 }, { "epoch": 1.6073573573573574, "grad_norm": 1.8304638225591783, "learning_rate": 5.24858460333826e-06, "loss": 0.3789, "step": 17128 }, { "epoch": 1.6074512012012012, "grad_norm": 0.8985297839158014, "learning_rate": 5.248039311877783e-06, "loss": 0.4182, "step": 17129 }, { "epoch": 1.607545045045045, "grad_norm": 0.9724720820147518, "learning_rate": 5.247494017459902e-06, "loss": 0.3776, "step": 17130 }, { "epoch": 1.6076388888888888, "grad_norm": 0.9557149874748134, "learning_rate": 5.246948720091123e-06, "loss": 0.4051, "step": 17131 }, { "epoch": 1.6077327327327327, "grad_norm": 0.9366912178796161, "learning_rate": 5.246403419777945e-06, "loss": 0.4193, "step": 17132 }, { "epoch": 1.6078265765765765, "grad_norm": 0.8444638440532303, "learning_rate": 5.245858116526868e-06, "loss": 0.3901, "step": 17133 }, { "epoch": 1.6079204204204203, "grad_norm": 1.3874801625222122, "learning_rate": 5.245312810344399e-06, "loss": 0.4222, "step": 17134 }, { "epoch": 1.6080142642642643, "grad_norm": 1.063217037591329, "learning_rate": 5.244767501237034e-06, "loss": 0.3668, "step": 17135 }, { "epoch": 1.6081081081081081, "grad_norm": 1.2211505165856997, "learning_rate": 5.244222189211277e-06, "loss": 0.4234, "step": 17136 }, { "epoch": 1.608201951951952, "grad_norm": 0.898683602849743, "learning_rate": 5.243676874273631e-06, "loss": 0.4207, "step": 17137 }, { "epoch": 1.608295795795796, "grad_norm": 0.8985756905235966, "learning_rate": 5.243131556430598e-06, "loss": 0.3676, "step": 17138 }, { "epoch": 1.6083896396396398, "grad_norm": 1.1410447827791792, "learning_rate": 5.242586235688677e-06, "loss": 0.3945, "step": 17139 }, { "epoch": 1.6084834834834836, "grad_norm": 1.0439150677148348, "learning_rate": 5.242040912054374e-06, "loss": 0.4302, "step": 17140 }, { "epoch": 1.6085773273273274, "grad_norm": 0.838382085575589, "learning_rate": 5.2414955855341865e-06, "loss": 0.3803, "step": 17141 }, { "epoch": 1.6086711711711712, "grad_norm": 0.8413354134211632, "learning_rate": 5.240950256134618e-06, "loss": 0.3602, "step": 17142 }, { "epoch": 1.608765015015015, "grad_norm": 0.8889502968699685, "learning_rate": 5.240404923862174e-06, "loss": 0.3919, "step": 17143 }, { "epoch": 1.6088588588588588, "grad_norm": 0.8970263663596638, "learning_rate": 5.2398595887233525e-06, "loss": 0.3729, "step": 17144 }, { "epoch": 1.6089527027027026, "grad_norm": 0.881299500912753, "learning_rate": 5.239314250724657e-06, "loss": 0.3884, "step": 17145 }, { "epoch": 1.6090465465465464, "grad_norm": 0.9031938341983167, "learning_rate": 5.23876890987259e-06, "loss": 0.4377, "step": 17146 }, { "epoch": 1.6091403903903903, "grad_norm": 1.4024897989773697, "learning_rate": 5.238223566173652e-06, "loss": 0.403, "step": 17147 }, { "epoch": 1.6092342342342343, "grad_norm": 0.8698753923862159, "learning_rate": 5.237678219634347e-06, "loss": 0.399, "step": 17148 }, { "epoch": 1.609328078078078, "grad_norm": 0.8447060247803031, "learning_rate": 5.237132870261177e-06, "loss": 0.3893, "step": 17149 }, { "epoch": 1.609421921921922, "grad_norm": 1.0464040429067647, "learning_rate": 5.236587518060643e-06, "loss": 0.4213, "step": 17150 }, { "epoch": 1.6095157657657657, "grad_norm": 1.2012108793097949, "learning_rate": 5.236042163039249e-06, "loss": 0.4329, "step": 17151 }, { "epoch": 1.6096096096096097, "grad_norm": 0.9305309054084113, "learning_rate": 5.235496805203497e-06, "loss": 0.4247, "step": 17152 }, { "epoch": 1.6097034534534536, "grad_norm": 1.0705999484457707, "learning_rate": 5.234951444559887e-06, "loss": 0.4359, "step": 17153 }, { "epoch": 1.6097972972972974, "grad_norm": 1.020723096948816, "learning_rate": 5.234406081114924e-06, "loss": 0.4148, "step": 17154 }, { "epoch": 1.6098911411411412, "grad_norm": 1.24355153091652, "learning_rate": 5.233860714875111e-06, "loss": 0.3923, "step": 17155 }, { "epoch": 1.609984984984985, "grad_norm": 0.9382744688127713, "learning_rate": 5.233315345846948e-06, "loss": 0.4382, "step": 17156 }, { "epoch": 1.6100788288288288, "grad_norm": 1.7577381321024614, "learning_rate": 5.232769974036939e-06, "loss": 0.3827, "step": 17157 }, { "epoch": 1.6101726726726726, "grad_norm": 1.0790860426216733, "learning_rate": 5.232224599451586e-06, "loss": 0.4421, "step": 17158 }, { "epoch": 1.6102665165165164, "grad_norm": 1.121996455759614, "learning_rate": 5.231679222097392e-06, "loss": 0.4295, "step": 17159 }, { "epoch": 1.6103603603603602, "grad_norm": 0.8592743737187499, "learning_rate": 5.231133841980859e-06, "loss": 0.361, "step": 17160 }, { "epoch": 1.610454204204204, "grad_norm": 1.0669228504652097, "learning_rate": 5.23058845910849e-06, "loss": 0.4388, "step": 17161 }, { "epoch": 1.610548048048048, "grad_norm": 0.893955667805656, "learning_rate": 5.230043073486788e-06, "loss": 0.4174, "step": 17162 }, { "epoch": 1.6106418918918919, "grad_norm": 0.9238488008565736, "learning_rate": 5.229497685122255e-06, "loss": 0.3558, "step": 17163 }, { "epoch": 1.6107357357357357, "grad_norm": 0.9055136852966067, "learning_rate": 5.228952294021395e-06, "loss": 0.3587, "step": 17164 }, { "epoch": 1.6108295795795797, "grad_norm": 0.9707875142369061, "learning_rate": 5.228406900190709e-06, "loss": 0.4083, "step": 17165 }, { "epoch": 1.6109234234234235, "grad_norm": 0.8666865727496015, "learning_rate": 5.2278615036367004e-06, "loss": 0.3822, "step": 17166 }, { "epoch": 1.6110172672672673, "grad_norm": 0.8878787886867021, "learning_rate": 5.227316104365874e-06, "loss": 0.3693, "step": 17167 }, { "epoch": 1.6111111111111112, "grad_norm": 1.0693894320172956, "learning_rate": 5.226770702384728e-06, "loss": 0.4344, "step": 17168 }, { "epoch": 1.611204954954955, "grad_norm": 1.245704448618365, "learning_rate": 5.226225297699771e-06, "loss": 0.3948, "step": 17169 }, { "epoch": 1.6112987987987988, "grad_norm": 0.9854929644953088, "learning_rate": 5.225679890317502e-06, "loss": 0.4406, "step": 17170 }, { "epoch": 1.6113926426426426, "grad_norm": 0.778297910084613, "learning_rate": 5.225134480244425e-06, "loss": 0.4094, "step": 17171 }, { "epoch": 1.6114864864864864, "grad_norm": 1.3113988228245397, "learning_rate": 5.2245890674870424e-06, "loss": 0.3572, "step": 17172 }, { "epoch": 1.6115803303303302, "grad_norm": 1.3329849165477683, "learning_rate": 5.22404365205186e-06, "loss": 0.3435, "step": 17173 }, { "epoch": 1.611674174174174, "grad_norm": 0.9591726223899307, "learning_rate": 5.223498233945377e-06, "loss": 0.4425, "step": 17174 }, { "epoch": 1.611768018018018, "grad_norm": 0.9391350266000225, "learning_rate": 5.222952813174099e-06, "loss": 0.4016, "step": 17175 }, { "epoch": 1.6118618618618619, "grad_norm": 1.013407666719307, "learning_rate": 5.222407389744529e-06, "loss": 0.4361, "step": 17176 }, { "epoch": 1.6119557057057057, "grad_norm": 0.9645889214834854, "learning_rate": 5.2218619636631685e-06, "loss": 0.3851, "step": 17177 }, { "epoch": 1.6120495495495497, "grad_norm": 1.1605867512641628, "learning_rate": 5.221316534936521e-06, "loss": 0.3909, "step": 17178 }, { "epoch": 1.6121433933933935, "grad_norm": 1.0751011809981381, "learning_rate": 5.220771103571092e-06, "loss": 0.4135, "step": 17179 }, { "epoch": 1.6122372372372373, "grad_norm": 0.9592824487864687, "learning_rate": 5.2202256695733814e-06, "loss": 0.4153, "step": 17180 }, { "epoch": 1.6123310810810811, "grad_norm": 1.001281744134308, "learning_rate": 5.219680232949896e-06, "loss": 0.3906, "step": 17181 }, { "epoch": 1.612424924924925, "grad_norm": 2.269956090036316, "learning_rate": 5.219134793707137e-06, "loss": 0.4389, "step": 17182 }, { "epoch": 1.6125187687687688, "grad_norm": 0.9113393318997612, "learning_rate": 5.218589351851607e-06, "loss": 0.3989, "step": 17183 }, { "epoch": 1.6126126126126126, "grad_norm": 0.9654831315704165, "learning_rate": 5.2180439073898105e-06, "loss": 0.4001, "step": 17184 }, { "epoch": 1.6127064564564564, "grad_norm": 0.9881568157545659, "learning_rate": 5.217498460328252e-06, "loss": 0.4158, "step": 17185 }, { "epoch": 1.6128003003003002, "grad_norm": 1.1652436921433929, "learning_rate": 5.2169530106734325e-06, "loss": 0.4523, "step": 17186 }, { "epoch": 1.612894144144144, "grad_norm": 0.8870554948463151, "learning_rate": 5.216407558431858e-06, "loss": 0.4017, "step": 17187 }, { "epoch": 1.612987987987988, "grad_norm": 0.9410867940495622, "learning_rate": 5.215862103610031e-06, "loss": 0.3994, "step": 17188 }, { "epoch": 1.6130818318318318, "grad_norm": 1.248510118376121, "learning_rate": 5.215316646214453e-06, "loss": 0.3597, "step": 17189 }, { "epoch": 1.6131756756756757, "grad_norm": 1.0793412047027564, "learning_rate": 5.21477118625163e-06, "loss": 0.3975, "step": 17190 }, { "epoch": 1.6132695195195195, "grad_norm": 0.9654138869666447, "learning_rate": 5.214225723728065e-06, "loss": 0.405, "step": 17191 }, { "epoch": 1.6133633633633635, "grad_norm": 0.8228528906953471, "learning_rate": 5.213680258650261e-06, "loss": 0.4039, "step": 17192 }, { "epoch": 1.6134572072072073, "grad_norm": 1.0510217184454909, "learning_rate": 5.213134791024723e-06, "loss": 0.4306, "step": 17193 }, { "epoch": 1.6135510510510511, "grad_norm": 1.2102001608318245, "learning_rate": 5.212589320857954e-06, "loss": 0.3945, "step": 17194 }, { "epoch": 1.613644894894895, "grad_norm": 0.9013660527995796, "learning_rate": 5.212043848156456e-06, "loss": 0.4082, "step": 17195 }, { "epoch": 1.6137387387387387, "grad_norm": 1.0774340734489791, "learning_rate": 5.211498372926736e-06, "loss": 0.3405, "step": 17196 }, { "epoch": 1.6138325825825826, "grad_norm": 1.0606962397844382, "learning_rate": 5.210952895175295e-06, "loss": 0.429, "step": 17197 }, { "epoch": 1.6139264264264264, "grad_norm": 0.9708924883318878, "learning_rate": 5.210407414908637e-06, "loss": 0.3942, "step": 17198 }, { "epoch": 1.6140202702702702, "grad_norm": 0.9249038856809314, "learning_rate": 5.209861932133267e-06, "loss": 0.4104, "step": 17199 }, { "epoch": 1.614114114114114, "grad_norm": 0.872667838662348, "learning_rate": 5.209316446855691e-06, "loss": 0.3746, "step": 17200 }, { "epoch": 1.6142079579579578, "grad_norm": 0.9038251084195705, "learning_rate": 5.208770959082407e-06, "loss": 0.3707, "step": 17201 }, { "epoch": 1.6143018018018018, "grad_norm": 1.3634598058447285, "learning_rate": 5.208225468819924e-06, "loss": 0.4413, "step": 17202 }, { "epoch": 1.6143956456456456, "grad_norm": 1.0584287784114281, "learning_rate": 5.207679976074744e-06, "loss": 0.4112, "step": 17203 }, { "epoch": 1.6144894894894894, "grad_norm": 1.062992173258983, "learning_rate": 5.207134480853371e-06, "loss": 0.4499, "step": 17204 }, { "epoch": 1.6145833333333335, "grad_norm": 0.8739546868190579, "learning_rate": 5.20658898316231e-06, "loss": 0.394, "step": 17205 }, { "epoch": 1.6146771771771773, "grad_norm": 1.1272273600606615, "learning_rate": 5.206043483008064e-06, "loss": 0.3891, "step": 17206 }, { "epoch": 1.614771021021021, "grad_norm": 0.9002228050307907, "learning_rate": 5.205497980397136e-06, "loss": 0.4031, "step": 17207 }, { "epoch": 1.614864864864865, "grad_norm": 0.9366365457048496, "learning_rate": 5.204952475336032e-06, "loss": 0.4134, "step": 17208 }, { "epoch": 1.6149587087087087, "grad_norm": 0.9985536160752319, "learning_rate": 5.204406967831257e-06, "loss": 0.3885, "step": 17209 }, { "epoch": 1.6150525525525525, "grad_norm": 1.217339832690626, "learning_rate": 5.203861457889311e-06, "loss": 0.3473, "step": 17210 }, { "epoch": 1.6151463963963963, "grad_norm": 1.010272456200011, "learning_rate": 5.203315945516702e-06, "loss": 0.4147, "step": 17211 }, { "epoch": 1.6152402402402402, "grad_norm": 1.4128091742715285, "learning_rate": 5.202770430719936e-06, "loss": 0.3853, "step": 17212 }, { "epoch": 1.615334084084084, "grad_norm": 0.971371968259814, "learning_rate": 5.20222491350551e-06, "loss": 0.4037, "step": 17213 }, { "epoch": 1.6154279279279278, "grad_norm": 0.9095597463674728, "learning_rate": 5.201679393879934e-06, "loss": 0.3632, "step": 17214 }, { "epoch": 1.6155217717717718, "grad_norm": 0.8500942178394902, "learning_rate": 5.201133871849712e-06, "loss": 0.3689, "step": 17215 }, { "epoch": 1.6156156156156156, "grad_norm": 0.8962570113771238, "learning_rate": 5.200588347421345e-06, "loss": 0.4259, "step": 17216 }, { "epoch": 1.6157094594594594, "grad_norm": 0.865204362141521, "learning_rate": 5.200042820601341e-06, "loss": 0.4215, "step": 17217 }, { "epoch": 1.6158033033033035, "grad_norm": 0.9120032767350204, "learning_rate": 5.199497291396203e-06, "loss": 0.4287, "step": 17218 }, { "epoch": 1.6158971471471473, "grad_norm": 1.2556558623843643, "learning_rate": 5.198951759812435e-06, "loss": 0.3746, "step": 17219 }, { "epoch": 1.615990990990991, "grad_norm": 0.9082226440317496, "learning_rate": 5.198406225856542e-06, "loss": 0.3647, "step": 17220 }, { "epoch": 1.616084834834835, "grad_norm": 1.3421239879669051, "learning_rate": 5.197860689535028e-06, "loss": 0.4021, "step": 17221 }, { "epoch": 1.6161786786786787, "grad_norm": 0.8953146060602795, "learning_rate": 5.197315150854397e-06, "loss": 0.4257, "step": 17222 }, { "epoch": 1.6162725225225225, "grad_norm": 0.8983206137540063, "learning_rate": 5.196769609821154e-06, "loss": 0.4037, "step": 17223 }, { "epoch": 1.6163663663663663, "grad_norm": 0.9434157829102575, "learning_rate": 5.196224066441805e-06, "loss": 0.3888, "step": 17224 }, { "epoch": 1.6164602102102101, "grad_norm": 0.9522373139677804, "learning_rate": 5.195678520722855e-06, "loss": 0.3467, "step": 17225 }, { "epoch": 1.616554054054054, "grad_norm": 0.9057721563087984, "learning_rate": 5.195132972670804e-06, "loss": 0.3705, "step": 17226 }, { "epoch": 1.6166478978978978, "grad_norm": 0.9380606288141403, "learning_rate": 5.194587422292161e-06, "loss": 0.4273, "step": 17227 }, { "epoch": 1.6167417417417418, "grad_norm": 0.8737388878243159, "learning_rate": 5.194041869593428e-06, "loss": 0.3661, "step": 17228 }, { "epoch": 1.6168355855855856, "grad_norm": 0.9170396357530435, "learning_rate": 5.1934963145811115e-06, "loss": 0.4033, "step": 17229 }, { "epoch": 1.6169294294294294, "grad_norm": 0.8680562899346453, "learning_rate": 5.192950757261716e-06, "loss": 0.394, "step": 17230 }, { "epoch": 1.6170232732732732, "grad_norm": 0.9614472289763094, "learning_rate": 5.192405197641747e-06, "loss": 0.3963, "step": 17231 }, { "epoch": 1.6171171171171173, "grad_norm": 0.9321612095240221, "learning_rate": 5.191859635727706e-06, "loss": 0.4243, "step": 17232 }, { "epoch": 1.617210960960961, "grad_norm": 1.0186655187819633, "learning_rate": 5.191314071526102e-06, "loss": 0.3984, "step": 17233 }, { "epoch": 1.6173048048048049, "grad_norm": 4.19084491095575, "learning_rate": 5.190768505043437e-06, "loss": 0.3907, "step": 17234 }, { "epoch": 1.6173986486486487, "grad_norm": 0.9532358386675189, "learning_rate": 5.190222936286216e-06, "loss": 0.357, "step": 17235 }, { "epoch": 1.6174924924924925, "grad_norm": 1.0308659193038008, "learning_rate": 5.189677365260946e-06, "loss": 0.3826, "step": 17236 }, { "epoch": 1.6175863363363363, "grad_norm": 1.025716808128453, "learning_rate": 5.189131791974129e-06, "loss": 0.3933, "step": 17237 }, { "epoch": 1.6176801801801801, "grad_norm": 1.0242246110259248, "learning_rate": 5.188586216432272e-06, "loss": 0.3793, "step": 17238 }, { "epoch": 1.617774024024024, "grad_norm": 0.8969627317615161, "learning_rate": 5.188040638641879e-06, "loss": 0.3959, "step": 17239 }, { "epoch": 1.6178678678678677, "grad_norm": 0.8729170261979438, "learning_rate": 5.187495058609455e-06, "loss": 0.3701, "step": 17240 }, { "epoch": 1.6179617117117115, "grad_norm": 1.2317733373514248, "learning_rate": 5.1869494763415065e-06, "loss": 0.3754, "step": 17241 }, { "epoch": 1.6180555555555556, "grad_norm": 0.9633874961909036, "learning_rate": 5.1864038918445366e-06, "loss": 0.4108, "step": 17242 }, { "epoch": 1.6181493993993994, "grad_norm": 1.3871734877368702, "learning_rate": 5.185858305125052e-06, "loss": 0.3619, "step": 17243 }, { "epoch": 1.6182432432432432, "grad_norm": 0.8985648024683575, "learning_rate": 5.185312716189556e-06, "loss": 0.3874, "step": 17244 }, { "epoch": 1.6183370870870872, "grad_norm": 1.0050061246408548, "learning_rate": 5.184767125044556e-06, "loss": 0.3824, "step": 17245 }, { "epoch": 1.618430930930931, "grad_norm": 1.0462143739016367, "learning_rate": 5.184221531696554e-06, "loss": 0.4156, "step": 17246 }, { "epoch": 1.6185247747747749, "grad_norm": 1.5346819801807288, "learning_rate": 5.183675936152058e-06, "loss": 0.3727, "step": 17247 }, { "epoch": 1.6186186186186187, "grad_norm": 0.8937469577752356, "learning_rate": 5.183130338417571e-06, "loss": 0.408, "step": 17248 }, { "epoch": 1.6187124624624625, "grad_norm": 1.0754397683439565, "learning_rate": 5.182584738499602e-06, "loss": 0.4395, "step": 17249 }, { "epoch": 1.6188063063063063, "grad_norm": 5.57572846199772, "learning_rate": 5.182039136404651e-06, "loss": 0.401, "step": 17250 }, { "epoch": 1.61890015015015, "grad_norm": 0.9346067931879588, "learning_rate": 5.181493532139228e-06, "loss": 0.3625, "step": 17251 }, { "epoch": 1.618993993993994, "grad_norm": 0.8699666387106332, "learning_rate": 5.180947925709835e-06, "loss": 0.3594, "step": 17252 }, { "epoch": 1.6190878378378377, "grad_norm": 1.110800875216257, "learning_rate": 5.1804023171229776e-06, "loss": 0.421, "step": 17253 }, { "epoch": 1.6191816816816815, "grad_norm": 0.8125263288582519, "learning_rate": 5.179856706385165e-06, "loss": 0.3887, "step": 17254 }, { "epoch": 1.6192755255255256, "grad_norm": 0.9923909720287896, "learning_rate": 5.179311093502898e-06, "loss": 0.3708, "step": 17255 }, { "epoch": 1.6193693693693694, "grad_norm": 1.1358816282744453, "learning_rate": 5.178765478482683e-06, "loss": 0.3736, "step": 17256 }, { "epoch": 1.6194632132132132, "grad_norm": 1.0103174899801797, "learning_rate": 5.178219861331029e-06, "loss": 0.3883, "step": 17257 }, { "epoch": 1.6195570570570572, "grad_norm": 0.9117210811258801, "learning_rate": 5.177674242054435e-06, "loss": 0.4157, "step": 17258 }, { "epoch": 1.619650900900901, "grad_norm": 1.0043004758425473, "learning_rate": 5.177128620659412e-06, "loss": 0.3984, "step": 17259 }, { "epoch": 1.6197447447447448, "grad_norm": 0.9387041727029257, "learning_rate": 5.176582997152464e-06, "loss": 0.3572, "step": 17260 }, { "epoch": 1.6198385885885886, "grad_norm": 0.9228250411241039, "learning_rate": 5.1760373715400955e-06, "loss": 0.4033, "step": 17261 }, { "epoch": 1.6199324324324325, "grad_norm": 0.8906706935692965, "learning_rate": 5.1754917438288125e-06, "loss": 0.4305, "step": 17262 }, { "epoch": 1.6200262762762763, "grad_norm": 0.9391209331682321, "learning_rate": 5.174946114025121e-06, "loss": 0.4251, "step": 17263 }, { "epoch": 1.62012012012012, "grad_norm": 1.077383691151682, "learning_rate": 5.174400482135527e-06, "loss": 0.3851, "step": 17264 }, { "epoch": 1.6202139639639639, "grad_norm": 1.4732620601078532, "learning_rate": 5.173854848166534e-06, "loss": 0.3939, "step": 17265 }, { "epoch": 1.6203078078078077, "grad_norm": 0.9398661701494829, "learning_rate": 5.173309212124651e-06, "loss": 0.4418, "step": 17266 }, { "epoch": 1.6204016516516515, "grad_norm": 1.8132015825259997, "learning_rate": 5.172763574016381e-06, "loss": 0.3646, "step": 17267 }, { "epoch": 1.6204954954954955, "grad_norm": 1.009434367653347, "learning_rate": 5.172217933848231e-06, "loss": 0.3731, "step": 17268 }, { "epoch": 1.6205893393393394, "grad_norm": 0.9895443508957213, "learning_rate": 5.171672291626706e-06, "loss": 0.3982, "step": 17269 }, { "epoch": 1.6206831831831832, "grad_norm": 0.9677783469495431, "learning_rate": 5.171126647358311e-06, "loss": 0.3937, "step": 17270 }, { "epoch": 1.620777027027027, "grad_norm": 1.1370945069267975, "learning_rate": 5.170581001049552e-06, "loss": 0.4055, "step": 17271 }, { "epoch": 1.620870870870871, "grad_norm": 0.8922608665067812, "learning_rate": 5.170035352706939e-06, "loss": 0.4047, "step": 17272 }, { "epoch": 1.6209647147147148, "grad_norm": 0.9490962850888034, "learning_rate": 5.169489702336974e-06, "loss": 0.3896, "step": 17273 }, { "epoch": 1.6210585585585586, "grad_norm": 0.9283200423558824, "learning_rate": 5.168944049946161e-06, "loss": 0.3846, "step": 17274 }, { "epoch": 1.6211524024024024, "grad_norm": 1.0022033044112346, "learning_rate": 5.16839839554101e-06, "loss": 0.4186, "step": 17275 }, { "epoch": 1.6212462462462462, "grad_norm": 0.8657157396955758, "learning_rate": 5.167852739128024e-06, "loss": 0.3899, "step": 17276 }, { "epoch": 1.62134009009009, "grad_norm": 0.9681455000554399, "learning_rate": 5.167307080713711e-06, "loss": 0.4296, "step": 17277 }, { "epoch": 1.6214339339339339, "grad_norm": 0.9771219545251612, "learning_rate": 5.166761420304576e-06, "loss": 0.3654, "step": 17278 }, { "epoch": 1.6215277777777777, "grad_norm": 0.9882211967053681, "learning_rate": 5.166215757907124e-06, "loss": 0.3849, "step": 17279 }, { "epoch": 1.6216216216216215, "grad_norm": 0.9585392445292157, "learning_rate": 5.165670093527863e-06, "loss": 0.4044, "step": 17280 }, { "epoch": 1.6217154654654653, "grad_norm": 1.0599901159395335, "learning_rate": 5.165124427173297e-06, "loss": 0.3949, "step": 17281 }, { "epoch": 1.6218093093093093, "grad_norm": 0.9511306088752693, "learning_rate": 5.164578758849933e-06, "loss": 0.3409, "step": 17282 }, { "epoch": 1.6219031531531531, "grad_norm": 0.9734694898038923, "learning_rate": 5.164033088564278e-06, "loss": 0.3771, "step": 17283 }, { "epoch": 1.621996996996997, "grad_norm": 1.0421673594226923, "learning_rate": 5.163487416322836e-06, "loss": 0.4242, "step": 17284 }, { "epoch": 1.622090840840841, "grad_norm": 0.9120296220878744, "learning_rate": 5.1629417421321144e-06, "loss": 0.4365, "step": 17285 }, { "epoch": 1.6221846846846848, "grad_norm": 1.1799898888758782, "learning_rate": 5.162396065998622e-06, "loss": 0.4474, "step": 17286 }, { "epoch": 1.6222785285285286, "grad_norm": 0.9178233677562303, "learning_rate": 5.16185038792886e-06, "loss": 0.3976, "step": 17287 }, { "epoch": 1.6223723723723724, "grad_norm": 0.9660648854527213, "learning_rate": 5.161304707929337e-06, "loss": 0.364, "step": 17288 }, { "epoch": 1.6224662162162162, "grad_norm": 0.9720003905357788, "learning_rate": 5.160759026006558e-06, "loss": 0.3789, "step": 17289 }, { "epoch": 1.62256006006006, "grad_norm": 1.0852959995792193, "learning_rate": 5.160213342167031e-06, "loss": 0.4289, "step": 17290 }, { "epoch": 1.6226539039039038, "grad_norm": 0.9532445236059741, "learning_rate": 5.159667656417261e-06, "loss": 0.3821, "step": 17291 }, { "epoch": 1.6227477477477477, "grad_norm": 0.9950699426480095, "learning_rate": 5.159121968763756e-06, "loss": 0.4058, "step": 17292 }, { "epoch": 1.6228415915915915, "grad_norm": 1.1323082003114993, "learning_rate": 5.158576279213021e-06, "loss": 0.4343, "step": 17293 }, { "epoch": 1.6229354354354353, "grad_norm": 0.951360559165541, "learning_rate": 5.158030587771561e-06, "loss": 0.3826, "step": 17294 }, { "epoch": 1.6230292792792793, "grad_norm": 1.2287621960476998, "learning_rate": 5.157484894445883e-06, "loss": 0.4215, "step": 17295 }, { "epoch": 1.6231231231231231, "grad_norm": 1.1845266824972238, "learning_rate": 5.156939199242496e-06, "loss": 0.3698, "step": 17296 }, { "epoch": 1.623216966966967, "grad_norm": 0.8924532171706074, "learning_rate": 5.156393502167905e-06, "loss": 0.3804, "step": 17297 }, { "epoch": 1.623310810810811, "grad_norm": 1.0206497276139255, "learning_rate": 5.155847803228614e-06, "loss": 0.399, "step": 17298 }, { "epoch": 1.6234046546546548, "grad_norm": 1.6450449644065854, "learning_rate": 5.155302102431134e-06, "loss": 0.3912, "step": 17299 }, { "epoch": 1.6234984984984986, "grad_norm": 0.9923135344808548, "learning_rate": 5.154756399781965e-06, "loss": 0.3723, "step": 17300 }, { "epoch": 1.6235923423423424, "grad_norm": 1.0705248854973795, "learning_rate": 5.1542106952876194e-06, "loss": 0.3745, "step": 17301 }, { "epoch": 1.6236861861861862, "grad_norm": 0.8241755435936817, "learning_rate": 5.153664988954601e-06, "loss": 0.3413, "step": 17302 }, { "epoch": 1.62378003003003, "grad_norm": 4.2661978509884, "learning_rate": 5.153119280789417e-06, "loss": 0.4217, "step": 17303 }, { "epoch": 1.6238738738738738, "grad_norm": 1.2652035588077446, "learning_rate": 5.152573570798573e-06, "loss": 0.3964, "step": 17304 }, { "epoch": 1.6239677177177176, "grad_norm": 0.9020210225030294, "learning_rate": 5.152027858988579e-06, "loss": 0.382, "step": 17305 }, { "epoch": 1.6240615615615615, "grad_norm": 0.9206323885957632, "learning_rate": 5.151482145365936e-06, "loss": 0.3943, "step": 17306 }, { "epoch": 1.6241554054054053, "grad_norm": 0.7795276172931072, "learning_rate": 5.150936429937155e-06, "loss": 0.3414, "step": 17307 }, { "epoch": 1.6242492492492493, "grad_norm": 1.103049642821556, "learning_rate": 5.1503907127087405e-06, "loss": 0.4058, "step": 17308 }, { "epoch": 1.624343093093093, "grad_norm": 1.230245960745867, "learning_rate": 5.1498449936872e-06, "loss": 0.3616, "step": 17309 }, { "epoch": 1.624436936936937, "grad_norm": 1.0151045654378814, "learning_rate": 5.1492992728790405e-06, "loss": 0.4209, "step": 17310 }, { "epoch": 1.6245307807807807, "grad_norm": 0.8753796349358957, "learning_rate": 5.148753550290768e-06, "loss": 0.394, "step": 17311 }, { "epoch": 1.6246246246246248, "grad_norm": 2.76453230451827, "learning_rate": 5.148207825928889e-06, "loss": 0.4279, "step": 17312 }, { "epoch": 1.6247184684684686, "grad_norm": 0.8641096538901669, "learning_rate": 5.1476620997999105e-06, "loss": 0.4028, "step": 17313 }, { "epoch": 1.6248123123123124, "grad_norm": 0.9548316623978622, "learning_rate": 5.1471163719103415e-06, "loss": 0.4168, "step": 17314 }, { "epoch": 1.6249061561561562, "grad_norm": 0.9120635682392103, "learning_rate": 5.146570642266684e-06, "loss": 0.4081, "step": 17315 }, { "epoch": 1.625, "grad_norm": 0.9736160403200337, "learning_rate": 5.146024910875449e-06, "loss": 0.453, "step": 17316 }, { "epoch": 1.6250938438438438, "grad_norm": 0.8336773382835895, "learning_rate": 5.145479177743141e-06, "loss": 0.4112, "step": 17317 }, { "epoch": 1.6251876876876876, "grad_norm": 0.9829942635684389, "learning_rate": 5.144933442876268e-06, "loss": 0.3908, "step": 17318 }, { "epoch": 1.6252815315315314, "grad_norm": 0.9049608222180358, "learning_rate": 5.144387706281336e-06, "loss": 0.4205, "step": 17319 }, { "epoch": 1.6253753753753752, "grad_norm": 0.962812231544935, "learning_rate": 5.143841967964854e-06, "loss": 0.3871, "step": 17320 }, { "epoch": 1.6254692192192193, "grad_norm": 0.8676274150577246, "learning_rate": 5.143296227933326e-06, "loss": 0.3535, "step": 17321 }, { "epoch": 1.625563063063063, "grad_norm": 0.8690423285179224, "learning_rate": 5.14275048619326e-06, "loss": 0.3954, "step": 17322 }, { "epoch": 1.625656906906907, "grad_norm": 0.8299126029162882, "learning_rate": 5.142204742751163e-06, "loss": 0.361, "step": 17323 }, { "epoch": 1.6257507507507507, "grad_norm": 0.8641770710725593, "learning_rate": 5.141658997613544e-06, "loss": 0.3718, "step": 17324 }, { "epoch": 1.6258445945945947, "grad_norm": 1.0789701924496065, "learning_rate": 5.141113250786906e-06, "loss": 0.4173, "step": 17325 }, { "epoch": 1.6259384384384385, "grad_norm": 0.8442424263465805, "learning_rate": 5.140567502277758e-06, "loss": 0.3634, "step": 17326 }, { "epoch": 1.6260322822822824, "grad_norm": 0.9533051959909336, "learning_rate": 5.140021752092608e-06, "loss": 0.4265, "step": 17327 }, { "epoch": 1.6261261261261262, "grad_norm": 1.1367274451501634, "learning_rate": 5.139476000237963e-06, "loss": 0.4421, "step": 17328 }, { "epoch": 1.62621996996997, "grad_norm": 1.311097431784999, "learning_rate": 5.1389302467203285e-06, "loss": 0.3674, "step": 17329 }, { "epoch": 1.6263138138138138, "grad_norm": 0.9709853625994885, "learning_rate": 5.138384491546211e-06, "loss": 0.4351, "step": 17330 }, { "epoch": 1.6264076576576576, "grad_norm": 0.9531188083862737, "learning_rate": 5.1378387347221204e-06, "loss": 0.413, "step": 17331 }, { "epoch": 1.6265015015015014, "grad_norm": 0.9369775308370064, "learning_rate": 5.137292976254562e-06, "loss": 0.4299, "step": 17332 }, { "epoch": 1.6265953453453452, "grad_norm": 1.9842491296565428, "learning_rate": 5.136747216150043e-06, "loss": 0.3865, "step": 17333 }, { "epoch": 1.626689189189189, "grad_norm": 0.8255549902082313, "learning_rate": 5.13620145441507e-06, "loss": 0.3673, "step": 17334 }, { "epoch": 1.626783033033033, "grad_norm": 1.0120041020140889, "learning_rate": 5.135655691056155e-06, "loss": 0.3324, "step": 17335 }, { "epoch": 1.6268768768768769, "grad_norm": 0.9139507699660734, "learning_rate": 5.135109926079797e-06, "loss": 0.4039, "step": 17336 }, { "epoch": 1.6269707207207207, "grad_norm": 3.9642923686322176, "learning_rate": 5.134564159492509e-06, "loss": 0.4155, "step": 17337 }, { "epoch": 1.6270645645645647, "grad_norm": 0.9723679659510569, "learning_rate": 5.1340183913007964e-06, "loss": 0.4269, "step": 17338 }, { "epoch": 1.6271584084084085, "grad_norm": 2.0901073845719855, "learning_rate": 5.133472621511166e-06, "loss": 0.4069, "step": 17339 }, { "epoch": 1.6272522522522523, "grad_norm": 0.9409623693928678, "learning_rate": 5.1329268501301265e-06, "loss": 0.4434, "step": 17340 }, { "epoch": 1.6273460960960962, "grad_norm": 1.0703150231309113, "learning_rate": 5.1323810771641855e-06, "loss": 0.4209, "step": 17341 }, { "epoch": 1.62743993993994, "grad_norm": 1.7832449014913703, "learning_rate": 5.1318353026198495e-06, "loss": 0.4005, "step": 17342 }, { "epoch": 1.6275337837837838, "grad_norm": 0.9064627012061359, "learning_rate": 5.131289526503624e-06, "loss": 0.3852, "step": 17343 }, { "epoch": 1.6276276276276276, "grad_norm": 1.0209982665400392, "learning_rate": 5.130743748822019e-06, "loss": 0.4181, "step": 17344 }, { "epoch": 1.6277214714714714, "grad_norm": 0.9061870300212767, "learning_rate": 5.130197969581541e-06, "loss": 0.3809, "step": 17345 }, { "epoch": 1.6278153153153152, "grad_norm": 0.9471186652215461, "learning_rate": 5.129652188788696e-06, "loss": 0.3827, "step": 17346 }, { "epoch": 1.627909159159159, "grad_norm": 1.0665308128721143, "learning_rate": 5.129106406449994e-06, "loss": 0.3885, "step": 17347 }, { "epoch": 1.628003003003003, "grad_norm": 1.052571791636654, "learning_rate": 5.128560622571942e-06, "loss": 0.426, "step": 17348 }, { "epoch": 1.6280968468468469, "grad_norm": 0.9919150962739349, "learning_rate": 5.128014837161044e-06, "loss": 0.4038, "step": 17349 }, { "epoch": 1.6281906906906907, "grad_norm": 1.0402277701415996, "learning_rate": 5.127469050223812e-06, "loss": 0.3858, "step": 17350 }, { "epoch": 1.6282845345345347, "grad_norm": 0.8312824915487597, "learning_rate": 5.126923261766751e-06, "loss": 0.3692, "step": 17351 }, { "epoch": 1.6283783783783785, "grad_norm": 1.2893310928287722, "learning_rate": 5.1263774717963675e-06, "loss": 0.3791, "step": 17352 }, { "epoch": 1.6284722222222223, "grad_norm": 1.0543425429503936, "learning_rate": 5.125831680319173e-06, "loss": 0.3802, "step": 17353 }, { "epoch": 1.6285660660660661, "grad_norm": 1.1965856368152958, "learning_rate": 5.125285887341672e-06, "loss": 0.4191, "step": 17354 }, { "epoch": 1.62865990990991, "grad_norm": 2.04725737618693, "learning_rate": 5.124740092870372e-06, "loss": 0.3966, "step": 17355 }, { "epoch": 1.6287537537537538, "grad_norm": 0.9035767700980155, "learning_rate": 5.124194296911781e-06, "loss": 0.4003, "step": 17356 }, { "epoch": 1.6288475975975976, "grad_norm": 0.9629816216417907, "learning_rate": 5.123648499472407e-06, "loss": 0.4592, "step": 17357 }, { "epoch": 1.6289414414414414, "grad_norm": 0.9855722571684165, "learning_rate": 5.1231027005587565e-06, "loss": 0.4241, "step": 17358 }, { "epoch": 1.6290352852852852, "grad_norm": 0.9916497211826115, "learning_rate": 5.12255690017734e-06, "loss": 0.42, "step": 17359 }, { "epoch": 1.629129129129129, "grad_norm": 0.8851825927401921, "learning_rate": 5.122011098334662e-06, "loss": 0.4223, "step": 17360 }, { "epoch": 1.629222972972973, "grad_norm": 0.9128234576785298, "learning_rate": 5.121465295037231e-06, "loss": 0.4111, "step": 17361 }, { "epoch": 1.6293168168168168, "grad_norm": 1.1182295416861425, "learning_rate": 5.120919490291556e-06, "loss": 0.4121, "step": 17362 }, { "epoch": 1.6294106606606606, "grad_norm": 0.9383875325317157, "learning_rate": 5.120373684104143e-06, "loss": 0.4025, "step": 17363 }, { "epoch": 1.6295045045045045, "grad_norm": 1.2379647618596652, "learning_rate": 5.1198278764815005e-06, "loss": 0.4082, "step": 17364 }, { "epoch": 1.6295983483483485, "grad_norm": 3.1136738012266565, "learning_rate": 5.119282067430136e-06, "loss": 0.4102, "step": 17365 }, { "epoch": 1.6296921921921923, "grad_norm": 0.8879588201872531, "learning_rate": 5.118736256956558e-06, "loss": 0.3955, "step": 17366 }, { "epoch": 1.6297860360360361, "grad_norm": 1.1924406342888465, "learning_rate": 5.1181904450672726e-06, "loss": 0.4301, "step": 17367 }, { "epoch": 1.62987987987988, "grad_norm": 0.9604897051035339, "learning_rate": 5.11764463176879e-06, "loss": 0.4705, "step": 17368 }, { "epoch": 1.6299737237237237, "grad_norm": 1.3763125555756133, "learning_rate": 5.117098817067615e-06, "loss": 0.3766, "step": 17369 }, { "epoch": 1.6300675675675675, "grad_norm": 1.4391160550842714, "learning_rate": 5.116553000970258e-06, "loss": 0.43, "step": 17370 }, { "epoch": 1.6301614114114114, "grad_norm": 1.1319318323653293, "learning_rate": 5.116007183483226e-06, "loss": 0.4344, "step": 17371 }, { "epoch": 1.6302552552552552, "grad_norm": 0.8760820794067711, "learning_rate": 5.115461364613027e-06, "loss": 0.3868, "step": 17372 }, { "epoch": 1.630349099099099, "grad_norm": 0.8015233367301657, "learning_rate": 5.114915544366167e-06, "loss": 0.3415, "step": 17373 }, { "epoch": 1.6304429429429428, "grad_norm": 0.9296881323780386, "learning_rate": 5.114369722749157e-06, "loss": 0.4047, "step": 17374 }, { "epoch": 1.6305367867867868, "grad_norm": 0.9802248282671583, "learning_rate": 5.113823899768502e-06, "loss": 0.3476, "step": 17375 }, { "epoch": 1.6306306306306306, "grad_norm": 0.9407764137104857, "learning_rate": 5.113278075430711e-06, "loss": 0.3941, "step": 17376 }, { "epoch": 1.6307244744744744, "grad_norm": 1.2543336885420655, "learning_rate": 5.112732249742294e-06, "loss": 0.4012, "step": 17377 }, { "epoch": 1.6308183183183185, "grad_norm": 0.918250011360411, "learning_rate": 5.112186422709757e-06, "loss": 0.4024, "step": 17378 }, { "epoch": 1.6309121621621623, "grad_norm": 1.2453228833121457, "learning_rate": 5.111640594339606e-06, "loss": 0.3976, "step": 17379 }, { "epoch": 1.631006006006006, "grad_norm": 1.0690947520015404, "learning_rate": 5.111094764638354e-06, "loss": 0.432, "step": 17380 }, { "epoch": 1.63109984984985, "grad_norm": 0.9245274802687891, "learning_rate": 5.110548933612504e-06, "loss": 0.4106, "step": 17381 }, { "epoch": 1.6311936936936937, "grad_norm": 1.155990981424935, "learning_rate": 5.110003101268566e-06, "loss": 0.3962, "step": 17382 }, { "epoch": 1.6312875375375375, "grad_norm": 0.9175481047308148, "learning_rate": 5.109457267613049e-06, "loss": 0.4011, "step": 17383 }, { "epoch": 1.6313813813813813, "grad_norm": 1.0576611335785993, "learning_rate": 5.108911432652459e-06, "loss": 0.4098, "step": 17384 }, { "epoch": 1.6314752252252251, "grad_norm": 0.8955094106238749, "learning_rate": 5.1083655963933056e-06, "loss": 0.4177, "step": 17385 }, { "epoch": 1.631569069069069, "grad_norm": 0.8942353259523157, "learning_rate": 5.107819758842098e-06, "loss": 0.3495, "step": 17386 }, { "epoch": 1.6316629129129128, "grad_norm": 0.8482210408704622, "learning_rate": 5.10727392000534e-06, "loss": 0.4043, "step": 17387 }, { "epoch": 1.6317567567567568, "grad_norm": 0.9577415435063122, "learning_rate": 5.1067280798895435e-06, "loss": 0.448, "step": 17388 }, { "epoch": 1.6318506006006006, "grad_norm": 0.9307662894464788, "learning_rate": 5.106182238501216e-06, "loss": 0.3949, "step": 17389 }, { "epoch": 1.6319444444444444, "grad_norm": 0.8884426557143033, "learning_rate": 5.105636395846866e-06, "loss": 0.4131, "step": 17390 }, { "epoch": 1.6320382882882885, "grad_norm": 0.9596207251181184, "learning_rate": 5.105090551933e-06, "loss": 0.4408, "step": 17391 }, { "epoch": 1.6321321321321323, "grad_norm": 1.1926814296909003, "learning_rate": 5.104544706766127e-06, "loss": 0.3264, "step": 17392 }, { "epoch": 1.632225975975976, "grad_norm": 1.3552732226983915, "learning_rate": 5.103998860352755e-06, "loss": 0.3595, "step": 17393 }, { "epoch": 1.6323198198198199, "grad_norm": 1.0349337552819502, "learning_rate": 5.103453012699391e-06, "loss": 0.3919, "step": 17394 }, { "epoch": 1.6324136636636637, "grad_norm": 1.0694726344364673, "learning_rate": 5.102907163812546e-06, "loss": 0.4006, "step": 17395 }, { "epoch": 1.6325075075075075, "grad_norm": 1.5478070895211415, "learning_rate": 5.102361313698728e-06, "loss": 0.4432, "step": 17396 }, { "epoch": 1.6326013513513513, "grad_norm": 0.9445484282762092, "learning_rate": 5.101815462364442e-06, "loss": 0.4245, "step": 17397 }, { "epoch": 1.6326951951951951, "grad_norm": 1.2846263679512315, "learning_rate": 5.101269609816199e-06, "loss": 0.3425, "step": 17398 }, { "epoch": 1.632789039039039, "grad_norm": 0.8975474556479447, "learning_rate": 5.1007237560605064e-06, "loss": 0.4103, "step": 17399 }, { "epoch": 1.6328828828828827, "grad_norm": 0.9135073706993929, "learning_rate": 5.100177901103872e-06, "loss": 0.4157, "step": 17400 }, { "epoch": 1.6329767267267268, "grad_norm": 0.926756692613758, "learning_rate": 5.099632044952806e-06, "loss": 0.4057, "step": 17401 }, { "epoch": 1.6330705705705706, "grad_norm": 1.0185052154253016, "learning_rate": 5.099086187613814e-06, "loss": 0.396, "step": 17402 }, { "epoch": 1.6331644144144144, "grad_norm": 1.0283098646224968, "learning_rate": 5.098540329093408e-06, "loss": 0.4421, "step": 17403 }, { "epoch": 1.6332582582582582, "grad_norm": 0.9712731219786982, "learning_rate": 5.097994469398092e-06, "loss": 0.4059, "step": 17404 }, { "epoch": 1.6333521021021022, "grad_norm": 0.9993473040854958, "learning_rate": 5.097448608534377e-06, "loss": 0.4006, "step": 17405 }, { "epoch": 1.633445945945946, "grad_norm": 0.7765923727579728, "learning_rate": 5.096902746508769e-06, "loss": 0.3482, "step": 17406 }, { "epoch": 1.6335397897897899, "grad_norm": 0.9928535081032848, "learning_rate": 5.096356883327781e-06, "loss": 0.3706, "step": 17407 }, { "epoch": 1.6336336336336337, "grad_norm": 1.04619292852292, "learning_rate": 5.095811018997917e-06, "loss": 0.4129, "step": 17408 }, { "epoch": 1.6337274774774775, "grad_norm": 1.256132164487179, "learning_rate": 5.095265153525688e-06, "loss": 0.4064, "step": 17409 }, { "epoch": 1.6338213213213213, "grad_norm": 1.2843683516823965, "learning_rate": 5.094719286917601e-06, "loss": 0.3938, "step": 17410 }, { "epoch": 1.633915165165165, "grad_norm": 0.9143178797393712, "learning_rate": 5.094173419180164e-06, "loss": 0.3951, "step": 17411 }, { "epoch": 1.634009009009009, "grad_norm": 0.894111875423444, "learning_rate": 5.093627550319886e-06, "loss": 0.4225, "step": 17412 }, { "epoch": 1.6341028528528527, "grad_norm": 1.0257224216520155, "learning_rate": 5.093081680343277e-06, "loss": 0.4082, "step": 17413 }, { "epoch": 1.6341966966966965, "grad_norm": 0.8660922457948499, "learning_rate": 5.092535809256843e-06, "loss": 0.3839, "step": 17414 }, { "epoch": 1.6342905405405406, "grad_norm": 1.0936585956759926, "learning_rate": 5.091989937067096e-06, "loss": 0.4258, "step": 17415 }, { "epoch": 1.6343843843843844, "grad_norm": 1.043323957638442, "learning_rate": 5.09144406378054e-06, "loss": 0.4003, "step": 17416 }, { "epoch": 1.6344782282282282, "grad_norm": 1.1200006868777737, "learning_rate": 5.090898189403686e-06, "loss": 0.4233, "step": 17417 }, { "epoch": 1.6345720720720722, "grad_norm": 1.2589265316607774, "learning_rate": 5.090352313943041e-06, "loss": 0.4056, "step": 17418 }, { "epoch": 1.634665915915916, "grad_norm": 0.9610978037871815, "learning_rate": 5.089806437405117e-06, "loss": 0.4228, "step": 17419 }, { "epoch": 1.6347597597597598, "grad_norm": 0.9210206103872313, "learning_rate": 5.089260559796419e-06, "loss": 0.4315, "step": 17420 }, { "epoch": 1.6348536036036037, "grad_norm": 0.9483343482868307, "learning_rate": 5.088714681123458e-06, "loss": 0.3463, "step": 17421 }, { "epoch": 1.6349474474474475, "grad_norm": 0.9169110929441054, "learning_rate": 5.088168801392741e-06, "loss": 0.3863, "step": 17422 }, { "epoch": 1.6350412912912913, "grad_norm": 1.0291479595939372, "learning_rate": 5.0876229206107755e-06, "loss": 0.3911, "step": 17423 }, { "epoch": 1.635135135135135, "grad_norm": 1.1309305099621614, "learning_rate": 5.087077038784072e-06, "loss": 0.4151, "step": 17424 }, { "epoch": 1.635228978978979, "grad_norm": 1.0250758101547142, "learning_rate": 5.08653115591914e-06, "loss": 0.4001, "step": 17425 }, { "epoch": 1.6353228228228227, "grad_norm": 0.8868349172876717, "learning_rate": 5.085985272022485e-06, "loss": 0.3956, "step": 17426 }, { "epoch": 1.6354166666666665, "grad_norm": 0.9123459228067684, "learning_rate": 5.08543938710062e-06, "loss": 0.3869, "step": 17427 }, { "epoch": 1.6355105105105106, "grad_norm": 0.9805631802852287, "learning_rate": 5.0848935011600505e-06, "loss": 0.3975, "step": 17428 }, { "epoch": 1.6356043543543544, "grad_norm": 0.8869415735325066, "learning_rate": 5.084347614207284e-06, "loss": 0.3742, "step": 17429 }, { "epoch": 1.6356981981981982, "grad_norm": 0.8226870942998009, "learning_rate": 5.083801726248833e-06, "loss": 0.3623, "step": 17430 }, { "epoch": 1.6357920420420422, "grad_norm": 19.62019333624604, "learning_rate": 5.083255837291203e-06, "loss": 0.4019, "step": 17431 }, { "epoch": 1.635885885885886, "grad_norm": 1.016109862982083, "learning_rate": 5.082709947340904e-06, "loss": 0.4558, "step": 17432 }, { "epoch": 1.6359797297297298, "grad_norm": 1.0114070719710457, "learning_rate": 5.082164056404445e-06, "loss": 0.395, "step": 17433 }, { "epoch": 1.6360735735735736, "grad_norm": 0.8762223231647103, "learning_rate": 5.081618164488335e-06, "loss": 0.4298, "step": 17434 }, { "epoch": 1.6361674174174174, "grad_norm": 0.937636425650863, "learning_rate": 5.08107227159908e-06, "loss": 0.4311, "step": 17435 }, { "epoch": 1.6362612612612613, "grad_norm": 0.959002112126631, "learning_rate": 5.080526377743191e-06, "loss": 0.4094, "step": 17436 }, { "epoch": 1.636355105105105, "grad_norm": 0.959524124694276, "learning_rate": 5.079980482927177e-06, "loss": 0.4252, "step": 17437 }, { "epoch": 1.6364489489489489, "grad_norm": 0.9427485454256213, "learning_rate": 5.079434587157547e-06, "loss": 0.3947, "step": 17438 }, { "epoch": 1.6365427927927927, "grad_norm": 1.070239904282912, "learning_rate": 5.078888690440809e-06, "loss": 0.3995, "step": 17439 }, { "epoch": 1.6366366366366365, "grad_norm": 1.0192115790479443, "learning_rate": 5.078342792783472e-06, "loss": 0.3775, "step": 17440 }, { "epoch": 1.6367304804804805, "grad_norm": 0.8943208369737384, "learning_rate": 5.077796894192043e-06, "loss": 0.3693, "step": 17441 }, { "epoch": 1.6368243243243243, "grad_norm": 0.9379928447223082, "learning_rate": 5.077250994673033e-06, "loss": 0.4147, "step": 17442 }, { "epoch": 1.6369181681681682, "grad_norm": 0.903732103029043, "learning_rate": 5.076705094232952e-06, "loss": 0.3814, "step": 17443 }, { "epoch": 1.637012012012012, "grad_norm": 0.9640053040209068, "learning_rate": 5.0761591928783045e-06, "loss": 0.4203, "step": 17444 }, { "epoch": 1.637105855855856, "grad_norm": 1.3798134476369144, "learning_rate": 5.075613290615606e-06, "loss": 0.4051, "step": 17445 }, { "epoch": 1.6371996996996998, "grad_norm": 1.2967057401323463, "learning_rate": 5.075067387451358e-06, "loss": 0.3841, "step": 17446 }, { "epoch": 1.6372935435435436, "grad_norm": 0.8940961909806298, "learning_rate": 5.074521483392073e-06, "loss": 0.4032, "step": 17447 }, { "epoch": 1.6373873873873874, "grad_norm": 0.8743496516831527, "learning_rate": 5.0739755784442605e-06, "loss": 0.3886, "step": 17448 }, { "epoch": 1.6374812312312312, "grad_norm": 0.9766590771318295, "learning_rate": 5.073429672614428e-06, "loss": 0.3964, "step": 17449 }, { "epoch": 1.637575075075075, "grad_norm": 0.8985704525264514, "learning_rate": 5.0728837659090845e-06, "loss": 0.4221, "step": 17450 }, { "epoch": 1.6376689189189189, "grad_norm": 1.0241881608250647, "learning_rate": 5.07233785833474e-06, "loss": 0.393, "step": 17451 }, { "epoch": 1.6377627627627627, "grad_norm": 0.8666159313873593, "learning_rate": 5.071791949897903e-06, "loss": 0.3797, "step": 17452 }, { "epoch": 1.6378566066066065, "grad_norm": 0.982870535531468, "learning_rate": 5.071246040605081e-06, "loss": 0.3894, "step": 17453 }, { "epoch": 1.6379504504504503, "grad_norm": 1.0059275211613183, "learning_rate": 5.070700130462785e-06, "loss": 0.4165, "step": 17454 }, { "epoch": 1.6380442942942943, "grad_norm": 0.8960666155973039, "learning_rate": 5.0701542194775235e-06, "loss": 0.3825, "step": 17455 }, { "epoch": 1.6381381381381381, "grad_norm": 0.9539686644905095, "learning_rate": 5.069608307655804e-06, "loss": 0.4489, "step": 17456 }, { "epoch": 1.638231981981982, "grad_norm": 0.7976618767227268, "learning_rate": 5.069062395004135e-06, "loss": 0.3974, "step": 17457 }, { "epoch": 1.638325825825826, "grad_norm": 0.9660217340746773, "learning_rate": 5.06851648152903e-06, "loss": 0.3484, "step": 17458 }, { "epoch": 1.6384196696696698, "grad_norm": 1.3089248463803718, "learning_rate": 5.067970567236993e-06, "loss": 0.4197, "step": 17459 }, { "epoch": 1.6385135135135136, "grad_norm": 1.0410236688375782, "learning_rate": 5.067424652134535e-06, "loss": 0.4077, "step": 17460 }, { "epoch": 1.6386073573573574, "grad_norm": 0.9867124332252526, "learning_rate": 5.066878736228166e-06, "loss": 0.4179, "step": 17461 }, { "epoch": 1.6387012012012012, "grad_norm": 0.9616254425098755, "learning_rate": 5.066332819524392e-06, "loss": 0.3794, "step": 17462 }, { "epoch": 1.638795045045045, "grad_norm": 0.9395671710738488, "learning_rate": 5.065786902029726e-06, "loss": 0.4356, "step": 17463 }, { "epoch": 1.6388888888888888, "grad_norm": 1.6086314730171636, "learning_rate": 5.065240983750675e-06, "loss": 0.3881, "step": 17464 }, { "epoch": 1.6389827327327327, "grad_norm": 0.8892239232635994, "learning_rate": 5.064695064693748e-06, "loss": 0.3893, "step": 17465 }, { "epoch": 1.6390765765765765, "grad_norm": 0.9143172832668621, "learning_rate": 5.0641491448654524e-06, "loss": 0.4032, "step": 17466 }, { "epoch": 1.6391704204204203, "grad_norm": 0.9405956831674305, "learning_rate": 5.063603224272301e-06, "loss": 0.4055, "step": 17467 }, { "epoch": 1.6392642642642643, "grad_norm": 0.9900963125207819, "learning_rate": 5.063057302920799e-06, "loss": 0.3857, "step": 17468 }, { "epoch": 1.6393581081081081, "grad_norm": 1.183249095721435, "learning_rate": 5.0625113808174575e-06, "loss": 0.4197, "step": 17469 }, { "epoch": 1.639451951951952, "grad_norm": 0.8868748541785288, "learning_rate": 5.0619654579687875e-06, "loss": 0.3838, "step": 17470 }, { "epoch": 1.639545795795796, "grad_norm": 1.0382304495677024, "learning_rate": 5.0614195343812956e-06, "loss": 0.4246, "step": 17471 }, { "epoch": 1.6396396396396398, "grad_norm": 0.9521912558933111, "learning_rate": 5.060873610061489e-06, "loss": 0.4336, "step": 17472 }, { "epoch": 1.6397334834834836, "grad_norm": 0.9795670388401524, "learning_rate": 5.060327685015882e-06, "loss": 0.3751, "step": 17473 }, { "epoch": 1.6398273273273274, "grad_norm": 0.8706701062564685, "learning_rate": 5.059781759250979e-06, "loss": 0.3699, "step": 17474 }, { "epoch": 1.6399211711711712, "grad_norm": 1.17668535579657, "learning_rate": 5.059235832773291e-06, "loss": 0.3982, "step": 17475 }, { "epoch": 1.640015015015015, "grad_norm": 0.9690704855070004, "learning_rate": 5.058689905589329e-06, "loss": 0.3862, "step": 17476 }, { "epoch": 1.6401088588588588, "grad_norm": 1.0244701151238764, "learning_rate": 5.058143977705599e-06, "loss": 0.3997, "step": 17477 }, { "epoch": 1.6402027027027026, "grad_norm": 0.9741601401703711, "learning_rate": 5.0575980491286115e-06, "loss": 0.3705, "step": 17478 }, { "epoch": 1.6402965465465464, "grad_norm": 4.378775138060244, "learning_rate": 5.057052119864877e-06, "loss": 0.385, "step": 17479 }, { "epoch": 1.6403903903903903, "grad_norm": 1.5551687241892718, "learning_rate": 5.056506189920901e-06, "loss": 0.4119, "step": 17480 }, { "epoch": 1.6404842342342343, "grad_norm": 1.0343360993741546, "learning_rate": 5.055960259303197e-06, "loss": 0.4508, "step": 17481 }, { "epoch": 1.640578078078078, "grad_norm": 1.0828863811557505, "learning_rate": 5.055414328018272e-06, "loss": 0.4137, "step": 17482 }, { "epoch": 1.640671921921922, "grad_norm": 0.9556135789222667, "learning_rate": 5.054868396072635e-06, "loss": 0.3738, "step": 17483 }, { "epoch": 1.6407657657657657, "grad_norm": 0.9097979706685407, "learning_rate": 5.054322463472796e-06, "loss": 0.3593, "step": 17484 }, { "epoch": 1.6408596096096097, "grad_norm": 1.1208233642260705, "learning_rate": 5.053776530225264e-06, "loss": 0.423, "step": 17485 }, { "epoch": 1.6409534534534536, "grad_norm": 0.7758421405527399, "learning_rate": 5.053230596336547e-06, "loss": 0.3666, "step": 17486 }, { "epoch": 1.6410472972972974, "grad_norm": 0.9228113115316859, "learning_rate": 5.052684661813155e-06, "loss": 0.4211, "step": 17487 }, { "epoch": 1.6411411411411412, "grad_norm": 0.8635088993074167, "learning_rate": 5.052138726661599e-06, "loss": 0.3653, "step": 17488 }, { "epoch": 1.641234984984985, "grad_norm": 0.9904160430617347, "learning_rate": 5.051592790888388e-06, "loss": 0.3608, "step": 17489 }, { "epoch": 1.6413288288288288, "grad_norm": 0.9215718021184467, "learning_rate": 5.051046854500028e-06, "loss": 0.4325, "step": 17490 }, { "epoch": 1.6414226726726726, "grad_norm": 0.8761981839341385, "learning_rate": 5.050500917503031e-06, "loss": 0.3435, "step": 17491 }, { "epoch": 1.6415165165165164, "grad_norm": 0.8863045251100723, "learning_rate": 5.049954979903905e-06, "loss": 0.4177, "step": 17492 }, { "epoch": 1.6416103603603602, "grad_norm": 1.3639005104744326, "learning_rate": 5.04940904170916e-06, "loss": 0.3736, "step": 17493 }, { "epoch": 1.641704204204204, "grad_norm": 0.912481407281751, "learning_rate": 5.048863102925306e-06, "loss": 0.3696, "step": 17494 }, { "epoch": 1.641798048048048, "grad_norm": 2.9749516606920565, "learning_rate": 5.048317163558851e-06, "loss": 0.3722, "step": 17495 }, { "epoch": 1.6418918918918919, "grad_norm": 0.9851177805408677, "learning_rate": 5.047771223616304e-06, "loss": 0.4243, "step": 17496 }, { "epoch": 1.6419857357357357, "grad_norm": 0.9256738241254793, "learning_rate": 5.047225283104177e-06, "loss": 0.4309, "step": 17497 }, { "epoch": 1.6420795795795797, "grad_norm": 1.5956990528905244, "learning_rate": 5.046679342028976e-06, "loss": 0.3705, "step": 17498 }, { "epoch": 1.6421734234234235, "grad_norm": 0.9642986503313545, "learning_rate": 5.046133400397212e-06, "loss": 0.4329, "step": 17499 }, { "epoch": 1.6422672672672673, "grad_norm": 0.9650437538370618, "learning_rate": 5.045587458215394e-06, "loss": 0.4279, "step": 17500 }, { "epoch": 1.6423611111111112, "grad_norm": 1.0190691062244994, "learning_rate": 5.045041515490032e-06, "loss": 0.3821, "step": 17501 }, { "epoch": 1.642454954954955, "grad_norm": 0.9631234522064601, "learning_rate": 5.0444955722276335e-06, "loss": 0.3983, "step": 17502 }, { "epoch": 1.6425487987987988, "grad_norm": 0.9911633992678532, "learning_rate": 5.04394962843471e-06, "loss": 0.4376, "step": 17503 }, { "epoch": 1.6426426426426426, "grad_norm": 0.8575642327688395, "learning_rate": 5.043403684117769e-06, "loss": 0.3612, "step": 17504 }, { "epoch": 1.6427364864864864, "grad_norm": 0.9130827236192013, "learning_rate": 5.042857739283321e-06, "loss": 0.4127, "step": 17505 }, { "epoch": 1.6428303303303302, "grad_norm": 1.0512238149399458, "learning_rate": 5.042311793937876e-06, "loss": 0.37, "step": 17506 }, { "epoch": 1.642924174174174, "grad_norm": 0.9719492020170297, "learning_rate": 5.041765848087943e-06, "loss": 0.3846, "step": 17507 }, { "epoch": 1.643018018018018, "grad_norm": 1.0198629017539511, "learning_rate": 5.041219901740028e-06, "loss": 0.4426, "step": 17508 }, { "epoch": 1.6431118618618619, "grad_norm": 1.0560494384862114, "learning_rate": 5.040673954900647e-06, "loss": 0.3953, "step": 17509 }, { "epoch": 1.6432057057057057, "grad_norm": 0.8517976277849685, "learning_rate": 5.0401280075763025e-06, "loss": 0.371, "step": 17510 }, { "epoch": 1.6432995495495497, "grad_norm": 0.9235442523923383, "learning_rate": 5.039582059773508e-06, "loss": 0.3819, "step": 17511 }, { "epoch": 1.6433933933933935, "grad_norm": 0.9458267309115727, "learning_rate": 5.0390361114987726e-06, "loss": 0.4191, "step": 17512 }, { "epoch": 1.6434872372372373, "grad_norm": 0.8668653034067397, "learning_rate": 5.038490162758606e-06, "loss": 0.4051, "step": 17513 }, { "epoch": 1.6435810810810811, "grad_norm": 1.0779033175396953, "learning_rate": 5.037944213559515e-06, "loss": 0.3644, "step": 17514 }, { "epoch": 1.643674924924925, "grad_norm": 0.9356640293894376, "learning_rate": 5.037398263908011e-06, "loss": 0.4092, "step": 17515 }, { "epoch": 1.6437687687687688, "grad_norm": 0.9299486838745962, "learning_rate": 5.036852313810603e-06, "loss": 0.372, "step": 17516 }, { "epoch": 1.6438626126126126, "grad_norm": 1.063067972881236, "learning_rate": 5.0363063632738e-06, "loss": 0.3623, "step": 17517 }, { "epoch": 1.6439564564564564, "grad_norm": 0.9048466548512671, "learning_rate": 5.035760412304113e-06, "loss": 0.3793, "step": 17518 }, { "epoch": 1.6440503003003002, "grad_norm": 1.0116992789840493, "learning_rate": 5.03521446090805e-06, "loss": 0.3691, "step": 17519 }, { "epoch": 1.644144144144144, "grad_norm": 1.020519715940373, "learning_rate": 5.034668509092121e-06, "loss": 0.4366, "step": 17520 }, { "epoch": 1.644237987987988, "grad_norm": 0.9844084134290244, "learning_rate": 5.034122556862835e-06, "loss": 0.4121, "step": 17521 }, { "epoch": 1.6443318318318318, "grad_norm": 1.0458390375657112, "learning_rate": 5.033576604226702e-06, "loss": 0.4598, "step": 17522 }, { "epoch": 1.6444256756756757, "grad_norm": 1.141512127135705, "learning_rate": 5.03303065119023e-06, "loss": 0.356, "step": 17523 }, { "epoch": 1.6445195195195195, "grad_norm": 1.024209561395034, "learning_rate": 5.032484697759932e-06, "loss": 0.4179, "step": 17524 }, { "epoch": 1.6446133633633635, "grad_norm": 1.0054359951342215, "learning_rate": 5.031938743942313e-06, "loss": 0.4064, "step": 17525 }, { "epoch": 1.6447072072072073, "grad_norm": 1.2314922350116704, "learning_rate": 5.031392789743886e-06, "loss": 0.3546, "step": 17526 }, { "epoch": 1.6448010510510511, "grad_norm": 1.0599729641255546, "learning_rate": 5.030846835171159e-06, "loss": 0.39, "step": 17527 }, { "epoch": 1.644894894894895, "grad_norm": 0.8680831264618087, "learning_rate": 5.030300880230641e-06, "loss": 0.3724, "step": 17528 }, { "epoch": 1.6449887387387387, "grad_norm": 1.051609320244495, "learning_rate": 5.029754924928841e-06, "loss": 0.3768, "step": 17529 }, { "epoch": 1.6450825825825826, "grad_norm": 0.9022622235292873, "learning_rate": 5.029208969272271e-06, "loss": 0.3543, "step": 17530 }, { "epoch": 1.6451764264264264, "grad_norm": 0.9464354491815465, "learning_rate": 5.028663013267438e-06, "loss": 0.3678, "step": 17531 }, { "epoch": 1.6452702702702702, "grad_norm": 0.8911077896676093, "learning_rate": 5.028117056920854e-06, "loss": 0.432, "step": 17532 }, { "epoch": 1.645364114114114, "grad_norm": 0.9154328690095488, "learning_rate": 5.027571100239027e-06, "loss": 0.4023, "step": 17533 }, { "epoch": 1.6454579579579578, "grad_norm": 0.8748666470955441, "learning_rate": 5.027025143228466e-06, "loss": 0.4295, "step": 17534 }, { "epoch": 1.6455518018018018, "grad_norm": 4.604158456827937, "learning_rate": 5.02647918589568e-06, "loss": 0.4264, "step": 17535 }, { "epoch": 1.6456456456456456, "grad_norm": 1.0460501312489796, "learning_rate": 5.025933228247181e-06, "loss": 0.3565, "step": 17536 }, { "epoch": 1.6457394894894894, "grad_norm": 0.9334725495974937, "learning_rate": 5.025387270289474e-06, "loss": 0.3878, "step": 17537 }, { "epoch": 1.6458333333333335, "grad_norm": 0.9683108681417315, "learning_rate": 5.024841312029075e-06, "loss": 0.3922, "step": 17538 }, { "epoch": 1.6459271771771773, "grad_norm": 2.5121806097395805, "learning_rate": 5.02429535347249e-06, "loss": 0.4314, "step": 17539 }, { "epoch": 1.646021021021021, "grad_norm": 1.1073661251013793, "learning_rate": 5.023749394626228e-06, "loss": 0.3687, "step": 17540 }, { "epoch": 1.646114864864865, "grad_norm": 0.8408278733938749, "learning_rate": 5.023203435496797e-06, "loss": 0.3907, "step": 17541 }, { "epoch": 1.6462087087087087, "grad_norm": 1.310014043781619, "learning_rate": 5.022657476090712e-06, "loss": 0.3814, "step": 17542 }, { "epoch": 1.6463025525525525, "grad_norm": 0.9381406657020005, "learning_rate": 5.022111516414477e-06, "loss": 0.4023, "step": 17543 }, { "epoch": 1.6463963963963963, "grad_norm": 0.9244065022097384, "learning_rate": 5.021565556474606e-06, "loss": 0.4386, "step": 17544 }, { "epoch": 1.6464902402402402, "grad_norm": 1.1182920748902812, "learning_rate": 5.021019596277605e-06, "loss": 0.367, "step": 17545 }, { "epoch": 1.646584084084084, "grad_norm": 0.8891951414807673, "learning_rate": 5.0204736358299845e-06, "loss": 0.4023, "step": 17546 }, { "epoch": 1.6466779279279278, "grad_norm": 0.8441200486468954, "learning_rate": 5.019927675138254e-06, "loss": 0.3562, "step": 17547 }, { "epoch": 1.6467717717717718, "grad_norm": 1.1511625439486661, "learning_rate": 5.019381714208925e-06, "loss": 0.4402, "step": 17548 }, { "epoch": 1.6468656156156156, "grad_norm": 0.8892146413776928, "learning_rate": 5.018835753048504e-06, "loss": 0.3926, "step": 17549 }, { "epoch": 1.6469594594594594, "grad_norm": 1.0825129791819361, "learning_rate": 5.0182897916635036e-06, "loss": 0.3633, "step": 17550 }, { "epoch": 1.6470533033033035, "grad_norm": 0.9196191622268304, "learning_rate": 5.017743830060432e-06, "loss": 0.3932, "step": 17551 }, { "epoch": 1.6471471471471473, "grad_norm": 1.8245015307363848, "learning_rate": 5.0171978682457966e-06, "loss": 0.3942, "step": 17552 }, { "epoch": 1.647240990990991, "grad_norm": 0.9693719077579279, "learning_rate": 5.01665190622611e-06, "loss": 0.3927, "step": 17553 }, { "epoch": 1.647334834834835, "grad_norm": 1.1862447111666186, "learning_rate": 5.016105944007882e-06, "loss": 0.3766, "step": 17554 }, { "epoch": 1.6474286786786787, "grad_norm": 0.9532059122725695, "learning_rate": 5.0155599815976185e-06, "loss": 0.3901, "step": 17555 }, { "epoch": 1.6475225225225225, "grad_norm": 0.927813223681583, "learning_rate": 5.015014019001833e-06, "loss": 0.396, "step": 17556 }, { "epoch": 1.6476163663663663, "grad_norm": 0.9642763728699707, "learning_rate": 5.014468056227035e-06, "loss": 0.3994, "step": 17557 }, { "epoch": 1.6477102102102101, "grad_norm": 0.952106606088972, "learning_rate": 5.013922093279731e-06, "loss": 0.4036, "step": 17558 }, { "epoch": 1.647804054054054, "grad_norm": 0.8540573432087635, "learning_rate": 5.013376130166431e-06, "loss": 0.3659, "step": 17559 }, { "epoch": 1.6478978978978978, "grad_norm": 0.9005588794174672, "learning_rate": 5.012830166893648e-06, "loss": 0.3411, "step": 17560 }, { "epoch": 1.6479917417417418, "grad_norm": 0.8547995748418693, "learning_rate": 5.012284203467887e-06, "loss": 0.3677, "step": 17561 }, { "epoch": 1.6480855855855856, "grad_norm": 0.9383702795687273, "learning_rate": 5.011738239895663e-06, "loss": 0.4015, "step": 17562 }, { "epoch": 1.6481794294294294, "grad_norm": 0.9712221132026286, "learning_rate": 5.011192276183482e-06, "loss": 0.4165, "step": 17563 }, { "epoch": 1.6482732732732732, "grad_norm": 0.968870520706014, "learning_rate": 5.010646312337852e-06, "loss": 0.3941, "step": 17564 }, { "epoch": 1.6483671171171173, "grad_norm": 1.1134795773074504, "learning_rate": 5.010100348365287e-06, "loss": 0.4146, "step": 17565 }, { "epoch": 1.648460960960961, "grad_norm": 0.961198107647968, "learning_rate": 5.009554384272294e-06, "loss": 0.4462, "step": 17566 }, { "epoch": 1.6485548048048049, "grad_norm": 1.0465655605265127, "learning_rate": 5.009008420065381e-06, "loss": 0.4078, "step": 17567 }, { "epoch": 1.6486486486486487, "grad_norm": 0.9183422181846707, "learning_rate": 5.008462455751062e-06, "loss": 0.3682, "step": 17568 }, { "epoch": 1.6487424924924925, "grad_norm": 0.9911071555868673, "learning_rate": 5.007916491335843e-06, "loss": 0.3825, "step": 17569 }, { "epoch": 1.6488363363363363, "grad_norm": 0.9079143851487315, "learning_rate": 5.0073705268262355e-06, "loss": 0.4107, "step": 17570 }, { "epoch": 1.6489301801801801, "grad_norm": 1.3490999801829426, "learning_rate": 5.006824562228747e-06, "loss": 0.4264, "step": 17571 }, { "epoch": 1.649024024024024, "grad_norm": 3.2848979621540093, "learning_rate": 5.0062785975498896e-06, "loss": 0.3946, "step": 17572 }, { "epoch": 1.6491178678678677, "grad_norm": 0.9670775378564015, "learning_rate": 5.005732632796171e-06, "loss": 0.4249, "step": 17573 }, { "epoch": 1.6492117117117115, "grad_norm": 1.091135622748547, "learning_rate": 5.005186667974102e-06, "loss": 0.4149, "step": 17574 }, { "epoch": 1.6493055555555556, "grad_norm": 0.8960147501112475, "learning_rate": 5.004640703090192e-06, "loss": 0.4303, "step": 17575 }, { "epoch": 1.6493993993993994, "grad_norm": 1.0162202126463702, "learning_rate": 5.00409473815095e-06, "loss": 0.3782, "step": 17576 }, { "epoch": 1.6494932432432432, "grad_norm": 0.9762842860569851, "learning_rate": 5.0035487731628855e-06, "loss": 0.4361, "step": 17577 }, { "epoch": 1.6495870870870872, "grad_norm": 0.8823851938702467, "learning_rate": 5.003002808132509e-06, "loss": 0.3661, "step": 17578 }, { "epoch": 1.649680930930931, "grad_norm": 0.8592243192127833, "learning_rate": 5.002456843066329e-06, "loss": 0.357, "step": 17579 }, { "epoch": 1.6497747747747749, "grad_norm": 0.9117241246957027, "learning_rate": 5.001910877970857e-06, "loss": 0.3967, "step": 17580 }, { "epoch": 1.6498686186186187, "grad_norm": 0.9570104173459213, "learning_rate": 5.001364912852601e-06, "loss": 0.3729, "step": 17581 }, { "epoch": 1.6499624624624625, "grad_norm": 1.0960534860716906, "learning_rate": 5.00081894771807e-06, "loss": 0.4021, "step": 17582 }, { "epoch": 1.6500563063063063, "grad_norm": 0.9167412698056017, "learning_rate": 5.000272982573775e-06, "loss": 0.3974, "step": 17583 }, { "epoch": 1.65015015015015, "grad_norm": 0.8365342012234924, "learning_rate": 4.999727017426226e-06, "loss": 0.3944, "step": 17584 }, { "epoch": 1.650243993993994, "grad_norm": 1.1074234239496747, "learning_rate": 4.9991810522819305e-06, "loss": 0.4298, "step": 17585 }, { "epoch": 1.6503378378378377, "grad_norm": 0.8803393728434096, "learning_rate": 4.9986350871474005e-06, "loss": 0.3689, "step": 17586 }, { "epoch": 1.6504316816816815, "grad_norm": 2.3820522029835023, "learning_rate": 4.9980891220291445e-06, "loss": 0.3881, "step": 17587 }, { "epoch": 1.6505255255255256, "grad_norm": 1.0359838347110635, "learning_rate": 4.997543156933671e-06, "loss": 0.4309, "step": 17588 }, { "epoch": 1.6506193693693694, "grad_norm": 1.140710474559545, "learning_rate": 4.9969971918674924e-06, "loss": 0.432, "step": 17589 }, { "epoch": 1.6507132132132132, "grad_norm": 0.8423084974571795, "learning_rate": 4.996451226837117e-06, "loss": 0.3752, "step": 17590 }, { "epoch": 1.6508070570570572, "grad_norm": 0.7684854040821476, "learning_rate": 4.995905261849051e-06, "loss": 0.3715, "step": 17591 }, { "epoch": 1.650900900900901, "grad_norm": 0.9780229888898958, "learning_rate": 4.995359296909809e-06, "loss": 0.4034, "step": 17592 }, { "epoch": 1.6509947447447448, "grad_norm": 1.1564095630897107, "learning_rate": 4.994813332025899e-06, "loss": 0.438, "step": 17593 }, { "epoch": 1.6510885885885886, "grad_norm": 1.0712404255654806, "learning_rate": 4.99426736720383e-06, "loss": 0.4034, "step": 17594 }, { "epoch": 1.6511824324324325, "grad_norm": 0.8877255324576789, "learning_rate": 4.993721402450112e-06, "loss": 0.3562, "step": 17595 }, { "epoch": 1.6512762762762763, "grad_norm": 0.9379542735647114, "learning_rate": 4.993175437771255e-06, "loss": 0.4705, "step": 17596 }, { "epoch": 1.65137012012012, "grad_norm": 1.0880328526150074, "learning_rate": 4.992629473173765e-06, "loss": 0.3875, "step": 17597 }, { "epoch": 1.6514639639639639, "grad_norm": 0.8539928174687839, "learning_rate": 4.992083508664158e-06, "loss": 0.422, "step": 17598 }, { "epoch": 1.6515578078078077, "grad_norm": 1.10927348972507, "learning_rate": 4.99153754424894e-06, "loss": 0.4546, "step": 17599 }, { "epoch": 1.6516516516516515, "grad_norm": 0.9803012939450323, "learning_rate": 4.990991579934619e-06, "loss": 0.435, "step": 17600 }, { "epoch": 1.6517454954954955, "grad_norm": 0.8150206888230539, "learning_rate": 4.9904456157277085e-06, "loss": 0.33, "step": 17601 }, { "epoch": 1.6518393393393394, "grad_norm": 2.239173498699043, "learning_rate": 4.989899651634716e-06, "loss": 0.4101, "step": 17602 }, { "epoch": 1.6519331831831832, "grad_norm": 0.9575219385777128, "learning_rate": 4.989353687662147e-06, "loss": 0.3866, "step": 17603 }, { "epoch": 1.652027027027027, "grad_norm": 0.894040050958741, "learning_rate": 4.988807723816519e-06, "loss": 0.4005, "step": 17604 }, { "epoch": 1.652120870870871, "grad_norm": 0.8552455577951126, "learning_rate": 4.988261760104338e-06, "loss": 0.3602, "step": 17605 }, { "epoch": 1.6522147147147148, "grad_norm": 0.8835526480592846, "learning_rate": 4.987715796532112e-06, "loss": 0.4236, "step": 17606 }, { "epoch": 1.6523085585585586, "grad_norm": 0.9439022274614882, "learning_rate": 4.987169833106353e-06, "loss": 0.4075, "step": 17607 }, { "epoch": 1.6524024024024024, "grad_norm": 1.089822541291573, "learning_rate": 4.98662386983357e-06, "loss": 0.3821, "step": 17608 }, { "epoch": 1.6524962462462462, "grad_norm": 0.8711775730565104, "learning_rate": 4.98607790672027e-06, "loss": 0.382, "step": 17609 }, { "epoch": 1.65259009009009, "grad_norm": 0.9876553141988391, "learning_rate": 4.985531943772966e-06, "loss": 0.4031, "step": 17610 }, { "epoch": 1.6526839339339339, "grad_norm": 0.98426247739378, "learning_rate": 4.9849859809981674e-06, "loss": 0.3521, "step": 17611 }, { "epoch": 1.6527777777777777, "grad_norm": 1.0302218765203743, "learning_rate": 4.9844400184023815e-06, "loss": 0.352, "step": 17612 }, { "epoch": 1.6528716216216215, "grad_norm": 1.0028065040937921, "learning_rate": 4.98389405599212e-06, "loss": 0.4183, "step": 17613 }, { "epoch": 1.6529654654654653, "grad_norm": 0.9072200859313252, "learning_rate": 4.983348093773892e-06, "loss": 0.3928, "step": 17614 }, { "epoch": 1.6530593093093093, "grad_norm": 0.9948115048741302, "learning_rate": 4.982802131754203e-06, "loss": 0.433, "step": 17615 }, { "epoch": 1.6531531531531531, "grad_norm": 0.9754904731635383, "learning_rate": 4.98225616993957e-06, "loss": 0.3907, "step": 17616 }, { "epoch": 1.653246996996997, "grad_norm": 1.331687113826263, "learning_rate": 4.981710208336498e-06, "loss": 0.402, "step": 17617 }, { "epoch": 1.653340840840841, "grad_norm": 0.9719406945800274, "learning_rate": 4.981164246951497e-06, "loss": 0.4196, "step": 17618 }, { "epoch": 1.6534346846846848, "grad_norm": 1.071038785774621, "learning_rate": 4.980618285791077e-06, "loss": 0.4016, "step": 17619 }, { "epoch": 1.6535285285285286, "grad_norm": 0.9509326558395166, "learning_rate": 4.9800723248617476e-06, "loss": 0.392, "step": 17620 }, { "epoch": 1.6536223723723724, "grad_norm": 0.9449334363450631, "learning_rate": 4.979526364170016e-06, "loss": 0.4226, "step": 17621 }, { "epoch": 1.6537162162162162, "grad_norm": 1.1306764749851037, "learning_rate": 4.9789804037223966e-06, "loss": 0.4048, "step": 17622 }, { "epoch": 1.65381006006006, "grad_norm": 1.0047666666884465, "learning_rate": 4.978434443525396e-06, "loss": 0.4151, "step": 17623 }, { "epoch": 1.6539039039039038, "grad_norm": 1.0428200663512273, "learning_rate": 4.977888483585523e-06, "loss": 0.3716, "step": 17624 }, { "epoch": 1.6539977477477477, "grad_norm": 1.2379745764147743, "learning_rate": 4.977342523909289e-06, "loss": 0.4192, "step": 17625 }, { "epoch": 1.6540915915915915, "grad_norm": 1.3150164869731595, "learning_rate": 4.976796564503204e-06, "loss": 0.3754, "step": 17626 }, { "epoch": 1.6541854354354353, "grad_norm": 0.8625988262687964, "learning_rate": 4.976250605373773e-06, "loss": 0.4303, "step": 17627 }, { "epoch": 1.6542792792792793, "grad_norm": 1.0505317611548073, "learning_rate": 4.975704646527511e-06, "loss": 0.4363, "step": 17628 }, { "epoch": 1.6543731231231231, "grad_norm": 0.9177187856636024, "learning_rate": 4.975158687970926e-06, "loss": 0.3948, "step": 17629 }, { "epoch": 1.654466966966967, "grad_norm": 1.1191995923539144, "learning_rate": 4.974612729710525e-06, "loss": 0.4231, "step": 17630 }, { "epoch": 1.654560810810811, "grad_norm": 0.9544853187108027, "learning_rate": 4.974066771752821e-06, "loss": 0.4026, "step": 17631 }, { "epoch": 1.6546546546546548, "grad_norm": 0.9879146274019912, "learning_rate": 4.9735208141043225e-06, "loss": 0.3992, "step": 17632 }, { "epoch": 1.6547484984984986, "grad_norm": 1.0772342431773818, "learning_rate": 4.972974856771535e-06, "loss": 0.4461, "step": 17633 }, { "epoch": 1.6548423423423424, "grad_norm": 1.074885813140309, "learning_rate": 4.972428899760974e-06, "loss": 0.4105, "step": 17634 }, { "epoch": 1.6549361861861862, "grad_norm": 0.9112280720607978, "learning_rate": 4.971882943079147e-06, "loss": 0.3854, "step": 17635 }, { "epoch": 1.65503003003003, "grad_norm": 0.9325612655737611, "learning_rate": 4.971336986732562e-06, "loss": 0.3685, "step": 17636 }, { "epoch": 1.6551238738738738, "grad_norm": 0.834574435936275, "learning_rate": 4.97079103072773e-06, "loss": 0.3583, "step": 17637 }, { "epoch": 1.6552177177177176, "grad_norm": 1.1705882426871568, "learning_rate": 4.9702450750711605e-06, "loss": 0.4009, "step": 17638 }, { "epoch": 1.6553115615615615, "grad_norm": 0.8612284692541471, "learning_rate": 4.96969911976936e-06, "loss": 0.4055, "step": 17639 }, { "epoch": 1.6554054054054053, "grad_norm": 0.8134338946713925, "learning_rate": 4.969153164828843e-06, "loss": 0.3934, "step": 17640 }, { "epoch": 1.6554992492492493, "grad_norm": 1.29668580998467, "learning_rate": 4.9686072102561155e-06, "loss": 0.409, "step": 17641 }, { "epoch": 1.655593093093093, "grad_norm": 1.0997239436773927, "learning_rate": 4.968061256057687e-06, "loss": 0.424, "step": 17642 }, { "epoch": 1.655686936936937, "grad_norm": 0.9066044375025016, "learning_rate": 4.96751530224007e-06, "loss": 0.3941, "step": 17643 }, { "epoch": 1.6557807807807807, "grad_norm": 0.9089788577940898, "learning_rate": 4.9669693488097705e-06, "loss": 0.4314, "step": 17644 }, { "epoch": 1.6558746246246248, "grad_norm": 0.8228530235366279, "learning_rate": 4.9664233957732986e-06, "loss": 0.3551, "step": 17645 }, { "epoch": 1.6559684684684686, "grad_norm": 1.1430866270676032, "learning_rate": 4.965877443137166e-06, "loss": 0.4282, "step": 17646 }, { "epoch": 1.6560623123123124, "grad_norm": 1.1390258369064248, "learning_rate": 4.965331490907881e-06, "loss": 0.4369, "step": 17647 }, { "epoch": 1.6561561561561562, "grad_norm": 1.0358410260570716, "learning_rate": 4.964785539091951e-06, "loss": 0.4214, "step": 17648 }, { "epoch": 1.65625, "grad_norm": 1.0051864647306399, "learning_rate": 4.964239587695889e-06, "loss": 0.4068, "step": 17649 }, { "epoch": 1.6563438438438438, "grad_norm": 0.9615216806912312, "learning_rate": 4.963693636726202e-06, "loss": 0.3921, "step": 17650 }, { "epoch": 1.6564376876876876, "grad_norm": 0.8264273876256252, "learning_rate": 4.963147686189398e-06, "loss": 0.4087, "step": 17651 }, { "epoch": 1.6565315315315314, "grad_norm": 1.0234695130460012, "learning_rate": 4.96260173609199e-06, "loss": 0.3975, "step": 17652 }, { "epoch": 1.6566253753753752, "grad_norm": 1.1820944746005766, "learning_rate": 4.9620557864404874e-06, "loss": 0.3679, "step": 17653 }, { "epoch": 1.6567192192192193, "grad_norm": 0.9089900729107124, "learning_rate": 4.961509837241396e-06, "loss": 0.4301, "step": 17654 }, { "epoch": 1.656813063063063, "grad_norm": 0.9752474203530539, "learning_rate": 4.960963888501228e-06, "loss": 0.4044, "step": 17655 }, { "epoch": 1.656906906906907, "grad_norm": 1.1109369766948365, "learning_rate": 4.9604179402264935e-06, "loss": 0.4184, "step": 17656 }, { "epoch": 1.6570007507507507, "grad_norm": 1.7335567082097827, "learning_rate": 4.9598719924236975e-06, "loss": 0.3926, "step": 17657 }, { "epoch": 1.6570945945945947, "grad_norm": 0.8411685402569564, "learning_rate": 4.959326045099355e-06, "loss": 0.3765, "step": 17658 }, { "epoch": 1.6571884384384385, "grad_norm": 0.8246862758963116, "learning_rate": 4.9587800982599725e-06, "loss": 0.392, "step": 17659 }, { "epoch": 1.6572822822822824, "grad_norm": 1.071413655755008, "learning_rate": 4.958234151912059e-06, "loss": 0.4154, "step": 17660 }, { "epoch": 1.6573761261261262, "grad_norm": 1.0166910173099477, "learning_rate": 4.957688206062125e-06, "loss": 0.3895, "step": 17661 }, { "epoch": 1.65746996996997, "grad_norm": 1.007090443822402, "learning_rate": 4.9571422607166804e-06, "loss": 0.4036, "step": 17662 }, { "epoch": 1.6575638138138138, "grad_norm": 1.0037613403042467, "learning_rate": 4.956596315882231e-06, "loss": 0.4307, "step": 17663 }, { "epoch": 1.6576576576576576, "grad_norm": 0.8761835401979493, "learning_rate": 4.9560503715652915e-06, "loss": 0.4254, "step": 17664 }, { "epoch": 1.6577515015015014, "grad_norm": 0.9041574780374803, "learning_rate": 4.955504427772368e-06, "loss": 0.3625, "step": 17665 }, { "epoch": 1.6578453453453452, "grad_norm": 1.0433526161742321, "learning_rate": 4.9549584845099695e-06, "loss": 0.397, "step": 17666 }, { "epoch": 1.657939189189189, "grad_norm": 1.1200754753726496, "learning_rate": 4.9544125417846075e-06, "loss": 0.4103, "step": 17667 }, { "epoch": 1.658033033033033, "grad_norm": 1.0545575003002754, "learning_rate": 4.95386659960279e-06, "loss": 0.4137, "step": 17668 }, { "epoch": 1.6581268768768769, "grad_norm": 0.9838005198538019, "learning_rate": 4.9533206579710245e-06, "loss": 0.3928, "step": 17669 }, { "epoch": 1.6582207207207207, "grad_norm": 1.1408587538713968, "learning_rate": 4.952774716895824e-06, "loss": 0.3903, "step": 17670 }, { "epoch": 1.6583145645645647, "grad_norm": 0.919631826913399, "learning_rate": 4.952228776383697e-06, "loss": 0.4132, "step": 17671 }, { "epoch": 1.6584084084084085, "grad_norm": 0.987977236599977, "learning_rate": 4.95168283644115e-06, "loss": 0.4244, "step": 17672 }, { "epoch": 1.6585022522522523, "grad_norm": 1.416487589455369, "learning_rate": 4.951136897074695e-06, "loss": 0.4401, "step": 17673 }, { "epoch": 1.6585960960960962, "grad_norm": 1.125712794868697, "learning_rate": 4.9505909582908416e-06, "loss": 0.4112, "step": 17674 }, { "epoch": 1.65868993993994, "grad_norm": 1.0747233624631327, "learning_rate": 4.950045020096095e-06, "loss": 0.3994, "step": 17675 }, { "epoch": 1.6587837837837838, "grad_norm": 0.9975425460121655, "learning_rate": 4.94949908249697e-06, "loss": 0.394, "step": 17676 }, { "epoch": 1.6588776276276276, "grad_norm": 0.8749190954295347, "learning_rate": 4.948953145499974e-06, "loss": 0.4111, "step": 17677 }, { "epoch": 1.6589714714714714, "grad_norm": 0.8889415363046957, "learning_rate": 4.948407209111614e-06, "loss": 0.4015, "step": 17678 }, { "epoch": 1.6590653153153152, "grad_norm": 0.9204196946724891, "learning_rate": 4.947861273338402e-06, "loss": 0.4484, "step": 17679 }, { "epoch": 1.659159159159159, "grad_norm": 0.884883028875497, "learning_rate": 4.947315338186846e-06, "loss": 0.3781, "step": 17680 }, { "epoch": 1.659253003003003, "grad_norm": 1.0803867996688963, "learning_rate": 4.946769403663454e-06, "loss": 0.3883, "step": 17681 }, { "epoch": 1.6593468468468469, "grad_norm": 0.9549749284023694, "learning_rate": 4.946223469774738e-06, "loss": 0.4001, "step": 17682 }, { "epoch": 1.6594406906906907, "grad_norm": 0.9850253008639456, "learning_rate": 4.945677536527206e-06, "loss": 0.4373, "step": 17683 }, { "epoch": 1.6595345345345347, "grad_norm": 0.8402805298260811, "learning_rate": 4.945131603927366e-06, "loss": 0.3985, "step": 17684 }, { "epoch": 1.6596283783783785, "grad_norm": 0.9578960169527789, "learning_rate": 4.94458567198173e-06, "loss": 0.3471, "step": 17685 }, { "epoch": 1.6597222222222223, "grad_norm": 0.9850192045537686, "learning_rate": 4.944039740696805e-06, "loss": 0.4292, "step": 17686 }, { "epoch": 1.6598160660660661, "grad_norm": 0.8777855691921981, "learning_rate": 4.943493810079099e-06, "loss": 0.3707, "step": 17687 }, { "epoch": 1.65990990990991, "grad_norm": 0.9473327723703312, "learning_rate": 4.942947880135124e-06, "loss": 0.435, "step": 17688 }, { "epoch": 1.6600037537537538, "grad_norm": 0.891033614773096, "learning_rate": 4.942401950871389e-06, "loss": 0.4414, "step": 17689 }, { "epoch": 1.6600975975975976, "grad_norm": 1.0350111916262257, "learning_rate": 4.9418560222944015e-06, "loss": 0.3364, "step": 17690 }, { "epoch": 1.6601914414414414, "grad_norm": 0.8661934571030957, "learning_rate": 4.941310094410673e-06, "loss": 0.3695, "step": 17691 }, { "epoch": 1.6602852852852852, "grad_norm": 0.9059386582842174, "learning_rate": 4.94076416722671e-06, "loss": 0.4301, "step": 17692 }, { "epoch": 1.660379129129129, "grad_norm": 1.0146617734156573, "learning_rate": 4.940218240749021e-06, "loss": 0.4384, "step": 17693 }, { "epoch": 1.660472972972973, "grad_norm": 0.9716414263101318, "learning_rate": 4.93967231498412e-06, "loss": 0.4222, "step": 17694 }, { "epoch": 1.6605668168168168, "grad_norm": 0.8305339141713205, "learning_rate": 4.939126389938512e-06, "loss": 0.3754, "step": 17695 }, { "epoch": 1.6606606606606606, "grad_norm": 1.0606694733888522, "learning_rate": 4.938580465618706e-06, "loss": 0.4186, "step": 17696 }, { "epoch": 1.6607545045045045, "grad_norm": 1.9656672436334426, "learning_rate": 4.938034542031214e-06, "loss": 0.4156, "step": 17697 }, { "epoch": 1.6608483483483485, "grad_norm": 0.9240945508304141, "learning_rate": 4.937488619182544e-06, "loss": 0.4208, "step": 17698 }, { "epoch": 1.6609421921921923, "grad_norm": 0.9319201290910809, "learning_rate": 4.936942697079201e-06, "loss": 0.4271, "step": 17699 }, { "epoch": 1.6610360360360361, "grad_norm": 0.8490253955208912, "learning_rate": 4.936396775727701e-06, "loss": 0.3472, "step": 17700 }, { "epoch": 1.66112987987988, "grad_norm": 0.9810945495670869, "learning_rate": 4.935850855134549e-06, "loss": 0.4059, "step": 17701 }, { "epoch": 1.6612237237237237, "grad_norm": 0.9540388929490409, "learning_rate": 4.935304935306253e-06, "loss": 0.3858, "step": 17702 }, { "epoch": 1.6613175675675675, "grad_norm": 1.0286359312332847, "learning_rate": 4.934759016249327e-06, "loss": 0.418, "step": 17703 }, { "epoch": 1.6614114114114114, "grad_norm": 0.9963568117298659, "learning_rate": 4.934213097970276e-06, "loss": 0.393, "step": 17704 }, { "epoch": 1.6615052552552552, "grad_norm": 0.891114030792471, "learning_rate": 4.933667180475607e-06, "loss": 0.4036, "step": 17705 }, { "epoch": 1.661599099099099, "grad_norm": 0.9370987445073808, "learning_rate": 4.933121263771836e-06, "loss": 0.4242, "step": 17706 }, { "epoch": 1.6616929429429428, "grad_norm": 1.3320824695935929, "learning_rate": 4.9325753478654665e-06, "loss": 0.3719, "step": 17707 }, { "epoch": 1.6617867867867868, "grad_norm": 0.8517933267999648, "learning_rate": 4.932029432763007e-06, "loss": 0.3867, "step": 17708 }, { "epoch": 1.6618806306306306, "grad_norm": 1.3557319979143967, "learning_rate": 4.931483518470971e-06, "loss": 0.3731, "step": 17709 }, { "epoch": 1.6619744744744744, "grad_norm": 1.0668349588550698, "learning_rate": 4.930937604995866e-06, "loss": 0.4146, "step": 17710 }, { "epoch": 1.6620683183183185, "grad_norm": 1.2731033584885834, "learning_rate": 4.930391692344197e-06, "loss": 0.3733, "step": 17711 }, { "epoch": 1.6621621621621623, "grad_norm": 1.0693582546909033, "learning_rate": 4.929845780522479e-06, "loss": 0.3841, "step": 17712 }, { "epoch": 1.662256006006006, "grad_norm": 0.8306664889000821, "learning_rate": 4.929299869537217e-06, "loss": 0.3788, "step": 17713 }, { "epoch": 1.66234984984985, "grad_norm": 0.951426365045598, "learning_rate": 4.9287539593949195e-06, "loss": 0.3614, "step": 17714 }, { "epoch": 1.6624436936936937, "grad_norm": 1.0301074371647938, "learning_rate": 4.928208050102098e-06, "loss": 0.3782, "step": 17715 }, { "epoch": 1.6625375375375375, "grad_norm": 0.9947844682683106, "learning_rate": 4.9276621416652606e-06, "loss": 0.3914, "step": 17716 }, { "epoch": 1.6626313813813813, "grad_norm": 0.8884517026229259, "learning_rate": 4.9271162340909155e-06, "loss": 0.3947, "step": 17717 }, { "epoch": 1.6627252252252251, "grad_norm": 0.9114598482875622, "learning_rate": 4.926570327385574e-06, "loss": 0.3724, "step": 17718 }, { "epoch": 1.662819069069069, "grad_norm": 0.9674882829969933, "learning_rate": 4.926024421555742e-06, "loss": 0.3833, "step": 17719 }, { "epoch": 1.6629129129129128, "grad_norm": 0.9684756606373953, "learning_rate": 4.9254785166079275e-06, "loss": 0.4065, "step": 17720 }, { "epoch": 1.6630067567567568, "grad_norm": 1.13288735073305, "learning_rate": 4.924932612548643e-06, "loss": 0.4025, "step": 17721 }, { "epoch": 1.6631006006006006, "grad_norm": 0.804322919041212, "learning_rate": 4.924386709384397e-06, "loss": 0.3613, "step": 17722 }, { "epoch": 1.6631944444444444, "grad_norm": 1.0197758585144774, "learning_rate": 4.923840807121695e-06, "loss": 0.4167, "step": 17723 }, { "epoch": 1.6632882882882885, "grad_norm": 0.8967219108666385, "learning_rate": 4.92329490576705e-06, "loss": 0.3871, "step": 17724 }, { "epoch": 1.6633821321321323, "grad_norm": 0.9366629989844922, "learning_rate": 4.922749005326968e-06, "loss": 0.3955, "step": 17725 }, { "epoch": 1.663475975975976, "grad_norm": 0.8845902775581025, "learning_rate": 4.922203105807957e-06, "loss": 0.3931, "step": 17726 }, { "epoch": 1.6635698198198199, "grad_norm": 0.8463361457120633, "learning_rate": 4.92165720721653e-06, "loss": 0.4494, "step": 17727 }, { "epoch": 1.6636636636636637, "grad_norm": 1.0578564203707457, "learning_rate": 4.921111309559193e-06, "loss": 0.382, "step": 17728 }, { "epoch": 1.6637575075075075, "grad_norm": 1.1885908164237158, "learning_rate": 4.920565412842453e-06, "loss": 0.3618, "step": 17729 }, { "epoch": 1.6638513513513513, "grad_norm": 1.116174309366622, "learning_rate": 4.9200195170728235e-06, "loss": 0.3777, "step": 17730 }, { "epoch": 1.6639451951951951, "grad_norm": 0.900544903868774, "learning_rate": 4.9194736222568105e-06, "loss": 0.3811, "step": 17731 }, { "epoch": 1.664039039039039, "grad_norm": 3.9862330612279746, "learning_rate": 4.91892772840092e-06, "loss": 0.4023, "step": 17732 }, { "epoch": 1.6641328828828827, "grad_norm": 0.8889049913192487, "learning_rate": 4.918381835511668e-06, "loss": 0.396, "step": 17733 }, { "epoch": 1.6642267267267268, "grad_norm": 1.0246802771262158, "learning_rate": 4.917835943595557e-06, "loss": 0.3759, "step": 17734 }, { "epoch": 1.6643205705705706, "grad_norm": 1.1403106153482558, "learning_rate": 4.917290052659097e-06, "loss": 0.4357, "step": 17735 }, { "epoch": 1.6644144144144144, "grad_norm": 1.270883156573153, "learning_rate": 4.916744162708799e-06, "loss": 0.4117, "step": 17736 }, { "epoch": 1.6645082582582582, "grad_norm": 0.9315454887903505, "learning_rate": 4.91619827375117e-06, "loss": 0.3912, "step": 17737 }, { "epoch": 1.6646021021021022, "grad_norm": 1.12410882012302, "learning_rate": 4.915652385792716e-06, "loss": 0.3988, "step": 17738 }, { "epoch": 1.664695945945946, "grad_norm": 0.8615331046134607, "learning_rate": 4.915106498839951e-06, "loss": 0.3861, "step": 17739 }, { "epoch": 1.6647897897897899, "grad_norm": 0.9049649170593794, "learning_rate": 4.9145606128993815e-06, "loss": 0.3973, "step": 17740 }, { "epoch": 1.6648836336336337, "grad_norm": 0.905657751311468, "learning_rate": 4.914014727977515e-06, "loss": 0.3982, "step": 17741 }, { "epoch": 1.6649774774774775, "grad_norm": 1.2704224874422094, "learning_rate": 4.913468844080862e-06, "loss": 0.3959, "step": 17742 }, { "epoch": 1.6650713213213213, "grad_norm": 0.9194069622229541, "learning_rate": 4.912922961215929e-06, "loss": 0.4055, "step": 17743 }, { "epoch": 1.665165165165165, "grad_norm": 1.9694611673272795, "learning_rate": 4.9123770793892245e-06, "loss": 0.4285, "step": 17744 }, { "epoch": 1.665259009009009, "grad_norm": 1.3994040063799542, "learning_rate": 4.911831198607261e-06, "loss": 0.3845, "step": 17745 }, { "epoch": 1.6653528528528527, "grad_norm": 0.9631759821376518, "learning_rate": 4.911285318876544e-06, "loss": 0.3969, "step": 17746 }, { "epoch": 1.6654466966966965, "grad_norm": 1.091668166720223, "learning_rate": 4.910739440203581e-06, "loss": 0.4069, "step": 17747 }, { "epoch": 1.6655405405405406, "grad_norm": 2.5351483075657835, "learning_rate": 4.910193562594884e-06, "loss": 0.3358, "step": 17748 }, { "epoch": 1.6656343843843844, "grad_norm": 2.1671254319279396, "learning_rate": 4.9096476860569595e-06, "loss": 0.4215, "step": 17749 }, { "epoch": 1.6657282282282282, "grad_norm": 0.8906584594262358, "learning_rate": 4.909101810596314e-06, "loss": 0.3802, "step": 17750 }, { "epoch": 1.6658220720720722, "grad_norm": 0.9374890358711363, "learning_rate": 4.908555936219461e-06, "loss": 0.4102, "step": 17751 }, { "epoch": 1.665915915915916, "grad_norm": 0.9812052365650811, "learning_rate": 4.908010062932906e-06, "loss": 0.3956, "step": 17752 }, { "epoch": 1.6660097597597598, "grad_norm": 0.9134784426118449, "learning_rate": 4.907464190743157e-06, "loss": 0.3613, "step": 17753 }, { "epoch": 1.6661036036036037, "grad_norm": 0.8874373712627804, "learning_rate": 4.906918319656724e-06, "loss": 0.3636, "step": 17754 }, { "epoch": 1.6661974474474475, "grad_norm": 0.9620188437648907, "learning_rate": 4.906372449680115e-06, "loss": 0.3745, "step": 17755 }, { "epoch": 1.6662912912912913, "grad_norm": 0.9832252421798993, "learning_rate": 4.9058265808198355e-06, "loss": 0.4024, "step": 17756 }, { "epoch": 1.666385135135135, "grad_norm": 1.043468899630084, "learning_rate": 4.9052807130823995e-06, "loss": 0.3376, "step": 17757 }, { "epoch": 1.666478978978979, "grad_norm": 1.0455352092424977, "learning_rate": 4.904734846474313e-06, "loss": 0.4303, "step": 17758 }, { "epoch": 1.6665728228228227, "grad_norm": 0.9060789693966858, "learning_rate": 4.904188981002083e-06, "loss": 0.4019, "step": 17759 }, { "epoch": 1.6666666666666665, "grad_norm": 0.986896280266204, "learning_rate": 4.903643116672221e-06, "loss": 0.414, "step": 17760 }, { "epoch": 1.6667605105105106, "grad_norm": 0.9237632695783206, "learning_rate": 4.9030972534912315e-06, "loss": 0.3235, "step": 17761 }, { "epoch": 1.6668543543543544, "grad_norm": 0.8868559187606876, "learning_rate": 4.902551391465624e-06, "loss": 0.3812, "step": 17762 }, { "epoch": 1.6669481981981982, "grad_norm": 1.0291542166140881, "learning_rate": 4.9020055306019094e-06, "loss": 0.3316, "step": 17763 }, { "epoch": 1.6670420420420422, "grad_norm": 1.0039701537899386, "learning_rate": 4.901459670906595e-06, "loss": 0.3928, "step": 17764 }, { "epoch": 1.667135885885886, "grad_norm": 0.9801096146439618, "learning_rate": 4.900913812386186e-06, "loss": 0.4039, "step": 17765 }, { "epoch": 1.6672297297297298, "grad_norm": 0.9130114208647004, "learning_rate": 4.9003679550471954e-06, "loss": 0.3909, "step": 17766 }, { "epoch": 1.6673235735735736, "grad_norm": 1.070267239580486, "learning_rate": 4.899822098896129e-06, "loss": 0.4288, "step": 17767 }, { "epoch": 1.6674174174174174, "grad_norm": 0.8981881154704183, "learning_rate": 4.899276243939494e-06, "loss": 0.3624, "step": 17768 }, { "epoch": 1.6675112612612613, "grad_norm": 0.9935207753813605, "learning_rate": 4.898730390183802e-06, "loss": 0.3821, "step": 17769 }, { "epoch": 1.667605105105105, "grad_norm": 1.0096569209835684, "learning_rate": 4.898184537635559e-06, "loss": 0.3962, "step": 17770 }, { "epoch": 1.6676989489489489, "grad_norm": 1.1576636451494973, "learning_rate": 4.897638686301274e-06, "loss": 0.3418, "step": 17771 }, { "epoch": 1.6677927927927927, "grad_norm": 0.9287968034566876, "learning_rate": 4.897092836187455e-06, "loss": 0.4272, "step": 17772 }, { "epoch": 1.6678866366366365, "grad_norm": 1.5179745846269304, "learning_rate": 4.89654698730061e-06, "loss": 0.4042, "step": 17773 }, { "epoch": 1.6679804804804805, "grad_norm": 1.2588260696794817, "learning_rate": 4.896001139647246e-06, "loss": 0.4229, "step": 17774 }, { "epoch": 1.6680743243243243, "grad_norm": 0.9384469627137312, "learning_rate": 4.895455293233875e-06, "loss": 0.3997, "step": 17775 }, { "epoch": 1.6681681681681682, "grad_norm": 0.916874400491516, "learning_rate": 4.894909448067002e-06, "loss": 0.3781, "step": 17776 }, { "epoch": 1.668262012012012, "grad_norm": 0.963342692693826, "learning_rate": 4.894363604153135e-06, "loss": 0.3669, "step": 17777 }, { "epoch": 1.668355855855856, "grad_norm": 1.4468568147813452, "learning_rate": 4.893817761498785e-06, "loss": 0.4108, "step": 17778 }, { "epoch": 1.6684496996996998, "grad_norm": 0.8809371750284486, "learning_rate": 4.893271920110458e-06, "loss": 0.4002, "step": 17779 }, { "epoch": 1.6685435435435436, "grad_norm": 1.0358618257080945, "learning_rate": 4.89272607999466e-06, "loss": 0.3505, "step": 17780 }, { "epoch": 1.6686373873873874, "grad_norm": 1.0158492352081505, "learning_rate": 4.892180241157904e-06, "loss": 0.3836, "step": 17781 }, { "epoch": 1.6687312312312312, "grad_norm": 0.9032728470406879, "learning_rate": 4.891634403606695e-06, "loss": 0.356, "step": 17782 }, { "epoch": 1.668825075075075, "grad_norm": 0.8523044212144762, "learning_rate": 4.891088567347542e-06, "loss": 0.3587, "step": 17783 }, { "epoch": 1.6689189189189189, "grad_norm": 0.8950573811749629, "learning_rate": 4.890542732386953e-06, "loss": 0.4277, "step": 17784 }, { "epoch": 1.6690127627627627, "grad_norm": 1.3144425137189963, "learning_rate": 4.889996898731436e-06, "loss": 0.4072, "step": 17785 }, { "epoch": 1.6691066066066065, "grad_norm": 1.0291590026831356, "learning_rate": 4.889451066387497e-06, "loss": 0.4278, "step": 17786 }, { "epoch": 1.6692004504504503, "grad_norm": 0.8995526835710119, "learning_rate": 4.888905235361648e-06, "loss": 0.3989, "step": 17787 }, { "epoch": 1.6692942942942943, "grad_norm": 1.8559709904578197, "learning_rate": 4.888359405660395e-06, "loss": 0.39, "step": 17788 }, { "epoch": 1.6693881381381381, "grad_norm": 0.9208204080318385, "learning_rate": 4.887813577290245e-06, "loss": 0.3981, "step": 17789 }, { "epoch": 1.669481981981982, "grad_norm": 0.805660166078386, "learning_rate": 4.887267750257707e-06, "loss": 0.392, "step": 17790 }, { "epoch": 1.669575825825826, "grad_norm": 1.1870167767546729, "learning_rate": 4.88672192456929e-06, "loss": 0.4711, "step": 17791 }, { "epoch": 1.6696696696696698, "grad_norm": 1.0765448825061823, "learning_rate": 4.886176100231498e-06, "loss": 0.4252, "step": 17792 }, { "epoch": 1.6697635135135136, "grad_norm": 1.0040380296869693, "learning_rate": 4.8856302772508445e-06, "loss": 0.428, "step": 17793 }, { "epoch": 1.6698573573573574, "grad_norm": 0.9060567923892494, "learning_rate": 4.885084455633835e-06, "loss": 0.4072, "step": 17794 }, { "epoch": 1.6699512012012012, "grad_norm": 0.942332280754199, "learning_rate": 4.884538635386975e-06, "loss": 0.3789, "step": 17795 }, { "epoch": 1.670045045045045, "grad_norm": 1.034825413559278, "learning_rate": 4.883992816516775e-06, "loss": 0.3996, "step": 17796 }, { "epoch": 1.6701388888888888, "grad_norm": 0.990361007125685, "learning_rate": 4.883446999029744e-06, "loss": 0.3937, "step": 17797 }, { "epoch": 1.6702327327327327, "grad_norm": 1.1002961321403524, "learning_rate": 4.882901182932386e-06, "loss": 0.3738, "step": 17798 }, { "epoch": 1.6703265765765765, "grad_norm": 0.9509774694791743, "learning_rate": 4.882355368231211e-06, "loss": 0.3397, "step": 17799 }, { "epoch": 1.6704204204204203, "grad_norm": 0.9024607883538865, "learning_rate": 4.881809554932729e-06, "loss": 0.3823, "step": 17800 }, { "epoch": 1.6705142642642643, "grad_norm": 0.923950521883944, "learning_rate": 4.881263743043443e-06, "loss": 0.4003, "step": 17801 }, { "epoch": 1.6706081081081081, "grad_norm": 1.1190307316416332, "learning_rate": 4.880717932569865e-06, "loss": 0.4449, "step": 17802 }, { "epoch": 1.670701951951952, "grad_norm": 0.9042686078037315, "learning_rate": 4.880172123518501e-06, "loss": 0.4019, "step": 17803 }, { "epoch": 1.670795795795796, "grad_norm": 0.8899219078642795, "learning_rate": 4.879626315895857e-06, "loss": 0.3995, "step": 17804 }, { "epoch": 1.6708896396396398, "grad_norm": 0.9563205265216836, "learning_rate": 4.879080509708445e-06, "loss": 0.3946, "step": 17805 }, { "epoch": 1.6709834834834836, "grad_norm": 1.1106652408775002, "learning_rate": 4.87853470496277e-06, "loss": 0.3927, "step": 17806 }, { "epoch": 1.6710773273273274, "grad_norm": 1.1485781254041536, "learning_rate": 4.8779889016653385e-06, "loss": 0.3859, "step": 17807 }, { "epoch": 1.6711711711711712, "grad_norm": 0.8909651717976256, "learning_rate": 4.877443099822662e-06, "loss": 0.4206, "step": 17808 }, { "epoch": 1.671265015015015, "grad_norm": 1.0293854942337153, "learning_rate": 4.876897299441244e-06, "loss": 0.4144, "step": 17809 }, { "epoch": 1.6713588588588588, "grad_norm": 1.1204090127029658, "learning_rate": 4.876351500527593e-06, "loss": 0.3895, "step": 17810 }, { "epoch": 1.6714527027027026, "grad_norm": 1.0425321639177565, "learning_rate": 4.87580570308822e-06, "loss": 0.4166, "step": 17811 }, { "epoch": 1.6715465465465464, "grad_norm": 1.0708145425052773, "learning_rate": 4.87525990712963e-06, "loss": 0.4049, "step": 17812 }, { "epoch": 1.6716403903903903, "grad_norm": 0.8607381904998092, "learning_rate": 4.87471411265833e-06, "loss": 0.4019, "step": 17813 }, { "epoch": 1.6717342342342343, "grad_norm": 0.9317543027678568, "learning_rate": 4.874168319680829e-06, "loss": 0.3735, "step": 17814 }, { "epoch": 1.671828078078078, "grad_norm": 1.0015176932420342, "learning_rate": 4.873622528203633e-06, "loss": 0.3875, "step": 17815 }, { "epoch": 1.671921921921922, "grad_norm": 1.270502866514107, "learning_rate": 4.87307673823325e-06, "loss": 0.4194, "step": 17816 }, { "epoch": 1.6720157657657657, "grad_norm": 0.8756414614322345, "learning_rate": 4.8725309497761895e-06, "loss": 0.3893, "step": 17817 }, { "epoch": 1.6721096096096097, "grad_norm": 0.8477334816902471, "learning_rate": 4.871985162838957e-06, "loss": 0.391, "step": 17818 }, { "epoch": 1.6722034534534536, "grad_norm": 1.1050835792495877, "learning_rate": 4.87143937742806e-06, "loss": 0.4234, "step": 17819 }, { "epoch": 1.6722972972972974, "grad_norm": 0.9968768668926784, "learning_rate": 4.8708935935500075e-06, "loss": 0.4147, "step": 17820 }, { "epoch": 1.6723911411411412, "grad_norm": 1.0368225681819625, "learning_rate": 4.870347811211306e-06, "loss": 0.4294, "step": 17821 }, { "epoch": 1.672484984984985, "grad_norm": 1.3503301260910159, "learning_rate": 4.869802030418459e-06, "loss": 0.4056, "step": 17822 }, { "epoch": 1.6725788288288288, "grad_norm": 0.8594375627357493, "learning_rate": 4.869256251177982e-06, "loss": 0.3636, "step": 17823 }, { "epoch": 1.6726726726726726, "grad_norm": 1.0375011866291368, "learning_rate": 4.868710473496377e-06, "loss": 0.412, "step": 17824 }, { "epoch": 1.6727665165165164, "grad_norm": 0.9286485604411312, "learning_rate": 4.868164697380151e-06, "loss": 0.4181, "step": 17825 }, { "epoch": 1.6728603603603602, "grad_norm": 0.9181724086511178, "learning_rate": 4.867618922835815e-06, "loss": 0.377, "step": 17826 }, { "epoch": 1.672954204204204, "grad_norm": 0.9498298130046201, "learning_rate": 4.867073149869875e-06, "loss": 0.3567, "step": 17827 }, { "epoch": 1.673048048048048, "grad_norm": 1.0107622999353874, "learning_rate": 4.866527378488834e-06, "loss": 0.4119, "step": 17828 }, { "epoch": 1.6731418918918919, "grad_norm": 1.5268843241627217, "learning_rate": 4.865981608699204e-06, "loss": 0.4314, "step": 17829 }, { "epoch": 1.6732357357357357, "grad_norm": 0.9803599739423344, "learning_rate": 4.865435840507493e-06, "loss": 0.395, "step": 17830 }, { "epoch": 1.6733295795795797, "grad_norm": 1.0624131116179232, "learning_rate": 4.8648900739202036e-06, "loss": 0.4069, "step": 17831 }, { "epoch": 1.6734234234234235, "grad_norm": 0.9742436412935507, "learning_rate": 4.864344308943847e-06, "loss": 0.3968, "step": 17832 }, { "epoch": 1.6735172672672673, "grad_norm": 0.9713622245907659, "learning_rate": 4.8637985455849305e-06, "loss": 0.3734, "step": 17833 }, { "epoch": 1.6736111111111112, "grad_norm": 0.9438361495671733, "learning_rate": 4.863252783849957e-06, "loss": 0.4192, "step": 17834 }, { "epoch": 1.673704954954955, "grad_norm": 0.9327319064980467, "learning_rate": 4.86270702374544e-06, "loss": 0.4447, "step": 17835 }, { "epoch": 1.6737987987987988, "grad_norm": 0.9671216371879509, "learning_rate": 4.862161265277881e-06, "loss": 0.4279, "step": 17836 }, { "epoch": 1.6738926426426426, "grad_norm": 0.9198115056024354, "learning_rate": 4.861615508453789e-06, "loss": 0.3996, "step": 17837 }, { "epoch": 1.6739864864864864, "grad_norm": 1.1912616775245088, "learning_rate": 4.861069753279673e-06, "loss": 0.4323, "step": 17838 }, { "epoch": 1.6740803303303302, "grad_norm": 0.9968369917146769, "learning_rate": 4.860523999762039e-06, "loss": 0.3927, "step": 17839 }, { "epoch": 1.674174174174174, "grad_norm": 1.587591265747084, "learning_rate": 4.859978247907393e-06, "loss": 0.4282, "step": 17840 }, { "epoch": 1.674268018018018, "grad_norm": 1.0851052097144074, "learning_rate": 4.859432497722243e-06, "loss": 0.3677, "step": 17841 }, { "epoch": 1.6743618618618619, "grad_norm": 0.966543391795383, "learning_rate": 4.858886749213097e-06, "loss": 0.3557, "step": 17842 }, { "epoch": 1.6744557057057057, "grad_norm": 0.8184431974488537, "learning_rate": 4.858341002386457e-06, "loss": 0.338, "step": 17843 }, { "epoch": 1.6745495495495497, "grad_norm": 0.8735176010580691, "learning_rate": 4.8577952572488375e-06, "loss": 0.4142, "step": 17844 }, { "epoch": 1.6746433933933935, "grad_norm": 0.8790514103537204, "learning_rate": 4.8572495138067415e-06, "loss": 0.3976, "step": 17845 }, { "epoch": 1.6747372372372373, "grad_norm": 0.8681671067899183, "learning_rate": 4.856703772066675e-06, "loss": 0.3661, "step": 17846 }, { "epoch": 1.6748310810810811, "grad_norm": 0.9901263970646611, "learning_rate": 4.856158032035148e-06, "loss": 0.4108, "step": 17847 }, { "epoch": 1.674924924924925, "grad_norm": 0.8844748544767673, "learning_rate": 4.855612293718665e-06, "loss": 0.4057, "step": 17848 }, { "epoch": 1.6750187687687688, "grad_norm": 0.9852862435386609, "learning_rate": 4.855066557123732e-06, "loss": 0.397, "step": 17849 }, { "epoch": 1.6751126126126126, "grad_norm": 1.0923090520851193, "learning_rate": 4.8545208222568596e-06, "loss": 0.4368, "step": 17850 }, { "epoch": 1.6752064564564564, "grad_norm": 0.8477080762251482, "learning_rate": 4.853975089124553e-06, "loss": 0.4253, "step": 17851 }, { "epoch": 1.6753003003003002, "grad_norm": 1.4665419679076912, "learning_rate": 4.853429357733317e-06, "loss": 0.3607, "step": 17852 }, { "epoch": 1.675394144144144, "grad_norm": 0.9253038718846579, "learning_rate": 4.852883628089661e-06, "loss": 0.4161, "step": 17853 }, { "epoch": 1.675487987987988, "grad_norm": 0.9252747092705169, "learning_rate": 4.852337900200091e-06, "loss": 0.3991, "step": 17854 }, { "epoch": 1.6755818318318318, "grad_norm": 0.9005779853830892, "learning_rate": 4.8517921740711114e-06, "loss": 0.3593, "step": 17855 }, { "epoch": 1.6756756756756757, "grad_norm": 0.9344201931430532, "learning_rate": 4.851246449709233e-06, "loss": 0.3831, "step": 17856 }, { "epoch": 1.6757695195195195, "grad_norm": 0.8328008538853711, "learning_rate": 4.85070072712096e-06, "loss": 0.3597, "step": 17857 }, { "epoch": 1.6758633633633635, "grad_norm": 1.0188731062383591, "learning_rate": 4.8501550063128e-06, "loss": 0.4206, "step": 17858 }, { "epoch": 1.6759572072072073, "grad_norm": 0.9384964567512974, "learning_rate": 4.849609287291261e-06, "loss": 0.4257, "step": 17859 }, { "epoch": 1.6760510510510511, "grad_norm": 1.0087313216231752, "learning_rate": 4.849063570062847e-06, "loss": 0.3963, "step": 17860 }, { "epoch": 1.676144894894895, "grad_norm": 0.8981239161281226, "learning_rate": 4.8485178546340635e-06, "loss": 0.4079, "step": 17861 }, { "epoch": 1.6762387387387387, "grad_norm": 0.8179196440687018, "learning_rate": 4.847972141011423e-06, "loss": 0.3755, "step": 17862 }, { "epoch": 1.6763325825825826, "grad_norm": 1.0631211940878587, "learning_rate": 4.847426429201427e-06, "loss": 0.3926, "step": 17863 }, { "epoch": 1.6764264264264264, "grad_norm": 1.057200231021114, "learning_rate": 4.8468807192105836e-06, "loss": 0.4159, "step": 17864 }, { "epoch": 1.6765202702702702, "grad_norm": 0.9681518175709359, "learning_rate": 4.846335011045401e-06, "loss": 0.4115, "step": 17865 }, { "epoch": 1.676614114114114, "grad_norm": 0.9697884950039155, "learning_rate": 4.845789304712383e-06, "loss": 0.4413, "step": 17866 }, { "epoch": 1.6767079579579578, "grad_norm": 0.9258083128247839, "learning_rate": 4.845243600218035e-06, "loss": 0.3894, "step": 17867 }, { "epoch": 1.6768018018018018, "grad_norm": 0.8719466358296375, "learning_rate": 4.844697897568869e-06, "loss": 0.3848, "step": 17868 }, { "epoch": 1.6768956456456456, "grad_norm": 2.020171058831461, "learning_rate": 4.844152196771387e-06, "loss": 0.431, "step": 17869 }, { "epoch": 1.6769894894894894, "grad_norm": 1.0126020223888497, "learning_rate": 4.843606497832096e-06, "loss": 0.3491, "step": 17870 }, { "epoch": 1.6770833333333335, "grad_norm": 0.9197824650150966, "learning_rate": 4.8430608007575045e-06, "loss": 0.3702, "step": 17871 }, { "epoch": 1.6771771771771773, "grad_norm": 0.8273956642512941, "learning_rate": 4.842515105554118e-06, "loss": 0.3942, "step": 17872 }, { "epoch": 1.677271021021021, "grad_norm": 0.9025344232333137, "learning_rate": 4.84196941222844e-06, "loss": 0.3716, "step": 17873 }, { "epoch": 1.677364864864865, "grad_norm": 1.2404933176846646, "learning_rate": 4.8414237207869804e-06, "loss": 0.3747, "step": 17874 }, { "epoch": 1.6774587087087087, "grad_norm": 1.4502119419075785, "learning_rate": 4.840878031236245e-06, "loss": 0.3709, "step": 17875 }, { "epoch": 1.6775525525525525, "grad_norm": 1.072792943332468, "learning_rate": 4.840332343582739e-06, "loss": 0.4313, "step": 17876 }, { "epoch": 1.6776463963963963, "grad_norm": 0.9425433183836964, "learning_rate": 4.83978665783297e-06, "loss": 0.4029, "step": 17877 }, { "epoch": 1.6777402402402402, "grad_norm": 1.193861341915784, "learning_rate": 4.839240973993444e-06, "loss": 0.3932, "step": 17878 }, { "epoch": 1.677834084084084, "grad_norm": 0.8548816959485976, "learning_rate": 4.838695292070664e-06, "loss": 0.3941, "step": 17879 }, { "epoch": 1.6779279279279278, "grad_norm": 0.9536617904226985, "learning_rate": 4.838149612071141e-06, "loss": 0.3947, "step": 17880 }, { "epoch": 1.6780217717717718, "grad_norm": 1.1212633571249633, "learning_rate": 4.83760393400138e-06, "loss": 0.3928, "step": 17881 }, { "epoch": 1.6781156156156156, "grad_norm": 1.0725311468435519, "learning_rate": 4.837058257867885e-06, "loss": 0.4395, "step": 17882 }, { "epoch": 1.6782094594594594, "grad_norm": 0.9555781796965853, "learning_rate": 4.8365125836771645e-06, "loss": 0.4076, "step": 17883 }, { "epoch": 1.6783033033033035, "grad_norm": 1.3574008550469694, "learning_rate": 4.835966911435724e-06, "loss": 0.4461, "step": 17884 }, { "epoch": 1.6783971471471473, "grad_norm": 0.8776347876348812, "learning_rate": 4.835421241150067e-06, "loss": 0.416, "step": 17885 }, { "epoch": 1.678490990990991, "grad_norm": 0.9703396639978238, "learning_rate": 4.834875572826704e-06, "loss": 0.4016, "step": 17886 }, { "epoch": 1.678584834834835, "grad_norm": 1.0852885015260967, "learning_rate": 4.834329906472139e-06, "loss": 0.4, "step": 17887 }, { "epoch": 1.6786786786786787, "grad_norm": 0.8964664872799476, "learning_rate": 4.833784242092877e-06, "loss": 0.4232, "step": 17888 }, { "epoch": 1.6787725225225225, "grad_norm": 1.002000533590411, "learning_rate": 4.833238579695426e-06, "loss": 0.3878, "step": 17889 }, { "epoch": 1.6788663663663663, "grad_norm": 0.9565853864946854, "learning_rate": 4.832692919286291e-06, "loss": 0.4042, "step": 17890 }, { "epoch": 1.6789602102102101, "grad_norm": 0.9902665201028078, "learning_rate": 4.832147260871976e-06, "loss": 0.4435, "step": 17891 }, { "epoch": 1.679054054054054, "grad_norm": 0.9972722441038063, "learning_rate": 4.831601604458991e-06, "loss": 0.4048, "step": 17892 }, { "epoch": 1.6791478978978978, "grad_norm": 0.773164128896841, "learning_rate": 4.83105595005384e-06, "loss": 0.3921, "step": 17893 }, { "epoch": 1.6792417417417418, "grad_norm": 1.1177199993488374, "learning_rate": 4.830510297663028e-06, "loss": 0.448, "step": 17894 }, { "epoch": 1.6793355855855856, "grad_norm": 0.9181304415498676, "learning_rate": 4.829964647293062e-06, "loss": 0.4466, "step": 17895 }, { "epoch": 1.6794294294294294, "grad_norm": 0.8008826320701539, "learning_rate": 4.8294189989504484e-06, "loss": 0.3505, "step": 17896 }, { "epoch": 1.6795232732732732, "grad_norm": 0.9939639351857522, "learning_rate": 4.82887335264169e-06, "loss": 0.3643, "step": 17897 }, { "epoch": 1.6796171171171173, "grad_norm": 0.8820105388186699, "learning_rate": 4.828327708373296e-06, "loss": 0.3858, "step": 17898 }, { "epoch": 1.679710960960961, "grad_norm": 0.9037569598963393, "learning_rate": 4.827782066151772e-06, "loss": 0.4128, "step": 17899 }, { "epoch": 1.6798048048048049, "grad_norm": 0.9845209632987841, "learning_rate": 4.82723642598362e-06, "loss": 0.3623, "step": 17900 }, { "epoch": 1.6798986486486487, "grad_norm": 0.8855958960771296, "learning_rate": 4.826690787875351e-06, "loss": 0.3755, "step": 17901 }, { "epoch": 1.6799924924924925, "grad_norm": 0.9999221055476023, "learning_rate": 4.826145151833468e-06, "loss": 0.3771, "step": 17902 }, { "epoch": 1.6800863363363363, "grad_norm": 0.8828781728323986, "learning_rate": 4.825599517864474e-06, "loss": 0.3554, "step": 17903 }, { "epoch": 1.6801801801801801, "grad_norm": 1.1536946552050913, "learning_rate": 4.82505388597488e-06, "loss": 0.3721, "step": 17904 }, { "epoch": 1.680274024024024, "grad_norm": 1.1480782971147034, "learning_rate": 4.824508256171189e-06, "loss": 0.3834, "step": 17905 }, { "epoch": 1.6803678678678677, "grad_norm": 0.8755882274269197, "learning_rate": 4.823962628459905e-06, "loss": 0.3606, "step": 17906 }, { "epoch": 1.6804617117117115, "grad_norm": 0.8373835688940912, "learning_rate": 4.823417002847538e-06, "loss": 0.4071, "step": 17907 }, { "epoch": 1.6805555555555556, "grad_norm": 0.9358915836991237, "learning_rate": 4.82287137934059e-06, "loss": 0.4146, "step": 17908 }, { "epoch": 1.6806493993993994, "grad_norm": 0.9053477988386053, "learning_rate": 4.8223257579455655e-06, "loss": 0.3694, "step": 17909 }, { "epoch": 1.6807432432432432, "grad_norm": 0.8696716518338452, "learning_rate": 4.821780138668973e-06, "loss": 0.4158, "step": 17910 }, { "epoch": 1.6808370870870872, "grad_norm": 0.899731469034045, "learning_rate": 4.821234521517318e-06, "loss": 0.3725, "step": 17911 }, { "epoch": 1.680930930930931, "grad_norm": 0.9152129510522571, "learning_rate": 4.820688906497103e-06, "loss": 0.4294, "step": 17912 }, { "epoch": 1.6810247747747749, "grad_norm": 1.00569821487222, "learning_rate": 4.820143293614837e-06, "loss": 0.4116, "step": 17913 }, { "epoch": 1.6811186186186187, "grad_norm": 0.985755488255556, "learning_rate": 4.819597682877023e-06, "loss": 0.4324, "step": 17914 }, { "epoch": 1.6812124624624625, "grad_norm": 0.9740539788055976, "learning_rate": 4.819052074290166e-06, "loss": 0.41, "step": 17915 }, { "epoch": 1.6813063063063063, "grad_norm": 0.9481028100642196, "learning_rate": 4.818506467860773e-06, "loss": 0.4075, "step": 17916 }, { "epoch": 1.68140015015015, "grad_norm": 0.9008970991915629, "learning_rate": 4.81796086359535e-06, "loss": 0.3691, "step": 17917 }, { "epoch": 1.681493993993994, "grad_norm": 7.686854261799167, "learning_rate": 4.8174152615004e-06, "loss": 0.3826, "step": 17918 }, { "epoch": 1.6815878378378377, "grad_norm": 0.9007051843710835, "learning_rate": 4.81686966158243e-06, "loss": 0.3807, "step": 17919 }, { "epoch": 1.6816816816816815, "grad_norm": 1.7429164103184767, "learning_rate": 4.816324063847945e-06, "loss": 0.3658, "step": 17920 }, { "epoch": 1.6817755255255256, "grad_norm": 0.92048567592496, "learning_rate": 4.815778468303446e-06, "loss": 0.4034, "step": 17921 }, { "epoch": 1.6818693693693694, "grad_norm": 0.9345675944667168, "learning_rate": 4.815232874955446e-06, "loss": 0.3618, "step": 17922 }, { "epoch": 1.6819632132132132, "grad_norm": 0.8803058097219835, "learning_rate": 4.814687283810446e-06, "loss": 0.3962, "step": 17923 }, { "epoch": 1.6820570570570572, "grad_norm": 0.974982683768537, "learning_rate": 4.8141416948749495e-06, "loss": 0.3886, "step": 17924 }, { "epoch": 1.682150900900901, "grad_norm": 0.8428192247793341, "learning_rate": 4.813596108155465e-06, "loss": 0.3662, "step": 17925 }, { "epoch": 1.6822447447447448, "grad_norm": 0.9345666021526097, "learning_rate": 4.813050523658496e-06, "loss": 0.3569, "step": 17926 }, { "epoch": 1.6823385885885886, "grad_norm": 1.0498234955377235, "learning_rate": 4.812504941390545e-06, "loss": 0.3943, "step": 17927 }, { "epoch": 1.6824324324324325, "grad_norm": 0.9029260619613014, "learning_rate": 4.811959361358122e-06, "loss": 0.3942, "step": 17928 }, { "epoch": 1.6825262762762763, "grad_norm": 1.0615016887367628, "learning_rate": 4.81141378356773e-06, "loss": 0.3925, "step": 17929 }, { "epoch": 1.68262012012012, "grad_norm": 0.9094651188959467, "learning_rate": 4.8108682080258725e-06, "loss": 0.3628, "step": 17930 }, { "epoch": 1.6827139639639639, "grad_norm": 0.9374579583779192, "learning_rate": 4.810322634739057e-06, "loss": 0.3912, "step": 17931 }, { "epoch": 1.6828078078078077, "grad_norm": 0.9416780593601227, "learning_rate": 4.809777063713786e-06, "loss": 0.4109, "step": 17932 }, { "epoch": 1.6829016516516515, "grad_norm": 1.0080806778576814, "learning_rate": 4.809231494956564e-06, "loss": 0.382, "step": 17933 }, { "epoch": 1.6829954954954955, "grad_norm": 0.9177826814729192, "learning_rate": 4.8086859284739e-06, "loss": 0.3937, "step": 17934 }, { "epoch": 1.6830893393393394, "grad_norm": 0.8427923960463612, "learning_rate": 4.808140364272295e-06, "loss": 0.4079, "step": 17935 }, { "epoch": 1.6831831831831832, "grad_norm": 0.872694846521993, "learning_rate": 4.807594802358255e-06, "loss": 0.409, "step": 17936 }, { "epoch": 1.683277027027027, "grad_norm": 2.883606415018356, "learning_rate": 4.8070492427382845e-06, "loss": 0.3823, "step": 17937 }, { "epoch": 1.683370870870871, "grad_norm": 3.138365419542048, "learning_rate": 4.80650368541889e-06, "loss": 0.3747, "step": 17938 }, { "epoch": 1.6834647147147148, "grad_norm": 1.0052029813675722, "learning_rate": 4.805958130406572e-06, "loss": 0.4179, "step": 17939 }, { "epoch": 1.6835585585585586, "grad_norm": 0.8663430315723666, "learning_rate": 4.805412577707841e-06, "loss": 0.4057, "step": 17940 }, { "epoch": 1.6836524024024024, "grad_norm": 1.091717147636093, "learning_rate": 4.804867027329197e-06, "loss": 0.3918, "step": 17941 }, { "epoch": 1.6837462462462462, "grad_norm": 1.5494004240164012, "learning_rate": 4.804321479277147e-06, "loss": 0.3694, "step": 17942 }, { "epoch": 1.68384009009009, "grad_norm": 1.19207253136166, "learning_rate": 4.803775933558196e-06, "loss": 0.4011, "step": 17943 }, { "epoch": 1.6839339339339339, "grad_norm": 0.8825404524285937, "learning_rate": 4.803230390178847e-06, "loss": 0.3825, "step": 17944 }, { "epoch": 1.6840277777777777, "grad_norm": 0.8736296078294227, "learning_rate": 4.8026848491456035e-06, "loss": 0.4024, "step": 17945 }, { "epoch": 1.6841216216216215, "grad_norm": 1.0829906538032954, "learning_rate": 4.8021393104649735e-06, "loss": 0.4058, "step": 17946 }, { "epoch": 1.6842154654654653, "grad_norm": 1.3624157318641357, "learning_rate": 4.80159377414346e-06, "loss": 0.3824, "step": 17947 }, { "epoch": 1.6843093093093093, "grad_norm": 0.802871867803999, "learning_rate": 4.801048240187565e-06, "loss": 0.3801, "step": 17948 }, { "epoch": 1.6844031531531531, "grad_norm": 0.9708142090991986, "learning_rate": 4.800502708603798e-06, "loss": 0.4386, "step": 17949 }, { "epoch": 1.684496996996997, "grad_norm": 0.8193768594654006, "learning_rate": 4.799957179398661e-06, "loss": 0.3753, "step": 17950 }, { "epoch": 1.684590840840841, "grad_norm": 0.9723613288358989, "learning_rate": 4.799411652578655e-06, "loss": 0.4238, "step": 17951 }, { "epoch": 1.6846846846846848, "grad_norm": 1.0412869507272304, "learning_rate": 4.79886612815029e-06, "loss": 0.3891, "step": 17952 }, { "epoch": 1.6847785285285286, "grad_norm": 1.0209569805014012, "learning_rate": 4.798320606120068e-06, "loss": 0.4013, "step": 17953 }, { "epoch": 1.6848723723723724, "grad_norm": 0.9764824692222192, "learning_rate": 4.79777508649449e-06, "loss": 0.384, "step": 17954 }, { "epoch": 1.6849662162162162, "grad_norm": 0.9586692672113463, "learning_rate": 4.797229569280066e-06, "loss": 0.4249, "step": 17955 }, { "epoch": 1.68506006006006, "grad_norm": 0.8966031724995963, "learning_rate": 4.796684054483299e-06, "loss": 0.4054, "step": 17956 }, { "epoch": 1.6851539039039038, "grad_norm": 2.033772690204966, "learning_rate": 4.7961385421106885e-06, "loss": 0.3415, "step": 17957 }, { "epoch": 1.6852477477477477, "grad_norm": 1.0445075260809749, "learning_rate": 4.795593032168745e-06, "loss": 0.385, "step": 17958 }, { "epoch": 1.6853415915915915, "grad_norm": 1.0730656439033877, "learning_rate": 4.7950475246639695e-06, "loss": 0.3552, "step": 17959 }, { "epoch": 1.6854354354354353, "grad_norm": 0.9507644026985241, "learning_rate": 4.794502019602864e-06, "loss": 0.3877, "step": 17960 }, { "epoch": 1.6855292792792793, "grad_norm": 0.8243608327127033, "learning_rate": 4.793956516991937e-06, "loss": 0.3842, "step": 17961 }, { "epoch": 1.6856231231231231, "grad_norm": 1.1882650543397848, "learning_rate": 4.793411016837691e-06, "loss": 0.4091, "step": 17962 }, { "epoch": 1.685716966966967, "grad_norm": 1.05320873646974, "learning_rate": 4.792865519146629e-06, "loss": 0.3855, "step": 17963 }, { "epoch": 1.685810810810811, "grad_norm": 1.089574212051952, "learning_rate": 4.7923200239252574e-06, "loss": 0.3745, "step": 17964 }, { "epoch": 1.6859046546546548, "grad_norm": 0.9789253102666572, "learning_rate": 4.791774531180077e-06, "loss": 0.4134, "step": 17965 }, { "epoch": 1.6859984984984986, "grad_norm": 0.8554857416249723, "learning_rate": 4.791229040917593e-06, "loss": 0.4235, "step": 17966 }, { "epoch": 1.6860923423423424, "grad_norm": 1.2251574368299591, "learning_rate": 4.79068355314431e-06, "loss": 0.3745, "step": 17967 }, { "epoch": 1.6861861861861862, "grad_norm": 1.0466173635505531, "learning_rate": 4.7901380678667335e-06, "loss": 0.3597, "step": 17968 }, { "epoch": 1.68628003003003, "grad_norm": 1.7104409342618698, "learning_rate": 4.789592585091363e-06, "loss": 0.3725, "step": 17969 }, { "epoch": 1.6863738738738738, "grad_norm": 1.12974740051056, "learning_rate": 4.789047104824707e-06, "loss": 0.3832, "step": 17970 }, { "epoch": 1.6864677177177176, "grad_norm": 1.007356921278528, "learning_rate": 4.788501627073267e-06, "loss": 0.4026, "step": 17971 }, { "epoch": 1.6865615615615615, "grad_norm": 1.007266018704255, "learning_rate": 4.7879561518435445e-06, "loss": 0.3672, "step": 17972 }, { "epoch": 1.6866554054054053, "grad_norm": 0.9802860021456142, "learning_rate": 4.787410679142047e-06, "loss": 0.4361, "step": 17973 }, { "epoch": 1.6867492492492493, "grad_norm": 0.9628850665341788, "learning_rate": 4.786865208975278e-06, "loss": 0.3807, "step": 17974 }, { "epoch": 1.686843093093093, "grad_norm": 0.9238304300201023, "learning_rate": 4.786319741349739e-06, "loss": 0.3788, "step": 17975 }, { "epoch": 1.686936936936937, "grad_norm": 0.9065470988822272, "learning_rate": 4.785774276271936e-06, "loss": 0.4266, "step": 17976 }, { "epoch": 1.6870307807807807, "grad_norm": 0.8765873187131816, "learning_rate": 4.785228813748372e-06, "loss": 0.3583, "step": 17977 }, { "epoch": 1.6871246246246248, "grad_norm": 0.8184602588018901, "learning_rate": 4.784683353785548e-06, "loss": 0.3968, "step": 17978 }, { "epoch": 1.6872184684684686, "grad_norm": 0.9189875332665841, "learning_rate": 4.784137896389971e-06, "loss": 0.3897, "step": 17979 }, { "epoch": 1.6873123123123124, "grad_norm": 0.9089639791131172, "learning_rate": 4.783592441568143e-06, "loss": 0.3679, "step": 17980 }, { "epoch": 1.6874061561561562, "grad_norm": 1.2248831014552635, "learning_rate": 4.7830469893265675e-06, "loss": 0.4418, "step": 17981 }, { "epoch": 1.6875, "grad_norm": 1.0332530119646945, "learning_rate": 4.78250153967175e-06, "loss": 0.3908, "step": 17982 }, { "epoch": 1.6875938438438438, "grad_norm": 0.7806986124216052, "learning_rate": 4.781956092610191e-06, "loss": 0.3785, "step": 17983 }, { "epoch": 1.6876876876876876, "grad_norm": 0.9398296930333245, "learning_rate": 4.7814106481483936e-06, "loss": 0.4546, "step": 17984 }, { "epoch": 1.6877815315315314, "grad_norm": 0.9793816929999255, "learning_rate": 4.780865206292865e-06, "loss": 0.4156, "step": 17985 }, { "epoch": 1.6878753753753752, "grad_norm": 0.8937954622773896, "learning_rate": 4.780319767050105e-06, "loss": 0.416, "step": 17986 }, { "epoch": 1.6879692192192193, "grad_norm": 0.9022235670471641, "learning_rate": 4.7797743304266185e-06, "loss": 0.3931, "step": 17987 }, { "epoch": 1.688063063063063, "grad_norm": 0.9374030797191527, "learning_rate": 4.779228896428909e-06, "loss": 0.3669, "step": 17988 }, { "epoch": 1.688156906906907, "grad_norm": 1.1320969287523426, "learning_rate": 4.77868346506348e-06, "loss": 0.3993, "step": 17989 }, { "epoch": 1.6882507507507507, "grad_norm": 1.187411037387924, "learning_rate": 4.778138036336832e-06, "loss": 0.3817, "step": 17990 }, { "epoch": 1.6883445945945947, "grad_norm": 0.9137030051976766, "learning_rate": 4.777592610255473e-06, "loss": 0.4054, "step": 17991 }, { "epoch": 1.6884384384384385, "grad_norm": 0.9714909536040931, "learning_rate": 4.777047186825902e-06, "loss": 0.3931, "step": 17992 }, { "epoch": 1.6885322822822824, "grad_norm": 0.9221589157917953, "learning_rate": 4.776501766054624e-06, "loss": 0.3954, "step": 17993 }, { "epoch": 1.6886261261261262, "grad_norm": 0.8264035710060198, "learning_rate": 4.775956347948142e-06, "loss": 0.4006, "step": 17994 }, { "epoch": 1.68871996996997, "grad_norm": 0.9959020157617223, "learning_rate": 4.775410932512958e-06, "loss": 0.3938, "step": 17995 }, { "epoch": 1.6888138138138138, "grad_norm": 0.9531138674550185, "learning_rate": 4.774865519755575e-06, "loss": 0.4589, "step": 17996 }, { "epoch": 1.6889076576576576, "grad_norm": 1.0840369170027413, "learning_rate": 4.774320109682499e-06, "loss": 0.4167, "step": 17997 }, { "epoch": 1.6890015015015014, "grad_norm": 1.0128842622817749, "learning_rate": 4.773774702300231e-06, "loss": 0.378, "step": 17998 }, { "epoch": 1.6890953453453452, "grad_norm": 0.8732592060683075, "learning_rate": 4.773229297615271e-06, "loss": 0.3948, "step": 17999 }, { "epoch": 1.689189189189189, "grad_norm": 0.8864381434418882, "learning_rate": 4.7726838956341284e-06, "loss": 0.4294, "step": 18000 }, { "epoch": 1.689283033033033, "grad_norm": 0.8810003480505854, "learning_rate": 4.772138496363301e-06, "loss": 0.3636, "step": 18001 }, { "epoch": 1.6893768768768769, "grad_norm": 0.9023565598851119, "learning_rate": 4.7715930998092915e-06, "loss": 0.3764, "step": 18002 }, { "epoch": 1.6894707207207207, "grad_norm": 1.095460385449834, "learning_rate": 4.771047705978606e-06, "loss": 0.4355, "step": 18003 }, { "epoch": 1.6895645645645647, "grad_norm": 0.9552077492458012, "learning_rate": 4.7705023148777455e-06, "loss": 0.3843, "step": 18004 }, { "epoch": 1.6896584084084085, "grad_norm": 3.707148613761542, "learning_rate": 4.769956926513213e-06, "loss": 0.4415, "step": 18005 }, { "epoch": 1.6897522522522523, "grad_norm": 0.9235660399870618, "learning_rate": 4.769411540891511e-06, "loss": 0.4093, "step": 18006 }, { "epoch": 1.6898460960960962, "grad_norm": 0.849711769339109, "learning_rate": 4.768866158019143e-06, "loss": 0.3769, "step": 18007 }, { "epoch": 1.68993993993994, "grad_norm": 0.8971062888986524, "learning_rate": 4.7683207779026085e-06, "loss": 0.3603, "step": 18008 }, { "epoch": 1.6900337837837838, "grad_norm": 0.9249024629087773, "learning_rate": 4.767775400548415e-06, "loss": 0.4125, "step": 18009 }, { "epoch": 1.6901276276276276, "grad_norm": 0.8620694183362331, "learning_rate": 4.767230025963063e-06, "loss": 0.3811, "step": 18010 }, { "epoch": 1.6902214714714714, "grad_norm": 0.9517551418423092, "learning_rate": 4.766684654153053e-06, "loss": 0.362, "step": 18011 }, { "epoch": 1.6903153153153152, "grad_norm": 0.8777810300109867, "learning_rate": 4.766139285124891e-06, "loss": 0.3739, "step": 18012 }, { "epoch": 1.690409159159159, "grad_norm": 1.0498710206539255, "learning_rate": 4.765593918885077e-06, "loss": 0.3734, "step": 18013 }, { "epoch": 1.690503003003003, "grad_norm": 0.8905279133020411, "learning_rate": 4.765048555440113e-06, "loss": 0.3719, "step": 18014 }, { "epoch": 1.6905968468468469, "grad_norm": 0.8842381646172588, "learning_rate": 4.764503194796505e-06, "loss": 0.4035, "step": 18015 }, { "epoch": 1.6906906906906907, "grad_norm": 1.472733028098996, "learning_rate": 4.763957836960753e-06, "loss": 0.3361, "step": 18016 }, { "epoch": 1.6907845345345347, "grad_norm": 0.878721465919183, "learning_rate": 4.763412481939358e-06, "loss": 0.393, "step": 18017 }, { "epoch": 1.6908783783783785, "grad_norm": 0.9325991965933155, "learning_rate": 4.762867129738825e-06, "loss": 0.385, "step": 18018 }, { "epoch": 1.6909722222222223, "grad_norm": 0.8897015307534849, "learning_rate": 4.762321780365655e-06, "loss": 0.3738, "step": 18019 }, { "epoch": 1.6910660660660661, "grad_norm": 1.0888214378616854, "learning_rate": 4.761776433826349e-06, "loss": 0.4, "step": 18020 }, { "epoch": 1.69115990990991, "grad_norm": 0.8836219986736592, "learning_rate": 4.761231090127412e-06, "loss": 0.3813, "step": 18021 }, { "epoch": 1.6912537537537538, "grad_norm": 1.007863024718805, "learning_rate": 4.760685749275344e-06, "loss": 0.369, "step": 18022 }, { "epoch": 1.6913475975975976, "grad_norm": 1.2170482387131725, "learning_rate": 4.760140411276648e-06, "loss": 0.4463, "step": 18023 }, { "epoch": 1.6914414414414414, "grad_norm": 0.8969374239383751, "learning_rate": 4.759595076137827e-06, "loss": 0.3879, "step": 18024 }, { "epoch": 1.6915352852852852, "grad_norm": 0.8419017017795564, "learning_rate": 4.7590497438653824e-06, "loss": 0.333, "step": 18025 }, { "epoch": 1.691629129129129, "grad_norm": 0.9188227677645395, "learning_rate": 4.758504414465814e-06, "loss": 0.4248, "step": 18026 }, { "epoch": 1.691722972972973, "grad_norm": 1.0510044153971352, "learning_rate": 4.757959087945628e-06, "loss": 0.3916, "step": 18027 }, { "epoch": 1.6918168168168168, "grad_norm": 0.981912714768791, "learning_rate": 4.7574137643113235e-06, "loss": 0.3918, "step": 18028 }, { "epoch": 1.6919106606606606, "grad_norm": 1.0294671726136106, "learning_rate": 4.756868443569404e-06, "loss": 0.3977, "step": 18029 }, { "epoch": 1.6920045045045045, "grad_norm": 1.1677235338551122, "learning_rate": 4.75632312572637e-06, "loss": 0.4037, "step": 18030 }, { "epoch": 1.6920983483483485, "grad_norm": 1.0577912690637283, "learning_rate": 4.755777810788724e-06, "loss": 0.4063, "step": 18031 }, { "epoch": 1.6921921921921923, "grad_norm": 0.925957838982845, "learning_rate": 4.755232498762966e-06, "loss": 0.395, "step": 18032 }, { "epoch": 1.6922860360360361, "grad_norm": 0.8652347713482073, "learning_rate": 4.754687189655603e-06, "loss": 0.3815, "step": 18033 }, { "epoch": 1.69237987987988, "grad_norm": 1.2046954145965545, "learning_rate": 4.754141883473133e-06, "loss": 0.4252, "step": 18034 }, { "epoch": 1.6924737237237237, "grad_norm": 1.0243948756848738, "learning_rate": 4.753596580222057e-06, "loss": 0.4179, "step": 18035 }, { "epoch": 1.6925675675675675, "grad_norm": 0.9894616488740478, "learning_rate": 4.7530512799088785e-06, "loss": 0.4021, "step": 18036 }, { "epoch": 1.6926614114114114, "grad_norm": 0.9941763481761225, "learning_rate": 4.752505982540099e-06, "loss": 0.3411, "step": 18037 }, { "epoch": 1.6927552552552552, "grad_norm": 1.0372675873064217, "learning_rate": 4.751960688122218e-06, "loss": 0.4306, "step": 18038 }, { "epoch": 1.692849099099099, "grad_norm": 1.1965946931795697, "learning_rate": 4.7514153966617406e-06, "loss": 0.3578, "step": 18039 }, { "epoch": 1.6929429429429428, "grad_norm": 0.9917823782797163, "learning_rate": 4.750870108165167e-06, "loss": 0.387, "step": 18040 }, { "epoch": 1.6930367867867868, "grad_norm": 0.8904888862166961, "learning_rate": 4.750324822638997e-06, "loss": 0.3916, "step": 18041 }, { "epoch": 1.6931306306306306, "grad_norm": 0.953599146720295, "learning_rate": 4.749779540089735e-06, "loss": 0.4261, "step": 18042 }, { "epoch": 1.6932244744744744, "grad_norm": 1.843838174512231, "learning_rate": 4.749234260523881e-06, "loss": 0.3933, "step": 18043 }, { "epoch": 1.6933183183183185, "grad_norm": 0.9900989161585115, "learning_rate": 4.7486889839479335e-06, "loss": 0.3538, "step": 18044 }, { "epoch": 1.6934121621621623, "grad_norm": 0.9254922192676507, "learning_rate": 4.7481437103684e-06, "loss": 0.3417, "step": 18045 }, { "epoch": 1.693506006006006, "grad_norm": 0.9064551528777867, "learning_rate": 4.7475984397917785e-06, "loss": 0.4431, "step": 18046 }, { "epoch": 1.69359984984985, "grad_norm": 1.0055547291536373, "learning_rate": 4.7470531722245686e-06, "loss": 0.3758, "step": 18047 }, { "epoch": 1.6936936936936937, "grad_norm": 0.9288687107091567, "learning_rate": 4.746507907673275e-06, "loss": 0.3882, "step": 18048 }, { "epoch": 1.6937875375375375, "grad_norm": 0.9245798164283031, "learning_rate": 4.745962646144398e-06, "loss": 0.417, "step": 18049 }, { "epoch": 1.6938813813813813, "grad_norm": 0.8974276934589908, "learning_rate": 4.745417387644436e-06, "loss": 0.3701, "step": 18050 }, { "epoch": 1.6939752252252251, "grad_norm": 1.1752943478356128, "learning_rate": 4.744872132179894e-06, "loss": 0.3892, "step": 18051 }, { "epoch": 1.694069069069069, "grad_norm": 1.0280068131647586, "learning_rate": 4.744326879757272e-06, "loss": 0.3793, "step": 18052 }, { "epoch": 1.6941629129129128, "grad_norm": 1.051738447586794, "learning_rate": 4.74378163038307e-06, "loss": 0.3868, "step": 18053 }, { "epoch": 1.6942567567567568, "grad_norm": 0.9233129756978186, "learning_rate": 4.7432363840637905e-06, "loss": 0.3867, "step": 18054 }, { "epoch": 1.6943506006006006, "grad_norm": 0.8688718515765131, "learning_rate": 4.742691140805933e-06, "loss": 0.3721, "step": 18055 }, { "epoch": 1.6944444444444444, "grad_norm": 0.8808396927031854, "learning_rate": 4.742145900615998e-06, "loss": 0.3777, "step": 18056 }, { "epoch": 1.6945382882882885, "grad_norm": 1.8598092589028778, "learning_rate": 4.741600663500491e-06, "loss": 0.406, "step": 18057 }, { "epoch": 1.6946321321321323, "grad_norm": 0.8792409712511899, "learning_rate": 4.741055429465908e-06, "loss": 0.3967, "step": 18058 }, { "epoch": 1.694725975975976, "grad_norm": 0.9282677587288244, "learning_rate": 4.740510198518751e-06, "loss": 0.385, "step": 18059 }, { "epoch": 1.6948198198198199, "grad_norm": 5.3571633581050175, "learning_rate": 4.739964970665522e-06, "loss": 0.3872, "step": 18060 }, { "epoch": 1.6949136636636637, "grad_norm": 1.009043823687502, "learning_rate": 4.739419745912722e-06, "loss": 0.393, "step": 18061 }, { "epoch": 1.6950075075075075, "grad_norm": 0.9680154337579994, "learning_rate": 4.738874524266848e-06, "loss": 0.3796, "step": 18062 }, { "epoch": 1.6951013513513513, "grad_norm": 0.9404726761487525, "learning_rate": 4.738329305734407e-06, "loss": 0.3579, "step": 18063 }, { "epoch": 1.6951951951951951, "grad_norm": 1.0726635557715005, "learning_rate": 4.7377840903218965e-06, "loss": 0.4154, "step": 18064 }, { "epoch": 1.695289039039039, "grad_norm": 0.950962702636776, "learning_rate": 4.737238878035815e-06, "loss": 0.4173, "step": 18065 }, { "epoch": 1.6953828828828827, "grad_norm": 0.925550791924283, "learning_rate": 4.736693668882668e-06, "loss": 0.3684, "step": 18066 }, { "epoch": 1.6954767267267268, "grad_norm": 1.1279184195561573, "learning_rate": 4.736148462868952e-06, "loss": 0.4107, "step": 18067 }, { "epoch": 1.6955705705705706, "grad_norm": 1.0894028710790782, "learning_rate": 4.735603260001167e-06, "loss": 0.415, "step": 18068 }, { "epoch": 1.6956644144144144, "grad_norm": 0.9229020562408932, "learning_rate": 4.735058060285818e-06, "loss": 0.4162, "step": 18069 }, { "epoch": 1.6957582582582582, "grad_norm": 0.8944839387569503, "learning_rate": 4.734512863729403e-06, "loss": 0.3734, "step": 18070 }, { "epoch": 1.6958521021021022, "grad_norm": 0.9247177955742607, "learning_rate": 4.733967670338421e-06, "loss": 0.4224, "step": 18071 }, { "epoch": 1.695945945945946, "grad_norm": 1.0027818838866864, "learning_rate": 4.7334224801193746e-06, "loss": 0.3993, "step": 18072 }, { "epoch": 1.6960397897897899, "grad_norm": 0.9321128778666017, "learning_rate": 4.732877293078764e-06, "loss": 0.3868, "step": 18073 }, { "epoch": 1.6961336336336337, "grad_norm": 0.883055546744527, "learning_rate": 4.732332109223086e-06, "loss": 0.3897, "step": 18074 }, { "epoch": 1.6962274774774775, "grad_norm": 1.3841887113283862, "learning_rate": 4.731786928558846e-06, "loss": 0.3932, "step": 18075 }, { "epoch": 1.6963213213213213, "grad_norm": 0.9654688601058813, "learning_rate": 4.731241751092543e-06, "loss": 0.381, "step": 18076 }, { "epoch": 1.696415165165165, "grad_norm": 0.9630002536182702, "learning_rate": 4.730696576830673e-06, "loss": 0.3888, "step": 18077 }, { "epoch": 1.696509009009009, "grad_norm": 0.937066856991649, "learning_rate": 4.730151405779741e-06, "loss": 0.3805, "step": 18078 }, { "epoch": 1.6966028528528527, "grad_norm": 0.926069287138437, "learning_rate": 4.7296062379462485e-06, "loss": 0.4077, "step": 18079 }, { "epoch": 1.6966966966966965, "grad_norm": 0.9964753039642089, "learning_rate": 4.729061073336688e-06, "loss": 0.4024, "step": 18080 }, { "epoch": 1.6967905405405406, "grad_norm": 0.7958964177639719, "learning_rate": 4.728515911957567e-06, "loss": 0.3898, "step": 18081 }, { "epoch": 1.6968843843843844, "grad_norm": 1.077799113803166, "learning_rate": 4.727970753815382e-06, "loss": 0.3998, "step": 18082 }, { "epoch": 1.6969782282282282, "grad_norm": 0.85120434573006, "learning_rate": 4.7274255989166325e-06, "loss": 0.3948, "step": 18083 }, { "epoch": 1.6970720720720722, "grad_norm": 1.00762291993193, "learning_rate": 4.726880447267821e-06, "loss": 0.3741, "step": 18084 }, { "epoch": 1.697165915915916, "grad_norm": 0.9570003744355989, "learning_rate": 4.726335298875447e-06, "loss": 0.3583, "step": 18085 }, { "epoch": 1.6972597597597598, "grad_norm": 0.8621167822166261, "learning_rate": 4.725790153746008e-06, "loss": 0.3995, "step": 18086 }, { "epoch": 1.6973536036036037, "grad_norm": 1.0361289169499375, "learning_rate": 4.725245011886006e-06, "loss": 0.4211, "step": 18087 }, { "epoch": 1.6974474474474475, "grad_norm": 1.5460855302347196, "learning_rate": 4.724699873301941e-06, "loss": 0.3845, "step": 18088 }, { "epoch": 1.6975412912912913, "grad_norm": 0.9407411896631295, "learning_rate": 4.724154738000309e-06, "loss": 0.3879, "step": 18089 }, { "epoch": 1.697635135135135, "grad_norm": 0.8080116524233918, "learning_rate": 4.723609605987614e-06, "loss": 0.3784, "step": 18090 }, { "epoch": 1.697728978978979, "grad_norm": 0.8702679037451078, "learning_rate": 4.723064477270355e-06, "loss": 0.4157, "step": 18091 }, { "epoch": 1.6978228228228227, "grad_norm": 0.8110161137813995, "learning_rate": 4.7225193518550295e-06, "loss": 0.3667, "step": 18092 }, { "epoch": 1.6979166666666665, "grad_norm": 2.0885992945440637, "learning_rate": 4.72197422974814e-06, "loss": 0.4247, "step": 18093 }, { "epoch": 1.6980105105105106, "grad_norm": 0.952682234604202, "learning_rate": 4.7214291109561835e-06, "loss": 0.3242, "step": 18094 }, { "epoch": 1.6981043543543544, "grad_norm": 0.8782429074704873, "learning_rate": 4.720883995485659e-06, "loss": 0.337, "step": 18095 }, { "epoch": 1.6981981981981982, "grad_norm": 0.9473585465471851, "learning_rate": 4.72033888334307e-06, "loss": 0.3757, "step": 18096 }, { "epoch": 1.6982920420420422, "grad_norm": 0.9536087734070202, "learning_rate": 4.719793774534913e-06, "loss": 0.452, "step": 18097 }, { "epoch": 1.698385885885886, "grad_norm": 1.623282044813731, "learning_rate": 4.7192486690676855e-06, "loss": 0.4328, "step": 18098 }, { "epoch": 1.6984797297297298, "grad_norm": 0.9672959332254445, "learning_rate": 4.718703566947892e-06, "loss": 0.3735, "step": 18099 }, { "epoch": 1.6985735735735736, "grad_norm": 1.3038185038371188, "learning_rate": 4.718158468182029e-06, "loss": 0.4203, "step": 18100 }, { "epoch": 1.6986674174174174, "grad_norm": 0.8643292193827108, "learning_rate": 4.717613372776592e-06, "loss": 0.3851, "step": 18101 }, { "epoch": 1.6987612612612613, "grad_norm": 1.0016142636499592, "learning_rate": 4.717068280738088e-06, "loss": 0.4106, "step": 18102 }, { "epoch": 1.698855105105105, "grad_norm": 0.9811307895208136, "learning_rate": 4.716523192073011e-06, "loss": 0.4083, "step": 18103 }, { "epoch": 1.6989489489489489, "grad_norm": 1.0221521202450803, "learning_rate": 4.71597810678786e-06, "loss": 0.3948, "step": 18104 }, { "epoch": 1.6990427927927927, "grad_norm": 0.9956553548791622, "learning_rate": 4.715433024889137e-06, "loss": 0.3816, "step": 18105 }, { "epoch": 1.6991366366366365, "grad_norm": 1.0045083128505057, "learning_rate": 4.7148879463833405e-06, "loss": 0.3708, "step": 18106 }, { "epoch": 1.6992304804804805, "grad_norm": 1.2380044691371546, "learning_rate": 4.7143428712769655e-06, "loss": 0.3827, "step": 18107 }, { "epoch": 1.6993243243243243, "grad_norm": 0.9777781320083309, "learning_rate": 4.713797799576516e-06, "loss": 0.4096, "step": 18108 }, { "epoch": 1.6994181681681682, "grad_norm": 1.2915635071195863, "learning_rate": 4.713252731288489e-06, "loss": 0.3695, "step": 18109 }, { "epoch": 1.699512012012012, "grad_norm": 1.0880292294096388, "learning_rate": 4.712707666419382e-06, "loss": 0.4359, "step": 18110 }, { "epoch": 1.699605855855856, "grad_norm": 0.9303188113096159, "learning_rate": 4.712162604975698e-06, "loss": 0.4022, "step": 18111 }, { "epoch": 1.6996996996996998, "grad_norm": 0.9232286744612972, "learning_rate": 4.711617546963932e-06, "loss": 0.4167, "step": 18112 }, { "epoch": 1.6997935435435436, "grad_norm": 0.9735211002542836, "learning_rate": 4.711072492390582e-06, "loss": 0.3728, "step": 18113 }, { "epoch": 1.6998873873873874, "grad_norm": 1.4519974989253586, "learning_rate": 4.710527441262151e-06, "loss": 0.4507, "step": 18114 }, { "epoch": 1.6999812312312312, "grad_norm": 0.9695427154365979, "learning_rate": 4.709982393585135e-06, "loss": 0.381, "step": 18115 }, { "epoch": 1.700075075075075, "grad_norm": 0.9105906467704463, "learning_rate": 4.709437349366033e-06, "loss": 0.3865, "step": 18116 }, { "epoch": 1.7001689189189189, "grad_norm": 0.941037814382789, "learning_rate": 4.708892308611343e-06, "loss": 0.4069, "step": 18117 }, { "epoch": 1.7002627627627627, "grad_norm": 0.8890321082838473, "learning_rate": 4.708347271327565e-06, "loss": 0.4027, "step": 18118 }, { "epoch": 1.7003566066066065, "grad_norm": 1.075097222551123, "learning_rate": 4.707802237521195e-06, "loss": 0.4244, "step": 18119 }, { "epoch": 1.7004504504504503, "grad_norm": 0.9611847301413997, "learning_rate": 4.707257207198735e-06, "loss": 0.4376, "step": 18120 }, { "epoch": 1.7005442942942943, "grad_norm": 0.7928946748540455, "learning_rate": 4.706712180366683e-06, "loss": 0.3712, "step": 18121 }, { "epoch": 1.7006381381381381, "grad_norm": 1.1762106831911296, "learning_rate": 4.7061671570315335e-06, "loss": 0.3952, "step": 18122 }, { "epoch": 1.700731981981982, "grad_norm": 1.2562549785471386, "learning_rate": 4.705622137199789e-06, "loss": 0.3891, "step": 18123 }, { "epoch": 1.700825825825826, "grad_norm": 1.229476694785409, "learning_rate": 4.705077120877947e-06, "loss": 0.3765, "step": 18124 }, { "epoch": 1.7009196696696698, "grad_norm": 1.004147960699987, "learning_rate": 4.704532108072502e-06, "loss": 0.4163, "step": 18125 }, { "epoch": 1.7010135135135136, "grad_norm": 0.8970408252752945, "learning_rate": 4.70398709878996e-06, "loss": 0.4054, "step": 18126 }, { "epoch": 1.7011073573573574, "grad_norm": 1.1059827669714153, "learning_rate": 4.703442093036813e-06, "loss": 0.4312, "step": 18127 }, { "epoch": 1.7012012012012012, "grad_norm": 1.1026199558657084, "learning_rate": 4.702897090819559e-06, "loss": 0.378, "step": 18128 }, { "epoch": 1.701295045045045, "grad_norm": 0.9476271492033014, "learning_rate": 4.7023520921447e-06, "loss": 0.3624, "step": 18129 }, { "epoch": 1.7013888888888888, "grad_norm": 1.0692961729702737, "learning_rate": 4.7018070970187315e-06, "loss": 0.3497, "step": 18130 }, { "epoch": 1.7014827327327327, "grad_norm": 0.8859285212638693, "learning_rate": 4.70126210544815e-06, "loss": 0.3964, "step": 18131 }, { "epoch": 1.7015765765765765, "grad_norm": 0.9139991780302308, "learning_rate": 4.700717117439458e-06, "loss": 0.3865, "step": 18132 }, { "epoch": 1.7016704204204203, "grad_norm": 0.9264290356446576, "learning_rate": 4.700172132999152e-06, "loss": 0.4042, "step": 18133 }, { "epoch": 1.7017642642642643, "grad_norm": 1.04453494171761, "learning_rate": 4.699627152133726e-06, "loss": 0.3964, "step": 18134 }, { "epoch": 1.7018581081081081, "grad_norm": 0.8922431119559947, "learning_rate": 4.699082174849684e-06, "loss": 0.412, "step": 18135 }, { "epoch": 1.701951951951952, "grad_norm": 0.840203897942189, "learning_rate": 4.698537201153519e-06, "loss": 0.3587, "step": 18136 }, { "epoch": 1.702045795795796, "grad_norm": 0.8096217719688452, "learning_rate": 4.697992231051728e-06, "loss": 0.392, "step": 18137 }, { "epoch": 1.7021396396396398, "grad_norm": 1.7663442414684372, "learning_rate": 4.697447264550815e-06, "loss": 0.3579, "step": 18138 }, { "epoch": 1.7022334834834836, "grad_norm": 0.897309682567257, "learning_rate": 4.696902301657273e-06, "loss": 0.3971, "step": 18139 }, { "epoch": 1.7023273273273274, "grad_norm": 1.3106743411893162, "learning_rate": 4.696357342377599e-06, "loss": 0.4303, "step": 18140 }, { "epoch": 1.7024211711711712, "grad_norm": 0.9226528776247327, "learning_rate": 4.695812386718294e-06, "loss": 0.3398, "step": 18141 }, { "epoch": 1.702515015015015, "grad_norm": 1.5606397383406803, "learning_rate": 4.695267434685853e-06, "loss": 0.3903, "step": 18142 }, { "epoch": 1.7026088588588588, "grad_norm": 0.9458926668049779, "learning_rate": 4.694722486286772e-06, "loss": 0.3704, "step": 18143 }, { "epoch": 1.7027027027027026, "grad_norm": 0.8925641645022505, "learning_rate": 4.694177541527554e-06, "loss": 0.3975, "step": 18144 }, { "epoch": 1.7027965465465464, "grad_norm": 1.0342461270538492, "learning_rate": 4.693632600414692e-06, "loss": 0.4069, "step": 18145 }, { "epoch": 1.7028903903903903, "grad_norm": 1.1555817988953652, "learning_rate": 4.6930876629546836e-06, "loss": 0.4087, "step": 18146 }, { "epoch": 1.7029842342342343, "grad_norm": 0.9261128367773037, "learning_rate": 4.6925427291540286e-06, "loss": 0.3646, "step": 18147 }, { "epoch": 1.703078078078078, "grad_norm": 0.9092681464181974, "learning_rate": 4.691997799019223e-06, "loss": 0.3842, "step": 18148 }, { "epoch": 1.703171921921922, "grad_norm": 1.0175414338127842, "learning_rate": 4.6914528725567616e-06, "loss": 0.4002, "step": 18149 }, { "epoch": 1.7032657657657657, "grad_norm": 0.8946090373477144, "learning_rate": 4.690907949773146e-06, "loss": 0.3862, "step": 18150 }, { "epoch": 1.7033596096096097, "grad_norm": 1.9538636830110785, "learning_rate": 4.690363030674871e-06, "loss": 0.4086, "step": 18151 }, { "epoch": 1.7034534534534536, "grad_norm": 0.9858919882373068, "learning_rate": 4.689818115268433e-06, "loss": 0.4325, "step": 18152 }, { "epoch": 1.7035472972972974, "grad_norm": 0.9815713724992331, "learning_rate": 4.689273203560331e-06, "loss": 0.3856, "step": 18153 }, { "epoch": 1.7036411411411412, "grad_norm": 1.1011099949435894, "learning_rate": 4.688728295557063e-06, "loss": 0.3594, "step": 18154 }, { "epoch": 1.703734984984985, "grad_norm": 0.9351096256408837, "learning_rate": 4.6881833912651195e-06, "loss": 0.3828, "step": 18155 }, { "epoch": 1.7038288288288288, "grad_norm": 0.9776447935554469, "learning_rate": 4.6876384906910055e-06, "loss": 0.4339, "step": 18156 }, { "epoch": 1.7039226726726726, "grad_norm": 0.9592016424170585, "learning_rate": 4.687093593841214e-06, "loss": 0.3932, "step": 18157 }, { "epoch": 1.7040165165165164, "grad_norm": 1.361721960272909, "learning_rate": 4.686548700722242e-06, "loss": 0.4169, "step": 18158 }, { "epoch": 1.7041103603603602, "grad_norm": 1.0757436755671759, "learning_rate": 4.686003811340588e-06, "loss": 0.4087, "step": 18159 }, { "epoch": 1.704204204204204, "grad_norm": 0.9847979318138849, "learning_rate": 4.685458925702747e-06, "loss": 0.3941, "step": 18160 }, { "epoch": 1.704298048048048, "grad_norm": 0.9933265533826628, "learning_rate": 4.684914043815214e-06, "loss": 0.3443, "step": 18161 }, { "epoch": 1.7043918918918919, "grad_norm": 0.9659512748941262, "learning_rate": 4.68436916568449e-06, "loss": 0.3871, "step": 18162 }, { "epoch": 1.7044857357357357, "grad_norm": 0.9048194108290976, "learning_rate": 4.68382429131707e-06, "loss": 0.4333, "step": 18163 }, { "epoch": 1.7045795795795797, "grad_norm": 0.9440826752693963, "learning_rate": 4.683279420719449e-06, "loss": 0.4008, "step": 18164 }, { "epoch": 1.7046734234234235, "grad_norm": 0.9735552811781422, "learning_rate": 4.6827345538981255e-06, "loss": 0.3407, "step": 18165 }, { "epoch": 1.7047672672672673, "grad_norm": 0.8816026525803246, "learning_rate": 4.682189690859596e-06, "loss": 0.3951, "step": 18166 }, { "epoch": 1.7048611111111112, "grad_norm": 0.9705593808966748, "learning_rate": 4.681644831610353e-06, "loss": 0.4433, "step": 18167 }, { "epoch": 1.704954954954955, "grad_norm": 0.8679478864863819, "learning_rate": 4.681099976156899e-06, "loss": 0.397, "step": 18168 }, { "epoch": 1.7050487987987988, "grad_norm": 0.9731044544911371, "learning_rate": 4.680555124505728e-06, "loss": 0.4604, "step": 18169 }, { "epoch": 1.7051426426426426, "grad_norm": 0.9041306077225868, "learning_rate": 4.680010276663334e-06, "loss": 0.3974, "step": 18170 }, { "epoch": 1.7052364864864864, "grad_norm": 3.073621490662124, "learning_rate": 4.679465432636216e-06, "loss": 0.4195, "step": 18171 }, { "epoch": 1.7053303303303302, "grad_norm": 1.0236491762262387, "learning_rate": 4.67892059243087e-06, "loss": 0.3892, "step": 18172 }, { "epoch": 1.705424174174174, "grad_norm": 2.864953944245367, "learning_rate": 4.678375756053789e-06, "loss": 0.3459, "step": 18173 }, { "epoch": 1.705518018018018, "grad_norm": 0.8795095612012109, "learning_rate": 4.677830923511474e-06, "loss": 0.4194, "step": 18174 }, { "epoch": 1.7056118618618619, "grad_norm": 1.0074597795595672, "learning_rate": 4.677286094810419e-06, "loss": 0.42, "step": 18175 }, { "epoch": 1.7057057057057057, "grad_norm": 0.9991991525839006, "learning_rate": 4.6767412699571186e-06, "loss": 0.3965, "step": 18176 }, { "epoch": 1.7057995495495497, "grad_norm": 0.9649335068660201, "learning_rate": 4.6761964489580705e-06, "loss": 0.3936, "step": 18177 }, { "epoch": 1.7058933933933935, "grad_norm": 0.9021760566415077, "learning_rate": 4.6756516318197716e-06, "loss": 0.4302, "step": 18178 }, { "epoch": 1.7059872372372373, "grad_norm": 1.0088050468927972, "learning_rate": 4.675106818548713e-06, "loss": 0.3959, "step": 18179 }, { "epoch": 1.7060810810810811, "grad_norm": 0.7749987630416607, "learning_rate": 4.674562009151397e-06, "loss": 0.4029, "step": 18180 }, { "epoch": 1.706174924924925, "grad_norm": 1.0530227443929732, "learning_rate": 4.674017203634317e-06, "loss": 0.4417, "step": 18181 }, { "epoch": 1.7062687687687688, "grad_norm": 0.876112428077403, "learning_rate": 4.6734724020039665e-06, "loss": 0.36, "step": 18182 }, { "epoch": 1.7063626126126126, "grad_norm": 0.9574597406592085, "learning_rate": 4.672927604266844e-06, "loss": 0.403, "step": 18183 }, { "epoch": 1.7064564564564564, "grad_norm": 1.3690647983599327, "learning_rate": 4.672382810429444e-06, "loss": 0.358, "step": 18184 }, { "epoch": 1.7065503003003002, "grad_norm": 0.9675077590371177, "learning_rate": 4.67183802049826e-06, "loss": 0.349, "step": 18185 }, { "epoch": 1.706644144144144, "grad_norm": 0.9756509040096323, "learning_rate": 4.671293234479792e-06, "loss": 0.3973, "step": 18186 }, { "epoch": 1.706737987987988, "grad_norm": 0.9304582544141182, "learning_rate": 4.670748452380535e-06, "loss": 0.4127, "step": 18187 }, { "epoch": 1.7068318318318318, "grad_norm": 0.9784554220310578, "learning_rate": 4.67020367420698e-06, "loss": 0.3857, "step": 18188 }, { "epoch": 1.7069256756756757, "grad_norm": 0.8696753829125603, "learning_rate": 4.669658899965627e-06, "loss": 0.3734, "step": 18189 }, { "epoch": 1.7070195195195195, "grad_norm": 0.8998914554598614, "learning_rate": 4.669114129662971e-06, "loss": 0.384, "step": 18190 }, { "epoch": 1.7071133633633635, "grad_norm": 0.9866902600139459, "learning_rate": 4.668569363305502e-06, "loss": 0.394, "step": 18191 }, { "epoch": 1.7072072072072073, "grad_norm": 1.068365775694743, "learning_rate": 4.668024600899723e-06, "loss": 0.369, "step": 18192 }, { "epoch": 1.7073010510510511, "grad_norm": 0.9655616621827704, "learning_rate": 4.667479842452126e-06, "loss": 0.418, "step": 18193 }, { "epoch": 1.707394894894895, "grad_norm": 1.3785759514146299, "learning_rate": 4.666935087969204e-06, "loss": 0.4423, "step": 18194 }, { "epoch": 1.7074887387387387, "grad_norm": 1.1204957194630805, "learning_rate": 4.666390337457456e-06, "loss": 0.3996, "step": 18195 }, { "epoch": 1.7075825825825826, "grad_norm": 1.2053913173657025, "learning_rate": 4.665845590923375e-06, "loss": 0.371, "step": 18196 }, { "epoch": 1.7076764264264264, "grad_norm": 0.9433195968348516, "learning_rate": 4.665300848373453e-06, "loss": 0.4113, "step": 18197 }, { "epoch": 1.7077702702702702, "grad_norm": 1.011879512125595, "learning_rate": 4.6647561098141915e-06, "loss": 0.3977, "step": 18198 }, { "epoch": 1.707864114114114, "grad_norm": 1.008738890329611, "learning_rate": 4.6642113752520825e-06, "loss": 0.4005, "step": 18199 }, { "epoch": 1.7079579579579578, "grad_norm": 1.019505529241888, "learning_rate": 4.663666644693619e-06, "loss": 0.3558, "step": 18200 }, { "epoch": 1.7080518018018018, "grad_norm": 0.856660376155459, "learning_rate": 4.6631219181453004e-06, "loss": 0.3842, "step": 18201 }, { "epoch": 1.7081456456456456, "grad_norm": 0.9185108941123695, "learning_rate": 4.662577195613618e-06, "loss": 0.34, "step": 18202 }, { "epoch": 1.7082394894894894, "grad_norm": 0.9113650276408389, "learning_rate": 4.6620324771050655e-06, "loss": 0.4247, "step": 18203 }, { "epoch": 1.7083333333333335, "grad_norm": 0.8999036160757036, "learning_rate": 4.661487762626142e-06, "loss": 0.3415, "step": 18204 }, { "epoch": 1.7084271771771773, "grad_norm": 0.9381390165964991, "learning_rate": 4.660943052183338e-06, "loss": 0.4051, "step": 18205 }, { "epoch": 1.708521021021021, "grad_norm": 1.9550301050413363, "learning_rate": 4.660398345783149e-06, "loss": 0.3712, "step": 18206 }, { "epoch": 1.708614864864865, "grad_norm": 0.8143549240119935, "learning_rate": 4.659853643432072e-06, "loss": 0.3305, "step": 18207 }, { "epoch": 1.7087087087087087, "grad_norm": 1.0914570741481142, "learning_rate": 4.659308945136601e-06, "loss": 0.3823, "step": 18208 }, { "epoch": 1.7088025525525525, "grad_norm": 0.8657654681745439, "learning_rate": 4.6587642509032275e-06, "loss": 0.3517, "step": 18209 }, { "epoch": 1.7088963963963963, "grad_norm": 0.9146157548366509, "learning_rate": 4.65821956073845e-06, "loss": 0.3917, "step": 18210 }, { "epoch": 1.7089902402402402, "grad_norm": 0.9717357595423926, "learning_rate": 4.65767487464876e-06, "loss": 0.3635, "step": 18211 }, { "epoch": 1.709084084084084, "grad_norm": 0.8727823864641225, "learning_rate": 4.657130192640651e-06, "loss": 0.3849, "step": 18212 }, { "epoch": 1.7091779279279278, "grad_norm": 1.0878297828315378, "learning_rate": 4.656585514720621e-06, "loss": 0.3726, "step": 18213 }, { "epoch": 1.7092717717717718, "grad_norm": 1.7070084877925082, "learning_rate": 4.656040840895163e-06, "loss": 0.3708, "step": 18214 }, { "epoch": 1.7093656156156156, "grad_norm": 0.8784955958521414, "learning_rate": 4.655496171170769e-06, "loss": 0.3988, "step": 18215 }, { "epoch": 1.7094594594594594, "grad_norm": 0.8530235302831138, "learning_rate": 4.654951505553936e-06, "loss": 0.4211, "step": 18216 }, { "epoch": 1.7095533033033035, "grad_norm": 1.1881002844771034, "learning_rate": 4.654406844051156e-06, "loss": 0.4305, "step": 18217 }, { "epoch": 1.7096471471471473, "grad_norm": 0.9445908279295118, "learning_rate": 4.6538621866689225e-06, "loss": 0.4069, "step": 18218 }, { "epoch": 1.709740990990991, "grad_norm": 1.5247969015153904, "learning_rate": 4.653317533413732e-06, "loss": 0.4007, "step": 18219 }, { "epoch": 1.709834834834835, "grad_norm": 1.006407536047093, "learning_rate": 4.652772884292079e-06, "loss": 0.4152, "step": 18220 }, { "epoch": 1.7099286786786787, "grad_norm": 0.8964280031462374, "learning_rate": 4.652228239310454e-06, "loss": 0.3865, "step": 18221 }, { "epoch": 1.7100225225225225, "grad_norm": 0.7884309239994044, "learning_rate": 4.651683598475353e-06, "loss": 0.3866, "step": 18222 }, { "epoch": 1.7101163663663663, "grad_norm": 0.8793685853199136, "learning_rate": 4.6511389617932705e-06, "loss": 0.3822, "step": 18223 }, { "epoch": 1.7102102102102101, "grad_norm": 0.9465431157744431, "learning_rate": 4.6505943292706966e-06, "loss": 0.4024, "step": 18224 }, { "epoch": 1.710304054054054, "grad_norm": 0.996008764479961, "learning_rate": 4.65004970091413e-06, "loss": 0.4268, "step": 18225 }, { "epoch": 1.7103978978978978, "grad_norm": 0.7600821133379344, "learning_rate": 4.649505076730062e-06, "loss": 0.3185, "step": 18226 }, { "epoch": 1.7104917417417418, "grad_norm": 1.0849857108703587, "learning_rate": 4.648960456724984e-06, "loss": 0.4045, "step": 18227 }, { "epoch": 1.7105855855855856, "grad_norm": 0.860986718296094, "learning_rate": 4.6484158409053945e-06, "loss": 0.3944, "step": 18228 }, { "epoch": 1.7106794294294294, "grad_norm": 0.9178069135376846, "learning_rate": 4.647871229277784e-06, "loss": 0.402, "step": 18229 }, { "epoch": 1.7107732732732732, "grad_norm": 1.1482085615448294, "learning_rate": 4.6473266218486434e-06, "loss": 0.4156, "step": 18230 }, { "epoch": 1.7108671171171173, "grad_norm": 1.062657610721987, "learning_rate": 4.6467820186244716e-06, "loss": 0.4119, "step": 18231 }, { "epoch": 1.710960960960961, "grad_norm": 0.8452093706167275, "learning_rate": 4.646237419611759e-06, "loss": 0.4245, "step": 18232 }, { "epoch": 1.7110548048048049, "grad_norm": 0.8844910676100846, "learning_rate": 4.6456928248169985e-06, "loss": 0.3654, "step": 18233 }, { "epoch": 1.7111486486486487, "grad_norm": 0.9601830719887626, "learning_rate": 4.645148234246685e-06, "loss": 0.4272, "step": 18234 }, { "epoch": 1.7112424924924925, "grad_norm": 0.9589908724056435, "learning_rate": 4.644603647907312e-06, "loss": 0.4003, "step": 18235 }, { "epoch": 1.7113363363363363, "grad_norm": 0.990491728216672, "learning_rate": 4.6440590658053684e-06, "loss": 0.3946, "step": 18236 }, { "epoch": 1.7114301801801801, "grad_norm": 0.8711340337750944, "learning_rate": 4.643514487947352e-06, "loss": 0.3993, "step": 18237 }, { "epoch": 1.711524024024024, "grad_norm": 1.226246454347602, "learning_rate": 4.642969914339755e-06, "loss": 0.4038, "step": 18238 }, { "epoch": 1.7116178678678677, "grad_norm": 1.0067223141956088, "learning_rate": 4.642425344989069e-06, "loss": 0.4129, "step": 18239 }, { "epoch": 1.7117117117117115, "grad_norm": 1.0295520236828433, "learning_rate": 4.641880779901789e-06, "loss": 0.4112, "step": 18240 }, { "epoch": 1.7118055555555556, "grad_norm": 1.0973418809793518, "learning_rate": 4.641336219084405e-06, "loss": 0.4361, "step": 18241 }, { "epoch": 1.7118993993993994, "grad_norm": 0.8704174836303399, "learning_rate": 4.64079166254341e-06, "loss": 0.3532, "step": 18242 }, { "epoch": 1.7119932432432432, "grad_norm": 0.9539369853000672, "learning_rate": 4.640247110285301e-06, "loss": 0.3428, "step": 18243 }, { "epoch": 1.7120870870870872, "grad_norm": 1.0442155752396953, "learning_rate": 4.639702562316568e-06, "loss": 0.3643, "step": 18244 }, { "epoch": 1.712180930930931, "grad_norm": 1.057680071190274, "learning_rate": 4.639158018643702e-06, "loss": 0.4062, "step": 18245 }, { "epoch": 1.7122747747747749, "grad_norm": 1.032176874001252, "learning_rate": 4.6386134792732e-06, "loss": 0.3729, "step": 18246 }, { "epoch": 1.7123686186186187, "grad_norm": 3.875364856238154, "learning_rate": 4.63806894421155e-06, "loss": 0.3981, "step": 18247 }, { "epoch": 1.7124624624624625, "grad_norm": 1.0675695207011044, "learning_rate": 4.637524413465246e-06, "loss": 0.4307, "step": 18248 }, { "epoch": 1.7125563063063063, "grad_norm": 0.8532859991974379, "learning_rate": 4.636979887040782e-06, "loss": 0.4047, "step": 18249 }, { "epoch": 1.71265015015015, "grad_norm": 0.9261736817789252, "learning_rate": 4.636435364944651e-06, "loss": 0.3808, "step": 18250 }, { "epoch": 1.712743993993994, "grad_norm": 0.9638129806621907, "learning_rate": 4.635890847183342e-06, "loss": 0.3955, "step": 18251 }, { "epoch": 1.7128378378378377, "grad_norm": 1.1521750953800238, "learning_rate": 4.63534633376335e-06, "loss": 0.349, "step": 18252 }, { "epoch": 1.7129316816816815, "grad_norm": 0.9740591611855387, "learning_rate": 4.634801824691168e-06, "loss": 0.4059, "step": 18253 }, { "epoch": 1.7130255255255256, "grad_norm": 0.9839025309640765, "learning_rate": 4.634257319973283e-06, "loss": 0.4028, "step": 18254 }, { "epoch": 1.7131193693693694, "grad_norm": 0.8380838818313288, "learning_rate": 4.633712819616194e-06, "loss": 0.3723, "step": 18255 }, { "epoch": 1.7132132132132132, "grad_norm": 0.9403076891618181, "learning_rate": 4.63316832362639e-06, "loss": 0.4196, "step": 18256 }, { "epoch": 1.7133070570570572, "grad_norm": 0.9521475766085109, "learning_rate": 4.632623832010361e-06, "loss": 0.3679, "step": 18257 }, { "epoch": 1.713400900900901, "grad_norm": 0.9738867293372528, "learning_rate": 4.6320793447746045e-06, "loss": 0.3854, "step": 18258 }, { "epoch": 1.7134947447447448, "grad_norm": 1.3714576392221505, "learning_rate": 4.631534861925608e-06, "loss": 0.4016, "step": 18259 }, { "epoch": 1.7135885885885886, "grad_norm": 1.067865916793947, "learning_rate": 4.6309903834698625e-06, "loss": 0.4046, "step": 18260 }, { "epoch": 1.7136824324324325, "grad_norm": 0.8148741491799425, "learning_rate": 4.630445909413865e-06, "loss": 0.3879, "step": 18261 }, { "epoch": 1.7137762762762763, "grad_norm": 0.8648760273081528, "learning_rate": 4.629901439764104e-06, "loss": 0.3964, "step": 18262 }, { "epoch": 1.71387012012012, "grad_norm": 0.902105545680815, "learning_rate": 4.6293569745270706e-06, "loss": 0.4009, "step": 18263 }, { "epoch": 1.7139639639639639, "grad_norm": 0.853257842245149, "learning_rate": 4.628812513709258e-06, "loss": 0.3785, "step": 18264 }, { "epoch": 1.7140578078078077, "grad_norm": 0.849124303581189, "learning_rate": 4.628268057317159e-06, "loss": 0.3746, "step": 18265 }, { "epoch": 1.7141516516516515, "grad_norm": 0.9870303687642769, "learning_rate": 4.6277236053572604e-06, "loss": 0.4033, "step": 18266 }, { "epoch": 1.7142454954954955, "grad_norm": 1.0264117663109271, "learning_rate": 4.62717915783606e-06, "loss": 0.4207, "step": 18267 }, { "epoch": 1.7143393393393394, "grad_norm": 1.5858340418106536, "learning_rate": 4.626634714760046e-06, "loss": 0.4034, "step": 18268 }, { "epoch": 1.7144331831831832, "grad_norm": 1.0940863634560172, "learning_rate": 4.6260902761357094e-06, "loss": 0.3924, "step": 18269 }, { "epoch": 1.714527027027027, "grad_norm": 0.9610282622835581, "learning_rate": 4.6255458419695425e-06, "loss": 0.4247, "step": 18270 }, { "epoch": 1.714620870870871, "grad_norm": 0.8399894146031804, "learning_rate": 4.625001412268038e-06, "loss": 0.3772, "step": 18271 }, { "epoch": 1.7147147147147148, "grad_norm": 1.0751416884868727, "learning_rate": 4.624456987037682e-06, "loss": 0.4334, "step": 18272 }, { "epoch": 1.7148085585585586, "grad_norm": 2.2041075326047537, "learning_rate": 4.623912566284973e-06, "loss": 0.3928, "step": 18273 }, { "epoch": 1.7149024024024024, "grad_norm": 0.8837259507731395, "learning_rate": 4.623368150016398e-06, "loss": 0.3685, "step": 18274 }, { "epoch": 1.7149962462462462, "grad_norm": 0.8623166927633712, "learning_rate": 4.622823738238449e-06, "loss": 0.4242, "step": 18275 }, { "epoch": 1.71509009009009, "grad_norm": 1.0923243290821703, "learning_rate": 4.622279330957617e-06, "loss": 0.3934, "step": 18276 }, { "epoch": 1.7151839339339339, "grad_norm": 0.9526232793352075, "learning_rate": 4.621734928180393e-06, "loss": 0.3779, "step": 18277 }, { "epoch": 1.7152777777777777, "grad_norm": 0.9004097583547606, "learning_rate": 4.6211905299132664e-06, "loss": 0.3851, "step": 18278 }, { "epoch": 1.7153716216216215, "grad_norm": 0.9661958146560816, "learning_rate": 4.620646136162731e-06, "loss": 0.416, "step": 18279 }, { "epoch": 1.7154654654654653, "grad_norm": 0.99936647422345, "learning_rate": 4.620101746935277e-06, "loss": 0.409, "step": 18280 }, { "epoch": 1.7155593093093093, "grad_norm": 0.954326111540662, "learning_rate": 4.619557362237394e-06, "loss": 0.3765, "step": 18281 }, { "epoch": 1.7156531531531531, "grad_norm": 0.9698253193459752, "learning_rate": 4.619012982075574e-06, "loss": 0.3634, "step": 18282 }, { "epoch": 1.715746996996997, "grad_norm": 0.9745782984799288, "learning_rate": 4.618468606456306e-06, "loss": 0.4095, "step": 18283 }, { "epoch": 1.715840840840841, "grad_norm": 0.8133540590690157, "learning_rate": 4.617924235386081e-06, "loss": 0.3763, "step": 18284 }, { "epoch": 1.7159346846846848, "grad_norm": 1.2850550093743018, "learning_rate": 4.617379868871391e-06, "loss": 0.3821, "step": 18285 }, { "epoch": 1.7160285285285286, "grad_norm": 0.8873480160776438, "learning_rate": 4.616835506918727e-06, "loss": 0.3985, "step": 18286 }, { "epoch": 1.7161223723723724, "grad_norm": 0.9604390852802173, "learning_rate": 4.616291149534578e-06, "loss": 0.4183, "step": 18287 }, { "epoch": 1.7162162162162162, "grad_norm": 0.8460922885174392, "learning_rate": 4.615746796725434e-06, "loss": 0.3983, "step": 18288 }, { "epoch": 1.71631006006006, "grad_norm": 0.8334614177134858, "learning_rate": 4.615202448497787e-06, "loss": 0.3723, "step": 18289 }, { "epoch": 1.7164039039039038, "grad_norm": 0.9719819595904577, "learning_rate": 4.6146581048581245e-06, "loss": 0.3973, "step": 18290 }, { "epoch": 1.7164977477477477, "grad_norm": 1.0336020795262308, "learning_rate": 4.614113765812941e-06, "loss": 0.4224, "step": 18291 }, { "epoch": 1.7165915915915915, "grad_norm": 0.8307116582081342, "learning_rate": 4.613569431368724e-06, "loss": 0.369, "step": 18292 }, { "epoch": 1.7166854354354353, "grad_norm": 1.2013757943986463, "learning_rate": 4.613025101531963e-06, "loss": 0.3614, "step": 18293 }, { "epoch": 1.7167792792792793, "grad_norm": 1.010213055506771, "learning_rate": 4.612480776309151e-06, "loss": 0.4096, "step": 18294 }, { "epoch": 1.7168731231231231, "grad_norm": 1.049388613910875, "learning_rate": 4.611936455706776e-06, "loss": 0.433, "step": 18295 }, { "epoch": 1.716966966966967, "grad_norm": 0.8368525914610817, "learning_rate": 4.611392139731326e-06, "loss": 0.3774, "step": 18296 }, { "epoch": 1.717060810810811, "grad_norm": 0.8593261121204513, "learning_rate": 4.610847828389296e-06, "loss": 0.4015, "step": 18297 }, { "epoch": 1.7171546546546548, "grad_norm": 1.023651469219808, "learning_rate": 4.6103035216871725e-06, "loss": 0.4209, "step": 18298 }, { "epoch": 1.7172484984984986, "grad_norm": 1.007017795413685, "learning_rate": 4.609759219631445e-06, "loss": 0.3962, "step": 18299 }, { "epoch": 1.7173423423423424, "grad_norm": 0.9871976622305435, "learning_rate": 4.609214922228605e-06, "loss": 0.4031, "step": 18300 }, { "epoch": 1.7174361861861862, "grad_norm": 1.5534067412164725, "learning_rate": 4.608670629485142e-06, "loss": 0.3939, "step": 18301 }, { "epoch": 1.71753003003003, "grad_norm": 1.036450871047892, "learning_rate": 4.608126341407544e-06, "loss": 0.3904, "step": 18302 }, { "epoch": 1.7176238738738738, "grad_norm": 0.9112339748214607, "learning_rate": 4.607582058002302e-06, "loss": 0.4192, "step": 18303 }, { "epoch": 1.7177177177177176, "grad_norm": 0.9467522482614844, "learning_rate": 4.607037779275907e-06, "loss": 0.4288, "step": 18304 }, { "epoch": 1.7178115615615615, "grad_norm": 0.8501127754961112, "learning_rate": 4.6064935052348445e-06, "loss": 0.3678, "step": 18305 }, { "epoch": 1.7179054054054053, "grad_norm": 1.7746674436828598, "learning_rate": 4.605949235885607e-06, "loss": 0.3908, "step": 18306 }, { "epoch": 1.7179992492492493, "grad_norm": 1.0463383279029315, "learning_rate": 4.6054049712346835e-06, "loss": 0.4267, "step": 18307 }, { "epoch": 1.718093093093093, "grad_norm": 1.0308712754437623, "learning_rate": 4.604860711288561e-06, "loss": 0.3594, "step": 18308 }, { "epoch": 1.718186936936937, "grad_norm": 0.9577213776888182, "learning_rate": 4.604316456053734e-06, "loss": 0.4306, "step": 18309 }, { "epoch": 1.7182807807807807, "grad_norm": 0.9613823197191689, "learning_rate": 4.6037722055366874e-06, "loss": 0.4239, "step": 18310 }, { "epoch": 1.7183746246246248, "grad_norm": 1.3663970906021643, "learning_rate": 4.60322795974391e-06, "loss": 0.3967, "step": 18311 }, { "epoch": 1.7184684684684686, "grad_norm": 1.0153294042334118, "learning_rate": 4.602683718681894e-06, "loss": 0.3989, "step": 18312 }, { "epoch": 1.7185623123123124, "grad_norm": 0.9893669362941275, "learning_rate": 4.602139482357126e-06, "loss": 0.4489, "step": 18313 }, { "epoch": 1.7186561561561562, "grad_norm": 0.8650567593104098, "learning_rate": 4.601595250776094e-06, "loss": 0.4047, "step": 18314 }, { "epoch": 1.71875, "grad_norm": 0.990801236538371, "learning_rate": 4.601051023945292e-06, "loss": 0.3058, "step": 18315 }, { "epoch": 1.7188438438438438, "grad_norm": 1.1659998392621136, "learning_rate": 4.6005068018712046e-06, "loss": 0.4271, "step": 18316 }, { "epoch": 1.7189376876876876, "grad_norm": 0.9538605773802404, "learning_rate": 4.5999625845603206e-06, "loss": 0.3741, "step": 18317 }, { "epoch": 1.7190315315315314, "grad_norm": 1.32880251007786, "learning_rate": 4.599418372019131e-06, "loss": 0.4347, "step": 18318 }, { "epoch": 1.7191253753753752, "grad_norm": 0.945365332830014, "learning_rate": 4.5988741642541234e-06, "loss": 0.4235, "step": 18319 }, { "epoch": 1.7192192192192193, "grad_norm": 0.9846520010896749, "learning_rate": 4.5983299612717835e-06, "loss": 0.3923, "step": 18320 }, { "epoch": 1.719313063063063, "grad_norm": 1.066877028125822, "learning_rate": 4.597785763078605e-06, "loss": 0.3774, "step": 18321 }, { "epoch": 1.719406906906907, "grad_norm": 0.8581617820425976, "learning_rate": 4.597241569681075e-06, "loss": 0.3499, "step": 18322 }, { "epoch": 1.7195007507507507, "grad_norm": 1.4436935630707797, "learning_rate": 4.596697381085678e-06, "loss": 0.3777, "step": 18323 }, { "epoch": 1.7195945945945947, "grad_norm": 1.18435574251254, "learning_rate": 4.596153197298911e-06, "loss": 0.3455, "step": 18324 }, { "epoch": 1.7196884384384385, "grad_norm": 0.9163488320859406, "learning_rate": 4.595609018327254e-06, "loss": 0.399, "step": 18325 }, { "epoch": 1.7197822822822824, "grad_norm": 0.9033094086051754, "learning_rate": 4.595064844177196e-06, "loss": 0.4111, "step": 18326 }, { "epoch": 1.7198761261261262, "grad_norm": 0.9377039319620776, "learning_rate": 4.59452067485523e-06, "loss": 0.3539, "step": 18327 }, { "epoch": 1.71996996996997, "grad_norm": 1.0010481125280506, "learning_rate": 4.593976510367842e-06, "loss": 0.4086, "step": 18328 }, { "epoch": 1.7200638138138138, "grad_norm": 0.9465865127437421, "learning_rate": 4.593432350721518e-06, "loss": 0.3198, "step": 18329 }, { "epoch": 1.7201576576576576, "grad_norm": 0.9877971168055755, "learning_rate": 4.5928881959227485e-06, "loss": 0.4628, "step": 18330 }, { "epoch": 1.7202515015015014, "grad_norm": 0.949389500311267, "learning_rate": 4.592344045978024e-06, "loss": 0.4248, "step": 18331 }, { "epoch": 1.7203453453453452, "grad_norm": 0.9179454051720974, "learning_rate": 4.5917999008938255e-06, "loss": 0.3876, "step": 18332 }, { "epoch": 1.720439189189189, "grad_norm": 1.7313450430365842, "learning_rate": 4.591255760676646e-06, "loss": 0.3997, "step": 18333 }, { "epoch": 1.720533033033033, "grad_norm": 0.9640599904455519, "learning_rate": 4.590711625332973e-06, "loss": 0.4157, "step": 18334 }, { "epoch": 1.7206268768768769, "grad_norm": 0.8804085379853822, "learning_rate": 4.590167494869292e-06, "loss": 0.4122, "step": 18335 }, { "epoch": 1.7207207207207207, "grad_norm": 0.8827582311966038, "learning_rate": 4.589623369292093e-06, "loss": 0.3927, "step": 18336 }, { "epoch": 1.7208145645645647, "grad_norm": 1.031167278099768, "learning_rate": 4.589079248607863e-06, "loss": 0.3865, "step": 18337 }, { "epoch": 1.7209084084084085, "grad_norm": 1.317454774530814, "learning_rate": 4.588535132823089e-06, "loss": 0.3709, "step": 18338 }, { "epoch": 1.7210022522522523, "grad_norm": 0.9801040726512931, "learning_rate": 4.58799102194426e-06, "loss": 0.3759, "step": 18339 }, { "epoch": 1.7210960960960962, "grad_norm": 0.9422803180214571, "learning_rate": 4.587446915977863e-06, "loss": 0.4081, "step": 18340 }, { "epoch": 1.72118993993994, "grad_norm": 1.0827585104254809, "learning_rate": 4.586902814930382e-06, "loss": 0.4276, "step": 18341 }, { "epoch": 1.7212837837837838, "grad_norm": 0.962292516799236, "learning_rate": 4.58635871880831e-06, "loss": 0.3846, "step": 18342 }, { "epoch": 1.7213776276276276, "grad_norm": 0.9854385706607522, "learning_rate": 4.585814627618132e-06, "loss": 0.4554, "step": 18343 }, { "epoch": 1.7214714714714714, "grad_norm": 1.0567724160689702, "learning_rate": 4.585270541366334e-06, "loss": 0.3766, "step": 18344 }, { "epoch": 1.7215653153153152, "grad_norm": 0.966228932418933, "learning_rate": 4.5847264600594034e-06, "loss": 0.3711, "step": 18345 }, { "epoch": 1.721659159159159, "grad_norm": 0.9799839816484726, "learning_rate": 4.5841823837038296e-06, "loss": 0.4556, "step": 18346 }, { "epoch": 1.721753003003003, "grad_norm": 0.8189168336455055, "learning_rate": 4.583638312306095e-06, "loss": 0.3802, "step": 18347 }, { "epoch": 1.7218468468468469, "grad_norm": 0.9644525648788584, "learning_rate": 4.583094245872693e-06, "loss": 0.426, "step": 18348 }, { "epoch": 1.7219406906906907, "grad_norm": 0.9544163865205711, "learning_rate": 4.582550184410108e-06, "loss": 0.3499, "step": 18349 }, { "epoch": 1.7220345345345347, "grad_norm": 1.0470728884057976, "learning_rate": 4.582006127924824e-06, "loss": 0.3471, "step": 18350 }, { "epoch": 1.7221283783783785, "grad_norm": 0.8625117792246281, "learning_rate": 4.581462076423331e-06, "loss": 0.3896, "step": 18351 }, { "epoch": 1.7222222222222223, "grad_norm": 0.8936181911576144, "learning_rate": 4.580918029912116e-06, "loss": 0.3529, "step": 18352 }, { "epoch": 1.7223160660660661, "grad_norm": 0.8837638721140039, "learning_rate": 4.580373988397662e-06, "loss": 0.3723, "step": 18353 }, { "epoch": 1.72240990990991, "grad_norm": 0.9186479316637811, "learning_rate": 4.57982995188646e-06, "loss": 0.3709, "step": 18354 }, { "epoch": 1.7225037537537538, "grad_norm": 1.138505054675403, "learning_rate": 4.579285920384995e-06, "loss": 0.4177, "step": 18355 }, { "epoch": 1.7225975975975976, "grad_norm": 1.0093915168263845, "learning_rate": 4.578741893899753e-06, "loss": 0.4042, "step": 18356 }, { "epoch": 1.7226914414414414, "grad_norm": 1.2438027598896175, "learning_rate": 4.578197872437222e-06, "loss": 0.4283, "step": 18357 }, { "epoch": 1.7227852852852852, "grad_norm": 1.0011800845285965, "learning_rate": 4.577653856003888e-06, "loss": 0.4229, "step": 18358 }, { "epoch": 1.722879129129129, "grad_norm": 0.8392847118667472, "learning_rate": 4.577109844606234e-06, "loss": 0.3468, "step": 18359 }, { "epoch": 1.722972972972973, "grad_norm": 1.1882899579727246, "learning_rate": 4.576565838250751e-06, "loss": 0.3917, "step": 18360 }, { "epoch": 1.7230668168168168, "grad_norm": 0.9868561312839008, "learning_rate": 4.576021836943923e-06, "loss": 0.3926, "step": 18361 }, { "epoch": 1.7231606606606606, "grad_norm": 1.0078611085184181, "learning_rate": 4.575477840692236e-06, "loss": 0.4056, "step": 18362 }, { "epoch": 1.7232545045045045, "grad_norm": 0.882586808360309, "learning_rate": 4.574933849502177e-06, "loss": 0.4366, "step": 18363 }, { "epoch": 1.7233483483483485, "grad_norm": 1.6674064755005986, "learning_rate": 4.574389863380232e-06, "loss": 0.4461, "step": 18364 }, { "epoch": 1.7234421921921923, "grad_norm": 0.9203457165935411, "learning_rate": 4.573845882332886e-06, "loss": 0.4355, "step": 18365 }, { "epoch": 1.7235360360360361, "grad_norm": 1.0734429692529985, "learning_rate": 4.573301906366627e-06, "loss": 0.4154, "step": 18366 }, { "epoch": 1.72362987987988, "grad_norm": 0.9497321630324644, "learning_rate": 4.572757935487939e-06, "loss": 0.3889, "step": 18367 }, { "epoch": 1.7237237237237237, "grad_norm": 0.8798359268357178, "learning_rate": 4.572213969703308e-06, "loss": 0.3908, "step": 18368 }, { "epoch": 1.7238175675675675, "grad_norm": 1.1521853740828174, "learning_rate": 4.571670009019221e-06, "loss": 0.4028, "step": 18369 }, { "epoch": 1.7239114114114114, "grad_norm": 1.0069215790117558, "learning_rate": 4.571126053442163e-06, "loss": 0.3715, "step": 18370 }, { "epoch": 1.7240052552552552, "grad_norm": 0.9864468729324046, "learning_rate": 4.5705821029786165e-06, "loss": 0.3887, "step": 18371 }, { "epoch": 1.724099099099099, "grad_norm": 1.0085103492137464, "learning_rate": 4.570038157635074e-06, "loss": 0.4047, "step": 18372 }, { "epoch": 1.7241929429429428, "grad_norm": 0.8626334190878959, "learning_rate": 4.5694942174180156e-06, "loss": 0.3707, "step": 18373 }, { "epoch": 1.7242867867867868, "grad_norm": 1.8313364246410309, "learning_rate": 4.568950282333928e-06, "loss": 0.3829, "step": 18374 }, { "epoch": 1.7243806306306306, "grad_norm": 1.0057540232175983, "learning_rate": 4.568406352389298e-06, "loss": 0.4109, "step": 18375 }, { "epoch": 1.7244744744744744, "grad_norm": 1.1600821155063508, "learning_rate": 4.56786242759061e-06, "loss": 0.408, "step": 18376 }, { "epoch": 1.7245683183183185, "grad_norm": 0.8825772018029717, "learning_rate": 4.567318507944346e-06, "loss": 0.403, "step": 18377 }, { "epoch": 1.7246621621621623, "grad_norm": 1.0163726728817672, "learning_rate": 4.566774593456998e-06, "loss": 0.4079, "step": 18378 }, { "epoch": 1.724756006006006, "grad_norm": 1.0592100153503958, "learning_rate": 4.566230684135046e-06, "loss": 0.3891, "step": 18379 }, { "epoch": 1.72484984984985, "grad_norm": 0.9753850636333664, "learning_rate": 4.565686779984976e-06, "loss": 0.3741, "step": 18380 }, { "epoch": 1.7249436936936937, "grad_norm": 1.0238915028707756, "learning_rate": 4.565142881013275e-06, "loss": 0.3995, "step": 18381 }, { "epoch": 1.7250375375375375, "grad_norm": 0.866791031054406, "learning_rate": 4.564598987226427e-06, "loss": 0.3938, "step": 18382 }, { "epoch": 1.7251313813813813, "grad_norm": 1.1355682487945273, "learning_rate": 4.564055098630914e-06, "loss": 0.3615, "step": 18383 }, { "epoch": 1.7252252252252251, "grad_norm": 0.9672605961702134, "learning_rate": 4.563511215233226e-06, "loss": 0.3976, "step": 18384 }, { "epoch": 1.725319069069069, "grad_norm": 1.1064406294525932, "learning_rate": 4.562967337039844e-06, "loss": 0.3389, "step": 18385 }, { "epoch": 1.7254129129129128, "grad_norm": 0.8323780800420895, "learning_rate": 4.562423464057254e-06, "loss": 0.3368, "step": 18386 }, { "epoch": 1.7255067567567568, "grad_norm": 0.9725418821555875, "learning_rate": 4.5618795962919405e-06, "loss": 0.422, "step": 18387 }, { "epoch": 1.7256006006006006, "grad_norm": 1.3273078588412002, "learning_rate": 4.561335733750389e-06, "loss": 0.3608, "step": 18388 }, { "epoch": 1.7256944444444444, "grad_norm": 0.8868339933885109, "learning_rate": 4.56079187643908e-06, "loss": 0.443, "step": 18389 }, { "epoch": 1.7257882882882885, "grad_norm": 0.8674736024305919, "learning_rate": 4.560248024364504e-06, "loss": 0.3803, "step": 18390 }, { "epoch": 1.7258821321321323, "grad_norm": 1.0160608531846818, "learning_rate": 4.559704177533143e-06, "loss": 0.4573, "step": 18391 }, { "epoch": 1.725975975975976, "grad_norm": 1.0174564030338644, "learning_rate": 4.559160335951479e-06, "loss": 0.3746, "step": 18392 }, { "epoch": 1.7260698198198199, "grad_norm": 0.8717772291582168, "learning_rate": 4.558616499626e-06, "loss": 0.4111, "step": 18393 }, { "epoch": 1.7261636636636637, "grad_norm": 0.868923266641027, "learning_rate": 4.558072668563188e-06, "loss": 0.4069, "step": 18394 }, { "epoch": 1.7262575075075075, "grad_norm": 0.9392190193260975, "learning_rate": 4.557528842769525e-06, "loss": 0.3904, "step": 18395 }, { "epoch": 1.7263513513513513, "grad_norm": 1.067920580188786, "learning_rate": 4.556985022251501e-06, "loss": 0.424, "step": 18396 }, { "epoch": 1.7264451951951951, "grad_norm": 1.0665625342823029, "learning_rate": 4.556441207015596e-06, "loss": 0.4127, "step": 18397 }, { "epoch": 1.726539039039039, "grad_norm": 1.1286259675231052, "learning_rate": 4.555897397068293e-06, "loss": 0.4135, "step": 18398 }, { "epoch": 1.7266328828828827, "grad_norm": 0.9763304507746201, "learning_rate": 4.55535359241608e-06, "loss": 0.4046, "step": 18399 }, { "epoch": 1.7267267267267268, "grad_norm": 0.9614231984265501, "learning_rate": 4.554809793065438e-06, "loss": 0.3808, "step": 18400 }, { "epoch": 1.7268205705705706, "grad_norm": 1.038267151368231, "learning_rate": 4.5542659990228485e-06, "loss": 0.3835, "step": 18401 }, { "epoch": 1.7269144144144144, "grad_norm": 1.132286755333119, "learning_rate": 4.5537222102948005e-06, "loss": 0.4012, "step": 18402 }, { "epoch": 1.7270082582582582, "grad_norm": 0.8818769824297923, "learning_rate": 4.553178426887775e-06, "loss": 0.4094, "step": 18403 }, { "epoch": 1.7271021021021022, "grad_norm": 0.9406817799647854, "learning_rate": 4.5526346488082555e-06, "loss": 0.3826, "step": 18404 }, { "epoch": 1.727195945945946, "grad_norm": 1.1311819505047098, "learning_rate": 4.552090876062726e-06, "loss": 0.3671, "step": 18405 }, { "epoch": 1.7272897897897899, "grad_norm": 0.9537591506822022, "learning_rate": 4.551547108657669e-06, "loss": 0.391, "step": 18406 }, { "epoch": 1.7273836336336337, "grad_norm": 0.8853361113280981, "learning_rate": 4.551003346599568e-06, "loss": 0.353, "step": 18407 }, { "epoch": 1.7274774774774775, "grad_norm": 0.9930295575545511, "learning_rate": 4.550459589894908e-06, "loss": 0.3892, "step": 18408 }, { "epoch": 1.7275713213213213, "grad_norm": 0.9657716046973369, "learning_rate": 4.549915838550172e-06, "loss": 0.4193, "step": 18409 }, { "epoch": 1.727665165165165, "grad_norm": 0.9288587503390212, "learning_rate": 4.54937209257184e-06, "loss": 0.4276, "step": 18410 }, { "epoch": 1.727759009009009, "grad_norm": 1.0017174018541326, "learning_rate": 4.5488283519664e-06, "loss": 0.3589, "step": 18411 }, { "epoch": 1.7278528528528527, "grad_norm": 0.9033031748537297, "learning_rate": 4.5482846167403324e-06, "loss": 0.3858, "step": 18412 }, { "epoch": 1.7279466966966965, "grad_norm": 1.0890595710681141, "learning_rate": 4.547740886900117e-06, "loss": 0.4297, "step": 18413 }, { "epoch": 1.7280405405405406, "grad_norm": 1.6201426904529548, "learning_rate": 4.547197162452244e-06, "loss": 0.4267, "step": 18414 }, { "epoch": 1.7281343843843844, "grad_norm": 1.0988590094234003, "learning_rate": 4.5466534434031926e-06, "loss": 0.3843, "step": 18415 }, { "epoch": 1.7282282282282282, "grad_norm": 1.0613502039065628, "learning_rate": 4.546109729759443e-06, "loss": 0.3767, "step": 18416 }, { "epoch": 1.7283220720720722, "grad_norm": 0.9148710394742975, "learning_rate": 4.5455660215274834e-06, "loss": 0.4252, "step": 18417 }, { "epoch": 1.728415915915916, "grad_norm": 0.942486432430085, "learning_rate": 4.5450223187137925e-06, "loss": 0.4162, "step": 18418 }, { "epoch": 1.7285097597597598, "grad_norm": 0.8899647615944923, "learning_rate": 4.544478621324852e-06, "loss": 0.3609, "step": 18419 }, { "epoch": 1.7286036036036037, "grad_norm": 1.0884951736500126, "learning_rate": 4.543934929367149e-06, "loss": 0.4093, "step": 18420 }, { "epoch": 1.7286974474474475, "grad_norm": 0.9351833932531657, "learning_rate": 4.543391242847162e-06, "loss": 0.3572, "step": 18421 }, { "epoch": 1.7287912912912913, "grad_norm": 1.099001329268018, "learning_rate": 4.542847561771376e-06, "loss": 0.4141, "step": 18422 }, { "epoch": 1.728885135135135, "grad_norm": 0.9047403570649118, "learning_rate": 4.542303886146273e-06, "loss": 0.3877, "step": 18423 }, { "epoch": 1.728978978978979, "grad_norm": 0.7725745035027165, "learning_rate": 4.541760215978334e-06, "loss": 0.3179, "step": 18424 }, { "epoch": 1.7290728228228227, "grad_norm": 0.9506722027569565, "learning_rate": 4.54121655127404e-06, "loss": 0.3657, "step": 18425 }, { "epoch": 1.7291666666666665, "grad_norm": 1.4078661769613656, "learning_rate": 4.5406728920398765e-06, "loss": 0.4421, "step": 18426 }, { "epoch": 1.7292605105105106, "grad_norm": 0.872156668885557, "learning_rate": 4.540129238282325e-06, "loss": 0.3984, "step": 18427 }, { "epoch": 1.7293543543543544, "grad_norm": 0.888383218261407, "learning_rate": 4.539585590007866e-06, "loss": 0.4105, "step": 18428 }, { "epoch": 1.7294481981981982, "grad_norm": 0.8618666440850149, "learning_rate": 4.539041947222982e-06, "loss": 0.3989, "step": 18429 }, { "epoch": 1.7295420420420422, "grad_norm": 1.071907975665646, "learning_rate": 4.538498309934157e-06, "loss": 0.3977, "step": 18430 }, { "epoch": 1.729635885885886, "grad_norm": 0.9684507620301946, "learning_rate": 4.537954678147867e-06, "loss": 0.4317, "step": 18431 }, { "epoch": 1.7297297297297298, "grad_norm": 1.0440106761039694, "learning_rate": 4.5374110518706e-06, "loss": 0.4219, "step": 18432 }, { "epoch": 1.7298235735735736, "grad_norm": 0.9209261667600881, "learning_rate": 4.536867431108836e-06, "loss": 0.3681, "step": 18433 }, { "epoch": 1.7299174174174174, "grad_norm": 0.9659937969491977, "learning_rate": 4.536323815869055e-06, "loss": 0.4095, "step": 18434 }, { "epoch": 1.7300112612612613, "grad_norm": 0.9912397498336424, "learning_rate": 4.535780206157741e-06, "loss": 0.3738, "step": 18435 }, { "epoch": 1.730105105105105, "grad_norm": 0.9023152113585869, "learning_rate": 4.535236601981374e-06, "loss": 0.4382, "step": 18436 }, { "epoch": 1.7301989489489489, "grad_norm": 1.1291658727063978, "learning_rate": 4.534693003346434e-06, "loss": 0.4251, "step": 18437 }, { "epoch": 1.7302927927927927, "grad_norm": 1.0950973163086875, "learning_rate": 4.534149410259405e-06, "loss": 0.388, "step": 18438 }, { "epoch": 1.7303866366366365, "grad_norm": 1.1754749548190673, "learning_rate": 4.533605822726769e-06, "loss": 0.3821, "step": 18439 }, { "epoch": 1.7304804804804805, "grad_norm": 0.8613475675477589, "learning_rate": 4.533062240755004e-06, "loss": 0.395, "step": 18440 }, { "epoch": 1.7305743243243243, "grad_norm": 0.9617219527896405, "learning_rate": 4.532518664350593e-06, "loss": 0.4019, "step": 18441 }, { "epoch": 1.7306681681681682, "grad_norm": 1.0120617005657861, "learning_rate": 4.531975093520018e-06, "loss": 0.4177, "step": 18442 }, { "epoch": 1.730762012012012, "grad_norm": 0.8602103138100168, "learning_rate": 4.531431528269757e-06, "loss": 0.3784, "step": 18443 }, { "epoch": 1.730855855855856, "grad_norm": 1.0556823537163151, "learning_rate": 4.530887968606295e-06, "loss": 0.4256, "step": 18444 }, { "epoch": 1.7309496996996998, "grad_norm": 0.9114352378173889, "learning_rate": 4.53034441453611e-06, "loss": 0.3854, "step": 18445 }, { "epoch": 1.7310435435435436, "grad_norm": 1.0115492385631324, "learning_rate": 4.529800866065683e-06, "loss": 0.436, "step": 18446 }, { "epoch": 1.7311373873873874, "grad_norm": 0.9242230075421447, "learning_rate": 4.529257323201498e-06, "loss": 0.4221, "step": 18447 }, { "epoch": 1.7312312312312312, "grad_norm": 1.2059630169619757, "learning_rate": 4.528713785950033e-06, "loss": 0.4132, "step": 18448 }, { "epoch": 1.731325075075075, "grad_norm": 0.8338077039445194, "learning_rate": 4.528170254317766e-06, "loss": 0.393, "step": 18449 }, { "epoch": 1.7314189189189189, "grad_norm": 0.7814615973326822, "learning_rate": 4.527626728311183e-06, "loss": 0.3887, "step": 18450 }, { "epoch": 1.7315127627627627, "grad_norm": 0.9136655821892945, "learning_rate": 4.5270832079367625e-06, "loss": 0.3993, "step": 18451 }, { "epoch": 1.7316066066066065, "grad_norm": 0.9492733801431524, "learning_rate": 4.526539693200982e-06, "loss": 0.3684, "step": 18452 }, { "epoch": 1.7317004504504503, "grad_norm": 0.8751360772073165, "learning_rate": 4.525996184110327e-06, "loss": 0.3676, "step": 18453 }, { "epoch": 1.7317942942942943, "grad_norm": 0.9671283301102458, "learning_rate": 4.5254526806712766e-06, "loss": 0.3862, "step": 18454 }, { "epoch": 1.7318881381381381, "grad_norm": 0.9632138415957352, "learning_rate": 4.5249091828903055e-06, "loss": 0.3975, "step": 18455 }, { "epoch": 1.731981981981982, "grad_norm": 0.8849543389822146, "learning_rate": 4.524365690773901e-06, "loss": 0.3739, "step": 18456 }, { "epoch": 1.732075825825826, "grad_norm": 1.8180644238136652, "learning_rate": 4.52382220432854e-06, "loss": 0.3474, "step": 18457 }, { "epoch": 1.7321696696696698, "grad_norm": 0.936426466967585, "learning_rate": 4.5232787235607015e-06, "loss": 0.3895, "step": 18458 }, { "epoch": 1.7322635135135136, "grad_norm": 0.9181100946518462, "learning_rate": 4.522735248476868e-06, "loss": 0.3688, "step": 18459 }, { "epoch": 1.7323573573573574, "grad_norm": 0.8536903627592928, "learning_rate": 4.522191779083519e-06, "loss": 0.3137, "step": 18460 }, { "epoch": 1.7324512012012012, "grad_norm": 0.8747038901305936, "learning_rate": 4.521648315387132e-06, "loss": 0.3848, "step": 18461 }, { "epoch": 1.732545045045045, "grad_norm": 1.620524353253841, "learning_rate": 4.5211048573941904e-06, "loss": 0.3742, "step": 18462 }, { "epoch": 1.7326388888888888, "grad_norm": 1.012338716581379, "learning_rate": 4.520561405111171e-06, "loss": 0.3581, "step": 18463 }, { "epoch": 1.7327327327327327, "grad_norm": 0.8984511628355251, "learning_rate": 4.520017958544553e-06, "loss": 0.3608, "step": 18464 }, { "epoch": 1.7328265765765765, "grad_norm": 0.9979139049118659, "learning_rate": 4.519474517700819e-06, "loss": 0.4413, "step": 18465 }, { "epoch": 1.7329204204204203, "grad_norm": 0.9098106323979254, "learning_rate": 4.5189310825864474e-06, "loss": 0.3382, "step": 18466 }, { "epoch": 1.7330142642642643, "grad_norm": 0.8970130290954444, "learning_rate": 4.518387653207916e-06, "loss": 0.3615, "step": 18467 }, { "epoch": 1.7331081081081081, "grad_norm": 0.8574611842693661, "learning_rate": 4.517844229571705e-06, "loss": 0.4017, "step": 18468 }, { "epoch": 1.733201951951952, "grad_norm": 0.9155203001247698, "learning_rate": 4.5173008116842955e-06, "loss": 0.4023, "step": 18469 }, { "epoch": 1.733295795795796, "grad_norm": 0.8810082501557716, "learning_rate": 4.516757399552163e-06, "loss": 0.3744, "step": 18470 }, { "epoch": 1.7333896396396398, "grad_norm": 0.9348693890954367, "learning_rate": 4.5162139931817905e-06, "loss": 0.4287, "step": 18471 }, { "epoch": 1.7334834834834836, "grad_norm": 0.8290475309429906, "learning_rate": 4.515670592579656e-06, "loss": 0.4304, "step": 18472 }, { "epoch": 1.7335773273273274, "grad_norm": 1.0695706150603146, "learning_rate": 4.515127197752237e-06, "loss": 0.4098, "step": 18473 }, { "epoch": 1.7336711711711712, "grad_norm": 0.9488336746725469, "learning_rate": 4.514583808706014e-06, "loss": 0.3988, "step": 18474 }, { "epoch": 1.733765015015015, "grad_norm": 0.9758273574160861, "learning_rate": 4.514040425447465e-06, "loss": 0.3984, "step": 18475 }, { "epoch": 1.7338588588588588, "grad_norm": 0.8514672753919127, "learning_rate": 4.513497047983068e-06, "loss": 0.3645, "step": 18476 }, { "epoch": 1.7339527027027026, "grad_norm": 0.8733417799932386, "learning_rate": 4.512953676319305e-06, "loss": 0.4077, "step": 18477 }, { "epoch": 1.7340465465465464, "grad_norm": 0.9958869381503586, "learning_rate": 4.5124103104626525e-06, "loss": 0.3972, "step": 18478 }, { "epoch": 1.7341403903903903, "grad_norm": 0.8617668412998681, "learning_rate": 4.511866950419588e-06, "loss": 0.3809, "step": 18479 }, { "epoch": 1.7342342342342343, "grad_norm": 0.9438832925733087, "learning_rate": 4.511323596196592e-06, "loss": 0.4122, "step": 18480 }, { "epoch": 1.734328078078078, "grad_norm": 0.9548480061214087, "learning_rate": 4.510780247800143e-06, "loss": 0.3715, "step": 18481 }, { "epoch": 1.734421921921922, "grad_norm": 0.9269217251213538, "learning_rate": 4.510236905236716e-06, "loss": 0.3927, "step": 18482 }, { "epoch": 1.7345157657657657, "grad_norm": 1.0666575875188276, "learning_rate": 4.509693568512794e-06, "loss": 0.4125, "step": 18483 }, { "epoch": 1.7346096096096097, "grad_norm": 0.9274323577200848, "learning_rate": 4.509150237634854e-06, "loss": 0.4027, "step": 18484 }, { "epoch": 1.7347034534534536, "grad_norm": 0.8961219401235316, "learning_rate": 4.508606912609373e-06, "loss": 0.3695, "step": 18485 }, { "epoch": 1.7347972972972974, "grad_norm": 0.8205561076014497, "learning_rate": 4.508063593442829e-06, "loss": 0.4089, "step": 18486 }, { "epoch": 1.7348911411411412, "grad_norm": 0.9060605409406046, "learning_rate": 4.507520280141702e-06, "loss": 0.3817, "step": 18487 }, { "epoch": 1.734984984984985, "grad_norm": 0.936376355988066, "learning_rate": 4.506976972712467e-06, "loss": 0.4098, "step": 18488 }, { "epoch": 1.7350788288288288, "grad_norm": 0.9480914831242031, "learning_rate": 4.506433671161603e-06, "loss": 0.3962, "step": 18489 }, { "epoch": 1.7351726726726726, "grad_norm": 0.8789856157362612, "learning_rate": 4.50589037549559e-06, "loss": 0.3957, "step": 18490 }, { "epoch": 1.7352665165165164, "grad_norm": 0.9294970467606737, "learning_rate": 4.505347085720904e-06, "loss": 0.3534, "step": 18491 }, { "epoch": 1.7353603603603602, "grad_norm": 1.4667484150586931, "learning_rate": 4.504803801844024e-06, "loss": 0.3391, "step": 18492 }, { "epoch": 1.735454204204204, "grad_norm": 1.2618537087379778, "learning_rate": 4.504260523871425e-06, "loss": 0.4038, "step": 18493 }, { "epoch": 1.735548048048048, "grad_norm": 1.01059633155356, "learning_rate": 4.503717251809585e-06, "loss": 0.3637, "step": 18494 }, { "epoch": 1.7356418918918919, "grad_norm": 0.9783285705301991, "learning_rate": 4.503173985664985e-06, "loss": 0.3759, "step": 18495 }, { "epoch": 1.7357357357357357, "grad_norm": 0.9573310059100171, "learning_rate": 4.5026307254441e-06, "loss": 0.3314, "step": 18496 }, { "epoch": 1.7358295795795797, "grad_norm": 1.3177668110435803, "learning_rate": 4.502087471153406e-06, "loss": 0.4031, "step": 18497 }, { "epoch": 1.7359234234234235, "grad_norm": 1.2049675085655844, "learning_rate": 4.501544222799382e-06, "loss": 0.3846, "step": 18498 }, { "epoch": 1.7360172672672673, "grad_norm": 0.98869313930284, "learning_rate": 4.5010009803885056e-06, "loss": 0.4426, "step": 18499 }, { "epoch": 1.7361111111111112, "grad_norm": 1.0013112265383985, "learning_rate": 4.500457743927251e-06, "loss": 0.4385, "step": 18500 }, { "epoch": 1.736204954954955, "grad_norm": 1.633632632181118, "learning_rate": 4.4999145134221e-06, "loss": 0.3808, "step": 18501 }, { "epoch": 1.7362987987987988, "grad_norm": 0.8470289717176654, "learning_rate": 4.499371288879527e-06, "loss": 0.4259, "step": 18502 }, { "epoch": 1.7363926426426426, "grad_norm": 0.967968053423667, "learning_rate": 4.498828070306008e-06, "loss": 0.4081, "step": 18503 }, { "epoch": 1.7364864864864864, "grad_norm": 0.9894717735849726, "learning_rate": 4.4982848577080215e-06, "loss": 0.3551, "step": 18504 }, { "epoch": 1.7365803303303302, "grad_norm": 0.9514356011013148, "learning_rate": 4.497741651092044e-06, "loss": 0.3847, "step": 18505 }, { "epoch": 1.736674174174174, "grad_norm": 0.9720972629658806, "learning_rate": 4.49719845046455e-06, "loss": 0.4236, "step": 18506 }, { "epoch": 1.736768018018018, "grad_norm": 1.2429641315705775, "learning_rate": 4.49665525583202e-06, "loss": 0.4016, "step": 18507 }, { "epoch": 1.7368618618618619, "grad_norm": 1.810543583115681, "learning_rate": 4.496112067200929e-06, "loss": 0.3755, "step": 18508 }, { "epoch": 1.7369557057057057, "grad_norm": 0.9141137583273569, "learning_rate": 4.495568884577751e-06, "loss": 0.3957, "step": 18509 }, { "epoch": 1.7370495495495497, "grad_norm": 1.0822135598040565, "learning_rate": 4.495025707968966e-06, "loss": 0.437, "step": 18510 }, { "epoch": 1.7371433933933935, "grad_norm": 0.9194064528666149, "learning_rate": 4.49448253738105e-06, "loss": 0.3754, "step": 18511 }, { "epoch": 1.7372372372372373, "grad_norm": 0.9266969467607623, "learning_rate": 4.493939372820475e-06, "loss": 0.3779, "step": 18512 }, { "epoch": 1.7373310810810811, "grad_norm": 0.98810056735708, "learning_rate": 4.493396214293722e-06, "loss": 0.3849, "step": 18513 }, { "epoch": 1.737424924924925, "grad_norm": 1.3412146803022034, "learning_rate": 4.492853061807267e-06, "loss": 0.3872, "step": 18514 }, { "epoch": 1.7375187687687688, "grad_norm": 0.9995005100179023, "learning_rate": 4.492309915367583e-06, "loss": 0.3992, "step": 18515 }, { "epoch": 1.7376126126126126, "grad_norm": 0.7848267817902007, "learning_rate": 4.491766774981149e-06, "loss": 0.3459, "step": 18516 }, { "epoch": 1.7377064564564564, "grad_norm": 0.9879977247352313, "learning_rate": 4.4912236406544385e-06, "loss": 0.3583, "step": 18517 }, { "epoch": 1.7378003003003002, "grad_norm": 1.3614109224353705, "learning_rate": 4.490680512393928e-06, "loss": 0.4035, "step": 18518 }, { "epoch": 1.737894144144144, "grad_norm": 0.8511076008750054, "learning_rate": 4.490137390206094e-06, "loss": 0.3388, "step": 18519 }, { "epoch": 1.737987987987988, "grad_norm": 0.8930126478770736, "learning_rate": 4.4895942740974125e-06, "loss": 0.3696, "step": 18520 }, { "epoch": 1.7380818318318318, "grad_norm": 0.9608795582855826, "learning_rate": 4.489051164074358e-06, "loss": 0.4046, "step": 18521 }, { "epoch": 1.7381756756756757, "grad_norm": 1.1163292695687788, "learning_rate": 4.488508060143407e-06, "loss": 0.3724, "step": 18522 }, { "epoch": 1.7382695195195195, "grad_norm": 1.1322708814899178, "learning_rate": 4.4879649623110345e-06, "loss": 0.411, "step": 18523 }, { "epoch": 1.7383633633633635, "grad_norm": 1.0574524844749151, "learning_rate": 4.4874218705837145e-06, "loss": 0.4192, "step": 18524 }, { "epoch": 1.7384572072072073, "grad_norm": 1.331405154767199, "learning_rate": 4.486878784967926e-06, "loss": 0.3864, "step": 18525 }, { "epoch": 1.7385510510510511, "grad_norm": 1.0504676918879872, "learning_rate": 4.486335705470142e-06, "loss": 0.4013, "step": 18526 }, { "epoch": 1.738644894894895, "grad_norm": 1.0062469425377103, "learning_rate": 4.485792632096837e-06, "loss": 0.4424, "step": 18527 }, { "epoch": 1.7387387387387387, "grad_norm": 0.9042830090499367, "learning_rate": 4.485249564854488e-06, "loss": 0.358, "step": 18528 }, { "epoch": 1.7388325825825826, "grad_norm": 1.4762798906206611, "learning_rate": 4.484706503749568e-06, "loss": 0.3871, "step": 18529 }, { "epoch": 1.7389264264264264, "grad_norm": 0.8935565165751161, "learning_rate": 4.484163448788552e-06, "loss": 0.3856, "step": 18530 }, { "epoch": 1.7390202702702702, "grad_norm": 0.8811004503862029, "learning_rate": 4.483620399977918e-06, "loss": 0.4165, "step": 18531 }, { "epoch": 1.739114114114114, "grad_norm": 1.0043858383777506, "learning_rate": 4.483077357324139e-06, "loss": 0.3928, "step": 18532 }, { "epoch": 1.7392079579579578, "grad_norm": 0.935892088359513, "learning_rate": 4.482534320833688e-06, "loss": 0.4221, "step": 18533 }, { "epoch": 1.7393018018018018, "grad_norm": 1.0270770174088653, "learning_rate": 4.481991290513041e-06, "loss": 0.404, "step": 18534 }, { "epoch": 1.7393956456456456, "grad_norm": 0.9785538162073378, "learning_rate": 4.481448266368674e-06, "loss": 0.3963, "step": 18535 }, { "epoch": 1.7394894894894894, "grad_norm": 0.9349650041982704, "learning_rate": 4.480905248407059e-06, "loss": 0.3748, "step": 18536 }, { "epoch": 1.7395833333333335, "grad_norm": 0.8913720795585685, "learning_rate": 4.480362236634673e-06, "loss": 0.3888, "step": 18537 }, { "epoch": 1.7396771771771773, "grad_norm": 0.866997617034937, "learning_rate": 4.479819231057989e-06, "loss": 0.3888, "step": 18538 }, { "epoch": 1.739771021021021, "grad_norm": 0.9409605334932519, "learning_rate": 4.4792762316834805e-06, "loss": 0.4077, "step": 18539 }, { "epoch": 1.739864864864865, "grad_norm": 0.9624361100261601, "learning_rate": 4.478733238517625e-06, "loss": 0.3859, "step": 18540 }, { "epoch": 1.7399587087087087, "grad_norm": 1.3754582800002815, "learning_rate": 4.478190251566893e-06, "loss": 0.4119, "step": 18541 }, { "epoch": 1.7400525525525525, "grad_norm": 0.7631094693281439, "learning_rate": 4.477647270837759e-06, "loss": 0.3888, "step": 18542 }, { "epoch": 1.7401463963963963, "grad_norm": 1.021459849709038, "learning_rate": 4.477104296336699e-06, "loss": 0.4237, "step": 18543 }, { "epoch": 1.7402402402402402, "grad_norm": 1.0699531128883897, "learning_rate": 4.476561328070187e-06, "loss": 0.4166, "step": 18544 }, { "epoch": 1.740334084084084, "grad_norm": 0.8184199522965018, "learning_rate": 4.4760183660446945e-06, "loss": 0.3809, "step": 18545 }, { "epoch": 1.7404279279279278, "grad_norm": 1.348213544156987, "learning_rate": 4.475475410266698e-06, "loss": 0.4108, "step": 18546 }, { "epoch": 1.7405217717717718, "grad_norm": 1.1258176075023605, "learning_rate": 4.47493246074267e-06, "loss": 0.3492, "step": 18547 }, { "epoch": 1.7406156156156156, "grad_norm": 2.0095004984946927, "learning_rate": 4.474389517479082e-06, "loss": 0.3811, "step": 18548 }, { "epoch": 1.7407094594594594, "grad_norm": 2.862357851638087, "learning_rate": 4.47384658048241e-06, "loss": 0.3769, "step": 18549 }, { "epoch": 1.7408033033033035, "grad_norm": 1.1553964096438563, "learning_rate": 4.47330364975913e-06, "loss": 0.3871, "step": 18550 }, { "epoch": 1.7408971471471473, "grad_norm": 1.41258143799482, "learning_rate": 4.472760725315709e-06, "loss": 0.373, "step": 18551 }, { "epoch": 1.740990990990991, "grad_norm": 1.0339452021115576, "learning_rate": 4.4722178071586264e-06, "loss": 0.4171, "step": 18552 }, { "epoch": 1.741084834834835, "grad_norm": 0.8717141550542774, "learning_rate": 4.471674895294353e-06, "loss": 0.4364, "step": 18553 }, { "epoch": 1.7411786786786787, "grad_norm": 0.9004193879133298, "learning_rate": 4.471131989729359e-06, "loss": 0.3633, "step": 18554 }, { "epoch": 1.7412725225225225, "grad_norm": 1.0088083936930792, "learning_rate": 4.470589090470124e-06, "loss": 0.4352, "step": 18555 }, { "epoch": 1.7413663663663663, "grad_norm": 0.8034528033349482, "learning_rate": 4.470046197523117e-06, "loss": 0.3681, "step": 18556 }, { "epoch": 1.7414602102102101, "grad_norm": 1.0554332667835193, "learning_rate": 4.4695033108948095e-06, "loss": 0.3701, "step": 18557 }, { "epoch": 1.741554054054054, "grad_norm": 0.9601930587190791, "learning_rate": 4.4689604305916785e-06, "loss": 0.4003, "step": 18558 }, { "epoch": 1.7416478978978978, "grad_norm": 0.8331863572724537, "learning_rate": 4.468417556620194e-06, "loss": 0.4073, "step": 18559 }, { "epoch": 1.7417417417417418, "grad_norm": 0.9982075810345079, "learning_rate": 4.467874688986829e-06, "loss": 0.3903, "step": 18560 }, { "epoch": 1.7418355855855856, "grad_norm": 0.91150041747713, "learning_rate": 4.467331827698057e-06, "loss": 0.3934, "step": 18561 }, { "epoch": 1.7419294294294294, "grad_norm": 0.9658599682454827, "learning_rate": 4.466788972760351e-06, "loss": 0.3879, "step": 18562 }, { "epoch": 1.7420232732732732, "grad_norm": 0.8981418615232108, "learning_rate": 4.466246124180182e-06, "loss": 0.3658, "step": 18563 }, { "epoch": 1.7421171171171173, "grad_norm": 0.8763030613803624, "learning_rate": 4.465703281964024e-06, "loss": 0.4425, "step": 18564 }, { "epoch": 1.742210960960961, "grad_norm": 1.5266757007822178, "learning_rate": 4.46516044611835e-06, "loss": 0.3727, "step": 18565 }, { "epoch": 1.7423048048048049, "grad_norm": 1.0807319551682946, "learning_rate": 4.464617616649628e-06, "loss": 0.4058, "step": 18566 }, { "epoch": 1.7423986486486487, "grad_norm": 0.8092123735125382, "learning_rate": 4.4640747935643345e-06, "loss": 0.4041, "step": 18567 }, { "epoch": 1.7424924924924925, "grad_norm": 1.1207791784568264, "learning_rate": 4.46353197686894e-06, "loss": 0.3682, "step": 18568 }, { "epoch": 1.7425863363363363, "grad_norm": 1.3016859883379421, "learning_rate": 4.462989166569916e-06, "loss": 0.4099, "step": 18569 }, { "epoch": 1.7426801801801801, "grad_norm": 0.8737360419455991, "learning_rate": 4.4624463626737386e-06, "loss": 0.3747, "step": 18570 }, { "epoch": 1.742774024024024, "grad_norm": 0.9596194009289127, "learning_rate": 4.461903565186875e-06, "loss": 0.391, "step": 18571 }, { "epoch": 1.7428678678678677, "grad_norm": 0.8213848924979439, "learning_rate": 4.461360774115796e-06, "loss": 0.3739, "step": 18572 }, { "epoch": 1.7429617117117115, "grad_norm": 0.9331201202722041, "learning_rate": 4.460817989466978e-06, "loss": 0.3788, "step": 18573 }, { "epoch": 1.7430555555555556, "grad_norm": 0.8334504686800056, "learning_rate": 4.460275211246892e-06, "loss": 0.3107, "step": 18574 }, { "epoch": 1.7431493993993994, "grad_norm": 0.8589831374542181, "learning_rate": 4.459732439462004e-06, "loss": 0.3627, "step": 18575 }, { "epoch": 1.7432432432432432, "grad_norm": 0.992168291466863, "learning_rate": 4.459189674118794e-06, "loss": 0.4061, "step": 18576 }, { "epoch": 1.7433370870870872, "grad_norm": 0.9844586165055081, "learning_rate": 4.4586469152237285e-06, "loss": 0.3833, "step": 18577 }, { "epoch": 1.743430930930931, "grad_norm": 1.3154371572927395, "learning_rate": 4.458104162783277e-06, "loss": 0.4258, "step": 18578 }, { "epoch": 1.7435247747747749, "grad_norm": 1.0196923501763022, "learning_rate": 4.457561416803916e-06, "loss": 0.3779, "step": 18579 }, { "epoch": 1.7436186186186187, "grad_norm": 0.9242514912418093, "learning_rate": 4.457018677292113e-06, "loss": 0.401, "step": 18580 }, { "epoch": 1.7437124624624625, "grad_norm": 2.4897938806774604, "learning_rate": 4.456475944254338e-06, "loss": 0.4229, "step": 18581 }, { "epoch": 1.7438063063063063, "grad_norm": 1.3685844691816775, "learning_rate": 4.455933217697067e-06, "loss": 0.4404, "step": 18582 }, { "epoch": 1.74390015015015, "grad_norm": 0.9495895731467255, "learning_rate": 4.455390497626768e-06, "loss": 0.3895, "step": 18583 }, { "epoch": 1.743993993993994, "grad_norm": 1.025724759593351, "learning_rate": 4.454847784049912e-06, "loss": 0.4334, "step": 18584 }, { "epoch": 1.7440878378378377, "grad_norm": 0.8965519787068483, "learning_rate": 4.45430507697297e-06, "loss": 0.4255, "step": 18585 }, { "epoch": 1.7441816816816815, "grad_norm": 0.9518986115473227, "learning_rate": 4.453762376402414e-06, "loss": 0.3732, "step": 18586 }, { "epoch": 1.7442755255255256, "grad_norm": 0.9656433700213501, "learning_rate": 4.45321968234471e-06, "loss": 0.3917, "step": 18587 }, { "epoch": 1.7443693693693694, "grad_norm": 0.8603837185432749, "learning_rate": 4.4526769948063355e-06, "loss": 0.3621, "step": 18588 }, { "epoch": 1.7444632132132132, "grad_norm": 1.2383797777395327, "learning_rate": 4.452134313793757e-06, "loss": 0.4183, "step": 18589 }, { "epoch": 1.7445570570570572, "grad_norm": 1.0330184820279216, "learning_rate": 4.451591639313444e-06, "loss": 0.3668, "step": 18590 }, { "epoch": 1.744650900900901, "grad_norm": 0.9722110463346816, "learning_rate": 4.45104897137187e-06, "loss": 0.4195, "step": 18591 }, { "epoch": 1.7447447447447448, "grad_norm": 1.0357185661818917, "learning_rate": 4.450506309975503e-06, "loss": 0.3955, "step": 18592 }, { "epoch": 1.7448385885885886, "grad_norm": 0.9435493454310664, "learning_rate": 4.449963655130813e-06, "loss": 0.4241, "step": 18593 }, { "epoch": 1.7449324324324325, "grad_norm": 0.9195914206443179, "learning_rate": 4.449421006844273e-06, "loss": 0.3846, "step": 18594 }, { "epoch": 1.7450262762762763, "grad_norm": 1.3742641358346221, "learning_rate": 4.448878365122351e-06, "loss": 0.3645, "step": 18595 }, { "epoch": 1.74512012012012, "grad_norm": 0.9818715428964225, "learning_rate": 4.448335729971515e-06, "loss": 0.4299, "step": 18596 }, { "epoch": 1.7452139639639639, "grad_norm": 1.1722223425869263, "learning_rate": 4.447793101398239e-06, "loss": 0.3658, "step": 18597 }, { "epoch": 1.7453078078078077, "grad_norm": 0.9423942479469892, "learning_rate": 4.447250479408991e-06, "loss": 0.4094, "step": 18598 }, { "epoch": 1.7454016516516515, "grad_norm": 0.9619299318472319, "learning_rate": 4.446707864010238e-06, "loss": 0.426, "step": 18599 }, { "epoch": 1.7454954954954955, "grad_norm": 0.9110495769818476, "learning_rate": 4.4461652552084545e-06, "loss": 0.4074, "step": 18600 }, { "epoch": 1.7455893393393394, "grad_norm": 1.0329378086397567, "learning_rate": 4.445622653010108e-06, "loss": 0.425, "step": 18601 }, { "epoch": 1.7456831831831832, "grad_norm": 0.8785624276165751, "learning_rate": 4.4450800574216665e-06, "loss": 0.3908, "step": 18602 }, { "epoch": 1.745777027027027, "grad_norm": 1.2841880805014065, "learning_rate": 4.444537468449602e-06, "loss": 0.3875, "step": 18603 }, { "epoch": 1.745870870870871, "grad_norm": 0.9337067035979357, "learning_rate": 4.443994886100383e-06, "loss": 0.4003, "step": 18604 }, { "epoch": 1.7459647147147148, "grad_norm": 0.9004220440894753, "learning_rate": 4.443452310380476e-06, "loss": 0.3498, "step": 18605 }, { "epoch": 1.7460585585585586, "grad_norm": 1.1019864419926026, "learning_rate": 4.442909741296355e-06, "loss": 0.4329, "step": 18606 }, { "epoch": 1.7461524024024024, "grad_norm": 0.85175861857968, "learning_rate": 4.442367178854486e-06, "loss": 0.3783, "step": 18607 }, { "epoch": 1.7462462462462462, "grad_norm": 0.9488531424294033, "learning_rate": 4.441824623061338e-06, "loss": 0.375, "step": 18608 }, { "epoch": 1.74634009009009, "grad_norm": 1.2405489995891652, "learning_rate": 4.441282073923383e-06, "loss": 0.3552, "step": 18609 }, { "epoch": 1.7464339339339339, "grad_norm": 0.86750808990544, "learning_rate": 4.440739531447086e-06, "loss": 0.3683, "step": 18610 }, { "epoch": 1.7465277777777777, "grad_norm": 0.966196504307949, "learning_rate": 4.440196995638916e-06, "loss": 0.3971, "step": 18611 }, { "epoch": 1.7466216216216215, "grad_norm": 0.8919102567391232, "learning_rate": 4.439654466505345e-06, "loss": 0.3677, "step": 18612 }, { "epoch": 1.7467154654654653, "grad_norm": 0.9057257979383756, "learning_rate": 4.439111944052839e-06, "loss": 0.3629, "step": 18613 }, { "epoch": 1.7468093093093093, "grad_norm": 0.9532427172156417, "learning_rate": 4.438569428287867e-06, "loss": 0.4156, "step": 18614 }, { "epoch": 1.7469031531531531, "grad_norm": 0.8456037121291552, "learning_rate": 4.438026919216899e-06, "loss": 0.3888, "step": 18615 }, { "epoch": 1.746996996996997, "grad_norm": 0.9358466641673672, "learning_rate": 4.437484416846402e-06, "loss": 0.3759, "step": 18616 }, { "epoch": 1.747090840840841, "grad_norm": 0.9257129842373588, "learning_rate": 4.436941921182843e-06, "loss": 0.4213, "step": 18617 }, { "epoch": 1.7471846846846848, "grad_norm": 1.2142591122435082, "learning_rate": 4.436399432232693e-06, "loss": 0.4061, "step": 18618 }, { "epoch": 1.7472785285285286, "grad_norm": 0.9847757835512229, "learning_rate": 4.435856950002419e-06, "loss": 0.3561, "step": 18619 }, { "epoch": 1.7473723723723724, "grad_norm": 0.8918186821550652, "learning_rate": 4.4353144744984885e-06, "loss": 0.3869, "step": 18620 }, { "epoch": 1.7474662162162162, "grad_norm": 3.2232919697981957, "learning_rate": 4.43477200572737e-06, "loss": 0.3796, "step": 18621 }, { "epoch": 1.74756006006006, "grad_norm": 0.8875562016782563, "learning_rate": 4.434229543695533e-06, "loss": 0.3092, "step": 18622 }, { "epoch": 1.7476539039039038, "grad_norm": 0.9285914083200625, "learning_rate": 4.433687088409441e-06, "loss": 0.3887, "step": 18623 }, { "epoch": 1.7477477477477477, "grad_norm": 0.8687668049377848, "learning_rate": 4.433144639875566e-06, "loss": 0.3831, "step": 18624 }, { "epoch": 1.7478415915915915, "grad_norm": 1.0838900528384168, "learning_rate": 4.4326021981003745e-06, "loss": 0.4254, "step": 18625 }, { "epoch": 1.7479354354354353, "grad_norm": 0.96530633026943, "learning_rate": 4.432059763090332e-06, "loss": 0.4075, "step": 18626 }, { "epoch": 1.7480292792792793, "grad_norm": 1.1240194370344672, "learning_rate": 4.4315173348519096e-06, "loss": 0.3644, "step": 18627 }, { "epoch": 1.7481231231231231, "grad_norm": 1.1085194212993306, "learning_rate": 4.430974913391573e-06, "loss": 0.362, "step": 18628 }, { "epoch": 1.748216966966967, "grad_norm": 1.0042378122861522, "learning_rate": 4.430432498715787e-06, "loss": 0.4174, "step": 18629 }, { "epoch": 1.748310810810811, "grad_norm": 1.1179688636695448, "learning_rate": 4.429890090831024e-06, "loss": 0.363, "step": 18630 }, { "epoch": 1.7484046546546548, "grad_norm": 1.1356426680148601, "learning_rate": 4.429347689743748e-06, "loss": 0.3615, "step": 18631 }, { "epoch": 1.7484984984984986, "grad_norm": 0.9003799608462332, "learning_rate": 4.428805295460426e-06, "loss": 0.3743, "step": 18632 }, { "epoch": 1.7485923423423424, "grad_norm": 0.9842462220395357, "learning_rate": 4.428262907987527e-06, "loss": 0.4032, "step": 18633 }, { "epoch": 1.7486861861861862, "grad_norm": 0.9171655614479233, "learning_rate": 4.427720527331516e-06, "loss": 0.3949, "step": 18634 }, { "epoch": 1.74878003003003, "grad_norm": 1.211458083625517, "learning_rate": 4.427178153498859e-06, "loss": 0.3979, "step": 18635 }, { "epoch": 1.7488738738738738, "grad_norm": 1.0189749073999266, "learning_rate": 4.426635786496025e-06, "loss": 0.3716, "step": 18636 }, { "epoch": 1.7489677177177176, "grad_norm": 0.8643174359731742, "learning_rate": 4.426093426329482e-06, "loss": 0.3842, "step": 18637 }, { "epoch": 1.7490615615615615, "grad_norm": 1.0144906112015066, "learning_rate": 4.4255510730056915e-06, "loss": 0.3945, "step": 18638 }, { "epoch": 1.7491554054054053, "grad_norm": 1.0186893368113126, "learning_rate": 4.425008726531126e-06, "loss": 0.4277, "step": 18639 }, { "epoch": 1.7492492492492493, "grad_norm": 0.935990614391816, "learning_rate": 4.424466386912248e-06, "loss": 0.375, "step": 18640 }, { "epoch": 1.749343093093093, "grad_norm": 1.0743142539676407, "learning_rate": 4.423924054155523e-06, "loss": 0.4316, "step": 18641 }, { "epoch": 1.749436936936937, "grad_norm": 0.8692736354879879, "learning_rate": 4.4233817282674215e-06, "loss": 0.3617, "step": 18642 }, { "epoch": 1.7495307807807807, "grad_norm": 0.9576610941318254, "learning_rate": 4.422839409254408e-06, "loss": 0.4037, "step": 18643 }, { "epoch": 1.7496246246246248, "grad_norm": 1.1144951381731243, "learning_rate": 4.422297097122948e-06, "loss": 0.4065, "step": 18644 }, { "epoch": 1.7497184684684686, "grad_norm": 0.8471277706885726, "learning_rate": 4.421754791879507e-06, "loss": 0.385, "step": 18645 }, { "epoch": 1.7498123123123124, "grad_norm": 0.933130507261276, "learning_rate": 4.421212493530552e-06, "loss": 0.3583, "step": 18646 }, { "epoch": 1.7499061561561562, "grad_norm": 1.1145319620455683, "learning_rate": 4.420670202082547e-06, "loss": 0.404, "step": 18647 }, { "epoch": 1.75, "grad_norm": 1.3020521666135982, "learning_rate": 4.420127917541961e-06, "loss": 0.3877, "step": 18648 }, { "epoch": 1.7500938438438438, "grad_norm": 1.725516908146038, "learning_rate": 4.419585639915258e-06, "loss": 0.4709, "step": 18649 }, { "epoch": 1.7501876876876876, "grad_norm": 0.9173079154106442, "learning_rate": 4.419043369208903e-06, "loss": 0.3919, "step": 18650 }, { "epoch": 1.7502815315315314, "grad_norm": 1.0119866800720114, "learning_rate": 4.418501105429364e-06, "loss": 0.4198, "step": 18651 }, { "epoch": 1.7503753753753752, "grad_norm": 0.9364244607399445, "learning_rate": 4.417958848583103e-06, "loss": 0.4121, "step": 18652 }, { "epoch": 1.7504692192192193, "grad_norm": 0.9209395459801379, "learning_rate": 4.417416598676587e-06, "loss": 0.3818, "step": 18653 }, { "epoch": 1.750563063063063, "grad_norm": 0.9503629881810276, "learning_rate": 4.416874355716282e-06, "loss": 0.3872, "step": 18654 }, { "epoch": 1.750656906906907, "grad_norm": 0.9609677848274898, "learning_rate": 4.416332119708654e-06, "loss": 0.4122, "step": 18655 }, { "epoch": 1.7507507507507507, "grad_norm": 0.9592008568009749, "learning_rate": 4.4157898906601645e-06, "loss": 0.4005, "step": 18656 }, { "epoch": 1.7508445945945947, "grad_norm": 0.9492996673215301, "learning_rate": 4.415247668577283e-06, "loss": 0.4462, "step": 18657 }, { "epoch": 1.7509384384384385, "grad_norm": 0.9555783939748462, "learning_rate": 4.414705453466472e-06, "loss": 0.3823, "step": 18658 }, { "epoch": 1.7510322822822824, "grad_norm": 1.1312060770243173, "learning_rate": 4.414163245334194e-06, "loss": 0.4142, "step": 18659 }, { "epoch": 1.7511261261261262, "grad_norm": 0.9444250427047289, "learning_rate": 4.413621044186919e-06, "loss": 0.441, "step": 18660 }, { "epoch": 1.75121996996997, "grad_norm": 0.9699466603705643, "learning_rate": 4.41307885003111e-06, "loss": 0.4001, "step": 18661 }, { "epoch": 1.7513138138138138, "grad_norm": 0.8729854057382641, "learning_rate": 4.412536662873229e-06, "loss": 0.3481, "step": 18662 }, { "epoch": 1.7514076576576576, "grad_norm": 0.9087345467364508, "learning_rate": 4.411994482719744e-06, "loss": 0.4222, "step": 18663 }, { "epoch": 1.7515015015015014, "grad_norm": 0.8388000874129797, "learning_rate": 4.411452309577118e-06, "loss": 0.3622, "step": 18664 }, { "epoch": 1.7515953453453452, "grad_norm": 1.0260681107310807, "learning_rate": 4.410910143451814e-06, "loss": 0.3954, "step": 18665 }, { "epoch": 1.751689189189189, "grad_norm": 1.1155822508679065, "learning_rate": 4.410367984350299e-06, "loss": 0.3853, "step": 18666 }, { "epoch": 1.751783033033033, "grad_norm": 0.9576747847513345, "learning_rate": 4.409825832279036e-06, "loss": 0.4071, "step": 18667 }, { "epoch": 1.7518768768768769, "grad_norm": 10.67290150598407, "learning_rate": 4.409283687244488e-06, "loss": 0.3928, "step": 18668 }, { "epoch": 1.7519707207207207, "grad_norm": 0.8376453167675525, "learning_rate": 4.408741549253121e-06, "loss": 0.3759, "step": 18669 }, { "epoch": 1.7520645645645647, "grad_norm": 0.9587125034579033, "learning_rate": 4.408199418311399e-06, "loss": 0.4268, "step": 18670 }, { "epoch": 1.7521584084084085, "grad_norm": 0.8952203152080744, "learning_rate": 4.407657294425781e-06, "loss": 0.3684, "step": 18671 }, { "epoch": 1.7522522522522523, "grad_norm": 0.8845640629510964, "learning_rate": 4.4071151776027385e-06, "loss": 0.4758, "step": 18672 }, { "epoch": 1.7523460960960962, "grad_norm": 0.9512375153903311, "learning_rate": 4.406573067848731e-06, "loss": 0.4187, "step": 18673 }, { "epoch": 1.75243993993994, "grad_norm": 0.9370421320524797, "learning_rate": 4.406030965170222e-06, "loss": 0.4036, "step": 18674 }, { "epoch": 1.7525337837837838, "grad_norm": 0.9359951204591035, "learning_rate": 4.4054888695736765e-06, "loss": 0.4147, "step": 18675 }, { "epoch": 1.7526276276276276, "grad_norm": 1.866307695143369, "learning_rate": 4.404946781065556e-06, "loss": 0.3905, "step": 18676 }, { "epoch": 1.7527214714714714, "grad_norm": 0.851927573877992, "learning_rate": 4.4044046996523246e-06, "loss": 0.4086, "step": 18677 }, { "epoch": 1.7528153153153152, "grad_norm": 1.0243440438902853, "learning_rate": 4.403862625340448e-06, "loss": 0.3841, "step": 18678 }, { "epoch": 1.752909159159159, "grad_norm": 1.0170014778210381, "learning_rate": 4.403320558136387e-06, "loss": 0.3528, "step": 18679 }, { "epoch": 1.753003003003003, "grad_norm": 0.9141630372597962, "learning_rate": 4.402778498046604e-06, "loss": 0.4057, "step": 18680 }, { "epoch": 1.7530968468468469, "grad_norm": 1.0043890659062509, "learning_rate": 4.4022364450775644e-06, "loss": 0.3816, "step": 18681 }, { "epoch": 1.7531906906906907, "grad_norm": 0.9533337076058248, "learning_rate": 4.401694399235731e-06, "loss": 0.395, "step": 18682 }, { "epoch": 1.7532845345345347, "grad_norm": 0.9652860093003925, "learning_rate": 4.401152360527562e-06, "loss": 0.3924, "step": 18683 }, { "epoch": 1.7533783783783785, "grad_norm": 1.041807808961153, "learning_rate": 4.400610328959527e-06, "loss": 0.4037, "step": 18684 }, { "epoch": 1.7534722222222223, "grad_norm": 1.105198450529719, "learning_rate": 4.400068304538085e-06, "loss": 0.3867, "step": 18685 }, { "epoch": 1.7535660660660661, "grad_norm": 0.912572790192492, "learning_rate": 4.399526287269699e-06, "loss": 0.4105, "step": 18686 }, { "epoch": 1.75365990990991, "grad_norm": 0.8384538626575233, "learning_rate": 4.398984277160832e-06, "loss": 0.3905, "step": 18687 }, { "epoch": 1.7537537537537538, "grad_norm": 1.0657575766342717, "learning_rate": 4.3984422742179474e-06, "loss": 0.3937, "step": 18688 }, { "epoch": 1.7538475975975976, "grad_norm": 0.7956910042730397, "learning_rate": 4.397900278447503e-06, "loss": 0.424, "step": 18689 }, { "epoch": 1.7539414414414414, "grad_norm": 0.9007044975352058, "learning_rate": 4.397358289855967e-06, "loss": 0.3901, "step": 18690 }, { "epoch": 1.7540352852852852, "grad_norm": 1.5451782835176084, "learning_rate": 4.3968163084497985e-06, "loss": 0.3984, "step": 18691 }, { "epoch": 1.754129129129129, "grad_norm": 0.9774104422362578, "learning_rate": 4.396274334235459e-06, "loss": 0.371, "step": 18692 }, { "epoch": 1.754222972972973, "grad_norm": 0.9940961505639719, "learning_rate": 4.395732367219414e-06, "loss": 0.4129, "step": 18693 }, { "epoch": 1.7543168168168168, "grad_norm": 0.9415734887329933, "learning_rate": 4.395190407408123e-06, "loss": 0.3628, "step": 18694 }, { "epoch": 1.7544106606606606, "grad_norm": 0.8858218741668521, "learning_rate": 4.394648454808045e-06, "loss": 0.3878, "step": 18695 }, { "epoch": 1.7545045045045045, "grad_norm": 0.8355184027216168, "learning_rate": 4.3941065094256465e-06, "loss": 0.4368, "step": 18696 }, { "epoch": 1.7545983483483485, "grad_norm": 1.0696030308030788, "learning_rate": 4.393564571267389e-06, "loss": 0.423, "step": 18697 }, { "epoch": 1.7546921921921923, "grad_norm": 0.8769585792707447, "learning_rate": 4.393022640339729e-06, "loss": 0.3529, "step": 18698 }, { "epoch": 1.7547860360360361, "grad_norm": 0.9241922717167199, "learning_rate": 4.392480716649134e-06, "loss": 0.4052, "step": 18699 }, { "epoch": 1.75487987987988, "grad_norm": 0.8733672157235363, "learning_rate": 4.3919388002020635e-06, "loss": 0.3884, "step": 18700 }, { "epoch": 1.7549737237237237, "grad_norm": 0.9500047392995213, "learning_rate": 4.391396891004976e-06, "loss": 0.414, "step": 18701 }, { "epoch": 1.7550675675675675, "grad_norm": 1.068549972951478, "learning_rate": 4.390854989064336e-06, "loss": 0.4394, "step": 18702 }, { "epoch": 1.7551614114114114, "grad_norm": 24.262333258424857, "learning_rate": 4.390313094386605e-06, "loss": 0.4004, "step": 18703 }, { "epoch": 1.7552552552552552, "grad_norm": 0.9178780197307014, "learning_rate": 4.389771206978239e-06, "loss": 0.413, "step": 18704 }, { "epoch": 1.755349099099099, "grad_norm": 0.9727429434139319, "learning_rate": 4.389229326845707e-06, "loss": 0.3669, "step": 18705 }, { "epoch": 1.7554429429429428, "grad_norm": 0.9357233733660841, "learning_rate": 4.388687453995464e-06, "loss": 0.3641, "step": 18706 }, { "epoch": 1.7555367867867868, "grad_norm": 0.8987674188692775, "learning_rate": 4.3881455884339716e-06, "loss": 0.3551, "step": 18707 }, { "epoch": 1.7556306306306306, "grad_norm": 1.0097184639602519, "learning_rate": 4.387603730167692e-06, "loss": 0.4268, "step": 18708 }, { "epoch": 1.7557244744744744, "grad_norm": 0.8500999001671046, "learning_rate": 4.3870618792030865e-06, "loss": 0.3726, "step": 18709 }, { "epoch": 1.7558183183183185, "grad_norm": 1.0258832162533493, "learning_rate": 4.386520035546611e-06, "loss": 0.371, "step": 18710 }, { "epoch": 1.7559121621621623, "grad_norm": 0.9213991073845111, "learning_rate": 4.385978199204732e-06, "loss": 0.3764, "step": 18711 }, { "epoch": 1.756006006006006, "grad_norm": 1.033845351050858, "learning_rate": 4.385436370183907e-06, "loss": 0.3757, "step": 18712 }, { "epoch": 1.75609984984985, "grad_norm": 1.028461456876568, "learning_rate": 4.384894548490596e-06, "loss": 0.4152, "step": 18713 }, { "epoch": 1.7561936936936937, "grad_norm": 0.9274246659693851, "learning_rate": 4.384352734131259e-06, "loss": 0.3727, "step": 18714 }, { "epoch": 1.7562875375375375, "grad_norm": 1.127831566421717, "learning_rate": 4.383810927112359e-06, "loss": 0.429, "step": 18715 }, { "epoch": 1.7563813813813813, "grad_norm": 1.1759427146343389, "learning_rate": 4.383269127440351e-06, "loss": 0.4313, "step": 18716 }, { "epoch": 1.7564752252252251, "grad_norm": 0.9421516348427273, "learning_rate": 4.3827273351217e-06, "loss": 0.4397, "step": 18717 }, { "epoch": 1.756569069069069, "grad_norm": 0.9624625900535014, "learning_rate": 4.382185550162863e-06, "loss": 0.3857, "step": 18718 }, { "epoch": 1.7566629129129128, "grad_norm": 1.9141212692232248, "learning_rate": 4.3816437725703e-06, "loss": 0.4364, "step": 18719 }, { "epoch": 1.7567567567567568, "grad_norm": 0.850636914278036, "learning_rate": 4.3811020023504715e-06, "loss": 0.4257, "step": 18720 }, { "epoch": 1.7568506006006006, "grad_norm": 0.9386513114094484, "learning_rate": 4.380560239509837e-06, "loss": 0.3384, "step": 18721 }, { "epoch": 1.7569444444444444, "grad_norm": 1.0680116037572343, "learning_rate": 4.380018484054854e-06, "loss": 0.4093, "step": 18722 }, { "epoch": 1.7570382882882885, "grad_norm": 0.9077567142783703, "learning_rate": 4.379476735991985e-06, "loss": 0.4061, "step": 18723 }, { "epoch": 1.7571321321321323, "grad_norm": 0.972604109166369, "learning_rate": 4.378934995327688e-06, "loss": 0.4149, "step": 18724 }, { "epoch": 1.757225975975976, "grad_norm": 0.9204071249267245, "learning_rate": 4.378393262068421e-06, "loss": 0.4482, "step": 18725 }, { "epoch": 1.7573198198198199, "grad_norm": 0.9218604082751406, "learning_rate": 4.377851536220646e-06, "loss": 0.3882, "step": 18726 }, { "epoch": 1.7574136636636637, "grad_norm": 0.8731943101466897, "learning_rate": 4.3773098177908195e-06, "loss": 0.4041, "step": 18727 }, { "epoch": 1.7575075075075075, "grad_norm": 2.6421023520871882, "learning_rate": 4.3767681067854e-06, "loss": 0.418, "step": 18728 }, { "epoch": 1.7576013513513513, "grad_norm": 1.0834936452940525, "learning_rate": 4.376226403210849e-06, "loss": 0.4112, "step": 18729 }, { "epoch": 1.7576951951951951, "grad_norm": 0.8903467038249622, "learning_rate": 4.375684707073625e-06, "loss": 0.4001, "step": 18730 }, { "epoch": 1.757789039039039, "grad_norm": 0.950784334010296, "learning_rate": 4.375143018380184e-06, "loss": 0.3961, "step": 18731 }, { "epoch": 1.7578828828828827, "grad_norm": 0.9708065643605536, "learning_rate": 4.374601337136988e-06, "loss": 0.3869, "step": 18732 }, { "epoch": 1.7579767267267268, "grad_norm": 1.083684186006167, "learning_rate": 4.374059663350493e-06, "loss": 0.4237, "step": 18733 }, { "epoch": 1.7580705705705706, "grad_norm": 1.0216519054519353, "learning_rate": 4.373517997027157e-06, "loss": 0.4039, "step": 18734 }, { "epoch": 1.7581644144144144, "grad_norm": 0.8746181377963027, "learning_rate": 4.372976338173442e-06, "loss": 0.4025, "step": 18735 }, { "epoch": 1.7582582582582582, "grad_norm": 0.9659369644339765, "learning_rate": 4.372434686795802e-06, "loss": 0.3803, "step": 18736 }, { "epoch": 1.7583521021021022, "grad_norm": 0.9488213196293752, "learning_rate": 4.371893042900698e-06, "loss": 0.3759, "step": 18737 }, { "epoch": 1.758445945945946, "grad_norm": 0.8751950003499467, "learning_rate": 4.371351406494587e-06, "loss": 0.3946, "step": 18738 }, { "epoch": 1.7585397897897899, "grad_norm": 0.9880151104744023, "learning_rate": 4.370809777583928e-06, "loss": 0.4477, "step": 18739 }, { "epoch": 1.7586336336336337, "grad_norm": 1.1462567570572153, "learning_rate": 4.370268156175176e-06, "loss": 0.3948, "step": 18740 }, { "epoch": 1.7587274774774775, "grad_norm": 1.083076443847189, "learning_rate": 4.3697265422747925e-06, "loss": 0.3528, "step": 18741 }, { "epoch": 1.7588213213213213, "grad_norm": 0.9152394421727047, "learning_rate": 4.3691849358892335e-06, "loss": 0.4177, "step": 18742 }, { "epoch": 1.758915165165165, "grad_norm": 1.0897611953435384, "learning_rate": 4.368643337024956e-06, "loss": 0.3864, "step": 18743 }, { "epoch": 1.759009009009009, "grad_norm": 1.047130203845212, "learning_rate": 4.36810174568842e-06, "loss": 0.3689, "step": 18744 }, { "epoch": 1.7591028528528527, "grad_norm": 0.9329554753003, "learning_rate": 4.3675601618860804e-06, "loss": 0.394, "step": 18745 }, { "epoch": 1.7591966966966965, "grad_norm": 0.9606083802691019, "learning_rate": 4.367018585624393e-06, "loss": 0.4011, "step": 18746 }, { "epoch": 1.7592905405405406, "grad_norm": 0.8641356469427072, "learning_rate": 4.36647701690982e-06, "loss": 0.3744, "step": 18747 }, { "epoch": 1.7593843843843844, "grad_norm": 1.097434478787498, "learning_rate": 4.3659354557488165e-06, "loss": 0.4172, "step": 18748 }, { "epoch": 1.7594782282282282, "grad_norm": 1.2662968697931802, "learning_rate": 4.3653939021478384e-06, "loss": 0.3881, "step": 18749 }, { "epoch": 1.7595720720720722, "grad_norm": 0.8840604366728445, "learning_rate": 4.364852356113344e-06, "loss": 0.4272, "step": 18750 }, { "epoch": 1.759665915915916, "grad_norm": 1.0244192827203387, "learning_rate": 4.364310817651789e-06, "loss": 0.4138, "step": 18751 }, { "epoch": 1.7597597597597598, "grad_norm": 0.9967904698589218, "learning_rate": 4.36376928676963e-06, "loss": 0.4125, "step": 18752 }, { "epoch": 1.7598536036036037, "grad_norm": 1.0002659345365126, "learning_rate": 4.363227763473326e-06, "loss": 0.3887, "step": 18753 }, { "epoch": 1.7599474474474475, "grad_norm": 0.95699915924878, "learning_rate": 4.3626862477693324e-06, "loss": 0.3908, "step": 18754 }, { "epoch": 1.7600412912912913, "grad_norm": 0.9329261554437753, "learning_rate": 4.362144739664105e-06, "loss": 0.3701, "step": 18755 }, { "epoch": 1.760135135135135, "grad_norm": 1.2216045437905632, "learning_rate": 4.361603239164102e-06, "loss": 0.3676, "step": 18756 }, { "epoch": 1.760228978978979, "grad_norm": 1.0141919457040045, "learning_rate": 4.361061746275779e-06, "loss": 0.3825, "step": 18757 }, { "epoch": 1.7603228228228227, "grad_norm": 0.9683107611327413, "learning_rate": 4.360520261005589e-06, "loss": 0.3949, "step": 18758 }, { "epoch": 1.7604166666666665, "grad_norm": 0.898264761822414, "learning_rate": 4.359978783359994e-06, "loss": 0.4077, "step": 18759 }, { "epoch": 1.7605105105105106, "grad_norm": 0.997788198154476, "learning_rate": 4.3594373133454474e-06, "loss": 0.3753, "step": 18760 }, { "epoch": 1.7606043543543544, "grad_norm": 0.9263225115854301, "learning_rate": 4.358895850968403e-06, "loss": 0.3903, "step": 18761 }, { "epoch": 1.7606981981981982, "grad_norm": 0.9100863624467711, "learning_rate": 4.358354396235321e-06, "loss": 0.3768, "step": 18762 }, { "epoch": 1.7607920420420422, "grad_norm": 0.8558639923376742, "learning_rate": 4.357812949152654e-06, "loss": 0.422, "step": 18763 }, { "epoch": 1.760885885885886, "grad_norm": 0.8840321111757078, "learning_rate": 4.357271509726858e-06, "loss": 0.378, "step": 18764 }, { "epoch": 1.7609797297297298, "grad_norm": 0.9483611721010765, "learning_rate": 4.356730077964391e-06, "loss": 0.4017, "step": 18765 }, { "epoch": 1.7610735735735736, "grad_norm": 0.8165896311538808, "learning_rate": 4.356188653871707e-06, "loss": 0.3816, "step": 18766 }, { "epoch": 1.7611674174174174, "grad_norm": 0.8466985794787351, "learning_rate": 4.35564723745526e-06, "loss": 0.4144, "step": 18767 }, { "epoch": 1.7612612612612613, "grad_norm": 0.8680694907177293, "learning_rate": 4.355105828721508e-06, "loss": 0.4018, "step": 18768 }, { "epoch": 1.761355105105105, "grad_norm": 1.1571324025986596, "learning_rate": 4.354564427676905e-06, "loss": 0.4143, "step": 18769 }, { "epoch": 1.7614489489489489, "grad_norm": 1.6318494835242574, "learning_rate": 4.354023034327905e-06, "loss": 0.3918, "step": 18770 }, { "epoch": 1.7615427927927927, "grad_norm": 0.9339122760762621, "learning_rate": 4.353481648680966e-06, "loss": 0.4147, "step": 18771 }, { "epoch": 1.7616366366366365, "grad_norm": 1.0631258869269191, "learning_rate": 4.3529402707425405e-06, "loss": 0.4416, "step": 18772 }, { "epoch": 1.7617304804804805, "grad_norm": 0.8937767641028789, "learning_rate": 4.3523989005190845e-06, "loss": 0.3943, "step": 18773 }, { "epoch": 1.7618243243243243, "grad_norm": 1.0169648040658517, "learning_rate": 4.351857538017053e-06, "loss": 0.4339, "step": 18774 }, { "epoch": 1.7619181681681682, "grad_norm": 0.9293976291087044, "learning_rate": 4.3513161832429e-06, "loss": 0.4172, "step": 18775 }, { "epoch": 1.762012012012012, "grad_norm": 0.9001950889864463, "learning_rate": 4.350774836203079e-06, "loss": 0.4154, "step": 18776 }, { "epoch": 1.762105855855856, "grad_norm": 0.9869387688648666, "learning_rate": 4.350233496904049e-06, "loss": 0.3923, "step": 18777 }, { "epoch": 1.7621996996996998, "grad_norm": 1.1932433252104395, "learning_rate": 4.349692165352261e-06, "loss": 0.3727, "step": 18778 }, { "epoch": 1.7622935435435436, "grad_norm": 0.9276945947179271, "learning_rate": 4.349150841554168e-06, "loss": 0.3894, "step": 18779 }, { "epoch": 1.7623873873873874, "grad_norm": 0.9549649707299211, "learning_rate": 4.3486095255162275e-06, "loss": 0.3963, "step": 18780 }, { "epoch": 1.7624812312312312, "grad_norm": 0.9727468386616733, "learning_rate": 4.348068217244893e-06, "loss": 0.3961, "step": 18781 }, { "epoch": 1.762575075075075, "grad_norm": 1.023952699037515, "learning_rate": 4.347526916746615e-06, "loss": 0.3831, "step": 18782 }, { "epoch": 1.7626689189189189, "grad_norm": 0.8947360391053111, "learning_rate": 4.346985624027852e-06, "loss": 0.4105, "step": 18783 }, { "epoch": 1.7627627627627627, "grad_norm": 1.057615272669783, "learning_rate": 4.346444339095057e-06, "loss": 0.4203, "step": 18784 }, { "epoch": 1.7628566066066065, "grad_norm": 0.9110050814355973, "learning_rate": 4.345903061954682e-06, "loss": 0.3687, "step": 18785 }, { "epoch": 1.7629504504504503, "grad_norm": 0.9023354910475782, "learning_rate": 4.345361792613183e-06, "loss": 0.4172, "step": 18786 }, { "epoch": 1.7630442942942943, "grad_norm": 1.0323735261497173, "learning_rate": 4.344820531077012e-06, "loss": 0.3497, "step": 18787 }, { "epoch": 1.7631381381381381, "grad_norm": 0.9367705912459717, "learning_rate": 4.344279277352621e-06, "loss": 0.3727, "step": 18788 }, { "epoch": 1.763231981981982, "grad_norm": 0.9295563206284067, "learning_rate": 4.343738031446467e-06, "loss": 0.3547, "step": 18789 }, { "epoch": 1.763325825825826, "grad_norm": 1.1229502624406071, "learning_rate": 4.3431967933650025e-06, "loss": 0.4135, "step": 18790 }, { "epoch": 1.7634196696696698, "grad_norm": 0.9533646728513492, "learning_rate": 4.342655563114678e-06, "loss": 0.3855, "step": 18791 }, { "epoch": 1.7635135135135136, "grad_norm": 1.6419429089555588, "learning_rate": 4.34211434070195e-06, "loss": 0.3885, "step": 18792 }, { "epoch": 1.7636073573573574, "grad_norm": 0.9901494268548001, "learning_rate": 4.34157312613327e-06, "loss": 0.4289, "step": 18793 }, { "epoch": 1.7637012012012012, "grad_norm": 0.8363375548113892, "learning_rate": 4.341031919415089e-06, "loss": 0.3991, "step": 18794 }, { "epoch": 1.763795045045045, "grad_norm": 1.0595176582591217, "learning_rate": 4.340490720553865e-06, "loss": 0.4166, "step": 18795 }, { "epoch": 1.7638888888888888, "grad_norm": 0.9303198998013186, "learning_rate": 4.339949529556047e-06, "loss": 0.445, "step": 18796 }, { "epoch": 1.7639827327327327, "grad_norm": 1.0567591176930689, "learning_rate": 4.339408346428087e-06, "loss": 0.345, "step": 18797 }, { "epoch": 1.7640765765765765, "grad_norm": 0.869641945586053, "learning_rate": 4.338867171176441e-06, "loss": 0.3695, "step": 18798 }, { "epoch": 1.7641704204204203, "grad_norm": 1.2174556232151885, "learning_rate": 4.3383260038075595e-06, "loss": 0.3804, "step": 18799 }, { "epoch": 1.7642642642642643, "grad_norm": 1.0126808851381632, "learning_rate": 4.337784844327891e-06, "loss": 0.3813, "step": 18800 }, { "epoch": 1.7643581081081081, "grad_norm": 1.035749847155719, "learning_rate": 4.337243692743896e-06, "loss": 0.4587, "step": 18801 }, { "epoch": 1.764451951951952, "grad_norm": 0.9367005248644973, "learning_rate": 4.336702549062022e-06, "loss": 0.3759, "step": 18802 }, { "epoch": 1.764545795795796, "grad_norm": 0.889201798865674, "learning_rate": 4.336161413288719e-06, "loss": 0.3913, "step": 18803 }, { "epoch": 1.7646396396396398, "grad_norm": 1.0784452271129432, "learning_rate": 4.335620285430445e-06, "loss": 0.3997, "step": 18804 }, { "epoch": 1.7647334834834836, "grad_norm": 0.9279248491941502, "learning_rate": 4.335079165493647e-06, "loss": 0.3656, "step": 18805 }, { "epoch": 1.7648273273273274, "grad_norm": 0.9603010115517409, "learning_rate": 4.334538053484777e-06, "loss": 0.4017, "step": 18806 }, { "epoch": 1.7649211711711712, "grad_norm": 1.3355397987552, "learning_rate": 4.333996949410289e-06, "loss": 0.3912, "step": 18807 }, { "epoch": 1.765015015015015, "grad_norm": 0.8770270673177671, "learning_rate": 4.333455853276635e-06, "loss": 0.3966, "step": 18808 }, { "epoch": 1.7651088588588588, "grad_norm": 0.8991217026947821, "learning_rate": 4.332914765090264e-06, "loss": 0.3539, "step": 18809 }, { "epoch": 1.7652027027027026, "grad_norm": 0.8617812358238988, "learning_rate": 4.332373684857629e-06, "loss": 0.357, "step": 18810 }, { "epoch": 1.7652965465465464, "grad_norm": 1.1027025108536583, "learning_rate": 4.331832612585183e-06, "loss": 0.381, "step": 18811 }, { "epoch": 1.7653903903903903, "grad_norm": 0.9072838264105302, "learning_rate": 4.3312915482793716e-06, "loss": 0.419, "step": 18812 }, { "epoch": 1.7654842342342343, "grad_norm": 0.8920637948002813, "learning_rate": 4.330750491946653e-06, "loss": 0.362, "step": 18813 }, { "epoch": 1.765578078078078, "grad_norm": 1.0411896595884977, "learning_rate": 4.330209443593475e-06, "loss": 0.429, "step": 18814 }, { "epoch": 1.765671921921922, "grad_norm": 1.002201849499578, "learning_rate": 4.329668403226287e-06, "loss": 0.4226, "step": 18815 }, { "epoch": 1.7657657657657657, "grad_norm": 0.8742661462233541, "learning_rate": 4.329127370851542e-06, "loss": 0.3974, "step": 18816 }, { "epoch": 1.7658596096096097, "grad_norm": 0.8548933372738488, "learning_rate": 4.328586346475691e-06, "loss": 0.3492, "step": 18817 }, { "epoch": 1.7659534534534536, "grad_norm": 0.8190708201978788, "learning_rate": 4.328045330105182e-06, "loss": 0.3556, "step": 18818 }, { "epoch": 1.7660472972972974, "grad_norm": 0.9747314801103585, "learning_rate": 4.327504321746469e-06, "loss": 0.4053, "step": 18819 }, { "epoch": 1.7661411411411412, "grad_norm": 0.9413004713296449, "learning_rate": 4.326963321406002e-06, "loss": 0.3771, "step": 18820 }, { "epoch": 1.766234984984985, "grad_norm": 0.9910725145201031, "learning_rate": 4.326422329090228e-06, "loss": 0.4164, "step": 18821 }, { "epoch": 1.7663288288288288, "grad_norm": 0.9973002586780821, "learning_rate": 4.325881344805602e-06, "loss": 0.4292, "step": 18822 }, { "epoch": 1.7664226726726726, "grad_norm": 0.8347849854015877, "learning_rate": 4.325340368558573e-06, "loss": 0.3879, "step": 18823 }, { "epoch": 1.7665165165165164, "grad_norm": 0.9165273633668225, "learning_rate": 4.324799400355588e-06, "loss": 0.402, "step": 18824 }, { "epoch": 1.7666103603603602, "grad_norm": 0.9596113757819554, "learning_rate": 4.3242584402031e-06, "loss": 0.3673, "step": 18825 }, { "epoch": 1.766704204204204, "grad_norm": 1.187119115630785, "learning_rate": 4.3237174881075585e-06, "loss": 0.3524, "step": 18826 }, { "epoch": 1.766798048048048, "grad_norm": 0.9826255219733188, "learning_rate": 4.323176544075411e-06, "loss": 0.3861, "step": 18827 }, { "epoch": 1.7668918918918919, "grad_norm": 0.9968553234450626, "learning_rate": 4.322635608113111e-06, "loss": 0.4424, "step": 18828 }, { "epoch": 1.7669857357357357, "grad_norm": 0.9366013559244563, "learning_rate": 4.322094680227107e-06, "loss": 0.4356, "step": 18829 }, { "epoch": 1.7670795795795797, "grad_norm": 0.91521859345038, "learning_rate": 4.321553760423847e-06, "loss": 0.3701, "step": 18830 }, { "epoch": 1.7671734234234235, "grad_norm": 1.081776919270044, "learning_rate": 4.321012848709782e-06, "loss": 0.4251, "step": 18831 }, { "epoch": 1.7672672672672673, "grad_norm": 1.6589363211548473, "learning_rate": 4.320471945091361e-06, "loss": 0.4289, "step": 18832 }, { "epoch": 1.7673611111111112, "grad_norm": 0.9339987508844675, "learning_rate": 4.3199310495750306e-06, "loss": 0.3693, "step": 18833 }, { "epoch": 1.767454954954955, "grad_norm": 0.8353482183529409, "learning_rate": 4.319390162167244e-06, "loss": 0.4114, "step": 18834 }, { "epoch": 1.7675487987987988, "grad_norm": 0.9834053749285225, "learning_rate": 4.31884928287445e-06, "loss": 0.4229, "step": 18835 }, { "epoch": 1.7676426426426426, "grad_norm": 1.58385742518637, "learning_rate": 4.318308411703093e-06, "loss": 0.3714, "step": 18836 }, { "epoch": 1.7677364864864864, "grad_norm": 1.0372139323727436, "learning_rate": 4.317767548659628e-06, "loss": 0.4057, "step": 18837 }, { "epoch": 1.7678303303303302, "grad_norm": 0.9076278486293664, "learning_rate": 4.3172266937505e-06, "loss": 0.4022, "step": 18838 }, { "epoch": 1.767924174174174, "grad_norm": 0.8392303972199995, "learning_rate": 4.316685846982156e-06, "loss": 0.3982, "step": 18839 }, { "epoch": 1.768018018018018, "grad_norm": 0.9156366314321541, "learning_rate": 4.31614500836105e-06, "loss": 0.4093, "step": 18840 }, { "epoch": 1.7681118618618619, "grad_norm": 1.010783469710001, "learning_rate": 4.3156041778936266e-06, "loss": 0.4409, "step": 18841 }, { "epoch": 1.7682057057057057, "grad_norm": 0.8784850089970184, "learning_rate": 4.315063355586335e-06, "loss": 0.3832, "step": 18842 }, { "epoch": 1.7682995495495497, "grad_norm": 0.7629525325142704, "learning_rate": 4.314522541445624e-06, "loss": 0.3406, "step": 18843 }, { "epoch": 1.7683933933933935, "grad_norm": 0.9090917967544159, "learning_rate": 4.313981735477943e-06, "loss": 0.378, "step": 18844 }, { "epoch": 1.7684872372372373, "grad_norm": 1.077065299860768, "learning_rate": 4.3134409376897345e-06, "loss": 0.4274, "step": 18845 }, { "epoch": 1.7685810810810811, "grad_norm": 0.8799928523628257, "learning_rate": 4.3129001480874535e-06, "loss": 0.4193, "step": 18846 }, { "epoch": 1.768674924924925, "grad_norm": 0.8680369991083481, "learning_rate": 4.312359366677544e-06, "loss": 0.4199, "step": 18847 }, { "epoch": 1.7687687687687688, "grad_norm": 1.3690306397645984, "learning_rate": 4.311818593466455e-06, "loss": 0.3819, "step": 18848 }, { "epoch": 1.7688626126126126, "grad_norm": 0.9756637856304282, "learning_rate": 4.311277828460634e-06, "loss": 0.4252, "step": 18849 }, { "epoch": 1.7689564564564564, "grad_norm": 0.9160317821636403, "learning_rate": 4.310737071666529e-06, "loss": 0.3711, "step": 18850 }, { "epoch": 1.7690503003003002, "grad_norm": 1.3445971992585781, "learning_rate": 4.310196323090584e-06, "loss": 0.3767, "step": 18851 }, { "epoch": 1.769144144144144, "grad_norm": 2.3247145599896433, "learning_rate": 4.309655582739253e-06, "loss": 0.3779, "step": 18852 }, { "epoch": 1.769237987987988, "grad_norm": 0.9112046771261395, "learning_rate": 4.309114850618978e-06, "loss": 0.3805, "step": 18853 }, { "epoch": 1.7693318318318318, "grad_norm": 0.9727610696389346, "learning_rate": 4.308574126736209e-06, "loss": 0.4132, "step": 18854 }, { "epoch": 1.7694256756756757, "grad_norm": 0.9514072218200056, "learning_rate": 4.308033411097391e-06, "loss": 0.3839, "step": 18855 }, { "epoch": 1.7695195195195195, "grad_norm": 1.3147464357289307, "learning_rate": 4.3074927037089735e-06, "loss": 0.3905, "step": 18856 }, { "epoch": 1.7696133633633635, "grad_norm": 0.8308391460726141, "learning_rate": 4.306952004577399e-06, "loss": 0.3765, "step": 18857 }, { "epoch": 1.7697072072072073, "grad_norm": 1.2066733645211558, "learning_rate": 4.3064113137091206e-06, "loss": 0.4676, "step": 18858 }, { "epoch": 1.7698010510510511, "grad_norm": 0.989008083731932, "learning_rate": 4.305870631110581e-06, "loss": 0.4484, "step": 18859 }, { "epoch": 1.769894894894895, "grad_norm": 4.259944151594583, "learning_rate": 4.305329956788226e-06, "loss": 0.4076, "step": 18860 }, { "epoch": 1.7699887387387387, "grad_norm": 1.150343855129068, "learning_rate": 4.304789290748506e-06, "loss": 0.4082, "step": 18861 }, { "epoch": 1.7700825825825826, "grad_norm": 1.2257624800921405, "learning_rate": 4.304248632997864e-06, "loss": 0.3796, "step": 18862 }, { "epoch": 1.7701764264264264, "grad_norm": 0.9354989151878004, "learning_rate": 4.3037079835427455e-06, "loss": 0.3734, "step": 18863 }, { "epoch": 1.7702702702702702, "grad_norm": 1.3627411436356403, "learning_rate": 4.3031673423896005e-06, "loss": 0.3905, "step": 18864 }, { "epoch": 1.770364114114114, "grad_norm": 0.9248249130564037, "learning_rate": 4.302626709544875e-06, "loss": 0.3702, "step": 18865 }, { "epoch": 1.7704579579579578, "grad_norm": 0.8565103959833978, "learning_rate": 4.30208608501501e-06, "loss": 0.4189, "step": 18866 }, { "epoch": 1.7705518018018018, "grad_norm": 1.0456760998241168, "learning_rate": 4.301545468806457e-06, "loss": 0.3955, "step": 18867 }, { "epoch": 1.7706456456456456, "grad_norm": 0.8909242959140635, "learning_rate": 4.301004860925659e-06, "loss": 0.3471, "step": 18868 }, { "epoch": 1.7707394894894894, "grad_norm": 0.9245055354083762, "learning_rate": 4.300464261379061e-06, "loss": 0.384, "step": 18869 }, { "epoch": 1.7708333333333335, "grad_norm": 1.1580262027695365, "learning_rate": 4.299923670173112e-06, "loss": 0.3746, "step": 18870 }, { "epoch": 1.7709271771771773, "grad_norm": 1.0311307311829911, "learning_rate": 4.299383087314254e-06, "loss": 0.4167, "step": 18871 }, { "epoch": 1.771021021021021, "grad_norm": 0.9301774350787854, "learning_rate": 4.2988425128089345e-06, "loss": 0.4113, "step": 18872 }, { "epoch": 1.771114864864865, "grad_norm": 0.8707627029634643, "learning_rate": 4.298301946663599e-06, "loss": 0.4255, "step": 18873 }, { "epoch": 1.7712087087087087, "grad_norm": 0.9830036065318625, "learning_rate": 4.297761388884691e-06, "loss": 0.4104, "step": 18874 }, { "epoch": 1.7713025525525525, "grad_norm": 0.9567916001335475, "learning_rate": 4.297220839478656e-06, "loss": 0.3787, "step": 18875 }, { "epoch": 1.7713963963963963, "grad_norm": 0.9863765706371733, "learning_rate": 4.2966802984519406e-06, "loss": 0.4155, "step": 18876 }, { "epoch": 1.7714902402402402, "grad_norm": 0.9371123061453045, "learning_rate": 4.2961397658109896e-06, "loss": 0.3735, "step": 18877 }, { "epoch": 1.771584084084084, "grad_norm": 0.9040547210382546, "learning_rate": 4.295599241562245e-06, "loss": 0.4017, "step": 18878 }, { "epoch": 1.7716779279279278, "grad_norm": 1.296697155710615, "learning_rate": 4.2950587257121555e-06, "loss": 0.4697, "step": 18879 }, { "epoch": 1.7717717717717718, "grad_norm": 1.0319950285421178, "learning_rate": 4.2945182182671634e-06, "loss": 0.4011, "step": 18880 }, { "epoch": 1.7718656156156156, "grad_norm": 1.2040827531041487, "learning_rate": 4.293977719233712e-06, "loss": 0.4434, "step": 18881 }, { "epoch": 1.7719594594594594, "grad_norm": 0.8404851050126688, "learning_rate": 4.293437228618249e-06, "loss": 0.3647, "step": 18882 }, { "epoch": 1.7720533033033035, "grad_norm": 1.0020265891842457, "learning_rate": 4.292896746427216e-06, "loss": 0.4334, "step": 18883 }, { "epoch": 1.7721471471471473, "grad_norm": 0.981082398048652, "learning_rate": 4.292356272667058e-06, "loss": 0.3774, "step": 18884 }, { "epoch": 1.772240990990991, "grad_norm": 0.8912076747411255, "learning_rate": 4.291815807344222e-06, "loss": 0.4173, "step": 18885 }, { "epoch": 1.772334834834835, "grad_norm": 0.9725153143839221, "learning_rate": 4.291275350465148e-06, "loss": 0.4097, "step": 18886 }, { "epoch": 1.7724286786786787, "grad_norm": 0.8533997853914688, "learning_rate": 4.290734902036279e-06, "loss": 0.378, "step": 18887 }, { "epoch": 1.7725225225225225, "grad_norm": 0.9003762519135824, "learning_rate": 4.290194462064064e-06, "loss": 0.4048, "step": 18888 }, { "epoch": 1.7726163663663663, "grad_norm": 0.9222068604748184, "learning_rate": 4.289654030554945e-06, "loss": 0.3837, "step": 18889 }, { "epoch": 1.7727102102102101, "grad_norm": 1.334225774473939, "learning_rate": 4.289113607515362e-06, "loss": 0.3839, "step": 18890 }, { "epoch": 1.772804054054054, "grad_norm": 0.8734309084485771, "learning_rate": 4.288573192951763e-06, "loss": 0.3596, "step": 18891 }, { "epoch": 1.7728978978978978, "grad_norm": 1.0198947340708737, "learning_rate": 4.2880327868705885e-06, "loss": 0.4444, "step": 18892 }, { "epoch": 1.7729917417417418, "grad_norm": 0.9784496867776286, "learning_rate": 4.287492389278282e-06, "loss": 0.4263, "step": 18893 }, { "epoch": 1.7730855855855856, "grad_norm": 1.023260472382815, "learning_rate": 4.286952000181289e-06, "loss": 0.3932, "step": 18894 }, { "epoch": 1.7731794294294294, "grad_norm": 1.0204393025200764, "learning_rate": 4.286411619586051e-06, "loss": 0.3793, "step": 18895 }, { "epoch": 1.7732732732732732, "grad_norm": 0.9611112580932095, "learning_rate": 4.28587124749901e-06, "loss": 0.4127, "step": 18896 }, { "epoch": 1.7733671171171173, "grad_norm": 0.987447210126726, "learning_rate": 4.285330883926612e-06, "loss": 0.4011, "step": 18897 }, { "epoch": 1.773460960960961, "grad_norm": 0.8694058324019692, "learning_rate": 4.284790528875298e-06, "loss": 0.3736, "step": 18898 }, { "epoch": 1.7735548048048049, "grad_norm": 0.8178242325375927, "learning_rate": 4.284250182351509e-06, "loss": 0.3739, "step": 18899 }, { "epoch": 1.7736486486486487, "grad_norm": 0.9448578624982753, "learning_rate": 4.2837098443616906e-06, "loss": 0.4293, "step": 18900 }, { "epoch": 1.7737424924924925, "grad_norm": 0.8847447828373713, "learning_rate": 4.283169514912284e-06, "loss": 0.4021, "step": 18901 }, { "epoch": 1.7738363363363363, "grad_norm": 1.1300272576643, "learning_rate": 4.282629194009731e-06, "loss": 0.3817, "step": 18902 }, { "epoch": 1.7739301801801801, "grad_norm": 1.6187777939256447, "learning_rate": 4.282088881660475e-06, "loss": 0.4339, "step": 18903 }, { "epoch": 1.774024024024024, "grad_norm": 1.020692257887571, "learning_rate": 4.281548577870958e-06, "loss": 0.3783, "step": 18904 }, { "epoch": 1.7741178678678677, "grad_norm": 1.382606706079357, "learning_rate": 4.28100828264762e-06, "loss": 0.4024, "step": 18905 }, { "epoch": 1.7742117117117115, "grad_norm": 0.9161702319944015, "learning_rate": 4.280467995996907e-06, "loss": 0.3785, "step": 18906 }, { "epoch": 1.7743055555555556, "grad_norm": 1.017470179461734, "learning_rate": 4.279927717925259e-06, "loss": 0.4055, "step": 18907 }, { "epoch": 1.7743993993993994, "grad_norm": 1.1380809260834042, "learning_rate": 4.2793874484391165e-06, "loss": 0.3732, "step": 18908 }, { "epoch": 1.7744932432432432, "grad_norm": 0.9549915611232687, "learning_rate": 4.278847187544922e-06, "loss": 0.4176, "step": 18909 }, { "epoch": 1.7745870870870872, "grad_norm": 1.0148145787031289, "learning_rate": 4.278306935249119e-06, "loss": 0.4235, "step": 18910 }, { "epoch": 1.774680930930931, "grad_norm": 1.0104486196728537, "learning_rate": 4.277766691558144e-06, "loss": 0.3923, "step": 18911 }, { "epoch": 1.7747747747747749, "grad_norm": 0.866095437336861, "learning_rate": 4.277226456478445e-06, "loss": 0.3854, "step": 18912 }, { "epoch": 1.7748686186186187, "grad_norm": 1.5842839115832457, "learning_rate": 4.276686230016459e-06, "loss": 0.3824, "step": 18913 }, { "epoch": 1.7749624624624625, "grad_norm": 0.9040637930117423, "learning_rate": 4.276146012178628e-06, "loss": 0.3437, "step": 18914 }, { "epoch": 1.7750563063063063, "grad_norm": 0.8720007064149632, "learning_rate": 4.275605802971393e-06, "loss": 0.4116, "step": 18915 }, { "epoch": 1.77515015015015, "grad_norm": 0.9957241037669284, "learning_rate": 4.2750656024011975e-06, "loss": 0.398, "step": 18916 }, { "epoch": 1.775243993993994, "grad_norm": 0.9250055556321563, "learning_rate": 4.274525410474476e-06, "loss": 0.3803, "step": 18917 }, { "epoch": 1.7753378378378377, "grad_norm": 1.1355148141947837, "learning_rate": 4.273985227197677e-06, "loss": 0.3808, "step": 18918 }, { "epoch": 1.7754316816816815, "grad_norm": 1.096624889228816, "learning_rate": 4.273445052577236e-06, "loss": 0.3967, "step": 18919 }, { "epoch": 1.7755255255255256, "grad_norm": 0.8657882840452464, "learning_rate": 4.272904886619596e-06, "loss": 0.3709, "step": 18920 }, { "epoch": 1.7756193693693694, "grad_norm": 0.9913112665760646, "learning_rate": 4.272364729331196e-06, "loss": 0.4622, "step": 18921 }, { "epoch": 1.7757132132132132, "grad_norm": 1.3229951836066633, "learning_rate": 4.271824580718478e-06, "loss": 0.3807, "step": 18922 }, { "epoch": 1.7758070570570572, "grad_norm": 1.0893241023518596, "learning_rate": 4.271284440787878e-06, "loss": 0.4401, "step": 18923 }, { "epoch": 1.775900900900901, "grad_norm": 0.8684859959279292, "learning_rate": 4.270744309545843e-06, "loss": 0.3991, "step": 18924 }, { "epoch": 1.7759947447447448, "grad_norm": 0.972530793449761, "learning_rate": 4.2702041869988085e-06, "loss": 0.3188, "step": 18925 }, { "epoch": 1.7760885885885886, "grad_norm": 0.9248149029535785, "learning_rate": 4.269664073153214e-06, "loss": 0.3875, "step": 18926 }, { "epoch": 1.7761824324324325, "grad_norm": 0.9135538144321472, "learning_rate": 4.2691239680155024e-06, "loss": 0.4112, "step": 18927 }, { "epoch": 1.7762762762762763, "grad_norm": 0.8027066118844742, "learning_rate": 4.268583871592111e-06, "loss": 0.4146, "step": 18928 }, { "epoch": 1.77637012012012, "grad_norm": 0.8848235672110317, "learning_rate": 4.268043783889479e-06, "loss": 0.4231, "step": 18929 }, { "epoch": 1.7764639639639639, "grad_norm": 1.0402836642542665, "learning_rate": 4.267503704914049e-06, "loss": 0.4213, "step": 18930 }, { "epoch": 1.7765578078078077, "grad_norm": 1.009479587840213, "learning_rate": 4.266963634672258e-06, "loss": 0.4264, "step": 18931 }, { "epoch": 1.7766516516516515, "grad_norm": 0.8432220949828724, "learning_rate": 4.266423573170545e-06, "loss": 0.4365, "step": 18932 }, { "epoch": 1.7767454954954955, "grad_norm": 0.9806053117396407, "learning_rate": 4.2658835204153515e-06, "loss": 0.4175, "step": 18933 }, { "epoch": 1.7768393393393394, "grad_norm": 0.9152036875490063, "learning_rate": 4.265343476413115e-06, "loss": 0.4392, "step": 18934 }, { "epoch": 1.7769331831831832, "grad_norm": 0.8660193952216312, "learning_rate": 4.264803441170273e-06, "loss": 0.36, "step": 18935 }, { "epoch": 1.777027027027027, "grad_norm": 0.9863244512345789, "learning_rate": 4.264263414693267e-06, "loss": 0.4176, "step": 18936 }, { "epoch": 1.777120870870871, "grad_norm": 0.9614325225794907, "learning_rate": 4.263723396988536e-06, "loss": 0.4258, "step": 18937 }, { "epoch": 1.7772147147147148, "grad_norm": 0.8264176222534984, "learning_rate": 4.2631833880625165e-06, "loss": 0.3723, "step": 18938 }, { "epoch": 1.7773085585585586, "grad_norm": 0.9727618431738717, "learning_rate": 4.2626433879216485e-06, "loss": 0.3983, "step": 18939 }, { "epoch": 1.7774024024024024, "grad_norm": 0.8855272407428316, "learning_rate": 4.262103396572371e-06, "loss": 0.3967, "step": 18940 }, { "epoch": 1.7774962462462462, "grad_norm": 1.0705971381353163, "learning_rate": 4.2615634140211195e-06, "loss": 0.3928, "step": 18941 }, { "epoch": 1.77759009009009, "grad_norm": 0.9785534696482903, "learning_rate": 4.261023440274335e-06, "loss": 0.4105, "step": 18942 }, { "epoch": 1.7776839339339339, "grad_norm": 0.8318041541349215, "learning_rate": 4.260483475338457e-06, "loss": 0.3999, "step": 18943 }, { "epoch": 1.7777777777777777, "grad_norm": 0.9311473785984994, "learning_rate": 4.259943519219918e-06, "loss": 0.4119, "step": 18944 }, { "epoch": 1.7778716216216215, "grad_norm": 0.9180939072940972, "learning_rate": 4.259403571925164e-06, "loss": 0.4001, "step": 18945 }, { "epoch": 1.7779654654654653, "grad_norm": 0.9401841090418648, "learning_rate": 4.2588636334606255e-06, "loss": 0.4227, "step": 18946 }, { "epoch": 1.7780593093093093, "grad_norm": 0.9820717343392259, "learning_rate": 4.258323703832742e-06, "loss": 0.4146, "step": 18947 }, { "epoch": 1.7781531531531531, "grad_norm": 0.8267950620295612, "learning_rate": 4.257783783047954e-06, "loss": 0.3565, "step": 18948 }, { "epoch": 1.778246996996997, "grad_norm": 0.8465189571201791, "learning_rate": 4.257243871112699e-06, "loss": 0.3665, "step": 18949 }, { "epoch": 1.778340840840841, "grad_norm": 1.1019212944036176, "learning_rate": 4.256703968033409e-06, "loss": 0.3608, "step": 18950 }, { "epoch": 1.7784346846846848, "grad_norm": 3.848475963869906, "learning_rate": 4.2561640738165275e-06, "loss": 0.4072, "step": 18951 }, { "epoch": 1.7785285285285286, "grad_norm": 1.0043273786363496, "learning_rate": 4.255624188468489e-06, "loss": 0.4087, "step": 18952 }, { "epoch": 1.7786223723723724, "grad_norm": 0.9256426343092719, "learning_rate": 4.25508431199573e-06, "loss": 0.4131, "step": 18953 }, { "epoch": 1.7787162162162162, "grad_norm": 0.8789527074104648, "learning_rate": 4.25454444440469e-06, "loss": 0.4121, "step": 18954 }, { "epoch": 1.77881006006006, "grad_norm": 0.8714458097530444, "learning_rate": 4.2540045857018035e-06, "loss": 0.4023, "step": 18955 }, { "epoch": 1.7789039039039038, "grad_norm": 1.4330637451042536, "learning_rate": 4.253464735893507e-06, "loss": 0.4147, "step": 18956 }, { "epoch": 1.7789977477477477, "grad_norm": 0.9036379051248666, "learning_rate": 4.25292489498624e-06, "loss": 0.3977, "step": 18957 }, { "epoch": 1.7790915915915915, "grad_norm": 1.1013021590888186, "learning_rate": 4.252385062986436e-06, "loss": 0.3393, "step": 18958 }, { "epoch": 1.7791854354354353, "grad_norm": 1.1317559590618498, "learning_rate": 4.251845239900534e-06, "loss": 0.3977, "step": 18959 }, { "epoch": 1.7792792792792793, "grad_norm": 1.1610919167087312, "learning_rate": 4.251305425734969e-06, "loss": 0.3689, "step": 18960 }, { "epoch": 1.7793731231231231, "grad_norm": 0.8988838437027846, "learning_rate": 4.250765620496178e-06, "loss": 0.3897, "step": 18961 }, { "epoch": 1.779466966966967, "grad_norm": 0.8957187155086285, "learning_rate": 4.250225824190594e-06, "loss": 0.392, "step": 18962 }, { "epoch": 1.779560810810811, "grad_norm": 1.4510191865509938, "learning_rate": 4.249686036824659e-06, "loss": 0.4032, "step": 18963 }, { "epoch": 1.7796546546546548, "grad_norm": 1.1110161536272902, "learning_rate": 4.249146258404805e-06, "loss": 0.4184, "step": 18964 }, { "epoch": 1.7797484984984986, "grad_norm": 0.9125237451789215, "learning_rate": 4.248606488937467e-06, "loss": 0.4194, "step": 18965 }, { "epoch": 1.7798423423423424, "grad_norm": 0.9874116115599085, "learning_rate": 4.2480667284290845e-06, "loss": 0.4094, "step": 18966 }, { "epoch": 1.7799361861861862, "grad_norm": 0.9714510444097509, "learning_rate": 4.24752697688609e-06, "loss": 0.4281, "step": 18967 }, { "epoch": 1.78003003003003, "grad_norm": 0.9094313194247304, "learning_rate": 4.246987234314919e-06, "loss": 0.4099, "step": 18968 }, { "epoch": 1.7801238738738738, "grad_norm": 1.0093167172148159, "learning_rate": 4.246447500722008e-06, "loss": 0.4495, "step": 18969 }, { "epoch": 1.7802177177177176, "grad_norm": 1.0180820421914103, "learning_rate": 4.2459077761137935e-06, "loss": 0.4078, "step": 18970 }, { "epoch": 1.7803115615615615, "grad_norm": 1.0079867554758526, "learning_rate": 4.2453680604967075e-06, "loss": 0.4125, "step": 18971 }, { "epoch": 1.7804054054054053, "grad_norm": 0.8758249982999543, "learning_rate": 4.244828353877188e-06, "loss": 0.4079, "step": 18972 }, { "epoch": 1.7804992492492493, "grad_norm": 0.9283760489466062, "learning_rate": 4.244288656261669e-06, "loss": 0.3829, "step": 18973 }, { "epoch": 1.780593093093093, "grad_norm": 1.2824494188581828, "learning_rate": 4.243748967656584e-06, "loss": 0.4199, "step": 18974 }, { "epoch": 1.780686936936937, "grad_norm": 0.888904677071297, "learning_rate": 4.243209288068371e-06, "loss": 0.361, "step": 18975 }, { "epoch": 1.7807807807807807, "grad_norm": 0.9066804848462052, "learning_rate": 4.242669617503462e-06, "loss": 0.4036, "step": 18976 }, { "epoch": 1.7808746246246248, "grad_norm": 1.0326411153443833, "learning_rate": 4.242129955968292e-06, "loss": 0.3586, "step": 18977 }, { "epoch": 1.7809684684684686, "grad_norm": 1.3780749445903084, "learning_rate": 4.241590303469296e-06, "loss": 0.373, "step": 18978 }, { "epoch": 1.7810623123123124, "grad_norm": 1.1260638539855754, "learning_rate": 4.241050660012909e-06, "loss": 0.3308, "step": 18979 }, { "epoch": 1.7811561561561562, "grad_norm": 0.9474728749651561, "learning_rate": 4.240511025605562e-06, "loss": 0.4034, "step": 18980 }, { "epoch": 1.78125, "grad_norm": 1.7545135855170484, "learning_rate": 4.239971400253693e-06, "loss": 0.3747, "step": 18981 }, { "epoch": 1.7813438438438438, "grad_norm": 0.9874274738869118, "learning_rate": 4.239431783963735e-06, "loss": 0.4313, "step": 18982 }, { "epoch": 1.7814376876876876, "grad_norm": 0.9681878458213353, "learning_rate": 4.238892176742119e-06, "loss": 0.3831, "step": 18983 }, { "epoch": 1.7815315315315314, "grad_norm": 0.8757003459404916, "learning_rate": 4.238352578595284e-06, "loss": 0.4146, "step": 18984 }, { "epoch": 1.7816253753753752, "grad_norm": 1.0266077905353213, "learning_rate": 4.23781298952966e-06, "loss": 0.3888, "step": 18985 }, { "epoch": 1.7817192192192193, "grad_norm": 0.9534700500872514, "learning_rate": 4.237273409551679e-06, "loss": 0.4163, "step": 18986 }, { "epoch": 1.781813063063063, "grad_norm": 1.055152090669337, "learning_rate": 4.236733838667779e-06, "loss": 0.4269, "step": 18987 }, { "epoch": 1.781906906906907, "grad_norm": 0.9878923260881929, "learning_rate": 4.236194276884392e-06, "loss": 0.3976, "step": 18988 }, { "epoch": 1.7820007507507507, "grad_norm": 0.9643763652939109, "learning_rate": 4.23565472420795e-06, "loss": 0.4146, "step": 18989 }, { "epoch": 1.7820945945945947, "grad_norm": 0.9827735383187644, "learning_rate": 4.235115180644886e-06, "loss": 0.3766, "step": 18990 }, { "epoch": 1.7821884384384385, "grad_norm": 4.655781884663967, "learning_rate": 4.2345756462016345e-06, "loss": 0.3999, "step": 18991 }, { "epoch": 1.7822822822822824, "grad_norm": 1.2325907310407704, "learning_rate": 4.234036120884626e-06, "loss": 0.3917, "step": 18992 }, { "epoch": 1.7823761261261262, "grad_norm": 0.9031075286234207, "learning_rate": 4.2334966047002965e-06, "loss": 0.3954, "step": 18993 }, { "epoch": 1.78246996996997, "grad_norm": 0.9810114491850912, "learning_rate": 4.232957097655077e-06, "loss": 0.3664, "step": 18994 }, { "epoch": 1.7825638138138138, "grad_norm": 0.9724151827176051, "learning_rate": 4.2324175997553996e-06, "loss": 0.4431, "step": 18995 }, { "epoch": 1.7826576576576576, "grad_norm": 1.3746582648925283, "learning_rate": 4.231878111007699e-06, "loss": 0.4431, "step": 18996 }, { "epoch": 1.7827515015015014, "grad_norm": 1.0383356756335704, "learning_rate": 4.231338631418406e-06, "loss": 0.3563, "step": 18997 }, { "epoch": 1.7828453453453452, "grad_norm": 1.2333675635821952, "learning_rate": 4.2307991609939505e-06, "loss": 0.4067, "step": 18998 }, { "epoch": 1.782939189189189, "grad_norm": 1.014434390809757, "learning_rate": 4.230259699740769e-06, "loss": 0.4253, "step": 18999 }, { "epoch": 1.783033033033033, "grad_norm": 0.9796696695283204, "learning_rate": 4.2297202476652925e-06, "loss": 0.3975, "step": 19000 }, { "epoch": 1.7831268768768769, "grad_norm": 1.057706840539519, "learning_rate": 4.22918080477395e-06, "loss": 0.4194, "step": 19001 }, { "epoch": 1.7832207207207207, "grad_norm": 1.02034350073052, "learning_rate": 4.228641371073177e-06, "loss": 0.3771, "step": 19002 }, { "epoch": 1.7833145645645647, "grad_norm": 0.9678053378962813, "learning_rate": 4.228101946569404e-06, "loss": 0.41, "step": 19003 }, { "epoch": 1.7834084084084085, "grad_norm": 1.017103660231467, "learning_rate": 4.22756253126906e-06, "loss": 0.4018, "step": 19004 }, { "epoch": 1.7835022522522523, "grad_norm": 0.942583157644339, "learning_rate": 4.227023125178581e-06, "loss": 0.3572, "step": 19005 }, { "epoch": 1.7835960960960962, "grad_norm": 1.001823436840969, "learning_rate": 4.226483728304396e-06, "loss": 0.3664, "step": 19006 }, { "epoch": 1.78368993993994, "grad_norm": 2.5778093437874894, "learning_rate": 4.225944340652935e-06, "loss": 0.3858, "step": 19007 }, { "epoch": 1.7837837837837838, "grad_norm": 0.8600271984722525, "learning_rate": 4.225404962230633e-06, "loss": 0.371, "step": 19008 }, { "epoch": 1.7838776276276276, "grad_norm": 0.9197132860237437, "learning_rate": 4.224865593043919e-06, "loss": 0.3692, "step": 19009 }, { "epoch": 1.7839714714714714, "grad_norm": 0.9361429911509958, "learning_rate": 4.224326233099221e-06, "loss": 0.3443, "step": 19010 }, { "epoch": 1.7840653153153152, "grad_norm": 0.9836693813533965, "learning_rate": 4.223786882402974e-06, "loss": 0.416, "step": 19011 }, { "epoch": 1.784159159159159, "grad_norm": 0.8819982580042398, "learning_rate": 4.223247540961608e-06, "loss": 0.3689, "step": 19012 }, { "epoch": 1.784253003003003, "grad_norm": 0.7704066731039935, "learning_rate": 4.222708208781553e-06, "loss": 0.3655, "step": 19013 }, { "epoch": 1.7843468468468469, "grad_norm": 0.9893595817636488, "learning_rate": 4.222168885869239e-06, "loss": 0.3858, "step": 19014 }, { "epoch": 1.7844406906906907, "grad_norm": 0.9829936083881596, "learning_rate": 4.221629572231097e-06, "loss": 0.4137, "step": 19015 }, { "epoch": 1.7845345345345347, "grad_norm": 0.9832016664396577, "learning_rate": 4.221090267873556e-06, "loss": 0.3766, "step": 19016 }, { "epoch": 1.7846283783783785, "grad_norm": 1.165560527256732, "learning_rate": 4.220550972803049e-06, "loss": 0.422, "step": 19017 }, { "epoch": 1.7847222222222223, "grad_norm": 0.9576964512828439, "learning_rate": 4.220011687026005e-06, "loss": 0.3828, "step": 19018 }, { "epoch": 1.7848160660660661, "grad_norm": 1.0276028891904445, "learning_rate": 4.219472410548852e-06, "loss": 0.3675, "step": 19019 }, { "epoch": 1.78490990990991, "grad_norm": 1.329096314615121, "learning_rate": 4.218933143378022e-06, "loss": 0.3896, "step": 19020 }, { "epoch": 1.7850037537537538, "grad_norm": 2.6398603941786076, "learning_rate": 4.218393885519944e-06, "loss": 0.3968, "step": 19021 }, { "epoch": 1.7850975975975976, "grad_norm": 0.8896943248687315, "learning_rate": 4.217854636981046e-06, "loss": 0.4234, "step": 19022 }, { "epoch": 1.7851914414414414, "grad_norm": 1.901850915484371, "learning_rate": 4.217315397767762e-06, "loss": 0.3998, "step": 19023 }, { "epoch": 1.7852852852852852, "grad_norm": 1.0487065918203262, "learning_rate": 4.216776167886518e-06, "loss": 0.4235, "step": 19024 }, { "epoch": 1.785379129129129, "grad_norm": 0.9029031084417698, "learning_rate": 4.216236947343743e-06, "loss": 0.3506, "step": 19025 }, { "epoch": 1.785472972972973, "grad_norm": 1.0370729627084436, "learning_rate": 4.215697736145867e-06, "loss": 0.3954, "step": 19026 }, { "epoch": 1.7855668168168168, "grad_norm": 0.9892800488556625, "learning_rate": 4.215158534299321e-06, "loss": 0.3762, "step": 19027 }, { "epoch": 1.7856606606606606, "grad_norm": 1.5556050066360325, "learning_rate": 4.214619341810529e-06, "loss": 0.3634, "step": 19028 }, { "epoch": 1.7857545045045045, "grad_norm": 0.9740772018929788, "learning_rate": 4.214080158685925e-06, "loss": 0.3816, "step": 19029 }, { "epoch": 1.7858483483483485, "grad_norm": 0.9033146512075997, "learning_rate": 4.213540984931936e-06, "loss": 0.3749, "step": 19030 }, { "epoch": 1.7859421921921923, "grad_norm": 0.9420751163058227, "learning_rate": 4.21300182055499e-06, "loss": 0.3736, "step": 19031 }, { "epoch": 1.7860360360360361, "grad_norm": 0.9408466957641249, "learning_rate": 4.212462665561516e-06, "loss": 0.4353, "step": 19032 }, { "epoch": 1.78612987987988, "grad_norm": 1.002826582271759, "learning_rate": 4.211923519957943e-06, "loss": 0.4075, "step": 19033 }, { "epoch": 1.7862237237237237, "grad_norm": 0.9924403635713521, "learning_rate": 4.211384383750697e-06, "loss": 0.4447, "step": 19034 }, { "epoch": 1.7863175675675675, "grad_norm": 0.9891476345787134, "learning_rate": 4.210845256946209e-06, "loss": 0.4276, "step": 19035 }, { "epoch": 1.7864114114114114, "grad_norm": 0.9459551199035945, "learning_rate": 4.210306139550906e-06, "loss": 0.3662, "step": 19036 }, { "epoch": 1.7865052552552552, "grad_norm": 0.9987906016739774, "learning_rate": 4.209767031571215e-06, "loss": 0.3579, "step": 19037 }, { "epoch": 1.786599099099099, "grad_norm": 0.8891016253531915, "learning_rate": 4.209227933013565e-06, "loss": 0.3635, "step": 19038 }, { "epoch": 1.7866929429429428, "grad_norm": 0.9522244627772711, "learning_rate": 4.208688843884385e-06, "loss": 0.4079, "step": 19039 }, { "epoch": 1.7867867867867868, "grad_norm": 1.026636274916389, "learning_rate": 4.208149764190097e-06, "loss": 0.3267, "step": 19040 }, { "epoch": 1.7868806306306306, "grad_norm": 1.1002445207563007, "learning_rate": 4.207610693937137e-06, "loss": 0.3845, "step": 19041 }, { "epoch": 1.7869744744744744, "grad_norm": 0.9056230042630231, "learning_rate": 4.207071633131926e-06, "loss": 0.3845, "step": 19042 }, { "epoch": 1.7870683183183185, "grad_norm": 1.3012699614893166, "learning_rate": 4.206532581780893e-06, "loss": 0.3747, "step": 19043 }, { "epoch": 1.7871621621621623, "grad_norm": 2.0290791141526254, "learning_rate": 4.205993539890466e-06, "loss": 0.3923, "step": 19044 }, { "epoch": 1.787256006006006, "grad_norm": 1.113748851902478, "learning_rate": 4.205454507467072e-06, "loss": 0.3792, "step": 19045 }, { "epoch": 1.78734984984985, "grad_norm": 1.2846746534354392, "learning_rate": 4.204915484517135e-06, "loss": 0.3842, "step": 19046 }, { "epoch": 1.7874436936936937, "grad_norm": 0.9347605035287456, "learning_rate": 4.204376471047086e-06, "loss": 0.3965, "step": 19047 }, { "epoch": 1.7875375375375375, "grad_norm": 0.9239948337432198, "learning_rate": 4.203837467063351e-06, "loss": 0.3718, "step": 19048 }, { "epoch": 1.7876313813813813, "grad_norm": 1.7084856946969973, "learning_rate": 4.203298472572353e-06, "loss": 0.4161, "step": 19049 }, { "epoch": 1.7877252252252251, "grad_norm": 0.9714520383507977, "learning_rate": 4.202759487580524e-06, "loss": 0.3423, "step": 19050 }, { "epoch": 1.787819069069069, "grad_norm": 0.9656996120730379, "learning_rate": 4.202220512094286e-06, "loss": 0.3693, "step": 19051 }, { "epoch": 1.7879129129129128, "grad_norm": 0.8117329596158928, "learning_rate": 4.201681546120066e-06, "loss": 0.396, "step": 19052 }, { "epoch": 1.7880067567567568, "grad_norm": 0.8206147361606246, "learning_rate": 4.201142589664291e-06, "loss": 0.3636, "step": 19053 }, { "epoch": 1.7881006006006006, "grad_norm": 1.0943962661184217, "learning_rate": 4.200603642733389e-06, "loss": 0.3635, "step": 19054 }, { "epoch": 1.7881944444444444, "grad_norm": 0.9465232679341262, "learning_rate": 4.200064705333781e-06, "loss": 0.399, "step": 19055 }, { "epoch": 1.7882882882882885, "grad_norm": 1.0075229656104139, "learning_rate": 4.199525777471898e-06, "loss": 0.3817, "step": 19056 }, { "epoch": 1.7883821321321323, "grad_norm": 0.8439565853026011, "learning_rate": 4.198986859154162e-06, "loss": 0.3776, "step": 19057 }, { "epoch": 1.788475975975976, "grad_norm": 1.0033880472483347, "learning_rate": 4.198447950386999e-06, "loss": 0.3997, "step": 19058 }, { "epoch": 1.7885698198198199, "grad_norm": 0.9225447089101539, "learning_rate": 4.197909051176838e-06, "loss": 0.3867, "step": 19059 }, { "epoch": 1.7886636636636637, "grad_norm": 1.0013925017156242, "learning_rate": 4.197370161530101e-06, "loss": 0.4432, "step": 19060 }, { "epoch": 1.7887575075075075, "grad_norm": 0.8287925461154607, "learning_rate": 4.196831281453213e-06, "loss": 0.3534, "step": 19061 }, { "epoch": 1.7888513513513513, "grad_norm": 1.0415871410261839, "learning_rate": 4.196292410952601e-06, "loss": 0.3783, "step": 19062 }, { "epoch": 1.7889451951951951, "grad_norm": 0.8576257528205422, "learning_rate": 4.19575355003469e-06, "loss": 0.3993, "step": 19063 }, { "epoch": 1.789039039039039, "grad_norm": 1.0890651382189422, "learning_rate": 4.195214698705901e-06, "loss": 0.4026, "step": 19064 }, { "epoch": 1.7891328828828827, "grad_norm": 1.086010836466416, "learning_rate": 4.194675856972664e-06, "loss": 0.4259, "step": 19065 }, { "epoch": 1.7892267267267268, "grad_norm": 0.9098804240581921, "learning_rate": 4.1941370248414026e-06, "loss": 0.4096, "step": 19066 }, { "epoch": 1.7893205705705706, "grad_norm": 1.070664078352659, "learning_rate": 4.193598202318537e-06, "loss": 0.3684, "step": 19067 }, { "epoch": 1.7894144144144144, "grad_norm": 1.0749343407246759, "learning_rate": 4.193059389410499e-06, "loss": 0.4326, "step": 19068 }, { "epoch": 1.7895082582582582, "grad_norm": 1.0299417921215024, "learning_rate": 4.1925205861237075e-06, "loss": 0.3973, "step": 19069 }, { "epoch": 1.7896021021021022, "grad_norm": 0.9773498117810024, "learning_rate": 4.191981792464586e-06, "loss": 0.4471, "step": 19070 }, { "epoch": 1.789695945945946, "grad_norm": 0.8264953396490053, "learning_rate": 4.191443008439563e-06, "loss": 0.3946, "step": 19071 }, { "epoch": 1.7897897897897899, "grad_norm": 0.8513873549357812, "learning_rate": 4.1909042340550596e-06, "loss": 0.348, "step": 19072 }, { "epoch": 1.7898836336336337, "grad_norm": 1.2911326459018353, "learning_rate": 4.190365469317499e-06, "loss": 0.3794, "step": 19073 }, { "epoch": 1.7899774774774775, "grad_norm": 0.9567139873186609, "learning_rate": 4.189826714233307e-06, "loss": 0.3601, "step": 19074 }, { "epoch": 1.7900713213213213, "grad_norm": 1.0256376714853717, "learning_rate": 4.189287968808909e-06, "loss": 0.3764, "step": 19075 }, { "epoch": 1.790165165165165, "grad_norm": 0.8758495358710683, "learning_rate": 4.188749233050722e-06, "loss": 0.4142, "step": 19076 }, { "epoch": 1.790259009009009, "grad_norm": 0.9649455313642453, "learning_rate": 4.188210506965175e-06, "loss": 0.4034, "step": 19077 }, { "epoch": 1.7903528528528527, "grad_norm": 1.0027965957050102, "learning_rate": 4.18767179055869e-06, "loss": 0.4032, "step": 19078 }, { "epoch": 1.7904466966966965, "grad_norm": 1.0470488952975487, "learning_rate": 4.1871330838376884e-06, "loss": 0.3876, "step": 19079 }, { "epoch": 1.7905405405405406, "grad_norm": 0.8387864802837626, "learning_rate": 4.186594386808596e-06, "loss": 0.3639, "step": 19080 }, { "epoch": 1.7906343843843844, "grad_norm": 1.1127511215393375, "learning_rate": 4.186055699477835e-06, "loss": 0.4022, "step": 19081 }, { "epoch": 1.7907282282282282, "grad_norm": 0.9430568425807024, "learning_rate": 4.185517021851826e-06, "loss": 0.3508, "step": 19082 }, { "epoch": 1.7908220720720722, "grad_norm": 0.9756818579679634, "learning_rate": 4.184978353936996e-06, "loss": 0.3698, "step": 19083 }, { "epoch": 1.790915915915916, "grad_norm": 0.9859138202289212, "learning_rate": 4.184439695739763e-06, "loss": 0.4092, "step": 19084 }, { "epoch": 1.7910097597597598, "grad_norm": 0.9345699835499426, "learning_rate": 4.183901047266551e-06, "loss": 0.3598, "step": 19085 }, { "epoch": 1.7911036036036037, "grad_norm": 0.8695923278265829, "learning_rate": 4.183362408523784e-06, "loss": 0.3708, "step": 19086 }, { "epoch": 1.7911974474474475, "grad_norm": 0.86998702529383, "learning_rate": 4.1828237795178835e-06, "loss": 0.4095, "step": 19087 }, { "epoch": 1.7912912912912913, "grad_norm": 0.9481079504118323, "learning_rate": 4.18228516025527e-06, "loss": 0.3782, "step": 19088 }, { "epoch": 1.791385135135135, "grad_norm": 0.8697645736665321, "learning_rate": 4.181746550742367e-06, "loss": 0.3955, "step": 19089 }, { "epoch": 1.791478978978979, "grad_norm": 12.139636585764633, "learning_rate": 4.181207950985598e-06, "loss": 0.3368, "step": 19090 }, { "epoch": 1.7915728228228227, "grad_norm": 0.923289336769107, "learning_rate": 4.1806693609913795e-06, "loss": 0.3934, "step": 19091 }, { "epoch": 1.7916666666666665, "grad_norm": 1.0385348348016787, "learning_rate": 4.18013078076614e-06, "loss": 0.3791, "step": 19092 }, { "epoch": 1.7917605105105106, "grad_norm": 0.8650189959421563, "learning_rate": 4.179592210316297e-06, "loss": 0.3963, "step": 19093 }, { "epoch": 1.7918543543543544, "grad_norm": 0.9002878743504915, "learning_rate": 4.179053649648271e-06, "loss": 0.4235, "step": 19094 }, { "epoch": 1.7919481981981982, "grad_norm": 1.270654252294935, "learning_rate": 4.178515098768486e-06, "loss": 0.4032, "step": 19095 }, { "epoch": 1.7920420420420422, "grad_norm": 0.8995830948929682, "learning_rate": 4.177976557683363e-06, "loss": 0.3436, "step": 19096 }, { "epoch": 1.792135885885886, "grad_norm": 0.8753206929214208, "learning_rate": 4.17743802639932e-06, "loss": 0.386, "step": 19097 }, { "epoch": 1.7922297297297298, "grad_norm": 1.2232421985464297, "learning_rate": 4.176899504922782e-06, "loss": 0.4002, "step": 19098 }, { "epoch": 1.7923235735735736, "grad_norm": 1.0571923028036108, "learning_rate": 4.176360993260168e-06, "loss": 0.4354, "step": 19099 }, { "epoch": 1.7924174174174174, "grad_norm": 0.9355284637663277, "learning_rate": 4.175822491417898e-06, "loss": 0.3778, "step": 19100 }, { "epoch": 1.7925112612612613, "grad_norm": 1.2000674496771075, "learning_rate": 4.175283999402394e-06, "loss": 0.3733, "step": 19101 }, { "epoch": 1.792605105105105, "grad_norm": 1.403249986687085, "learning_rate": 4.174745517220077e-06, "loss": 0.3817, "step": 19102 }, { "epoch": 1.7926989489489489, "grad_norm": 0.939607955758325, "learning_rate": 4.174207044877363e-06, "loss": 0.3688, "step": 19103 }, { "epoch": 1.7927927927927927, "grad_norm": 0.8907649982100224, "learning_rate": 4.173668582380679e-06, "loss": 0.4208, "step": 19104 }, { "epoch": 1.7928866366366365, "grad_norm": 1.2148069947766291, "learning_rate": 4.17313012973644e-06, "loss": 0.3729, "step": 19105 }, { "epoch": 1.7929804804804805, "grad_norm": 0.9775617681035044, "learning_rate": 4.172591686951068e-06, "loss": 0.4513, "step": 19106 }, { "epoch": 1.7930743243243243, "grad_norm": 1.0112988845685864, "learning_rate": 4.172053254030984e-06, "loss": 0.434, "step": 19107 }, { "epoch": 1.7931681681681682, "grad_norm": 1.0268675540585062, "learning_rate": 4.171514830982606e-06, "loss": 0.4081, "step": 19108 }, { "epoch": 1.793262012012012, "grad_norm": 0.9263749620078073, "learning_rate": 4.170976417812352e-06, "loss": 0.3948, "step": 19109 }, { "epoch": 1.793355855855856, "grad_norm": 4.864058596233097, "learning_rate": 4.170438014526646e-06, "loss": 0.3835, "step": 19110 }, { "epoch": 1.7934496996996998, "grad_norm": 0.9332853569221004, "learning_rate": 4.169899621131906e-06, "loss": 0.4355, "step": 19111 }, { "epoch": 1.7935435435435436, "grad_norm": 0.8711891031622083, "learning_rate": 4.169361237634548e-06, "loss": 0.3818, "step": 19112 }, { "epoch": 1.7936373873873874, "grad_norm": 0.933584130497991, "learning_rate": 4.168822864040995e-06, "loss": 0.4086, "step": 19113 }, { "epoch": 1.7937312312312312, "grad_norm": 0.9577264800626202, "learning_rate": 4.168284500357665e-06, "loss": 0.38, "step": 19114 }, { "epoch": 1.793825075075075, "grad_norm": 1.0005949346088903, "learning_rate": 4.167746146590975e-06, "loss": 0.426, "step": 19115 }, { "epoch": 1.7939189189189189, "grad_norm": 0.8812543590155657, "learning_rate": 4.167207802747348e-06, "loss": 0.4037, "step": 19116 }, { "epoch": 1.7940127627627627, "grad_norm": 0.8980322826190706, "learning_rate": 4.1666694688332e-06, "loss": 0.4106, "step": 19117 }, { "epoch": 1.7941066066066065, "grad_norm": 1.1467414649919974, "learning_rate": 4.166131144854949e-06, "loss": 0.4044, "step": 19118 }, { "epoch": 1.7942004504504503, "grad_norm": 0.9115335793185738, "learning_rate": 4.165592830819015e-06, "loss": 0.3756, "step": 19119 }, { "epoch": 1.7942942942942943, "grad_norm": 0.9190158313160288, "learning_rate": 4.165054526731817e-06, "loss": 0.3889, "step": 19120 }, { "epoch": 1.7943881381381381, "grad_norm": 2.098093375065292, "learning_rate": 4.164516232599769e-06, "loss": 0.3885, "step": 19121 }, { "epoch": 1.794481981981982, "grad_norm": 1.1376498188849615, "learning_rate": 4.163977948429294e-06, "loss": 0.3601, "step": 19122 }, { "epoch": 1.794575825825826, "grad_norm": 0.9493020867067411, "learning_rate": 4.16343967422681e-06, "loss": 0.4018, "step": 19123 }, { "epoch": 1.7946696696696698, "grad_norm": 0.9891066652312605, "learning_rate": 4.1629014099987305e-06, "loss": 0.3872, "step": 19124 }, { "epoch": 1.7947635135135136, "grad_norm": 1.2498859842490138, "learning_rate": 4.162363155751479e-06, "loss": 0.423, "step": 19125 }, { "epoch": 1.7948573573573574, "grad_norm": 0.8726029985062191, "learning_rate": 4.161824911491469e-06, "loss": 0.4217, "step": 19126 }, { "epoch": 1.7949512012012012, "grad_norm": 0.9716211883187077, "learning_rate": 4.161286677225117e-06, "loss": 0.3894, "step": 19127 }, { "epoch": 1.795045045045045, "grad_norm": 0.9384168775047697, "learning_rate": 4.1607484529588445e-06, "loss": 0.3756, "step": 19128 }, { "epoch": 1.7951388888888888, "grad_norm": 0.9023889421806016, "learning_rate": 4.160210238699067e-06, "loss": 0.3938, "step": 19129 }, { "epoch": 1.7952327327327327, "grad_norm": 0.9028135591986136, "learning_rate": 4.159672034452201e-06, "loss": 0.3807, "step": 19130 }, { "epoch": 1.7953265765765765, "grad_norm": 0.748442755481733, "learning_rate": 4.159133840224666e-06, "loss": 0.4007, "step": 19131 }, { "epoch": 1.7954204204204203, "grad_norm": 0.9297870040455372, "learning_rate": 4.1585956560228765e-06, "loss": 0.329, "step": 19132 }, { "epoch": 1.7955142642642643, "grad_norm": 0.8971078723414946, "learning_rate": 4.158057481853248e-06, "loss": 0.3686, "step": 19133 }, { "epoch": 1.7956081081081081, "grad_norm": 0.9458575901703201, "learning_rate": 4.157519317722201e-06, "loss": 0.4682, "step": 19134 }, { "epoch": 1.795701951951952, "grad_norm": 0.8859673340202101, "learning_rate": 4.15698116363615e-06, "loss": 0.3903, "step": 19135 }, { "epoch": 1.795795795795796, "grad_norm": 1.0276919437275105, "learning_rate": 4.156443019601511e-06, "loss": 0.3779, "step": 19136 }, { "epoch": 1.7958896396396398, "grad_norm": 0.9270837279162787, "learning_rate": 4.155904885624703e-06, "loss": 0.4033, "step": 19137 }, { "epoch": 1.7959834834834836, "grad_norm": 0.953171066136692, "learning_rate": 4.1553667617121405e-06, "loss": 0.381, "step": 19138 }, { "epoch": 1.7960773273273274, "grad_norm": 1.001632070351741, "learning_rate": 4.154828647870236e-06, "loss": 0.4163, "step": 19139 }, { "epoch": 1.7961711711711712, "grad_norm": 1.019626422566985, "learning_rate": 4.154290544105413e-06, "loss": 0.4087, "step": 19140 }, { "epoch": 1.796265015015015, "grad_norm": 1.0924847650579161, "learning_rate": 4.1537524504240815e-06, "loss": 0.4058, "step": 19141 }, { "epoch": 1.7963588588588588, "grad_norm": 0.9021035178250757, "learning_rate": 4.153214366832659e-06, "loss": 0.3416, "step": 19142 }, { "epoch": 1.7964527027027026, "grad_norm": 0.9994975203862175, "learning_rate": 4.1526762933375625e-06, "loss": 0.3465, "step": 19143 }, { "epoch": 1.7965465465465464, "grad_norm": 0.9622115912991802, "learning_rate": 4.152138229945206e-06, "loss": 0.4332, "step": 19144 }, { "epoch": 1.7966403903903903, "grad_norm": 1.0063742163301654, "learning_rate": 4.151600176662003e-06, "loss": 0.4054, "step": 19145 }, { "epoch": 1.7967342342342343, "grad_norm": 0.8467766703396538, "learning_rate": 4.151062133494373e-06, "loss": 0.3998, "step": 19146 }, { "epoch": 1.796828078078078, "grad_norm": 0.8753203034296447, "learning_rate": 4.150524100448729e-06, "loss": 0.3873, "step": 19147 }, { "epoch": 1.796921921921922, "grad_norm": 0.9242659017218768, "learning_rate": 4.1499860775314855e-06, "loss": 0.3662, "step": 19148 }, { "epoch": 1.7970157657657657, "grad_norm": 0.8509273052766613, "learning_rate": 4.149448064749059e-06, "loss": 0.4343, "step": 19149 }, { "epoch": 1.7971096096096097, "grad_norm": 0.8925771604079041, "learning_rate": 4.148910062107863e-06, "loss": 0.3599, "step": 19150 }, { "epoch": 1.7972034534534536, "grad_norm": 1.1008160231571522, "learning_rate": 4.14837206961431e-06, "loss": 0.3556, "step": 19151 }, { "epoch": 1.7972972972972974, "grad_norm": 1.159740199770995, "learning_rate": 4.14783408727482e-06, "loss": 0.4063, "step": 19152 }, { "epoch": 1.7973911411411412, "grad_norm": 1.3610272158405152, "learning_rate": 4.147296115095805e-06, "loss": 0.4365, "step": 19153 }, { "epoch": 1.797484984984985, "grad_norm": 1.4806371649236487, "learning_rate": 4.146758153083677e-06, "loss": 0.3694, "step": 19154 }, { "epoch": 1.7975788288288288, "grad_norm": 5.255125351959506, "learning_rate": 4.146220201244853e-06, "loss": 0.4037, "step": 19155 }, { "epoch": 1.7976726726726726, "grad_norm": 0.957696223150261, "learning_rate": 4.145682259585745e-06, "loss": 0.3744, "step": 19156 }, { "epoch": 1.7977665165165164, "grad_norm": 0.865265292702777, "learning_rate": 4.145144328112767e-06, "loss": 0.4164, "step": 19157 }, { "epoch": 1.7978603603603602, "grad_norm": 0.9336598535108436, "learning_rate": 4.144606406832336e-06, "loss": 0.3638, "step": 19158 }, { "epoch": 1.797954204204204, "grad_norm": 1.0152855444754034, "learning_rate": 4.144068495750863e-06, "loss": 0.3851, "step": 19159 }, { "epoch": 1.798048048048048, "grad_norm": 0.9224934239327723, "learning_rate": 4.1435305948747614e-06, "loss": 0.411, "step": 19160 }, { "epoch": 1.7981418918918919, "grad_norm": 0.8589640022021791, "learning_rate": 4.142992704210446e-06, "loss": 0.3724, "step": 19161 }, { "epoch": 1.7982357357357357, "grad_norm": 0.9391233483375055, "learning_rate": 4.142454823764329e-06, "loss": 0.3895, "step": 19162 }, { "epoch": 1.7983295795795797, "grad_norm": 1.0244252789518713, "learning_rate": 4.141916953542824e-06, "loss": 0.386, "step": 19163 }, { "epoch": 1.7984234234234235, "grad_norm": 0.9110964309330037, "learning_rate": 4.141379093552344e-06, "loss": 0.3503, "step": 19164 }, { "epoch": 1.7985172672672673, "grad_norm": 0.9518778291447656, "learning_rate": 4.1408412437993035e-06, "loss": 0.3706, "step": 19165 }, { "epoch": 1.7986111111111112, "grad_norm": 1.0980317080059252, "learning_rate": 4.140303404290112e-06, "loss": 0.4025, "step": 19166 }, { "epoch": 1.798704954954955, "grad_norm": 0.864843384650212, "learning_rate": 4.139765575031186e-06, "loss": 0.4276, "step": 19167 }, { "epoch": 1.7987987987987988, "grad_norm": 0.9371187642622771, "learning_rate": 4.139227756028936e-06, "loss": 0.3943, "step": 19168 }, { "epoch": 1.7988926426426426, "grad_norm": 0.9376916957332727, "learning_rate": 4.1386899472897724e-06, "loss": 0.4263, "step": 19169 }, { "epoch": 1.7989864864864864, "grad_norm": 0.9937163773406781, "learning_rate": 4.138152148820112e-06, "loss": 0.4259, "step": 19170 }, { "epoch": 1.7990803303303302, "grad_norm": 0.9450193299211166, "learning_rate": 4.137614360626365e-06, "loss": 0.3787, "step": 19171 }, { "epoch": 1.799174174174174, "grad_norm": 0.9234193549528987, "learning_rate": 4.137076582714943e-06, "loss": 0.4209, "step": 19172 }, { "epoch": 1.799268018018018, "grad_norm": 1.1939282824798547, "learning_rate": 4.136538815092259e-06, "loss": 0.3827, "step": 19173 }, { "epoch": 1.7993618618618619, "grad_norm": 0.8981117165096354, "learning_rate": 4.136001057764725e-06, "loss": 0.3651, "step": 19174 }, { "epoch": 1.7994557057057057, "grad_norm": 0.8947763729044933, "learning_rate": 4.135463310738749e-06, "loss": 0.3715, "step": 19175 }, { "epoch": 1.7995495495495497, "grad_norm": 0.892957206362172, "learning_rate": 4.134925574020749e-06, "loss": 0.3786, "step": 19176 }, { "epoch": 1.7996433933933935, "grad_norm": 1.077757087175202, "learning_rate": 4.1343878476171315e-06, "loss": 0.3781, "step": 19177 }, { "epoch": 1.7997372372372373, "grad_norm": 1.0486549830853693, "learning_rate": 4.133850131534309e-06, "loss": 0.3864, "step": 19178 }, { "epoch": 1.7998310810810811, "grad_norm": 1.056085372956427, "learning_rate": 4.1333124257786956e-06, "loss": 0.4048, "step": 19179 }, { "epoch": 1.799924924924925, "grad_norm": 0.853710003536852, "learning_rate": 4.132774730356699e-06, "loss": 0.3929, "step": 19180 }, { "epoch": 1.8000187687687688, "grad_norm": 0.8758210100358351, "learning_rate": 4.13223704527473e-06, "loss": 0.3928, "step": 19181 }, { "epoch": 1.8001126126126126, "grad_norm": 1.1464774108598454, "learning_rate": 4.131699370539203e-06, "loss": 0.3706, "step": 19182 }, { "epoch": 1.8002064564564564, "grad_norm": 1.018805349582556, "learning_rate": 4.131161706156526e-06, "loss": 0.4482, "step": 19183 }, { "epoch": 1.8003003003003002, "grad_norm": 2.3280070596944387, "learning_rate": 4.130624052133109e-06, "loss": 0.3635, "step": 19184 }, { "epoch": 1.800394144144144, "grad_norm": 1.0272652029088774, "learning_rate": 4.130086408475365e-06, "loss": 0.403, "step": 19185 }, { "epoch": 1.800487987987988, "grad_norm": 1.0041677574832617, "learning_rate": 4.129548775189704e-06, "loss": 0.3521, "step": 19186 }, { "epoch": 1.8005818318318318, "grad_norm": 0.9439392187368892, "learning_rate": 4.1290111522825325e-06, "loss": 0.4158, "step": 19187 }, { "epoch": 1.8006756756756757, "grad_norm": 0.982255673659015, "learning_rate": 4.128473539760266e-06, "loss": 0.3998, "step": 19188 }, { "epoch": 1.8007695195195195, "grad_norm": 0.8897838232334008, "learning_rate": 4.127935937629311e-06, "loss": 0.4375, "step": 19189 }, { "epoch": 1.8008633633633635, "grad_norm": 0.9741498785337667, "learning_rate": 4.127398345896077e-06, "loss": 0.3711, "step": 19190 }, { "epoch": 1.8009572072072073, "grad_norm": 1.6862547251291582, "learning_rate": 4.126860764566979e-06, "loss": 0.3826, "step": 19191 }, { "epoch": 1.8010510510510511, "grad_norm": 0.8485070810078482, "learning_rate": 4.126323193648421e-06, "loss": 0.3994, "step": 19192 }, { "epoch": 1.801144894894895, "grad_norm": 1.1784554232229563, "learning_rate": 4.125785633146813e-06, "loss": 0.4107, "step": 19193 }, { "epoch": 1.8012387387387387, "grad_norm": 0.8720242568667519, "learning_rate": 4.125248083068567e-06, "loss": 0.3857, "step": 19194 }, { "epoch": 1.8013325825825826, "grad_norm": 0.9078773769298812, "learning_rate": 4.124710543420091e-06, "loss": 0.3139, "step": 19195 }, { "epoch": 1.8014264264264264, "grad_norm": 0.9020104906902212, "learning_rate": 4.124173014207793e-06, "loss": 0.4144, "step": 19196 }, { "epoch": 1.8015202702702702, "grad_norm": 1.0715705733610421, "learning_rate": 4.123635495438085e-06, "loss": 0.4105, "step": 19197 }, { "epoch": 1.801614114114114, "grad_norm": 1.5338950661620994, "learning_rate": 4.123097987117375e-06, "loss": 0.3641, "step": 19198 }, { "epoch": 1.8017079579579578, "grad_norm": 3.3889577451287725, "learning_rate": 4.122560489252069e-06, "loss": 0.3793, "step": 19199 }, { "epoch": 1.8018018018018018, "grad_norm": 1.1763535681261827, "learning_rate": 4.122023001848578e-06, "loss": 0.3631, "step": 19200 }, { "epoch": 1.8018956456456456, "grad_norm": 0.8670329667771297, "learning_rate": 4.121485524913311e-06, "loss": 0.4166, "step": 19201 }, { "epoch": 1.8019894894894894, "grad_norm": 0.8250072872242191, "learning_rate": 4.1209480584526725e-06, "loss": 0.3817, "step": 19202 }, { "epoch": 1.8020833333333335, "grad_norm": 0.906160855415529, "learning_rate": 4.120410602473077e-06, "loss": 0.4107, "step": 19203 }, { "epoch": 1.8021771771771773, "grad_norm": 1.005063922794613, "learning_rate": 4.119873156980928e-06, "loss": 0.3771, "step": 19204 }, { "epoch": 1.802271021021021, "grad_norm": 1.023832987877061, "learning_rate": 4.119335721982635e-06, "loss": 0.4049, "step": 19205 }, { "epoch": 1.802364864864865, "grad_norm": 0.9815679443001891, "learning_rate": 4.1187982974846065e-06, "loss": 0.3682, "step": 19206 }, { "epoch": 1.8024587087087087, "grad_norm": 0.8423603949436661, "learning_rate": 4.11826088349325e-06, "loss": 0.4048, "step": 19207 }, { "epoch": 1.8025525525525525, "grad_norm": 0.9398027161918859, "learning_rate": 4.1177234800149705e-06, "loss": 0.3744, "step": 19208 }, { "epoch": 1.8026463963963963, "grad_norm": 0.8114986842207597, "learning_rate": 4.11718608705618e-06, "loss": 0.406, "step": 19209 }, { "epoch": 1.8027402402402402, "grad_norm": 1.0681514465282869, "learning_rate": 4.116648704623283e-06, "loss": 0.425, "step": 19210 }, { "epoch": 1.802834084084084, "grad_norm": 3.623160110901072, "learning_rate": 4.116111332722687e-06, "loss": 0.3425, "step": 19211 }, { "epoch": 1.8029279279279278, "grad_norm": 0.9701565158531388, "learning_rate": 4.1155739713608e-06, "loss": 0.391, "step": 19212 }, { "epoch": 1.8030217717717718, "grad_norm": 0.9845110397399809, "learning_rate": 4.11503662054403e-06, "loss": 0.4356, "step": 19213 }, { "epoch": 1.8031156156156156, "grad_norm": 1.0110451801235236, "learning_rate": 4.114499280278779e-06, "loss": 0.4159, "step": 19214 }, { "epoch": 1.8032094594594594, "grad_norm": 3.292775853521228, "learning_rate": 4.11396195057146e-06, "loss": 0.3846, "step": 19215 }, { "epoch": 1.8033033033033035, "grad_norm": 1.7522256306039932, "learning_rate": 4.113424631428477e-06, "loss": 0.4258, "step": 19216 }, { "epoch": 1.8033971471471473, "grad_norm": 0.9914931462130935, "learning_rate": 4.112887322856235e-06, "loss": 0.3827, "step": 19217 }, { "epoch": 1.803490990990991, "grad_norm": 1.1880269629626852, "learning_rate": 4.1123500248611434e-06, "loss": 0.3989, "step": 19218 }, { "epoch": 1.803584834834835, "grad_norm": 1.0193597848731821, "learning_rate": 4.111812737449607e-06, "loss": 0.4076, "step": 19219 }, { "epoch": 1.8036786786786787, "grad_norm": 0.8232683992141006, "learning_rate": 4.1112754606280295e-06, "loss": 0.383, "step": 19220 }, { "epoch": 1.8037725225225225, "grad_norm": 3.920432553983562, "learning_rate": 4.1107381944028215e-06, "loss": 0.3743, "step": 19221 }, { "epoch": 1.8038663663663663, "grad_norm": 0.9515622284218489, "learning_rate": 4.1102009387803866e-06, "loss": 0.362, "step": 19222 }, { "epoch": 1.8039602102102101, "grad_norm": 0.869617427714249, "learning_rate": 4.109663693767129e-06, "loss": 0.3713, "step": 19223 }, { "epoch": 1.804054054054054, "grad_norm": 1.0177465514143353, "learning_rate": 4.109126459369458e-06, "loss": 0.3714, "step": 19224 }, { "epoch": 1.8041478978978978, "grad_norm": 0.8540586889516756, "learning_rate": 4.108589235593776e-06, "loss": 0.3861, "step": 19225 }, { "epoch": 1.8042417417417418, "grad_norm": 0.9571593461889631, "learning_rate": 4.108052022446488e-06, "loss": 0.3882, "step": 19226 }, { "epoch": 1.8043355855855856, "grad_norm": 0.964975136706972, "learning_rate": 4.1075148199340035e-06, "loss": 0.3881, "step": 19227 }, { "epoch": 1.8044294294294294, "grad_norm": 0.9274559034356572, "learning_rate": 4.106977628062724e-06, "loss": 0.3951, "step": 19228 }, { "epoch": 1.8045232732732732, "grad_norm": 0.8661090629129052, "learning_rate": 4.106440446839054e-06, "loss": 0.3642, "step": 19229 }, { "epoch": 1.8046171171171173, "grad_norm": 1.0948313969191965, "learning_rate": 4.105903276269402e-06, "loss": 0.3958, "step": 19230 }, { "epoch": 1.804710960960961, "grad_norm": 1.0407811850616022, "learning_rate": 4.1053661163601694e-06, "loss": 0.3798, "step": 19231 }, { "epoch": 1.8048048048048049, "grad_norm": 1.0142394547340134, "learning_rate": 4.10482896711776e-06, "loss": 0.404, "step": 19232 }, { "epoch": 1.8048986486486487, "grad_norm": 2.225473345412825, "learning_rate": 4.104291828548582e-06, "loss": 0.4063, "step": 19233 }, { "epoch": 1.8049924924924925, "grad_norm": 1.2112512166626397, "learning_rate": 4.103754700659038e-06, "loss": 0.4017, "step": 19234 }, { "epoch": 1.8050863363363363, "grad_norm": 0.9352794825038049, "learning_rate": 4.1032175834555325e-06, "loss": 0.3966, "step": 19235 }, { "epoch": 1.8051801801801801, "grad_norm": 1.334340273999149, "learning_rate": 4.1026804769444684e-06, "loss": 0.3797, "step": 19236 }, { "epoch": 1.805274024024024, "grad_norm": 1.1584701870081398, "learning_rate": 4.102143381132252e-06, "loss": 0.3742, "step": 19237 }, { "epoch": 1.8053678678678677, "grad_norm": 0.8915860485204613, "learning_rate": 4.101606296025283e-06, "loss": 0.4135, "step": 19238 }, { "epoch": 1.8054617117117115, "grad_norm": 0.9367297399086101, "learning_rate": 4.101069221629969e-06, "loss": 0.3964, "step": 19239 }, { "epoch": 1.8055555555555556, "grad_norm": 0.886224359108874, "learning_rate": 4.100532157952714e-06, "loss": 0.3924, "step": 19240 }, { "epoch": 1.8056493993993994, "grad_norm": 0.8624676146946244, "learning_rate": 4.099995104999919e-06, "loss": 0.3961, "step": 19241 }, { "epoch": 1.8057432432432432, "grad_norm": 1.0098420093419946, "learning_rate": 4.099458062777989e-06, "loss": 0.3886, "step": 19242 }, { "epoch": 1.8058370870870872, "grad_norm": 0.9263687253562009, "learning_rate": 4.098921031293326e-06, "loss": 0.4065, "step": 19243 }, { "epoch": 1.805930930930931, "grad_norm": 1.2755770475937678, "learning_rate": 4.098384010552331e-06, "loss": 0.3674, "step": 19244 }, { "epoch": 1.8060247747747749, "grad_norm": 0.9087925726365101, "learning_rate": 4.097847000561413e-06, "loss": 0.3621, "step": 19245 }, { "epoch": 1.8061186186186187, "grad_norm": 1.1773895449690788, "learning_rate": 4.097310001326971e-06, "loss": 0.3632, "step": 19246 }, { "epoch": 1.8062124624624625, "grad_norm": 0.8743431319238453, "learning_rate": 4.096773012855407e-06, "loss": 0.345, "step": 19247 }, { "epoch": 1.8063063063063063, "grad_norm": 0.9243466731361993, "learning_rate": 4.096236035153125e-06, "loss": 0.3795, "step": 19248 }, { "epoch": 1.80640015015015, "grad_norm": 1.2908174735824773, "learning_rate": 4.095699068226528e-06, "loss": 0.4461, "step": 19249 }, { "epoch": 1.806493993993994, "grad_norm": 1.0975813293108427, "learning_rate": 4.095162112082015e-06, "loss": 0.3625, "step": 19250 }, { "epoch": 1.8065878378378377, "grad_norm": 0.973976822379838, "learning_rate": 4.0946251667259925e-06, "loss": 0.3574, "step": 19251 }, { "epoch": 1.8066816816816815, "grad_norm": 0.9248858997797131, "learning_rate": 4.094088232164861e-06, "loss": 0.3939, "step": 19252 }, { "epoch": 1.8067755255255256, "grad_norm": 0.9526982636290527, "learning_rate": 4.093551308405021e-06, "loss": 0.4085, "step": 19253 }, { "epoch": 1.8068693693693694, "grad_norm": 0.907683790419555, "learning_rate": 4.093014395452877e-06, "loss": 0.3776, "step": 19254 }, { "epoch": 1.8069632132132132, "grad_norm": 1.3536876663666841, "learning_rate": 4.092477493314828e-06, "loss": 0.3816, "step": 19255 }, { "epoch": 1.8070570570570572, "grad_norm": 0.9225514326441665, "learning_rate": 4.091940601997275e-06, "loss": 0.3929, "step": 19256 }, { "epoch": 1.807150900900901, "grad_norm": 1.8912992904347565, "learning_rate": 4.091403721506624e-06, "loss": 0.3902, "step": 19257 }, { "epoch": 1.8072447447447448, "grad_norm": 1.0377200191364617, "learning_rate": 4.090866851849273e-06, "loss": 0.4429, "step": 19258 }, { "epoch": 1.8073385885885886, "grad_norm": 0.9347109825264275, "learning_rate": 4.090329993031622e-06, "loss": 0.4108, "step": 19259 }, { "epoch": 1.8074324324324325, "grad_norm": 1.094070487906443, "learning_rate": 4.089793145060074e-06, "loss": 0.4093, "step": 19260 }, { "epoch": 1.8075262762762763, "grad_norm": 0.8225211411255651, "learning_rate": 4.089256307941031e-06, "loss": 0.3807, "step": 19261 }, { "epoch": 1.80762012012012, "grad_norm": 1.1130068538561217, "learning_rate": 4.088719481680889e-06, "loss": 0.4316, "step": 19262 }, { "epoch": 1.8077139639639639, "grad_norm": 0.9988979295487301, "learning_rate": 4.0881826662860546e-06, "loss": 0.3885, "step": 19263 }, { "epoch": 1.8078078078078077, "grad_norm": 0.7790296680208069, "learning_rate": 4.087645861762926e-06, "loss": 0.3567, "step": 19264 }, { "epoch": 1.8079016516516515, "grad_norm": 1.4370687681480165, "learning_rate": 4.087109068117901e-06, "loss": 0.3825, "step": 19265 }, { "epoch": 1.8079954954954955, "grad_norm": 0.8633330175769648, "learning_rate": 4.086572285357383e-06, "loss": 0.3689, "step": 19266 }, { "epoch": 1.8080893393393394, "grad_norm": 1.4646758610685227, "learning_rate": 4.086035513487772e-06, "loss": 0.4174, "step": 19267 }, { "epoch": 1.8081831831831832, "grad_norm": 1.0251442544925202, "learning_rate": 4.085498752515465e-06, "loss": 0.3981, "step": 19268 }, { "epoch": 1.808277027027027, "grad_norm": 0.8819062990186994, "learning_rate": 4.084962002446866e-06, "loss": 0.3681, "step": 19269 }, { "epoch": 1.808370870870871, "grad_norm": 0.9023996256081894, "learning_rate": 4.084425263288372e-06, "loss": 0.4041, "step": 19270 }, { "epoch": 1.8084647147147148, "grad_norm": 0.9119445539300473, "learning_rate": 4.083888535046383e-06, "loss": 0.4226, "step": 19271 }, { "epoch": 1.8085585585585586, "grad_norm": 1.0067765114290343, "learning_rate": 4.0833518177272995e-06, "loss": 0.4302, "step": 19272 }, { "epoch": 1.8086524024024024, "grad_norm": 0.9420095086692087, "learning_rate": 4.082815111337521e-06, "loss": 0.4128, "step": 19273 }, { "epoch": 1.8087462462462462, "grad_norm": 0.973051273571718, "learning_rate": 4.082278415883443e-06, "loss": 0.3563, "step": 19274 }, { "epoch": 1.80884009009009, "grad_norm": 0.9887562314006371, "learning_rate": 4.08174173137147e-06, "loss": 0.4283, "step": 19275 }, { "epoch": 1.8089339339339339, "grad_norm": 1.1855484787248434, "learning_rate": 4.0812050578079975e-06, "loss": 0.3886, "step": 19276 }, { "epoch": 1.8090277777777777, "grad_norm": 1.1983605186944288, "learning_rate": 4.080668395199425e-06, "loss": 0.4063, "step": 19277 }, { "epoch": 1.8091216216216215, "grad_norm": 0.879285368330778, "learning_rate": 4.080131743552153e-06, "loss": 0.3803, "step": 19278 }, { "epoch": 1.8092154654654653, "grad_norm": 1.0165312650092244, "learning_rate": 4.079595102872577e-06, "loss": 0.4028, "step": 19279 }, { "epoch": 1.8093093093093093, "grad_norm": 1.0418097483920281, "learning_rate": 4.079058473167096e-06, "loss": 0.4179, "step": 19280 }, { "epoch": 1.8094031531531531, "grad_norm": 0.7681470585627497, "learning_rate": 4.07852185444211e-06, "loss": 0.3384, "step": 19281 }, { "epoch": 1.809496996996997, "grad_norm": 0.9799797670255589, "learning_rate": 4.077985246704018e-06, "loss": 0.4277, "step": 19282 }, { "epoch": 1.809590840840841, "grad_norm": 0.9796972544984922, "learning_rate": 4.077448649959215e-06, "loss": 0.4332, "step": 19283 }, { "epoch": 1.8096846846846848, "grad_norm": 1.1718195339631157, "learning_rate": 4.0769120642141e-06, "loss": 0.437, "step": 19284 }, { "epoch": 1.8097785285285286, "grad_norm": 0.9223543617691472, "learning_rate": 4.076375489475073e-06, "loss": 0.4061, "step": 19285 }, { "epoch": 1.8098723723723724, "grad_norm": 0.9625075110841259, "learning_rate": 4.075838925748526e-06, "loss": 0.4298, "step": 19286 }, { "epoch": 1.8099662162162162, "grad_norm": 0.8290109712406284, "learning_rate": 4.075302373040863e-06, "loss": 0.4148, "step": 19287 }, { "epoch": 1.81006006006006, "grad_norm": 1.0554513002060786, "learning_rate": 4.074765831358479e-06, "loss": 0.4044, "step": 19288 }, { "epoch": 1.8101539039039038, "grad_norm": 0.9395734144752202, "learning_rate": 4.07422930070777e-06, "loss": 0.4222, "step": 19289 }, { "epoch": 1.8102477477477477, "grad_norm": 0.9121604744311634, "learning_rate": 4.073692781095134e-06, "loss": 0.3766, "step": 19290 }, { "epoch": 1.8103415915915915, "grad_norm": 0.9414020565203997, "learning_rate": 4.073156272526969e-06, "loss": 0.4098, "step": 19291 }, { "epoch": 1.8104354354354353, "grad_norm": 1.2999847057602794, "learning_rate": 4.072619775009669e-06, "loss": 0.3907, "step": 19292 }, { "epoch": 1.8105292792792793, "grad_norm": 1.0207577730294928, "learning_rate": 4.072083288549634e-06, "loss": 0.4034, "step": 19293 }, { "epoch": 1.8106231231231231, "grad_norm": 0.8884309231370918, "learning_rate": 4.071546813153259e-06, "loss": 0.3629, "step": 19294 }, { "epoch": 1.810716966966967, "grad_norm": 0.8480817882354271, "learning_rate": 4.071010348826939e-06, "loss": 0.3806, "step": 19295 }, { "epoch": 1.810810810810811, "grad_norm": 0.9468192209917232, "learning_rate": 4.070473895577074e-06, "loss": 0.3783, "step": 19296 }, { "epoch": 1.8109046546546548, "grad_norm": 0.9601089924888364, "learning_rate": 4.069937453410058e-06, "loss": 0.3653, "step": 19297 }, { "epoch": 1.8109984984984986, "grad_norm": 1.0632781726768479, "learning_rate": 4.069401022332285e-06, "loss": 0.411, "step": 19298 }, { "epoch": 1.8110923423423424, "grad_norm": 1.0583813838877096, "learning_rate": 4.068864602350155e-06, "loss": 0.394, "step": 19299 }, { "epoch": 1.8111861861861862, "grad_norm": 0.9221114998423066, "learning_rate": 4.068328193470062e-06, "loss": 0.3709, "step": 19300 }, { "epoch": 1.81128003003003, "grad_norm": 1.005520604860257, "learning_rate": 4.067791795698402e-06, "loss": 0.3944, "step": 19301 }, { "epoch": 1.8113738738738738, "grad_norm": 0.9882815935310222, "learning_rate": 4.067255409041569e-06, "loss": 0.383, "step": 19302 }, { "epoch": 1.8114677177177176, "grad_norm": 1.3427167575788248, "learning_rate": 4.066719033505961e-06, "loss": 0.3827, "step": 19303 }, { "epoch": 1.8115615615615615, "grad_norm": 1.07292546585932, "learning_rate": 4.0661826690979685e-06, "loss": 0.3818, "step": 19304 }, { "epoch": 1.8116554054054053, "grad_norm": 1.0310523872991877, "learning_rate": 4.065646315823994e-06, "loss": 0.4396, "step": 19305 }, { "epoch": 1.8117492492492493, "grad_norm": 0.9404888927285795, "learning_rate": 4.065109973690427e-06, "loss": 0.354, "step": 19306 }, { "epoch": 1.811843093093093, "grad_norm": 0.8854379028354364, "learning_rate": 4.064573642703662e-06, "loss": 0.422, "step": 19307 }, { "epoch": 1.811936936936937, "grad_norm": 0.894240707751233, "learning_rate": 4.064037322870098e-06, "loss": 0.3577, "step": 19308 }, { "epoch": 1.8120307807807807, "grad_norm": 1.0172530742511887, "learning_rate": 4.063501014196127e-06, "loss": 0.3684, "step": 19309 }, { "epoch": 1.8121246246246248, "grad_norm": 0.9680787825768378, "learning_rate": 4.062964716688141e-06, "loss": 0.4366, "step": 19310 }, { "epoch": 1.8122184684684686, "grad_norm": 0.9771883711820168, "learning_rate": 4.062428430352539e-06, "loss": 0.3326, "step": 19311 }, { "epoch": 1.8123123123123124, "grad_norm": 0.958704542695812, "learning_rate": 4.061892155195713e-06, "loss": 0.3609, "step": 19312 }, { "epoch": 1.8124061561561562, "grad_norm": 0.8940559883499112, "learning_rate": 4.0613558912240554e-06, "loss": 0.3849, "step": 19313 }, { "epoch": 1.8125, "grad_norm": 1.1348917973888621, "learning_rate": 4.060819638443966e-06, "loss": 0.3962, "step": 19314 }, { "epoch": 1.8125938438438438, "grad_norm": 0.8513851125521995, "learning_rate": 4.0602833968618325e-06, "loss": 0.3733, "step": 19315 }, { "epoch": 1.8126876876876876, "grad_norm": 0.9444641513813719, "learning_rate": 4.059747166484049e-06, "loss": 0.4365, "step": 19316 }, { "epoch": 1.8127815315315314, "grad_norm": 0.9252371147108988, "learning_rate": 4.059210947317013e-06, "loss": 0.3908, "step": 19317 }, { "epoch": 1.8128753753753752, "grad_norm": 0.948171241117714, "learning_rate": 4.058674739367116e-06, "loss": 0.3982, "step": 19318 }, { "epoch": 1.8129692192192193, "grad_norm": 1.177001290112401, "learning_rate": 4.058138542640747e-06, "loss": 0.414, "step": 19319 }, { "epoch": 1.813063063063063, "grad_norm": 0.8509109583143641, "learning_rate": 4.057602357144307e-06, "loss": 0.3931, "step": 19320 }, { "epoch": 1.813156906906907, "grad_norm": 1.0224024965492458, "learning_rate": 4.057066182884185e-06, "loss": 0.3892, "step": 19321 }, { "epoch": 1.8132507507507507, "grad_norm": 0.8780616025299867, "learning_rate": 4.056530019866771e-06, "loss": 0.4358, "step": 19322 }, { "epoch": 1.8133445945945947, "grad_norm": 1.02880168890024, "learning_rate": 4.0559938680984625e-06, "loss": 0.3976, "step": 19323 }, { "epoch": 1.8134384384384385, "grad_norm": 1.0505192964584118, "learning_rate": 4.0554577275856505e-06, "loss": 0.427, "step": 19324 }, { "epoch": 1.8135322822822824, "grad_norm": 0.9670356643556958, "learning_rate": 4.054921598334725e-06, "loss": 0.4056, "step": 19325 }, { "epoch": 1.8136261261261262, "grad_norm": 1.0257487285645475, "learning_rate": 4.054385480352082e-06, "loss": 0.3853, "step": 19326 }, { "epoch": 1.81371996996997, "grad_norm": 0.9226433826486748, "learning_rate": 4.053849373644113e-06, "loss": 0.4186, "step": 19327 }, { "epoch": 1.8138138138138138, "grad_norm": 1.1557939820538974, "learning_rate": 4.053313278217209e-06, "loss": 0.358, "step": 19328 }, { "epoch": 1.8139076576576576, "grad_norm": 1.6154572980171313, "learning_rate": 4.0527771940777615e-06, "loss": 0.4103, "step": 19329 }, { "epoch": 1.8140015015015014, "grad_norm": 1.0169296740661447, "learning_rate": 4.052241121232164e-06, "loss": 0.3783, "step": 19330 }, { "epoch": 1.8140953453453452, "grad_norm": 0.9618564334119525, "learning_rate": 4.051705059686804e-06, "loss": 0.3932, "step": 19331 }, { "epoch": 1.814189189189189, "grad_norm": 0.7867975096627604, "learning_rate": 4.0511690094480796e-06, "loss": 0.3527, "step": 19332 }, { "epoch": 1.814283033033033, "grad_norm": 0.9220235267514606, "learning_rate": 4.0506329705223785e-06, "loss": 0.354, "step": 19333 }, { "epoch": 1.8143768768768769, "grad_norm": 1.0540095399592377, "learning_rate": 4.05009694291609e-06, "loss": 0.442, "step": 19334 }, { "epoch": 1.8144707207207207, "grad_norm": 1.085412998408329, "learning_rate": 4.04956092663561e-06, "loss": 0.3893, "step": 19335 }, { "epoch": 1.8145645645645647, "grad_norm": 1.395381358318808, "learning_rate": 4.049024921687326e-06, "loss": 0.3406, "step": 19336 }, { "epoch": 1.8146584084084085, "grad_norm": 0.9084056184120594, "learning_rate": 4.048488928077629e-06, "loss": 0.3742, "step": 19337 }, { "epoch": 1.8147522522522523, "grad_norm": 0.9863839745035707, "learning_rate": 4.047952945812912e-06, "loss": 0.3827, "step": 19338 }, { "epoch": 1.8148460960960962, "grad_norm": 0.8738016880525067, "learning_rate": 4.047416974899564e-06, "loss": 0.3604, "step": 19339 }, { "epoch": 1.81493993993994, "grad_norm": 1.0296404833816082, "learning_rate": 4.046881015343974e-06, "loss": 0.404, "step": 19340 }, { "epoch": 1.8150337837837838, "grad_norm": 0.9622533207476494, "learning_rate": 4.046345067152535e-06, "loss": 0.3965, "step": 19341 }, { "epoch": 1.8151276276276276, "grad_norm": 0.9464831143041778, "learning_rate": 4.0458091303316364e-06, "loss": 0.386, "step": 19342 }, { "epoch": 1.8152214714714714, "grad_norm": 1.0537461276885391, "learning_rate": 4.045273204887665e-06, "loss": 0.3929, "step": 19343 }, { "epoch": 1.8153153153153152, "grad_norm": 1.055448023383713, "learning_rate": 4.044737290827016e-06, "loss": 0.381, "step": 19344 }, { "epoch": 1.815409159159159, "grad_norm": 0.9727950550495965, "learning_rate": 4.044201388156077e-06, "loss": 0.4279, "step": 19345 }, { "epoch": 1.815503003003003, "grad_norm": 0.9721042485469772, "learning_rate": 4.043665496881236e-06, "loss": 0.4037, "step": 19346 }, { "epoch": 1.8155968468468469, "grad_norm": 1.003888321302264, "learning_rate": 4.043129617008885e-06, "loss": 0.3915, "step": 19347 }, { "epoch": 1.8156906906906907, "grad_norm": 0.8465242293889508, "learning_rate": 4.042593748545413e-06, "loss": 0.3892, "step": 19348 }, { "epoch": 1.8157845345345347, "grad_norm": 0.9265928126916734, "learning_rate": 4.042057891497205e-06, "loss": 0.4188, "step": 19349 }, { "epoch": 1.8158783783783785, "grad_norm": 2.0372082402430416, "learning_rate": 4.0415220458706554e-06, "loss": 0.3924, "step": 19350 }, { "epoch": 1.8159722222222223, "grad_norm": 0.9261763953698106, "learning_rate": 4.040986211672152e-06, "loss": 0.3951, "step": 19351 }, { "epoch": 1.8160660660660661, "grad_norm": 0.8624638867013275, "learning_rate": 4.040450388908082e-06, "loss": 0.3615, "step": 19352 }, { "epoch": 1.81615990990991, "grad_norm": 0.8409905278413888, "learning_rate": 4.0399145775848355e-06, "loss": 0.3256, "step": 19353 }, { "epoch": 1.8162537537537538, "grad_norm": 1.190862400158993, "learning_rate": 4.039378777708801e-06, "loss": 0.3925, "step": 19354 }, { "epoch": 1.8163475975975976, "grad_norm": 0.9370517131344009, "learning_rate": 4.038842989286364e-06, "loss": 0.3536, "step": 19355 }, { "epoch": 1.8164414414414414, "grad_norm": 0.8853867925136336, "learning_rate": 4.0383072123239175e-06, "loss": 0.39, "step": 19356 }, { "epoch": 1.8165352852852852, "grad_norm": 0.833117022189806, "learning_rate": 4.037771446827847e-06, "loss": 0.3357, "step": 19357 }, { "epoch": 1.816629129129129, "grad_norm": 0.9331806784072826, "learning_rate": 4.037235692804539e-06, "loss": 0.4229, "step": 19358 }, { "epoch": 1.816722972972973, "grad_norm": 0.9669833178383298, "learning_rate": 4.036699950260386e-06, "loss": 0.3573, "step": 19359 }, { "epoch": 1.8168168168168168, "grad_norm": 0.8836894266896688, "learning_rate": 4.036164219201772e-06, "loss": 0.3495, "step": 19360 }, { "epoch": 1.8169106606606606, "grad_norm": 0.8930495260730585, "learning_rate": 4.035628499635083e-06, "loss": 0.3726, "step": 19361 }, { "epoch": 1.8170045045045045, "grad_norm": 1.5055665199806942, "learning_rate": 4.035092791566711e-06, "loss": 0.3777, "step": 19362 }, { "epoch": 1.8170983483483485, "grad_norm": 0.9565997337865147, "learning_rate": 4.034557095003042e-06, "loss": 0.3984, "step": 19363 }, { "epoch": 1.8171921921921923, "grad_norm": 0.9962788810187424, "learning_rate": 4.034021409950461e-06, "loss": 0.4216, "step": 19364 }, { "epoch": 1.8172860360360361, "grad_norm": 0.9748527584852158, "learning_rate": 4.0334857364153565e-06, "loss": 0.3765, "step": 19365 }, { "epoch": 1.81737987987988, "grad_norm": 0.9653574449528756, "learning_rate": 4.032950074404116e-06, "loss": 0.4405, "step": 19366 }, { "epoch": 1.8174737237237237, "grad_norm": 0.9823446173210645, "learning_rate": 4.032414423923124e-06, "loss": 0.3764, "step": 19367 }, { "epoch": 1.8175675675675675, "grad_norm": 1.3685259008185429, "learning_rate": 4.03187878497877e-06, "loss": 0.4054, "step": 19368 }, { "epoch": 1.8176614114114114, "grad_norm": 1.246108603746738, "learning_rate": 4.03134315757744e-06, "loss": 0.3472, "step": 19369 }, { "epoch": 1.8177552552552552, "grad_norm": 1.3874897173468266, "learning_rate": 4.030807541725517e-06, "loss": 0.4107, "step": 19370 }, { "epoch": 1.817849099099099, "grad_norm": 0.9490431035787084, "learning_rate": 4.030271937429393e-06, "loss": 0.4019, "step": 19371 }, { "epoch": 1.8179429429429428, "grad_norm": 0.9655044163335904, "learning_rate": 4.02973634469545e-06, "loss": 0.3999, "step": 19372 }, { "epoch": 1.8180367867867868, "grad_norm": 0.94346402418835, "learning_rate": 4.029200763530071e-06, "loss": 0.3734, "step": 19373 }, { "epoch": 1.8181306306306306, "grad_norm": 0.9779988880006873, "learning_rate": 4.0286651939396496e-06, "loss": 0.4162, "step": 19374 }, { "epoch": 1.8182244744744744, "grad_norm": 1.1375225232753048, "learning_rate": 4.028129635930567e-06, "loss": 0.4092, "step": 19375 }, { "epoch": 1.8183183183183185, "grad_norm": 1.181440424965541, "learning_rate": 4.027594089509208e-06, "loss": 0.4022, "step": 19376 }, { "epoch": 1.8184121621621623, "grad_norm": 1.102048504517117, "learning_rate": 4.02705855468196e-06, "loss": 0.4218, "step": 19377 }, { "epoch": 1.818506006006006, "grad_norm": 0.97520004413026, "learning_rate": 4.026523031455207e-06, "loss": 0.4211, "step": 19378 }, { "epoch": 1.81859984984985, "grad_norm": 0.9450262792706803, "learning_rate": 4.025987519835332e-06, "loss": 0.4076, "step": 19379 }, { "epoch": 1.8186936936936937, "grad_norm": 0.8784827120418255, "learning_rate": 4.025452019828726e-06, "loss": 0.4257, "step": 19380 }, { "epoch": 1.8187875375375375, "grad_norm": 1.0369230799457234, "learning_rate": 4.024916531441769e-06, "loss": 0.4191, "step": 19381 }, { "epoch": 1.8188813813813813, "grad_norm": 0.9818889614564136, "learning_rate": 4.024381054680847e-06, "loss": 0.4142, "step": 19382 }, { "epoch": 1.8189752252252251, "grad_norm": 0.8644851607198097, "learning_rate": 4.0238455895523455e-06, "loss": 0.4011, "step": 19383 }, { "epoch": 1.819069069069069, "grad_norm": 0.9448552065080089, "learning_rate": 4.023310136062647e-06, "loss": 0.3755, "step": 19384 }, { "epoch": 1.8191629129129128, "grad_norm": 0.9157121542174602, "learning_rate": 4.022774694218135e-06, "loss": 0.4067, "step": 19385 }, { "epoch": 1.8192567567567568, "grad_norm": 0.9724050039166445, "learning_rate": 4.022239264025197e-06, "loss": 0.3881, "step": 19386 }, { "epoch": 1.8193506006006006, "grad_norm": 1.0672972048572196, "learning_rate": 4.021703845490216e-06, "loss": 0.4251, "step": 19387 }, { "epoch": 1.8194444444444444, "grad_norm": 0.9659678940577955, "learning_rate": 4.021168438619574e-06, "loss": 0.4107, "step": 19388 }, { "epoch": 1.8195382882882885, "grad_norm": 1.075042959862319, "learning_rate": 4.020633043419657e-06, "loss": 0.4159, "step": 19389 }, { "epoch": 1.8196321321321323, "grad_norm": 1.0429378213096152, "learning_rate": 4.020097659896848e-06, "loss": 0.4125, "step": 19390 }, { "epoch": 1.819725975975976, "grad_norm": 0.9197038256460961, "learning_rate": 4.019562288057527e-06, "loss": 0.4327, "step": 19391 }, { "epoch": 1.8198198198198199, "grad_norm": 1.078312224098983, "learning_rate": 4.019026927908083e-06, "loss": 0.3741, "step": 19392 }, { "epoch": 1.8199136636636637, "grad_norm": 0.8800183786698886, "learning_rate": 4.0184915794548954e-06, "loss": 0.3708, "step": 19393 }, { "epoch": 1.8200075075075075, "grad_norm": 0.9864852948004227, "learning_rate": 4.0179562427043475e-06, "loss": 0.4399, "step": 19394 }, { "epoch": 1.8201013513513513, "grad_norm": 0.9519223061729717, "learning_rate": 4.017420917662824e-06, "loss": 0.3895, "step": 19395 }, { "epoch": 1.8201951951951951, "grad_norm": 0.9405924368572134, "learning_rate": 4.016885604336707e-06, "loss": 0.3783, "step": 19396 }, { "epoch": 1.820289039039039, "grad_norm": 0.9238002408377067, "learning_rate": 4.016350302732377e-06, "loss": 0.3803, "step": 19397 }, { "epoch": 1.8203828828828827, "grad_norm": 0.9309861414258279, "learning_rate": 4.015815012856219e-06, "loss": 0.4007, "step": 19398 }, { "epoch": 1.8204767267267268, "grad_norm": 1.0919848378640429, "learning_rate": 4.0152797347146145e-06, "loss": 0.4309, "step": 19399 }, { "epoch": 1.8205705705705706, "grad_norm": 0.9381126986509122, "learning_rate": 4.014744468313945e-06, "loss": 0.417, "step": 19400 }, { "epoch": 1.8206644144144144, "grad_norm": 0.8402782881629675, "learning_rate": 4.014209213660594e-06, "loss": 0.3348, "step": 19401 }, { "epoch": 1.8207582582582582, "grad_norm": 1.1003939920375327, "learning_rate": 4.013673970760943e-06, "loss": 0.4198, "step": 19402 }, { "epoch": 1.8208521021021022, "grad_norm": 1.0254155548981694, "learning_rate": 4.0131387396213704e-06, "loss": 0.4182, "step": 19403 }, { "epoch": 1.820945945945946, "grad_norm": 1.0769947562089943, "learning_rate": 4.012603520248264e-06, "loss": 0.3751, "step": 19404 }, { "epoch": 1.8210397897897899, "grad_norm": 0.8302423326367239, "learning_rate": 4.012068312648002e-06, "loss": 0.3008, "step": 19405 }, { "epoch": 1.8211336336336337, "grad_norm": 0.9710204918595841, "learning_rate": 4.011533116826963e-06, "loss": 0.3617, "step": 19406 }, { "epoch": 1.8212274774774775, "grad_norm": 0.9078931410954489, "learning_rate": 4.0109979327915336e-06, "loss": 0.4382, "step": 19407 }, { "epoch": 1.8213213213213213, "grad_norm": 0.8648196738269885, "learning_rate": 4.0104627605480926e-06, "loss": 0.3987, "step": 19408 }, { "epoch": 1.821415165165165, "grad_norm": 0.9278601079435647, "learning_rate": 4.0099276001030175e-06, "loss": 0.3626, "step": 19409 }, { "epoch": 1.821509009009009, "grad_norm": 1.0004292570052709, "learning_rate": 4.009392451462694e-06, "loss": 0.3972, "step": 19410 }, { "epoch": 1.8216028528528527, "grad_norm": 0.8780298454575798, "learning_rate": 4.008857314633502e-06, "loss": 0.3676, "step": 19411 }, { "epoch": 1.8216966966966965, "grad_norm": 0.9454967419669176, "learning_rate": 4.008322189621819e-06, "loss": 0.41, "step": 19412 }, { "epoch": 1.8217905405405406, "grad_norm": 0.8954266553262178, "learning_rate": 4.007787076434028e-06, "loss": 0.4127, "step": 19413 }, { "epoch": 1.8218843843843844, "grad_norm": 0.9971152100324211, "learning_rate": 4.00725197507651e-06, "loss": 0.4322, "step": 19414 }, { "epoch": 1.8219782282282282, "grad_norm": 0.9374149698268213, "learning_rate": 4.0067168855556395e-06, "loss": 0.414, "step": 19415 }, { "epoch": 1.8220720720720722, "grad_norm": 0.88373316826118, "learning_rate": 4.006181807877804e-06, "loss": 0.3854, "step": 19416 }, { "epoch": 1.822165915915916, "grad_norm": 0.9588997915488268, "learning_rate": 4.005646742049379e-06, "loss": 0.3782, "step": 19417 }, { "epoch": 1.8222597597597598, "grad_norm": 0.8728974793592105, "learning_rate": 4.005111688076744e-06, "loss": 0.4036, "step": 19418 }, { "epoch": 1.8223536036036037, "grad_norm": 0.994706876954933, "learning_rate": 4.00457664596628e-06, "loss": 0.3613, "step": 19419 }, { "epoch": 1.8224474474474475, "grad_norm": 1.1531194132400715, "learning_rate": 4.004041615724367e-06, "loss": 0.409, "step": 19420 }, { "epoch": 1.8225412912912913, "grad_norm": 0.88394221854778, "learning_rate": 4.003506597357381e-06, "loss": 0.3902, "step": 19421 }, { "epoch": 1.822635135135135, "grad_norm": 0.9441489294583245, "learning_rate": 4.002971590871705e-06, "loss": 0.3997, "step": 19422 }, { "epoch": 1.822728978978979, "grad_norm": 0.981268423967672, "learning_rate": 4.002436596273715e-06, "loss": 0.422, "step": 19423 }, { "epoch": 1.8228228228228227, "grad_norm": 1.038517384714331, "learning_rate": 4.00190161356979e-06, "loss": 0.3964, "step": 19424 }, { "epoch": 1.8229166666666665, "grad_norm": 1.0135596203240789, "learning_rate": 4.001366642766311e-06, "loss": 0.3983, "step": 19425 }, { "epoch": 1.8230105105105106, "grad_norm": 0.8763478245751343, "learning_rate": 4.000831683869655e-06, "loss": 0.3998, "step": 19426 }, { "epoch": 1.8231043543543544, "grad_norm": 0.9941412897951732, "learning_rate": 4.0002967368861986e-06, "loss": 0.4272, "step": 19427 }, { "epoch": 1.8231981981981982, "grad_norm": 0.8650846441587191, "learning_rate": 3.999761801822324e-06, "loss": 0.4116, "step": 19428 }, { "epoch": 1.8232920420420422, "grad_norm": 1.1372517515296365, "learning_rate": 3.999226878684407e-06, "loss": 0.4695, "step": 19429 }, { "epoch": 1.823385885885886, "grad_norm": 0.9852491246794887, "learning_rate": 3.998691967478825e-06, "loss": 0.3566, "step": 19430 }, { "epoch": 1.8234797297297298, "grad_norm": 0.9869752528472242, "learning_rate": 3.998157068211957e-06, "loss": 0.3236, "step": 19431 }, { "epoch": 1.8235735735735736, "grad_norm": 1.4077591806951981, "learning_rate": 3.997622180890181e-06, "loss": 0.3854, "step": 19432 }, { "epoch": 1.8236674174174174, "grad_norm": 1.0681402407689338, "learning_rate": 3.99708730551987e-06, "loss": 0.3637, "step": 19433 }, { "epoch": 1.8237612612612613, "grad_norm": 1.1110493401643065, "learning_rate": 3.996552442107408e-06, "loss": 0.4198, "step": 19434 }, { "epoch": 1.823855105105105, "grad_norm": 0.8912775419924241, "learning_rate": 3.9960175906591695e-06, "loss": 0.4072, "step": 19435 }, { "epoch": 1.8239489489489489, "grad_norm": 0.8443822617569772, "learning_rate": 3.995482751181531e-06, "loss": 0.3743, "step": 19436 }, { "epoch": 1.8240427927927927, "grad_norm": 0.888978265746083, "learning_rate": 3.99494792368087e-06, "loss": 0.3766, "step": 19437 }, { "epoch": 1.8241366366366365, "grad_norm": 0.9816577070856031, "learning_rate": 3.994413108163564e-06, "loss": 0.3934, "step": 19438 }, { "epoch": 1.8242304804804805, "grad_norm": 0.963960783139475, "learning_rate": 3.9938783046359854e-06, "loss": 0.4308, "step": 19439 }, { "epoch": 1.8243243243243243, "grad_norm": 0.8937766433621509, "learning_rate": 3.993343513104517e-06, "loss": 0.3809, "step": 19440 }, { "epoch": 1.8244181681681682, "grad_norm": 1.1591494684547572, "learning_rate": 3.992808733575533e-06, "loss": 0.3775, "step": 19441 }, { "epoch": 1.824512012012012, "grad_norm": 0.926192756465837, "learning_rate": 3.992273966055407e-06, "loss": 0.4176, "step": 19442 }, { "epoch": 1.824605855855856, "grad_norm": 0.8537820778324998, "learning_rate": 3.991739210550518e-06, "loss": 0.3826, "step": 19443 }, { "epoch": 1.8246996996996998, "grad_norm": 0.8897197110036545, "learning_rate": 3.991204467067243e-06, "loss": 0.4116, "step": 19444 }, { "epoch": 1.8247935435435436, "grad_norm": 0.9600929825167613, "learning_rate": 3.990669735611952e-06, "loss": 0.3637, "step": 19445 }, { "epoch": 1.8248873873873874, "grad_norm": 1.3103688386963692, "learning_rate": 3.990135016191027e-06, "loss": 0.38, "step": 19446 }, { "epoch": 1.8249812312312312, "grad_norm": 0.9532255380682289, "learning_rate": 3.9896003088108415e-06, "loss": 0.3849, "step": 19447 }, { "epoch": 1.825075075075075, "grad_norm": 0.9933583036606751, "learning_rate": 3.989065613477768e-06, "loss": 0.3736, "step": 19448 }, { "epoch": 1.8251689189189189, "grad_norm": 0.9751488127872889, "learning_rate": 3.988530930198186e-06, "loss": 0.4439, "step": 19449 }, { "epoch": 1.8252627627627627, "grad_norm": 0.8914914673332212, "learning_rate": 3.987996258978469e-06, "loss": 0.4101, "step": 19450 }, { "epoch": 1.8253566066066065, "grad_norm": 1.0259678006066368, "learning_rate": 3.987461599824991e-06, "loss": 0.4176, "step": 19451 }, { "epoch": 1.8254504504504503, "grad_norm": 0.9182418147147229, "learning_rate": 3.986926952744129e-06, "loss": 0.3933, "step": 19452 }, { "epoch": 1.8255442942942943, "grad_norm": 2.0646384934542965, "learning_rate": 3.986392317742255e-06, "loss": 0.3426, "step": 19453 }, { "epoch": 1.8256381381381381, "grad_norm": 1.302266043974837, "learning_rate": 3.985857694825743e-06, "loss": 0.4158, "step": 19454 }, { "epoch": 1.825731981981982, "grad_norm": 0.9490237723087178, "learning_rate": 3.985323084000971e-06, "loss": 0.3731, "step": 19455 }, { "epoch": 1.825825825825826, "grad_norm": 0.8636272291123973, "learning_rate": 3.984788485274312e-06, "loss": 0.402, "step": 19456 }, { "epoch": 1.8259196696696698, "grad_norm": 0.9547736751471066, "learning_rate": 3.984253898652137e-06, "loss": 0.4211, "step": 19457 }, { "epoch": 1.8260135135135136, "grad_norm": 1.5230322241816057, "learning_rate": 3.983719324140824e-06, "loss": 0.3785, "step": 19458 }, { "epoch": 1.8261073573573574, "grad_norm": 1.1028609948071721, "learning_rate": 3.9831847617467454e-06, "loss": 0.3641, "step": 19459 }, { "epoch": 1.8262012012012012, "grad_norm": 0.9307523583872068, "learning_rate": 3.982650211476273e-06, "loss": 0.3754, "step": 19460 }, { "epoch": 1.826295045045045, "grad_norm": 1.1240654152329095, "learning_rate": 3.982115673335783e-06, "loss": 0.443, "step": 19461 }, { "epoch": 1.8263888888888888, "grad_norm": 0.94699876176877, "learning_rate": 3.981581147331648e-06, "loss": 0.3946, "step": 19462 }, { "epoch": 1.8264827327327327, "grad_norm": 0.8843139615825257, "learning_rate": 3.981046633470239e-06, "loss": 0.4062, "step": 19463 }, { "epoch": 1.8265765765765765, "grad_norm": 0.895303601035941, "learning_rate": 3.980512131757933e-06, "loss": 0.3648, "step": 19464 }, { "epoch": 1.8266704204204203, "grad_norm": 0.8462706155448366, "learning_rate": 3.979977642201101e-06, "loss": 0.4003, "step": 19465 }, { "epoch": 1.8267642642642643, "grad_norm": 0.825735646899498, "learning_rate": 3.979443164806113e-06, "loss": 0.4151, "step": 19466 }, { "epoch": 1.8268581081081081, "grad_norm": 0.8820533928430562, "learning_rate": 3.9789086995793464e-06, "loss": 0.4038, "step": 19467 }, { "epoch": 1.826951951951952, "grad_norm": 1.101627759392777, "learning_rate": 3.978374246527171e-06, "loss": 0.3859, "step": 19468 }, { "epoch": 1.827045795795796, "grad_norm": 1.0437786663933548, "learning_rate": 3.97783980565596e-06, "loss": 0.4328, "step": 19469 }, { "epoch": 1.8271396396396398, "grad_norm": 1.0333726164572334, "learning_rate": 3.977305376972085e-06, "loss": 0.3861, "step": 19470 }, { "epoch": 1.8272334834834836, "grad_norm": 0.9387940163979147, "learning_rate": 3.97677096048192e-06, "loss": 0.3808, "step": 19471 }, { "epoch": 1.8273273273273274, "grad_norm": 1.052622013850504, "learning_rate": 3.976236556191831e-06, "loss": 0.4025, "step": 19472 }, { "epoch": 1.8274211711711712, "grad_norm": 0.8932628367782601, "learning_rate": 3.975702164108197e-06, "loss": 0.3279, "step": 19473 }, { "epoch": 1.827515015015015, "grad_norm": 0.8381579732033066, "learning_rate": 3.9751677842373874e-06, "loss": 0.4022, "step": 19474 }, { "epoch": 1.8276088588588588, "grad_norm": 0.9124694370620715, "learning_rate": 3.974633416585771e-06, "loss": 0.4039, "step": 19475 }, { "epoch": 1.8277027027027026, "grad_norm": 0.9433473035959462, "learning_rate": 3.974099061159722e-06, "loss": 0.4021, "step": 19476 }, { "epoch": 1.8277965465465464, "grad_norm": 1.100656241089573, "learning_rate": 3.973564717965612e-06, "loss": 0.3696, "step": 19477 }, { "epoch": 1.8278903903903903, "grad_norm": 0.9270620455960332, "learning_rate": 3.973030387009807e-06, "loss": 0.3858, "step": 19478 }, { "epoch": 1.8279842342342343, "grad_norm": 1.0079079014056218, "learning_rate": 3.972496068298684e-06, "loss": 0.432, "step": 19479 }, { "epoch": 1.828078078078078, "grad_norm": 0.9022024800178803, "learning_rate": 3.971961761838612e-06, "loss": 0.3893, "step": 19480 }, { "epoch": 1.828171921921922, "grad_norm": 1.026601148279527, "learning_rate": 3.9714274676359595e-06, "loss": 0.36, "step": 19481 }, { "epoch": 1.8282657657657657, "grad_norm": 0.8747902639398721, "learning_rate": 3.970893185697099e-06, "loss": 0.3182, "step": 19482 }, { "epoch": 1.8283596096096097, "grad_norm": 0.9582195144445231, "learning_rate": 3.970358916028401e-06, "loss": 0.4136, "step": 19483 }, { "epoch": 1.8284534534534536, "grad_norm": 1.0711019667768145, "learning_rate": 3.969824658636232e-06, "loss": 0.3922, "step": 19484 }, { "epoch": 1.8285472972972974, "grad_norm": 0.8832501266661184, "learning_rate": 3.969290413526967e-06, "loss": 0.3918, "step": 19485 }, { "epoch": 1.8286411411411412, "grad_norm": 0.8348104561387706, "learning_rate": 3.968756180706974e-06, "loss": 0.3997, "step": 19486 }, { "epoch": 1.828734984984985, "grad_norm": 0.8285929585073293, "learning_rate": 3.968221960182622e-06, "loss": 0.3757, "step": 19487 }, { "epoch": 1.8288288288288288, "grad_norm": 0.968651934239501, "learning_rate": 3.967687751960281e-06, "loss": 0.398, "step": 19488 }, { "epoch": 1.8289226726726726, "grad_norm": 0.764278053627726, "learning_rate": 3.967153556046321e-06, "loss": 0.4008, "step": 19489 }, { "epoch": 1.8290165165165164, "grad_norm": 0.9371403670324542, "learning_rate": 3.966619372447108e-06, "loss": 0.3688, "step": 19490 }, { "epoch": 1.8291103603603602, "grad_norm": 1.3266597092857675, "learning_rate": 3.966085201169016e-06, "loss": 0.4054, "step": 19491 }, { "epoch": 1.829204204204204, "grad_norm": 0.9185163345927636, "learning_rate": 3.965551042218412e-06, "loss": 0.3924, "step": 19492 }, { "epoch": 1.829298048048048, "grad_norm": 0.9856154867353758, "learning_rate": 3.965016895601665e-06, "loss": 0.4135, "step": 19493 }, { "epoch": 1.8293918918918919, "grad_norm": 0.9125041387314632, "learning_rate": 3.9644827613251424e-06, "loss": 0.4119, "step": 19494 }, { "epoch": 1.8294857357357357, "grad_norm": 1.1677922077077125, "learning_rate": 3.963948639395215e-06, "loss": 0.4352, "step": 19495 }, { "epoch": 1.8295795795795797, "grad_norm": 1.5442714473721497, "learning_rate": 3.963414529818247e-06, "loss": 0.3604, "step": 19496 }, { "epoch": 1.8296734234234235, "grad_norm": 0.966464130985582, "learning_rate": 3.962880432600612e-06, "loss": 0.4267, "step": 19497 }, { "epoch": 1.8297672672672673, "grad_norm": 0.9133447357046355, "learning_rate": 3.962346347748675e-06, "loss": 0.3856, "step": 19498 }, { "epoch": 1.8298611111111112, "grad_norm": 0.9917063682643014, "learning_rate": 3.961812275268805e-06, "loss": 0.3816, "step": 19499 }, { "epoch": 1.829954954954955, "grad_norm": 1.0132037573646528, "learning_rate": 3.961278215167369e-06, "loss": 0.4132, "step": 19500 }, { "epoch": 1.8300487987987988, "grad_norm": 1.2585887294413314, "learning_rate": 3.9607441674507366e-06, "loss": 0.3681, "step": 19501 }, { "epoch": 1.8301426426426426, "grad_norm": 0.9051523619954885, "learning_rate": 3.960210132125271e-06, "loss": 0.4068, "step": 19502 }, { "epoch": 1.8302364864864864, "grad_norm": 0.7964887481097179, "learning_rate": 3.959676109197344e-06, "loss": 0.4414, "step": 19503 }, { "epoch": 1.8303303303303302, "grad_norm": 0.9691396786577147, "learning_rate": 3.959142098673322e-06, "loss": 0.3701, "step": 19504 }, { "epoch": 1.830424174174174, "grad_norm": 0.8546026841079682, "learning_rate": 3.958608100559569e-06, "loss": 0.3535, "step": 19505 }, { "epoch": 1.830518018018018, "grad_norm": 0.8807099167258927, "learning_rate": 3.958074114862456e-06, "loss": 0.4113, "step": 19506 }, { "epoch": 1.8306118618618619, "grad_norm": 0.9629260971535145, "learning_rate": 3.957540141588348e-06, "loss": 0.3972, "step": 19507 }, { "epoch": 1.8307057057057057, "grad_norm": 0.90672047230625, "learning_rate": 3.95700618074361e-06, "loss": 0.3841, "step": 19508 }, { "epoch": 1.8307995495495497, "grad_norm": 0.9383541320878983, "learning_rate": 3.956472232334611e-06, "loss": 0.4338, "step": 19509 }, { "epoch": 1.8308933933933935, "grad_norm": 0.9103198516466927, "learning_rate": 3.955938296367717e-06, "loss": 0.3783, "step": 19510 }, { "epoch": 1.8309872372372373, "grad_norm": 1.3271740056320465, "learning_rate": 3.9554043728492925e-06, "loss": 0.4147, "step": 19511 }, { "epoch": 1.8310810810810811, "grad_norm": 1.0367298931670377, "learning_rate": 3.9548704617857056e-06, "loss": 0.4059, "step": 19512 }, { "epoch": 1.831174924924925, "grad_norm": 1.019841011328121, "learning_rate": 3.954336563183321e-06, "loss": 0.4265, "step": 19513 }, { "epoch": 1.8312687687687688, "grad_norm": 1.373060641207112, "learning_rate": 3.953802677048503e-06, "loss": 0.4019, "step": 19514 }, { "epoch": 1.8313626126126126, "grad_norm": 0.9638216952556891, "learning_rate": 3.953268803387621e-06, "loss": 0.3684, "step": 19515 }, { "epoch": 1.8314564564564564, "grad_norm": 0.9558697578241067, "learning_rate": 3.952734942207038e-06, "loss": 0.3805, "step": 19516 }, { "epoch": 1.8315503003003002, "grad_norm": 0.9414809327889717, "learning_rate": 3.952201093513119e-06, "loss": 0.3927, "step": 19517 }, { "epoch": 1.831644144144144, "grad_norm": 1.028032152706693, "learning_rate": 3.95166725731223e-06, "loss": 0.3816, "step": 19518 }, { "epoch": 1.831737987987988, "grad_norm": 0.8371384756135863, "learning_rate": 3.951133433610735e-06, "loss": 0.3757, "step": 19519 }, { "epoch": 1.8318318318318318, "grad_norm": 1.1509614293480401, "learning_rate": 3.950599622414998e-06, "loss": 0.4391, "step": 19520 }, { "epoch": 1.8319256756756757, "grad_norm": 1.0192602658524512, "learning_rate": 3.9500658237313874e-06, "loss": 0.4177, "step": 19521 }, { "epoch": 1.8320195195195195, "grad_norm": 0.934350107788797, "learning_rate": 3.949532037566266e-06, "loss": 0.3823, "step": 19522 }, { "epoch": 1.8321133633633635, "grad_norm": 0.8452102870671443, "learning_rate": 3.948998263925997e-06, "loss": 0.393, "step": 19523 }, { "epoch": 1.8322072072072073, "grad_norm": 0.8879893602531735, "learning_rate": 3.948464502816945e-06, "loss": 0.4245, "step": 19524 }, { "epoch": 1.8323010510510511, "grad_norm": 1.4090112233749745, "learning_rate": 3.9479307542454755e-06, "loss": 0.3875, "step": 19525 }, { "epoch": 1.832394894894895, "grad_norm": 0.9386746137868134, "learning_rate": 3.94739701821795e-06, "loss": 0.4455, "step": 19526 }, { "epoch": 1.8324887387387387, "grad_norm": 0.8775701995064197, "learning_rate": 3.946863294740735e-06, "loss": 0.3979, "step": 19527 }, { "epoch": 1.8325825825825826, "grad_norm": 0.9988348191691672, "learning_rate": 3.946329583820192e-06, "loss": 0.3951, "step": 19528 }, { "epoch": 1.8326764264264264, "grad_norm": 0.9698348359016356, "learning_rate": 3.945795885462686e-06, "loss": 0.398, "step": 19529 }, { "epoch": 1.8327702702702702, "grad_norm": 1.0125902605449781, "learning_rate": 3.9452621996745795e-06, "loss": 0.3795, "step": 19530 }, { "epoch": 1.832864114114114, "grad_norm": 1.036425199402722, "learning_rate": 3.944728526462236e-06, "loss": 0.322, "step": 19531 }, { "epoch": 1.8329579579579578, "grad_norm": 0.9959129053591257, "learning_rate": 3.944194865832017e-06, "loss": 0.3486, "step": 19532 }, { "epoch": 1.8330518018018018, "grad_norm": 1.7386751862817613, "learning_rate": 3.943661217790289e-06, "loss": 0.4287, "step": 19533 }, { "epoch": 1.8331456456456456, "grad_norm": 1.7381044680766462, "learning_rate": 3.9431275823434125e-06, "loss": 0.4403, "step": 19534 }, { "epoch": 1.8332394894894894, "grad_norm": 1.1146409055387687, "learning_rate": 3.9425939594977496e-06, "loss": 0.392, "step": 19535 }, { "epoch": 1.8333333333333335, "grad_norm": 0.9009116354613692, "learning_rate": 3.942060349259664e-06, "loss": 0.3658, "step": 19536 }, { "epoch": 1.8334271771771773, "grad_norm": 0.9614236968438915, "learning_rate": 3.941526751635517e-06, "loss": 0.3676, "step": 19537 }, { "epoch": 1.833521021021021, "grad_norm": 0.935167944515293, "learning_rate": 3.940993166631669e-06, "loss": 0.3608, "step": 19538 }, { "epoch": 1.833614864864865, "grad_norm": 0.878920221731276, "learning_rate": 3.940459594254486e-06, "loss": 0.3843, "step": 19539 }, { "epoch": 1.8337087087087087, "grad_norm": 0.8908532671739862, "learning_rate": 3.939926034510329e-06, "loss": 0.3956, "step": 19540 }, { "epoch": 1.8338025525525525, "grad_norm": 3.158030656238927, "learning_rate": 3.939392487405557e-06, "loss": 0.4278, "step": 19541 }, { "epoch": 1.8338963963963963, "grad_norm": 0.8912305442095485, "learning_rate": 3.938858952946535e-06, "loss": 0.4014, "step": 19542 }, { "epoch": 1.8339902402402402, "grad_norm": 0.9020350738206264, "learning_rate": 3.938325431139621e-06, "loss": 0.3926, "step": 19543 }, { "epoch": 1.834084084084084, "grad_norm": 1.0988628888960814, "learning_rate": 3.937791921991177e-06, "loss": 0.3539, "step": 19544 }, { "epoch": 1.8341779279279278, "grad_norm": 1.0374117006865717, "learning_rate": 3.9372584255075665e-06, "loss": 0.4219, "step": 19545 }, { "epoch": 1.8342717717717718, "grad_norm": 0.9868322152506932, "learning_rate": 3.9367249416951494e-06, "loss": 0.3989, "step": 19546 }, { "epoch": 1.8343656156156156, "grad_norm": 1.030326312823524, "learning_rate": 3.936191470560285e-06, "loss": 0.4061, "step": 19547 }, { "epoch": 1.8344594594594594, "grad_norm": 0.8812749439857646, "learning_rate": 3.935658012109335e-06, "loss": 0.4053, "step": 19548 }, { "epoch": 1.8345533033033035, "grad_norm": 0.8793740371099003, "learning_rate": 3.93512456634866e-06, "loss": 0.386, "step": 19549 }, { "epoch": 1.8346471471471473, "grad_norm": 0.9504665847330845, "learning_rate": 3.934591133284619e-06, "loss": 0.3961, "step": 19550 }, { "epoch": 1.834740990990991, "grad_norm": 0.9999291271512276, "learning_rate": 3.934057712923574e-06, "loss": 0.4018, "step": 19551 }, { "epoch": 1.834834834834835, "grad_norm": 0.984823317125913, "learning_rate": 3.933524305271885e-06, "loss": 0.4113, "step": 19552 }, { "epoch": 1.8349286786786787, "grad_norm": 0.9637631197327539, "learning_rate": 3.932990910335911e-06, "loss": 0.3862, "step": 19553 }, { "epoch": 1.8350225225225225, "grad_norm": 0.9196149456658202, "learning_rate": 3.932457528122012e-06, "loss": 0.3788, "step": 19554 }, { "epoch": 1.8351163663663663, "grad_norm": 0.9648018572194669, "learning_rate": 3.931924158636547e-06, "loss": 0.3881, "step": 19555 }, { "epoch": 1.8352102102102101, "grad_norm": 1.2453086969871605, "learning_rate": 3.9313908018858755e-06, "loss": 0.4351, "step": 19556 }, { "epoch": 1.835304054054054, "grad_norm": 1.3237853184795052, "learning_rate": 3.930857457876358e-06, "loss": 0.3681, "step": 19557 }, { "epoch": 1.8353978978978978, "grad_norm": 0.9430282378953799, "learning_rate": 3.930324126614354e-06, "loss": 0.4203, "step": 19558 }, { "epoch": 1.8354917417417418, "grad_norm": 0.9706805954559216, "learning_rate": 3.929790808106219e-06, "loss": 0.4088, "step": 19559 }, { "epoch": 1.8355855855855856, "grad_norm": 1.2256553960862717, "learning_rate": 3.929257502358316e-06, "loss": 0.3474, "step": 19560 }, { "epoch": 1.8356794294294294, "grad_norm": 0.9020597109263683, "learning_rate": 3.928724209377002e-06, "loss": 0.3671, "step": 19561 }, { "epoch": 1.8357732732732732, "grad_norm": 1.0020210333795339, "learning_rate": 3.928190929168633e-06, "loss": 0.4309, "step": 19562 }, { "epoch": 1.8358671171171173, "grad_norm": 0.9471761701835001, "learning_rate": 3.927657661739571e-06, "loss": 0.3617, "step": 19563 }, { "epoch": 1.835960960960961, "grad_norm": 0.8394082265453713, "learning_rate": 3.927124407096174e-06, "loss": 0.3785, "step": 19564 }, { "epoch": 1.8360548048048049, "grad_norm": 0.8791677752727446, "learning_rate": 3.926591165244797e-06, "loss": 0.3934, "step": 19565 }, { "epoch": 1.8361486486486487, "grad_norm": 0.875902055731251, "learning_rate": 3.9260579361918024e-06, "loss": 0.367, "step": 19566 }, { "epoch": 1.8362424924924925, "grad_norm": 0.8331567705881022, "learning_rate": 3.925524719943544e-06, "loss": 0.3695, "step": 19567 }, { "epoch": 1.8363363363363363, "grad_norm": 1.1820361278951232, "learning_rate": 3.924991516506379e-06, "loss": 0.3787, "step": 19568 }, { "epoch": 1.8364301801801801, "grad_norm": 0.9436685386322395, "learning_rate": 3.924458325886669e-06, "loss": 0.3884, "step": 19569 }, { "epoch": 1.836524024024024, "grad_norm": 0.9115382564983273, "learning_rate": 3.923925148090769e-06, "loss": 0.4127, "step": 19570 }, { "epoch": 1.8366178678678677, "grad_norm": 0.8794468331946498, "learning_rate": 3.923391983125034e-06, "loss": 0.4009, "step": 19571 }, { "epoch": 1.8367117117117115, "grad_norm": 0.9868734088192912, "learning_rate": 3.9228588309958246e-06, "loss": 0.3901, "step": 19572 }, { "epoch": 1.8368055555555556, "grad_norm": 1.1041173162935711, "learning_rate": 3.922325691709497e-06, "loss": 0.4203, "step": 19573 }, { "epoch": 1.8368993993993994, "grad_norm": 0.9332461007033857, "learning_rate": 3.921792565272406e-06, "loss": 0.3881, "step": 19574 }, { "epoch": 1.8369932432432432, "grad_norm": 1.0898950768438826, "learning_rate": 3.92125945169091e-06, "loss": 0.4241, "step": 19575 }, { "epoch": 1.8370870870870872, "grad_norm": 0.974055723904483, "learning_rate": 3.9207263509713635e-06, "loss": 0.4125, "step": 19576 }, { "epoch": 1.837180930930931, "grad_norm": 0.9249856124322081, "learning_rate": 3.920193263120123e-06, "loss": 0.3569, "step": 19577 }, { "epoch": 1.8372747747747749, "grad_norm": 0.8764410684460929, "learning_rate": 3.919660188143547e-06, "loss": 0.3668, "step": 19578 }, { "epoch": 1.8373686186186187, "grad_norm": 0.8997406504733692, "learning_rate": 3.919127126047989e-06, "loss": 0.3564, "step": 19579 }, { "epoch": 1.8374624624624625, "grad_norm": 0.9164525782269347, "learning_rate": 3.918594076839805e-06, "loss": 0.4121, "step": 19580 }, { "epoch": 1.8375563063063063, "grad_norm": 0.9549908728803376, "learning_rate": 3.918061040525353e-06, "loss": 0.3885, "step": 19581 }, { "epoch": 1.83765015015015, "grad_norm": 0.9737520363237635, "learning_rate": 3.9175280171109855e-06, "loss": 0.4025, "step": 19582 }, { "epoch": 1.837743993993994, "grad_norm": 1.4333407599306982, "learning_rate": 3.916995006603057e-06, "loss": 0.4039, "step": 19583 }, { "epoch": 1.8378378378378377, "grad_norm": 1.0233376410518975, "learning_rate": 3.916462009007927e-06, "loss": 0.3976, "step": 19584 }, { "epoch": 1.8379316816816815, "grad_norm": 0.929469946437895, "learning_rate": 3.915929024331947e-06, "loss": 0.4074, "step": 19585 }, { "epoch": 1.8380255255255256, "grad_norm": 0.8662727059568021, "learning_rate": 3.915396052581472e-06, "loss": 0.3664, "step": 19586 }, { "epoch": 1.8381193693693694, "grad_norm": 0.9053983771344069, "learning_rate": 3.914863093762859e-06, "loss": 0.3643, "step": 19587 }, { "epoch": 1.8382132132132132, "grad_norm": 1.1764181845903665, "learning_rate": 3.914330147882461e-06, "loss": 0.3684, "step": 19588 }, { "epoch": 1.8383070570570572, "grad_norm": 0.9071612854167374, "learning_rate": 3.9137972149466304e-06, "loss": 0.3993, "step": 19589 }, { "epoch": 1.838400900900901, "grad_norm": 1.2059806033683835, "learning_rate": 3.913264294961725e-06, "loss": 0.3728, "step": 19590 }, { "epoch": 1.8384947447447448, "grad_norm": 0.9413331254623576, "learning_rate": 3.912731387934098e-06, "loss": 0.412, "step": 19591 }, { "epoch": 1.8385885885885886, "grad_norm": 1.0364159852778903, "learning_rate": 3.912198493870101e-06, "loss": 0.3953, "step": 19592 }, { "epoch": 1.8386824324324325, "grad_norm": 1.1751432413910041, "learning_rate": 3.911665612776091e-06, "loss": 0.3706, "step": 19593 }, { "epoch": 1.8387762762762763, "grad_norm": 0.901597279254587, "learning_rate": 3.91113274465842e-06, "loss": 0.4296, "step": 19594 }, { "epoch": 1.83887012012012, "grad_norm": 0.8681286463944541, "learning_rate": 3.9105998895234395e-06, "loss": 0.3912, "step": 19595 }, { "epoch": 1.8389639639639639, "grad_norm": 1.0543934010627827, "learning_rate": 3.910067047377506e-06, "loss": 0.3802, "step": 19596 }, { "epoch": 1.8390578078078077, "grad_norm": 1.0500706573189464, "learning_rate": 3.909534218226972e-06, "loss": 0.3923, "step": 19597 }, { "epoch": 1.8391516516516515, "grad_norm": 0.9248295219536019, "learning_rate": 3.909001402078189e-06, "loss": 0.3833, "step": 19598 }, { "epoch": 1.8392454954954955, "grad_norm": 0.9012487718708666, "learning_rate": 3.90846859893751e-06, "loss": 0.4206, "step": 19599 }, { "epoch": 1.8393393393393394, "grad_norm": 0.8716906861849438, "learning_rate": 3.9079358088112905e-06, "loss": 0.3301, "step": 19600 }, { "epoch": 1.8394331831831832, "grad_norm": 0.9436288394313146, "learning_rate": 3.907403031705877e-06, "loss": 0.4026, "step": 19601 }, { "epoch": 1.839527027027027, "grad_norm": 1.0042826172524264, "learning_rate": 3.90687026762763e-06, "loss": 0.4265, "step": 19602 }, { "epoch": 1.839620870870871, "grad_norm": 1.3784594712548763, "learning_rate": 3.906337516582896e-06, "loss": 0.4186, "step": 19603 }, { "epoch": 1.8397147147147148, "grad_norm": 1.8247804419467668, "learning_rate": 3.905804778578028e-06, "loss": 0.3961, "step": 19604 }, { "epoch": 1.8398085585585586, "grad_norm": 1.0391223980204565, "learning_rate": 3.905272053619379e-06, "loss": 0.3613, "step": 19605 }, { "epoch": 1.8399024024024024, "grad_norm": 1.2203623701811603, "learning_rate": 3.9047393417133e-06, "loss": 0.3883, "step": 19606 }, { "epoch": 1.8399962462462462, "grad_norm": 1.126356464086461, "learning_rate": 3.904206642866141e-06, "loss": 0.4072, "step": 19607 }, { "epoch": 1.84009009009009, "grad_norm": 0.8553802987049196, "learning_rate": 3.903673957084258e-06, "loss": 0.3672, "step": 19608 }, { "epoch": 1.8401839339339339, "grad_norm": 0.8250867480582017, "learning_rate": 3.903141284373998e-06, "loss": 0.3658, "step": 19609 }, { "epoch": 1.8402777777777777, "grad_norm": 1.2148294909675128, "learning_rate": 3.902608624741713e-06, "loss": 0.4002, "step": 19610 }, { "epoch": 1.8403716216216215, "grad_norm": 1.004204957697038, "learning_rate": 3.9020759781937564e-06, "loss": 0.4567, "step": 19611 }, { "epoch": 1.8404654654654653, "grad_norm": 0.8549839108675605, "learning_rate": 3.901543344736476e-06, "loss": 0.3891, "step": 19612 }, { "epoch": 1.8405593093093093, "grad_norm": 0.8231027725712831, "learning_rate": 3.9010107243762215e-06, "loss": 0.393, "step": 19613 }, { "epoch": 1.8406531531531531, "grad_norm": 1.3593598249146512, "learning_rate": 3.900478117119348e-06, "loss": 0.3761, "step": 19614 }, { "epoch": 1.840746996996997, "grad_norm": 1.0226740859154073, "learning_rate": 3.8999455229722035e-06, "loss": 0.3893, "step": 19615 }, { "epoch": 1.840840840840841, "grad_norm": 1.0399110899597774, "learning_rate": 3.899412941941137e-06, "loss": 0.4194, "step": 19616 }, { "epoch": 1.8409346846846848, "grad_norm": 1.1045418513099425, "learning_rate": 3.898880374032501e-06, "loss": 0.4036, "step": 19617 }, { "epoch": 1.8410285285285286, "grad_norm": 1.0260049945572327, "learning_rate": 3.898347819252643e-06, "loss": 0.3944, "step": 19618 }, { "epoch": 1.8411223723723724, "grad_norm": 0.9119881848075675, "learning_rate": 3.897815277607912e-06, "loss": 0.3805, "step": 19619 }, { "epoch": 1.8412162162162162, "grad_norm": 0.9250816808228917, "learning_rate": 3.89728274910466e-06, "loss": 0.3792, "step": 19620 }, { "epoch": 1.84131006006006, "grad_norm": 0.8435319065008953, "learning_rate": 3.896750233749238e-06, "loss": 0.3591, "step": 19621 }, { "epoch": 1.8414039039039038, "grad_norm": 1.028204023775073, "learning_rate": 3.89621773154799e-06, "loss": 0.374, "step": 19622 }, { "epoch": 1.8414977477477477, "grad_norm": 1.106735235859318, "learning_rate": 3.895685242507269e-06, "loss": 0.4716, "step": 19623 }, { "epoch": 1.8415915915915915, "grad_norm": 0.9701093503235257, "learning_rate": 3.8951527666334235e-06, "loss": 0.3991, "step": 19624 }, { "epoch": 1.8416854354354353, "grad_norm": 0.8725505249925296, "learning_rate": 3.894620303932799e-06, "loss": 0.3699, "step": 19625 }, { "epoch": 1.8417792792792793, "grad_norm": 0.9570989869096779, "learning_rate": 3.8940878544117485e-06, "loss": 0.3845, "step": 19626 }, { "epoch": 1.8418731231231231, "grad_norm": 1.9929727944074143, "learning_rate": 3.893555418076619e-06, "loss": 0.4002, "step": 19627 }, { "epoch": 1.841966966966967, "grad_norm": 0.9629833750698606, "learning_rate": 3.893022994933758e-06, "loss": 0.4079, "step": 19628 }, { "epoch": 1.842060810810811, "grad_norm": 0.8807587692866014, "learning_rate": 3.892490584989514e-06, "loss": 0.3797, "step": 19629 }, { "epoch": 1.8421546546546548, "grad_norm": 0.8644853533896663, "learning_rate": 3.891958188250237e-06, "loss": 0.4089, "step": 19630 }, { "epoch": 1.8422484984984986, "grad_norm": 1.0361667356993876, "learning_rate": 3.891425804722269e-06, "loss": 0.412, "step": 19631 }, { "epoch": 1.8423423423423424, "grad_norm": 0.954657709687576, "learning_rate": 3.890893434411965e-06, "loss": 0.3673, "step": 19632 }, { "epoch": 1.8424361861861862, "grad_norm": 0.9407358234994934, "learning_rate": 3.890361077325668e-06, "loss": 0.41, "step": 19633 }, { "epoch": 1.84253003003003, "grad_norm": 0.9334919487795564, "learning_rate": 3.889828733469726e-06, "loss": 0.3943, "step": 19634 }, { "epoch": 1.8426238738738738, "grad_norm": 0.9315342435087608, "learning_rate": 3.889296402850488e-06, "loss": 0.4356, "step": 19635 }, { "epoch": 1.8427177177177176, "grad_norm": 0.960404157470951, "learning_rate": 3.8887640854742995e-06, "loss": 0.3748, "step": 19636 }, { "epoch": 1.8428115615615615, "grad_norm": 0.9753386998267779, "learning_rate": 3.888231781347505e-06, "loss": 0.4436, "step": 19637 }, { "epoch": 1.8429054054054053, "grad_norm": 0.9190234032773702, "learning_rate": 3.887699490476456e-06, "loss": 0.3767, "step": 19638 }, { "epoch": 1.8429992492492493, "grad_norm": 0.9285753515758103, "learning_rate": 3.887167212867497e-06, "loss": 0.4054, "step": 19639 }, { "epoch": 1.843093093093093, "grad_norm": 0.9088408548019187, "learning_rate": 3.886634948526973e-06, "loss": 0.3811, "step": 19640 }, { "epoch": 1.843186936936937, "grad_norm": 0.9056039287761337, "learning_rate": 3.886102697461233e-06, "loss": 0.3899, "step": 19641 }, { "epoch": 1.8432807807807807, "grad_norm": 0.8408328258666083, "learning_rate": 3.885570459676622e-06, "loss": 0.3641, "step": 19642 }, { "epoch": 1.8433746246246248, "grad_norm": 0.9689889165643957, "learning_rate": 3.885038235179483e-06, "loss": 0.4283, "step": 19643 }, { "epoch": 1.8434684684684686, "grad_norm": 0.954712379940996, "learning_rate": 3.884506023976167e-06, "loss": 0.3692, "step": 19644 }, { "epoch": 1.8435623123123124, "grad_norm": 0.8951553912027839, "learning_rate": 3.883973826073016e-06, "loss": 0.3716, "step": 19645 }, { "epoch": 1.8436561561561562, "grad_norm": 0.7958115900182466, "learning_rate": 3.883441641476376e-06, "loss": 0.3598, "step": 19646 }, { "epoch": 1.84375, "grad_norm": 1.0067127258856516, "learning_rate": 3.882909470192594e-06, "loss": 0.3968, "step": 19647 }, { "epoch": 1.8438438438438438, "grad_norm": 0.9261298222589478, "learning_rate": 3.882377312228013e-06, "loss": 0.4225, "step": 19648 }, { "epoch": 1.8439376876876876, "grad_norm": 0.9283425775659204, "learning_rate": 3.881845167588977e-06, "loss": 0.4092, "step": 19649 }, { "epoch": 1.8440315315315314, "grad_norm": 0.8919682363637969, "learning_rate": 3.881313036281835e-06, "loss": 0.371, "step": 19650 }, { "epoch": 1.8441253753753752, "grad_norm": 0.9128355726924255, "learning_rate": 3.880780918312929e-06, "loss": 0.3987, "step": 19651 }, { "epoch": 1.8442192192192193, "grad_norm": 0.8539104355185706, "learning_rate": 3.8802488136886036e-06, "loss": 0.332, "step": 19652 }, { "epoch": 1.844313063063063, "grad_norm": 1.1325514740084492, "learning_rate": 3.8797167224152035e-06, "loss": 0.3774, "step": 19653 }, { "epoch": 1.844406906906907, "grad_norm": 1.0253119876204395, "learning_rate": 3.879184644499073e-06, "loss": 0.4416, "step": 19654 }, { "epoch": 1.8445007507507507, "grad_norm": 0.9618273109493396, "learning_rate": 3.878652579946555e-06, "loss": 0.3981, "step": 19655 }, { "epoch": 1.8445945945945947, "grad_norm": 2.7275494239711877, "learning_rate": 3.878120528763995e-06, "loss": 0.3773, "step": 19656 }, { "epoch": 1.8446884384384385, "grad_norm": 1.0252771313332232, "learning_rate": 3.877588490957736e-06, "loss": 0.3635, "step": 19657 }, { "epoch": 1.8447822822822824, "grad_norm": 1.414925112267178, "learning_rate": 3.877056466534121e-06, "loss": 0.3868, "step": 19658 }, { "epoch": 1.8448761261261262, "grad_norm": 0.8628121493301252, "learning_rate": 3.876524455499495e-06, "loss": 0.4197, "step": 19659 }, { "epoch": 1.84496996996997, "grad_norm": 0.93356581514172, "learning_rate": 3.8759924578602e-06, "loss": 0.3237, "step": 19660 }, { "epoch": 1.8450638138138138, "grad_norm": 0.8576828676970881, "learning_rate": 3.8754604736225765e-06, "loss": 0.3694, "step": 19661 }, { "epoch": 1.8451576576576576, "grad_norm": 1.0593863451590668, "learning_rate": 3.874928502792973e-06, "loss": 0.411, "step": 19662 }, { "epoch": 1.8452515015015014, "grad_norm": 1.0592361937819754, "learning_rate": 3.874396545377729e-06, "loss": 0.4231, "step": 19663 }, { "epoch": 1.8453453453453452, "grad_norm": 0.8834525322658747, "learning_rate": 3.873864601383187e-06, "loss": 0.3598, "step": 19664 }, { "epoch": 1.845439189189189, "grad_norm": 0.8742995768936774, "learning_rate": 3.87333267081569e-06, "loss": 0.3928, "step": 19665 }, { "epoch": 1.845533033033033, "grad_norm": 1.0188148777887671, "learning_rate": 3.872800753681581e-06, "loss": 0.3958, "step": 19666 }, { "epoch": 1.8456268768768769, "grad_norm": 0.943856595773071, "learning_rate": 3.872268849987198e-06, "loss": 0.4171, "step": 19667 }, { "epoch": 1.8457207207207207, "grad_norm": 1.0578229598696798, "learning_rate": 3.87173695973889e-06, "loss": 0.3737, "step": 19668 }, { "epoch": 1.8458145645645647, "grad_norm": 0.9524136990127815, "learning_rate": 3.871205082942994e-06, "loss": 0.3887, "step": 19669 }, { "epoch": 1.8459084084084085, "grad_norm": 0.9382148834421937, "learning_rate": 3.870673219605852e-06, "loss": 0.4172, "step": 19670 }, { "epoch": 1.8460022522522523, "grad_norm": 0.9284271268092713, "learning_rate": 3.870141369733807e-06, "loss": 0.4016, "step": 19671 }, { "epoch": 1.8460960960960962, "grad_norm": 0.9894410900829669, "learning_rate": 3.869609533333199e-06, "loss": 0.4214, "step": 19672 }, { "epoch": 1.84618993993994, "grad_norm": 0.8255925452881631, "learning_rate": 3.869077710410368e-06, "loss": 0.3673, "step": 19673 }, { "epoch": 1.8462837837837838, "grad_norm": 0.9699890524158427, "learning_rate": 3.868545900971659e-06, "loss": 0.405, "step": 19674 }, { "epoch": 1.8463776276276276, "grad_norm": 1.0409270516146774, "learning_rate": 3.868014105023409e-06, "loss": 0.4301, "step": 19675 }, { "epoch": 1.8464714714714714, "grad_norm": 0.9188843820723673, "learning_rate": 3.8674823225719605e-06, "loss": 0.4136, "step": 19676 }, { "epoch": 1.8465653153153152, "grad_norm": 0.9761578343930419, "learning_rate": 3.866950553623654e-06, "loss": 0.3678, "step": 19677 }, { "epoch": 1.846659159159159, "grad_norm": 0.9022343132029524, "learning_rate": 3.866418798184829e-06, "loss": 0.4052, "step": 19678 }, { "epoch": 1.846753003003003, "grad_norm": 0.8506897568788756, "learning_rate": 3.865887056261825e-06, "loss": 0.4017, "step": 19679 }, { "epoch": 1.8468468468468469, "grad_norm": 0.972795654627213, "learning_rate": 3.865355327860984e-06, "loss": 0.38, "step": 19680 }, { "epoch": 1.8469406906906907, "grad_norm": 0.9261015871004892, "learning_rate": 3.864823612988645e-06, "loss": 0.4354, "step": 19681 }, { "epoch": 1.8470345345345347, "grad_norm": 0.910581563064193, "learning_rate": 3.864291911651148e-06, "loss": 0.4197, "step": 19682 }, { "epoch": 1.8471283783783785, "grad_norm": 1.1651637814488127, "learning_rate": 3.863760223854832e-06, "loss": 0.3541, "step": 19683 }, { "epoch": 1.8472222222222223, "grad_norm": 0.9545936497747433, "learning_rate": 3.8632285496060375e-06, "loss": 0.376, "step": 19684 }, { "epoch": 1.8473160660660661, "grad_norm": 0.9612248109749053, "learning_rate": 3.8626968889111e-06, "loss": 0.4504, "step": 19685 }, { "epoch": 1.84740990990991, "grad_norm": 0.9720342206794713, "learning_rate": 3.862165241776363e-06, "loss": 0.4056, "step": 19686 }, { "epoch": 1.8475037537537538, "grad_norm": 0.8669965438211049, "learning_rate": 3.8616336082081655e-06, "loss": 0.3872, "step": 19687 }, { "epoch": 1.8475975975975976, "grad_norm": 0.9241734971184472, "learning_rate": 3.861101988212841e-06, "loss": 0.3737, "step": 19688 }, { "epoch": 1.8476914414414414, "grad_norm": 1.0123645407099036, "learning_rate": 3.860570381796734e-06, "loss": 0.3745, "step": 19689 }, { "epoch": 1.8477852852852852, "grad_norm": 0.9972212542571899, "learning_rate": 3.860038788966181e-06, "loss": 0.408, "step": 19690 }, { "epoch": 1.847879129129129, "grad_norm": 0.9662447113073213, "learning_rate": 3.859507209727517e-06, "loss": 0.3924, "step": 19691 }, { "epoch": 1.847972972972973, "grad_norm": 1.1343708086112838, "learning_rate": 3.858975644087084e-06, "loss": 0.4224, "step": 19692 }, { "epoch": 1.8480668168168168, "grad_norm": 0.9096528668211209, "learning_rate": 3.85844409205122e-06, "loss": 0.3999, "step": 19693 }, { "epoch": 1.8481606606606606, "grad_norm": 0.9760482104734406, "learning_rate": 3.857912553626259e-06, "loss": 0.3284, "step": 19694 }, { "epoch": 1.8482545045045045, "grad_norm": 0.8451839812362538, "learning_rate": 3.857381028818543e-06, "loss": 0.3837, "step": 19695 }, { "epoch": 1.8483483483483485, "grad_norm": 0.9259096712113178, "learning_rate": 3.856849517634408e-06, "loss": 0.4129, "step": 19696 }, { "epoch": 1.8484421921921923, "grad_norm": 1.2079520290213686, "learning_rate": 3.856318020080189e-06, "loss": 0.3626, "step": 19697 }, { "epoch": 1.8485360360360361, "grad_norm": 1.0496724140496867, "learning_rate": 3.855786536162225e-06, "loss": 0.4011, "step": 19698 }, { "epoch": 1.84862987987988, "grad_norm": 0.9206881525089574, "learning_rate": 3.855255065886855e-06, "loss": 0.3855, "step": 19699 }, { "epoch": 1.8487237237237237, "grad_norm": 0.9394760511449781, "learning_rate": 3.854723609260409e-06, "loss": 0.401, "step": 19700 }, { "epoch": 1.8488175675675675, "grad_norm": 0.9381667086976029, "learning_rate": 3.854192166289232e-06, "loss": 0.4018, "step": 19701 }, { "epoch": 1.8489114114114114, "grad_norm": 0.9504079011236728, "learning_rate": 3.853660736979656e-06, "loss": 0.4026, "step": 19702 }, { "epoch": 1.8490052552552552, "grad_norm": 0.8732507077055525, "learning_rate": 3.853129321338017e-06, "loss": 0.3851, "step": 19703 }, { "epoch": 1.849099099099099, "grad_norm": 0.895197598084215, "learning_rate": 3.852597919370654e-06, "loss": 0.4065, "step": 19704 }, { "epoch": 1.8491929429429428, "grad_norm": 0.9342327221506271, "learning_rate": 3.8520665310839e-06, "loss": 0.3829, "step": 19705 }, { "epoch": 1.8492867867867868, "grad_norm": 1.0089569036804267, "learning_rate": 3.85153515648409e-06, "loss": 0.3909, "step": 19706 }, { "epoch": 1.8493806306306306, "grad_norm": 0.9058155150247901, "learning_rate": 3.851003795577563e-06, "loss": 0.3393, "step": 19707 }, { "epoch": 1.8494744744744744, "grad_norm": 0.8949778924460473, "learning_rate": 3.850472448370655e-06, "loss": 0.3643, "step": 19708 }, { "epoch": 1.8495683183183185, "grad_norm": 0.875932353152591, "learning_rate": 3.849941114869696e-06, "loss": 0.3588, "step": 19709 }, { "epoch": 1.8496621621621623, "grad_norm": 1.0851756887723363, "learning_rate": 3.849409795081027e-06, "loss": 0.4264, "step": 19710 }, { "epoch": 1.849756006006006, "grad_norm": 1.099157424757645, "learning_rate": 3.84887848901098e-06, "loss": 0.3747, "step": 19711 }, { "epoch": 1.84984984984985, "grad_norm": 1.9143493558709963, "learning_rate": 3.848347196665888e-06, "loss": 0.4077, "step": 19712 }, { "epoch": 1.8499436936936937, "grad_norm": 0.9567688896699896, "learning_rate": 3.847815918052089e-06, "loss": 0.3689, "step": 19713 }, { "epoch": 1.8500375375375375, "grad_norm": 1.1718865735700574, "learning_rate": 3.8472846531759174e-06, "loss": 0.4301, "step": 19714 }, { "epoch": 1.8501313813813813, "grad_norm": 0.9235566433451369, "learning_rate": 3.846753402043706e-06, "loss": 0.4279, "step": 19715 }, { "epoch": 1.8502252252252251, "grad_norm": 0.9295844798398162, "learning_rate": 3.84622216466179e-06, "loss": 0.4187, "step": 19716 }, { "epoch": 1.850319069069069, "grad_norm": 0.959452557274758, "learning_rate": 3.845690941036503e-06, "loss": 0.3999, "step": 19717 }, { "epoch": 1.8504129129129128, "grad_norm": 0.8788632749560172, "learning_rate": 3.845159731174177e-06, "loss": 0.4099, "step": 19718 }, { "epoch": 1.8505067567567568, "grad_norm": 0.8689105099855794, "learning_rate": 3.8446285350811494e-06, "loss": 0.3843, "step": 19719 }, { "epoch": 1.8506006006006006, "grad_norm": 0.9434646337714484, "learning_rate": 3.844097352763752e-06, "loss": 0.3625, "step": 19720 }, { "epoch": 1.8506944444444444, "grad_norm": 0.8773902428871975, "learning_rate": 3.843566184228315e-06, "loss": 0.4066, "step": 19721 }, { "epoch": 1.8507882882882885, "grad_norm": 1.0054187098192595, "learning_rate": 3.843035029481178e-06, "loss": 0.3952, "step": 19722 }, { "epoch": 1.8508821321321323, "grad_norm": 0.98721434457555, "learning_rate": 3.8425038885286694e-06, "loss": 0.423, "step": 19723 }, { "epoch": 1.850975975975976, "grad_norm": 1.0120674833113898, "learning_rate": 3.841972761377121e-06, "loss": 0.3616, "step": 19724 }, { "epoch": 1.8510698198198199, "grad_norm": 2.2318708762456834, "learning_rate": 3.84144164803287e-06, "loss": 0.383, "step": 19725 }, { "epoch": 1.8511636636636637, "grad_norm": 1.1729613001119308, "learning_rate": 3.840910548502246e-06, "loss": 0.4109, "step": 19726 }, { "epoch": 1.8512575075075075, "grad_norm": 0.9843721915633074, "learning_rate": 3.840379462791581e-06, "loss": 0.3696, "step": 19727 }, { "epoch": 1.8513513513513513, "grad_norm": 0.8969719376430376, "learning_rate": 3.83984839090721e-06, "loss": 0.3559, "step": 19728 }, { "epoch": 1.8514451951951951, "grad_norm": 0.9453301090601539, "learning_rate": 3.839317332855463e-06, "loss": 0.3667, "step": 19729 }, { "epoch": 1.851539039039039, "grad_norm": 1.3927422593738608, "learning_rate": 3.83878628864267e-06, "loss": 0.3783, "step": 19730 }, { "epoch": 1.8516328828828827, "grad_norm": 0.8653610340088679, "learning_rate": 3.838255258275166e-06, "loss": 0.4078, "step": 19731 }, { "epoch": 1.8517267267267268, "grad_norm": 1.1880495582175665, "learning_rate": 3.837724241759282e-06, "loss": 0.3678, "step": 19732 }, { "epoch": 1.8518205705705706, "grad_norm": 0.8906468966538831, "learning_rate": 3.8371932391013475e-06, "loss": 0.3819, "step": 19733 }, { "epoch": 1.8519144144144144, "grad_norm": 1.0825328183368976, "learning_rate": 3.836662250307696e-06, "loss": 0.3395, "step": 19734 }, { "epoch": 1.8520082582582582, "grad_norm": 0.9208347374912528, "learning_rate": 3.836131275384657e-06, "loss": 0.3876, "step": 19735 }, { "epoch": 1.8521021021021022, "grad_norm": 0.9433538219244941, "learning_rate": 3.83560031433856e-06, "loss": 0.371, "step": 19736 }, { "epoch": 1.852195945945946, "grad_norm": 0.9228024698592858, "learning_rate": 3.835069367175739e-06, "loss": 0.3785, "step": 19737 }, { "epoch": 1.8522897897897899, "grad_norm": 0.9402433688064946, "learning_rate": 3.834538433902524e-06, "loss": 0.3633, "step": 19738 }, { "epoch": 1.8523836336336337, "grad_norm": 1.2343846521920596, "learning_rate": 3.8340075145252434e-06, "loss": 0.4008, "step": 19739 }, { "epoch": 1.8524774774774775, "grad_norm": 0.8304258428034984, "learning_rate": 3.833476609050228e-06, "loss": 0.3828, "step": 19740 }, { "epoch": 1.8525713213213213, "grad_norm": 0.8832113570661395, "learning_rate": 3.83294571748381e-06, "loss": 0.3864, "step": 19741 }, { "epoch": 1.852665165165165, "grad_norm": 0.8940723161605877, "learning_rate": 3.832414839832315e-06, "loss": 0.3714, "step": 19742 }, { "epoch": 1.852759009009009, "grad_norm": 0.8526491161352245, "learning_rate": 3.831883976102077e-06, "loss": 0.3714, "step": 19743 }, { "epoch": 1.8528528528528527, "grad_norm": 0.939380985083036, "learning_rate": 3.8313531262994235e-06, "loss": 0.3627, "step": 19744 }, { "epoch": 1.8529466966966965, "grad_norm": 0.8771708690471506, "learning_rate": 3.830822290430683e-06, "loss": 0.3814, "step": 19745 }, { "epoch": 1.8530405405405406, "grad_norm": 0.9960938039324242, "learning_rate": 3.8302914685021884e-06, "loss": 0.3797, "step": 19746 }, { "epoch": 1.8531343843843844, "grad_norm": 1.9444534080122968, "learning_rate": 3.829760660520266e-06, "loss": 0.3694, "step": 19747 }, { "epoch": 1.8532282282282282, "grad_norm": 1.104703054600497, "learning_rate": 3.829229866491242e-06, "loss": 0.3484, "step": 19748 }, { "epoch": 1.8533220720720722, "grad_norm": 0.8522299422957158, "learning_rate": 3.828699086421451e-06, "loss": 0.3736, "step": 19749 }, { "epoch": 1.853415915915916, "grad_norm": 0.8616827482464743, "learning_rate": 3.8281683203172175e-06, "loss": 0.3605, "step": 19750 }, { "epoch": 1.8535097597597598, "grad_norm": 0.9112148605229962, "learning_rate": 3.827637568184871e-06, "loss": 0.3997, "step": 19751 }, { "epoch": 1.8536036036036037, "grad_norm": 0.9333714548048265, "learning_rate": 3.82710683003074e-06, "loss": 0.3795, "step": 19752 }, { "epoch": 1.8536974474474475, "grad_norm": 0.8917490068864147, "learning_rate": 3.826576105861153e-06, "loss": 0.3756, "step": 19753 }, { "epoch": 1.8537912912912913, "grad_norm": 1.3860479667691727, "learning_rate": 3.826045395682435e-06, "loss": 0.4178, "step": 19754 }, { "epoch": 1.853885135135135, "grad_norm": 0.8693998986824911, "learning_rate": 3.8255146995009185e-06, "loss": 0.3824, "step": 19755 }, { "epoch": 1.853978978978979, "grad_norm": 0.9699917906553054, "learning_rate": 3.824984017322928e-06, "loss": 0.3843, "step": 19756 }, { "epoch": 1.8540728228228227, "grad_norm": 0.948519911873071, "learning_rate": 3.82445334915479e-06, "loss": 0.3443, "step": 19757 }, { "epoch": 1.8541666666666665, "grad_norm": 0.8788673171929632, "learning_rate": 3.823922695002834e-06, "loss": 0.3138, "step": 19758 }, { "epoch": 1.8542605105105106, "grad_norm": 0.8560452737309102, "learning_rate": 3.8233920548733866e-06, "loss": 0.3783, "step": 19759 }, { "epoch": 1.8543543543543544, "grad_norm": 0.9987808548149201, "learning_rate": 3.8228614287727716e-06, "loss": 0.3868, "step": 19760 }, { "epoch": 1.8544481981981982, "grad_norm": 0.9179641729324453, "learning_rate": 3.8223308167073214e-06, "loss": 0.4302, "step": 19761 }, { "epoch": 1.8545420420420422, "grad_norm": 0.878774222486256, "learning_rate": 3.821800218683358e-06, "loss": 0.4548, "step": 19762 }, { "epoch": 1.854635885885886, "grad_norm": 0.8511185822768472, "learning_rate": 3.82126963470721e-06, "loss": 0.3589, "step": 19763 }, { "epoch": 1.8547297297297298, "grad_norm": 0.8736041979950767, "learning_rate": 3.820739064785203e-06, "loss": 0.4209, "step": 19764 }, { "epoch": 1.8548235735735736, "grad_norm": 1.0125999868259286, "learning_rate": 3.8202085089236626e-06, "loss": 0.393, "step": 19765 }, { "epoch": 1.8549174174174174, "grad_norm": 1.0396701240296968, "learning_rate": 3.819677967128913e-06, "loss": 0.3838, "step": 19766 }, { "epoch": 1.8550112612612613, "grad_norm": 1.4374501132512312, "learning_rate": 3.819147439407285e-06, "loss": 0.356, "step": 19767 }, { "epoch": 1.855105105105105, "grad_norm": 0.9177197695227732, "learning_rate": 3.8186169257651e-06, "loss": 0.4321, "step": 19768 }, { "epoch": 1.8551989489489489, "grad_norm": 0.8839396589361476, "learning_rate": 3.818086426208684e-06, "loss": 0.4072, "step": 19769 }, { "epoch": 1.8552927927927927, "grad_norm": 1.029897412147609, "learning_rate": 3.817555940744363e-06, "loss": 0.4492, "step": 19770 }, { "epoch": 1.8553866366366365, "grad_norm": 1.0487679737958182, "learning_rate": 3.817025469378462e-06, "loss": 0.3981, "step": 19771 }, { "epoch": 1.8554804804804805, "grad_norm": 1.175718915869716, "learning_rate": 3.816495012117303e-06, "loss": 0.4078, "step": 19772 }, { "epoch": 1.8555743243243243, "grad_norm": 0.9419351137583389, "learning_rate": 3.8159645689672165e-06, "loss": 0.3526, "step": 19773 }, { "epoch": 1.8556681681681682, "grad_norm": 1.347033704520831, "learning_rate": 3.815434139934523e-06, "loss": 0.3828, "step": 19774 }, { "epoch": 1.855762012012012, "grad_norm": 1.4862003975727616, "learning_rate": 3.814903725025547e-06, "loss": 0.3968, "step": 19775 }, { "epoch": 1.855855855855856, "grad_norm": 1.0238856899378628, "learning_rate": 3.814373324246614e-06, "loss": 0.4153, "step": 19776 }, { "epoch": 1.8559496996996998, "grad_norm": 0.9889842743577023, "learning_rate": 3.8138429376040476e-06, "loss": 0.4192, "step": 19777 }, { "epoch": 1.8560435435435436, "grad_norm": 2.137858912840886, "learning_rate": 3.813312565104169e-06, "loss": 0.3775, "step": 19778 }, { "epoch": 1.8561373873873874, "grad_norm": 1.2724119769870228, "learning_rate": 3.8127822067533073e-06, "loss": 0.383, "step": 19779 }, { "epoch": 1.8562312312312312, "grad_norm": 0.9830021034529998, "learning_rate": 3.8122518625577826e-06, "loss": 0.4232, "step": 19780 }, { "epoch": 1.856325075075075, "grad_norm": 0.9098929809657039, "learning_rate": 3.8117215325239174e-06, "loss": 0.3888, "step": 19781 }, { "epoch": 1.8564189189189189, "grad_norm": 1.297590951109327, "learning_rate": 3.811191216658038e-06, "loss": 0.445, "step": 19782 }, { "epoch": 1.8565127627627627, "grad_norm": 0.9573141868187114, "learning_rate": 3.810660914966465e-06, "loss": 0.3775, "step": 19783 }, { "epoch": 1.8566066066066065, "grad_norm": 0.8485907070311509, "learning_rate": 3.81013062745552e-06, "loss": 0.3353, "step": 19784 }, { "epoch": 1.8567004504504503, "grad_norm": 1.054353918622532, "learning_rate": 3.8096003541315295e-06, "loss": 0.3602, "step": 19785 }, { "epoch": 1.8567942942942943, "grad_norm": 0.9666279650904095, "learning_rate": 3.809070095000814e-06, "loss": 0.3726, "step": 19786 }, { "epoch": 1.8568881381381381, "grad_norm": 0.884459696514055, "learning_rate": 3.8085398500696955e-06, "loss": 0.4087, "step": 19787 }, { "epoch": 1.856981981981982, "grad_norm": 0.8438689480342495, "learning_rate": 3.8080096193444973e-06, "loss": 0.365, "step": 19788 }, { "epoch": 1.857075825825826, "grad_norm": 1.163727332063006, "learning_rate": 3.8074794028315403e-06, "loss": 0.3719, "step": 19789 }, { "epoch": 1.8571696696696698, "grad_norm": 1.0515381513052435, "learning_rate": 3.806949200537145e-06, "loss": 0.3798, "step": 19790 }, { "epoch": 1.8572635135135136, "grad_norm": 0.8939389719249694, "learning_rate": 3.806419012467637e-06, "loss": 0.3782, "step": 19791 }, { "epoch": 1.8573573573573574, "grad_norm": 0.9743484340280973, "learning_rate": 3.805888838629336e-06, "loss": 0.3967, "step": 19792 }, { "epoch": 1.8574512012012012, "grad_norm": 0.8297634804094645, "learning_rate": 3.805358679028561e-06, "loss": 0.3788, "step": 19793 }, { "epoch": 1.857545045045045, "grad_norm": 0.9736737801318526, "learning_rate": 3.804828533671636e-06, "loss": 0.3657, "step": 19794 }, { "epoch": 1.8576388888888888, "grad_norm": 0.9793353361895, "learning_rate": 3.8042984025648817e-06, "loss": 0.3853, "step": 19795 }, { "epoch": 1.8577327327327327, "grad_norm": 0.8363087015436056, "learning_rate": 3.8037682857146162e-06, "loss": 0.3635, "step": 19796 }, { "epoch": 1.8578265765765765, "grad_norm": 0.8271599013124868, "learning_rate": 3.8032381831271638e-06, "loss": 0.3946, "step": 19797 }, { "epoch": 1.8579204204204203, "grad_norm": 0.8710717028321705, "learning_rate": 3.8027080948088436e-06, "loss": 0.38, "step": 19798 }, { "epoch": 1.8580142642642643, "grad_norm": 0.9888594759318149, "learning_rate": 3.802178020765975e-06, "loss": 0.4122, "step": 19799 }, { "epoch": 1.8581081081081081, "grad_norm": 0.8579259883646019, "learning_rate": 3.8016479610048797e-06, "loss": 0.3884, "step": 19800 }, { "epoch": 1.858201951951952, "grad_norm": 1.534782151610135, "learning_rate": 3.8011179155318768e-06, "loss": 0.3922, "step": 19801 }, { "epoch": 1.858295795795796, "grad_norm": 0.9223109884100301, "learning_rate": 3.800587884353284e-06, "loss": 0.414, "step": 19802 }, { "epoch": 1.8583896396396398, "grad_norm": 0.9798491829834524, "learning_rate": 3.8000578674754248e-06, "loss": 0.3959, "step": 19803 }, { "epoch": 1.8584834834834836, "grad_norm": 1.2615241099117986, "learning_rate": 3.799527864904617e-06, "loss": 0.3455, "step": 19804 }, { "epoch": 1.8585773273273274, "grad_norm": 0.9186143819487141, "learning_rate": 3.7989978766471792e-06, "loss": 0.3792, "step": 19805 }, { "epoch": 1.8586711711711712, "grad_norm": 0.8192498357317002, "learning_rate": 3.798467902709431e-06, "loss": 0.3917, "step": 19806 }, { "epoch": 1.858765015015015, "grad_norm": 1.0962106522618922, "learning_rate": 3.7979379430976927e-06, "loss": 0.4092, "step": 19807 }, { "epoch": 1.8588588588588588, "grad_norm": 0.8356543879962958, "learning_rate": 3.7974079978182787e-06, "loss": 0.3852, "step": 19808 }, { "epoch": 1.8589527027027026, "grad_norm": 0.9045593503743835, "learning_rate": 3.7968780668775133e-06, "loss": 0.414, "step": 19809 }, { "epoch": 1.8590465465465464, "grad_norm": 0.9267026961511007, "learning_rate": 3.7963481502817125e-06, "loss": 0.4098, "step": 19810 }, { "epoch": 1.8591403903903903, "grad_norm": 1.3969788411601465, "learning_rate": 3.795818248037192e-06, "loss": 0.3944, "step": 19811 }, { "epoch": 1.8592342342342343, "grad_norm": 0.8124489524572948, "learning_rate": 3.7952883601502755e-06, "loss": 0.3843, "step": 19812 }, { "epoch": 1.859328078078078, "grad_norm": 0.9446280977017926, "learning_rate": 3.7947584866272762e-06, "loss": 0.3921, "step": 19813 }, { "epoch": 1.859421921921922, "grad_norm": 0.9018283784503047, "learning_rate": 3.7942286274745115e-06, "loss": 0.4053, "step": 19814 }, { "epoch": 1.8595157657657657, "grad_norm": 1.0145953117807676, "learning_rate": 3.793698782698303e-06, "loss": 0.3554, "step": 19815 }, { "epoch": 1.8596096096096097, "grad_norm": 0.9018622563229556, "learning_rate": 3.7931689523049653e-06, "loss": 0.4107, "step": 19816 }, { "epoch": 1.8597034534534536, "grad_norm": 0.7772903723455914, "learning_rate": 3.792639136300815e-06, "loss": 0.4214, "step": 19817 }, { "epoch": 1.8597972972972974, "grad_norm": 0.8549218653966593, "learning_rate": 3.7921093346921723e-06, "loss": 0.3636, "step": 19818 }, { "epoch": 1.8598911411411412, "grad_norm": 0.8378139519960929, "learning_rate": 3.7915795474853527e-06, "loss": 0.3758, "step": 19819 }, { "epoch": 1.859984984984985, "grad_norm": 0.8767039739523171, "learning_rate": 3.7910497746866694e-06, "loss": 0.3788, "step": 19820 }, { "epoch": 1.8600788288288288, "grad_norm": 1.1814515990120389, "learning_rate": 3.7905200163024443e-06, "loss": 0.3844, "step": 19821 }, { "epoch": 1.8601726726726726, "grad_norm": 1.0031539225619543, "learning_rate": 3.7899902723389915e-06, "loss": 0.4257, "step": 19822 }, { "epoch": 1.8602665165165164, "grad_norm": 1.0105864792205748, "learning_rate": 3.7894605428026244e-06, "loss": 0.416, "step": 19823 }, { "epoch": 1.8603603603603602, "grad_norm": 0.9033004952394108, "learning_rate": 3.788930827699664e-06, "loss": 0.3788, "step": 19824 }, { "epoch": 1.860454204204204, "grad_norm": 1.004830411932138, "learning_rate": 3.788401127036424e-06, "loss": 0.3901, "step": 19825 }, { "epoch": 1.860548048048048, "grad_norm": 0.9347132617444407, "learning_rate": 3.7878714408192193e-06, "loss": 0.3926, "step": 19826 }, { "epoch": 1.8606418918918919, "grad_norm": 0.9499029934836423, "learning_rate": 3.7873417690543667e-06, "loss": 0.4095, "step": 19827 }, { "epoch": 1.8607357357357357, "grad_norm": 0.9239148031327655, "learning_rate": 3.7868121117481803e-06, "loss": 0.401, "step": 19828 }, { "epoch": 1.8608295795795797, "grad_norm": 0.8966830021585344, "learning_rate": 3.786282468906975e-06, "loss": 0.3822, "step": 19829 }, { "epoch": 1.8609234234234235, "grad_norm": 1.00274602738877, "learning_rate": 3.785752840537068e-06, "loss": 0.3825, "step": 19830 }, { "epoch": 1.8610172672672673, "grad_norm": 0.9217550007116815, "learning_rate": 3.785223226644773e-06, "loss": 0.3707, "step": 19831 }, { "epoch": 1.8611111111111112, "grad_norm": 1.5920890636702583, "learning_rate": 3.7846936272364033e-06, "loss": 0.3864, "step": 19832 }, { "epoch": 1.861204954954955, "grad_norm": 1.8199914925447076, "learning_rate": 3.7841640423182755e-06, "loss": 0.4014, "step": 19833 }, { "epoch": 1.8612987987987988, "grad_norm": 1.2597863970258731, "learning_rate": 3.7836344718967032e-06, "loss": 0.4006, "step": 19834 }, { "epoch": 1.8613926426426426, "grad_norm": 0.9027010672592314, "learning_rate": 3.783104915977998e-06, "loss": 0.3223, "step": 19835 }, { "epoch": 1.8614864864864864, "grad_norm": 0.952150555163463, "learning_rate": 3.7825753745684784e-06, "loss": 0.4042, "step": 19836 }, { "epoch": 1.8615803303303302, "grad_norm": 0.8050026387223395, "learning_rate": 3.782045847674456e-06, "loss": 0.3851, "step": 19837 }, { "epoch": 1.861674174174174, "grad_norm": 1.125224840208875, "learning_rate": 3.7815163353022427e-06, "loss": 0.4347, "step": 19838 }, { "epoch": 1.861768018018018, "grad_norm": 0.8864818171828206, "learning_rate": 3.780986837458155e-06, "loss": 0.369, "step": 19839 }, { "epoch": 1.8618618618618619, "grad_norm": 0.9646096776133181, "learning_rate": 3.7804573541485045e-06, "loss": 0.4202, "step": 19840 }, { "epoch": 1.8619557057057057, "grad_norm": 0.8920972447514135, "learning_rate": 3.7799278853796023e-06, "loss": 0.414, "step": 19841 }, { "epoch": 1.8620495495495497, "grad_norm": 0.9061838743555777, "learning_rate": 3.779398431157766e-06, "loss": 0.4222, "step": 19842 }, { "epoch": 1.8621433933933935, "grad_norm": 1.061572039563664, "learning_rate": 3.7788689914893057e-06, "loss": 0.3748, "step": 19843 }, { "epoch": 1.8622372372372373, "grad_norm": 1.0146719903829784, "learning_rate": 3.778339566380533e-06, "loss": 0.4179, "step": 19844 }, { "epoch": 1.8623310810810811, "grad_norm": 0.8541818483957004, "learning_rate": 3.7778101558377623e-06, "loss": 0.4106, "step": 19845 }, { "epoch": 1.862424924924925, "grad_norm": 0.9488104323319548, "learning_rate": 3.7772807598673054e-06, "loss": 0.3454, "step": 19846 }, { "epoch": 1.8625187687687688, "grad_norm": 0.9817137621664443, "learning_rate": 3.776751378475472e-06, "loss": 0.3851, "step": 19847 }, { "epoch": 1.8626126126126126, "grad_norm": 0.8548056244117218, "learning_rate": 3.7762220116685774e-06, "loss": 0.3432, "step": 19848 }, { "epoch": 1.8627064564564564, "grad_norm": 0.9085593131875375, "learning_rate": 3.7756926594529312e-06, "loss": 0.3579, "step": 19849 }, { "epoch": 1.8628003003003002, "grad_norm": 0.9775578329450901, "learning_rate": 3.7751633218348455e-06, "loss": 0.3397, "step": 19850 }, { "epoch": 1.862894144144144, "grad_norm": 0.954720285442518, "learning_rate": 3.7746339988206327e-06, "loss": 0.3971, "step": 19851 }, { "epoch": 1.862987987987988, "grad_norm": 0.9902255506867979, "learning_rate": 3.7741046904166024e-06, "loss": 0.4045, "step": 19852 }, { "epoch": 1.8630818318318318, "grad_norm": 0.9287193509881504, "learning_rate": 3.7735753966290645e-06, "loss": 0.3577, "step": 19853 }, { "epoch": 1.8631756756756757, "grad_norm": 0.8895791527466145, "learning_rate": 3.7730461174643333e-06, "loss": 0.4217, "step": 19854 }, { "epoch": 1.8632695195195195, "grad_norm": 1.0163616183026094, "learning_rate": 3.7725168529287174e-06, "loss": 0.3825, "step": 19855 }, { "epoch": 1.8633633633633635, "grad_norm": 0.9490670886290391, "learning_rate": 3.7719876030285273e-06, "loss": 0.4033, "step": 19856 }, { "epoch": 1.8634572072072073, "grad_norm": 0.7871146540067205, "learning_rate": 3.7714583677700737e-06, "loss": 0.3503, "step": 19857 }, { "epoch": 1.8635510510510511, "grad_norm": 0.8654354417279483, "learning_rate": 3.7709291471596675e-06, "loss": 0.4045, "step": 19858 }, { "epoch": 1.863644894894895, "grad_norm": 1.0370644331820082, "learning_rate": 3.7703999412036153e-06, "loss": 0.3448, "step": 19859 }, { "epoch": 1.8637387387387387, "grad_norm": 1.1624465242321493, "learning_rate": 3.769870749908231e-06, "loss": 0.4014, "step": 19860 }, { "epoch": 1.8638325825825826, "grad_norm": 0.8449118725546773, "learning_rate": 3.769341573279823e-06, "loss": 0.3501, "step": 19861 }, { "epoch": 1.8639264264264264, "grad_norm": 0.8421113481593541, "learning_rate": 3.7688124113246987e-06, "loss": 0.4041, "step": 19862 }, { "epoch": 1.8640202702702702, "grad_norm": 0.8462704103073461, "learning_rate": 3.7682832640491705e-06, "loss": 0.3737, "step": 19863 }, { "epoch": 1.864114114114114, "grad_norm": 0.8058717419038606, "learning_rate": 3.7677541314595458e-06, "loss": 0.38, "step": 19864 }, { "epoch": 1.8642079579579578, "grad_norm": 0.9295651879276118, "learning_rate": 3.7672250135621314e-06, "loss": 0.3903, "step": 19865 }, { "epoch": 1.8643018018018018, "grad_norm": 0.957098731751365, "learning_rate": 3.76669591036324e-06, "loss": 0.3793, "step": 19866 }, { "epoch": 1.8643956456456456, "grad_norm": 0.9208622708210292, "learning_rate": 3.766166821869179e-06, "loss": 0.421, "step": 19867 }, { "epoch": 1.8644894894894894, "grad_norm": 0.8726186747825214, "learning_rate": 3.7656377480862548e-06, "loss": 0.4056, "step": 19868 }, { "epoch": 1.8645833333333335, "grad_norm": 1.0522064301629772, "learning_rate": 3.7651086890207784e-06, "loss": 0.4336, "step": 19869 }, { "epoch": 1.8646771771771773, "grad_norm": 0.9102781524114432, "learning_rate": 3.764579644679056e-06, "loss": 0.355, "step": 19870 }, { "epoch": 1.864771021021021, "grad_norm": 0.9661335736460573, "learning_rate": 3.764050615067394e-06, "loss": 0.4314, "step": 19871 }, { "epoch": 1.864864864864865, "grad_norm": 0.8948059075780134, "learning_rate": 3.7635216001921037e-06, "loss": 0.428, "step": 19872 }, { "epoch": 1.8649587087087087, "grad_norm": 1.0374659423761934, "learning_rate": 3.762992600059492e-06, "loss": 0.3923, "step": 19873 }, { "epoch": 1.8650525525525525, "grad_norm": 1.123081203032636, "learning_rate": 3.762463614675863e-06, "loss": 0.3732, "step": 19874 }, { "epoch": 1.8651463963963963, "grad_norm": 0.9732892666090842, "learning_rate": 3.7619346440475273e-06, "loss": 0.3284, "step": 19875 }, { "epoch": 1.8652402402402402, "grad_norm": 0.8855415454287303, "learning_rate": 3.7614056881807903e-06, "loss": 0.4203, "step": 19876 }, { "epoch": 1.865334084084084, "grad_norm": 1.06804134368949, "learning_rate": 3.7608767470819575e-06, "loss": 0.3911, "step": 19877 }, { "epoch": 1.8654279279279278, "grad_norm": 1.0389155518104114, "learning_rate": 3.760347820757339e-06, "loss": 0.4043, "step": 19878 }, { "epoch": 1.8655217717717718, "grad_norm": 2.131846133623516, "learning_rate": 3.759818909213239e-06, "loss": 0.437, "step": 19879 }, { "epoch": 1.8656156156156156, "grad_norm": 1.0360066899252953, "learning_rate": 3.759290012455964e-06, "loss": 0.3875, "step": 19880 }, { "epoch": 1.8657094594594594, "grad_norm": 0.8792227362702336, "learning_rate": 3.7587611304918205e-06, "loss": 0.4193, "step": 19881 }, { "epoch": 1.8658033033033035, "grad_norm": 0.9324636232066137, "learning_rate": 3.758232263327114e-06, "loss": 0.3502, "step": 19882 }, { "epoch": 1.8658971471471473, "grad_norm": 0.8697627011124581, "learning_rate": 3.757703410968148e-06, "loss": 0.4164, "step": 19883 }, { "epoch": 1.865990990990991, "grad_norm": 0.9649890167752112, "learning_rate": 3.7571745734212335e-06, "loss": 0.4295, "step": 19884 }, { "epoch": 1.866084834834835, "grad_norm": 0.9068597829425342, "learning_rate": 3.7566457506926725e-06, "loss": 0.4147, "step": 19885 }, { "epoch": 1.8661786786786787, "grad_norm": 0.9633216744832239, "learning_rate": 3.756116942788769e-06, "loss": 0.3736, "step": 19886 }, { "epoch": 1.8662725225225225, "grad_norm": 0.9337410586798679, "learning_rate": 3.7555881497158307e-06, "loss": 0.3742, "step": 19887 }, { "epoch": 1.8663663663663663, "grad_norm": 1.0362391818817143, "learning_rate": 3.755059371480162e-06, "loss": 0.4063, "step": 19888 }, { "epoch": 1.8664602102102101, "grad_norm": 0.9820474126507137, "learning_rate": 3.7545306080880646e-06, "loss": 0.3946, "step": 19889 }, { "epoch": 1.866554054054054, "grad_norm": 0.8471681412575036, "learning_rate": 3.754001859545847e-06, "loss": 0.3907, "step": 19890 }, { "epoch": 1.8666478978978978, "grad_norm": 1.032414465052432, "learning_rate": 3.753473125859812e-06, "loss": 0.4051, "step": 19891 }, { "epoch": 1.8667417417417418, "grad_norm": 0.9191551238647839, "learning_rate": 3.752944407036263e-06, "loss": 0.3762, "step": 19892 }, { "epoch": 1.8668355855855856, "grad_norm": 0.8789290421440181, "learning_rate": 3.7524157030815055e-06, "loss": 0.3702, "step": 19893 }, { "epoch": 1.8669294294294294, "grad_norm": 1.5208640018998778, "learning_rate": 3.7518870140018424e-06, "loss": 0.3902, "step": 19894 }, { "epoch": 1.8670232732732732, "grad_norm": 0.9940148600834184, "learning_rate": 3.751358339803575e-06, "loss": 0.3886, "step": 19895 }, { "epoch": 1.8671171171171173, "grad_norm": 1.0308465150677022, "learning_rate": 3.7508296804930115e-06, "loss": 0.3654, "step": 19896 }, { "epoch": 1.867210960960961, "grad_norm": 0.943019264255922, "learning_rate": 3.7503010360764526e-06, "loss": 0.3826, "step": 19897 }, { "epoch": 1.8673048048048049, "grad_norm": 3.7407444243654004, "learning_rate": 3.7497724065602e-06, "loss": 0.3614, "step": 19898 }, { "epoch": 1.8673986486486487, "grad_norm": 0.8824366898203047, "learning_rate": 3.7492437919505605e-06, "loss": 0.3723, "step": 19899 }, { "epoch": 1.8674924924924925, "grad_norm": 0.8676719682560261, "learning_rate": 3.7487151922538336e-06, "loss": 0.3364, "step": 19900 }, { "epoch": 1.8675863363363363, "grad_norm": 0.9098666201616321, "learning_rate": 3.7481866074763207e-06, "loss": 0.4085, "step": 19901 }, { "epoch": 1.8676801801801801, "grad_norm": 1.0687963416376736, "learning_rate": 3.7476580376243275e-06, "loss": 0.369, "step": 19902 }, { "epoch": 1.867774024024024, "grad_norm": 0.9945944680606672, "learning_rate": 3.747129482704156e-06, "loss": 0.3761, "step": 19903 }, { "epoch": 1.8678678678678677, "grad_norm": 0.9471968785145102, "learning_rate": 3.7466009427221063e-06, "loss": 0.3689, "step": 19904 }, { "epoch": 1.8679617117117115, "grad_norm": 0.8884019570217868, "learning_rate": 3.7460724176844808e-06, "loss": 0.438, "step": 19905 }, { "epoch": 1.8680555555555556, "grad_norm": 0.8753686259890541, "learning_rate": 3.745543907597583e-06, "loss": 0.3987, "step": 19906 }, { "epoch": 1.8681493993993994, "grad_norm": 0.814701078548779, "learning_rate": 3.7450154124677097e-06, "loss": 0.4017, "step": 19907 }, { "epoch": 1.8682432432432432, "grad_norm": 1.0047888538217544, "learning_rate": 3.744486932301168e-06, "loss": 0.4209, "step": 19908 }, { "epoch": 1.8683370870870872, "grad_norm": 0.8927646474082128, "learning_rate": 3.7439584671042556e-06, "loss": 0.3922, "step": 19909 }, { "epoch": 1.868430930930931, "grad_norm": 1.1158121794995413, "learning_rate": 3.7434300168832737e-06, "loss": 0.3806, "step": 19910 }, { "epoch": 1.8685247747747749, "grad_norm": 0.9047420600558785, "learning_rate": 3.7429015816445246e-06, "loss": 0.3703, "step": 19911 }, { "epoch": 1.8686186186186187, "grad_norm": 1.0294843519827492, "learning_rate": 3.742373161394308e-06, "loss": 0.4088, "step": 19912 }, { "epoch": 1.8687124624624625, "grad_norm": 0.8302011907684242, "learning_rate": 3.7418447561389215e-06, "loss": 0.3724, "step": 19913 }, { "epoch": 1.8688063063063063, "grad_norm": 1.0130429283666025, "learning_rate": 3.741316365884671e-06, "loss": 0.4029, "step": 19914 }, { "epoch": 1.86890015015015, "grad_norm": 1.0207617432067109, "learning_rate": 3.740787990637853e-06, "loss": 0.3731, "step": 19915 }, { "epoch": 1.868993993993994, "grad_norm": 2.8241017705717057, "learning_rate": 3.7402596304047677e-06, "loss": 0.3988, "step": 19916 }, { "epoch": 1.8690878378378377, "grad_norm": 1.062245727177359, "learning_rate": 3.7397312851917156e-06, "loss": 0.3665, "step": 19917 }, { "epoch": 1.8691816816816815, "grad_norm": 0.9352572939319312, "learning_rate": 3.7392029550049957e-06, "loss": 0.4099, "step": 19918 }, { "epoch": 1.8692755255255256, "grad_norm": 0.9752482393192785, "learning_rate": 3.7386746398509055e-06, "loss": 0.3776, "step": 19919 }, { "epoch": 1.8693693693693694, "grad_norm": 0.8759412935812251, "learning_rate": 3.7381463397357475e-06, "loss": 0.3834, "step": 19920 }, { "epoch": 1.8694632132132132, "grad_norm": 0.8424237929261346, "learning_rate": 3.7376180546658196e-06, "loss": 0.3862, "step": 19921 }, { "epoch": 1.8695570570570572, "grad_norm": 0.9883626267766362, "learning_rate": 3.73708978464742e-06, "loss": 0.424, "step": 19922 }, { "epoch": 1.869650900900901, "grad_norm": 0.8884245590588667, "learning_rate": 3.736561529686847e-06, "loss": 0.3768, "step": 19923 }, { "epoch": 1.8697447447447448, "grad_norm": 1.22101828581252, "learning_rate": 3.7360332897904006e-06, "loss": 0.4297, "step": 19924 }, { "epoch": 1.8698385885885886, "grad_norm": 0.9423144770085358, "learning_rate": 3.735505064964375e-06, "loss": 0.3727, "step": 19925 }, { "epoch": 1.8699324324324325, "grad_norm": 0.8729281687196171, "learning_rate": 3.7349768552150746e-06, "loss": 0.3912, "step": 19926 }, { "epoch": 1.8700262762762763, "grad_norm": 0.9468370578759612, "learning_rate": 3.734448660548794e-06, "loss": 0.3833, "step": 19927 }, { "epoch": 1.87012012012012, "grad_norm": 0.9697638121581422, "learning_rate": 3.733920480971829e-06, "loss": 0.3725, "step": 19928 }, { "epoch": 1.8702139639639639, "grad_norm": 1.5732964629977837, "learning_rate": 3.7333923164904806e-06, "loss": 0.4145, "step": 19929 }, { "epoch": 1.8703078078078077, "grad_norm": 0.8370641757214312, "learning_rate": 3.7328641671110448e-06, "loss": 0.3826, "step": 19930 }, { "epoch": 1.8704016516516515, "grad_norm": 1.1786816998404501, "learning_rate": 3.7323360328398167e-06, "loss": 0.4335, "step": 19931 }, { "epoch": 1.8704954954954955, "grad_norm": 1.062385539931077, "learning_rate": 3.731807913683097e-06, "loss": 0.4101, "step": 19932 }, { "epoch": 1.8705893393393394, "grad_norm": 1.1300056217197971, "learning_rate": 3.7312798096471815e-06, "loss": 0.3493, "step": 19933 }, { "epoch": 1.8706831831831832, "grad_norm": 1.0186050349340188, "learning_rate": 3.730751720738363e-06, "loss": 0.4497, "step": 19934 }, { "epoch": 1.870777027027027, "grad_norm": 0.9632407677336342, "learning_rate": 3.730223646962945e-06, "loss": 0.3726, "step": 19935 }, { "epoch": 1.870870870870871, "grad_norm": 0.809013341405789, "learning_rate": 3.729695588327219e-06, "loss": 0.3618, "step": 19936 }, { "epoch": 1.8709647147147148, "grad_norm": 1.337708226501796, "learning_rate": 3.729167544837479e-06, "loss": 0.4057, "step": 19937 }, { "epoch": 1.8710585585585586, "grad_norm": 0.9843798584397007, "learning_rate": 3.728639516500027e-06, "loss": 0.4104, "step": 19938 }, { "epoch": 1.8711524024024024, "grad_norm": 0.9548450970796275, "learning_rate": 3.728111503321155e-06, "loss": 0.4042, "step": 19939 }, { "epoch": 1.8712462462462462, "grad_norm": 0.8839553886858715, "learning_rate": 3.7275835053071573e-06, "loss": 0.407, "step": 19940 }, { "epoch": 1.87134009009009, "grad_norm": 0.9141412546005271, "learning_rate": 3.7270555224643334e-06, "loss": 0.341, "step": 19941 }, { "epoch": 1.8714339339339339, "grad_norm": 0.8817566217675623, "learning_rate": 3.726527554798978e-06, "loss": 0.4034, "step": 19942 }, { "epoch": 1.8715277777777777, "grad_norm": 0.9629385183226096, "learning_rate": 3.7259996023173807e-06, "loss": 0.43, "step": 19943 }, { "epoch": 1.8716216216216215, "grad_norm": 1.0353267379661264, "learning_rate": 3.7254716650258415e-06, "loss": 0.3671, "step": 19944 }, { "epoch": 1.8717154654654653, "grad_norm": 0.9380570185937271, "learning_rate": 3.7249437429306543e-06, "loss": 0.3637, "step": 19945 }, { "epoch": 1.8718093093093093, "grad_norm": 0.8939634465759808, "learning_rate": 3.724415836038111e-06, "loss": 0.435, "step": 19946 }, { "epoch": 1.8719031531531531, "grad_norm": 0.8786248894022793, "learning_rate": 3.7238879443545095e-06, "loss": 0.3441, "step": 19947 }, { "epoch": 1.871996996996997, "grad_norm": 0.9827914531519821, "learning_rate": 3.7233600678861425e-06, "loss": 0.3958, "step": 19948 }, { "epoch": 1.872090840840841, "grad_norm": 1.0258452312779334, "learning_rate": 3.722832206639303e-06, "loss": 0.3816, "step": 19949 }, { "epoch": 1.8721846846846848, "grad_norm": 1.0089572116535026, "learning_rate": 3.7223043606202857e-06, "loss": 0.4201, "step": 19950 }, { "epoch": 1.8722785285285286, "grad_norm": 0.9333311059453949, "learning_rate": 3.7217765298353845e-06, "loss": 0.3886, "step": 19951 }, { "epoch": 1.8723723723723724, "grad_norm": 0.8799917362689865, "learning_rate": 3.7212487142908897e-06, "loss": 0.4197, "step": 19952 }, { "epoch": 1.8724662162162162, "grad_norm": 1.0670354928590349, "learning_rate": 3.720720913993099e-06, "loss": 0.4073, "step": 19953 }, { "epoch": 1.87256006006006, "grad_norm": 0.8934179254401506, "learning_rate": 3.720193128948304e-06, "loss": 0.3672, "step": 19954 }, { "epoch": 1.8726539039039038, "grad_norm": 0.915856074402246, "learning_rate": 3.719665359162795e-06, "loss": 0.3824, "step": 19955 }, { "epoch": 1.8727477477477477, "grad_norm": 1.357127899135755, "learning_rate": 3.7191376046428685e-06, "loss": 0.3811, "step": 19956 }, { "epoch": 1.8728415915915915, "grad_norm": 0.8750187127345456, "learning_rate": 3.7186098653948143e-06, "loss": 0.3898, "step": 19957 }, { "epoch": 1.8729354354354353, "grad_norm": 1.0243206191416423, "learning_rate": 3.7180821414249246e-06, "loss": 0.4663, "step": 19958 }, { "epoch": 1.8730292792792793, "grad_norm": 0.9031533612460128, "learning_rate": 3.717554432739493e-06, "loss": 0.3754, "step": 19959 }, { "epoch": 1.8731231231231231, "grad_norm": 1.1448866725144342, "learning_rate": 3.717026739344812e-06, "loss": 0.415, "step": 19960 }, { "epoch": 1.873216966966967, "grad_norm": 0.9316591587141163, "learning_rate": 3.7164990612471708e-06, "loss": 0.3616, "step": 19961 }, { "epoch": 1.873310810810811, "grad_norm": 1.0204605313725428, "learning_rate": 3.7159713984528635e-06, "loss": 0.3891, "step": 19962 }, { "epoch": 1.8734046546546548, "grad_norm": 1.4257724658524413, "learning_rate": 3.7154437509681795e-06, "loss": 0.3931, "step": 19963 }, { "epoch": 1.8734984984984986, "grad_norm": 0.9514076846671503, "learning_rate": 3.71491611879941e-06, "loss": 0.3679, "step": 19964 }, { "epoch": 1.8735923423423424, "grad_norm": 1.0152525973933981, "learning_rate": 3.714388501952848e-06, "loss": 0.3825, "step": 19965 }, { "epoch": 1.8736861861861862, "grad_norm": 1.0318916212267042, "learning_rate": 3.7138609004347835e-06, "loss": 0.3798, "step": 19966 }, { "epoch": 1.87378003003003, "grad_norm": 1.4226439672018176, "learning_rate": 3.713333314251506e-06, "loss": 0.3654, "step": 19967 }, { "epoch": 1.8738738738738738, "grad_norm": 0.9612371306484718, "learning_rate": 3.712805743409308e-06, "loss": 0.4024, "step": 19968 }, { "epoch": 1.8739677177177176, "grad_norm": 0.8995686938364356, "learning_rate": 3.712278187914478e-06, "loss": 0.4026, "step": 19969 }, { "epoch": 1.8740615615615615, "grad_norm": 1.0102373486237528, "learning_rate": 3.711750647773305e-06, "loss": 0.3894, "step": 19970 }, { "epoch": 1.8741554054054053, "grad_norm": 0.9841942101242567, "learning_rate": 3.7112231229920826e-06, "loss": 0.3446, "step": 19971 }, { "epoch": 1.8742492492492493, "grad_norm": 1.0571038743664583, "learning_rate": 3.7106956135770988e-06, "loss": 0.4059, "step": 19972 }, { "epoch": 1.874343093093093, "grad_norm": 1.1963593133572246, "learning_rate": 3.710168119534642e-06, "loss": 0.4189, "step": 19973 }, { "epoch": 1.874436936936937, "grad_norm": 1.055935059449473, "learning_rate": 3.7096406408710028e-06, "loss": 0.3715, "step": 19974 }, { "epoch": 1.8745307807807807, "grad_norm": 0.8357295383012238, "learning_rate": 3.709113177592471e-06, "loss": 0.3935, "step": 19975 }, { "epoch": 1.8746246246246248, "grad_norm": 0.8825088016559952, "learning_rate": 3.7085857297053325e-06, "loss": 0.3881, "step": 19976 }, { "epoch": 1.8747184684684686, "grad_norm": 0.8819303107488197, "learning_rate": 3.7080582972158794e-06, "loss": 0.3518, "step": 19977 }, { "epoch": 1.8748123123123124, "grad_norm": 0.9412845720706607, "learning_rate": 3.707530880130401e-06, "loss": 0.3503, "step": 19978 }, { "epoch": 1.8749061561561562, "grad_norm": 0.9309781028880598, "learning_rate": 3.707003478455182e-06, "loss": 0.4093, "step": 19979 }, { "epoch": 1.875, "grad_norm": 0.9406444337526363, "learning_rate": 3.706476092196514e-06, "loss": 0.3755, "step": 19980 }, { "epoch": 1.8750938438438438, "grad_norm": 0.9686730252731673, "learning_rate": 3.705948721360683e-06, "loss": 0.4035, "step": 19981 }, { "epoch": 1.8751876876876876, "grad_norm": 0.8165946407214765, "learning_rate": 3.705421365953976e-06, "loss": 0.3737, "step": 19982 }, { "epoch": 1.8752815315315314, "grad_norm": 0.850817614269203, "learning_rate": 3.704894025982685e-06, "loss": 0.355, "step": 19983 }, { "epoch": 1.8753753753753752, "grad_norm": 1.0040753761049874, "learning_rate": 3.7043667014530947e-06, "loss": 0.4163, "step": 19984 }, { "epoch": 1.8754692192192193, "grad_norm": 0.8683015290525243, "learning_rate": 3.7038393923714923e-06, "loss": 0.363, "step": 19985 }, { "epoch": 1.875563063063063, "grad_norm": 0.8869055932350964, "learning_rate": 3.7033120987441653e-06, "loss": 0.3949, "step": 19986 }, { "epoch": 1.875656906906907, "grad_norm": 1.0921827836095377, "learning_rate": 3.7027848205774007e-06, "loss": 0.3551, "step": 19987 }, { "epoch": 1.8757507507507507, "grad_norm": 0.9006430360561575, "learning_rate": 3.7022575578774844e-06, "loss": 0.3911, "step": 19988 }, { "epoch": 1.8758445945945947, "grad_norm": 1.005510387271912, "learning_rate": 3.7017303106507052e-06, "loss": 0.3922, "step": 19989 }, { "epoch": 1.8759384384384385, "grad_norm": 0.9382442431711647, "learning_rate": 3.7012030789033485e-06, "loss": 0.3528, "step": 19990 }, { "epoch": 1.8760322822822824, "grad_norm": 0.9251339552358723, "learning_rate": 3.7006758626416993e-06, "loss": 0.3982, "step": 19991 }, { "epoch": 1.8761261261261262, "grad_norm": 1.0806306111156936, "learning_rate": 3.700148661872046e-06, "loss": 0.411, "step": 19992 }, { "epoch": 1.87621996996997, "grad_norm": 0.8794901456894981, "learning_rate": 3.699621476600673e-06, "loss": 0.374, "step": 19993 }, { "epoch": 1.8763138138138138, "grad_norm": 0.8578805129852598, "learning_rate": 3.6990943068338643e-06, "loss": 0.3512, "step": 19994 }, { "epoch": 1.8764076576576576, "grad_norm": 0.9901249142458758, "learning_rate": 3.698567152577909e-06, "loss": 0.3669, "step": 19995 }, { "epoch": 1.8765015015015014, "grad_norm": 0.8131531418854534, "learning_rate": 3.698040013839091e-06, "loss": 0.4207, "step": 19996 }, { "epoch": 1.8765953453453452, "grad_norm": 0.9872632655128505, "learning_rate": 3.6975128906236934e-06, "loss": 0.3611, "step": 19997 }, { "epoch": 1.876689189189189, "grad_norm": 0.9208261165749723, "learning_rate": 3.696985782938005e-06, "loss": 0.3691, "step": 19998 }, { "epoch": 1.876783033033033, "grad_norm": 0.9336771521015342, "learning_rate": 3.696458690788308e-06, "loss": 0.3741, "step": 19999 }, { "epoch": 1.8768768768768769, "grad_norm": 0.8501913847375918, "learning_rate": 3.6959316141808853e-06, "loss": 0.3515, "step": 20000 }, { "epoch": 1.8769707207207207, "grad_norm": 1.4456217472740143, "learning_rate": 3.695404553122026e-06, "loss": 0.4319, "step": 20001 }, { "epoch": 1.8770645645645647, "grad_norm": 0.8031605309947373, "learning_rate": 3.694877507618011e-06, "loss": 0.3614, "step": 20002 }, { "epoch": 1.8771584084084085, "grad_norm": 0.8722029522660174, "learning_rate": 3.694350477675124e-06, "loss": 0.4207, "step": 20003 }, { "epoch": 1.8772522522522523, "grad_norm": 0.8938655559267837, "learning_rate": 3.6938234632996517e-06, "loss": 0.3942, "step": 20004 }, { "epoch": 1.8773460960960962, "grad_norm": 0.8694828357041748, "learning_rate": 3.693296464497875e-06, "loss": 0.3788, "step": 20005 }, { "epoch": 1.87743993993994, "grad_norm": 1.07960406204464, "learning_rate": 3.6927694812760774e-06, "loss": 0.3982, "step": 20006 }, { "epoch": 1.8775337837837838, "grad_norm": 1.244062704398002, "learning_rate": 3.692242513640545e-06, "loss": 0.4072, "step": 20007 }, { "epoch": 1.8776276276276276, "grad_norm": 1.82918008697421, "learning_rate": 3.6917155615975586e-06, "loss": 0.3641, "step": 20008 }, { "epoch": 1.8777214714714714, "grad_norm": 0.9460163280163161, "learning_rate": 3.6911886251534003e-06, "loss": 0.3624, "step": 20009 }, { "epoch": 1.8778153153153152, "grad_norm": 1.7254696301776185, "learning_rate": 3.6906617043143556e-06, "loss": 0.357, "step": 20010 }, { "epoch": 1.877909159159159, "grad_norm": 1.4271098832801734, "learning_rate": 3.6901347990867054e-06, "loss": 0.3927, "step": 20011 }, { "epoch": 1.878003003003003, "grad_norm": 4.827832482624994, "learning_rate": 3.68960790947673e-06, "loss": 0.445, "step": 20012 }, { "epoch": 1.8780968468468469, "grad_norm": 0.9706001195979218, "learning_rate": 3.6890810354907157e-06, "loss": 0.39, "step": 20013 }, { "epoch": 1.8781906906906907, "grad_norm": 0.8524866562406936, "learning_rate": 3.6885541771349428e-06, "loss": 0.3624, "step": 20014 }, { "epoch": 1.8782845345345347, "grad_norm": 0.8634116343514883, "learning_rate": 3.6880273344156915e-06, "loss": 0.3812, "step": 20015 }, { "epoch": 1.8783783783783785, "grad_norm": 1.0930979666048044, "learning_rate": 3.6875005073392457e-06, "loss": 0.4114, "step": 20016 }, { "epoch": 1.8784722222222223, "grad_norm": 0.9095696564759598, "learning_rate": 3.6869736959118856e-06, "loss": 0.3927, "step": 20017 }, { "epoch": 1.8785660660660661, "grad_norm": 0.930106274221102, "learning_rate": 3.6864469001398907e-06, "loss": 0.3667, "step": 20018 }, { "epoch": 1.87865990990991, "grad_norm": 1.0087837412800713, "learning_rate": 3.685920120029547e-06, "loss": 0.4031, "step": 20019 }, { "epoch": 1.8787537537537538, "grad_norm": 0.8361129991309302, "learning_rate": 3.685393355587131e-06, "loss": 0.403, "step": 20020 }, { "epoch": 1.8788475975975976, "grad_norm": 0.8050172608851808, "learning_rate": 3.6848666068189244e-06, "loss": 0.3769, "step": 20021 }, { "epoch": 1.8789414414414414, "grad_norm": 0.8327133910662304, "learning_rate": 3.684339873731209e-06, "loss": 0.3547, "step": 20022 }, { "epoch": 1.8790352852852852, "grad_norm": 1.187918356528124, "learning_rate": 3.683813156330264e-06, "loss": 0.4326, "step": 20023 }, { "epoch": 1.879129129129129, "grad_norm": 1.0441187283391131, "learning_rate": 3.6832864546223678e-06, "loss": 0.4342, "step": 20024 }, { "epoch": 1.879222972972973, "grad_norm": 0.965426158366139, "learning_rate": 3.6827597686138044e-06, "loss": 0.3966, "step": 20025 }, { "epoch": 1.8793168168168168, "grad_norm": 0.8749234104690734, "learning_rate": 3.682233098310851e-06, "loss": 0.4025, "step": 20026 }, { "epoch": 1.8794106606606606, "grad_norm": 0.967259689244228, "learning_rate": 3.681706443719787e-06, "loss": 0.4172, "step": 20027 }, { "epoch": 1.8795045045045045, "grad_norm": 0.9643130806652084, "learning_rate": 3.6811798048468924e-06, "loss": 0.3776, "step": 20028 }, { "epoch": 1.8795983483483485, "grad_norm": 1.003840331746138, "learning_rate": 3.6806531816984465e-06, "loss": 0.4177, "step": 20029 }, { "epoch": 1.8796921921921923, "grad_norm": 0.9376347516357355, "learning_rate": 3.6801265742807255e-06, "loss": 0.3592, "step": 20030 }, { "epoch": 1.8797860360360361, "grad_norm": 1.4674555338213948, "learning_rate": 3.679599982600014e-06, "loss": 0.4272, "step": 20031 }, { "epoch": 1.87987987987988, "grad_norm": 0.820799861195502, "learning_rate": 3.679073406662586e-06, "loss": 0.4075, "step": 20032 }, { "epoch": 1.8799737237237237, "grad_norm": 0.8979111564888, "learning_rate": 3.678546846474721e-06, "loss": 0.3918, "step": 20033 }, { "epoch": 1.8800675675675675, "grad_norm": 0.9723727188366148, "learning_rate": 3.6780203020426984e-06, "loss": 0.3408, "step": 20034 }, { "epoch": 1.8801614114114114, "grad_norm": 0.9548857024139787, "learning_rate": 3.677493773372795e-06, "loss": 0.3741, "step": 20035 }, { "epoch": 1.8802552552552552, "grad_norm": 0.8356082585760924, "learning_rate": 3.676967260471287e-06, "loss": 0.4071, "step": 20036 }, { "epoch": 1.880349099099099, "grad_norm": 0.9663361593840516, "learning_rate": 3.6764407633444563e-06, "loss": 0.422, "step": 20037 }, { "epoch": 1.8804429429429428, "grad_norm": 1.2064081737579335, "learning_rate": 3.6759142819985784e-06, "loss": 0.3642, "step": 20038 }, { "epoch": 1.8805367867867868, "grad_norm": 0.9154948369336626, "learning_rate": 3.675387816439929e-06, "loss": 0.4215, "step": 20039 }, { "epoch": 1.8806306306306306, "grad_norm": 1.0678066138759832, "learning_rate": 3.6748613666747875e-06, "loss": 0.3944, "step": 20040 }, { "epoch": 1.8807244744744744, "grad_norm": 0.9352678689675982, "learning_rate": 3.67433493270943e-06, "loss": 0.4019, "step": 20041 }, { "epoch": 1.8808183183183185, "grad_norm": 0.8559613890185709, "learning_rate": 3.673808514550131e-06, "loss": 0.3945, "step": 20042 }, { "epoch": 1.8809121621621623, "grad_norm": 0.8853652079383458, "learning_rate": 3.6732821122031714e-06, "loss": 0.4153, "step": 20043 }, { "epoch": 1.881006006006006, "grad_norm": 0.9632082129774528, "learning_rate": 3.672755725674825e-06, "loss": 0.3788, "step": 20044 }, { "epoch": 1.88109984984985, "grad_norm": 1.0888441893206338, "learning_rate": 3.6722293549713674e-06, "loss": 0.3817, "step": 20045 }, { "epoch": 1.8811936936936937, "grad_norm": 1.0326213467029692, "learning_rate": 3.671703000099077e-06, "loss": 0.3815, "step": 20046 }, { "epoch": 1.8812875375375375, "grad_norm": 0.9475297570817048, "learning_rate": 3.6711766610642276e-06, "loss": 0.3607, "step": 20047 }, { "epoch": 1.8813813813813813, "grad_norm": 0.8294432014352912, "learning_rate": 3.6706503378730928e-06, "loss": 0.3312, "step": 20048 }, { "epoch": 1.8814752252252251, "grad_norm": 0.8742237541790691, "learning_rate": 3.670124030531953e-06, "loss": 0.4241, "step": 20049 }, { "epoch": 1.881569069069069, "grad_norm": 1.008628688830381, "learning_rate": 3.669597739047081e-06, "loss": 0.3673, "step": 20050 }, { "epoch": 1.8816629129129128, "grad_norm": 0.914615347252947, "learning_rate": 3.6690714634247503e-06, "loss": 0.4079, "step": 20051 }, { "epoch": 1.8817567567567568, "grad_norm": 0.8810561246073345, "learning_rate": 3.6685452036712388e-06, "loss": 0.3954, "step": 20052 }, { "epoch": 1.8818506006006006, "grad_norm": 0.867972097490912, "learning_rate": 3.6680189597928196e-06, "loss": 0.41, "step": 20053 }, { "epoch": 1.8819444444444444, "grad_norm": 1.048251820566163, "learning_rate": 3.667492731795764e-06, "loss": 0.4345, "step": 20054 }, { "epoch": 1.8820382882882885, "grad_norm": 0.9549330551867232, "learning_rate": 3.6669665196863525e-06, "loss": 0.4164, "step": 20055 }, { "epoch": 1.8821321321321323, "grad_norm": 0.9335904547089563, "learning_rate": 3.6664403234708555e-06, "loss": 0.4088, "step": 20056 }, { "epoch": 1.882225975975976, "grad_norm": 0.9694548883646947, "learning_rate": 3.6659141431555467e-06, "loss": 0.4047, "step": 20057 }, { "epoch": 1.8823198198198199, "grad_norm": 0.974024649905426, "learning_rate": 3.6653879787467018e-06, "loss": 0.3771, "step": 20058 }, { "epoch": 1.8824136636636637, "grad_norm": 0.918858287833856, "learning_rate": 3.6648618302505922e-06, "loss": 0.4169, "step": 20059 }, { "epoch": 1.8825075075075075, "grad_norm": 1.212847781087817, "learning_rate": 3.664335697673491e-06, "loss": 0.3877, "step": 20060 }, { "epoch": 1.8826013513513513, "grad_norm": 1.113339009074383, "learning_rate": 3.6638095810216746e-06, "loss": 0.4072, "step": 20061 }, { "epoch": 1.8826951951951951, "grad_norm": 1.0935984505519927, "learning_rate": 3.6632834803014132e-06, "loss": 0.3731, "step": 20062 }, { "epoch": 1.882789039039039, "grad_norm": 1.2463159651723836, "learning_rate": 3.662757395518979e-06, "loss": 0.3903, "step": 20063 }, { "epoch": 1.8828828828828827, "grad_norm": 1.5523606675457966, "learning_rate": 3.6622313266806462e-06, "loss": 0.3571, "step": 20064 }, { "epoch": 1.8829767267267268, "grad_norm": 0.8318847286836447, "learning_rate": 3.6617052737926894e-06, "loss": 0.39, "step": 20065 }, { "epoch": 1.8830705705705706, "grad_norm": 0.8621575358357628, "learning_rate": 3.661179236861374e-06, "loss": 0.366, "step": 20066 }, { "epoch": 1.8831644144144144, "grad_norm": 1.0906525751008815, "learning_rate": 3.660653215892979e-06, "loss": 0.4159, "step": 20067 }, { "epoch": 1.8832582582582582, "grad_norm": 1.088298075176469, "learning_rate": 3.6601272108937736e-06, "loss": 0.3867, "step": 20068 }, { "epoch": 1.8833521021021022, "grad_norm": 1.2038845192236376, "learning_rate": 3.6596012218700262e-06, "loss": 0.3647, "step": 20069 }, { "epoch": 1.883445945945946, "grad_norm": 0.9004796697796638, "learning_rate": 3.6590752488280135e-06, "loss": 0.4354, "step": 20070 }, { "epoch": 1.8835397897897899, "grad_norm": 1.3369434225540837, "learning_rate": 3.658549291774004e-06, "loss": 0.3953, "step": 20071 }, { "epoch": 1.8836336336336337, "grad_norm": 0.8696201751347512, "learning_rate": 3.658023350714269e-06, "loss": 0.3664, "step": 20072 }, { "epoch": 1.8837274774774775, "grad_norm": 0.9312785016284455, "learning_rate": 3.65749742565508e-06, "loss": 0.3918, "step": 20073 }, { "epoch": 1.8838213213213213, "grad_norm": 0.8578786278873163, "learning_rate": 3.6569715166027075e-06, "loss": 0.3726, "step": 20074 }, { "epoch": 1.883915165165165, "grad_norm": 1.0120497944006077, "learning_rate": 3.6564456235634195e-06, "loss": 0.3898, "step": 20075 }, { "epoch": 1.884009009009009, "grad_norm": 0.9285325190292906, "learning_rate": 3.6559197465434905e-06, "loss": 0.4359, "step": 20076 }, { "epoch": 1.8841028528528527, "grad_norm": 0.9790523993493433, "learning_rate": 3.655393885549189e-06, "loss": 0.3835, "step": 20077 }, { "epoch": 1.8841966966966965, "grad_norm": 0.9139267822912115, "learning_rate": 3.654868040586782e-06, "loss": 0.3882, "step": 20078 }, { "epoch": 1.8842905405405406, "grad_norm": 0.8821887798814959, "learning_rate": 3.654342211662544e-06, "loss": 0.3608, "step": 20079 }, { "epoch": 1.8843843843843844, "grad_norm": 0.9570100300777201, "learning_rate": 3.653816398782742e-06, "loss": 0.3974, "step": 20080 }, { "epoch": 1.8844782282282282, "grad_norm": 0.8046081834442509, "learning_rate": 3.653290601953643e-06, "loss": 0.3719, "step": 20081 }, { "epoch": 1.8845720720720722, "grad_norm": 0.9602130651125043, "learning_rate": 3.6527648211815215e-06, "loss": 0.3855, "step": 20082 }, { "epoch": 1.884665915915916, "grad_norm": 0.9122314441915673, "learning_rate": 3.652239056472643e-06, "loss": 0.3899, "step": 20083 }, { "epoch": 1.8847597597597598, "grad_norm": 1.0538095337655002, "learning_rate": 3.6517133078332757e-06, "loss": 0.4057, "step": 20084 }, { "epoch": 1.8848536036036037, "grad_norm": 0.8548054465595791, "learning_rate": 3.651187575269691e-06, "loss": 0.3794, "step": 20085 }, { "epoch": 1.8849474474474475, "grad_norm": 0.8825384700776633, "learning_rate": 3.650661858788155e-06, "loss": 0.3973, "step": 20086 }, { "epoch": 1.8850412912912913, "grad_norm": 0.9599357068685274, "learning_rate": 3.6501361583949347e-06, "loss": 0.4111, "step": 20087 }, { "epoch": 1.885135135135135, "grad_norm": 0.9309370284435602, "learning_rate": 3.649610474096301e-06, "loss": 0.3811, "step": 20088 }, { "epoch": 1.885228978978979, "grad_norm": 0.8225680786034437, "learning_rate": 3.6490848058985217e-06, "loss": 0.3896, "step": 20089 }, { "epoch": 1.8853228228228227, "grad_norm": 1.0220139057291713, "learning_rate": 3.648559153807862e-06, "loss": 0.398, "step": 20090 }, { "epoch": 1.8854166666666665, "grad_norm": 1.0651442574592451, "learning_rate": 3.6480335178305916e-06, "loss": 0.3991, "step": 20091 }, { "epoch": 1.8855105105105106, "grad_norm": 0.810882136275163, "learning_rate": 3.647507897972976e-06, "loss": 0.3727, "step": 20092 }, { "epoch": 1.8856043543543544, "grad_norm": 0.9593849335002759, "learning_rate": 3.6469822942412815e-06, "loss": 0.4248, "step": 20093 }, { "epoch": 1.8856981981981982, "grad_norm": 1.6602744570105468, "learning_rate": 3.6464567066417787e-06, "loss": 0.431, "step": 20094 }, { "epoch": 1.8857920420420422, "grad_norm": 0.9204903465581115, "learning_rate": 3.6459311351807315e-06, "loss": 0.3534, "step": 20095 }, { "epoch": 1.885885885885886, "grad_norm": 0.9945288123956882, "learning_rate": 3.6454055798644055e-06, "loss": 0.3996, "step": 20096 }, { "epoch": 1.8859797297297298, "grad_norm": 1.0706437878241992, "learning_rate": 3.644880040699069e-06, "loss": 0.3915, "step": 20097 }, { "epoch": 1.8860735735735736, "grad_norm": 1.0940235040334552, "learning_rate": 3.6443545176909877e-06, "loss": 0.327, "step": 20098 }, { "epoch": 1.8861674174174174, "grad_norm": 0.9779132074490968, "learning_rate": 3.6438290108464258e-06, "loss": 0.3652, "step": 20099 }, { "epoch": 1.8862612612612613, "grad_norm": 0.8961897687852285, "learning_rate": 3.643303520171651e-06, "loss": 0.364, "step": 20100 }, { "epoch": 1.886355105105105, "grad_norm": 1.4574498037518002, "learning_rate": 3.642778045672929e-06, "loss": 0.3821, "step": 20101 }, { "epoch": 1.8864489489489489, "grad_norm": 0.8918726504508699, "learning_rate": 3.6422525873565216e-06, "loss": 0.4077, "step": 20102 }, { "epoch": 1.8865427927927927, "grad_norm": 0.9810666336862005, "learning_rate": 3.6417271452286984e-06, "loss": 0.3968, "step": 20103 }, { "epoch": 1.8866366366366365, "grad_norm": 1.038407152844537, "learning_rate": 3.641201719295723e-06, "loss": 0.4113, "step": 20104 }, { "epoch": 1.8867304804804805, "grad_norm": 1.3218331105034542, "learning_rate": 3.6406763095638564e-06, "loss": 0.341, "step": 20105 }, { "epoch": 1.8868243243243243, "grad_norm": 0.8985898672209817, "learning_rate": 3.640150916039368e-06, "loss": 0.3859, "step": 20106 }, { "epoch": 1.8869181681681682, "grad_norm": 0.8853985312422313, "learning_rate": 3.6396255387285207e-06, "loss": 0.4128, "step": 20107 }, { "epoch": 1.887012012012012, "grad_norm": 0.8970253326683584, "learning_rate": 3.639100177637578e-06, "loss": 0.3992, "step": 20108 }, { "epoch": 1.887105855855856, "grad_norm": 1.1168650211580415, "learning_rate": 3.6385748327728036e-06, "loss": 0.3958, "step": 20109 }, { "epoch": 1.8871996996996998, "grad_norm": 0.9573630569586791, "learning_rate": 3.6380495041404628e-06, "loss": 0.3494, "step": 20110 }, { "epoch": 1.8872935435435436, "grad_norm": 1.1504042881126513, "learning_rate": 3.637524191746815e-06, "loss": 0.4128, "step": 20111 }, { "epoch": 1.8873873873873874, "grad_norm": 0.852119118543535, "learning_rate": 3.6369988955981293e-06, "loss": 0.3721, "step": 20112 }, { "epoch": 1.8874812312312312, "grad_norm": 0.8609472990286378, "learning_rate": 3.6364736157006664e-06, "loss": 0.4108, "step": 20113 }, { "epoch": 1.887575075075075, "grad_norm": 0.9229483107670895, "learning_rate": 3.6359483520606876e-06, "loss": 0.4025, "step": 20114 }, { "epoch": 1.8876689189189189, "grad_norm": 0.8415276822058756, "learning_rate": 3.635423104684458e-06, "loss": 0.372, "step": 20115 }, { "epoch": 1.8877627627627627, "grad_norm": 1.112268537150977, "learning_rate": 3.63489787357824e-06, "loss": 0.4452, "step": 20116 }, { "epoch": 1.8878566066066065, "grad_norm": 0.9087847586024028, "learning_rate": 3.634372658748293e-06, "loss": 0.4351, "step": 20117 }, { "epoch": 1.8879504504504503, "grad_norm": 1.0966883935248781, "learning_rate": 3.6338474602008832e-06, "loss": 0.3804, "step": 20118 }, { "epoch": 1.8880442942942943, "grad_norm": 0.9325678952235852, "learning_rate": 3.633322277942271e-06, "loss": 0.4278, "step": 20119 }, { "epoch": 1.8881381381381381, "grad_norm": 0.8003552072205503, "learning_rate": 3.6327971119787177e-06, "loss": 0.3182, "step": 20120 }, { "epoch": 1.888231981981982, "grad_norm": 0.9600171066986524, "learning_rate": 3.632271962316486e-06, "loss": 0.408, "step": 20121 }, { "epoch": 1.888325825825826, "grad_norm": 1.0149907310967317, "learning_rate": 3.6317468289618363e-06, "loss": 0.4148, "step": 20122 }, { "epoch": 1.8884196696696698, "grad_norm": 1.0152351193647313, "learning_rate": 3.6312217119210286e-06, "loss": 0.3852, "step": 20123 }, { "epoch": 1.8885135135135136, "grad_norm": 0.9337551609791241, "learning_rate": 3.6306966112003274e-06, "loss": 0.3986, "step": 20124 }, { "epoch": 1.8886073573573574, "grad_norm": 1.007920047775304, "learning_rate": 3.630171526805992e-06, "loss": 0.4088, "step": 20125 }, { "epoch": 1.8887012012012012, "grad_norm": 0.8626481756988654, "learning_rate": 3.6296464587442816e-06, "loss": 0.4245, "step": 20126 }, { "epoch": 1.888795045045045, "grad_norm": 1.1105567653924313, "learning_rate": 3.6291214070214583e-06, "loss": 0.3974, "step": 20127 }, { "epoch": 1.8888888888888888, "grad_norm": 1.0020455174735459, "learning_rate": 3.6285963716437822e-06, "loss": 0.4159, "step": 20128 }, { "epoch": 1.8889827327327327, "grad_norm": 1.0954793747505451, "learning_rate": 3.6280713526175106e-06, "loss": 0.4118, "step": 20129 }, { "epoch": 1.8890765765765765, "grad_norm": 0.8893131926115022, "learning_rate": 3.6275463499489076e-06, "loss": 0.4037, "step": 20130 }, { "epoch": 1.8891704204204203, "grad_norm": 0.8885467261853471, "learning_rate": 3.6270213636442315e-06, "loss": 0.4014, "step": 20131 }, { "epoch": 1.8892642642642643, "grad_norm": 0.8316045932736942, "learning_rate": 3.6264963937097398e-06, "loss": 0.377, "step": 20132 }, { "epoch": 1.8893581081081081, "grad_norm": 0.7794776189963757, "learning_rate": 3.625971440151695e-06, "loss": 0.3753, "step": 20133 }, { "epoch": 1.889451951951952, "grad_norm": 0.9644422188952905, "learning_rate": 3.625446502976354e-06, "loss": 0.3796, "step": 20134 }, { "epoch": 1.889545795795796, "grad_norm": 0.9283575983181558, "learning_rate": 3.624921582189974e-06, "loss": 0.4467, "step": 20135 }, { "epoch": 1.8896396396396398, "grad_norm": 0.9793784451271977, "learning_rate": 3.6243966777988182e-06, "loss": 0.3883, "step": 20136 }, { "epoch": 1.8897334834834836, "grad_norm": 0.7825450061492565, "learning_rate": 3.6238717898091424e-06, "loss": 0.3289, "step": 20137 }, { "epoch": 1.8898273273273274, "grad_norm": 0.8698702239461086, "learning_rate": 3.623346918227204e-06, "loss": 0.3851, "step": 20138 }, { "epoch": 1.8899211711711712, "grad_norm": 0.9368771498328923, "learning_rate": 3.6228220630592635e-06, "loss": 0.4029, "step": 20139 }, { "epoch": 1.890015015015015, "grad_norm": 1.2216144450653623, "learning_rate": 3.622297224311578e-06, "loss": 0.4017, "step": 20140 }, { "epoch": 1.8901088588588588, "grad_norm": 0.9272949664574213, "learning_rate": 3.621772401990402e-06, "loss": 0.4072, "step": 20141 }, { "epoch": 1.8902027027027026, "grad_norm": 0.8293122107912692, "learning_rate": 3.621247596101999e-06, "loss": 0.394, "step": 20142 }, { "epoch": 1.8902965465465464, "grad_norm": 0.9010456259134911, "learning_rate": 3.620722806652622e-06, "loss": 0.3556, "step": 20143 }, { "epoch": 1.8903903903903903, "grad_norm": 1.2986876812675092, "learning_rate": 3.620198033648529e-06, "loss": 0.397, "step": 20144 }, { "epoch": 1.8904842342342343, "grad_norm": 1.1443508171728094, "learning_rate": 3.6196732770959788e-06, "loss": 0.3297, "step": 20145 }, { "epoch": 1.890578078078078, "grad_norm": 1.1895260466351367, "learning_rate": 3.6191485370012256e-06, "loss": 0.4165, "step": 20146 }, { "epoch": 1.890671921921922, "grad_norm": 0.8709604923478187, "learning_rate": 3.6186238133705255e-06, "loss": 0.3985, "step": 20147 }, { "epoch": 1.8907657657657657, "grad_norm": 0.9833926886783716, "learning_rate": 3.618099106210138e-06, "loss": 0.4073, "step": 20148 }, { "epoch": 1.8908596096096097, "grad_norm": 0.9606790640176313, "learning_rate": 3.617574415526317e-06, "loss": 0.4407, "step": 20149 }, { "epoch": 1.8909534534534536, "grad_norm": 1.111850375315251, "learning_rate": 3.6170497413253184e-06, "loss": 0.4044, "step": 20150 }, { "epoch": 1.8910472972972974, "grad_norm": 0.9402481521538987, "learning_rate": 3.6165250836133997e-06, "loss": 0.4076, "step": 20151 }, { "epoch": 1.8911411411411412, "grad_norm": 1.8798238344469862, "learning_rate": 3.6160004423968153e-06, "loss": 0.3857, "step": 20152 }, { "epoch": 1.891234984984985, "grad_norm": 0.9600149412843115, "learning_rate": 3.6154758176818185e-06, "loss": 0.4175, "step": 20153 }, { "epoch": 1.8913288288288288, "grad_norm": 0.8878585491915545, "learning_rate": 3.6149512094746676e-06, "loss": 0.4345, "step": 20154 }, { "epoch": 1.8914226726726726, "grad_norm": 0.9848489116443718, "learning_rate": 3.6144266177816174e-06, "loss": 0.4001, "step": 20155 }, { "epoch": 1.8915165165165164, "grad_norm": 0.9189812393417851, "learning_rate": 3.61390204260892e-06, "loss": 0.4279, "step": 20156 }, { "epoch": 1.8916103603603602, "grad_norm": 1.1964249908820723, "learning_rate": 3.613377483962833e-06, "loss": 0.3959, "step": 20157 }, { "epoch": 1.891704204204204, "grad_norm": 0.8471151795267117, "learning_rate": 3.61285294184961e-06, "loss": 0.4048, "step": 20158 }, { "epoch": 1.891798048048048, "grad_norm": 0.8490898884696805, "learning_rate": 3.6123284162755024e-06, "loss": 0.3595, "step": 20159 }, { "epoch": 1.8918918918918919, "grad_norm": 1.0018230820423586, "learning_rate": 3.611803907246768e-06, "loss": 0.4093, "step": 20160 }, { "epoch": 1.8919857357357357, "grad_norm": 1.2449959187186137, "learning_rate": 3.6112794147696587e-06, "loss": 0.3879, "step": 20161 }, { "epoch": 1.8920795795795797, "grad_norm": 0.9405327602965244, "learning_rate": 3.6107549388504283e-06, "loss": 0.3827, "step": 20162 }, { "epoch": 1.8921734234234235, "grad_norm": 0.8563946162996225, "learning_rate": 3.6102304794953308e-06, "loss": 0.3565, "step": 20163 }, { "epoch": 1.8922672672672673, "grad_norm": 1.0036960082842634, "learning_rate": 3.6097060367106185e-06, "loss": 0.367, "step": 20164 }, { "epoch": 1.8923611111111112, "grad_norm": 1.0181795767115784, "learning_rate": 3.6091816105025435e-06, "loss": 0.3812, "step": 20165 }, { "epoch": 1.892454954954955, "grad_norm": 0.926685378227033, "learning_rate": 3.6086572008773614e-06, "loss": 0.4173, "step": 20166 }, { "epoch": 1.8925487987987988, "grad_norm": 1.0138266421261277, "learning_rate": 3.6081328078413234e-06, "loss": 0.4179, "step": 20167 }, { "epoch": 1.8926426426426426, "grad_norm": 1.0254813806370926, "learning_rate": 3.607608431400681e-06, "loss": 0.4024, "step": 20168 }, { "epoch": 1.8927364864864864, "grad_norm": 0.9111524826838358, "learning_rate": 3.607084071561687e-06, "loss": 0.3995, "step": 20169 }, { "epoch": 1.8928303303303302, "grad_norm": 0.9646270713254836, "learning_rate": 3.606559728330595e-06, "loss": 0.3947, "step": 20170 }, { "epoch": 1.892924174174174, "grad_norm": 0.966170758884886, "learning_rate": 3.6060354017136522e-06, "loss": 0.3845, "step": 20171 }, { "epoch": 1.893018018018018, "grad_norm": 0.8285605587496698, "learning_rate": 3.6055110917171164e-06, "loss": 0.3278, "step": 20172 }, { "epoch": 1.8931118618618619, "grad_norm": 0.9167564041814918, "learning_rate": 3.604986798347235e-06, "loss": 0.3549, "step": 20173 }, { "epoch": 1.8932057057057057, "grad_norm": 0.8913489710783947, "learning_rate": 3.6044625216102604e-06, "loss": 0.4216, "step": 20174 }, { "epoch": 1.8932995495495497, "grad_norm": 1.0141358754584955, "learning_rate": 3.6039382615124428e-06, "loss": 0.4036, "step": 20175 }, { "epoch": 1.8933933933933935, "grad_norm": 0.8630911766556612, "learning_rate": 3.603414018060035e-06, "loss": 0.3759, "step": 20176 }, { "epoch": 1.8934872372372373, "grad_norm": 5.3429778098441485, "learning_rate": 3.6028897912592837e-06, "loss": 0.426, "step": 20177 }, { "epoch": 1.8935810810810811, "grad_norm": 1.1940711885500994, "learning_rate": 3.6023655811164437e-06, "loss": 0.4355, "step": 20178 }, { "epoch": 1.893674924924925, "grad_norm": 0.8982283899541676, "learning_rate": 3.601841387637764e-06, "loss": 0.376, "step": 20179 }, { "epoch": 1.8937687687687688, "grad_norm": 1.1751229567055193, "learning_rate": 3.6013172108294923e-06, "loss": 0.4227, "step": 20180 }, { "epoch": 1.8938626126126126, "grad_norm": 0.961893157159734, "learning_rate": 3.6007930506978818e-06, "loss": 0.3858, "step": 20181 }, { "epoch": 1.8939564564564564, "grad_norm": 0.9504224468575115, "learning_rate": 3.6002689072491793e-06, "loss": 0.3771, "step": 20182 }, { "epoch": 1.8940503003003002, "grad_norm": 0.8863237446293474, "learning_rate": 3.5997447804896344e-06, "loss": 0.4145, "step": 20183 }, { "epoch": 1.894144144144144, "grad_norm": 0.9442340195114961, "learning_rate": 3.5992206704254985e-06, "loss": 0.3678, "step": 20184 }, { "epoch": 1.894237987987988, "grad_norm": 0.8568297841471965, "learning_rate": 3.59869657706302e-06, "loss": 0.3666, "step": 20185 }, { "epoch": 1.8943318318318318, "grad_norm": 1.674956974941655, "learning_rate": 3.598172500408445e-06, "loss": 0.3239, "step": 20186 }, { "epoch": 1.8944256756756757, "grad_norm": 0.9451106684943791, "learning_rate": 3.5976484404680272e-06, "loss": 0.3936, "step": 20187 }, { "epoch": 1.8945195195195195, "grad_norm": 1.1079386787857568, "learning_rate": 3.5971243972480114e-06, "loss": 0.4333, "step": 20188 }, { "epoch": 1.8946133633633635, "grad_norm": 0.9273870592213247, "learning_rate": 3.5966003707546435e-06, "loss": 0.3923, "step": 20189 }, { "epoch": 1.8947072072072073, "grad_norm": 1.1749701406892346, "learning_rate": 3.596076360994178e-06, "loss": 0.4014, "step": 20190 }, { "epoch": 1.8948010510510511, "grad_norm": 1.0233077388775367, "learning_rate": 3.595552367972859e-06, "loss": 0.4247, "step": 20191 }, { "epoch": 1.894894894894895, "grad_norm": 1.1400456052205228, "learning_rate": 3.5950283916969316e-06, "loss": 0.4195, "step": 20192 }, { "epoch": 1.8949887387387387, "grad_norm": 0.8497637696435856, "learning_rate": 3.5945044321726487e-06, "loss": 0.3274, "step": 20193 }, { "epoch": 1.8950825825825826, "grad_norm": 0.8847083252122796, "learning_rate": 3.593980489406254e-06, "loss": 0.389, "step": 20194 }, { "epoch": 1.8951764264264264, "grad_norm": 1.2082077553731105, "learning_rate": 3.5934565634039954e-06, "loss": 0.3995, "step": 20195 }, { "epoch": 1.8952702702702702, "grad_norm": 0.9053131070403189, "learning_rate": 3.59293265417212e-06, "loss": 0.3626, "step": 20196 }, { "epoch": 1.895364114114114, "grad_norm": 0.8785466710671371, "learning_rate": 3.5924087617168745e-06, "loss": 0.3733, "step": 20197 }, { "epoch": 1.8954579579579578, "grad_norm": 0.9748716181480667, "learning_rate": 3.5918848860445034e-06, "loss": 0.377, "step": 20198 }, { "epoch": 1.8955518018018018, "grad_norm": 0.871875959490933, "learning_rate": 3.5913610271612554e-06, "loss": 0.3684, "step": 20199 }, { "epoch": 1.8956456456456456, "grad_norm": 1.4771194897707272, "learning_rate": 3.590837185073377e-06, "loss": 0.4069, "step": 20200 }, { "epoch": 1.8957394894894894, "grad_norm": 0.990319040941747, "learning_rate": 3.590313359787111e-06, "loss": 0.3839, "step": 20201 }, { "epoch": 1.8958333333333335, "grad_norm": 0.9146328921083124, "learning_rate": 3.5897895513087056e-06, "loss": 0.3824, "step": 20202 }, { "epoch": 1.8959271771771773, "grad_norm": 1.0068052500106601, "learning_rate": 3.589265759644406e-06, "loss": 0.3361, "step": 20203 }, { "epoch": 1.896021021021021, "grad_norm": 0.9253566930560942, "learning_rate": 3.588741984800454e-06, "loss": 0.3916, "step": 20204 }, { "epoch": 1.896114864864865, "grad_norm": 0.9731952417836429, "learning_rate": 3.5882182267831002e-06, "loss": 0.4432, "step": 20205 }, { "epoch": 1.8962087087087087, "grad_norm": 0.8573000425964097, "learning_rate": 3.5876944855985857e-06, "loss": 0.3981, "step": 20206 }, { "epoch": 1.8963025525525525, "grad_norm": 1.0185088185083133, "learning_rate": 3.587170761253156e-06, "loss": 0.4202, "step": 20207 }, { "epoch": 1.8963963963963963, "grad_norm": 0.9767256483362193, "learning_rate": 3.5866470537530555e-06, "loss": 0.4212, "step": 20208 }, { "epoch": 1.8964902402402402, "grad_norm": 1.1351646986170028, "learning_rate": 3.5861233631045294e-06, "loss": 0.4009, "step": 20209 }, { "epoch": 1.896584084084084, "grad_norm": 0.8346239906567561, "learning_rate": 3.5855996893138186e-06, "loss": 0.4228, "step": 20210 }, { "epoch": 1.8966779279279278, "grad_norm": 0.9930458694345866, "learning_rate": 3.5850760323871712e-06, "loss": 0.3944, "step": 20211 }, { "epoch": 1.8967717717717718, "grad_norm": 1.0166970393791899, "learning_rate": 3.584552392330829e-06, "loss": 0.394, "step": 20212 }, { "epoch": 1.8968656156156156, "grad_norm": 1.0033415033259239, "learning_rate": 3.5840287691510336e-06, "loss": 0.3723, "step": 20213 }, { "epoch": 1.8969594594594594, "grad_norm": 13.869844432846907, "learning_rate": 3.583505162854031e-06, "loss": 0.348, "step": 20214 }, { "epoch": 1.8970533033033035, "grad_norm": 1.121101048396543, "learning_rate": 3.582981573446064e-06, "loss": 0.4243, "step": 20215 }, { "epoch": 1.8971471471471473, "grad_norm": 1.0121584789499958, "learning_rate": 3.5824580009333716e-06, "loss": 0.4122, "step": 20216 }, { "epoch": 1.897240990990991, "grad_norm": 1.2310516158803528, "learning_rate": 3.5819344453222015e-06, "loss": 0.4091, "step": 20217 }, { "epoch": 1.897334834834835, "grad_norm": 1.0356935217793353, "learning_rate": 3.5814109066187937e-06, "loss": 0.3649, "step": 20218 }, { "epoch": 1.8974286786786787, "grad_norm": 0.978633139920969, "learning_rate": 3.5808873848293895e-06, "loss": 0.4044, "step": 20219 }, { "epoch": 1.8975225225225225, "grad_norm": 0.9733129941363302, "learning_rate": 3.5803638799602334e-06, "loss": 0.4034, "step": 20220 }, { "epoch": 1.8976163663663663, "grad_norm": 1.0117292429340317, "learning_rate": 3.579840392017566e-06, "loss": 0.4283, "step": 20221 }, { "epoch": 1.8977102102102101, "grad_norm": 0.8944069374258224, "learning_rate": 3.5793169210076263e-06, "loss": 0.3834, "step": 20222 }, { "epoch": 1.897804054054054, "grad_norm": 0.848402332930589, "learning_rate": 3.57879346693666e-06, "loss": 0.358, "step": 20223 }, { "epoch": 1.8978978978978978, "grad_norm": 1.0596716236355193, "learning_rate": 3.5782700298109075e-06, "loss": 0.3808, "step": 20224 }, { "epoch": 1.8979917417417418, "grad_norm": 0.8815954964355037, "learning_rate": 3.5777466096366063e-06, "loss": 0.3665, "step": 20225 }, { "epoch": 1.8980855855855856, "grad_norm": 1.1172883800441256, "learning_rate": 3.577223206420002e-06, "loss": 0.4113, "step": 20226 }, { "epoch": 1.8981794294294294, "grad_norm": 0.8847941457759474, "learning_rate": 3.576699820167332e-06, "loss": 0.4527, "step": 20227 }, { "epoch": 1.8982732732732732, "grad_norm": 1.0709500754391987, "learning_rate": 3.576176450884836e-06, "loss": 0.3913, "step": 20228 }, { "epoch": 1.8983671171171173, "grad_norm": 0.9936433838596492, "learning_rate": 3.5756530985787575e-06, "loss": 0.4344, "step": 20229 }, { "epoch": 1.898460960960961, "grad_norm": 1.3302656256524956, "learning_rate": 3.575129763255335e-06, "loss": 0.3907, "step": 20230 }, { "epoch": 1.8985548048048049, "grad_norm": 1.6411977643640228, "learning_rate": 3.5746064449208075e-06, "loss": 0.3764, "step": 20231 }, { "epoch": 1.8986486486486487, "grad_norm": 0.9768254320009894, "learning_rate": 3.574083143581415e-06, "loss": 0.4127, "step": 20232 }, { "epoch": 1.8987424924924925, "grad_norm": 0.9608252886029687, "learning_rate": 3.573559859243397e-06, "loss": 0.4275, "step": 20233 }, { "epoch": 1.8988363363363363, "grad_norm": 1.0797323781004464, "learning_rate": 3.5730365919129916e-06, "loss": 0.4205, "step": 20234 }, { "epoch": 1.8989301801801801, "grad_norm": 0.7749322596984549, "learning_rate": 3.572513341596441e-06, "loss": 0.343, "step": 20235 }, { "epoch": 1.899024024024024, "grad_norm": 1.190207335273873, "learning_rate": 3.571990108299981e-06, "loss": 0.4318, "step": 20236 }, { "epoch": 1.8991178678678677, "grad_norm": 0.877851335489638, "learning_rate": 3.57146689202985e-06, "loss": 0.3731, "step": 20237 }, { "epoch": 1.8992117117117115, "grad_norm": 2.0560442410408073, "learning_rate": 3.5709436927922893e-06, "loss": 0.4204, "step": 20238 }, { "epoch": 1.8993055555555556, "grad_norm": 1.0099714724963595, "learning_rate": 3.570420510593534e-06, "loss": 0.3617, "step": 20239 }, { "epoch": 1.8993993993993994, "grad_norm": 1.061765669893869, "learning_rate": 3.5698973454398218e-06, "loss": 0.3925, "step": 20240 }, { "epoch": 1.8994932432432432, "grad_norm": 0.9455308093104474, "learning_rate": 3.5693741973373935e-06, "loss": 0.3895, "step": 20241 }, { "epoch": 1.8995870870870872, "grad_norm": 1.0255183985982101, "learning_rate": 3.568851066292486e-06, "loss": 0.3901, "step": 20242 }, { "epoch": 1.899680930930931, "grad_norm": 1.0230189084333363, "learning_rate": 3.5683279523113333e-06, "loss": 0.4184, "step": 20243 }, { "epoch": 1.8997747747747749, "grad_norm": 1.0030682137356466, "learning_rate": 3.5678048554001764e-06, "loss": 0.3624, "step": 20244 }, { "epoch": 1.8998686186186187, "grad_norm": 1.0330040281900457, "learning_rate": 3.567281775565251e-06, "loss": 0.3674, "step": 20245 }, { "epoch": 1.8999624624624625, "grad_norm": 5.2919699790257, "learning_rate": 3.5667587128127917e-06, "loss": 0.3806, "step": 20246 }, { "epoch": 1.9000563063063063, "grad_norm": 0.9721135296383803, "learning_rate": 3.566235667149039e-06, "loss": 0.4, "step": 20247 }, { "epoch": 1.90015015015015, "grad_norm": 0.9794932344225534, "learning_rate": 3.5657126385802264e-06, "loss": 0.3776, "step": 20248 }, { "epoch": 1.900243993993994, "grad_norm": 1.0547611102590813, "learning_rate": 3.565189627112591e-06, "loss": 0.3539, "step": 20249 }, { "epoch": 1.9003378378378377, "grad_norm": 0.963539402469363, "learning_rate": 3.564666632752369e-06, "loss": 0.4257, "step": 20250 }, { "epoch": 1.9004316816816815, "grad_norm": 0.9234133181579215, "learning_rate": 3.5641436555057955e-06, "loss": 0.4035, "step": 20251 }, { "epoch": 1.9005255255255256, "grad_norm": 0.9783950040535669, "learning_rate": 3.5636206953791043e-06, "loss": 0.3457, "step": 20252 }, { "epoch": 1.9006193693693694, "grad_norm": 0.9520499157268952, "learning_rate": 3.563097752378535e-06, "loss": 0.4173, "step": 20253 }, { "epoch": 1.9007132132132132, "grad_norm": 0.9026856393045797, "learning_rate": 3.56257482651032e-06, "loss": 0.3909, "step": 20254 }, { "epoch": 1.9008070570570572, "grad_norm": 0.9181758508082145, "learning_rate": 3.5620519177806935e-06, "loss": 0.4245, "step": 20255 }, { "epoch": 1.900900900900901, "grad_norm": 0.8473240965646844, "learning_rate": 3.5615290261958918e-06, "loss": 0.3701, "step": 20256 }, { "epoch": 1.9009947447447448, "grad_norm": 0.8348381699041083, "learning_rate": 3.561006151762149e-06, "loss": 0.3598, "step": 20257 }, { "epoch": 1.9010885885885886, "grad_norm": 1.1932849050013072, "learning_rate": 3.560483294485697e-06, "loss": 0.3717, "step": 20258 }, { "epoch": 1.9011824324324325, "grad_norm": 0.8745748494353912, "learning_rate": 3.559960454372775e-06, "loss": 0.3829, "step": 20259 }, { "epoch": 1.9012762762762763, "grad_norm": 2.577688533636731, "learning_rate": 3.559437631429614e-06, "loss": 0.3707, "step": 20260 }, { "epoch": 1.90137012012012, "grad_norm": 1.0977819768420398, "learning_rate": 3.5589148256624456e-06, "loss": 0.3802, "step": 20261 }, { "epoch": 1.9014639639639639, "grad_norm": 0.975293656039636, "learning_rate": 3.5583920370775072e-06, "loss": 0.3725, "step": 20262 }, { "epoch": 1.9015578078078077, "grad_norm": 0.8616143041669805, "learning_rate": 3.55786926568103e-06, "loss": 0.392, "step": 20263 }, { "epoch": 1.9016516516516515, "grad_norm": 0.9081156561813727, "learning_rate": 3.557346511479245e-06, "loss": 0.412, "step": 20264 }, { "epoch": 1.9017454954954955, "grad_norm": 0.865768886748504, "learning_rate": 3.5568237744783894e-06, "loss": 0.3687, "step": 20265 }, { "epoch": 1.9018393393393394, "grad_norm": 2.0858413377662663, "learning_rate": 3.556301054684694e-06, "loss": 0.3297, "step": 20266 }, { "epoch": 1.9019331831831832, "grad_norm": 0.9667358295994619, "learning_rate": 3.5557783521043898e-06, "loss": 0.398, "step": 20267 }, { "epoch": 1.902027027027027, "grad_norm": 0.9748751214444634, "learning_rate": 3.555255666743711e-06, "loss": 0.3591, "step": 20268 }, { "epoch": 1.902120870870871, "grad_norm": 0.9159707908131551, "learning_rate": 3.5547329986088897e-06, "loss": 0.3904, "step": 20269 }, { "epoch": 1.9022147147147148, "grad_norm": 2.5660982551710196, "learning_rate": 3.5542103477061546e-06, "loss": 0.3785, "step": 20270 }, { "epoch": 1.9023085585585586, "grad_norm": 0.9428779872292761, "learning_rate": 3.5536877140417414e-06, "loss": 0.386, "step": 20271 }, { "epoch": 1.9024024024024024, "grad_norm": 1.3611120884064807, "learning_rate": 3.55316509762188e-06, "loss": 0.3666, "step": 20272 }, { "epoch": 1.9024962462462462, "grad_norm": 0.9527942835748162, "learning_rate": 3.552642498452801e-06, "loss": 0.3782, "step": 20273 }, { "epoch": 1.90259009009009, "grad_norm": 0.9324600182292081, "learning_rate": 3.552119916540736e-06, "loss": 0.4173, "step": 20274 }, { "epoch": 1.9026839339339339, "grad_norm": 1.0639188844794512, "learning_rate": 3.5515973518919166e-06, "loss": 0.4268, "step": 20275 }, { "epoch": 1.9027777777777777, "grad_norm": 0.9466781763767113, "learning_rate": 3.55107480451257e-06, "loss": 0.3826, "step": 20276 }, { "epoch": 1.9028716216216215, "grad_norm": 1.0445772404834262, "learning_rate": 3.5505522744089304e-06, "loss": 0.4034, "step": 20277 }, { "epoch": 1.9029654654654653, "grad_norm": 0.8861737567730811, "learning_rate": 3.550029761587227e-06, "loss": 0.4191, "step": 20278 }, { "epoch": 1.9030593093093093, "grad_norm": 0.989378769384193, "learning_rate": 3.5495072660536887e-06, "loss": 0.4212, "step": 20279 }, { "epoch": 1.9031531531531531, "grad_norm": 0.9423974445249819, "learning_rate": 3.5489847878145465e-06, "loss": 0.4079, "step": 20280 }, { "epoch": 1.903246996996997, "grad_norm": 1.0430424685191813, "learning_rate": 3.5484623268760298e-06, "loss": 0.3969, "step": 20281 }, { "epoch": 1.903340840840841, "grad_norm": 0.8806956693980784, "learning_rate": 3.5479398832443655e-06, "loss": 0.3649, "step": 20282 }, { "epoch": 1.9034346846846848, "grad_norm": 0.9643652368600993, "learning_rate": 3.547417456925787e-06, "loss": 0.3682, "step": 20283 }, { "epoch": 1.9035285285285286, "grad_norm": 0.8502335143903201, "learning_rate": 3.5468950479265213e-06, "loss": 0.3981, "step": 20284 }, { "epoch": 1.9036223723723724, "grad_norm": 1.017139836168815, "learning_rate": 3.5463726562527957e-06, "loss": 0.4116, "step": 20285 }, { "epoch": 1.9037162162162162, "grad_norm": 0.8979668379991074, "learning_rate": 3.545850281910841e-06, "loss": 0.3872, "step": 20286 }, { "epoch": 1.90381006006006, "grad_norm": 0.9951145301306719, "learning_rate": 3.5453279249068845e-06, "loss": 0.421, "step": 20287 }, { "epoch": 1.9039039039039038, "grad_norm": 0.8969988056648258, "learning_rate": 3.544805585247152e-06, "loss": 0.3571, "step": 20288 }, { "epoch": 1.9039977477477477, "grad_norm": 1.000667915675977, "learning_rate": 3.5442832629378765e-06, "loss": 0.3894, "step": 20289 }, { "epoch": 1.9040915915915915, "grad_norm": 0.7963523170024642, "learning_rate": 3.5437609579852827e-06, "loss": 0.3604, "step": 20290 }, { "epoch": 1.9041854354354353, "grad_norm": 1.2743895577699866, "learning_rate": 3.5432386703955975e-06, "loss": 0.3841, "step": 20291 }, { "epoch": 1.9042792792792793, "grad_norm": 0.899511159828413, "learning_rate": 3.5427164001750497e-06, "loss": 0.3973, "step": 20292 }, { "epoch": 1.9043731231231231, "grad_norm": 0.8911178749366624, "learning_rate": 3.542194147329866e-06, "loss": 0.3813, "step": 20293 }, { "epoch": 1.904466966966967, "grad_norm": 0.9555889331009482, "learning_rate": 3.5416719118662714e-06, "loss": 0.4239, "step": 20294 }, { "epoch": 1.904560810810811, "grad_norm": 1.0626094557928096, "learning_rate": 3.5411496937904956e-06, "loss": 0.4127, "step": 20295 }, { "epoch": 1.9046546546546548, "grad_norm": 0.9149117846878198, "learning_rate": 3.540627493108764e-06, "loss": 0.3657, "step": 20296 }, { "epoch": 1.9047484984984986, "grad_norm": 0.8856642545359993, "learning_rate": 3.540105309827301e-06, "loss": 0.3953, "step": 20297 }, { "epoch": 1.9048423423423424, "grad_norm": 0.8972277606924013, "learning_rate": 3.5395831439523364e-06, "loss": 0.4263, "step": 20298 }, { "epoch": 1.9049361861861862, "grad_norm": 0.8656796012293421, "learning_rate": 3.5390609954900935e-06, "loss": 0.3509, "step": 20299 }, { "epoch": 1.90503003003003, "grad_norm": 0.9813283937307077, "learning_rate": 3.5385388644467955e-06, "loss": 0.3704, "step": 20300 }, { "epoch": 1.9051238738738738, "grad_norm": 0.9706617536601789, "learning_rate": 3.5380167508286733e-06, "loss": 0.4299, "step": 20301 }, { "epoch": 1.9052177177177176, "grad_norm": 1.1712901570865732, "learning_rate": 3.5374946546419492e-06, "loss": 0.435, "step": 20302 }, { "epoch": 1.9053115615615615, "grad_norm": 1.037019232555636, "learning_rate": 3.5369725758928476e-06, "loss": 0.3823, "step": 20303 }, { "epoch": 1.9054054054054053, "grad_norm": 0.9173434234928403, "learning_rate": 3.536450514587595e-06, "loss": 0.3635, "step": 20304 }, { "epoch": 1.9054992492492493, "grad_norm": 1.3005465878233802, "learning_rate": 3.535928470732415e-06, "loss": 0.3956, "step": 20305 }, { "epoch": 1.905593093093093, "grad_norm": 0.8876919674678857, "learning_rate": 3.535406444333531e-06, "loss": 0.399, "step": 20306 }, { "epoch": 1.905686936936937, "grad_norm": 0.9407161029034445, "learning_rate": 3.534884435397169e-06, "loss": 0.4055, "step": 20307 }, { "epoch": 1.9057807807807807, "grad_norm": 0.9433464479385373, "learning_rate": 3.5343624439295532e-06, "loss": 0.3923, "step": 20308 }, { "epoch": 1.9058746246246248, "grad_norm": 1.0077339119560105, "learning_rate": 3.5338404699369045e-06, "loss": 0.3854, "step": 20309 }, { "epoch": 1.9059684684684686, "grad_norm": 0.9833389081882434, "learning_rate": 3.533318513425451e-06, "loss": 0.3791, "step": 20310 }, { "epoch": 1.9060623123123124, "grad_norm": 0.9432590806397911, "learning_rate": 3.532796574401413e-06, "loss": 0.3761, "step": 20311 }, { "epoch": 1.9061561561561562, "grad_norm": 0.954861935033013, "learning_rate": 3.5322746528710116e-06, "loss": 0.3737, "step": 20312 }, { "epoch": 1.90625, "grad_norm": 0.9237967128705494, "learning_rate": 3.5317527488404747e-06, "loss": 0.3698, "step": 20313 }, { "epoch": 1.9063438438438438, "grad_norm": 0.7409437173888324, "learning_rate": 3.531230862316023e-06, "loss": 0.3587, "step": 20314 }, { "epoch": 1.9064376876876876, "grad_norm": 0.843811859937778, "learning_rate": 3.530708993303876e-06, "loss": 0.3805, "step": 20315 }, { "epoch": 1.9065315315315314, "grad_norm": 1.175295409288735, "learning_rate": 3.5301871418102606e-06, "loss": 0.3903, "step": 20316 }, { "epoch": 1.9066253753753752, "grad_norm": 1.242880649402301, "learning_rate": 3.529665307841398e-06, "loss": 0.4082, "step": 20317 }, { "epoch": 1.9067192192192193, "grad_norm": 2.5755350388023097, "learning_rate": 3.5291434914035054e-06, "loss": 0.3329, "step": 20318 }, { "epoch": 1.906813063063063, "grad_norm": 1.6089162594388127, "learning_rate": 3.5286216925028105e-06, "loss": 0.4078, "step": 20319 }, { "epoch": 1.906906906906907, "grad_norm": 0.8290825577668487, "learning_rate": 3.528099911145533e-06, "loss": 0.3362, "step": 20320 }, { "epoch": 1.9070007507507507, "grad_norm": 0.9750972523246764, "learning_rate": 3.52757814733789e-06, "loss": 0.379, "step": 20321 }, { "epoch": 1.9070945945945947, "grad_norm": 0.9314721046338106, "learning_rate": 3.5270564010861096e-06, "loss": 0.4137, "step": 20322 }, { "epoch": 1.9071884384384385, "grad_norm": 0.9597624194998162, "learning_rate": 3.5265346723964076e-06, "loss": 0.3949, "step": 20323 }, { "epoch": 1.9072822822822824, "grad_norm": 0.8837117433593121, "learning_rate": 3.5260129612750067e-06, "loss": 0.368, "step": 20324 }, { "epoch": 1.9073761261261262, "grad_norm": 1.0781115209912322, "learning_rate": 3.5254912677281266e-06, "loss": 0.3694, "step": 20325 }, { "epoch": 1.90746996996997, "grad_norm": 0.9179805986396156, "learning_rate": 3.5249695917619888e-06, "loss": 0.3613, "step": 20326 }, { "epoch": 1.9075638138138138, "grad_norm": 1.135456322647772, "learning_rate": 3.5244479333828087e-06, "loss": 0.3972, "step": 20327 }, { "epoch": 1.9076576576576576, "grad_norm": 0.8991683449703491, "learning_rate": 3.523926292596812e-06, "loss": 0.3781, "step": 20328 }, { "epoch": 1.9077515015015014, "grad_norm": 0.8590940533788669, "learning_rate": 3.5234046694102163e-06, "loss": 0.4185, "step": 20329 }, { "epoch": 1.9078453453453452, "grad_norm": 0.8672960387431383, "learning_rate": 3.5228830638292396e-06, "loss": 0.3857, "step": 20330 }, { "epoch": 1.907939189189189, "grad_norm": 0.8900788442348322, "learning_rate": 3.5223614758601023e-06, "loss": 0.357, "step": 20331 }, { "epoch": 1.908033033033033, "grad_norm": 1.101937041797286, "learning_rate": 3.5218399055090237e-06, "loss": 0.4007, "step": 20332 }, { "epoch": 1.9081268768768769, "grad_norm": 0.9617205416159401, "learning_rate": 3.5213183527822194e-06, "loss": 0.4122, "step": 20333 }, { "epoch": 1.9082207207207207, "grad_norm": 0.9076380286254899, "learning_rate": 3.5207968176859125e-06, "loss": 0.4086, "step": 20334 }, { "epoch": 1.9083145645645647, "grad_norm": 0.8082343482011585, "learning_rate": 3.520275300226319e-06, "loss": 0.364, "step": 20335 }, { "epoch": 1.9084084084084085, "grad_norm": 0.9107253579026826, "learning_rate": 3.5197538004096563e-06, "loss": 0.4165, "step": 20336 }, { "epoch": 1.9085022522522523, "grad_norm": 0.8685344551411003, "learning_rate": 3.519232318242145e-06, "loss": 0.3448, "step": 20337 }, { "epoch": 1.9085960960960962, "grad_norm": 1.5188467413109685, "learning_rate": 3.5187108537300007e-06, "loss": 0.3795, "step": 20338 }, { "epoch": 1.90868993993994, "grad_norm": 0.9845472894464739, "learning_rate": 3.5181894068794387e-06, "loss": 0.403, "step": 20339 }, { "epoch": 1.9087837837837838, "grad_norm": 0.9258221270083872, "learning_rate": 3.5176679776966816e-06, "loss": 0.3727, "step": 20340 }, { "epoch": 1.9088776276276276, "grad_norm": 1.0810942323823471, "learning_rate": 3.5171465661879434e-06, "loss": 0.385, "step": 20341 }, { "epoch": 1.9089714714714714, "grad_norm": 0.9350722278803947, "learning_rate": 3.5166251723594403e-06, "loss": 0.4088, "step": 20342 }, { "epoch": 1.9090653153153152, "grad_norm": 0.9963610707593982, "learning_rate": 3.5161037962173905e-06, "loss": 0.4152, "step": 20343 }, { "epoch": 1.909159159159159, "grad_norm": 1.479450158228341, "learning_rate": 3.51558243776801e-06, "loss": 0.3724, "step": 20344 }, { "epoch": 1.909253003003003, "grad_norm": 1.0127762019432287, "learning_rate": 3.5150610970175127e-06, "loss": 0.3858, "step": 20345 }, { "epoch": 1.9093468468468469, "grad_norm": 1.0659610153873895, "learning_rate": 3.5145397739721187e-06, "loss": 0.4521, "step": 20346 }, { "epoch": 1.9094406906906907, "grad_norm": 1.0302495478412521, "learning_rate": 3.5140184686380418e-06, "loss": 0.3518, "step": 20347 }, { "epoch": 1.9095345345345347, "grad_norm": 1.0188518511978508, "learning_rate": 3.513497181021496e-06, "loss": 0.3424, "step": 20348 }, { "epoch": 1.9096283783783785, "grad_norm": 0.952971234900664, "learning_rate": 3.5129759111287e-06, "loss": 0.4083, "step": 20349 }, { "epoch": 1.9097222222222223, "grad_norm": 0.9866239249473194, "learning_rate": 3.512454658965867e-06, "loss": 0.425, "step": 20350 }, { "epoch": 1.9098160660660661, "grad_norm": 0.9132605408517113, "learning_rate": 3.5119334245392105e-06, "loss": 0.3984, "step": 20351 }, { "epoch": 1.90990990990991, "grad_norm": 0.8668578551705378, "learning_rate": 3.5114122078549483e-06, "loss": 0.3563, "step": 20352 }, { "epoch": 1.9100037537537538, "grad_norm": 0.832844271564148, "learning_rate": 3.5108910089192943e-06, "loss": 0.3964, "step": 20353 }, { "epoch": 1.9100975975975976, "grad_norm": 0.9535735950668005, "learning_rate": 3.5103698277384605e-06, "loss": 0.3581, "step": 20354 }, { "epoch": 1.9101914414414414, "grad_norm": 0.9910939546076063, "learning_rate": 3.509848664318664e-06, "loss": 0.4145, "step": 20355 }, { "epoch": 1.9102852852852852, "grad_norm": 1.029021842800056, "learning_rate": 3.509327518666117e-06, "loss": 0.4454, "step": 20356 }, { "epoch": 1.910379129129129, "grad_norm": 0.8228363100842313, "learning_rate": 3.508806390787032e-06, "loss": 0.3918, "step": 20357 }, { "epoch": 1.910472972972973, "grad_norm": 0.9626757022613637, "learning_rate": 3.508285280687625e-06, "loss": 0.3738, "step": 20358 }, { "epoch": 1.9105668168168168, "grad_norm": 1.852699165408724, "learning_rate": 3.507764188374109e-06, "loss": 0.3999, "step": 20359 }, { "epoch": 1.9106606606606606, "grad_norm": 0.9028413417276516, "learning_rate": 3.507243113852694e-06, "loss": 0.3662, "step": 20360 }, { "epoch": 1.9107545045045045, "grad_norm": 0.9317170519606024, "learning_rate": 3.506722057129597e-06, "loss": 0.3567, "step": 20361 }, { "epoch": 1.9108483483483485, "grad_norm": 0.9186953547674347, "learning_rate": 3.5062010182110286e-06, "loss": 0.4381, "step": 20362 }, { "epoch": 1.9109421921921923, "grad_norm": 0.8301421023457601, "learning_rate": 3.5056799971031986e-06, "loss": 0.3909, "step": 20363 }, { "epoch": 1.9110360360360361, "grad_norm": 0.8868831131042098, "learning_rate": 3.5051589938123242e-06, "loss": 0.4157, "step": 20364 }, { "epoch": 1.91112987987988, "grad_norm": 0.8654502450984568, "learning_rate": 3.504638008344616e-06, "loss": 0.3815, "step": 20365 }, { "epoch": 1.9112237237237237, "grad_norm": 0.7610181916759037, "learning_rate": 3.504117040706283e-06, "loss": 0.3817, "step": 20366 }, { "epoch": 1.9113175675675675, "grad_norm": 0.8803471244118497, "learning_rate": 3.503596090903539e-06, "loss": 0.3698, "step": 20367 }, { "epoch": 1.9114114114114114, "grad_norm": 1.047434031367705, "learning_rate": 3.503075158942596e-06, "loss": 0.3591, "step": 20368 }, { "epoch": 1.9115052552552552, "grad_norm": 1.0184690811503454, "learning_rate": 3.5025542448296613e-06, "loss": 0.4382, "step": 20369 }, { "epoch": 1.911599099099099, "grad_norm": 1.0826203785681983, "learning_rate": 3.5020333485709514e-06, "loss": 0.3862, "step": 20370 }, { "epoch": 1.9116929429429428, "grad_norm": 0.9070131102720022, "learning_rate": 3.501512470172673e-06, "loss": 0.4, "step": 20371 }, { "epoch": 1.9117867867867868, "grad_norm": 1.1426014220667682, "learning_rate": 3.500991609641038e-06, "loss": 0.4166, "step": 20372 }, { "epoch": 1.9118806306306306, "grad_norm": 0.8147918568514597, "learning_rate": 3.500470766982257e-06, "loss": 0.3726, "step": 20373 }, { "epoch": 1.9119744744744744, "grad_norm": 0.857383908557862, "learning_rate": 3.499949942202539e-06, "loss": 0.3667, "step": 20374 }, { "epoch": 1.9120683183183185, "grad_norm": 0.994909672799783, "learning_rate": 3.499429135308093e-06, "loss": 0.4071, "step": 20375 }, { "epoch": 1.9121621621621623, "grad_norm": 0.9526045403104044, "learning_rate": 3.4989083463051323e-06, "loss": 0.3992, "step": 20376 }, { "epoch": 1.912256006006006, "grad_norm": 1.9317540183711817, "learning_rate": 3.498387575199863e-06, "loss": 0.4041, "step": 20377 }, { "epoch": 1.91234984984985, "grad_norm": 0.8893661243403633, "learning_rate": 3.497866821998495e-06, "loss": 0.3764, "step": 20378 }, { "epoch": 1.9124436936936937, "grad_norm": 0.9532938885778128, "learning_rate": 3.4973460867072383e-06, "loss": 0.4543, "step": 20379 }, { "epoch": 1.9125375375375375, "grad_norm": 0.9354879383699389, "learning_rate": 3.4968253693323018e-06, "loss": 0.4101, "step": 20380 }, { "epoch": 1.9126313813813813, "grad_norm": 1.1754083398763595, "learning_rate": 3.496304669879891e-06, "loss": 0.3818, "step": 20381 }, { "epoch": 1.9127252252252251, "grad_norm": 1.0313937277274052, "learning_rate": 3.4957839883562184e-06, "loss": 0.3679, "step": 20382 }, { "epoch": 1.912819069069069, "grad_norm": 1.0026069759948633, "learning_rate": 3.4952633247674904e-06, "loss": 0.4192, "step": 20383 }, { "epoch": 1.9129129129129128, "grad_norm": 1.0390852632534662, "learning_rate": 3.494742679119914e-06, "loss": 0.3935, "step": 20384 }, { "epoch": 1.9130067567567568, "grad_norm": 1.4982277324418005, "learning_rate": 3.494222051419699e-06, "loss": 0.4097, "step": 20385 }, { "epoch": 1.9131006006006006, "grad_norm": 0.9742702860653887, "learning_rate": 3.4937014416730513e-06, "loss": 0.3957, "step": 20386 }, { "epoch": 1.9131944444444444, "grad_norm": 0.9340143113234505, "learning_rate": 3.493180849886177e-06, "loss": 0.4195, "step": 20387 }, { "epoch": 1.9132882882882885, "grad_norm": 0.8121547956606713, "learning_rate": 3.4926602760652863e-06, "loss": 0.3818, "step": 20388 }, { "epoch": 1.9133821321321323, "grad_norm": 0.9507128813390142, "learning_rate": 3.492139720216585e-06, "loss": 0.3669, "step": 20389 }, { "epoch": 1.913475975975976, "grad_norm": 4.9733251848478455, "learning_rate": 3.4916191823462773e-06, "loss": 0.3683, "step": 20390 }, { "epoch": 1.9135698198198199, "grad_norm": 1.0535601071098128, "learning_rate": 3.4910986624605737e-06, "loss": 0.4265, "step": 20391 }, { "epoch": 1.9136636636636637, "grad_norm": 0.9174675482907478, "learning_rate": 3.4905781605656787e-06, "loss": 0.3866, "step": 20392 }, { "epoch": 1.9137575075075075, "grad_norm": 0.9299746965231874, "learning_rate": 3.4900576766677945e-06, "loss": 0.3757, "step": 20393 }, { "epoch": 1.9138513513513513, "grad_norm": 0.8685075963596786, "learning_rate": 3.489537210773133e-06, "loss": 0.3764, "step": 20394 }, { "epoch": 1.9139451951951951, "grad_norm": 0.9477804417184673, "learning_rate": 3.489016762887897e-06, "loss": 0.3705, "step": 20395 }, { "epoch": 1.914039039039039, "grad_norm": 0.9087759412371963, "learning_rate": 3.488496333018292e-06, "loss": 0.3824, "step": 20396 }, { "epoch": 1.9141328828828827, "grad_norm": 1.2623270146342802, "learning_rate": 3.4879759211705227e-06, "loss": 0.4037, "step": 20397 }, { "epoch": 1.9142267267267268, "grad_norm": 1.0319104244752435, "learning_rate": 3.4874555273507947e-06, "loss": 0.3625, "step": 20398 }, { "epoch": 1.9143205705705706, "grad_norm": 0.978440538393696, "learning_rate": 3.4869351515653112e-06, "loss": 0.372, "step": 20399 }, { "epoch": 1.9144144144144144, "grad_norm": 0.9913861383008183, "learning_rate": 3.48641479382028e-06, "loss": 0.4007, "step": 20400 }, { "epoch": 1.9145082582582582, "grad_norm": 0.9497736402747726, "learning_rate": 3.4858944541219024e-06, "loss": 0.3738, "step": 20401 }, { "epoch": 1.9146021021021022, "grad_norm": 1.1137042057460376, "learning_rate": 3.485374132476383e-06, "loss": 0.3916, "step": 20402 }, { "epoch": 1.914695945945946, "grad_norm": 0.8610069469313094, "learning_rate": 3.484853828889927e-06, "loss": 0.3933, "step": 20403 }, { "epoch": 1.9147897897897899, "grad_norm": 1.1944495106822324, "learning_rate": 3.484333543368737e-06, "loss": 0.4113, "step": 20404 }, { "epoch": 1.9148836336336337, "grad_norm": 1.8538853542708766, "learning_rate": 3.4838132759190146e-06, "loss": 0.38, "step": 20405 }, { "epoch": 1.9149774774774775, "grad_norm": 0.8893320180439701, "learning_rate": 3.4832930265469665e-06, "loss": 0.3529, "step": 20406 }, { "epoch": 1.9150713213213213, "grad_norm": 1.0443496221797617, "learning_rate": 3.4827727952587953e-06, "loss": 0.3798, "step": 20407 }, { "epoch": 1.915165165165165, "grad_norm": 1.0025584518839754, "learning_rate": 3.482252582060701e-06, "loss": 0.4032, "step": 20408 }, { "epoch": 1.915259009009009, "grad_norm": 1.0127498202588985, "learning_rate": 3.4817323869588887e-06, "loss": 0.3664, "step": 20409 }, { "epoch": 1.9153528528528527, "grad_norm": 1.0416487822179992, "learning_rate": 3.4812122099595604e-06, "loss": 0.3531, "step": 20410 }, { "epoch": 1.9154466966966965, "grad_norm": 0.8819333505527044, "learning_rate": 3.4806920510689147e-06, "loss": 0.4175, "step": 20411 }, { "epoch": 1.9155405405405406, "grad_norm": 1.2497039657147038, "learning_rate": 3.4801719102931586e-06, "loss": 0.412, "step": 20412 }, { "epoch": 1.9156343843843844, "grad_norm": 0.9674410112406736, "learning_rate": 3.4796517876384926e-06, "loss": 0.3589, "step": 20413 }, { "epoch": 1.9157282282282282, "grad_norm": 0.8824408821843571, "learning_rate": 3.4791316831111156e-06, "loss": 0.4021, "step": 20414 }, { "epoch": 1.9158220720720722, "grad_norm": 1.2328474405916376, "learning_rate": 3.4786115967172317e-06, "loss": 0.4248, "step": 20415 }, { "epoch": 1.915915915915916, "grad_norm": 1.0732105124492977, "learning_rate": 3.478091528463041e-06, "loss": 0.4074, "step": 20416 }, { "epoch": 1.9160097597597598, "grad_norm": 0.9296506208075924, "learning_rate": 3.4775714783547417e-06, "loss": 0.356, "step": 20417 }, { "epoch": 1.9161036036036037, "grad_norm": 1.2174984110108884, "learning_rate": 3.4770514463985396e-06, "loss": 0.3919, "step": 20418 }, { "epoch": 1.9161974474474475, "grad_norm": 1.0608803153986872, "learning_rate": 3.4765314326006317e-06, "loss": 0.3677, "step": 20419 }, { "epoch": 1.9162912912912913, "grad_norm": 0.8593311109550962, "learning_rate": 3.476011436967217e-06, "loss": 0.3629, "step": 20420 }, { "epoch": 1.916385135135135, "grad_norm": 0.889482355645442, "learning_rate": 3.4754914595044997e-06, "loss": 0.3845, "step": 20421 }, { "epoch": 1.916478978978979, "grad_norm": 0.979117712741227, "learning_rate": 3.474971500218677e-06, "loss": 0.391, "step": 20422 }, { "epoch": 1.9165728228228227, "grad_norm": 0.8158122320492006, "learning_rate": 3.4744515591159458e-06, "loss": 0.4041, "step": 20423 }, { "epoch": 1.9166666666666665, "grad_norm": 0.9123179784328019, "learning_rate": 3.473931636202511e-06, "loss": 0.3633, "step": 20424 }, { "epoch": 1.9167605105105106, "grad_norm": 1.2808049008524602, "learning_rate": 3.4734117314845684e-06, "loss": 0.3977, "step": 20425 }, { "epoch": 1.9168543543543544, "grad_norm": 3.3606200486193227, "learning_rate": 3.472891844968317e-06, "loss": 0.4173, "step": 20426 }, { "epoch": 1.9169481981981982, "grad_norm": 1.07795452981538, "learning_rate": 3.4723719766599564e-06, "loss": 0.3889, "step": 20427 }, { "epoch": 1.9170420420420422, "grad_norm": 1.0844985064752537, "learning_rate": 3.471852126565684e-06, "loss": 0.3775, "step": 20428 }, { "epoch": 1.917135885885886, "grad_norm": 0.9306632677802216, "learning_rate": 3.471332294691697e-06, "loss": 0.3662, "step": 20429 }, { "epoch": 1.9172297297297298, "grad_norm": 1.22045645235034, "learning_rate": 3.4708124810441975e-06, "loss": 0.3696, "step": 20430 }, { "epoch": 1.9173235735735736, "grad_norm": 0.8294527973655713, "learning_rate": 3.47029268562938e-06, "loss": 0.3648, "step": 20431 }, { "epoch": 1.9174174174174174, "grad_norm": 1.0038734311139437, "learning_rate": 3.4697729084534415e-06, "loss": 0.4199, "step": 20432 }, { "epoch": 1.9175112612612613, "grad_norm": 1.2501603377119443, "learning_rate": 3.4692531495225835e-06, "loss": 0.4127, "step": 20433 }, { "epoch": 1.917605105105105, "grad_norm": 1.040164841002769, "learning_rate": 3.468733408842999e-06, "loss": 0.3729, "step": 20434 }, { "epoch": 1.9176989489489489, "grad_norm": 1.0135831313068258, "learning_rate": 3.4682136864208843e-06, "loss": 0.4279, "step": 20435 }, { "epoch": 1.9177927927927927, "grad_norm": 0.8632128816206023, "learning_rate": 3.46769398226244e-06, "loss": 0.3966, "step": 20436 }, { "epoch": 1.9178866366366365, "grad_norm": 0.8986135789914669, "learning_rate": 3.4671742963738612e-06, "loss": 0.4013, "step": 20437 }, { "epoch": 1.9179804804804805, "grad_norm": 1.0569190971409363, "learning_rate": 3.4666546287613415e-06, "loss": 0.4194, "step": 20438 }, { "epoch": 1.9180743243243243, "grad_norm": 0.937390350825429, "learning_rate": 3.466134979431082e-06, "loss": 0.4109, "step": 20439 }, { "epoch": 1.9181681681681682, "grad_norm": 0.9858075547611042, "learning_rate": 3.4656153483892753e-06, "loss": 0.4078, "step": 20440 }, { "epoch": 1.918262012012012, "grad_norm": 0.9272151953261305, "learning_rate": 3.4650957356421145e-06, "loss": 0.3721, "step": 20441 }, { "epoch": 1.918355855855856, "grad_norm": 0.9463397569434957, "learning_rate": 3.4645761411958e-06, "loss": 0.3445, "step": 20442 }, { "epoch": 1.9184496996996998, "grad_norm": 1.0374329413939667, "learning_rate": 3.4640565650565254e-06, "loss": 0.4181, "step": 20443 }, { "epoch": 1.9185435435435436, "grad_norm": 0.92097456689277, "learning_rate": 3.4635370072304832e-06, "loss": 0.383, "step": 20444 }, { "epoch": 1.9186373873873874, "grad_norm": 2.48684203398079, "learning_rate": 3.4630174677238715e-06, "loss": 0.3828, "step": 20445 }, { "epoch": 1.9187312312312312, "grad_norm": 1.0405946354797408, "learning_rate": 3.4624979465428844e-06, "loss": 0.365, "step": 20446 }, { "epoch": 1.918825075075075, "grad_norm": 0.9510552954896618, "learning_rate": 3.461978443693713e-06, "loss": 0.3573, "step": 20447 }, { "epoch": 1.9189189189189189, "grad_norm": 1.7007948305032832, "learning_rate": 3.461458959182555e-06, "loss": 0.3802, "step": 20448 }, { "epoch": 1.9190127627627627, "grad_norm": 0.8689732495826115, "learning_rate": 3.460939493015603e-06, "loss": 0.3936, "step": 20449 }, { "epoch": 1.9191066066066065, "grad_norm": 0.891550996575762, "learning_rate": 3.4604200451990486e-06, "loss": 0.3805, "step": 20450 }, { "epoch": 1.9192004504504503, "grad_norm": 1.087256141211913, "learning_rate": 3.4599006157390895e-06, "loss": 0.4244, "step": 20451 }, { "epoch": 1.9192942942942943, "grad_norm": 0.9251357925674245, "learning_rate": 3.459381204641916e-06, "loss": 0.4063, "step": 20452 }, { "epoch": 1.9193881381381381, "grad_norm": 0.9306924782515018, "learning_rate": 3.4588618119137216e-06, "loss": 0.3802, "step": 20453 }, { "epoch": 1.919481981981982, "grad_norm": 0.9309416630689069, "learning_rate": 3.4583424375606995e-06, "loss": 0.397, "step": 20454 }, { "epoch": 1.919575825825826, "grad_norm": 0.9218620385808842, "learning_rate": 3.4578230815890423e-06, "loss": 0.3847, "step": 20455 }, { "epoch": 1.9196696696696698, "grad_norm": 0.9499632344352803, "learning_rate": 3.4573037440049394e-06, "loss": 0.4036, "step": 20456 }, { "epoch": 1.9197635135135136, "grad_norm": 1.6126239633235908, "learning_rate": 3.456784424814588e-06, "loss": 0.4186, "step": 20457 }, { "epoch": 1.9198573573573574, "grad_norm": 0.9047364422257558, "learning_rate": 3.456265124024178e-06, "loss": 0.4058, "step": 20458 }, { "epoch": 1.9199512012012012, "grad_norm": 0.905841292298645, "learning_rate": 3.455745841639899e-06, "loss": 0.395, "step": 20459 }, { "epoch": 1.920045045045045, "grad_norm": 0.9922312495172582, "learning_rate": 3.455226577667945e-06, "loss": 0.3428, "step": 20460 }, { "epoch": 1.9201388888888888, "grad_norm": 2.475893727633707, "learning_rate": 3.454707332114506e-06, "loss": 0.3799, "step": 20461 }, { "epoch": 1.9202327327327327, "grad_norm": 1.3558115145101723, "learning_rate": 3.4541881049857722e-06, "loss": 0.4011, "step": 20462 }, { "epoch": 1.9203265765765765, "grad_norm": 0.83598023192369, "learning_rate": 3.4536688962879373e-06, "loss": 0.396, "step": 20463 }, { "epoch": 1.9204204204204203, "grad_norm": 0.9787664789564327, "learning_rate": 3.4531497060271903e-06, "loss": 0.4198, "step": 20464 }, { "epoch": 1.9205142642642643, "grad_norm": 0.8228239294621982, "learning_rate": 3.45263053420972e-06, "loss": 0.3887, "step": 20465 }, { "epoch": 1.9206081081081081, "grad_norm": 1.0050130609864616, "learning_rate": 3.4521113808417197e-06, "loss": 0.3995, "step": 20466 }, { "epoch": 1.920701951951952, "grad_norm": 1.062597241860593, "learning_rate": 3.4515922459293767e-06, "loss": 0.429, "step": 20467 }, { "epoch": 1.920795795795796, "grad_norm": 0.9840440958348614, "learning_rate": 3.4510731294788795e-06, "loss": 0.3725, "step": 20468 }, { "epoch": 1.9208896396396398, "grad_norm": 0.8911658014162495, "learning_rate": 3.4505540314964226e-06, "loss": 0.3934, "step": 20469 }, { "epoch": 1.9209834834834836, "grad_norm": 0.8308936702901337, "learning_rate": 3.4500349519881917e-06, "loss": 0.4125, "step": 20470 }, { "epoch": 1.9210773273273274, "grad_norm": 0.843119108781774, "learning_rate": 3.4495158909603753e-06, "loss": 0.385, "step": 20471 }, { "epoch": 1.9211711711711712, "grad_norm": 0.8974670733777619, "learning_rate": 3.4489968484191648e-06, "loss": 0.3645, "step": 20472 }, { "epoch": 1.921265015015015, "grad_norm": 1.1836647849332578, "learning_rate": 3.4484778243707473e-06, "loss": 0.3842, "step": 20473 }, { "epoch": 1.9213588588588588, "grad_norm": 1.1660285029546882, "learning_rate": 3.447958818821309e-06, "loss": 0.3827, "step": 20474 }, { "epoch": 1.9214527027027026, "grad_norm": 1.4483722333849183, "learning_rate": 3.447439831777043e-06, "loss": 0.3843, "step": 20475 }, { "epoch": 1.9215465465465464, "grad_norm": 0.8337153297073652, "learning_rate": 3.4469208632441343e-06, "loss": 0.3995, "step": 20476 }, { "epoch": 1.9216403903903903, "grad_norm": 1.006410611146049, "learning_rate": 3.4464019132287703e-06, "loss": 0.4293, "step": 20477 }, { "epoch": 1.9217342342342343, "grad_norm": 0.9570746696410619, "learning_rate": 3.4458829817371396e-06, "loss": 0.3582, "step": 20478 }, { "epoch": 1.921828078078078, "grad_norm": 0.8358686573324952, "learning_rate": 3.445364068775429e-06, "loss": 0.4129, "step": 20479 }, { "epoch": 1.921921921921922, "grad_norm": 1.4878958049962965, "learning_rate": 3.444845174349824e-06, "loss": 0.3635, "step": 20480 }, { "epoch": 1.9220157657657657, "grad_norm": 0.9945306667250523, "learning_rate": 3.444326298466515e-06, "loss": 0.4547, "step": 20481 }, { "epoch": 1.9221096096096097, "grad_norm": 0.8998108076052651, "learning_rate": 3.4438074411316863e-06, "loss": 0.3538, "step": 20482 }, { "epoch": 1.9222034534534536, "grad_norm": 0.8264514237444368, "learning_rate": 3.4432886023515234e-06, "loss": 0.3912, "step": 20483 }, { "epoch": 1.9222972972972974, "grad_norm": 1.0191377976357299, "learning_rate": 3.4427697821322152e-06, "loss": 0.3861, "step": 20484 }, { "epoch": 1.9223911411411412, "grad_norm": 1.0749862174819607, "learning_rate": 3.4422509804799463e-06, "loss": 0.369, "step": 20485 }, { "epoch": 1.922484984984985, "grad_norm": 1.1418969467728608, "learning_rate": 3.4417321974009e-06, "loss": 0.4072, "step": 20486 }, { "epoch": 1.9225788288288288, "grad_norm": 1.1683126371728354, "learning_rate": 3.4412134329012657e-06, "loss": 0.3733, "step": 20487 }, { "epoch": 1.9226726726726726, "grad_norm": 0.8568167807930103, "learning_rate": 3.4406946869872283e-06, "loss": 0.3677, "step": 20488 }, { "epoch": 1.9227665165165164, "grad_norm": 1.171579391245883, "learning_rate": 3.44017595966497e-06, "loss": 0.3593, "step": 20489 }, { "epoch": 1.9228603603603602, "grad_norm": 1.3204715658515207, "learning_rate": 3.439657250940678e-06, "loss": 0.3941, "step": 20490 }, { "epoch": 1.922954204204204, "grad_norm": 0.9533641824198761, "learning_rate": 3.4391385608205364e-06, "loss": 0.411, "step": 20491 }, { "epoch": 1.923048048048048, "grad_norm": 1.0070810676876785, "learning_rate": 3.438619889310727e-06, "loss": 0.3386, "step": 20492 }, { "epoch": 1.9231418918918919, "grad_norm": 0.9806871837498737, "learning_rate": 3.4381012364174394e-06, "loss": 0.3895, "step": 20493 }, { "epoch": 1.9232357357357357, "grad_norm": 0.9646722194471662, "learning_rate": 3.437582602146854e-06, "loss": 0.4387, "step": 20494 }, { "epoch": 1.9233295795795797, "grad_norm": 1.0362971600690998, "learning_rate": 3.437063986505155e-06, "loss": 0.3743, "step": 20495 }, { "epoch": 1.9234234234234235, "grad_norm": 0.9990765250583612, "learning_rate": 3.4365453894985257e-06, "loss": 0.3652, "step": 20496 }, { "epoch": 1.9235172672672673, "grad_norm": 0.8234738197936394, "learning_rate": 3.4360268111331504e-06, "loss": 0.359, "step": 20497 }, { "epoch": 1.9236111111111112, "grad_norm": 1.057903888454573, "learning_rate": 3.4355082514152096e-06, "loss": 0.4045, "step": 20498 }, { "epoch": 1.923704954954955, "grad_norm": 0.7851527227823604, "learning_rate": 3.4349897103508896e-06, "loss": 0.3621, "step": 20499 }, { "epoch": 1.9237987987987988, "grad_norm": 0.9616572080024365, "learning_rate": 3.4344711879463714e-06, "loss": 0.424, "step": 20500 }, { "epoch": 1.9238926426426426, "grad_norm": 1.0066682520033525, "learning_rate": 3.4339526842078373e-06, "loss": 0.4199, "step": 20501 }, { "epoch": 1.9239864864864864, "grad_norm": 1.0013640358709577, "learning_rate": 3.43343419914147e-06, "loss": 0.4241, "step": 20502 }, { "epoch": 1.9240803303303302, "grad_norm": 1.0537509780579624, "learning_rate": 3.432915732753451e-06, "loss": 0.4196, "step": 20503 }, { "epoch": 1.924174174174174, "grad_norm": 1.12176230872332, "learning_rate": 3.432397285049961e-06, "loss": 0.3814, "step": 20504 }, { "epoch": 1.924268018018018, "grad_norm": 0.9260702963340595, "learning_rate": 3.431878856037184e-06, "loss": 0.4135, "step": 20505 }, { "epoch": 1.9243618618618619, "grad_norm": 1.0354539668562341, "learning_rate": 3.4313604457213e-06, "loss": 0.4174, "step": 20506 }, { "epoch": 1.9244557057057057, "grad_norm": 1.085780463810397, "learning_rate": 3.4308420541084885e-06, "loss": 0.3871, "step": 20507 }, { "epoch": 1.9245495495495497, "grad_norm": 0.8914865113392231, "learning_rate": 3.430323681204934e-06, "loss": 0.3436, "step": 20508 }, { "epoch": 1.9246433933933935, "grad_norm": 0.8630964431908669, "learning_rate": 3.429805327016814e-06, "loss": 0.4129, "step": 20509 }, { "epoch": 1.9247372372372373, "grad_norm": 1.0838891372121966, "learning_rate": 3.429286991550309e-06, "loss": 0.4249, "step": 20510 }, { "epoch": 1.9248310810810811, "grad_norm": 1.288112714185862, "learning_rate": 3.4287686748116013e-06, "loss": 0.407, "step": 20511 }, { "epoch": 1.924924924924925, "grad_norm": 0.8512352636988668, "learning_rate": 3.42825037680687e-06, "loss": 0.3774, "step": 20512 }, { "epoch": 1.9250187687687688, "grad_norm": 0.919900304200923, "learning_rate": 3.427732097542292e-06, "loss": 0.3933, "step": 20513 }, { "epoch": 1.9251126126126126, "grad_norm": 1.0342959904302576, "learning_rate": 3.427213837024052e-06, "loss": 0.4076, "step": 20514 }, { "epoch": 1.9252064564564564, "grad_norm": 1.1208164527769762, "learning_rate": 3.426695595258326e-06, "loss": 0.423, "step": 20515 }, { "epoch": 1.9253003003003002, "grad_norm": 1.0282049051024247, "learning_rate": 3.4261773722512914e-06, "loss": 0.4082, "step": 20516 }, { "epoch": 1.925394144144144, "grad_norm": 0.9671313834987227, "learning_rate": 3.425659168009131e-06, "loss": 0.373, "step": 20517 }, { "epoch": 1.925487987987988, "grad_norm": 1.1306186403765153, "learning_rate": 3.425140982538022e-06, "loss": 0.3466, "step": 20518 }, { "epoch": 1.9255818318318318, "grad_norm": 1.5478951266034855, "learning_rate": 3.424622815844142e-06, "loss": 0.3774, "step": 20519 }, { "epoch": 1.9256756756756757, "grad_norm": 1.0129029503432114, "learning_rate": 3.4241046679336697e-06, "loss": 0.3587, "step": 20520 }, { "epoch": 1.9257695195195195, "grad_norm": 0.9934225320321108, "learning_rate": 3.423586538812783e-06, "loss": 0.3796, "step": 20521 }, { "epoch": 1.9258633633633635, "grad_norm": 1.0791886436288296, "learning_rate": 3.423068428487658e-06, "loss": 0.4106, "step": 20522 }, { "epoch": 1.9259572072072073, "grad_norm": 0.8211036557646131, "learning_rate": 3.4225503369644752e-06, "loss": 0.396, "step": 20523 }, { "epoch": 1.9260510510510511, "grad_norm": 0.9773501108203624, "learning_rate": 3.4220322642494107e-06, "loss": 0.4051, "step": 20524 }, { "epoch": 1.926144894894895, "grad_norm": 1.000983071134387, "learning_rate": 3.4215142103486403e-06, "loss": 0.4135, "step": 20525 }, { "epoch": 1.9262387387387387, "grad_norm": 1.1053514387659513, "learning_rate": 3.4209961752683427e-06, "loss": 0.4397, "step": 20526 }, { "epoch": 1.9263325825825826, "grad_norm": 1.01524022819609, "learning_rate": 3.4204781590146937e-06, "loss": 0.3955, "step": 20527 }, { "epoch": 1.9264264264264264, "grad_norm": 0.9845791469166536, "learning_rate": 3.419960161593867e-06, "loss": 0.453, "step": 20528 }, { "epoch": 1.9265202702702702, "grad_norm": 1.021581662498979, "learning_rate": 3.419442183012043e-06, "loss": 0.3684, "step": 20529 }, { "epoch": 1.926614114114114, "grad_norm": 1.2772651510473283, "learning_rate": 3.4189242232753974e-06, "loss": 0.3887, "step": 20530 }, { "epoch": 1.9267079579579578, "grad_norm": 0.9537448338450123, "learning_rate": 3.418406282390102e-06, "loss": 0.4157, "step": 20531 }, { "epoch": 1.9268018018018018, "grad_norm": 0.866972302223199, "learning_rate": 3.4178883603623358e-06, "loss": 0.4005, "step": 20532 }, { "epoch": 1.9268956456456456, "grad_norm": 0.991257120406639, "learning_rate": 3.417370457198273e-06, "loss": 0.3369, "step": 20533 }, { "epoch": 1.9269894894894894, "grad_norm": 0.9370671680057612, "learning_rate": 3.4168525729040867e-06, "loss": 0.3695, "step": 20534 }, { "epoch": 1.9270833333333335, "grad_norm": 0.8231476169599373, "learning_rate": 3.4163347074859555e-06, "loss": 0.4137, "step": 20535 }, { "epoch": 1.9271771771771773, "grad_norm": 0.9088919943364808, "learning_rate": 3.4158168609500518e-06, "loss": 0.3993, "step": 20536 }, { "epoch": 1.927271021021021, "grad_norm": 1.7564100180874005, "learning_rate": 3.4152990333025495e-06, "loss": 0.3903, "step": 20537 }, { "epoch": 1.927364864864865, "grad_norm": 0.8529373916556886, "learning_rate": 3.4147812245496236e-06, "loss": 0.3861, "step": 20538 }, { "epoch": 1.9274587087087087, "grad_norm": 0.9588617572194427, "learning_rate": 3.414263434697448e-06, "loss": 0.3632, "step": 20539 }, { "epoch": 1.9275525525525525, "grad_norm": 0.9244721611188299, "learning_rate": 3.4137456637521942e-06, "loss": 0.4173, "step": 20540 }, { "epoch": 1.9276463963963963, "grad_norm": 1.0015225594027661, "learning_rate": 3.4132279117200393e-06, "loss": 0.4452, "step": 20541 }, { "epoch": 1.9277402402402402, "grad_norm": 1.2040000260834753, "learning_rate": 3.4127101786071556e-06, "loss": 0.361, "step": 20542 }, { "epoch": 1.927834084084084, "grad_norm": 0.9248890550485117, "learning_rate": 3.4121924644197136e-06, "loss": 0.4249, "step": 20543 }, { "epoch": 1.9279279279279278, "grad_norm": 1.420488161176233, "learning_rate": 3.4116747691638883e-06, "loss": 0.4382, "step": 20544 }, { "epoch": 1.9280217717717718, "grad_norm": 0.8285535441058572, "learning_rate": 3.4111570928458524e-06, "loss": 0.3628, "step": 20545 }, { "epoch": 1.9281156156156156, "grad_norm": 0.8689528911812552, "learning_rate": 3.4106394354717753e-06, "loss": 0.3901, "step": 20546 }, { "epoch": 1.9282094594594594, "grad_norm": 0.9076648954179167, "learning_rate": 3.4101217970478327e-06, "loss": 0.3949, "step": 20547 }, { "epoch": 1.9283033033033035, "grad_norm": 0.9827183919352291, "learning_rate": 3.4096041775801957e-06, "loss": 0.374, "step": 20548 }, { "epoch": 1.9283971471471473, "grad_norm": 0.9872181995324303, "learning_rate": 3.409086577075035e-06, "loss": 0.3777, "step": 20549 }, { "epoch": 1.928490990990991, "grad_norm": 0.8475706989362964, "learning_rate": 3.4085689955385217e-06, "loss": 0.3894, "step": 20550 }, { "epoch": 1.928584834834835, "grad_norm": 1.0810497143054307, "learning_rate": 3.408051432976829e-06, "loss": 0.4436, "step": 20551 }, { "epoch": 1.9286786786786787, "grad_norm": 1.0677537530169556, "learning_rate": 3.407533889396123e-06, "loss": 0.3991, "step": 20552 }, { "epoch": 1.9287725225225225, "grad_norm": 1.1492957726499553, "learning_rate": 3.4070163648025805e-06, "loss": 0.3648, "step": 20553 }, { "epoch": 1.9288663663663663, "grad_norm": 0.9590094283632916, "learning_rate": 3.4064988592023694e-06, "loss": 0.3919, "step": 20554 }, { "epoch": 1.9289602102102101, "grad_norm": 1.2705491476584654, "learning_rate": 3.405981372601658e-06, "loss": 0.3964, "step": 20555 }, { "epoch": 1.929054054054054, "grad_norm": 0.86450585494488, "learning_rate": 3.4054639050066213e-06, "loss": 0.4083, "step": 20556 }, { "epoch": 1.9291478978978978, "grad_norm": 0.9000343116600434, "learning_rate": 3.404946456423425e-06, "loss": 0.4083, "step": 20557 }, { "epoch": 1.9292417417417418, "grad_norm": 2.0985033497020393, "learning_rate": 3.404429026858238e-06, "loss": 0.4094, "step": 20558 }, { "epoch": 1.9293355855855856, "grad_norm": 0.9483782572859294, "learning_rate": 3.4039116163172337e-06, "loss": 0.3741, "step": 20559 }, { "epoch": 1.9294294294294294, "grad_norm": 0.9059491625934792, "learning_rate": 3.4033942248065788e-06, "loss": 0.3622, "step": 20560 }, { "epoch": 1.9295232732732732, "grad_norm": 1.049459054110362, "learning_rate": 3.40287685233244e-06, "loss": 0.4543, "step": 20561 }, { "epoch": 1.9296171171171173, "grad_norm": 0.9407348462344874, "learning_rate": 3.402359498900991e-06, "loss": 0.3939, "step": 20562 }, { "epoch": 1.929710960960961, "grad_norm": 0.936069539594913, "learning_rate": 3.4018421645183984e-06, "loss": 0.4039, "step": 20563 }, { "epoch": 1.9298048048048049, "grad_norm": 0.9921156984472728, "learning_rate": 3.401324849190827e-06, "loss": 0.4015, "step": 20564 }, { "epoch": 1.9298986486486487, "grad_norm": 1.0299958366250748, "learning_rate": 3.40080755292445e-06, "loss": 0.3941, "step": 20565 }, { "epoch": 1.9299924924924925, "grad_norm": 1.70902953881803, "learning_rate": 3.400290275725432e-06, "loss": 0.4243, "step": 20566 }, { "epoch": 1.9300863363363363, "grad_norm": 0.9384552956432624, "learning_rate": 3.3997730175999398e-06, "loss": 0.4034, "step": 20567 }, { "epoch": 1.9301801801801801, "grad_norm": 0.942812157196475, "learning_rate": 3.3992557785541443e-06, "loss": 0.3751, "step": 20568 }, { "epoch": 1.930274024024024, "grad_norm": 1.0285345473258443, "learning_rate": 3.39873855859421e-06, "loss": 0.3817, "step": 20569 }, { "epoch": 1.9303678678678677, "grad_norm": 0.9243227495404412, "learning_rate": 3.3982213577263034e-06, "loss": 0.4168, "step": 20570 }, { "epoch": 1.9304617117117115, "grad_norm": 0.9352503379242081, "learning_rate": 3.397704175956594e-06, "loss": 0.3959, "step": 20571 }, { "epoch": 1.9305555555555556, "grad_norm": 0.8904997908310476, "learning_rate": 3.3971870132912456e-06, "loss": 0.388, "step": 20572 }, { "epoch": 1.9306493993993994, "grad_norm": 1.0442003381511793, "learning_rate": 3.396669869736423e-06, "loss": 0.392, "step": 20573 }, { "epoch": 1.9307432432432432, "grad_norm": 0.8839591908060547, "learning_rate": 3.3961527452982964e-06, "loss": 0.3782, "step": 20574 }, { "epoch": 1.9308370870870872, "grad_norm": 1.0648090657209375, "learning_rate": 3.395635639983029e-06, "loss": 0.3705, "step": 20575 }, { "epoch": 1.930930930930931, "grad_norm": 0.9346150792272324, "learning_rate": 3.3951185537967863e-06, "loss": 0.3564, "step": 20576 }, { "epoch": 1.9310247747747749, "grad_norm": 1.0330054907382114, "learning_rate": 3.3946014867457343e-06, "loss": 0.3986, "step": 20577 }, { "epoch": 1.9311186186186187, "grad_norm": 0.850845045350504, "learning_rate": 3.3940844388360384e-06, "loss": 0.3731, "step": 20578 }, { "epoch": 1.9312124624624625, "grad_norm": 0.9038185616166211, "learning_rate": 3.39356741007386e-06, "loss": 0.3833, "step": 20579 }, { "epoch": 1.9313063063063063, "grad_norm": 0.9707253657796662, "learning_rate": 3.393050400465369e-06, "loss": 0.4312, "step": 20580 }, { "epoch": 1.93140015015015, "grad_norm": 0.9070195134228488, "learning_rate": 3.392533410016727e-06, "loss": 0.3747, "step": 20581 }, { "epoch": 1.931493993993994, "grad_norm": 1.250063196003914, "learning_rate": 3.392016438734098e-06, "loss": 0.3441, "step": 20582 }, { "epoch": 1.9315878378378377, "grad_norm": 1.0063122003789642, "learning_rate": 3.3914994866236463e-06, "loss": 0.4173, "step": 20583 }, { "epoch": 1.9316816816816815, "grad_norm": 0.8589184191550028, "learning_rate": 3.3909825536915363e-06, "loss": 0.3703, "step": 20584 }, { "epoch": 1.9317755255255256, "grad_norm": 0.9590190554389807, "learning_rate": 3.3904656399439284e-06, "loss": 0.3703, "step": 20585 }, { "epoch": 1.9318693693693694, "grad_norm": 0.8686858122844409, "learning_rate": 3.3899487453869895e-06, "loss": 0.3743, "step": 20586 }, { "epoch": 1.9319632132132132, "grad_norm": 0.9764300996997538, "learning_rate": 3.3894318700268817e-06, "loss": 0.3652, "step": 20587 }, { "epoch": 1.9320570570570572, "grad_norm": 1.9101979986946933, "learning_rate": 3.3889150138697665e-06, "loss": 0.4041, "step": 20588 }, { "epoch": 1.932150900900901, "grad_norm": 0.9542485140691357, "learning_rate": 3.3883981769218075e-06, "loss": 0.3868, "step": 20589 }, { "epoch": 1.9322447447447448, "grad_norm": 1.220037947734155, "learning_rate": 3.387881359189168e-06, "loss": 0.3436, "step": 20590 }, { "epoch": 1.9323385885885886, "grad_norm": 1.0358153783943422, "learning_rate": 3.3873645606780056e-06, "loss": 0.4259, "step": 20591 }, { "epoch": 1.9324324324324325, "grad_norm": 0.8159965175196575, "learning_rate": 3.3868477813944877e-06, "loss": 0.412, "step": 20592 }, { "epoch": 1.9325262762762763, "grad_norm": 0.9116909776648696, "learning_rate": 3.386331021344773e-06, "loss": 0.3728, "step": 20593 }, { "epoch": 1.93262012012012, "grad_norm": 1.0324911107975943, "learning_rate": 3.3858142805350237e-06, "loss": 0.3965, "step": 20594 }, { "epoch": 1.9327139639639639, "grad_norm": 0.9719147593600703, "learning_rate": 3.385297558971401e-06, "loss": 0.4085, "step": 20595 }, { "epoch": 1.9328078078078077, "grad_norm": 0.9983703558877751, "learning_rate": 3.3847808566600654e-06, "loss": 0.4099, "step": 20596 }, { "epoch": 1.9329016516516515, "grad_norm": 0.9759065622493023, "learning_rate": 3.3842641736071756e-06, "loss": 0.4277, "step": 20597 }, { "epoch": 1.9329954954954955, "grad_norm": 0.9312911048008564, "learning_rate": 3.3837475098188964e-06, "loss": 0.4052, "step": 20598 }, { "epoch": 1.9330893393393394, "grad_norm": 0.9682230321191133, "learning_rate": 3.3832308653013867e-06, "loss": 0.435, "step": 20599 }, { "epoch": 1.9331831831831832, "grad_norm": 1.0465675376332675, "learning_rate": 3.3827142400608036e-06, "loss": 0.3854, "step": 20600 }, { "epoch": 1.933277027027027, "grad_norm": 1.1755786150661778, "learning_rate": 3.38219763410331e-06, "loss": 0.3659, "step": 20601 }, { "epoch": 1.933370870870871, "grad_norm": 0.9294988064103493, "learning_rate": 3.3816810474350655e-06, "loss": 0.3656, "step": 20602 }, { "epoch": 1.9334647147147148, "grad_norm": 0.8935526028905225, "learning_rate": 3.381164480062226e-06, "loss": 0.406, "step": 20603 }, { "epoch": 1.9335585585585586, "grad_norm": 0.9699653269536823, "learning_rate": 3.3806479319909534e-06, "loss": 0.3857, "step": 20604 }, { "epoch": 1.9336524024024024, "grad_norm": 0.8275505536665456, "learning_rate": 3.380131403227408e-06, "loss": 0.3639, "step": 20605 }, { "epoch": 1.9337462462462462, "grad_norm": 1.2014494121457873, "learning_rate": 3.3796148937777447e-06, "loss": 0.4148, "step": 20606 }, { "epoch": 1.93384009009009, "grad_norm": 0.9471673060201057, "learning_rate": 3.3790984036481244e-06, "loss": 0.4011, "step": 20607 }, { "epoch": 1.9339339339339339, "grad_norm": 0.9168627932843019, "learning_rate": 3.3785819328447055e-06, "loss": 0.3716, "step": 20608 }, { "epoch": 1.9340277777777777, "grad_norm": 0.8866463234949026, "learning_rate": 3.3780654813736423e-06, "loss": 0.3861, "step": 20609 }, { "epoch": 1.9341216216216215, "grad_norm": 0.9399210917908698, "learning_rate": 3.377549049241097e-06, "loss": 0.4091, "step": 20610 }, { "epoch": 1.9342154654654653, "grad_norm": 1.0293310578273607, "learning_rate": 3.3770326364532264e-06, "loss": 0.3633, "step": 20611 }, { "epoch": 1.9343093093093093, "grad_norm": 0.9414112582337032, "learning_rate": 3.3765162430161846e-06, "loss": 0.4064, "step": 20612 }, { "epoch": 1.9344031531531531, "grad_norm": 1.2720305959301816, "learning_rate": 3.3759998689361327e-06, "loss": 0.4208, "step": 20613 }, { "epoch": 1.934496996996997, "grad_norm": 1.0203130060673686, "learning_rate": 3.375483514219225e-06, "loss": 0.386, "step": 20614 }, { "epoch": 1.934590840840841, "grad_norm": 0.9272572317854184, "learning_rate": 3.3749671788716164e-06, "loss": 0.3463, "step": 20615 }, { "epoch": 1.9346846846846848, "grad_norm": 1.008434364774811, "learning_rate": 3.3744508628994676e-06, "loss": 0.4067, "step": 20616 }, { "epoch": 1.9347785285285286, "grad_norm": 0.9932131960179736, "learning_rate": 3.3739345663089322e-06, "loss": 0.3851, "step": 20617 }, { "epoch": 1.9348723723723724, "grad_norm": 0.8463615376640657, "learning_rate": 3.3734182891061658e-06, "loss": 0.3894, "step": 20618 }, { "epoch": 1.9349662162162162, "grad_norm": 0.9863701533556452, "learning_rate": 3.3729020312973253e-06, "loss": 0.3955, "step": 20619 }, { "epoch": 1.93506006006006, "grad_norm": 1.1613557112862363, "learning_rate": 3.372385792888565e-06, "loss": 0.4286, "step": 20620 }, { "epoch": 1.9351539039039038, "grad_norm": 0.8138801300464827, "learning_rate": 3.3718695738860396e-06, "loss": 0.3502, "step": 20621 }, { "epoch": 1.9352477477477477, "grad_norm": 1.0593568676455978, "learning_rate": 3.371353374295906e-06, "loss": 0.4023, "step": 20622 }, { "epoch": 1.9353415915915915, "grad_norm": 1.1668284261911877, "learning_rate": 3.3708371941243178e-06, "loss": 0.3559, "step": 20623 }, { "epoch": 1.9354354354354353, "grad_norm": 0.9703056824706122, "learning_rate": 3.3703210333774283e-06, "loss": 0.3807, "step": 20624 }, { "epoch": 1.9355292792792793, "grad_norm": 0.9491279295385873, "learning_rate": 3.3698048920613945e-06, "loss": 0.3947, "step": 20625 }, { "epoch": 1.9356231231231231, "grad_norm": 0.9605844104253484, "learning_rate": 3.3692887701823685e-06, "loss": 0.4252, "step": 20626 }, { "epoch": 1.935716966966967, "grad_norm": 0.8579822480763846, "learning_rate": 3.3687726677465027e-06, "loss": 0.3986, "step": 20627 }, { "epoch": 1.935810810810811, "grad_norm": 0.9732850325428256, "learning_rate": 3.3682565847599546e-06, "loss": 0.3961, "step": 20628 }, { "epoch": 1.9359046546546548, "grad_norm": 0.9008246566978121, "learning_rate": 3.367740521228875e-06, "loss": 0.3587, "step": 20629 }, { "epoch": 1.9359984984984986, "grad_norm": 0.925160970532783, "learning_rate": 3.3672244771594153e-06, "loss": 0.3803, "step": 20630 }, { "epoch": 1.9360923423423424, "grad_norm": 0.9428245288085659, "learning_rate": 3.3667084525577327e-06, "loss": 0.4143, "step": 20631 }, { "epoch": 1.9361861861861862, "grad_norm": 0.9961477737660189, "learning_rate": 3.3661924474299767e-06, "loss": 0.4105, "step": 20632 }, { "epoch": 1.93628003003003, "grad_norm": 0.9012632455278053, "learning_rate": 3.365676461782299e-06, "loss": 0.3965, "step": 20633 }, { "epoch": 1.9363738738738738, "grad_norm": 0.9404624307728896, "learning_rate": 3.365160495620855e-06, "loss": 0.4302, "step": 20634 }, { "epoch": 1.9364677177177176, "grad_norm": 1.1135585007399238, "learning_rate": 3.364644548951794e-06, "loss": 0.3953, "step": 20635 }, { "epoch": 1.9365615615615615, "grad_norm": 0.8754610989535133, "learning_rate": 3.3641286217812685e-06, "loss": 0.357, "step": 20636 }, { "epoch": 1.9366554054054053, "grad_norm": 1.150444056816668, "learning_rate": 3.3636127141154307e-06, "loss": 0.4059, "step": 20637 }, { "epoch": 1.9367492492492493, "grad_norm": 1.0637850480875692, "learning_rate": 3.3630968259604315e-06, "loss": 0.3903, "step": 20638 }, { "epoch": 1.936843093093093, "grad_norm": 1.0750995983294924, "learning_rate": 3.362580957322419e-06, "loss": 0.4063, "step": 20639 }, { "epoch": 1.936936936936937, "grad_norm": 0.9811264092573649, "learning_rate": 3.3620651082075485e-06, "loss": 0.3621, "step": 20640 }, { "epoch": 1.9370307807807807, "grad_norm": 0.8631857125148399, "learning_rate": 3.3615492786219684e-06, "loss": 0.3573, "step": 20641 }, { "epoch": 1.9371246246246248, "grad_norm": 0.9936961028606502, "learning_rate": 3.361033468571828e-06, "loss": 0.4134, "step": 20642 }, { "epoch": 1.9372184684684686, "grad_norm": 1.585723128172479, "learning_rate": 3.36051767806328e-06, "loss": 0.4046, "step": 20643 }, { "epoch": 1.9373123123123124, "grad_norm": 0.9199272861514332, "learning_rate": 3.360001907102472e-06, "loss": 0.4101, "step": 20644 }, { "epoch": 1.9374061561561562, "grad_norm": 0.9494311575561838, "learning_rate": 3.3594861556955526e-06, "loss": 0.3909, "step": 20645 }, { "epoch": 1.9375, "grad_norm": 0.8441035130568122, "learning_rate": 3.358970423848675e-06, "loss": 0.3772, "step": 20646 }, { "epoch": 1.9375938438438438, "grad_norm": 0.8629700739315571, "learning_rate": 3.3584547115679854e-06, "loss": 0.3615, "step": 20647 }, { "epoch": 1.9376876876876876, "grad_norm": 0.9529051822034509, "learning_rate": 3.3579390188596334e-06, "loss": 0.3999, "step": 20648 }, { "epoch": 1.9377815315315314, "grad_norm": 1.329774835359455, "learning_rate": 3.357423345729769e-06, "loss": 0.3858, "step": 20649 }, { "epoch": 1.9378753753753752, "grad_norm": 0.9811220136071462, "learning_rate": 3.3569076921845384e-06, "loss": 0.4286, "step": 20650 }, { "epoch": 1.9379692192192193, "grad_norm": 0.892752256068276, "learning_rate": 3.3563920582300892e-06, "loss": 0.3919, "step": 20651 }, { "epoch": 1.938063063063063, "grad_norm": 1.0193919515207126, "learning_rate": 3.355876443872573e-06, "loss": 0.4026, "step": 20652 }, { "epoch": 1.938156906906907, "grad_norm": 1.0151421487547856, "learning_rate": 3.3553608491181355e-06, "loss": 0.3832, "step": 20653 }, { "epoch": 1.9382507507507507, "grad_norm": 0.7902725691369693, "learning_rate": 3.3548452739729226e-06, "loss": 0.3617, "step": 20654 }, { "epoch": 1.9383445945945947, "grad_norm": 1.4545368492109982, "learning_rate": 3.3543297184430844e-06, "loss": 0.4289, "step": 20655 }, { "epoch": 1.9384384384384385, "grad_norm": 1.0499414959834472, "learning_rate": 3.353814182534767e-06, "loss": 0.3704, "step": 20656 }, { "epoch": 1.9385322822822824, "grad_norm": 0.882396633067186, "learning_rate": 3.3532986662541143e-06, "loss": 0.4033, "step": 20657 }, { "epoch": 1.9386261261261262, "grad_norm": 0.9525785508936746, "learning_rate": 3.3527831696072783e-06, "loss": 0.378, "step": 20658 }, { "epoch": 1.93871996996997, "grad_norm": 0.8532495088445983, "learning_rate": 3.3522676926004017e-06, "loss": 0.3706, "step": 20659 }, { "epoch": 1.9388138138138138, "grad_norm": 1.0990066019311655, "learning_rate": 3.35175223523963e-06, "loss": 0.4017, "step": 20660 }, { "epoch": 1.9389076576576576, "grad_norm": 1.044088603628147, "learning_rate": 3.3512367975311123e-06, "loss": 0.3976, "step": 20661 }, { "epoch": 1.9390015015015014, "grad_norm": 1.1061508951106427, "learning_rate": 3.3507213794809924e-06, "loss": 0.3938, "step": 20662 }, { "epoch": 1.9390953453453452, "grad_norm": 0.8680056897572326, "learning_rate": 3.3502059810954125e-06, "loss": 0.3885, "step": 20663 }, { "epoch": 1.939189189189189, "grad_norm": 1.1284808801827846, "learning_rate": 3.3496906023805235e-06, "loss": 0.4259, "step": 20664 }, { "epoch": 1.939283033033033, "grad_norm": 1.2478234539033006, "learning_rate": 3.349175243342468e-06, "loss": 0.4017, "step": 20665 }, { "epoch": 1.9393768768768769, "grad_norm": 1.0042706684505718, "learning_rate": 3.348659903987389e-06, "loss": 0.3867, "step": 20666 }, { "epoch": 1.9394707207207207, "grad_norm": 1.028653169470168, "learning_rate": 3.3481445843214335e-06, "loss": 0.3792, "step": 20667 }, { "epoch": 1.9395645645645647, "grad_norm": 0.934060340980592, "learning_rate": 3.347629284350744e-06, "loss": 0.3622, "step": 20668 }, { "epoch": 1.9396584084084085, "grad_norm": 0.9595291977382494, "learning_rate": 3.347114004081463e-06, "loss": 0.3808, "step": 20669 }, { "epoch": 1.9397522522522523, "grad_norm": 1.2489933666692477, "learning_rate": 3.346598743519739e-06, "loss": 0.3886, "step": 20670 }, { "epoch": 1.9398460960960962, "grad_norm": 0.8763293456892274, "learning_rate": 3.3460835026717125e-06, "loss": 0.383, "step": 20671 }, { "epoch": 1.93993993993994, "grad_norm": 0.9408930522287319, "learning_rate": 3.3455682815435265e-06, "loss": 0.3631, "step": 20672 }, { "epoch": 1.9400337837837838, "grad_norm": 0.8776180600315631, "learning_rate": 3.345053080141325e-06, "loss": 0.3632, "step": 20673 }, { "epoch": 1.9401276276276276, "grad_norm": 1.2493259057128072, "learning_rate": 3.34453789847125e-06, "loss": 0.414, "step": 20674 }, { "epoch": 1.9402214714714714, "grad_norm": 0.9109014592844171, "learning_rate": 3.344022736539443e-06, "loss": 0.4224, "step": 20675 }, { "epoch": 1.9403153153153152, "grad_norm": 1.099406280041746, "learning_rate": 3.3435075943520503e-06, "loss": 0.3819, "step": 20676 }, { "epoch": 1.940409159159159, "grad_norm": 1.3861402431671788, "learning_rate": 3.3429924719152105e-06, "loss": 0.4155, "step": 20677 }, { "epoch": 1.940503003003003, "grad_norm": 0.9663138796533319, "learning_rate": 3.3424773692350658e-06, "loss": 0.4102, "step": 20678 }, { "epoch": 1.9405968468468469, "grad_norm": 2.4321421589165824, "learning_rate": 3.34196228631776e-06, "loss": 0.4131, "step": 20679 }, { "epoch": 1.9406906906906907, "grad_norm": 0.848381113860035, "learning_rate": 3.3414472231694326e-06, "loss": 0.3783, "step": 20680 }, { "epoch": 1.9407845345345347, "grad_norm": 0.9105559638109753, "learning_rate": 3.340932179796223e-06, "loss": 0.3878, "step": 20681 }, { "epoch": 1.9408783783783785, "grad_norm": 0.9767473911874691, "learning_rate": 3.3404171562042766e-06, "loss": 0.383, "step": 20682 }, { "epoch": 1.9409722222222223, "grad_norm": 0.902281643468685, "learning_rate": 3.3399021523997315e-06, "loss": 0.3894, "step": 20683 }, { "epoch": 1.9410660660660661, "grad_norm": 1.3762868116428606, "learning_rate": 3.339387168388727e-06, "loss": 0.4041, "step": 20684 }, { "epoch": 1.94115990990991, "grad_norm": 1.0763081761943099, "learning_rate": 3.338872204177406e-06, "loss": 0.3733, "step": 20685 }, { "epoch": 1.9412537537537538, "grad_norm": 1.1863120169572605, "learning_rate": 3.3383572597719085e-06, "loss": 0.3913, "step": 20686 }, { "epoch": 1.9413475975975976, "grad_norm": 0.8538562796398089, "learning_rate": 3.33784233517837e-06, "loss": 0.3912, "step": 20687 }, { "epoch": 1.9414414414414414, "grad_norm": 0.9360321481514003, "learning_rate": 3.3373274304029352e-06, "loss": 0.3996, "step": 20688 }, { "epoch": 1.9415352852852852, "grad_norm": 0.9681409119099814, "learning_rate": 3.336812545451741e-06, "loss": 0.3761, "step": 20689 }, { "epoch": 1.941629129129129, "grad_norm": 1.0218726287071913, "learning_rate": 3.3362976803309245e-06, "loss": 0.4223, "step": 20690 }, { "epoch": 1.941722972972973, "grad_norm": 0.8262460343638807, "learning_rate": 3.3357828350466283e-06, "loss": 0.4002, "step": 20691 }, { "epoch": 1.9418168168168168, "grad_norm": 1.154225885714614, "learning_rate": 3.3352680096049894e-06, "loss": 0.4086, "step": 20692 }, { "epoch": 1.9419106606606606, "grad_norm": 3.5391767379621775, "learning_rate": 3.334753204012145e-06, "loss": 0.402, "step": 20693 }, { "epoch": 1.9420045045045045, "grad_norm": 1.029481086012671, "learning_rate": 3.334238418274235e-06, "loss": 0.4593, "step": 20694 }, { "epoch": 1.9420983483483485, "grad_norm": 0.9299450654517191, "learning_rate": 3.333723652397397e-06, "loss": 0.408, "step": 20695 }, { "epoch": 1.9421921921921923, "grad_norm": 1.0199663284665483, "learning_rate": 3.333208906387765e-06, "loss": 0.3858, "step": 20696 }, { "epoch": 1.9422860360360361, "grad_norm": 0.9979479450836976, "learning_rate": 3.3326941802514823e-06, "loss": 0.3938, "step": 20697 }, { "epoch": 1.94237987987988, "grad_norm": 1.1453011470410321, "learning_rate": 3.3321794739946822e-06, "loss": 0.3689, "step": 20698 }, { "epoch": 1.9424737237237237, "grad_norm": 1.0228336629464754, "learning_rate": 3.331664787623502e-06, "loss": 0.4499, "step": 20699 }, { "epoch": 1.9425675675675675, "grad_norm": 0.8002877992173639, "learning_rate": 3.33115012114408e-06, "loss": 0.4129, "step": 20700 }, { "epoch": 1.9426614114114114, "grad_norm": 2.4252161623981277, "learning_rate": 3.3306354745625515e-06, "loss": 0.3865, "step": 20701 }, { "epoch": 1.9427552552552552, "grad_norm": 0.9587185382565627, "learning_rate": 3.330120847885051e-06, "loss": 0.3842, "step": 20702 }, { "epoch": 1.942849099099099, "grad_norm": 0.879202788148623, "learning_rate": 3.3296062411177176e-06, "loss": 0.3448, "step": 20703 }, { "epoch": 1.9429429429429428, "grad_norm": 1.1372249798837377, "learning_rate": 3.3290916542666862e-06, "loss": 0.3912, "step": 20704 }, { "epoch": 1.9430367867867868, "grad_norm": 1.1849843348129068, "learning_rate": 3.3285770873380896e-06, "loss": 0.4494, "step": 20705 }, { "epoch": 1.9431306306306306, "grad_norm": 0.9891517669616052, "learning_rate": 3.3280625403380674e-06, "loss": 0.423, "step": 20706 }, { "epoch": 1.9432244744744744, "grad_norm": 1.1793738166048133, "learning_rate": 3.3275480132727513e-06, "loss": 0.4158, "step": 20707 }, { "epoch": 1.9433183183183185, "grad_norm": 0.8505496690991744, "learning_rate": 3.3270335061482755e-06, "loss": 0.3999, "step": 20708 }, { "epoch": 1.9434121621621623, "grad_norm": 0.8712125889760278, "learning_rate": 3.326519018970778e-06, "loss": 0.4022, "step": 20709 }, { "epoch": 1.943506006006006, "grad_norm": 0.9814159100531763, "learning_rate": 3.326004551746391e-06, "loss": 0.3614, "step": 20710 }, { "epoch": 1.94359984984985, "grad_norm": 0.9446801674727743, "learning_rate": 3.3254901044812484e-06, "loss": 0.3733, "step": 20711 }, { "epoch": 1.9436936936936937, "grad_norm": 1.0168379318602678, "learning_rate": 3.324975677181485e-06, "loss": 0.35, "step": 20712 }, { "epoch": 1.9437875375375375, "grad_norm": 0.9717302578515834, "learning_rate": 3.3244612698532338e-06, "loss": 0.4122, "step": 20713 }, { "epoch": 1.9438813813813813, "grad_norm": 1.0657124471555426, "learning_rate": 3.3239468825026256e-06, "loss": 0.3713, "step": 20714 }, { "epoch": 1.9439752252252251, "grad_norm": 0.8704739657823428, "learning_rate": 3.323432515135798e-06, "loss": 0.3745, "step": 20715 }, { "epoch": 1.944069069069069, "grad_norm": 1.2056766169439725, "learning_rate": 3.322918167758883e-06, "loss": 0.382, "step": 20716 }, { "epoch": 1.9441629129129128, "grad_norm": 1.1043285441659474, "learning_rate": 3.32240384037801e-06, "loss": 0.3713, "step": 20717 }, { "epoch": 1.9442567567567568, "grad_norm": 0.9578331424996117, "learning_rate": 3.321889532999315e-06, "loss": 0.4008, "step": 20718 }, { "epoch": 1.9443506006006006, "grad_norm": 0.8922870671043441, "learning_rate": 3.3213752456289283e-06, "loss": 0.4013, "step": 20719 }, { "epoch": 1.9444444444444444, "grad_norm": 0.9440242263682047, "learning_rate": 3.32086097827298e-06, "loss": 0.3937, "step": 20720 }, { "epoch": 1.9445382882882885, "grad_norm": 0.9447270833729661, "learning_rate": 3.320346730937606e-06, "loss": 0.3827, "step": 20721 }, { "epoch": 1.9446321321321323, "grad_norm": 0.9270715235928975, "learning_rate": 3.3198325036289358e-06, "loss": 0.3628, "step": 20722 }, { "epoch": 1.944725975975976, "grad_norm": 0.9436965577054492, "learning_rate": 3.319318296353099e-06, "loss": 0.3668, "step": 20723 }, { "epoch": 1.9448198198198199, "grad_norm": 0.9434501080965774, "learning_rate": 3.3188041091162293e-06, "loss": 0.411, "step": 20724 }, { "epoch": 1.9449136636636637, "grad_norm": 0.9128095788062404, "learning_rate": 3.3182899419244563e-06, "loss": 0.3644, "step": 20725 }, { "epoch": 1.9450075075075075, "grad_norm": 0.9362665717312044, "learning_rate": 3.3177757947839073e-06, "loss": 0.3861, "step": 20726 }, { "epoch": 1.9451013513513513, "grad_norm": 0.9422049888748983, "learning_rate": 3.3172616677007185e-06, "loss": 0.4163, "step": 20727 }, { "epoch": 1.9451951951951951, "grad_norm": 0.9348680492007029, "learning_rate": 3.3167475606810163e-06, "loss": 0.4123, "step": 20728 }, { "epoch": 1.945289039039039, "grad_norm": 0.9111010886430394, "learning_rate": 3.3162334737309304e-06, "loss": 0.3628, "step": 20729 }, { "epoch": 1.9453828828828827, "grad_norm": 0.9714652134782402, "learning_rate": 3.315719406856592e-06, "loss": 0.425, "step": 20730 }, { "epoch": 1.9454767267267268, "grad_norm": 1.2488568646350906, "learning_rate": 3.315205360064129e-06, "loss": 0.3586, "step": 20731 }, { "epoch": 1.9455705705705706, "grad_norm": 0.9967791580799392, "learning_rate": 3.314691333359669e-06, "loss": 0.4066, "step": 20732 }, { "epoch": 1.9456644144144144, "grad_norm": 1.0263651650552803, "learning_rate": 3.3141773267493443e-06, "loss": 0.4061, "step": 20733 }, { "epoch": 1.9457582582582582, "grad_norm": 1.0398246216216045, "learning_rate": 3.3136633402392814e-06, "loss": 0.4556, "step": 20734 }, { "epoch": 1.9458521021021022, "grad_norm": 0.9323083448578049, "learning_rate": 3.313149373835609e-06, "loss": 0.4136, "step": 20735 }, { "epoch": 1.945945945945946, "grad_norm": 0.9593284748714417, "learning_rate": 3.3126354275444556e-06, "loss": 0.3321, "step": 20736 }, { "epoch": 1.9460397897897899, "grad_norm": 0.8660136362695326, "learning_rate": 3.3121215013719487e-06, "loss": 0.3942, "step": 20737 }, { "epoch": 1.9461336336336337, "grad_norm": 0.8859193257595198, "learning_rate": 3.311607595324213e-06, "loss": 0.4316, "step": 20738 }, { "epoch": 1.9462274774774775, "grad_norm": 0.9521956241130987, "learning_rate": 3.311093709407382e-06, "loss": 0.4024, "step": 20739 }, { "epoch": 1.9463213213213213, "grad_norm": 0.8257584690285459, "learning_rate": 3.3105798436275794e-06, "loss": 0.4083, "step": 20740 }, { "epoch": 1.946415165165165, "grad_norm": 0.9946976270319733, "learning_rate": 3.31006599799093e-06, "loss": 0.4141, "step": 20741 }, { "epoch": 1.946509009009009, "grad_norm": 0.8875589859040542, "learning_rate": 3.309552172503564e-06, "loss": 0.4064, "step": 20742 }, { "epoch": 1.9466028528528527, "grad_norm": 0.910825940059204, "learning_rate": 3.309038367171607e-06, "loss": 0.3713, "step": 20743 }, { "epoch": 1.9466966966966965, "grad_norm": 0.9494225144346555, "learning_rate": 3.3085245820011822e-06, "loss": 0.3773, "step": 20744 }, { "epoch": 1.9467905405405406, "grad_norm": 0.9551083347335003, "learning_rate": 3.30801081699842e-06, "loss": 0.4052, "step": 20745 }, { "epoch": 1.9468843843843844, "grad_norm": 0.939493515698953, "learning_rate": 3.307497072169443e-06, "loss": 0.4309, "step": 20746 }, { "epoch": 1.9469782282282282, "grad_norm": 0.8531401533259682, "learning_rate": 3.306983347520378e-06, "loss": 0.3916, "step": 20747 }, { "epoch": 1.9470720720720722, "grad_norm": 0.8482682485891583, "learning_rate": 3.3064696430573506e-06, "loss": 0.3687, "step": 20748 }, { "epoch": 1.947165915915916, "grad_norm": 2.2975769007194304, "learning_rate": 3.3059559587864844e-06, "loss": 0.3888, "step": 20749 }, { "epoch": 1.9472597597597598, "grad_norm": 0.7978162227890263, "learning_rate": 3.305442294713903e-06, "loss": 0.4071, "step": 20750 }, { "epoch": 1.9473536036036037, "grad_norm": 1.0235813896144854, "learning_rate": 3.3049286508457347e-06, "loss": 0.401, "step": 20751 }, { "epoch": 1.9474474474474475, "grad_norm": 0.9955409743193613, "learning_rate": 3.304415027188101e-06, "loss": 0.3777, "step": 20752 }, { "epoch": 1.9475412912912913, "grad_norm": 0.8182975510708, "learning_rate": 3.303901423747126e-06, "loss": 0.3639, "step": 20753 }, { "epoch": 1.947635135135135, "grad_norm": 1.0409390042187474, "learning_rate": 3.3033878405289343e-06, "loss": 0.3983, "step": 20754 }, { "epoch": 1.947728978978979, "grad_norm": 0.957150411625971, "learning_rate": 3.30287427753965e-06, "loss": 0.3466, "step": 20755 }, { "epoch": 1.9478228228228227, "grad_norm": 1.0190052781279924, "learning_rate": 3.3023607347853927e-06, "loss": 0.3466, "step": 20756 }, { "epoch": 1.9479166666666665, "grad_norm": 1.0130573278150008, "learning_rate": 3.3018472122722906e-06, "loss": 0.4107, "step": 20757 }, { "epoch": 1.9480105105105106, "grad_norm": 0.8552122472190655, "learning_rate": 3.3013337100064634e-06, "loss": 0.3868, "step": 20758 }, { "epoch": 1.9481043543543544, "grad_norm": 0.9091899878638545, "learning_rate": 3.300820227994034e-06, "loss": 0.3952, "step": 20759 }, { "epoch": 1.9481981981981982, "grad_norm": 1.267999560363376, "learning_rate": 3.300306766241126e-06, "loss": 0.3601, "step": 20760 }, { "epoch": 1.9482920420420422, "grad_norm": 1.518700713515037, "learning_rate": 3.2997933247538607e-06, "loss": 0.354, "step": 20761 }, { "epoch": 1.948385885885886, "grad_norm": 0.9484522354158638, "learning_rate": 3.2992799035383567e-06, "loss": 0.4183, "step": 20762 }, { "epoch": 1.9484797297297298, "grad_norm": 0.9222420864793454, "learning_rate": 3.298766502600741e-06, "loss": 0.4294, "step": 20763 }, { "epoch": 1.9485735735735736, "grad_norm": 0.8805270630290759, "learning_rate": 3.2982531219471327e-06, "loss": 0.411, "step": 20764 }, { "epoch": 1.9486674174174174, "grad_norm": 1.0970736690834553, "learning_rate": 3.2977397615836514e-06, "loss": 0.3591, "step": 20765 }, { "epoch": 1.9487612612612613, "grad_norm": 0.9847078688731253, "learning_rate": 3.2972264215164207e-06, "loss": 0.4074, "step": 20766 }, { "epoch": 1.948855105105105, "grad_norm": 1.0265226305105855, "learning_rate": 3.2967131017515597e-06, "loss": 0.3967, "step": 20767 }, { "epoch": 1.9489489489489489, "grad_norm": 0.8850640135627891, "learning_rate": 3.296199802295187e-06, "loss": 0.3668, "step": 20768 }, { "epoch": 1.9490427927927927, "grad_norm": 1.0879988400781146, "learning_rate": 3.2956865231534264e-06, "loss": 0.3734, "step": 20769 }, { "epoch": 1.9491366366366365, "grad_norm": 1.0710630360706592, "learning_rate": 3.2951732643323954e-06, "loss": 0.4054, "step": 20770 }, { "epoch": 1.9492304804804805, "grad_norm": 1.0027850109880208, "learning_rate": 3.2946600258382138e-06, "loss": 0.3823, "step": 20771 }, { "epoch": 1.9493243243243243, "grad_norm": 1.0831170511726027, "learning_rate": 3.294146807677002e-06, "loss": 0.407, "step": 20772 }, { "epoch": 1.9494181681681682, "grad_norm": 0.8644089130418128, "learning_rate": 3.2936336098548794e-06, "loss": 0.3858, "step": 20773 }, { "epoch": 1.949512012012012, "grad_norm": 1.0094089157145292, "learning_rate": 3.293120432377961e-06, "loss": 0.4572, "step": 20774 }, { "epoch": 1.949605855855856, "grad_norm": 1.0537816094340071, "learning_rate": 3.292607275252372e-06, "loss": 0.3843, "step": 20775 }, { "epoch": 1.9496996996996998, "grad_norm": 0.9722126703231277, "learning_rate": 3.2920941384842266e-06, "loss": 0.3796, "step": 20776 }, { "epoch": 1.9497935435435436, "grad_norm": 0.7533926995030396, "learning_rate": 3.2915810220796422e-06, "loss": 0.3989, "step": 20777 }, { "epoch": 1.9498873873873874, "grad_norm": 0.9747470615091703, "learning_rate": 3.29106792604474e-06, "loss": 0.4014, "step": 20778 }, { "epoch": 1.9499812312312312, "grad_norm": 0.8987838822362677, "learning_rate": 3.290554850385636e-06, "loss": 0.337, "step": 20779 }, { "epoch": 1.950075075075075, "grad_norm": 1.0505374321700356, "learning_rate": 3.2900417951084456e-06, "loss": 0.4146, "step": 20780 }, { "epoch": 1.9501689189189189, "grad_norm": 0.9444029032028004, "learning_rate": 3.2895287602192906e-06, "loss": 0.4219, "step": 20781 }, { "epoch": 1.9502627627627627, "grad_norm": 0.8480205066210251, "learning_rate": 3.2890157457242848e-06, "loss": 0.3452, "step": 20782 }, { "epoch": 1.9503566066066065, "grad_norm": 1.208426585494867, "learning_rate": 3.2885027516295452e-06, "loss": 0.4072, "step": 20783 }, { "epoch": 1.9504504504504503, "grad_norm": 0.9510546199222415, "learning_rate": 3.2879897779411897e-06, "loss": 0.4112, "step": 20784 }, { "epoch": 1.9505442942942943, "grad_norm": 0.9014003341942766, "learning_rate": 3.2874768246653334e-06, "loss": 0.3511, "step": 20785 }, { "epoch": 1.9506381381381381, "grad_norm": 0.9192708543169612, "learning_rate": 3.28696389180809e-06, "loss": 0.3701, "step": 20786 }, { "epoch": 1.950731981981982, "grad_norm": 0.9347244772476981, "learning_rate": 3.2864509793755807e-06, "loss": 0.3484, "step": 20787 }, { "epoch": 1.950825825825826, "grad_norm": 1.2739158823394605, "learning_rate": 3.2859380873739167e-06, "loss": 0.4332, "step": 20788 }, { "epoch": 1.9509196696696698, "grad_norm": 0.9258485136248046, "learning_rate": 3.2854252158092146e-06, "loss": 0.3439, "step": 20789 }, { "epoch": 1.9510135135135136, "grad_norm": 1.0361392352631247, "learning_rate": 3.28491236468759e-06, "loss": 0.3732, "step": 20790 }, { "epoch": 1.9511073573573574, "grad_norm": 0.8336537986933095, "learning_rate": 3.284399534015158e-06, "loss": 0.3704, "step": 20791 }, { "epoch": 1.9512012012012012, "grad_norm": 1.1417798726709831, "learning_rate": 3.2838867237980294e-06, "loss": 0.4478, "step": 20792 }, { "epoch": 1.951295045045045, "grad_norm": 0.9161587471093341, "learning_rate": 3.2833739340423236e-06, "loss": 0.3967, "step": 20793 }, { "epoch": 1.9513888888888888, "grad_norm": 1.124036335386985, "learning_rate": 3.2828611647541525e-06, "loss": 0.4164, "step": 20794 }, { "epoch": 1.9514827327327327, "grad_norm": 1.031019250524597, "learning_rate": 3.282348415939629e-06, "loss": 0.382, "step": 20795 }, { "epoch": 1.9515765765765765, "grad_norm": 1.8639942656377952, "learning_rate": 3.2818356876048684e-06, "loss": 0.3796, "step": 20796 }, { "epoch": 1.9516704204204203, "grad_norm": 0.9373462259330407, "learning_rate": 3.2813229797559827e-06, "loss": 0.4265, "step": 20797 }, { "epoch": 1.9517642642642643, "grad_norm": 1.009492329742124, "learning_rate": 3.280810292399084e-06, "loss": 0.4359, "step": 20798 }, { "epoch": 1.9518581081081081, "grad_norm": 1.0487754781782321, "learning_rate": 3.2802976255402875e-06, "loss": 0.373, "step": 20799 }, { "epoch": 1.951951951951952, "grad_norm": 1.3540593080328067, "learning_rate": 3.2797849791857062e-06, "loss": 0.4209, "step": 20800 }, { "epoch": 1.952045795795796, "grad_norm": 0.9883264983625776, "learning_rate": 3.279272353341449e-06, "loss": 0.4097, "step": 20801 }, { "epoch": 1.9521396396396398, "grad_norm": 1.1612711179124826, "learning_rate": 3.2787597480136312e-06, "loss": 0.4149, "step": 20802 }, { "epoch": 1.9522334834834836, "grad_norm": 0.8576373572261321, "learning_rate": 3.278247163208364e-06, "loss": 0.3929, "step": 20803 }, { "epoch": 1.9523273273273274, "grad_norm": 1.1953589545937346, "learning_rate": 3.277734598931756e-06, "loss": 0.386, "step": 20804 }, { "epoch": 1.9524211711711712, "grad_norm": 1.0065103813955842, "learning_rate": 3.2772220551899236e-06, "loss": 0.4122, "step": 20805 }, { "epoch": 1.952515015015015, "grad_norm": 1.1074565620463002, "learning_rate": 3.276709531988975e-06, "loss": 0.4283, "step": 20806 }, { "epoch": 1.9526088588588588, "grad_norm": 0.9211576254491765, "learning_rate": 3.27619702933502e-06, "loss": 0.4008, "step": 20807 }, { "epoch": 1.9527027027027026, "grad_norm": 0.9238381117201934, "learning_rate": 3.2756845472341726e-06, "loss": 0.4125, "step": 20808 }, { "epoch": 1.9527965465465464, "grad_norm": 0.8846112847780566, "learning_rate": 3.275172085692541e-06, "loss": 0.3651, "step": 20809 }, { "epoch": 1.9528903903903903, "grad_norm": 0.9631637917509417, "learning_rate": 3.2746596447162336e-06, "loss": 0.3962, "step": 20810 }, { "epoch": 1.9529842342342343, "grad_norm": 0.9743867078969106, "learning_rate": 3.274147224311364e-06, "loss": 0.4081, "step": 20811 }, { "epoch": 1.953078078078078, "grad_norm": 1.1357312860712123, "learning_rate": 3.273634824484041e-06, "loss": 0.4265, "step": 20812 }, { "epoch": 1.953171921921922, "grad_norm": 0.91285775891937, "learning_rate": 3.2731224452403702e-06, "loss": 0.3631, "step": 20813 }, { "epoch": 1.9532657657657657, "grad_norm": 1.0531095243360502, "learning_rate": 3.2726100865864663e-06, "loss": 0.3767, "step": 20814 }, { "epoch": 1.9533596096096097, "grad_norm": 0.9203514256930253, "learning_rate": 3.2720977485284353e-06, "loss": 0.3757, "step": 20815 }, { "epoch": 1.9534534534534536, "grad_norm": 1.47215906787547, "learning_rate": 3.271585431072386e-06, "loss": 0.3866, "step": 20816 }, { "epoch": 1.9535472972972974, "grad_norm": 1.3704747256297836, "learning_rate": 3.2710731342244274e-06, "loss": 0.3652, "step": 20817 }, { "epoch": 1.9536411411411412, "grad_norm": 7.861912399161531, "learning_rate": 3.2705608579906667e-06, "loss": 0.3965, "step": 20818 }, { "epoch": 1.953734984984985, "grad_norm": 0.9873933591521925, "learning_rate": 3.2700486023772117e-06, "loss": 0.421, "step": 20819 }, { "epoch": 1.9538288288288288, "grad_norm": 0.9389818801749201, "learning_rate": 3.269536367390172e-06, "loss": 0.3652, "step": 20820 }, { "epoch": 1.9539226726726726, "grad_norm": 0.8678072965908877, "learning_rate": 3.2690241530356535e-06, "loss": 0.3819, "step": 20821 }, { "epoch": 1.9540165165165164, "grad_norm": 1.269988402277195, "learning_rate": 3.2685119593197634e-06, "loss": 0.4319, "step": 20822 }, { "epoch": 1.9541103603603602, "grad_norm": 0.8783935878347798, "learning_rate": 3.2679997862486103e-06, "loss": 0.3935, "step": 20823 }, { "epoch": 1.954204204204204, "grad_norm": 0.8807804835672068, "learning_rate": 3.267487633828299e-06, "loss": 0.3761, "step": 20824 }, { "epoch": 1.954298048048048, "grad_norm": 1.4981705296228316, "learning_rate": 3.2669755020649353e-06, "loss": 0.4272, "step": 20825 }, { "epoch": 1.9543918918918919, "grad_norm": 1.003362105207485, "learning_rate": 3.2664633909646278e-06, "loss": 0.3892, "step": 20826 }, { "epoch": 1.9544857357357357, "grad_norm": 1.0247293733337264, "learning_rate": 3.2659513005334814e-06, "loss": 0.4398, "step": 20827 }, { "epoch": 1.9545795795795797, "grad_norm": 1.1273510649792422, "learning_rate": 3.2654392307776007e-06, "loss": 0.4006, "step": 20828 }, { "epoch": 1.9546734234234235, "grad_norm": 1.2254644851082586, "learning_rate": 3.2649271817030938e-06, "loss": 0.416, "step": 20829 }, { "epoch": 1.9547672672672673, "grad_norm": 0.9262144188990712, "learning_rate": 3.264415153316064e-06, "loss": 0.387, "step": 20830 }, { "epoch": 1.9548611111111112, "grad_norm": 1.0936209738294163, "learning_rate": 3.2639031456226146e-06, "loss": 0.419, "step": 20831 }, { "epoch": 1.954954954954955, "grad_norm": 0.9814698557446092, "learning_rate": 3.2633911586288536e-06, "loss": 0.3905, "step": 20832 }, { "epoch": 1.9550487987987988, "grad_norm": 0.9770274217387617, "learning_rate": 3.2628791923408855e-06, "loss": 0.4159, "step": 20833 }, { "epoch": 1.9551426426426426, "grad_norm": 0.8562633241056278, "learning_rate": 3.2623672467648113e-06, "loss": 0.3673, "step": 20834 }, { "epoch": 1.9552364864864864, "grad_norm": 0.9096944715163091, "learning_rate": 3.2618553219067383e-06, "loss": 0.4029, "step": 20835 }, { "epoch": 1.9553303303303302, "grad_norm": 1.108351275631341, "learning_rate": 3.2613434177727688e-06, "loss": 0.3758, "step": 20836 }, { "epoch": 1.955424174174174, "grad_norm": 0.9712457935796306, "learning_rate": 3.2608315343690044e-06, "loss": 0.3755, "step": 20837 }, { "epoch": 1.955518018018018, "grad_norm": 1.0353636905768557, "learning_rate": 3.2603196717015523e-06, "loss": 0.3858, "step": 20838 }, { "epoch": 1.9556118618618619, "grad_norm": 0.8898084166428459, "learning_rate": 3.2598078297765133e-06, "loss": 0.3835, "step": 20839 }, { "epoch": 1.9557057057057057, "grad_norm": 0.9172609793714713, "learning_rate": 3.2592960085999892e-06, "loss": 0.3797, "step": 20840 }, { "epoch": 1.9557995495495497, "grad_norm": 1.0004235027727284, "learning_rate": 3.2587842081780852e-06, "loss": 0.3909, "step": 20841 }, { "epoch": 1.9558933933933935, "grad_norm": 1.041606632517533, "learning_rate": 3.258272428516902e-06, "loss": 0.4096, "step": 20842 }, { "epoch": 1.9559872372372373, "grad_norm": 0.8455709151029932, "learning_rate": 3.2577606696225395e-06, "loss": 0.3182, "step": 20843 }, { "epoch": 1.9560810810810811, "grad_norm": 1.0810121138398296, "learning_rate": 3.2572489315011035e-06, "loss": 0.3879, "step": 20844 }, { "epoch": 1.956174924924925, "grad_norm": 0.8940190059857817, "learning_rate": 3.2567372141586935e-06, "loss": 0.3811, "step": 20845 }, { "epoch": 1.9562687687687688, "grad_norm": 1.1333564823347981, "learning_rate": 3.2562255176014103e-06, "loss": 0.3582, "step": 20846 }, { "epoch": 1.9563626126126126, "grad_norm": 0.9017812515218572, "learning_rate": 3.2557138418353555e-06, "loss": 0.3505, "step": 20847 }, { "epoch": 1.9564564564564564, "grad_norm": 1.0175129317023213, "learning_rate": 3.2552021868666306e-06, "loss": 0.4054, "step": 20848 }, { "epoch": 1.9565503003003002, "grad_norm": 1.1931428225826706, "learning_rate": 3.2546905527013335e-06, "loss": 0.3386, "step": 20849 }, { "epoch": 1.956644144144144, "grad_norm": 1.0152882241196428, "learning_rate": 3.254178939345568e-06, "loss": 0.3628, "step": 20850 }, { "epoch": 1.956737987987988, "grad_norm": 1.0600268746426411, "learning_rate": 3.2536673468054324e-06, "loss": 0.4236, "step": 20851 }, { "epoch": 1.9568318318318318, "grad_norm": 1.035204042703916, "learning_rate": 3.2531557750870257e-06, "loss": 0.4389, "step": 20852 }, { "epoch": 1.9569256756756757, "grad_norm": 1.0369226112560535, "learning_rate": 3.2526442241964497e-06, "loss": 0.4157, "step": 20853 }, { "epoch": 1.9570195195195195, "grad_norm": 0.9991086189865838, "learning_rate": 3.252132694139802e-06, "loss": 0.4211, "step": 20854 }, { "epoch": 1.9571133633633635, "grad_norm": 0.8456332594684961, "learning_rate": 3.251621184923179e-06, "loss": 0.4024, "step": 20855 }, { "epoch": 1.9572072072072073, "grad_norm": 0.9182951920444331, "learning_rate": 3.2511096965526857e-06, "loss": 0.3811, "step": 20856 }, { "epoch": 1.9573010510510511, "grad_norm": 0.9435173054523635, "learning_rate": 3.250598229034417e-06, "loss": 0.3923, "step": 20857 }, { "epoch": 1.957394894894895, "grad_norm": 0.8128708228499764, "learning_rate": 3.2500867823744704e-06, "loss": 0.3935, "step": 20858 }, { "epoch": 1.9574887387387387, "grad_norm": 0.9524375785388249, "learning_rate": 3.2495753565789455e-06, "loss": 0.4128, "step": 20859 }, { "epoch": 1.9575825825825826, "grad_norm": 1.9367404203574259, "learning_rate": 3.249063951653941e-06, "loss": 0.3766, "step": 20860 }, { "epoch": 1.9576764264264264, "grad_norm": 0.9678892079567394, "learning_rate": 3.2485525676055495e-06, "loss": 0.3744, "step": 20861 }, { "epoch": 1.9577702702702702, "grad_norm": 0.9867292627722177, "learning_rate": 3.2480412044398744e-06, "loss": 0.4104, "step": 20862 }, { "epoch": 1.957864114114114, "grad_norm": 1.1708271793689122, "learning_rate": 3.247529862163011e-06, "loss": 0.3823, "step": 20863 }, { "epoch": 1.9579579579579578, "grad_norm": 0.9946389658962588, "learning_rate": 3.247018540781054e-06, "loss": 0.4171, "step": 20864 }, { "epoch": 1.9580518018018018, "grad_norm": 0.9571245050305734, "learning_rate": 3.246507240300102e-06, "loss": 0.3618, "step": 20865 }, { "epoch": 1.9581456456456456, "grad_norm": 0.9307627509296242, "learning_rate": 3.2459959607262504e-06, "loss": 0.3817, "step": 20866 }, { "epoch": 1.9582394894894894, "grad_norm": 0.9674434718404811, "learning_rate": 3.2454847020655933e-06, "loss": 0.4121, "step": 20867 }, { "epoch": 1.9583333333333335, "grad_norm": 0.812905804274034, "learning_rate": 3.2449734643242304e-06, "loss": 0.3673, "step": 20868 }, { "epoch": 1.9584271771771773, "grad_norm": 0.9130127313764157, "learning_rate": 3.244462247508256e-06, "loss": 0.3712, "step": 20869 }, { "epoch": 1.958521021021021, "grad_norm": 0.8935670259620732, "learning_rate": 3.2439510516237637e-06, "loss": 0.3688, "step": 20870 }, { "epoch": 1.958614864864865, "grad_norm": 4.401264992778986, "learning_rate": 3.2434398766768504e-06, "loss": 0.4127, "step": 20871 }, { "epoch": 1.9587087087087087, "grad_norm": 1.0144154494374271, "learning_rate": 3.2429287226736107e-06, "loss": 0.3791, "step": 20872 }, { "epoch": 1.9588025525525525, "grad_norm": 1.2187684427954242, "learning_rate": 3.242417589620136e-06, "loss": 0.3848, "step": 20873 }, { "epoch": 1.9588963963963963, "grad_norm": 0.9740382408746547, "learning_rate": 3.241906477522526e-06, "loss": 0.3464, "step": 20874 }, { "epoch": 1.9589902402402402, "grad_norm": 0.8645426690693012, "learning_rate": 3.2413953863868717e-06, "loss": 0.4243, "step": 20875 }, { "epoch": 1.959084084084084, "grad_norm": 1.02161846513409, "learning_rate": 3.240884316219266e-06, "loss": 0.3788, "step": 20876 }, { "epoch": 1.9591779279279278, "grad_norm": 1.1222555132191274, "learning_rate": 3.240373267025805e-06, "loss": 0.3761, "step": 20877 }, { "epoch": 1.9592717717717718, "grad_norm": 0.9388387403990075, "learning_rate": 3.23986223881258e-06, "loss": 0.4083, "step": 20878 }, { "epoch": 1.9593656156156156, "grad_norm": 1.1473931551612968, "learning_rate": 3.2393512315856827e-06, "loss": 0.4021, "step": 20879 }, { "epoch": 1.9594594594594594, "grad_norm": 1.1315357451390418, "learning_rate": 3.2388402453512104e-06, "loss": 0.4035, "step": 20880 }, { "epoch": 1.9595533033033035, "grad_norm": 0.9417680340131511, "learning_rate": 3.238329280115253e-06, "loss": 0.3672, "step": 20881 }, { "epoch": 1.9596471471471473, "grad_norm": 1.0768473600909167, "learning_rate": 3.237818335883902e-06, "loss": 0.3816, "step": 20882 }, { "epoch": 1.959740990990991, "grad_norm": 1.1919123458162508, "learning_rate": 3.237307412663251e-06, "loss": 0.398, "step": 20883 }, { "epoch": 1.959834834834835, "grad_norm": 0.9017725049419724, "learning_rate": 3.236796510459391e-06, "loss": 0.3765, "step": 20884 }, { "epoch": 1.9599286786786787, "grad_norm": 1.1099632043924632, "learning_rate": 3.2362856292784126e-06, "loss": 0.3903, "step": 20885 }, { "epoch": 1.9600225225225225, "grad_norm": 0.9296860664047025, "learning_rate": 3.2357747691264098e-06, "loss": 0.4106, "step": 20886 }, { "epoch": 1.9601163663663663, "grad_norm": 0.8660023165937261, "learning_rate": 3.2352639300094724e-06, "loss": 0.3694, "step": 20887 }, { "epoch": 1.9602102102102101, "grad_norm": 0.9268993781900655, "learning_rate": 3.2347531119336893e-06, "loss": 0.3747, "step": 20888 }, { "epoch": 1.960304054054054, "grad_norm": 1.0238133834130543, "learning_rate": 3.2342423149051537e-06, "loss": 0.4067, "step": 20889 }, { "epoch": 1.9603978978978978, "grad_norm": 0.9743825849108158, "learning_rate": 3.2337315389299553e-06, "loss": 0.3833, "step": 20890 }, { "epoch": 1.9604917417417418, "grad_norm": 1.0330184302099288, "learning_rate": 3.2332207840141804e-06, "loss": 0.4095, "step": 20891 }, { "epoch": 1.9605855855855856, "grad_norm": 1.0171238296281988, "learning_rate": 3.2327100501639252e-06, "loss": 0.4049, "step": 20892 }, { "epoch": 1.9606794294294294, "grad_norm": 1.0723331649587726, "learning_rate": 3.2321993373852757e-06, "loss": 0.4233, "step": 20893 }, { "epoch": 1.9607732732732732, "grad_norm": 0.92094804611856, "learning_rate": 3.231688645684321e-06, "loss": 0.3671, "step": 20894 }, { "epoch": 1.9608671171171173, "grad_norm": 1.0502123826401668, "learning_rate": 3.2311779750671514e-06, "loss": 0.4219, "step": 20895 }, { "epoch": 1.960960960960961, "grad_norm": 0.9320831594043856, "learning_rate": 3.2306673255398545e-06, "loss": 0.3904, "step": 20896 }, { "epoch": 1.9610548048048049, "grad_norm": 0.8993323499614967, "learning_rate": 3.2301566971085176e-06, "loss": 0.393, "step": 20897 }, { "epoch": 1.9611486486486487, "grad_norm": 1.1917721173549178, "learning_rate": 3.2296460897792324e-06, "loss": 0.3837, "step": 20898 }, { "epoch": 1.9612424924924925, "grad_norm": 0.988075659353183, "learning_rate": 3.229135503558086e-06, "loss": 0.4126, "step": 20899 }, { "epoch": 1.9613363363363363, "grad_norm": 0.9783972956186863, "learning_rate": 3.2286249384511644e-06, "loss": 0.3755, "step": 20900 }, { "epoch": 1.9614301801801801, "grad_norm": 1.2562498737678103, "learning_rate": 3.2281143944645565e-06, "loss": 0.412, "step": 20901 }, { "epoch": 1.961524024024024, "grad_norm": 0.9016684020127118, "learning_rate": 3.2276038716043495e-06, "loss": 0.4005, "step": 20902 }, { "epoch": 1.9616178678678677, "grad_norm": 0.8419970536419982, "learning_rate": 3.2270933698766282e-06, "loss": 0.3572, "step": 20903 }, { "epoch": 1.9617117117117115, "grad_norm": 1.1662511678863066, "learning_rate": 3.226582889287483e-06, "loss": 0.3637, "step": 20904 }, { "epoch": 1.9618055555555556, "grad_norm": 2.4071738786301533, "learning_rate": 3.226072429842999e-06, "loss": 0.3899, "step": 20905 }, { "epoch": 1.9618993993993994, "grad_norm": 1.008940240692758, "learning_rate": 3.225561991549261e-06, "loss": 0.4052, "step": 20906 }, { "epoch": 1.9619932432432432, "grad_norm": 1.0682449904487812, "learning_rate": 3.225051574412358e-06, "loss": 0.375, "step": 20907 }, { "epoch": 1.9620870870870872, "grad_norm": 0.9954043329732501, "learning_rate": 3.224541178438373e-06, "loss": 0.4208, "step": 20908 }, { "epoch": 1.962180930930931, "grad_norm": 0.8731116270022958, "learning_rate": 3.2240308036333913e-06, "loss": 0.3865, "step": 20909 }, { "epoch": 1.9622747747747749, "grad_norm": 1.8537891858629016, "learning_rate": 3.2235204500035004e-06, "loss": 0.3802, "step": 20910 }, { "epoch": 1.9623686186186187, "grad_norm": 0.8775327410866527, "learning_rate": 3.2230101175547846e-06, "loss": 0.3272, "step": 20911 }, { "epoch": 1.9624624624624625, "grad_norm": 1.01217077466363, "learning_rate": 3.2224998062933275e-06, "loss": 0.4011, "step": 20912 }, { "epoch": 1.9625563063063063, "grad_norm": 1.165222377747834, "learning_rate": 3.221989516225216e-06, "loss": 0.3809, "step": 20913 }, { "epoch": 1.96265015015015, "grad_norm": 1.0369502926206207, "learning_rate": 3.2214792473565316e-06, "loss": 0.4123, "step": 20914 }, { "epoch": 1.962743993993994, "grad_norm": 0.9091078885630188, "learning_rate": 3.220968999693358e-06, "loss": 0.3872, "step": 20915 }, { "epoch": 1.9628378378378377, "grad_norm": 1.067966071715171, "learning_rate": 3.2204587732417823e-06, "loss": 0.4219, "step": 20916 }, { "epoch": 1.9629316816816815, "grad_norm": 0.9353560508543584, "learning_rate": 3.219948568007886e-06, "loss": 0.4014, "step": 20917 }, { "epoch": 1.9630255255255256, "grad_norm": 0.8986911185394458, "learning_rate": 3.2194383839977515e-06, "loss": 0.3912, "step": 20918 }, { "epoch": 1.9631193693693694, "grad_norm": 0.8570886618688695, "learning_rate": 3.2189282212174643e-06, "loss": 0.3915, "step": 20919 }, { "epoch": 1.9632132132132132, "grad_norm": 0.86975765778845, "learning_rate": 3.2184180796731045e-06, "loss": 0.3945, "step": 20920 }, { "epoch": 1.9633070570570572, "grad_norm": 2.1070738074107664, "learning_rate": 3.2179079593707547e-06, "loss": 0.4206, "step": 20921 }, { "epoch": 1.963400900900901, "grad_norm": 0.8960564222254459, "learning_rate": 3.217397860316499e-06, "loss": 0.4083, "step": 20922 }, { "epoch": 1.9634947447447448, "grad_norm": 1.0510006848156013, "learning_rate": 3.216887782516419e-06, "loss": 0.3964, "step": 20923 }, { "epoch": 1.9635885885885886, "grad_norm": 1.036437786066818, "learning_rate": 3.216377725976595e-06, "loss": 0.3374, "step": 20924 }, { "epoch": 1.9636824324324325, "grad_norm": 0.8420750088950186, "learning_rate": 3.21586769070311e-06, "loss": 0.3899, "step": 20925 }, { "epoch": 1.9637762762762763, "grad_norm": 0.9304332126526839, "learning_rate": 3.2153576767020446e-06, "loss": 0.3934, "step": 20926 }, { "epoch": 1.96387012012012, "grad_norm": 0.8709470821310119, "learning_rate": 3.2148476839794775e-06, "loss": 0.3918, "step": 20927 }, { "epoch": 1.9639639639639639, "grad_norm": 0.9483044749869302, "learning_rate": 3.2143377125414936e-06, "loss": 0.4163, "step": 20928 }, { "epoch": 1.9640578078078077, "grad_norm": 1.0713043701466352, "learning_rate": 3.2138277623941716e-06, "loss": 0.3268, "step": 20929 }, { "epoch": 1.9641516516516515, "grad_norm": 0.8982923627049835, "learning_rate": 3.213317833543589e-06, "loss": 0.3783, "step": 20930 }, { "epoch": 1.9642454954954955, "grad_norm": 0.9925017453531927, "learning_rate": 3.212807925995831e-06, "loss": 0.4059, "step": 20931 }, { "epoch": 1.9643393393393394, "grad_norm": 0.9593303869587714, "learning_rate": 3.212298039756974e-06, "loss": 0.37, "step": 20932 }, { "epoch": 1.9644331831831832, "grad_norm": 0.9962334454991446, "learning_rate": 3.211788174833096e-06, "loss": 0.3633, "step": 20933 }, { "epoch": 1.964527027027027, "grad_norm": 1.1590359364500968, "learning_rate": 3.21127833123028e-06, "loss": 0.3882, "step": 20934 }, { "epoch": 1.964620870870871, "grad_norm": 0.9270764666519252, "learning_rate": 3.2107685089546027e-06, "loss": 0.3835, "step": 20935 }, { "epoch": 1.9647147147147148, "grad_norm": 0.9776919273724434, "learning_rate": 3.2102587080121407e-06, "loss": 0.4008, "step": 20936 }, { "epoch": 1.9648085585585586, "grad_norm": 1.5172069811464701, "learning_rate": 3.2097489284089767e-06, "loss": 0.4184, "step": 20937 }, { "epoch": 1.9649024024024024, "grad_norm": 1.0238242637579575, "learning_rate": 3.2092391701511883e-06, "loss": 0.3852, "step": 20938 }, { "epoch": 1.9649962462462462, "grad_norm": 0.9003958228282443, "learning_rate": 3.2087294332448495e-06, "loss": 0.401, "step": 20939 }, { "epoch": 1.96509009009009, "grad_norm": 1.1369202739508628, "learning_rate": 3.2082197176960416e-06, "loss": 0.3673, "step": 20940 }, { "epoch": 1.9651839339339339, "grad_norm": 1.1432356736033649, "learning_rate": 3.2077100235108416e-06, "loss": 0.3553, "step": 20941 }, { "epoch": 1.9652777777777777, "grad_norm": 2.2072691181075124, "learning_rate": 3.2072003506953236e-06, "loss": 0.3591, "step": 20942 }, { "epoch": 1.9653716216216215, "grad_norm": 0.9434179770746467, "learning_rate": 3.2066906992555693e-06, "loss": 0.3959, "step": 20943 }, { "epoch": 1.9654654654654653, "grad_norm": 1.0448394146008313, "learning_rate": 3.206181069197652e-06, "loss": 0.3573, "step": 20944 }, { "epoch": 1.9655593093093093, "grad_norm": 0.8740950699134148, "learning_rate": 3.2056714605276485e-06, "loss": 0.3981, "step": 20945 }, { "epoch": 1.9656531531531531, "grad_norm": 1.0973121826302425, "learning_rate": 3.2051618732516364e-06, "loss": 0.3873, "step": 20946 }, { "epoch": 1.965746996996997, "grad_norm": 1.2365523999932253, "learning_rate": 3.2046523073756903e-06, "loss": 0.3676, "step": 20947 }, { "epoch": 1.965840840840841, "grad_norm": 1.0971610254062658, "learning_rate": 3.2041427629058842e-06, "loss": 0.3575, "step": 20948 }, { "epoch": 1.9659346846846848, "grad_norm": 0.9828254958495238, "learning_rate": 3.2036332398482972e-06, "loss": 0.3676, "step": 20949 }, { "epoch": 1.9660285285285286, "grad_norm": 0.9146290016442775, "learning_rate": 3.203123738209002e-06, "loss": 0.4261, "step": 20950 }, { "epoch": 1.9661223723723724, "grad_norm": 1.6501029545049337, "learning_rate": 3.202614257994073e-06, "loss": 0.4276, "step": 20951 }, { "epoch": 1.9662162162162162, "grad_norm": 1.5323893470363394, "learning_rate": 3.2021047992095877e-06, "loss": 0.4209, "step": 20952 }, { "epoch": 1.96631006006006, "grad_norm": 0.9320394585748085, "learning_rate": 3.201595361861618e-06, "loss": 0.3601, "step": 20953 }, { "epoch": 1.9664039039039038, "grad_norm": 1.3880024962712176, "learning_rate": 3.2010859459562366e-06, "loss": 0.3507, "step": 20954 }, { "epoch": 1.9664977477477477, "grad_norm": 0.8317080784107398, "learning_rate": 3.2005765514995212e-06, "loss": 0.3554, "step": 20955 }, { "epoch": 1.9665915915915915, "grad_norm": 1.0139454829249461, "learning_rate": 3.2000671784975427e-06, "loss": 0.3383, "step": 20956 }, { "epoch": 1.9666854354354353, "grad_norm": 0.9145841776274267, "learning_rate": 3.1995578269563743e-06, "loss": 0.3501, "step": 20957 }, { "epoch": 1.9667792792792793, "grad_norm": 1.121769802273827, "learning_rate": 3.199048496882091e-06, "loss": 0.4012, "step": 20958 }, { "epoch": 1.9668731231231231, "grad_norm": 1.0746409183963592, "learning_rate": 3.198539188280765e-06, "loss": 0.4106, "step": 20959 }, { "epoch": 1.966966966966967, "grad_norm": 0.9170501674643601, "learning_rate": 3.198029901158466e-06, "loss": 0.4054, "step": 20960 }, { "epoch": 1.967060810810811, "grad_norm": 0.8053541130450103, "learning_rate": 3.1975206355212703e-06, "loss": 0.3505, "step": 20961 }, { "epoch": 1.9671546546546548, "grad_norm": 1.1639072838413327, "learning_rate": 3.1970113913752483e-06, "loss": 0.4062, "step": 20962 }, { "epoch": 1.9672484984984986, "grad_norm": 0.9812703550309968, "learning_rate": 3.196502168726471e-06, "loss": 0.4152, "step": 20963 }, { "epoch": 1.9673423423423424, "grad_norm": 0.9628548726082636, "learning_rate": 3.195992967581011e-06, "loss": 0.3975, "step": 20964 }, { "epoch": 1.9674361861861862, "grad_norm": 0.9570166593259077, "learning_rate": 3.1954837879449393e-06, "loss": 0.3576, "step": 20965 }, { "epoch": 1.96753003003003, "grad_norm": 1.5964917372005318, "learning_rate": 3.1949746298243245e-06, "loss": 0.426, "step": 20966 }, { "epoch": 1.9676238738738738, "grad_norm": 0.865894721186402, "learning_rate": 3.1944654932252423e-06, "loss": 0.4084, "step": 20967 }, { "epoch": 1.9677177177177176, "grad_norm": 2.200413504537943, "learning_rate": 3.1939563781537595e-06, "loss": 0.4015, "step": 20968 }, { "epoch": 1.9678115615615615, "grad_norm": 1.042014230669028, "learning_rate": 3.1934472846159477e-06, "loss": 0.3791, "step": 20969 }, { "epoch": 1.9679054054054053, "grad_norm": 1.4783586149881216, "learning_rate": 3.1929382126178763e-06, "loss": 0.392, "step": 20970 }, { "epoch": 1.9679992492492493, "grad_norm": 0.868762051751484, "learning_rate": 3.192429162165617e-06, "loss": 0.3932, "step": 20971 }, { "epoch": 1.968093093093093, "grad_norm": 1.0352610733759486, "learning_rate": 3.191920133265234e-06, "loss": 0.3759, "step": 20972 }, { "epoch": 1.968186936936937, "grad_norm": 0.8814029184785375, "learning_rate": 3.1914111259228027e-06, "loss": 0.3729, "step": 20973 }, { "epoch": 1.9682807807807807, "grad_norm": 0.8306201729026369, "learning_rate": 3.190902140144389e-06, "loss": 0.3896, "step": 20974 }, { "epoch": 1.9683746246246248, "grad_norm": 0.9127123489149048, "learning_rate": 3.190393175936061e-06, "loss": 0.4464, "step": 20975 }, { "epoch": 1.9684684684684686, "grad_norm": 0.9448884658450565, "learning_rate": 3.189884233303889e-06, "loss": 0.3852, "step": 20976 }, { "epoch": 1.9685623123123124, "grad_norm": 0.8830152811394599, "learning_rate": 3.189375312253941e-06, "loss": 0.3609, "step": 20977 }, { "epoch": 1.9686561561561562, "grad_norm": 1.1272194751668834, "learning_rate": 3.188866412792282e-06, "loss": 0.3964, "step": 20978 }, { "epoch": 1.96875, "grad_norm": 1.2566121334510345, "learning_rate": 3.188357534924984e-06, "loss": 0.3404, "step": 20979 }, { "epoch": 1.9688438438438438, "grad_norm": 1.5032302262155213, "learning_rate": 3.187848678658112e-06, "loss": 0.3846, "step": 20980 }, { "epoch": 1.9689376876876876, "grad_norm": 0.9565100343165998, "learning_rate": 3.1873398439977326e-06, "loss": 0.3834, "step": 20981 }, { "epoch": 1.9690315315315314, "grad_norm": 1.812821734385052, "learning_rate": 3.186831030949915e-06, "loss": 0.4116, "step": 20982 }, { "epoch": 1.9691253753753752, "grad_norm": 1.1476618629525523, "learning_rate": 3.186322239520724e-06, "loss": 0.386, "step": 20983 }, { "epoch": 1.9692192192192193, "grad_norm": 1.4163021236987232, "learning_rate": 3.1858134697162245e-06, "loss": 0.4028, "step": 20984 }, { "epoch": 1.969313063063063, "grad_norm": 1.3565611020123445, "learning_rate": 3.185304721542486e-06, "loss": 0.3738, "step": 20985 }, { "epoch": 1.969406906906907, "grad_norm": 0.8647253151898514, "learning_rate": 3.184795995005574e-06, "loss": 0.4019, "step": 20986 }, { "epoch": 1.9695007507507507, "grad_norm": 1.0657777258348393, "learning_rate": 3.184287290111551e-06, "loss": 0.3102, "step": 20987 }, { "epoch": 1.9695945945945947, "grad_norm": 1.0773167193153592, "learning_rate": 3.1837786068664855e-06, "loss": 0.3912, "step": 20988 }, { "epoch": 1.9696884384384385, "grad_norm": 0.9613974030481792, "learning_rate": 3.1832699452764417e-06, "loss": 0.4076, "step": 20989 }, { "epoch": 1.9697822822822824, "grad_norm": 0.9055577378987143, "learning_rate": 3.182761305347482e-06, "loss": 0.3291, "step": 20990 }, { "epoch": 1.9698761261261262, "grad_norm": 1.0126415719622914, "learning_rate": 3.1822526870856753e-06, "loss": 0.4254, "step": 20991 }, { "epoch": 1.96996996996997, "grad_norm": 1.089053581274512, "learning_rate": 3.181744090497084e-06, "loss": 0.3796, "step": 20992 }, { "epoch": 1.9700638138138138, "grad_norm": 2.3952031557580615, "learning_rate": 3.1812355155877705e-06, "loss": 0.4027, "step": 20993 }, { "epoch": 1.9701576576576576, "grad_norm": 0.9403283233986142, "learning_rate": 3.1807269623638003e-06, "loss": 0.3556, "step": 20994 }, { "epoch": 1.9702515015015014, "grad_norm": 1.1260881507970155, "learning_rate": 3.1802184308312377e-06, "loss": 0.3776, "step": 20995 }, { "epoch": 1.9703453453453452, "grad_norm": 1.0192933030266424, "learning_rate": 3.1797099209961423e-06, "loss": 0.3709, "step": 20996 }, { "epoch": 1.970439189189189, "grad_norm": 0.9300425627388054, "learning_rate": 3.1792014328645827e-06, "loss": 0.3909, "step": 20997 }, { "epoch": 1.970533033033033, "grad_norm": 0.88739385397283, "learning_rate": 3.178692966442618e-06, "loss": 0.3687, "step": 20998 }, { "epoch": 1.9706268768768769, "grad_norm": 0.9419905719459502, "learning_rate": 3.1781845217363105e-06, "loss": 0.356, "step": 20999 }, { "epoch": 1.9707207207207207, "grad_norm": 0.8127835086236049, "learning_rate": 3.177676098751724e-06, "loss": 0.4094, "step": 21000 }, { "epoch": 1.9708145645645647, "grad_norm": 0.8237710658969551, "learning_rate": 3.177167697494921e-06, "loss": 0.349, "step": 21001 }, { "epoch": 1.9709084084084085, "grad_norm": 0.9487569176323247, "learning_rate": 3.1766593179719586e-06, "loss": 0.3843, "step": 21002 }, { "epoch": 1.9710022522522523, "grad_norm": 0.8366504766764565, "learning_rate": 3.176150960188905e-06, "loss": 0.3461, "step": 21003 }, { "epoch": 1.9710960960960962, "grad_norm": 1.0489667384972963, "learning_rate": 3.1756426241518173e-06, "loss": 0.4313, "step": 21004 }, { "epoch": 1.97118993993994, "grad_norm": 1.0135740068991437, "learning_rate": 3.1751343098667563e-06, "loss": 0.3594, "step": 21005 }, { "epoch": 1.9712837837837838, "grad_norm": 0.8726903034060168, "learning_rate": 3.1746260173397855e-06, "loss": 0.3298, "step": 21006 }, { "epoch": 1.9713776276276276, "grad_norm": 0.9574107818684375, "learning_rate": 3.174117746576963e-06, "loss": 0.3832, "step": 21007 }, { "epoch": 1.9714714714714714, "grad_norm": 0.9776642500858815, "learning_rate": 3.1736094975843473e-06, "loss": 0.3741, "step": 21008 }, { "epoch": 1.9715653153153152, "grad_norm": 1.0841724416136234, "learning_rate": 3.173101270368003e-06, "loss": 0.3974, "step": 21009 }, { "epoch": 1.971659159159159, "grad_norm": 0.9874357701777688, "learning_rate": 3.172593064933987e-06, "loss": 0.4518, "step": 21010 }, { "epoch": 1.971753003003003, "grad_norm": 0.9667138598796675, "learning_rate": 3.1720848812883577e-06, "loss": 0.4175, "step": 21011 }, { "epoch": 1.9718468468468469, "grad_norm": 0.9203971064738294, "learning_rate": 3.171576719437177e-06, "loss": 0.3109, "step": 21012 }, { "epoch": 1.9719406906906907, "grad_norm": 0.9074905968866297, "learning_rate": 3.171068579386502e-06, "loss": 0.3647, "step": 21013 }, { "epoch": 1.9720345345345347, "grad_norm": 0.8522163852280814, "learning_rate": 3.1705604611423894e-06, "loss": 0.4003, "step": 21014 }, { "epoch": 1.9721283783783785, "grad_norm": 1.0027745171298694, "learning_rate": 3.170052364710902e-06, "loss": 0.3757, "step": 21015 }, { "epoch": 1.9722222222222223, "grad_norm": 2.0489653445222658, "learning_rate": 3.1695442900980956e-06, "loss": 0.3492, "step": 21016 }, { "epoch": 1.9723160660660661, "grad_norm": 3.5824225066785953, "learning_rate": 3.169036237310027e-06, "loss": 0.4023, "step": 21017 }, { "epoch": 1.97240990990991, "grad_norm": 0.9206201841395063, "learning_rate": 3.168528206352756e-06, "loss": 0.4254, "step": 21018 }, { "epoch": 1.9725037537537538, "grad_norm": 1.0006331776394248, "learning_rate": 3.168020197232339e-06, "loss": 0.4331, "step": 21019 }, { "epoch": 1.9725975975975976, "grad_norm": 1.0401555712073765, "learning_rate": 3.1675122099548305e-06, "loss": 0.3957, "step": 21020 }, { "epoch": 1.9726914414414414, "grad_norm": 0.9693409985079965, "learning_rate": 3.1670042445262917e-06, "loss": 0.421, "step": 21021 }, { "epoch": 1.9727852852852852, "grad_norm": 1.0676857493362477, "learning_rate": 3.166496300952776e-06, "loss": 0.4003, "step": 21022 }, { "epoch": 1.972879129129129, "grad_norm": 1.3112314407569141, "learning_rate": 3.165988379240341e-06, "loss": 0.4117, "step": 21023 }, { "epoch": 1.972972972972973, "grad_norm": 0.9681451016486535, "learning_rate": 3.1654804793950427e-06, "loss": 0.3459, "step": 21024 }, { "epoch": 1.9730668168168168, "grad_norm": 1.0205300975728882, "learning_rate": 3.164972601422937e-06, "loss": 0.3866, "step": 21025 }, { "epoch": 1.9731606606606606, "grad_norm": 1.4668128118949293, "learning_rate": 3.164464745330077e-06, "loss": 0.4342, "step": 21026 }, { "epoch": 1.9732545045045045, "grad_norm": 1.661931400831183, "learning_rate": 3.163956911122521e-06, "loss": 0.4102, "step": 21027 }, { "epoch": 1.9733483483483485, "grad_norm": 0.9672081089793239, "learning_rate": 3.1634490988063238e-06, "loss": 0.3927, "step": 21028 }, { "epoch": 1.9734421921921923, "grad_norm": 0.8290708334537847, "learning_rate": 3.162941308387538e-06, "loss": 0.3304, "step": 21029 }, { "epoch": 1.9735360360360361, "grad_norm": 1.0269639323125015, "learning_rate": 3.1624335398722193e-06, "loss": 0.3585, "step": 21030 }, { "epoch": 1.97362987987988, "grad_norm": 1.0558997616989305, "learning_rate": 3.161925793266423e-06, "loss": 0.3863, "step": 21031 }, { "epoch": 1.9737237237237237, "grad_norm": 0.8959626489256712, "learning_rate": 3.161418068576199e-06, "loss": 0.3649, "step": 21032 }, { "epoch": 1.9738175675675675, "grad_norm": 0.9227968811534114, "learning_rate": 3.1609103658076064e-06, "loss": 0.3887, "step": 21033 }, { "epoch": 1.9739114114114114, "grad_norm": 1.0561077850379235, "learning_rate": 3.1604026849666957e-06, "loss": 0.4369, "step": 21034 }, { "epoch": 1.9740052552552552, "grad_norm": 0.8605151398201177, "learning_rate": 3.159895026059519e-06, "loss": 0.3933, "step": 21035 }, { "epoch": 1.974099099099099, "grad_norm": 0.9053472123123858, "learning_rate": 3.159387389092132e-06, "loss": 0.385, "step": 21036 }, { "epoch": 1.9741929429429428, "grad_norm": 0.9583783555154923, "learning_rate": 3.158879774070585e-06, "loss": 0.4304, "step": 21037 }, { "epoch": 1.9742867867867868, "grad_norm": 0.9129220247521309, "learning_rate": 3.1583721810009295e-06, "loss": 0.381, "step": 21038 }, { "epoch": 1.9743806306306306, "grad_norm": 0.9780024002315032, "learning_rate": 3.1578646098892217e-06, "loss": 0.4227, "step": 21039 }, { "epoch": 1.9744744744744744, "grad_norm": 0.9169901878953257, "learning_rate": 3.1573570607415106e-06, "loss": 0.3421, "step": 21040 }, { "epoch": 1.9745683183183185, "grad_norm": 0.8527775011608049, "learning_rate": 3.156849533563847e-06, "loss": 0.3888, "step": 21041 }, { "epoch": 1.9746621621621623, "grad_norm": 0.8642109479805636, "learning_rate": 3.156342028362284e-06, "loss": 0.4176, "step": 21042 }, { "epoch": 1.974756006006006, "grad_norm": 0.955179068446192, "learning_rate": 3.155834545142873e-06, "loss": 0.4032, "step": 21043 }, { "epoch": 1.97484984984985, "grad_norm": 0.9683122167148795, "learning_rate": 3.1553270839116613e-06, "loss": 0.3929, "step": 21044 }, { "epoch": 1.9749436936936937, "grad_norm": 0.8972388560587798, "learning_rate": 3.1548196446747036e-06, "loss": 0.3902, "step": 21045 }, { "epoch": 1.9750375375375375, "grad_norm": 1.0138600143568306, "learning_rate": 3.154312227438049e-06, "loss": 0.3816, "step": 21046 }, { "epoch": 1.9751313813813813, "grad_norm": 0.9283963518425087, "learning_rate": 3.1538048322077454e-06, "loss": 0.3888, "step": 21047 }, { "epoch": 1.9752252252252251, "grad_norm": 0.8424621525234481, "learning_rate": 3.1532974589898447e-06, "loss": 0.3928, "step": 21048 }, { "epoch": 1.975319069069069, "grad_norm": 0.9553631841807674, "learning_rate": 3.1527901077903964e-06, "loss": 0.4021, "step": 21049 }, { "epoch": 1.9754129129129128, "grad_norm": 1.106440038466254, "learning_rate": 3.152282778615447e-06, "loss": 0.3971, "step": 21050 }, { "epoch": 1.9755067567567568, "grad_norm": 0.8964292530942887, "learning_rate": 3.151775471471049e-06, "loss": 0.3975, "step": 21051 }, { "epoch": 1.9756006006006006, "grad_norm": 1.5504490719809385, "learning_rate": 3.1512681863632503e-06, "loss": 0.4306, "step": 21052 }, { "epoch": 1.9756944444444444, "grad_norm": 0.9928074531104296, "learning_rate": 3.150760923298096e-06, "loss": 0.3892, "step": 21053 }, { "epoch": 1.9757882882882885, "grad_norm": 1.1245190283483972, "learning_rate": 3.1502536822816406e-06, "loss": 0.3902, "step": 21054 }, { "epoch": 1.9758821321321323, "grad_norm": 0.8124820672669906, "learning_rate": 3.149746463319927e-06, "loss": 0.3668, "step": 21055 }, { "epoch": 1.975975975975976, "grad_norm": 0.9876784516079558, "learning_rate": 3.149239266419002e-06, "loss": 0.3798, "step": 21056 }, { "epoch": 1.9760698198198199, "grad_norm": 1.085828883149265, "learning_rate": 3.148732091584916e-06, "loss": 0.3714, "step": 21057 }, { "epoch": 1.9761636636636637, "grad_norm": 0.8745657057214219, "learning_rate": 3.148224938823716e-06, "loss": 0.3669, "step": 21058 }, { "epoch": 1.9762575075075075, "grad_norm": 1.0056244949329587, "learning_rate": 3.1477178081414466e-06, "loss": 0.3767, "step": 21059 }, { "epoch": 1.9763513513513513, "grad_norm": 0.970861228676062, "learning_rate": 3.1472106995441578e-06, "loss": 0.4037, "step": 21060 }, { "epoch": 1.9764451951951951, "grad_norm": 1.0231941580403703, "learning_rate": 3.1467036130378935e-06, "loss": 0.3516, "step": 21061 }, { "epoch": 1.976539039039039, "grad_norm": 0.943379532431651, "learning_rate": 3.146196548628698e-06, "loss": 0.3981, "step": 21062 }, { "epoch": 1.9766328828828827, "grad_norm": 0.854189884500183, "learning_rate": 3.1456895063226218e-06, "loss": 0.4192, "step": 21063 }, { "epoch": 1.9767267267267268, "grad_norm": 0.9412604830157473, "learning_rate": 3.1451824861257075e-06, "loss": 0.3655, "step": 21064 }, { "epoch": 1.9768205705705706, "grad_norm": 1.0142899769396012, "learning_rate": 3.1446754880439976e-06, "loss": 0.3496, "step": 21065 }, { "epoch": 1.9769144144144144, "grad_norm": 0.8896520198484784, "learning_rate": 3.1441685120835437e-06, "loss": 0.4081, "step": 21066 }, { "epoch": 1.9770082582582582, "grad_norm": 0.8854684107732056, "learning_rate": 3.143661558250386e-06, "loss": 0.3744, "step": 21067 }, { "epoch": 1.9771021021021022, "grad_norm": 1.1942061082201578, "learning_rate": 3.1431546265505695e-06, "loss": 0.3887, "step": 21068 }, { "epoch": 1.977195945945946, "grad_norm": 0.8800113680532002, "learning_rate": 3.1426477169901405e-06, "loss": 0.3712, "step": 21069 }, { "epoch": 1.9772897897897899, "grad_norm": 0.8794079355905225, "learning_rate": 3.1421408295751406e-06, "loss": 0.3765, "step": 21070 }, { "epoch": 1.9773836336336337, "grad_norm": 0.8388287725519111, "learning_rate": 3.141633964311612e-06, "loss": 0.3684, "step": 21071 }, { "epoch": 1.9774774774774775, "grad_norm": 0.8747775885829393, "learning_rate": 3.1411271212056027e-06, "loss": 0.3429, "step": 21072 }, { "epoch": 1.9775713213213213, "grad_norm": 0.9482493513527384, "learning_rate": 3.140620300263154e-06, "loss": 0.3544, "step": 21073 }, { "epoch": 1.977665165165165, "grad_norm": 0.8588028385753809, "learning_rate": 3.140113501490307e-06, "loss": 0.38, "step": 21074 }, { "epoch": 1.977759009009009, "grad_norm": 1.7715067163772904, "learning_rate": 3.1396067248931063e-06, "loss": 0.3812, "step": 21075 }, { "epoch": 1.9778528528528527, "grad_norm": 0.8908429694831634, "learning_rate": 3.139099970477594e-06, "loss": 0.3631, "step": 21076 }, { "epoch": 1.9779466966966965, "grad_norm": 0.8969810373824578, "learning_rate": 3.1385932382498096e-06, "loss": 0.4013, "step": 21077 }, { "epoch": 1.9780405405405406, "grad_norm": 0.9651585503643656, "learning_rate": 3.138086528215799e-06, "loss": 0.4152, "step": 21078 }, { "epoch": 1.9781343843843844, "grad_norm": 1.0154933579074832, "learning_rate": 3.1375798403816014e-06, "loss": 0.3998, "step": 21079 }, { "epoch": 1.9782282282282282, "grad_norm": 0.83070836725453, "learning_rate": 3.1370731747532587e-06, "loss": 0.3516, "step": 21080 }, { "epoch": 1.9783220720720722, "grad_norm": 0.9630239276496766, "learning_rate": 3.136566531336812e-06, "loss": 0.3949, "step": 21081 }, { "epoch": 1.978415915915916, "grad_norm": 1.082356104398653, "learning_rate": 3.1360599101383014e-06, "loss": 0.3846, "step": 21082 }, { "epoch": 1.9785097597597598, "grad_norm": 0.9604477564243936, "learning_rate": 3.135553311163767e-06, "loss": 0.3547, "step": 21083 }, { "epoch": 1.9786036036036037, "grad_norm": 1.1310173217012556, "learning_rate": 3.1350467344192516e-06, "loss": 0.3773, "step": 21084 }, { "epoch": 1.9786974474474475, "grad_norm": 1.025664904719893, "learning_rate": 3.1345401799107933e-06, "loss": 0.3843, "step": 21085 }, { "epoch": 1.9787912912912913, "grad_norm": 0.8496350104942489, "learning_rate": 3.134033647644431e-06, "loss": 0.3743, "step": 21086 }, { "epoch": 1.978885135135135, "grad_norm": 1.1234656137789574, "learning_rate": 3.1335271376262053e-06, "loss": 0.406, "step": 21087 }, { "epoch": 1.978978978978979, "grad_norm": 0.825855881389025, "learning_rate": 3.1330206498621565e-06, "loss": 0.3471, "step": 21088 }, { "epoch": 1.9790728228228227, "grad_norm": 1.0511257048486369, "learning_rate": 3.1325141843583194e-06, "loss": 0.3943, "step": 21089 }, { "epoch": 1.9791666666666665, "grad_norm": 1.0667179127098123, "learning_rate": 3.1320077411207385e-06, "loss": 0.3721, "step": 21090 }, { "epoch": 1.9792605105105106, "grad_norm": 0.9841850744224134, "learning_rate": 3.131501320155448e-06, "loss": 0.4257, "step": 21091 }, { "epoch": 1.9793543543543544, "grad_norm": 0.9424450875659858, "learning_rate": 3.1309949214684866e-06, "loss": 0.4029, "step": 21092 }, { "epoch": 1.9794481981981982, "grad_norm": 0.8601849168063602, "learning_rate": 3.130488545065894e-06, "loss": 0.3728, "step": 21093 }, { "epoch": 1.9795420420420422, "grad_norm": 0.8803794274831974, "learning_rate": 3.1299821909537067e-06, "loss": 0.3676, "step": 21094 }, { "epoch": 1.979635885885886, "grad_norm": 0.8643946254532482, "learning_rate": 3.129475859137959e-06, "loss": 0.3985, "step": 21095 }, { "epoch": 1.9797297297297298, "grad_norm": 0.9349630429434129, "learning_rate": 3.128969549624693e-06, "loss": 0.406, "step": 21096 }, { "epoch": 1.9798235735735736, "grad_norm": 0.8762501210450638, "learning_rate": 3.128463262419943e-06, "loss": 0.4004, "step": 21097 }, { "epoch": 1.9799174174174174, "grad_norm": 0.9451092740965545, "learning_rate": 3.127956997529745e-06, "loss": 0.3796, "step": 21098 }, { "epoch": 1.9800112612612613, "grad_norm": 0.899100617837317, "learning_rate": 3.1274507549601373e-06, "loss": 0.3728, "step": 21099 }, { "epoch": 1.980105105105105, "grad_norm": 0.8831333784494123, "learning_rate": 3.126944534717155e-06, "loss": 0.4109, "step": 21100 }, { "epoch": 1.9801989489489489, "grad_norm": 1.2375235283133559, "learning_rate": 3.1264383368068306e-06, "loss": 0.4085, "step": 21101 }, { "epoch": 1.9802927927927927, "grad_norm": 0.9942400771269282, "learning_rate": 3.1259321612352055e-06, "loss": 0.3716, "step": 21102 }, { "epoch": 1.9803866366366365, "grad_norm": 0.9914794084152776, "learning_rate": 3.125426008008311e-06, "loss": 0.3943, "step": 21103 }, { "epoch": 1.9804804804804805, "grad_norm": 0.9411319626321402, "learning_rate": 3.124919877132182e-06, "loss": 0.3986, "step": 21104 }, { "epoch": 1.9805743243243243, "grad_norm": 0.9539089811301612, "learning_rate": 3.124413768612855e-06, "loss": 0.3842, "step": 21105 }, { "epoch": 1.9806681681681682, "grad_norm": 0.9614773011836611, "learning_rate": 3.123907682456363e-06, "loss": 0.4238, "step": 21106 }, { "epoch": 1.980762012012012, "grad_norm": 1.2354290005268622, "learning_rate": 3.1234016186687386e-06, "loss": 0.4173, "step": 21107 }, { "epoch": 1.980855855855856, "grad_norm": 1.2229393396052315, "learning_rate": 3.1228955772560205e-06, "loss": 0.3294, "step": 21108 }, { "epoch": 1.9809496996996998, "grad_norm": 0.9259150559829697, "learning_rate": 3.122389558224239e-06, "loss": 0.3681, "step": 21109 }, { "epoch": 1.9810435435435436, "grad_norm": 0.967234597268066, "learning_rate": 3.121883561579426e-06, "loss": 0.4025, "step": 21110 }, { "epoch": 1.9811373873873874, "grad_norm": 0.8960929411079904, "learning_rate": 3.121377587327618e-06, "loss": 0.3965, "step": 21111 }, { "epoch": 1.9812312312312312, "grad_norm": 1.048935768733468, "learning_rate": 3.120871635474846e-06, "loss": 0.4049, "step": 21112 }, { "epoch": 1.981325075075075, "grad_norm": 0.9790593016669659, "learning_rate": 3.1203657060271408e-06, "loss": 0.3647, "step": 21113 }, { "epoch": 1.9814189189189189, "grad_norm": 0.9515661192891793, "learning_rate": 3.1198597989905377e-06, "loss": 0.3407, "step": 21114 }, { "epoch": 1.9815127627627627, "grad_norm": 0.8801481038871332, "learning_rate": 3.1193539143710682e-06, "loss": 0.413, "step": 21115 }, { "epoch": 1.9816066066066065, "grad_norm": 0.8805620702947671, "learning_rate": 3.1188480521747623e-06, "loss": 0.3677, "step": 21116 }, { "epoch": 1.9817004504504503, "grad_norm": 0.8837515376238939, "learning_rate": 3.1183422124076533e-06, "loss": 0.3987, "step": 21117 }, { "epoch": 1.9817942942942943, "grad_norm": 0.9990507030779331, "learning_rate": 3.1178363950757718e-06, "loss": 0.4009, "step": 21118 }, { "epoch": 1.9818881381381381, "grad_norm": 1.0785400434381465, "learning_rate": 3.117330600185147e-06, "loss": 0.4356, "step": 21119 }, { "epoch": 1.981981981981982, "grad_norm": 1.0311032785320864, "learning_rate": 3.116824827741812e-06, "loss": 0.4101, "step": 21120 }, { "epoch": 1.982075825825826, "grad_norm": 0.8985604744562182, "learning_rate": 3.116319077751796e-06, "loss": 0.3972, "step": 21121 }, { "epoch": 1.9821696696696698, "grad_norm": 1.0078519579167777, "learning_rate": 3.1158133502211296e-06, "loss": 0.4194, "step": 21122 }, { "epoch": 1.9822635135135136, "grad_norm": 3.974902647304459, "learning_rate": 3.1153076451558424e-06, "loss": 0.3881, "step": 21123 }, { "epoch": 1.9823573573573574, "grad_norm": 0.9826969758673776, "learning_rate": 3.114801962561964e-06, "loss": 0.4353, "step": 21124 }, { "epoch": 1.9824512012012012, "grad_norm": 0.9687978627812196, "learning_rate": 3.114296302445522e-06, "loss": 0.376, "step": 21125 }, { "epoch": 1.982545045045045, "grad_norm": 0.9938912367912491, "learning_rate": 3.113790664812549e-06, "loss": 0.3756, "step": 21126 }, { "epoch": 1.9826388888888888, "grad_norm": 0.856271150662734, "learning_rate": 3.1132850496690724e-06, "loss": 0.3638, "step": 21127 }, { "epoch": 1.9827327327327327, "grad_norm": 0.9864996966102297, "learning_rate": 3.112779457021119e-06, "loss": 0.3365, "step": 21128 }, { "epoch": 1.9828265765765765, "grad_norm": 1.121419551461249, "learning_rate": 3.1122738868747184e-06, "loss": 0.3914, "step": 21129 }, { "epoch": 1.9829204204204203, "grad_norm": 1.0852231084323771, "learning_rate": 3.1117683392358987e-06, "loss": 0.3809, "step": 21130 }, { "epoch": 1.9830142642642643, "grad_norm": 1.0822632306548763, "learning_rate": 3.1112628141106858e-06, "loss": 0.4098, "step": 21131 }, { "epoch": 1.9831081081081081, "grad_norm": 0.9728269331354057, "learning_rate": 3.110757311505111e-06, "loss": 0.3685, "step": 21132 }, { "epoch": 1.983201951951952, "grad_norm": 0.8706887553282909, "learning_rate": 3.1102518314251982e-06, "loss": 0.3713, "step": 21133 }, { "epoch": 1.983295795795796, "grad_norm": 0.9225020338249984, "learning_rate": 3.109746373876975e-06, "loss": 0.441, "step": 21134 }, { "epoch": 1.9833896396396398, "grad_norm": 1.2193610049653025, "learning_rate": 3.1092409388664693e-06, "loss": 0.3829, "step": 21135 }, { "epoch": 1.9834834834834836, "grad_norm": 0.8703197417827307, "learning_rate": 3.1087355263997054e-06, "loss": 0.3999, "step": 21136 }, { "epoch": 1.9835773273273274, "grad_norm": 0.8701374156433488, "learning_rate": 3.1082301364827095e-06, "loss": 0.4019, "step": 21137 }, { "epoch": 1.9836711711711712, "grad_norm": 0.894568034082219, "learning_rate": 3.1077247691215097e-06, "loss": 0.3689, "step": 21138 }, { "epoch": 1.983765015015015, "grad_norm": 0.8158436084957865, "learning_rate": 3.1072194243221303e-06, "loss": 0.3642, "step": 21139 }, { "epoch": 1.9838588588588588, "grad_norm": 0.8827067390123124, "learning_rate": 3.1067141020905956e-06, "loss": 0.437, "step": 21140 }, { "epoch": 1.9839527027027026, "grad_norm": 0.8632905764819072, "learning_rate": 3.1062088024329325e-06, "loss": 0.3764, "step": 21141 }, { "epoch": 1.9840465465465464, "grad_norm": 0.9634681644373888, "learning_rate": 3.105703525355165e-06, "loss": 0.3464, "step": 21142 }, { "epoch": 1.9841403903903903, "grad_norm": 0.9901975838006367, "learning_rate": 3.1051982708633144e-06, "loss": 0.3836, "step": 21143 }, { "epoch": 1.9842342342342343, "grad_norm": 0.9521082441089658, "learning_rate": 3.1046930389634107e-06, "loss": 0.3845, "step": 21144 }, { "epoch": 1.984328078078078, "grad_norm": 2.1996893370189663, "learning_rate": 3.1041878296614748e-06, "loss": 0.4022, "step": 21145 }, { "epoch": 1.984421921921922, "grad_norm": 0.9504857414584744, "learning_rate": 3.103682642963529e-06, "loss": 0.4034, "step": 21146 }, { "epoch": 1.9845157657657657, "grad_norm": 0.8807054849790881, "learning_rate": 3.1031774788755996e-06, "loss": 0.3542, "step": 21147 }, { "epoch": 1.9846096096096097, "grad_norm": 1.0581087860683152, "learning_rate": 3.102672337403708e-06, "loss": 0.4016, "step": 21148 }, { "epoch": 1.9847034534534536, "grad_norm": 1.0373055145815084, "learning_rate": 3.1021672185538765e-06, "loss": 0.3749, "step": 21149 }, { "epoch": 1.9847972972972974, "grad_norm": 1.0385071437389937, "learning_rate": 3.1016621223321296e-06, "loss": 0.4092, "step": 21150 }, { "epoch": 1.9848911411411412, "grad_norm": 0.9760583896545245, "learning_rate": 3.1011570487444887e-06, "loss": 0.4172, "step": 21151 }, { "epoch": 1.984984984984985, "grad_norm": 0.9801580234875812, "learning_rate": 3.100651997796975e-06, "loss": 0.3644, "step": 21152 }, { "epoch": 1.9850788288288288, "grad_norm": 0.9139424284758235, "learning_rate": 3.1001469694956123e-06, "loss": 0.3797, "step": 21153 }, { "epoch": 1.9851726726726726, "grad_norm": 1.1184194939495138, "learning_rate": 3.0996419638464208e-06, "loss": 0.3729, "step": 21154 }, { "epoch": 1.9852665165165164, "grad_norm": 1.000655069594861, "learning_rate": 3.0991369808554194e-06, "loss": 0.3838, "step": 21155 }, { "epoch": 1.9853603603603602, "grad_norm": 0.9772445771710879, "learning_rate": 3.098632020528634e-06, "loss": 0.4166, "step": 21156 }, { "epoch": 1.985454204204204, "grad_norm": 1.1593937642217504, "learning_rate": 3.098127082872083e-06, "loss": 0.4459, "step": 21157 }, { "epoch": 1.985548048048048, "grad_norm": 0.9300600016661413, "learning_rate": 3.097622167891785e-06, "loss": 0.3606, "step": 21158 }, { "epoch": 1.9856418918918919, "grad_norm": 1.127590648771841, "learning_rate": 3.097117275593763e-06, "loss": 0.4013, "step": 21159 }, { "epoch": 1.9857357357357357, "grad_norm": 0.8956354610236562, "learning_rate": 3.0966124059840356e-06, "loss": 0.4051, "step": 21160 }, { "epoch": 1.9858295795795797, "grad_norm": 0.9803811999640244, "learning_rate": 3.0961075590686206e-06, "loss": 0.3455, "step": 21161 }, { "epoch": 1.9859234234234235, "grad_norm": 1.039192945901374, "learning_rate": 3.0956027348535416e-06, "loss": 0.4061, "step": 21162 }, { "epoch": 1.9860172672672673, "grad_norm": 1.0727022352780506, "learning_rate": 3.095097933344815e-06, "loss": 0.3964, "step": 21163 }, { "epoch": 1.9861111111111112, "grad_norm": 0.8221550926213147, "learning_rate": 3.094593154548459e-06, "loss": 0.3747, "step": 21164 }, { "epoch": 1.986204954954955, "grad_norm": 0.9561113818709762, "learning_rate": 3.094088398470494e-06, "loss": 0.4329, "step": 21165 }, { "epoch": 1.9862987987987988, "grad_norm": 0.9440523892784379, "learning_rate": 3.093583665116937e-06, "loss": 0.3546, "step": 21166 }, { "epoch": 1.9863926426426426, "grad_norm": 1.028147454016659, "learning_rate": 3.093078954493805e-06, "loss": 0.4274, "step": 21167 }, { "epoch": 1.9864864864864864, "grad_norm": 1.025517799784353, "learning_rate": 3.092574266607119e-06, "loss": 0.3767, "step": 21168 }, { "epoch": 1.9865803303303302, "grad_norm": 1.1821195937395566, "learning_rate": 3.0920696014628935e-06, "loss": 0.3671, "step": 21169 }, { "epoch": 1.986674174174174, "grad_norm": 1.174951366719084, "learning_rate": 3.091564959067147e-06, "loss": 0.3639, "step": 21170 }, { "epoch": 1.986768018018018, "grad_norm": 1.0368051353809224, "learning_rate": 3.0910603394258964e-06, "loss": 0.4259, "step": 21171 }, { "epoch": 1.9868618618618619, "grad_norm": 0.8815955891763656, "learning_rate": 3.0905557425451583e-06, "loss": 0.3745, "step": 21172 }, { "epoch": 1.9869557057057057, "grad_norm": 0.8927966158444623, "learning_rate": 3.0900511684309464e-06, "loss": 0.3875, "step": 21173 }, { "epoch": 1.9870495495495497, "grad_norm": 0.8912048570940175, "learning_rate": 3.0895466170892818e-06, "loss": 0.3608, "step": 21174 }, { "epoch": 1.9871433933933935, "grad_norm": 1.1570585566487395, "learning_rate": 3.0890420885261775e-06, "loss": 0.3899, "step": 21175 }, { "epoch": 1.9872372372372373, "grad_norm": 1.1833288747830404, "learning_rate": 3.088537582747647e-06, "loss": 0.38, "step": 21176 }, { "epoch": 1.9873310810810811, "grad_norm": 0.9316473844574313, "learning_rate": 3.088033099759711e-06, "loss": 0.3807, "step": 21177 }, { "epoch": 1.987424924924925, "grad_norm": 0.8653420220195212, "learning_rate": 3.0875286395683806e-06, "loss": 0.3582, "step": 21178 }, { "epoch": 1.9875187687687688, "grad_norm": 0.9834384464434694, "learning_rate": 3.0870242021796694e-06, "loss": 0.4014, "step": 21179 }, { "epoch": 1.9876126126126126, "grad_norm": 0.9903786156676772, "learning_rate": 3.0865197875995955e-06, "loss": 0.3751, "step": 21180 }, { "epoch": 1.9877064564564564, "grad_norm": 0.9313469015045877, "learning_rate": 3.086015395834171e-06, "loss": 0.4402, "step": 21181 }, { "epoch": 1.9878003003003002, "grad_norm": 1.1608613064760405, "learning_rate": 3.085511026889409e-06, "loss": 0.3858, "step": 21182 }, { "epoch": 1.987894144144144, "grad_norm": 0.9753925480279642, "learning_rate": 3.0850066807713258e-06, "loss": 0.3994, "step": 21183 }, { "epoch": 1.987987987987988, "grad_norm": 1.1465514043667546, "learning_rate": 3.0845023574859346e-06, "loss": 0.3721, "step": 21184 }, { "epoch": 1.9880818318318318, "grad_norm": 1.2012133468230646, "learning_rate": 3.083998057039244e-06, "loss": 0.4046, "step": 21185 }, { "epoch": 1.9881756756756757, "grad_norm": 1.0413191762784906, "learning_rate": 3.083493779437272e-06, "loss": 0.4031, "step": 21186 }, { "epoch": 1.9882695195195195, "grad_norm": 1.0310057141593583, "learning_rate": 3.08298952468603e-06, "loss": 0.4002, "step": 21187 }, { "epoch": 1.9883633633633635, "grad_norm": 0.8213402672090445, "learning_rate": 3.082485292791527e-06, "loss": 0.3439, "step": 21188 }, { "epoch": 1.9884572072072073, "grad_norm": 1.0176147835191869, "learning_rate": 3.0819810837597796e-06, "loss": 0.3446, "step": 21189 }, { "epoch": 1.9885510510510511, "grad_norm": 1.0527712686096262, "learning_rate": 3.081476897596797e-06, "loss": 0.4127, "step": 21190 }, { "epoch": 1.988644894894895, "grad_norm": 0.9232768291167365, "learning_rate": 3.0809727343085904e-06, "loss": 0.3746, "step": 21191 }, { "epoch": 1.9887387387387387, "grad_norm": 1.252443638208367, "learning_rate": 3.080468593901173e-06, "loss": 0.3822, "step": 21192 }, { "epoch": 1.9888325825825826, "grad_norm": 0.9565546425747883, "learning_rate": 3.0799644763805546e-06, "loss": 0.4075, "step": 21193 }, { "epoch": 1.9889264264264264, "grad_norm": 0.9586056618904449, "learning_rate": 3.0794603817527434e-06, "loss": 0.3704, "step": 21194 }, { "epoch": 1.9890202702702702, "grad_norm": 0.9417818714339798, "learning_rate": 3.078956310023754e-06, "loss": 0.3777, "step": 21195 }, { "epoch": 1.989114114114114, "grad_norm": 0.8871403469962792, "learning_rate": 3.078452261199595e-06, "loss": 0.3768, "step": 21196 }, { "epoch": 1.9892079579579578, "grad_norm": 0.9481588293917156, "learning_rate": 3.0779482352862742e-06, "loss": 0.4245, "step": 21197 }, { "epoch": 1.9893018018018018, "grad_norm": 1.0021689252477761, "learning_rate": 3.0774442322898046e-06, "loss": 0.4144, "step": 21198 }, { "epoch": 1.9893956456456456, "grad_norm": 0.8510972778619964, "learning_rate": 3.076940252216193e-06, "loss": 0.4136, "step": 21199 }, { "epoch": 1.9894894894894894, "grad_norm": 1.2854687109117016, "learning_rate": 3.0764362950714476e-06, "loss": 0.3566, "step": 21200 }, { "epoch": 1.9895833333333335, "grad_norm": 1.0166052669766923, "learning_rate": 3.0759323608615804e-06, "loss": 0.4053, "step": 21201 }, { "epoch": 1.9896771771771773, "grad_norm": 0.9011012740580673, "learning_rate": 3.075428449592598e-06, "loss": 0.4094, "step": 21202 }, { "epoch": 1.989771021021021, "grad_norm": 1.0139413745432397, "learning_rate": 3.074924561270508e-06, "loss": 0.3821, "step": 21203 }, { "epoch": 1.989864864864865, "grad_norm": 1.2116445305886576, "learning_rate": 3.07442069590132e-06, "loss": 0.4229, "step": 21204 }, { "epoch": 1.9899587087087087, "grad_norm": 0.9322468834319562, "learning_rate": 3.0739168534910406e-06, "loss": 0.3493, "step": 21205 }, { "epoch": 1.9900525525525525, "grad_norm": 0.8880166981016785, "learning_rate": 3.073413034045675e-06, "loss": 0.3852, "step": 21206 }, { "epoch": 1.9901463963963963, "grad_norm": 0.957994142778327, "learning_rate": 3.072909237571235e-06, "loss": 0.3465, "step": 21207 }, { "epoch": 1.9902402402402402, "grad_norm": 1.0104153581581092, "learning_rate": 3.0724054640737244e-06, "loss": 0.4048, "step": 21208 }, { "epoch": 1.990334084084084, "grad_norm": 0.9165095559996591, "learning_rate": 3.07190171355915e-06, "loss": 0.3965, "step": 21209 }, { "epoch": 1.9904279279279278, "grad_norm": 1.0064000856424073, "learning_rate": 3.0713979860335193e-06, "loss": 0.4417, "step": 21210 }, { "epoch": 1.9905217717717718, "grad_norm": 0.9484709375200173, "learning_rate": 3.070894281502837e-06, "loss": 0.4297, "step": 21211 }, { "epoch": 1.9906156156156156, "grad_norm": 0.9672324673972548, "learning_rate": 3.0703905999731076e-06, "loss": 0.3248, "step": 21212 }, { "epoch": 1.9907094594594594, "grad_norm": 1.3786646251805577, "learning_rate": 3.0698869414503395e-06, "loss": 0.3822, "step": 21213 }, { "epoch": 1.9908033033033035, "grad_norm": 0.942945351555728, "learning_rate": 3.069383305940537e-06, "loss": 0.4111, "step": 21214 }, { "epoch": 1.9908971471471473, "grad_norm": 0.9384626548314539, "learning_rate": 3.0688796934497032e-06, "loss": 0.3792, "step": 21215 }, { "epoch": 1.990990990990991, "grad_norm": 0.8918027643492344, "learning_rate": 3.0683761039838457e-06, "loss": 0.3937, "step": 21216 }, { "epoch": 1.991084834834835, "grad_norm": 1.0332772563309718, "learning_rate": 3.0678725375489672e-06, "loss": 0.409, "step": 21217 }, { "epoch": 1.9911786786786787, "grad_norm": 1.0582399970154168, "learning_rate": 3.0673689941510696e-06, "loss": 0.4165, "step": 21218 }, { "epoch": 1.9912725225225225, "grad_norm": 0.8672434061201827, "learning_rate": 3.0668654737961612e-06, "loss": 0.3391, "step": 21219 }, { "epoch": 1.9913663663663663, "grad_norm": 0.9803986835194859, "learning_rate": 3.066361976490243e-06, "loss": 0.4032, "step": 21220 }, { "epoch": 1.9914602102102101, "grad_norm": 0.9466170309008979, "learning_rate": 3.065858502239317e-06, "loss": 0.4024, "step": 21221 }, { "epoch": 1.991554054054054, "grad_norm": 1.7914336552692078, "learning_rate": 3.06535505104939e-06, "loss": 0.4071, "step": 21222 }, { "epoch": 1.9916478978978978, "grad_norm": 1.0137785880737333, "learning_rate": 3.0648516229264615e-06, "loss": 0.4049, "step": 21223 }, { "epoch": 1.9917417417417418, "grad_norm": 0.9239187316029991, "learning_rate": 3.0643482178765333e-06, "loss": 0.362, "step": 21224 }, { "epoch": 1.9918355855855856, "grad_norm": 1.0952856675553109, "learning_rate": 3.0638448359056107e-06, "loss": 0.3952, "step": 21225 }, { "epoch": 1.9919294294294294, "grad_norm": 1.1748170293085833, "learning_rate": 3.0633414770196943e-06, "loss": 0.3843, "step": 21226 }, { "epoch": 1.9920232732732732, "grad_norm": 0.8613382121181541, "learning_rate": 3.0628381412247842e-06, "loss": 0.3958, "step": 21227 }, { "epoch": 1.9921171171171173, "grad_norm": 0.9177278017503199, "learning_rate": 3.0623348285268837e-06, "loss": 0.3777, "step": 21228 }, { "epoch": 1.992210960960961, "grad_norm": 0.9490811991474085, "learning_rate": 3.061831538931994e-06, "loss": 0.4136, "step": 21229 }, { "epoch": 1.9923048048048049, "grad_norm": 1.0260391071701873, "learning_rate": 3.0613282724461122e-06, "loss": 0.4469, "step": 21230 }, { "epoch": 1.9923986486486487, "grad_norm": 0.8178126850340289, "learning_rate": 3.060825029075244e-06, "loss": 0.375, "step": 21231 }, { "epoch": 1.9924924924924925, "grad_norm": 0.9021025398449027, "learning_rate": 3.0603218088253867e-06, "loss": 0.4009, "step": 21232 }, { "epoch": 1.9925863363363363, "grad_norm": 0.9027027595748295, "learning_rate": 3.0598186117025395e-06, "loss": 0.3228, "step": 21233 }, { "epoch": 1.9926801801801801, "grad_norm": 0.8961981935950497, "learning_rate": 3.0593154377127055e-06, "loss": 0.3942, "step": 21234 }, { "epoch": 1.992774024024024, "grad_norm": 1.4646753731659368, "learning_rate": 3.0588122868618807e-06, "loss": 0.3918, "step": 21235 }, { "epoch": 1.9928678678678677, "grad_norm": 1.2563561104068748, "learning_rate": 3.0583091591560644e-06, "loss": 0.3623, "step": 21236 }, { "epoch": 1.9929617117117115, "grad_norm": 0.9023307842123679, "learning_rate": 3.057806054601258e-06, "loss": 0.4138, "step": 21237 }, { "epoch": 1.9930555555555556, "grad_norm": 1.4474584732916356, "learning_rate": 3.0573029732034595e-06, "loss": 0.3758, "step": 21238 }, { "epoch": 1.9931493993993994, "grad_norm": 1.0207428170717614, "learning_rate": 3.0567999149686643e-06, "loss": 0.4348, "step": 21239 }, { "epoch": 1.9932432432432432, "grad_norm": 0.8257088293890128, "learning_rate": 3.0562968799028737e-06, "loss": 0.4192, "step": 21240 }, { "epoch": 1.9933370870870872, "grad_norm": 0.9166934686928715, "learning_rate": 3.055793868012084e-06, "loss": 0.373, "step": 21241 }, { "epoch": 1.993430930930931, "grad_norm": 1.102312228999199, "learning_rate": 3.055290879302291e-06, "loss": 0.3898, "step": 21242 }, { "epoch": 1.9935247747747749, "grad_norm": 1.048916437894171, "learning_rate": 3.0547879137794957e-06, "loss": 0.4114, "step": 21243 }, { "epoch": 1.9936186186186187, "grad_norm": 0.9543423533656272, "learning_rate": 3.0542849714496935e-06, "loss": 0.3846, "step": 21244 }, { "epoch": 1.9937124624624625, "grad_norm": 1.1348802524764332, "learning_rate": 3.0537820523188787e-06, "loss": 0.3908, "step": 21245 }, { "epoch": 1.9938063063063063, "grad_norm": 0.9548448490623183, "learning_rate": 3.0532791563930506e-06, "loss": 0.3983, "step": 21246 }, { "epoch": 1.99390015015015, "grad_norm": 0.9744857866690083, "learning_rate": 3.0527762836782047e-06, "loss": 0.3639, "step": 21247 }, { "epoch": 1.993993993993994, "grad_norm": 0.899476606899128, "learning_rate": 3.052273434180335e-06, "loss": 0.3754, "step": 21248 }, { "epoch": 1.9940878378378377, "grad_norm": 0.8366501425112586, "learning_rate": 3.051770607905439e-06, "loss": 0.3592, "step": 21249 }, { "epoch": 1.9941816816816815, "grad_norm": 1.1339767484863728, "learning_rate": 3.0512678048595123e-06, "loss": 0.3625, "step": 21250 }, { "epoch": 1.9942755255255256, "grad_norm": 0.8479219825383064, "learning_rate": 3.050765025048548e-06, "loss": 0.3344, "step": 21251 }, { "epoch": 1.9943693693693694, "grad_norm": 1.0593635348745765, "learning_rate": 3.050262268478542e-06, "loss": 0.4076, "step": 21252 }, { "epoch": 1.9944632132132132, "grad_norm": 0.8829821170138833, "learning_rate": 3.049759535155489e-06, "loss": 0.3543, "step": 21253 }, { "epoch": 1.9945570570570572, "grad_norm": 0.9369361502841114, "learning_rate": 3.0492568250853807e-06, "loss": 0.3787, "step": 21254 }, { "epoch": 1.994650900900901, "grad_norm": 1.0344931796635, "learning_rate": 3.0487541382742136e-06, "loss": 0.4304, "step": 21255 }, { "epoch": 1.9947447447447448, "grad_norm": 0.9114980908492989, "learning_rate": 3.0482514747279825e-06, "loss": 0.3809, "step": 21256 }, { "epoch": 1.9948385885885886, "grad_norm": 0.9190359723116703, "learning_rate": 3.0477488344526764e-06, "loss": 0.3793, "step": 21257 }, { "epoch": 1.9949324324324325, "grad_norm": 1.0908100477390152, "learning_rate": 3.047246217454292e-06, "loss": 0.3997, "step": 21258 }, { "epoch": 1.9950262762762763, "grad_norm": 0.8223347255547363, "learning_rate": 3.04674362373882e-06, "loss": 0.3494, "step": 21259 }, { "epoch": 1.99512012012012, "grad_norm": 0.9412434890028732, "learning_rate": 3.046241053312253e-06, "loss": 0.3417, "step": 21260 }, { "epoch": 1.9952139639639639, "grad_norm": 0.9973203688539828, "learning_rate": 3.0457385061805853e-06, "loss": 0.4137, "step": 21261 }, { "epoch": 1.9953078078078077, "grad_norm": 0.9060627481755203, "learning_rate": 3.045235982349808e-06, "loss": 0.402, "step": 21262 }, { "epoch": 1.9954016516516515, "grad_norm": 1.0139588555464107, "learning_rate": 3.04473348182591e-06, "loss": 0.4186, "step": 21263 }, { "epoch": 1.9954954954954955, "grad_norm": 1.9202777555384827, "learning_rate": 3.044231004614887e-06, "loss": 0.3836, "step": 21264 }, { "epoch": 1.9955893393393394, "grad_norm": 1.0966462028521926, "learning_rate": 3.0437285507227272e-06, "loss": 0.3801, "step": 21265 }, { "epoch": 1.9956831831831832, "grad_norm": 1.0490993163712388, "learning_rate": 3.04322612015542e-06, "loss": 0.4243, "step": 21266 }, { "epoch": 1.995777027027027, "grad_norm": 0.9473212251426698, "learning_rate": 3.0427237129189606e-06, "loss": 0.3814, "step": 21267 }, { "epoch": 1.995870870870871, "grad_norm": 0.8341772563833784, "learning_rate": 3.0422213290193365e-06, "loss": 0.3928, "step": 21268 }, { "epoch": 1.9959647147147148, "grad_norm": 1.1640252774291089, "learning_rate": 3.041718968462537e-06, "loss": 0.3671, "step": 21269 }, { "epoch": 1.9960585585585586, "grad_norm": 0.8526008296566722, "learning_rate": 3.0412166312545534e-06, "loss": 0.3788, "step": 21270 }, { "epoch": 1.9961524024024024, "grad_norm": 0.9496520081331075, "learning_rate": 3.0407143174013747e-06, "loss": 0.4135, "step": 21271 }, { "epoch": 1.9962462462462462, "grad_norm": 0.939964997146504, "learning_rate": 3.0402120269089874e-06, "loss": 0.4181, "step": 21272 }, { "epoch": 1.99634009009009, "grad_norm": 0.9340216187807118, "learning_rate": 3.039709759783386e-06, "loss": 0.3856, "step": 21273 }, { "epoch": 1.9964339339339339, "grad_norm": 0.8371204756121919, "learning_rate": 3.0392075160305544e-06, "loss": 0.3899, "step": 21274 }, { "epoch": 1.9965277777777777, "grad_norm": 1.3431545610863815, "learning_rate": 3.0387052956564824e-06, "loss": 0.4359, "step": 21275 }, { "epoch": 1.9966216216216215, "grad_norm": 1.1897209410231124, "learning_rate": 3.038203098667158e-06, "loss": 0.3672, "step": 21276 }, { "epoch": 1.9967154654654653, "grad_norm": 0.9352596151408762, "learning_rate": 3.0377009250685695e-06, "loss": 0.3532, "step": 21277 }, { "epoch": 1.9968093093093093, "grad_norm": 1.0053252307199119, "learning_rate": 3.037198774866702e-06, "loss": 0.4065, "step": 21278 }, { "epoch": 1.9969031531531531, "grad_norm": 0.9300268624188274, "learning_rate": 3.0366966480675464e-06, "loss": 0.3975, "step": 21279 }, { "epoch": 1.996996996996997, "grad_norm": 1.0110114110456319, "learning_rate": 3.0361945446770867e-06, "loss": 0.3931, "step": 21280 }, { "epoch": 1.997090840840841, "grad_norm": 1.2004786646111165, "learning_rate": 3.035692464701311e-06, "loss": 0.3917, "step": 21281 }, { "epoch": 1.9971846846846848, "grad_norm": 0.9709931439423227, "learning_rate": 3.0351904081462056e-06, "loss": 0.4137, "step": 21282 }, { "epoch": 1.9972785285285286, "grad_norm": 1.043271812464869, "learning_rate": 3.0346883750177566e-06, "loss": 0.4103, "step": 21283 }, { "epoch": 1.9973723723723724, "grad_norm": 0.8218457272226268, "learning_rate": 3.034186365321947e-06, "loss": 0.358, "step": 21284 }, { "epoch": 1.9974662162162162, "grad_norm": 0.881038337349545, "learning_rate": 3.0336843790647667e-06, "loss": 0.3879, "step": 21285 }, { "epoch": 1.99756006006006, "grad_norm": 0.9868463132933677, "learning_rate": 3.0331824162521996e-06, "loss": 0.382, "step": 21286 }, { "epoch": 1.9976539039039038, "grad_norm": 0.9509982653372111, "learning_rate": 3.0326804768902283e-06, "loss": 0.4063, "step": 21287 }, { "epoch": 1.9977477477477477, "grad_norm": 2.8662043362265557, "learning_rate": 3.0321785609848402e-06, "loss": 0.3967, "step": 21288 }, { "epoch": 1.9978415915915915, "grad_norm": 0.9628594296095777, "learning_rate": 3.0316766685420195e-06, "loss": 0.4278, "step": 21289 }, { "epoch": 1.9979354354354353, "grad_norm": 0.9895480681885236, "learning_rate": 3.0311747995677463e-06, "loss": 0.3652, "step": 21290 }, { "epoch": 1.9980292792792793, "grad_norm": 0.9755917949447035, "learning_rate": 3.030672954068011e-06, "loss": 0.3822, "step": 21291 }, { "epoch": 1.9981231231231231, "grad_norm": 0.9194784771232414, "learning_rate": 3.030171132048793e-06, "loss": 0.3667, "step": 21292 }, { "epoch": 1.998216966966967, "grad_norm": 0.8866140179482136, "learning_rate": 3.0296693335160756e-06, "loss": 0.4095, "step": 21293 }, { "epoch": 1.998310810810811, "grad_norm": 0.9600182324647322, "learning_rate": 3.029167558475844e-06, "loss": 0.3482, "step": 21294 }, { "epoch": 1.9984046546546548, "grad_norm": 1.0100844124015915, "learning_rate": 3.028665806934079e-06, "loss": 0.3594, "step": 21295 }, { "epoch": 1.9984984984984986, "grad_norm": 0.9393838402508338, "learning_rate": 3.028164078896762e-06, "loss": 0.368, "step": 21296 }, { "epoch": 1.9985923423423424, "grad_norm": 0.8675909686739588, "learning_rate": 3.0276623743698785e-06, "loss": 0.3805, "step": 21297 }, { "epoch": 1.9986861861861862, "grad_norm": 0.8454577338006951, "learning_rate": 3.027160693359409e-06, "loss": 0.3607, "step": 21298 }, { "epoch": 1.99878003003003, "grad_norm": 0.9208101276841769, "learning_rate": 3.026659035871332e-06, "loss": 0.3813, "step": 21299 }, { "epoch": 1.9988738738738738, "grad_norm": 1.0190989120128464, "learning_rate": 3.0261574019116356e-06, "loss": 0.4417, "step": 21300 }, { "epoch": 1.9989677177177176, "grad_norm": 0.9409518122083617, "learning_rate": 3.025655791486295e-06, "loss": 0.3945, "step": 21301 }, { "epoch": 1.9990615615615615, "grad_norm": 1.0007061275430482, "learning_rate": 3.0251542046012915e-06, "loss": 0.4027, "step": 21302 }, { "epoch": 1.9991554054054053, "grad_norm": 0.8374781493740747, "learning_rate": 3.0246526412626082e-06, "loss": 0.3473, "step": 21303 }, { "epoch": 1.9992492492492493, "grad_norm": 0.9264304138142293, "learning_rate": 3.024151101476224e-06, "loss": 0.4028, "step": 21304 }, { "epoch": 1.999343093093093, "grad_norm": 0.9043086354380665, "learning_rate": 3.0236495852481175e-06, "loss": 0.3986, "step": 21305 }, { "epoch": 1.999436936936937, "grad_norm": 0.9143001694050525, "learning_rate": 3.0231480925842705e-06, "loss": 0.3703, "step": 21306 }, { "epoch": 1.9995307807807807, "grad_norm": 0.8974820513040658, "learning_rate": 3.0226466234906632e-06, "loss": 0.3531, "step": 21307 }, { "epoch": 1.9996246246246248, "grad_norm": 0.8780434287516722, "learning_rate": 3.0221451779732698e-06, "loss": 0.3546, "step": 21308 }, { "epoch": 1.9997184684684686, "grad_norm": 0.8806982564160187, "learning_rate": 3.021643756038074e-06, "loss": 0.3947, "step": 21309 }, { "epoch": 1.9998123123123124, "grad_norm": 0.8805831841012655, "learning_rate": 3.0211423576910527e-06, "loss": 0.3552, "step": 21310 }, { "epoch": 1.9999061561561562, "grad_norm": 1.0424925575321113, "learning_rate": 3.0206409829381815e-06, "loss": 0.392, "step": 21311 }, { "epoch": 2.0, "grad_norm": 1.4968050893402205, "learning_rate": 3.0201396317854435e-06, "loss": 0.3539, "step": 21312 }, { "epoch": 2.000093843843844, "grad_norm": 0.9081405246524153, "learning_rate": 3.019638304238813e-06, "loss": 0.344, "step": 21313 }, { "epoch": 2.0001876876876876, "grad_norm": 0.8163486691855487, "learning_rate": 3.0191370003042674e-06, "loss": 0.3389, "step": 21314 }, { "epoch": 2.0002815315315314, "grad_norm": 1.1541887142686913, "learning_rate": 3.018635719987786e-06, "loss": 0.341, "step": 21315 }, { "epoch": 2.0003753753753752, "grad_norm": 1.0595310465775845, "learning_rate": 3.0181344632953436e-06, "loss": 0.3837, "step": 21316 }, { "epoch": 2.000469219219219, "grad_norm": 0.8598139296122248, "learning_rate": 3.017633230232916e-06, "loss": 0.3216, "step": 21317 }, { "epoch": 2.000563063063063, "grad_norm": 0.8292669605055832, "learning_rate": 3.0171320208064812e-06, "loss": 0.3514, "step": 21318 }, { "epoch": 2.000656906906907, "grad_norm": 0.8865187803238204, "learning_rate": 3.016630835022016e-06, "loss": 0.3282, "step": 21319 }, { "epoch": 2.000750750750751, "grad_norm": 0.9775200018867837, "learning_rate": 3.0161296728854928e-06, "loss": 0.3848, "step": 21320 }, { "epoch": 2.0008445945945947, "grad_norm": 0.820484336336404, "learning_rate": 3.015628534402891e-06, "loss": 0.3021, "step": 21321 }, { "epoch": 2.0009384384384385, "grad_norm": 0.9730842402315266, "learning_rate": 3.0151274195801826e-06, "loss": 0.3499, "step": 21322 }, { "epoch": 2.0010322822822824, "grad_norm": 0.9440659560828359, "learning_rate": 3.0146263284233425e-06, "loss": 0.3389, "step": 21323 }, { "epoch": 2.001126126126126, "grad_norm": 0.8851529625474465, "learning_rate": 3.014125260938349e-06, "loss": 0.3152, "step": 21324 }, { "epoch": 2.00121996996997, "grad_norm": 0.9072108966165313, "learning_rate": 3.0136242171311723e-06, "loss": 0.314, "step": 21325 }, { "epoch": 2.001313813813814, "grad_norm": 1.0345227233417955, "learning_rate": 3.013123197007787e-06, "loss": 0.3462, "step": 21326 }, { "epoch": 2.0014076576576576, "grad_norm": 0.8061670821095731, "learning_rate": 3.012622200574169e-06, "loss": 0.3177, "step": 21327 }, { "epoch": 2.0015015015015014, "grad_norm": 0.8473266265145425, "learning_rate": 3.012121227836291e-06, "loss": 0.3163, "step": 21328 }, { "epoch": 2.0015953453453452, "grad_norm": 0.9370781626158852, "learning_rate": 3.0116202788001223e-06, "loss": 0.325, "step": 21329 }, { "epoch": 2.001689189189189, "grad_norm": 1.4102663373727689, "learning_rate": 3.011119353471642e-06, "loss": 0.3438, "step": 21330 }, { "epoch": 2.001783033033033, "grad_norm": 0.9524441007797466, "learning_rate": 3.0106184518568193e-06, "loss": 0.341, "step": 21331 }, { "epoch": 2.001876876876877, "grad_norm": 0.9845205565126447, "learning_rate": 3.0101175739616262e-06, "loss": 0.2963, "step": 21332 }, { "epoch": 2.001970720720721, "grad_norm": 0.94364805968496, "learning_rate": 3.009616719792037e-06, "loss": 0.3339, "step": 21333 }, { "epoch": 2.0020645645645647, "grad_norm": 0.9592855293212063, "learning_rate": 3.009115889354021e-06, "loss": 0.2914, "step": 21334 }, { "epoch": 2.0021584084084085, "grad_norm": 0.9049011116197511, "learning_rate": 3.0086150826535488e-06, "loss": 0.2992, "step": 21335 }, { "epoch": 2.0022522522522523, "grad_norm": 1.0973927459373913, "learning_rate": 3.008114299696595e-06, "loss": 0.3288, "step": 21336 }, { "epoch": 2.002346096096096, "grad_norm": 1.1482603651540721, "learning_rate": 3.0076135404891296e-06, "loss": 0.2919, "step": 21337 }, { "epoch": 2.00243993993994, "grad_norm": 0.93497110204415, "learning_rate": 3.0071128050371205e-06, "loss": 0.2948, "step": 21338 }, { "epoch": 2.0025337837837838, "grad_norm": 0.9335940286761314, "learning_rate": 3.0066120933465414e-06, "loss": 0.319, "step": 21339 }, { "epoch": 2.0026276276276276, "grad_norm": 1.0681623568328373, "learning_rate": 3.0061114054233616e-06, "loss": 0.3029, "step": 21340 }, { "epoch": 2.0027214714714714, "grad_norm": 1.122885150654968, "learning_rate": 3.0056107412735476e-06, "loss": 0.3488, "step": 21341 }, { "epoch": 2.002815315315315, "grad_norm": 1.0395271649912345, "learning_rate": 3.005110100903073e-06, "loss": 0.2925, "step": 21342 }, { "epoch": 2.002909159159159, "grad_norm": 1.0846601047346134, "learning_rate": 3.0046094843179064e-06, "loss": 0.3318, "step": 21343 }, { "epoch": 2.003003003003003, "grad_norm": 0.9874689278062239, "learning_rate": 3.004108891524014e-06, "loss": 0.3021, "step": 21344 }, { "epoch": 2.0030968468468466, "grad_norm": 0.997839585324498, "learning_rate": 3.0036083225273676e-06, "loss": 0.2836, "step": 21345 }, { "epoch": 2.003190690690691, "grad_norm": 1.6159699130071725, "learning_rate": 3.003107777333934e-06, "loss": 0.3255, "step": 21346 }, { "epoch": 2.0032845345345347, "grad_norm": 0.9602359705898186, "learning_rate": 3.0026072559496794e-06, "loss": 0.3321, "step": 21347 }, { "epoch": 2.0033783783783785, "grad_norm": 0.9972117606878094, "learning_rate": 3.002106758380575e-06, "loss": 0.3124, "step": 21348 }, { "epoch": 2.0034722222222223, "grad_norm": 0.9375542280653565, "learning_rate": 3.0016062846325876e-06, "loss": 0.3346, "step": 21349 }, { "epoch": 2.003566066066066, "grad_norm": 0.9866840341506127, "learning_rate": 3.0011058347116827e-06, "loss": 0.3528, "step": 21350 }, { "epoch": 2.00365990990991, "grad_norm": 0.8783945256566102, "learning_rate": 3.000605408623829e-06, "loss": 0.3265, "step": 21351 }, { "epoch": 2.0037537537537538, "grad_norm": 0.9307361243694483, "learning_rate": 3.0001050063749925e-06, "loss": 0.3122, "step": 21352 }, { "epoch": 2.0038475975975976, "grad_norm": 0.9328384681735988, "learning_rate": 2.9996046279711372e-06, "loss": 0.3092, "step": 21353 }, { "epoch": 2.0039414414414414, "grad_norm": 1.1134883512133058, "learning_rate": 2.999104273418233e-06, "loss": 0.3303, "step": 21354 }, { "epoch": 2.004035285285285, "grad_norm": 0.937229354226789, "learning_rate": 2.9986039427222447e-06, "loss": 0.3545, "step": 21355 }, { "epoch": 2.004129129129129, "grad_norm": 1.3548828971534452, "learning_rate": 2.998103635889136e-06, "loss": 0.3312, "step": 21356 }, { "epoch": 2.004222972972973, "grad_norm": 0.9045627103123911, "learning_rate": 2.9976033529248738e-06, "loss": 0.3372, "step": 21357 }, { "epoch": 2.0043168168168166, "grad_norm": 0.9307599637837077, "learning_rate": 2.997103093835423e-06, "loss": 0.3551, "step": 21358 }, { "epoch": 2.004410660660661, "grad_norm": 1.055788912489275, "learning_rate": 2.9966028586267453e-06, "loss": 0.3385, "step": 21359 }, { "epoch": 2.0045045045045047, "grad_norm": 1.0784097815452907, "learning_rate": 2.99610264730481e-06, "loss": 0.2929, "step": 21360 }, { "epoch": 2.0045983483483485, "grad_norm": 0.9809567389146006, "learning_rate": 2.9956024598755783e-06, "loss": 0.3272, "step": 21361 }, { "epoch": 2.0046921921921923, "grad_norm": 1.060457085168304, "learning_rate": 2.9951022963450137e-06, "loss": 0.2813, "step": 21362 }, { "epoch": 2.004786036036036, "grad_norm": 0.8729678136478892, "learning_rate": 2.994602156719081e-06, "loss": 0.3399, "step": 21363 }, { "epoch": 2.00487987987988, "grad_norm": 0.9710156885688854, "learning_rate": 2.994102041003743e-06, "loss": 0.3381, "step": 21364 }, { "epoch": 2.0049737237237237, "grad_norm": 0.9368078278222431, "learning_rate": 2.9936019492049606e-06, "loss": 0.3393, "step": 21365 }, { "epoch": 2.0050675675675675, "grad_norm": 0.9998630415648397, "learning_rate": 2.9931018813286997e-06, "loss": 0.3094, "step": 21366 }, { "epoch": 2.0051614114114114, "grad_norm": 0.9145754716377311, "learning_rate": 2.992601837380922e-06, "loss": 0.2724, "step": 21367 }, { "epoch": 2.005255255255255, "grad_norm": 1.054643707977147, "learning_rate": 2.9921018173675876e-06, "loss": 0.3214, "step": 21368 }, { "epoch": 2.005349099099099, "grad_norm": 1.0866060766610406, "learning_rate": 2.9916018212946608e-06, "loss": 0.3157, "step": 21369 }, { "epoch": 2.005442942942943, "grad_norm": 0.9437854029822559, "learning_rate": 2.991101849168101e-06, "loss": 0.3403, "step": 21370 }, { "epoch": 2.0055367867867866, "grad_norm": 1.08722307112711, "learning_rate": 2.990601900993869e-06, "loss": 0.3333, "step": 21371 }, { "epoch": 2.005630630630631, "grad_norm": 1.0147368036853228, "learning_rate": 2.990101976777929e-06, "loss": 0.326, "step": 21372 }, { "epoch": 2.0057244744744747, "grad_norm": 1.275508922916959, "learning_rate": 2.9896020765262396e-06, "loss": 0.3148, "step": 21373 }, { "epoch": 2.0058183183183185, "grad_norm": 0.9475668033536605, "learning_rate": 2.98910220024476e-06, "loss": 0.3159, "step": 21374 }, { "epoch": 2.0059121621621623, "grad_norm": 1.030579313844934, "learning_rate": 2.9886023479394534e-06, "loss": 0.3026, "step": 21375 }, { "epoch": 2.006006006006006, "grad_norm": 1.104808922538592, "learning_rate": 2.9881025196162768e-06, "loss": 0.3229, "step": 21376 }, { "epoch": 2.00609984984985, "grad_norm": 1.1852686377945458, "learning_rate": 2.987602715281189e-06, "loss": 0.3039, "step": 21377 }, { "epoch": 2.0061936936936937, "grad_norm": 0.9574908713890389, "learning_rate": 2.9871029349401524e-06, "loss": 0.3426, "step": 21378 }, { "epoch": 2.0062875375375375, "grad_norm": 1.0295894715551155, "learning_rate": 2.9866031785991244e-06, "loss": 0.294, "step": 21379 }, { "epoch": 2.0063813813813813, "grad_norm": 0.9764892384063956, "learning_rate": 2.9861034462640635e-06, "loss": 0.3115, "step": 21380 }, { "epoch": 2.006475225225225, "grad_norm": 1.316834510980091, "learning_rate": 2.9856037379409284e-06, "loss": 0.3101, "step": 21381 }, { "epoch": 2.006569069069069, "grad_norm": 1.3211405826995863, "learning_rate": 2.985104053635678e-06, "loss": 0.3376, "step": 21382 }, { "epoch": 2.0066629129129128, "grad_norm": 1.0865521911521816, "learning_rate": 2.984604393354267e-06, "loss": 0.3142, "step": 21383 }, { "epoch": 2.0067567567567566, "grad_norm": 0.9918618277666034, "learning_rate": 2.984104757102656e-06, "loss": 0.3226, "step": 21384 }, { "epoch": 2.0068506006006004, "grad_norm": 1.011224311462686, "learning_rate": 2.983605144886802e-06, "loss": 0.3313, "step": 21385 }, { "epoch": 2.0069444444444446, "grad_norm": 1.1894895640385954, "learning_rate": 2.9831055567126598e-06, "loss": 0.2938, "step": 21386 }, { "epoch": 2.0070382882882885, "grad_norm": 0.9842102429104848, "learning_rate": 2.9826059925861885e-06, "loss": 0.3228, "step": 21387 }, { "epoch": 2.0071321321321323, "grad_norm": 0.9722586045058585, "learning_rate": 2.982106452513344e-06, "loss": 0.3074, "step": 21388 }, { "epoch": 2.007225975975976, "grad_norm": 2.3184493262262262, "learning_rate": 2.9816069365000786e-06, "loss": 0.3036, "step": 21389 }, { "epoch": 2.00731981981982, "grad_norm": 1.0005423233297988, "learning_rate": 2.9811074445523536e-06, "loss": 0.2918, "step": 21390 }, { "epoch": 2.0074136636636637, "grad_norm": 0.9312040495411454, "learning_rate": 2.9806079766761233e-06, "loss": 0.3509, "step": 21391 }, { "epoch": 2.0075075075075075, "grad_norm": 1.2663176091887012, "learning_rate": 2.9801085328773395e-06, "loss": 0.3081, "step": 21392 }, { "epoch": 2.0076013513513513, "grad_norm": 1.257624301067923, "learning_rate": 2.979609113161961e-06, "loss": 0.2915, "step": 21393 }, { "epoch": 2.007695195195195, "grad_norm": 0.9856866275244837, "learning_rate": 2.97910971753594e-06, "loss": 0.3046, "step": 21394 }, { "epoch": 2.007789039039039, "grad_norm": 1.0192556355505737, "learning_rate": 2.9786103460052306e-06, "loss": 0.3103, "step": 21395 }, { "epoch": 2.0078828828828827, "grad_norm": 0.9335923912487643, "learning_rate": 2.9781109985757895e-06, "loss": 0.2668, "step": 21396 }, { "epoch": 2.0079767267267266, "grad_norm": 1.0613222511646296, "learning_rate": 2.977611675253569e-06, "loss": 0.372, "step": 21397 }, { "epoch": 2.0080705705705704, "grad_norm": 1.077378990881451, "learning_rate": 2.977112376044522e-06, "loss": 0.357, "step": 21398 }, { "epoch": 2.0081644144144146, "grad_norm": 0.9890549439366112, "learning_rate": 2.976613100954603e-06, "loss": 0.3067, "step": 21399 }, { "epoch": 2.0082582582582584, "grad_norm": 1.0172546958175621, "learning_rate": 2.9761138499897634e-06, "loss": 0.3363, "step": 21400 }, { "epoch": 2.0083521021021022, "grad_norm": 1.0366925796265332, "learning_rate": 2.975614623155955e-06, "loss": 0.2972, "step": 21401 }, { "epoch": 2.008445945945946, "grad_norm": 1.1996712389379895, "learning_rate": 2.9751154204591335e-06, "loss": 0.3593, "step": 21402 }, { "epoch": 2.00853978978979, "grad_norm": 1.3967332047104808, "learning_rate": 2.9746162419052497e-06, "loss": 0.3414, "step": 21403 }, { "epoch": 2.0086336336336337, "grad_norm": 1.0256126569525335, "learning_rate": 2.974117087500254e-06, "loss": 0.3196, "step": 21404 }, { "epoch": 2.0087274774774775, "grad_norm": 1.184155165215518, "learning_rate": 2.9736179572500988e-06, "loss": 0.3282, "step": 21405 }, { "epoch": 2.0088213213213213, "grad_norm": 1.3172733922814308, "learning_rate": 2.973118851160736e-06, "loss": 0.3375, "step": 21406 }, { "epoch": 2.008915165165165, "grad_norm": 0.9704045601833954, "learning_rate": 2.972619769238113e-06, "loss": 0.3111, "step": 21407 }, { "epoch": 2.009009009009009, "grad_norm": 1.0594338260674467, "learning_rate": 2.9721207114881855e-06, "loss": 0.3015, "step": 21408 }, { "epoch": 2.0091028528528527, "grad_norm": 1.018323742334726, "learning_rate": 2.9716216779169015e-06, "loss": 0.2695, "step": 21409 }, { "epoch": 2.0091966966966965, "grad_norm": 1.220607386904739, "learning_rate": 2.9711226685302103e-06, "loss": 0.3123, "step": 21410 }, { "epoch": 2.0092905405405403, "grad_norm": 0.9636632679366895, "learning_rate": 2.970623683334063e-06, "loss": 0.2907, "step": 21411 }, { "epoch": 2.0093843843843846, "grad_norm": 1.1260446342824602, "learning_rate": 2.9701247223344076e-06, "loss": 0.2886, "step": 21412 }, { "epoch": 2.0094782282282284, "grad_norm": 1.2996757153630603, "learning_rate": 2.969625785537193e-06, "loss": 0.3049, "step": 21413 }, { "epoch": 2.0095720720720722, "grad_norm": 1.0918764640324006, "learning_rate": 2.969126872948371e-06, "loss": 0.3012, "step": 21414 }, { "epoch": 2.009665915915916, "grad_norm": 1.0825601504329112, "learning_rate": 2.968627984573888e-06, "loss": 0.3023, "step": 21415 }, { "epoch": 2.00975975975976, "grad_norm": 1.1058471764435427, "learning_rate": 2.9681291204196915e-06, "loss": 0.3049, "step": 21416 }, { "epoch": 2.0098536036036037, "grad_norm": 2.6019576554540165, "learning_rate": 2.967630280491732e-06, "loss": 0.3225, "step": 21417 }, { "epoch": 2.0099474474474475, "grad_norm": 1.7230998424127735, "learning_rate": 2.9671314647959557e-06, "loss": 0.3269, "step": 21418 }, { "epoch": 2.0100412912912913, "grad_norm": 0.9365712047805465, "learning_rate": 2.9666326733383083e-06, "loss": 0.3198, "step": 21419 }, { "epoch": 2.010135135135135, "grad_norm": 1.0810408192274945, "learning_rate": 2.9661339061247407e-06, "loss": 0.3473, "step": 21420 }, { "epoch": 2.010228978978979, "grad_norm": 1.4444928023878425, "learning_rate": 2.9656351631611978e-06, "loss": 0.3246, "step": 21421 }, { "epoch": 2.0103228228228227, "grad_norm": 1.0956410136647787, "learning_rate": 2.965136444453626e-06, "loss": 0.2999, "step": 21422 }, { "epoch": 2.0104166666666665, "grad_norm": 1.1332098608186933, "learning_rate": 2.964637750007972e-06, "loss": 0.3018, "step": 21423 }, { "epoch": 2.0105105105105103, "grad_norm": 1.0830926439596367, "learning_rate": 2.9641390798301818e-06, "loss": 0.2882, "step": 21424 }, { "epoch": 2.010604354354354, "grad_norm": 1.1257983386420234, "learning_rate": 2.963640433926199e-06, "loss": 0.3224, "step": 21425 }, { "epoch": 2.0106981981981984, "grad_norm": 1.1910776071713653, "learning_rate": 2.9631418123019733e-06, "loss": 0.3208, "step": 21426 }, { "epoch": 2.010792042042042, "grad_norm": 1.0326910705735322, "learning_rate": 2.9626432149634475e-06, "loss": 0.2993, "step": 21427 }, { "epoch": 2.010885885885886, "grad_norm": 0.9613943368454684, "learning_rate": 2.9621446419165646e-06, "loss": 0.3412, "step": 21428 }, { "epoch": 2.01097972972973, "grad_norm": 0.9891654980668048, "learning_rate": 2.961646093167272e-06, "loss": 0.2863, "step": 21429 }, { "epoch": 2.0110735735735736, "grad_norm": 0.9343100716398044, "learning_rate": 2.961147568721515e-06, "loss": 0.3156, "step": 21430 }, { "epoch": 2.0111674174174174, "grad_norm": 0.9153126911983327, "learning_rate": 2.9606490685852318e-06, "loss": 0.3336, "step": 21431 }, { "epoch": 2.0112612612612613, "grad_norm": 0.9288300214570817, "learning_rate": 2.9601505927643727e-06, "loss": 0.3312, "step": 21432 }, { "epoch": 2.011355105105105, "grad_norm": 0.9922220262948839, "learning_rate": 2.959652141264877e-06, "loss": 0.3465, "step": 21433 }, { "epoch": 2.011448948948949, "grad_norm": 0.9376935834952492, "learning_rate": 2.9591537140926874e-06, "loss": 0.3341, "step": 21434 }, { "epoch": 2.0115427927927927, "grad_norm": 1.1587837613953715, "learning_rate": 2.9586553112537502e-06, "loss": 0.3677, "step": 21435 }, { "epoch": 2.0116366366366365, "grad_norm": 0.9038481927022881, "learning_rate": 2.9581569327540062e-06, "loss": 0.3117, "step": 21436 }, { "epoch": 2.0117304804804803, "grad_norm": 0.9616729763040369, "learning_rate": 2.957658578599396e-06, "loss": 0.276, "step": 21437 }, { "epoch": 2.011824324324324, "grad_norm": 1.0051020017875172, "learning_rate": 2.9571602487958646e-06, "loss": 0.3125, "step": 21438 }, { "epoch": 2.0119181681681684, "grad_norm": 2.0357803695053702, "learning_rate": 2.9566619433493517e-06, "loss": 0.2971, "step": 21439 }, { "epoch": 2.012012012012012, "grad_norm": 0.9303126420577486, "learning_rate": 2.9561636622657963e-06, "loss": 0.3431, "step": 21440 }, { "epoch": 2.012105855855856, "grad_norm": 0.9926351560886382, "learning_rate": 2.9556654055511445e-06, "loss": 0.3354, "step": 21441 }, { "epoch": 2.0121996996997, "grad_norm": 0.9913991381979375, "learning_rate": 2.955167173211335e-06, "loss": 0.3339, "step": 21442 }, { "epoch": 2.0122935435435436, "grad_norm": 0.9219727309225522, "learning_rate": 2.954668965252307e-06, "loss": 0.3259, "step": 21443 }, { "epoch": 2.0123873873873874, "grad_norm": 1.0972449273777782, "learning_rate": 2.9541707816800025e-06, "loss": 0.284, "step": 21444 }, { "epoch": 2.0124812312312312, "grad_norm": 1.995957040991863, "learning_rate": 2.9536726225003603e-06, "loss": 0.3336, "step": 21445 }, { "epoch": 2.012575075075075, "grad_norm": 1.3721298099122727, "learning_rate": 2.9531744877193184e-06, "loss": 0.3524, "step": 21446 }, { "epoch": 2.012668918918919, "grad_norm": 1.0245945910708223, "learning_rate": 2.9526763773428204e-06, "loss": 0.3344, "step": 21447 }, { "epoch": 2.0127627627627627, "grad_norm": 0.950795775938915, "learning_rate": 2.9521782913768026e-06, "loss": 0.2974, "step": 21448 }, { "epoch": 2.0128566066066065, "grad_norm": 1.0518403671100842, "learning_rate": 2.951680229827203e-06, "loss": 0.2905, "step": 21449 }, { "epoch": 2.0129504504504503, "grad_norm": 1.0512982450904866, "learning_rate": 2.9511821926999618e-06, "loss": 0.3239, "step": 21450 }, { "epoch": 2.013044294294294, "grad_norm": 1.234934311844842, "learning_rate": 2.9506841800010166e-06, "loss": 0.3199, "step": 21451 }, { "epoch": 2.0131381381381384, "grad_norm": 1.0238579186055823, "learning_rate": 2.9501861917363034e-06, "loss": 0.2914, "step": 21452 }, { "epoch": 2.013231981981982, "grad_norm": 1.5076034618201672, "learning_rate": 2.9496882279117633e-06, "loss": 0.3582, "step": 21453 }, { "epoch": 2.013325825825826, "grad_norm": 1.206038145751009, "learning_rate": 2.9491902885333317e-06, "loss": 0.289, "step": 21454 }, { "epoch": 2.01341966966967, "grad_norm": 0.9446374658528363, "learning_rate": 2.948692373606945e-06, "loss": 0.2627, "step": 21455 }, { "epoch": 2.0135135135135136, "grad_norm": 0.9844445501247556, "learning_rate": 2.948194483138541e-06, "loss": 0.3171, "step": 21456 }, { "epoch": 2.0136073573573574, "grad_norm": 1.0964462180477537, "learning_rate": 2.947696617134056e-06, "loss": 0.2938, "step": 21457 }, { "epoch": 2.013701201201201, "grad_norm": 1.0150856905389782, "learning_rate": 2.9471987755994234e-06, "loss": 0.3715, "step": 21458 }, { "epoch": 2.013795045045045, "grad_norm": 2.2139575743742563, "learning_rate": 2.9467009585405844e-06, "loss": 0.3355, "step": 21459 }, { "epoch": 2.013888888888889, "grad_norm": 1.0723257707576623, "learning_rate": 2.9462031659634705e-06, "loss": 0.303, "step": 21460 }, { "epoch": 2.0139827327327327, "grad_norm": 1.1498216657743896, "learning_rate": 2.9457053978740173e-06, "loss": 0.3341, "step": 21461 }, { "epoch": 2.0140765765765765, "grad_norm": 0.9779123218531738, "learning_rate": 2.9452076542781616e-06, "loss": 0.2957, "step": 21462 }, { "epoch": 2.0141704204204203, "grad_norm": 1.5253295795850619, "learning_rate": 2.9447099351818365e-06, "loss": 0.3388, "step": 21463 }, { "epoch": 2.014264264264264, "grad_norm": 1.5252023644833896, "learning_rate": 2.944212240590975e-06, "loss": 0.3548, "step": 21464 }, { "epoch": 2.014358108108108, "grad_norm": 1.0392494517311237, "learning_rate": 2.943714570511515e-06, "loss": 0.3283, "step": 21465 }, { "epoch": 2.014451951951952, "grad_norm": 1.011259820795803, "learning_rate": 2.943216924949388e-06, "loss": 0.2831, "step": 21466 }, { "epoch": 2.014545795795796, "grad_norm": 1.2225349290652539, "learning_rate": 2.9427193039105266e-06, "loss": 0.2971, "step": 21467 }, { "epoch": 2.0146396396396398, "grad_norm": 0.8353742257980044, "learning_rate": 2.9422217074008664e-06, "loss": 0.3108, "step": 21468 }, { "epoch": 2.0147334834834836, "grad_norm": 0.9788812669964811, "learning_rate": 2.941724135426339e-06, "loss": 0.2968, "step": 21469 }, { "epoch": 2.0148273273273274, "grad_norm": 1.048421266840928, "learning_rate": 2.9412265879928745e-06, "loss": 0.3133, "step": 21470 }, { "epoch": 2.014921171171171, "grad_norm": 0.814194103939838, "learning_rate": 2.94072906510641e-06, "loss": 0.2992, "step": 21471 }, { "epoch": 2.015015015015015, "grad_norm": 0.9586894564396924, "learning_rate": 2.9402315667728754e-06, "loss": 0.3048, "step": 21472 }, { "epoch": 2.015108858858859, "grad_norm": 1.0468815315205342, "learning_rate": 2.939734092998201e-06, "loss": 0.3596, "step": 21473 }, { "epoch": 2.0152027027027026, "grad_norm": 1.1073254843064648, "learning_rate": 2.9392366437883202e-06, "loss": 0.3473, "step": 21474 }, { "epoch": 2.0152965465465464, "grad_norm": 1.8972846639611483, "learning_rate": 2.9387392191491638e-06, "loss": 0.3165, "step": 21475 }, { "epoch": 2.0153903903903903, "grad_norm": 1.1314115734466774, "learning_rate": 2.9382418190866602e-06, "loss": 0.3182, "step": 21476 }, { "epoch": 2.015484234234234, "grad_norm": 0.9422392194771675, "learning_rate": 2.9377444436067443e-06, "loss": 0.329, "step": 21477 }, { "epoch": 2.015578078078078, "grad_norm": 1.1459368739191444, "learning_rate": 2.937247092715344e-06, "loss": 0.3209, "step": 21478 }, { "epoch": 2.015671921921922, "grad_norm": 1.1475734402267856, "learning_rate": 2.936749766418388e-06, "loss": 0.3297, "step": 21479 }, { "epoch": 2.015765765765766, "grad_norm": 1.0112229620287865, "learning_rate": 2.936252464721809e-06, "loss": 0.3664, "step": 21480 }, { "epoch": 2.0158596096096097, "grad_norm": 1.1166764848609563, "learning_rate": 2.935755187631534e-06, "loss": 0.2512, "step": 21481 }, { "epoch": 2.0159534534534536, "grad_norm": 1.1611907252085278, "learning_rate": 2.9352579351534918e-06, "loss": 0.3325, "step": 21482 }, { "epoch": 2.0160472972972974, "grad_norm": 1.2535575951074194, "learning_rate": 2.9347607072936135e-06, "loss": 0.3349, "step": 21483 }, { "epoch": 2.016141141141141, "grad_norm": 0.9789632138718707, "learning_rate": 2.934263504057826e-06, "loss": 0.2899, "step": 21484 }, { "epoch": 2.016234984984985, "grad_norm": 1.2840541701088726, "learning_rate": 2.9337663254520576e-06, "loss": 0.3142, "step": 21485 }, { "epoch": 2.016328828828829, "grad_norm": 0.9922080951945694, "learning_rate": 2.933269171482237e-06, "loss": 0.2918, "step": 21486 }, { "epoch": 2.0164226726726726, "grad_norm": 0.9260192744880613, "learning_rate": 2.9327720421542914e-06, "loss": 0.3097, "step": 21487 }, { "epoch": 2.0165165165165164, "grad_norm": 1.2740358322276784, "learning_rate": 2.9322749374741465e-06, "loss": 0.311, "step": 21488 }, { "epoch": 2.0166103603603602, "grad_norm": 0.9717641198595031, "learning_rate": 2.931777857447733e-06, "loss": 0.3373, "step": 21489 }, { "epoch": 2.016704204204204, "grad_norm": 1.1277977411021616, "learning_rate": 2.9312808020809745e-06, "loss": 0.3302, "step": 21490 }, { "epoch": 2.016798048048048, "grad_norm": 1.155625574135493, "learning_rate": 2.9307837713797986e-06, "loss": 0.3197, "step": 21491 }, { "epoch": 2.016891891891892, "grad_norm": 3.115483496973139, "learning_rate": 2.930286765350132e-06, "loss": 0.3473, "step": 21492 }, { "epoch": 2.016985735735736, "grad_norm": 0.9716072220468226, "learning_rate": 2.9297897839978995e-06, "loss": 0.3382, "step": 21493 }, { "epoch": 2.0170795795795797, "grad_norm": 1.0343599246203135, "learning_rate": 2.9292928273290257e-06, "loss": 0.3594, "step": 21494 }, { "epoch": 2.0171734234234235, "grad_norm": 1.340649830797006, "learning_rate": 2.928795895349439e-06, "loss": 0.3163, "step": 21495 }, { "epoch": 2.0172672672672673, "grad_norm": 0.9996253952170727, "learning_rate": 2.928298988065062e-06, "loss": 0.2918, "step": 21496 }, { "epoch": 2.017361111111111, "grad_norm": 0.9696707704336642, "learning_rate": 2.9278021054818195e-06, "loss": 0.3581, "step": 21497 }, { "epoch": 2.017454954954955, "grad_norm": 11.2781183967423, "learning_rate": 2.9273052476056374e-06, "loss": 0.3164, "step": 21498 }, { "epoch": 2.017548798798799, "grad_norm": 1.0629643886428475, "learning_rate": 2.926808414442438e-06, "loss": 0.3027, "step": 21499 }, { "epoch": 2.0176426426426426, "grad_norm": 1.2138117562637374, "learning_rate": 2.9263116059981452e-06, "loss": 0.3296, "step": 21500 }, { "epoch": 2.0177364864864864, "grad_norm": 1.033624760279725, "learning_rate": 2.925814822278684e-06, "loss": 0.3698, "step": 21501 }, { "epoch": 2.01783033033033, "grad_norm": 0.9716884762654119, "learning_rate": 2.9253180632899777e-06, "loss": 0.299, "step": 21502 }, { "epoch": 2.017924174174174, "grad_norm": 1.2063860105884634, "learning_rate": 2.9248213290379468e-06, "loss": 0.3466, "step": 21503 }, { "epoch": 2.018018018018018, "grad_norm": 1.3406799174004673, "learning_rate": 2.924324619528517e-06, "loss": 0.3358, "step": 21504 }, { "epoch": 2.0181118618618616, "grad_norm": 1.1804263180419903, "learning_rate": 2.923827934767608e-06, "loss": 0.3661, "step": 21505 }, { "epoch": 2.018205705705706, "grad_norm": 0.9190193841209707, "learning_rate": 2.923331274761142e-06, "loss": 0.3163, "step": 21506 }, { "epoch": 2.0182995495495497, "grad_norm": 1.2401550331010998, "learning_rate": 2.9228346395150432e-06, "loss": 0.2666, "step": 21507 }, { "epoch": 2.0183933933933935, "grad_norm": 1.029417553741486, "learning_rate": 2.922338029035231e-06, "loss": 0.3603, "step": 21508 }, { "epoch": 2.0184872372372373, "grad_norm": 1.0074743084544127, "learning_rate": 2.9218414433276266e-06, "loss": 0.3263, "step": 21509 }, { "epoch": 2.018581081081081, "grad_norm": 0.9287706769689877, "learning_rate": 2.9213448823981515e-06, "loss": 0.3396, "step": 21510 }, { "epoch": 2.018674924924925, "grad_norm": 1.2781527951067706, "learning_rate": 2.920848346252727e-06, "loss": 0.2953, "step": 21511 }, { "epoch": 2.0187687687687688, "grad_norm": 1.1637667286612683, "learning_rate": 2.9203518348972696e-06, "loss": 0.3301, "step": 21512 }, { "epoch": 2.0188626126126126, "grad_norm": 1.8004850361059213, "learning_rate": 2.9198553483377044e-06, "loss": 0.3173, "step": 21513 }, { "epoch": 2.0189564564564564, "grad_norm": 1.1184912070387698, "learning_rate": 2.9193588865799487e-06, "loss": 0.3224, "step": 21514 }, { "epoch": 2.0190503003003, "grad_norm": 1.129941668249936, "learning_rate": 2.9188624496299212e-06, "loss": 0.289, "step": 21515 }, { "epoch": 2.019144144144144, "grad_norm": 0.933548549220586, "learning_rate": 2.9183660374935418e-06, "loss": 0.3346, "step": 21516 }, { "epoch": 2.019237987987988, "grad_norm": 0.9337903431573417, "learning_rate": 2.91786965017673e-06, "loss": 0.3272, "step": 21517 }, { "epoch": 2.0193318318318316, "grad_norm": 0.9376280942368953, "learning_rate": 2.9173732876854013e-06, "loss": 0.3514, "step": 21518 }, { "epoch": 2.019425675675676, "grad_norm": 0.9781057277479434, "learning_rate": 2.9168769500254775e-06, "loss": 0.345, "step": 21519 }, { "epoch": 2.0195195195195197, "grad_norm": 1.7568166233663869, "learning_rate": 2.9163806372028747e-06, "loss": 0.2947, "step": 21520 }, { "epoch": 2.0196133633633635, "grad_norm": 1.2579393074691139, "learning_rate": 2.9158843492235093e-06, "loss": 0.297, "step": 21521 }, { "epoch": 2.0197072072072073, "grad_norm": 1.0146912737901232, "learning_rate": 2.9153880860933026e-06, "loss": 0.3168, "step": 21522 }, { "epoch": 2.019801051051051, "grad_norm": 1.0774169355014274, "learning_rate": 2.9148918478181686e-06, "loss": 0.2969, "step": 21523 }, { "epoch": 2.019894894894895, "grad_norm": 1.1298495884165678, "learning_rate": 2.914395634404024e-06, "loss": 0.2988, "step": 21524 }, { "epoch": 2.0199887387387387, "grad_norm": 0.912816233472564, "learning_rate": 2.9138994458567867e-06, "loss": 0.3219, "step": 21525 }, { "epoch": 2.0200825825825826, "grad_norm": 1.4164739556623667, "learning_rate": 2.913403282182371e-06, "loss": 0.3171, "step": 21526 }, { "epoch": 2.0201764264264264, "grad_norm": 1.031198918163417, "learning_rate": 2.9129071433866916e-06, "loss": 0.3403, "step": 21527 }, { "epoch": 2.02027027027027, "grad_norm": 1.0075199836246649, "learning_rate": 2.912411029475669e-06, "loss": 0.3004, "step": 21528 }, { "epoch": 2.020364114114114, "grad_norm": 1.0929550593580966, "learning_rate": 2.9119149404552146e-06, "loss": 0.2776, "step": 21529 }, { "epoch": 2.020457957957958, "grad_norm": 0.9874281204911969, "learning_rate": 2.9114188763312425e-06, "loss": 0.3154, "step": 21530 }, { "epoch": 2.0205518018018016, "grad_norm": 3.4997942370444592, "learning_rate": 2.9109228371096697e-06, "loss": 0.3385, "step": 21531 }, { "epoch": 2.020645645645646, "grad_norm": 1.0226291330537387, "learning_rate": 2.910426822796411e-06, "loss": 0.3249, "step": 21532 }, { "epoch": 2.0207394894894897, "grad_norm": 1.4432196610765338, "learning_rate": 2.909930833397377e-06, "loss": 0.3261, "step": 21533 }, { "epoch": 2.0208333333333335, "grad_norm": 0.884608994081415, "learning_rate": 2.909434868918486e-06, "loss": 0.2888, "step": 21534 }, { "epoch": 2.0209271771771773, "grad_norm": 1.1380175579339316, "learning_rate": 2.908938929365648e-06, "loss": 0.3118, "step": 21535 }, { "epoch": 2.021021021021021, "grad_norm": 1.5603288753115356, "learning_rate": 2.9084430147447782e-06, "loss": 0.3221, "step": 21536 }, { "epoch": 2.021114864864865, "grad_norm": 0.9450896802672163, "learning_rate": 2.9079471250617884e-06, "loss": 0.2958, "step": 21537 }, { "epoch": 2.0212087087087087, "grad_norm": 0.9783118222698913, "learning_rate": 2.907451260322591e-06, "loss": 0.3246, "step": 21538 }, { "epoch": 2.0213025525525525, "grad_norm": 1.1429732207764385, "learning_rate": 2.9069554205330962e-06, "loss": 0.3565, "step": 21539 }, { "epoch": 2.0213963963963963, "grad_norm": 0.9872806377727219, "learning_rate": 2.906459605699221e-06, "loss": 0.2977, "step": 21540 }, { "epoch": 2.02149024024024, "grad_norm": 1.0572493656581308, "learning_rate": 2.905963815826874e-06, "loss": 0.3311, "step": 21541 }, { "epoch": 2.021584084084084, "grad_norm": 1.1636826654045525, "learning_rate": 2.9054680509219648e-06, "loss": 0.335, "step": 21542 }, { "epoch": 2.0216779279279278, "grad_norm": 1.2062430665180408, "learning_rate": 2.9049723109904084e-06, "loss": 0.3053, "step": 21543 }, { "epoch": 2.0217717717717716, "grad_norm": 2.224778316973476, "learning_rate": 2.9044765960381136e-06, "loss": 0.3131, "step": 21544 }, { "epoch": 2.0218656156156154, "grad_norm": 1.0829668922709197, "learning_rate": 2.90398090607099e-06, "loss": 0.3104, "step": 21545 }, { "epoch": 2.0219594594594597, "grad_norm": 1.039284465662683, "learning_rate": 2.90348524109495e-06, "loss": 0.2959, "step": 21546 }, { "epoch": 2.0220533033033035, "grad_norm": 1.2073859010851318, "learning_rate": 2.902989601115902e-06, "loss": 0.3017, "step": 21547 }, { "epoch": 2.0221471471471473, "grad_norm": 1.1763312154341303, "learning_rate": 2.902493986139754e-06, "loss": 0.3344, "step": 21548 }, { "epoch": 2.022240990990991, "grad_norm": 1.1967956632662855, "learning_rate": 2.9019983961724208e-06, "loss": 0.3116, "step": 21549 }, { "epoch": 2.022334834834835, "grad_norm": 1.708148109984411, "learning_rate": 2.901502831219806e-06, "loss": 0.3422, "step": 21550 }, { "epoch": 2.0224286786786787, "grad_norm": 1.0562050118545767, "learning_rate": 2.9010072912878177e-06, "loss": 0.2985, "step": 21551 }, { "epoch": 2.0225225225225225, "grad_norm": 1.02146849741576, "learning_rate": 2.900511776382369e-06, "loss": 0.3413, "step": 21552 }, { "epoch": 2.0226163663663663, "grad_norm": 0.9912944360418621, "learning_rate": 2.900016286509365e-06, "loss": 0.3089, "step": 21553 }, { "epoch": 2.02271021021021, "grad_norm": 1.0810022123735703, "learning_rate": 2.899520821674712e-06, "loss": 0.3312, "step": 21554 }, { "epoch": 2.022804054054054, "grad_norm": 1.253277772403075, "learning_rate": 2.8990253818843216e-06, "loss": 0.3254, "step": 21555 }, { "epoch": 2.0228978978978978, "grad_norm": 1.05586710327855, "learning_rate": 2.898529967144099e-06, "loss": 0.3029, "step": 21556 }, { "epoch": 2.0229917417417416, "grad_norm": 1.318887336253256, "learning_rate": 2.898034577459948e-06, "loss": 0.3094, "step": 21557 }, { "epoch": 2.0230855855855854, "grad_norm": 1.1052428334714572, "learning_rate": 2.897539212837782e-06, "loss": 0.3375, "step": 21558 }, { "epoch": 2.0231794294294296, "grad_norm": 1.0788758776643819, "learning_rate": 2.8970438732835017e-06, "loss": 0.3154, "step": 21559 }, { "epoch": 2.0232732732732734, "grad_norm": 1.2107389738128316, "learning_rate": 2.8965485588030134e-06, "loss": 0.2823, "step": 21560 }, { "epoch": 2.0233671171171173, "grad_norm": 0.9635304678058075, "learning_rate": 2.8960532694022258e-06, "loss": 0.3195, "step": 21561 }, { "epoch": 2.023460960960961, "grad_norm": 1.0778265487683785, "learning_rate": 2.8955580050870435e-06, "loss": 0.2843, "step": 21562 }, { "epoch": 2.023554804804805, "grad_norm": 1.1080425080910452, "learning_rate": 2.89506276586337e-06, "loss": 0.3191, "step": 21563 }, { "epoch": 2.0236486486486487, "grad_norm": 0.9850467082189951, "learning_rate": 2.8945675517371117e-06, "loss": 0.3217, "step": 21564 }, { "epoch": 2.0237424924924925, "grad_norm": 1.281331575261921, "learning_rate": 2.894072362714172e-06, "loss": 0.3264, "step": 21565 }, { "epoch": 2.0238363363363363, "grad_norm": 1.0015167328702175, "learning_rate": 2.893577198800453e-06, "loss": 0.3556, "step": 21566 }, { "epoch": 2.02393018018018, "grad_norm": 1.333040734610686, "learning_rate": 2.8930820600018626e-06, "loss": 0.3139, "step": 21567 }, { "epoch": 2.024024024024024, "grad_norm": 1.237058497535925, "learning_rate": 2.892586946324304e-06, "loss": 0.3259, "step": 21568 }, { "epoch": 2.0241178678678677, "grad_norm": 1.0504079022937634, "learning_rate": 2.8920918577736763e-06, "loss": 0.317, "step": 21569 }, { "epoch": 2.0242117117117115, "grad_norm": 1.2558165572812643, "learning_rate": 2.8915967943558877e-06, "loss": 0.3618, "step": 21570 }, { "epoch": 2.0243055555555554, "grad_norm": 1.064070810357323, "learning_rate": 2.8911017560768383e-06, "loss": 0.3172, "step": 21571 }, { "epoch": 2.0243993993993996, "grad_norm": 1.2308057697686599, "learning_rate": 2.89060674294243e-06, "loss": 0.2821, "step": 21572 }, { "epoch": 2.0244932432432434, "grad_norm": 0.9848342370676402, "learning_rate": 2.8901117549585666e-06, "loss": 0.2875, "step": 21573 }, { "epoch": 2.0245870870870872, "grad_norm": 1.1674510205509054, "learning_rate": 2.8896167921311497e-06, "loss": 0.2792, "step": 21574 }, { "epoch": 2.024680930930931, "grad_norm": 1.196903205703763, "learning_rate": 2.8891218544660806e-06, "loss": 0.3139, "step": 21575 }, { "epoch": 2.024774774774775, "grad_norm": 1.3850881319559238, "learning_rate": 2.8886269419692593e-06, "loss": 0.3159, "step": 21576 }, { "epoch": 2.0248686186186187, "grad_norm": 1.1051803747867868, "learning_rate": 2.888132054646587e-06, "loss": 0.3103, "step": 21577 }, { "epoch": 2.0249624624624625, "grad_norm": 1.172869691544014, "learning_rate": 2.8876371925039638e-06, "loss": 0.3204, "step": 21578 }, { "epoch": 2.0250563063063063, "grad_norm": 1.1151786738870115, "learning_rate": 2.887142355547292e-06, "loss": 0.3429, "step": 21579 }, { "epoch": 2.02515015015015, "grad_norm": 1.0859970348124015, "learning_rate": 2.8866475437824715e-06, "loss": 0.3617, "step": 21580 }, { "epoch": 2.025243993993994, "grad_norm": 1.0636886444105633, "learning_rate": 2.8861527572153984e-06, "loss": 0.2919, "step": 21581 }, { "epoch": 2.0253378378378377, "grad_norm": 0.9589895756195919, "learning_rate": 2.885657995851977e-06, "loss": 0.2971, "step": 21582 }, { "epoch": 2.0254316816816815, "grad_norm": 1.0922454358510796, "learning_rate": 2.8851632596981044e-06, "loss": 0.3295, "step": 21583 }, { "epoch": 2.0255255255255253, "grad_norm": 1.1461687420036204, "learning_rate": 2.884668548759677e-06, "loss": 0.3603, "step": 21584 }, { "epoch": 2.025619369369369, "grad_norm": 1.1085142951419036, "learning_rate": 2.8841738630425973e-06, "loss": 0.2976, "step": 21585 }, { "epoch": 2.0257132132132134, "grad_norm": 1.1092769388857857, "learning_rate": 2.8836792025527616e-06, "loss": 0.3323, "step": 21586 }, { "epoch": 2.025807057057057, "grad_norm": 0.9735877597414246, "learning_rate": 2.883184567296068e-06, "loss": 0.329, "step": 21587 }, { "epoch": 2.025900900900901, "grad_norm": 1.8048949176566589, "learning_rate": 2.882689957278414e-06, "loss": 0.3485, "step": 21588 }, { "epoch": 2.025994744744745, "grad_norm": 1.3570455493269158, "learning_rate": 2.882195372505697e-06, "loss": 0.3335, "step": 21589 }, { "epoch": 2.0260885885885886, "grad_norm": 1.6694329171465303, "learning_rate": 2.8817008129838113e-06, "loss": 0.3762, "step": 21590 }, { "epoch": 2.0261824324324325, "grad_norm": 1.2488214302192648, "learning_rate": 2.881206278718659e-06, "loss": 0.3038, "step": 21591 }, { "epoch": 2.0262762762762763, "grad_norm": 1.2266318883286236, "learning_rate": 2.8807117697161325e-06, "loss": 0.3385, "step": 21592 }, { "epoch": 2.02637012012012, "grad_norm": 1.229813448988618, "learning_rate": 2.8802172859821274e-06, "loss": 0.3297, "step": 21593 }, { "epoch": 2.026463963963964, "grad_norm": 1.5664354480878566, "learning_rate": 2.879722827522543e-06, "loss": 0.3404, "step": 21594 }, { "epoch": 2.0265578078078077, "grad_norm": 0.9565329207197236, "learning_rate": 2.8792283943432734e-06, "loss": 0.3555, "step": 21595 }, { "epoch": 2.0266516516516515, "grad_norm": 0.995411031829342, "learning_rate": 2.878733986450211e-06, "loss": 0.317, "step": 21596 }, { "epoch": 2.0267454954954953, "grad_norm": 0.9905310505478448, "learning_rate": 2.8782396038492547e-06, "loss": 0.2842, "step": 21597 }, { "epoch": 2.026839339339339, "grad_norm": 0.9256135463217341, "learning_rate": 2.8777452465462973e-06, "loss": 0.2541, "step": 21598 }, { "epoch": 2.0269331831831834, "grad_norm": 2.3663581562641163, "learning_rate": 2.8772509145472337e-06, "loss": 0.3495, "step": 21599 }, { "epoch": 2.027027027027027, "grad_norm": 1.26272120820393, "learning_rate": 2.8767566078579567e-06, "loss": 0.3016, "step": 21600 }, { "epoch": 2.027120870870871, "grad_norm": 1.2833759734806163, "learning_rate": 2.8762623264843608e-06, "loss": 0.3249, "step": 21601 }, { "epoch": 2.027214714714715, "grad_norm": 1.4227835092527705, "learning_rate": 2.875768070432337e-06, "loss": 0.3506, "step": 21602 }, { "epoch": 2.0273085585585586, "grad_norm": 1.27048074056341, "learning_rate": 2.8752738397077827e-06, "loss": 0.3541, "step": 21603 }, { "epoch": 2.0274024024024024, "grad_norm": 0.8469059334827649, "learning_rate": 2.8747796343165883e-06, "loss": 0.2864, "step": 21604 }, { "epoch": 2.0274962462462462, "grad_norm": 2.2647203028164307, "learning_rate": 2.874285454264645e-06, "loss": 0.2994, "step": 21605 }, { "epoch": 2.02759009009009, "grad_norm": 0.9753781473297246, "learning_rate": 2.8737912995578476e-06, "loss": 0.341, "step": 21606 }, { "epoch": 2.027683933933934, "grad_norm": 2.314500682017732, "learning_rate": 2.8732971702020874e-06, "loss": 0.3266, "step": 21607 }, { "epoch": 2.0277777777777777, "grad_norm": 1.1784878519841722, "learning_rate": 2.8728030662032525e-06, "loss": 0.3267, "step": 21608 }, { "epoch": 2.0278716216216215, "grad_norm": 0.990838887824455, "learning_rate": 2.8723089875672394e-06, "loss": 0.3205, "step": 21609 }, { "epoch": 2.0279654654654653, "grad_norm": 1.0708475617925635, "learning_rate": 2.871814934299937e-06, "loss": 0.3008, "step": 21610 }, { "epoch": 2.028059309309309, "grad_norm": 1.3041545961887984, "learning_rate": 2.8713209064072354e-06, "loss": 0.3467, "step": 21611 }, { "epoch": 2.0281531531531534, "grad_norm": 1.0522534406377009, "learning_rate": 2.8708269038950254e-06, "loss": 0.3144, "step": 21612 }, { "epoch": 2.028246996996997, "grad_norm": 1.091891457912171, "learning_rate": 2.8703329267691958e-06, "loss": 0.3113, "step": 21613 }, { "epoch": 2.028340840840841, "grad_norm": 1.1873685205409972, "learning_rate": 2.8698389750356364e-06, "loss": 0.3292, "step": 21614 }, { "epoch": 2.028434684684685, "grad_norm": 0.9611677397224252, "learning_rate": 2.8693450487002393e-06, "loss": 0.335, "step": 21615 }, { "epoch": 2.0285285285285286, "grad_norm": 0.931281451001647, "learning_rate": 2.8688511477688917e-06, "loss": 0.3063, "step": 21616 }, { "epoch": 2.0286223723723724, "grad_norm": 1.4919421085651692, "learning_rate": 2.868357272247482e-06, "loss": 0.3021, "step": 21617 }, { "epoch": 2.0287162162162162, "grad_norm": 0.9923721184591546, "learning_rate": 2.8678634221419e-06, "loss": 0.3139, "step": 21618 }, { "epoch": 2.02881006006006, "grad_norm": 1.1808694926067456, "learning_rate": 2.8673695974580335e-06, "loss": 0.331, "step": 21619 }, { "epoch": 2.028903903903904, "grad_norm": 1.1370491664334978, "learning_rate": 2.866875798201768e-06, "loss": 0.3124, "step": 21620 }, { "epoch": 2.0289977477477477, "grad_norm": 1.0359424623242282, "learning_rate": 2.8663820243789962e-06, "loss": 0.2806, "step": 21621 }, { "epoch": 2.0290915915915915, "grad_norm": 0.914453475250065, "learning_rate": 2.8658882759956033e-06, "loss": 0.3373, "step": 21622 }, { "epoch": 2.0291854354354353, "grad_norm": 1.246257172999993, "learning_rate": 2.865394553057474e-06, "loss": 0.32, "step": 21623 }, { "epoch": 2.029279279279279, "grad_norm": 1.007881068682937, "learning_rate": 2.864900855570497e-06, "loss": 0.3328, "step": 21624 }, { "epoch": 2.029373123123123, "grad_norm": 1.077135285727354, "learning_rate": 2.8644071835405594e-06, "loss": 0.3397, "step": 21625 }, { "epoch": 2.029466966966967, "grad_norm": 1.0954530512855163, "learning_rate": 2.863913536973544e-06, "loss": 0.329, "step": 21626 }, { "epoch": 2.029560810810811, "grad_norm": 1.055178019400102, "learning_rate": 2.8634199158753403e-06, "loss": 0.2689, "step": 21627 }, { "epoch": 2.0296546546546548, "grad_norm": 1.0361812882220522, "learning_rate": 2.8629263202518325e-06, "loss": 0.3043, "step": 21628 }, { "epoch": 2.0297484984984986, "grad_norm": 1.9601925902569115, "learning_rate": 2.8624327501089038e-06, "loss": 0.3187, "step": 21629 }, { "epoch": 2.0298423423423424, "grad_norm": 0.9868969287666556, "learning_rate": 2.861939205452443e-06, "loss": 0.3192, "step": 21630 }, { "epoch": 2.029936186186186, "grad_norm": 1.1466962280518322, "learning_rate": 2.861445686288333e-06, "loss": 0.3389, "step": 21631 }, { "epoch": 2.03003003003003, "grad_norm": 1.9193139340269714, "learning_rate": 2.8609521926224557e-06, "loss": 0.3316, "step": 21632 }, { "epoch": 2.030123873873874, "grad_norm": 1.0369084654049285, "learning_rate": 2.860458724460699e-06, "loss": 0.3056, "step": 21633 }, { "epoch": 2.0302177177177176, "grad_norm": 1.1839089444519932, "learning_rate": 2.8599652818089447e-06, "loss": 0.3656, "step": 21634 }, { "epoch": 2.0303115615615615, "grad_norm": 1.0496654445615097, "learning_rate": 2.8594718646730764e-06, "loss": 0.3242, "step": 21635 }, { "epoch": 2.0304054054054053, "grad_norm": 1.143888171444441, "learning_rate": 2.8589784730589766e-06, "loss": 0.3658, "step": 21636 }, { "epoch": 2.030499249249249, "grad_norm": 1.1911307529380768, "learning_rate": 2.8584851069725287e-06, "loss": 0.3472, "step": 21637 }, { "epoch": 2.030593093093093, "grad_norm": 1.1704106219846007, "learning_rate": 2.8579917664196133e-06, "loss": 0.3228, "step": 21638 }, { "epoch": 2.030686936936937, "grad_norm": 1.1365747200043899, "learning_rate": 2.8574984514061156e-06, "loss": 0.2939, "step": 21639 }, { "epoch": 2.030780780780781, "grad_norm": 1.045933544196691, "learning_rate": 2.8570051619379157e-06, "loss": 0.2903, "step": 21640 }, { "epoch": 2.0308746246246248, "grad_norm": 1.2433626530985864, "learning_rate": 2.8565118980208945e-06, "loss": 0.32, "step": 21641 }, { "epoch": 2.0309684684684686, "grad_norm": 0.9963861564261122, "learning_rate": 2.856018659660936e-06, "loss": 0.2942, "step": 21642 }, { "epoch": 2.0310623123123124, "grad_norm": 1.1175090039342552, "learning_rate": 2.8555254468639188e-06, "loss": 0.3251, "step": 21643 }, { "epoch": 2.031156156156156, "grad_norm": 1.3415839124789628, "learning_rate": 2.8550322596357224e-06, "loss": 0.2832, "step": 21644 }, { "epoch": 2.03125, "grad_norm": 2.0938573490711496, "learning_rate": 2.8545390979822306e-06, "loss": 0.3268, "step": 21645 }, { "epoch": 2.031343843843844, "grad_norm": 1.1064566175843016, "learning_rate": 2.854045961909322e-06, "loss": 0.2863, "step": 21646 }, { "epoch": 2.0314376876876876, "grad_norm": 1.0783785940622348, "learning_rate": 2.8535528514228755e-06, "loss": 0.3084, "step": 21647 }, { "epoch": 2.0315315315315314, "grad_norm": 1.3026396183886455, "learning_rate": 2.853059766528771e-06, "loss": 0.3508, "step": 21648 }, { "epoch": 2.0316253753753752, "grad_norm": 1.415637433925775, "learning_rate": 2.852566707232888e-06, "loss": 0.2694, "step": 21649 }, { "epoch": 2.031719219219219, "grad_norm": 1.1187685983383362, "learning_rate": 2.8520736735411027e-06, "loss": 0.3308, "step": 21650 }, { "epoch": 2.031813063063063, "grad_norm": 1.107954320123191, "learning_rate": 2.851580665459298e-06, "loss": 0.3373, "step": 21651 }, { "epoch": 2.031906906906907, "grad_norm": 1.102781130037989, "learning_rate": 2.85108768299335e-06, "loss": 0.3406, "step": 21652 }, { "epoch": 2.032000750750751, "grad_norm": 1.0527800931624867, "learning_rate": 2.850594726149134e-06, "loss": 0.318, "step": 21653 }, { "epoch": 2.0320945945945947, "grad_norm": 0.9535342113271653, "learning_rate": 2.8501017949325316e-06, "loss": 0.3061, "step": 21654 }, { "epoch": 2.0321884384384385, "grad_norm": 2.1911676655404153, "learning_rate": 2.8496088893494195e-06, "loss": 0.3611, "step": 21655 }, { "epoch": 2.0322822822822824, "grad_norm": 1.185440744089801, "learning_rate": 2.8491160094056715e-06, "loss": 0.352, "step": 21656 }, { "epoch": 2.032376126126126, "grad_norm": 1.0313598569718714, "learning_rate": 2.848623155107168e-06, "loss": 0.317, "step": 21657 }, { "epoch": 2.03246996996997, "grad_norm": 1.0135702790559744, "learning_rate": 2.8481303264597843e-06, "loss": 0.2461, "step": 21658 }, { "epoch": 2.032563813813814, "grad_norm": 0.9942800516974195, "learning_rate": 2.8476375234693955e-06, "loss": 0.316, "step": 21659 }, { "epoch": 2.0326576576576576, "grad_norm": 1.119651023633729, "learning_rate": 2.847144746141878e-06, "loss": 0.3117, "step": 21660 }, { "epoch": 2.0327515015015014, "grad_norm": 0.9187379754008937, "learning_rate": 2.8466519944831073e-06, "loss": 0.3384, "step": 21661 }, { "epoch": 2.0328453453453452, "grad_norm": 0.9885678857048659, "learning_rate": 2.846159268498956e-06, "loss": 0.3332, "step": 21662 }, { "epoch": 2.032939189189189, "grad_norm": 1.5494892151021467, "learning_rate": 2.8456665681953038e-06, "loss": 0.2908, "step": 21663 }, { "epoch": 2.033033033033033, "grad_norm": 0.9964708331134452, "learning_rate": 2.845173893578023e-06, "loss": 0.3163, "step": 21664 }, { "epoch": 2.033126876876877, "grad_norm": 1.2423522673232457, "learning_rate": 2.844681244652984e-06, "loss": 0.3004, "step": 21665 }, { "epoch": 2.033220720720721, "grad_norm": 1.1007939611030175, "learning_rate": 2.8441886214260672e-06, "loss": 0.3373, "step": 21666 }, { "epoch": 2.0333145645645647, "grad_norm": 1.233552833359189, "learning_rate": 2.843696023903143e-06, "loss": 0.343, "step": 21667 }, { "epoch": 2.0334084084084085, "grad_norm": 1.0556557931182842, "learning_rate": 2.8432034520900823e-06, "loss": 0.3356, "step": 21668 }, { "epoch": 2.0335022522522523, "grad_norm": 1.0385568027883745, "learning_rate": 2.8427109059927637e-06, "loss": 0.3356, "step": 21669 }, { "epoch": 2.033596096096096, "grad_norm": 1.347081645146603, "learning_rate": 2.842218385617056e-06, "loss": 0.3194, "step": 21670 }, { "epoch": 2.03368993993994, "grad_norm": 0.9512766656265613, "learning_rate": 2.841725890968833e-06, "loss": 0.2812, "step": 21671 }, { "epoch": 2.0337837837837838, "grad_norm": 1.047730519786211, "learning_rate": 2.841233422053966e-06, "loss": 0.3244, "step": 21672 }, { "epoch": 2.0338776276276276, "grad_norm": 1.2367554632870317, "learning_rate": 2.840740978878327e-06, "loss": 0.3194, "step": 21673 }, { "epoch": 2.0339714714714714, "grad_norm": 0.978104357806105, "learning_rate": 2.840248561447786e-06, "loss": 0.3164, "step": 21674 }, { "epoch": 2.034065315315315, "grad_norm": 1.462157971913028, "learning_rate": 2.839756169768216e-06, "loss": 0.3324, "step": 21675 }, { "epoch": 2.034159159159159, "grad_norm": 1.0276067819202135, "learning_rate": 2.839263803845489e-06, "loss": 0.3214, "step": 21676 }, { "epoch": 2.034253003003003, "grad_norm": 1.1638956410163825, "learning_rate": 2.8387714636854713e-06, "loss": 0.3105, "step": 21677 }, { "epoch": 2.0343468468468466, "grad_norm": 0.9834967738606234, "learning_rate": 2.8382791492940382e-06, "loss": 0.3429, "step": 21678 }, { "epoch": 2.034440690690691, "grad_norm": 1.0644445038154204, "learning_rate": 2.837786860677057e-06, "loss": 0.3502, "step": 21679 }, { "epoch": 2.0345345345345347, "grad_norm": 1.1823418143264326, "learning_rate": 2.8372945978403953e-06, "loss": 0.2771, "step": 21680 }, { "epoch": 2.0346283783783785, "grad_norm": 1.0400945200352494, "learning_rate": 2.836802360789927e-06, "loss": 0.322, "step": 21681 }, { "epoch": 2.0347222222222223, "grad_norm": 1.1710830806780268, "learning_rate": 2.836310149531519e-06, "loss": 0.2989, "step": 21682 }, { "epoch": 2.034816066066066, "grad_norm": 1.2599207837539317, "learning_rate": 2.8358179640710372e-06, "loss": 0.2896, "step": 21683 }, { "epoch": 2.03490990990991, "grad_norm": 1.0154837701131196, "learning_rate": 2.8353258044143543e-06, "loss": 0.3087, "step": 21684 }, { "epoch": 2.0350037537537538, "grad_norm": 1.0292833166480404, "learning_rate": 2.8348336705673375e-06, "loss": 0.3271, "step": 21685 }, { "epoch": 2.0350975975975976, "grad_norm": 0.9772260959681708, "learning_rate": 2.834341562535853e-06, "loss": 0.3006, "step": 21686 }, { "epoch": 2.0351914414414414, "grad_norm": 1.3186902596350842, "learning_rate": 2.8338494803257687e-06, "loss": 0.3026, "step": 21687 }, { "epoch": 2.035285285285285, "grad_norm": 1.0496355545893337, "learning_rate": 2.8333574239429527e-06, "loss": 0.313, "step": 21688 }, { "epoch": 2.035379129129129, "grad_norm": 2.6403129995950763, "learning_rate": 2.8328653933932687e-06, "loss": 0.3003, "step": 21689 }, { "epoch": 2.035472972972973, "grad_norm": 1.1223413747994584, "learning_rate": 2.8323733886825876e-06, "loss": 0.3282, "step": 21690 }, { "epoch": 2.0355668168168166, "grad_norm": 1.0502498111072265, "learning_rate": 2.831881409816774e-06, "loss": 0.3268, "step": 21691 }, { "epoch": 2.035660660660661, "grad_norm": 1.0751656839891024, "learning_rate": 2.831389456801692e-06, "loss": 0.3094, "step": 21692 }, { "epoch": 2.0357545045045047, "grad_norm": 1.1801960246255254, "learning_rate": 2.8308975296432094e-06, "loss": 0.314, "step": 21693 }, { "epoch": 2.0358483483483485, "grad_norm": 1.2052720476107648, "learning_rate": 2.830405628347192e-06, "loss": 0.2861, "step": 21694 }, { "epoch": 2.0359421921921923, "grad_norm": 1.1876181507810823, "learning_rate": 2.829913752919501e-06, "loss": 0.3302, "step": 21695 }, { "epoch": 2.036036036036036, "grad_norm": 1.0837495288856691, "learning_rate": 2.8294219033660062e-06, "loss": 0.3474, "step": 21696 }, { "epoch": 2.03612987987988, "grad_norm": 1.6488286978146445, "learning_rate": 2.828930079692569e-06, "loss": 0.354, "step": 21697 }, { "epoch": 2.0362237237237237, "grad_norm": 1.0819290329984572, "learning_rate": 2.8284382819050544e-06, "loss": 0.3343, "step": 21698 }, { "epoch": 2.0363175675675675, "grad_norm": 1.1254891545234944, "learning_rate": 2.827946510009326e-06, "loss": 0.3212, "step": 21699 }, { "epoch": 2.0364114114114114, "grad_norm": 0.9933919540885847, "learning_rate": 2.827454764011246e-06, "loss": 0.3109, "step": 21700 }, { "epoch": 2.036505255255255, "grad_norm": 1.4956258362759884, "learning_rate": 2.8269630439166774e-06, "loss": 0.2949, "step": 21701 }, { "epoch": 2.036599099099099, "grad_norm": 1.243635882754593, "learning_rate": 2.8264713497314855e-06, "loss": 0.3488, "step": 21702 }, { "epoch": 2.036692942942943, "grad_norm": 1.1143325463475793, "learning_rate": 2.8259796814615326e-06, "loss": 0.3396, "step": 21703 }, { "epoch": 2.0367867867867866, "grad_norm": 1.1794554560056505, "learning_rate": 2.825488039112677e-06, "loss": 0.348, "step": 21704 }, { "epoch": 2.0368806306306304, "grad_norm": 1.0982128354095477, "learning_rate": 2.8249964226907854e-06, "loss": 0.3092, "step": 21705 }, { "epoch": 2.0369744744744747, "grad_norm": 1.0515015075701968, "learning_rate": 2.8245048322017175e-06, "loss": 0.3193, "step": 21706 }, { "epoch": 2.0370683183183185, "grad_norm": 1.1307650622541252, "learning_rate": 2.824013267651333e-06, "loss": 0.2912, "step": 21707 }, { "epoch": 2.0371621621621623, "grad_norm": 1.389013749104621, "learning_rate": 2.823521729045496e-06, "loss": 0.3438, "step": 21708 }, { "epoch": 2.037256006006006, "grad_norm": 1.1489217178228512, "learning_rate": 2.8230302163900657e-06, "loss": 0.3041, "step": 21709 }, { "epoch": 2.03734984984985, "grad_norm": 1.0620265294394735, "learning_rate": 2.822538729690902e-06, "loss": 0.2928, "step": 21710 }, { "epoch": 2.0374436936936937, "grad_norm": 0.9347379361971088, "learning_rate": 2.822047268953866e-06, "loss": 0.3297, "step": 21711 }, { "epoch": 2.0375375375375375, "grad_norm": 0.9952925891948744, "learning_rate": 2.821555834184816e-06, "loss": 0.3481, "step": 21712 }, { "epoch": 2.0376313813813813, "grad_norm": 0.9754597506566633, "learning_rate": 2.82106442538961e-06, "loss": 0.3303, "step": 21713 }, { "epoch": 2.037725225225225, "grad_norm": 1.1311006914981376, "learning_rate": 2.8205730425741126e-06, "loss": 0.3233, "step": 21714 }, { "epoch": 2.037819069069069, "grad_norm": 1.0243207671286303, "learning_rate": 2.820081685744178e-06, "loss": 0.3238, "step": 21715 }, { "epoch": 2.0379129129129128, "grad_norm": 0.9989155918571984, "learning_rate": 2.8195903549056646e-06, "loss": 0.3566, "step": 21716 }, { "epoch": 2.0380067567567566, "grad_norm": 1.0983376739886934, "learning_rate": 2.819099050064434e-06, "loss": 0.3409, "step": 21717 }, { "epoch": 2.0381006006006004, "grad_norm": 1.8034863692480787, "learning_rate": 2.818607771226343e-06, "loss": 0.3362, "step": 21718 }, { "epoch": 2.0381944444444446, "grad_norm": 1.0142570118416299, "learning_rate": 2.8181165183972446e-06, "loss": 0.2792, "step": 21719 }, { "epoch": 2.0382882882882885, "grad_norm": 1.076866032766867, "learning_rate": 2.8176252915830025e-06, "loss": 0.326, "step": 21720 }, { "epoch": 2.0383821321321323, "grad_norm": 1.099436239030231, "learning_rate": 2.8171340907894706e-06, "loss": 0.2925, "step": 21721 }, { "epoch": 2.038475975975976, "grad_norm": 1.7046826761550165, "learning_rate": 2.816642916022506e-06, "loss": 0.2883, "step": 21722 }, { "epoch": 2.03856981981982, "grad_norm": 1.4766382603630168, "learning_rate": 2.8161517672879657e-06, "loss": 0.3056, "step": 21723 }, { "epoch": 2.0386636636636637, "grad_norm": 0.9271138318257086, "learning_rate": 2.815660644591704e-06, "loss": 0.258, "step": 21724 }, { "epoch": 2.0387575075075075, "grad_norm": 0.9461098629700464, "learning_rate": 2.815169547939576e-06, "loss": 0.3062, "step": 21725 }, { "epoch": 2.0388513513513513, "grad_norm": 1.2327371657423665, "learning_rate": 2.8146784773374402e-06, "loss": 0.3179, "step": 21726 }, { "epoch": 2.038945195195195, "grad_norm": 0.9172762249567268, "learning_rate": 2.81418743279115e-06, "loss": 0.3136, "step": 21727 }, { "epoch": 2.039039039039039, "grad_norm": 1.104036251080679, "learning_rate": 2.8136964143065588e-06, "loss": 0.3279, "step": 21728 }, { "epoch": 2.0391328828828827, "grad_norm": 1.0673532728339317, "learning_rate": 2.8132054218895245e-06, "loss": 0.3467, "step": 21729 }, { "epoch": 2.0392267267267266, "grad_norm": 0.9166218825137467, "learning_rate": 2.812714455545899e-06, "loss": 0.3074, "step": 21730 }, { "epoch": 2.0393205705705704, "grad_norm": 1.0281808202962377, "learning_rate": 2.8122235152815345e-06, "loss": 0.3451, "step": 21731 }, { "epoch": 2.0394144144144146, "grad_norm": 1.2023370359963876, "learning_rate": 2.8117326011022887e-06, "loss": 0.3496, "step": 21732 }, { "epoch": 2.0395082582582584, "grad_norm": 1.104745338785732, "learning_rate": 2.8112417130140123e-06, "loss": 0.355, "step": 21733 }, { "epoch": 2.0396021021021022, "grad_norm": 0.940913170212732, "learning_rate": 2.810750851022559e-06, "loss": 0.3188, "step": 21734 }, { "epoch": 2.039695945945946, "grad_norm": 1.0769397173336008, "learning_rate": 2.810260015133781e-06, "loss": 0.3067, "step": 21735 }, { "epoch": 2.03978978978979, "grad_norm": 0.9018086660107199, "learning_rate": 2.80976920535353e-06, "loss": 0.3255, "step": 21736 }, { "epoch": 2.0398836336336337, "grad_norm": 1.3732139474898064, "learning_rate": 2.8092784216876572e-06, "loss": 0.3137, "step": 21737 }, { "epoch": 2.0399774774774775, "grad_norm": 0.9622163578749406, "learning_rate": 2.8087876641420176e-06, "loss": 0.3173, "step": 21738 }, { "epoch": 2.0400713213213213, "grad_norm": 1.0126639072481485, "learning_rate": 2.80829693272246e-06, "loss": 0.3341, "step": 21739 }, { "epoch": 2.040165165165165, "grad_norm": 1.0172674264740393, "learning_rate": 2.8078062274348344e-06, "loss": 0.3117, "step": 21740 }, { "epoch": 2.040259009009009, "grad_norm": 0.9583166606113599, "learning_rate": 2.807315548284995e-06, "loss": 0.3147, "step": 21741 }, { "epoch": 2.0403528528528527, "grad_norm": 1.1216705222268235, "learning_rate": 2.80682489527879e-06, "loss": 0.2935, "step": 21742 }, { "epoch": 2.0404466966966965, "grad_norm": 0.9971800528848438, "learning_rate": 2.806334268422069e-06, "loss": 0.3322, "step": 21743 }, { "epoch": 2.0405405405405403, "grad_norm": 1.0873278452832744, "learning_rate": 2.805843667720684e-06, "loss": 0.3295, "step": 21744 }, { "epoch": 2.0406343843843846, "grad_norm": 1.2572787446046534, "learning_rate": 2.805353093180483e-06, "loss": 0.3702, "step": 21745 }, { "epoch": 2.0407282282282284, "grad_norm": 1.2098499297886438, "learning_rate": 2.8048625448073157e-06, "loss": 0.3436, "step": 21746 }, { "epoch": 2.0408220720720722, "grad_norm": 1.0408999447542246, "learning_rate": 2.8043720226070305e-06, "loss": 0.3371, "step": 21747 }, { "epoch": 2.040915915915916, "grad_norm": 1.0530098061352489, "learning_rate": 2.8038815265854764e-06, "loss": 0.3376, "step": 21748 }, { "epoch": 2.04100975975976, "grad_norm": 0.9896422711073368, "learning_rate": 2.803391056748499e-06, "loss": 0.3334, "step": 21749 }, { "epoch": 2.0411036036036037, "grad_norm": 1.4858218515391135, "learning_rate": 2.802900613101951e-06, "loss": 0.2972, "step": 21750 }, { "epoch": 2.0411974474474475, "grad_norm": 1.1319012920423057, "learning_rate": 2.802410195651677e-06, "loss": 0.3077, "step": 21751 }, { "epoch": 2.0412912912912913, "grad_norm": 1.2379417913763666, "learning_rate": 2.8019198044035228e-06, "loss": 0.2907, "step": 21752 }, { "epoch": 2.041385135135135, "grad_norm": 0.9216561530011831, "learning_rate": 2.80142943936334e-06, "loss": 0.2746, "step": 21753 }, { "epoch": 2.041478978978979, "grad_norm": 1.0767590021291618, "learning_rate": 2.8009391005369724e-06, "loss": 0.3212, "step": 21754 }, { "epoch": 2.0415728228228227, "grad_norm": 1.6080436048482942, "learning_rate": 2.8004487879302645e-06, "loss": 0.3391, "step": 21755 }, { "epoch": 2.0416666666666665, "grad_norm": 1.081513996129738, "learning_rate": 2.799958501549067e-06, "loss": 0.3288, "step": 21756 }, { "epoch": 2.0417605105105103, "grad_norm": 1.1382645422678666, "learning_rate": 2.799468241399223e-06, "loss": 0.3412, "step": 21757 }, { "epoch": 2.041854354354354, "grad_norm": 1.210106736005307, "learning_rate": 2.798978007486578e-06, "loss": 0.3203, "step": 21758 }, { "epoch": 2.0419481981981984, "grad_norm": 1.057015987758514, "learning_rate": 2.798487799816978e-06, "loss": 0.3254, "step": 21759 }, { "epoch": 2.042042042042042, "grad_norm": 1.0985206406525365, "learning_rate": 2.7979976183962663e-06, "loss": 0.2738, "step": 21760 }, { "epoch": 2.042135885885886, "grad_norm": 1.4250772901120088, "learning_rate": 2.7975074632302866e-06, "loss": 0.3085, "step": 21761 }, { "epoch": 2.04222972972973, "grad_norm": 1.175243076178754, "learning_rate": 2.7970173343248863e-06, "loss": 0.3307, "step": 21762 }, { "epoch": 2.0423235735735736, "grad_norm": 1.0632314263552936, "learning_rate": 2.7965272316859083e-06, "loss": 0.337, "step": 21763 }, { "epoch": 2.0424174174174174, "grad_norm": 0.8953190221852728, "learning_rate": 2.7960371553191923e-06, "loss": 0.3232, "step": 21764 }, { "epoch": 2.0425112612612613, "grad_norm": 1.0454127860772957, "learning_rate": 2.7955471052305876e-06, "loss": 0.3032, "step": 21765 }, { "epoch": 2.042605105105105, "grad_norm": 0.9554985820208316, "learning_rate": 2.7950570814259337e-06, "loss": 0.2986, "step": 21766 }, { "epoch": 2.042698948948949, "grad_norm": 1.0589818913459874, "learning_rate": 2.794567083911072e-06, "loss": 0.3046, "step": 21767 }, { "epoch": 2.0427927927927927, "grad_norm": 1.110083986597934, "learning_rate": 2.7940771126918485e-06, "loss": 0.3371, "step": 21768 }, { "epoch": 2.0428866366366365, "grad_norm": 1.122387350169427, "learning_rate": 2.7935871677741034e-06, "loss": 0.3031, "step": 21769 }, { "epoch": 2.0429804804804803, "grad_norm": 1.053834941074879, "learning_rate": 2.7930972491636778e-06, "loss": 0.3278, "step": 21770 }, { "epoch": 2.043074324324324, "grad_norm": 1.0077408269719816, "learning_rate": 2.7926073568664137e-06, "loss": 0.297, "step": 21771 }, { "epoch": 2.0431681681681684, "grad_norm": 0.9961753141553497, "learning_rate": 2.792117490888152e-06, "loss": 0.3241, "step": 21772 }, { "epoch": 2.043262012012012, "grad_norm": 3.000414361630965, "learning_rate": 2.7916276512347307e-06, "loss": 0.3064, "step": 21773 }, { "epoch": 2.043355855855856, "grad_norm": 1.1907712223174591, "learning_rate": 2.791137837911995e-06, "loss": 0.3526, "step": 21774 }, { "epoch": 2.0434496996997, "grad_norm": 1.3740971440403744, "learning_rate": 2.7906480509257833e-06, "loss": 0.323, "step": 21775 }, { "epoch": 2.0435435435435436, "grad_norm": 1.3692803270171539, "learning_rate": 2.7901582902819325e-06, "loss": 0.3267, "step": 21776 }, { "epoch": 2.0436373873873874, "grad_norm": 0.8798378400841425, "learning_rate": 2.789668555986287e-06, "loss": 0.2942, "step": 21777 }, { "epoch": 2.0437312312312312, "grad_norm": 1.0255734542357349, "learning_rate": 2.789178848044683e-06, "loss": 0.323, "step": 21778 }, { "epoch": 2.043825075075075, "grad_norm": 1.4069609222761452, "learning_rate": 2.7886891664629577e-06, "loss": 0.2933, "step": 21779 }, { "epoch": 2.043918918918919, "grad_norm": 1.4136164654375634, "learning_rate": 2.788199511246954e-06, "loss": 0.3399, "step": 21780 }, { "epoch": 2.0440127627627627, "grad_norm": 1.1048306725102854, "learning_rate": 2.787709882402508e-06, "loss": 0.3243, "step": 21781 }, { "epoch": 2.0441066066066065, "grad_norm": 1.3271748003717527, "learning_rate": 2.787220279935457e-06, "loss": 0.3282, "step": 21782 }, { "epoch": 2.0442004504504503, "grad_norm": 1.3978556133966196, "learning_rate": 2.7867307038516387e-06, "loss": 0.3277, "step": 21783 }, { "epoch": 2.044294294294294, "grad_norm": 1.0817733840488901, "learning_rate": 2.7862411541568918e-06, "loss": 0.2889, "step": 21784 }, { "epoch": 2.0443881381381384, "grad_norm": 0.9875057197992395, "learning_rate": 2.7857516308570497e-06, "loss": 0.3189, "step": 21785 }, { "epoch": 2.044481981981982, "grad_norm": 1.2227854984457849, "learning_rate": 2.785262133957953e-06, "loss": 0.3309, "step": 21786 }, { "epoch": 2.044575825825826, "grad_norm": 1.3080023520771658, "learning_rate": 2.784772663465437e-06, "loss": 0.2466, "step": 21787 }, { "epoch": 2.04466966966967, "grad_norm": 1.0784014497272065, "learning_rate": 2.784283219385335e-06, "loss": 0.3349, "step": 21788 }, { "epoch": 2.0447635135135136, "grad_norm": 1.0544709940474586, "learning_rate": 2.783793801723488e-06, "loss": 0.3487, "step": 21789 }, { "epoch": 2.0448573573573574, "grad_norm": 1.100863281674999, "learning_rate": 2.783304410485727e-06, "loss": 0.3107, "step": 21790 }, { "epoch": 2.044951201201201, "grad_norm": 1.1176194214110147, "learning_rate": 2.782815045677887e-06, "loss": 0.2802, "step": 21791 }, { "epoch": 2.045045045045045, "grad_norm": 1.1753863689513127, "learning_rate": 2.782325707305806e-06, "loss": 0.3116, "step": 21792 }, { "epoch": 2.045138888888889, "grad_norm": 1.0177608539668552, "learning_rate": 2.781836395375317e-06, "loss": 0.308, "step": 21793 }, { "epoch": 2.0452327327327327, "grad_norm": 2.000427311930357, "learning_rate": 2.7813471098922527e-06, "loss": 0.2908, "step": 21794 }, { "epoch": 2.0453265765765765, "grad_norm": 1.1573369162236717, "learning_rate": 2.7808578508624482e-06, "loss": 0.3024, "step": 21795 }, { "epoch": 2.0454204204204203, "grad_norm": 1.1062804025238715, "learning_rate": 2.7803686182917376e-06, "loss": 0.3433, "step": 21796 }, { "epoch": 2.045514264264264, "grad_norm": 1.316639978125086, "learning_rate": 2.7798794121859505e-06, "loss": 0.2688, "step": 21797 }, { "epoch": 2.045608108108108, "grad_norm": 1.36937338687625, "learning_rate": 2.779390232550925e-06, "loss": 0.3123, "step": 21798 }, { "epoch": 2.045701951951952, "grad_norm": 1.2165573961953078, "learning_rate": 2.778901079392491e-06, "loss": 0.3399, "step": 21799 }, { "epoch": 2.045795795795796, "grad_norm": 1.0479513736148243, "learning_rate": 2.7784119527164786e-06, "loss": 0.3614, "step": 21800 }, { "epoch": 2.0458896396396398, "grad_norm": 0.9382652037899433, "learning_rate": 2.777922852528724e-06, "loss": 0.3019, "step": 21801 }, { "epoch": 2.0459834834834836, "grad_norm": 1.094159738376821, "learning_rate": 2.777433778835057e-06, "loss": 0.2776, "step": 21802 }, { "epoch": 2.0460773273273274, "grad_norm": 0.9907097431363721, "learning_rate": 2.776944731641306e-06, "loss": 0.3294, "step": 21803 }, { "epoch": 2.046171171171171, "grad_norm": 1.1395543054114088, "learning_rate": 2.776455710953307e-06, "loss": 0.2809, "step": 21804 }, { "epoch": 2.046265015015015, "grad_norm": 2.7746752837620847, "learning_rate": 2.7759667167768878e-06, "loss": 0.3801, "step": 21805 }, { "epoch": 2.046358858858859, "grad_norm": 1.063221143860448, "learning_rate": 2.775477749117878e-06, "loss": 0.3135, "step": 21806 }, { "epoch": 2.0464527027027026, "grad_norm": 0.9465536999173063, "learning_rate": 2.774988807982111e-06, "loss": 0.3288, "step": 21807 }, { "epoch": 2.0465465465465464, "grad_norm": 1.567198474628393, "learning_rate": 2.774499893375413e-06, "loss": 0.2949, "step": 21808 }, { "epoch": 2.0466403903903903, "grad_norm": 1.0171264573188232, "learning_rate": 2.7740110053036156e-06, "loss": 0.3258, "step": 21809 }, { "epoch": 2.046734234234234, "grad_norm": 1.0180681737622848, "learning_rate": 2.7735221437725467e-06, "loss": 0.3316, "step": 21810 }, { "epoch": 2.046828078078078, "grad_norm": 1.46395001871699, "learning_rate": 2.7730333087880355e-06, "loss": 0.3421, "step": 21811 }, { "epoch": 2.046921921921922, "grad_norm": 1.0701138997222022, "learning_rate": 2.772544500355908e-06, "loss": 0.3617, "step": 21812 }, { "epoch": 2.047015765765766, "grad_norm": 1.0832692238867978, "learning_rate": 2.7720557184819975e-06, "loss": 0.343, "step": 21813 }, { "epoch": 2.0471096096096097, "grad_norm": 1.2461194058925156, "learning_rate": 2.7715669631721277e-06, "loss": 0.3464, "step": 21814 }, { "epoch": 2.0472034534534536, "grad_norm": 1.46478339890335, "learning_rate": 2.7710782344321258e-06, "loss": 0.2873, "step": 21815 }, { "epoch": 2.0472972972972974, "grad_norm": 1.0541293543663754, "learning_rate": 2.7705895322678224e-06, "loss": 0.3223, "step": 21816 }, { "epoch": 2.047391141141141, "grad_norm": 1.0824548483440006, "learning_rate": 2.770100856685042e-06, "loss": 0.3295, "step": 21817 }, { "epoch": 2.047484984984985, "grad_norm": 0.983952969351998, "learning_rate": 2.7696122076896102e-06, "loss": 0.3345, "step": 21818 }, { "epoch": 2.047578828828829, "grad_norm": 0.9392602583483566, "learning_rate": 2.769123585287356e-06, "loss": 0.3425, "step": 21819 }, { "epoch": 2.0476726726726726, "grad_norm": 1.9129920599933627, "learning_rate": 2.768634989484104e-06, "loss": 0.324, "step": 21820 }, { "epoch": 2.0477665165165164, "grad_norm": 1.0640015985625018, "learning_rate": 2.76814642028568e-06, "loss": 0.328, "step": 21821 }, { "epoch": 2.0478603603603602, "grad_norm": 1.3448696558429432, "learning_rate": 2.7676578776979083e-06, "loss": 0.3286, "step": 21822 }, { "epoch": 2.047954204204204, "grad_norm": 1.1086188759118647, "learning_rate": 2.767169361726615e-06, "loss": 0.3353, "step": 21823 }, { "epoch": 2.048048048048048, "grad_norm": 0.9515812016239279, "learning_rate": 2.766680872377622e-06, "loss": 0.344, "step": 21824 }, { "epoch": 2.048141891891892, "grad_norm": 1.0217903036220168, "learning_rate": 2.7661924096567578e-06, "loss": 0.293, "step": 21825 }, { "epoch": 2.048235735735736, "grad_norm": 1.1362820734652184, "learning_rate": 2.7657039735698443e-06, "loss": 0.3086, "step": 21826 }, { "epoch": 2.0483295795795797, "grad_norm": 1.0135725723074986, "learning_rate": 2.7652155641227037e-06, "loss": 0.3284, "step": 21827 }, { "epoch": 2.0484234234234235, "grad_norm": 1.1423872545774512, "learning_rate": 2.7647271813211625e-06, "loss": 0.3237, "step": 21828 }, { "epoch": 2.0485172672672673, "grad_norm": 1.3333689434143492, "learning_rate": 2.764238825171042e-06, "loss": 0.2926, "step": 21829 }, { "epoch": 2.048611111111111, "grad_norm": 1.0685909827234212, "learning_rate": 2.7637504956781636e-06, "loss": 0.3233, "step": 21830 }, { "epoch": 2.048704954954955, "grad_norm": 1.0988953337777152, "learning_rate": 2.7632621928483526e-06, "loss": 0.3239, "step": 21831 }, { "epoch": 2.048798798798799, "grad_norm": 0.9478844385644807, "learning_rate": 2.7627739166874306e-06, "loss": 0.2985, "step": 21832 }, { "epoch": 2.0488926426426426, "grad_norm": 1.2627951804858037, "learning_rate": 2.762285667201218e-06, "loss": 0.3422, "step": 21833 }, { "epoch": 2.0489864864864864, "grad_norm": 1.165110400531391, "learning_rate": 2.761797444395537e-06, "loss": 0.3471, "step": 21834 }, { "epoch": 2.04908033033033, "grad_norm": 1.0613146861686764, "learning_rate": 2.7613092482762084e-06, "loss": 0.304, "step": 21835 }, { "epoch": 2.049174174174174, "grad_norm": 0.9209811743154532, "learning_rate": 2.760821078849051e-06, "loss": 0.3056, "step": 21836 }, { "epoch": 2.049268018018018, "grad_norm": 1.1167087257146795, "learning_rate": 2.7603329361198896e-06, "loss": 0.3038, "step": 21837 }, { "epoch": 2.0493618618618616, "grad_norm": 1.1008513295833182, "learning_rate": 2.7598448200945418e-06, "loss": 0.3288, "step": 21838 }, { "epoch": 2.049455705705706, "grad_norm": 1.4853130105305083, "learning_rate": 2.759356730778826e-06, "loss": 0.3025, "step": 21839 }, { "epoch": 2.0495495495495497, "grad_norm": 1.1674891893635966, "learning_rate": 2.7588686681785658e-06, "loss": 0.3283, "step": 21840 }, { "epoch": 2.0496433933933935, "grad_norm": 1.1408182617178984, "learning_rate": 2.758380632299579e-06, "loss": 0.286, "step": 21841 }, { "epoch": 2.0497372372372373, "grad_norm": 1.4604722955966787, "learning_rate": 2.7578926231476805e-06, "loss": 0.3472, "step": 21842 }, { "epoch": 2.049831081081081, "grad_norm": 1.1064900221668217, "learning_rate": 2.7574046407286948e-06, "loss": 0.2698, "step": 21843 }, { "epoch": 2.049924924924925, "grad_norm": 1.1775022310662289, "learning_rate": 2.756916685048437e-06, "loss": 0.3171, "step": 21844 }, { "epoch": 2.0500187687687688, "grad_norm": 1.1385422163375052, "learning_rate": 2.756428756112726e-06, "loss": 0.3042, "step": 21845 }, { "epoch": 2.0501126126126126, "grad_norm": 0.9884561555237352, "learning_rate": 2.7559408539273787e-06, "loss": 0.2997, "step": 21846 }, { "epoch": 2.0502064564564564, "grad_norm": 0.9533726969123568, "learning_rate": 2.7554529784982125e-06, "loss": 0.3156, "step": 21847 }, { "epoch": 2.0503003003003, "grad_norm": 1.1092773557081754, "learning_rate": 2.754965129831043e-06, "loss": 0.3291, "step": 21848 }, { "epoch": 2.050394144144144, "grad_norm": 1.1166238680846747, "learning_rate": 2.7544773079316905e-06, "loss": 0.3277, "step": 21849 }, { "epoch": 2.050487987987988, "grad_norm": 1.3503990352695272, "learning_rate": 2.753989512805969e-06, "loss": 0.3302, "step": 21850 }, { "epoch": 2.0505818318318316, "grad_norm": 1.0257590629223354, "learning_rate": 2.7535017444596936e-06, "loss": 0.2964, "step": 21851 }, { "epoch": 2.050675675675676, "grad_norm": 1.0964064316340392, "learning_rate": 2.753014002898683e-06, "loss": 0.3259, "step": 21852 }, { "epoch": 2.0507695195195197, "grad_norm": 1.1482840301583064, "learning_rate": 2.7525262881287507e-06, "loss": 0.3359, "step": 21853 }, { "epoch": 2.0508633633633635, "grad_norm": 1.218874272244901, "learning_rate": 2.75203860015571e-06, "loss": 0.3045, "step": 21854 }, { "epoch": 2.0509572072072073, "grad_norm": 1.1334211388439428, "learning_rate": 2.751550938985379e-06, "loss": 0.3368, "step": 21855 }, { "epoch": 2.051051051051051, "grad_norm": 1.0713908337668336, "learning_rate": 2.7510633046235713e-06, "loss": 0.3373, "step": 21856 }, { "epoch": 2.051144894894895, "grad_norm": 1.0502297681312809, "learning_rate": 2.750575697076101e-06, "loss": 0.299, "step": 21857 }, { "epoch": 2.0512387387387387, "grad_norm": 1.0088486314121572, "learning_rate": 2.7500881163487813e-06, "loss": 0.3247, "step": 21858 }, { "epoch": 2.0513325825825826, "grad_norm": 0.9736821509174682, "learning_rate": 2.7496005624474253e-06, "loss": 0.3062, "step": 21859 }, { "epoch": 2.0514264264264264, "grad_norm": 1.2294433383327092, "learning_rate": 2.749113035377845e-06, "loss": 0.3192, "step": 21860 }, { "epoch": 2.05152027027027, "grad_norm": 1.23452616926578, "learning_rate": 2.7486255351458568e-06, "loss": 0.3161, "step": 21861 }, { "epoch": 2.051614114114114, "grad_norm": 1.0587036099031644, "learning_rate": 2.748138061757271e-06, "loss": 0.3067, "step": 21862 }, { "epoch": 2.051707957957958, "grad_norm": 1.1040104401775834, "learning_rate": 2.7476506152178985e-06, "loss": 0.2946, "step": 21863 }, { "epoch": 2.0518018018018016, "grad_norm": 1.2387687120771933, "learning_rate": 2.7471631955335547e-06, "loss": 0.3207, "step": 21864 }, { "epoch": 2.051895645645646, "grad_norm": 1.1780246582247644, "learning_rate": 2.7466758027100495e-06, "loss": 0.3348, "step": 21865 }, { "epoch": 2.0519894894894897, "grad_norm": 0.9871498964975602, "learning_rate": 2.7461884367531915e-06, "loss": 0.2666, "step": 21866 }, { "epoch": 2.0520833333333335, "grad_norm": 1.5169788225096903, "learning_rate": 2.7457010976687963e-06, "loss": 0.3264, "step": 21867 }, { "epoch": 2.0521771771771773, "grad_norm": 1.1392174518112037, "learning_rate": 2.745213785462672e-06, "loss": 0.3012, "step": 21868 }, { "epoch": 2.052271021021021, "grad_norm": 1.0548689121933306, "learning_rate": 2.7447265001406296e-06, "loss": 0.3292, "step": 21869 }, { "epoch": 2.052364864864865, "grad_norm": 1.0111669794890628, "learning_rate": 2.7442392417084785e-06, "loss": 0.3163, "step": 21870 }, { "epoch": 2.0524587087087087, "grad_norm": 1.3575030081667057, "learning_rate": 2.743752010172028e-06, "loss": 0.3272, "step": 21871 }, { "epoch": 2.0525525525525525, "grad_norm": 1.1426778054661226, "learning_rate": 2.743264805537086e-06, "loss": 0.3558, "step": 21872 }, { "epoch": 2.0526463963963963, "grad_norm": 1.2319138243883523, "learning_rate": 2.7427776278094654e-06, "loss": 0.3291, "step": 21873 }, { "epoch": 2.05274024024024, "grad_norm": 1.0700884148515046, "learning_rate": 2.742290476994972e-06, "loss": 0.3456, "step": 21874 }, { "epoch": 2.052834084084084, "grad_norm": 1.2005081662884851, "learning_rate": 2.7418033530994137e-06, "loss": 0.3204, "step": 21875 }, { "epoch": 2.0529279279279278, "grad_norm": 1.3382540250789672, "learning_rate": 2.741316256128602e-06, "loss": 0.285, "step": 21876 }, { "epoch": 2.0530217717717716, "grad_norm": 1.0157149595955122, "learning_rate": 2.740829186088342e-06, "loss": 0.3157, "step": 21877 }, { "epoch": 2.0531156156156154, "grad_norm": 1.2037414392717458, "learning_rate": 2.74034214298444e-06, "loss": 0.3137, "step": 21878 }, { "epoch": 2.0532094594594597, "grad_norm": 1.2467252990027529, "learning_rate": 2.739855126822706e-06, "loss": 0.3318, "step": 21879 }, { "epoch": 2.0533033033033035, "grad_norm": 1.1643870100634883, "learning_rate": 2.7393681376089454e-06, "loss": 0.3151, "step": 21880 }, { "epoch": 2.0533971471471473, "grad_norm": 1.1566090239205669, "learning_rate": 2.7388811753489655e-06, "loss": 0.3356, "step": 21881 }, { "epoch": 2.053490990990991, "grad_norm": 2.182410950288089, "learning_rate": 2.7383942400485695e-06, "loss": 0.2802, "step": 21882 }, { "epoch": 2.053584834834835, "grad_norm": 0.9819219802030825, "learning_rate": 2.737907331713567e-06, "loss": 0.3235, "step": 21883 }, { "epoch": 2.0536786786786787, "grad_norm": 1.1267586211139418, "learning_rate": 2.737420450349759e-06, "loss": 0.3421, "step": 21884 }, { "epoch": 2.0537725225225225, "grad_norm": 1.0231735367778654, "learning_rate": 2.736933595962955e-06, "loss": 0.3548, "step": 21885 }, { "epoch": 2.0538663663663663, "grad_norm": 0.9656461752267428, "learning_rate": 2.7364467685589583e-06, "loss": 0.312, "step": 21886 }, { "epoch": 2.05396021021021, "grad_norm": 1.1786394050969498, "learning_rate": 2.735959968143571e-06, "loss": 0.2863, "step": 21887 }, { "epoch": 2.054054054054054, "grad_norm": 1.055892574257377, "learning_rate": 2.735473194722602e-06, "loss": 0.3318, "step": 21888 }, { "epoch": 2.0541478978978978, "grad_norm": 1.290330139788095, "learning_rate": 2.7349864483018525e-06, "loss": 0.2853, "step": 21889 }, { "epoch": 2.0542417417417416, "grad_norm": 2.442147132415031, "learning_rate": 2.7344997288871235e-06, "loss": 0.3078, "step": 21890 }, { "epoch": 2.0543355855855854, "grad_norm": 1.1182432249080865, "learning_rate": 2.734013036484223e-06, "loss": 0.3116, "step": 21891 }, { "epoch": 2.0544294294294296, "grad_norm": 1.1049500380414619, "learning_rate": 2.7335263710989523e-06, "loss": 0.3479, "step": 21892 }, { "epoch": 2.0545232732732734, "grad_norm": 0.9122470842376305, "learning_rate": 2.733039732737113e-06, "loss": 0.3537, "step": 21893 }, { "epoch": 2.0546171171171173, "grad_norm": 1.0061570136971207, "learning_rate": 2.7325531214045086e-06, "loss": 0.3404, "step": 21894 }, { "epoch": 2.054710960960961, "grad_norm": 1.1641621037122465, "learning_rate": 2.7320665371069393e-06, "loss": 0.2966, "step": 21895 }, { "epoch": 2.054804804804805, "grad_norm": 1.0723674271144639, "learning_rate": 2.7315799798502057e-06, "loss": 0.3397, "step": 21896 }, { "epoch": 2.0548986486486487, "grad_norm": 1.3674165627629615, "learning_rate": 2.7310934496401132e-06, "loss": 0.27, "step": 21897 }, { "epoch": 2.0549924924924925, "grad_norm": 0.8966829281081311, "learning_rate": 2.7306069464824604e-06, "loss": 0.3309, "step": 21898 }, { "epoch": 2.0550863363363363, "grad_norm": 1.0742177036102685, "learning_rate": 2.7301204703830464e-06, "loss": 0.3334, "step": 21899 }, { "epoch": 2.05518018018018, "grad_norm": 1.170832341310155, "learning_rate": 2.7296340213476746e-06, "loss": 0.3177, "step": 21900 }, { "epoch": 2.055274024024024, "grad_norm": 0.9781953880046572, "learning_rate": 2.729147599382144e-06, "loss": 0.3351, "step": 21901 }, { "epoch": 2.0553678678678677, "grad_norm": 1.3242288381391913, "learning_rate": 2.7286612044922516e-06, "loss": 0.2808, "step": 21902 }, { "epoch": 2.0554617117117115, "grad_norm": 1.0529096218073715, "learning_rate": 2.7281748366838014e-06, "loss": 0.2839, "step": 21903 }, { "epoch": 2.0555555555555554, "grad_norm": 1.1753955289886335, "learning_rate": 2.7276884959625895e-06, "loss": 0.2758, "step": 21904 }, { "epoch": 2.0556493993993996, "grad_norm": 0.998733320695438, "learning_rate": 2.7272021823344154e-06, "loss": 0.3159, "step": 21905 }, { "epoch": 2.0557432432432434, "grad_norm": 1.0876201017363063, "learning_rate": 2.726715895805077e-06, "loss": 0.3155, "step": 21906 }, { "epoch": 2.0558370870870872, "grad_norm": 1.122297508171841, "learning_rate": 2.726229636380373e-06, "loss": 0.3223, "step": 21907 }, { "epoch": 2.055930930930931, "grad_norm": 0.9502556848541008, "learning_rate": 2.725743404066098e-06, "loss": 0.3157, "step": 21908 }, { "epoch": 2.056024774774775, "grad_norm": 0.9559283692168389, "learning_rate": 2.725257198868054e-06, "loss": 0.3625, "step": 21909 }, { "epoch": 2.0561186186186187, "grad_norm": 1.1361245301442169, "learning_rate": 2.7247710207920364e-06, "loss": 0.2651, "step": 21910 }, { "epoch": 2.0562124624624625, "grad_norm": 1.002910477699796, "learning_rate": 2.72428486984384e-06, "loss": 0.3239, "step": 21911 }, { "epoch": 2.0563063063063063, "grad_norm": 1.2385491846768786, "learning_rate": 2.7237987460292645e-06, "loss": 0.3012, "step": 21912 }, { "epoch": 2.05640015015015, "grad_norm": 1.022629546059794, "learning_rate": 2.723312649354105e-06, "loss": 0.3332, "step": 21913 }, { "epoch": 2.056493993993994, "grad_norm": 1.1617328056819578, "learning_rate": 2.722826579824154e-06, "loss": 0.3299, "step": 21914 }, { "epoch": 2.0565878378378377, "grad_norm": 1.5263767675392943, "learning_rate": 2.722340537445213e-06, "loss": 0.304, "step": 21915 }, { "epoch": 2.0566816816816815, "grad_norm": 1.252292838567842, "learning_rate": 2.7218545222230718e-06, "loss": 0.3241, "step": 21916 }, { "epoch": 2.0567755255255253, "grad_norm": 1.0805439188970547, "learning_rate": 2.721368534163529e-06, "loss": 0.3083, "step": 21917 }, { "epoch": 2.0568693693693696, "grad_norm": 1.3000075300377243, "learning_rate": 2.7208825732723764e-06, "loss": 0.35, "step": 21918 }, { "epoch": 2.0569632132132134, "grad_norm": 1.2112347371877652, "learning_rate": 2.7203966395554093e-06, "loss": 0.3013, "step": 21919 }, { "epoch": 2.057057057057057, "grad_norm": 1.1546337998611123, "learning_rate": 2.7199107330184194e-06, "loss": 0.3367, "step": 21920 }, { "epoch": 2.057150900900901, "grad_norm": 0.9291997411816675, "learning_rate": 2.7194248536672045e-06, "loss": 0.2904, "step": 21921 }, { "epoch": 2.057244744744745, "grad_norm": 0.9727989927101167, "learning_rate": 2.7189390015075555e-06, "loss": 0.3253, "step": 21922 }, { "epoch": 2.0573385885885886, "grad_norm": 1.2191909030022443, "learning_rate": 2.7184531765452625e-06, "loss": 0.3351, "step": 21923 }, { "epoch": 2.0574324324324325, "grad_norm": 1.27154188701924, "learning_rate": 2.7179673787861238e-06, "loss": 0.3057, "step": 21924 }, { "epoch": 2.0575262762762763, "grad_norm": 1.3685734638171365, "learning_rate": 2.717481608235928e-06, "loss": 0.2887, "step": 21925 }, { "epoch": 2.05762012012012, "grad_norm": 1.0330090277278186, "learning_rate": 2.7169958649004657e-06, "loss": 0.3126, "step": 21926 }, { "epoch": 2.057713963963964, "grad_norm": 1.0506252423462785, "learning_rate": 2.7165101487855326e-06, "loss": 0.2684, "step": 21927 }, { "epoch": 2.0578078078078077, "grad_norm": 1.9225325763198606, "learning_rate": 2.7160244598969175e-06, "loss": 0.2854, "step": 21928 }, { "epoch": 2.0579016516516515, "grad_norm": 1.1580324410319534, "learning_rate": 2.7155387982404096e-06, "loss": 0.3157, "step": 21929 }, { "epoch": 2.0579954954954953, "grad_norm": 1.9418074385410373, "learning_rate": 2.7150531638218037e-06, "loss": 0.3255, "step": 21930 }, { "epoch": 2.058089339339339, "grad_norm": 1.2201009112856476, "learning_rate": 2.7145675566468895e-06, "loss": 0.3379, "step": 21931 }, { "epoch": 2.0581831831831834, "grad_norm": 1.0744476358767991, "learning_rate": 2.7140819767214517e-06, "loss": 0.2708, "step": 21932 }, { "epoch": 2.058277027027027, "grad_norm": 1.0861703895638077, "learning_rate": 2.713596424051285e-06, "loss": 0.3313, "step": 21933 }, { "epoch": 2.058370870870871, "grad_norm": 1.418733309629821, "learning_rate": 2.7131108986421774e-06, "loss": 0.3145, "step": 21934 }, { "epoch": 2.058464714714715, "grad_norm": 0.9938264845238877, "learning_rate": 2.712625400499916e-06, "loss": 0.3068, "step": 21935 }, { "epoch": 2.0585585585585586, "grad_norm": 1.029140945910411, "learning_rate": 2.712139929630293e-06, "loss": 0.3647, "step": 21936 }, { "epoch": 2.0586524024024024, "grad_norm": 1.5637243542330141, "learning_rate": 2.711654486039095e-06, "loss": 0.3282, "step": 21937 }, { "epoch": 2.0587462462462462, "grad_norm": 1.0545503491206751, "learning_rate": 2.711169069732108e-06, "loss": 0.3332, "step": 21938 }, { "epoch": 2.05884009009009, "grad_norm": 1.1613722983413621, "learning_rate": 2.710683680715123e-06, "loss": 0.305, "step": 21939 }, { "epoch": 2.058933933933934, "grad_norm": 1.1293049201473215, "learning_rate": 2.710198318993926e-06, "loss": 0.3019, "step": 21940 }, { "epoch": 2.0590277777777777, "grad_norm": 1.0996792757337954, "learning_rate": 2.7097129845743025e-06, "loss": 0.2886, "step": 21941 }, { "epoch": 2.0591216216216215, "grad_norm": 1.2985551777653819, "learning_rate": 2.7092276774620422e-06, "loss": 0.2965, "step": 21942 }, { "epoch": 2.0592154654654653, "grad_norm": 1.1392002166223985, "learning_rate": 2.7087423976629303e-06, "loss": 0.3019, "step": 21943 }, { "epoch": 2.059309309309309, "grad_norm": 1.0829727847219057, "learning_rate": 2.7082571451827523e-06, "loss": 0.3135, "step": 21944 }, { "epoch": 2.0594031531531534, "grad_norm": 0.9863020899854511, "learning_rate": 2.707771920027294e-06, "loss": 0.3159, "step": 21945 }, { "epoch": 2.059496996996997, "grad_norm": 1.1681455240683627, "learning_rate": 2.7072867222023407e-06, "loss": 0.3531, "step": 21946 }, { "epoch": 2.059590840840841, "grad_norm": 1.1756438468249577, "learning_rate": 2.706801551713676e-06, "loss": 0.3051, "step": 21947 }, { "epoch": 2.059684684684685, "grad_norm": 1.1561697911429967, "learning_rate": 2.7063164085670878e-06, "loss": 0.3195, "step": 21948 }, { "epoch": 2.0597785285285286, "grad_norm": 0.9169147335328323, "learning_rate": 2.7058312927683593e-06, "loss": 0.3224, "step": 21949 }, { "epoch": 2.0598723723723724, "grad_norm": 1.1165142872413047, "learning_rate": 2.7053462043232727e-06, "loss": 0.2806, "step": 21950 }, { "epoch": 2.0599662162162162, "grad_norm": 1.0670454752540988, "learning_rate": 2.7048611432376147e-06, "loss": 0.3133, "step": 21951 }, { "epoch": 2.06006006006006, "grad_norm": 1.1858051316725393, "learning_rate": 2.7043761095171678e-06, "loss": 0.3399, "step": 21952 }, { "epoch": 2.060153903903904, "grad_norm": 1.0883021560971218, "learning_rate": 2.703891103167713e-06, "loss": 0.2835, "step": 21953 }, { "epoch": 2.0602477477477477, "grad_norm": 1.0694811031539844, "learning_rate": 2.703406124195036e-06, "loss": 0.311, "step": 21954 }, { "epoch": 2.0603415915915915, "grad_norm": 1.2612922191623188, "learning_rate": 2.702921172604919e-06, "loss": 0.3264, "step": 21955 }, { "epoch": 2.0604354354354353, "grad_norm": 0.9685148491419389, "learning_rate": 2.7024362484031423e-06, "loss": 0.3335, "step": 21956 }, { "epoch": 2.060529279279279, "grad_norm": 1.040314650955172, "learning_rate": 2.7019513515954887e-06, "loss": 0.3674, "step": 21957 }, { "epoch": 2.060623123123123, "grad_norm": 1.0654923524172848, "learning_rate": 2.7014664821877403e-06, "loss": 0.3211, "step": 21958 }, { "epoch": 2.060716966966967, "grad_norm": 1.1664242441168027, "learning_rate": 2.7009816401856755e-06, "loss": 0.3428, "step": 21959 }, { "epoch": 2.060810810810811, "grad_norm": 1.0919040239506614, "learning_rate": 2.700496825595079e-06, "loss": 0.3248, "step": 21960 }, { "epoch": 2.0609046546546548, "grad_norm": 1.107348387824343, "learning_rate": 2.7000120384217286e-06, "loss": 0.304, "step": 21961 }, { "epoch": 2.0609984984984986, "grad_norm": 1.0886006090313327, "learning_rate": 2.6995272786714043e-06, "loss": 0.3029, "step": 21962 }, { "epoch": 2.0610923423423424, "grad_norm": 1.2383750774218345, "learning_rate": 2.6990425463498893e-06, "loss": 0.3544, "step": 21963 }, { "epoch": 2.061186186186186, "grad_norm": 1.0937085623692249, "learning_rate": 2.69855784146296e-06, "loss": 0.3393, "step": 21964 }, { "epoch": 2.06128003003003, "grad_norm": 0.9615231423689001, "learning_rate": 2.698073164016395e-06, "loss": 0.3141, "step": 21965 }, { "epoch": 2.061373873873874, "grad_norm": 1.107385584513615, "learning_rate": 2.697588514015976e-06, "loss": 0.3273, "step": 21966 }, { "epoch": 2.0614677177177176, "grad_norm": 0.9486462204379157, "learning_rate": 2.6971038914674807e-06, "loss": 0.3529, "step": 21967 }, { "epoch": 2.0615615615615615, "grad_norm": 1.1183377596161241, "learning_rate": 2.696619296376687e-06, "loss": 0.3066, "step": 21968 }, { "epoch": 2.0616554054054053, "grad_norm": 1.002799790229274, "learning_rate": 2.696134728749372e-06, "loss": 0.2914, "step": 21969 }, { "epoch": 2.061749249249249, "grad_norm": 1.2725511797659705, "learning_rate": 2.6956501885913133e-06, "loss": 0.2858, "step": 21970 }, { "epoch": 2.061843093093093, "grad_norm": 0.967819715529542, "learning_rate": 2.695165675908288e-06, "loss": 0.2902, "step": 21971 }, { "epoch": 2.061936936936937, "grad_norm": 1.3368195745229445, "learning_rate": 2.6946811907060743e-06, "loss": 0.3366, "step": 21972 }, { "epoch": 2.062030780780781, "grad_norm": 2.106957847495275, "learning_rate": 2.6941967329904483e-06, "loss": 0.3064, "step": 21973 }, { "epoch": 2.0621246246246248, "grad_norm": 1.0338202599518571, "learning_rate": 2.6937123027671845e-06, "loss": 0.3482, "step": 21974 }, { "epoch": 2.0622184684684686, "grad_norm": 1.7328436979962452, "learning_rate": 2.6932279000420623e-06, "loss": 0.2956, "step": 21975 }, { "epoch": 2.0623123123123124, "grad_norm": 0.9654289094660377, "learning_rate": 2.692743524820855e-06, "loss": 0.3018, "step": 21976 }, { "epoch": 2.062406156156156, "grad_norm": 1.2118487148663213, "learning_rate": 2.6922591771093364e-06, "loss": 0.2838, "step": 21977 }, { "epoch": 2.0625, "grad_norm": 1.2982034633688957, "learning_rate": 2.691774856913285e-06, "loss": 0.3233, "step": 21978 }, { "epoch": 2.062593843843844, "grad_norm": 0.932766493878726, "learning_rate": 2.691290564238474e-06, "loss": 0.3089, "step": 21979 }, { "epoch": 2.0626876876876876, "grad_norm": 1.1810236895343564, "learning_rate": 2.690806299090677e-06, "loss": 0.3195, "step": 21980 }, { "epoch": 2.0627815315315314, "grad_norm": 0.9579190224940335, "learning_rate": 2.6903220614756685e-06, "loss": 0.3316, "step": 21981 }, { "epoch": 2.0628753753753752, "grad_norm": 1.1640650665290035, "learning_rate": 2.6898378513992217e-06, "loss": 0.3141, "step": 21982 }, { "epoch": 2.062969219219219, "grad_norm": 1.2353379884846505, "learning_rate": 2.6893536688671086e-06, "loss": 0.3157, "step": 21983 }, { "epoch": 2.063063063063063, "grad_norm": 1.0679770120992933, "learning_rate": 2.688869513885105e-06, "loss": 0.3481, "step": 21984 }, { "epoch": 2.063156906906907, "grad_norm": 1.3786178881859308, "learning_rate": 2.6883853864589827e-06, "loss": 0.3203, "step": 21985 }, { "epoch": 2.063250750750751, "grad_norm": 1.0748875854184097, "learning_rate": 2.6879012865945116e-06, "loss": 0.3482, "step": 21986 }, { "epoch": 2.0633445945945947, "grad_norm": 1.0594783906370688, "learning_rate": 2.687417214297467e-06, "loss": 0.3049, "step": 21987 }, { "epoch": 2.0634384384384385, "grad_norm": 0.9905140576396991, "learning_rate": 2.6869331695736188e-06, "loss": 0.3348, "step": 21988 }, { "epoch": 2.0635322822822824, "grad_norm": 1.6223556924353653, "learning_rate": 2.686449152428738e-06, "loss": 0.3411, "step": 21989 }, { "epoch": 2.063626126126126, "grad_norm": 1.1711119053032546, "learning_rate": 2.6859651628685974e-06, "loss": 0.2758, "step": 21990 }, { "epoch": 2.06371996996997, "grad_norm": 0.9603303406821712, "learning_rate": 2.685481200898966e-06, "loss": 0.3322, "step": 21991 }, { "epoch": 2.063813813813814, "grad_norm": 0.955497727602026, "learning_rate": 2.684997266525616e-06, "loss": 0.3069, "step": 21992 }, { "epoch": 2.0639076576576576, "grad_norm": 1.0396876574171237, "learning_rate": 2.6845133597543148e-06, "loss": 0.3153, "step": 21993 }, { "epoch": 2.0640015015015014, "grad_norm": 0.9955160199172718, "learning_rate": 2.684029480590834e-06, "loss": 0.2797, "step": 21994 }, { "epoch": 2.0640953453453452, "grad_norm": 0.9783302974597414, "learning_rate": 2.68354562904094e-06, "loss": 0.3381, "step": 21995 }, { "epoch": 2.064189189189189, "grad_norm": 1.297024962959817, "learning_rate": 2.683061805110406e-06, "loss": 0.3337, "step": 21996 }, { "epoch": 2.064283033033033, "grad_norm": 0.905191922855304, "learning_rate": 2.682578008804999e-06, "loss": 0.3399, "step": 21997 }, { "epoch": 2.064376876876877, "grad_norm": 0.9454843629301488, "learning_rate": 2.6820942401304854e-06, "loss": 0.3276, "step": 21998 }, { "epoch": 2.064470720720721, "grad_norm": 1.1532876840229815, "learning_rate": 2.6816104990926366e-06, "loss": 0.3314, "step": 21999 }, { "epoch": 2.0645645645645647, "grad_norm": 1.0996726890588102, "learning_rate": 2.6811267856972188e-06, "loss": 0.3732, "step": 22000 }, { "epoch": 2.0646584084084085, "grad_norm": 1.0759670153125782, "learning_rate": 2.680643099949998e-06, "loss": 0.29, "step": 22001 }, { "epoch": 2.0647522522522523, "grad_norm": 0.9317387878876193, "learning_rate": 2.6801594418567432e-06, "loss": 0.3111, "step": 22002 }, { "epoch": 2.064846096096096, "grad_norm": 1.0322744252845584, "learning_rate": 2.679675811423221e-06, "loss": 0.3369, "step": 22003 }, { "epoch": 2.06493993993994, "grad_norm": 1.144677848027221, "learning_rate": 2.6791922086551967e-06, "loss": 0.3241, "step": 22004 }, { "epoch": 2.0650337837837838, "grad_norm": 1.4843043496090833, "learning_rate": 2.678708633558438e-06, "loss": 0.2954, "step": 22005 }, { "epoch": 2.0651276276276276, "grad_norm": 1.0762702043143795, "learning_rate": 2.678225086138708e-06, "loss": 0.3663, "step": 22006 }, { "epoch": 2.0652214714714714, "grad_norm": 1.121265255860177, "learning_rate": 2.6777415664017727e-06, "loss": 0.294, "step": 22007 }, { "epoch": 2.065315315315315, "grad_norm": 1.1466930503010706, "learning_rate": 2.6772580743533994e-06, "loss": 0.326, "step": 22008 }, { "epoch": 2.065409159159159, "grad_norm": 0.9323979725065126, "learning_rate": 2.676774609999352e-06, "loss": 0.3229, "step": 22009 }, { "epoch": 2.065503003003003, "grad_norm": 1.2152936051690435, "learning_rate": 2.676291173345392e-06, "loss": 0.3031, "step": 22010 }, { "epoch": 2.0655968468468466, "grad_norm": 1.151660138485968, "learning_rate": 2.6758077643972877e-06, "loss": 0.3089, "step": 22011 }, { "epoch": 2.065690690690691, "grad_norm": 1.0304680934518702, "learning_rate": 2.6753243831608012e-06, "loss": 0.3019, "step": 22012 }, { "epoch": 2.0657845345345347, "grad_norm": 1.0493251552126195, "learning_rate": 2.6748410296416934e-06, "loss": 0.319, "step": 22013 }, { "epoch": 2.0658783783783785, "grad_norm": 1.1075521017867687, "learning_rate": 2.674357703845732e-06, "loss": 0.3519, "step": 22014 }, { "epoch": 2.0659722222222223, "grad_norm": 1.3552931884127593, "learning_rate": 2.673874405778677e-06, "loss": 0.3436, "step": 22015 }, { "epoch": 2.066066066066066, "grad_norm": 1.1087863583110626, "learning_rate": 2.6733911354462917e-06, "loss": 0.311, "step": 22016 }, { "epoch": 2.06615990990991, "grad_norm": 0.9819461477650513, "learning_rate": 2.6729078928543374e-06, "loss": 0.3174, "step": 22017 }, { "epoch": 2.0662537537537538, "grad_norm": 1.1083062145160243, "learning_rate": 2.672424678008576e-06, "loss": 0.3091, "step": 22018 }, { "epoch": 2.0663475975975976, "grad_norm": 0.9188166188685889, "learning_rate": 2.6719414909147678e-06, "loss": 0.3197, "step": 22019 }, { "epoch": 2.0664414414414414, "grad_norm": 1.176768903078339, "learning_rate": 2.6714583315786767e-06, "loss": 0.3045, "step": 22020 }, { "epoch": 2.066535285285285, "grad_norm": 1.8294925462522509, "learning_rate": 2.670975200006062e-06, "loss": 0.3211, "step": 22021 }, { "epoch": 2.066629129129129, "grad_norm": 0.9891631125472938, "learning_rate": 2.670492096202682e-06, "loss": 0.2999, "step": 22022 }, { "epoch": 2.066722972972973, "grad_norm": 1.1299735525133257, "learning_rate": 2.670009020174301e-06, "loss": 0.2978, "step": 22023 }, { "epoch": 2.0668168168168166, "grad_norm": 1.3944202205637155, "learning_rate": 2.6695259719266764e-06, "loss": 0.3115, "step": 22024 }, { "epoch": 2.066910660660661, "grad_norm": 1.0137605223584703, "learning_rate": 2.669042951465567e-06, "loss": 0.3455, "step": 22025 }, { "epoch": 2.0670045045045047, "grad_norm": 3.427136142185071, "learning_rate": 2.6685599587967336e-06, "loss": 0.3062, "step": 22026 }, { "epoch": 2.0670983483483485, "grad_norm": 1.1928346878837066, "learning_rate": 2.6680769939259347e-06, "loss": 0.3286, "step": 22027 }, { "epoch": 2.0671921921921923, "grad_norm": 4.824559140344617, "learning_rate": 2.667594056858929e-06, "loss": 0.319, "step": 22028 }, { "epoch": 2.067286036036036, "grad_norm": 1.2683506981249681, "learning_rate": 2.6671111476014723e-06, "loss": 0.3154, "step": 22029 }, { "epoch": 2.06737987987988, "grad_norm": 1.0084254201361538, "learning_rate": 2.6666282661593246e-06, "loss": 0.3393, "step": 22030 }, { "epoch": 2.0674737237237237, "grad_norm": 1.1509522384378013, "learning_rate": 2.6661454125382413e-06, "loss": 0.2931, "step": 22031 }, { "epoch": 2.0675675675675675, "grad_norm": 1.0735504930445012, "learning_rate": 2.6656625867439822e-06, "loss": 0.3132, "step": 22032 }, { "epoch": 2.0676614114114114, "grad_norm": 1.1313463280965754, "learning_rate": 2.6651797887823034e-06, "loss": 0.2864, "step": 22033 }, { "epoch": 2.067755255255255, "grad_norm": 1.1674682547069337, "learning_rate": 2.6646970186589582e-06, "loss": 0.3227, "step": 22034 }, { "epoch": 2.067849099099099, "grad_norm": 1.209968745727698, "learning_rate": 2.6642142763797075e-06, "loss": 0.2754, "step": 22035 }, { "epoch": 2.067942942942943, "grad_norm": 1.246590373979506, "learning_rate": 2.663731561950305e-06, "loss": 0.2928, "step": 22036 }, { "epoch": 2.0680367867867866, "grad_norm": 1.0485930084063022, "learning_rate": 2.663248875376504e-06, "loss": 0.318, "step": 22037 }, { "epoch": 2.0681306306306304, "grad_norm": 0.968775451881574, "learning_rate": 2.6627662166640637e-06, "loss": 0.2853, "step": 22038 }, { "epoch": 2.0682244744744747, "grad_norm": 1.02595633749915, "learning_rate": 2.662283585818737e-06, "loss": 0.3103, "step": 22039 }, { "epoch": 2.0683183183183185, "grad_norm": 1.0469722417079999, "learning_rate": 2.6618009828462783e-06, "loss": 0.3006, "step": 22040 }, { "epoch": 2.0684121621621623, "grad_norm": 1.3464625488850082, "learning_rate": 2.661318407752441e-06, "loss": 0.3224, "step": 22041 }, { "epoch": 2.068506006006006, "grad_norm": 1.09744705836963, "learning_rate": 2.660835860542981e-06, "loss": 0.3417, "step": 22042 }, { "epoch": 2.06859984984985, "grad_norm": 0.9935588758602907, "learning_rate": 2.660353341223647e-06, "loss": 0.3193, "step": 22043 }, { "epoch": 2.0686936936936937, "grad_norm": 1.0863993004080932, "learning_rate": 2.659870849800198e-06, "loss": 0.3052, "step": 22044 }, { "epoch": 2.0687875375375375, "grad_norm": 1.0731172225946235, "learning_rate": 2.659388386278384e-06, "loss": 0.2837, "step": 22045 }, { "epoch": 2.0688813813813813, "grad_norm": 0.9705966564825002, "learning_rate": 2.658905950663956e-06, "loss": 0.3384, "step": 22046 }, { "epoch": 2.068975225225225, "grad_norm": 0.9392182998124697, "learning_rate": 2.6584235429626693e-06, "loss": 0.3437, "step": 22047 }, { "epoch": 2.069069069069069, "grad_norm": 1.6527070715631924, "learning_rate": 2.657941163180275e-06, "loss": 0.3543, "step": 22048 }, { "epoch": 2.0691629129129128, "grad_norm": 2.466759791846634, "learning_rate": 2.657458811322522e-06, "loss": 0.3472, "step": 22049 }, { "epoch": 2.0692567567567566, "grad_norm": 0.9886310592979716, "learning_rate": 2.6569764873951642e-06, "loss": 0.334, "step": 22050 }, { "epoch": 2.0693506006006004, "grad_norm": 1.0012046721990637, "learning_rate": 2.6564941914039522e-06, "loss": 0.3132, "step": 22051 }, { "epoch": 2.0694444444444446, "grad_norm": 1.2940136201243402, "learning_rate": 2.6560119233546343e-06, "loss": 0.3259, "step": 22052 }, { "epoch": 2.0695382882882885, "grad_norm": 1.0743930605910332, "learning_rate": 2.655529683252963e-06, "loss": 0.3279, "step": 22053 }, { "epoch": 2.0696321321321323, "grad_norm": 1.0965311701428355, "learning_rate": 2.6550474711046902e-06, "loss": 0.3031, "step": 22054 }, { "epoch": 2.069725975975976, "grad_norm": 1.4280555070378984, "learning_rate": 2.654565286915558e-06, "loss": 0.3206, "step": 22055 }, { "epoch": 2.06981981981982, "grad_norm": 1.100825056819068, "learning_rate": 2.6540831306913217e-06, "loss": 0.2848, "step": 22056 }, { "epoch": 2.0699136636636637, "grad_norm": 2.0184158982286973, "learning_rate": 2.653601002437729e-06, "loss": 0.2826, "step": 22057 }, { "epoch": 2.0700075075075075, "grad_norm": 1.6362363915523241, "learning_rate": 2.6531189021605253e-06, "loss": 0.3044, "step": 22058 }, { "epoch": 2.0701013513513513, "grad_norm": 1.208842454298784, "learning_rate": 2.6526368298654626e-06, "loss": 0.3423, "step": 22059 }, { "epoch": 2.070195195195195, "grad_norm": 1.0345837113417389, "learning_rate": 2.652154785558288e-06, "loss": 0.3096, "step": 22060 }, { "epoch": 2.070289039039039, "grad_norm": 1.5065592104002636, "learning_rate": 2.6516727692447457e-06, "loss": 0.3119, "step": 22061 }, { "epoch": 2.0703828828828827, "grad_norm": 1.3127841077110565, "learning_rate": 2.6511907809305882e-06, "loss": 0.3305, "step": 22062 }, { "epoch": 2.0704767267267266, "grad_norm": 0.9916363569953281, "learning_rate": 2.650708820621559e-06, "loss": 0.3039, "step": 22063 }, { "epoch": 2.0705705705705704, "grad_norm": 0.9423680855713907, "learning_rate": 2.650226888323403e-06, "loss": 0.2874, "step": 22064 }, { "epoch": 2.0706644144144146, "grad_norm": 0.9567527467844315, "learning_rate": 2.64974498404187e-06, "loss": 0.2804, "step": 22065 }, { "epoch": 2.0707582582582584, "grad_norm": 1.0230700311413194, "learning_rate": 2.649263107782704e-06, "loss": 0.3606, "step": 22066 }, { "epoch": 2.0708521021021022, "grad_norm": 1.138909126877018, "learning_rate": 2.6487812595516516e-06, "loss": 0.3301, "step": 22067 }, { "epoch": 2.070945945945946, "grad_norm": 1.2675372022140987, "learning_rate": 2.648299439354456e-06, "loss": 0.3741, "step": 22068 }, { "epoch": 2.07103978978979, "grad_norm": 1.1596786358883464, "learning_rate": 2.647817647196863e-06, "loss": 0.271, "step": 22069 }, { "epoch": 2.0711336336336337, "grad_norm": 1.0454694856134223, "learning_rate": 2.647335883084616e-06, "loss": 0.2774, "step": 22070 }, { "epoch": 2.0712274774774775, "grad_norm": 0.9787313474475772, "learning_rate": 2.6468541470234614e-06, "loss": 0.3223, "step": 22071 }, { "epoch": 2.0713213213213213, "grad_norm": 1.4261703850926752, "learning_rate": 2.646372439019142e-06, "loss": 0.3258, "step": 22072 }, { "epoch": 2.071415165165165, "grad_norm": 1.1156247596074658, "learning_rate": 2.6458907590773985e-06, "loss": 0.316, "step": 22073 }, { "epoch": 2.071509009009009, "grad_norm": 1.1647301310502987, "learning_rate": 2.6454091072039793e-06, "loss": 0.3069, "step": 22074 }, { "epoch": 2.0716028528528527, "grad_norm": 1.0182848011919505, "learning_rate": 2.6449274834046244e-06, "loss": 0.3087, "step": 22075 }, { "epoch": 2.0716966966966965, "grad_norm": 0.9511933312244394, "learning_rate": 2.644445887685074e-06, "loss": 0.278, "step": 22076 }, { "epoch": 2.0717905405405403, "grad_norm": 1.1185834604454363, "learning_rate": 2.6439643200510746e-06, "loss": 0.3527, "step": 22077 }, { "epoch": 2.0718843843843846, "grad_norm": 1.1016498982546616, "learning_rate": 2.643482780508366e-06, "loss": 0.3635, "step": 22078 }, { "epoch": 2.0719782282282284, "grad_norm": 1.1672300227180443, "learning_rate": 2.6430012690626895e-06, "loss": 0.3162, "step": 22079 }, { "epoch": 2.0720720720720722, "grad_norm": 1.4464806588943684, "learning_rate": 2.642519785719786e-06, "loss": 0.3322, "step": 22080 }, { "epoch": 2.072165915915916, "grad_norm": 2.1547973649924765, "learning_rate": 2.6420383304853962e-06, "loss": 0.3666, "step": 22081 }, { "epoch": 2.07225975975976, "grad_norm": 1.203059859859801, "learning_rate": 2.64155690336526e-06, "loss": 0.3226, "step": 22082 }, { "epoch": 2.0723536036036037, "grad_norm": 1.0463437470304944, "learning_rate": 2.641075504365119e-06, "loss": 0.3318, "step": 22083 }, { "epoch": 2.0724474474474475, "grad_norm": 1.2774034165255896, "learning_rate": 2.6405941334907137e-06, "loss": 0.3153, "step": 22084 }, { "epoch": 2.0725412912912913, "grad_norm": 1.231775436469562, "learning_rate": 2.6401127907477797e-06, "loss": 0.3302, "step": 22085 }, { "epoch": 2.072635135135135, "grad_norm": 1.0272057743807523, "learning_rate": 2.6396314761420605e-06, "loss": 0.3202, "step": 22086 }, { "epoch": 2.072728978978979, "grad_norm": 1.1668281393908555, "learning_rate": 2.639150189679294e-06, "loss": 0.3304, "step": 22087 }, { "epoch": 2.0728228228228227, "grad_norm": 1.3206800418663054, "learning_rate": 2.6386689313652147e-06, "loss": 0.2948, "step": 22088 }, { "epoch": 2.0729166666666665, "grad_norm": 1.1120861709752723, "learning_rate": 2.6381877012055658e-06, "loss": 0.2888, "step": 22089 }, { "epoch": 2.0730105105105103, "grad_norm": 1.1519004485081679, "learning_rate": 2.637706499206083e-06, "loss": 0.308, "step": 22090 }, { "epoch": 2.073104354354354, "grad_norm": 1.1651356513426379, "learning_rate": 2.6372253253725033e-06, "loss": 0.3078, "step": 22091 }, { "epoch": 2.0731981981981984, "grad_norm": 1.1285223476301391, "learning_rate": 2.636744179710564e-06, "loss": 0.337, "step": 22092 }, { "epoch": 2.073292042042042, "grad_norm": 1.2234102160507745, "learning_rate": 2.636263062226003e-06, "loss": 0.3207, "step": 22093 }, { "epoch": 2.073385885885886, "grad_norm": 1.091103030434379, "learning_rate": 2.6357819729245526e-06, "loss": 0.3123, "step": 22094 }, { "epoch": 2.07347972972973, "grad_norm": 1.03379170646482, "learning_rate": 2.6353009118119544e-06, "loss": 0.3234, "step": 22095 }, { "epoch": 2.0735735735735736, "grad_norm": 1.0157344315969854, "learning_rate": 2.6348198788939417e-06, "loss": 0.3162, "step": 22096 }, { "epoch": 2.0736674174174174, "grad_norm": 1.3070159040052443, "learning_rate": 2.6343388741762486e-06, "loss": 0.2685, "step": 22097 }, { "epoch": 2.0737612612612613, "grad_norm": 1.0067308276725193, "learning_rate": 2.633857897664613e-06, "loss": 0.3052, "step": 22098 }, { "epoch": 2.073855105105105, "grad_norm": 1.0450140432182726, "learning_rate": 2.6333769493647677e-06, "loss": 0.2664, "step": 22099 }, { "epoch": 2.073948948948949, "grad_norm": 1.1209707628493275, "learning_rate": 2.6328960292824462e-06, "loss": 0.3463, "step": 22100 }, { "epoch": 2.0740427927927927, "grad_norm": 1.0800033443795607, "learning_rate": 2.6324151374233858e-06, "loss": 0.3408, "step": 22101 }, { "epoch": 2.0741366366366365, "grad_norm": 0.9848177659748392, "learning_rate": 2.6319342737933186e-06, "loss": 0.3296, "step": 22102 }, { "epoch": 2.0742304804804803, "grad_norm": 1.0567072763250955, "learning_rate": 2.631453438397977e-06, "loss": 0.308, "step": 22103 }, { "epoch": 2.074324324324324, "grad_norm": 1.126748944034896, "learning_rate": 2.6309726312430957e-06, "loss": 0.2816, "step": 22104 }, { "epoch": 2.0744181681681684, "grad_norm": 0.9544869003703808, "learning_rate": 2.630491852334406e-06, "loss": 0.329, "step": 22105 }, { "epoch": 2.074512012012012, "grad_norm": 1.121567604084186, "learning_rate": 2.630011101677639e-06, "loss": 0.3233, "step": 22106 }, { "epoch": 2.074605855855856, "grad_norm": 1.274480527736388, "learning_rate": 2.6295303792785304e-06, "loss": 0.3406, "step": 22107 }, { "epoch": 2.0746996996997, "grad_norm": 1.3866154045932626, "learning_rate": 2.6290496851428105e-06, "loss": 0.3133, "step": 22108 }, { "epoch": 2.0747935435435436, "grad_norm": 1.2769580939612801, "learning_rate": 2.6285690192762074e-06, "loss": 0.3089, "step": 22109 }, { "epoch": 2.0748873873873874, "grad_norm": 1.061349438771014, "learning_rate": 2.6280883816844577e-06, "loss": 0.2915, "step": 22110 }, { "epoch": 2.0749812312312312, "grad_norm": 1.167102954132297, "learning_rate": 2.6276077723732894e-06, "loss": 0.3286, "step": 22111 }, { "epoch": 2.075075075075075, "grad_norm": 1.394127904700273, "learning_rate": 2.6271271913484307e-06, "loss": 0.3355, "step": 22112 }, { "epoch": 2.075168918918919, "grad_norm": 1.1206997006955715, "learning_rate": 2.6266466386156155e-06, "loss": 0.3592, "step": 22113 }, { "epoch": 2.0752627627627627, "grad_norm": 1.09766834905573, "learning_rate": 2.626166114180572e-06, "loss": 0.3648, "step": 22114 }, { "epoch": 2.0753566066066065, "grad_norm": 1.1021349919900532, "learning_rate": 2.625685618049029e-06, "loss": 0.3665, "step": 22115 }, { "epoch": 2.0754504504504503, "grad_norm": 1.1096997328033495, "learning_rate": 2.625205150226716e-06, "loss": 0.3125, "step": 22116 }, { "epoch": 2.075544294294294, "grad_norm": 1.0349160094189889, "learning_rate": 2.624724710719362e-06, "loss": 0.2646, "step": 22117 }, { "epoch": 2.075638138138138, "grad_norm": 1.0508050158313365, "learning_rate": 2.6242442995326923e-06, "loss": 0.3317, "step": 22118 }, { "epoch": 2.075731981981982, "grad_norm": 1.3932638005583413, "learning_rate": 2.6237639166724393e-06, "loss": 0.2888, "step": 22119 }, { "epoch": 2.075825825825826, "grad_norm": 1.2260535835260233, "learning_rate": 2.623283562144329e-06, "loss": 0.3482, "step": 22120 }, { "epoch": 2.07591966966967, "grad_norm": 1.1351018351778064, "learning_rate": 2.6228032359540866e-06, "loss": 0.3075, "step": 22121 }, { "epoch": 2.0760135135135136, "grad_norm": 28.58162207635378, "learning_rate": 2.6223229381074434e-06, "loss": 0.3236, "step": 22122 }, { "epoch": 2.0761073573573574, "grad_norm": 1.155136319680962, "learning_rate": 2.6218426686101227e-06, "loss": 0.3026, "step": 22123 }, { "epoch": 2.076201201201201, "grad_norm": 1.2370261898972814, "learning_rate": 2.621362427467851e-06, "loss": 0.327, "step": 22124 }, { "epoch": 2.076295045045045, "grad_norm": 1.152864014671706, "learning_rate": 2.620882214686356e-06, "loss": 0.3487, "step": 22125 }, { "epoch": 2.076388888888889, "grad_norm": 1.063749739546915, "learning_rate": 2.620402030271363e-06, "loss": 0.2721, "step": 22126 }, { "epoch": 2.0764827327327327, "grad_norm": 1.2487546472625894, "learning_rate": 2.619921874228596e-06, "loss": 0.3702, "step": 22127 }, { "epoch": 2.0765765765765765, "grad_norm": 1.0709192985104918, "learning_rate": 2.619441746563781e-06, "loss": 0.3072, "step": 22128 }, { "epoch": 2.0766704204204203, "grad_norm": 3.406664790081474, "learning_rate": 2.618961647282642e-06, "loss": 0.3172, "step": 22129 }, { "epoch": 2.076764264264264, "grad_norm": 1.132664577595112, "learning_rate": 2.6184815763909016e-06, "loss": 0.3057, "step": 22130 }, { "epoch": 2.076858108108108, "grad_norm": 0.9759704567841909, "learning_rate": 2.6180015338942878e-06, "loss": 0.3192, "step": 22131 }, { "epoch": 2.076951951951952, "grad_norm": 1.0532246621060506, "learning_rate": 2.6175215197985214e-06, "loss": 0.3392, "step": 22132 }, { "epoch": 2.077045795795796, "grad_norm": 1.1894313708839765, "learning_rate": 2.6170415341093255e-06, "loss": 0.3098, "step": 22133 }, { "epoch": 2.0771396396396398, "grad_norm": 1.0010681098083682, "learning_rate": 2.616561576832425e-06, "loss": 0.3627, "step": 22134 }, { "epoch": 2.0772334834834836, "grad_norm": 0.9609928408659841, "learning_rate": 2.616081647973542e-06, "loss": 0.3025, "step": 22135 }, { "epoch": 2.0773273273273274, "grad_norm": 1.0510690731374834, "learning_rate": 2.6156017475383954e-06, "loss": 0.2838, "step": 22136 }, { "epoch": 2.077421171171171, "grad_norm": 1.0628380234396737, "learning_rate": 2.615121875532712e-06, "loss": 0.3114, "step": 22137 }, { "epoch": 2.077515015015015, "grad_norm": 1.0819818808841482, "learning_rate": 2.6146420319622112e-06, "loss": 0.3294, "step": 22138 }, { "epoch": 2.077608858858859, "grad_norm": 1.1064496828129802, "learning_rate": 2.6141622168326144e-06, "loss": 0.2972, "step": 22139 }, { "epoch": 2.0777027027027026, "grad_norm": 1.1319421856949525, "learning_rate": 2.613682430149642e-06, "loss": 0.2964, "step": 22140 }, { "epoch": 2.0777965465465464, "grad_norm": 1.3387460421558994, "learning_rate": 2.613202671919015e-06, "loss": 0.3279, "step": 22141 }, { "epoch": 2.0778903903903903, "grad_norm": 1.1988939508106806, "learning_rate": 2.612722942146451e-06, "loss": 0.3208, "step": 22142 }, { "epoch": 2.077984234234234, "grad_norm": 0.9996580350959818, "learning_rate": 2.6122432408376752e-06, "loss": 0.3273, "step": 22143 }, { "epoch": 2.078078078078078, "grad_norm": 1.058507330770688, "learning_rate": 2.6117635679984035e-06, "loss": 0.2758, "step": 22144 }, { "epoch": 2.078171921921922, "grad_norm": 1.1511621700090875, "learning_rate": 2.611283923634354e-06, "loss": 0.3105, "step": 22145 }, { "epoch": 2.078265765765766, "grad_norm": 0.90840036182175, "learning_rate": 2.6108043077512497e-06, "loss": 0.3314, "step": 22146 }, { "epoch": 2.0783596096096097, "grad_norm": 1.1673717548229097, "learning_rate": 2.6103247203548066e-06, "loss": 0.3374, "step": 22147 }, { "epoch": 2.0784534534534536, "grad_norm": 1.1608388683496962, "learning_rate": 2.6098451614507416e-06, "loss": 0.3084, "step": 22148 }, { "epoch": 2.0785472972972974, "grad_norm": 1.1930105010182144, "learning_rate": 2.6093656310447758e-06, "loss": 0.3049, "step": 22149 }, { "epoch": 2.078641141141141, "grad_norm": 1.1295795013213663, "learning_rate": 2.6088861291426244e-06, "loss": 0.2486, "step": 22150 }, { "epoch": 2.078734984984985, "grad_norm": 1.0875980508839402, "learning_rate": 2.6084066557500053e-06, "loss": 0.3565, "step": 22151 }, { "epoch": 2.078828828828829, "grad_norm": 1.2464935584614336, "learning_rate": 2.607927210872635e-06, "loss": 0.3249, "step": 22152 }, { "epoch": 2.0789226726726726, "grad_norm": 1.0199721352886992, "learning_rate": 2.6074477945162308e-06, "loss": 0.2993, "step": 22153 }, { "epoch": 2.0790165165165164, "grad_norm": 1.2643200217669215, "learning_rate": 2.606968406686505e-06, "loss": 0.3251, "step": 22154 }, { "epoch": 2.0791103603603602, "grad_norm": 1.1003720824714989, "learning_rate": 2.60648904738918e-06, "loss": 0.3285, "step": 22155 }, { "epoch": 2.079204204204204, "grad_norm": 2.383294438970435, "learning_rate": 2.606009716629967e-06, "loss": 0.2572, "step": 22156 }, { "epoch": 2.079298048048048, "grad_norm": 1.0628546587981456, "learning_rate": 2.6055304144145797e-06, "loss": 0.3189, "step": 22157 }, { "epoch": 2.079391891891892, "grad_norm": 0.9262119144892311, "learning_rate": 2.605051140748738e-06, "loss": 0.3391, "step": 22158 }, { "epoch": 2.079485735735736, "grad_norm": 0.878649286114916, "learning_rate": 2.6045718956381527e-06, "loss": 0.3029, "step": 22159 }, { "epoch": 2.0795795795795797, "grad_norm": 1.1003553309482146, "learning_rate": 2.604092679088536e-06, "loss": 0.2851, "step": 22160 }, { "epoch": 2.0796734234234235, "grad_norm": 1.160116356593127, "learning_rate": 2.6036134911056075e-06, "loss": 0.3206, "step": 22161 }, { "epoch": 2.0797672672672673, "grad_norm": 1.1221773939132016, "learning_rate": 2.6031343316950763e-06, "loss": 0.3329, "step": 22162 }, { "epoch": 2.079861111111111, "grad_norm": 1.0679332229772114, "learning_rate": 2.602655200862657e-06, "loss": 0.2711, "step": 22163 }, { "epoch": 2.079954954954955, "grad_norm": 1.0313647944988706, "learning_rate": 2.602176098614062e-06, "loss": 0.2756, "step": 22164 }, { "epoch": 2.080048798798799, "grad_norm": 1.2617555122537165, "learning_rate": 2.601697024955004e-06, "loss": 0.3017, "step": 22165 }, { "epoch": 2.0801426426426426, "grad_norm": 1.7924496830787897, "learning_rate": 2.601217979891192e-06, "loss": 0.3694, "step": 22166 }, { "epoch": 2.0802364864864864, "grad_norm": 1.2934187919415185, "learning_rate": 2.6007389634283424e-06, "loss": 0.312, "step": 22167 }, { "epoch": 2.08033033033033, "grad_norm": 1.2198261948708755, "learning_rate": 2.6002599755721645e-06, "loss": 0.3337, "step": 22168 }, { "epoch": 2.080424174174174, "grad_norm": 1.1105188902982757, "learning_rate": 2.599781016328368e-06, "loss": 0.3228, "step": 22169 }, { "epoch": 2.080518018018018, "grad_norm": 1.0482054421929352, "learning_rate": 2.5993020857026664e-06, "loss": 0.3334, "step": 22170 }, { "epoch": 2.080611861861862, "grad_norm": 1.5088129168584579, "learning_rate": 2.598823183700768e-06, "loss": 0.3213, "step": 22171 }, { "epoch": 2.080705705705706, "grad_norm": 1.1307706836228615, "learning_rate": 2.598344310328382e-06, "loss": 0.3148, "step": 22172 }, { "epoch": 2.0807995495495497, "grad_norm": 1.2043008034382723, "learning_rate": 2.5978654655912215e-06, "loss": 0.3264, "step": 22173 }, { "epoch": 2.0808933933933935, "grad_norm": 1.0990697379999936, "learning_rate": 2.597386649494994e-06, "loss": 0.293, "step": 22174 }, { "epoch": 2.0809872372372373, "grad_norm": 1.1530725765755006, "learning_rate": 2.5969078620454065e-06, "loss": 0.3385, "step": 22175 }, { "epoch": 2.081081081081081, "grad_norm": 1.0972362165669254, "learning_rate": 2.5964291032481705e-06, "loss": 0.3379, "step": 22176 }, { "epoch": 2.081174924924925, "grad_norm": 1.101300690671893, "learning_rate": 2.5959503731089954e-06, "loss": 0.2913, "step": 22177 }, { "epoch": 2.0812687687687688, "grad_norm": 0.8591907210513002, "learning_rate": 2.595471671633584e-06, "loss": 0.2761, "step": 22178 }, { "epoch": 2.0813626126126126, "grad_norm": 1.1065094268262523, "learning_rate": 2.5949929988276473e-06, "loss": 0.3384, "step": 22179 }, { "epoch": 2.0814564564564564, "grad_norm": 1.1251302472623554, "learning_rate": 2.5945143546968926e-06, "loss": 0.3264, "step": 22180 }, { "epoch": 2.0815503003003, "grad_norm": 2.7723148631457697, "learning_rate": 2.594035739247025e-06, "loss": 0.294, "step": 22181 }, { "epoch": 2.081644144144144, "grad_norm": 1.1312547476612649, "learning_rate": 2.5935571524837533e-06, "loss": 0.3332, "step": 22182 }, { "epoch": 2.081737987987988, "grad_norm": 1.005652009195235, "learning_rate": 2.593078594412784e-06, "loss": 0.2779, "step": 22183 }, { "epoch": 2.0818318318318316, "grad_norm": 1.0386926951932494, "learning_rate": 2.59260006503982e-06, "loss": 0.2996, "step": 22184 }, { "epoch": 2.081925675675676, "grad_norm": 1.3795431371372424, "learning_rate": 2.59212156437057e-06, "loss": 0.3116, "step": 22185 }, { "epoch": 2.0820195195195197, "grad_norm": 1.0901098864119376, "learning_rate": 2.5916430924107376e-06, "loss": 0.3475, "step": 22186 }, { "epoch": 2.0821133633633635, "grad_norm": 1.1552697870164934, "learning_rate": 2.5911646491660268e-06, "loss": 0.3381, "step": 22187 }, { "epoch": 2.0822072072072073, "grad_norm": 1.0996389097464492, "learning_rate": 2.5906862346421457e-06, "loss": 0.2957, "step": 22188 }, { "epoch": 2.082301051051051, "grad_norm": 0.9693243431600638, "learning_rate": 2.590207848844796e-06, "loss": 0.3245, "step": 22189 }, { "epoch": 2.082394894894895, "grad_norm": 1.0749973611401669, "learning_rate": 2.589729491779681e-06, "loss": 0.2842, "step": 22190 }, { "epoch": 2.0824887387387387, "grad_norm": 1.0405866401415895, "learning_rate": 2.5892511634525052e-06, "loss": 0.3438, "step": 22191 }, { "epoch": 2.0825825825825826, "grad_norm": 1.0123758045116045, "learning_rate": 2.588772863868972e-06, "loss": 0.3215, "step": 22192 }, { "epoch": 2.0826764264264264, "grad_norm": 0.9590019010953159, "learning_rate": 2.5882945930347813e-06, "loss": 0.3109, "step": 22193 }, { "epoch": 2.08277027027027, "grad_norm": 1.3385652970980535, "learning_rate": 2.5878163509556398e-06, "loss": 0.3413, "step": 22194 }, { "epoch": 2.082864114114114, "grad_norm": 1.0457627652301802, "learning_rate": 2.5873381376372487e-06, "loss": 0.3103, "step": 22195 }, { "epoch": 2.082957957957958, "grad_norm": 1.503904928793863, "learning_rate": 2.586859953085307e-06, "loss": 0.3154, "step": 22196 }, { "epoch": 2.0830518018018016, "grad_norm": 1.0919254223243586, "learning_rate": 2.5863817973055187e-06, "loss": 0.2854, "step": 22197 }, { "epoch": 2.083145645645646, "grad_norm": 0.9923180417545399, "learning_rate": 2.5859036703035856e-06, "loss": 0.3402, "step": 22198 }, { "epoch": 2.0832394894894897, "grad_norm": 1.466259901540801, "learning_rate": 2.5854255720852046e-06, "loss": 0.2773, "step": 22199 }, { "epoch": 2.0833333333333335, "grad_norm": 1.1020555206700475, "learning_rate": 2.5849475026560806e-06, "loss": 0.282, "step": 22200 }, { "epoch": 2.0834271771771773, "grad_norm": 0.9598241363399, "learning_rate": 2.5844694620219123e-06, "loss": 0.2967, "step": 22201 }, { "epoch": 2.083521021021021, "grad_norm": 1.1422494595576653, "learning_rate": 2.583991450188398e-06, "loss": 0.2988, "step": 22202 }, { "epoch": 2.083614864864865, "grad_norm": 1.060704649863859, "learning_rate": 2.583513467161239e-06, "loss": 0.3545, "step": 22203 }, { "epoch": 2.0837087087087087, "grad_norm": 1.1306851055199219, "learning_rate": 2.5830355129461326e-06, "loss": 0.3705, "step": 22204 }, { "epoch": 2.0838025525525525, "grad_norm": 1.2271685081464607, "learning_rate": 2.582557587548776e-06, "loss": 0.34, "step": 22205 }, { "epoch": 2.0838963963963963, "grad_norm": 1.366723638908469, "learning_rate": 2.582079690974873e-06, "loss": 0.3393, "step": 22206 }, { "epoch": 2.08399024024024, "grad_norm": 1.0003711311179004, "learning_rate": 2.5816018232301176e-06, "loss": 0.3461, "step": 22207 }, { "epoch": 2.084084084084084, "grad_norm": 1.0517625478197359, "learning_rate": 2.5811239843202063e-06, "loss": 0.3069, "step": 22208 }, { "epoch": 2.0841779279279278, "grad_norm": 1.1824106109912391, "learning_rate": 2.5806461742508402e-06, "loss": 0.3188, "step": 22209 }, { "epoch": 2.0842717717717716, "grad_norm": 1.2787425696117933, "learning_rate": 2.5801683930277157e-06, "loss": 0.3116, "step": 22210 }, { "epoch": 2.0843656156156154, "grad_norm": 0.9892598701038817, "learning_rate": 2.5796906406565246e-06, "loss": 0.3162, "step": 22211 }, { "epoch": 2.0844594594594597, "grad_norm": 1.0731367299674357, "learning_rate": 2.5792129171429702e-06, "loss": 0.3303, "step": 22212 }, { "epoch": 2.0845533033033035, "grad_norm": 3.5126975326223375, "learning_rate": 2.578735222492744e-06, "loss": 0.2951, "step": 22213 }, { "epoch": 2.0846471471471473, "grad_norm": 1.0946318208450794, "learning_rate": 2.578257556711544e-06, "loss": 0.2972, "step": 22214 }, { "epoch": 2.084740990990991, "grad_norm": 1.0643804930663083, "learning_rate": 2.5777799198050635e-06, "loss": 0.2828, "step": 22215 }, { "epoch": 2.084834834834835, "grad_norm": 1.086062867380738, "learning_rate": 2.5773023117789975e-06, "loss": 0.3084, "step": 22216 }, { "epoch": 2.0849286786786787, "grad_norm": 1.1304966398391993, "learning_rate": 2.57682473263904e-06, "loss": 0.34, "step": 22217 }, { "epoch": 2.0850225225225225, "grad_norm": 1.1201357999568529, "learning_rate": 2.576347182390888e-06, "loss": 0.3053, "step": 22218 }, { "epoch": 2.0851163663663663, "grad_norm": 1.0810902300861105, "learning_rate": 2.5758696610402338e-06, "loss": 0.3456, "step": 22219 }, { "epoch": 2.08521021021021, "grad_norm": 1.2060761809661553, "learning_rate": 2.5753921685927692e-06, "loss": 0.3571, "step": 22220 }, { "epoch": 2.085304054054054, "grad_norm": 1.0600286172090259, "learning_rate": 2.574914705054191e-06, "loss": 0.284, "step": 22221 }, { "epoch": 2.0853978978978978, "grad_norm": 1.1353407657143648, "learning_rate": 2.57443727043019e-06, "loss": 0.3293, "step": 22222 }, { "epoch": 2.0854917417417416, "grad_norm": 1.1217212931877865, "learning_rate": 2.5739598647264573e-06, "loss": 0.3309, "step": 22223 }, { "epoch": 2.0855855855855854, "grad_norm": 0.9826784867060905, "learning_rate": 2.5734824879486884e-06, "loss": 0.2961, "step": 22224 }, { "epoch": 2.0856794294294296, "grad_norm": 1.005090475070697, "learning_rate": 2.5730051401025733e-06, "loss": 0.3241, "step": 22225 }, { "epoch": 2.0857732732732734, "grad_norm": 1.166524919813105, "learning_rate": 2.572527821193803e-06, "loss": 0.3421, "step": 22226 }, { "epoch": 2.0858671171171173, "grad_norm": 1.0328631461413202, "learning_rate": 2.57205053122807e-06, "loss": 0.2999, "step": 22227 }, { "epoch": 2.085960960960961, "grad_norm": 1.2603857466854325, "learning_rate": 2.571573270211064e-06, "loss": 0.3059, "step": 22228 }, { "epoch": 2.086054804804805, "grad_norm": 1.0523787985359756, "learning_rate": 2.571096038148474e-06, "loss": 0.2895, "step": 22229 }, { "epoch": 2.0861486486486487, "grad_norm": 1.0156705637750454, "learning_rate": 2.5706188350459937e-06, "loss": 0.3007, "step": 22230 }, { "epoch": 2.0862424924924925, "grad_norm": 1.2868718640157761, "learning_rate": 2.570141660909311e-06, "loss": 0.3078, "step": 22231 }, { "epoch": 2.0863363363363363, "grad_norm": 1.0957679949411152, "learning_rate": 2.5696645157441136e-06, "loss": 0.3116, "step": 22232 }, { "epoch": 2.08643018018018, "grad_norm": 1.3853155505643608, "learning_rate": 2.5691873995560933e-06, "loss": 0.3118, "step": 22233 }, { "epoch": 2.086524024024024, "grad_norm": 1.07198696009286, "learning_rate": 2.5687103123509382e-06, "loss": 0.3301, "step": 22234 }, { "epoch": 2.0866178678678677, "grad_norm": 1.1535631444704706, "learning_rate": 2.5682332541343343e-06, "loss": 0.3261, "step": 22235 }, { "epoch": 2.0867117117117115, "grad_norm": 1.0646084109465528, "learning_rate": 2.5677562249119735e-06, "loss": 0.3221, "step": 22236 }, { "epoch": 2.0868055555555554, "grad_norm": 5.272808509913023, "learning_rate": 2.567279224689541e-06, "loss": 0.3036, "step": 22237 }, { "epoch": 2.0868993993993996, "grad_norm": 1.0919044696868718, "learning_rate": 2.5668022534727254e-06, "loss": 0.3111, "step": 22238 }, { "epoch": 2.0869932432432434, "grad_norm": 1.147449990529424, "learning_rate": 2.566325311267212e-06, "loss": 0.3152, "step": 22239 }, { "epoch": 2.0870870870870872, "grad_norm": 1.2608507261296058, "learning_rate": 2.565848398078689e-06, "loss": 0.3102, "step": 22240 }, { "epoch": 2.087180930930931, "grad_norm": 0.9892996490257099, "learning_rate": 2.5653715139128397e-06, "loss": 0.3038, "step": 22241 }, { "epoch": 2.087274774774775, "grad_norm": 0.9570445129908008, "learning_rate": 2.564894658775355e-06, "loss": 0.3158, "step": 22242 }, { "epoch": 2.0873686186186187, "grad_norm": 1.0808188812279944, "learning_rate": 2.564417832671917e-06, "loss": 0.314, "step": 22243 }, { "epoch": 2.0874624624624625, "grad_norm": 1.0434653054851923, "learning_rate": 2.5639410356082108e-06, "loss": 0.3429, "step": 22244 }, { "epoch": 2.0875563063063063, "grad_norm": 1.2279755365007363, "learning_rate": 2.5634642675899235e-06, "loss": 0.3136, "step": 22245 }, { "epoch": 2.08765015015015, "grad_norm": 1.2869084707152436, "learning_rate": 2.562987528622739e-06, "loss": 0.313, "step": 22246 }, { "epoch": 2.087743993993994, "grad_norm": 1.0364157755783667, "learning_rate": 2.5625108187123383e-06, "loss": 0.2898, "step": 22247 }, { "epoch": 2.0878378378378377, "grad_norm": 0.9720862236322612, "learning_rate": 2.562034137864411e-06, "loss": 0.273, "step": 22248 }, { "epoch": 2.0879316816816815, "grad_norm": 1.2571319042985807, "learning_rate": 2.561557486084637e-06, "loss": 0.3176, "step": 22249 }, { "epoch": 2.0880255255255253, "grad_norm": 1.0904475237051032, "learning_rate": 2.5610808633787e-06, "loss": 0.3113, "step": 22250 }, { "epoch": 2.0881193693693696, "grad_norm": 1.3798038217003104, "learning_rate": 2.5606042697522832e-06, "loss": 0.3121, "step": 22251 }, { "epoch": 2.0882132132132134, "grad_norm": 1.2224307603358004, "learning_rate": 2.5601277052110686e-06, "loss": 0.2935, "step": 22252 }, { "epoch": 2.088307057057057, "grad_norm": 1.143317168064413, "learning_rate": 2.559651169760737e-06, "loss": 0.3295, "step": 22253 }, { "epoch": 2.088400900900901, "grad_norm": 1.1231965700138342, "learning_rate": 2.559174663406973e-06, "loss": 0.3232, "step": 22254 }, { "epoch": 2.088494744744745, "grad_norm": 1.0198632284139757, "learning_rate": 2.5586981861554574e-06, "loss": 0.3291, "step": 22255 }, { "epoch": 2.0885885885885886, "grad_norm": 1.0480178982464365, "learning_rate": 2.558221738011868e-06, "loss": 0.3016, "step": 22256 }, { "epoch": 2.0886824324324325, "grad_norm": 1.3491274556320954, "learning_rate": 2.5577453189818907e-06, "loss": 0.3006, "step": 22257 }, { "epoch": 2.0887762762762763, "grad_norm": 1.0520680235961106, "learning_rate": 2.557268929071203e-06, "loss": 0.3267, "step": 22258 }, { "epoch": 2.08887012012012, "grad_norm": 1.38593110977328, "learning_rate": 2.556792568285484e-06, "loss": 0.3291, "step": 22259 }, { "epoch": 2.088963963963964, "grad_norm": 1.262190052271723, "learning_rate": 2.556316236630416e-06, "loss": 0.3069, "step": 22260 }, { "epoch": 2.0890578078078077, "grad_norm": 1.416826781737408, "learning_rate": 2.555839934111678e-06, "loss": 0.3314, "step": 22261 }, { "epoch": 2.0891516516516515, "grad_norm": 1.262717418277214, "learning_rate": 2.5553636607349476e-06, "loss": 0.3415, "step": 22262 }, { "epoch": 2.0892454954954953, "grad_norm": 0.9522366272611924, "learning_rate": 2.5548874165059035e-06, "loss": 0.3499, "step": 22263 }, { "epoch": 2.089339339339339, "grad_norm": 1.158280385085133, "learning_rate": 2.554411201430225e-06, "loss": 0.3334, "step": 22264 }, { "epoch": 2.0894331831831834, "grad_norm": 1.268476782377248, "learning_rate": 2.5539350155135877e-06, "loss": 0.2925, "step": 22265 }, { "epoch": 2.089527027027027, "grad_norm": 1.0640390001856963, "learning_rate": 2.5534588587616726e-06, "loss": 0.3145, "step": 22266 }, { "epoch": 2.089620870870871, "grad_norm": 1.262821017179417, "learning_rate": 2.552982731180156e-06, "loss": 0.334, "step": 22267 }, { "epoch": 2.089714714714715, "grad_norm": 1.1966080183224952, "learning_rate": 2.552506632774713e-06, "loss": 0.3419, "step": 22268 }, { "epoch": 2.0898085585585586, "grad_norm": 1.0415187067805076, "learning_rate": 2.5520305635510223e-06, "loss": 0.3259, "step": 22269 }, { "epoch": 2.0899024024024024, "grad_norm": 1.2111845170503577, "learning_rate": 2.5515545235147594e-06, "loss": 0.3195, "step": 22270 }, { "epoch": 2.0899962462462462, "grad_norm": 0.9301577966223944, "learning_rate": 2.551078512671598e-06, "loss": 0.3307, "step": 22271 }, { "epoch": 2.09009009009009, "grad_norm": 1.2826440712514047, "learning_rate": 2.5506025310272183e-06, "loss": 0.3188, "step": 22272 }, { "epoch": 2.090183933933934, "grad_norm": 1.3893696055957585, "learning_rate": 2.5501265785872924e-06, "loss": 0.2818, "step": 22273 }, { "epoch": 2.0902777777777777, "grad_norm": 1.774759785355498, "learning_rate": 2.5496506553574964e-06, "loss": 0.2917, "step": 22274 }, { "epoch": 2.0903716216216215, "grad_norm": 0.9826130176677643, "learning_rate": 2.549174761343503e-06, "loss": 0.3631, "step": 22275 }, { "epoch": 2.0904654654654653, "grad_norm": 0.9383214538965166, "learning_rate": 2.548698896550988e-06, "loss": 0.297, "step": 22276 }, { "epoch": 2.090559309309309, "grad_norm": 1.6877018791604685, "learning_rate": 2.548223060985622e-06, "loss": 0.3084, "step": 22277 }, { "epoch": 2.0906531531531534, "grad_norm": 0.8391059774411136, "learning_rate": 2.5477472546530834e-06, "loss": 0.2323, "step": 22278 }, { "epoch": 2.090746996996997, "grad_norm": 1.494414031514454, "learning_rate": 2.5472714775590434e-06, "loss": 0.3142, "step": 22279 }, { "epoch": 2.090840840840841, "grad_norm": 1.0025155055147936, "learning_rate": 2.5467957297091715e-06, "loss": 0.3061, "step": 22280 }, { "epoch": 2.090934684684685, "grad_norm": 1.0603121106412627, "learning_rate": 2.546320011109145e-06, "loss": 0.3533, "step": 22281 }, { "epoch": 2.0910285285285286, "grad_norm": 1.1353089465287145, "learning_rate": 2.545844321764634e-06, "loss": 0.2725, "step": 22282 }, { "epoch": 2.0911223723723724, "grad_norm": 0.9364394973465998, "learning_rate": 2.545368661681308e-06, "loss": 0.2956, "step": 22283 }, { "epoch": 2.0912162162162162, "grad_norm": 3.353084983570719, "learning_rate": 2.544893030864842e-06, "loss": 0.3008, "step": 22284 }, { "epoch": 2.09131006006006, "grad_norm": 1.1007694556120182, "learning_rate": 2.544417429320906e-06, "loss": 0.3419, "step": 22285 }, { "epoch": 2.091403903903904, "grad_norm": 1.1325209634023345, "learning_rate": 2.543941857055169e-06, "loss": 0.2774, "step": 22286 }, { "epoch": 2.0914977477477477, "grad_norm": 1.087284646678366, "learning_rate": 2.543466314073303e-06, "loss": 0.3377, "step": 22287 }, { "epoch": 2.0915915915915915, "grad_norm": 1.008351084236317, "learning_rate": 2.5429908003809765e-06, "loss": 0.3595, "step": 22288 }, { "epoch": 2.0916854354354353, "grad_norm": 1.1379252044615444, "learning_rate": 2.542515315983859e-06, "loss": 0.3399, "step": 22289 }, { "epoch": 2.091779279279279, "grad_norm": 1.2232523786696945, "learning_rate": 2.5420398608876216e-06, "loss": 0.3205, "step": 22290 }, { "epoch": 2.091873123123123, "grad_norm": 1.0367223390353577, "learning_rate": 2.541564435097933e-06, "loss": 0.3076, "step": 22291 }, { "epoch": 2.091966966966967, "grad_norm": 1.1094953714705682, "learning_rate": 2.5410890386204588e-06, "loss": 0.3136, "step": 22292 }, { "epoch": 2.092060810810811, "grad_norm": 1.1796512282224925, "learning_rate": 2.540613671460871e-06, "loss": 0.2848, "step": 22293 }, { "epoch": 2.0921546546546548, "grad_norm": 1.105852378579352, "learning_rate": 2.5401383336248364e-06, "loss": 0.2851, "step": 22294 }, { "epoch": 2.0922484984984986, "grad_norm": 1.0825603711100689, "learning_rate": 2.5396630251180197e-06, "loss": 0.3458, "step": 22295 }, { "epoch": 2.0923423423423424, "grad_norm": 1.0723748368783386, "learning_rate": 2.5391877459460923e-06, "loss": 0.2848, "step": 22296 }, { "epoch": 2.092436186186186, "grad_norm": 1.2423959355296599, "learning_rate": 2.538712496114719e-06, "loss": 0.331, "step": 22297 }, { "epoch": 2.09253003003003, "grad_norm": 1.1966567008762365, "learning_rate": 2.5382372756295648e-06, "loss": 0.3122, "step": 22298 }, { "epoch": 2.092623873873874, "grad_norm": 0.8569975482885125, "learning_rate": 2.537762084496298e-06, "loss": 0.3022, "step": 22299 }, { "epoch": 2.0927177177177176, "grad_norm": 1.1153512192880322, "learning_rate": 2.5372869227205866e-06, "loss": 0.3149, "step": 22300 }, { "epoch": 2.0928115615615615, "grad_norm": 1.177652619476488, "learning_rate": 2.536811790308089e-06, "loss": 0.3059, "step": 22301 }, { "epoch": 2.0929054054054053, "grad_norm": 1.4211312956813444, "learning_rate": 2.5363366872644767e-06, "loss": 0.3153, "step": 22302 }, { "epoch": 2.092999249249249, "grad_norm": 1.219655597576915, "learning_rate": 2.5358616135954114e-06, "loss": 0.2666, "step": 22303 }, { "epoch": 2.093093093093093, "grad_norm": 1.0484229815697739, "learning_rate": 2.535386569306556e-06, "loss": 0.3384, "step": 22304 }, { "epoch": 2.093186936936937, "grad_norm": 1.4445542758029117, "learning_rate": 2.534911554403579e-06, "loss": 0.2688, "step": 22305 }, { "epoch": 2.093280780780781, "grad_norm": 1.4498971132548089, "learning_rate": 2.534436568892141e-06, "loss": 0.3136, "step": 22306 }, { "epoch": 2.0933746246246248, "grad_norm": 1.193479553658502, "learning_rate": 2.5339616127779056e-06, "loss": 0.3523, "step": 22307 }, { "epoch": 2.0934684684684686, "grad_norm": 1.031528012425399, "learning_rate": 2.5334866860665363e-06, "loss": 0.3065, "step": 22308 }, { "epoch": 2.0935623123123124, "grad_norm": 1.153420408156791, "learning_rate": 2.533011788763696e-06, "loss": 0.2577, "step": 22309 }, { "epoch": 2.093656156156156, "grad_norm": 0.9772869797414278, "learning_rate": 2.532536920875045e-06, "loss": 0.2911, "step": 22310 }, { "epoch": 2.09375, "grad_norm": 1.1344613347025612, "learning_rate": 2.532062082406249e-06, "loss": 0.2973, "step": 22311 }, { "epoch": 2.093843843843844, "grad_norm": 1.1198145116089664, "learning_rate": 2.5315872733629667e-06, "loss": 0.389, "step": 22312 }, { "epoch": 2.0939376876876876, "grad_norm": 0.9520384961729585, "learning_rate": 2.5311124937508597e-06, "loss": 0.383, "step": 22313 }, { "epoch": 2.0940315315315314, "grad_norm": 1.2010766649619435, "learning_rate": 2.530637743575589e-06, "loss": 0.3237, "step": 22314 }, { "epoch": 2.0941253753753752, "grad_norm": 1.0057243938324014, "learning_rate": 2.5301630228428154e-06, "loss": 0.292, "step": 22315 }, { "epoch": 2.094219219219219, "grad_norm": 1.1125875303011787, "learning_rate": 2.5296883315581966e-06, "loss": 0.2991, "step": 22316 }, { "epoch": 2.094313063063063, "grad_norm": 1.1113835144677087, "learning_rate": 2.5292136697273965e-06, "loss": 0.3492, "step": 22317 }, { "epoch": 2.094406906906907, "grad_norm": 0.9781097650105242, "learning_rate": 2.528739037356073e-06, "loss": 0.2593, "step": 22318 }, { "epoch": 2.094500750750751, "grad_norm": 0.994787279351381, "learning_rate": 2.5282644344498824e-06, "loss": 0.2761, "step": 22319 }, { "epoch": 2.0945945945945947, "grad_norm": 1.2504916259177299, "learning_rate": 2.5277898610144878e-06, "loss": 0.3308, "step": 22320 }, { "epoch": 2.0946884384384385, "grad_norm": 1.083813676119803, "learning_rate": 2.5273153170555466e-06, "loss": 0.3357, "step": 22321 }, { "epoch": 2.0947822822822824, "grad_norm": 1.0246057315284043, "learning_rate": 2.526840802578713e-06, "loss": 0.3525, "step": 22322 }, { "epoch": 2.094876126126126, "grad_norm": 1.1121689544284221, "learning_rate": 2.526366317589649e-06, "loss": 0.2714, "step": 22323 }, { "epoch": 2.09496996996997, "grad_norm": 1.2612506567116153, "learning_rate": 2.525891862094012e-06, "loss": 0.2917, "step": 22324 }, { "epoch": 2.095063813813814, "grad_norm": 1.4655336966498314, "learning_rate": 2.5254174360974564e-06, "loss": 0.3148, "step": 22325 }, { "epoch": 2.0951576576576576, "grad_norm": 0.9295307576198447, "learning_rate": 2.5249430396056405e-06, "loss": 0.3371, "step": 22326 }, { "epoch": 2.0952515015015014, "grad_norm": 1.3149576195830217, "learning_rate": 2.5244686726242197e-06, "loss": 0.3505, "step": 22327 }, { "epoch": 2.0953453453453452, "grad_norm": 0.9620952413793873, "learning_rate": 2.5239943351588485e-06, "loss": 0.2945, "step": 22328 }, { "epoch": 2.095439189189189, "grad_norm": 1.0527277290345267, "learning_rate": 2.523520027215186e-06, "loss": 0.2851, "step": 22329 }, { "epoch": 2.095533033033033, "grad_norm": 1.738646810072046, "learning_rate": 2.5230457487988856e-06, "loss": 0.3135, "step": 22330 }, { "epoch": 2.095626876876877, "grad_norm": 1.2969315148588563, "learning_rate": 2.522571499915601e-06, "loss": 0.3301, "step": 22331 }, { "epoch": 2.095720720720721, "grad_norm": 1.0300629472988296, "learning_rate": 2.52209728057099e-06, "loss": 0.2906, "step": 22332 }, { "epoch": 2.0958145645645647, "grad_norm": 1.136595132927592, "learning_rate": 2.521623090770704e-06, "loss": 0.3037, "step": 22333 }, { "epoch": 2.0959084084084085, "grad_norm": 0.9492104790630589, "learning_rate": 2.5211489305203956e-06, "loss": 0.3077, "step": 22334 }, { "epoch": 2.0960022522522523, "grad_norm": 4.894591116117125, "learning_rate": 2.5206747998257225e-06, "loss": 0.2806, "step": 22335 }, { "epoch": 2.096096096096096, "grad_norm": 1.0179802477981341, "learning_rate": 2.5202006986923353e-06, "loss": 0.3471, "step": 22336 }, { "epoch": 2.09618993993994, "grad_norm": 1.4959061345076634, "learning_rate": 2.5197266271258876e-06, "loss": 0.2716, "step": 22337 }, { "epoch": 2.0962837837837838, "grad_norm": 5.606720114554673, "learning_rate": 2.5192525851320304e-06, "loss": 0.3347, "step": 22338 }, { "epoch": 2.0963776276276276, "grad_norm": 0.9982407178197034, "learning_rate": 2.518778572716417e-06, "loss": 0.3058, "step": 22339 }, { "epoch": 2.0964714714714714, "grad_norm": 1.0964887595619661, "learning_rate": 2.518304589884697e-06, "loss": 0.3713, "step": 22340 }, { "epoch": 2.096565315315315, "grad_norm": 1.1235476695259796, "learning_rate": 2.5178306366425253e-06, "loss": 0.3632, "step": 22341 }, { "epoch": 2.096659159159159, "grad_norm": 1.1597117070363658, "learning_rate": 2.517356712995551e-06, "loss": 0.3423, "step": 22342 }, { "epoch": 2.096753003003003, "grad_norm": 1.0160758533414056, "learning_rate": 2.5168828189494233e-06, "loss": 0.3432, "step": 22343 }, { "epoch": 2.0968468468468466, "grad_norm": 1.0210772011458757, "learning_rate": 2.516408954509795e-06, "loss": 0.3031, "step": 22344 }, { "epoch": 2.096940690690691, "grad_norm": 1.0996372472661713, "learning_rate": 2.5159351196823155e-06, "loss": 0.2828, "step": 22345 }, { "epoch": 2.0970345345345347, "grad_norm": 5.929442393060678, "learning_rate": 2.5154613144726325e-06, "loss": 0.3648, "step": 22346 }, { "epoch": 2.0971283783783785, "grad_norm": 1.2215891314593816, "learning_rate": 2.514987538886398e-06, "loss": 0.3285, "step": 22347 }, { "epoch": 2.0972222222222223, "grad_norm": 0.9570495195921932, "learning_rate": 2.5145137929292594e-06, "loss": 0.3286, "step": 22348 }, { "epoch": 2.097316066066066, "grad_norm": 1.0629946759948292, "learning_rate": 2.514040076606866e-06, "loss": 0.2871, "step": 22349 }, { "epoch": 2.09740990990991, "grad_norm": 1.126579516233947, "learning_rate": 2.5135663899248654e-06, "loss": 0.2865, "step": 22350 }, { "epoch": 2.0975037537537538, "grad_norm": 0.9977566018148712, "learning_rate": 2.5130927328889044e-06, "loss": 0.3176, "step": 22351 }, { "epoch": 2.0975975975975976, "grad_norm": 1.0861332372786459, "learning_rate": 2.51261910550463e-06, "loss": 0.2844, "step": 22352 }, { "epoch": 2.0976914414414414, "grad_norm": 1.1662478867784227, "learning_rate": 2.5121455077776928e-06, "loss": 0.31, "step": 22353 }, { "epoch": 2.097785285285285, "grad_norm": 1.0821355303023825, "learning_rate": 2.511671939713738e-06, "loss": 0.3031, "step": 22354 }, { "epoch": 2.097879129129129, "grad_norm": 1.0789486768179435, "learning_rate": 2.511198401318409e-06, "loss": 0.3105, "step": 22355 }, { "epoch": 2.097972972972973, "grad_norm": 1.1259621779408011, "learning_rate": 2.510724892597356e-06, "loss": 0.3338, "step": 22356 }, { "epoch": 2.0980668168168166, "grad_norm": 1.2117573744666483, "learning_rate": 2.5102514135562234e-06, "loss": 0.3117, "step": 22357 }, { "epoch": 2.098160660660661, "grad_norm": 1.011836494551039, "learning_rate": 2.5097779642006547e-06, "loss": 0.3024, "step": 22358 }, { "epoch": 2.0982545045045047, "grad_norm": 1.2770619316047385, "learning_rate": 2.5093045445362975e-06, "loss": 0.3331, "step": 22359 }, { "epoch": 2.0983483483483485, "grad_norm": 0.9318357111091037, "learning_rate": 2.508831154568796e-06, "loss": 0.3102, "step": 22360 }, { "epoch": 2.0984421921921923, "grad_norm": 1.1289840213410203, "learning_rate": 2.508357794303794e-06, "loss": 0.2869, "step": 22361 }, { "epoch": 2.098536036036036, "grad_norm": 1.130741857201762, "learning_rate": 2.5078844637469346e-06, "loss": 0.353, "step": 22362 }, { "epoch": 2.09862987987988, "grad_norm": 1.0774861606940098, "learning_rate": 2.507411162903862e-06, "loss": 0.2881, "step": 22363 }, { "epoch": 2.0987237237237237, "grad_norm": 1.0648437334579839, "learning_rate": 2.5069378917802186e-06, "loss": 0.3316, "step": 22364 }, { "epoch": 2.0988175675675675, "grad_norm": 1.2981569180200911, "learning_rate": 2.5064646503816493e-06, "loss": 0.3288, "step": 22365 }, { "epoch": 2.0989114114114114, "grad_norm": 1.1069957261076202, "learning_rate": 2.505991438713795e-06, "loss": 0.326, "step": 22366 }, { "epoch": 2.099005255255255, "grad_norm": 1.1585418460033878, "learning_rate": 2.5055182567822973e-06, "loss": 0.3069, "step": 22367 }, { "epoch": 2.099099099099099, "grad_norm": 1.0601270904771922, "learning_rate": 2.5050451045928005e-06, "loss": 0.2696, "step": 22368 }, { "epoch": 2.099192942942943, "grad_norm": 1.2300583299970553, "learning_rate": 2.5045719821509446e-06, "loss": 0.2996, "step": 22369 }, { "epoch": 2.0992867867867866, "grad_norm": 2.4939044244705615, "learning_rate": 2.5040988894623686e-06, "loss": 0.2713, "step": 22370 }, { "epoch": 2.0993806306306304, "grad_norm": 1.045392060208906, "learning_rate": 2.503625826532717e-06, "loss": 0.3105, "step": 22371 }, { "epoch": 2.0994744744744747, "grad_norm": 0.9280277143920808, "learning_rate": 2.503152793367629e-06, "loss": 0.327, "step": 22372 }, { "epoch": 2.0995683183183185, "grad_norm": 1.1421479003423023, "learning_rate": 2.502679789972744e-06, "loss": 0.3389, "step": 22373 }, { "epoch": 2.0996621621621623, "grad_norm": 1.0679034364666, "learning_rate": 2.5022068163537016e-06, "loss": 0.3336, "step": 22374 }, { "epoch": 2.099756006006006, "grad_norm": 1.0974162434836352, "learning_rate": 2.5017338725161422e-06, "loss": 0.3239, "step": 22375 }, { "epoch": 2.09984984984985, "grad_norm": 1.0533192148737198, "learning_rate": 2.501260958465701e-06, "loss": 0.3013, "step": 22376 }, { "epoch": 2.0999436936936937, "grad_norm": 0.9920719042683517, "learning_rate": 2.5007880742080215e-06, "loss": 0.3341, "step": 22377 }, { "epoch": 2.1000375375375375, "grad_norm": 1.1473020897000916, "learning_rate": 2.50031521974874e-06, "loss": 0.2673, "step": 22378 }, { "epoch": 2.1001313813813813, "grad_norm": 1.0186509301858557, "learning_rate": 2.4998423950934925e-06, "loss": 0.324, "step": 22379 }, { "epoch": 2.100225225225225, "grad_norm": 1.0810342621950968, "learning_rate": 2.49936960024792e-06, "loss": 0.3269, "step": 22380 }, { "epoch": 2.100319069069069, "grad_norm": 1.0897654646770423, "learning_rate": 2.4988968352176586e-06, "loss": 0.3352, "step": 22381 }, { "epoch": 2.1004129129129128, "grad_norm": 1.1411506958442852, "learning_rate": 2.498424100008342e-06, "loss": 0.3454, "step": 22382 }, { "epoch": 2.1005067567567566, "grad_norm": 1.6204331674777501, "learning_rate": 2.497951394625612e-06, "loss": 0.3212, "step": 22383 }, { "epoch": 2.1006006006006004, "grad_norm": 1.8363731453130683, "learning_rate": 2.497478719075101e-06, "loss": 0.3245, "step": 22384 }, { "epoch": 2.1006944444444446, "grad_norm": 1.192152162177911, "learning_rate": 2.497006073362446e-06, "loss": 0.3368, "step": 22385 }, { "epoch": 2.1007882882882885, "grad_norm": 1.0417224104637766, "learning_rate": 2.4965334574932826e-06, "loss": 0.3075, "step": 22386 }, { "epoch": 2.1008821321321323, "grad_norm": 1.0979548854122974, "learning_rate": 2.4960608714732455e-06, "loss": 0.3551, "step": 22387 }, { "epoch": 2.100975975975976, "grad_norm": 0.9618821234185526, "learning_rate": 2.4955883153079676e-06, "loss": 0.2671, "step": 22388 }, { "epoch": 2.10106981981982, "grad_norm": 1.164691012104861, "learning_rate": 2.495115789003086e-06, "loss": 0.3181, "step": 22389 }, { "epoch": 2.1011636636636637, "grad_norm": 1.1267376374052125, "learning_rate": 2.4946432925642346e-06, "loss": 0.3392, "step": 22390 }, { "epoch": 2.1012575075075075, "grad_norm": 1.34907936911711, "learning_rate": 2.494170825997043e-06, "loss": 0.2722, "step": 22391 }, { "epoch": 2.1013513513513513, "grad_norm": 1.1687238497731471, "learning_rate": 2.49369838930715e-06, "loss": 0.3319, "step": 22392 }, { "epoch": 2.101445195195195, "grad_norm": 1.8062229004830601, "learning_rate": 2.493225982500187e-06, "loss": 0.3639, "step": 22393 }, { "epoch": 2.101539039039039, "grad_norm": 1.2028665500395035, "learning_rate": 2.492753605581783e-06, "loss": 0.297, "step": 22394 }, { "epoch": 2.1016328828828827, "grad_norm": 0.968129680203435, "learning_rate": 2.4922812585575745e-06, "loss": 0.2917, "step": 22395 }, { "epoch": 2.1017267267267266, "grad_norm": 1.2175609311830864, "learning_rate": 2.491808941433192e-06, "loss": 0.3195, "step": 22396 }, { "epoch": 2.1018205705705704, "grad_norm": 1.1794054593653132, "learning_rate": 2.491336654214267e-06, "loss": 0.3472, "step": 22397 }, { "epoch": 2.1019144144144146, "grad_norm": 1.2754159748197311, "learning_rate": 2.49086439690643e-06, "loss": 0.3521, "step": 22398 }, { "epoch": 2.1020082582582584, "grad_norm": 1.0285558670033321, "learning_rate": 2.490392169515312e-06, "loss": 0.3567, "step": 22399 }, { "epoch": 2.1021021021021022, "grad_norm": 1.1033797877089682, "learning_rate": 2.4899199720465416e-06, "loss": 0.3141, "step": 22400 }, { "epoch": 2.102195945945946, "grad_norm": 2.2369396149436502, "learning_rate": 2.489447804505753e-06, "loss": 0.3314, "step": 22401 }, { "epoch": 2.10228978978979, "grad_norm": 0.939849446327619, "learning_rate": 2.488975666898573e-06, "loss": 0.2972, "step": 22402 }, { "epoch": 2.1023836336336337, "grad_norm": 1.1163966048407572, "learning_rate": 2.4885035592306304e-06, "loss": 0.3534, "step": 22403 }, { "epoch": 2.1024774774774775, "grad_norm": 1.0080316351912824, "learning_rate": 2.4880314815075573e-06, "loss": 0.3058, "step": 22404 }, { "epoch": 2.1025713213213213, "grad_norm": 0.9933369098742904, "learning_rate": 2.4875594337349805e-06, "loss": 0.2978, "step": 22405 }, { "epoch": 2.102665165165165, "grad_norm": 1.3475384499080365, "learning_rate": 2.487087415918526e-06, "loss": 0.2963, "step": 22406 }, { "epoch": 2.102759009009009, "grad_norm": 0.9220433080584665, "learning_rate": 2.4866154280638266e-06, "loss": 0.309, "step": 22407 }, { "epoch": 2.1028528528528527, "grad_norm": 1.0158955756399066, "learning_rate": 2.4861434701765066e-06, "loss": 0.3459, "step": 22408 }, { "epoch": 2.1029466966966965, "grad_norm": 1.1346219903757229, "learning_rate": 2.485671542262194e-06, "loss": 0.3028, "step": 22409 }, { "epoch": 2.1030405405405403, "grad_norm": 1.1504435668353339, "learning_rate": 2.485199644326516e-06, "loss": 0.3093, "step": 22410 }, { "epoch": 2.1031343843843846, "grad_norm": 0.9401136566893661, "learning_rate": 2.484727776375098e-06, "loss": 0.3114, "step": 22411 }, { "epoch": 2.1032282282282284, "grad_norm": 1.0151310061501084, "learning_rate": 2.4842559384135652e-06, "loss": 0.3093, "step": 22412 }, { "epoch": 2.1033220720720722, "grad_norm": 0.994274906845867, "learning_rate": 2.483784130447547e-06, "loss": 0.3145, "step": 22413 }, { "epoch": 2.103415915915916, "grad_norm": 1.0830055220555543, "learning_rate": 2.4833123524826664e-06, "loss": 0.3272, "step": 22414 }, { "epoch": 2.10350975975976, "grad_norm": 1.174740121066524, "learning_rate": 2.482840604524547e-06, "loss": 0.3155, "step": 22415 }, { "epoch": 2.1036036036036037, "grad_norm": 1.2204297464857157, "learning_rate": 2.482368886578817e-06, "loss": 0.3266, "step": 22416 }, { "epoch": 2.1036974474474475, "grad_norm": 1.0570699670523458, "learning_rate": 2.481897198651099e-06, "loss": 0.303, "step": 22417 }, { "epoch": 2.1037912912912913, "grad_norm": 0.9981168732923545, "learning_rate": 2.4814255407470157e-06, "loss": 0.3104, "step": 22418 }, { "epoch": 2.103885135135135, "grad_norm": 1.0929835262843064, "learning_rate": 2.480953912872194e-06, "loss": 0.3375, "step": 22419 }, { "epoch": 2.103978978978979, "grad_norm": 1.1543988897112747, "learning_rate": 2.4804823150322547e-06, "loss": 0.3289, "step": 22420 }, { "epoch": 2.1040728228228227, "grad_norm": 1.193408465827726, "learning_rate": 2.48001074723282e-06, "loss": 0.3135, "step": 22421 }, { "epoch": 2.1041666666666665, "grad_norm": 0.9815811177523838, "learning_rate": 2.479539209479515e-06, "loss": 0.3024, "step": 22422 }, { "epoch": 2.1042605105105103, "grad_norm": 1.4743806113408549, "learning_rate": 2.479067701777963e-06, "loss": 0.3227, "step": 22423 }, { "epoch": 2.104354354354354, "grad_norm": 1.2028464128718785, "learning_rate": 2.4785962241337797e-06, "loss": 0.3036, "step": 22424 }, { "epoch": 2.1044481981981984, "grad_norm": 1.444326390041782, "learning_rate": 2.478124776552592e-06, "loss": 0.3139, "step": 22425 }, { "epoch": 2.104542042042042, "grad_norm": 1.190556136559907, "learning_rate": 2.4776533590400197e-06, "loss": 0.3379, "step": 22426 }, { "epoch": 2.104635885885886, "grad_norm": 1.0991959531937967, "learning_rate": 2.477181971601681e-06, "loss": 0.3094, "step": 22427 }, { "epoch": 2.10472972972973, "grad_norm": 1.0530998298343726, "learning_rate": 2.4767106142432006e-06, "loss": 0.3551, "step": 22428 }, { "epoch": 2.1048235735735736, "grad_norm": 0.9890098554041769, "learning_rate": 2.4762392869701967e-06, "loss": 0.3608, "step": 22429 }, { "epoch": 2.1049174174174174, "grad_norm": 1.2660180955797287, "learning_rate": 2.475767989788287e-06, "loss": 0.3442, "step": 22430 }, { "epoch": 2.1050112612612613, "grad_norm": 1.1841875314569645, "learning_rate": 2.4752967227030945e-06, "loss": 0.2865, "step": 22431 }, { "epoch": 2.105105105105105, "grad_norm": 1.064004861528635, "learning_rate": 2.4748254857202364e-06, "loss": 0.3403, "step": 22432 }, { "epoch": 2.105198948948949, "grad_norm": 1.2468897004407815, "learning_rate": 2.474354278845329e-06, "loss": 0.2972, "step": 22433 }, { "epoch": 2.1052927927927927, "grad_norm": 1.2228270262092986, "learning_rate": 2.473883102083995e-06, "loss": 0.2758, "step": 22434 }, { "epoch": 2.1053866366366365, "grad_norm": 1.286597975426864, "learning_rate": 2.4734119554418494e-06, "loss": 0.3345, "step": 22435 }, { "epoch": 2.1054804804804803, "grad_norm": 1.1964032651374716, "learning_rate": 2.4729408389245112e-06, "loss": 0.2832, "step": 22436 }, { "epoch": 2.105574324324324, "grad_norm": 1.0918354171601976, "learning_rate": 2.472469752537596e-06, "loss": 0.4012, "step": 22437 }, { "epoch": 2.1056681681681684, "grad_norm": 1.280024737972016, "learning_rate": 2.4719986962867225e-06, "loss": 0.2918, "step": 22438 }, { "epoch": 2.105762012012012, "grad_norm": 1.4323014198485078, "learning_rate": 2.4715276701775036e-06, "loss": 0.2962, "step": 22439 }, { "epoch": 2.105855855855856, "grad_norm": 1.132017611959832, "learning_rate": 2.4710566742155594e-06, "loss": 0.2985, "step": 22440 }, { "epoch": 2.1059496996997, "grad_norm": 1.846911505047233, "learning_rate": 2.470585708406505e-06, "loss": 0.3055, "step": 22441 }, { "epoch": 2.1060435435435436, "grad_norm": 1.3218846110540619, "learning_rate": 2.470114772755952e-06, "loss": 0.2954, "step": 22442 }, { "epoch": 2.1061373873873874, "grad_norm": 1.061686411386594, "learning_rate": 2.4696438672695213e-06, "loss": 0.2783, "step": 22443 }, { "epoch": 2.1062312312312312, "grad_norm": 1.3387104909109881, "learning_rate": 2.4691729919528235e-06, "loss": 0.3119, "step": 22444 }, { "epoch": 2.106325075075075, "grad_norm": 1.1280512200430601, "learning_rate": 2.4687021468114725e-06, "loss": 0.2959, "step": 22445 }, { "epoch": 2.106418918918919, "grad_norm": 1.072434233041533, "learning_rate": 2.4682313318510858e-06, "loss": 0.3097, "step": 22446 }, { "epoch": 2.1065127627627627, "grad_norm": 1.1139005409832177, "learning_rate": 2.467760547077275e-06, "loss": 0.3185, "step": 22447 }, { "epoch": 2.1066066066066065, "grad_norm": 1.1492591594765729, "learning_rate": 2.467289792495653e-06, "loss": 0.3325, "step": 22448 }, { "epoch": 2.1067004504504503, "grad_norm": 0.9119387949821652, "learning_rate": 2.4668190681118325e-06, "loss": 0.2621, "step": 22449 }, { "epoch": 2.106794294294294, "grad_norm": 1.125072737068455, "learning_rate": 2.466348373931427e-06, "loss": 0.328, "step": 22450 }, { "epoch": 2.106888138138138, "grad_norm": 1.797284264838513, "learning_rate": 2.4658777099600457e-06, "loss": 0.3328, "step": 22451 }, { "epoch": 2.106981981981982, "grad_norm": 1.5030361178321732, "learning_rate": 2.465407076203305e-06, "loss": 0.3034, "step": 22452 }, { "epoch": 2.107075825825826, "grad_norm": 1.1322060801848473, "learning_rate": 2.464936472666814e-06, "loss": 0.3036, "step": 22453 }, { "epoch": 2.10716966966967, "grad_norm": 1.109133570105471, "learning_rate": 2.4644658993561815e-06, "loss": 0.2869, "step": 22454 }, { "epoch": 2.1072635135135136, "grad_norm": 1.550350750530525, "learning_rate": 2.463995356277023e-06, "loss": 0.3159, "step": 22455 }, { "epoch": 2.1073573573573574, "grad_norm": 1.021605591884003, "learning_rate": 2.4635248434349457e-06, "loss": 0.3333, "step": 22456 }, { "epoch": 2.107451201201201, "grad_norm": 0.976774619533134, "learning_rate": 2.463054360835559e-06, "loss": 0.2893, "step": 22457 }, { "epoch": 2.107545045045045, "grad_norm": 1.0858244913024007, "learning_rate": 2.462583908484475e-06, "loss": 0.324, "step": 22458 }, { "epoch": 2.107638888888889, "grad_norm": 1.0368023749500872, "learning_rate": 2.462113486387302e-06, "loss": 0.3038, "step": 22459 }, { "epoch": 2.1077327327327327, "grad_norm": 1.3842199635984103, "learning_rate": 2.4616430945496483e-06, "loss": 0.3071, "step": 22460 }, { "epoch": 2.1078265765765765, "grad_norm": 1.1315091156905368, "learning_rate": 2.461172732977123e-06, "loss": 0.3363, "step": 22461 }, { "epoch": 2.1079204204204203, "grad_norm": 0.9866510607295256, "learning_rate": 2.460702401675334e-06, "loss": 0.3251, "step": 22462 }, { "epoch": 2.108014264264264, "grad_norm": 1.0719213289816958, "learning_rate": 2.4602321006498875e-06, "loss": 0.3437, "step": 22463 }, { "epoch": 2.108108108108108, "grad_norm": 0.9833894993204679, "learning_rate": 2.459761829906394e-06, "loss": 0.3492, "step": 22464 }, { "epoch": 2.108201951951952, "grad_norm": 0.9342064533098052, "learning_rate": 2.4592915894504595e-06, "loss": 0.3219, "step": 22465 }, { "epoch": 2.108295795795796, "grad_norm": 1.0596973107170051, "learning_rate": 2.458821379287688e-06, "loss": 0.3374, "step": 22466 }, { "epoch": 2.1083896396396398, "grad_norm": 1.722548900717801, "learning_rate": 2.4583511994236907e-06, "loss": 0.3421, "step": 22467 }, { "epoch": 2.1084834834834836, "grad_norm": 1.1402110395681024, "learning_rate": 2.457881049864071e-06, "loss": 0.3259, "step": 22468 }, { "epoch": 2.1085773273273274, "grad_norm": 1.3054321555352841, "learning_rate": 2.4574109306144326e-06, "loss": 0.3251, "step": 22469 }, { "epoch": 2.108671171171171, "grad_norm": 1.2099851198927556, "learning_rate": 2.456940841680385e-06, "loss": 0.3018, "step": 22470 }, { "epoch": 2.108765015015015, "grad_norm": 1.1516464656774372, "learning_rate": 2.4564707830675315e-06, "loss": 0.3313, "step": 22471 }, { "epoch": 2.108858858858859, "grad_norm": 1.063566769526063, "learning_rate": 2.456000754781476e-06, "loss": 0.321, "step": 22472 }, { "epoch": 2.1089527027027026, "grad_norm": 1.2054412033821493, "learning_rate": 2.4555307568278225e-06, "loss": 0.3166, "step": 22473 }, { "epoch": 2.1090465465465464, "grad_norm": 1.134983092862027, "learning_rate": 2.455060789212176e-06, "loss": 0.3075, "step": 22474 }, { "epoch": 2.1091403903903903, "grad_norm": 1.101787722081981, "learning_rate": 2.4545908519401364e-06, "loss": 0.3457, "step": 22475 }, { "epoch": 2.109234234234234, "grad_norm": 1.004737305435847, "learning_rate": 2.454120945017312e-06, "loss": 0.3146, "step": 22476 }, { "epoch": 2.109328078078078, "grad_norm": 1.2494558599871053, "learning_rate": 2.453651068449303e-06, "loss": 0.3037, "step": 22477 }, { "epoch": 2.109421921921922, "grad_norm": 1.7661039170288904, "learning_rate": 2.4531812222417105e-06, "loss": 0.3077, "step": 22478 }, { "epoch": 2.109515765765766, "grad_norm": 1.1437700772468402, "learning_rate": 2.4527114064001395e-06, "loss": 0.3355, "step": 22479 }, { "epoch": 2.1096096096096097, "grad_norm": 1.1283096339719931, "learning_rate": 2.4522416209301906e-06, "loss": 0.3138, "step": 22480 }, { "epoch": 2.1097034534534536, "grad_norm": 1.128928488182506, "learning_rate": 2.451771865837463e-06, "loss": 0.3705, "step": 22481 }, { "epoch": 2.1097972972972974, "grad_norm": 1.2094280264821422, "learning_rate": 2.451302141127561e-06, "loss": 0.3282, "step": 22482 }, { "epoch": 2.109891141141141, "grad_norm": 4.301025078678128, "learning_rate": 2.4508324468060828e-06, "loss": 0.3106, "step": 22483 }, { "epoch": 2.109984984984985, "grad_norm": 1.2187821228982192, "learning_rate": 2.4503627828786304e-06, "loss": 0.3109, "step": 22484 }, { "epoch": 2.110078828828829, "grad_norm": 1.0319184689696637, "learning_rate": 2.449893149350802e-06, "loss": 0.3472, "step": 22485 }, { "epoch": 2.1101726726726726, "grad_norm": 0.9727360289166695, "learning_rate": 2.4494235462281977e-06, "loss": 0.2976, "step": 22486 }, { "epoch": 2.1102665165165164, "grad_norm": 1.1361276451443518, "learning_rate": 2.4489539735164152e-06, "loss": 0.2809, "step": 22487 }, { "epoch": 2.1103603603603602, "grad_norm": 1.1034969815744073, "learning_rate": 2.4484844312210564e-06, "loss": 0.346, "step": 22488 }, { "epoch": 2.110454204204204, "grad_norm": 1.0926641823086738, "learning_rate": 2.4480149193477177e-06, "loss": 0.3316, "step": 22489 }, { "epoch": 2.110548048048048, "grad_norm": 0.9902102359092659, "learning_rate": 2.447545437901996e-06, "loss": 0.3292, "step": 22490 }, { "epoch": 2.110641891891892, "grad_norm": 1.468345052000562, "learning_rate": 2.4470759868894923e-06, "loss": 0.3271, "step": 22491 }, { "epoch": 2.110735735735736, "grad_norm": 1.1236528355709412, "learning_rate": 2.4466065663158017e-06, "loss": 0.3381, "step": 22492 }, { "epoch": 2.1108295795795797, "grad_norm": 1.0179333944045195, "learning_rate": 2.44613717618652e-06, "loss": 0.3164, "step": 22493 }, { "epoch": 2.1109234234234235, "grad_norm": 0.9430028008049385, "learning_rate": 2.4456678165072472e-06, "loss": 0.3187, "step": 22494 }, { "epoch": 2.1110172672672673, "grad_norm": 1.1157809130895733, "learning_rate": 2.4451984872835778e-06, "loss": 0.3527, "step": 22495 }, { "epoch": 2.111111111111111, "grad_norm": 1.529695007726106, "learning_rate": 2.444729188521107e-06, "loss": 0.3338, "step": 22496 }, { "epoch": 2.111204954954955, "grad_norm": 1.3505003860834428, "learning_rate": 2.4442599202254314e-06, "loss": 0.3951, "step": 22497 }, { "epoch": 2.111298798798799, "grad_norm": 1.23574150543529, "learning_rate": 2.443790682402145e-06, "loss": 0.2966, "step": 22498 }, { "epoch": 2.1113926426426426, "grad_norm": 1.037134234484544, "learning_rate": 2.443321475056842e-06, "loss": 0.3218, "step": 22499 }, { "epoch": 2.1114864864864864, "grad_norm": 1.7903618019495005, "learning_rate": 2.442852298195119e-06, "loss": 0.3097, "step": 22500 }, { "epoch": 2.11158033033033, "grad_norm": 1.2029469170119114, "learning_rate": 2.4423831518225695e-06, "loss": 0.3172, "step": 22501 }, { "epoch": 2.111674174174174, "grad_norm": 1.0583197772432638, "learning_rate": 2.4419140359447844e-06, "loss": 0.2682, "step": 22502 }, { "epoch": 2.111768018018018, "grad_norm": 1.0924508796973569, "learning_rate": 2.4414449505673616e-06, "loss": 0.2969, "step": 22503 }, { "epoch": 2.111861861861862, "grad_norm": 1.0963544200729785, "learning_rate": 2.4409758956958917e-06, "loss": 0.311, "step": 22504 }, { "epoch": 2.111955705705706, "grad_norm": 1.0142629484990318, "learning_rate": 2.4405068713359657e-06, "loss": 0.2852, "step": 22505 }, { "epoch": 2.1120495495495497, "grad_norm": 1.085049897399675, "learning_rate": 2.4400378774931794e-06, "loss": 0.2823, "step": 22506 }, { "epoch": 2.1121433933933935, "grad_norm": 1.109806780133423, "learning_rate": 2.439568914173122e-06, "loss": 0.3531, "step": 22507 }, { "epoch": 2.1122372372372373, "grad_norm": 0.9970265112834241, "learning_rate": 2.4390999813813867e-06, "loss": 0.3164, "step": 22508 }, { "epoch": 2.112331081081081, "grad_norm": 1.2706513078480568, "learning_rate": 2.4386310791235635e-06, "loss": 0.3276, "step": 22509 }, { "epoch": 2.112424924924925, "grad_norm": 1.4999074384253142, "learning_rate": 2.438162207405243e-06, "loss": 0.3133, "step": 22510 }, { "epoch": 2.1125187687687688, "grad_norm": 1.8960716363498682, "learning_rate": 2.4376933662320144e-06, "loss": 0.3221, "step": 22511 }, { "epoch": 2.1126126126126126, "grad_norm": 1.1064829296410155, "learning_rate": 2.4372245556094715e-06, "loss": 0.3335, "step": 22512 }, { "epoch": 2.1127064564564564, "grad_norm": 0.9488945488954137, "learning_rate": 2.4367557755432013e-06, "loss": 0.2952, "step": 22513 }, { "epoch": 2.1128003003003, "grad_norm": 1.0944173366514436, "learning_rate": 2.436287026038792e-06, "loss": 0.295, "step": 22514 }, { "epoch": 2.112894144144144, "grad_norm": 1.2741841082487306, "learning_rate": 2.4358183071018367e-06, "loss": 0.3307, "step": 22515 }, { "epoch": 2.112987987987988, "grad_norm": 1.1866098831206608, "learning_rate": 2.4353496187379207e-06, "loss": 0.3144, "step": 22516 }, { "epoch": 2.1130818318318316, "grad_norm": 1.1744670922060974, "learning_rate": 2.4348809609526314e-06, "loss": 0.3231, "step": 22517 }, { "epoch": 2.113175675675676, "grad_norm": 1.1249376733855374, "learning_rate": 2.43441233375156e-06, "loss": 0.3339, "step": 22518 }, { "epoch": 2.1132695195195197, "grad_norm": 1.146882659971018, "learning_rate": 2.4339437371402922e-06, "loss": 0.3052, "step": 22519 }, { "epoch": 2.1133633633633635, "grad_norm": 1.800601288922732, "learning_rate": 2.433475171124416e-06, "loss": 0.3037, "step": 22520 }, { "epoch": 2.1134572072072073, "grad_norm": 0.9773584208112931, "learning_rate": 2.4330066357095166e-06, "loss": 0.324, "step": 22521 }, { "epoch": 2.113551051051051, "grad_norm": 1.0328068303234526, "learning_rate": 2.432538130901182e-06, "loss": 0.3307, "step": 22522 }, { "epoch": 2.113644894894895, "grad_norm": 1.098548460003373, "learning_rate": 2.432069656704995e-06, "loss": 0.3105, "step": 22523 }, { "epoch": 2.1137387387387387, "grad_norm": 1.1595431549263513, "learning_rate": 2.4316012131265452e-06, "loss": 0.3095, "step": 22524 }, { "epoch": 2.1138325825825826, "grad_norm": 0.9747819665547595, "learning_rate": 2.4311328001714173e-06, "loss": 0.2807, "step": 22525 }, { "epoch": 2.1139264264264264, "grad_norm": 0.9767084293629775, "learning_rate": 2.430664417845193e-06, "loss": 0.3127, "step": 22526 }, { "epoch": 2.11402027027027, "grad_norm": 1.5692329412743424, "learning_rate": 2.430196066153461e-06, "loss": 0.2974, "step": 22527 }, { "epoch": 2.114114114114114, "grad_norm": 1.5623158814467228, "learning_rate": 2.429727745101804e-06, "loss": 0.3174, "step": 22528 }, { "epoch": 2.114207957957958, "grad_norm": 1.0671448195312734, "learning_rate": 2.4292594546958044e-06, "loss": 0.3196, "step": 22529 }, { "epoch": 2.1143018018018016, "grad_norm": 1.0590801179474276, "learning_rate": 2.4287911949410476e-06, "loss": 0.3115, "step": 22530 }, { "epoch": 2.114395645645646, "grad_norm": 1.0297767636870878, "learning_rate": 2.4283229658431166e-06, "loss": 0.3113, "step": 22531 }, { "epoch": 2.1144894894894897, "grad_norm": 2.224538677297542, "learning_rate": 2.427854767407593e-06, "loss": 0.3161, "step": 22532 }, { "epoch": 2.1145833333333335, "grad_norm": 1.3031223006884967, "learning_rate": 2.4273865996400605e-06, "loss": 0.321, "step": 22533 }, { "epoch": 2.1146771771771773, "grad_norm": 1.1977911908086605, "learning_rate": 2.4269184625461e-06, "loss": 0.334, "step": 22534 }, { "epoch": 2.114771021021021, "grad_norm": 1.1016573345702536, "learning_rate": 2.4264503561312913e-06, "loss": 0.3553, "step": 22535 }, { "epoch": 2.114864864864865, "grad_norm": 1.0003878618050501, "learning_rate": 2.4259822804012203e-06, "loss": 0.3521, "step": 22536 }, { "epoch": 2.1149587087087087, "grad_norm": 1.3257731553975336, "learning_rate": 2.4255142353614654e-06, "loss": 0.2949, "step": 22537 }, { "epoch": 2.1150525525525525, "grad_norm": 1.0728080297123574, "learning_rate": 2.425046221017605e-06, "loss": 0.3188, "step": 22538 }, { "epoch": 2.1151463963963963, "grad_norm": 1.2802031757177277, "learning_rate": 2.424578237375224e-06, "loss": 0.3248, "step": 22539 }, { "epoch": 2.11524024024024, "grad_norm": 1.1834250451061727, "learning_rate": 2.4241102844398996e-06, "loss": 0.2674, "step": 22540 }, { "epoch": 2.115334084084084, "grad_norm": 1.0756812249322452, "learning_rate": 2.4236423622172094e-06, "loss": 0.2997, "step": 22541 }, { "epoch": 2.1154279279279278, "grad_norm": 1.0614897409942357, "learning_rate": 2.423174470712736e-06, "loss": 0.3326, "step": 22542 }, { "epoch": 2.1155217717717716, "grad_norm": 1.472986120592728, "learning_rate": 2.422706609932058e-06, "loss": 0.29, "step": 22543 }, { "epoch": 2.1156156156156154, "grad_norm": 1.087265120527237, "learning_rate": 2.4222387798807496e-06, "loss": 0.3468, "step": 22544 }, { "epoch": 2.1157094594594597, "grad_norm": 0.9681251881305826, "learning_rate": 2.421770980564395e-06, "loss": 0.3263, "step": 22545 }, { "epoch": 2.1158033033033035, "grad_norm": 1.106500281655638, "learning_rate": 2.421303211988567e-06, "loss": 0.257, "step": 22546 }, { "epoch": 2.1158971471471473, "grad_norm": 1.0071521450537222, "learning_rate": 2.4208354741588423e-06, "loss": 0.3005, "step": 22547 }, { "epoch": 2.115990990990991, "grad_norm": 1.5063760803230266, "learning_rate": 2.4203677670808025e-06, "loss": 0.2734, "step": 22548 }, { "epoch": 2.116084834834835, "grad_norm": 0.9569418531688515, "learning_rate": 2.4199000907600207e-06, "loss": 0.319, "step": 22549 }, { "epoch": 2.1161786786786787, "grad_norm": 1.0862873281265035, "learning_rate": 2.419432445202072e-06, "loss": 0.3152, "step": 22550 }, { "epoch": 2.1162725225225225, "grad_norm": 1.2026886901360607, "learning_rate": 2.4189648304125357e-06, "loss": 0.3367, "step": 22551 }, { "epoch": 2.1163663663663663, "grad_norm": 1.1981217791405239, "learning_rate": 2.418497246396985e-06, "loss": 0.2819, "step": 22552 }, { "epoch": 2.11646021021021, "grad_norm": 1.03533813075228, "learning_rate": 2.4180296931609947e-06, "loss": 0.299, "step": 22553 }, { "epoch": 2.116554054054054, "grad_norm": 1.1007849074766205, "learning_rate": 2.4175621707101416e-06, "loss": 0.2981, "step": 22554 }, { "epoch": 2.1166478978978978, "grad_norm": 1.2249391838982169, "learning_rate": 2.4170946790499982e-06, "loss": 0.3331, "step": 22555 }, { "epoch": 2.1167417417417416, "grad_norm": 1.0963941089230196, "learning_rate": 2.4166272181861377e-06, "loss": 0.3123, "step": 22556 }, { "epoch": 2.1168355855855854, "grad_norm": 1.1789363619440314, "learning_rate": 2.4161597881241366e-06, "loss": 0.2992, "step": 22557 }, { "epoch": 2.1169294294294296, "grad_norm": 1.3599926929955588, "learning_rate": 2.4156923888695665e-06, "loss": 0.3212, "step": 22558 }, { "epoch": 2.1170232732732734, "grad_norm": 1.2460906774843306, "learning_rate": 2.415225020428e-06, "loss": 0.332, "step": 22559 }, { "epoch": 2.1171171171171173, "grad_norm": 2.088373367138051, "learning_rate": 2.41475768280501e-06, "loss": 0.2967, "step": 22560 }, { "epoch": 2.117210960960961, "grad_norm": 2.552079743885139, "learning_rate": 2.4142903760061687e-06, "loss": 0.3174, "step": 22561 }, { "epoch": 2.117304804804805, "grad_norm": 1.326410091908701, "learning_rate": 2.4138231000370454e-06, "loss": 0.3392, "step": 22562 }, { "epoch": 2.1173986486486487, "grad_norm": 1.213274099522266, "learning_rate": 2.4133558549032156e-06, "loss": 0.3633, "step": 22563 }, { "epoch": 2.1174924924924925, "grad_norm": 4.027651562460863, "learning_rate": 2.412888640610248e-06, "loss": 0.3329, "step": 22564 }, { "epoch": 2.1175863363363363, "grad_norm": 1.0300217876609317, "learning_rate": 2.412421457163712e-06, "loss": 0.3264, "step": 22565 }, { "epoch": 2.11768018018018, "grad_norm": 1.3294005955412276, "learning_rate": 2.4119543045691818e-06, "loss": 0.3591, "step": 22566 }, { "epoch": 2.117774024024024, "grad_norm": 1.136166571984537, "learning_rate": 2.4114871828322247e-06, "loss": 0.2987, "step": 22567 }, { "epoch": 2.1178678678678677, "grad_norm": 1.3979611596876658, "learning_rate": 2.411020091958408e-06, "loss": 0.3195, "step": 22568 }, { "epoch": 2.1179617117117115, "grad_norm": 1.09072341605767, "learning_rate": 2.4105530319533066e-06, "loss": 0.3223, "step": 22569 }, { "epoch": 2.1180555555555554, "grad_norm": 1.0386140703281461, "learning_rate": 2.4100860028224854e-06, "loss": 0.3418, "step": 22570 }, { "epoch": 2.1181493993993996, "grad_norm": 2.163709478888436, "learning_rate": 2.4096190045715136e-06, "loss": 0.332, "step": 22571 }, { "epoch": 2.1182432432432434, "grad_norm": 1.104893306122754, "learning_rate": 2.4091520372059596e-06, "loss": 0.3405, "step": 22572 }, { "epoch": 2.1183370870870872, "grad_norm": 0.9357016771211429, "learning_rate": 2.4086851007313906e-06, "loss": 0.3081, "step": 22573 }, { "epoch": 2.118430930930931, "grad_norm": 1.0241214470768065, "learning_rate": 2.4082181951533724e-06, "loss": 0.3487, "step": 22574 }, { "epoch": 2.118524774774775, "grad_norm": 1.0504178599918266, "learning_rate": 2.407751320477475e-06, "loss": 0.2762, "step": 22575 }, { "epoch": 2.1186186186186187, "grad_norm": 0.9824202576116711, "learning_rate": 2.407284476709264e-06, "loss": 0.2859, "step": 22576 }, { "epoch": 2.1187124624624625, "grad_norm": 1.0708630156326648, "learning_rate": 2.406817663854304e-06, "loss": 0.3465, "step": 22577 }, { "epoch": 2.1188063063063063, "grad_norm": 0.985872864568307, "learning_rate": 2.4063508819181638e-06, "loss": 0.2597, "step": 22578 }, { "epoch": 2.11890015015015, "grad_norm": 1.2578171833034586, "learning_rate": 2.405884130906407e-06, "loss": 0.3171, "step": 22579 }, { "epoch": 2.118993993993994, "grad_norm": 1.2920024575323394, "learning_rate": 2.405417410824597e-06, "loss": 0.278, "step": 22580 }, { "epoch": 2.1190878378378377, "grad_norm": 1.2053264883571668, "learning_rate": 2.4049507216783024e-06, "loss": 0.3094, "step": 22581 }, { "epoch": 2.1191816816816815, "grad_norm": 1.067818729686507, "learning_rate": 2.4044840634730864e-06, "loss": 0.3383, "step": 22582 }, { "epoch": 2.1192755255255253, "grad_norm": 1.0896625157349842, "learning_rate": 2.404017436214512e-06, "loss": 0.3338, "step": 22583 }, { "epoch": 2.1193693693693696, "grad_norm": 1.0859356706685526, "learning_rate": 2.4035508399081436e-06, "loss": 0.2953, "step": 22584 }, { "epoch": 2.1194632132132134, "grad_norm": 0.9339529754189297, "learning_rate": 2.4030842745595434e-06, "loss": 0.3324, "step": 22585 }, { "epoch": 2.119557057057057, "grad_norm": 1.9664162371082345, "learning_rate": 2.402617740174274e-06, "loss": 0.3031, "step": 22586 }, { "epoch": 2.119650900900901, "grad_norm": 1.074555005106691, "learning_rate": 2.4021512367579003e-06, "loss": 0.2999, "step": 22587 }, { "epoch": 2.119744744744745, "grad_norm": 1.0388513762945966, "learning_rate": 2.4016847643159835e-06, "loss": 0.3061, "step": 22588 }, { "epoch": 2.1198385885885886, "grad_norm": 1.006656264269689, "learning_rate": 2.4012183228540825e-06, "loss": 0.3159, "step": 22589 }, { "epoch": 2.1199324324324325, "grad_norm": 1.0360141242415684, "learning_rate": 2.400751912377764e-06, "loss": 0.2969, "step": 22590 }, { "epoch": 2.1200262762762763, "grad_norm": 1.1265744204159334, "learning_rate": 2.400285532892586e-06, "loss": 0.3458, "step": 22591 }, { "epoch": 2.12012012012012, "grad_norm": 1.0386631993829998, "learning_rate": 2.3998191844041076e-06, "loss": 0.3202, "step": 22592 }, { "epoch": 2.120213963963964, "grad_norm": 1.5876608516211739, "learning_rate": 2.399352866917893e-06, "loss": 0.2831, "step": 22593 }, { "epoch": 2.1203078078078077, "grad_norm": 1.0271554524049271, "learning_rate": 2.3988865804395005e-06, "loss": 0.309, "step": 22594 }, { "epoch": 2.1204016516516515, "grad_norm": 1.0142407266727504, "learning_rate": 2.398420324974489e-06, "loss": 0.2832, "step": 22595 }, { "epoch": 2.1204954954954953, "grad_norm": 1.1087406654335017, "learning_rate": 2.3979541005284186e-06, "loss": 0.3062, "step": 22596 }, { "epoch": 2.120589339339339, "grad_norm": 2.715483991503473, "learning_rate": 2.3974879071068475e-06, "loss": 0.3262, "step": 22597 }, { "epoch": 2.1206831831831834, "grad_norm": 1.3755677149038152, "learning_rate": 2.3970217447153326e-06, "loss": 0.3018, "step": 22598 }, { "epoch": 2.120777027027027, "grad_norm": 0.8658558567011332, "learning_rate": 2.3965556133594353e-06, "loss": 0.3497, "step": 22599 }, { "epoch": 2.120870870870871, "grad_norm": 1.0716891128347485, "learning_rate": 2.396089513044712e-06, "loss": 0.3117, "step": 22600 }, { "epoch": 2.120964714714715, "grad_norm": 1.0482991633619765, "learning_rate": 2.395623443776718e-06, "loss": 0.3453, "step": 22601 }, { "epoch": 2.1210585585585586, "grad_norm": 1.2440697820060056, "learning_rate": 2.3951574055610144e-06, "loss": 0.3526, "step": 22602 }, { "epoch": 2.1211524024024024, "grad_norm": 2.5101554138595965, "learning_rate": 2.3946913984031555e-06, "loss": 0.3497, "step": 22603 }, { "epoch": 2.1212462462462462, "grad_norm": 1.2519791615031182, "learning_rate": 2.3942254223086956e-06, "loss": 0.3065, "step": 22604 }, { "epoch": 2.12134009009009, "grad_norm": 1.0443116305435172, "learning_rate": 2.393759477283195e-06, "loss": 0.3134, "step": 22605 }, { "epoch": 2.121433933933934, "grad_norm": 1.2143217016988685, "learning_rate": 2.393293563332207e-06, "loss": 0.2775, "step": 22606 }, { "epoch": 2.1215277777777777, "grad_norm": 1.105279250749407, "learning_rate": 2.392827680461286e-06, "loss": 0.3369, "step": 22607 }, { "epoch": 2.1216216216216215, "grad_norm": 1.2043425091531879, "learning_rate": 2.3923618286759875e-06, "loss": 0.3257, "step": 22608 }, { "epoch": 2.1217154654654653, "grad_norm": 2.4141568629668773, "learning_rate": 2.391896007981866e-06, "loss": 0.3269, "step": 22609 }, { "epoch": 2.121809309309309, "grad_norm": 1.000116994368159, "learning_rate": 2.3914302183844736e-06, "loss": 0.3299, "step": 22610 }, { "epoch": 2.1219031531531534, "grad_norm": 1.1091379215986448, "learning_rate": 2.3909644598893672e-06, "loss": 0.3465, "step": 22611 }, { "epoch": 2.121996996996997, "grad_norm": 1.1345735140364377, "learning_rate": 2.390498732502099e-06, "loss": 0.3198, "step": 22612 }, { "epoch": 2.122090840840841, "grad_norm": 1.382636861513182, "learning_rate": 2.3900330362282194e-06, "loss": 0.348, "step": 22613 }, { "epoch": 2.122184684684685, "grad_norm": 1.3641090433277676, "learning_rate": 2.3895673710732847e-06, "loss": 0.2986, "step": 22614 }, { "epoch": 2.1222785285285286, "grad_norm": 1.2271207704772966, "learning_rate": 2.389101737042846e-06, "loss": 0.3428, "step": 22615 }, { "epoch": 2.1223723723723724, "grad_norm": 1.367250747019884, "learning_rate": 2.3886361341424526e-06, "loss": 0.3242, "step": 22616 }, { "epoch": 2.1224662162162162, "grad_norm": 1.0224162087506967, "learning_rate": 2.38817056237766e-06, "loss": 0.2928, "step": 22617 }, { "epoch": 2.12256006006006, "grad_norm": 0.9625482401749782, "learning_rate": 2.3877050217540164e-06, "loss": 0.2672, "step": 22618 }, { "epoch": 2.122653903903904, "grad_norm": 0.9590870251808532, "learning_rate": 2.387239512277074e-06, "loss": 0.3105, "step": 22619 }, { "epoch": 2.1227477477477477, "grad_norm": 0.9350219450189746, "learning_rate": 2.386774033952382e-06, "loss": 0.2732, "step": 22620 }, { "epoch": 2.1228415915915915, "grad_norm": 1.141621141232169, "learning_rate": 2.3863085867854913e-06, "loss": 0.2949, "step": 22621 }, { "epoch": 2.1229354354354353, "grad_norm": 1.0296937433219833, "learning_rate": 2.3858431707819484e-06, "loss": 0.3628, "step": 22622 }, { "epoch": 2.123029279279279, "grad_norm": 1.6317438927064662, "learning_rate": 2.385377785947307e-06, "loss": 0.3257, "step": 22623 }, { "epoch": 2.123123123123123, "grad_norm": 1.1356171885159305, "learning_rate": 2.384912432287114e-06, "loss": 0.3588, "step": 22624 }, { "epoch": 2.123216966966967, "grad_norm": 0.9527718192699842, "learning_rate": 2.3844471098069167e-06, "loss": 0.3065, "step": 22625 }, { "epoch": 2.123310810810811, "grad_norm": 1.1237247012860734, "learning_rate": 2.3839818185122655e-06, "loss": 0.2879, "step": 22626 }, { "epoch": 2.1234046546546548, "grad_norm": 1.350332903584117, "learning_rate": 2.3835165584087072e-06, "loss": 0.2668, "step": 22627 }, { "epoch": 2.1234984984984986, "grad_norm": 1.1199192451001645, "learning_rate": 2.383051329501787e-06, "loss": 0.2941, "step": 22628 }, { "epoch": 2.1235923423423424, "grad_norm": 1.0543843210249162, "learning_rate": 2.3825861317970555e-06, "loss": 0.3112, "step": 22629 }, { "epoch": 2.123686186186186, "grad_norm": 1.2463839751150536, "learning_rate": 2.3821209653000578e-06, "loss": 0.3303, "step": 22630 }, { "epoch": 2.12378003003003, "grad_norm": 1.0915335462103053, "learning_rate": 2.38165583001634e-06, "loss": 0.3285, "step": 22631 }, { "epoch": 2.123873873873874, "grad_norm": 1.1046397428761268, "learning_rate": 2.381190725951448e-06, "loss": 0.3297, "step": 22632 }, { "epoch": 2.1239677177177176, "grad_norm": 1.1675356080530483, "learning_rate": 2.3807256531109267e-06, "loss": 0.2804, "step": 22633 }, { "epoch": 2.1240615615615615, "grad_norm": 0.9213493400135868, "learning_rate": 2.38026061150032e-06, "loss": 0.3284, "step": 22634 }, { "epoch": 2.1241554054054053, "grad_norm": 1.3800035960332222, "learning_rate": 2.3797956011251767e-06, "loss": 0.3493, "step": 22635 }, { "epoch": 2.124249249249249, "grad_norm": 1.1171737475821646, "learning_rate": 2.3793306219910385e-06, "loss": 0.2973, "step": 22636 }, { "epoch": 2.124343093093093, "grad_norm": 1.7447903437445156, "learning_rate": 2.3788656741034477e-06, "loss": 0.3332, "step": 22637 }, { "epoch": 2.124436936936937, "grad_norm": 1.18545048980704, "learning_rate": 2.378400757467952e-06, "loss": 0.2985, "step": 22638 }, { "epoch": 2.124530780780781, "grad_norm": 1.0336616348960481, "learning_rate": 2.3779358720900924e-06, "loss": 0.3365, "step": 22639 }, { "epoch": 2.1246246246246248, "grad_norm": 1.5518738011178446, "learning_rate": 2.377471017975411e-06, "loss": 0.2994, "step": 22640 }, { "epoch": 2.1247184684684686, "grad_norm": 0.9577805075837809, "learning_rate": 2.377006195129452e-06, "loss": 0.3255, "step": 22641 }, { "epoch": 2.1248123123123124, "grad_norm": 1.2088058903879044, "learning_rate": 2.376541403557757e-06, "loss": 0.3398, "step": 22642 }, { "epoch": 2.124906156156156, "grad_norm": 0.9671155780074295, "learning_rate": 2.3760766432658685e-06, "loss": 0.3436, "step": 22643 }, { "epoch": 2.125, "grad_norm": 1.2751334920613386, "learning_rate": 2.375611914259326e-06, "loss": 0.3395, "step": 22644 }, { "epoch": 2.125093843843844, "grad_norm": 1.2773647594874697, "learning_rate": 2.3751472165436724e-06, "loss": 0.2995, "step": 22645 }, { "epoch": 2.1251876876876876, "grad_norm": 1.136274859809921, "learning_rate": 2.3746825501244447e-06, "loss": 0.319, "step": 22646 }, { "epoch": 2.1252815315315314, "grad_norm": 1.173088164154621, "learning_rate": 2.3742179150071887e-06, "loss": 0.2962, "step": 22647 }, { "epoch": 2.1253753753753752, "grad_norm": 1.0526255980395671, "learning_rate": 2.373753311197441e-06, "loss": 0.314, "step": 22648 }, { "epoch": 2.125469219219219, "grad_norm": 1.1412215432820016, "learning_rate": 2.3732887387007396e-06, "loss": 0.3311, "step": 22649 }, { "epoch": 2.125563063063063, "grad_norm": 1.206307935480618, "learning_rate": 2.3728241975226278e-06, "loss": 0.2791, "step": 22650 }, { "epoch": 2.125656906906907, "grad_norm": 1.150530087484558, "learning_rate": 2.3723596876686426e-06, "loss": 0.299, "step": 22651 }, { "epoch": 2.125750750750751, "grad_norm": 1.0187287447004527, "learning_rate": 2.37189520914432e-06, "loss": 0.3148, "step": 22652 }, { "epoch": 2.1258445945945947, "grad_norm": 1.1425215140729799, "learning_rate": 2.371430761955202e-06, "loss": 0.3381, "step": 22653 }, { "epoch": 2.1259384384384385, "grad_norm": 1.1947326151513, "learning_rate": 2.3709663461068243e-06, "loss": 0.2803, "step": 22654 }, { "epoch": 2.1260322822822824, "grad_norm": 1.242572386640779, "learning_rate": 2.3705019616047246e-06, "loss": 0.2898, "step": 22655 }, { "epoch": 2.126126126126126, "grad_norm": 1.06379954661758, "learning_rate": 2.370037608454439e-06, "loss": 0.3574, "step": 22656 }, { "epoch": 2.12621996996997, "grad_norm": 2.0919292194255177, "learning_rate": 2.3695732866615052e-06, "loss": 0.3424, "step": 22657 }, { "epoch": 2.126313813813814, "grad_norm": 1.1628154927830252, "learning_rate": 2.3691089962314563e-06, "loss": 0.3347, "step": 22658 }, { "epoch": 2.1264076576576576, "grad_norm": 0.8980156417102544, "learning_rate": 2.3686447371698323e-06, "loss": 0.3116, "step": 22659 }, { "epoch": 2.1265015015015014, "grad_norm": 0.9740458965594189, "learning_rate": 2.368180509482167e-06, "loss": 0.3125, "step": 22660 }, { "epoch": 2.1265953453453452, "grad_norm": 0.9806460505648632, "learning_rate": 2.3677163131739933e-06, "loss": 0.3232, "step": 22661 }, { "epoch": 2.126689189189189, "grad_norm": 0.969957392849657, "learning_rate": 2.36725214825085e-06, "loss": 0.289, "step": 22662 }, { "epoch": 2.126783033033033, "grad_norm": 1.1876067432980018, "learning_rate": 2.3667880147182686e-06, "loss": 0.3492, "step": 22663 }, { "epoch": 2.126876876876877, "grad_norm": 1.0794804778420282, "learning_rate": 2.366323912581782e-06, "loss": 0.3096, "step": 22664 }, { "epoch": 2.126970720720721, "grad_norm": 1.176849222166026, "learning_rate": 2.365859841846927e-06, "loss": 0.3135, "step": 22665 }, { "epoch": 2.1270645645645647, "grad_norm": 1.203899741815394, "learning_rate": 2.3653958025192355e-06, "loss": 0.3579, "step": 22666 }, { "epoch": 2.1271584084084085, "grad_norm": 1.644533299569461, "learning_rate": 2.3649317946042377e-06, "loss": 0.3198, "step": 22667 }, { "epoch": 2.1272522522522523, "grad_norm": 1.0371663900976675, "learning_rate": 2.3644678181074727e-06, "loss": 0.296, "step": 22668 }, { "epoch": 2.127346096096096, "grad_norm": 1.0058360211350486, "learning_rate": 2.3640038730344655e-06, "loss": 0.3202, "step": 22669 }, { "epoch": 2.12743993993994, "grad_norm": 0.9890387985443428, "learning_rate": 2.3635399593907487e-06, "loss": 0.3048, "step": 22670 }, { "epoch": 2.1275337837837838, "grad_norm": 0.983846576519585, "learning_rate": 2.3630760771818574e-06, "loss": 0.2972, "step": 22671 }, { "epoch": 2.1276276276276276, "grad_norm": 1.7145137666354215, "learning_rate": 2.36261222641332e-06, "loss": 0.3213, "step": 22672 }, { "epoch": 2.1277214714714714, "grad_norm": 1.17539131573717, "learning_rate": 2.3621484070906653e-06, "loss": 0.3091, "step": 22673 }, { "epoch": 2.127815315315315, "grad_norm": 1.0193470962858904, "learning_rate": 2.361684619219428e-06, "loss": 0.3048, "step": 22674 }, { "epoch": 2.127909159159159, "grad_norm": 1.0190829178008898, "learning_rate": 2.3612208628051353e-06, "loss": 0.3278, "step": 22675 }, { "epoch": 2.128003003003003, "grad_norm": 1.0808226673445303, "learning_rate": 2.360757137853315e-06, "loss": 0.3104, "step": 22676 }, { "epoch": 2.128096846846847, "grad_norm": 1.173117693778392, "learning_rate": 2.3602934443695004e-06, "loss": 0.3496, "step": 22677 }, { "epoch": 2.128190690690691, "grad_norm": 1.027488441395074, "learning_rate": 2.359829782359217e-06, "loss": 0.3146, "step": 22678 }, { "epoch": 2.1282845345345347, "grad_norm": 1.458973986029439, "learning_rate": 2.359366151827992e-06, "loss": 0.3044, "step": 22679 }, { "epoch": 2.1283783783783785, "grad_norm": 1.3401340455042836, "learning_rate": 2.358902552781357e-06, "loss": 0.3146, "step": 22680 }, { "epoch": 2.1284722222222223, "grad_norm": 1.1199016117983904, "learning_rate": 2.358438985224838e-06, "loss": 0.3237, "step": 22681 }, { "epoch": 2.128566066066066, "grad_norm": 0.9967216901843324, "learning_rate": 2.3579754491639618e-06, "loss": 0.284, "step": 22682 }, { "epoch": 2.12865990990991, "grad_norm": 1.0254697955846863, "learning_rate": 2.357511944604255e-06, "loss": 0.3182, "step": 22683 }, { "epoch": 2.1287537537537538, "grad_norm": 1.1304390560291764, "learning_rate": 2.357048471551245e-06, "loss": 0.2872, "step": 22684 }, { "epoch": 2.1288475975975976, "grad_norm": 1.566585183129857, "learning_rate": 2.3565850300104542e-06, "loss": 0.3214, "step": 22685 }, { "epoch": 2.1289414414414414, "grad_norm": 1.0528974242164966, "learning_rate": 2.3561216199874138e-06, "loss": 0.3143, "step": 22686 }, { "epoch": 2.129035285285285, "grad_norm": 1.051178947793347, "learning_rate": 2.355658241487646e-06, "loss": 0.331, "step": 22687 }, { "epoch": 2.129129129129129, "grad_norm": 1.1390889424436346, "learning_rate": 2.355194894516675e-06, "loss": 0.3241, "step": 22688 }, { "epoch": 2.129222972972973, "grad_norm": 1.116669888453625, "learning_rate": 2.354731579080028e-06, "loss": 0.3257, "step": 22689 }, { "epoch": 2.1293168168168166, "grad_norm": 1.1279169872222723, "learning_rate": 2.354268295183228e-06, "loss": 0.2975, "step": 22690 }, { "epoch": 2.1294106606606604, "grad_norm": 1.0555518251042078, "learning_rate": 2.3538050428317966e-06, "loss": 0.3023, "step": 22691 }, { "epoch": 2.1295045045045047, "grad_norm": 1.116379041696858, "learning_rate": 2.3533418220312605e-06, "loss": 0.2946, "step": 22692 }, { "epoch": 2.1295983483483485, "grad_norm": 1.3773393492821244, "learning_rate": 2.3528786327871417e-06, "loss": 0.2768, "step": 22693 }, { "epoch": 2.1296921921921923, "grad_norm": 1.11690024988995, "learning_rate": 2.3524154751049627e-06, "loss": 0.3261, "step": 22694 }, { "epoch": 2.129786036036036, "grad_norm": 1.3542382431762892, "learning_rate": 2.351952348990245e-06, "loss": 0.3248, "step": 22695 }, { "epoch": 2.12987987987988, "grad_norm": 1.4510913153052092, "learning_rate": 2.351489254448512e-06, "loss": 0.3203, "step": 22696 }, { "epoch": 2.1299737237237237, "grad_norm": 1.3132585147275466, "learning_rate": 2.3510261914852815e-06, "loss": 0.3141, "step": 22697 }, { "epoch": 2.1300675675675675, "grad_norm": 1.0616057545247728, "learning_rate": 2.3505631601060796e-06, "loss": 0.3423, "step": 22698 }, { "epoch": 2.1301614114114114, "grad_norm": 1.273229807153015, "learning_rate": 2.350100160316425e-06, "loss": 0.3035, "step": 22699 }, { "epoch": 2.130255255255255, "grad_norm": 1.038192078105383, "learning_rate": 2.349637192121837e-06, "loss": 0.322, "step": 22700 }, { "epoch": 2.130349099099099, "grad_norm": 0.9779633011683015, "learning_rate": 2.3491742555278373e-06, "loss": 0.3306, "step": 22701 }, { "epoch": 2.130442942942943, "grad_norm": 1.7194774955899204, "learning_rate": 2.348711350539946e-06, "loss": 0.3206, "step": 22702 }, { "epoch": 2.1305367867867866, "grad_norm": 1.2135275515325765, "learning_rate": 2.3482484771636787e-06, "loss": 0.2871, "step": 22703 }, { "epoch": 2.1306306306306304, "grad_norm": 0.9747987715671108, "learning_rate": 2.3477856354045593e-06, "loss": 0.3331, "step": 22704 }, { "epoch": 2.1307244744744747, "grad_norm": 1.0032632257432443, "learning_rate": 2.3473228252681034e-06, "loss": 0.3234, "step": 22705 }, { "epoch": 2.1308183183183185, "grad_norm": 1.0893585526908762, "learning_rate": 2.34686004675983e-06, "loss": 0.3366, "step": 22706 }, { "epoch": 2.1309121621621623, "grad_norm": 0.9936907396020023, "learning_rate": 2.3463972998852563e-06, "loss": 0.3092, "step": 22707 }, { "epoch": 2.131006006006006, "grad_norm": 1.175607226176662, "learning_rate": 2.3459345846499004e-06, "loss": 0.3197, "step": 22708 }, { "epoch": 2.13109984984985, "grad_norm": 1.0714475857367634, "learning_rate": 2.3454719010592765e-06, "loss": 0.3376, "step": 22709 }, { "epoch": 2.1311936936936937, "grad_norm": 1.0617651714012388, "learning_rate": 2.3450092491189056e-06, "loss": 0.3339, "step": 22710 }, { "epoch": 2.1312875375375375, "grad_norm": 1.0536843214749871, "learning_rate": 2.3445466288343015e-06, "loss": 0.3253, "step": 22711 }, { "epoch": 2.1313813813813813, "grad_norm": 1.0296636757201265, "learning_rate": 2.3440840402109792e-06, "loss": 0.3064, "step": 22712 }, { "epoch": 2.131475225225225, "grad_norm": 1.0873371524270736, "learning_rate": 2.3436214832544567e-06, "loss": 0.2957, "step": 22713 }, { "epoch": 2.131569069069069, "grad_norm": 1.1461806158036283, "learning_rate": 2.3431589579702487e-06, "loss": 0.3122, "step": 22714 }, { "epoch": 2.1316629129129128, "grad_norm": 1.061802153482722, "learning_rate": 2.3426964643638666e-06, "loss": 0.3655, "step": 22715 }, { "epoch": 2.1317567567567566, "grad_norm": 1.3464273966743652, "learning_rate": 2.3422340024408295e-06, "loss": 0.3028, "step": 22716 }, { "epoch": 2.1318506006006004, "grad_norm": 1.1873191573354729, "learning_rate": 2.3417715722066493e-06, "loss": 0.3051, "step": 22717 }, { "epoch": 2.1319444444444446, "grad_norm": 1.1335170120976026, "learning_rate": 2.341309173666839e-06, "loss": 0.2993, "step": 22718 }, { "epoch": 2.1320382882882885, "grad_norm": 0.9567631048671666, "learning_rate": 2.3408468068269124e-06, "loss": 0.3069, "step": 22719 }, { "epoch": 2.1321321321321323, "grad_norm": 2.069688441012328, "learning_rate": 2.340384471692383e-06, "loss": 0.3268, "step": 22720 }, { "epoch": 2.132225975975976, "grad_norm": 1.0421414915162888, "learning_rate": 2.33992216826876e-06, "loss": 0.2885, "step": 22721 }, { "epoch": 2.13231981981982, "grad_norm": 1.494530128471423, "learning_rate": 2.33945989656156e-06, "loss": 0.2942, "step": 22722 }, { "epoch": 2.1324136636636637, "grad_norm": 1.1354368041100962, "learning_rate": 2.3389976565762928e-06, "loss": 0.3123, "step": 22723 }, { "epoch": 2.1325075075075075, "grad_norm": 1.1001495175594531, "learning_rate": 2.338535448318468e-06, "loss": 0.2957, "step": 22724 }, { "epoch": 2.1326013513513513, "grad_norm": 1.0784531494121146, "learning_rate": 2.3380732717936e-06, "loss": 0.2775, "step": 22725 }, { "epoch": 2.132695195195195, "grad_norm": 1.1628337507553563, "learning_rate": 2.337611127007198e-06, "loss": 0.3109, "step": 22726 }, { "epoch": 2.132789039039039, "grad_norm": 1.2068592513351735, "learning_rate": 2.33714901396477e-06, "loss": 0.3168, "step": 22727 }, { "epoch": 2.1328828828828827, "grad_norm": 1.2371570238738656, "learning_rate": 2.3366869326718293e-06, "loss": 0.3533, "step": 22728 }, { "epoch": 2.1329767267267266, "grad_norm": 1.2400354116106909, "learning_rate": 2.3362248831338836e-06, "loss": 0.3136, "step": 22729 }, { "epoch": 2.1330705705705704, "grad_norm": 1.2395838772444772, "learning_rate": 2.3357628653564425e-06, "loss": 0.2979, "step": 22730 }, { "epoch": 2.1331644144144146, "grad_norm": 1.076302678555295, "learning_rate": 2.3353008793450145e-06, "loss": 0.3103, "step": 22731 }, { "epoch": 2.1332582582582584, "grad_norm": 1.0499936953157987, "learning_rate": 2.334838925105107e-06, "loss": 0.2888, "step": 22732 }, { "epoch": 2.1333521021021022, "grad_norm": 0.9868096398726345, "learning_rate": 2.3343770026422274e-06, "loss": 0.3228, "step": 22733 }, { "epoch": 2.133445945945946, "grad_norm": 1.113434701211004, "learning_rate": 2.3339151119618865e-06, "loss": 0.3409, "step": 22734 }, { "epoch": 2.13353978978979, "grad_norm": 1.073592470431085, "learning_rate": 2.3334532530695888e-06, "loss": 0.3023, "step": 22735 }, { "epoch": 2.1336336336336337, "grad_norm": 2.3476562125765246, "learning_rate": 2.3329914259708404e-06, "loss": 0.3248, "step": 22736 }, { "epoch": 2.1337274774774775, "grad_norm": 1.0635306020865785, "learning_rate": 2.332529630671151e-06, "loss": 0.2836, "step": 22737 }, { "epoch": 2.1338213213213213, "grad_norm": 1.0500528091247279, "learning_rate": 2.3320678671760246e-06, "loss": 0.3056, "step": 22738 }, { "epoch": 2.133915165165165, "grad_norm": 1.4783464844882197, "learning_rate": 2.3316061354909655e-06, "loss": 0.3433, "step": 22739 }, { "epoch": 2.134009009009009, "grad_norm": 1.2182095644068143, "learning_rate": 2.331144435621482e-06, "loss": 0.3341, "step": 22740 }, { "epoch": 2.1341028528528527, "grad_norm": 1.2240889781550328, "learning_rate": 2.3306827675730773e-06, "loss": 0.3087, "step": 22741 }, { "epoch": 2.1341966966966965, "grad_norm": 1.0284387766525611, "learning_rate": 2.3302211313512563e-06, "loss": 0.3009, "step": 22742 }, { "epoch": 2.1342905405405403, "grad_norm": 1.0223248324026515, "learning_rate": 2.329759526961523e-06, "loss": 0.2951, "step": 22743 }, { "epoch": 2.1343843843843846, "grad_norm": 1.0212502955692708, "learning_rate": 2.3292979544093815e-06, "loss": 0.2896, "step": 22744 }, { "epoch": 2.1344782282282284, "grad_norm": 0.992490327031262, "learning_rate": 2.3288364137003327e-06, "loss": 0.3435, "step": 22745 }, { "epoch": 2.1345720720720722, "grad_norm": 1.114754817524945, "learning_rate": 2.3283749048398836e-06, "loss": 0.3006, "step": 22746 }, { "epoch": 2.134665915915916, "grad_norm": 1.094978445045334, "learning_rate": 2.3279134278335345e-06, "loss": 0.3108, "step": 22747 }, { "epoch": 2.13475975975976, "grad_norm": 1.0615768758635145, "learning_rate": 2.327451982686787e-06, "loss": 0.305, "step": 22748 }, { "epoch": 2.1348536036036037, "grad_norm": 1.046792206638887, "learning_rate": 2.3269905694051455e-06, "loss": 0.3463, "step": 22749 }, { "epoch": 2.1349474474474475, "grad_norm": 1.2270134684071412, "learning_rate": 2.32652918799411e-06, "loss": 0.3189, "step": 22750 }, { "epoch": 2.1350412912912913, "grad_norm": 1.11124305047528, "learning_rate": 2.32606783845918e-06, "loss": 0.349, "step": 22751 }, { "epoch": 2.135135135135135, "grad_norm": 1.1779844316290773, "learning_rate": 2.3256065208058593e-06, "loss": 0.3124, "step": 22752 }, { "epoch": 2.135228978978979, "grad_norm": 0.978637358478629, "learning_rate": 2.3251452350396473e-06, "loss": 0.3574, "step": 22753 }, { "epoch": 2.1353228228228227, "grad_norm": 1.0270814486800988, "learning_rate": 2.324683981166043e-06, "loss": 0.3071, "step": 22754 }, { "epoch": 2.1354166666666665, "grad_norm": 1.0980845276593663, "learning_rate": 2.324222759190547e-06, "loss": 0.3441, "step": 22755 }, { "epoch": 2.1355105105105103, "grad_norm": 1.135205170474318, "learning_rate": 2.323761569118658e-06, "loss": 0.3052, "step": 22756 }, { "epoch": 2.1356043543543546, "grad_norm": 1.0646149658759771, "learning_rate": 2.3233004109558723e-06, "loss": 0.2911, "step": 22757 }, { "epoch": 2.1356981981981984, "grad_norm": 1.1103188891274358, "learning_rate": 2.322839284707693e-06, "loss": 0.3048, "step": 22758 }, { "epoch": 2.135792042042042, "grad_norm": 1.1777671271555983, "learning_rate": 2.322378190379616e-06, "loss": 0.3288, "step": 22759 }, { "epoch": 2.135885885885886, "grad_norm": 1.0974725697585046, "learning_rate": 2.321917127977137e-06, "loss": 0.3069, "step": 22760 }, { "epoch": 2.13597972972973, "grad_norm": 1.0691007148243115, "learning_rate": 2.3214560975057575e-06, "loss": 0.272, "step": 22761 }, { "epoch": 2.1360735735735736, "grad_norm": 1.1126233566758563, "learning_rate": 2.3209950989709718e-06, "loss": 0.3138, "step": 22762 }, { "epoch": 2.1361674174174174, "grad_norm": 1.3225583775715926, "learning_rate": 2.320534132378275e-06, "loss": 0.3325, "step": 22763 }, { "epoch": 2.1362612612612613, "grad_norm": 1.202629330241129, "learning_rate": 2.320073197733167e-06, "loss": 0.3076, "step": 22764 }, { "epoch": 2.136355105105105, "grad_norm": 0.9523847733678857, "learning_rate": 2.319612295041142e-06, "loss": 0.3124, "step": 22765 }, { "epoch": 2.136448948948949, "grad_norm": 1.1027114920415446, "learning_rate": 2.3191514243076946e-06, "loss": 0.2718, "step": 22766 }, { "epoch": 2.1365427927927927, "grad_norm": 1.2149648259192742, "learning_rate": 2.3186905855383207e-06, "loss": 0.3037, "step": 22767 }, { "epoch": 2.1366366366366365, "grad_norm": 1.6350344263535594, "learning_rate": 2.3182297787385144e-06, "loss": 0.3699, "step": 22768 }, { "epoch": 2.1367304804804803, "grad_norm": 1.082856639993081, "learning_rate": 2.317769003913768e-06, "loss": 0.3436, "step": 22769 }, { "epoch": 2.136824324324324, "grad_norm": 1.1848977514465848, "learning_rate": 2.3173082610695796e-06, "loss": 0.3285, "step": 22770 }, { "epoch": 2.1369181681681684, "grad_norm": 1.0576984326485142, "learning_rate": 2.31684755021144e-06, "loss": 0.3145, "step": 22771 }, { "epoch": 2.137012012012012, "grad_norm": 1.0171842012045784, "learning_rate": 2.316386871344842e-06, "loss": 0.3552, "step": 22772 }, { "epoch": 2.137105855855856, "grad_norm": 0.9034969749661117, "learning_rate": 2.31592622447528e-06, "loss": 0.2689, "step": 22773 }, { "epoch": 2.1371996996997, "grad_norm": 1.1166759670844848, "learning_rate": 2.315465609608246e-06, "loss": 0.3294, "step": 22774 }, { "epoch": 2.1372935435435436, "grad_norm": 0.9712785646589299, "learning_rate": 2.3150050267492295e-06, "loss": 0.295, "step": 22775 }, { "epoch": 2.1373873873873874, "grad_norm": 6.070560654010039, "learning_rate": 2.3145444759037262e-06, "loss": 0.3215, "step": 22776 }, { "epoch": 2.1374812312312312, "grad_norm": 1.049208370681451, "learning_rate": 2.314083957077225e-06, "loss": 0.309, "step": 22777 }, { "epoch": 2.137575075075075, "grad_norm": 0.9931235087006883, "learning_rate": 2.313623470275217e-06, "loss": 0.3058, "step": 22778 }, { "epoch": 2.137668918918919, "grad_norm": 1.0315340531905466, "learning_rate": 2.313163015503192e-06, "loss": 0.3006, "step": 22779 }, { "epoch": 2.1377627627627627, "grad_norm": 1.0708213484889435, "learning_rate": 2.3127025927666415e-06, "loss": 0.3242, "step": 22780 }, { "epoch": 2.1378566066066065, "grad_norm": 0.9879730792405504, "learning_rate": 2.3122422020710517e-06, "loss": 0.2871, "step": 22781 }, { "epoch": 2.1379504504504503, "grad_norm": 1.0069855832772903, "learning_rate": 2.3117818434219164e-06, "loss": 0.3225, "step": 22782 }, { "epoch": 2.138044294294294, "grad_norm": 1.1504283776352968, "learning_rate": 2.311321516824723e-06, "loss": 0.313, "step": 22783 }, { "epoch": 2.138138138138138, "grad_norm": 0.9977553543042581, "learning_rate": 2.310861222284957e-06, "loss": 0.2929, "step": 22784 }, { "epoch": 2.138231981981982, "grad_norm": 1.1765350938970625, "learning_rate": 2.310400959808112e-06, "loss": 0.2686, "step": 22785 }, { "epoch": 2.138325825825826, "grad_norm": 1.138723212886717, "learning_rate": 2.3099407293996716e-06, "loss": 0.2838, "step": 22786 }, { "epoch": 2.13841966966967, "grad_norm": 1.222604731866697, "learning_rate": 2.3094805310651236e-06, "loss": 0.3638, "step": 22787 }, { "epoch": 2.1385135135135136, "grad_norm": 1.2148118887951165, "learning_rate": 2.3090203648099575e-06, "loss": 0.3165, "step": 22788 }, { "epoch": 2.1386073573573574, "grad_norm": 1.150290376080228, "learning_rate": 2.3085602306396583e-06, "loss": 0.3303, "step": 22789 }, { "epoch": 2.138701201201201, "grad_norm": 1.2074081922297317, "learning_rate": 2.3081001285597105e-06, "loss": 0.3385, "step": 22790 }, { "epoch": 2.138795045045045, "grad_norm": 1.294196511922568, "learning_rate": 2.3076400585756055e-06, "loss": 0.3282, "step": 22791 }, { "epoch": 2.138888888888889, "grad_norm": 0.9844056579016524, "learning_rate": 2.3071800206928226e-06, "loss": 0.3119, "step": 22792 }, { "epoch": 2.1389827327327327, "grad_norm": 1.0581433821025872, "learning_rate": 2.3067200149168478e-06, "loss": 0.364, "step": 22793 }, { "epoch": 2.1390765765765765, "grad_norm": 1.4442472575817384, "learning_rate": 2.306260041253169e-06, "loss": 0.3328, "step": 22794 }, { "epoch": 2.1391704204204203, "grad_norm": 0.9918745573763441, "learning_rate": 2.3058000997072683e-06, "loss": 0.3414, "step": 22795 }, { "epoch": 2.139264264264264, "grad_norm": 1.055173181797805, "learning_rate": 2.305340190284629e-06, "loss": 0.3145, "step": 22796 }, { "epoch": 2.139358108108108, "grad_norm": 1.2061934224282553, "learning_rate": 2.304880312990737e-06, "loss": 0.315, "step": 22797 }, { "epoch": 2.139451951951952, "grad_norm": 1.2209758531691015, "learning_rate": 2.3044204678310746e-06, "loss": 0.3365, "step": 22798 }, { "epoch": 2.139545795795796, "grad_norm": 1.041056199865221, "learning_rate": 2.3039606548111222e-06, "loss": 0.3418, "step": 22799 }, { "epoch": 2.1396396396396398, "grad_norm": 1.1898621173898674, "learning_rate": 2.3035008739363663e-06, "loss": 0.3003, "step": 22800 }, { "epoch": 2.1397334834834836, "grad_norm": 1.0524997602183201, "learning_rate": 2.3030411252122866e-06, "loss": 0.3048, "step": 22801 }, { "epoch": 2.1398273273273274, "grad_norm": 1.058589507866208, "learning_rate": 2.302581408644363e-06, "loss": 0.2871, "step": 22802 }, { "epoch": 2.139921171171171, "grad_norm": 0.9798741885895452, "learning_rate": 2.302121724238081e-06, "loss": 0.3048, "step": 22803 }, { "epoch": 2.140015015015015, "grad_norm": 0.9992503286990978, "learning_rate": 2.301662071998919e-06, "loss": 0.2855, "step": 22804 }, { "epoch": 2.140108858858859, "grad_norm": 1.4921148651399738, "learning_rate": 2.3012024519323577e-06, "loss": 0.3396, "step": 22805 }, { "epoch": 2.1402027027027026, "grad_norm": 1.1798360943212176, "learning_rate": 2.300742864043877e-06, "loss": 0.3541, "step": 22806 }, { "epoch": 2.1402965465465464, "grad_norm": 1.2181697221866716, "learning_rate": 2.300283308338957e-06, "loss": 0.3443, "step": 22807 }, { "epoch": 2.1403903903903903, "grad_norm": 1.1002763862989395, "learning_rate": 2.2998237848230753e-06, "loss": 0.334, "step": 22808 }, { "epoch": 2.140484234234234, "grad_norm": 1.1443334606742441, "learning_rate": 2.2993642935017136e-06, "loss": 0.3013, "step": 22809 }, { "epoch": 2.140578078078078, "grad_norm": 0.9115066753729033, "learning_rate": 2.29890483438035e-06, "loss": 0.3367, "step": 22810 }, { "epoch": 2.140671921921922, "grad_norm": 1.7864918438602544, "learning_rate": 2.2984454074644595e-06, "loss": 0.3135, "step": 22811 }, { "epoch": 2.140765765765766, "grad_norm": 1.3465297450827836, "learning_rate": 2.297986012759524e-06, "loss": 0.2471, "step": 22812 }, { "epoch": 2.1408596096096097, "grad_norm": 1.115240121636451, "learning_rate": 2.2975266502710204e-06, "loss": 0.3328, "step": 22813 }, { "epoch": 2.1409534534534536, "grad_norm": 1.2814574007812392, "learning_rate": 2.297067320004422e-06, "loss": 0.3255, "step": 22814 }, { "epoch": 2.1410472972972974, "grad_norm": 1.0701792508915533, "learning_rate": 2.2966080219652097e-06, "loss": 0.3463, "step": 22815 }, { "epoch": 2.141141141141141, "grad_norm": 0.96519163642922, "learning_rate": 2.2961487561588587e-06, "loss": 0.3256, "step": 22816 }, { "epoch": 2.141234984984985, "grad_norm": 1.0882336408065478, "learning_rate": 2.295689522590844e-06, "loss": 0.3227, "step": 22817 }, { "epoch": 2.141328828828829, "grad_norm": 1.2701372974986702, "learning_rate": 2.2952303212666416e-06, "loss": 0.3027, "step": 22818 }, { "epoch": 2.1414226726726726, "grad_norm": 0.99142329939559, "learning_rate": 2.294771152191726e-06, "loss": 0.3305, "step": 22819 }, { "epoch": 2.1415165165165164, "grad_norm": 1.1588048658097394, "learning_rate": 2.2943120153715708e-06, "loss": 0.3638, "step": 22820 }, { "epoch": 2.1416103603603602, "grad_norm": 1.349381300720115, "learning_rate": 2.2938529108116535e-06, "loss": 0.2549, "step": 22821 }, { "epoch": 2.141704204204204, "grad_norm": 0.9795289308285275, "learning_rate": 2.293393838517447e-06, "loss": 0.3407, "step": 22822 }, { "epoch": 2.141798048048048, "grad_norm": 0.9222686170036812, "learning_rate": 2.2929347984944224e-06, "loss": 0.3172, "step": 22823 }, { "epoch": 2.141891891891892, "grad_norm": 1.045558737507498, "learning_rate": 2.292475790748056e-06, "loss": 0.3233, "step": 22824 }, { "epoch": 2.141985735735736, "grad_norm": 1.0018792886968724, "learning_rate": 2.2920168152838195e-06, "loss": 0.2834, "step": 22825 }, { "epoch": 2.1420795795795797, "grad_norm": 1.1956701714646902, "learning_rate": 2.2915578721071834e-06, "loss": 0.3408, "step": 22826 }, { "epoch": 2.1421734234234235, "grad_norm": 1.240292821541345, "learning_rate": 2.291098961223624e-06, "loss": 0.2642, "step": 22827 }, { "epoch": 2.1422672672672673, "grad_norm": 1.0594835704086154, "learning_rate": 2.29064008263861e-06, "loss": 0.3553, "step": 22828 }, { "epoch": 2.142361111111111, "grad_norm": 1.0986833350212646, "learning_rate": 2.2901812363576137e-06, "loss": 0.3261, "step": 22829 }, { "epoch": 2.142454954954955, "grad_norm": 1.1180091960988996, "learning_rate": 2.289722422386105e-06, "loss": 0.3541, "step": 22830 }, { "epoch": 2.142548798798799, "grad_norm": 1.2407891926663863, "learning_rate": 2.289263640729555e-06, "loss": 0.3136, "step": 22831 }, { "epoch": 2.1426426426426426, "grad_norm": 1.1184501437714656, "learning_rate": 2.288804891393431e-06, "loss": 0.2908, "step": 22832 }, { "epoch": 2.1427364864864864, "grad_norm": 1.294176132355605, "learning_rate": 2.288346174383208e-06, "loss": 0.2976, "step": 22833 }, { "epoch": 2.14283033033033, "grad_norm": 1.2299980183664465, "learning_rate": 2.2878874897043528e-06, "loss": 0.3046, "step": 22834 }, { "epoch": 2.142924174174174, "grad_norm": 1.0750512312748401, "learning_rate": 2.287428837362332e-06, "loss": 0.3293, "step": 22835 }, { "epoch": 2.143018018018018, "grad_norm": 2.4674360966886, "learning_rate": 2.286970217362619e-06, "loss": 0.3529, "step": 22836 }, { "epoch": 2.143111861861862, "grad_norm": 1.0980565754376876, "learning_rate": 2.2865116297106786e-06, "loss": 0.3104, "step": 22837 }, { "epoch": 2.143205705705706, "grad_norm": 1.3603290414320575, "learning_rate": 2.286053074411978e-06, "loss": 0.3379, "step": 22838 }, { "epoch": 2.1432995495495497, "grad_norm": 0.9780110144381323, "learning_rate": 2.2855945514719875e-06, "loss": 0.3302, "step": 22839 }, { "epoch": 2.1433933933933935, "grad_norm": 1.165142302697868, "learning_rate": 2.285136060896173e-06, "loss": 0.3379, "step": 22840 }, { "epoch": 2.1434872372372373, "grad_norm": 1.1016501212400365, "learning_rate": 2.284677602690001e-06, "loss": 0.2961, "step": 22841 }, { "epoch": 2.143581081081081, "grad_norm": 1.0039828331167346, "learning_rate": 2.2842191768589365e-06, "loss": 0.3223, "step": 22842 }, { "epoch": 2.143674924924925, "grad_norm": 1.044309301413084, "learning_rate": 2.2837607834084475e-06, "loss": 0.2835, "step": 22843 }, { "epoch": 2.1437687687687688, "grad_norm": 1.2011221062286075, "learning_rate": 2.283302422343996e-06, "loss": 0.3676, "step": 22844 }, { "epoch": 2.1438626126126126, "grad_norm": 1.1878116079994914, "learning_rate": 2.282844093671051e-06, "loss": 0.3394, "step": 22845 }, { "epoch": 2.1439564564564564, "grad_norm": 1.4193339960849494, "learning_rate": 2.2823857973950756e-06, "loss": 0.3008, "step": 22846 }, { "epoch": 2.1440503003003, "grad_norm": 1.196120126225907, "learning_rate": 2.281927533521533e-06, "loss": 0.3231, "step": 22847 }, { "epoch": 2.144144144144144, "grad_norm": 1.0726994435886341, "learning_rate": 2.2814693020558897e-06, "loss": 0.3161, "step": 22848 }, { "epoch": 2.144237987987988, "grad_norm": 1.233028959178222, "learning_rate": 2.2810111030036088e-06, "loss": 0.3625, "step": 22849 }, { "epoch": 2.1443318318318316, "grad_norm": 1.1608530987744294, "learning_rate": 2.2805529363701494e-06, "loss": 0.3115, "step": 22850 }, { "epoch": 2.144425675675676, "grad_norm": 1.1853417852943156, "learning_rate": 2.28009480216098e-06, "loss": 0.3386, "step": 22851 }, { "epoch": 2.1445195195195197, "grad_norm": 1.1711457496709554, "learning_rate": 2.27963670038156e-06, "loss": 0.3073, "step": 22852 }, { "epoch": 2.1446133633633635, "grad_norm": 1.100279752848172, "learning_rate": 2.279178631037352e-06, "loss": 0.3069, "step": 22853 }, { "epoch": 2.1447072072072073, "grad_norm": 1.1663064595734352, "learning_rate": 2.2787205941338175e-06, "loss": 0.3448, "step": 22854 }, { "epoch": 2.144801051051051, "grad_norm": 1.0509640902270898, "learning_rate": 2.2782625896764176e-06, "loss": 0.3071, "step": 22855 }, { "epoch": 2.144894894894895, "grad_norm": 4.443548291423958, "learning_rate": 2.277804617670611e-06, "loss": 0.3472, "step": 22856 }, { "epoch": 2.1449887387387387, "grad_norm": 1.102403135653155, "learning_rate": 2.277346678121862e-06, "loss": 0.3186, "step": 22857 }, { "epoch": 2.1450825825825826, "grad_norm": 1.9001077517433145, "learning_rate": 2.27688877103563e-06, "loss": 0.3231, "step": 22858 }, { "epoch": 2.1451764264264264, "grad_norm": 1.0546814381578618, "learning_rate": 2.276430896417371e-06, "loss": 0.2872, "step": 22859 }, { "epoch": 2.14527027027027, "grad_norm": 1.0450172116090903, "learning_rate": 2.2759730542725487e-06, "loss": 0.3663, "step": 22860 }, { "epoch": 2.145364114114114, "grad_norm": 1.2762701389963118, "learning_rate": 2.2755152446066207e-06, "loss": 0.3535, "step": 22861 }, { "epoch": 2.145457957957958, "grad_norm": 1.0384470529856824, "learning_rate": 2.2750574674250437e-06, "loss": 0.2714, "step": 22862 }, { "epoch": 2.1455518018018016, "grad_norm": 1.3797043141783683, "learning_rate": 2.2745997227332784e-06, "loss": 0.3445, "step": 22863 }, { "epoch": 2.1456456456456454, "grad_norm": 1.2547143148435325, "learning_rate": 2.2741420105367814e-06, "loss": 0.3281, "step": 22864 }, { "epoch": 2.1457394894894897, "grad_norm": 1.1134322804359593, "learning_rate": 2.27368433084101e-06, "loss": 0.2723, "step": 22865 }, { "epoch": 2.1458333333333335, "grad_norm": 2.2347352154260194, "learning_rate": 2.2732266836514217e-06, "loss": 0.3523, "step": 22866 }, { "epoch": 2.1459271771771773, "grad_norm": 1.1361888296277955, "learning_rate": 2.272769068973472e-06, "loss": 0.301, "step": 22867 }, { "epoch": 2.146021021021021, "grad_norm": 1.100556704265204, "learning_rate": 2.272311486812616e-06, "loss": 0.3243, "step": 22868 }, { "epoch": 2.146114864864865, "grad_norm": 1.1359512634395643, "learning_rate": 2.2718539371743133e-06, "loss": 0.3034, "step": 22869 }, { "epoch": 2.1462087087087087, "grad_norm": 1.1493571845165087, "learning_rate": 2.2713964200640163e-06, "loss": 0.2992, "step": 22870 }, { "epoch": 2.1463025525525525, "grad_norm": 1.4653269049607478, "learning_rate": 2.2709389354871803e-06, "loss": 0.2938, "step": 22871 }, { "epoch": 2.1463963963963963, "grad_norm": 1.4169803347471013, "learning_rate": 2.2704814834492616e-06, "loss": 0.2964, "step": 22872 }, { "epoch": 2.14649024024024, "grad_norm": 1.0358560088318418, "learning_rate": 2.2700240639557136e-06, "loss": 0.3216, "step": 22873 }, { "epoch": 2.146584084084084, "grad_norm": 1.1332548840945125, "learning_rate": 2.2695666770119885e-06, "loss": 0.3039, "step": 22874 }, { "epoch": 2.1466779279279278, "grad_norm": 1.3365297029505119, "learning_rate": 2.269109322623542e-06, "loss": 0.2944, "step": 22875 }, { "epoch": 2.1467717717717716, "grad_norm": 1.0161452988635273, "learning_rate": 2.268652000795828e-06, "loss": 0.3189, "step": 22876 }, { "epoch": 2.1468656156156154, "grad_norm": 1.1812809552296066, "learning_rate": 2.2681947115342967e-06, "loss": 0.3046, "step": 22877 }, { "epoch": 2.1469594594594597, "grad_norm": 1.0292951279612341, "learning_rate": 2.2677374548444012e-06, "loss": 0.363, "step": 22878 }, { "epoch": 2.1470533033033035, "grad_norm": 1.1462248542669573, "learning_rate": 2.2672802307315946e-06, "loss": 0.2864, "step": 22879 }, { "epoch": 2.1471471471471473, "grad_norm": 1.4426632684129441, "learning_rate": 2.2668230392013248e-06, "loss": 0.3046, "step": 22880 }, { "epoch": 2.147240990990991, "grad_norm": 1.1992312893967976, "learning_rate": 2.266365880259047e-06, "loss": 0.3604, "step": 22881 }, { "epoch": 2.147334834834835, "grad_norm": 1.0389164876251755, "learning_rate": 2.2659087539102115e-06, "loss": 0.3401, "step": 22882 }, { "epoch": 2.1474286786786787, "grad_norm": 1.011001411034781, "learning_rate": 2.2654516601602656e-06, "loss": 0.2649, "step": 22883 }, { "epoch": 2.1475225225225225, "grad_norm": 1.1580840465498403, "learning_rate": 2.2649945990146625e-06, "loss": 0.2785, "step": 22884 }, { "epoch": 2.1476163663663663, "grad_norm": 1.1158025443585458, "learning_rate": 2.264537570478852e-06, "loss": 0.3156, "step": 22885 }, { "epoch": 2.14771021021021, "grad_norm": 0.9846294929279534, "learning_rate": 2.2640805745582795e-06, "loss": 0.3086, "step": 22886 }, { "epoch": 2.147804054054054, "grad_norm": 1.0617245528292907, "learning_rate": 2.2636236112583976e-06, "loss": 0.322, "step": 22887 }, { "epoch": 2.1478978978978978, "grad_norm": 1.0892038070764403, "learning_rate": 2.2631666805846544e-06, "loss": 0.2859, "step": 22888 }, { "epoch": 2.1479917417417416, "grad_norm": 1.507909149173781, "learning_rate": 2.2627097825424967e-06, "loss": 0.3189, "step": 22889 }, { "epoch": 2.1480855855855854, "grad_norm": 1.1614302841664086, "learning_rate": 2.262252917137372e-06, "loss": 0.3309, "step": 22890 }, { "epoch": 2.1481794294294296, "grad_norm": 1.2541664268100432, "learning_rate": 2.2617960843747285e-06, "loss": 0.3184, "step": 22891 }, { "epoch": 2.1482732732732734, "grad_norm": 1.4171398586151729, "learning_rate": 2.26133928426001e-06, "loss": 0.3401, "step": 22892 }, { "epoch": 2.1483671171171173, "grad_norm": 1.1283267674442223, "learning_rate": 2.260882516798668e-06, "loss": 0.3509, "step": 22893 }, { "epoch": 2.148460960960961, "grad_norm": 2.478192283326732, "learning_rate": 2.2604257819961457e-06, "loss": 0.3435, "step": 22894 }, { "epoch": 2.148554804804805, "grad_norm": 1.2657633877645387, "learning_rate": 2.259969079857888e-06, "loss": 0.3, "step": 22895 }, { "epoch": 2.1486486486486487, "grad_norm": 0.9959854991271803, "learning_rate": 2.259512410389343e-06, "loss": 0.3101, "step": 22896 }, { "epoch": 2.1487424924924925, "grad_norm": 1.1190025036898177, "learning_rate": 2.2590557735959534e-06, "loss": 0.3132, "step": 22897 }, { "epoch": 2.1488363363363363, "grad_norm": 1.4185270763669546, "learning_rate": 2.2585991694831634e-06, "loss": 0.3115, "step": 22898 }, { "epoch": 2.14893018018018, "grad_norm": 1.1666573677589656, "learning_rate": 2.2581425980564198e-06, "loss": 0.3415, "step": 22899 }, { "epoch": 2.149024024024024, "grad_norm": 1.0197035723744574, "learning_rate": 2.2576860593211648e-06, "loss": 0.3294, "step": 22900 }, { "epoch": 2.1491178678678677, "grad_norm": 0.9688812740436428, "learning_rate": 2.2572295532828414e-06, "loss": 0.2778, "step": 22901 }, { "epoch": 2.1492117117117115, "grad_norm": 1.0222427780426755, "learning_rate": 2.256773079946893e-06, "loss": 0.2961, "step": 22902 }, { "epoch": 2.1493055555555554, "grad_norm": 1.1365483096650044, "learning_rate": 2.256316639318762e-06, "loss": 0.3221, "step": 22903 }, { "epoch": 2.1493993993993996, "grad_norm": 0.9636636226605599, "learning_rate": 2.2558602314038887e-06, "loss": 0.3246, "step": 22904 }, { "epoch": 2.1494932432432434, "grad_norm": 1.1569682301875792, "learning_rate": 2.2554038562077184e-06, "loss": 0.3357, "step": 22905 }, { "epoch": 2.1495870870870872, "grad_norm": 1.0664820234826964, "learning_rate": 2.2549475137356914e-06, "loss": 0.3333, "step": 22906 }, { "epoch": 2.149680930930931, "grad_norm": 1.1784726356531625, "learning_rate": 2.2544912039932466e-06, "loss": 0.3112, "step": 22907 }, { "epoch": 2.149774774774775, "grad_norm": 1.3793856710037993, "learning_rate": 2.2540349269858276e-06, "loss": 0.3135, "step": 22908 }, { "epoch": 2.1498686186186187, "grad_norm": 1.2139132865484112, "learning_rate": 2.2535786827188738e-06, "loss": 0.3464, "step": 22909 }, { "epoch": 2.1499624624624625, "grad_norm": 1.0897377496239093, "learning_rate": 2.2531224711978224e-06, "loss": 0.2805, "step": 22910 }, { "epoch": 2.1500563063063063, "grad_norm": 1.526201613591956, "learning_rate": 2.2526662924281173e-06, "loss": 0.3037, "step": 22911 }, { "epoch": 2.15015015015015, "grad_norm": 0.9620823767026301, "learning_rate": 2.252210146415195e-06, "loss": 0.3637, "step": 22912 }, { "epoch": 2.150243993993994, "grad_norm": 1.1066230450617174, "learning_rate": 2.2517540331644927e-06, "loss": 0.3167, "step": 22913 }, { "epoch": 2.1503378378378377, "grad_norm": 1.3190951577905086, "learning_rate": 2.2512979526814542e-06, "loss": 0.3231, "step": 22914 }, { "epoch": 2.1504316816816815, "grad_norm": 1.118583941167924, "learning_rate": 2.2508419049715117e-06, "loss": 0.3195, "step": 22915 }, { "epoch": 2.1505255255255253, "grad_norm": 1.2825737315413794, "learning_rate": 2.250385890040104e-06, "loss": 0.2913, "step": 22916 }, { "epoch": 2.1506193693693696, "grad_norm": 1.1493805474333019, "learning_rate": 2.2499299078926694e-06, "loss": 0.3593, "step": 22917 }, { "epoch": 2.1507132132132134, "grad_norm": 1.2970158445511486, "learning_rate": 2.2494739585346447e-06, "loss": 0.3391, "step": 22918 }, { "epoch": 2.150807057057057, "grad_norm": 1.1266118833164083, "learning_rate": 2.2490180419714643e-06, "loss": 0.282, "step": 22919 }, { "epoch": 2.150900900900901, "grad_norm": 0.9981516057119796, "learning_rate": 2.2485621582085663e-06, "loss": 0.2877, "step": 22920 }, { "epoch": 2.150994744744745, "grad_norm": 1.542765508697204, "learning_rate": 2.2481063072513865e-06, "loss": 0.2816, "step": 22921 }, { "epoch": 2.1510885885885886, "grad_norm": 1.2190123320056874, "learning_rate": 2.247650489105357e-06, "loss": 0.2885, "step": 22922 }, { "epoch": 2.1511824324324325, "grad_norm": 1.0552716178275807, "learning_rate": 2.2471947037759163e-06, "loss": 0.3211, "step": 22923 }, { "epoch": 2.1512762762762763, "grad_norm": 1.0732104208368691, "learning_rate": 2.246738951268497e-06, "loss": 0.281, "step": 22924 }, { "epoch": 2.15137012012012, "grad_norm": 1.3126623011553313, "learning_rate": 2.246283231588532e-06, "loss": 0.3266, "step": 22925 }, { "epoch": 2.151463963963964, "grad_norm": 1.0789836672510458, "learning_rate": 2.245827544741457e-06, "loss": 0.3489, "step": 22926 }, { "epoch": 2.1515578078078077, "grad_norm": 1.2781664124298393, "learning_rate": 2.2453718907327055e-06, "loss": 0.3249, "step": 22927 }, { "epoch": 2.1516516516516515, "grad_norm": 0.9538877400727177, "learning_rate": 2.2449162695677085e-06, "loss": 0.3132, "step": 22928 }, { "epoch": 2.1517454954954953, "grad_norm": 1.3122672674581557, "learning_rate": 2.2444606812518994e-06, "loss": 0.3244, "step": 22929 }, { "epoch": 2.1518393393393396, "grad_norm": 1.0206999167347373, "learning_rate": 2.24400512579071e-06, "loss": 0.2775, "step": 22930 }, { "epoch": 2.1519331831831834, "grad_norm": 1.2083032076379214, "learning_rate": 2.243549603189569e-06, "loss": 0.303, "step": 22931 }, { "epoch": 2.152027027027027, "grad_norm": 1.2636484172106264, "learning_rate": 2.243094113453913e-06, "loss": 0.3054, "step": 22932 }, { "epoch": 2.152120870870871, "grad_norm": 1.211402339371858, "learning_rate": 2.2426386565891707e-06, "loss": 0.3163, "step": 22933 }, { "epoch": 2.152214714714715, "grad_norm": 1.1190639428668467, "learning_rate": 2.2421832326007696e-06, "loss": 0.3208, "step": 22934 }, { "epoch": 2.1523085585585586, "grad_norm": 1.1831176041140674, "learning_rate": 2.241727841494145e-06, "loss": 0.2907, "step": 22935 }, { "epoch": 2.1524024024024024, "grad_norm": 1.1504229781794066, "learning_rate": 2.2412724832747236e-06, "loss": 0.3176, "step": 22936 }, { "epoch": 2.1524962462462462, "grad_norm": 1.0947413534646029, "learning_rate": 2.2408171579479333e-06, "loss": 0.285, "step": 22937 }, { "epoch": 2.15259009009009, "grad_norm": 0.9521790199619646, "learning_rate": 2.2403618655192064e-06, "loss": 0.3072, "step": 22938 }, { "epoch": 2.152683933933934, "grad_norm": 1.0604111944661658, "learning_rate": 2.23990660599397e-06, "loss": 0.3121, "step": 22939 }, { "epoch": 2.1527777777777777, "grad_norm": 1.004251879998671, "learning_rate": 2.2394513793776524e-06, "loss": 0.2945, "step": 22940 }, { "epoch": 2.1528716216216215, "grad_norm": 1.1905663282363055, "learning_rate": 2.2389961856756804e-06, "loss": 0.3132, "step": 22941 }, { "epoch": 2.1529654654654653, "grad_norm": 1.3347239444387757, "learning_rate": 2.2385410248934813e-06, "loss": 0.3058, "step": 22942 }, { "epoch": 2.153059309309309, "grad_norm": 1.6150978684744968, "learning_rate": 2.2380858970364815e-06, "loss": 0.2825, "step": 22943 }, { "epoch": 2.153153153153153, "grad_norm": 1.048268771978969, "learning_rate": 2.23763080211011e-06, "loss": 0.3297, "step": 22944 }, { "epoch": 2.153246996996997, "grad_norm": 1.0177833511069083, "learning_rate": 2.237175740119792e-06, "loss": 0.2801, "step": 22945 }, { "epoch": 2.153340840840841, "grad_norm": 1.927755565096389, "learning_rate": 2.236720711070951e-06, "loss": 0.3223, "step": 22946 }, { "epoch": 2.153434684684685, "grad_norm": 1.3060350478803275, "learning_rate": 2.236265714969016e-06, "loss": 0.3114, "step": 22947 }, { "epoch": 2.1535285285285286, "grad_norm": 1.26470092278585, "learning_rate": 2.2358107518194095e-06, "loss": 0.2931, "step": 22948 }, { "epoch": 2.1536223723723724, "grad_norm": 1.0578243099583373, "learning_rate": 2.2353558216275557e-06, "loss": 0.3091, "step": 22949 }, { "epoch": 2.1537162162162162, "grad_norm": 1.0994825591525363, "learning_rate": 2.2349009243988813e-06, "loss": 0.3628, "step": 22950 }, { "epoch": 2.15381006006006, "grad_norm": 1.3158634529276982, "learning_rate": 2.2344460601388083e-06, "loss": 0.2989, "step": 22951 }, { "epoch": 2.153903903903904, "grad_norm": 1.0309161517039525, "learning_rate": 2.233991228852761e-06, "loss": 0.3077, "step": 22952 }, { "epoch": 2.1539977477477477, "grad_norm": 1.3691574953203989, "learning_rate": 2.2335364305461614e-06, "loss": 0.3494, "step": 22953 }, { "epoch": 2.1540915915915915, "grad_norm": 1.1544343607630971, "learning_rate": 2.233081665224433e-06, "loss": 0.3051, "step": 22954 }, { "epoch": 2.1541854354354353, "grad_norm": 1.23206274586307, "learning_rate": 2.232626932892995e-06, "loss": 0.3295, "step": 22955 }, { "epoch": 2.154279279279279, "grad_norm": 1.1529991569780695, "learning_rate": 2.2321722335572744e-06, "loss": 0.3515, "step": 22956 }, { "epoch": 2.154373123123123, "grad_norm": 1.0909583828572664, "learning_rate": 2.2317175672226892e-06, "loss": 0.3161, "step": 22957 }, { "epoch": 2.154466966966967, "grad_norm": 1.1631488741892115, "learning_rate": 2.23126293389466e-06, "loss": 0.2859, "step": 22958 }, { "epoch": 2.154560810810811, "grad_norm": 1.1928537951541929, "learning_rate": 2.2308083335786107e-06, "loss": 0.3261, "step": 22959 }, { "epoch": 2.1546546546546548, "grad_norm": 1.0357246858746714, "learning_rate": 2.230353766279959e-06, "loss": 0.3299, "step": 22960 }, { "epoch": 2.1547484984984986, "grad_norm": 1.002113254212446, "learning_rate": 2.2298992320041236e-06, "loss": 0.3065, "step": 22961 }, { "epoch": 2.1548423423423424, "grad_norm": 1.0736235324051329, "learning_rate": 2.2294447307565275e-06, "loss": 0.3135, "step": 22962 }, { "epoch": 2.154936186186186, "grad_norm": 1.0881802650016408, "learning_rate": 2.2289902625425883e-06, "loss": 0.348, "step": 22963 }, { "epoch": 2.15503003003003, "grad_norm": 1.0700205385192814, "learning_rate": 2.2285358273677245e-06, "loss": 0.3596, "step": 22964 }, { "epoch": 2.155123873873874, "grad_norm": 1.506475891396378, "learning_rate": 2.228081425237353e-06, "loss": 0.3374, "step": 22965 }, { "epoch": 2.1552177177177176, "grad_norm": 1.0716857916368248, "learning_rate": 2.227627056156894e-06, "loss": 0.3089, "step": 22966 }, { "epoch": 2.1553115615615615, "grad_norm": 1.238253045033091, "learning_rate": 2.227172720131762e-06, "loss": 0.3531, "step": 22967 }, { "epoch": 2.1554054054054053, "grad_norm": 1.1148852032669059, "learning_rate": 2.226718417167378e-06, "loss": 0.308, "step": 22968 }, { "epoch": 2.155499249249249, "grad_norm": 2.4298681043924457, "learning_rate": 2.226264147269156e-06, "loss": 0.3164, "step": 22969 }, { "epoch": 2.155593093093093, "grad_norm": 1.2350176839345044, "learning_rate": 2.2258099104425117e-06, "loss": 0.3638, "step": 22970 }, { "epoch": 2.155686936936937, "grad_norm": 1.1012601478655126, "learning_rate": 2.225355706692864e-06, "loss": 0.3342, "step": 22971 }, { "epoch": 2.155780780780781, "grad_norm": 1.32034231733402, "learning_rate": 2.2249015360256267e-06, "loss": 0.2998, "step": 22972 }, { "epoch": 2.1558746246246248, "grad_norm": 0.97437481451782, "learning_rate": 2.224447398446213e-06, "loss": 0.3186, "step": 22973 }, { "epoch": 2.1559684684684686, "grad_norm": 1.3981093218103922, "learning_rate": 2.2239932939600423e-06, "loss": 0.3365, "step": 22974 }, { "epoch": 2.1560623123123124, "grad_norm": 1.1286947437287307, "learning_rate": 2.2235392225725254e-06, "loss": 0.3155, "step": 22975 }, { "epoch": 2.156156156156156, "grad_norm": 1.105939956428001, "learning_rate": 2.223085184289077e-06, "loss": 0.275, "step": 22976 }, { "epoch": 2.15625, "grad_norm": 1.0596347812227356, "learning_rate": 2.2226311791151116e-06, "loss": 0.323, "step": 22977 }, { "epoch": 2.156343843843844, "grad_norm": 0.9942364416156299, "learning_rate": 2.222177207056041e-06, "loss": 0.3314, "step": 22978 }, { "epoch": 2.1564376876876876, "grad_norm": 1.0649696603507997, "learning_rate": 2.2217232681172768e-06, "loss": 0.3085, "step": 22979 }, { "epoch": 2.1565315315315314, "grad_norm": 1.201191415268058, "learning_rate": 2.2212693623042346e-06, "loss": 0.35, "step": 22980 }, { "epoch": 2.1566253753753752, "grad_norm": 1.0087617476424753, "learning_rate": 2.2208154896223254e-06, "loss": 0.3061, "step": 22981 }, { "epoch": 2.156719219219219, "grad_norm": 0.956364454583302, "learning_rate": 2.220361650076958e-06, "loss": 0.3269, "step": 22982 }, { "epoch": 2.156813063063063, "grad_norm": 0.9493484403804946, "learning_rate": 2.2199078436735478e-06, "loss": 0.2932, "step": 22983 }, { "epoch": 2.156906906906907, "grad_norm": 1.1912330914609284, "learning_rate": 2.219454070417503e-06, "loss": 0.3016, "step": 22984 }, { "epoch": 2.157000750750751, "grad_norm": 1.2176575989042844, "learning_rate": 2.219000330314234e-06, "loss": 0.3358, "step": 22985 }, { "epoch": 2.1570945945945947, "grad_norm": 1.1626671113798794, "learning_rate": 2.218546623369152e-06, "loss": 0.2965, "step": 22986 }, { "epoch": 2.1571884384384385, "grad_norm": 1.0266035786032048, "learning_rate": 2.218092949587666e-06, "loss": 0.3602, "step": 22987 }, { "epoch": 2.1572822822822824, "grad_norm": 1.2969664440583744, "learning_rate": 2.2176393089751865e-06, "loss": 0.305, "step": 22988 }, { "epoch": 2.157376126126126, "grad_norm": 0.9203787484321303, "learning_rate": 2.2171857015371195e-06, "loss": 0.3065, "step": 22989 }, { "epoch": 2.15746996996997, "grad_norm": 0.9948005947223788, "learning_rate": 2.2167321272788755e-06, "loss": 0.2868, "step": 22990 }, { "epoch": 2.157563813813814, "grad_norm": 0.9918048518032769, "learning_rate": 2.21627858620586e-06, "loss": 0.2976, "step": 22991 }, { "epoch": 2.1576576576576576, "grad_norm": 0.8996166317922218, "learning_rate": 2.2158250783234846e-06, "loss": 0.2986, "step": 22992 }, { "epoch": 2.1577515015015014, "grad_norm": 1.1817678356948145, "learning_rate": 2.2153716036371543e-06, "loss": 0.3265, "step": 22993 }, { "epoch": 2.1578453453453452, "grad_norm": 1.0925088876821942, "learning_rate": 2.2149181621522743e-06, "loss": 0.329, "step": 22994 }, { "epoch": 2.157939189189189, "grad_norm": 1.1618892910080616, "learning_rate": 2.214464753874254e-06, "loss": 0.345, "step": 22995 }, { "epoch": 2.158033033033033, "grad_norm": 1.1520259966024957, "learning_rate": 2.214011378808499e-06, "loss": 0.2908, "step": 22996 }, { "epoch": 2.158126876876877, "grad_norm": 1.0822853888418917, "learning_rate": 2.2135580369604126e-06, "loss": 0.3177, "step": 22997 }, { "epoch": 2.158220720720721, "grad_norm": 1.109529433292809, "learning_rate": 2.2131047283354033e-06, "loss": 0.3353, "step": 22998 }, { "epoch": 2.1583145645645647, "grad_norm": 0.7910807519181138, "learning_rate": 2.2126514529388743e-06, "loss": 0.3098, "step": 22999 }, { "epoch": 2.1584084084084085, "grad_norm": 1.082654774416494, "learning_rate": 2.2121982107762297e-06, "loss": 0.2958, "step": 23000 }, { "epoch": 2.1585022522522523, "grad_norm": 21.94782740688855, "learning_rate": 2.211745001852874e-06, "loss": 0.3169, "step": 23001 }, { "epoch": 2.158596096096096, "grad_norm": 1.2461957547325022, "learning_rate": 2.211291826174211e-06, "loss": 0.3328, "step": 23002 }, { "epoch": 2.15868993993994, "grad_norm": 1.1819645869617972, "learning_rate": 2.2108386837456426e-06, "loss": 0.3054, "step": 23003 }, { "epoch": 2.1587837837837838, "grad_norm": 1.1504541812688316, "learning_rate": 2.210385574572574e-06, "loss": 0.3496, "step": 23004 }, { "epoch": 2.1588776276276276, "grad_norm": 1.125322264655744, "learning_rate": 2.2099324986604063e-06, "loss": 0.3222, "step": 23005 }, { "epoch": 2.1589714714714714, "grad_norm": 1.0301196156016212, "learning_rate": 2.2094794560145404e-06, "loss": 0.3297, "step": 23006 }, { "epoch": 2.159065315315315, "grad_norm": 1.0242672308807346, "learning_rate": 2.209026446640381e-06, "loss": 0.3131, "step": 23007 }, { "epoch": 2.159159159159159, "grad_norm": 1.1921673578657603, "learning_rate": 2.2085734705433276e-06, "loss": 0.302, "step": 23008 }, { "epoch": 2.159253003003003, "grad_norm": 1.03256577543858, "learning_rate": 2.2081205277287796e-06, "loss": 0.3032, "step": 23009 }, { "epoch": 2.159346846846847, "grad_norm": 1.0832835774748564, "learning_rate": 2.207667618202141e-06, "loss": 0.3011, "step": 23010 }, { "epoch": 2.159440690690691, "grad_norm": 1.179518612661446, "learning_rate": 2.20721474196881e-06, "loss": 0.3398, "step": 23011 }, { "epoch": 2.1595345345345347, "grad_norm": 1.1176278095837, "learning_rate": 2.206761899034186e-06, "loss": 0.325, "step": 23012 }, { "epoch": 2.1596283783783785, "grad_norm": 1.0891536539043292, "learning_rate": 2.2063090894036694e-06, "loss": 0.3239, "step": 23013 }, { "epoch": 2.1597222222222223, "grad_norm": 1.152380974987066, "learning_rate": 2.205856313082658e-06, "loss": 0.2813, "step": 23014 }, { "epoch": 2.159816066066066, "grad_norm": 0.9586611363158701, "learning_rate": 2.2054035700765486e-06, "loss": 0.3229, "step": 23015 }, { "epoch": 2.15990990990991, "grad_norm": 1.1650315261265853, "learning_rate": 2.204950860390743e-06, "loss": 0.3115, "step": 23016 }, { "epoch": 2.1600037537537538, "grad_norm": 1.1413118147447832, "learning_rate": 2.204498184030637e-06, "loss": 0.3431, "step": 23017 }, { "epoch": 2.1600975975975976, "grad_norm": 1.2855826343451475, "learning_rate": 2.204045541001627e-06, "loss": 0.3154, "step": 23018 }, { "epoch": 2.1601914414414414, "grad_norm": 1.393702682305719, "learning_rate": 2.203592931309113e-06, "loss": 0.347, "step": 23019 }, { "epoch": 2.160285285285285, "grad_norm": 1.1488249141607618, "learning_rate": 2.2031403549584895e-06, "loss": 0.3118, "step": 23020 }, { "epoch": 2.160379129129129, "grad_norm": 1.0205041447976544, "learning_rate": 2.2026878119551504e-06, "loss": 0.3068, "step": 23021 }, { "epoch": 2.160472972972973, "grad_norm": 1.2293385148573224, "learning_rate": 2.2022353023044963e-06, "loss": 0.2944, "step": 23022 }, { "epoch": 2.1605668168168166, "grad_norm": 1.0162204631979774, "learning_rate": 2.2017828260119194e-06, "loss": 0.2967, "step": 23023 }, { "epoch": 2.1606606606606604, "grad_norm": 1.1910046666839398, "learning_rate": 2.2013303830828155e-06, "loss": 0.3649, "step": 23024 }, { "epoch": 2.1607545045045047, "grad_norm": 1.0999458306106804, "learning_rate": 2.2008779735225783e-06, "loss": 0.3309, "step": 23025 }, { "epoch": 2.1608483483483485, "grad_norm": 1.0322103755148708, "learning_rate": 2.200425597336603e-06, "loss": 0.3317, "step": 23026 }, { "epoch": 2.1609421921921923, "grad_norm": 1.466938541106226, "learning_rate": 2.199973254530281e-06, "loss": 0.3495, "step": 23027 }, { "epoch": 2.161036036036036, "grad_norm": 0.993532500030364, "learning_rate": 2.199520945109009e-06, "loss": 0.2815, "step": 23028 }, { "epoch": 2.16112987987988, "grad_norm": 1.0639564612994723, "learning_rate": 2.199068669078178e-06, "loss": 0.3197, "step": 23029 }, { "epoch": 2.1612237237237237, "grad_norm": 1.178792925924626, "learning_rate": 2.1986164264431796e-06, "loss": 0.3178, "step": 23030 }, { "epoch": 2.1613175675675675, "grad_norm": 0.9288520941064762, "learning_rate": 2.198164217209409e-06, "loss": 0.3066, "step": 23031 }, { "epoch": 2.1614114114114114, "grad_norm": 1.04105227203205, "learning_rate": 2.1977120413822563e-06, "loss": 0.3632, "step": 23032 }, { "epoch": 2.161505255255255, "grad_norm": 1.3192730804704897, "learning_rate": 2.1972598989671106e-06, "loss": 0.3115, "step": 23033 }, { "epoch": 2.161599099099099, "grad_norm": 1.3477236840463636, "learning_rate": 2.1968077899693674e-06, "loss": 0.3251, "step": 23034 }, { "epoch": 2.161692942942943, "grad_norm": 1.1395166507408345, "learning_rate": 2.196355714394414e-06, "loss": 0.3461, "step": 23035 }, { "epoch": 2.1617867867867866, "grad_norm": 1.1318890266921926, "learning_rate": 2.195903672247642e-06, "loss": 0.3165, "step": 23036 }, { "epoch": 2.1618806306306304, "grad_norm": 1.3242598124684979, "learning_rate": 2.195451663534441e-06, "loss": 0.3227, "step": 23037 }, { "epoch": 2.1619744744744747, "grad_norm": 1.0217627543044738, "learning_rate": 2.1949996882601987e-06, "loss": 0.3079, "step": 23038 }, { "epoch": 2.1620683183183185, "grad_norm": 0.9864779184428798, "learning_rate": 2.1945477464303043e-06, "loss": 0.2959, "step": 23039 }, { "epoch": 2.1621621621621623, "grad_norm": 1.1191455768268777, "learning_rate": 2.194095838050149e-06, "loss": 0.3443, "step": 23040 }, { "epoch": 2.162256006006006, "grad_norm": 1.1047618812395839, "learning_rate": 2.193643963125119e-06, "loss": 0.2949, "step": 23041 }, { "epoch": 2.16234984984985, "grad_norm": 1.2561071533800336, "learning_rate": 2.1931921216606007e-06, "loss": 0.3447, "step": 23042 }, { "epoch": 2.1624436936936937, "grad_norm": 1.030431280415951, "learning_rate": 2.1927403136619844e-06, "loss": 0.3175, "step": 23043 }, { "epoch": 2.1625375375375375, "grad_norm": 1.0148115024772297, "learning_rate": 2.1922885391346564e-06, "loss": 0.3245, "step": 23044 }, { "epoch": 2.1626313813813813, "grad_norm": 1.2419881756846451, "learning_rate": 2.1918367980840005e-06, "loss": 0.3412, "step": 23045 }, { "epoch": 2.162725225225225, "grad_norm": 1.0662822740757982, "learning_rate": 2.1913850905154066e-06, "loss": 0.3227, "step": 23046 }, { "epoch": 2.162819069069069, "grad_norm": 0.9482971032515656, "learning_rate": 2.1909334164342587e-06, "loss": 0.3212, "step": 23047 }, { "epoch": 2.1629129129129128, "grad_norm": 1.1630974670924952, "learning_rate": 2.1904817758459412e-06, "loss": 0.3348, "step": 23048 }, { "epoch": 2.1630067567567566, "grad_norm": 1.2057414944027272, "learning_rate": 2.1900301687558418e-06, "loss": 0.2877, "step": 23049 }, { "epoch": 2.1631006006006004, "grad_norm": 1.259971148547065, "learning_rate": 2.1895785951693428e-06, "loss": 0.3075, "step": 23050 }, { "epoch": 2.1631944444444446, "grad_norm": 3.3858895947747625, "learning_rate": 2.1891270550918293e-06, "loss": 0.2786, "step": 23051 }, { "epoch": 2.1632882882882885, "grad_norm": 0.985041130428461, "learning_rate": 2.1886755485286846e-06, "loss": 0.3393, "step": 23052 }, { "epoch": 2.1633821321321323, "grad_norm": 1.058278350093226, "learning_rate": 2.1882240754852925e-06, "loss": 0.3333, "step": 23053 }, { "epoch": 2.163475975975976, "grad_norm": 0.9659634503020977, "learning_rate": 2.1877726359670338e-06, "loss": 0.3204, "step": 23054 }, { "epoch": 2.16356981981982, "grad_norm": 0.977886171545905, "learning_rate": 2.1873212299792946e-06, "loss": 0.2781, "step": 23055 }, { "epoch": 2.1636636636636637, "grad_norm": 1.6611343333350945, "learning_rate": 2.1868698575274555e-06, "loss": 0.3178, "step": 23056 }, { "epoch": 2.1637575075075075, "grad_norm": 1.0476583480196136, "learning_rate": 2.186418518616896e-06, "loss": 0.2925, "step": 23057 }, { "epoch": 2.1638513513513513, "grad_norm": 1.1025217405286691, "learning_rate": 2.1859672132530023e-06, "loss": 0.2938, "step": 23058 }, { "epoch": 2.163945195195195, "grad_norm": 1.112168319030393, "learning_rate": 2.1855159414411523e-06, "loss": 0.3638, "step": 23059 }, { "epoch": 2.164039039039039, "grad_norm": 1.1265534081090365, "learning_rate": 2.185064703186725e-06, "loss": 0.3117, "step": 23060 }, { "epoch": 2.1641328828828827, "grad_norm": 1.0538002299990867, "learning_rate": 2.1846134984951046e-06, "loss": 0.2475, "step": 23061 }, { "epoch": 2.1642267267267266, "grad_norm": 0.974091932737665, "learning_rate": 2.1841623273716688e-06, "loss": 0.3257, "step": 23062 }, { "epoch": 2.1643205705705704, "grad_norm": 1.5309125345578698, "learning_rate": 2.1837111898217966e-06, "loss": 0.351, "step": 23063 }, { "epoch": 2.1644144144144146, "grad_norm": 1.3620540277926083, "learning_rate": 2.1832600858508678e-06, "loss": 0.3065, "step": 23064 }, { "epoch": 2.1645082582582584, "grad_norm": 1.1774115617216656, "learning_rate": 2.18280901546426e-06, "loss": 0.3381, "step": 23065 }, { "epoch": 2.1646021021021022, "grad_norm": 1.072322570998821, "learning_rate": 2.1823579786673505e-06, "loss": 0.345, "step": 23066 }, { "epoch": 2.164695945945946, "grad_norm": 1.3540186013087496, "learning_rate": 2.1819069754655202e-06, "loss": 0.3158, "step": 23067 }, { "epoch": 2.16478978978979, "grad_norm": 1.1879915072721017, "learning_rate": 2.1814560058641445e-06, "loss": 0.3363, "step": 23068 }, { "epoch": 2.1648836336336337, "grad_norm": 1.2188449417557998, "learning_rate": 2.1810050698685984e-06, "loss": 0.3187, "step": 23069 }, { "epoch": 2.1649774774774775, "grad_norm": 1.084428513690629, "learning_rate": 2.1805541674842633e-06, "loss": 0.294, "step": 23070 }, { "epoch": 2.1650713213213213, "grad_norm": 1.0917466231713404, "learning_rate": 2.1801032987165124e-06, "loss": 0.3259, "step": 23071 }, { "epoch": 2.165165165165165, "grad_norm": 1.1460015050173111, "learning_rate": 2.17965246357072e-06, "loss": 0.2726, "step": 23072 }, { "epoch": 2.165259009009009, "grad_norm": 1.066425538889303, "learning_rate": 2.179201662052265e-06, "loss": 0.2953, "step": 23073 }, { "epoch": 2.1653528528528527, "grad_norm": 1.0995094410805204, "learning_rate": 2.1787508941665205e-06, "loss": 0.3201, "step": 23074 }, { "epoch": 2.1654466966966965, "grad_norm": 1.305700308518312, "learning_rate": 2.1783001599188607e-06, "loss": 0.3341, "step": 23075 }, { "epoch": 2.1655405405405403, "grad_norm": 1.2736269615440639, "learning_rate": 2.1778494593146605e-06, "loss": 0.3467, "step": 23076 }, { "epoch": 2.1656343843843846, "grad_norm": 1.2050955604195412, "learning_rate": 2.1773987923592936e-06, "loss": 0.3264, "step": 23077 }, { "epoch": 2.1657282282282284, "grad_norm": 1.0456991108283875, "learning_rate": 2.1769481590581308e-06, "loss": 0.2901, "step": 23078 }, { "epoch": 2.1658220720720722, "grad_norm": 1.1384859873778241, "learning_rate": 2.176497559416549e-06, "loss": 0.3163, "step": 23079 }, { "epoch": 2.165915915915916, "grad_norm": 1.1483959457726733, "learning_rate": 2.1760469934399202e-06, "loss": 0.2556, "step": 23080 }, { "epoch": 2.16600975975976, "grad_norm": 0.9176088741394105, "learning_rate": 2.175596461133613e-06, "loss": 0.2927, "step": 23081 }, { "epoch": 2.1661036036036037, "grad_norm": 2.094900625376029, "learning_rate": 2.1751459625030035e-06, "loss": 0.308, "step": 23082 }, { "epoch": 2.1661974474474475, "grad_norm": 1.1166455063679948, "learning_rate": 2.1746954975534613e-06, "loss": 0.3103, "step": 23083 }, { "epoch": 2.1662912912912913, "grad_norm": 1.0947887242488548, "learning_rate": 2.1742450662903545e-06, "loss": 0.3541, "step": 23084 }, { "epoch": 2.166385135135135, "grad_norm": 1.1177800603566759, "learning_rate": 2.1737946687190594e-06, "loss": 0.3174, "step": 23085 }, { "epoch": 2.166478978978979, "grad_norm": 1.11497538653278, "learning_rate": 2.1733443048449427e-06, "loss": 0.3363, "step": 23086 }, { "epoch": 2.1665728228228227, "grad_norm": 1.1460549090501873, "learning_rate": 2.172893974673374e-06, "loss": 0.3585, "step": 23087 }, { "epoch": 2.1666666666666665, "grad_norm": 1.0216638690914206, "learning_rate": 2.1724436782097237e-06, "loss": 0.2793, "step": 23088 }, { "epoch": 2.1667605105105103, "grad_norm": 1.1698690476642133, "learning_rate": 2.1719934154593596e-06, "loss": 0.3392, "step": 23089 }, { "epoch": 2.1668543543543546, "grad_norm": 1.1713411926536272, "learning_rate": 2.17154318642765e-06, "loss": 0.324, "step": 23090 }, { "epoch": 2.1669481981981984, "grad_norm": 1.0460290768758864, "learning_rate": 2.1710929911199652e-06, "loss": 0.3104, "step": 23091 }, { "epoch": 2.167042042042042, "grad_norm": 1.14784022601212, "learning_rate": 2.1706428295416715e-06, "loss": 0.3025, "step": 23092 }, { "epoch": 2.167135885885886, "grad_norm": 1.1216892183392915, "learning_rate": 2.1701927016981344e-06, "loss": 0.3193, "step": 23093 }, { "epoch": 2.16722972972973, "grad_norm": 1.0331008153159795, "learning_rate": 2.169742607594725e-06, "loss": 0.3296, "step": 23094 }, { "epoch": 2.1673235735735736, "grad_norm": 1.0489792340865862, "learning_rate": 2.169292547236807e-06, "loss": 0.2782, "step": 23095 }, { "epoch": 2.1674174174174174, "grad_norm": 1.1118841319610766, "learning_rate": 2.1688425206297457e-06, "loss": 0.3182, "step": 23096 }, { "epoch": 2.1675112612612613, "grad_norm": 1.2212816986162556, "learning_rate": 2.1683925277789103e-06, "loss": 0.3139, "step": 23097 }, { "epoch": 2.167605105105105, "grad_norm": 1.0122869701699913, "learning_rate": 2.167942568689664e-06, "loss": 0.2769, "step": 23098 }, { "epoch": 2.167698948948949, "grad_norm": 2.189504122734327, "learning_rate": 2.1674926433673714e-06, "loss": 0.2887, "step": 23099 }, { "epoch": 2.1677927927927927, "grad_norm": 1.085328935418874, "learning_rate": 2.1670427518173977e-06, "loss": 0.3403, "step": 23100 }, { "epoch": 2.1678866366366365, "grad_norm": 1.2860741153581496, "learning_rate": 2.1665928940451066e-06, "loss": 0.3641, "step": 23101 }, { "epoch": 2.1679804804804803, "grad_norm": 1.3373747370237676, "learning_rate": 2.1661430700558593e-06, "loss": 0.2996, "step": 23102 }, { "epoch": 2.168074324324324, "grad_norm": 1.0664831454376116, "learning_rate": 2.1656932798550244e-06, "loss": 0.3522, "step": 23103 }, { "epoch": 2.1681681681681684, "grad_norm": 1.1326762203054626, "learning_rate": 2.165243523447962e-06, "loss": 0.3335, "step": 23104 }, { "epoch": 2.168262012012012, "grad_norm": 1.1544470732929408, "learning_rate": 2.1647938008400326e-06, "loss": 0.3085, "step": 23105 }, { "epoch": 2.168355855855856, "grad_norm": 1.04806392713685, "learning_rate": 2.1643441120366025e-06, "loss": 0.3011, "step": 23106 }, { "epoch": 2.1684496996997, "grad_norm": 1.245122167135348, "learning_rate": 2.1638944570430317e-06, "loss": 0.3246, "step": 23107 }, { "epoch": 2.1685435435435436, "grad_norm": 1.143529144948509, "learning_rate": 2.1634448358646788e-06, "loss": 0.3217, "step": 23108 }, { "epoch": 2.1686373873873874, "grad_norm": 1.3024437387894676, "learning_rate": 2.162995248506908e-06, "loss": 0.3148, "step": 23109 }, { "epoch": 2.1687312312312312, "grad_norm": 1.2771831478335414, "learning_rate": 2.16254569497508e-06, "loss": 0.2865, "step": 23110 }, { "epoch": 2.168825075075075, "grad_norm": 0.9670739724698261, "learning_rate": 2.1620961752745532e-06, "loss": 0.2897, "step": 23111 }, { "epoch": 2.168918918918919, "grad_norm": 1.064096671350827, "learning_rate": 2.1616466894106874e-06, "loss": 0.3189, "step": 23112 }, { "epoch": 2.1690127627627627, "grad_norm": 1.1087427084851702, "learning_rate": 2.161197237388843e-06, "loss": 0.2969, "step": 23113 }, { "epoch": 2.1691066066066065, "grad_norm": 1.4083852573444167, "learning_rate": 2.1607478192143754e-06, "loss": 0.3131, "step": 23114 }, { "epoch": 2.1692004504504503, "grad_norm": 1.034723607308386, "learning_rate": 2.1602984348926477e-06, "loss": 0.3086, "step": 23115 }, { "epoch": 2.169294294294294, "grad_norm": 0.988059795372943, "learning_rate": 2.159849084429016e-06, "loss": 0.3202, "step": 23116 }, { "epoch": 2.169388138138138, "grad_norm": 1.0541881839711673, "learning_rate": 2.1593997678288363e-06, "loss": 0.3522, "step": 23117 }, { "epoch": 2.169481981981982, "grad_norm": 1.0090595000539444, "learning_rate": 2.158950485097469e-06, "loss": 0.296, "step": 23118 }, { "epoch": 2.169575825825826, "grad_norm": 1.0926538607327358, "learning_rate": 2.1585012362402698e-06, "loss": 0.319, "step": 23119 }, { "epoch": 2.16966966966967, "grad_norm": 1.0290930814280403, "learning_rate": 2.1580520212625926e-06, "loss": 0.3139, "step": 23120 }, { "epoch": 2.1697635135135136, "grad_norm": 1.0838166115893981, "learning_rate": 2.157602840169798e-06, "loss": 0.3062, "step": 23121 }, { "epoch": 2.1698573573573574, "grad_norm": 1.051328773262955, "learning_rate": 2.157153692967239e-06, "loss": 0.2723, "step": 23122 }, { "epoch": 2.169951201201201, "grad_norm": 1.1572413052307975, "learning_rate": 2.156704579660271e-06, "loss": 0.3182, "step": 23123 }, { "epoch": 2.170045045045045, "grad_norm": 0.9732676781055786, "learning_rate": 2.1562555002542483e-06, "loss": 0.2864, "step": 23124 }, { "epoch": 2.170138888888889, "grad_norm": 1.2411553812480631, "learning_rate": 2.1558064547545273e-06, "loss": 0.2768, "step": 23125 }, { "epoch": 2.1702327327327327, "grad_norm": 1.2721314621840885, "learning_rate": 2.155357443166458e-06, "loss": 0.3185, "step": 23126 }, { "epoch": 2.1703265765765765, "grad_norm": 0.9661890697987958, "learning_rate": 2.1549084654953982e-06, "loss": 0.2544, "step": 23127 }, { "epoch": 2.1704204204204203, "grad_norm": 1.2450629002114773, "learning_rate": 2.1544595217467005e-06, "loss": 0.3114, "step": 23128 }, { "epoch": 2.170514264264264, "grad_norm": 0.9702209374628722, "learning_rate": 2.1540106119257148e-06, "loss": 0.3424, "step": 23129 }, { "epoch": 2.170608108108108, "grad_norm": 0.9519743271202277, "learning_rate": 2.153561736037797e-06, "loss": 0.3549, "step": 23130 }, { "epoch": 2.170701951951952, "grad_norm": 1.092086227334232, "learning_rate": 2.153112894088298e-06, "loss": 0.3351, "step": 23131 }, { "epoch": 2.170795795795796, "grad_norm": 1.1242226284108803, "learning_rate": 2.1526640860825663e-06, "loss": 0.3033, "step": 23132 }, { "epoch": 2.1708896396396398, "grad_norm": 1.204551080099905, "learning_rate": 2.1522153120259586e-06, "loss": 0.3059, "step": 23133 }, { "epoch": 2.1709834834834836, "grad_norm": 1.2722172913710035, "learning_rate": 2.1517665719238227e-06, "loss": 0.2795, "step": 23134 }, { "epoch": 2.1710773273273274, "grad_norm": 3.1570761882937504, "learning_rate": 2.1513178657815094e-06, "loss": 0.3203, "step": 23135 }, { "epoch": 2.171171171171171, "grad_norm": 1.147583946078012, "learning_rate": 2.150869193604368e-06, "loss": 0.3416, "step": 23136 }, { "epoch": 2.171265015015015, "grad_norm": 0.9333838320663863, "learning_rate": 2.150420555397749e-06, "loss": 0.2748, "step": 23137 }, { "epoch": 2.171358858858859, "grad_norm": 0.9411113232196593, "learning_rate": 2.1499719511669986e-06, "loss": 0.2787, "step": 23138 }, { "epoch": 2.1714527027027026, "grad_norm": 1.0499185875618413, "learning_rate": 2.1495233809174705e-06, "loss": 0.2926, "step": 23139 }, { "epoch": 2.1715465465465464, "grad_norm": 1.3841658029565878, "learning_rate": 2.14907484465451e-06, "loss": 0.3003, "step": 23140 }, { "epoch": 2.1716403903903903, "grad_norm": 1.1611107079917136, "learning_rate": 2.1486263423834646e-06, "loss": 0.3599, "step": 23141 }, { "epoch": 2.171734234234234, "grad_norm": 1.1641329924922905, "learning_rate": 2.1481778741096844e-06, "loss": 0.3514, "step": 23142 }, { "epoch": 2.171828078078078, "grad_norm": 1.0823900458738607, "learning_rate": 2.1477294398385147e-06, "loss": 0.2978, "step": 23143 }, { "epoch": 2.171921921921922, "grad_norm": 1.1275384368635626, "learning_rate": 2.1472810395753015e-06, "loss": 0.3366, "step": 23144 }, { "epoch": 2.172015765765766, "grad_norm": 11.159538206816235, "learning_rate": 2.146832673325393e-06, "loss": 0.3135, "step": 23145 }, { "epoch": 2.1721096096096097, "grad_norm": 1.5613912744127674, "learning_rate": 2.1463843410941347e-06, "loss": 0.3196, "step": 23146 }, { "epoch": 2.1722034534534536, "grad_norm": 1.0294463007293415, "learning_rate": 2.1459360428868725e-06, "loss": 0.3075, "step": 23147 }, { "epoch": 2.1722972972972974, "grad_norm": 1.0735577188243561, "learning_rate": 2.14548777870895e-06, "loss": 0.3222, "step": 23148 }, { "epoch": 2.172391141141141, "grad_norm": 1.2173270794224023, "learning_rate": 2.1450395485657127e-06, "loss": 0.32, "step": 23149 }, { "epoch": 2.172484984984985, "grad_norm": 1.983299445557018, "learning_rate": 2.1445913524625034e-06, "loss": 0.3153, "step": 23150 }, { "epoch": 2.172578828828829, "grad_norm": 1.1567416178740995, "learning_rate": 2.1441431904046693e-06, "loss": 0.3504, "step": 23151 }, { "epoch": 2.1726726726726726, "grad_norm": 1.6542994404311029, "learning_rate": 2.1436950623975515e-06, "loss": 0.3009, "step": 23152 }, { "epoch": 2.1727665165165164, "grad_norm": 1.0213729551690958, "learning_rate": 2.1432469684464924e-06, "loss": 0.3221, "step": 23153 }, { "epoch": 2.1728603603603602, "grad_norm": 1.0235374964620125, "learning_rate": 2.142798908556838e-06, "loss": 0.3008, "step": 23154 }, { "epoch": 2.172954204204204, "grad_norm": 0.9349150137024769, "learning_rate": 2.1423508827339277e-06, "loss": 0.3301, "step": 23155 }, { "epoch": 2.173048048048048, "grad_norm": 1.0202747110515051, "learning_rate": 2.141902890983103e-06, "loss": 0.2747, "step": 23156 }, { "epoch": 2.173141891891892, "grad_norm": 1.1053806448811083, "learning_rate": 2.141454933309708e-06, "loss": 0.323, "step": 23157 }, { "epoch": 2.173235735735736, "grad_norm": 1.0150907937589098, "learning_rate": 2.141007009719082e-06, "loss": 0.3448, "step": 23158 }, { "epoch": 2.1733295795795797, "grad_norm": 1.1419714766960234, "learning_rate": 2.140559120216566e-06, "loss": 0.3075, "step": 23159 }, { "epoch": 2.1734234234234235, "grad_norm": 1.171475865588468, "learning_rate": 2.1401112648075005e-06, "loss": 0.3223, "step": 23160 }, { "epoch": 2.1735172672672673, "grad_norm": 1.2369747424828976, "learning_rate": 2.1396634434972245e-06, "loss": 0.3034, "step": 23161 }, { "epoch": 2.173611111111111, "grad_norm": 1.0132961400011442, "learning_rate": 2.1392156562910763e-06, "loss": 0.2767, "step": 23162 }, { "epoch": 2.173704954954955, "grad_norm": 1.2359696304322247, "learning_rate": 2.1387679031943986e-06, "loss": 0.3097, "step": 23163 }, { "epoch": 2.173798798798799, "grad_norm": 1.2304392800834054, "learning_rate": 2.1383201842125276e-06, "loss": 0.3588, "step": 23164 }, { "epoch": 2.1738926426426426, "grad_norm": 2.643109494363059, "learning_rate": 2.1378724993508006e-06, "loss": 0.3273, "step": 23165 }, { "epoch": 2.1739864864864864, "grad_norm": 1.4772282590575676, "learning_rate": 2.1374248486145584e-06, "loss": 0.3512, "step": 23166 }, { "epoch": 2.17408033033033, "grad_norm": 1.040830763383551, "learning_rate": 2.1369772320091357e-06, "loss": 0.3424, "step": 23167 }, { "epoch": 2.174174174174174, "grad_norm": 1.2197475132741968, "learning_rate": 2.1365296495398697e-06, "loss": 0.2821, "step": 23168 }, { "epoch": 2.174268018018018, "grad_norm": 1.1889497943506573, "learning_rate": 2.1360821012120987e-06, "loss": 0.3067, "step": 23169 }, { "epoch": 2.174361861861862, "grad_norm": 1.0307079364646696, "learning_rate": 2.1356345870311582e-06, "loss": 0.2986, "step": 23170 }, { "epoch": 2.174455705705706, "grad_norm": 1.2543703287847896, "learning_rate": 2.135187107002382e-06, "loss": 0.3279, "step": 23171 }, { "epoch": 2.1745495495495497, "grad_norm": 0.9981423514497958, "learning_rate": 2.1347396611311087e-06, "loss": 0.3046, "step": 23172 }, { "epoch": 2.1746433933933935, "grad_norm": 1.0077220447622535, "learning_rate": 2.1342922494226713e-06, "loss": 0.2952, "step": 23173 }, { "epoch": 2.1747372372372373, "grad_norm": 1.2386948467844845, "learning_rate": 2.1338448718824055e-06, "loss": 0.3172, "step": 23174 }, { "epoch": 2.174831081081081, "grad_norm": 1.7128056389660673, "learning_rate": 2.1333975285156437e-06, "loss": 0.3138, "step": 23175 }, { "epoch": 2.174924924924925, "grad_norm": 1.2709892684605053, "learning_rate": 2.1329502193277214e-06, "loss": 0.2691, "step": 23176 }, { "epoch": 2.1750187687687688, "grad_norm": 1.1048462415283735, "learning_rate": 2.1325029443239685e-06, "loss": 0.2939, "step": 23177 }, { "epoch": 2.1751126126126126, "grad_norm": 1.0032995212830897, "learning_rate": 2.1320557035097226e-06, "loss": 0.2758, "step": 23178 }, { "epoch": 2.1752064564564564, "grad_norm": 1.0016544705879527, "learning_rate": 2.1316084968903146e-06, "loss": 0.3188, "step": 23179 }, { "epoch": 2.1753003003003, "grad_norm": 1.1820506853878299, "learning_rate": 2.131161324471074e-06, "loss": 0.3344, "step": 23180 }, { "epoch": 2.175394144144144, "grad_norm": 1.2443226134477459, "learning_rate": 2.130714186257336e-06, "loss": 0.2821, "step": 23181 }, { "epoch": 2.175487987987988, "grad_norm": 1.0279913290968938, "learning_rate": 2.130267082254431e-06, "loss": 0.3288, "step": 23182 }, { "epoch": 2.1755818318318316, "grad_norm": 1.197134979653281, "learning_rate": 2.1298200124676864e-06, "loss": 0.3199, "step": 23183 }, { "epoch": 2.175675675675676, "grad_norm": 1.1825749791250286, "learning_rate": 2.1293729769024383e-06, "loss": 0.2893, "step": 23184 }, { "epoch": 2.1757695195195197, "grad_norm": 0.9887073326496983, "learning_rate": 2.128925975564014e-06, "loss": 0.2983, "step": 23185 }, { "epoch": 2.1758633633633635, "grad_norm": 1.1535951825954884, "learning_rate": 2.1284790084577435e-06, "loss": 0.289, "step": 23186 }, { "epoch": 2.1759572072072073, "grad_norm": 1.0298531707802197, "learning_rate": 2.128032075588955e-06, "loss": 0.3081, "step": 23187 }, { "epoch": 2.176051051051051, "grad_norm": 1.0931724079042822, "learning_rate": 2.1275851769629784e-06, "loss": 0.269, "step": 23188 }, { "epoch": 2.176144894894895, "grad_norm": 1.1565594115427213, "learning_rate": 2.12713831258514e-06, "loss": 0.3221, "step": 23189 }, { "epoch": 2.1762387387387387, "grad_norm": 1.1506280616232205, "learning_rate": 2.126691482460772e-06, "loss": 0.3392, "step": 23190 }, { "epoch": 2.1763325825825826, "grad_norm": 1.5439310271026998, "learning_rate": 2.1262446865951992e-06, "loss": 0.3183, "step": 23191 }, { "epoch": 2.1764264264264264, "grad_norm": 1.0179717852678825, "learning_rate": 2.1257979249937472e-06, "loss": 0.3322, "step": 23192 }, { "epoch": 2.17652027027027, "grad_norm": 1.1622472066526806, "learning_rate": 2.1253511976617465e-06, "loss": 0.3004, "step": 23193 }, { "epoch": 2.176614114114114, "grad_norm": 0.925556605700849, "learning_rate": 2.124904504604522e-06, "loss": 0.3382, "step": 23194 }, { "epoch": 2.176707957957958, "grad_norm": 1.2687783764353475, "learning_rate": 2.124457845827398e-06, "loss": 0.2981, "step": 23195 }, { "epoch": 2.1768018018018016, "grad_norm": 1.6436245069761175, "learning_rate": 2.1240112213357027e-06, "loss": 0.3061, "step": 23196 }, { "epoch": 2.1768956456456454, "grad_norm": 1.1300431467277872, "learning_rate": 2.123564631134761e-06, "loss": 0.2824, "step": 23197 }, { "epoch": 2.1769894894894897, "grad_norm": 2.069758322566539, "learning_rate": 2.123118075229896e-06, "loss": 0.3103, "step": 23198 }, { "epoch": 2.1770833333333335, "grad_norm": 0.9925400064889289, "learning_rate": 2.1226715536264326e-06, "loss": 0.2965, "step": 23199 }, { "epoch": 2.1771771771771773, "grad_norm": 1.1910708913198764, "learning_rate": 2.122225066329695e-06, "loss": 0.3113, "step": 23200 }, { "epoch": 2.177271021021021, "grad_norm": 1.071558558118486, "learning_rate": 2.1217786133450054e-06, "loss": 0.3028, "step": 23201 }, { "epoch": 2.177364864864865, "grad_norm": 1.1044358917048505, "learning_rate": 2.1213321946776895e-06, "loss": 0.2915, "step": 23202 }, { "epoch": 2.1774587087087087, "grad_norm": 1.0591551193867499, "learning_rate": 2.120885810333069e-06, "loss": 0.2959, "step": 23203 }, { "epoch": 2.1775525525525525, "grad_norm": 1.1119668714516673, "learning_rate": 2.1204394603164634e-06, "loss": 0.3142, "step": 23204 }, { "epoch": 2.1776463963963963, "grad_norm": 1.0226931625920097, "learning_rate": 2.1199931446331996e-06, "loss": 0.3173, "step": 23205 }, { "epoch": 2.17774024024024, "grad_norm": 1.180315419687316, "learning_rate": 2.1195468632885965e-06, "loss": 0.3176, "step": 23206 }, { "epoch": 2.177834084084084, "grad_norm": 1.3187064605286245, "learning_rate": 2.1191006162879724e-06, "loss": 0.3226, "step": 23207 }, { "epoch": 2.1779279279279278, "grad_norm": 1.1345058829407564, "learning_rate": 2.118654403636653e-06, "loss": 0.3131, "step": 23208 }, { "epoch": 2.1780217717717716, "grad_norm": 1.1910095850156002, "learning_rate": 2.1182082253399567e-06, "loss": 0.3348, "step": 23209 }, { "epoch": 2.1781156156156154, "grad_norm": 1.1197395647680075, "learning_rate": 2.1177620814032024e-06, "loss": 0.3105, "step": 23210 }, { "epoch": 2.1782094594594597, "grad_norm": 1.0883961723037665, "learning_rate": 2.1173159718317106e-06, "loss": 0.3656, "step": 23211 }, { "epoch": 2.1783033033033035, "grad_norm": 1.0595339458650876, "learning_rate": 2.1168698966307986e-06, "loss": 0.3032, "step": 23212 }, { "epoch": 2.1783971471471473, "grad_norm": 1.0785409494149547, "learning_rate": 2.1164238558057854e-06, "loss": 0.2856, "step": 23213 }, { "epoch": 2.178490990990991, "grad_norm": 1.0836856269516588, "learning_rate": 2.1159778493619916e-06, "loss": 0.3069, "step": 23214 }, { "epoch": 2.178584834834835, "grad_norm": 1.2645142572979229, "learning_rate": 2.115531877304733e-06, "loss": 0.3406, "step": 23215 }, { "epoch": 2.1786786786786787, "grad_norm": 1.1105967229946099, "learning_rate": 2.115085939639326e-06, "loss": 0.3192, "step": 23216 }, { "epoch": 2.1787725225225225, "grad_norm": 1.2966637897945865, "learning_rate": 2.1146400363710902e-06, "loss": 0.3488, "step": 23217 }, { "epoch": 2.1788663663663663, "grad_norm": 0.9825381152990212, "learning_rate": 2.114194167505341e-06, "loss": 0.3153, "step": 23218 }, { "epoch": 2.17896021021021, "grad_norm": 1.1395747979224506, "learning_rate": 2.1137483330473924e-06, "loss": 0.3066, "step": 23219 }, { "epoch": 2.179054054054054, "grad_norm": 1.1626569208712099, "learning_rate": 2.1133025330025643e-06, "loss": 0.3112, "step": 23220 }, { "epoch": 2.1791478978978978, "grad_norm": 1.1348775514045704, "learning_rate": 2.112856767376169e-06, "loss": 0.3278, "step": 23221 }, { "epoch": 2.1792417417417416, "grad_norm": 1.2159508873269977, "learning_rate": 2.1124110361735233e-06, "loss": 0.333, "step": 23222 }, { "epoch": 2.1793355855855854, "grad_norm": 1.1891345601305894, "learning_rate": 2.11196533939994e-06, "loss": 0.3112, "step": 23223 }, { "epoch": 2.1794294294294296, "grad_norm": 1.0387944111600138, "learning_rate": 2.1115196770607345e-06, "loss": 0.3429, "step": 23224 }, { "epoch": 2.1795232732732734, "grad_norm": 1.155043643227065, "learning_rate": 2.1110740491612177e-06, "loss": 0.3462, "step": 23225 }, { "epoch": 2.1796171171171173, "grad_norm": 0.999273756890425, "learning_rate": 2.1106284557067064e-06, "loss": 0.3065, "step": 23226 }, { "epoch": 2.179710960960961, "grad_norm": 1.0585938690907226, "learning_rate": 2.110182896702512e-06, "loss": 0.3315, "step": 23227 }, { "epoch": 2.179804804804805, "grad_norm": 0.9720917339031633, "learning_rate": 2.109737372153946e-06, "loss": 0.3254, "step": 23228 }, { "epoch": 2.1798986486486487, "grad_norm": 1.4187825262733673, "learning_rate": 2.1092918820663234e-06, "loss": 0.3108, "step": 23229 }, { "epoch": 2.1799924924924925, "grad_norm": 0.8907477336529651, "learning_rate": 2.1088464264449533e-06, "loss": 0.3275, "step": 23230 }, { "epoch": 2.1800863363363363, "grad_norm": 0.9994717721285524, "learning_rate": 2.108401005295146e-06, "loss": 0.307, "step": 23231 }, { "epoch": 2.18018018018018, "grad_norm": 1.3972340566518466, "learning_rate": 2.1079556186222157e-06, "loss": 0.3156, "step": 23232 }, { "epoch": 2.180274024024024, "grad_norm": 1.1424507290079928, "learning_rate": 2.1075102664314704e-06, "loss": 0.3209, "step": 23233 }, { "epoch": 2.1803678678678677, "grad_norm": 1.2040003289153711, "learning_rate": 2.107064948728221e-06, "loss": 0.3082, "step": 23234 }, { "epoch": 2.1804617117117115, "grad_norm": 1.0104817812742684, "learning_rate": 2.106619665517777e-06, "loss": 0.3088, "step": 23235 }, { "epoch": 2.1805555555555554, "grad_norm": 1.3936874883933257, "learning_rate": 2.1061744168054464e-06, "loss": 0.3157, "step": 23236 }, { "epoch": 2.1806493993993996, "grad_norm": 3.739028548227022, "learning_rate": 2.1057292025965376e-06, "loss": 0.2558, "step": 23237 }, { "epoch": 2.1807432432432434, "grad_norm": 1.0104104148498085, "learning_rate": 2.1052840228963623e-06, "loss": 0.3514, "step": 23238 }, { "epoch": 2.1808370870870872, "grad_norm": 1.1237698557350273, "learning_rate": 2.1048388777102257e-06, "loss": 0.3165, "step": 23239 }, { "epoch": 2.180930930930931, "grad_norm": 1.0024015570116651, "learning_rate": 2.1043937670434347e-06, "loss": 0.3261, "step": 23240 }, { "epoch": 2.181024774774775, "grad_norm": 1.0328665015232437, "learning_rate": 2.1039486909012986e-06, "loss": 0.3556, "step": 23241 }, { "epoch": 2.1811186186186187, "grad_norm": 1.3115656281405959, "learning_rate": 2.1035036492891235e-06, "loss": 0.3119, "step": 23242 }, { "epoch": 2.1812124624624625, "grad_norm": 1.1147071309911871, "learning_rate": 2.1030586422122134e-06, "loss": 0.3661, "step": 23243 }, { "epoch": 2.1813063063063063, "grad_norm": 1.0577345378124219, "learning_rate": 2.102613669675878e-06, "loss": 0.3275, "step": 23244 }, { "epoch": 2.18140015015015, "grad_norm": 1.2845305253852157, "learning_rate": 2.1021687316854213e-06, "loss": 0.3461, "step": 23245 }, { "epoch": 2.181493993993994, "grad_norm": 1.0738712825813321, "learning_rate": 2.101723828246147e-06, "loss": 0.2892, "step": 23246 }, { "epoch": 2.1815878378378377, "grad_norm": 2.3516745969485324, "learning_rate": 2.1012789593633607e-06, "loss": 0.3092, "step": 23247 }, { "epoch": 2.1816816816816815, "grad_norm": 1.0366789814132318, "learning_rate": 2.1008341250423668e-06, "loss": 0.3456, "step": 23248 }, { "epoch": 2.1817755255255253, "grad_norm": 1.031079648170968, "learning_rate": 2.100389325288467e-06, "loss": 0.3402, "step": 23249 }, { "epoch": 2.1818693693693696, "grad_norm": 1.0753829549538183, "learning_rate": 2.0999445601069683e-06, "loss": 0.3455, "step": 23250 }, { "epoch": 2.1819632132132134, "grad_norm": 1.6908477618851006, "learning_rate": 2.099499829503172e-06, "loss": 0.2804, "step": 23251 }, { "epoch": 2.182057057057057, "grad_norm": 1.1528917598739894, "learning_rate": 2.099055133482379e-06, "loss": 0.3093, "step": 23252 }, { "epoch": 2.182150900900901, "grad_norm": 1.3111567516025657, "learning_rate": 2.0986104720498945e-06, "loss": 0.2787, "step": 23253 }, { "epoch": 2.182244744744745, "grad_norm": 1.1689030155445395, "learning_rate": 2.0981658452110194e-06, "loss": 0.3044, "step": 23254 }, { "epoch": 2.1823385885885886, "grad_norm": 1.2184729389076567, "learning_rate": 2.0977212529710524e-06, "loss": 0.3257, "step": 23255 }, { "epoch": 2.1824324324324325, "grad_norm": 1.0483553459451704, "learning_rate": 2.097276695335298e-06, "loss": 0.3234, "step": 23256 }, { "epoch": 2.1825262762762763, "grad_norm": 0.8630147756001697, "learning_rate": 2.096832172309055e-06, "loss": 0.3077, "step": 23257 }, { "epoch": 2.18262012012012, "grad_norm": 1.0765490112756158, "learning_rate": 2.096387683897624e-06, "loss": 0.3189, "step": 23258 }, { "epoch": 2.182713963963964, "grad_norm": 1.3194024491084275, "learning_rate": 2.0959432301063044e-06, "loss": 0.3155, "step": 23259 }, { "epoch": 2.1828078078078077, "grad_norm": 1.0864788515370518, "learning_rate": 2.0954988109403957e-06, "loss": 0.3043, "step": 23260 }, { "epoch": 2.1829016516516515, "grad_norm": 0.9573185454413687, "learning_rate": 2.095054426405194e-06, "loss": 0.3088, "step": 23261 }, { "epoch": 2.1829954954954953, "grad_norm": 1.0725908719502575, "learning_rate": 2.094610076506003e-06, "loss": 0.3258, "step": 23262 }, { "epoch": 2.1830893393393396, "grad_norm": 1.2078078933371288, "learning_rate": 2.0941657612481175e-06, "loss": 0.2645, "step": 23263 }, { "epoch": 2.1831831831831834, "grad_norm": 1.0966832764294567, "learning_rate": 2.093721480636834e-06, "loss": 0.3174, "step": 23264 }, { "epoch": 2.183277027027027, "grad_norm": 1.2606183771838628, "learning_rate": 2.093277234677453e-06, "loss": 0.3121, "step": 23265 }, { "epoch": 2.183370870870871, "grad_norm": 1.2022644852039694, "learning_rate": 2.092833023375269e-06, "loss": 0.3159, "step": 23266 }, { "epoch": 2.183464714714715, "grad_norm": 1.3274925050809334, "learning_rate": 2.092388846735578e-06, "loss": 0.2814, "step": 23267 }, { "epoch": 2.1835585585585586, "grad_norm": 1.2621810610720607, "learning_rate": 2.0919447047636788e-06, "loss": 0.3029, "step": 23268 }, { "epoch": 2.1836524024024024, "grad_norm": 1.1862840032134359, "learning_rate": 2.091500597464865e-06, "loss": 0.3172, "step": 23269 }, { "epoch": 2.1837462462462462, "grad_norm": 1.075797943297939, "learning_rate": 2.091056524844432e-06, "loss": 0.3371, "step": 23270 }, { "epoch": 2.18384009009009, "grad_norm": 1.058194359001487, "learning_rate": 2.090612486907674e-06, "loss": 0.2979, "step": 23271 }, { "epoch": 2.183933933933934, "grad_norm": 1.239429296350585, "learning_rate": 2.090168483659886e-06, "loss": 0.3366, "step": 23272 }, { "epoch": 2.1840277777777777, "grad_norm": 1.1827605209719398, "learning_rate": 2.08972451510636e-06, "loss": 0.3354, "step": 23273 }, { "epoch": 2.1841216216216215, "grad_norm": 1.253405537211979, "learning_rate": 2.0892805812523923e-06, "loss": 0.3569, "step": 23274 }, { "epoch": 2.1842154654654653, "grad_norm": 1.1182979691184414, "learning_rate": 2.088836682103276e-06, "loss": 0.2917, "step": 23275 }, { "epoch": 2.184309309309309, "grad_norm": 1.0606191379311178, "learning_rate": 2.0883928176643e-06, "loss": 0.2965, "step": 23276 }, { "epoch": 2.184403153153153, "grad_norm": 1.89600607724076, "learning_rate": 2.0879489879407604e-06, "loss": 0.3377, "step": 23277 }, { "epoch": 2.184496996996997, "grad_norm": 1.1612857552344102, "learning_rate": 2.0875051929379485e-06, "loss": 0.3052, "step": 23278 }, { "epoch": 2.184590840840841, "grad_norm": 1.0386496708998063, "learning_rate": 2.0870614326611534e-06, "loss": 0.3188, "step": 23279 }, { "epoch": 2.184684684684685, "grad_norm": 0.9799363231963939, "learning_rate": 2.0866177071156683e-06, "loss": 0.3265, "step": 23280 }, { "epoch": 2.1847785285285286, "grad_norm": 1.0897122046241505, "learning_rate": 2.0861740163067845e-06, "loss": 0.3545, "step": 23281 }, { "epoch": 2.1848723723723724, "grad_norm": 2.4347918761432563, "learning_rate": 2.0857303602397897e-06, "loss": 0.3054, "step": 23282 }, { "epoch": 2.1849662162162162, "grad_norm": 1.1155607424505753, "learning_rate": 2.085286738919976e-06, "loss": 0.3282, "step": 23283 }, { "epoch": 2.18506006006006, "grad_norm": 1.0174017221392355, "learning_rate": 2.0848431523526306e-06, "loss": 0.2887, "step": 23284 }, { "epoch": 2.185153903903904, "grad_norm": 1.3816084466095366, "learning_rate": 2.0843996005430423e-06, "loss": 0.2908, "step": 23285 }, { "epoch": 2.1852477477477477, "grad_norm": 1.138561185143461, "learning_rate": 2.083956083496502e-06, "loss": 0.2943, "step": 23286 }, { "epoch": 2.1853415915915915, "grad_norm": 1.5230319430183392, "learning_rate": 2.0835126012182964e-06, "loss": 0.3339, "step": 23287 }, { "epoch": 2.1854354354354353, "grad_norm": 1.1313716085618417, "learning_rate": 2.0830691537137115e-06, "loss": 0.3333, "step": 23288 }, { "epoch": 2.185529279279279, "grad_norm": 1.0301357141980303, "learning_rate": 2.0826257409880385e-06, "loss": 0.3388, "step": 23289 }, { "epoch": 2.185623123123123, "grad_norm": 0.9698118568736548, "learning_rate": 2.082182363046562e-06, "loss": 0.3406, "step": 23290 }, { "epoch": 2.185716966966967, "grad_norm": 1.1422331982411436, "learning_rate": 2.081739019894567e-06, "loss": 0.3103, "step": 23291 }, { "epoch": 2.185810810810811, "grad_norm": 1.2106047043438797, "learning_rate": 2.081295711537343e-06, "loss": 0.3078, "step": 23292 }, { "epoch": 2.1859046546546548, "grad_norm": 1.0574728851028854, "learning_rate": 2.080852437980174e-06, "loss": 0.3141, "step": 23293 }, { "epoch": 2.1859984984984986, "grad_norm": 1.129599426484906, "learning_rate": 2.080409199228342e-06, "loss": 0.3082, "step": 23294 }, { "epoch": 2.1860923423423424, "grad_norm": 1.0871968183057579, "learning_rate": 2.079965995287137e-06, "loss": 0.2686, "step": 23295 }, { "epoch": 2.186186186186186, "grad_norm": 1.0974237673613048, "learning_rate": 2.079522826161843e-06, "loss": 0.2955, "step": 23296 }, { "epoch": 2.18628003003003, "grad_norm": 1.1846251961508514, "learning_rate": 2.079079691857739e-06, "loss": 0.2964, "step": 23297 }, { "epoch": 2.186373873873874, "grad_norm": 1.0970987562924353, "learning_rate": 2.078636592380113e-06, "loss": 0.3137, "step": 23298 }, { "epoch": 2.1864677177177176, "grad_norm": 1.3636604308295985, "learning_rate": 2.078193527734247e-06, "loss": 0.2981, "step": 23299 }, { "epoch": 2.1865615615615615, "grad_norm": 1.0236314511979405, "learning_rate": 2.077750497925421e-06, "loss": 0.2897, "step": 23300 }, { "epoch": 2.1866554054054053, "grad_norm": 1.147320870333312, "learning_rate": 2.0773075029589217e-06, "loss": 0.3113, "step": 23301 }, { "epoch": 2.186749249249249, "grad_norm": 1.1373730916748446, "learning_rate": 2.0768645428400296e-06, "loss": 0.3177, "step": 23302 }, { "epoch": 2.186843093093093, "grad_norm": 1.0054173937566064, "learning_rate": 2.0764216175740233e-06, "loss": 0.2605, "step": 23303 }, { "epoch": 2.186936936936937, "grad_norm": 1.4924819727848455, "learning_rate": 2.075978727166188e-06, "loss": 0.3055, "step": 23304 }, { "epoch": 2.187030780780781, "grad_norm": 1.0541440138009441, "learning_rate": 2.0755358716218027e-06, "loss": 0.3232, "step": 23305 }, { "epoch": 2.1871246246246248, "grad_norm": 1.0419889921178576, "learning_rate": 2.0750930509461454e-06, "loss": 0.3277, "step": 23306 }, { "epoch": 2.1872184684684686, "grad_norm": 1.0110030160389885, "learning_rate": 2.0746502651445e-06, "loss": 0.3303, "step": 23307 }, { "epoch": 2.1873123123123124, "grad_norm": 1.1737661151465386, "learning_rate": 2.0742075142221436e-06, "loss": 0.3558, "step": 23308 }, { "epoch": 2.187406156156156, "grad_norm": 1.0604483003431664, "learning_rate": 2.073764798184355e-06, "loss": 0.2888, "step": 23309 }, { "epoch": 2.1875, "grad_norm": 1.1689416170212663, "learning_rate": 2.0733221170364133e-06, "loss": 0.3213, "step": 23310 }, { "epoch": 2.187593843843844, "grad_norm": 1.338397314859886, "learning_rate": 2.0728794707835965e-06, "loss": 0.3102, "step": 23311 }, { "epoch": 2.1876876876876876, "grad_norm": 1.209746711721319, "learning_rate": 2.0724368594311807e-06, "loss": 0.2926, "step": 23312 }, { "epoch": 2.1877815315315314, "grad_norm": 1.2518839670136908, "learning_rate": 2.0719942829844463e-06, "loss": 0.3513, "step": 23313 }, { "epoch": 2.1878753753753752, "grad_norm": 1.1529126841308148, "learning_rate": 2.0715517414486686e-06, "loss": 0.2845, "step": 23314 }, { "epoch": 2.187969219219219, "grad_norm": 1.1432705651707702, "learning_rate": 2.0711092348291226e-06, "loss": 0.2892, "step": 23315 }, { "epoch": 2.188063063063063, "grad_norm": 1.330059919353408, "learning_rate": 2.070666763131088e-06, "loss": 0.3378, "step": 23316 }, { "epoch": 2.188156906906907, "grad_norm": 1.0944804231030112, "learning_rate": 2.0702243263598374e-06, "loss": 0.3078, "step": 23317 }, { "epoch": 2.188250750750751, "grad_norm": 1.1387761085352572, "learning_rate": 2.069781924520646e-06, "loss": 0.3376, "step": 23318 }, { "epoch": 2.1883445945945947, "grad_norm": 1.5382800558735785, "learning_rate": 2.0693395576187907e-06, "loss": 0.311, "step": 23319 }, { "epoch": 2.1884384384384385, "grad_norm": 1.0904523146236071, "learning_rate": 2.0688972256595454e-06, "loss": 0.2935, "step": 23320 }, { "epoch": 2.1885322822822824, "grad_norm": 1.3591937423402332, "learning_rate": 2.0684549286481832e-06, "loss": 0.3196, "step": 23321 }, { "epoch": 2.188626126126126, "grad_norm": 1.0829063469526243, "learning_rate": 2.068012666589978e-06, "loss": 0.3412, "step": 23322 }, { "epoch": 2.18871996996997, "grad_norm": 1.0471316477325154, "learning_rate": 2.067570439490203e-06, "loss": 0.3113, "step": 23323 }, { "epoch": 2.188813813813814, "grad_norm": 1.1707767945264953, "learning_rate": 2.067128247354129e-06, "loss": 0.2843, "step": 23324 }, { "epoch": 2.1889076576576576, "grad_norm": 1.1057502727903985, "learning_rate": 2.066686090187031e-06, "loss": 0.299, "step": 23325 }, { "epoch": 2.1890015015015014, "grad_norm": 0.9803180990073562, "learning_rate": 2.066243967994181e-06, "loss": 0.3434, "step": 23326 }, { "epoch": 2.1890953453453452, "grad_norm": 1.1637156291035138, "learning_rate": 2.0658018807808473e-06, "loss": 0.3279, "step": 23327 }, { "epoch": 2.189189189189189, "grad_norm": 1.4577813219713118, "learning_rate": 2.0653598285523046e-06, "loss": 0.3203, "step": 23328 }, { "epoch": 2.189283033033033, "grad_norm": 1.042576964366707, "learning_rate": 2.0649178113138226e-06, "loss": 0.3211, "step": 23329 }, { "epoch": 2.189376876876877, "grad_norm": 1.1428836175242938, "learning_rate": 2.064475829070669e-06, "loss": 0.3145, "step": 23330 }, { "epoch": 2.189470720720721, "grad_norm": 1.1097256136237472, "learning_rate": 2.0640338818281175e-06, "loss": 0.3181, "step": 23331 }, { "epoch": 2.1895645645645647, "grad_norm": 1.075681305697788, "learning_rate": 2.0635919695914353e-06, "loss": 0.2756, "step": 23332 }, { "epoch": 2.1896584084084085, "grad_norm": 1.175711938766207, "learning_rate": 2.0631500923658914e-06, "loss": 0.3076, "step": 23333 }, { "epoch": 2.1897522522522523, "grad_norm": 0.9856779943045858, "learning_rate": 2.062708250156755e-06, "loss": 0.3419, "step": 23334 }, { "epoch": 2.189846096096096, "grad_norm": 1.220792722342683, "learning_rate": 2.062266442969294e-06, "loss": 0.2878, "step": 23335 }, { "epoch": 2.18993993993994, "grad_norm": 1.0372465648288238, "learning_rate": 2.0618246708087735e-06, "loss": 0.3068, "step": 23336 }, { "epoch": 2.1900337837837838, "grad_norm": 1.6886768573422837, "learning_rate": 2.0613829336804657e-06, "loss": 0.3329, "step": 23337 }, { "epoch": 2.1901276276276276, "grad_norm": 1.1572574162184994, "learning_rate": 2.0609412315896334e-06, "loss": 0.3012, "step": 23338 }, { "epoch": 2.1902214714714714, "grad_norm": 1.0415609484151225, "learning_rate": 2.0604995645415456e-06, "loss": 0.3312, "step": 23339 }, { "epoch": 2.190315315315315, "grad_norm": 1.0715859975403463, "learning_rate": 2.060057932541468e-06, "loss": 0.339, "step": 23340 }, { "epoch": 2.190409159159159, "grad_norm": 1.050103622515277, "learning_rate": 2.059616335594664e-06, "loss": 0.281, "step": 23341 }, { "epoch": 2.190503003003003, "grad_norm": 1.1722223814330412, "learning_rate": 2.059174773706402e-06, "loss": 0.2788, "step": 23342 }, { "epoch": 2.190596846846847, "grad_norm": 0.9986302085830426, "learning_rate": 2.0587332468819455e-06, "loss": 0.317, "step": 23343 }, { "epoch": 2.190690690690691, "grad_norm": 1.0641836615431297, "learning_rate": 2.0582917551265587e-06, "loss": 0.2869, "step": 23344 }, { "epoch": 2.1907845345345347, "grad_norm": 1.0485468976117043, "learning_rate": 2.057850298445505e-06, "loss": 0.3557, "step": 23345 }, { "epoch": 2.1908783783783785, "grad_norm": 1.0738957610115962, "learning_rate": 2.0574088768440493e-06, "loss": 0.3269, "step": 23346 }, { "epoch": 2.1909722222222223, "grad_norm": 1.3270181874196956, "learning_rate": 2.0569674903274515e-06, "loss": 0.3271, "step": 23347 }, { "epoch": 2.191066066066066, "grad_norm": 2.385673011926756, "learning_rate": 2.0565261389009786e-06, "loss": 0.3249, "step": 23348 }, { "epoch": 2.19115990990991, "grad_norm": 1.0186716492589263, "learning_rate": 2.056084822569891e-06, "loss": 0.3333, "step": 23349 }, { "epoch": 2.1912537537537538, "grad_norm": 1.0916759050274967, "learning_rate": 2.0556435413394486e-06, "loss": 0.3112, "step": 23350 }, { "epoch": 2.1913475975975976, "grad_norm": 1.0561147948124496, "learning_rate": 2.055202295214917e-06, "loss": 0.3567, "step": 23351 }, { "epoch": 2.1914414414414414, "grad_norm": 1.11968052358273, "learning_rate": 2.0547610842015545e-06, "loss": 0.2892, "step": 23352 }, { "epoch": 2.191535285285285, "grad_norm": 1.485737851833265, "learning_rate": 2.054319908304621e-06, "loss": 0.3137, "step": 23353 }, { "epoch": 2.191629129129129, "grad_norm": 1.070627673190989, "learning_rate": 2.053878767529379e-06, "loss": 0.3689, "step": 23354 }, { "epoch": 2.191722972972973, "grad_norm": 1.676320221531028, "learning_rate": 2.053437661881088e-06, "loss": 0.3064, "step": 23355 }, { "epoch": 2.1918168168168166, "grad_norm": 1.0416924070025522, "learning_rate": 2.052996591365006e-06, "loss": 0.2923, "step": 23356 }, { "epoch": 2.1919106606606604, "grad_norm": 2.009466633155866, "learning_rate": 2.052555555986393e-06, "loss": 0.2943, "step": 23357 }, { "epoch": 2.1920045045045047, "grad_norm": 1.051202384909908, "learning_rate": 2.052114555750507e-06, "loss": 0.3335, "step": 23358 }, { "epoch": 2.1920983483483485, "grad_norm": 1.2210871359044198, "learning_rate": 2.0516735906626033e-06, "loss": 0.3392, "step": 23359 }, { "epoch": 2.1921921921921923, "grad_norm": 1.044763289985998, "learning_rate": 2.0512326607279446e-06, "loss": 0.2686, "step": 23360 }, { "epoch": 2.192286036036036, "grad_norm": 0.957373023226531, "learning_rate": 2.0507917659517868e-06, "loss": 0.3089, "step": 23361 }, { "epoch": 2.19237987987988, "grad_norm": 1.2205637949860617, "learning_rate": 2.0503509063393835e-06, "loss": 0.3233, "step": 23362 }, { "epoch": 2.1924737237237237, "grad_norm": 1.175173960863761, "learning_rate": 2.049910081895995e-06, "loss": 0.3212, "step": 23363 }, { "epoch": 2.1925675675675675, "grad_norm": 1.119999258549841, "learning_rate": 2.049469292626876e-06, "loss": 0.316, "step": 23364 }, { "epoch": 2.1926614114114114, "grad_norm": 1.062186229354117, "learning_rate": 2.04902853853728e-06, "loss": 0.326, "step": 23365 }, { "epoch": 2.192755255255255, "grad_norm": 1.1564275453631947, "learning_rate": 2.048587819632466e-06, "loss": 0.3274, "step": 23366 }, { "epoch": 2.192849099099099, "grad_norm": 1.1379684967523411, "learning_rate": 2.048147135917687e-06, "loss": 0.3146, "step": 23367 }, { "epoch": 2.192942942942943, "grad_norm": 1.0159125400293625, "learning_rate": 2.0477064873981968e-06, "loss": 0.3178, "step": 23368 }, { "epoch": 2.1930367867867866, "grad_norm": 1.1131074952434659, "learning_rate": 2.0472658740792496e-06, "loss": 0.3806, "step": 23369 }, { "epoch": 2.1931306306306304, "grad_norm": 0.9960655883524074, "learning_rate": 2.046825295966099e-06, "loss": 0.3564, "step": 23370 }, { "epoch": 2.1932244744744747, "grad_norm": 0.9966190935728908, "learning_rate": 2.0463847530639964e-06, "loss": 0.3013, "step": 23371 }, { "epoch": 2.1933183183183185, "grad_norm": 0.895561258270834, "learning_rate": 2.0459442453781973e-06, "loss": 0.3117, "step": 23372 }, { "epoch": 2.1934121621621623, "grad_norm": 1.0669760817249918, "learning_rate": 2.0455037729139526e-06, "loss": 0.2978, "step": 23373 }, { "epoch": 2.193506006006006, "grad_norm": 1.0261806266381226, "learning_rate": 2.045063335676513e-06, "loss": 0.3427, "step": 23374 }, { "epoch": 2.19359984984985, "grad_norm": 0.9865822342968384, "learning_rate": 2.0446229336711325e-06, "loss": 0.3278, "step": 23375 }, { "epoch": 2.1936936936936937, "grad_norm": 1.1116089997368601, "learning_rate": 2.0441825669030607e-06, "loss": 0.3291, "step": 23376 }, { "epoch": 2.1937875375375375, "grad_norm": 1.0835022442488953, "learning_rate": 2.043742235377546e-06, "loss": 0.314, "step": 23377 }, { "epoch": 2.1938813813813813, "grad_norm": 0.9844452123722219, "learning_rate": 2.0433019390998422e-06, "loss": 0.331, "step": 23378 }, { "epoch": 2.193975225225225, "grad_norm": 1.1452138803627039, "learning_rate": 2.0428616780751975e-06, "loss": 0.3177, "step": 23379 }, { "epoch": 2.194069069069069, "grad_norm": 1.3798237974155048, "learning_rate": 2.0424214523088613e-06, "loss": 0.303, "step": 23380 }, { "epoch": 2.1941629129129128, "grad_norm": 1.0108967717803579, "learning_rate": 2.041981261806082e-06, "loss": 0.3155, "step": 23381 }, { "epoch": 2.1942567567567566, "grad_norm": 0.9624759301327663, "learning_rate": 2.0415411065721086e-06, "loss": 0.3357, "step": 23382 }, { "epoch": 2.1943506006006004, "grad_norm": 1.0750135467710042, "learning_rate": 2.0411009866121865e-06, "loss": 0.3314, "step": 23383 }, { "epoch": 2.1944444444444446, "grad_norm": 1.0689244799680995, "learning_rate": 2.040660901931568e-06, "loss": 0.3465, "step": 23384 }, { "epoch": 2.1945382882882885, "grad_norm": 1.2960450917953925, "learning_rate": 2.040220852535497e-06, "loss": 0.3434, "step": 23385 }, { "epoch": 2.1946321321321323, "grad_norm": 1.1124652634670418, "learning_rate": 2.03978083842922e-06, "loss": 0.3461, "step": 23386 }, { "epoch": 2.194725975975976, "grad_norm": 1.0666019117524839, "learning_rate": 2.0393408596179857e-06, "loss": 0.3435, "step": 23387 }, { "epoch": 2.19481981981982, "grad_norm": 1.0047462691116806, "learning_rate": 2.0389009161070396e-06, "loss": 0.3098, "step": 23388 }, { "epoch": 2.1949136636636637, "grad_norm": 0.9956767269412043, "learning_rate": 2.0384610079016235e-06, "loss": 0.3064, "step": 23389 }, { "epoch": 2.1950075075075075, "grad_norm": 1.088340861661486, "learning_rate": 2.0380211350069884e-06, "loss": 0.314, "step": 23390 }, { "epoch": 2.1951013513513513, "grad_norm": 1.1312745868465428, "learning_rate": 2.037581297428375e-06, "loss": 0.2994, "step": 23391 }, { "epoch": 2.195195195195195, "grad_norm": 1.0995271974313832, "learning_rate": 2.037141495171029e-06, "loss": 0.2917, "step": 23392 }, { "epoch": 2.195289039039039, "grad_norm": 1.1647797870357925, "learning_rate": 2.036701728240193e-06, "loss": 0.2892, "step": 23393 }, { "epoch": 2.1953828828828827, "grad_norm": 1.2341498845778849, "learning_rate": 2.0362619966411113e-06, "loss": 0.3257, "step": 23394 }, { "epoch": 2.1954767267267266, "grad_norm": 0.9747955986964829, "learning_rate": 2.035822300379025e-06, "loss": 0.3418, "step": 23395 }, { "epoch": 2.1955705705705704, "grad_norm": 0.9907423276434052, "learning_rate": 2.03538263945918e-06, "loss": 0.3335, "step": 23396 }, { "epoch": 2.1956644144144146, "grad_norm": 1.3098097138598828, "learning_rate": 2.034943013886817e-06, "loss": 0.3188, "step": 23397 }, { "epoch": 2.1957582582582584, "grad_norm": 1.61086954451243, "learning_rate": 2.0345034236671752e-06, "loss": 0.2867, "step": 23398 }, { "epoch": 2.1958521021021022, "grad_norm": 1.2643624728767069, "learning_rate": 2.0340638688055003e-06, "loss": 0.3261, "step": 23399 }, { "epoch": 2.195945945945946, "grad_norm": 1.1138324218611666, "learning_rate": 2.033624349307031e-06, "loss": 0.3423, "step": 23400 }, { "epoch": 2.19603978978979, "grad_norm": 1.1013969368295358, "learning_rate": 2.0331848651770052e-06, "loss": 0.3116, "step": 23401 }, { "epoch": 2.1961336336336337, "grad_norm": 0.8816942549085511, "learning_rate": 2.032745416420668e-06, "loss": 0.3012, "step": 23402 }, { "epoch": 2.1962274774774775, "grad_norm": 0.9824453777144556, "learning_rate": 2.0323060030432557e-06, "loss": 0.3064, "step": 23403 }, { "epoch": 2.1963213213213213, "grad_norm": 1.3947137550026083, "learning_rate": 2.031866625050007e-06, "loss": 0.3183, "step": 23404 }, { "epoch": 2.196415165165165, "grad_norm": 1.2853001604424612, "learning_rate": 2.031427282446165e-06, "loss": 0.3076, "step": 23405 }, { "epoch": 2.196509009009009, "grad_norm": 1.2951042796658574, "learning_rate": 2.030987975236963e-06, "loss": 0.2985, "step": 23406 }, { "epoch": 2.1966028528528527, "grad_norm": 3.1833793594199324, "learning_rate": 2.03054870342764e-06, "loss": 0.3415, "step": 23407 }, { "epoch": 2.1966966966966965, "grad_norm": 1.1029916172936174, "learning_rate": 2.030109467023435e-06, "loss": 0.3159, "step": 23408 }, { "epoch": 2.1967905405405403, "grad_norm": 1.2472142658965768, "learning_rate": 2.029670266029585e-06, "loss": 0.3118, "step": 23409 }, { "epoch": 2.1968843843843846, "grad_norm": 1.1231971978121678, "learning_rate": 2.029231100451324e-06, "loss": 0.3574, "step": 23410 }, { "epoch": 2.1969782282282284, "grad_norm": 1.063191849504287, "learning_rate": 2.028791970293892e-06, "loss": 0.2334, "step": 23411 }, { "epoch": 2.1970720720720722, "grad_norm": 1.3186767103081665, "learning_rate": 2.0283528755625232e-06, "loss": 0.2673, "step": 23412 }, { "epoch": 2.197165915915916, "grad_norm": 1.2817798342941245, "learning_rate": 2.027913816262451e-06, "loss": 0.2699, "step": 23413 }, { "epoch": 2.19725975975976, "grad_norm": 2.2560818930097213, "learning_rate": 2.0274747923989137e-06, "loss": 0.352, "step": 23414 }, { "epoch": 2.1973536036036037, "grad_norm": 1.1023930737302545, "learning_rate": 2.027035803977144e-06, "loss": 0.3141, "step": 23415 }, { "epoch": 2.1974474474474475, "grad_norm": 1.270514886492366, "learning_rate": 2.0265968510023754e-06, "loss": 0.3668, "step": 23416 }, { "epoch": 2.1975412912912913, "grad_norm": 1.0327288642631134, "learning_rate": 2.0261579334798437e-06, "loss": 0.3082, "step": 23417 }, { "epoch": 2.197635135135135, "grad_norm": 1.163831511007405, "learning_rate": 2.025719051414781e-06, "loss": 0.3195, "step": 23418 }, { "epoch": 2.197728978978979, "grad_norm": 1.1623065196859672, "learning_rate": 2.0252802048124203e-06, "loss": 0.3103, "step": 23419 }, { "epoch": 2.1978228228228227, "grad_norm": 2.0225193834434716, "learning_rate": 2.0248413936779937e-06, "loss": 0.3068, "step": 23420 }, { "epoch": 2.1979166666666665, "grad_norm": 0.9501836652878366, "learning_rate": 2.0244026180167336e-06, "loss": 0.307, "step": 23421 }, { "epoch": 2.1980105105105103, "grad_norm": 1.122025276455994, "learning_rate": 2.023963877833869e-06, "loss": 0.3198, "step": 23422 }, { "epoch": 2.1981043543543546, "grad_norm": 0.931173359047057, "learning_rate": 2.0235251731346354e-06, "loss": 0.2679, "step": 23423 }, { "epoch": 2.1981981981981984, "grad_norm": 1.2308899641550044, "learning_rate": 2.0230865039242616e-06, "loss": 0.3373, "step": 23424 }, { "epoch": 2.198292042042042, "grad_norm": 1.6343945137850773, "learning_rate": 2.0226478702079755e-06, "loss": 0.3026, "step": 23425 }, { "epoch": 2.198385885885886, "grad_norm": 1.012928717651014, "learning_rate": 2.0222092719910113e-06, "loss": 0.2821, "step": 23426 }, { "epoch": 2.19847972972973, "grad_norm": 3.7959587301917876, "learning_rate": 2.021770709278597e-06, "loss": 0.3128, "step": 23427 }, { "epoch": 2.1985735735735736, "grad_norm": 1.0724216696178945, "learning_rate": 2.0213321820759586e-06, "loss": 0.3072, "step": 23428 }, { "epoch": 2.1986674174174174, "grad_norm": 1.2399145265613816, "learning_rate": 2.0208936903883292e-06, "loss": 0.3418, "step": 23429 }, { "epoch": 2.1987612612612613, "grad_norm": 1.1317671817036021, "learning_rate": 2.020455234220935e-06, "loss": 0.3216, "step": 23430 }, { "epoch": 2.198855105105105, "grad_norm": 1.113701784844924, "learning_rate": 2.0200168135790034e-06, "loss": 0.3514, "step": 23431 }, { "epoch": 2.198948948948949, "grad_norm": 1.4793117524408343, "learning_rate": 2.0195784284677623e-06, "loss": 0.3238, "step": 23432 }, { "epoch": 2.1990427927927927, "grad_norm": 1.0610701186916045, "learning_rate": 2.019140078892438e-06, "loss": 0.3329, "step": 23433 }, { "epoch": 2.1991366366366365, "grad_norm": 1.1720234387199413, "learning_rate": 2.0187017648582564e-06, "loss": 0.3321, "step": 23434 }, { "epoch": 2.1992304804804803, "grad_norm": 1.225850351051054, "learning_rate": 2.0182634863704453e-06, "loss": 0.3312, "step": 23435 }, { "epoch": 2.199324324324324, "grad_norm": 1.0954652538124416, "learning_rate": 2.0178252434342303e-06, "loss": 0.347, "step": 23436 }, { "epoch": 2.1994181681681684, "grad_norm": 1.2242046488274554, "learning_rate": 2.0173870360548343e-06, "loss": 0.3325, "step": 23437 }, { "epoch": 2.199512012012012, "grad_norm": 1.2836977979943403, "learning_rate": 2.0169488642374847e-06, "loss": 0.3485, "step": 23438 }, { "epoch": 2.199605855855856, "grad_norm": 1.0761721288726054, "learning_rate": 2.016510727987405e-06, "loss": 0.33, "step": 23439 }, { "epoch": 2.1996996996997, "grad_norm": 0.9434120430869728, "learning_rate": 2.016072627309818e-06, "loss": 0.3123, "step": 23440 }, { "epoch": 2.1997935435435436, "grad_norm": 1.0366646909468644, "learning_rate": 2.0156345622099494e-06, "loss": 0.2653, "step": 23441 }, { "epoch": 2.1998873873873874, "grad_norm": 0.9615424935342599, "learning_rate": 2.015196532693022e-06, "loss": 0.3402, "step": 23442 }, { "epoch": 2.1999812312312312, "grad_norm": 0.9243279908778754, "learning_rate": 2.0147585387642566e-06, "loss": 0.2945, "step": 23443 }, { "epoch": 2.200075075075075, "grad_norm": 0.9970562747197419, "learning_rate": 2.014320580428877e-06, "loss": 0.3103, "step": 23444 }, { "epoch": 2.200168918918919, "grad_norm": 1.1440371815246189, "learning_rate": 2.013882657692104e-06, "loss": 0.3272, "step": 23445 }, { "epoch": 2.2002627627627627, "grad_norm": 1.2293496221495046, "learning_rate": 2.0134447705591583e-06, "loss": 0.3456, "step": 23446 }, { "epoch": 2.2003566066066065, "grad_norm": 1.1396270987817954, "learning_rate": 2.0130069190352635e-06, "loss": 0.2933, "step": 23447 }, { "epoch": 2.2004504504504503, "grad_norm": 0.9601101921952382, "learning_rate": 2.012569103125639e-06, "loss": 0.2907, "step": 23448 }, { "epoch": 2.200544294294294, "grad_norm": 0.9793634787861318, "learning_rate": 2.012131322835503e-06, "loss": 0.3419, "step": 23449 }, { "epoch": 2.200638138138138, "grad_norm": 1.0322227963812431, "learning_rate": 2.0116935781700782e-06, "loss": 0.2918, "step": 23450 }, { "epoch": 2.200731981981982, "grad_norm": 1.1515831249209199, "learning_rate": 2.011255869134583e-06, "loss": 0.288, "step": 23451 }, { "epoch": 2.200825825825826, "grad_norm": 1.1400100479545345, "learning_rate": 2.010818195734234e-06, "loss": 0.2999, "step": 23452 }, { "epoch": 2.20091966966967, "grad_norm": 1.104667298185522, "learning_rate": 2.010380557974253e-06, "loss": 0.3369, "step": 23453 }, { "epoch": 2.2010135135135136, "grad_norm": 1.095521855812178, "learning_rate": 2.0099429558598567e-06, "loss": 0.2951, "step": 23454 }, { "epoch": 2.2011073573573574, "grad_norm": 1.2284719323584505, "learning_rate": 2.0095053893962623e-06, "loss": 0.2872, "step": 23455 }, { "epoch": 2.201201201201201, "grad_norm": 1.0565013597879733, "learning_rate": 2.0090678585886875e-06, "loss": 0.2686, "step": 23456 }, { "epoch": 2.201295045045045, "grad_norm": 1.186111729925012, "learning_rate": 2.008630363442348e-06, "loss": 0.358, "step": 23457 }, { "epoch": 2.201388888888889, "grad_norm": 1.2274812019823809, "learning_rate": 2.0081929039624593e-06, "loss": 0.3152, "step": 23458 }, { "epoch": 2.2014827327327327, "grad_norm": 1.0599446449671477, "learning_rate": 2.0077554801542404e-06, "loss": 0.2999, "step": 23459 }, { "epoch": 2.2015765765765765, "grad_norm": 1.060268636446421, "learning_rate": 2.0073180920229053e-06, "loss": 0.2985, "step": 23460 }, { "epoch": 2.2016704204204203, "grad_norm": 0.9988651021414764, "learning_rate": 2.0068807395736665e-06, "loss": 0.3172, "step": 23461 }, { "epoch": 2.201764264264264, "grad_norm": 1.2100114257225179, "learning_rate": 2.0064434228117436e-06, "loss": 0.3217, "step": 23462 }, { "epoch": 2.201858108108108, "grad_norm": 1.0739141117649629, "learning_rate": 2.006006141742348e-06, "loss": 0.3154, "step": 23463 }, { "epoch": 2.201951951951952, "grad_norm": 1.2798512597012908, "learning_rate": 2.005568896370691e-06, "loss": 0.296, "step": 23464 }, { "epoch": 2.202045795795796, "grad_norm": 1.4693177956279453, "learning_rate": 2.0051316867019905e-06, "loss": 0.3085, "step": 23465 }, { "epoch": 2.2021396396396398, "grad_norm": 1.2146875732596547, "learning_rate": 2.0046945127414575e-06, "loss": 0.3048, "step": 23466 }, { "epoch": 2.2022334834834836, "grad_norm": 1.2228468069890053, "learning_rate": 2.0042573744943045e-06, "loss": 0.2626, "step": 23467 }, { "epoch": 2.2023273273273274, "grad_norm": 1.6795770388874394, "learning_rate": 2.003820271965743e-06, "loss": 0.2955, "step": 23468 }, { "epoch": 2.202421171171171, "grad_norm": 0.9409130317381259, "learning_rate": 2.003383205160985e-06, "loss": 0.2868, "step": 23469 }, { "epoch": 2.202515015015015, "grad_norm": 1.1118706287471434, "learning_rate": 2.0029461740852402e-06, "loss": 0.3126, "step": 23470 }, { "epoch": 2.202608858858859, "grad_norm": 1.1785052862250256, "learning_rate": 2.002509178743723e-06, "loss": 0.3235, "step": 23471 }, { "epoch": 2.2027027027027026, "grad_norm": 1.3432805716613092, "learning_rate": 2.0020722191416413e-06, "loss": 0.3297, "step": 23472 }, { "epoch": 2.2027965465465464, "grad_norm": 1.9847148814588973, "learning_rate": 2.001635295284203e-06, "loss": 0.3206, "step": 23473 }, { "epoch": 2.2028903903903903, "grad_norm": 1.866542194372775, "learning_rate": 2.0011984071766227e-06, "loss": 0.3294, "step": 23474 }, { "epoch": 2.202984234234234, "grad_norm": 0.9437708679261899, "learning_rate": 2.0007615548241068e-06, "loss": 0.2922, "step": 23475 }, { "epoch": 2.203078078078078, "grad_norm": 1.041976048209418, "learning_rate": 2.000324738231861e-06, "loss": 0.3393, "step": 23476 }, { "epoch": 2.203171921921922, "grad_norm": 1.596336663025999, "learning_rate": 1.9998879574050994e-06, "loss": 0.3061, "step": 23477 }, { "epoch": 2.203265765765766, "grad_norm": 1.11661501519616, "learning_rate": 1.999451212349026e-06, "loss": 0.3023, "step": 23478 }, { "epoch": 2.2033596096096097, "grad_norm": 1.5427781095512683, "learning_rate": 1.9990145030688494e-06, "loss": 0.3318, "step": 23479 }, { "epoch": 2.2034534534534536, "grad_norm": 2.4146197579270283, "learning_rate": 1.9985778295697755e-06, "loss": 0.3231, "step": 23480 }, { "epoch": 2.2035472972972974, "grad_norm": 1.4651796092328668, "learning_rate": 1.998141191857012e-06, "loss": 0.337, "step": 23481 }, { "epoch": 2.203641141141141, "grad_norm": 1.1679545258998343, "learning_rate": 1.997704589935762e-06, "loss": 0.3225, "step": 23482 }, { "epoch": 2.203734984984985, "grad_norm": 1.0661004159404057, "learning_rate": 1.9972680238112357e-06, "loss": 0.3038, "step": 23483 }, { "epoch": 2.203828828828829, "grad_norm": 1.1682896966289202, "learning_rate": 1.9968314934886362e-06, "loss": 0.3006, "step": 23484 }, { "epoch": 2.2039226726726726, "grad_norm": 0.9699593989456965, "learning_rate": 1.9963949989731667e-06, "loss": 0.324, "step": 23485 }, { "epoch": 2.2040165165165164, "grad_norm": 1.111125910682532, "learning_rate": 1.9959585402700345e-06, "loss": 0.3403, "step": 23486 }, { "epoch": 2.2041103603603602, "grad_norm": 1.3312199358771097, "learning_rate": 1.995522117384442e-06, "loss": 0.3174, "step": 23487 }, { "epoch": 2.204204204204204, "grad_norm": 1.5009524273517851, "learning_rate": 1.9950857303215916e-06, "loss": 0.3104, "step": 23488 }, { "epoch": 2.204298048048048, "grad_norm": 1.180454651353779, "learning_rate": 1.9946493790866896e-06, "loss": 0.3391, "step": 23489 }, { "epoch": 2.204391891891892, "grad_norm": 1.1466976730585718, "learning_rate": 1.994213063684936e-06, "loss": 0.2938, "step": 23490 }, { "epoch": 2.204485735735736, "grad_norm": 0.9431555249551178, "learning_rate": 1.9937767841215343e-06, "loss": 0.3429, "step": 23491 }, { "epoch": 2.2045795795795797, "grad_norm": 1.1444719194233366, "learning_rate": 1.993340540401686e-06, "loss": 0.3296, "step": 23492 }, { "epoch": 2.2046734234234235, "grad_norm": 1.3372193451585508, "learning_rate": 1.992904332530592e-06, "loss": 0.3076, "step": 23493 }, { "epoch": 2.2047672672672673, "grad_norm": 1.8063759067659222, "learning_rate": 1.9924681605134516e-06, "loss": 0.334, "step": 23494 }, { "epoch": 2.204861111111111, "grad_norm": 1.0181811274888732, "learning_rate": 1.992032024355469e-06, "loss": 0.316, "step": 23495 }, { "epoch": 2.204954954954955, "grad_norm": 1.0165345215833537, "learning_rate": 1.9915959240618433e-06, "loss": 0.3499, "step": 23496 }, { "epoch": 2.205048798798799, "grad_norm": 1.1827121799992877, "learning_rate": 1.991159859637771e-06, "loss": 0.346, "step": 23497 }, { "epoch": 2.2051426426426426, "grad_norm": 1.5777237460902203, "learning_rate": 1.9907238310884557e-06, "loss": 0.2779, "step": 23498 }, { "epoch": 2.2052364864864864, "grad_norm": 0.9593978619582455, "learning_rate": 1.990287838419095e-06, "loss": 0.3292, "step": 23499 }, { "epoch": 2.20533033033033, "grad_norm": 1.312138749275098, "learning_rate": 1.989851881634884e-06, "loss": 0.2768, "step": 23500 }, { "epoch": 2.205424174174174, "grad_norm": 1.0572103447617835, "learning_rate": 1.9894159607410258e-06, "loss": 0.3398, "step": 23501 }, { "epoch": 2.205518018018018, "grad_norm": 7.229306374348022, "learning_rate": 1.9889800757427147e-06, "loss": 0.339, "step": 23502 }, { "epoch": 2.205611861861862, "grad_norm": 1.1900684118415172, "learning_rate": 1.988544226645149e-06, "loss": 0.3149, "step": 23503 }, { "epoch": 2.205705705705706, "grad_norm": 1.1738426453296562, "learning_rate": 1.9881084134535243e-06, "loss": 0.3241, "step": 23504 }, { "epoch": 2.2057995495495497, "grad_norm": 1.0033798797785212, "learning_rate": 1.987672636173038e-06, "loss": 0.3292, "step": 23505 }, { "epoch": 2.2058933933933935, "grad_norm": 1.0857054486101256, "learning_rate": 1.987236894808884e-06, "loss": 0.3132, "step": 23506 }, { "epoch": 2.2059872372372373, "grad_norm": 1.1236706908267067, "learning_rate": 1.98680118936626e-06, "loss": 0.3193, "step": 23507 }, { "epoch": 2.206081081081081, "grad_norm": 1.170875755575133, "learning_rate": 1.9863655198503606e-06, "loss": 0.3058, "step": 23508 }, { "epoch": 2.206174924924925, "grad_norm": 1.0492886231148044, "learning_rate": 1.985929886266378e-06, "loss": 0.3188, "step": 23509 }, { "epoch": 2.2062687687687688, "grad_norm": 1.093986025142085, "learning_rate": 1.98549428861951e-06, "loss": 0.2791, "step": 23510 }, { "epoch": 2.2063626126126126, "grad_norm": 1.1207567021228466, "learning_rate": 1.985058726914948e-06, "loss": 0.319, "step": 23511 }, { "epoch": 2.2064564564564564, "grad_norm": 1.2757222004184003, "learning_rate": 1.984623201157884e-06, "loss": 0.3244, "step": 23512 }, { "epoch": 2.2065503003003, "grad_norm": 0.9666841274860887, "learning_rate": 1.9841877113535143e-06, "loss": 0.3326, "step": 23513 }, { "epoch": 2.206644144144144, "grad_norm": 1.4559193683963738, "learning_rate": 1.9837522575070296e-06, "loss": 0.3375, "step": 23514 }, { "epoch": 2.206737987987988, "grad_norm": 1.0617535777755633, "learning_rate": 1.983316839623621e-06, "loss": 0.3753, "step": 23515 }, { "epoch": 2.2068318318318316, "grad_norm": 1.3541351033488092, "learning_rate": 1.9828814577084813e-06, "loss": 0.3075, "step": 23516 }, { "epoch": 2.206925675675676, "grad_norm": 1.1616215815526765, "learning_rate": 1.9824461117668015e-06, "loss": 0.278, "step": 23517 }, { "epoch": 2.2070195195195197, "grad_norm": 1.1494515900937283, "learning_rate": 1.98201080180377e-06, "loss": 0.2818, "step": 23518 }, { "epoch": 2.2071133633633635, "grad_norm": 1.0129915608448954, "learning_rate": 1.9815755278245803e-06, "loss": 0.3477, "step": 23519 }, { "epoch": 2.2072072072072073, "grad_norm": 1.347125084967948, "learning_rate": 1.9811402898344205e-06, "loss": 0.3327, "step": 23520 }, { "epoch": 2.207301051051051, "grad_norm": 1.1162969599995443, "learning_rate": 1.9807050878384786e-06, "loss": 0.304, "step": 23521 }, { "epoch": 2.207394894894895, "grad_norm": 1.1817510254750292, "learning_rate": 1.980269921841948e-06, "loss": 0.3023, "step": 23522 }, { "epoch": 2.2074887387387387, "grad_norm": 1.0235100447990872, "learning_rate": 1.979834791850014e-06, "loss": 0.3184, "step": 23523 }, { "epoch": 2.2075825825825826, "grad_norm": 1.0412708636613677, "learning_rate": 1.9793996978678633e-06, "loss": 0.3062, "step": 23524 }, { "epoch": 2.2076764264264264, "grad_norm": 1.240061781914084, "learning_rate": 1.978964639900687e-06, "loss": 0.304, "step": 23525 }, { "epoch": 2.20777027027027, "grad_norm": 1.0165310492551138, "learning_rate": 1.978529617953671e-06, "loss": 0.2974, "step": 23526 }, { "epoch": 2.207864114114114, "grad_norm": 1.1272709779733836, "learning_rate": 1.9780946320320028e-06, "loss": 0.2436, "step": 23527 }, { "epoch": 2.207957957957958, "grad_norm": 0.9867600428381329, "learning_rate": 1.9776596821408673e-06, "loss": 0.2869, "step": 23528 }, { "epoch": 2.2080518018018016, "grad_norm": 1.0973123530679976, "learning_rate": 1.9772247682854513e-06, "loss": 0.3041, "step": 23529 }, { "epoch": 2.2081456456456454, "grad_norm": 1.108596613850653, "learning_rate": 1.9767898904709374e-06, "loss": 0.263, "step": 23530 }, { "epoch": 2.2082394894894897, "grad_norm": 1.1048853973549944, "learning_rate": 1.9763550487025162e-06, "loss": 0.3334, "step": 23531 }, { "epoch": 2.2083333333333335, "grad_norm": 1.1472554427286232, "learning_rate": 1.9759202429853695e-06, "loss": 0.3116, "step": 23532 }, { "epoch": 2.2084271771771773, "grad_norm": 1.108575325162291, "learning_rate": 1.97548547332468e-06, "loss": 0.2803, "step": 23533 }, { "epoch": 2.208521021021021, "grad_norm": 0.9862803578949962, "learning_rate": 1.9750507397256354e-06, "loss": 0.3051, "step": 23534 }, { "epoch": 2.208614864864865, "grad_norm": 1.1492087524850785, "learning_rate": 1.974616042193416e-06, "loss": 0.3079, "step": 23535 }, { "epoch": 2.2087087087087087, "grad_norm": 1.068623449812071, "learning_rate": 1.974181380733204e-06, "loss": 0.308, "step": 23536 }, { "epoch": 2.2088025525525525, "grad_norm": 1.0460382095151728, "learning_rate": 1.9737467553501856e-06, "loss": 0.3141, "step": 23537 }, { "epoch": 2.2088963963963963, "grad_norm": 1.4233049552562596, "learning_rate": 1.97331216604954e-06, "loss": 0.2849, "step": 23538 }, { "epoch": 2.20899024024024, "grad_norm": 1.075056757529725, "learning_rate": 1.9728776128364487e-06, "loss": 0.3097, "step": 23539 }, { "epoch": 2.209084084084084, "grad_norm": 1.0671911410935493, "learning_rate": 1.9724430957160957e-06, "loss": 0.3071, "step": 23540 }, { "epoch": 2.2091779279279278, "grad_norm": 1.1329475948516243, "learning_rate": 1.9720086146936595e-06, "loss": 0.3067, "step": 23541 }, { "epoch": 2.2092717717717716, "grad_norm": 1.098673522575166, "learning_rate": 1.971574169774321e-06, "loss": 0.2957, "step": 23542 }, { "epoch": 2.2093656156156154, "grad_norm": 1.0849753773832043, "learning_rate": 1.9711397609632606e-06, "loss": 0.2901, "step": 23543 }, { "epoch": 2.2094594594594597, "grad_norm": 1.072458433156256, "learning_rate": 1.9707053882656567e-06, "loss": 0.3231, "step": 23544 }, { "epoch": 2.2095533033033035, "grad_norm": 1.218279401696229, "learning_rate": 1.9702710516866876e-06, "loss": 0.3076, "step": 23545 }, { "epoch": 2.2096471471471473, "grad_norm": 0.994291015233566, "learning_rate": 1.9698367512315354e-06, "loss": 0.2995, "step": 23546 }, { "epoch": 2.209740990990991, "grad_norm": 1.0943125002663077, "learning_rate": 1.969402486905376e-06, "loss": 0.2888, "step": 23547 }, { "epoch": 2.209834834834835, "grad_norm": 1.207341947815469, "learning_rate": 1.968968258713385e-06, "loss": 0.2891, "step": 23548 }, { "epoch": 2.2099286786786787, "grad_norm": 1.0707395026351199, "learning_rate": 1.9685340666607446e-06, "loss": 0.3118, "step": 23549 }, { "epoch": 2.2100225225225225, "grad_norm": 1.0437635563337349, "learning_rate": 1.9680999107526287e-06, "loss": 0.3292, "step": 23550 }, { "epoch": 2.2101163663663663, "grad_norm": 1.244736393976711, "learning_rate": 1.967665790994214e-06, "loss": 0.296, "step": 23551 }, { "epoch": 2.21021021021021, "grad_norm": 1.0893513112273936, "learning_rate": 1.9672317073906773e-06, "loss": 0.3086, "step": 23552 }, { "epoch": 2.210304054054054, "grad_norm": 1.0663288296674793, "learning_rate": 1.966797659947194e-06, "loss": 0.3654, "step": 23553 }, { "epoch": 2.2103978978978978, "grad_norm": 1.0856450117112597, "learning_rate": 1.9663636486689402e-06, "loss": 0.3132, "step": 23554 }, { "epoch": 2.2104917417417416, "grad_norm": 0.9941700835325243, "learning_rate": 1.965929673561089e-06, "loss": 0.303, "step": 23555 }, { "epoch": 2.2105855855855854, "grad_norm": 1.2007345759100547, "learning_rate": 1.9654957346288157e-06, "loss": 0.3324, "step": 23556 }, { "epoch": 2.2106794294294296, "grad_norm": 1.2179571550292998, "learning_rate": 1.9650618318772917e-06, "loss": 0.3296, "step": 23557 }, { "epoch": 2.2107732732732734, "grad_norm": 1.1267680064636305, "learning_rate": 1.9646279653116945e-06, "loss": 0.268, "step": 23558 }, { "epoch": 2.2108671171171173, "grad_norm": 1.0037708703415682, "learning_rate": 1.9641941349371957e-06, "loss": 0.3465, "step": 23559 }, { "epoch": 2.210960960960961, "grad_norm": 0.9961679052532393, "learning_rate": 1.963760340758965e-06, "loss": 0.3409, "step": 23560 }, { "epoch": 2.211054804804805, "grad_norm": 1.5695237489273157, "learning_rate": 1.9633265827821794e-06, "loss": 0.302, "step": 23561 }, { "epoch": 2.2111486486486487, "grad_norm": 1.1630314900374001, "learning_rate": 1.9628928610120085e-06, "loss": 0.3286, "step": 23562 }, { "epoch": 2.2112424924924925, "grad_norm": 1.0047774060509225, "learning_rate": 1.9624591754536214e-06, "loss": 0.2778, "step": 23563 }, { "epoch": 2.2113363363363363, "grad_norm": 1.1766331837729795, "learning_rate": 1.9620255261121934e-06, "loss": 0.3519, "step": 23564 }, { "epoch": 2.21143018018018, "grad_norm": 1.3058070788323917, "learning_rate": 1.961591912992892e-06, "loss": 0.3218, "step": 23565 }, { "epoch": 2.211524024024024, "grad_norm": 1.0176538208007913, "learning_rate": 1.961158336100888e-06, "loss": 0.2996, "step": 23566 }, { "epoch": 2.2116178678678677, "grad_norm": 1.2208265614357332, "learning_rate": 1.9607247954413507e-06, "loss": 0.3189, "step": 23567 }, { "epoch": 2.2117117117117115, "grad_norm": 1.0150892504646092, "learning_rate": 1.9602912910194495e-06, "loss": 0.3224, "step": 23568 }, { "epoch": 2.2118055555555554, "grad_norm": 1.1130488158563168, "learning_rate": 1.959857822840351e-06, "loss": 0.2975, "step": 23569 }, { "epoch": 2.2118993993993996, "grad_norm": 1.0642356571313647, "learning_rate": 1.9594243909092276e-06, "loss": 0.3225, "step": 23570 }, { "epoch": 2.2119932432432434, "grad_norm": 1.0807218435155532, "learning_rate": 1.9589909952312445e-06, "loss": 0.3134, "step": 23571 }, { "epoch": 2.2120870870870872, "grad_norm": 1.0829048123741725, "learning_rate": 1.958557635811569e-06, "loss": 0.2958, "step": 23572 }, { "epoch": 2.212180930930931, "grad_norm": 1.079259386006578, "learning_rate": 1.9581243126553693e-06, "loss": 0.3133, "step": 23573 }, { "epoch": 2.212274774774775, "grad_norm": 1.4257340398071099, "learning_rate": 1.9576910257678115e-06, "loss": 0.3178, "step": 23574 }, { "epoch": 2.2123686186186187, "grad_norm": 1.126670421850755, "learning_rate": 1.9572577751540606e-06, "loss": 0.3492, "step": 23575 }, { "epoch": 2.2124624624624625, "grad_norm": 1.1253338665361918, "learning_rate": 1.9568245608192848e-06, "loss": 0.341, "step": 23576 }, { "epoch": 2.2125563063063063, "grad_norm": 1.0668330028310637, "learning_rate": 1.956391382768648e-06, "loss": 0.34, "step": 23577 }, { "epoch": 2.21265015015015, "grad_norm": 1.1820161728660274, "learning_rate": 1.9559582410073154e-06, "loss": 0.3035, "step": 23578 }, { "epoch": 2.212743993993994, "grad_norm": 1.1617334940914563, "learning_rate": 1.95552513554045e-06, "loss": 0.3295, "step": 23579 }, { "epoch": 2.2128378378378377, "grad_norm": 1.0799336218936821, "learning_rate": 1.9550920663732175e-06, "loss": 0.3381, "step": 23580 }, { "epoch": 2.2129316816816815, "grad_norm": 1.1358853763086607, "learning_rate": 1.9546590335107783e-06, "loss": 0.3411, "step": 23581 }, { "epoch": 2.2130255255255253, "grad_norm": 1.171015073776329, "learning_rate": 1.9542260369583e-06, "loss": 0.2992, "step": 23582 }, { "epoch": 2.2131193693693696, "grad_norm": 1.245027156251079, "learning_rate": 1.953793076720943e-06, "loss": 0.3123, "step": 23583 }, { "epoch": 2.2132132132132134, "grad_norm": 1.0617189999209582, "learning_rate": 1.9533601528038682e-06, "loss": 0.3161, "step": 23584 }, { "epoch": 2.213307057057057, "grad_norm": 1.0128011013560696, "learning_rate": 1.9529272652122407e-06, "loss": 0.3128, "step": 23585 }, { "epoch": 2.213400900900901, "grad_norm": 1.0503804914320043, "learning_rate": 1.952494413951219e-06, "loss": 0.3063, "step": 23586 }, { "epoch": 2.213494744744745, "grad_norm": 1.0388428663284044, "learning_rate": 1.9520615990259646e-06, "loss": 0.2919, "step": 23587 }, { "epoch": 2.2135885885885886, "grad_norm": 1.0960657450402695, "learning_rate": 1.9516288204416396e-06, "loss": 0.3222, "step": 23588 }, { "epoch": 2.2136824324324325, "grad_norm": 0.951959170708095, "learning_rate": 1.9511960782034027e-06, "loss": 0.3083, "step": 23589 }, { "epoch": 2.2137762762762763, "grad_norm": 1.1496531902902578, "learning_rate": 1.9507633723164144e-06, "loss": 0.3308, "step": 23590 }, { "epoch": 2.21387012012012, "grad_norm": 1.0125274379945148, "learning_rate": 1.9503307027858327e-06, "loss": 0.3269, "step": 23591 }, { "epoch": 2.213963963963964, "grad_norm": 1.0764541978467372, "learning_rate": 1.9498980696168173e-06, "loss": 0.3542, "step": 23592 }, { "epoch": 2.2140578078078077, "grad_norm": 1.2092571746738097, "learning_rate": 1.9494654728145245e-06, "loss": 0.2963, "step": 23593 }, { "epoch": 2.2141516516516515, "grad_norm": 1.009171692242086, "learning_rate": 1.9490329123841157e-06, "loss": 0.3147, "step": 23594 }, { "epoch": 2.2142454954954953, "grad_norm": 1.133031921334562, "learning_rate": 1.9486003883307465e-06, "loss": 0.3128, "step": 23595 }, { "epoch": 2.2143393393393396, "grad_norm": 1.1686008499256004, "learning_rate": 1.9481679006595717e-06, "loss": 0.3026, "step": 23596 }, { "epoch": 2.2144331831831834, "grad_norm": 1.0234474697662683, "learning_rate": 1.947735449375753e-06, "loss": 0.2795, "step": 23597 }, { "epoch": 2.214527027027027, "grad_norm": 1.4461019490703761, "learning_rate": 1.947303034484443e-06, "loss": 0.2725, "step": 23598 }, { "epoch": 2.214620870870871, "grad_norm": 1.408163763859837, "learning_rate": 1.9468706559907964e-06, "loss": 0.2934, "step": 23599 }, { "epoch": 2.214714714714715, "grad_norm": 1.1101434220213264, "learning_rate": 1.946438313899972e-06, "loss": 0.2984, "step": 23600 }, { "epoch": 2.2148085585585586, "grad_norm": 1.1239017524120443, "learning_rate": 1.946006008217123e-06, "loss": 0.3323, "step": 23601 }, { "epoch": 2.2149024024024024, "grad_norm": 1.3375499327398361, "learning_rate": 1.9455737389474045e-06, "loss": 0.3703, "step": 23602 }, { "epoch": 2.2149962462462462, "grad_norm": 0.9651196038872815, "learning_rate": 1.9451415060959695e-06, "loss": 0.3166, "step": 23603 }, { "epoch": 2.21509009009009, "grad_norm": 1.0556182606894424, "learning_rate": 1.9447093096679714e-06, "loss": 0.3297, "step": 23604 }, { "epoch": 2.215183933933934, "grad_norm": 1.0885117762677745, "learning_rate": 1.9442771496685624e-06, "loss": 0.3221, "step": 23605 }, { "epoch": 2.2152777777777777, "grad_norm": 1.1402272543926855, "learning_rate": 1.9438450261028984e-06, "loss": 0.3194, "step": 23606 }, { "epoch": 2.2153716216216215, "grad_norm": 1.1011626266865608, "learning_rate": 1.9434129389761297e-06, "loss": 0.2787, "step": 23607 }, { "epoch": 2.2154654654654653, "grad_norm": 1.250337145188169, "learning_rate": 1.9429808882934063e-06, "loss": 0.3311, "step": 23608 }, { "epoch": 2.215559309309309, "grad_norm": 1.1108716119054203, "learning_rate": 1.9425488740598836e-06, "loss": 0.3356, "step": 23609 }, { "epoch": 2.215653153153153, "grad_norm": 1.0087087932148082, "learning_rate": 1.942116896280711e-06, "loss": 0.2995, "step": 23610 }, { "epoch": 2.215746996996997, "grad_norm": 1.0375914412465022, "learning_rate": 1.941684954961036e-06, "loss": 0.3172, "step": 23611 }, { "epoch": 2.215840840840841, "grad_norm": 1.3626756657113042, "learning_rate": 1.9412530501060135e-06, "loss": 0.3135, "step": 23612 }, { "epoch": 2.215934684684685, "grad_norm": 1.1988420495809784, "learning_rate": 1.94082118172079e-06, "loss": 0.3105, "step": 23613 }, { "epoch": 2.2160285285285286, "grad_norm": 1.2491144436882808, "learning_rate": 1.9403893498105164e-06, "loss": 0.2901, "step": 23614 }, { "epoch": 2.2161223723723724, "grad_norm": 1.182837174580659, "learning_rate": 1.9399575543803407e-06, "loss": 0.286, "step": 23615 }, { "epoch": 2.2162162162162162, "grad_norm": 1.234662163325147, "learning_rate": 1.939525795435411e-06, "loss": 0.2875, "step": 23616 }, { "epoch": 2.21631006006006, "grad_norm": 2.525029160958776, "learning_rate": 1.9390940729808733e-06, "loss": 0.3221, "step": 23617 }, { "epoch": 2.216403903903904, "grad_norm": 1.0457332320931987, "learning_rate": 1.9386623870218795e-06, "loss": 0.327, "step": 23618 }, { "epoch": 2.2164977477477477, "grad_norm": 1.1587878608061302, "learning_rate": 1.9382307375635746e-06, "loss": 0.2927, "step": 23619 }, { "epoch": 2.2165915915915915, "grad_norm": 1.5442870176432377, "learning_rate": 1.9377991246111026e-06, "loss": 0.3103, "step": 23620 }, { "epoch": 2.2166854354354353, "grad_norm": 0.9073375253058632, "learning_rate": 1.937367548169614e-06, "loss": 0.2992, "step": 23621 }, { "epoch": 2.216779279279279, "grad_norm": 1.1708889325770309, "learning_rate": 1.9369360082442523e-06, "loss": 0.3038, "step": 23622 }, { "epoch": 2.216873123123123, "grad_norm": 0.9538496191252932, "learning_rate": 1.936504504840162e-06, "loss": 0.2963, "step": 23623 }, { "epoch": 2.216966966966967, "grad_norm": 1.1125263379998958, "learning_rate": 1.93607303796249e-06, "loss": 0.3268, "step": 23624 }, { "epoch": 2.217060810810811, "grad_norm": 1.1890122802504393, "learning_rate": 1.9356416076163804e-06, "loss": 0.3525, "step": 23625 }, { "epoch": 2.2171546546546548, "grad_norm": 1.0319467677438434, "learning_rate": 1.9352102138069766e-06, "loss": 0.2884, "step": 23626 }, { "epoch": 2.2172484984984986, "grad_norm": 1.1115252585720954, "learning_rate": 1.9347788565394215e-06, "loss": 0.3025, "step": 23627 }, { "epoch": 2.2173423423423424, "grad_norm": 1.2826702175351392, "learning_rate": 1.93434753581886e-06, "loss": 0.3492, "step": 23628 }, { "epoch": 2.217436186186186, "grad_norm": 1.1871855687377015, "learning_rate": 1.9339162516504307e-06, "loss": 0.2748, "step": 23629 }, { "epoch": 2.21753003003003, "grad_norm": 1.111764394205749, "learning_rate": 1.933485004039281e-06, "loss": 0.2984, "step": 23630 }, { "epoch": 2.217623873873874, "grad_norm": 1.0853578361396568, "learning_rate": 1.9330537929905502e-06, "loss": 0.3443, "step": 23631 }, { "epoch": 2.2177177177177176, "grad_norm": 1.2215552396147655, "learning_rate": 1.932622618509379e-06, "loss": 0.3065, "step": 23632 }, { "epoch": 2.2178115615615615, "grad_norm": 1.1685202439658904, "learning_rate": 1.93219148060091e-06, "loss": 0.2539, "step": 23633 }, { "epoch": 2.2179054054054053, "grad_norm": 1.309666557558141, "learning_rate": 1.931760379270284e-06, "loss": 0.278, "step": 23634 }, { "epoch": 2.217999249249249, "grad_norm": 0.9856879875668566, "learning_rate": 1.9313293145226383e-06, "loss": 0.3399, "step": 23635 }, { "epoch": 2.218093093093093, "grad_norm": 1.1450775621962623, "learning_rate": 1.9308982863631153e-06, "loss": 0.3331, "step": 23636 }, { "epoch": 2.218186936936937, "grad_norm": 1.3511020389928778, "learning_rate": 1.930467294796854e-06, "loss": 0.3541, "step": 23637 }, { "epoch": 2.218280780780781, "grad_norm": 1.0797769784431313, "learning_rate": 1.930036339828992e-06, "loss": 0.3544, "step": 23638 }, { "epoch": 2.2183746246246248, "grad_norm": 0.9781780066431278, "learning_rate": 1.929605421464668e-06, "loss": 0.29, "step": 23639 }, { "epoch": 2.2184684684684686, "grad_norm": 1.4974009550129426, "learning_rate": 1.92917453970902e-06, "loss": 0.315, "step": 23640 }, { "epoch": 2.2185623123123124, "grad_norm": 1.01465052049409, "learning_rate": 1.928743694567184e-06, "loss": 0.3041, "step": 23641 }, { "epoch": 2.218656156156156, "grad_norm": 1.3153852760311664, "learning_rate": 1.9283128860442996e-06, "loss": 0.2738, "step": 23642 }, { "epoch": 2.21875, "grad_norm": 1.1434324663357505, "learning_rate": 1.927882114145503e-06, "loss": 0.3508, "step": 23643 }, { "epoch": 2.218843843843844, "grad_norm": 1.0031833418050966, "learning_rate": 1.927451378875927e-06, "loss": 0.3311, "step": 23644 }, { "epoch": 2.2189376876876876, "grad_norm": 1.1364531482406588, "learning_rate": 1.9270206802407115e-06, "loss": 0.2911, "step": 23645 }, { "epoch": 2.2190315315315314, "grad_norm": 1.082728957874177, "learning_rate": 1.92659001824499e-06, "loss": 0.2962, "step": 23646 }, { "epoch": 2.2191253753753752, "grad_norm": 2.0951931633222904, "learning_rate": 1.9261593928938967e-06, "loss": 0.3298, "step": 23647 }, { "epoch": 2.219219219219219, "grad_norm": 1.3805501302221057, "learning_rate": 1.9257288041925678e-06, "loss": 0.3296, "step": 23648 }, { "epoch": 2.219313063063063, "grad_norm": 1.2989071460411656, "learning_rate": 1.925298252146136e-06, "loss": 0.3084, "step": 23649 }, { "epoch": 2.219406906906907, "grad_norm": 1.0086664873573565, "learning_rate": 1.924867736759736e-06, "loss": 0.3486, "step": 23650 }, { "epoch": 2.219500750750751, "grad_norm": 1.2586392662427925, "learning_rate": 1.9244372580384985e-06, "loss": 0.3154, "step": 23651 }, { "epoch": 2.2195945945945947, "grad_norm": 1.0639896136388893, "learning_rate": 1.9240068159875586e-06, "loss": 0.2901, "step": 23652 }, { "epoch": 2.2196884384384385, "grad_norm": 1.9504919286329534, "learning_rate": 1.923576410612045e-06, "loss": 0.2934, "step": 23653 }, { "epoch": 2.2197822822822824, "grad_norm": 0.9906621336700383, "learning_rate": 1.923146041917094e-06, "loss": 0.3671, "step": 23654 }, { "epoch": 2.219876126126126, "grad_norm": 1.1984553702830438, "learning_rate": 1.9227157099078346e-06, "loss": 0.3108, "step": 23655 }, { "epoch": 2.21996996996997, "grad_norm": 1.2691786523362538, "learning_rate": 1.922285414589397e-06, "loss": 0.3557, "step": 23656 }, { "epoch": 2.220063813813814, "grad_norm": 1.1560491510377666, "learning_rate": 1.9218551559669133e-06, "loss": 0.3173, "step": 23657 }, { "epoch": 2.2201576576576576, "grad_norm": 1.0734461472600767, "learning_rate": 1.9214249340455136e-06, "loss": 0.3174, "step": 23658 }, { "epoch": 2.2202515015015014, "grad_norm": 1.0453916664957286, "learning_rate": 1.9209947488303247e-06, "loss": 0.3083, "step": 23659 }, { "epoch": 2.2203453453453452, "grad_norm": 1.1186714172998649, "learning_rate": 1.92056460032648e-06, "loss": 0.3112, "step": 23660 }, { "epoch": 2.220439189189189, "grad_norm": 0.9376980591427756, "learning_rate": 1.9201344885391053e-06, "loss": 0.3178, "step": 23661 }, { "epoch": 2.220533033033033, "grad_norm": 1.119399430129749, "learning_rate": 1.919704413473328e-06, "loss": 0.3075, "step": 23662 }, { "epoch": 2.220626876876877, "grad_norm": 1.2655231632719381, "learning_rate": 1.91927437513428e-06, "loss": 0.316, "step": 23663 }, { "epoch": 2.220720720720721, "grad_norm": 1.084628509786937, "learning_rate": 1.918844373527086e-06, "loss": 0.2871, "step": 23664 }, { "epoch": 2.2208145645645647, "grad_norm": 0.9643725242977568, "learning_rate": 1.9184144086568722e-06, "loss": 0.2868, "step": 23665 }, { "epoch": 2.2209084084084085, "grad_norm": 1.36505754643445, "learning_rate": 1.917984480528767e-06, "loss": 0.2924, "step": 23666 }, { "epoch": 2.2210022522522523, "grad_norm": 1.2213235273308103, "learning_rate": 1.9175545891478953e-06, "loss": 0.3491, "step": 23667 }, { "epoch": 2.221096096096096, "grad_norm": 1.0229387392410418, "learning_rate": 1.917124734519381e-06, "loss": 0.345, "step": 23668 }, { "epoch": 2.22118993993994, "grad_norm": 1.0750929299072438, "learning_rate": 1.9166949166483533e-06, "loss": 0.326, "step": 23669 }, { "epoch": 2.2212837837837838, "grad_norm": 1.0855157431528955, "learning_rate": 1.916265135539935e-06, "loss": 0.3198, "step": 23670 }, { "epoch": 2.2213776276276276, "grad_norm": 0.9978826225892962, "learning_rate": 1.915835391199249e-06, "loss": 0.32, "step": 23671 }, { "epoch": 2.2214714714714714, "grad_norm": 0.9775392863132629, "learning_rate": 1.915405683631422e-06, "loss": 0.3439, "step": 23672 }, { "epoch": 2.221565315315315, "grad_norm": 1.136965198984402, "learning_rate": 1.9149760128415756e-06, "loss": 0.3415, "step": 23673 }, { "epoch": 2.221659159159159, "grad_norm": 1.187787924307226, "learning_rate": 1.9145463788348322e-06, "loss": 0.3232, "step": 23674 }, { "epoch": 2.221753003003003, "grad_norm": 1.1478450637589928, "learning_rate": 1.914116781616317e-06, "loss": 0.3255, "step": 23675 }, { "epoch": 2.221846846846847, "grad_norm": 1.165335077601902, "learning_rate": 1.9136872211911505e-06, "loss": 0.3016, "step": 23676 }, { "epoch": 2.221940690690691, "grad_norm": 0.9832896239125555, "learning_rate": 1.9132576975644553e-06, "loss": 0.3226, "step": 23677 }, { "epoch": 2.2220345345345347, "grad_norm": 1.163182204131214, "learning_rate": 1.912828210741351e-06, "loss": 0.3196, "step": 23678 }, { "epoch": 2.2221283783783785, "grad_norm": 1.2853368717460918, "learning_rate": 1.9123987607269595e-06, "loss": 0.3111, "step": 23679 }, { "epoch": 2.2222222222222223, "grad_norm": 1.158214159737323, "learning_rate": 1.911969347526399e-06, "loss": 0.3253, "step": 23680 }, { "epoch": 2.222316066066066, "grad_norm": 1.0331250188855992, "learning_rate": 1.9115399711447936e-06, "loss": 0.311, "step": 23681 }, { "epoch": 2.22240990990991, "grad_norm": 1.1453462155723706, "learning_rate": 1.91111063158726e-06, "loss": 0.291, "step": 23682 }, { "epoch": 2.2225037537537538, "grad_norm": 1.372831359744851, "learning_rate": 1.9106813288589166e-06, "loss": 0.2948, "step": 23683 }, { "epoch": 2.2225975975975976, "grad_norm": 1.1670217559062486, "learning_rate": 1.910252062964885e-06, "loss": 0.312, "step": 23684 }, { "epoch": 2.2226914414414414, "grad_norm": 1.0937234666520792, "learning_rate": 1.9098228339102822e-06, "loss": 0.2967, "step": 23685 }, { "epoch": 2.222785285285285, "grad_norm": 1.3602398253404926, "learning_rate": 1.909393641700223e-06, "loss": 0.3096, "step": 23686 }, { "epoch": 2.222879129129129, "grad_norm": 1.3653136903801895, "learning_rate": 1.9089644863398288e-06, "loss": 0.3453, "step": 23687 }, { "epoch": 2.222972972972973, "grad_norm": 1.0653708778725988, "learning_rate": 1.908535367834215e-06, "loss": 0.3247, "step": 23688 }, { "epoch": 2.2230668168168166, "grad_norm": 1.0552556812095644, "learning_rate": 1.908106286188498e-06, "loss": 0.336, "step": 23689 }, { "epoch": 2.2231606606606604, "grad_norm": 1.0575887043154475, "learning_rate": 1.9076772414077937e-06, "loss": 0.2933, "step": 23690 }, { "epoch": 2.2232545045045047, "grad_norm": 1.0911883375522278, "learning_rate": 1.9072482334972175e-06, "loss": 0.3058, "step": 23691 }, { "epoch": 2.2233483483483485, "grad_norm": 0.9717238706466818, "learning_rate": 1.9068192624618831e-06, "loss": 0.3474, "step": 23692 }, { "epoch": 2.2234421921921923, "grad_norm": 1.2378382307886135, "learning_rate": 1.906390328306908e-06, "loss": 0.3253, "step": 23693 }, { "epoch": 2.223536036036036, "grad_norm": 1.093738557551887, "learning_rate": 1.9059614310374052e-06, "loss": 0.341, "step": 23694 }, { "epoch": 2.22362987987988, "grad_norm": 1.182433016589684, "learning_rate": 1.905532570658487e-06, "loss": 0.2969, "step": 23695 }, { "epoch": 2.2237237237237237, "grad_norm": 1.0437777487009707, "learning_rate": 1.9051037471752699e-06, "loss": 0.3325, "step": 23696 }, { "epoch": 2.2238175675675675, "grad_norm": 1.1289149748486915, "learning_rate": 1.9046749605928644e-06, "loss": 0.3278, "step": 23697 }, { "epoch": 2.2239114114114114, "grad_norm": 1.1137934477909763, "learning_rate": 1.9042462109163827e-06, "loss": 0.2985, "step": 23698 }, { "epoch": 2.224005255255255, "grad_norm": 1.2214302947917197, "learning_rate": 1.9038174981509388e-06, "loss": 0.3307, "step": 23699 }, { "epoch": 2.224099099099099, "grad_norm": 1.0484262423928863, "learning_rate": 1.9033888223016439e-06, "loss": 0.2975, "step": 23700 }, { "epoch": 2.224192942942943, "grad_norm": 1.1705175696053314, "learning_rate": 1.9029601833736084e-06, "loss": 0.2977, "step": 23701 }, { "epoch": 2.2242867867867866, "grad_norm": 1.0335035509640305, "learning_rate": 1.902531581371943e-06, "loss": 0.324, "step": 23702 }, { "epoch": 2.2243806306306304, "grad_norm": 1.142877582630571, "learning_rate": 1.902103016301758e-06, "loss": 0.3098, "step": 23703 }, { "epoch": 2.2244744744744747, "grad_norm": 1.0634808571849679, "learning_rate": 1.9016744881681614e-06, "loss": 0.3111, "step": 23704 }, { "epoch": 2.2245683183183185, "grad_norm": 1.1425698183394233, "learning_rate": 1.901245996976267e-06, "loss": 0.3352, "step": 23705 }, { "epoch": 2.2246621621621623, "grad_norm": 1.1725424730677685, "learning_rate": 1.9008175427311808e-06, "loss": 0.3124, "step": 23706 }, { "epoch": 2.224756006006006, "grad_norm": 1.8216264316717692, "learning_rate": 1.9003891254380101e-06, "loss": 0.3028, "step": 23707 }, { "epoch": 2.22484984984985, "grad_norm": 0.9974513149233732, "learning_rate": 1.8999607451018665e-06, "loss": 0.3113, "step": 23708 }, { "epoch": 2.2249436936936937, "grad_norm": 1.151687666027371, "learning_rate": 1.8995324017278555e-06, "loss": 0.3004, "step": 23709 }, { "epoch": 2.2250375375375375, "grad_norm": 1.2396660479547537, "learning_rate": 1.8991040953210832e-06, "loss": 0.3428, "step": 23710 }, { "epoch": 2.2251313813813813, "grad_norm": 1.4952978997238588, "learning_rate": 1.8986758258866588e-06, "loss": 0.3084, "step": 23711 }, { "epoch": 2.225225225225225, "grad_norm": 1.2249837884123458, "learning_rate": 1.8982475934296884e-06, "loss": 0.3136, "step": 23712 }, { "epoch": 2.225319069069069, "grad_norm": 1.2680450016800875, "learning_rate": 1.8978193979552768e-06, "loss": 0.3113, "step": 23713 }, { "epoch": 2.2254129129129128, "grad_norm": 1.234300498315018, "learning_rate": 1.897391239468529e-06, "loss": 0.3455, "step": 23714 }, { "epoch": 2.2255067567567566, "grad_norm": 1.1916091603894328, "learning_rate": 1.8969631179745512e-06, "loss": 0.3216, "step": 23715 }, { "epoch": 2.2256006006006004, "grad_norm": 2.352627372219644, "learning_rate": 1.8965350334784454e-06, "loss": 0.3508, "step": 23716 }, { "epoch": 2.2256944444444446, "grad_norm": 1.0867919871633775, "learning_rate": 1.89610698598532e-06, "loss": 0.2888, "step": 23717 }, { "epoch": 2.2257882882882885, "grad_norm": 1.1717708752537026, "learning_rate": 1.8956789755002752e-06, "loss": 0.3073, "step": 23718 }, { "epoch": 2.2258821321321323, "grad_norm": 0.964591977162368, "learning_rate": 1.8952510020284144e-06, "loss": 0.2974, "step": 23719 }, { "epoch": 2.225975975975976, "grad_norm": 2.5015012112005146, "learning_rate": 1.894823065574843e-06, "loss": 0.2918, "step": 23720 }, { "epoch": 2.22606981981982, "grad_norm": 1.3525988317703423, "learning_rate": 1.8943951661446614e-06, "loss": 0.2936, "step": 23721 }, { "epoch": 2.2261636636636637, "grad_norm": 1.1922182799809171, "learning_rate": 1.8939673037429702e-06, "loss": 0.3501, "step": 23722 }, { "epoch": 2.2262575075075075, "grad_norm": 1.060499697946265, "learning_rate": 1.893539478374874e-06, "loss": 0.336, "step": 23723 }, { "epoch": 2.2263513513513513, "grad_norm": 1.007446259132719, "learning_rate": 1.893111690045472e-06, "loss": 0.2939, "step": 23724 }, { "epoch": 2.226445195195195, "grad_norm": 1.1972058497688243, "learning_rate": 1.8926839387598655e-06, "loss": 0.3148, "step": 23725 }, { "epoch": 2.226539039039039, "grad_norm": 1.4599607117527518, "learning_rate": 1.8922562245231535e-06, "loss": 0.3154, "step": 23726 }, { "epoch": 2.2266328828828827, "grad_norm": 1.0258878887133676, "learning_rate": 1.8918285473404369e-06, "loss": 0.328, "step": 23727 }, { "epoch": 2.2267267267267266, "grad_norm": 1.2017748515138316, "learning_rate": 1.891400907216812e-06, "loss": 0.3551, "step": 23728 }, { "epoch": 2.2268205705705704, "grad_norm": 0.9829093958769868, "learning_rate": 1.8909733041573824e-06, "loss": 0.2918, "step": 23729 }, { "epoch": 2.2269144144144146, "grad_norm": 1.180366441968448, "learning_rate": 1.890545738167243e-06, "loss": 0.3268, "step": 23730 }, { "epoch": 2.2270082582582584, "grad_norm": 0.93733598131047, "learning_rate": 1.890118209251492e-06, "loss": 0.322, "step": 23731 }, { "epoch": 2.2271021021021022, "grad_norm": 1.6697461845873944, "learning_rate": 1.889690717415229e-06, "loss": 0.3177, "step": 23732 }, { "epoch": 2.227195945945946, "grad_norm": 1.0621811411196134, "learning_rate": 1.8892632626635493e-06, "loss": 0.3128, "step": 23733 }, { "epoch": 2.22728978978979, "grad_norm": 1.3213807905904822, "learning_rate": 1.8888358450015487e-06, "loss": 0.2808, "step": 23734 }, { "epoch": 2.2273836336336337, "grad_norm": 1.0186821951932419, "learning_rate": 1.8884084644343259e-06, "loss": 0.2849, "step": 23735 }, { "epoch": 2.2274774774774775, "grad_norm": 1.0412049229805862, "learning_rate": 1.8879811209669752e-06, "loss": 0.2885, "step": 23736 }, { "epoch": 2.2275713213213213, "grad_norm": 1.1861934026797867, "learning_rate": 1.887553814604592e-06, "loss": 0.3174, "step": 23737 }, { "epoch": 2.227665165165165, "grad_norm": 1.6527208717006379, "learning_rate": 1.8871265453522708e-06, "loss": 0.289, "step": 23738 }, { "epoch": 2.227759009009009, "grad_norm": 1.1980032091693666, "learning_rate": 1.8866993132151062e-06, "loss": 0.3371, "step": 23739 }, { "epoch": 2.2278528528528527, "grad_norm": 1.2475024653398934, "learning_rate": 1.8862721181981902e-06, "loss": 0.3109, "step": 23740 }, { "epoch": 2.2279466966966965, "grad_norm": 1.0518011640815417, "learning_rate": 1.8858449603066203e-06, "loss": 0.2925, "step": 23741 }, { "epoch": 2.2280405405405403, "grad_norm": 1.0850864753258305, "learning_rate": 1.8854178395454869e-06, "loss": 0.3138, "step": 23742 }, { "epoch": 2.2281343843843846, "grad_norm": 1.1564999190960805, "learning_rate": 1.8849907559198822e-06, "loss": 0.3415, "step": 23743 }, { "epoch": 2.2282282282282284, "grad_norm": 1.0777186223116506, "learning_rate": 1.8845637094349007e-06, "loss": 0.3152, "step": 23744 }, { "epoch": 2.2283220720720722, "grad_norm": 1.2752557784032583, "learning_rate": 1.8841367000956324e-06, "loss": 0.3239, "step": 23745 }, { "epoch": 2.228415915915916, "grad_norm": 1.0456199974621505, "learning_rate": 1.8837097279071675e-06, "loss": 0.2789, "step": 23746 }, { "epoch": 2.22850975975976, "grad_norm": 1.1729560948529492, "learning_rate": 1.8832827928746e-06, "loss": 0.2788, "step": 23747 }, { "epoch": 2.2286036036036037, "grad_norm": 1.071019084816948, "learning_rate": 1.882855895003019e-06, "loss": 0.3015, "step": 23748 }, { "epoch": 2.2286974474474475, "grad_norm": 1.2127124717571973, "learning_rate": 1.8824290342975132e-06, "loss": 0.2958, "step": 23749 }, { "epoch": 2.2287912912912913, "grad_norm": 0.9767068590750633, "learning_rate": 1.8820022107631736e-06, "loss": 0.3264, "step": 23750 }, { "epoch": 2.228885135135135, "grad_norm": 1.1502644655646934, "learning_rate": 1.8815754244050881e-06, "loss": 0.3349, "step": 23751 }, { "epoch": 2.228978978978979, "grad_norm": 1.1059657813383454, "learning_rate": 1.8811486752283448e-06, "loss": 0.3335, "step": 23752 }, { "epoch": 2.2290728228228227, "grad_norm": 1.1067334643925364, "learning_rate": 1.880721963238034e-06, "loss": 0.3041, "step": 23753 }, { "epoch": 2.2291666666666665, "grad_norm": 1.2129720664867556, "learning_rate": 1.8802952884392428e-06, "loss": 0.3038, "step": 23754 }, { "epoch": 2.2292605105105103, "grad_norm": 1.4533492092634095, "learning_rate": 1.8798686508370562e-06, "loss": 0.3102, "step": 23755 }, { "epoch": 2.2293543543543546, "grad_norm": 1.2198428250849285, "learning_rate": 1.8794420504365646e-06, "loss": 0.3025, "step": 23756 }, { "epoch": 2.2294481981981984, "grad_norm": 1.136454167477137, "learning_rate": 1.8790154872428523e-06, "loss": 0.364, "step": 23757 }, { "epoch": 2.229542042042042, "grad_norm": 1.0810033384070634, "learning_rate": 1.8785889612610043e-06, "loss": 0.2949, "step": 23758 }, { "epoch": 2.229635885885886, "grad_norm": 1.4869089817377177, "learning_rate": 1.8781624724961094e-06, "loss": 0.3327, "step": 23759 }, { "epoch": 2.22972972972973, "grad_norm": 0.9498965738038861, "learning_rate": 1.8777360209532503e-06, "loss": 0.3135, "step": 23760 }, { "epoch": 2.2298235735735736, "grad_norm": 1.4101963841401381, "learning_rate": 1.8773096066375125e-06, "loss": 0.323, "step": 23761 }, { "epoch": 2.2299174174174174, "grad_norm": 0.9784305184633485, "learning_rate": 1.8768832295539797e-06, "loss": 0.3308, "step": 23762 }, { "epoch": 2.2300112612612613, "grad_norm": 1.051247645677345, "learning_rate": 1.8764568897077356e-06, "loss": 0.3232, "step": 23763 }, { "epoch": 2.230105105105105, "grad_norm": 1.0976584428897613, "learning_rate": 1.876030587103862e-06, "loss": 0.3211, "step": 23764 }, { "epoch": 2.230198948948949, "grad_norm": 1.1937895508682477, "learning_rate": 1.8756043217474452e-06, "loss": 0.3282, "step": 23765 }, { "epoch": 2.2302927927927927, "grad_norm": 1.050669604613511, "learning_rate": 1.8751780936435654e-06, "loss": 0.3246, "step": 23766 }, { "epoch": 2.2303866366366365, "grad_norm": 1.2000774966455372, "learning_rate": 1.8747519027973037e-06, "loss": 0.2937, "step": 23767 }, { "epoch": 2.2304804804804803, "grad_norm": 1.0536214234164776, "learning_rate": 1.874325749213744e-06, "loss": 0.3048, "step": 23768 }, { "epoch": 2.230574324324324, "grad_norm": 0.9585675249439213, "learning_rate": 1.8738996328979668e-06, "loss": 0.3136, "step": 23769 }, { "epoch": 2.2306681681681684, "grad_norm": 1.1317856374926893, "learning_rate": 1.8734735538550504e-06, "loss": 0.2718, "step": 23770 }, { "epoch": 2.230762012012012, "grad_norm": 1.087736333056155, "learning_rate": 1.8730475120900782e-06, "loss": 0.3271, "step": 23771 }, { "epoch": 2.230855855855856, "grad_norm": 0.9948749322104539, "learning_rate": 1.8726215076081288e-06, "loss": 0.3037, "step": 23772 }, { "epoch": 2.2309496996997, "grad_norm": 0.9883088613023606, "learning_rate": 1.8721955404142817e-06, "loss": 0.2759, "step": 23773 }, { "epoch": 2.2310435435435436, "grad_norm": 0.9867340149833596, "learning_rate": 1.8717696105136146e-06, "loss": 0.3051, "step": 23774 }, { "epoch": 2.2311373873873874, "grad_norm": 1.4930574408590984, "learning_rate": 1.871343717911207e-06, "loss": 0.3295, "step": 23775 }, { "epoch": 2.2312312312312312, "grad_norm": 0.9874558170315229, "learning_rate": 1.8709178626121344e-06, "loss": 0.2975, "step": 23776 }, { "epoch": 2.231325075075075, "grad_norm": 1.0694055082481129, "learning_rate": 1.870492044621478e-06, "loss": 0.2793, "step": 23777 }, { "epoch": 2.231418918918919, "grad_norm": 1.1706371344817195, "learning_rate": 1.870066263944314e-06, "loss": 0.3, "step": 23778 }, { "epoch": 2.2315127627627627, "grad_norm": 1.031369610503444, "learning_rate": 1.869640520585716e-06, "loss": 0.3198, "step": 23779 }, { "epoch": 2.2316066066066065, "grad_norm": 1.4301048295520828, "learning_rate": 1.8692148145507643e-06, "loss": 0.3546, "step": 23780 }, { "epoch": 2.2317004504504503, "grad_norm": 1.0797060424186908, "learning_rate": 1.8687891458445328e-06, "loss": 0.3173, "step": 23781 }, { "epoch": 2.231794294294294, "grad_norm": 1.0264152131289745, "learning_rate": 1.8683635144720953e-06, "loss": 0.3327, "step": 23782 }, { "epoch": 2.231888138138138, "grad_norm": 1.3623552930617966, "learning_rate": 1.86793792043853e-06, "loss": 0.2842, "step": 23783 }, { "epoch": 2.231981981981982, "grad_norm": 1.0537598463257667, "learning_rate": 1.867512363748909e-06, "loss": 0.3087, "step": 23784 }, { "epoch": 2.232075825825826, "grad_norm": 1.4842847286665317, "learning_rate": 1.8670868444083057e-06, "loss": 0.2998, "step": 23785 }, { "epoch": 2.23216966966967, "grad_norm": 1.1346726057789884, "learning_rate": 1.866661362421796e-06, "loss": 0.3227, "step": 23786 }, { "epoch": 2.2322635135135136, "grad_norm": 1.1953896401339044, "learning_rate": 1.8662359177944534e-06, "loss": 0.3143, "step": 23787 }, { "epoch": 2.2323573573573574, "grad_norm": 1.0370624659300833, "learning_rate": 1.8658105105313457e-06, "loss": 0.3389, "step": 23788 }, { "epoch": 2.232451201201201, "grad_norm": 1.336352805118394, "learning_rate": 1.8653851406375495e-06, "loss": 0.3299, "step": 23789 }, { "epoch": 2.232545045045045, "grad_norm": 1.0415758323247086, "learning_rate": 1.8649598081181353e-06, "loss": 0.3128, "step": 23790 }, { "epoch": 2.232638888888889, "grad_norm": 1.3159461523491633, "learning_rate": 1.8645345129781722e-06, "loss": 0.3099, "step": 23791 }, { "epoch": 2.2327327327327327, "grad_norm": 1.1189143012439768, "learning_rate": 1.8641092552227352e-06, "loss": 0.336, "step": 23792 }, { "epoch": 2.2328265765765765, "grad_norm": 1.1814516954518286, "learning_rate": 1.8636840348568923e-06, "loss": 0.3621, "step": 23793 }, { "epoch": 2.2329204204204203, "grad_norm": 1.3188716812051788, "learning_rate": 1.8632588518857126e-06, "loss": 0.324, "step": 23794 }, { "epoch": 2.233014264264264, "grad_norm": 1.2926849081169878, "learning_rate": 1.8628337063142681e-06, "loss": 0.3187, "step": 23795 }, { "epoch": 2.233108108108108, "grad_norm": 1.4517100114439931, "learning_rate": 1.862408598147627e-06, "loss": 0.3289, "step": 23796 }, { "epoch": 2.233201951951952, "grad_norm": 1.1282227094741089, "learning_rate": 1.8619835273908555e-06, "loss": 0.3208, "step": 23797 }, { "epoch": 2.233295795795796, "grad_norm": 1.1093423926934292, "learning_rate": 1.861558494049025e-06, "loss": 0.2803, "step": 23798 }, { "epoch": 2.2333896396396398, "grad_norm": 1.133744870778841, "learning_rate": 1.8611334981272021e-06, "loss": 0.3447, "step": 23799 }, { "epoch": 2.2334834834834836, "grad_norm": 1.2061451169170343, "learning_rate": 1.8607085396304542e-06, "loss": 0.2858, "step": 23800 }, { "epoch": 2.2335773273273274, "grad_norm": 0.9970387173520848, "learning_rate": 1.8602836185638479e-06, "loss": 0.2977, "step": 23801 }, { "epoch": 2.233671171171171, "grad_norm": 1.051790950449891, "learning_rate": 1.8598587349324499e-06, "loss": 0.3478, "step": 23802 }, { "epoch": 2.233765015015015, "grad_norm": 1.2539922217576984, "learning_rate": 1.8594338887413233e-06, "loss": 0.3229, "step": 23803 }, { "epoch": 2.233858858858859, "grad_norm": 1.1257298224983634, "learning_rate": 1.8590090799955378e-06, "loss": 0.3598, "step": 23804 }, { "epoch": 2.2339527027027026, "grad_norm": 2.0466721137077837, "learning_rate": 1.8585843087001571e-06, "loss": 0.3395, "step": 23805 }, { "epoch": 2.2340465465465464, "grad_norm": 1.0635953329743262, "learning_rate": 1.8581595748602439e-06, "loss": 0.3494, "step": 23806 }, { "epoch": 2.2341403903903903, "grad_norm": 1.088629108421558, "learning_rate": 1.8577348784808652e-06, "loss": 0.3126, "step": 23807 }, { "epoch": 2.234234234234234, "grad_norm": 1.2404790240392547, "learning_rate": 1.8573102195670834e-06, "loss": 0.3348, "step": 23808 }, { "epoch": 2.234328078078078, "grad_norm": 1.0957184194951093, "learning_rate": 1.8568855981239597e-06, "loss": 0.3141, "step": 23809 }, { "epoch": 2.234421921921922, "grad_norm": 1.1306208390161838, "learning_rate": 1.8564610141565615e-06, "loss": 0.3466, "step": 23810 }, { "epoch": 2.234515765765766, "grad_norm": 1.4116212120012277, "learning_rate": 1.8560364676699477e-06, "loss": 0.3283, "step": 23811 }, { "epoch": 2.2346096096096097, "grad_norm": 0.9945872808394303, "learning_rate": 1.8556119586691817e-06, "loss": 0.3361, "step": 23812 }, { "epoch": 2.2347034534534536, "grad_norm": 1.263387403464888, "learning_rate": 1.8551874871593244e-06, "loss": 0.2842, "step": 23813 }, { "epoch": 2.2347972972972974, "grad_norm": 0.9244104992233283, "learning_rate": 1.8547630531454363e-06, "loss": 0.3233, "step": 23814 }, { "epoch": 2.234891141141141, "grad_norm": 1.4132141111716987, "learning_rate": 1.8543386566325771e-06, "loss": 0.2882, "step": 23815 }, { "epoch": 2.234984984984985, "grad_norm": 1.1230798410898768, "learning_rate": 1.85391429762581e-06, "loss": 0.3156, "step": 23816 }, { "epoch": 2.235078828828829, "grad_norm": 1.173273749016633, "learning_rate": 1.8534899761301934e-06, "loss": 0.3414, "step": 23817 }, { "epoch": 2.2351726726726726, "grad_norm": 1.1826703068007265, "learning_rate": 1.8530656921507839e-06, "loss": 0.3468, "step": 23818 }, { "epoch": 2.2352665165165164, "grad_norm": 1.1699164752721658, "learning_rate": 1.8526414456926444e-06, "loss": 0.3458, "step": 23819 }, { "epoch": 2.2353603603603602, "grad_norm": 1.2525389190322442, "learning_rate": 1.8522172367608315e-06, "loss": 0.3053, "step": 23820 }, { "epoch": 2.235454204204204, "grad_norm": 1.2094086246730293, "learning_rate": 1.851793065360401e-06, "loss": 0.2603, "step": 23821 }, { "epoch": 2.235548048048048, "grad_norm": 1.032894339528391, "learning_rate": 1.8513689314964145e-06, "loss": 0.2904, "step": 23822 }, { "epoch": 2.235641891891892, "grad_norm": 2.5680853822016743, "learning_rate": 1.8509448351739262e-06, "loss": 0.2887, "step": 23823 }, { "epoch": 2.235735735735736, "grad_norm": 1.353807680585815, "learning_rate": 1.8505207763979938e-06, "loss": 0.3103, "step": 23824 }, { "epoch": 2.2358295795795797, "grad_norm": 1.1229129189235214, "learning_rate": 1.850096755173672e-06, "loss": 0.3348, "step": 23825 }, { "epoch": 2.2359234234234235, "grad_norm": 1.2750453874551189, "learning_rate": 1.8496727715060182e-06, "loss": 0.3288, "step": 23826 }, { "epoch": 2.2360172672672673, "grad_norm": 1.1203052990209637, "learning_rate": 1.8492488254000846e-06, "loss": 0.3229, "step": 23827 }, { "epoch": 2.236111111111111, "grad_norm": 0.992772153197371, "learning_rate": 1.8488249168609302e-06, "loss": 0.3158, "step": 23828 }, { "epoch": 2.236204954954955, "grad_norm": 1.0884136508096394, "learning_rate": 1.8484010458936065e-06, "loss": 0.4011, "step": 23829 }, { "epoch": 2.236298798798799, "grad_norm": 1.0207962767865404, "learning_rate": 1.847977212503167e-06, "loss": 0.2958, "step": 23830 }, { "epoch": 2.2363926426426426, "grad_norm": 1.2555351245906696, "learning_rate": 1.8475534166946673e-06, "loss": 0.2954, "step": 23831 }, { "epoch": 2.2364864864864864, "grad_norm": 3.8426093411389575, "learning_rate": 1.8471296584731597e-06, "loss": 0.2899, "step": 23832 }, { "epoch": 2.23658033033033, "grad_norm": 1.3914937158719918, "learning_rate": 1.846705937843694e-06, "loss": 0.3283, "step": 23833 }, { "epoch": 2.236674174174174, "grad_norm": 1.1892764192032939, "learning_rate": 1.846282254811327e-06, "loss": 0.307, "step": 23834 }, { "epoch": 2.236768018018018, "grad_norm": 1.1627181467062644, "learning_rate": 1.845858609381107e-06, "loss": 0.3079, "step": 23835 }, { "epoch": 2.236861861861862, "grad_norm": 1.0870810105850575, "learning_rate": 1.8454350015580869e-06, "loss": 0.277, "step": 23836 }, { "epoch": 2.236955705705706, "grad_norm": 1.0172455308287034, "learning_rate": 1.845011431347316e-06, "loss": 0.2971, "step": 23837 }, { "epoch": 2.2370495495495497, "grad_norm": 1.1576913239277447, "learning_rate": 1.8445878987538452e-06, "loss": 0.3229, "step": 23838 }, { "epoch": 2.2371433933933935, "grad_norm": 0.9716448551795068, "learning_rate": 1.8441644037827228e-06, "loss": 0.3343, "step": 23839 }, { "epoch": 2.2372372372372373, "grad_norm": 1.0843135125817258, "learning_rate": 1.843740946439001e-06, "loss": 0.3287, "step": 23840 }, { "epoch": 2.237331081081081, "grad_norm": 1.121017970232432, "learning_rate": 1.8433175267277276e-06, "loss": 0.3465, "step": 23841 }, { "epoch": 2.237424924924925, "grad_norm": 1.30181395999552, "learning_rate": 1.842894144653949e-06, "loss": 0.3182, "step": 23842 }, { "epoch": 2.2375187687687688, "grad_norm": 0.969561045329589, "learning_rate": 1.8424708002227166e-06, "loss": 0.3419, "step": 23843 }, { "epoch": 2.2376126126126126, "grad_norm": 1.1453337599082571, "learning_rate": 1.842047493439077e-06, "loss": 0.2835, "step": 23844 }, { "epoch": 2.2377064564564564, "grad_norm": 1.0052911640507032, "learning_rate": 1.8416242243080744e-06, "loss": 0.2979, "step": 23845 }, { "epoch": 2.2378003003003, "grad_norm": 1.2988984607702396, "learning_rate": 1.8412009928347601e-06, "loss": 0.2951, "step": 23846 }, { "epoch": 2.237894144144144, "grad_norm": 1.039342978933946, "learning_rate": 1.8407777990241782e-06, "loss": 0.3676, "step": 23847 }, { "epoch": 2.237987987987988, "grad_norm": 1.1294884624830617, "learning_rate": 1.840354642881374e-06, "loss": 0.3286, "step": 23848 }, { "epoch": 2.2380818318318316, "grad_norm": 1.007503910239373, "learning_rate": 1.8399315244113935e-06, "loss": 0.3172, "step": 23849 }, { "epoch": 2.238175675675676, "grad_norm": 1.14248052874566, "learning_rate": 1.8395084436192816e-06, "loss": 0.2737, "step": 23850 }, { "epoch": 2.2382695195195197, "grad_norm": 1.1442492660024974, "learning_rate": 1.8390854005100806e-06, "loss": 0.3026, "step": 23851 }, { "epoch": 2.2383633633633635, "grad_norm": 1.0154214051133645, "learning_rate": 1.8386623950888378e-06, "loss": 0.3287, "step": 23852 }, { "epoch": 2.2384572072072073, "grad_norm": 1.0752161971568053, "learning_rate": 1.8382394273605957e-06, "loss": 0.3231, "step": 23853 }, { "epoch": 2.238551051051051, "grad_norm": 1.0526005077496223, "learning_rate": 1.8378164973303952e-06, "loss": 0.3073, "step": 23854 }, { "epoch": 2.238644894894895, "grad_norm": 1.6866050352920017, "learning_rate": 1.8373936050032826e-06, "loss": 0.3167, "step": 23855 }, { "epoch": 2.2387387387387387, "grad_norm": 1.4645539381705777, "learning_rate": 1.8369707503842981e-06, "loss": 0.3122, "step": 23856 }, { "epoch": 2.2388325825825826, "grad_norm": 1.399989647537269, "learning_rate": 1.836547933478482e-06, "loss": 0.2817, "step": 23857 }, { "epoch": 2.2389264264264264, "grad_norm": 1.2929203644651508, "learning_rate": 1.8361251542908786e-06, "loss": 0.3159, "step": 23858 }, { "epoch": 2.23902027027027, "grad_norm": 1.2016585805034241, "learning_rate": 1.8357024128265277e-06, "loss": 0.3211, "step": 23859 }, { "epoch": 2.239114114114114, "grad_norm": 1.0036651844090865, "learning_rate": 1.8352797090904695e-06, "loss": 0.3446, "step": 23860 }, { "epoch": 2.239207957957958, "grad_norm": 1.187809276296999, "learning_rate": 1.8348570430877432e-06, "loss": 0.2889, "step": 23861 }, { "epoch": 2.2393018018018016, "grad_norm": 1.1619686968658929, "learning_rate": 1.8344344148233894e-06, "loss": 0.2716, "step": 23862 }, { "epoch": 2.2393956456456454, "grad_norm": 1.0978647791222769, "learning_rate": 1.8340118243024446e-06, "loss": 0.3134, "step": 23863 }, { "epoch": 2.2394894894894897, "grad_norm": 1.2386312088754794, "learning_rate": 1.8335892715299513e-06, "loss": 0.3237, "step": 23864 }, { "epoch": 2.2395833333333335, "grad_norm": 1.2018936451454192, "learning_rate": 1.8331667565109461e-06, "loss": 0.2839, "step": 23865 }, { "epoch": 2.2396771771771773, "grad_norm": 0.9571477397479379, "learning_rate": 1.8327442792504645e-06, "loss": 0.3183, "step": 23866 }, { "epoch": 2.239771021021021, "grad_norm": 1.7868989742635801, "learning_rate": 1.832321839753547e-06, "loss": 0.3427, "step": 23867 }, { "epoch": 2.239864864864865, "grad_norm": 1.4401055830451972, "learning_rate": 1.8318994380252292e-06, "loss": 0.2717, "step": 23868 }, { "epoch": 2.2399587087087087, "grad_norm": 3.7213063988613984, "learning_rate": 1.8314770740705456e-06, "loss": 0.3376, "step": 23869 }, { "epoch": 2.2400525525525525, "grad_norm": 1.2173114195622756, "learning_rate": 1.8310547478945356e-06, "loss": 0.3198, "step": 23870 }, { "epoch": 2.2401463963963963, "grad_norm": 1.1221365519437627, "learning_rate": 1.830632459502233e-06, "loss": 0.3054, "step": 23871 }, { "epoch": 2.24024024024024, "grad_norm": 1.6093829779828928, "learning_rate": 1.8302102088986717e-06, "loss": 0.3224, "step": 23872 }, { "epoch": 2.240334084084084, "grad_norm": 1.0421665396607924, "learning_rate": 1.8297879960888882e-06, "loss": 0.333, "step": 23873 }, { "epoch": 2.2404279279279278, "grad_norm": 1.0625324491484196, "learning_rate": 1.8293658210779148e-06, "loss": 0.3136, "step": 23874 }, { "epoch": 2.2405217717717716, "grad_norm": 0.9500448408690757, "learning_rate": 1.828943683870784e-06, "loss": 0.2795, "step": 23875 }, { "epoch": 2.2406156156156154, "grad_norm": 1.0641652867373759, "learning_rate": 1.8285215844725335e-06, "loss": 0.2847, "step": 23876 }, { "epoch": 2.2407094594594597, "grad_norm": 1.411316099822493, "learning_rate": 1.8280995228881927e-06, "loss": 0.2785, "step": 23877 }, { "epoch": 2.2408033033033035, "grad_norm": 1.1525155747848204, "learning_rate": 1.827677499122793e-06, "loss": 0.3261, "step": 23878 }, { "epoch": 2.2408971471471473, "grad_norm": 1.4568681406309747, "learning_rate": 1.8272555131813696e-06, "loss": 0.3115, "step": 23879 }, { "epoch": 2.240990990990991, "grad_norm": 1.1029224095515826, "learning_rate": 1.8268335650689522e-06, "loss": 0.3704, "step": 23880 }, { "epoch": 2.241084834834835, "grad_norm": 1.0942161929813066, "learning_rate": 1.8264116547905703e-06, "loss": 0.3257, "step": 23881 }, { "epoch": 2.2411786786786787, "grad_norm": 1.3831217948242098, "learning_rate": 1.8259897823512567e-06, "loss": 0.3047, "step": 23882 }, { "epoch": 2.2412725225225225, "grad_norm": 1.070208441803606, "learning_rate": 1.825567947756041e-06, "loss": 0.3293, "step": 23883 }, { "epoch": 2.2413663663663663, "grad_norm": 2.2249465265054362, "learning_rate": 1.8251461510099521e-06, "loss": 0.3285, "step": 23884 }, { "epoch": 2.24146021021021, "grad_norm": 1.0366606190539092, "learning_rate": 1.8247243921180197e-06, "loss": 0.3102, "step": 23885 }, { "epoch": 2.241554054054054, "grad_norm": 1.0850873099854126, "learning_rate": 1.8243026710852713e-06, "loss": 0.3196, "step": 23886 }, { "epoch": 2.2416478978978978, "grad_norm": 1.2164685607402617, "learning_rate": 1.8238809879167346e-06, "loss": 0.2516, "step": 23887 }, { "epoch": 2.2417417417417416, "grad_norm": 1.1776957666262895, "learning_rate": 1.8234593426174407e-06, "loss": 0.321, "step": 23888 }, { "epoch": 2.2418355855855854, "grad_norm": 1.2454020018456797, "learning_rate": 1.8230377351924145e-06, "loss": 0.2995, "step": 23889 }, { "epoch": 2.2419294294294296, "grad_norm": 1.0766803340697655, "learning_rate": 1.8226161656466817e-06, "loss": 0.2864, "step": 23890 }, { "epoch": 2.2420232732732734, "grad_norm": 1.0616463753046972, "learning_rate": 1.822194633985272e-06, "loss": 0.2686, "step": 23891 }, { "epoch": 2.2421171171171173, "grad_norm": 1.173059418824638, "learning_rate": 1.8217731402132099e-06, "loss": 0.3226, "step": 23892 }, { "epoch": 2.242210960960961, "grad_norm": 1.0557907252235823, "learning_rate": 1.821351684335519e-06, "loss": 0.3028, "step": 23893 }, { "epoch": 2.242304804804805, "grad_norm": 1.32648829360448, "learning_rate": 1.8209302663572276e-06, "loss": 0.2992, "step": 23894 }, { "epoch": 2.2423986486486487, "grad_norm": 1.2047890588292003, "learning_rate": 1.8205088862833586e-06, "loss": 0.3209, "step": 23895 }, { "epoch": 2.2424924924924925, "grad_norm": 1.037506865250453, "learning_rate": 1.8200875441189364e-06, "loss": 0.3242, "step": 23896 }, { "epoch": 2.2425863363363363, "grad_norm": 1.199511697178359, "learning_rate": 1.8196662398689852e-06, "loss": 0.3205, "step": 23897 }, { "epoch": 2.24268018018018, "grad_norm": 1.1223932656706823, "learning_rate": 1.819244973538527e-06, "loss": 0.3301, "step": 23898 }, { "epoch": 2.242774024024024, "grad_norm": 1.2021867707311273, "learning_rate": 1.818823745132584e-06, "loss": 0.2912, "step": 23899 }, { "epoch": 2.2428678678678677, "grad_norm": 1.448336325422747, "learning_rate": 1.8184025546561818e-06, "loss": 0.3078, "step": 23900 }, { "epoch": 2.2429617117117115, "grad_norm": 1.0700828028446063, "learning_rate": 1.81798140211434e-06, "loss": 0.2962, "step": 23901 }, { "epoch": 2.2430555555555554, "grad_norm": 1.007399657631387, "learning_rate": 1.817560287512079e-06, "loss": 0.3101, "step": 23902 }, { "epoch": 2.2431493993993996, "grad_norm": 1.2850205464764473, "learning_rate": 1.8171392108544228e-06, "loss": 0.3018, "step": 23903 }, { "epoch": 2.2432432432432434, "grad_norm": 1.0376338268685108, "learning_rate": 1.8167181721463906e-06, "loss": 0.304, "step": 23904 }, { "epoch": 2.2433370870870872, "grad_norm": 1.1370577176647936, "learning_rate": 1.8162971713930005e-06, "loss": 0.3038, "step": 23905 }, { "epoch": 2.243430930930931, "grad_norm": 1.0138735485008241, "learning_rate": 1.8158762085992753e-06, "loss": 0.295, "step": 23906 }, { "epoch": 2.243524774774775, "grad_norm": 0.9674327188261371, "learning_rate": 1.8154552837702332e-06, "loss": 0.2749, "step": 23907 }, { "epoch": 2.2436186186186187, "grad_norm": 1.330153858176741, "learning_rate": 1.8150343969108908e-06, "loss": 0.3244, "step": 23908 }, { "epoch": 2.2437124624624625, "grad_norm": 2.5487196443885445, "learning_rate": 1.81461354802627e-06, "loss": 0.3044, "step": 23909 }, { "epoch": 2.2438063063063063, "grad_norm": 1.188280836102541, "learning_rate": 1.814192737121388e-06, "loss": 0.3568, "step": 23910 }, { "epoch": 2.24390015015015, "grad_norm": 1.2371594251596314, "learning_rate": 1.813771964201258e-06, "loss": 0.3164, "step": 23911 }, { "epoch": 2.243993993993994, "grad_norm": 1.2837275818108236, "learning_rate": 1.813351229270901e-06, "loss": 0.3102, "step": 23912 }, { "epoch": 2.2440878378378377, "grad_norm": 1.2186048438722437, "learning_rate": 1.812930532335333e-06, "loss": 0.3241, "step": 23913 }, { "epoch": 2.2441816816816815, "grad_norm": 1.248072217587068, "learning_rate": 1.8125098733995672e-06, "loss": 0.3354, "step": 23914 }, { "epoch": 2.2442755255255253, "grad_norm": 1.089473524466656, "learning_rate": 1.8120892524686235e-06, "loss": 0.3208, "step": 23915 }, { "epoch": 2.2443693693693696, "grad_norm": 1.171677414985572, "learning_rate": 1.8116686695475144e-06, "loss": 0.317, "step": 23916 }, { "epoch": 2.2444632132132134, "grad_norm": 1.0209474889433774, "learning_rate": 1.8112481246412533e-06, "loss": 0.2924, "step": 23917 }, { "epoch": 2.244557057057057, "grad_norm": 1.043667830541982, "learning_rate": 1.8108276177548578e-06, "loss": 0.3288, "step": 23918 }, { "epoch": 2.244650900900901, "grad_norm": 0.9993807040295813, "learning_rate": 1.81040714889334e-06, "loss": 0.2822, "step": 23919 }, { "epoch": 2.244744744744745, "grad_norm": 1.0580170100803137, "learning_rate": 1.8099867180617114e-06, "loss": 0.3075, "step": 23920 }, { "epoch": 2.2448385885885886, "grad_norm": 2.064702133905894, "learning_rate": 1.8095663252649887e-06, "loss": 0.3374, "step": 23921 }, { "epoch": 2.2449324324324325, "grad_norm": 1.2770894433702331, "learning_rate": 1.8091459705081821e-06, "loss": 0.2983, "step": 23922 }, { "epoch": 2.2450262762762763, "grad_norm": 1.1341847551154667, "learning_rate": 1.8087256537963032e-06, "loss": 0.3222, "step": 23923 }, { "epoch": 2.24512012012012, "grad_norm": 1.070230658217272, "learning_rate": 1.8083053751343642e-06, "loss": 0.3101, "step": 23924 }, { "epoch": 2.245213963963964, "grad_norm": 1.2897073611766205, "learning_rate": 1.8078851345273762e-06, "loss": 0.3557, "step": 23925 }, { "epoch": 2.2453078078078077, "grad_norm": 0.9640796054437545, "learning_rate": 1.8074649319803473e-06, "loss": 0.3345, "step": 23926 }, { "epoch": 2.2454016516516515, "grad_norm": 1.339222594090557, "learning_rate": 1.8070447674982916e-06, "loss": 0.29, "step": 23927 }, { "epoch": 2.2454954954954953, "grad_norm": 1.1803649277353372, "learning_rate": 1.8066246410862164e-06, "loss": 0.3166, "step": 23928 }, { "epoch": 2.2455893393393396, "grad_norm": 0.9164481501151268, "learning_rate": 1.8062045527491307e-06, "loss": 0.3239, "step": 23929 }, { "epoch": 2.2456831831831834, "grad_norm": 1.1595586927801231, "learning_rate": 1.805784502492045e-06, "loss": 0.3147, "step": 23930 }, { "epoch": 2.245777027027027, "grad_norm": 0.921807347103904, "learning_rate": 1.805364490319967e-06, "loss": 0.3189, "step": 23931 }, { "epoch": 2.245870870870871, "grad_norm": 2.6944488195327994, "learning_rate": 1.804944516237902e-06, "loss": 0.3131, "step": 23932 }, { "epoch": 2.245964714714715, "grad_norm": 1.2111969960541387, "learning_rate": 1.8045245802508616e-06, "loss": 0.3267, "step": 23933 }, { "epoch": 2.2460585585585586, "grad_norm": 1.3423391878768267, "learning_rate": 1.804104682363851e-06, "loss": 0.3027, "step": 23934 }, { "epoch": 2.2461524024024024, "grad_norm": 1.1733550114452367, "learning_rate": 1.803684822581876e-06, "loss": 0.3113, "step": 23935 }, { "epoch": 2.2462462462462462, "grad_norm": 1.280576937513054, "learning_rate": 1.8032650009099434e-06, "loss": 0.3373, "step": 23936 }, { "epoch": 2.24634009009009, "grad_norm": 1.4226179007293933, "learning_rate": 1.802845217353058e-06, "loss": 0.2864, "step": 23937 }, { "epoch": 2.246433933933934, "grad_norm": 1.2058360001056319, "learning_rate": 1.8024254719162237e-06, "loss": 0.3001, "step": 23938 }, { "epoch": 2.2465277777777777, "grad_norm": 0.908407669305509, "learning_rate": 1.8020057646044487e-06, "loss": 0.3052, "step": 23939 }, { "epoch": 2.2466216216216215, "grad_norm": 1.7087572708284722, "learning_rate": 1.8015860954227354e-06, "loss": 0.299, "step": 23940 }, { "epoch": 2.2467154654654653, "grad_norm": 1.0922063570088083, "learning_rate": 1.8011664643760857e-06, "loss": 0.3318, "step": 23941 }, { "epoch": 2.246809309309309, "grad_norm": 1.1626124461012657, "learning_rate": 1.8007468714695064e-06, "loss": 0.3027, "step": 23942 }, { "epoch": 2.246903153153153, "grad_norm": 1.003187703349982, "learning_rate": 1.8003273167079988e-06, "loss": 0.3066, "step": 23943 }, { "epoch": 2.246996996996997, "grad_norm": 1.160172988505773, "learning_rate": 1.7999078000965637e-06, "loss": 0.3376, "step": 23944 }, { "epoch": 2.247090840840841, "grad_norm": 1.118987587932416, "learning_rate": 1.799488321640206e-06, "loss": 0.2806, "step": 23945 }, { "epoch": 2.247184684684685, "grad_norm": 1.1152810166712908, "learning_rate": 1.7990688813439256e-06, "loss": 0.3035, "step": 23946 }, { "epoch": 2.2472785285285286, "grad_norm": 1.3791441641368376, "learning_rate": 1.7986494792127235e-06, "loss": 0.3246, "step": 23947 }, { "epoch": 2.2473723723723724, "grad_norm": 1.0783862222189795, "learning_rate": 1.7982301152516008e-06, "loss": 0.2738, "step": 23948 }, { "epoch": 2.2474662162162162, "grad_norm": 1.0776019467717115, "learning_rate": 1.7978107894655573e-06, "loss": 0.3072, "step": 23949 }, { "epoch": 2.24756006006006, "grad_norm": 1.0600330319318638, "learning_rate": 1.7973915018595905e-06, "loss": 0.3215, "step": 23950 }, { "epoch": 2.247653903903904, "grad_norm": 1.116862046011174, "learning_rate": 1.7969722524387034e-06, "loss": 0.2925, "step": 23951 }, { "epoch": 2.2477477477477477, "grad_norm": 1.0302303978846645, "learning_rate": 1.7965530412078935e-06, "loss": 0.2861, "step": 23952 }, { "epoch": 2.2478415915915915, "grad_norm": 1.1611731060242627, "learning_rate": 1.7961338681721574e-06, "loss": 0.3366, "step": 23953 }, { "epoch": 2.2479354354354353, "grad_norm": 1.2648453448149017, "learning_rate": 1.7957147333364955e-06, "loss": 0.3239, "step": 23954 }, { "epoch": 2.248029279279279, "grad_norm": 1.15366660927103, "learning_rate": 1.7952956367059037e-06, "loss": 0.308, "step": 23955 }, { "epoch": 2.248123123123123, "grad_norm": 0.9328601233944527, "learning_rate": 1.7948765782853778e-06, "loss": 0.2812, "step": 23956 }, { "epoch": 2.248216966966967, "grad_norm": 1.0315947124213738, "learning_rate": 1.7944575580799173e-06, "loss": 0.2783, "step": 23957 }, { "epoch": 2.248310810810811, "grad_norm": 1.33892045408477, "learning_rate": 1.7940385760945167e-06, "loss": 0.3703, "step": 23958 }, { "epoch": 2.2484046546546548, "grad_norm": 0.9817543648390425, "learning_rate": 1.7936196323341714e-06, "loss": 0.2607, "step": 23959 }, { "epoch": 2.2484984984984986, "grad_norm": 0.9598062105003546, "learning_rate": 1.7932007268038765e-06, "loss": 0.3167, "step": 23960 }, { "epoch": 2.2485923423423424, "grad_norm": 1.099658036564604, "learning_rate": 1.7927818595086271e-06, "loss": 0.3098, "step": 23961 }, { "epoch": 2.248686186186186, "grad_norm": 1.1512482605749437, "learning_rate": 1.7923630304534152e-06, "loss": 0.2742, "step": 23962 }, { "epoch": 2.24878003003003, "grad_norm": 1.0747385680649315, "learning_rate": 1.7919442396432379e-06, "loss": 0.2914, "step": 23963 }, { "epoch": 2.248873873873874, "grad_norm": 1.0377452210424034, "learning_rate": 1.7915254870830873e-06, "loss": 0.2995, "step": 23964 }, { "epoch": 2.2489677177177176, "grad_norm": 0.9726004801234837, "learning_rate": 1.7911067727779541e-06, "loss": 0.3011, "step": 23965 }, { "epoch": 2.2490615615615615, "grad_norm": 1.2571478399842662, "learning_rate": 1.790688096732834e-06, "loss": 0.3481, "step": 23966 }, { "epoch": 2.2491554054054053, "grad_norm": 0.9946633890450659, "learning_rate": 1.7902694589527175e-06, "loss": 0.2952, "step": 23967 }, { "epoch": 2.249249249249249, "grad_norm": 8.146677170800094, "learning_rate": 1.7898508594425946e-06, "loss": 0.3195, "step": 23968 }, { "epoch": 2.249343093093093, "grad_norm": 1.1262511057135576, "learning_rate": 1.7894322982074591e-06, "loss": 0.2853, "step": 23969 }, { "epoch": 2.249436936936937, "grad_norm": 1.016931848338715, "learning_rate": 1.7890137752523007e-06, "loss": 0.309, "step": 23970 }, { "epoch": 2.249530780780781, "grad_norm": 1.0798095175677596, "learning_rate": 1.7885952905821085e-06, "loss": 0.3344, "step": 23971 }, { "epoch": 2.2496246246246248, "grad_norm": 1.0781022688995874, "learning_rate": 1.7881768442018727e-06, "loss": 0.2998, "step": 23972 }, { "epoch": 2.2497184684684686, "grad_norm": 1.0654733553953781, "learning_rate": 1.7877584361165823e-06, "loss": 0.2948, "step": 23973 }, { "epoch": 2.2498123123123124, "grad_norm": 1.2647615828658987, "learning_rate": 1.7873400663312245e-06, "loss": 0.332, "step": 23974 }, { "epoch": 2.249906156156156, "grad_norm": 0.9540877554896938, "learning_rate": 1.786921734850791e-06, "loss": 0.3018, "step": 23975 }, { "epoch": 2.25, "grad_norm": 1.6914653815229432, "learning_rate": 1.7865034416802679e-06, "loss": 0.3236, "step": 23976 }, { "epoch": 2.250093843843844, "grad_norm": 1.0496986261129277, "learning_rate": 1.7860851868246403e-06, "loss": 0.2789, "step": 23977 }, { "epoch": 2.2501876876876876, "grad_norm": 2.1199546390658934, "learning_rate": 1.7856669702888996e-06, "loss": 0.3076, "step": 23978 }, { "epoch": 2.2502815315315314, "grad_norm": 1.0772128224247706, "learning_rate": 1.7852487920780294e-06, "loss": 0.262, "step": 23979 }, { "epoch": 2.2503753753753752, "grad_norm": 1.0103054634445883, "learning_rate": 1.7848306521970148e-06, "loss": 0.2811, "step": 23980 }, { "epoch": 2.250469219219219, "grad_norm": 1.0495799480481915, "learning_rate": 1.7844125506508442e-06, "loss": 0.3059, "step": 23981 }, { "epoch": 2.250563063063063, "grad_norm": 1.0530936255070689, "learning_rate": 1.7839944874445015e-06, "loss": 0.2917, "step": 23982 }, { "epoch": 2.250656906906907, "grad_norm": 1.0698754534679962, "learning_rate": 1.783576462582971e-06, "loss": 0.3194, "step": 23983 }, { "epoch": 2.250750750750751, "grad_norm": 1.2655478969450182, "learning_rate": 1.7831584760712368e-06, "loss": 0.277, "step": 23984 }, { "epoch": 2.2508445945945947, "grad_norm": 1.2253860688065619, "learning_rate": 1.7827405279142823e-06, "loss": 0.3085, "step": 23985 }, { "epoch": 2.2509384384384385, "grad_norm": 1.0666127898377227, "learning_rate": 1.78232261811709e-06, "loss": 0.3468, "step": 23986 }, { "epoch": 2.2510322822822824, "grad_norm": 1.3120465938350707, "learning_rate": 1.781904746684645e-06, "loss": 0.3069, "step": 23987 }, { "epoch": 2.251126126126126, "grad_norm": 1.4239404947605903, "learning_rate": 1.7814869136219294e-06, "loss": 0.3436, "step": 23988 }, { "epoch": 2.25121996996997, "grad_norm": 1.2227080678511704, "learning_rate": 1.7810691189339214e-06, "loss": 0.359, "step": 23989 }, { "epoch": 2.251313813813814, "grad_norm": 1.0085693068630126, "learning_rate": 1.7806513626256072e-06, "loss": 0.34, "step": 23990 }, { "epoch": 2.2514076576576576, "grad_norm": 1.0650398574154252, "learning_rate": 1.7802336447019658e-06, "loss": 0.342, "step": 23991 }, { "epoch": 2.2515015015015014, "grad_norm": 1.0913097415399293, "learning_rate": 1.7798159651679758e-06, "loss": 0.3093, "step": 23992 }, { "epoch": 2.2515953453453452, "grad_norm": 1.0995477656893358, "learning_rate": 1.7793983240286212e-06, "loss": 0.2589, "step": 23993 }, { "epoch": 2.251689189189189, "grad_norm": 1.8270978138921403, "learning_rate": 1.7789807212888788e-06, "loss": 0.3609, "step": 23994 }, { "epoch": 2.251783033033033, "grad_norm": 0.9526639345611688, "learning_rate": 1.7785631569537288e-06, "loss": 0.3031, "step": 23995 }, { "epoch": 2.251876876876877, "grad_norm": 1.0467042702696918, "learning_rate": 1.7781456310281492e-06, "loss": 0.3152, "step": 23996 }, { "epoch": 2.251970720720721, "grad_norm": 1.4934170258253947, "learning_rate": 1.7777281435171183e-06, "loss": 0.3054, "step": 23997 }, { "epoch": 2.2520645645645647, "grad_norm": 1.1412355046228935, "learning_rate": 1.7773106944256124e-06, "loss": 0.2881, "step": 23998 }, { "epoch": 2.2521584084084085, "grad_norm": 1.1962749624763565, "learning_rate": 1.7768932837586122e-06, "loss": 0.2998, "step": 23999 }, { "epoch": 2.2522522522522523, "grad_norm": 1.1194872739488413, "learning_rate": 1.7764759115210922e-06, "loss": 0.3395, "step": 24000 }, { "epoch": 2.252346096096096, "grad_norm": 1.2215609793896647, "learning_rate": 1.7760585777180284e-06, "loss": 0.2669, "step": 24001 }, { "epoch": 2.25243993993994, "grad_norm": 1.1244550953171297, "learning_rate": 1.7756412823543988e-06, "loss": 0.3344, "step": 24002 }, { "epoch": 2.2525337837837838, "grad_norm": 1.0601150695043182, "learning_rate": 1.7752240254351778e-06, "loss": 0.3387, "step": 24003 }, { "epoch": 2.2526276276276276, "grad_norm": 1.2827282849215564, "learning_rate": 1.7748068069653385e-06, "loss": 0.3124, "step": 24004 }, { "epoch": 2.2527214714714714, "grad_norm": 1.0986291569780664, "learning_rate": 1.774389626949859e-06, "loss": 0.2987, "step": 24005 }, { "epoch": 2.252815315315315, "grad_norm": 1.127722937906075, "learning_rate": 1.7739724853937118e-06, "loss": 0.3076, "step": 24006 }, { "epoch": 2.252909159159159, "grad_norm": 1.1244772258367683, "learning_rate": 1.7735553823018702e-06, "loss": 0.3327, "step": 24007 }, { "epoch": 2.253003003003003, "grad_norm": 1.053432971860651, "learning_rate": 1.773138317679307e-06, "loss": 0.2907, "step": 24008 }, { "epoch": 2.253096846846847, "grad_norm": 1.037724687147664, "learning_rate": 1.772721291530996e-06, "loss": 0.2995, "step": 24009 }, { "epoch": 2.2531906906906904, "grad_norm": 1.067580352888551, "learning_rate": 1.772304303861907e-06, "loss": 0.3156, "step": 24010 }, { "epoch": 2.2532845345345347, "grad_norm": 1.159381387269466, "learning_rate": 1.7718873546770154e-06, "loss": 0.3515, "step": 24011 }, { "epoch": 2.2533783783783785, "grad_norm": 1.1835384316461446, "learning_rate": 1.7714704439812914e-06, "loss": 0.3149, "step": 24012 }, { "epoch": 2.2534722222222223, "grad_norm": 1.561974121371527, "learning_rate": 1.7710535717797028e-06, "loss": 0.3045, "step": 24013 }, { "epoch": 2.253566066066066, "grad_norm": 2.040611648864702, "learning_rate": 1.7706367380772243e-06, "loss": 0.3217, "step": 24014 }, { "epoch": 2.25365990990991, "grad_norm": 1.2621872445727549, "learning_rate": 1.7702199428788241e-06, "loss": 0.3159, "step": 24015 }, { "epoch": 2.2537537537537538, "grad_norm": 0.940403219026861, "learning_rate": 1.76980318618947e-06, "loss": 0.2971, "step": 24016 }, { "epoch": 2.2538475975975976, "grad_norm": 1.380799924242121, "learning_rate": 1.7693864680141342e-06, "loss": 0.3267, "step": 24017 }, { "epoch": 2.2539414414414414, "grad_norm": 1.098217506374492, "learning_rate": 1.768969788357784e-06, "loss": 0.3191, "step": 24018 }, { "epoch": 2.254035285285285, "grad_norm": 1.1357504901279998, "learning_rate": 1.7685531472253875e-06, "loss": 0.321, "step": 24019 }, { "epoch": 2.254129129129129, "grad_norm": 1.2535141236064928, "learning_rate": 1.7681365446219118e-06, "loss": 0.2832, "step": 24020 }, { "epoch": 2.254222972972973, "grad_norm": 1.221811053514704, "learning_rate": 1.7677199805523242e-06, "loss": 0.2975, "step": 24021 }, { "epoch": 2.254316816816817, "grad_norm": 1.1524818108304853, "learning_rate": 1.7673034550215906e-06, "loss": 0.3231, "step": 24022 }, { "epoch": 2.2544106606606604, "grad_norm": 1.045378359807458, "learning_rate": 1.7668869680346796e-06, "loss": 0.3103, "step": 24023 }, { "epoch": 2.2545045045045047, "grad_norm": 1.6775893973149063, "learning_rate": 1.7664705195965558e-06, "loss": 0.3436, "step": 24024 }, { "epoch": 2.2545983483483485, "grad_norm": 1.128246343230086, "learning_rate": 1.766054109712183e-06, "loss": 0.3013, "step": 24025 }, { "epoch": 2.2546921921921923, "grad_norm": 1.1452736920494302, "learning_rate": 1.7656377383865297e-06, "loss": 0.2971, "step": 24026 }, { "epoch": 2.254786036036036, "grad_norm": 1.115977940684733, "learning_rate": 1.7652214056245575e-06, "loss": 0.2853, "step": 24027 }, { "epoch": 2.25487987987988, "grad_norm": 1.7241873627368183, "learning_rate": 1.7648051114312304e-06, "loss": 0.3133, "step": 24028 }, { "epoch": 2.2549737237237237, "grad_norm": 1.1971366050717525, "learning_rate": 1.764388855811514e-06, "loss": 0.2818, "step": 24029 }, { "epoch": 2.2550675675675675, "grad_norm": 1.1777003693673225, "learning_rate": 1.76397263877037e-06, "loss": 0.2971, "step": 24030 }, { "epoch": 2.2551614114114114, "grad_norm": 0.9844630420073528, "learning_rate": 1.7635564603127597e-06, "loss": 0.3397, "step": 24031 }, { "epoch": 2.255255255255255, "grad_norm": 1.104809497858426, "learning_rate": 1.7631403204436482e-06, "loss": 0.2661, "step": 24032 }, { "epoch": 2.255349099099099, "grad_norm": 1.0913278486386426, "learning_rate": 1.7627242191679973e-06, "loss": 0.2644, "step": 24033 }, { "epoch": 2.255442942942943, "grad_norm": 2.3579180784960903, "learning_rate": 1.7623081564907628e-06, "loss": 0.3259, "step": 24034 }, { "epoch": 2.2555367867867866, "grad_norm": 2.076208872831646, "learning_rate": 1.7618921324169107e-06, "loss": 0.3067, "step": 24035 }, { "epoch": 2.2556306306306304, "grad_norm": 1.51092359472001, "learning_rate": 1.7614761469514008e-06, "loss": 0.3193, "step": 24036 }, { "epoch": 2.2557244744744747, "grad_norm": 1.1994882343123703, "learning_rate": 1.7610602000991895e-06, "loss": 0.3253, "step": 24037 }, { "epoch": 2.2558183183183185, "grad_norm": 1.2476520380264082, "learning_rate": 1.7606442918652405e-06, "loss": 0.3094, "step": 24038 }, { "epoch": 2.2559121621621623, "grad_norm": 3.743595669585261, "learning_rate": 1.7602284222545112e-06, "loss": 0.3492, "step": 24039 }, { "epoch": 2.256006006006006, "grad_norm": 0.9773701584294618, "learning_rate": 1.7598125912719576e-06, "loss": 0.3242, "step": 24040 }, { "epoch": 2.25609984984985, "grad_norm": 1.0873376214225865, "learning_rate": 1.759396798922542e-06, "loss": 0.3303, "step": 24041 }, { "epoch": 2.2561936936936937, "grad_norm": 0.9954725775545927, "learning_rate": 1.7589810452112193e-06, "loss": 0.3074, "step": 24042 }, { "epoch": 2.2562875375375375, "grad_norm": 3.0121202276452275, "learning_rate": 1.7585653301429457e-06, "loss": 0.3288, "step": 24043 }, { "epoch": 2.2563813813813813, "grad_norm": 1.565653494719691, "learning_rate": 1.7581496537226806e-06, "loss": 0.2988, "step": 24044 }, { "epoch": 2.256475225225225, "grad_norm": 0.9799270105265343, "learning_rate": 1.7577340159553786e-06, "loss": 0.3201, "step": 24045 }, { "epoch": 2.256569069069069, "grad_norm": 1.1120996682324962, "learning_rate": 1.7573184168459955e-06, "loss": 0.3023, "step": 24046 }, { "epoch": 2.2566629129129128, "grad_norm": 1.187216991785676, "learning_rate": 1.7569028563994866e-06, "loss": 0.3287, "step": 24047 }, { "epoch": 2.2567567567567566, "grad_norm": 1.6259768164144164, "learning_rate": 1.7564873346208072e-06, "loss": 0.2967, "step": 24048 }, { "epoch": 2.2568506006006004, "grad_norm": 1.1500870970115953, "learning_rate": 1.7560718515149089e-06, "loss": 0.3301, "step": 24049 }, { "epoch": 2.2569444444444446, "grad_norm": 1.0886122966871383, "learning_rate": 1.7556564070867494e-06, "loss": 0.3366, "step": 24050 }, { "epoch": 2.2570382882882885, "grad_norm": 1.0692214274909662, "learning_rate": 1.75524100134128e-06, "loss": 0.2845, "step": 24051 }, { "epoch": 2.2571321321321323, "grad_norm": 1.5739147512888172, "learning_rate": 1.754825634283453e-06, "loss": 0.2954, "step": 24052 }, { "epoch": 2.257225975975976, "grad_norm": 1.116090623203187, "learning_rate": 1.7544103059182232e-06, "loss": 0.3238, "step": 24053 }, { "epoch": 2.25731981981982, "grad_norm": 1.0961694096452148, "learning_rate": 1.753995016250542e-06, "loss": 0.3004, "step": 24054 }, { "epoch": 2.2574136636636637, "grad_norm": 1.0736227605011204, "learning_rate": 1.7535797652853576e-06, "loss": 0.3084, "step": 24055 }, { "epoch": 2.2575075075075075, "grad_norm": 1.0983728949330702, "learning_rate": 1.7531645530276258e-06, "loss": 0.3346, "step": 24056 }, { "epoch": 2.2576013513513513, "grad_norm": 1.1378847035523052, "learning_rate": 1.7527493794822953e-06, "loss": 0.3266, "step": 24057 }, { "epoch": 2.257695195195195, "grad_norm": 1.2427076840671396, "learning_rate": 1.7523342446543163e-06, "loss": 0.3202, "step": 24058 }, { "epoch": 2.257789039039039, "grad_norm": 1.131554334895746, "learning_rate": 1.7519191485486382e-06, "loss": 0.2763, "step": 24059 }, { "epoch": 2.2578828828828827, "grad_norm": 1.1781482950151208, "learning_rate": 1.7515040911702102e-06, "loss": 0.325, "step": 24060 }, { "epoch": 2.2579767267267266, "grad_norm": 1.1395587895410015, "learning_rate": 1.7510890725239793e-06, "loss": 0.3329, "step": 24061 }, { "epoch": 2.2580705705705704, "grad_norm": 1.5790999283671732, "learning_rate": 1.750674092614898e-06, "loss": 0.2669, "step": 24062 }, { "epoch": 2.2581644144144146, "grad_norm": 1.2691046969860074, "learning_rate": 1.7502591514479116e-06, "loss": 0.3126, "step": 24063 }, { "epoch": 2.2582582582582584, "grad_norm": 1.1958484726769507, "learning_rate": 1.7498442490279666e-06, "loss": 0.3701, "step": 24064 }, { "epoch": 2.2583521021021022, "grad_norm": 1.4530158303194536, "learning_rate": 1.749429385360012e-06, "loss": 0.3108, "step": 24065 }, { "epoch": 2.258445945945946, "grad_norm": 1.234831615508447, "learning_rate": 1.749014560448994e-06, "loss": 0.264, "step": 24066 }, { "epoch": 2.25853978978979, "grad_norm": 1.1777268969197565, "learning_rate": 1.7485997742998561e-06, "loss": 0.3182, "step": 24067 }, { "epoch": 2.2586336336336337, "grad_norm": 1.1803429188903478, "learning_rate": 1.7481850269175477e-06, "loss": 0.3148, "step": 24068 }, { "epoch": 2.2587274774774775, "grad_norm": 1.3957334737159566, "learning_rate": 1.7477703183070117e-06, "loss": 0.2745, "step": 24069 }, { "epoch": 2.2588213213213213, "grad_norm": 1.0158163735435883, "learning_rate": 1.7473556484731929e-06, "loss": 0.2658, "step": 24070 }, { "epoch": 2.258915165165165, "grad_norm": 1.1158834150397767, "learning_rate": 1.7469410174210361e-06, "loss": 0.2909, "step": 24071 }, { "epoch": 2.259009009009009, "grad_norm": 1.0489914889973384, "learning_rate": 1.7465264251554837e-06, "loss": 0.2763, "step": 24072 }, { "epoch": 2.2591028528528527, "grad_norm": 1.5385379804363686, "learning_rate": 1.7461118716814785e-06, "loss": 0.3346, "step": 24073 }, { "epoch": 2.2591966966966965, "grad_norm": 1.0340119149504763, "learning_rate": 1.7456973570039653e-06, "loss": 0.2823, "step": 24074 }, { "epoch": 2.2592905405405403, "grad_norm": 1.066454890738738, "learning_rate": 1.7452828811278866e-06, "loss": 0.3043, "step": 24075 }, { "epoch": 2.2593843843843846, "grad_norm": 1.1255306052582046, "learning_rate": 1.7448684440581809e-06, "loss": 0.315, "step": 24076 }, { "epoch": 2.2594782282282284, "grad_norm": 1.3347012705727646, "learning_rate": 1.7444540457997933e-06, "loss": 0.3389, "step": 24077 }, { "epoch": 2.2595720720720722, "grad_norm": 1.1853298824319427, "learning_rate": 1.7440396863576636e-06, "loss": 0.3479, "step": 24078 }, { "epoch": 2.259665915915916, "grad_norm": 1.0967864595755292, "learning_rate": 1.7436253657367303e-06, "loss": 0.3114, "step": 24079 }, { "epoch": 2.25975975975976, "grad_norm": 0.972813045354524, "learning_rate": 1.7432110839419364e-06, "loss": 0.2634, "step": 24080 }, { "epoch": 2.2598536036036037, "grad_norm": 1.3007407513746858, "learning_rate": 1.7427968409782204e-06, "loss": 0.2914, "step": 24081 }, { "epoch": 2.2599474474474475, "grad_norm": 1.0830273001192277, "learning_rate": 1.742382636850521e-06, "loss": 0.3238, "step": 24082 }, { "epoch": 2.2600412912912913, "grad_norm": 1.017039178529019, "learning_rate": 1.7419684715637763e-06, "loss": 0.3166, "step": 24083 }, { "epoch": 2.260135135135135, "grad_norm": 0.9655972213058159, "learning_rate": 1.7415543451229255e-06, "loss": 0.2914, "step": 24084 }, { "epoch": 2.260228978978979, "grad_norm": 1.0671739337028967, "learning_rate": 1.741140257532904e-06, "loss": 0.316, "step": 24085 }, { "epoch": 2.2603228228228227, "grad_norm": 0.9327192670839328, "learning_rate": 1.740726208798652e-06, "loss": 0.3259, "step": 24086 }, { "epoch": 2.2604166666666665, "grad_norm": 1.112509132989559, "learning_rate": 1.7403121989251053e-06, "loss": 0.3231, "step": 24087 }, { "epoch": 2.2605105105105103, "grad_norm": 1.18176192215943, "learning_rate": 1.7398982279171983e-06, "loss": 0.3251, "step": 24088 }, { "epoch": 2.2606043543543546, "grad_norm": 1.1204204962995274, "learning_rate": 1.7394842957798701e-06, "loss": 0.3261, "step": 24089 }, { "epoch": 2.260698198198198, "grad_norm": 1.0508961705587219, "learning_rate": 1.7390704025180545e-06, "loss": 0.321, "step": 24090 }, { "epoch": 2.260792042042042, "grad_norm": 1.1704635523285627, "learning_rate": 1.7386565481366841e-06, "loss": 0.3411, "step": 24091 }, { "epoch": 2.260885885885886, "grad_norm": 1.2048423431007182, "learning_rate": 1.7382427326406976e-06, "loss": 0.2979, "step": 24092 }, { "epoch": 2.26097972972973, "grad_norm": 1.026621029204308, "learning_rate": 1.737828956035027e-06, "loss": 0.3358, "step": 24093 }, { "epoch": 2.2610735735735736, "grad_norm": 1.2366596264749243, "learning_rate": 1.7374152183246057e-06, "loss": 0.3155, "step": 24094 }, { "epoch": 2.2611674174174174, "grad_norm": 1.1197248608775947, "learning_rate": 1.7370015195143664e-06, "loss": 0.3288, "step": 24095 }, { "epoch": 2.2612612612612613, "grad_norm": 1.0233992497924065, "learning_rate": 1.7365878596092423e-06, "loss": 0.304, "step": 24096 }, { "epoch": 2.261355105105105, "grad_norm": 1.0774616620246285, "learning_rate": 1.7361742386141634e-06, "loss": 0.2894, "step": 24097 }, { "epoch": 2.261448948948949, "grad_norm": 1.3266097392597502, "learning_rate": 1.7357606565340646e-06, "loss": 0.3096, "step": 24098 }, { "epoch": 2.2615427927927927, "grad_norm": 1.1257575632788102, "learning_rate": 1.735347113373876e-06, "loss": 0.3339, "step": 24099 }, { "epoch": 2.2616366366366365, "grad_norm": 1.6280541825624177, "learning_rate": 1.734933609138526e-06, "loss": 0.3091, "step": 24100 }, { "epoch": 2.2617304804804803, "grad_norm": 1.167925284698756, "learning_rate": 1.7345201438329489e-06, "loss": 0.3368, "step": 24101 }, { "epoch": 2.2618243243243246, "grad_norm": 1.0730368552482805, "learning_rate": 1.7341067174620723e-06, "loss": 0.3178, "step": 24102 }, { "epoch": 2.261918168168168, "grad_norm": 1.4203011038848021, "learning_rate": 1.7336933300308246e-06, "loss": 0.3195, "step": 24103 }, { "epoch": 2.262012012012012, "grad_norm": 1.1483691811433046, "learning_rate": 1.7332799815441364e-06, "loss": 0.3323, "step": 24104 }, { "epoch": 2.262105855855856, "grad_norm": 1.2018694761256876, "learning_rate": 1.732866672006936e-06, "loss": 0.3417, "step": 24105 }, { "epoch": 2.2621996996997, "grad_norm": 1.0126770748962268, "learning_rate": 1.7324534014241507e-06, "loss": 0.3629, "step": 24106 }, { "epoch": 2.2622935435435436, "grad_norm": 1.0964200997265257, "learning_rate": 1.7320401698007083e-06, "loss": 0.3103, "step": 24107 }, { "epoch": 2.2623873873873874, "grad_norm": 1.3387689985896782, "learning_rate": 1.731626977141535e-06, "loss": 0.2694, "step": 24108 }, { "epoch": 2.2624812312312312, "grad_norm": 1.1660756387292834, "learning_rate": 1.7312138234515568e-06, "loss": 0.3138, "step": 24109 }, { "epoch": 2.262575075075075, "grad_norm": 1.928063324915974, "learning_rate": 1.7308007087357026e-06, "loss": 0.3126, "step": 24110 }, { "epoch": 2.262668918918919, "grad_norm": 1.05421252752178, "learning_rate": 1.730387632998896e-06, "loss": 0.3585, "step": 24111 }, { "epoch": 2.2627627627627627, "grad_norm": 1.3596470379080499, "learning_rate": 1.7299745962460607e-06, "loss": 0.3101, "step": 24112 }, { "epoch": 2.2628566066066065, "grad_norm": 1.0655643341661254, "learning_rate": 1.7295615984821252e-06, "loss": 0.3285, "step": 24113 }, { "epoch": 2.2629504504504503, "grad_norm": 1.154555750599091, "learning_rate": 1.7291486397120117e-06, "loss": 0.2979, "step": 24114 }, { "epoch": 2.263044294294294, "grad_norm": 1.126563230923932, "learning_rate": 1.7287357199406419e-06, "loss": 0.3337, "step": 24115 }, { "epoch": 2.263138138138138, "grad_norm": 1.1763780596052271, "learning_rate": 1.728322839172943e-06, "loss": 0.3542, "step": 24116 }, { "epoch": 2.263231981981982, "grad_norm": 1.0471810553651062, "learning_rate": 1.727909997413836e-06, "loss": 0.322, "step": 24117 }, { "epoch": 2.263325825825826, "grad_norm": 1.2568455480252474, "learning_rate": 1.7274971946682429e-06, "loss": 0.3043, "step": 24118 }, { "epoch": 2.26341966966967, "grad_norm": 1.040821158698073, "learning_rate": 1.7270844309410862e-06, "loss": 0.3192, "step": 24119 }, { "epoch": 2.2635135135135136, "grad_norm": 1.066029637511114, "learning_rate": 1.7266717062372867e-06, "loss": 0.289, "step": 24120 }, { "epoch": 2.2636073573573574, "grad_norm": 1.209992996658526, "learning_rate": 1.726259020561764e-06, "loss": 0.287, "step": 24121 }, { "epoch": 2.263701201201201, "grad_norm": 1.4657414416167778, "learning_rate": 1.7258463739194424e-06, "loss": 0.2899, "step": 24122 }, { "epoch": 2.263795045045045, "grad_norm": 1.1174809206973533, "learning_rate": 1.72543376631524e-06, "loss": 0.3057, "step": 24123 }, { "epoch": 2.263888888888889, "grad_norm": 1.0813945192064935, "learning_rate": 1.7250211977540738e-06, "loss": 0.318, "step": 24124 }, { "epoch": 2.2639827327327327, "grad_norm": 1.0606385933476965, "learning_rate": 1.7246086682408674e-06, "loss": 0.365, "step": 24125 }, { "epoch": 2.2640765765765765, "grad_norm": 3.6590775778747147, "learning_rate": 1.7241961777805372e-06, "loss": 0.2979, "step": 24126 }, { "epoch": 2.2641704204204203, "grad_norm": 1.2632230884397628, "learning_rate": 1.723783726378e-06, "loss": 0.3268, "step": 24127 }, { "epoch": 2.264264264264264, "grad_norm": 1.1153580494569795, "learning_rate": 1.7233713140381764e-06, "loss": 0.3171, "step": 24128 }, { "epoch": 2.264358108108108, "grad_norm": 0.9672886270121344, "learning_rate": 1.7229589407659825e-06, "loss": 0.3268, "step": 24129 }, { "epoch": 2.264451951951952, "grad_norm": 1.033489424732321, "learning_rate": 1.7225466065663348e-06, "loss": 0.3225, "step": 24130 }, { "epoch": 2.264545795795796, "grad_norm": 1.1601266246731157, "learning_rate": 1.7221343114441496e-06, "loss": 0.3136, "step": 24131 }, { "epoch": 2.2646396396396398, "grad_norm": 1.3455883320111395, "learning_rate": 1.7217220554043429e-06, "loss": 0.3032, "step": 24132 }, { "epoch": 2.2647334834834836, "grad_norm": 1.0720166242572962, "learning_rate": 1.7213098384518284e-06, "loss": 0.3086, "step": 24133 }, { "epoch": 2.2648273273273274, "grad_norm": 0.9383974875216561, "learning_rate": 1.7208976605915239e-06, "loss": 0.3488, "step": 24134 }, { "epoch": 2.264921171171171, "grad_norm": 1.0443631462740242, "learning_rate": 1.720485521828343e-06, "loss": 0.3335, "step": 24135 }, { "epoch": 2.265015015015015, "grad_norm": 0.9983837567234983, "learning_rate": 1.720073422167197e-06, "loss": 0.2896, "step": 24136 }, { "epoch": 2.265108858858859, "grad_norm": 1.21443832882056, "learning_rate": 1.7196613616130036e-06, "loss": 0.2851, "step": 24137 }, { "epoch": 2.2652027027027026, "grad_norm": 2.0609524814394637, "learning_rate": 1.7192493401706734e-06, "loss": 0.3424, "step": 24138 }, { "epoch": 2.2652965465465464, "grad_norm": 1.0389079273307138, "learning_rate": 1.7188373578451185e-06, "loss": 0.3447, "step": 24139 }, { "epoch": 2.2653903903903903, "grad_norm": 1.361574826709105, "learning_rate": 1.7184254146412533e-06, "loss": 0.3779, "step": 24140 }, { "epoch": 2.265484234234234, "grad_norm": 1.082072851752253, "learning_rate": 1.7180135105639878e-06, "loss": 0.3243, "step": 24141 }, { "epoch": 2.265578078078078, "grad_norm": 1.0357363332172977, "learning_rate": 1.7176016456182337e-06, "loss": 0.3338, "step": 24142 }, { "epoch": 2.265671921921922, "grad_norm": 1.0033922598267273, "learning_rate": 1.7171898198089015e-06, "loss": 0.2756, "step": 24143 }, { "epoch": 2.265765765765766, "grad_norm": 1.0306199429986955, "learning_rate": 1.7167780331409017e-06, "loss": 0.3145, "step": 24144 }, { "epoch": 2.2658596096096097, "grad_norm": 1.0477968036987435, "learning_rate": 1.716366285619142e-06, "loss": 0.3043, "step": 24145 }, { "epoch": 2.2659534534534536, "grad_norm": 1.2231659890383855, "learning_rate": 1.7159545772485354e-06, "loss": 0.3228, "step": 24146 }, { "epoch": 2.2660472972972974, "grad_norm": 0.9783905992414251, "learning_rate": 1.715542908033988e-06, "loss": 0.3126, "step": 24147 }, { "epoch": 2.266141141141141, "grad_norm": 1.0284938399290955, "learning_rate": 1.7151312779804086e-06, "loss": 0.3088, "step": 24148 }, { "epoch": 2.266234984984985, "grad_norm": 1.0896806943598087, "learning_rate": 1.7147196870927063e-06, "loss": 0.2724, "step": 24149 }, { "epoch": 2.266328828828829, "grad_norm": 1.1014589496455582, "learning_rate": 1.7143081353757885e-06, "loss": 0.3524, "step": 24150 }, { "epoch": 2.2664226726726726, "grad_norm": 1.1058781846336623, "learning_rate": 1.7138966228345594e-06, "loss": 0.3155, "step": 24151 }, { "epoch": 2.2665165165165164, "grad_norm": 1.1952096713560134, "learning_rate": 1.7134851494739296e-06, "loss": 0.2553, "step": 24152 }, { "epoch": 2.2666103603603602, "grad_norm": 1.191698498451648, "learning_rate": 1.7130737152988032e-06, "loss": 0.2886, "step": 24153 }, { "epoch": 2.266704204204204, "grad_norm": 1.320437237795877, "learning_rate": 1.7126623203140835e-06, "loss": 0.357, "step": 24154 }, { "epoch": 2.266798048048048, "grad_norm": 1.0542045136062868, "learning_rate": 1.7122509645246798e-06, "loss": 0.2817, "step": 24155 }, { "epoch": 2.266891891891892, "grad_norm": 1.476225501229171, "learning_rate": 1.7118396479354964e-06, "loss": 0.287, "step": 24156 }, { "epoch": 2.266985735735736, "grad_norm": 1.2430307213584517, "learning_rate": 1.7114283705514329e-06, "loss": 0.2961, "step": 24157 }, { "epoch": 2.2670795795795797, "grad_norm": 1.447576592067282, "learning_rate": 1.7110171323773978e-06, "loss": 0.3378, "step": 24158 }, { "epoch": 2.2671734234234235, "grad_norm": 1.1026925955684812, "learning_rate": 1.7106059334182918e-06, "loss": 0.2748, "step": 24159 }, { "epoch": 2.2672672672672673, "grad_norm": 1.301422467726185, "learning_rate": 1.710194773679017e-06, "loss": 0.3267, "step": 24160 }, { "epoch": 2.267361111111111, "grad_norm": 1.0985370557925076, "learning_rate": 1.709783653164479e-06, "loss": 0.2999, "step": 24161 }, { "epoch": 2.267454954954955, "grad_norm": 0.924183980204306, "learning_rate": 1.7093725718795779e-06, "loss": 0.2884, "step": 24162 }, { "epoch": 2.267548798798799, "grad_norm": 2.6058235244400563, "learning_rate": 1.7089615298292128e-06, "loss": 0.3183, "step": 24163 }, { "epoch": 2.2676426426426426, "grad_norm": 1.0901830898257998, "learning_rate": 1.7085505270182883e-06, "loss": 0.3242, "step": 24164 }, { "epoch": 2.2677364864864864, "grad_norm": 0.9993837147320964, "learning_rate": 1.7081395634517039e-06, "loss": 0.3243, "step": 24165 }, { "epoch": 2.26783033033033, "grad_norm": 1.4622763779324301, "learning_rate": 1.7077286391343568e-06, "loss": 0.3557, "step": 24166 }, { "epoch": 2.267924174174174, "grad_norm": 0.9297445131693286, "learning_rate": 1.7073177540711499e-06, "loss": 0.3026, "step": 24167 }, { "epoch": 2.268018018018018, "grad_norm": 1.3523311342314634, "learning_rate": 1.7069069082669814e-06, "loss": 0.3356, "step": 24168 }, { "epoch": 2.268111861861862, "grad_norm": 1.0994631465905282, "learning_rate": 1.7064961017267494e-06, "loss": 0.3095, "step": 24169 }, { "epoch": 2.268205705705706, "grad_norm": 0.9772292450419006, "learning_rate": 1.7060853344553524e-06, "loss": 0.3384, "step": 24170 }, { "epoch": 2.2682995495495497, "grad_norm": 1.0457233696508421, "learning_rate": 1.7056746064576869e-06, "loss": 0.2396, "step": 24171 }, { "epoch": 2.2683933933933935, "grad_norm": 1.0355234446788848, "learning_rate": 1.7052639177386492e-06, "loss": 0.3364, "step": 24172 }, { "epoch": 2.2684872372372373, "grad_norm": 1.120398466124832, "learning_rate": 1.7048532683031398e-06, "loss": 0.2577, "step": 24173 }, { "epoch": 2.268581081081081, "grad_norm": 1.1908423522649636, "learning_rate": 1.7044426581560526e-06, "loss": 0.3193, "step": 24174 }, { "epoch": 2.268674924924925, "grad_norm": 2.316659973243111, "learning_rate": 1.7040320873022814e-06, "loss": 0.2976, "step": 24175 }, { "epoch": 2.2687687687687688, "grad_norm": 1.2739978579154663, "learning_rate": 1.703621555746725e-06, "loss": 0.3435, "step": 24176 }, { "epoch": 2.2688626126126126, "grad_norm": 0.9748672208723321, "learning_rate": 1.7032110634942772e-06, "loss": 0.3155, "step": 24177 }, { "epoch": 2.2689564564564564, "grad_norm": 1.1417540985568793, "learning_rate": 1.7028006105498302e-06, "loss": 0.3272, "step": 24178 }, { "epoch": 2.2690503003003, "grad_norm": 1.0149573046057652, "learning_rate": 1.7023901969182816e-06, "loss": 0.3127, "step": 24179 }, { "epoch": 2.269144144144144, "grad_norm": 1.126732220972094, "learning_rate": 1.701979822604522e-06, "loss": 0.319, "step": 24180 }, { "epoch": 2.269237987987988, "grad_norm": 1.6704704554895748, "learning_rate": 1.701569487613446e-06, "loss": 0.3015, "step": 24181 }, { "epoch": 2.269331831831832, "grad_norm": 1.175191698844466, "learning_rate": 1.7011591919499443e-06, "loss": 0.3014, "step": 24182 }, { "epoch": 2.2694256756756754, "grad_norm": 1.1526701845700393, "learning_rate": 1.70074893561891e-06, "loss": 0.3095, "step": 24183 }, { "epoch": 2.2695195195195197, "grad_norm": 1.1263667425107322, "learning_rate": 1.7003387186252335e-06, "loss": 0.3276, "step": 24184 }, { "epoch": 2.2696133633633635, "grad_norm": 1.180881429888253, "learning_rate": 1.6999285409738075e-06, "loss": 0.2884, "step": 24185 }, { "epoch": 2.2697072072072073, "grad_norm": 1.0569959029455684, "learning_rate": 1.6995184026695226e-06, "loss": 0.2781, "step": 24186 }, { "epoch": 2.269801051051051, "grad_norm": 1.0308675706106873, "learning_rate": 1.6991083037172661e-06, "loss": 0.3131, "step": 24187 }, { "epoch": 2.269894894894895, "grad_norm": 0.9485931769074629, "learning_rate": 1.6986982441219318e-06, "loss": 0.3474, "step": 24188 }, { "epoch": 2.2699887387387387, "grad_norm": 1.9698631615220845, "learning_rate": 1.6982882238884068e-06, "loss": 0.3008, "step": 24189 }, { "epoch": 2.2700825825825826, "grad_norm": 1.4695949834433741, "learning_rate": 1.6978782430215784e-06, "loss": 0.3206, "step": 24190 }, { "epoch": 2.2701764264264264, "grad_norm": 1.8192077049963347, "learning_rate": 1.6974683015263377e-06, "loss": 0.3412, "step": 24191 }, { "epoch": 2.27027027027027, "grad_norm": 0.9335909293189487, "learning_rate": 1.6970583994075713e-06, "loss": 0.3406, "step": 24192 }, { "epoch": 2.270364114114114, "grad_norm": 1.1967123039904177, "learning_rate": 1.6966485366701662e-06, "loss": 0.3316, "step": 24193 }, { "epoch": 2.270457957957958, "grad_norm": 1.6043578074715379, "learning_rate": 1.6962387133190094e-06, "loss": 0.3264, "step": 24194 }, { "epoch": 2.270551801801802, "grad_norm": 1.4947130909720996, "learning_rate": 1.6958289293589875e-06, "loss": 0.2879, "step": 24195 }, { "epoch": 2.2706456456456454, "grad_norm": 1.0861783823396194, "learning_rate": 1.695419184794984e-06, "loss": 0.2824, "step": 24196 }, { "epoch": 2.2707394894894897, "grad_norm": 1.1235062178598447, "learning_rate": 1.695009479631889e-06, "loss": 0.3174, "step": 24197 }, { "epoch": 2.2708333333333335, "grad_norm": 1.2321737449460273, "learning_rate": 1.6945998138745839e-06, "loss": 0.3169, "step": 24198 }, { "epoch": 2.2709271771771773, "grad_norm": 1.1889388598395294, "learning_rate": 1.6941901875279532e-06, "loss": 0.3254, "step": 24199 }, { "epoch": 2.271021021021021, "grad_norm": 1.0998876227570389, "learning_rate": 1.6937806005968837e-06, "loss": 0.3116, "step": 24200 }, { "epoch": 2.271114864864865, "grad_norm": 2.4850452973158688, "learning_rate": 1.6933710530862568e-06, "loss": 0.2863, "step": 24201 }, { "epoch": 2.2712087087087087, "grad_norm": 1.1465135984179005, "learning_rate": 1.6929615450009545e-06, "loss": 0.3001, "step": 24202 }, { "epoch": 2.2713025525525525, "grad_norm": 3.4784086362055753, "learning_rate": 1.6925520763458632e-06, "loss": 0.2932, "step": 24203 }, { "epoch": 2.2713963963963963, "grad_norm": 1.030628682579306, "learning_rate": 1.6921426471258618e-06, "loss": 0.2956, "step": 24204 }, { "epoch": 2.27149024024024, "grad_norm": 1.0881711914830807, "learning_rate": 1.6917332573458335e-06, "loss": 0.299, "step": 24205 }, { "epoch": 2.271584084084084, "grad_norm": 0.8471833164853999, "learning_rate": 1.6913239070106584e-06, "loss": 0.2989, "step": 24206 }, { "epoch": 2.2716779279279278, "grad_norm": 1.0375110303562978, "learning_rate": 1.6909145961252181e-06, "loss": 0.3245, "step": 24207 }, { "epoch": 2.2717717717717716, "grad_norm": 1.0573586729058781, "learning_rate": 1.6905053246943915e-06, "loss": 0.3062, "step": 24208 }, { "epoch": 2.2718656156156154, "grad_norm": 1.0534939415836375, "learning_rate": 1.6900960927230603e-06, "loss": 0.3271, "step": 24209 }, { "epoch": 2.2719594594594597, "grad_norm": 1.1186786940921634, "learning_rate": 1.6896869002161031e-06, "loss": 0.3043, "step": 24210 }, { "epoch": 2.2720533033033035, "grad_norm": 1.137717470996102, "learning_rate": 1.6892777471783967e-06, "loss": 0.3098, "step": 24211 }, { "epoch": 2.2721471471471473, "grad_norm": 1.1052226062349924, "learning_rate": 1.6888686336148236e-06, "loss": 0.3191, "step": 24212 }, { "epoch": 2.272240990990991, "grad_norm": 1.2439284642153865, "learning_rate": 1.6884595595302589e-06, "loss": 0.3228, "step": 24213 }, { "epoch": 2.272334834834835, "grad_norm": 1.2984961129244939, "learning_rate": 1.6880505249295791e-06, "loss": 0.2906, "step": 24214 }, { "epoch": 2.2724286786786787, "grad_norm": 1.0883546922476373, "learning_rate": 1.6876415298176645e-06, "loss": 0.3273, "step": 24215 }, { "epoch": 2.2725225225225225, "grad_norm": 1.4822228196481306, "learning_rate": 1.68723257419939e-06, "loss": 0.349, "step": 24216 }, { "epoch": 2.2726163663663663, "grad_norm": 1.2664404336751942, "learning_rate": 1.686823658079631e-06, "loss": 0.3172, "step": 24217 }, { "epoch": 2.27271021021021, "grad_norm": 1.0279946704646328, "learning_rate": 1.6864147814632637e-06, "loss": 0.2419, "step": 24218 }, { "epoch": 2.272804054054054, "grad_norm": 1.1430806255559667, "learning_rate": 1.686005944355163e-06, "loss": 0.2929, "step": 24219 }, { "epoch": 2.2728978978978978, "grad_norm": 1.0453263696355104, "learning_rate": 1.6855971467602017e-06, "loss": 0.3133, "step": 24220 }, { "epoch": 2.2729917417417416, "grad_norm": 1.2236516329395117, "learning_rate": 1.6851883886832576e-06, "loss": 0.3199, "step": 24221 }, { "epoch": 2.2730855855855854, "grad_norm": 1.651252247635173, "learning_rate": 1.6847796701292018e-06, "loss": 0.3539, "step": 24222 }, { "epoch": 2.2731794294294296, "grad_norm": 2.2764061367587485, "learning_rate": 1.6843709911029071e-06, "loss": 0.2893, "step": 24223 }, { "epoch": 2.2732732732732734, "grad_norm": 3.116602216101954, "learning_rate": 1.683962351609249e-06, "loss": 0.3015, "step": 24224 }, { "epoch": 2.2733671171171173, "grad_norm": 3.6071065649665095, "learning_rate": 1.6835537516530975e-06, "loss": 0.3566, "step": 24225 }, { "epoch": 2.273460960960961, "grad_norm": 1.291924127114154, "learning_rate": 1.6831451912393238e-06, "loss": 0.3436, "step": 24226 }, { "epoch": 2.273554804804805, "grad_norm": 0.9920617669442114, "learning_rate": 1.682736670372802e-06, "loss": 0.3105, "step": 24227 }, { "epoch": 2.2736486486486487, "grad_norm": 1.0892230736762656, "learning_rate": 1.6823281890584009e-06, "loss": 0.2903, "step": 24228 }, { "epoch": 2.2737424924924925, "grad_norm": 1.206790312363265, "learning_rate": 1.6819197473009918e-06, "loss": 0.3254, "step": 24229 }, { "epoch": 2.2738363363363363, "grad_norm": 1.2712729464511716, "learning_rate": 1.6815113451054438e-06, "loss": 0.3064, "step": 24230 }, { "epoch": 2.27393018018018, "grad_norm": 1.545798991617424, "learning_rate": 1.6811029824766268e-06, "loss": 0.3046, "step": 24231 }, { "epoch": 2.274024024024024, "grad_norm": 2.1188985587657254, "learning_rate": 1.6806946594194074e-06, "loss": 0.3014, "step": 24232 }, { "epoch": 2.2741178678678677, "grad_norm": 1.142134478610905, "learning_rate": 1.6802863759386585e-06, "loss": 0.3247, "step": 24233 }, { "epoch": 2.2742117117117115, "grad_norm": 1.035425000178746, "learning_rate": 1.6798781320392455e-06, "loss": 0.2932, "step": 24234 }, { "epoch": 2.2743055555555554, "grad_norm": 1.0520226832532977, "learning_rate": 1.6794699277260345e-06, "loss": 0.2825, "step": 24235 }, { "epoch": 2.2743993993993996, "grad_norm": 1.0550807026556381, "learning_rate": 1.6790617630038958e-06, "loss": 0.2852, "step": 24236 }, { "epoch": 2.2744932432432434, "grad_norm": 1.1673179582117985, "learning_rate": 1.6786536378776953e-06, "loss": 0.3079, "step": 24237 }, { "epoch": 2.2745870870870872, "grad_norm": 1.67438089320712, "learning_rate": 1.6782455523522962e-06, "loss": 0.3173, "step": 24238 }, { "epoch": 2.274680930930931, "grad_norm": 1.1884189160227778, "learning_rate": 1.677837506432568e-06, "loss": 0.3403, "step": 24239 }, { "epoch": 2.274774774774775, "grad_norm": 0.9861167828947638, "learning_rate": 1.6774295001233743e-06, "loss": 0.3067, "step": 24240 }, { "epoch": 2.2748686186186187, "grad_norm": 4.35523435107776, "learning_rate": 1.6770215334295798e-06, "loss": 0.3025, "step": 24241 }, { "epoch": 2.2749624624624625, "grad_norm": 1.0830795173603098, "learning_rate": 1.676613606356049e-06, "loss": 0.3049, "step": 24242 }, { "epoch": 2.2750563063063063, "grad_norm": 1.0797643987979477, "learning_rate": 1.6762057189076446e-06, "loss": 0.3721, "step": 24243 }, { "epoch": 2.27515015015015, "grad_norm": 2.047898025367001, "learning_rate": 1.6757978710892291e-06, "loss": 0.3088, "step": 24244 }, { "epoch": 2.275243993993994, "grad_norm": 1.0398351899965874, "learning_rate": 1.675390062905669e-06, "loss": 0.291, "step": 24245 }, { "epoch": 2.2753378378378377, "grad_norm": 1.0778981888326744, "learning_rate": 1.6749822943618239e-06, "loss": 0.3163, "step": 24246 }, { "epoch": 2.2754316816816815, "grad_norm": 1.0815389110437397, "learning_rate": 1.674574565462555e-06, "loss": 0.3338, "step": 24247 }, { "epoch": 2.2755255255255253, "grad_norm": 1.1598738700241484, "learning_rate": 1.674166876212726e-06, "loss": 0.3312, "step": 24248 }, { "epoch": 2.2756193693693696, "grad_norm": 1.4397823119778348, "learning_rate": 1.6737592266171976e-06, "loss": 0.3224, "step": 24249 }, { "epoch": 2.2757132132132134, "grad_norm": 1.0748306114320072, "learning_rate": 1.6733516166808273e-06, "loss": 0.3119, "step": 24250 }, { "epoch": 2.275807057057057, "grad_norm": 1.195619142772361, "learning_rate": 1.6729440464084785e-06, "loss": 0.3155, "step": 24251 }, { "epoch": 2.275900900900901, "grad_norm": 1.1664781536521411, "learning_rate": 1.67253651580501e-06, "loss": 0.3159, "step": 24252 }, { "epoch": 2.275994744744745, "grad_norm": 0.9858115732241328, "learning_rate": 1.6721290248752797e-06, "loss": 0.324, "step": 24253 }, { "epoch": 2.2760885885885886, "grad_norm": 1.1910103254760844, "learning_rate": 1.671721573624147e-06, "loss": 0.3009, "step": 24254 }, { "epoch": 2.2761824324324325, "grad_norm": 0.9219171282256967, "learning_rate": 1.67131416205647e-06, "loss": 0.3, "step": 24255 }, { "epoch": 2.2762762762762763, "grad_norm": 1.0563353997903548, "learning_rate": 1.6709067901771042e-06, "loss": 0.2983, "step": 24256 }, { "epoch": 2.27637012012012, "grad_norm": 1.1666631970467753, "learning_rate": 1.6704994579909101e-06, "loss": 0.2809, "step": 24257 }, { "epoch": 2.276463963963964, "grad_norm": 0.9992421449856779, "learning_rate": 1.6700921655027424e-06, "loss": 0.3319, "step": 24258 }, { "epoch": 2.2765578078078077, "grad_norm": 1.083408194679028, "learning_rate": 1.669684912717457e-06, "loss": 0.3312, "step": 24259 }, { "epoch": 2.2766516516516515, "grad_norm": 1.0566668815196527, "learning_rate": 1.6692776996399113e-06, "loss": 0.3143, "step": 24260 }, { "epoch": 2.2767454954954953, "grad_norm": 1.0983218218972142, "learning_rate": 1.6688705262749594e-06, "loss": 0.3317, "step": 24261 }, { "epoch": 2.2768393393393396, "grad_norm": 1.0224992146664944, "learning_rate": 1.668463392627455e-06, "loss": 0.2947, "step": 24262 }, { "epoch": 2.276933183183183, "grad_norm": 1.5983148069194764, "learning_rate": 1.6680562987022554e-06, "loss": 0.3393, "step": 24263 }, { "epoch": 2.277027027027027, "grad_norm": 1.129599396699386, "learning_rate": 1.6676492445042125e-06, "loss": 0.3626, "step": 24264 }, { "epoch": 2.277120870870871, "grad_norm": 1.0355999103451703, "learning_rate": 1.6672422300381802e-06, "loss": 0.3314, "step": 24265 }, { "epoch": 2.277214714714715, "grad_norm": 1.3679687271397305, "learning_rate": 1.6668352553090105e-06, "loss": 0.2791, "step": 24266 }, { "epoch": 2.2773085585585586, "grad_norm": 1.0548623439062987, "learning_rate": 1.666428320321557e-06, "loss": 0.3272, "step": 24267 }, { "epoch": 2.2774024024024024, "grad_norm": 1.0878326784661658, "learning_rate": 1.6660214250806688e-06, "loss": 0.3136, "step": 24268 }, { "epoch": 2.2774962462462462, "grad_norm": 1.1908267781215405, "learning_rate": 1.6656145695912013e-06, "loss": 0.3173, "step": 24269 }, { "epoch": 2.27759009009009, "grad_norm": 0.9919191813749506, "learning_rate": 1.6652077538580035e-06, "loss": 0.2894, "step": 24270 }, { "epoch": 2.277683933933934, "grad_norm": 0.9767087657409774, "learning_rate": 1.6648009778859249e-06, "loss": 0.3146, "step": 24271 }, { "epoch": 2.2777777777777777, "grad_norm": 1.0404826930122755, "learning_rate": 1.6643942416798181e-06, "loss": 0.3291, "step": 24272 }, { "epoch": 2.2778716216216215, "grad_norm": 1.0189029318821505, "learning_rate": 1.663987545244532e-06, "loss": 0.323, "step": 24273 }, { "epoch": 2.2779654654654653, "grad_norm": 1.057538857887489, "learning_rate": 1.6635808885849124e-06, "loss": 0.2878, "step": 24274 }, { "epoch": 2.2780593093093096, "grad_norm": 1.0322904212371076, "learning_rate": 1.663174271705813e-06, "loss": 0.2903, "step": 24275 }, { "epoch": 2.278153153153153, "grad_norm": 2.102206789355108, "learning_rate": 1.662767694612079e-06, "loss": 0.3531, "step": 24276 }, { "epoch": 2.278246996996997, "grad_norm": 1.0970553958621272, "learning_rate": 1.662361157308557e-06, "loss": 0.3324, "step": 24277 }, { "epoch": 2.278340840840841, "grad_norm": 2.07245431220207, "learning_rate": 1.6619546598000992e-06, "loss": 0.2902, "step": 24278 }, { "epoch": 2.278434684684685, "grad_norm": 1.0915361224950866, "learning_rate": 1.661548202091547e-06, "loss": 0.3292, "step": 24279 }, { "epoch": 2.2785285285285286, "grad_norm": 1.12778362168708, "learning_rate": 1.661141784187747e-06, "loss": 0.3466, "step": 24280 }, { "epoch": 2.2786223723723724, "grad_norm": 1.1445127879517885, "learning_rate": 1.6607354060935482e-06, "loss": 0.3244, "step": 24281 }, { "epoch": 2.2787162162162162, "grad_norm": 0.9918707332128324, "learning_rate": 1.660329067813794e-06, "loss": 0.3477, "step": 24282 }, { "epoch": 2.27881006006006, "grad_norm": 1.2698733549875267, "learning_rate": 1.6599227693533276e-06, "loss": 0.3206, "step": 24283 }, { "epoch": 2.278903903903904, "grad_norm": 1.1693106987083273, "learning_rate": 1.659516510716997e-06, "loss": 0.2847, "step": 24284 }, { "epoch": 2.2789977477477477, "grad_norm": 3.5578493859934275, "learning_rate": 1.659110291909644e-06, "loss": 0.3016, "step": 24285 }, { "epoch": 2.2790915915915915, "grad_norm": 1.2312090659706656, "learning_rate": 1.658704112936111e-06, "loss": 0.3427, "step": 24286 }, { "epoch": 2.2791854354354353, "grad_norm": 1.0493566411922517, "learning_rate": 1.6582979738012434e-06, "loss": 0.2966, "step": 24287 }, { "epoch": 2.279279279279279, "grad_norm": 1.030874898899058, "learning_rate": 1.657891874509882e-06, "loss": 0.3006, "step": 24288 }, { "epoch": 2.279373123123123, "grad_norm": 1.118095080210892, "learning_rate": 1.6574858150668683e-06, "loss": 0.3479, "step": 24289 }, { "epoch": 2.279466966966967, "grad_norm": 1.0947147656702125, "learning_rate": 1.6570797954770456e-06, "loss": 0.3422, "step": 24290 }, { "epoch": 2.279560810810811, "grad_norm": 1.0783997481680794, "learning_rate": 1.6566738157452545e-06, "loss": 0.3156, "step": 24291 }, { "epoch": 2.2796546546546548, "grad_norm": 1.2754313869825036, "learning_rate": 1.6562678758763345e-06, "loss": 0.3323, "step": 24292 }, { "epoch": 2.2797484984984986, "grad_norm": 1.1144688825649847, "learning_rate": 1.655861975875126e-06, "loss": 0.301, "step": 24293 }, { "epoch": 2.2798423423423424, "grad_norm": 1.2175770185086094, "learning_rate": 1.6554561157464694e-06, "loss": 0.278, "step": 24294 }, { "epoch": 2.279936186186186, "grad_norm": 1.078712206629471, "learning_rate": 1.6550502954952007e-06, "loss": 0.3379, "step": 24295 }, { "epoch": 2.28003003003003, "grad_norm": 1.245022342844122, "learning_rate": 1.654644515126163e-06, "loss": 0.3034, "step": 24296 }, { "epoch": 2.280123873873874, "grad_norm": 1.2666027185922426, "learning_rate": 1.654238774644193e-06, "loss": 0.2973, "step": 24297 }, { "epoch": 2.2802177177177176, "grad_norm": 1.9849082754932137, "learning_rate": 1.653833074054126e-06, "loss": 0.301, "step": 24298 }, { "epoch": 2.2803115615615615, "grad_norm": 1.1257022365928924, "learning_rate": 1.6534274133608025e-06, "loss": 0.3063, "step": 24299 }, { "epoch": 2.2804054054054053, "grad_norm": 1.8031596157130425, "learning_rate": 1.6530217925690579e-06, "loss": 0.3238, "step": 24300 }, { "epoch": 2.280499249249249, "grad_norm": 1.0882422651884982, "learning_rate": 1.6526162116837264e-06, "loss": 0.2968, "step": 24301 }, { "epoch": 2.280593093093093, "grad_norm": 1.1257131270375162, "learning_rate": 1.6522106707096474e-06, "loss": 0.3008, "step": 24302 }, { "epoch": 2.280686936936937, "grad_norm": 1.7215395428347922, "learning_rate": 1.651805169651655e-06, "loss": 0.3225, "step": 24303 }, { "epoch": 2.280780780780781, "grad_norm": 1.036843894427909, "learning_rate": 1.651399708514584e-06, "loss": 0.3325, "step": 24304 }, { "epoch": 2.2808746246246248, "grad_norm": 1.0989813757400886, "learning_rate": 1.6509942873032675e-06, "loss": 0.3512, "step": 24305 }, { "epoch": 2.2809684684684686, "grad_norm": 1.130333258305405, "learning_rate": 1.6505889060225411e-06, "loss": 0.3139, "step": 24306 }, { "epoch": 2.2810623123123124, "grad_norm": 0.9579492981997056, "learning_rate": 1.6501835646772352e-06, "loss": 0.3042, "step": 24307 }, { "epoch": 2.281156156156156, "grad_norm": 1.1072398931133998, "learning_rate": 1.6497782632721865e-06, "loss": 0.2779, "step": 24308 }, { "epoch": 2.28125, "grad_norm": 1.7563088435696288, "learning_rate": 1.649373001812226e-06, "loss": 0.3257, "step": 24309 }, { "epoch": 2.281343843843844, "grad_norm": 1.3091643789286742, "learning_rate": 1.6489677803021841e-06, "loss": 0.3018, "step": 24310 }, { "epoch": 2.2814376876876876, "grad_norm": 1.0182413491654074, "learning_rate": 1.648562598746895e-06, "loss": 0.3111, "step": 24311 }, { "epoch": 2.2815315315315314, "grad_norm": 1.1545941352995424, "learning_rate": 1.6481574571511887e-06, "loss": 0.3665, "step": 24312 }, { "epoch": 2.2816253753753752, "grad_norm": 1.000456470773249, "learning_rate": 1.6477523555198938e-06, "loss": 0.3034, "step": 24313 }, { "epoch": 2.281719219219219, "grad_norm": 1.0965473780817931, "learning_rate": 1.6473472938578439e-06, "loss": 0.2809, "step": 24314 }, { "epoch": 2.281813063063063, "grad_norm": 1.2694632296538464, "learning_rate": 1.6469422721698668e-06, "loss": 0.2989, "step": 24315 }, { "epoch": 2.281906906906907, "grad_norm": 2.45724133317122, "learning_rate": 1.6465372904607918e-06, "loss": 0.3215, "step": 24316 }, { "epoch": 2.282000750750751, "grad_norm": 1.2538749065530517, "learning_rate": 1.646132348735447e-06, "loss": 0.3265, "step": 24317 }, { "epoch": 2.2820945945945947, "grad_norm": 1.2148431329626599, "learning_rate": 1.6457274469986612e-06, "loss": 0.3494, "step": 24318 }, { "epoch": 2.2821884384384385, "grad_norm": 1.0832513211963717, "learning_rate": 1.6453225852552602e-06, "loss": 0.2968, "step": 24319 }, { "epoch": 2.2822822822822824, "grad_norm": 0.9484689805111091, "learning_rate": 1.6449177635100738e-06, "loss": 0.2874, "step": 24320 }, { "epoch": 2.282376126126126, "grad_norm": 1.1556168295444662, "learning_rate": 1.6445129817679284e-06, "loss": 0.3316, "step": 24321 }, { "epoch": 2.28246996996997, "grad_norm": 1.9280909471244227, "learning_rate": 1.6441082400336478e-06, "loss": 0.3023, "step": 24322 }, { "epoch": 2.282563813813814, "grad_norm": 1.1044616851262945, "learning_rate": 1.6437035383120615e-06, "loss": 0.3428, "step": 24323 }, { "epoch": 2.2826576576576576, "grad_norm": 1.1298556295056663, "learning_rate": 1.6432988766079928e-06, "loss": 0.2799, "step": 24324 }, { "epoch": 2.2827515015015014, "grad_norm": 1.5105132349381711, "learning_rate": 1.642894254926265e-06, "loss": 0.3298, "step": 24325 }, { "epoch": 2.2828453453453452, "grad_norm": 1.0561218378505635, "learning_rate": 1.6424896732717056e-06, "loss": 0.291, "step": 24326 }, { "epoch": 2.282939189189189, "grad_norm": 1.0697569666787405, "learning_rate": 1.6420851316491371e-06, "loss": 0.3259, "step": 24327 }, { "epoch": 2.283033033033033, "grad_norm": 1.119517492195606, "learning_rate": 1.641680630063383e-06, "loss": 0.3206, "step": 24328 }, { "epoch": 2.283126876876877, "grad_norm": 1.0023228158160535, "learning_rate": 1.6412761685192657e-06, "loss": 0.2764, "step": 24329 }, { "epoch": 2.283220720720721, "grad_norm": 1.0978401783394511, "learning_rate": 1.6408717470216084e-06, "loss": 0.3057, "step": 24330 }, { "epoch": 2.2833145645645647, "grad_norm": 1.0482702350282773, "learning_rate": 1.6404673655752308e-06, "loss": 0.3709, "step": 24331 }, { "epoch": 2.2834084084084085, "grad_norm": 0.9998393474224134, "learning_rate": 1.6400630241849574e-06, "loss": 0.3192, "step": 24332 }, { "epoch": 2.2835022522522523, "grad_norm": 1.0559743417343939, "learning_rate": 1.6396587228556082e-06, "loss": 0.3036, "step": 24333 }, { "epoch": 2.283596096096096, "grad_norm": 1.0549130993247313, "learning_rate": 1.639254461592002e-06, "loss": 0.2941, "step": 24334 }, { "epoch": 2.28368993993994, "grad_norm": 1.2176522162049757, "learning_rate": 1.638850240398962e-06, "loss": 0.2757, "step": 24335 }, { "epoch": 2.2837837837837838, "grad_norm": 1.1091065561609952, "learning_rate": 1.6384460592813062e-06, "loss": 0.3411, "step": 24336 }, { "epoch": 2.2838776276276276, "grad_norm": 1.129495915915064, "learning_rate": 1.6380419182438517e-06, "loss": 0.3362, "step": 24337 }, { "epoch": 2.2839714714714714, "grad_norm": 1.078593085160114, "learning_rate": 1.6376378172914215e-06, "loss": 0.3382, "step": 24338 }, { "epoch": 2.284065315315315, "grad_norm": 1.106059745751651, "learning_rate": 1.6372337564288304e-06, "loss": 0.3122, "step": 24339 }, { "epoch": 2.284159159159159, "grad_norm": 1.4720726220011762, "learning_rate": 1.6368297356608976e-06, "loss": 0.3434, "step": 24340 }, { "epoch": 2.284253003003003, "grad_norm": 1.1399292017775504, "learning_rate": 1.6364257549924394e-06, "loss": 0.36, "step": 24341 }, { "epoch": 2.284346846846847, "grad_norm": 1.117008554824599, "learning_rate": 1.636021814428273e-06, "loss": 0.3327, "step": 24342 }, { "epoch": 2.2844406906906904, "grad_norm": 1.1283513539313712, "learning_rate": 1.6356179139732126e-06, "loss": 0.3361, "step": 24343 }, { "epoch": 2.2845345345345347, "grad_norm": 1.1952084730869657, "learning_rate": 1.6352140536320777e-06, "loss": 0.2944, "step": 24344 }, { "epoch": 2.2846283783783785, "grad_norm": 1.0729780688995827, "learning_rate": 1.6348102334096816e-06, "loss": 0.2634, "step": 24345 }, { "epoch": 2.2847222222222223, "grad_norm": 1.1323638487311083, "learning_rate": 1.6344064533108374e-06, "loss": 0.314, "step": 24346 }, { "epoch": 2.284816066066066, "grad_norm": 1.0694802555598681, "learning_rate": 1.634002713340363e-06, "loss": 0.2799, "step": 24347 }, { "epoch": 2.28490990990991, "grad_norm": 1.11251709137038, "learning_rate": 1.6335990135030698e-06, "loss": 0.3485, "step": 24348 }, { "epoch": 2.2850037537537538, "grad_norm": 1.8139983482687738, "learning_rate": 1.633195353803771e-06, "loss": 0.2765, "step": 24349 }, { "epoch": 2.2850975975975976, "grad_norm": 1.1055173320133453, "learning_rate": 1.632791734247281e-06, "loss": 0.327, "step": 24350 }, { "epoch": 2.2851914414414414, "grad_norm": 1.242826996405675, "learning_rate": 1.6323881548384119e-06, "loss": 0.3159, "step": 24351 }, { "epoch": 2.285285285285285, "grad_norm": 1.325299536358376, "learning_rate": 1.6319846155819752e-06, "loss": 0.3443, "step": 24352 }, { "epoch": 2.285379129129129, "grad_norm": 1.1359742336978738, "learning_rate": 1.6315811164827822e-06, "loss": 0.3367, "step": 24353 }, { "epoch": 2.285472972972973, "grad_norm": 1.1064144489551724, "learning_rate": 1.631177657545644e-06, "loss": 0.3423, "step": 24354 }, { "epoch": 2.285566816816817, "grad_norm": 1.1242805108694225, "learning_rate": 1.6307742387753694e-06, "loss": 0.2969, "step": 24355 }, { "epoch": 2.2856606606606604, "grad_norm": 1.2004315090082787, "learning_rate": 1.630370860176772e-06, "loss": 0.3052, "step": 24356 }, { "epoch": 2.2857545045045047, "grad_norm": 0.9518631892797637, "learning_rate": 1.6299675217546596e-06, "loss": 0.293, "step": 24357 }, { "epoch": 2.2858483483483485, "grad_norm": 0.9219946288836471, "learning_rate": 1.6295642235138387e-06, "loss": 0.3113, "step": 24358 }, { "epoch": 2.2859421921921923, "grad_norm": 1.2815975648259257, "learning_rate": 1.6291609654591223e-06, "loss": 0.314, "step": 24359 }, { "epoch": 2.286036036036036, "grad_norm": 1.1957793693909347, "learning_rate": 1.6287577475953165e-06, "loss": 0.2909, "step": 24360 }, { "epoch": 2.28612987987988, "grad_norm": 1.1584117388896475, "learning_rate": 1.6283545699272268e-06, "loss": 0.3127, "step": 24361 }, { "epoch": 2.2862237237237237, "grad_norm": 1.219856936371292, "learning_rate": 1.6279514324596646e-06, "loss": 0.3107, "step": 24362 }, { "epoch": 2.2863175675675675, "grad_norm": 0.9451287246541255, "learning_rate": 1.6275483351974343e-06, "loss": 0.276, "step": 24363 }, { "epoch": 2.2864114114114114, "grad_norm": 1.1400170472589983, "learning_rate": 1.6271452781453417e-06, "loss": 0.3054, "step": 24364 }, { "epoch": 2.286505255255255, "grad_norm": 1.1969616142392314, "learning_rate": 1.6267422613081928e-06, "loss": 0.3239, "step": 24365 }, { "epoch": 2.286599099099099, "grad_norm": 1.1604759167753618, "learning_rate": 1.6263392846907933e-06, "loss": 0.3208, "step": 24366 }, { "epoch": 2.286692942942943, "grad_norm": 1.1817456835579352, "learning_rate": 1.6259363482979463e-06, "loss": 0.3, "step": 24367 }, { "epoch": 2.2867867867867866, "grad_norm": 1.9355524176651928, "learning_rate": 1.625533452134458e-06, "loss": 0.3454, "step": 24368 }, { "epoch": 2.2868806306306304, "grad_norm": 1.1688203315705563, "learning_rate": 1.6251305962051323e-06, "loss": 0.3352, "step": 24369 }, { "epoch": 2.2869744744744747, "grad_norm": 1.0306109804225225, "learning_rate": 1.62472778051477e-06, "loss": 0.2968, "step": 24370 }, { "epoch": 2.2870683183183185, "grad_norm": 1.1096669434134727, "learning_rate": 1.6243250050681769e-06, "loss": 0.2892, "step": 24371 }, { "epoch": 2.2871621621621623, "grad_norm": 1.3017889163166565, "learning_rate": 1.623922269870154e-06, "loss": 0.3354, "step": 24372 }, { "epoch": 2.287256006006006, "grad_norm": 1.274275029885959, "learning_rate": 1.6235195749255017e-06, "loss": 0.3182, "step": 24373 }, { "epoch": 2.28734984984985, "grad_norm": 1.0641308630598365, "learning_rate": 1.6231169202390246e-06, "loss": 0.3812, "step": 24374 }, { "epoch": 2.2874436936936937, "grad_norm": 1.4313333334026221, "learning_rate": 1.6227143058155221e-06, "loss": 0.2886, "step": 24375 }, { "epoch": 2.2875375375375375, "grad_norm": 1.1624559467288544, "learning_rate": 1.6223117316597942e-06, "loss": 0.3329, "step": 24376 }, { "epoch": 2.2876313813813813, "grad_norm": 1.4314743319589873, "learning_rate": 1.621909197776641e-06, "loss": 0.2977, "step": 24377 }, { "epoch": 2.287725225225225, "grad_norm": 1.0907469594980848, "learning_rate": 1.621506704170862e-06, "loss": 0.3125, "step": 24378 }, { "epoch": 2.287819069069069, "grad_norm": 1.2762971556008564, "learning_rate": 1.6211042508472546e-06, "loss": 0.3557, "step": 24379 }, { "epoch": 2.2879129129129128, "grad_norm": 1.031939479862047, "learning_rate": 1.620701837810621e-06, "loss": 0.3089, "step": 24380 }, { "epoch": 2.2880067567567566, "grad_norm": 1.0943905429697354, "learning_rate": 1.6202994650657567e-06, "loss": 0.3117, "step": 24381 }, { "epoch": 2.2881006006006004, "grad_norm": 1.1957439340889526, "learning_rate": 1.619897132617458e-06, "loss": 0.3166, "step": 24382 }, { "epoch": 2.2881944444444446, "grad_norm": 1.1848419438719102, "learning_rate": 1.6194948404705251e-06, "loss": 0.3546, "step": 24383 }, { "epoch": 2.2882882882882885, "grad_norm": 1.7204974127543098, "learning_rate": 1.6190925886297532e-06, "loss": 0.2795, "step": 24384 }, { "epoch": 2.2883821321321323, "grad_norm": 1.653979141728353, "learning_rate": 1.6186903770999363e-06, "loss": 0.3001, "step": 24385 }, { "epoch": 2.288475975975976, "grad_norm": 1.5594307624328492, "learning_rate": 1.618288205885874e-06, "loss": 0.3168, "step": 24386 }, { "epoch": 2.28856981981982, "grad_norm": 1.5639526802360393, "learning_rate": 1.6178860749923598e-06, "loss": 0.3055, "step": 24387 }, { "epoch": 2.2886636636636637, "grad_norm": 1.1095815100875062, "learning_rate": 1.6174839844241874e-06, "loss": 0.267, "step": 24388 }, { "epoch": 2.2887575075075075, "grad_norm": 1.0471661814105888, "learning_rate": 1.6170819341861516e-06, "loss": 0.3324, "step": 24389 }, { "epoch": 2.2888513513513513, "grad_norm": 1.3042641229700733, "learning_rate": 1.6166799242830462e-06, "loss": 0.3495, "step": 24390 }, { "epoch": 2.288945195195195, "grad_norm": 1.0555050986236507, "learning_rate": 1.6162779547196627e-06, "loss": 0.3186, "step": 24391 }, { "epoch": 2.289039039039039, "grad_norm": 0.9962967973011582, "learning_rate": 1.6158760255007972e-06, "loss": 0.3332, "step": 24392 }, { "epoch": 2.2891328828828827, "grad_norm": 0.9884097703723397, "learning_rate": 1.6154741366312399e-06, "loss": 0.334, "step": 24393 }, { "epoch": 2.2892267267267266, "grad_norm": 1.2309710480389227, "learning_rate": 1.615072288115781e-06, "loss": 0.3375, "step": 24394 }, { "epoch": 2.2893205705705704, "grad_norm": 1.042791945952614, "learning_rate": 1.6146704799592156e-06, "loss": 0.3091, "step": 24395 }, { "epoch": 2.2894144144144146, "grad_norm": 1.0575939170062192, "learning_rate": 1.614268712166332e-06, "loss": 0.2757, "step": 24396 }, { "epoch": 2.2895082582582584, "grad_norm": 1.0785849243920733, "learning_rate": 1.6138669847419198e-06, "loss": 0.3273, "step": 24397 }, { "epoch": 2.2896021021021022, "grad_norm": 1.0202714731760987, "learning_rate": 1.6134652976907716e-06, "loss": 0.2644, "step": 24398 }, { "epoch": 2.289695945945946, "grad_norm": 1.2822206393369868, "learning_rate": 1.613063651017675e-06, "loss": 0.3359, "step": 24399 }, { "epoch": 2.28978978978979, "grad_norm": 1.1397251936385353, "learning_rate": 1.6126620447274177e-06, "loss": 0.311, "step": 24400 }, { "epoch": 2.2898836336336337, "grad_norm": 1.2408399548696294, "learning_rate": 1.6122604788247926e-06, "loss": 0.2921, "step": 24401 }, { "epoch": 2.2899774774774775, "grad_norm": 1.0462614381080453, "learning_rate": 1.6118589533145834e-06, "loss": 0.3033, "step": 24402 }, { "epoch": 2.2900713213213213, "grad_norm": 1.0945331517565826, "learning_rate": 1.6114574682015765e-06, "loss": 0.308, "step": 24403 }, { "epoch": 2.290165165165165, "grad_norm": 1.3619877631341086, "learning_rate": 1.6110560234905625e-06, "loss": 0.2972, "step": 24404 }, { "epoch": 2.290259009009009, "grad_norm": 1.0718468787900586, "learning_rate": 1.610654619186327e-06, "loss": 0.3166, "step": 24405 }, { "epoch": 2.2903528528528527, "grad_norm": 1.087558522484418, "learning_rate": 1.6102532552936534e-06, "loss": 0.2949, "step": 24406 }, { "epoch": 2.2904466966966965, "grad_norm": 1.0698061956865168, "learning_rate": 1.6098519318173312e-06, "loss": 0.3241, "step": 24407 }, { "epoch": 2.2905405405405403, "grad_norm": 1.0342344381286603, "learning_rate": 1.6094506487621426e-06, "loss": 0.2971, "step": 24408 }, { "epoch": 2.2906343843843846, "grad_norm": 1.0455534019082706, "learning_rate": 1.609049406132872e-06, "loss": 0.2861, "step": 24409 }, { "epoch": 2.2907282282282284, "grad_norm": 1.2739913371011997, "learning_rate": 1.608648203934306e-06, "loss": 0.31, "step": 24410 }, { "epoch": 2.2908220720720722, "grad_norm": 0.9181567384956977, "learning_rate": 1.6082470421712265e-06, "loss": 0.3171, "step": 24411 }, { "epoch": 2.290915915915916, "grad_norm": 1.1198620634482357, "learning_rate": 1.6078459208484149e-06, "loss": 0.3101, "step": 24412 }, { "epoch": 2.29100975975976, "grad_norm": 1.0603747451560621, "learning_rate": 1.6074448399706572e-06, "loss": 0.3386, "step": 24413 }, { "epoch": 2.2911036036036037, "grad_norm": 1.099326414115563, "learning_rate": 1.6070437995427347e-06, "loss": 0.2942, "step": 24414 }, { "epoch": 2.2911974474474475, "grad_norm": 1.4953231031105152, "learning_rate": 1.6066427995694274e-06, "loss": 0.3227, "step": 24415 }, { "epoch": 2.2912912912912913, "grad_norm": 1.050541961127246, "learning_rate": 1.606241840055518e-06, "loss": 0.338, "step": 24416 }, { "epoch": 2.291385135135135, "grad_norm": 1.0913991054348602, "learning_rate": 1.6058409210057863e-06, "loss": 0.3259, "step": 24417 }, { "epoch": 2.291478978978979, "grad_norm": 1.1246476588606096, "learning_rate": 1.605440042425011e-06, "loss": 0.299, "step": 24418 }, { "epoch": 2.2915728228228227, "grad_norm": 1.0127683926859736, "learning_rate": 1.6050392043179753e-06, "loss": 0.3236, "step": 24419 }, { "epoch": 2.2916666666666665, "grad_norm": 1.2598690095489433, "learning_rate": 1.6046384066894565e-06, "loss": 0.3117, "step": 24420 }, { "epoch": 2.2917605105105103, "grad_norm": 1.129194257889687, "learning_rate": 1.6042376495442323e-06, "loss": 0.3143, "step": 24421 }, { "epoch": 2.2918543543543546, "grad_norm": 1.3113495503826083, "learning_rate": 1.603836932887084e-06, "loss": 0.3191, "step": 24422 }, { "epoch": 2.291948198198198, "grad_norm": 1.1896674614896274, "learning_rate": 1.6034362567227874e-06, "loss": 0.2839, "step": 24423 }, { "epoch": 2.292042042042042, "grad_norm": 0.9889714662485434, "learning_rate": 1.6030356210561188e-06, "loss": 0.3408, "step": 24424 }, { "epoch": 2.292135885885886, "grad_norm": 1.722028232523489, "learning_rate": 1.6026350258918577e-06, "loss": 0.2856, "step": 24425 }, { "epoch": 2.29222972972973, "grad_norm": 1.2041443100114408, "learning_rate": 1.6022344712347788e-06, "loss": 0.3478, "step": 24426 }, { "epoch": 2.2923235735735736, "grad_norm": 1.23681789122887, "learning_rate": 1.6018339570896585e-06, "loss": 0.3286, "step": 24427 }, { "epoch": 2.2924174174174174, "grad_norm": 0.9762825724431778, "learning_rate": 1.6014334834612715e-06, "loss": 0.3002, "step": 24428 }, { "epoch": 2.2925112612612613, "grad_norm": 0.9650093263256231, "learning_rate": 1.6010330503543935e-06, "loss": 0.3163, "step": 24429 }, { "epoch": 2.292605105105105, "grad_norm": 1.1136298639052957, "learning_rate": 1.6006326577737967e-06, "loss": 0.3227, "step": 24430 }, { "epoch": 2.292698948948949, "grad_norm": 1.1906083414288884, "learning_rate": 1.6002323057242586e-06, "loss": 0.2822, "step": 24431 }, { "epoch": 2.2927927927927927, "grad_norm": 1.1821012622351805, "learning_rate": 1.5998319942105505e-06, "loss": 0.3032, "step": 24432 }, { "epoch": 2.2928866366366365, "grad_norm": 1.20131327339919, "learning_rate": 1.5994317232374446e-06, "loss": 0.3283, "step": 24433 }, { "epoch": 2.2929804804804803, "grad_norm": 1.1079622883238465, "learning_rate": 1.5990314928097156e-06, "loss": 0.3133, "step": 24434 }, { "epoch": 2.2930743243243246, "grad_norm": 1.2922224985793942, "learning_rate": 1.5986313029321342e-06, "loss": 0.3122, "step": 24435 }, { "epoch": 2.293168168168168, "grad_norm": 1.249877680313643, "learning_rate": 1.5982311536094707e-06, "loss": 0.3564, "step": 24436 }, { "epoch": 2.293262012012012, "grad_norm": 1.1069492007096895, "learning_rate": 1.5978310448464995e-06, "loss": 0.3197, "step": 24437 }, { "epoch": 2.293355855855856, "grad_norm": 1.1325201706738794, "learning_rate": 1.597430976647989e-06, "loss": 0.3211, "step": 24438 }, { "epoch": 2.2934496996997, "grad_norm": 1.176490540829047, "learning_rate": 1.5970309490187091e-06, "loss": 0.2902, "step": 24439 }, { "epoch": 2.2935435435435436, "grad_norm": 10.784396879039063, "learning_rate": 1.5966309619634302e-06, "loss": 0.3254, "step": 24440 }, { "epoch": 2.2936373873873874, "grad_norm": 1.1119324334818406, "learning_rate": 1.596231015486921e-06, "loss": 0.2971, "step": 24441 }, { "epoch": 2.2937312312312312, "grad_norm": 1.5153067743260353, "learning_rate": 1.5958311095939477e-06, "loss": 0.3146, "step": 24442 }, { "epoch": 2.293825075075075, "grad_norm": 1.0855738945087776, "learning_rate": 1.5954312442892828e-06, "loss": 0.3472, "step": 24443 }, { "epoch": 2.293918918918919, "grad_norm": 1.013617889317078, "learning_rate": 1.5950314195776916e-06, "loss": 0.3149, "step": 24444 }, { "epoch": 2.2940127627627627, "grad_norm": 1.3747784343379372, "learning_rate": 1.59463163546394e-06, "loss": 0.2903, "step": 24445 }, { "epoch": 2.2941066066066065, "grad_norm": 1.3979110186393338, "learning_rate": 1.5942318919527978e-06, "loss": 0.2847, "step": 24446 }, { "epoch": 2.2942004504504503, "grad_norm": 1.2443018680727305, "learning_rate": 1.59383218904903e-06, "loss": 0.3001, "step": 24447 }, { "epoch": 2.294294294294294, "grad_norm": 1.3063435621040194, "learning_rate": 1.5934325267573996e-06, "loss": 0.3173, "step": 24448 }, { "epoch": 2.294388138138138, "grad_norm": 1.1122314801233097, "learning_rate": 1.5930329050826764e-06, "loss": 0.3081, "step": 24449 }, { "epoch": 2.294481981981982, "grad_norm": 1.306019477788375, "learning_rate": 1.5926333240296222e-06, "loss": 0.2865, "step": 24450 }, { "epoch": 2.294575825825826, "grad_norm": 1.1209955415637338, "learning_rate": 1.5922337836030021e-06, "loss": 0.342, "step": 24451 }, { "epoch": 2.29466966966967, "grad_norm": 1.0524078607939684, "learning_rate": 1.5918342838075802e-06, "loss": 0.2947, "step": 24452 }, { "epoch": 2.2947635135135136, "grad_norm": 1.2217240495620123, "learning_rate": 1.5914348246481187e-06, "loss": 0.2932, "step": 24453 }, { "epoch": 2.2948573573573574, "grad_norm": 1.1683188914284794, "learning_rate": 1.5910354061293798e-06, "loss": 0.3169, "step": 24454 }, { "epoch": 2.294951201201201, "grad_norm": 1.1951900860127767, "learning_rate": 1.5906360282561278e-06, "loss": 0.3251, "step": 24455 }, { "epoch": 2.295045045045045, "grad_norm": 1.268998424175038, "learning_rate": 1.5902366910331245e-06, "loss": 0.3345, "step": 24456 }, { "epoch": 2.295138888888889, "grad_norm": 1.1154381003572662, "learning_rate": 1.5898373944651286e-06, "loss": 0.255, "step": 24457 }, { "epoch": 2.2952327327327327, "grad_norm": 1.1137127258341744, "learning_rate": 1.589438138556904e-06, "loss": 0.2704, "step": 24458 }, { "epoch": 2.2953265765765765, "grad_norm": 1.1578232867469018, "learning_rate": 1.5890389233132103e-06, "loss": 0.3019, "step": 24459 }, { "epoch": 2.2954204204204203, "grad_norm": 0.9998107885621025, "learning_rate": 1.588639748738805e-06, "loss": 0.2745, "step": 24460 }, { "epoch": 2.295514264264264, "grad_norm": 1.1111647093596173, "learning_rate": 1.5882406148384517e-06, "loss": 0.3419, "step": 24461 }, { "epoch": 2.295608108108108, "grad_norm": 1.194490280530086, "learning_rate": 1.5878415216169064e-06, "loss": 0.339, "step": 24462 }, { "epoch": 2.295701951951952, "grad_norm": 0.9869244636661878, "learning_rate": 1.5874424690789287e-06, "loss": 0.2833, "step": 24463 }, { "epoch": 2.295795795795796, "grad_norm": 1.0635116049763884, "learning_rate": 1.587043457229276e-06, "loss": 0.3014, "step": 24464 }, { "epoch": 2.2958896396396398, "grad_norm": 1.0250271243739983, "learning_rate": 1.5866444860727053e-06, "loss": 0.3225, "step": 24465 }, { "epoch": 2.2959834834834836, "grad_norm": 1.0851117937800787, "learning_rate": 1.5862455556139732e-06, "loss": 0.3045, "step": 24466 }, { "epoch": 2.2960773273273274, "grad_norm": 0.9949590385037166, "learning_rate": 1.5858466658578387e-06, "loss": 0.3003, "step": 24467 }, { "epoch": 2.296171171171171, "grad_norm": 0.9698205214233707, "learning_rate": 1.585447816809056e-06, "loss": 0.3084, "step": 24468 }, { "epoch": 2.296265015015015, "grad_norm": 1.371326672749599, "learning_rate": 1.585049008472379e-06, "loss": 0.2845, "step": 24469 }, { "epoch": 2.296358858858859, "grad_norm": 1.111610632936318, "learning_rate": 1.5846502408525666e-06, "loss": 0.3442, "step": 24470 }, { "epoch": 2.2964527027027026, "grad_norm": 1.189456192769778, "learning_rate": 1.5842515139543713e-06, "loss": 0.2741, "step": 24471 }, { "epoch": 2.2965465465465464, "grad_norm": 1.083113295826151, "learning_rate": 1.5838528277825454e-06, "loss": 0.2971, "step": 24472 }, { "epoch": 2.2966403903903903, "grad_norm": 1.1664221516664557, "learning_rate": 1.583454182341846e-06, "loss": 0.3173, "step": 24473 }, { "epoch": 2.296734234234234, "grad_norm": 1.8404603019888701, "learning_rate": 1.5830555776370243e-06, "loss": 0.3112, "step": 24474 }, { "epoch": 2.296828078078078, "grad_norm": 1.261418574128449, "learning_rate": 1.5826570136728336e-06, "loss": 0.3065, "step": 24475 }, { "epoch": 2.296921921921922, "grad_norm": 1.2217069492029513, "learning_rate": 1.582258490454025e-06, "loss": 0.2981, "step": 24476 }, { "epoch": 2.297015765765766, "grad_norm": 1.1450346148426596, "learning_rate": 1.5818600079853508e-06, "loss": 0.3001, "step": 24477 }, { "epoch": 2.2971096096096097, "grad_norm": 1.0611833623871827, "learning_rate": 1.5814615662715604e-06, "loss": 0.3, "step": 24478 }, { "epoch": 2.2972034534534536, "grad_norm": 1.4195675605463782, "learning_rate": 1.581063165317408e-06, "loss": 0.3214, "step": 24479 }, { "epoch": 2.2972972972972974, "grad_norm": 1.1087340486238273, "learning_rate": 1.5806648051276418e-06, "loss": 0.3078, "step": 24480 }, { "epoch": 2.297391141141141, "grad_norm": 1.1749189158323, "learning_rate": 1.58026648570701e-06, "loss": 0.3178, "step": 24481 }, { "epoch": 2.297484984984985, "grad_norm": 1.0872526134859202, "learning_rate": 1.5798682070602644e-06, "loss": 0.304, "step": 24482 }, { "epoch": 2.297578828828829, "grad_norm": 1.2440425181619859, "learning_rate": 1.5794699691921533e-06, "loss": 0.2738, "step": 24483 }, { "epoch": 2.2976726726726726, "grad_norm": 0.978755355049057, "learning_rate": 1.5790717721074223e-06, "loss": 0.2785, "step": 24484 }, { "epoch": 2.2977665165165164, "grad_norm": 1.0666833210928734, "learning_rate": 1.5786736158108228e-06, "loss": 0.2945, "step": 24485 }, { "epoch": 2.2978603603603602, "grad_norm": 1.185399836649684, "learning_rate": 1.578275500307101e-06, "loss": 0.2888, "step": 24486 }, { "epoch": 2.297954204204204, "grad_norm": 1.471319781384057, "learning_rate": 1.5778774256010026e-06, "loss": 0.3185, "step": 24487 }, { "epoch": 2.298048048048048, "grad_norm": 1.1754708773389573, "learning_rate": 1.5774793916972748e-06, "loss": 0.3398, "step": 24488 }, { "epoch": 2.298141891891892, "grad_norm": 1.0239422641863427, "learning_rate": 1.5770813986006628e-06, "loss": 0.3012, "step": 24489 }, { "epoch": 2.298235735735736, "grad_norm": 1.0004792068785162, "learning_rate": 1.5766834463159102e-06, "loss": 0.3187, "step": 24490 }, { "epoch": 2.2983295795795797, "grad_norm": 1.0961139438795444, "learning_rate": 1.5762855348477657e-06, "loss": 0.3245, "step": 24491 }, { "epoch": 2.2984234234234235, "grad_norm": 1.2226480747628568, "learning_rate": 1.5758876642009719e-06, "loss": 0.2763, "step": 24492 }, { "epoch": 2.2985172672672673, "grad_norm": 1.0925647283516833, "learning_rate": 1.575489834380271e-06, "loss": 0.2765, "step": 24493 }, { "epoch": 2.298611111111111, "grad_norm": 1.164656030159957, "learning_rate": 1.575092045390409e-06, "loss": 0.2865, "step": 24494 }, { "epoch": 2.298704954954955, "grad_norm": 1.1984667559528819, "learning_rate": 1.574694297236128e-06, "loss": 0.3286, "step": 24495 }, { "epoch": 2.298798798798799, "grad_norm": 1.03999815813275, "learning_rate": 1.5742965899221679e-06, "loss": 0.2709, "step": 24496 }, { "epoch": 2.2988926426426426, "grad_norm": 1.1845044996925316, "learning_rate": 1.5738989234532747e-06, "loss": 0.2919, "step": 24497 }, { "epoch": 2.2989864864864864, "grad_norm": 1.103384414762348, "learning_rate": 1.573501297834188e-06, "loss": 0.3221, "step": 24498 }, { "epoch": 2.29908033033033, "grad_norm": 1.077644147170173, "learning_rate": 1.573103713069648e-06, "loss": 0.3, "step": 24499 }, { "epoch": 2.299174174174174, "grad_norm": 1.1065453444257434, "learning_rate": 1.5727061691643964e-06, "loss": 0.3046, "step": 24500 }, { "epoch": 2.299268018018018, "grad_norm": 1.0904889087292122, "learning_rate": 1.5723086661231724e-06, "loss": 0.3113, "step": 24501 }, { "epoch": 2.299361861861862, "grad_norm": 1.2733124824826552, "learning_rate": 1.571911203950714e-06, "loss": 0.3605, "step": 24502 }, { "epoch": 2.299455705705706, "grad_norm": 1.2385905955320158, "learning_rate": 1.571513782651763e-06, "loss": 0.3088, "step": 24503 }, { "epoch": 2.2995495495495497, "grad_norm": 1.1672897583593322, "learning_rate": 1.5711164022310571e-06, "loss": 0.3075, "step": 24504 }, { "epoch": 2.2996433933933935, "grad_norm": 1.4591283862690834, "learning_rate": 1.5707190626933322e-06, "loss": 0.3141, "step": 24505 }, { "epoch": 2.2997372372372373, "grad_norm": 1.3745576820966614, "learning_rate": 1.570321764043329e-06, "loss": 0.2808, "step": 24506 }, { "epoch": 2.299831081081081, "grad_norm": 1.5781729297043157, "learning_rate": 1.5699245062857832e-06, "loss": 0.3354, "step": 24507 }, { "epoch": 2.299924924924925, "grad_norm": 1.2088851241018548, "learning_rate": 1.5695272894254292e-06, "loss": 0.2946, "step": 24508 }, { "epoch": 2.3000187687687688, "grad_norm": 1.2341705255391688, "learning_rate": 1.5691301134670072e-06, "loss": 0.2885, "step": 24509 }, { "epoch": 2.3001126126126126, "grad_norm": 1.03968118721338, "learning_rate": 1.5687329784152506e-06, "loss": 0.307, "step": 24510 }, { "epoch": 2.3002064564564564, "grad_norm": 1.1170919435033713, "learning_rate": 1.5683358842748942e-06, "loss": 0.2866, "step": 24511 }, { "epoch": 2.3003003003003, "grad_norm": 1.0521307336967864, "learning_rate": 1.5679388310506733e-06, "loss": 0.3459, "step": 24512 }, { "epoch": 2.300394144144144, "grad_norm": 0.9592007093804781, "learning_rate": 1.5675418187473212e-06, "loss": 0.3202, "step": 24513 }, { "epoch": 2.300487987987988, "grad_norm": 1.1494918654824757, "learning_rate": 1.5671448473695706e-06, "loss": 0.2923, "step": 24514 }, { "epoch": 2.300581831831832, "grad_norm": 1.212178776859726, "learning_rate": 1.5667479169221577e-06, "loss": 0.3563, "step": 24515 }, { "epoch": 2.3006756756756754, "grad_norm": 1.1543571282220548, "learning_rate": 1.5663510274098137e-06, "loss": 0.3169, "step": 24516 }, { "epoch": 2.3007695195195197, "grad_norm": 3.5960885268286193, "learning_rate": 1.5659541788372685e-06, "loss": 0.3036, "step": 24517 }, { "epoch": 2.3008633633633635, "grad_norm": 1.205602465586082, "learning_rate": 1.5655573712092575e-06, "loss": 0.2999, "step": 24518 }, { "epoch": 2.3009572072072073, "grad_norm": 1.5980422190704653, "learning_rate": 1.5651606045305102e-06, "loss": 0.3665, "step": 24519 }, { "epoch": 2.301051051051051, "grad_norm": 1.089903642603186, "learning_rate": 1.5647638788057556e-06, "loss": 0.3342, "step": 24520 }, { "epoch": 2.301144894894895, "grad_norm": 1.1671755788280562, "learning_rate": 1.5643671940397271e-06, "loss": 0.3384, "step": 24521 }, { "epoch": 2.3012387387387387, "grad_norm": 1.156696100073983, "learning_rate": 1.5639705502371532e-06, "loss": 0.3173, "step": 24522 }, { "epoch": 2.3013325825825826, "grad_norm": 0.9369730409605787, "learning_rate": 1.5635739474027612e-06, "loss": 0.3413, "step": 24523 }, { "epoch": 2.3014264264264264, "grad_norm": 1.1911285194033912, "learning_rate": 1.5631773855412847e-06, "loss": 0.329, "step": 24524 }, { "epoch": 2.30152027027027, "grad_norm": 1.074264880460371, "learning_rate": 1.5627808646574466e-06, "loss": 0.3352, "step": 24525 }, { "epoch": 2.301614114114114, "grad_norm": 1.0162142633217057, "learning_rate": 1.5623843847559756e-06, "loss": 0.2862, "step": 24526 }, { "epoch": 2.301707957957958, "grad_norm": 1.0932873994269545, "learning_rate": 1.5619879458416016e-06, "loss": 0.2817, "step": 24527 }, { "epoch": 2.301801801801802, "grad_norm": 1.0979126884440829, "learning_rate": 1.5615915479190502e-06, "loss": 0.3277, "step": 24528 }, { "epoch": 2.3018956456456454, "grad_norm": 0.9512098639435, "learning_rate": 1.5611951909930457e-06, "loss": 0.314, "step": 24529 }, { "epoch": 2.3019894894894897, "grad_norm": 1.2609106402793477, "learning_rate": 1.5607988750683172e-06, "loss": 0.3343, "step": 24530 }, { "epoch": 2.3020833333333335, "grad_norm": 1.136716330452634, "learning_rate": 1.5604026001495886e-06, "loss": 0.2732, "step": 24531 }, { "epoch": 2.3021771771771773, "grad_norm": 0.9944286434307102, "learning_rate": 1.5600063662415832e-06, "loss": 0.3348, "step": 24532 }, { "epoch": 2.302271021021021, "grad_norm": 0.9775390648705236, "learning_rate": 1.5596101733490282e-06, "loss": 0.3233, "step": 24533 }, { "epoch": 2.302364864864865, "grad_norm": 1.0458952086781441, "learning_rate": 1.559214021476646e-06, "loss": 0.2522, "step": 24534 }, { "epoch": 2.3024587087087087, "grad_norm": 1.396970951140063, "learning_rate": 1.5588179106291585e-06, "loss": 0.3259, "step": 24535 }, { "epoch": 2.3025525525525525, "grad_norm": 1.2351677546049697, "learning_rate": 1.5584218408112911e-06, "loss": 0.2897, "step": 24536 }, { "epoch": 2.3026463963963963, "grad_norm": 1.1496060506876695, "learning_rate": 1.5580258120277657e-06, "loss": 0.3377, "step": 24537 }, { "epoch": 2.30274024024024, "grad_norm": 1.1630118424262839, "learning_rate": 1.5576298242833032e-06, "loss": 0.3258, "step": 24538 }, { "epoch": 2.302834084084084, "grad_norm": 1.0020340696901642, "learning_rate": 1.5572338775826262e-06, "loss": 0.3306, "step": 24539 }, { "epoch": 2.3029279279279278, "grad_norm": 2.693704159442037, "learning_rate": 1.5568379719304543e-06, "loss": 0.3258, "step": 24540 }, { "epoch": 2.3030217717717716, "grad_norm": 0.9559098572852814, "learning_rate": 1.5564421073315067e-06, "loss": 0.2903, "step": 24541 }, { "epoch": 2.3031156156156154, "grad_norm": 1.0403985125361466, "learning_rate": 1.5560462837905071e-06, "loss": 0.3039, "step": 24542 }, { "epoch": 2.3032094594594597, "grad_norm": 1.2405247268980946, "learning_rate": 1.555650501312173e-06, "loss": 0.2898, "step": 24543 }, { "epoch": 2.3033033033033035, "grad_norm": 1.1057563623735358, "learning_rate": 1.555254759901222e-06, "loss": 0.3082, "step": 24544 }, { "epoch": 2.3033971471471473, "grad_norm": 1.1883508837991832, "learning_rate": 1.5548590595623748e-06, "loss": 0.3098, "step": 24545 }, { "epoch": 2.303490990990991, "grad_norm": 0.9582554502896129, "learning_rate": 1.5544634003003489e-06, "loss": 0.312, "step": 24546 }, { "epoch": 2.303584834834835, "grad_norm": 1.1650303676408045, "learning_rate": 1.5540677821198601e-06, "loss": 0.286, "step": 24547 }, { "epoch": 2.3036786786786787, "grad_norm": 1.2237750888271248, "learning_rate": 1.5536722050256282e-06, "loss": 0.3102, "step": 24548 }, { "epoch": 2.3037725225225225, "grad_norm": 1.0681596429429698, "learning_rate": 1.5532766690223678e-06, "loss": 0.3194, "step": 24549 }, { "epoch": 2.3038663663663663, "grad_norm": 1.171101461712523, "learning_rate": 1.5528811741147953e-06, "loss": 0.2995, "step": 24550 }, { "epoch": 2.30396021021021, "grad_norm": 1.3139199061828781, "learning_rate": 1.5524857203076265e-06, "loss": 0.3011, "step": 24551 }, { "epoch": 2.304054054054054, "grad_norm": 1.0300136785774712, "learning_rate": 1.5520903076055765e-06, "loss": 0.3464, "step": 24552 }, { "epoch": 2.3041478978978978, "grad_norm": 1.124138169680231, "learning_rate": 1.5516949360133577e-06, "loss": 0.271, "step": 24553 }, { "epoch": 2.3042417417417416, "grad_norm": 1.4045493099866964, "learning_rate": 1.5512996055356872e-06, "loss": 0.264, "step": 24554 }, { "epoch": 2.3043355855855854, "grad_norm": 1.2107119187613913, "learning_rate": 1.5509043161772779e-06, "loss": 0.2956, "step": 24555 }, { "epoch": 2.3044294294294296, "grad_norm": 0.9086076317900814, "learning_rate": 1.5505090679428408e-06, "loss": 0.2997, "step": 24556 }, { "epoch": 2.3045232732732734, "grad_norm": 1.0631098399821417, "learning_rate": 1.5501138608370914e-06, "loss": 0.2838, "step": 24557 }, { "epoch": 2.3046171171171173, "grad_norm": 1.0595026736788935, "learning_rate": 1.5497186948647408e-06, "loss": 0.3105, "step": 24558 }, { "epoch": 2.304710960960961, "grad_norm": 1.2416099782477685, "learning_rate": 1.5493235700304981e-06, "loss": 0.3022, "step": 24559 }, { "epoch": 2.304804804804805, "grad_norm": 1.218678493912059, "learning_rate": 1.5489284863390786e-06, "loss": 0.3187, "step": 24560 }, { "epoch": 2.3048986486486487, "grad_norm": 0.9986349047643378, "learning_rate": 1.548533443795191e-06, "loss": 0.2994, "step": 24561 }, { "epoch": 2.3049924924924925, "grad_norm": 1.1320141364068785, "learning_rate": 1.5481384424035455e-06, "loss": 0.3373, "step": 24562 }, { "epoch": 2.3050863363363363, "grad_norm": 1.2629379531670757, "learning_rate": 1.5477434821688514e-06, "loss": 0.2844, "step": 24563 }, { "epoch": 2.30518018018018, "grad_norm": 1.156490648986087, "learning_rate": 1.547348563095818e-06, "loss": 0.3051, "step": 24564 }, { "epoch": 2.305274024024024, "grad_norm": 1.1537292690727465, "learning_rate": 1.5469536851891526e-06, "loss": 0.2991, "step": 24565 }, { "epoch": 2.3053678678678677, "grad_norm": 1.1433124560152956, "learning_rate": 1.546558848453566e-06, "loss": 0.3252, "step": 24566 }, { "epoch": 2.3054617117117115, "grad_norm": 0.9880520479430724, "learning_rate": 1.546164052893765e-06, "loss": 0.3272, "step": 24567 }, { "epoch": 2.3055555555555554, "grad_norm": 1.048777865511589, "learning_rate": 1.545769298514455e-06, "loss": 0.2798, "step": 24568 }, { "epoch": 2.3056493993993996, "grad_norm": 1.257605484896329, "learning_rate": 1.5453745853203456e-06, "loss": 0.3081, "step": 24569 }, { "epoch": 2.3057432432432434, "grad_norm": 0.9934582483517853, "learning_rate": 1.544979913316142e-06, "loss": 0.2849, "step": 24570 }, { "epoch": 2.3058370870870872, "grad_norm": 1.1419914998843474, "learning_rate": 1.544585282506547e-06, "loss": 0.3251, "step": 24571 }, { "epoch": 2.305930930930931, "grad_norm": 1.0712066537139668, "learning_rate": 1.5441906928962708e-06, "loss": 0.3121, "step": 24572 }, { "epoch": 2.306024774774775, "grad_norm": 1.1154579426959623, "learning_rate": 1.5437961444900157e-06, "loss": 0.3352, "step": 24573 }, { "epoch": 2.3061186186186187, "grad_norm": 1.067320989495518, "learning_rate": 1.5434016372924854e-06, "loss": 0.2885, "step": 24574 }, { "epoch": 2.3062124624624625, "grad_norm": 1.1325621547164293, "learning_rate": 1.5430071713083843e-06, "loss": 0.3235, "step": 24575 }, { "epoch": 2.3063063063063063, "grad_norm": 0.9847229479502716, "learning_rate": 1.5426127465424163e-06, "loss": 0.2907, "step": 24576 }, { "epoch": 2.30640015015015, "grad_norm": 1.1946646544937018, "learning_rate": 1.542218362999281e-06, "loss": 0.3644, "step": 24577 }, { "epoch": 2.306493993993994, "grad_norm": 1.2315129803691702, "learning_rate": 1.5418240206836849e-06, "loss": 0.3352, "step": 24578 }, { "epoch": 2.3065878378378377, "grad_norm": 1.3706596134458051, "learning_rate": 1.5414297196003286e-06, "loss": 0.3119, "step": 24579 }, { "epoch": 2.3066816816816815, "grad_norm": 1.1881852728170599, "learning_rate": 1.5410354597539107e-06, "loss": 0.3081, "step": 24580 }, { "epoch": 2.3067755255255253, "grad_norm": 1.0116760421352688, "learning_rate": 1.5406412411491355e-06, "loss": 0.2891, "step": 24581 }, { "epoch": 2.3068693693693696, "grad_norm": 1.100931807430284, "learning_rate": 1.5402470637907025e-06, "loss": 0.3372, "step": 24582 }, { "epoch": 2.3069632132132134, "grad_norm": 1.0174870190652632, "learning_rate": 1.5398529276833086e-06, "loss": 0.2996, "step": 24583 }, { "epoch": 2.307057057057057, "grad_norm": 1.1856619683563858, "learning_rate": 1.5394588328316578e-06, "loss": 0.3099, "step": 24584 }, { "epoch": 2.307150900900901, "grad_norm": 1.0777036962334365, "learning_rate": 1.5390647792404462e-06, "loss": 0.3297, "step": 24585 }, { "epoch": 2.307244744744745, "grad_norm": 1.1100050294261916, "learning_rate": 1.5386707669143725e-06, "loss": 0.3468, "step": 24586 }, { "epoch": 2.3073385885885886, "grad_norm": 1.0441511684674982, "learning_rate": 1.5382767958581352e-06, "loss": 0.3201, "step": 24587 }, { "epoch": 2.3074324324324325, "grad_norm": 1.0330219934547222, "learning_rate": 1.5378828660764306e-06, "loss": 0.2884, "step": 24588 }, { "epoch": 2.3075262762762763, "grad_norm": 1.2096261209754149, "learning_rate": 1.5374889775739542e-06, "loss": 0.3607, "step": 24589 }, { "epoch": 2.30762012012012, "grad_norm": 1.2782914228803455, "learning_rate": 1.5370951303554061e-06, "loss": 0.3371, "step": 24590 }, { "epoch": 2.307713963963964, "grad_norm": 1.0815927661221043, "learning_rate": 1.5367013244254807e-06, "loss": 0.3048, "step": 24591 }, { "epoch": 2.3078078078078077, "grad_norm": 1.0269735032436897, "learning_rate": 1.536307559788871e-06, "loss": 0.2927, "step": 24592 }, { "epoch": 2.3079016516516515, "grad_norm": 1.0620600020682316, "learning_rate": 1.5359138364502757e-06, "loss": 0.3431, "step": 24593 }, { "epoch": 2.3079954954954953, "grad_norm": 1.252237679691782, "learning_rate": 1.5355201544143872e-06, "loss": 0.3443, "step": 24594 }, { "epoch": 2.3080893393393396, "grad_norm": 1.4491181786815557, "learning_rate": 1.5351265136858983e-06, "loss": 0.3268, "step": 24595 }, { "epoch": 2.308183183183183, "grad_norm": 1.4953409958242003, "learning_rate": 1.5347329142695056e-06, "loss": 0.3071, "step": 24596 }, { "epoch": 2.308277027027027, "grad_norm": 1.0622014718230561, "learning_rate": 1.5343393561698995e-06, "loss": 0.3148, "step": 24597 }, { "epoch": 2.308370870870871, "grad_norm": 1.195134773109618, "learning_rate": 1.533945839391774e-06, "loss": 0.2796, "step": 24598 }, { "epoch": 2.308464714714715, "grad_norm": 1.1198426683222003, "learning_rate": 1.5335523639398192e-06, "loss": 0.3292, "step": 24599 }, { "epoch": 2.3085585585585586, "grad_norm": 1.1036669915605861, "learning_rate": 1.5331589298187283e-06, "loss": 0.28, "step": 24600 }, { "epoch": 2.3086524024024024, "grad_norm": 1.1077538142626842, "learning_rate": 1.5327655370331895e-06, "loss": 0.3196, "step": 24601 }, { "epoch": 2.3087462462462462, "grad_norm": 1.2127970246972415, "learning_rate": 1.532372185587897e-06, "loss": 0.3451, "step": 24602 }, { "epoch": 2.30884009009009, "grad_norm": 1.0284217833222724, "learning_rate": 1.5319788754875393e-06, "loss": 0.2839, "step": 24603 }, { "epoch": 2.308933933933934, "grad_norm": 0.9858730866558183, "learning_rate": 1.531585606736804e-06, "loss": 0.3032, "step": 24604 }, { "epoch": 2.3090277777777777, "grad_norm": 1.0210747963290039, "learning_rate": 1.5311923793403828e-06, "loss": 0.3072, "step": 24605 }, { "epoch": 2.3091216216216215, "grad_norm": 1.1351022288332893, "learning_rate": 1.5307991933029637e-06, "loss": 0.3507, "step": 24606 }, { "epoch": 2.3092154654654653, "grad_norm": 0.9897303107976605, "learning_rate": 1.530406048629232e-06, "loss": 0.3219, "step": 24607 }, { "epoch": 2.3093093093093096, "grad_norm": 1.1110659295291323, "learning_rate": 1.5300129453238794e-06, "loss": 0.3179, "step": 24608 }, { "epoch": 2.309403153153153, "grad_norm": 1.7589142716015544, "learning_rate": 1.5296198833915905e-06, "loss": 0.3592, "step": 24609 }, { "epoch": 2.309496996996997, "grad_norm": 1.020168957476316, "learning_rate": 1.529226862837052e-06, "loss": 0.3129, "step": 24610 }, { "epoch": 2.309590840840841, "grad_norm": 1.045461276499946, "learning_rate": 1.5288338836649508e-06, "loss": 0.3328, "step": 24611 }, { "epoch": 2.309684684684685, "grad_norm": 1.0637493847580215, "learning_rate": 1.5284409458799716e-06, "loss": 0.2931, "step": 24612 }, { "epoch": 2.3097785285285286, "grad_norm": 1.1662552396204335, "learning_rate": 1.5280480494867978e-06, "loss": 0.3025, "step": 24613 }, { "epoch": 2.3098723723723724, "grad_norm": 1.0720720130552484, "learning_rate": 1.5276551944901169e-06, "loss": 0.3526, "step": 24614 }, { "epoch": 2.3099662162162162, "grad_norm": 1.0781116924109013, "learning_rate": 1.5272623808946124e-06, "loss": 0.3379, "step": 24615 }, { "epoch": 2.31006006006006, "grad_norm": 1.2313266065848698, "learning_rate": 1.5268696087049656e-06, "loss": 0.2855, "step": 24616 }, { "epoch": 2.310153903903904, "grad_norm": 1.2171964634760832, "learning_rate": 1.5264768779258627e-06, "loss": 0.2956, "step": 24617 }, { "epoch": 2.3102477477477477, "grad_norm": 1.090330808509364, "learning_rate": 1.5260841885619842e-06, "loss": 0.3218, "step": 24618 }, { "epoch": 2.3103415915915915, "grad_norm": 1.1567386896292429, "learning_rate": 1.5256915406180116e-06, "loss": 0.3141, "step": 24619 }, { "epoch": 2.3104354354354353, "grad_norm": 1.0065997161699105, "learning_rate": 1.5252989340986296e-06, "loss": 0.279, "step": 24620 }, { "epoch": 2.310529279279279, "grad_norm": 1.0399695747910862, "learning_rate": 1.5249063690085175e-06, "loss": 0.3158, "step": 24621 }, { "epoch": 2.310623123123123, "grad_norm": 1.0528408983409245, "learning_rate": 1.524513845352355e-06, "loss": 0.3122, "step": 24622 }, { "epoch": 2.310716966966967, "grad_norm": 1.007589182956722, "learning_rate": 1.5241213631348233e-06, "loss": 0.2831, "step": 24623 }, { "epoch": 2.310810810810811, "grad_norm": 1.1502242690661408, "learning_rate": 1.5237289223606023e-06, "loss": 0.2967, "step": 24624 }, { "epoch": 2.3109046546546548, "grad_norm": 2.2167757935843495, "learning_rate": 1.5233365230343676e-06, "loss": 0.3174, "step": 24625 }, { "epoch": 2.3109984984984986, "grad_norm": 1.3204780722432354, "learning_rate": 1.5229441651608034e-06, "loss": 0.32, "step": 24626 }, { "epoch": 2.3110923423423424, "grad_norm": 1.1112414644428315, "learning_rate": 1.522551848744585e-06, "loss": 0.3137, "step": 24627 }, { "epoch": 2.311186186186186, "grad_norm": 1.3907776494924604, "learning_rate": 1.5221595737903883e-06, "loss": 0.3036, "step": 24628 }, { "epoch": 2.31128003003003, "grad_norm": 1.2257392330151564, "learning_rate": 1.5217673403028938e-06, "loss": 0.322, "step": 24629 }, { "epoch": 2.311373873873874, "grad_norm": 1.3617177898979018, "learning_rate": 1.5213751482867772e-06, "loss": 0.3157, "step": 24630 }, { "epoch": 2.3114677177177176, "grad_norm": 1.1310220861291158, "learning_rate": 1.5209829977467122e-06, "loss": 0.2983, "step": 24631 }, { "epoch": 2.3115615615615615, "grad_norm": 1.069191555436549, "learning_rate": 1.5205908886873777e-06, "loss": 0.3202, "step": 24632 }, { "epoch": 2.3116554054054053, "grad_norm": 1.3072203997252325, "learning_rate": 1.5201988211134477e-06, "loss": 0.3043, "step": 24633 }, { "epoch": 2.311749249249249, "grad_norm": 1.268668494815555, "learning_rate": 1.5198067950295968e-06, "loss": 0.2636, "step": 24634 }, { "epoch": 2.311843093093093, "grad_norm": 1.2417004441578354, "learning_rate": 1.5194148104404992e-06, "loss": 0.3105, "step": 24635 }, { "epoch": 2.311936936936937, "grad_norm": 1.1045494608864126, "learning_rate": 1.5190228673508284e-06, "loss": 0.3328, "step": 24636 }, { "epoch": 2.312030780780781, "grad_norm": 1.2224022027201475, "learning_rate": 1.5186309657652559e-06, "loss": 0.3414, "step": 24637 }, { "epoch": 2.3121246246246248, "grad_norm": 1.043460560605675, "learning_rate": 1.5182391056884576e-06, "loss": 0.3175, "step": 24638 }, { "epoch": 2.3122184684684686, "grad_norm": 1.0458906662363043, "learning_rate": 1.5178472871251049e-06, "loss": 0.33, "step": 24639 }, { "epoch": 2.3123123123123124, "grad_norm": 0.9876886906043638, "learning_rate": 1.5174555100798666e-06, "loss": 0.2768, "step": 24640 }, { "epoch": 2.312406156156156, "grad_norm": 1.1762596665980578, "learning_rate": 1.517063774557418e-06, "loss": 0.3353, "step": 24641 }, { "epoch": 2.3125, "grad_norm": 0.9590369809761308, "learning_rate": 1.5166720805624275e-06, "loss": 0.3285, "step": 24642 }, { "epoch": 2.312593843843844, "grad_norm": 2.0760330703430556, "learning_rate": 1.5162804280995646e-06, "loss": 0.3143, "step": 24643 }, { "epoch": 2.3126876876876876, "grad_norm": 1.145547025926427, "learning_rate": 1.5158888171735015e-06, "loss": 0.3591, "step": 24644 }, { "epoch": 2.3127815315315314, "grad_norm": 1.1870879587590328, "learning_rate": 1.5154972477889064e-06, "loss": 0.3158, "step": 24645 }, { "epoch": 2.3128753753753752, "grad_norm": 1.0481946040089356, "learning_rate": 1.515105719950446e-06, "loss": 0.3392, "step": 24646 }, { "epoch": 2.312969219219219, "grad_norm": 0.9879490667636582, "learning_rate": 1.5147142336627935e-06, "loss": 0.3292, "step": 24647 }, { "epoch": 2.313063063063063, "grad_norm": 1.204040307308373, "learning_rate": 1.5143227889306117e-06, "loss": 0.2986, "step": 24648 }, { "epoch": 2.313156906906907, "grad_norm": 1.4052139556352903, "learning_rate": 1.5139313857585674e-06, "loss": 0.294, "step": 24649 }, { "epoch": 2.313250750750751, "grad_norm": 0.9345915148972112, "learning_rate": 1.513540024151332e-06, "loss": 0.2961, "step": 24650 }, { "epoch": 2.3133445945945947, "grad_norm": 1.0378532642740868, "learning_rate": 1.5131487041135683e-06, "loss": 0.341, "step": 24651 }, { "epoch": 2.3134384384384385, "grad_norm": 1.0300991035917473, "learning_rate": 1.5127574256499421e-06, "loss": 0.2962, "step": 24652 }, { "epoch": 2.3135322822822824, "grad_norm": 1.100380236168865, "learning_rate": 1.512366188765121e-06, "loss": 0.2956, "step": 24653 }, { "epoch": 2.313626126126126, "grad_norm": 0.954668209211884, "learning_rate": 1.5119749934637684e-06, "loss": 0.2827, "step": 24654 }, { "epoch": 2.31371996996997, "grad_norm": 0.9808226509775161, "learning_rate": 1.5115838397505468e-06, "loss": 0.267, "step": 24655 }, { "epoch": 2.313813813813814, "grad_norm": 1.1309909740037916, "learning_rate": 1.511192727630123e-06, "loss": 0.3271, "step": 24656 }, { "epoch": 2.3139076576576576, "grad_norm": 1.0704338461430996, "learning_rate": 1.5108016571071594e-06, "loss": 0.3015, "step": 24657 }, { "epoch": 2.3140015015015014, "grad_norm": 1.0310434615917714, "learning_rate": 1.5104106281863163e-06, "loss": 0.3152, "step": 24658 }, { "epoch": 2.3140953453453452, "grad_norm": 1.0569889601417783, "learning_rate": 1.5100196408722595e-06, "loss": 0.3345, "step": 24659 }, { "epoch": 2.314189189189189, "grad_norm": 1.0586547806138022, "learning_rate": 1.5096286951696492e-06, "loss": 0.2864, "step": 24660 }, { "epoch": 2.314283033033033, "grad_norm": 1.0373270953763496, "learning_rate": 1.5092377910831467e-06, "loss": 0.269, "step": 24661 }, { "epoch": 2.314376876876877, "grad_norm": 1.1910478992680578, "learning_rate": 1.5088469286174134e-06, "loss": 0.3431, "step": 24662 }, { "epoch": 2.314470720720721, "grad_norm": 0.8857420079507007, "learning_rate": 1.5084561077771086e-06, "loss": 0.3038, "step": 24663 }, { "epoch": 2.3145645645645647, "grad_norm": 1.145095467012863, "learning_rate": 1.5080653285668916e-06, "loss": 0.3188, "step": 24664 }, { "epoch": 2.3146584084084085, "grad_norm": 2.0183386325525667, "learning_rate": 1.5076745909914236e-06, "loss": 0.339, "step": 24665 }, { "epoch": 2.3147522522522523, "grad_norm": 1.1371646435698406, "learning_rate": 1.5072838950553626e-06, "loss": 0.2941, "step": 24666 }, { "epoch": 2.314846096096096, "grad_norm": 1.1045262246936456, "learning_rate": 1.5068932407633651e-06, "loss": 0.3087, "step": 24667 }, { "epoch": 2.31493993993994, "grad_norm": 1.3582687212026632, "learning_rate": 1.5065026281200929e-06, "loss": 0.3179, "step": 24668 }, { "epoch": 2.3150337837837838, "grad_norm": 0.9647538900342981, "learning_rate": 1.5061120571302e-06, "loss": 0.3313, "step": 24669 }, { "epoch": 2.3151276276276276, "grad_norm": 1.315657862809766, "learning_rate": 1.5057215277983433e-06, "loss": 0.3447, "step": 24670 }, { "epoch": 2.3152214714714714, "grad_norm": 2.460782080943779, "learning_rate": 1.5053310401291816e-06, "loss": 0.3294, "step": 24671 }, { "epoch": 2.315315315315315, "grad_norm": 1.3185455284117487, "learning_rate": 1.5049405941273693e-06, "loss": 0.3759, "step": 24672 }, { "epoch": 2.315409159159159, "grad_norm": 1.1356375147112496, "learning_rate": 1.5045501897975618e-06, "loss": 0.3079, "step": 24673 }, { "epoch": 2.315503003003003, "grad_norm": 1.4946080997557831, "learning_rate": 1.504159827144413e-06, "loss": 0.3075, "step": 24674 }, { "epoch": 2.315596846846847, "grad_norm": 1.1588160915038657, "learning_rate": 1.5037695061725788e-06, "loss": 0.3343, "step": 24675 }, { "epoch": 2.3156906906906904, "grad_norm": 1.1289402784248266, "learning_rate": 1.5033792268867104e-06, "loss": 0.3093, "step": 24676 }, { "epoch": 2.3157845345345347, "grad_norm": 1.365109202331917, "learning_rate": 1.5029889892914645e-06, "loss": 0.2951, "step": 24677 }, { "epoch": 2.3158783783783785, "grad_norm": 1.1580146922725643, "learning_rate": 1.5025987933914927e-06, "loss": 0.2956, "step": 24678 }, { "epoch": 2.3159722222222223, "grad_norm": 0.9720015551316837, "learning_rate": 1.5022086391914448e-06, "loss": 0.3369, "step": 24679 }, { "epoch": 2.316066066066066, "grad_norm": 1.0914777301302139, "learning_rate": 1.5018185266959774e-06, "loss": 0.3382, "step": 24680 }, { "epoch": 2.31615990990991, "grad_norm": 5.850031734736585, "learning_rate": 1.5014284559097391e-06, "loss": 0.2909, "step": 24681 }, { "epoch": 2.3162537537537538, "grad_norm": 1.491878899722085, "learning_rate": 1.5010384268373795e-06, "loss": 0.3402, "step": 24682 }, { "epoch": 2.3163475975975976, "grad_norm": 1.1369961429314461, "learning_rate": 1.500648439483552e-06, "loss": 0.308, "step": 24683 }, { "epoch": 2.3164414414414414, "grad_norm": 1.3474396718210533, "learning_rate": 1.5002584938529052e-06, "loss": 0.3086, "step": 24684 }, { "epoch": 2.316535285285285, "grad_norm": 1.020362567145597, "learning_rate": 1.4998685899500882e-06, "loss": 0.312, "step": 24685 }, { "epoch": 2.316629129129129, "grad_norm": 1.0109204898068174, "learning_rate": 1.4994787277797502e-06, "loss": 0.2639, "step": 24686 }, { "epoch": 2.316722972972973, "grad_norm": 1.0064775316419554, "learning_rate": 1.4990889073465386e-06, "loss": 0.3096, "step": 24687 }, { "epoch": 2.316816816816817, "grad_norm": 1.3770494841968972, "learning_rate": 1.4986991286551012e-06, "loss": 0.2848, "step": 24688 }, { "epoch": 2.3169106606606604, "grad_norm": 1.1814611741385317, "learning_rate": 1.4983093917100872e-06, "loss": 0.345, "step": 24689 }, { "epoch": 2.3170045045045047, "grad_norm": 1.2198397855940435, "learning_rate": 1.497919696516143e-06, "loss": 0.2993, "step": 24690 }, { "epoch": 2.3170983483483485, "grad_norm": 1.2593025219663927, "learning_rate": 1.497530043077912e-06, "loss": 0.3047, "step": 24691 }, { "epoch": 2.3171921921921923, "grad_norm": 1.004409430912206, "learning_rate": 1.4971404314000448e-06, "loss": 0.3309, "step": 24692 }, { "epoch": 2.317286036036036, "grad_norm": 1.1402188113110852, "learning_rate": 1.4967508614871845e-06, "loss": 0.3485, "step": 24693 }, { "epoch": 2.31737987987988, "grad_norm": 1.0065497027540034, "learning_rate": 1.4963613333439742e-06, "loss": 0.2806, "step": 24694 }, { "epoch": 2.3174737237237237, "grad_norm": 1.0616652599973426, "learning_rate": 1.4959718469750617e-06, "loss": 0.3214, "step": 24695 }, { "epoch": 2.3175675675675675, "grad_norm": 1.0458621087875477, "learning_rate": 1.4955824023850896e-06, "loss": 0.3284, "step": 24696 }, { "epoch": 2.3176614114114114, "grad_norm": 1.0575353106049277, "learning_rate": 1.4951929995787001e-06, "loss": 0.295, "step": 24697 }, { "epoch": 2.317755255255255, "grad_norm": 0.9817140449407344, "learning_rate": 1.4948036385605375e-06, "loss": 0.3255, "step": 24698 }, { "epoch": 2.317849099099099, "grad_norm": 1.3452655141579204, "learning_rate": 1.494414319335244e-06, "loss": 0.3006, "step": 24699 }, { "epoch": 2.317942942942943, "grad_norm": 1.2243700633185428, "learning_rate": 1.4940250419074593e-06, "loss": 0.2823, "step": 24700 }, { "epoch": 2.3180367867867866, "grad_norm": 1.4644136237544905, "learning_rate": 1.4936358062818278e-06, "loss": 0.3056, "step": 24701 }, { "epoch": 2.3181306306306304, "grad_norm": 1.2370679041403951, "learning_rate": 1.4932466124629895e-06, "loss": 0.2876, "step": 24702 }, { "epoch": 2.3182244744744747, "grad_norm": 1.204015763311912, "learning_rate": 1.4928574604555834e-06, "loss": 0.2741, "step": 24703 }, { "epoch": 2.3183183183183185, "grad_norm": 1.3827414364234742, "learning_rate": 1.4924683502642517e-06, "loss": 0.3318, "step": 24704 }, { "epoch": 2.3184121621621623, "grad_norm": 1.2063079830413328, "learning_rate": 1.492079281893633e-06, "loss": 0.3274, "step": 24705 }, { "epoch": 2.318506006006006, "grad_norm": 0.9799474016607193, "learning_rate": 1.4916902553483643e-06, "loss": 0.3029, "step": 24706 }, { "epoch": 2.31859984984985, "grad_norm": 1.0584551958348742, "learning_rate": 1.4913012706330866e-06, "loss": 0.3229, "step": 24707 }, { "epoch": 2.3186936936936937, "grad_norm": 1.0919889177435478, "learning_rate": 1.4909123277524373e-06, "loss": 0.289, "step": 24708 }, { "epoch": 2.3187875375375375, "grad_norm": 1.027385832252426, "learning_rate": 1.490523426711053e-06, "loss": 0.327, "step": 24709 }, { "epoch": 2.3188813813813813, "grad_norm": 1.08598380948217, "learning_rate": 1.4901345675135714e-06, "loss": 0.3114, "step": 24710 }, { "epoch": 2.318975225225225, "grad_norm": 1.253958333631946, "learning_rate": 1.4897457501646279e-06, "loss": 0.3155, "step": 24711 }, { "epoch": 2.319069069069069, "grad_norm": 1.1438197723358396, "learning_rate": 1.489356974668858e-06, "loss": 0.3145, "step": 24712 }, { "epoch": 2.3191629129129128, "grad_norm": 1.106198927789249, "learning_rate": 1.4889682410308993e-06, "loss": 0.2939, "step": 24713 }, { "epoch": 2.3192567567567566, "grad_norm": 1.1856004205980268, "learning_rate": 1.4885795492553852e-06, "loss": 0.3352, "step": 24714 }, { "epoch": 2.3193506006006004, "grad_norm": 1.1027477150811724, "learning_rate": 1.488190899346949e-06, "loss": 0.3679, "step": 24715 }, { "epoch": 2.3194444444444446, "grad_norm": 1.1260119284611476, "learning_rate": 1.487802291310228e-06, "loss": 0.3201, "step": 24716 }, { "epoch": 2.3195382882882885, "grad_norm": 1.1136489269404464, "learning_rate": 1.4874137251498533e-06, "loss": 0.3257, "step": 24717 }, { "epoch": 2.3196321321321323, "grad_norm": 1.253102027350978, "learning_rate": 1.4870252008704566e-06, "loss": 0.3227, "step": 24718 }, { "epoch": 2.319725975975976, "grad_norm": 1.6377326026890424, "learning_rate": 1.4866367184766734e-06, "loss": 0.3139, "step": 24719 }, { "epoch": 2.31981981981982, "grad_norm": 1.2563924938580837, "learning_rate": 1.486248277973134e-06, "loss": 0.3161, "step": 24720 }, { "epoch": 2.3199136636636637, "grad_norm": 1.238022501609203, "learning_rate": 1.4858598793644707e-06, "loss": 0.3148, "step": 24721 }, { "epoch": 2.3200075075075075, "grad_norm": 1.0350259946084555, "learning_rate": 1.485471522655313e-06, "loss": 0.3028, "step": 24722 }, { "epoch": 2.3201013513513513, "grad_norm": 1.2182491146560979, "learning_rate": 1.4850832078502918e-06, "loss": 0.3181, "step": 24723 }, { "epoch": 2.320195195195195, "grad_norm": 3.326700283821064, "learning_rate": 1.4846949349540356e-06, "loss": 0.2801, "step": 24724 }, { "epoch": 2.320289039039039, "grad_norm": 1.1782065270884912, "learning_rate": 1.4843067039711779e-06, "loss": 0.3299, "step": 24725 }, { "epoch": 2.3203828828828827, "grad_norm": 1.1723363764559664, "learning_rate": 1.4839185149063444e-06, "loss": 0.3431, "step": 24726 }, { "epoch": 2.3204767267267266, "grad_norm": 0.9913673678905319, "learning_rate": 1.4835303677641626e-06, "loss": 0.313, "step": 24727 }, { "epoch": 2.3205705705705704, "grad_norm": 0.9938091080113303, "learning_rate": 1.4831422625492643e-06, "loss": 0.3401, "step": 24728 }, { "epoch": 2.3206644144144146, "grad_norm": 1.192967530820738, "learning_rate": 1.4827541992662741e-06, "loss": 0.3194, "step": 24729 }, { "epoch": 2.3207582582582584, "grad_norm": 1.1429701953797908, "learning_rate": 1.4823661779198179e-06, "loss": 0.304, "step": 24730 }, { "epoch": 2.3208521021021022, "grad_norm": 1.5137406226623384, "learning_rate": 1.481978198514526e-06, "loss": 0.289, "step": 24731 }, { "epoch": 2.320945945945946, "grad_norm": 1.0030432706701438, "learning_rate": 1.4815902610550214e-06, "loss": 0.3332, "step": 24732 }, { "epoch": 2.32103978978979, "grad_norm": 1.1514377105356135, "learning_rate": 1.4812023655459306e-06, "loss": 0.3069, "step": 24733 }, { "epoch": 2.3211336336336337, "grad_norm": 1.1552161355249762, "learning_rate": 1.4808145119918776e-06, "loss": 0.3492, "step": 24734 }, { "epoch": 2.3212274774774775, "grad_norm": 1.128584125101866, "learning_rate": 1.4804267003974881e-06, "loss": 0.3327, "step": 24735 }, { "epoch": 2.3213213213213213, "grad_norm": 1.0824967577597902, "learning_rate": 1.4800389307673835e-06, "loss": 0.2735, "step": 24736 }, { "epoch": 2.321415165165165, "grad_norm": 1.3335582261131815, "learning_rate": 1.4796512031061905e-06, "loss": 0.3161, "step": 24737 }, { "epoch": 2.321509009009009, "grad_norm": 1.1519847129798972, "learning_rate": 1.4792635174185305e-06, "loss": 0.3249, "step": 24738 }, { "epoch": 2.3216028528528527, "grad_norm": 0.952748529331791, "learning_rate": 1.4788758737090242e-06, "loss": 0.2942, "step": 24739 }, { "epoch": 2.3216966966966965, "grad_norm": 1.4550603136218065, "learning_rate": 1.4784882719822967e-06, "loss": 0.2852, "step": 24740 }, { "epoch": 2.3217905405405403, "grad_norm": 1.6089848326165657, "learning_rate": 1.4781007122429686e-06, "loss": 0.3173, "step": 24741 }, { "epoch": 2.3218843843843846, "grad_norm": 1.145652219198405, "learning_rate": 1.4777131944956585e-06, "loss": 0.3176, "step": 24742 }, { "epoch": 2.3219782282282284, "grad_norm": 1.0079315186476305, "learning_rate": 1.4773257187449896e-06, "loss": 0.302, "step": 24743 }, { "epoch": 2.3220720720720722, "grad_norm": 1.0626280093084945, "learning_rate": 1.4769382849955815e-06, "loss": 0.3541, "step": 24744 }, { "epoch": 2.322165915915916, "grad_norm": 1.3178099150738847, "learning_rate": 1.4765508932520523e-06, "loss": 0.3276, "step": 24745 }, { "epoch": 2.32225975975976, "grad_norm": 1.0968950992505302, "learning_rate": 1.476163543519022e-06, "loss": 0.3255, "step": 24746 }, { "epoch": 2.3223536036036037, "grad_norm": 1.012106378218815, "learning_rate": 1.4757762358011079e-06, "loss": 0.306, "step": 24747 }, { "epoch": 2.3224474474474475, "grad_norm": 1.179630629551867, "learning_rate": 1.4753889701029273e-06, "loss": 0.3308, "step": 24748 }, { "epoch": 2.3225412912912913, "grad_norm": 1.1269428334366087, "learning_rate": 1.4750017464291e-06, "loss": 0.3552, "step": 24749 }, { "epoch": 2.322635135135135, "grad_norm": 1.0639906522138975, "learning_rate": 1.4746145647842425e-06, "loss": 0.3168, "step": 24750 }, { "epoch": 2.322728978978979, "grad_norm": 1.1438260151087842, "learning_rate": 1.4742274251729682e-06, "loss": 0.3444, "step": 24751 }, { "epoch": 2.3228228228228227, "grad_norm": 1.1307076038506008, "learning_rate": 1.473840327599897e-06, "loss": 0.3314, "step": 24752 }, { "epoch": 2.3229166666666665, "grad_norm": 1.0132589649238792, "learning_rate": 1.4734532720696427e-06, "loss": 0.2851, "step": 24753 }, { "epoch": 2.3230105105105103, "grad_norm": 0.9003402556167517, "learning_rate": 1.4730662585868188e-06, "loss": 0.296, "step": 24754 }, { "epoch": 2.3231043543543546, "grad_norm": 1.3724364967981586, "learning_rate": 1.472679287156042e-06, "loss": 0.3172, "step": 24755 }, { "epoch": 2.323198198198198, "grad_norm": 1.2390549886483004, "learning_rate": 1.4722923577819253e-06, "loss": 0.3019, "step": 24756 }, { "epoch": 2.323292042042042, "grad_norm": 1.3435315003862751, "learning_rate": 1.4719054704690828e-06, "loss": 0.2968, "step": 24757 }, { "epoch": 2.323385885885886, "grad_norm": 1.0800318785649554, "learning_rate": 1.4715186252221258e-06, "loss": 0.3391, "step": 24758 }, { "epoch": 2.32347972972973, "grad_norm": 1.298263722854707, "learning_rate": 1.4711318220456678e-06, "loss": 0.3155, "step": 24759 }, { "epoch": 2.3235735735735736, "grad_norm": 1.1109018436899274, "learning_rate": 1.4707450609443186e-06, "loss": 0.3319, "step": 24760 }, { "epoch": 2.3236674174174174, "grad_norm": 1.0223377800028532, "learning_rate": 1.470358341922693e-06, "loss": 0.2976, "step": 24761 }, { "epoch": 2.3237612612612613, "grad_norm": 1.0889999200011886, "learning_rate": 1.4699716649854007e-06, "loss": 0.3029, "step": 24762 }, { "epoch": 2.323855105105105, "grad_norm": 1.2371371466354362, "learning_rate": 1.4695850301370495e-06, "loss": 0.2757, "step": 24763 }, { "epoch": 2.323948948948949, "grad_norm": 1.15514574871194, "learning_rate": 1.4691984373822533e-06, "loss": 0.3296, "step": 24764 }, { "epoch": 2.3240427927927927, "grad_norm": 1.051878561224767, "learning_rate": 1.4688118867256202e-06, "loss": 0.3375, "step": 24765 }, { "epoch": 2.3241366366366365, "grad_norm": 1.5656352700545924, "learning_rate": 1.4684253781717562e-06, "loss": 0.3307, "step": 24766 }, { "epoch": 2.3242304804804803, "grad_norm": 1.6528767173775247, "learning_rate": 1.468038911725274e-06, "loss": 0.2911, "step": 24767 }, { "epoch": 2.3243243243243246, "grad_norm": 1.2099825843151002, "learning_rate": 1.46765248739078e-06, "loss": 0.3088, "step": 24768 }, { "epoch": 2.324418168168168, "grad_norm": 1.137273203278737, "learning_rate": 1.4672661051728787e-06, "loss": 0.303, "step": 24769 }, { "epoch": 2.324512012012012, "grad_norm": 1.1329668391527399, "learning_rate": 1.4668797650761828e-06, "loss": 0.3084, "step": 24770 }, { "epoch": 2.324605855855856, "grad_norm": 1.0823772620001377, "learning_rate": 1.4664934671052944e-06, "loss": 0.3153, "step": 24771 }, { "epoch": 2.3246996996997, "grad_norm": 1.1415293710331933, "learning_rate": 1.466107211264818e-06, "loss": 0.3035, "step": 24772 }, { "epoch": 2.3247935435435436, "grad_norm": 1.0782398715193213, "learning_rate": 1.4657209975593628e-06, "loss": 0.306, "step": 24773 }, { "epoch": 2.3248873873873874, "grad_norm": 1.3420201788380577, "learning_rate": 1.4653348259935324e-06, "loss": 0.3134, "step": 24774 }, { "epoch": 2.3249812312312312, "grad_norm": 1.106987829230216, "learning_rate": 1.4649486965719295e-06, "loss": 0.3528, "step": 24775 }, { "epoch": 2.325075075075075, "grad_norm": 0.9904591344776369, "learning_rate": 1.4645626092991605e-06, "loss": 0.3025, "step": 24776 }, { "epoch": 2.325168918918919, "grad_norm": 1.1290817295669864, "learning_rate": 1.4641765641798278e-06, "loss": 0.3108, "step": 24777 }, { "epoch": 2.3252627627627627, "grad_norm": 1.076090593792955, "learning_rate": 1.4637905612185322e-06, "loss": 0.3333, "step": 24778 }, { "epoch": 2.3253566066066065, "grad_norm": 1.0590357875065302, "learning_rate": 1.46340460041988e-06, "loss": 0.3398, "step": 24779 }, { "epoch": 2.3254504504504503, "grad_norm": 1.2471312672394923, "learning_rate": 1.4630186817884707e-06, "loss": 0.3354, "step": 24780 }, { "epoch": 2.325544294294294, "grad_norm": 1.3289279521330832, "learning_rate": 1.462632805328904e-06, "loss": 0.3248, "step": 24781 }, { "epoch": 2.325638138138138, "grad_norm": 1.0425693301796741, "learning_rate": 1.4622469710457849e-06, "loss": 0.3288, "step": 24782 }, { "epoch": 2.325731981981982, "grad_norm": 1.3501433936123013, "learning_rate": 1.4618611789437115e-06, "loss": 0.3239, "step": 24783 }, { "epoch": 2.325825825825826, "grad_norm": 1.1438790926391684, "learning_rate": 1.4614754290272832e-06, "loss": 0.3418, "step": 24784 }, { "epoch": 2.32591966966967, "grad_norm": 1.2608150633858117, "learning_rate": 1.4610897213010998e-06, "loss": 0.2924, "step": 24785 }, { "epoch": 2.3260135135135136, "grad_norm": 1.1469803557019522, "learning_rate": 1.4607040557697605e-06, "loss": 0.3401, "step": 24786 }, { "epoch": 2.3261073573573574, "grad_norm": 1.0319176625099165, "learning_rate": 1.4603184324378616e-06, "loss": 0.3051, "step": 24787 }, { "epoch": 2.326201201201201, "grad_norm": 1.2576650883626495, "learning_rate": 1.4599328513100048e-06, "loss": 0.324, "step": 24788 }, { "epoch": 2.326295045045045, "grad_norm": 1.1071676064716356, "learning_rate": 1.4595473123907844e-06, "loss": 0.3094, "step": 24789 }, { "epoch": 2.326388888888889, "grad_norm": 1.1054093468791102, "learning_rate": 1.459161815684797e-06, "loss": 0.3276, "step": 24790 }, { "epoch": 2.3264827327327327, "grad_norm": 1.123145398559657, "learning_rate": 1.458776361196641e-06, "loss": 0.2906, "step": 24791 }, { "epoch": 2.3265765765765765, "grad_norm": 1.1378525244243025, "learning_rate": 1.4583909489309117e-06, "loss": 0.3289, "step": 24792 }, { "epoch": 2.3266704204204203, "grad_norm": 0.9606422540674018, "learning_rate": 1.4580055788922026e-06, "loss": 0.2516, "step": 24793 }, { "epoch": 2.326764264264264, "grad_norm": 1.9742886232558208, "learning_rate": 1.4576202510851113e-06, "loss": 0.3204, "step": 24794 }, { "epoch": 2.326858108108108, "grad_norm": 1.037027158662905, "learning_rate": 1.4572349655142304e-06, "loss": 0.321, "step": 24795 }, { "epoch": 2.326951951951952, "grad_norm": 1.2455469743841494, "learning_rate": 1.4568497221841538e-06, "loss": 0.2525, "step": 24796 }, { "epoch": 2.327045795795796, "grad_norm": 1.2266857050017665, "learning_rate": 1.4564645210994748e-06, "loss": 0.2878, "step": 24797 }, { "epoch": 2.3271396396396398, "grad_norm": 1.1350157751683767, "learning_rate": 1.4560793622647874e-06, "loss": 0.2887, "step": 24798 }, { "epoch": 2.3272334834834836, "grad_norm": 1.1540739061347212, "learning_rate": 1.4556942456846807e-06, "loss": 0.3167, "step": 24799 }, { "epoch": 2.3273273273273274, "grad_norm": 1.208060042222536, "learning_rate": 1.4553091713637502e-06, "loss": 0.3139, "step": 24800 }, { "epoch": 2.327421171171171, "grad_norm": 1.1644356701845424, "learning_rate": 1.4549241393065855e-06, "loss": 0.3227, "step": 24801 }, { "epoch": 2.327515015015015, "grad_norm": 1.1392316569869068, "learning_rate": 1.4545391495177758e-06, "loss": 0.2954, "step": 24802 }, { "epoch": 2.327608858858859, "grad_norm": 1.1197981666399315, "learning_rate": 1.454154202001915e-06, "loss": 0.3477, "step": 24803 }, { "epoch": 2.3277027027027026, "grad_norm": 1.1431543751068927, "learning_rate": 1.4537692967635908e-06, "loss": 0.3615, "step": 24804 }, { "epoch": 2.3277965465465464, "grad_norm": 1.160599852328051, "learning_rate": 1.4533844338073915e-06, "loss": 0.3056, "step": 24805 }, { "epoch": 2.3278903903903903, "grad_norm": 0.9883863161738514, "learning_rate": 1.452999613137908e-06, "loss": 0.3201, "step": 24806 }, { "epoch": 2.327984234234234, "grad_norm": 1.3943989266114931, "learning_rate": 1.4526148347597279e-06, "loss": 0.3358, "step": 24807 }, { "epoch": 2.328078078078078, "grad_norm": 1.0587546571080328, "learning_rate": 1.4522300986774385e-06, "loss": 0.3153, "step": 24808 }, { "epoch": 2.328171921921922, "grad_norm": 1.4435812200094773, "learning_rate": 1.4518454048956276e-06, "loss": 0.3168, "step": 24809 }, { "epoch": 2.328265765765766, "grad_norm": 1.076718124036621, "learning_rate": 1.4514607534188807e-06, "loss": 0.2914, "step": 24810 }, { "epoch": 2.3283596096096097, "grad_norm": 1.0728687602373201, "learning_rate": 1.451076144251784e-06, "loss": 0.3487, "step": 24811 }, { "epoch": 2.3284534534534536, "grad_norm": 1.2827933097627795, "learning_rate": 1.4506915773989256e-06, "loss": 0.3095, "step": 24812 }, { "epoch": 2.3285472972972974, "grad_norm": 1.290066127995682, "learning_rate": 1.450307052864889e-06, "loss": 0.3401, "step": 24813 }, { "epoch": 2.328641141141141, "grad_norm": 1.173861221868873, "learning_rate": 1.449922570654258e-06, "loss": 0.3128, "step": 24814 }, { "epoch": 2.328734984984985, "grad_norm": 0.9891071105371103, "learning_rate": 1.449538130771619e-06, "loss": 0.3356, "step": 24815 }, { "epoch": 2.328828828828829, "grad_norm": 1.1889934115725398, "learning_rate": 1.449153733221555e-06, "loss": 0.2727, "step": 24816 }, { "epoch": 2.3289226726726726, "grad_norm": 1.172106690214412, "learning_rate": 1.4487693780086477e-06, "loss": 0.3148, "step": 24817 }, { "epoch": 2.3290165165165164, "grad_norm": 1.3500860508539816, "learning_rate": 1.4483850651374827e-06, "loss": 0.2687, "step": 24818 }, { "epoch": 2.3291103603603602, "grad_norm": 1.0719675804814115, "learning_rate": 1.4480007946126402e-06, "loss": 0.3421, "step": 24819 }, { "epoch": 2.329204204204204, "grad_norm": 1.0808400915322485, "learning_rate": 1.4476165664387022e-06, "loss": 0.3029, "step": 24820 }, { "epoch": 2.329298048048048, "grad_norm": 1.1296956699818035, "learning_rate": 1.4472323806202499e-06, "loss": 0.3146, "step": 24821 }, { "epoch": 2.329391891891892, "grad_norm": 1.079660541341619, "learning_rate": 1.4468482371618642e-06, "loss": 0.3512, "step": 24822 }, { "epoch": 2.329485735735736, "grad_norm": 1.1077016068701444, "learning_rate": 1.4464641360681236e-06, "loss": 0.3023, "step": 24823 }, { "epoch": 2.3295795795795797, "grad_norm": 1.2135008877312559, "learning_rate": 1.4460800773436113e-06, "loss": 0.3257, "step": 24824 }, { "epoch": 2.3296734234234235, "grad_norm": 1.002318101745455, "learning_rate": 1.4456960609929038e-06, "loss": 0.3335, "step": 24825 }, { "epoch": 2.3297672672672673, "grad_norm": 1.28474557894185, "learning_rate": 1.4453120870205794e-06, "loss": 0.3272, "step": 24826 }, { "epoch": 2.329861111111111, "grad_norm": 1.3258381675675441, "learning_rate": 1.4449281554312188e-06, "loss": 0.2812, "step": 24827 }, { "epoch": 2.329954954954955, "grad_norm": 1.1506219779461804, "learning_rate": 1.4445442662293979e-06, "loss": 0.2702, "step": 24828 }, { "epoch": 2.330048798798799, "grad_norm": 1.2025556778272517, "learning_rate": 1.444160419419693e-06, "loss": 0.3668, "step": 24829 }, { "epoch": 2.3301426426426426, "grad_norm": 0.994119577890224, "learning_rate": 1.4437766150066833e-06, "loss": 0.2964, "step": 24830 }, { "epoch": 2.3302364864864864, "grad_norm": 1.0370146613073181, "learning_rate": 1.4433928529949437e-06, "loss": 0.3338, "step": 24831 }, { "epoch": 2.33033033033033, "grad_norm": 1.164922215258219, "learning_rate": 1.44300913338905e-06, "loss": 0.311, "step": 24832 }, { "epoch": 2.330424174174174, "grad_norm": 1.4334559473957469, "learning_rate": 1.4426254561935764e-06, "loss": 0.3071, "step": 24833 }, { "epoch": 2.330518018018018, "grad_norm": 0.9519349018076991, "learning_rate": 1.4422418214130985e-06, "loss": 0.2906, "step": 24834 }, { "epoch": 2.330611861861862, "grad_norm": 1.1765970185090164, "learning_rate": 1.4418582290521888e-06, "loss": 0.2705, "step": 24835 }, { "epoch": 2.330705705705706, "grad_norm": 1.1373570632957557, "learning_rate": 1.4414746791154233e-06, "loss": 0.2953, "step": 24836 }, { "epoch": 2.3307995495495497, "grad_norm": 1.3521517983609728, "learning_rate": 1.441091171607374e-06, "loss": 0.3381, "step": 24837 }, { "epoch": 2.3308933933933935, "grad_norm": 1.2672902632821839, "learning_rate": 1.440707706532612e-06, "loss": 0.2953, "step": 24838 }, { "epoch": 2.3309872372372373, "grad_norm": 1.0303834271598407, "learning_rate": 1.4403242838957127e-06, "loss": 0.2884, "step": 24839 }, { "epoch": 2.331081081081081, "grad_norm": 1.1503968539408456, "learning_rate": 1.4399409037012457e-06, "loss": 0.3185, "step": 24840 }, { "epoch": 2.331174924924925, "grad_norm": 1.0423495037850592, "learning_rate": 1.4395575659537808e-06, "loss": 0.3092, "step": 24841 }, { "epoch": 2.3312687687687688, "grad_norm": 1.0066351632966444, "learning_rate": 1.4391742706578922e-06, "loss": 0.2776, "step": 24842 }, { "epoch": 2.3313626126126126, "grad_norm": 1.0111808960141424, "learning_rate": 1.4387910178181468e-06, "loss": 0.3165, "step": 24843 }, { "epoch": 2.3314564564564564, "grad_norm": 1.0598236677559403, "learning_rate": 1.4384078074391162e-06, "loss": 0.2923, "step": 24844 }, { "epoch": 2.3315503003003, "grad_norm": 1.266986857111756, "learning_rate": 1.4380246395253677e-06, "loss": 0.3267, "step": 24845 }, { "epoch": 2.331644144144144, "grad_norm": 1.1741829296083504, "learning_rate": 1.4376415140814715e-06, "loss": 0.3324, "step": 24846 }, { "epoch": 2.331737987987988, "grad_norm": 1.0406858718270684, "learning_rate": 1.4372584311119924e-06, "loss": 0.2739, "step": 24847 }, { "epoch": 2.331831831831832, "grad_norm": 1.0854115026232953, "learning_rate": 1.4368753906215022e-06, "loss": 0.3454, "step": 24848 }, { "epoch": 2.3319256756756754, "grad_norm": 1.0179769582088103, "learning_rate": 1.4364923926145663e-06, "loss": 0.3004, "step": 24849 }, { "epoch": 2.3320195195195197, "grad_norm": 1.1167761333015378, "learning_rate": 1.436109437095749e-06, "loss": 0.3338, "step": 24850 }, { "epoch": 2.3321133633633635, "grad_norm": 1.2482652740832274, "learning_rate": 1.4357265240696205e-06, "loss": 0.368, "step": 24851 }, { "epoch": 2.3322072072072073, "grad_norm": 1.0168250745235934, "learning_rate": 1.4353436535407438e-06, "loss": 0.3148, "step": 24852 }, { "epoch": 2.332301051051051, "grad_norm": 1.1108498199458228, "learning_rate": 1.4349608255136826e-06, "loss": 0.3096, "step": 24853 }, { "epoch": 2.332394894894895, "grad_norm": 1.0663641680078713, "learning_rate": 1.4345780399930047e-06, "loss": 0.3075, "step": 24854 }, { "epoch": 2.3324887387387387, "grad_norm": 1.2354902089598245, "learning_rate": 1.4341952969832723e-06, "loss": 0.3374, "step": 24855 }, { "epoch": 2.3325825825825826, "grad_norm": 1.309855706100644, "learning_rate": 1.4338125964890492e-06, "loss": 0.3118, "step": 24856 }, { "epoch": 2.3326764264264264, "grad_norm": 1.0703538745329833, "learning_rate": 1.4334299385148987e-06, "loss": 0.3664, "step": 24857 }, { "epoch": 2.33277027027027, "grad_norm": 1.1691367031865905, "learning_rate": 1.433047323065382e-06, "loss": 0.3105, "step": 24858 }, { "epoch": 2.332864114114114, "grad_norm": 1.176076116324938, "learning_rate": 1.4326647501450608e-06, "loss": 0.3271, "step": 24859 }, { "epoch": 2.332957957957958, "grad_norm": 0.9888848506842424, "learning_rate": 1.432282219758499e-06, "loss": 0.3223, "step": 24860 }, { "epoch": 2.333051801801802, "grad_norm": 2.050166475411509, "learning_rate": 1.4318997319102563e-06, "loss": 0.3, "step": 24861 }, { "epoch": 2.3331456456456454, "grad_norm": 0.9965162538740012, "learning_rate": 1.4315172866048916e-06, "loss": 0.2667, "step": 24862 }, { "epoch": 2.3332394894894897, "grad_norm": 1.0274495115528897, "learning_rate": 1.4311348838469675e-06, "loss": 0.3239, "step": 24863 }, { "epoch": 2.3333333333333335, "grad_norm": 1.271868852144276, "learning_rate": 1.4307525236410425e-06, "loss": 0.3126, "step": 24864 }, { "epoch": 2.3334271771771773, "grad_norm": 1.3439003626811072, "learning_rate": 1.4303702059916735e-06, "loss": 0.3544, "step": 24865 }, { "epoch": 2.333521021021021, "grad_norm": 0.9836832203515065, "learning_rate": 1.429987930903422e-06, "loss": 0.3096, "step": 24866 }, { "epoch": 2.333614864864865, "grad_norm": 1.2574490954337234, "learning_rate": 1.4296056983808448e-06, "loss": 0.3, "step": 24867 }, { "epoch": 2.3337087087087087, "grad_norm": 1.497472064747764, "learning_rate": 1.4292235084284995e-06, "loss": 0.318, "step": 24868 }, { "epoch": 2.3338025525525525, "grad_norm": 1.244402090191378, "learning_rate": 1.4288413610509422e-06, "loss": 0.2968, "step": 24869 }, { "epoch": 2.3338963963963963, "grad_norm": 1.0534999506327762, "learning_rate": 1.4284592562527294e-06, "loss": 0.2907, "step": 24870 }, { "epoch": 2.33399024024024, "grad_norm": 1.122460803295304, "learning_rate": 1.428077194038416e-06, "loss": 0.3019, "step": 24871 }, { "epoch": 2.334084084084084, "grad_norm": 1.1060566507105256, "learning_rate": 1.42769517441256e-06, "loss": 0.2656, "step": 24872 }, { "epoch": 2.3341779279279278, "grad_norm": 1.4702476042442587, "learning_rate": 1.427313197379715e-06, "loss": 0.3318, "step": 24873 }, { "epoch": 2.3342717717717716, "grad_norm": 1.1610238493118041, "learning_rate": 1.4269312629444336e-06, "loss": 0.3011, "step": 24874 }, { "epoch": 2.3343656156156154, "grad_norm": 1.264694681308986, "learning_rate": 1.4265493711112727e-06, "loss": 0.28, "step": 24875 }, { "epoch": 2.3344594594594597, "grad_norm": 1.3495325181595452, "learning_rate": 1.4261675218847848e-06, "loss": 0.3079, "step": 24876 }, { "epoch": 2.3345533033033035, "grad_norm": 1.236767394653903, "learning_rate": 1.4257857152695199e-06, "loss": 0.341, "step": 24877 }, { "epoch": 2.3346471471471473, "grad_norm": 1.0246794380328508, "learning_rate": 1.4254039512700347e-06, "loss": 0.297, "step": 24878 }, { "epoch": 2.334740990990991, "grad_norm": 1.0873603942239798, "learning_rate": 1.4250222298908783e-06, "loss": 0.3315, "step": 24879 }, { "epoch": 2.334834834834835, "grad_norm": 1.0637345451339313, "learning_rate": 1.424640551136603e-06, "loss": 0.3321, "step": 24880 }, { "epoch": 2.3349286786786787, "grad_norm": 1.1667469592668427, "learning_rate": 1.4242589150117592e-06, "loss": 0.3116, "step": 24881 }, { "epoch": 2.3350225225225225, "grad_norm": 1.2073934792436687, "learning_rate": 1.4238773215208973e-06, "loss": 0.3456, "step": 24882 }, { "epoch": 2.3351163663663663, "grad_norm": 1.2708255186567416, "learning_rate": 1.4234957706685647e-06, "loss": 0.3458, "step": 24883 }, { "epoch": 2.33521021021021, "grad_norm": 0.980187390861993, "learning_rate": 1.4231142624593153e-06, "loss": 0.3031, "step": 24884 }, { "epoch": 2.335304054054054, "grad_norm": 1.0657785783459306, "learning_rate": 1.4227327968976956e-06, "loss": 0.3167, "step": 24885 }, { "epoch": 2.3353978978978978, "grad_norm": 1.078789066912795, "learning_rate": 1.4223513739882516e-06, "loss": 0.3278, "step": 24886 }, { "epoch": 2.3354917417417416, "grad_norm": 1.020812554685391, "learning_rate": 1.4219699937355352e-06, "loss": 0.2758, "step": 24887 }, { "epoch": 2.3355855855855854, "grad_norm": 1.03856645828811, "learning_rate": 1.4215886561440917e-06, "loss": 0.3124, "step": 24888 }, { "epoch": 2.3356794294294296, "grad_norm": 1.0418062547331963, "learning_rate": 1.4212073612184663e-06, "loss": 0.3201, "step": 24889 }, { "epoch": 2.3357732732732734, "grad_norm": 1.1711818767026232, "learning_rate": 1.420826108963208e-06, "loss": 0.3434, "step": 24890 }, { "epoch": 2.3358671171171173, "grad_norm": 1.4239787488996114, "learning_rate": 1.4204448993828611e-06, "loss": 0.3804, "step": 24891 }, { "epoch": 2.335960960960961, "grad_norm": 0.9345586849752029, "learning_rate": 1.420063732481971e-06, "loss": 0.2901, "step": 24892 }, { "epoch": 2.336054804804805, "grad_norm": 1.023794793718125, "learning_rate": 1.4196826082650827e-06, "loss": 0.2806, "step": 24893 }, { "epoch": 2.3361486486486487, "grad_norm": 1.0747370634825888, "learning_rate": 1.4193015267367399e-06, "loss": 0.335, "step": 24894 }, { "epoch": 2.3362424924924925, "grad_norm": 1.141945917065037, "learning_rate": 1.418920487901485e-06, "loss": 0.3229, "step": 24895 }, { "epoch": 2.3363363363363363, "grad_norm": 1.1578627894437497, "learning_rate": 1.418539491763864e-06, "loss": 0.3298, "step": 24896 }, { "epoch": 2.33643018018018, "grad_norm": 1.0933280833424872, "learning_rate": 1.4181585383284186e-06, "loss": 0.3197, "step": 24897 }, { "epoch": 2.336524024024024, "grad_norm": 1.1342551044134532, "learning_rate": 1.4177776275996886e-06, "loss": 0.331, "step": 24898 }, { "epoch": 2.3366178678678677, "grad_norm": 1.125528839300135, "learning_rate": 1.4173967595822187e-06, "loss": 0.3351, "step": 24899 }, { "epoch": 2.3367117117117115, "grad_norm": 1.1659611161792396, "learning_rate": 1.4170159342805489e-06, "loss": 0.2639, "step": 24900 }, { "epoch": 2.3368055555555554, "grad_norm": 0.9026777491174058, "learning_rate": 1.4166351516992188e-06, "loss": 0.3103, "step": 24901 }, { "epoch": 2.3368993993993996, "grad_norm": 1.1021952227090586, "learning_rate": 1.4162544118427713e-06, "loss": 0.2541, "step": 24902 }, { "epoch": 2.3369932432432434, "grad_norm": 1.1330201918381955, "learning_rate": 1.4158737147157437e-06, "loss": 0.3224, "step": 24903 }, { "epoch": 2.3370870870870872, "grad_norm": 1.6440878092286464, "learning_rate": 1.415493060322674e-06, "loss": 0.2729, "step": 24904 }, { "epoch": 2.337180930930931, "grad_norm": 1.0919337935535876, "learning_rate": 1.4151124486681039e-06, "loss": 0.2791, "step": 24905 }, { "epoch": 2.337274774774775, "grad_norm": 30.191592630877768, "learning_rate": 1.4147318797565706e-06, "loss": 0.2929, "step": 24906 }, { "epoch": 2.3373686186186187, "grad_norm": 1.1624611633978057, "learning_rate": 1.4143513535926107e-06, "loss": 0.3317, "step": 24907 }, { "epoch": 2.3374624624624625, "grad_norm": 2.748852515506832, "learning_rate": 1.4139708701807614e-06, "loss": 0.3335, "step": 24908 }, { "epoch": 2.3375563063063063, "grad_norm": 1.186800119499949, "learning_rate": 1.4135904295255598e-06, "loss": 0.2723, "step": 24909 }, { "epoch": 2.33765015015015, "grad_norm": 1.2802444661883585, "learning_rate": 1.41321003163154e-06, "loss": 0.2882, "step": 24910 }, { "epoch": 2.337743993993994, "grad_norm": 1.2391210534502104, "learning_rate": 1.4128296765032406e-06, "loss": 0.3669, "step": 24911 }, { "epoch": 2.3378378378378377, "grad_norm": 1.2032355325366888, "learning_rate": 1.4124493641451947e-06, "loss": 0.3063, "step": 24912 }, { "epoch": 2.3379316816816815, "grad_norm": 1.0145777423996767, "learning_rate": 1.4120690945619364e-06, "loss": 0.3415, "step": 24913 }, { "epoch": 2.3380255255255253, "grad_norm": 1.2105612800346537, "learning_rate": 1.411688867758002e-06, "loss": 0.3025, "step": 24914 }, { "epoch": 2.3381193693693696, "grad_norm": 1.0669349463734896, "learning_rate": 1.4113086837379237e-06, "loss": 0.2801, "step": 24915 }, { "epoch": 2.3382132132132134, "grad_norm": 1.1873530680619853, "learning_rate": 1.4109285425062325e-06, "loss": 0.3754, "step": 24916 }, { "epoch": 2.338307057057057, "grad_norm": 1.1421185146993011, "learning_rate": 1.410548444067464e-06, "loss": 0.2951, "step": 24917 }, { "epoch": 2.338400900900901, "grad_norm": 1.4069708500311717, "learning_rate": 1.4101683884261492e-06, "loss": 0.3096, "step": 24918 }, { "epoch": 2.338494744744745, "grad_norm": 1.2736701552698824, "learning_rate": 1.409788375586819e-06, "loss": 0.3005, "step": 24919 }, { "epoch": 2.3385885885885886, "grad_norm": 1.7612497473826094, "learning_rate": 1.4094084055540048e-06, "loss": 0.3108, "step": 24920 }, { "epoch": 2.3386824324324325, "grad_norm": 1.205329786923798, "learning_rate": 1.4090284783322368e-06, "loss": 0.3018, "step": 24921 }, { "epoch": 2.3387762762762763, "grad_norm": 1.0450502842758371, "learning_rate": 1.408648593926043e-06, "loss": 0.2769, "step": 24922 }, { "epoch": 2.33887012012012, "grad_norm": 1.1426516816091534, "learning_rate": 1.4082687523399563e-06, "loss": 0.3147, "step": 24923 }, { "epoch": 2.338963963963964, "grad_norm": 0.9416336044788004, "learning_rate": 1.4078889535785046e-06, "loss": 0.2707, "step": 24924 }, { "epoch": 2.3390578078078077, "grad_norm": 1.0794033363377251, "learning_rate": 1.4075091976462136e-06, "loss": 0.3169, "step": 24925 }, { "epoch": 2.3391516516516515, "grad_norm": 1.4535570570853213, "learning_rate": 1.4071294845476147e-06, "loss": 0.2987, "step": 24926 }, { "epoch": 2.3392454954954953, "grad_norm": 1.088413887914017, "learning_rate": 1.4067498142872338e-06, "loss": 0.345, "step": 24927 }, { "epoch": 2.3393393393393396, "grad_norm": 1.1503953419713158, "learning_rate": 1.406370186869596e-06, "loss": 0.2861, "step": 24928 }, { "epoch": 2.339433183183183, "grad_norm": 1.611522075504417, "learning_rate": 1.4059906022992315e-06, "loss": 0.3369, "step": 24929 }, { "epoch": 2.339527027027027, "grad_norm": 1.1723565529189424, "learning_rate": 1.4056110605806638e-06, "loss": 0.2915, "step": 24930 }, { "epoch": 2.339620870870871, "grad_norm": 1.590088848181146, "learning_rate": 1.4052315617184188e-06, "loss": 0.3114, "step": 24931 }, { "epoch": 2.339714714714715, "grad_norm": 1.1519855943434698, "learning_rate": 1.4048521057170205e-06, "loss": 0.3128, "step": 24932 }, { "epoch": 2.3398085585585586, "grad_norm": 1.3034889667060034, "learning_rate": 1.4044726925809942e-06, "loss": 0.3208, "step": 24933 }, { "epoch": 2.3399024024024024, "grad_norm": 1.102822371272774, "learning_rate": 1.4040933223148612e-06, "loss": 0.3193, "step": 24934 }, { "epoch": 2.3399962462462462, "grad_norm": 1.092335932982164, "learning_rate": 1.4037139949231481e-06, "loss": 0.2852, "step": 24935 }, { "epoch": 2.34009009009009, "grad_norm": 1.044916483965449, "learning_rate": 1.4033347104103768e-06, "loss": 0.2756, "step": 24936 }, { "epoch": 2.340183933933934, "grad_norm": 1.1493580047252316, "learning_rate": 1.402955468781067e-06, "loss": 0.3037, "step": 24937 }, { "epoch": 2.3402777777777777, "grad_norm": 1.0545113423462742, "learning_rate": 1.4025762700397443e-06, "loss": 0.3195, "step": 24938 }, { "epoch": 2.3403716216216215, "grad_norm": 1.1564638011687665, "learning_rate": 1.4021971141909284e-06, "loss": 0.3431, "step": 24939 }, { "epoch": 2.3404654654654653, "grad_norm": 1.1493073098515203, "learning_rate": 1.401818001239138e-06, "loss": 0.3398, "step": 24940 }, { "epoch": 2.3405593093093096, "grad_norm": 1.339013928446801, "learning_rate": 1.4014389311888965e-06, "loss": 0.3041, "step": 24941 }, { "epoch": 2.340653153153153, "grad_norm": 1.0586771306104517, "learning_rate": 1.4010599040447221e-06, "loss": 0.3169, "step": 24942 }, { "epoch": 2.340746996996997, "grad_norm": 5.064990543046987, "learning_rate": 1.4006809198111343e-06, "loss": 0.3451, "step": 24943 }, { "epoch": 2.340840840840841, "grad_norm": 1.002597869399624, "learning_rate": 1.4003019784926514e-06, "loss": 0.3049, "step": 24944 }, { "epoch": 2.340934684684685, "grad_norm": 1.0204819345822675, "learning_rate": 1.3999230800937913e-06, "loss": 0.3411, "step": 24945 }, { "epoch": 2.3410285285285286, "grad_norm": 2.3718497922692694, "learning_rate": 1.399544224619071e-06, "loss": 0.297, "step": 24946 }, { "epoch": 2.3411223723723724, "grad_norm": 1.1063751066589866, "learning_rate": 1.3991654120730098e-06, "loss": 0.3293, "step": 24947 }, { "epoch": 2.3412162162162162, "grad_norm": 1.3484301582865064, "learning_rate": 1.3987866424601232e-06, "loss": 0.2996, "step": 24948 }, { "epoch": 2.34131006006006, "grad_norm": 1.0025069839714704, "learning_rate": 1.3984079157849262e-06, "loss": 0.3112, "step": 24949 }, { "epoch": 2.341403903903904, "grad_norm": 1.2365182919202553, "learning_rate": 1.3980292320519368e-06, "loss": 0.2923, "step": 24950 }, { "epoch": 2.3414977477477477, "grad_norm": 1.2065989172246643, "learning_rate": 1.3976505912656685e-06, "loss": 0.2912, "step": 24951 }, { "epoch": 2.3415915915915915, "grad_norm": 1.1595785094783537, "learning_rate": 1.397271993430635e-06, "loss": 0.3192, "step": 24952 }, { "epoch": 2.3416854354354353, "grad_norm": 1.5606918231681384, "learning_rate": 1.3968934385513533e-06, "loss": 0.3284, "step": 24953 }, { "epoch": 2.341779279279279, "grad_norm": 1.1219413483889322, "learning_rate": 1.3965149266323347e-06, "loss": 0.3234, "step": 24954 }, { "epoch": 2.341873123123123, "grad_norm": 1.1875987266559387, "learning_rate": 1.3961364576780927e-06, "loss": 0.3241, "step": 24955 }, { "epoch": 2.341966966966967, "grad_norm": 1.4353846114282485, "learning_rate": 1.3957580316931401e-06, "loss": 0.3292, "step": 24956 }, { "epoch": 2.342060810810811, "grad_norm": 1.072070185301865, "learning_rate": 1.3953796486819887e-06, "loss": 0.3276, "step": 24957 }, { "epoch": 2.3421546546546548, "grad_norm": 2.492348240719766, "learning_rate": 1.3950013086491488e-06, "loss": 0.315, "step": 24958 }, { "epoch": 2.3422484984984986, "grad_norm": 1.0834593408577176, "learning_rate": 1.3946230115991332e-06, "loss": 0.2976, "step": 24959 }, { "epoch": 2.3423423423423424, "grad_norm": 1.1766559560053014, "learning_rate": 1.3942447575364526e-06, "loss": 0.3307, "step": 24960 }, { "epoch": 2.342436186186186, "grad_norm": 1.123975550642626, "learning_rate": 1.3938665464656143e-06, "loss": 0.3305, "step": 24961 }, { "epoch": 2.34253003003003, "grad_norm": 1.0842050348768117, "learning_rate": 1.3934883783911313e-06, "loss": 0.2981, "step": 24962 }, { "epoch": 2.342623873873874, "grad_norm": 1.1517135476353364, "learning_rate": 1.3931102533175106e-06, "loss": 0.3148, "step": 24963 }, { "epoch": 2.3427177177177176, "grad_norm": 1.1026613701728005, "learning_rate": 1.3927321712492593e-06, "loss": 0.2898, "step": 24964 }, { "epoch": 2.3428115615615615, "grad_norm": 1.1624303160058524, "learning_rate": 1.3923541321908885e-06, "loss": 0.3319, "step": 24965 }, { "epoch": 2.3429054054054053, "grad_norm": 1.0956757427244093, "learning_rate": 1.3919761361469037e-06, "loss": 0.2883, "step": 24966 }, { "epoch": 2.342999249249249, "grad_norm": 1.1531419243596122, "learning_rate": 1.3915981831218123e-06, "loss": 0.3382, "step": 24967 }, { "epoch": 2.343093093093093, "grad_norm": 0.9587665342421496, "learning_rate": 1.3912202731201208e-06, "loss": 0.3075, "step": 24968 }, { "epoch": 2.343186936936937, "grad_norm": 1.11449025569655, "learning_rate": 1.3908424061463344e-06, "loss": 0.3011, "step": 24969 }, { "epoch": 2.343280780780781, "grad_norm": 1.0882712121548201, "learning_rate": 1.3904645822049573e-06, "loss": 0.3128, "step": 24970 }, { "epoch": 2.3433746246246248, "grad_norm": 1.163426351870693, "learning_rate": 1.390086801300497e-06, "loss": 0.3217, "step": 24971 }, { "epoch": 2.3434684684684686, "grad_norm": 1.0045555585268116, "learning_rate": 1.3897090634374567e-06, "loss": 0.3034, "step": 24972 }, { "epoch": 2.3435623123123124, "grad_norm": 1.0310876182561277, "learning_rate": 1.389331368620339e-06, "loss": 0.3174, "step": 24973 }, { "epoch": 2.343656156156156, "grad_norm": 1.1180850737827215, "learning_rate": 1.38895371685365e-06, "loss": 0.3637, "step": 24974 }, { "epoch": 2.34375, "grad_norm": 1.1292374916966386, "learning_rate": 1.3885761081418902e-06, "loss": 0.3502, "step": 24975 }, { "epoch": 2.343843843843844, "grad_norm": 1.1492747307526239, "learning_rate": 1.388198542489561e-06, "loss": 0.3352, "step": 24976 }, { "epoch": 2.3439376876876876, "grad_norm": 0.9434829420699143, "learning_rate": 1.3878210199011677e-06, "loss": 0.3022, "step": 24977 }, { "epoch": 2.3440315315315314, "grad_norm": 2.0447260504826223, "learning_rate": 1.3874435403812093e-06, "loss": 0.2959, "step": 24978 }, { "epoch": 2.3441253753753752, "grad_norm": 0.9716087030659714, "learning_rate": 1.3870661039341865e-06, "loss": 0.3016, "step": 24979 }, { "epoch": 2.344219219219219, "grad_norm": 1.2781158072736503, "learning_rate": 1.3866887105646003e-06, "loss": 0.296, "step": 24980 }, { "epoch": 2.344313063063063, "grad_norm": 1.1710897452789466, "learning_rate": 1.3863113602769496e-06, "loss": 0.3256, "step": 24981 }, { "epoch": 2.344406906906907, "grad_norm": 1.4183258572066484, "learning_rate": 1.385934053075732e-06, "loss": 0.3039, "step": 24982 }, { "epoch": 2.344500750750751, "grad_norm": 1.1547877465932441, "learning_rate": 1.3855567889654497e-06, "loss": 0.3432, "step": 24983 }, { "epoch": 2.3445945945945947, "grad_norm": 1.2463174180072025, "learning_rate": 1.3851795679505996e-06, "loss": 0.3301, "step": 24984 }, { "epoch": 2.3446884384384385, "grad_norm": 1.0084215120002327, "learning_rate": 1.384802390035677e-06, "loss": 0.3123, "step": 24985 }, { "epoch": 2.3447822822822824, "grad_norm": 1.201001375421913, "learning_rate": 1.384425255225183e-06, "loss": 0.3269, "step": 24986 }, { "epoch": 2.344876126126126, "grad_norm": 1.1050343653933623, "learning_rate": 1.384048163523612e-06, "loss": 0.3039, "step": 24987 }, { "epoch": 2.34496996996997, "grad_norm": 1.0062111840185977, "learning_rate": 1.3836711149354588e-06, "loss": 0.2863, "step": 24988 }, { "epoch": 2.345063813813814, "grad_norm": 1.0098656102115693, "learning_rate": 1.383294109465222e-06, "loss": 0.3251, "step": 24989 }, { "epoch": 2.3451576576576576, "grad_norm": 0.9973508057079622, "learning_rate": 1.3829171471173947e-06, "loss": 0.3058, "step": 24990 }, { "epoch": 2.3452515015015014, "grad_norm": 1.1541569022739804, "learning_rate": 1.3825402278964722e-06, "loss": 0.3008, "step": 24991 }, { "epoch": 2.3453453453453452, "grad_norm": 1.1387750226647961, "learning_rate": 1.3821633518069488e-06, "loss": 0.2999, "step": 24992 }, { "epoch": 2.345439189189189, "grad_norm": 1.7139037827404378, "learning_rate": 1.3817865188533176e-06, "loss": 0.3183, "step": 24993 }, { "epoch": 2.345533033033033, "grad_norm": 1.053758455716207, "learning_rate": 1.3814097290400692e-06, "loss": 0.3205, "step": 24994 }, { "epoch": 2.345626876876877, "grad_norm": 1.0371368384138746, "learning_rate": 1.3810329823717005e-06, "loss": 0.3075, "step": 24995 }, { "epoch": 2.345720720720721, "grad_norm": 1.1163430635059184, "learning_rate": 1.3806562788527017e-06, "loss": 0.2969, "step": 24996 }, { "epoch": 2.3458145645645647, "grad_norm": 4.193321874834361, "learning_rate": 1.3802796184875622e-06, "loss": 0.3201, "step": 24997 }, { "epoch": 2.3459084084084085, "grad_norm": 1.898039500463423, "learning_rate": 1.379903001280777e-06, "loss": 0.3004, "step": 24998 }, { "epoch": 2.3460022522522523, "grad_norm": 1.1546967918244655, "learning_rate": 1.3795264272368335e-06, "loss": 0.3524, "step": 24999 }, { "epoch": 2.346096096096096, "grad_norm": 1.202456473120869, "learning_rate": 1.3791498963602213e-06, "loss": 0.3345, "step": 25000 }, { "epoch": 2.34618993993994, "grad_norm": 1.4571678173480396, "learning_rate": 1.3787734086554327e-06, "loss": 0.316, "step": 25001 }, { "epoch": 2.3462837837837838, "grad_norm": 1.1973032613688737, "learning_rate": 1.3783969641269552e-06, "loss": 0.3349, "step": 25002 }, { "epoch": 2.3463776276276276, "grad_norm": 1.3964500793039363, "learning_rate": 1.3780205627792765e-06, "loss": 0.3238, "step": 25003 }, { "epoch": 2.3464714714714714, "grad_norm": 1.0867987314671672, "learning_rate": 1.3776442046168848e-06, "loss": 0.3297, "step": 25004 }, { "epoch": 2.346565315315315, "grad_norm": 0.9920183266061184, "learning_rate": 1.3772678896442677e-06, "loss": 0.3247, "step": 25005 }, { "epoch": 2.346659159159159, "grad_norm": 1.532258347611686, "learning_rate": 1.3768916178659102e-06, "loss": 0.2945, "step": 25006 }, { "epoch": 2.346753003003003, "grad_norm": 1.3345738992262448, "learning_rate": 1.3765153892863025e-06, "loss": 0.3759, "step": 25007 }, { "epoch": 2.346846846846847, "grad_norm": 1.1540025962034366, "learning_rate": 1.376139203909928e-06, "loss": 0.3132, "step": 25008 }, { "epoch": 2.3469406906906904, "grad_norm": 1.2409088373702342, "learning_rate": 1.375763061741271e-06, "loss": 0.3377, "step": 25009 }, { "epoch": 2.3470345345345347, "grad_norm": 1.093647499078835, "learning_rate": 1.3753869627848183e-06, "loss": 0.2482, "step": 25010 }, { "epoch": 2.3471283783783785, "grad_norm": 1.0778070169804996, "learning_rate": 1.375010907045054e-06, "loss": 0.2886, "step": 25011 }, { "epoch": 2.3472222222222223, "grad_norm": 1.4381429056325605, "learning_rate": 1.3746348945264599e-06, "loss": 0.3335, "step": 25012 }, { "epoch": 2.347316066066066, "grad_norm": 1.2078780138458949, "learning_rate": 1.374258925233522e-06, "loss": 0.3318, "step": 25013 }, { "epoch": 2.34740990990991, "grad_norm": 1.1429391801972422, "learning_rate": 1.3738829991707213e-06, "loss": 0.2921, "step": 25014 }, { "epoch": 2.3475037537537538, "grad_norm": 1.0843740960326176, "learning_rate": 1.3735071163425406e-06, "loss": 0.3241, "step": 25015 }, { "epoch": 2.3475975975975976, "grad_norm": 1.0434959433051632, "learning_rate": 1.3731312767534616e-06, "loss": 0.2968, "step": 25016 }, { "epoch": 2.3476914414414414, "grad_norm": 1.9651055099007504, "learning_rate": 1.372755480407965e-06, "loss": 0.3518, "step": 25017 }, { "epoch": 2.347785285285285, "grad_norm": 1.192185067341077, "learning_rate": 1.3723797273105306e-06, "loss": 0.2957, "step": 25018 }, { "epoch": 2.347879129129129, "grad_norm": 1.1475164059064524, "learning_rate": 1.3720040174656407e-06, "loss": 0.3354, "step": 25019 }, { "epoch": 2.347972972972973, "grad_norm": 1.2012788050331773, "learning_rate": 1.371628350877774e-06, "loss": 0.3538, "step": 25020 }, { "epoch": 2.348066816816817, "grad_norm": 1.1711610229310314, "learning_rate": 1.3712527275514081e-06, "loss": 0.3315, "step": 25021 }, { "epoch": 2.3481606606606604, "grad_norm": 3.5375050823788925, "learning_rate": 1.3708771474910242e-06, "loss": 0.2784, "step": 25022 }, { "epoch": 2.3482545045045047, "grad_norm": 1.0897979001689007, "learning_rate": 1.3705016107011e-06, "loss": 0.3302, "step": 25023 }, { "epoch": 2.3483483483483485, "grad_norm": 0.9278147008412008, "learning_rate": 1.3701261171861103e-06, "loss": 0.2911, "step": 25024 }, { "epoch": 2.3484421921921923, "grad_norm": 1.268562795089125, "learning_rate": 1.369750666950535e-06, "loss": 0.3114, "step": 25025 }, { "epoch": 2.348536036036036, "grad_norm": 1.081907939876599, "learning_rate": 1.3693752599988508e-06, "loss": 0.3412, "step": 25026 }, { "epoch": 2.34862987987988, "grad_norm": 1.0898443699708773, "learning_rate": 1.3689998963355305e-06, "loss": 0.296, "step": 25027 }, { "epoch": 2.3487237237237237, "grad_norm": 1.193320686136849, "learning_rate": 1.3686245759650534e-06, "loss": 0.3333, "step": 25028 }, { "epoch": 2.3488175675675675, "grad_norm": 0.9685412401842712, "learning_rate": 1.3682492988918926e-06, "loss": 0.3128, "step": 25029 }, { "epoch": 2.3489114114114114, "grad_norm": 1.152254048108106, "learning_rate": 1.3678740651205236e-06, "loss": 0.2598, "step": 25030 }, { "epoch": 2.349005255255255, "grad_norm": 1.110175321885724, "learning_rate": 1.367498874655419e-06, "loss": 0.2966, "step": 25031 }, { "epoch": 2.349099099099099, "grad_norm": 1.0817553398116881, "learning_rate": 1.367123727501053e-06, "loss": 0.2873, "step": 25032 }, { "epoch": 2.349192942942943, "grad_norm": 1.0919987046954018, "learning_rate": 1.366748623661897e-06, "loss": 0.2949, "step": 25033 }, { "epoch": 2.3492867867867866, "grad_norm": 1.018373707432685, "learning_rate": 1.3663735631424263e-06, "loss": 0.347, "step": 25034 }, { "epoch": 2.3493806306306304, "grad_norm": 1.765248080002518, "learning_rate": 1.365998545947111e-06, "loss": 0.3392, "step": 25035 }, { "epoch": 2.3494744744744747, "grad_norm": 1.826068325435008, "learning_rate": 1.3656235720804212e-06, "loss": 0.2783, "step": 25036 }, { "epoch": 2.3495683183183185, "grad_norm": 1.3683536037596264, "learning_rate": 1.3652486415468307e-06, "loss": 0.3309, "step": 25037 }, { "epoch": 2.3496621621621623, "grad_norm": 1.0422412478689294, "learning_rate": 1.364873754350809e-06, "loss": 0.2896, "step": 25038 }, { "epoch": 2.349756006006006, "grad_norm": 1.6621999587696212, "learning_rate": 1.3644989104968232e-06, "loss": 0.3405, "step": 25039 }, { "epoch": 2.34984984984985, "grad_norm": 1.4841140749588961, "learning_rate": 1.364124109989347e-06, "loss": 0.3202, "step": 25040 }, { "epoch": 2.3499436936936937, "grad_norm": 1.0931715029790143, "learning_rate": 1.3637493528328467e-06, "loss": 0.2873, "step": 25041 }, { "epoch": 2.3500375375375375, "grad_norm": 1.139936736467653, "learning_rate": 1.3633746390317904e-06, "loss": 0.2953, "step": 25042 }, { "epoch": 2.3501313813813813, "grad_norm": 1.0142377599618835, "learning_rate": 1.3629999685906465e-06, "loss": 0.3229, "step": 25043 }, { "epoch": 2.350225225225225, "grad_norm": 1.189238634669638, "learning_rate": 1.3626253415138824e-06, "loss": 0.276, "step": 25044 }, { "epoch": 2.350319069069069, "grad_norm": 1.1347539490681708, "learning_rate": 1.3622507578059623e-06, "loss": 0.3423, "step": 25045 }, { "epoch": 2.3504129129129128, "grad_norm": 1.3297906079211832, "learning_rate": 1.3618762174713563e-06, "loss": 0.3493, "step": 25046 }, { "epoch": 2.3505067567567566, "grad_norm": 1.4617916843741054, "learning_rate": 1.3615017205145286e-06, "loss": 0.3317, "step": 25047 }, { "epoch": 2.3506006006006004, "grad_norm": 1.1158014727737287, "learning_rate": 1.3611272669399423e-06, "loss": 0.2972, "step": 25048 }, { "epoch": 2.3506944444444446, "grad_norm": 1.0251577829764447, "learning_rate": 1.3607528567520656e-06, "loss": 0.3053, "step": 25049 }, { "epoch": 2.3507882882882885, "grad_norm": 1.136665255894558, "learning_rate": 1.3603784899553608e-06, "loss": 0.3137, "step": 25050 }, { "epoch": 2.3508821321321323, "grad_norm": 1.1816243607059365, "learning_rate": 1.36000416655429e-06, "loss": 0.3124, "step": 25051 }, { "epoch": 2.350975975975976, "grad_norm": 0.9864739108395423, "learning_rate": 1.3596298865533197e-06, "loss": 0.3216, "step": 25052 }, { "epoch": 2.35106981981982, "grad_norm": 0.9371893570583104, "learning_rate": 1.3592556499569105e-06, "loss": 0.2774, "step": 25053 }, { "epoch": 2.3511636636636637, "grad_norm": 1.2110792595484454, "learning_rate": 1.3588814567695247e-06, "loss": 0.2556, "step": 25054 }, { "epoch": 2.3512575075075075, "grad_norm": 2.0945463164702027, "learning_rate": 1.358507306995624e-06, "loss": 0.3282, "step": 25055 }, { "epoch": 2.3513513513513513, "grad_norm": 1.2624114543957967, "learning_rate": 1.3581332006396692e-06, "loss": 0.3378, "step": 25056 }, { "epoch": 2.351445195195195, "grad_norm": 1.0271002321688738, "learning_rate": 1.3577591377061195e-06, "loss": 0.3149, "step": 25057 }, { "epoch": 2.351539039039039, "grad_norm": 1.5389377580898185, "learning_rate": 1.3573851181994374e-06, "loss": 0.3111, "step": 25058 }, { "epoch": 2.3516328828828827, "grad_norm": 0.9703849161636475, "learning_rate": 1.3570111421240817e-06, "loss": 0.3248, "step": 25059 }, { "epoch": 2.3517267267267266, "grad_norm": 1.124197880041425, "learning_rate": 1.3566372094845088e-06, "loss": 0.346, "step": 25060 }, { "epoch": 2.3518205705705704, "grad_norm": 1.079993785719837, "learning_rate": 1.3562633202851811e-06, "loss": 0.3343, "step": 25061 }, { "epoch": 2.3519144144144146, "grad_norm": 0.9703257784121226, "learning_rate": 1.3558894745305545e-06, "loss": 0.2953, "step": 25062 }, { "epoch": 2.3520082582582584, "grad_norm": 1.161668656207626, "learning_rate": 1.3555156722250855e-06, "loss": 0.3383, "step": 25063 }, { "epoch": 2.3521021021021022, "grad_norm": 1.0555678774811366, "learning_rate": 1.3551419133732335e-06, "loss": 0.3205, "step": 25064 }, { "epoch": 2.352195945945946, "grad_norm": 1.5920720437768445, "learning_rate": 1.354768197979453e-06, "loss": 0.3181, "step": 25065 }, { "epoch": 2.35228978978979, "grad_norm": 1.6248922308766205, "learning_rate": 1.3543945260482006e-06, "loss": 0.3138, "step": 25066 }, { "epoch": 2.3523836336336337, "grad_norm": 1.1056876462261442, "learning_rate": 1.3540208975839314e-06, "loss": 0.3325, "step": 25067 }, { "epoch": 2.3524774774774775, "grad_norm": 1.0536557712904773, "learning_rate": 1.3536473125910998e-06, "loss": 0.316, "step": 25068 }, { "epoch": 2.3525713213213213, "grad_norm": 1.233327062147596, "learning_rate": 1.3532737710741595e-06, "loss": 0.3104, "step": 25069 }, { "epoch": 2.352665165165165, "grad_norm": 1.2207543551050843, "learning_rate": 1.3529002730375662e-06, "loss": 0.2317, "step": 25070 }, { "epoch": 2.352759009009009, "grad_norm": 1.0594966661723009, "learning_rate": 1.3525268184857726e-06, "loss": 0.3232, "step": 25071 }, { "epoch": 2.3528528528528527, "grad_norm": 1.305849050758598, "learning_rate": 1.352153407423229e-06, "loss": 0.3235, "step": 25072 }, { "epoch": 2.3529466966966965, "grad_norm": 1.1251775202483216, "learning_rate": 1.3517800398543918e-06, "loss": 0.3455, "step": 25073 }, { "epoch": 2.3530405405405403, "grad_norm": 1.3964674767612117, "learning_rate": 1.3514067157837102e-06, "loss": 0.3197, "step": 25074 }, { "epoch": 2.3531343843843846, "grad_norm": 1.1793420679426259, "learning_rate": 1.351033435215634e-06, "loss": 0.2801, "step": 25075 }, { "epoch": 2.3532282282282284, "grad_norm": 1.5792766961386566, "learning_rate": 1.3506601981546175e-06, "loss": 0.34, "step": 25076 }, { "epoch": 2.3533220720720722, "grad_norm": 0.9729280896605165, "learning_rate": 1.3502870046051086e-06, "loss": 0.2956, "step": 25077 }, { "epoch": 2.353415915915916, "grad_norm": 1.0370434368419452, "learning_rate": 1.349913854571558e-06, "loss": 0.3294, "step": 25078 }, { "epoch": 2.35350975975976, "grad_norm": 1.1193618555945073, "learning_rate": 1.3495407480584133e-06, "loss": 0.3512, "step": 25079 }, { "epoch": 2.3536036036036037, "grad_norm": 1.8691414087617264, "learning_rate": 1.3491676850701247e-06, "loss": 0.3142, "step": 25080 }, { "epoch": 2.3536974474474475, "grad_norm": 0.9653515652309277, "learning_rate": 1.3487946656111372e-06, "loss": 0.352, "step": 25081 }, { "epoch": 2.3537912912912913, "grad_norm": 1.064957698865392, "learning_rate": 1.3484216896859026e-06, "loss": 0.3349, "step": 25082 }, { "epoch": 2.353885135135135, "grad_norm": 1.1486128975177758, "learning_rate": 1.3480487572988654e-06, "loss": 0.294, "step": 25083 }, { "epoch": 2.353978978978979, "grad_norm": 1.004361646955367, "learning_rate": 1.3476758684544717e-06, "loss": 0.3176, "step": 25084 }, { "epoch": 2.3540728228228227, "grad_norm": 1.3844165233182506, "learning_rate": 1.3473030231571698e-06, "loss": 0.322, "step": 25085 }, { "epoch": 2.3541666666666665, "grad_norm": 1.1553929176204836, "learning_rate": 1.346930221411404e-06, "loss": 0.3262, "step": 25086 }, { "epoch": 2.3542605105105103, "grad_norm": 1.2311510369723355, "learning_rate": 1.3465574632216178e-06, "loss": 0.3106, "step": 25087 }, { "epoch": 2.3543543543543546, "grad_norm": 1.0401131668207844, "learning_rate": 1.346184748592258e-06, "loss": 0.3309, "step": 25088 }, { "epoch": 2.354448198198198, "grad_norm": 1.0273372791967657, "learning_rate": 1.3458120775277678e-06, "loss": 0.3375, "step": 25089 }, { "epoch": 2.354542042042042, "grad_norm": 1.1245016673347275, "learning_rate": 1.34543945003259e-06, "loss": 0.2903, "step": 25090 }, { "epoch": 2.354635885885886, "grad_norm": 1.3397587489022706, "learning_rate": 1.3450668661111677e-06, "loss": 0.2986, "step": 25091 }, { "epoch": 2.35472972972973, "grad_norm": 1.1934594782379622, "learning_rate": 1.3446943257679434e-06, "loss": 0.3124, "step": 25092 }, { "epoch": 2.3548235735735736, "grad_norm": 1.082101957132337, "learning_rate": 1.3443218290073578e-06, "loss": 0.3037, "step": 25093 }, { "epoch": 2.3549174174174174, "grad_norm": 1.18565198235113, "learning_rate": 1.3439493758338546e-06, "loss": 0.3625, "step": 25094 }, { "epoch": 2.3550112612612613, "grad_norm": 1.1282632224530291, "learning_rate": 1.3435769662518727e-06, "loss": 0.3514, "step": 25095 }, { "epoch": 2.355105105105105, "grad_norm": 1.0462070887635595, "learning_rate": 1.3432046002658522e-06, "loss": 0.296, "step": 25096 }, { "epoch": 2.355198948948949, "grad_norm": 0.9622273208731681, "learning_rate": 1.3428322778802345e-06, "loss": 0.3419, "step": 25097 }, { "epoch": 2.3552927927927927, "grad_norm": 1.0982028421709742, "learning_rate": 1.3424599990994585e-06, "loss": 0.2582, "step": 25098 }, { "epoch": 2.3553866366366365, "grad_norm": 1.1674765245821017, "learning_rate": 1.3420877639279606e-06, "loss": 0.3215, "step": 25099 }, { "epoch": 2.3554804804804803, "grad_norm": 1.1904820992658827, "learning_rate": 1.3417155723701824e-06, "loss": 0.332, "step": 25100 }, { "epoch": 2.3555743243243246, "grad_norm": 1.0580873753415931, "learning_rate": 1.3413434244305601e-06, "loss": 0.3065, "step": 25101 }, { "epoch": 2.355668168168168, "grad_norm": 1.0176646729704126, "learning_rate": 1.3409713201135305e-06, "loss": 0.3499, "step": 25102 }, { "epoch": 2.355762012012012, "grad_norm": 1.236943657776592, "learning_rate": 1.3405992594235305e-06, "loss": 0.2888, "step": 25103 }, { "epoch": 2.355855855855856, "grad_norm": 1.103684700375983, "learning_rate": 1.3402272423649965e-06, "loss": 0.3918, "step": 25104 }, { "epoch": 2.3559496996997, "grad_norm": 1.0864226553328309, "learning_rate": 1.339855268942362e-06, "loss": 0.3236, "step": 25105 }, { "epoch": 2.3560435435435436, "grad_norm": 2.1120704120290714, "learning_rate": 1.339483339160066e-06, "loss": 0.3188, "step": 25106 }, { "epoch": 2.3561373873873874, "grad_norm": 1.1254839297734758, "learning_rate": 1.3391114530225401e-06, "loss": 0.3354, "step": 25107 }, { "epoch": 2.3562312312312312, "grad_norm": 1.6430599500124345, "learning_rate": 1.3387396105342183e-06, "loss": 0.3401, "step": 25108 }, { "epoch": 2.356325075075075, "grad_norm": 1.0735406948902964, "learning_rate": 1.3383678116995363e-06, "loss": 0.2554, "step": 25109 }, { "epoch": 2.356418918918919, "grad_norm": 1.1263605811502888, "learning_rate": 1.337996056522926e-06, "loss": 0.3272, "step": 25110 }, { "epoch": 2.3565127627627627, "grad_norm": 1.0762352065699017, "learning_rate": 1.3376243450088183e-06, "loss": 0.3275, "step": 25111 }, { "epoch": 2.3566066066066065, "grad_norm": 1.0979920763157631, "learning_rate": 1.3372526771616478e-06, "loss": 0.2922, "step": 25112 }, { "epoch": 2.3567004504504503, "grad_norm": 1.0188448080221342, "learning_rate": 1.3368810529858444e-06, "loss": 0.3417, "step": 25113 }, { "epoch": 2.356794294294294, "grad_norm": 1.2643351206632112, "learning_rate": 1.3365094724858402e-06, "loss": 0.3295, "step": 25114 }, { "epoch": 2.356888138138138, "grad_norm": 1.0911760530911048, "learning_rate": 1.3361379356660642e-06, "loss": 0.3089, "step": 25115 }, { "epoch": 2.356981981981982, "grad_norm": 1.339861952077293, "learning_rate": 1.335766442530947e-06, "loss": 0.3324, "step": 25116 }, { "epoch": 2.357075825825826, "grad_norm": 1.0171975811058986, "learning_rate": 1.3353949930849158e-06, "loss": 0.3585, "step": 25117 }, { "epoch": 2.35716966966967, "grad_norm": 1.1052692075694672, "learning_rate": 1.3350235873324035e-06, "loss": 0.315, "step": 25118 }, { "epoch": 2.3572635135135136, "grad_norm": 1.6758118528453316, "learning_rate": 1.334652225277836e-06, "loss": 0.3249, "step": 25119 }, { "epoch": 2.3573573573573574, "grad_norm": 1.1002921040502367, "learning_rate": 1.3342809069256396e-06, "loss": 0.3162, "step": 25120 }, { "epoch": 2.357451201201201, "grad_norm": 1.1527920144101353, "learning_rate": 1.3339096322802454e-06, "loss": 0.3656, "step": 25121 }, { "epoch": 2.357545045045045, "grad_norm": 1.0132062021010066, "learning_rate": 1.3335384013460778e-06, "loss": 0.3195, "step": 25122 }, { "epoch": 2.357638888888889, "grad_norm": 1.0249702175978814, "learning_rate": 1.333167214127562e-06, "loss": 0.2948, "step": 25123 }, { "epoch": 2.3577327327327327, "grad_norm": 1.1750896510606008, "learning_rate": 1.3327960706291264e-06, "loss": 0.3032, "step": 25124 }, { "epoch": 2.3578265765765765, "grad_norm": 1.207925501563017, "learning_rate": 1.3324249708551957e-06, "loss": 0.3406, "step": 25125 }, { "epoch": 2.3579204204204203, "grad_norm": 1.4402967045190214, "learning_rate": 1.332053914810193e-06, "loss": 0.2732, "step": 25126 }, { "epoch": 2.358014264264264, "grad_norm": 1.2215863612566618, "learning_rate": 1.3316829024985428e-06, "loss": 0.3176, "step": 25127 }, { "epoch": 2.358108108108108, "grad_norm": 1.2240480307273829, "learning_rate": 1.33131193392467e-06, "loss": 0.3169, "step": 25128 }, { "epoch": 2.358201951951952, "grad_norm": 1.418114058482605, "learning_rate": 1.3309410090929947e-06, "loss": 0.3053, "step": 25129 }, { "epoch": 2.358295795795796, "grad_norm": 1.3643630208702462, "learning_rate": 1.3305701280079431e-06, "loss": 0.3325, "step": 25130 }, { "epoch": 2.3583896396396398, "grad_norm": 1.2593409755826266, "learning_rate": 1.3301992906739358e-06, "loss": 0.3048, "step": 25131 }, { "epoch": 2.3584834834834836, "grad_norm": 1.165153282748418, "learning_rate": 1.329828497095393e-06, "loss": 0.3064, "step": 25132 }, { "epoch": 2.3585773273273274, "grad_norm": 1.0855143448178584, "learning_rate": 1.3294577472767378e-06, "loss": 0.2979, "step": 25133 }, { "epoch": 2.358671171171171, "grad_norm": 1.4344628206089325, "learning_rate": 1.3290870412223904e-06, "loss": 0.3059, "step": 25134 }, { "epoch": 2.358765015015015, "grad_norm": 1.1436938561240364, "learning_rate": 1.328716378936769e-06, "loss": 0.2978, "step": 25135 }, { "epoch": 2.358858858858859, "grad_norm": 1.0465714459216724, "learning_rate": 1.3283457604242956e-06, "loss": 0.3658, "step": 25136 }, { "epoch": 2.3589527027027026, "grad_norm": 1.0845875774073686, "learning_rate": 1.3279751856893875e-06, "loss": 0.2931, "step": 25137 }, { "epoch": 2.3590465465465464, "grad_norm": 1.3764297099060143, "learning_rate": 1.3276046547364635e-06, "loss": 0.3382, "step": 25138 }, { "epoch": 2.3591403903903903, "grad_norm": 1.091742404687602, "learning_rate": 1.327234167569942e-06, "loss": 0.3337, "step": 25139 }, { "epoch": 2.359234234234234, "grad_norm": 1.0355346231708509, "learning_rate": 1.326863724194239e-06, "loss": 0.3468, "step": 25140 }, { "epoch": 2.359328078078078, "grad_norm": 1.1696499799627398, "learning_rate": 1.3264933246137711e-06, "loss": 0.3211, "step": 25141 }, { "epoch": 2.359421921921922, "grad_norm": 1.1956454609101426, "learning_rate": 1.326122968832957e-06, "loss": 0.3361, "step": 25142 }, { "epoch": 2.359515765765766, "grad_norm": 1.0911068973741194, "learning_rate": 1.3257526568562113e-06, "loss": 0.363, "step": 25143 }, { "epoch": 2.3596096096096097, "grad_norm": 1.0343944462509744, "learning_rate": 1.3253823886879474e-06, "loss": 0.3049, "step": 25144 }, { "epoch": 2.3597034534534536, "grad_norm": 1.138749409156326, "learning_rate": 1.3250121643325831e-06, "loss": 0.3323, "step": 25145 }, { "epoch": 2.3597972972972974, "grad_norm": 0.9720698161827994, "learning_rate": 1.3246419837945319e-06, "loss": 0.3005, "step": 25146 }, { "epoch": 2.359891141141141, "grad_norm": 1.3522515216510314, "learning_rate": 1.3242718470782044e-06, "loss": 0.304, "step": 25147 }, { "epoch": 2.359984984984985, "grad_norm": 1.344926353512149, "learning_rate": 1.3239017541880188e-06, "loss": 0.3042, "step": 25148 }, { "epoch": 2.360078828828829, "grad_norm": 1.212950281974053, "learning_rate": 1.3235317051283846e-06, "loss": 0.3156, "step": 25149 }, { "epoch": 2.3601726726726726, "grad_norm": 1.0697995566754983, "learning_rate": 1.3231616999037134e-06, "loss": 0.3537, "step": 25150 }, { "epoch": 2.3602665165165164, "grad_norm": 1.2858528138908365, "learning_rate": 1.322791738518419e-06, "loss": 0.2929, "step": 25151 }, { "epoch": 2.3603603603603602, "grad_norm": 1.1826725890028122, "learning_rate": 1.3224218209769136e-06, "loss": 0.3383, "step": 25152 }, { "epoch": 2.360454204204204, "grad_norm": 1.0203230791382534, "learning_rate": 1.3220519472836024e-06, "loss": 0.3097, "step": 25153 }, { "epoch": 2.360548048048048, "grad_norm": 1.1093934154430796, "learning_rate": 1.3216821174429006e-06, "loss": 0.3372, "step": 25154 }, { "epoch": 2.360641891891892, "grad_norm": 1.8589291484286212, "learning_rate": 1.3213123314592152e-06, "loss": 0.2658, "step": 25155 }, { "epoch": 2.360735735735736, "grad_norm": 1.1390584663199417, "learning_rate": 1.320942589336955e-06, "loss": 0.282, "step": 25156 }, { "epoch": 2.3608295795795797, "grad_norm": 1.3746438779344414, "learning_rate": 1.3205728910805299e-06, "loss": 0.3457, "step": 25157 }, { "epoch": 2.3609234234234235, "grad_norm": 1.3475708096021206, "learning_rate": 1.3202032366943478e-06, "loss": 0.2939, "step": 25158 }, { "epoch": 2.3610172672672673, "grad_norm": 1.1291873218034714, "learning_rate": 1.3198336261828143e-06, "loss": 0.3277, "step": 25159 }, { "epoch": 2.361111111111111, "grad_norm": 1.0561485035336422, "learning_rate": 1.319464059550339e-06, "loss": 0.2971, "step": 25160 }, { "epoch": 2.361204954954955, "grad_norm": 1.2558500553384275, "learning_rate": 1.3190945368013263e-06, "loss": 0.3011, "step": 25161 }, { "epoch": 2.361298798798799, "grad_norm": 1.154573434068342, "learning_rate": 1.3187250579401817e-06, "loss": 0.2889, "step": 25162 }, { "epoch": 2.3613926426426426, "grad_norm": 1.021397022769262, "learning_rate": 1.318355622971313e-06, "loss": 0.2849, "step": 25163 }, { "epoch": 2.3614864864864864, "grad_norm": 1.2139617868741759, "learning_rate": 1.3179862318991232e-06, "loss": 0.3333, "step": 25164 }, { "epoch": 2.36158033033033, "grad_norm": 1.0129550593964187, "learning_rate": 1.3176168847280173e-06, "loss": 0.2642, "step": 25165 }, { "epoch": 2.361674174174174, "grad_norm": 1.170931037983836, "learning_rate": 1.3172475814623987e-06, "loss": 0.3662, "step": 25166 }, { "epoch": 2.361768018018018, "grad_norm": 1.020149071992553, "learning_rate": 1.3168783221066706e-06, "loss": 0.3203, "step": 25167 }, { "epoch": 2.361861861861862, "grad_norm": 1.1676373717986215, "learning_rate": 1.3165091066652336e-06, "loss": 0.319, "step": 25168 }, { "epoch": 2.361955705705706, "grad_norm": 1.3022762627147961, "learning_rate": 1.3161399351424941e-06, "loss": 0.2683, "step": 25169 }, { "epoch": 2.3620495495495497, "grad_norm": 1.2645868333070598, "learning_rate": 1.3157708075428516e-06, "loss": 0.2911, "step": 25170 }, { "epoch": 2.3621433933933935, "grad_norm": 1.0864030099883035, "learning_rate": 1.3154017238707062e-06, "loss": 0.3203, "step": 25171 }, { "epoch": 2.3622372372372373, "grad_norm": 1.0651942816617634, "learning_rate": 1.3150326841304605e-06, "loss": 0.3098, "step": 25172 }, { "epoch": 2.362331081081081, "grad_norm": 1.1182907643060267, "learning_rate": 1.3146636883265146e-06, "loss": 0.2865, "step": 25173 }, { "epoch": 2.362424924924925, "grad_norm": 1.0777386092050585, "learning_rate": 1.3142947364632652e-06, "loss": 0.3405, "step": 25174 }, { "epoch": 2.3625187687687688, "grad_norm": 1.0395884283911647, "learning_rate": 1.3139258285451152e-06, "loss": 0.3023, "step": 25175 }, { "epoch": 2.3626126126126126, "grad_norm": 1.9652025252045404, "learning_rate": 1.3135569645764612e-06, "loss": 0.3055, "step": 25176 }, { "epoch": 2.3627064564564564, "grad_norm": 1.2192684073226532, "learning_rate": 1.3131881445617018e-06, "loss": 0.2665, "step": 25177 }, { "epoch": 2.3628003003003, "grad_norm": 1.2533428952062702, "learning_rate": 1.3128193685052337e-06, "loss": 0.3282, "step": 25178 }, { "epoch": 2.362894144144144, "grad_norm": 1.030569681769279, "learning_rate": 1.3124506364114543e-06, "loss": 0.3477, "step": 25179 }, { "epoch": 2.362987987987988, "grad_norm": 1.3820705543084382, "learning_rate": 1.3120819482847586e-06, "loss": 0.2939, "step": 25180 }, { "epoch": 2.363081831831832, "grad_norm": 1.0357914085210194, "learning_rate": 1.3117133041295449e-06, "loss": 0.3327, "step": 25181 }, { "epoch": 2.3631756756756754, "grad_norm": 1.152414551243236, "learning_rate": 1.311344703950208e-06, "loss": 0.3576, "step": 25182 }, { "epoch": 2.3632695195195197, "grad_norm": 1.0989766856416912, "learning_rate": 1.3109761477511408e-06, "loss": 0.3408, "step": 25183 }, { "epoch": 2.3633633633633635, "grad_norm": 1.2536804107242756, "learning_rate": 1.3106076355367403e-06, "loss": 0.3122, "step": 25184 }, { "epoch": 2.3634572072072073, "grad_norm": 1.6117775758809192, "learning_rate": 1.3102391673113997e-06, "loss": 0.2913, "step": 25185 }, { "epoch": 2.363551051051051, "grad_norm": 0.9664296082934973, "learning_rate": 1.30987074307951e-06, "loss": 0.2848, "step": 25186 }, { "epoch": 2.363644894894895, "grad_norm": 1.100763111735172, "learning_rate": 1.3095023628454672e-06, "loss": 0.3201, "step": 25187 }, { "epoch": 2.3637387387387387, "grad_norm": 0.963331751797083, "learning_rate": 1.309134026613662e-06, "loss": 0.3284, "step": 25188 }, { "epoch": 2.3638325825825826, "grad_norm": 2.966845049643397, "learning_rate": 1.3087657343884862e-06, "loss": 0.3131, "step": 25189 }, { "epoch": 2.3639264264264264, "grad_norm": 1.2990181403347216, "learning_rate": 1.3083974861743315e-06, "loss": 0.3074, "step": 25190 }, { "epoch": 2.36402027027027, "grad_norm": 1.0902789878516277, "learning_rate": 1.3080292819755874e-06, "loss": 0.2971, "step": 25191 }, { "epoch": 2.364114114114114, "grad_norm": 1.1058114117357067, "learning_rate": 1.3076611217966435e-06, "loss": 0.3438, "step": 25192 }, { "epoch": 2.364207957957958, "grad_norm": 1.1827866284930835, "learning_rate": 1.307293005641892e-06, "loss": 0.3037, "step": 25193 }, { "epoch": 2.364301801801802, "grad_norm": 1.1914491441659838, "learning_rate": 1.3069249335157207e-06, "loss": 0.3258, "step": 25194 }, { "epoch": 2.3643956456456454, "grad_norm": 1.2067614847996786, "learning_rate": 1.306556905422517e-06, "loss": 0.2931, "step": 25195 }, { "epoch": 2.3644894894894897, "grad_norm": 1.1292496928950495, "learning_rate": 1.3061889213666706e-06, "loss": 0.3088, "step": 25196 }, { "epoch": 2.3645833333333335, "grad_norm": 1.52339932302039, "learning_rate": 1.305820981352569e-06, "loss": 0.3295, "step": 25197 }, { "epoch": 2.3646771771771773, "grad_norm": 1.116606004244102, "learning_rate": 1.305453085384597e-06, "loss": 0.3252, "step": 25198 }, { "epoch": 2.364771021021021, "grad_norm": 1.261118955710832, "learning_rate": 1.3050852334671443e-06, "loss": 0.2834, "step": 25199 }, { "epoch": 2.364864864864865, "grad_norm": 1.1599345013628608, "learning_rate": 1.3047174256045947e-06, "loss": 0.3152, "step": 25200 }, { "epoch": 2.3649587087087087, "grad_norm": 1.0986651823408315, "learning_rate": 1.3043496618013347e-06, "loss": 0.2952, "step": 25201 }, { "epoch": 2.3650525525525525, "grad_norm": 1.1691810307145276, "learning_rate": 1.3039819420617483e-06, "loss": 0.311, "step": 25202 }, { "epoch": 2.3651463963963963, "grad_norm": 1.7917544056611303, "learning_rate": 1.3036142663902206e-06, "loss": 0.3652, "step": 25203 }, { "epoch": 2.36524024024024, "grad_norm": 1.0615539755770893, "learning_rate": 1.3032466347911333e-06, "loss": 0.3083, "step": 25204 }, { "epoch": 2.365334084084084, "grad_norm": 1.544713455741075, "learning_rate": 1.302879047268873e-06, "loss": 0.3149, "step": 25205 }, { "epoch": 2.3654279279279278, "grad_norm": 2.9658656292270145, "learning_rate": 1.3025115038278213e-06, "loss": 0.3253, "step": 25206 }, { "epoch": 2.3655217717717716, "grad_norm": 1.111768595740056, "learning_rate": 1.3021440044723583e-06, "loss": 0.3349, "step": 25207 }, { "epoch": 2.3656156156156154, "grad_norm": 1.1939657417570295, "learning_rate": 1.3017765492068685e-06, "loss": 0.3303, "step": 25208 }, { "epoch": 2.3657094594594597, "grad_norm": 1.1746241961558859, "learning_rate": 1.301409138035733e-06, "loss": 0.2826, "step": 25209 }, { "epoch": 2.3658033033033035, "grad_norm": 0.8939372383551266, "learning_rate": 1.3010417709633293e-06, "loss": 0.3175, "step": 25210 }, { "epoch": 2.3658971471471473, "grad_norm": 1.028803258858196, "learning_rate": 1.3006744479940426e-06, "loss": 0.3303, "step": 25211 }, { "epoch": 2.365990990990991, "grad_norm": 1.061264225883034, "learning_rate": 1.3003071691322489e-06, "loss": 0.3086, "step": 25212 }, { "epoch": 2.366084834834835, "grad_norm": 1.1411531667817472, "learning_rate": 1.2999399343823283e-06, "loss": 0.3059, "step": 25213 }, { "epoch": 2.3661786786786787, "grad_norm": 1.2597650264543994, "learning_rate": 1.29957274374866e-06, "loss": 0.343, "step": 25214 }, { "epoch": 2.3662725225225225, "grad_norm": 1.2458919858716937, "learning_rate": 1.2992055972356205e-06, "loss": 0.3328, "step": 25215 }, { "epoch": 2.3663663663663663, "grad_norm": 1.3342840347941187, "learning_rate": 1.2988384948475874e-06, "loss": 0.3358, "step": 25216 }, { "epoch": 2.36646021021021, "grad_norm": 1.2326902480358741, "learning_rate": 1.2984714365889395e-06, "loss": 0.3452, "step": 25217 }, { "epoch": 2.366554054054054, "grad_norm": 1.2314969353607021, "learning_rate": 1.2981044224640526e-06, "loss": 0.3063, "step": 25218 }, { "epoch": 2.3666478978978978, "grad_norm": 2.4107515343719093, "learning_rate": 1.2977374524773012e-06, "loss": 0.3383, "step": 25219 }, { "epoch": 2.3667417417417416, "grad_norm": 1.0194907249988259, "learning_rate": 1.2973705266330628e-06, "loss": 0.278, "step": 25220 }, { "epoch": 2.3668355855855854, "grad_norm": 1.1060952136720066, "learning_rate": 1.2970036449357115e-06, "loss": 0.3099, "step": 25221 }, { "epoch": 2.3669294294294296, "grad_norm": 1.0246424316338802, "learning_rate": 1.2966368073896201e-06, "loss": 0.2924, "step": 25222 }, { "epoch": 2.3670232732732734, "grad_norm": 1.1815062956858515, "learning_rate": 1.2962700139991657e-06, "loss": 0.34, "step": 25223 }, { "epoch": 2.3671171171171173, "grad_norm": 1.1149510768270048, "learning_rate": 1.2959032647687198e-06, "loss": 0.3539, "step": 25224 }, { "epoch": 2.367210960960961, "grad_norm": 1.0681245720432877, "learning_rate": 1.2955365597026548e-06, "loss": 0.3014, "step": 25225 }, { "epoch": 2.367304804804805, "grad_norm": 1.3676845259998838, "learning_rate": 1.295169898805344e-06, "loss": 0.3035, "step": 25226 }, { "epoch": 2.3673986486486487, "grad_norm": 1.035551441624677, "learning_rate": 1.294803282081158e-06, "loss": 0.3025, "step": 25227 }, { "epoch": 2.3674924924924925, "grad_norm": 1.2988303887780743, "learning_rate": 1.2944367095344673e-06, "loss": 0.3003, "step": 25228 }, { "epoch": 2.3675863363363363, "grad_norm": 1.1187457756710866, "learning_rate": 1.2940701811696454e-06, "loss": 0.3061, "step": 25229 }, { "epoch": 2.36768018018018, "grad_norm": 1.0330212981203717, "learning_rate": 1.2937036969910604e-06, "loss": 0.2995, "step": 25230 }, { "epoch": 2.367774024024024, "grad_norm": 1.0217663292672685, "learning_rate": 1.2933372570030816e-06, "loss": 0.3583, "step": 25231 }, { "epoch": 2.3678678678678677, "grad_norm": 1.238214193132903, "learning_rate": 1.29297086121008e-06, "loss": 0.3295, "step": 25232 }, { "epoch": 2.3679617117117115, "grad_norm": 1.196935176460578, "learning_rate": 1.292604509616423e-06, "loss": 0.3348, "step": 25233 }, { "epoch": 2.3680555555555554, "grad_norm": 1.2573893184351244, "learning_rate": 1.292238202226478e-06, "loss": 0.3181, "step": 25234 }, { "epoch": 2.3681493993993996, "grad_norm": 1.6333695173116454, "learning_rate": 1.2918719390446139e-06, "loss": 0.2969, "step": 25235 }, { "epoch": 2.3682432432432434, "grad_norm": 1.1397927789662323, "learning_rate": 1.2915057200751973e-06, "loss": 0.3175, "step": 25236 }, { "epoch": 2.3683370870870872, "grad_norm": 1.263284634786533, "learning_rate": 1.291139545322595e-06, "loss": 0.284, "step": 25237 }, { "epoch": 2.368430930930931, "grad_norm": 1.393308317656978, "learning_rate": 1.290773414791172e-06, "loss": 0.3059, "step": 25238 }, { "epoch": 2.368524774774775, "grad_norm": 1.1122661837337, "learning_rate": 1.290407328485294e-06, "loss": 0.3247, "step": 25239 }, { "epoch": 2.3686186186186187, "grad_norm": 0.9374386633813536, "learning_rate": 1.2900412864093248e-06, "loss": 0.3118, "step": 25240 }, { "epoch": 2.3687124624624625, "grad_norm": 1.163236913945051, "learning_rate": 1.2896752885676311e-06, "loss": 0.3642, "step": 25241 }, { "epoch": 2.3688063063063063, "grad_norm": 1.1591998060080577, "learning_rate": 1.2893093349645764e-06, "loss": 0.3023, "step": 25242 }, { "epoch": 2.36890015015015, "grad_norm": 1.2013310971301545, "learning_rate": 1.2889434256045208e-06, "loss": 0.3297, "step": 25243 }, { "epoch": 2.368993993993994, "grad_norm": 1.246434811219651, "learning_rate": 1.2885775604918315e-06, "loss": 0.3297, "step": 25244 }, { "epoch": 2.3690878378378377, "grad_norm": 1.1170095316910336, "learning_rate": 1.2882117396308686e-06, "loss": 0.3383, "step": 25245 }, { "epoch": 2.3691816816816815, "grad_norm": 1.3564833792132935, "learning_rate": 1.2878459630259926e-06, "loss": 0.3007, "step": 25246 }, { "epoch": 2.3692755255255253, "grad_norm": 1.2817762844802383, "learning_rate": 1.2874802306815676e-06, "loss": 0.314, "step": 25247 }, { "epoch": 2.3693693693693696, "grad_norm": 1.2079472329733725, "learning_rate": 1.2871145426019526e-06, "loss": 0.3023, "step": 25248 }, { "epoch": 2.3694632132132134, "grad_norm": 1.0745137618474931, "learning_rate": 1.2867488987915083e-06, "loss": 0.3492, "step": 25249 }, { "epoch": 2.369557057057057, "grad_norm": 1.0944301082105052, "learning_rate": 1.2863832992545933e-06, "loss": 0.3369, "step": 25250 }, { "epoch": 2.369650900900901, "grad_norm": 0.9816130162870823, "learning_rate": 1.286017743995568e-06, "loss": 0.3218, "step": 25251 }, { "epoch": 2.369744744744745, "grad_norm": 1.2752130704830063, "learning_rate": 1.2856522330187881e-06, "loss": 0.2934, "step": 25252 }, { "epoch": 2.3698385885885886, "grad_norm": 1.3522636734448985, "learning_rate": 1.2852867663286156e-06, "loss": 0.3028, "step": 25253 }, { "epoch": 2.3699324324324325, "grad_norm": 1.0736281133745085, "learning_rate": 1.284921343929406e-06, "loss": 0.2734, "step": 25254 }, { "epoch": 2.3700262762762763, "grad_norm": 1.2318387971818954, "learning_rate": 1.2845559658255153e-06, "loss": 0.2509, "step": 25255 }, { "epoch": 2.37012012012012, "grad_norm": 1.0327101579736506, "learning_rate": 1.284190632021302e-06, "loss": 0.3142, "step": 25256 }, { "epoch": 2.370213963963964, "grad_norm": 1.532710751748581, "learning_rate": 1.2838253425211217e-06, "loss": 0.3069, "step": 25257 }, { "epoch": 2.3703078078078077, "grad_norm": 1.4509276281447874, "learning_rate": 1.2834600973293275e-06, "loss": 0.3418, "step": 25258 }, { "epoch": 2.3704016516516515, "grad_norm": 1.1587916214416834, "learning_rate": 1.2830948964502772e-06, "loss": 0.2725, "step": 25259 }, { "epoch": 2.3704954954954953, "grad_norm": 0.9906886352078073, "learning_rate": 1.2827297398883243e-06, "loss": 0.2967, "step": 25260 }, { "epoch": 2.3705893393393396, "grad_norm": 1.1668206628628774, "learning_rate": 1.282364627647822e-06, "loss": 0.346, "step": 25261 }, { "epoch": 2.370683183183183, "grad_norm": 1.4777804418938685, "learning_rate": 1.2819995597331237e-06, "loss": 0.2975, "step": 25262 }, { "epoch": 2.370777027027027, "grad_norm": 1.0405880496254065, "learning_rate": 1.2816345361485828e-06, "loss": 0.3144, "step": 25263 }, { "epoch": 2.370870870870871, "grad_norm": 1.090952075799309, "learning_rate": 1.2812695568985489e-06, "loss": 0.2778, "step": 25264 }, { "epoch": 2.370964714714715, "grad_norm": 1.1863769095042669, "learning_rate": 1.2809046219873772e-06, "loss": 0.3523, "step": 25265 }, { "epoch": 2.3710585585585586, "grad_norm": 1.1390026285459645, "learning_rate": 1.280539731419418e-06, "loss": 0.3192, "step": 25266 }, { "epoch": 2.3711524024024024, "grad_norm": 1.0240632141769301, "learning_rate": 1.2801748851990192e-06, "loss": 0.2857, "step": 25267 }, { "epoch": 2.3712462462462462, "grad_norm": 2.5565508234195162, "learning_rate": 1.279810083330535e-06, "loss": 0.31, "step": 25268 }, { "epoch": 2.37134009009009, "grad_norm": 1.4167749292079335, "learning_rate": 1.2794453258183126e-06, "loss": 0.3108, "step": 25269 }, { "epoch": 2.371433933933934, "grad_norm": 1.1090424812341344, "learning_rate": 1.2790806126667e-06, "loss": 0.2911, "step": 25270 }, { "epoch": 2.3715277777777777, "grad_norm": 1.1086402165302505, "learning_rate": 1.2787159438800489e-06, "loss": 0.348, "step": 25271 }, { "epoch": 2.3716216216216215, "grad_norm": 1.040518550380755, "learning_rate": 1.2783513194627055e-06, "loss": 0.3146, "step": 25272 }, { "epoch": 2.3717154654654653, "grad_norm": 1.00454648681329, "learning_rate": 1.277986739419016e-06, "loss": 0.3215, "step": 25273 }, { "epoch": 2.3718093093093096, "grad_norm": 1.1067904813863167, "learning_rate": 1.2776222037533297e-06, "loss": 0.3044, "step": 25274 }, { "epoch": 2.371903153153153, "grad_norm": 1.158138970935525, "learning_rate": 1.2772577124699942e-06, "loss": 0.3138, "step": 25275 }, { "epoch": 2.371996996996997, "grad_norm": 1.0751193558905898, "learning_rate": 1.2768932655733496e-06, "loss": 0.3216, "step": 25276 }, { "epoch": 2.372090840840841, "grad_norm": 1.2439442196714663, "learning_rate": 1.2765288630677463e-06, "loss": 0.248, "step": 25277 }, { "epoch": 2.372184684684685, "grad_norm": 1.339930051455983, "learning_rate": 1.2761645049575277e-06, "loss": 0.3242, "step": 25278 }, { "epoch": 2.3722785285285286, "grad_norm": 1.2210538677214728, "learning_rate": 1.2758001912470364e-06, "loss": 0.3202, "step": 25279 }, { "epoch": 2.3723723723723724, "grad_norm": 1.031172948338445, "learning_rate": 1.2754359219406194e-06, "loss": 0.2899, "step": 25280 }, { "epoch": 2.3724662162162162, "grad_norm": 0.9572604784674599, "learning_rate": 1.275071697042618e-06, "loss": 0.3408, "step": 25281 }, { "epoch": 2.37256006006006, "grad_norm": 1.2504249567900414, "learning_rate": 1.2747075165573741e-06, "loss": 0.344, "step": 25282 }, { "epoch": 2.372653903903904, "grad_norm": 1.1075830068709582, "learning_rate": 1.2743433804892319e-06, "loss": 0.3537, "step": 25283 }, { "epoch": 2.3727477477477477, "grad_norm": 0.9209865449733378, "learning_rate": 1.2739792888425323e-06, "loss": 0.3403, "step": 25284 }, { "epoch": 2.3728415915915915, "grad_norm": 1.1483205234290939, "learning_rate": 1.2736152416216147e-06, "loss": 0.2634, "step": 25285 }, { "epoch": 2.3729354354354353, "grad_norm": 1.2575511212110209, "learning_rate": 1.2732512388308227e-06, "loss": 0.3168, "step": 25286 }, { "epoch": 2.373029279279279, "grad_norm": 0.9234427536483809, "learning_rate": 1.2728872804744947e-06, "loss": 0.2922, "step": 25287 }, { "epoch": 2.373123123123123, "grad_norm": 1.2226077311537273, "learning_rate": 1.2725233665569703e-06, "loss": 0.3243, "step": 25288 }, { "epoch": 2.373216966966967, "grad_norm": 1.0912279798418334, "learning_rate": 1.2721594970825885e-06, "loss": 0.2939, "step": 25289 }, { "epoch": 2.373310810810811, "grad_norm": 1.146417323060057, "learning_rate": 1.271795672055688e-06, "loss": 0.3031, "step": 25290 }, { "epoch": 2.3734046546546548, "grad_norm": 1.1285837774380716, "learning_rate": 1.2714318914806046e-06, "loss": 0.3194, "step": 25291 }, { "epoch": 2.3734984984984986, "grad_norm": 1.0979927895493173, "learning_rate": 1.2710681553616795e-06, "loss": 0.2912, "step": 25292 }, { "epoch": 2.3735923423423424, "grad_norm": 1.1405842241761068, "learning_rate": 1.2707044637032474e-06, "loss": 0.3472, "step": 25293 }, { "epoch": 2.373686186186186, "grad_norm": 1.1166646736254047, "learning_rate": 1.2703408165096432e-06, "loss": 0.3233, "step": 25294 }, { "epoch": 2.37378003003003, "grad_norm": 1.1112861309125877, "learning_rate": 1.2699772137852063e-06, "loss": 0.3092, "step": 25295 }, { "epoch": 2.373873873873874, "grad_norm": 1.0919780106761972, "learning_rate": 1.2696136555342698e-06, "loss": 0.3009, "step": 25296 }, { "epoch": 2.3739677177177176, "grad_norm": 1.226265265329966, "learning_rate": 1.2692501417611668e-06, "loss": 0.3548, "step": 25297 }, { "epoch": 2.3740615615615615, "grad_norm": 1.154675077403192, "learning_rate": 1.2688866724702358e-06, "loss": 0.3407, "step": 25298 }, { "epoch": 2.3741554054054053, "grad_norm": 1.0029995982083677, "learning_rate": 1.2685232476658077e-06, "loss": 0.3315, "step": 25299 }, { "epoch": 2.374249249249249, "grad_norm": 1.0267049987732908, "learning_rate": 1.268159867352216e-06, "loss": 0.3003, "step": 25300 }, { "epoch": 2.374343093093093, "grad_norm": 2.2704750064275827, "learning_rate": 1.2677965315337936e-06, "loss": 0.3553, "step": 25301 }, { "epoch": 2.374436936936937, "grad_norm": 1.1293221372398552, "learning_rate": 1.2674332402148726e-06, "loss": 0.2836, "step": 25302 }, { "epoch": 2.374530780780781, "grad_norm": 1.0983004168951034, "learning_rate": 1.2670699933997826e-06, "loss": 0.3488, "step": 25303 }, { "epoch": 2.3746246246246248, "grad_norm": 1.0190367599568688, "learning_rate": 1.2667067910928582e-06, "loss": 0.3055, "step": 25304 }, { "epoch": 2.3747184684684686, "grad_norm": 1.1816854716174154, "learning_rate": 1.266343633298428e-06, "loss": 0.3224, "step": 25305 }, { "epoch": 2.3748123123123124, "grad_norm": 1.1198807166050266, "learning_rate": 1.2659805200208204e-06, "loss": 0.3108, "step": 25306 }, { "epoch": 2.374906156156156, "grad_norm": 1.216962639594654, "learning_rate": 1.2656174512643682e-06, "loss": 0.3493, "step": 25307 }, { "epoch": 2.375, "grad_norm": 1.0127255190269449, "learning_rate": 1.2652544270333982e-06, "loss": 0.2755, "step": 25308 }, { "epoch": 2.375093843843844, "grad_norm": 1.1837295090744488, "learning_rate": 1.2648914473322382e-06, "loss": 0.3165, "step": 25309 }, { "epoch": 2.3751876876876876, "grad_norm": 1.2558252031385322, "learning_rate": 1.2645285121652184e-06, "loss": 0.3345, "step": 25310 }, { "epoch": 2.3752815315315314, "grad_norm": 1.02806365277546, "learning_rate": 1.2641656215366648e-06, "loss": 0.3064, "step": 25311 }, { "epoch": 2.3753753753753752, "grad_norm": 1.4239842535152423, "learning_rate": 1.263802775450904e-06, "loss": 0.3125, "step": 25312 }, { "epoch": 2.375469219219219, "grad_norm": 1.0785162675756386, "learning_rate": 1.2634399739122633e-06, "loss": 0.2911, "step": 25313 }, { "epoch": 2.375563063063063, "grad_norm": 0.9781565307733493, "learning_rate": 1.2630772169250667e-06, "loss": 0.2856, "step": 25314 }, { "epoch": 2.375656906906907, "grad_norm": 1.0914376556210301, "learning_rate": 1.262714504493639e-06, "loss": 0.3076, "step": 25315 }, { "epoch": 2.375750750750751, "grad_norm": 1.0944525270677525, "learning_rate": 1.2623518366223074e-06, "loss": 0.3151, "step": 25316 }, { "epoch": 2.3758445945945947, "grad_norm": 1.3656674819481067, "learning_rate": 1.261989213315395e-06, "loss": 0.3229, "step": 25317 }, { "epoch": 2.3759384384384385, "grad_norm": 0.9845327257150213, "learning_rate": 1.2616266345772237e-06, "loss": 0.2872, "step": 25318 }, { "epoch": 2.3760322822822824, "grad_norm": 1.1441896844986874, "learning_rate": 1.2612641004121195e-06, "loss": 0.3109, "step": 25319 }, { "epoch": 2.376126126126126, "grad_norm": 2.481826730914231, "learning_rate": 1.2609016108244032e-06, "loss": 0.2985, "step": 25320 }, { "epoch": 2.37621996996997, "grad_norm": 1.0217505900080655, "learning_rate": 1.2605391658183957e-06, "loss": 0.2767, "step": 25321 }, { "epoch": 2.376313813813814, "grad_norm": 1.5204812638314136, "learning_rate": 1.2601767653984216e-06, "loss": 0.303, "step": 25322 }, { "epoch": 2.3764076576576576, "grad_norm": 1.0512208391376263, "learning_rate": 1.2598144095687998e-06, "loss": 0.269, "step": 25323 }, { "epoch": 2.3765015015015014, "grad_norm": 1.3015888427158109, "learning_rate": 1.259452098333851e-06, "loss": 0.3093, "step": 25324 }, { "epoch": 2.3765953453453452, "grad_norm": 1.067478634760994, "learning_rate": 1.259089831697895e-06, "loss": 0.2874, "step": 25325 }, { "epoch": 2.376689189189189, "grad_norm": 1.522140977871693, "learning_rate": 1.2587276096652512e-06, "loss": 0.3381, "step": 25326 }, { "epoch": 2.376783033033033, "grad_norm": 1.2198806702787626, "learning_rate": 1.2583654322402372e-06, "loss": 0.2968, "step": 25327 }, { "epoch": 2.376876876876877, "grad_norm": 2.5070518712029206, "learning_rate": 1.258003299427174e-06, "loss": 0.2979, "step": 25328 }, { "epoch": 2.376970720720721, "grad_norm": 1.1706324992937396, "learning_rate": 1.2576412112303776e-06, "loss": 0.3599, "step": 25329 }, { "epoch": 2.3770645645645647, "grad_norm": 0.909207077596373, "learning_rate": 1.2572791676541641e-06, "loss": 0.3253, "step": 25330 }, { "epoch": 2.3771584084084085, "grad_norm": 1.2621301537790395, "learning_rate": 1.2569171687028531e-06, "loss": 0.3061, "step": 25331 }, { "epoch": 2.3772522522522523, "grad_norm": 1.081566706967486, "learning_rate": 1.256555214380759e-06, "loss": 0.2775, "step": 25332 }, { "epoch": 2.377346096096096, "grad_norm": 1.153877555329757, "learning_rate": 1.2561933046921964e-06, "loss": 0.3645, "step": 25333 }, { "epoch": 2.37743993993994, "grad_norm": 1.2398752092443077, "learning_rate": 1.2558314396414834e-06, "loss": 0.321, "step": 25334 }, { "epoch": 2.3775337837837838, "grad_norm": 1.0622715749160951, "learning_rate": 1.2554696192329325e-06, "loss": 0.2888, "step": 25335 }, { "epoch": 2.3776276276276276, "grad_norm": 1.1320339742169734, "learning_rate": 1.255107843470858e-06, "loss": 0.3173, "step": 25336 }, { "epoch": 2.3777214714714714, "grad_norm": 1.3090138674461416, "learning_rate": 1.254746112359574e-06, "loss": 0.2846, "step": 25337 }, { "epoch": 2.377815315315315, "grad_norm": 1.1145825101146207, "learning_rate": 1.2543844259033921e-06, "loss": 0.331, "step": 25338 }, { "epoch": 2.377909159159159, "grad_norm": 1.258400101286503, "learning_rate": 1.2540227841066243e-06, "loss": 0.3301, "step": 25339 }, { "epoch": 2.378003003003003, "grad_norm": 1.1409103068877835, "learning_rate": 1.2536611869735854e-06, "loss": 0.3037, "step": 25340 }, { "epoch": 2.378096846846847, "grad_norm": 1.241754165595063, "learning_rate": 1.2532996345085847e-06, "loss": 0.3181, "step": 25341 }, { "epoch": 2.3781906906906904, "grad_norm": 1.2603695282834602, "learning_rate": 1.2529381267159325e-06, "loss": 0.3528, "step": 25342 }, { "epoch": 2.3782845345345347, "grad_norm": 1.0724632217877343, "learning_rate": 1.252576663599941e-06, "loss": 0.3416, "step": 25343 }, { "epoch": 2.3783783783783785, "grad_norm": 1.1117424136789928, "learning_rate": 1.2522152451649194e-06, "loss": 0.2974, "step": 25344 }, { "epoch": 2.3784722222222223, "grad_norm": 1.0236706834371205, "learning_rate": 1.2518538714151746e-06, "loss": 0.3167, "step": 25345 }, { "epoch": 2.378566066066066, "grad_norm": 1.1078193797738587, "learning_rate": 1.2514925423550189e-06, "loss": 0.3298, "step": 25346 }, { "epoch": 2.37865990990991, "grad_norm": 0.9687182797911598, "learning_rate": 1.251131257988759e-06, "loss": 0.326, "step": 25347 }, { "epoch": 2.3787537537537538, "grad_norm": 1.2062473276351915, "learning_rate": 1.2507700183207022e-06, "loss": 0.301, "step": 25348 }, { "epoch": 2.3788475975975976, "grad_norm": 1.1900886954574368, "learning_rate": 1.2504088233551553e-06, "loss": 0.3087, "step": 25349 }, { "epoch": 2.3789414414414414, "grad_norm": 1.06574940251824, "learning_rate": 1.2500476730964262e-06, "loss": 0.3363, "step": 25350 }, { "epoch": 2.379035285285285, "grad_norm": 1.0471171124525984, "learning_rate": 1.2496865675488178e-06, "loss": 0.3595, "step": 25351 }, { "epoch": 2.379129129129129, "grad_norm": 1.1636162199895546, "learning_rate": 1.2493255067166392e-06, "loss": 0.3095, "step": 25352 }, { "epoch": 2.379222972972973, "grad_norm": 1.1272826150327864, "learning_rate": 1.2489644906041947e-06, "loss": 0.3458, "step": 25353 }, { "epoch": 2.379316816816817, "grad_norm": 1.777819334583223, "learning_rate": 1.2486035192157865e-06, "loss": 0.3198, "step": 25354 }, { "epoch": 2.3794106606606604, "grad_norm": 1.1221774511950156, "learning_rate": 1.2482425925557213e-06, "loss": 0.2695, "step": 25355 }, { "epoch": 2.3795045045045047, "grad_norm": 1.2103140314334568, "learning_rate": 1.247881710628301e-06, "loss": 0.2771, "step": 25356 }, { "epoch": 2.3795983483483485, "grad_norm": 1.2914248079007842, "learning_rate": 1.2475208734378274e-06, "loss": 0.2798, "step": 25357 }, { "epoch": 2.3796921921921923, "grad_norm": 1.0799480990777701, "learning_rate": 1.2471600809886053e-06, "loss": 0.285, "step": 25358 }, { "epoch": 2.379786036036036, "grad_norm": 1.1102296483753653, "learning_rate": 1.2467993332849354e-06, "loss": 0.3224, "step": 25359 }, { "epoch": 2.37987987987988, "grad_norm": 1.1290155802882635, "learning_rate": 1.2464386303311194e-06, "loss": 0.32, "step": 25360 }, { "epoch": 2.3799737237237237, "grad_norm": 1.0450755269919325, "learning_rate": 1.2460779721314565e-06, "loss": 0.3141, "step": 25361 }, { "epoch": 2.3800675675675675, "grad_norm": 1.2204188467609445, "learning_rate": 1.2457173586902483e-06, "loss": 0.2953, "step": 25362 }, { "epoch": 2.3801614114114114, "grad_norm": 1.3304795955869824, "learning_rate": 1.245356790011792e-06, "loss": 0.2895, "step": 25363 }, { "epoch": 2.380255255255255, "grad_norm": 1.1191015530843313, "learning_rate": 1.24499626610039e-06, "loss": 0.3001, "step": 25364 }, { "epoch": 2.380349099099099, "grad_norm": 1.1808038875743077, "learning_rate": 1.2446357869603398e-06, "loss": 0.3035, "step": 25365 }, { "epoch": 2.380442942942943, "grad_norm": 0.9947097379692811, "learning_rate": 1.2442753525959377e-06, "loss": 0.3072, "step": 25366 }, { "epoch": 2.3805367867867866, "grad_norm": 1.22633931374229, "learning_rate": 1.2439149630114839e-06, "loss": 0.2965, "step": 25367 }, { "epoch": 2.3806306306306304, "grad_norm": 1.0355007847744078, "learning_rate": 1.243554618211274e-06, "loss": 0.302, "step": 25368 }, { "epoch": 2.3807244744744747, "grad_norm": 1.264299211317494, "learning_rate": 1.243194318199603e-06, "loss": 0.3196, "step": 25369 }, { "epoch": 2.3808183183183185, "grad_norm": 1.1457932706696587, "learning_rate": 1.24283406298077e-06, "loss": 0.3149, "step": 25370 }, { "epoch": 2.3809121621621623, "grad_norm": 0.9355032467862378, "learning_rate": 1.2424738525590684e-06, "loss": 0.3026, "step": 25371 }, { "epoch": 2.381006006006006, "grad_norm": 1.1066817630733938, "learning_rate": 1.2421136869387933e-06, "loss": 0.3129, "step": 25372 }, { "epoch": 2.38109984984985, "grad_norm": 1.329862508757573, "learning_rate": 1.2417535661242396e-06, "loss": 0.341, "step": 25373 }, { "epoch": 2.3811936936936937, "grad_norm": 1.0784802292581983, "learning_rate": 1.2413934901197e-06, "loss": 0.2744, "step": 25374 }, { "epoch": 2.3812875375375375, "grad_norm": 1.2572451869163639, "learning_rate": 1.2410334589294665e-06, "loss": 0.3324, "step": 25375 }, { "epoch": 2.3813813813813813, "grad_norm": 0.9983423253509971, "learning_rate": 1.240673472557835e-06, "loss": 0.2913, "step": 25376 }, { "epoch": 2.381475225225225, "grad_norm": 1.0192590405240782, "learning_rate": 1.2403135310090964e-06, "loss": 0.359, "step": 25377 }, { "epoch": 2.381569069069069, "grad_norm": 1.103573488017432, "learning_rate": 1.2399536342875407e-06, "loss": 0.3033, "step": 25378 }, { "epoch": 2.3816629129129128, "grad_norm": 1.2267413910948959, "learning_rate": 1.2395937823974614e-06, "loss": 0.2957, "step": 25379 }, { "epoch": 2.3817567567567566, "grad_norm": 1.1860580115237505, "learning_rate": 1.2392339753431481e-06, "loss": 0.3393, "step": 25380 }, { "epoch": 2.3818506006006004, "grad_norm": 1.031455913320191, "learning_rate": 1.2388742131288895e-06, "loss": 0.3179, "step": 25381 }, { "epoch": 2.3819444444444446, "grad_norm": 1.238419272439172, "learning_rate": 1.2385144957589774e-06, "loss": 0.3123, "step": 25382 }, { "epoch": 2.3820382882882885, "grad_norm": 1.1521024052385334, "learning_rate": 1.2381548232377e-06, "loss": 0.336, "step": 25383 }, { "epoch": 2.3821321321321323, "grad_norm": 1.0882742863852317, "learning_rate": 1.2377951955693451e-06, "loss": 0.3014, "step": 25384 }, { "epoch": 2.382225975975976, "grad_norm": 2.2545006389977167, "learning_rate": 1.237435612758201e-06, "loss": 0.2569, "step": 25385 }, { "epoch": 2.38231981981982, "grad_norm": 1.0023197999064513, "learning_rate": 1.2370760748085552e-06, "loss": 0.2982, "step": 25386 }, { "epoch": 2.3824136636636637, "grad_norm": 1.0654518947776819, "learning_rate": 1.2367165817246922e-06, "loss": 0.3152, "step": 25387 }, { "epoch": 2.3825075075075075, "grad_norm": 0.9208901233155271, "learning_rate": 1.2363571335109025e-06, "loss": 0.3197, "step": 25388 }, { "epoch": 2.3826013513513513, "grad_norm": 1.3310300974831515, "learning_rate": 1.2359977301714692e-06, "loss": 0.3371, "step": 25389 }, { "epoch": 2.382695195195195, "grad_norm": 1.412121487322233, "learning_rate": 1.2356383717106761e-06, "loss": 0.3247, "step": 25390 }, { "epoch": 2.382789039039039, "grad_norm": 1.1928682329564475, "learning_rate": 1.2352790581328123e-06, "loss": 0.3537, "step": 25391 }, { "epoch": 2.3828828828828827, "grad_norm": 1.00391647849037, "learning_rate": 1.2349197894421583e-06, "loss": 0.335, "step": 25392 }, { "epoch": 2.3829767267267266, "grad_norm": 1.2389770978147716, "learning_rate": 1.2345605656429982e-06, "loss": 0.3038, "step": 25393 }, { "epoch": 2.3830705705705704, "grad_norm": 1.222714677955569, "learning_rate": 1.2342013867396164e-06, "loss": 0.3178, "step": 25394 }, { "epoch": 2.3831644144144146, "grad_norm": 1.4765154194559105, "learning_rate": 1.2338422527362947e-06, "loss": 0.3268, "step": 25395 }, { "epoch": 2.3832582582582584, "grad_norm": 1.1070589518135752, "learning_rate": 1.233483163637314e-06, "loss": 0.2611, "step": 25396 }, { "epoch": 2.3833521021021022, "grad_norm": 1.1233545638102567, "learning_rate": 1.2331241194469584e-06, "loss": 0.3005, "step": 25397 }, { "epoch": 2.383445945945946, "grad_norm": 1.3621579489376934, "learning_rate": 1.2327651201695084e-06, "loss": 0.3425, "step": 25398 }, { "epoch": 2.38353978978979, "grad_norm": 1.1020219132568128, "learning_rate": 1.23240616580924e-06, "loss": 0.286, "step": 25399 }, { "epoch": 2.3836336336336337, "grad_norm": 1.6716751948451332, "learning_rate": 1.2320472563704383e-06, "loss": 0.331, "step": 25400 }, { "epoch": 2.3837274774774775, "grad_norm": 1.1219055240121716, "learning_rate": 1.2316883918573809e-06, "loss": 0.3467, "step": 25401 }, { "epoch": 2.3838213213213213, "grad_norm": 1.0742895174235478, "learning_rate": 1.2313295722743441e-06, "loss": 0.2792, "step": 25402 }, { "epoch": 2.383915165165165, "grad_norm": 1.1038164829628996, "learning_rate": 1.2309707976256103e-06, "loss": 0.3244, "step": 25403 }, { "epoch": 2.384009009009009, "grad_norm": 1.1922700740568541, "learning_rate": 1.2306120679154547e-06, "loss": 0.3714, "step": 25404 }, { "epoch": 2.3841028528528527, "grad_norm": 1.1036521113627318, "learning_rate": 1.2302533831481544e-06, "loss": 0.2857, "step": 25405 }, { "epoch": 2.3841966966966965, "grad_norm": 1.2706289454601667, "learning_rate": 1.2298947433279878e-06, "loss": 0.3074, "step": 25406 }, { "epoch": 2.3842905405405403, "grad_norm": 1.152702320180425, "learning_rate": 1.2295361484592294e-06, "loss": 0.364, "step": 25407 }, { "epoch": 2.3843843843843846, "grad_norm": 1.0510033034837793, "learning_rate": 1.2291775985461546e-06, "loss": 0.2889, "step": 25408 }, { "epoch": 2.3844782282282284, "grad_norm": 1.1730352002501787, "learning_rate": 1.22881909359304e-06, "loss": 0.3251, "step": 25409 }, { "epoch": 2.3845720720720722, "grad_norm": 1.252011577821919, "learning_rate": 1.2284606336041594e-06, "loss": 0.3233, "step": 25410 }, { "epoch": 2.384665915915916, "grad_norm": 0.9909461354038056, "learning_rate": 1.2281022185837865e-06, "loss": 0.2969, "step": 25411 }, { "epoch": 2.38475975975976, "grad_norm": 1.040934290231296, "learning_rate": 1.2277438485361949e-06, "loss": 0.2942, "step": 25412 }, { "epoch": 2.3848536036036037, "grad_norm": 1.2771244907465427, "learning_rate": 1.2273855234656574e-06, "loss": 0.3748, "step": 25413 }, { "epoch": 2.3849474474474475, "grad_norm": 1.1905456412561413, "learning_rate": 1.2270272433764446e-06, "loss": 0.3146, "step": 25414 }, { "epoch": 2.3850412912912913, "grad_norm": 1.1945200814116188, "learning_rate": 1.2266690082728317e-06, "loss": 0.3453, "step": 25415 }, { "epoch": 2.385135135135135, "grad_norm": 1.1741589188945298, "learning_rate": 1.2263108181590883e-06, "loss": 0.3696, "step": 25416 }, { "epoch": 2.385228978978979, "grad_norm": 1.1470852898119084, "learning_rate": 1.2259526730394838e-06, "loss": 0.2838, "step": 25417 }, { "epoch": 2.3853228228228227, "grad_norm": 1.1589966557841809, "learning_rate": 1.225594572918291e-06, "loss": 0.2574, "step": 25418 }, { "epoch": 2.3854166666666665, "grad_norm": 0.9661040041586598, "learning_rate": 1.2252365177997782e-06, "loss": 0.2655, "step": 25419 }, { "epoch": 2.3855105105105103, "grad_norm": 0.9296157979719532, "learning_rate": 1.2248785076882136e-06, "loss": 0.3057, "step": 25420 }, { "epoch": 2.3856043543543546, "grad_norm": 1.2118418796102488, "learning_rate": 1.224520542587868e-06, "loss": 0.3026, "step": 25421 }, { "epoch": 2.385698198198198, "grad_norm": 0.9890624637835328, "learning_rate": 1.2241626225030084e-06, "loss": 0.3049, "step": 25422 }, { "epoch": 2.385792042042042, "grad_norm": 1.1065598323195596, "learning_rate": 1.2238047474379023e-06, "loss": 0.3061, "step": 25423 }, { "epoch": 2.385885885885886, "grad_norm": 0.9666686993967661, "learning_rate": 1.2234469173968166e-06, "loss": 0.3081, "step": 25424 }, { "epoch": 2.38597972972973, "grad_norm": 1.1191596011628826, "learning_rate": 1.2230891323840178e-06, "loss": 0.314, "step": 25425 }, { "epoch": 2.3860735735735736, "grad_norm": 1.3804684474921727, "learning_rate": 1.2227313924037703e-06, "loss": 0.315, "step": 25426 }, { "epoch": 2.3861674174174174, "grad_norm": 1.1991826410115383, "learning_rate": 1.2223736974603422e-06, "loss": 0.2862, "step": 25427 }, { "epoch": 2.3862612612612613, "grad_norm": 1.227098262926584, "learning_rate": 1.2220160475579973e-06, "loss": 0.3234, "step": 25428 }, { "epoch": 2.386355105105105, "grad_norm": 1.252404627832169, "learning_rate": 1.2216584427009987e-06, "loss": 0.3109, "step": 25429 }, { "epoch": 2.386448948948949, "grad_norm": 0.9585731595497565, "learning_rate": 1.2213008828936124e-06, "loss": 0.3117, "step": 25430 }, { "epoch": 2.3865427927927927, "grad_norm": 2.467277137620603, "learning_rate": 1.2209433681401001e-06, "loss": 0.3012, "step": 25431 }, { "epoch": 2.3866366366366365, "grad_norm": 1.0140861247324706, "learning_rate": 1.2205858984447233e-06, "loss": 0.3282, "step": 25432 }, { "epoch": 2.3867304804804803, "grad_norm": 0.9779658881594756, "learning_rate": 1.220228473811747e-06, "loss": 0.3038, "step": 25433 }, { "epoch": 2.3868243243243246, "grad_norm": 1.0833261112299026, "learning_rate": 1.2198710942454322e-06, "loss": 0.2807, "step": 25434 }, { "epoch": 2.386918168168168, "grad_norm": 1.1459873310216917, "learning_rate": 1.2195137597500383e-06, "loss": 0.313, "step": 25435 }, { "epoch": 2.387012012012012, "grad_norm": 1.2228551990627512, "learning_rate": 1.2191564703298276e-06, "loss": 0.3048, "step": 25436 }, { "epoch": 2.387105855855856, "grad_norm": 1.1471526209604128, "learning_rate": 1.2187992259890591e-06, "loss": 0.3017, "step": 25437 }, { "epoch": 2.3871996996997, "grad_norm": 1.3454243760262992, "learning_rate": 1.2184420267319907e-06, "loss": 0.3176, "step": 25438 }, { "epoch": 2.3872935435435436, "grad_norm": 1.338163691316769, "learning_rate": 1.2180848725628847e-06, "loss": 0.2956, "step": 25439 }, { "epoch": 2.3873873873873874, "grad_norm": 0.996682899901754, "learning_rate": 1.2177277634859979e-06, "loss": 0.3426, "step": 25440 }, { "epoch": 2.3874812312312312, "grad_norm": 1.131420604362853, "learning_rate": 1.2173706995055867e-06, "loss": 0.3227, "step": 25441 }, { "epoch": 2.387575075075075, "grad_norm": 1.422836385604242, "learning_rate": 1.2170136806259108e-06, "loss": 0.3191, "step": 25442 }, { "epoch": 2.387668918918919, "grad_norm": 1.6924390402932632, "learning_rate": 1.2166567068512264e-06, "loss": 0.3193, "step": 25443 }, { "epoch": 2.3877627627627627, "grad_norm": 1.3042777936575725, "learning_rate": 1.2162997781857884e-06, "loss": 0.2928, "step": 25444 }, { "epoch": 2.3878566066066065, "grad_norm": 1.036502471023169, "learning_rate": 1.2159428946338543e-06, "loss": 0.2882, "step": 25445 }, { "epoch": 2.3879504504504503, "grad_norm": 1.1175669856657786, "learning_rate": 1.2155860561996786e-06, "loss": 0.3055, "step": 25446 }, { "epoch": 2.388044294294294, "grad_norm": 1.123855965895874, "learning_rate": 1.2152292628875157e-06, "loss": 0.3051, "step": 25447 }, { "epoch": 2.388138138138138, "grad_norm": 1.5361532407130039, "learning_rate": 1.2148725147016199e-06, "loss": 0.2972, "step": 25448 }, { "epoch": 2.388231981981982, "grad_norm": 0.9116340879110956, "learning_rate": 1.214515811646244e-06, "loss": 0.3195, "step": 25449 }, { "epoch": 2.388325825825826, "grad_norm": 1.1412248672229235, "learning_rate": 1.214159153725641e-06, "loss": 0.2821, "step": 25450 }, { "epoch": 2.38841966966967, "grad_norm": 1.121079752541884, "learning_rate": 1.213802540944065e-06, "loss": 0.3148, "step": 25451 }, { "epoch": 2.3885135135135136, "grad_norm": 1.1641868229100576, "learning_rate": 1.213445973305767e-06, "loss": 0.3147, "step": 25452 }, { "epoch": 2.3886073573573574, "grad_norm": 1.2348152008985738, "learning_rate": 1.2130894508149966e-06, "loss": 0.3671, "step": 25453 }, { "epoch": 2.388701201201201, "grad_norm": 1.0311090537841276, "learning_rate": 1.2127329734760079e-06, "loss": 0.3245, "step": 25454 }, { "epoch": 2.388795045045045, "grad_norm": 1.909415016915712, "learning_rate": 1.2123765412930498e-06, "loss": 0.3159, "step": 25455 }, { "epoch": 2.388888888888889, "grad_norm": 1.3730860401719311, "learning_rate": 1.2120201542703709e-06, "loss": 0.3632, "step": 25456 }, { "epoch": 2.3889827327327327, "grad_norm": 1.0013392012824975, "learning_rate": 1.2116638124122226e-06, "loss": 0.3187, "step": 25457 }, { "epoch": 2.3890765765765765, "grad_norm": 1.3535451914962786, "learning_rate": 1.2113075157228526e-06, "loss": 0.3035, "step": 25458 }, { "epoch": 2.3891704204204203, "grad_norm": 1.1033109475702723, "learning_rate": 1.2109512642065091e-06, "loss": 0.3245, "step": 25459 }, { "epoch": 2.389264264264264, "grad_norm": 1.15959754856196, "learning_rate": 1.210595057867439e-06, "loss": 0.344, "step": 25460 }, { "epoch": 2.389358108108108, "grad_norm": 1.0187974888593883, "learning_rate": 1.210238896709891e-06, "loss": 0.2863, "step": 25461 }, { "epoch": 2.389451951951952, "grad_norm": 1.2368268518180647, "learning_rate": 1.2098827807381086e-06, "loss": 0.2632, "step": 25462 }, { "epoch": 2.389545795795796, "grad_norm": 1.0413174618192942, "learning_rate": 1.2095267099563413e-06, "loss": 0.2757, "step": 25463 }, { "epoch": 2.3896396396396398, "grad_norm": 1.05244750546005, "learning_rate": 1.2091706843688333e-06, "loss": 0.2772, "step": 25464 }, { "epoch": 2.3897334834834836, "grad_norm": 1.1931679119815923, "learning_rate": 1.208814703979828e-06, "loss": 0.2886, "step": 25465 }, { "epoch": 2.3898273273273274, "grad_norm": 1.1094970653488319, "learning_rate": 1.2084587687935728e-06, "loss": 0.292, "step": 25466 }, { "epoch": 2.389921171171171, "grad_norm": 1.1100306334559453, "learning_rate": 1.2081028788143095e-06, "loss": 0.365, "step": 25467 }, { "epoch": 2.390015015015015, "grad_norm": 1.0594534550493044, "learning_rate": 1.2077470340462804e-06, "loss": 0.3019, "step": 25468 }, { "epoch": 2.390108858858859, "grad_norm": 1.1951099290930713, "learning_rate": 1.2073912344937317e-06, "loss": 0.2973, "step": 25469 }, { "epoch": 2.3902027027027026, "grad_norm": 1.108786298463048, "learning_rate": 1.2070354801609037e-06, "loss": 0.2873, "step": 25470 }, { "epoch": 2.3902965465465464, "grad_norm": 1.1781736661502138, "learning_rate": 1.2066797710520373e-06, "loss": 0.3311, "step": 25471 }, { "epoch": 2.3903903903903903, "grad_norm": 0.9935354778748691, "learning_rate": 1.206324107171375e-06, "loss": 0.3199, "step": 25472 }, { "epoch": 2.390484234234234, "grad_norm": 1.1053417260068163, "learning_rate": 1.2059684885231566e-06, "loss": 0.3345, "step": 25473 }, { "epoch": 2.390578078078078, "grad_norm": 1.3132653415666065, "learning_rate": 1.2056129151116215e-06, "loss": 0.2704, "step": 25474 }, { "epoch": 2.390671921921922, "grad_norm": 1.2405267004979281, "learning_rate": 1.2052573869410112e-06, "loss": 0.3436, "step": 25475 }, { "epoch": 2.390765765765766, "grad_norm": 1.1745107862894804, "learning_rate": 1.2049019040155634e-06, "loss": 0.2744, "step": 25476 }, { "epoch": 2.3908596096096097, "grad_norm": 1.1096727670845172, "learning_rate": 1.2045464663395163e-06, "loss": 0.283, "step": 25477 }, { "epoch": 2.3909534534534536, "grad_norm": 1.0648950746491592, "learning_rate": 1.2041910739171092e-06, "loss": 0.3266, "step": 25478 }, { "epoch": 2.3910472972972974, "grad_norm": 1.1331145397686728, "learning_rate": 1.2038357267525786e-06, "loss": 0.2722, "step": 25479 }, { "epoch": 2.391141141141141, "grad_norm": 0.9831073095080822, "learning_rate": 1.2034804248501603e-06, "loss": 0.3268, "step": 25480 }, { "epoch": 2.391234984984985, "grad_norm": 1.1030824016010534, "learning_rate": 1.2031251682140927e-06, "loss": 0.3309, "step": 25481 }, { "epoch": 2.391328828828829, "grad_norm": 1.1282021121345687, "learning_rate": 1.2027699568486112e-06, "loss": 0.2986, "step": 25482 }, { "epoch": 2.3914226726726726, "grad_norm": 1.1414369253765408, "learning_rate": 1.2024147907579498e-06, "loss": 0.318, "step": 25483 }, { "epoch": 2.3915165165165164, "grad_norm": 1.052016224515142, "learning_rate": 1.2020596699463444e-06, "loss": 0.3084, "step": 25484 }, { "epoch": 2.3916103603603602, "grad_norm": 1.0821514756145845, "learning_rate": 1.2017045944180283e-06, "loss": 0.2879, "step": 25485 }, { "epoch": 2.391704204204204, "grad_norm": 1.1250099839877639, "learning_rate": 1.2013495641772339e-06, "loss": 0.3012, "step": 25486 }, { "epoch": 2.391798048048048, "grad_norm": 1.1703499910333457, "learning_rate": 1.2009945792281968e-06, "loss": 0.3216, "step": 25487 }, { "epoch": 2.391891891891892, "grad_norm": 1.2849728856623435, "learning_rate": 1.200639639575149e-06, "loss": 0.3177, "step": 25488 }, { "epoch": 2.391985735735736, "grad_norm": 1.150865089910948, "learning_rate": 1.2002847452223203e-06, "loss": 0.2819, "step": 25489 }, { "epoch": 2.3920795795795797, "grad_norm": 1.2301278161822211, "learning_rate": 1.1999298961739454e-06, "loss": 0.3275, "step": 25490 }, { "epoch": 2.3921734234234235, "grad_norm": 1.7709992500582319, "learning_rate": 1.199575092434253e-06, "loss": 0.2941, "step": 25491 }, { "epoch": 2.3922672672672673, "grad_norm": 1.1517714186548422, "learning_rate": 1.1992203340074731e-06, "loss": 0.3223, "step": 25492 }, { "epoch": 2.392361111111111, "grad_norm": 0.9406314830019556, "learning_rate": 1.1988656208978379e-06, "loss": 0.3118, "step": 25493 }, { "epoch": 2.392454954954955, "grad_norm": 1.1847480144649891, "learning_rate": 1.198510953109575e-06, "loss": 0.2797, "step": 25494 }, { "epoch": 2.392548798798799, "grad_norm": 1.2852897808104293, "learning_rate": 1.198156330646914e-06, "loss": 0.3296, "step": 25495 }, { "epoch": 2.3926426426426426, "grad_norm": 1.1627343509806536, "learning_rate": 1.1978017535140824e-06, "loss": 0.2684, "step": 25496 }, { "epoch": 2.3927364864864864, "grad_norm": 1.2012152056771357, "learning_rate": 1.1974472217153077e-06, "loss": 0.3177, "step": 25497 }, { "epoch": 2.39283033033033, "grad_norm": 1.0641647631229183, "learning_rate": 1.1970927352548156e-06, "loss": 0.3213, "step": 25498 }, { "epoch": 2.392924174174174, "grad_norm": 1.1307304361391026, "learning_rate": 1.196738294136836e-06, "loss": 0.2974, "step": 25499 }, { "epoch": 2.393018018018018, "grad_norm": 1.1290875930062139, "learning_rate": 1.196383898365594e-06, "loss": 0.3198, "step": 25500 }, { "epoch": 2.393111861861862, "grad_norm": 1.197050053179044, "learning_rate": 1.196029547945312e-06, "loss": 0.3082, "step": 25501 }, { "epoch": 2.393205705705706, "grad_norm": 1.1633574447938742, "learning_rate": 1.1956752428802193e-06, "loss": 0.3223, "step": 25502 }, { "epoch": 2.3932995495495497, "grad_norm": 1.1703621505671031, "learning_rate": 1.1953209831745388e-06, "loss": 0.3416, "step": 25503 }, { "epoch": 2.3933933933933935, "grad_norm": 1.1672951648828862, "learning_rate": 1.1949667688324923e-06, "loss": 0.3639, "step": 25504 }, { "epoch": 2.3934872372372373, "grad_norm": 1.2174595667555212, "learning_rate": 1.1946125998583057e-06, "loss": 0.325, "step": 25505 }, { "epoch": 2.393581081081081, "grad_norm": 1.2131369714656923, "learning_rate": 1.1942584762562021e-06, "loss": 0.3084, "step": 25506 }, { "epoch": 2.393674924924925, "grad_norm": 1.2281515443534055, "learning_rate": 1.1939043980304017e-06, "loss": 0.2962, "step": 25507 }, { "epoch": 2.3937687687687688, "grad_norm": 1.1880883401341746, "learning_rate": 1.193550365185127e-06, "loss": 0.3085, "step": 25508 }, { "epoch": 2.3938626126126126, "grad_norm": 1.3123768031866017, "learning_rate": 1.1931963777245998e-06, "loss": 0.301, "step": 25509 }, { "epoch": 2.3939564564564564, "grad_norm": 1.2027910607640155, "learning_rate": 1.1928424356530388e-06, "loss": 0.3182, "step": 25510 }, { "epoch": 2.3940503003003, "grad_norm": 0.9995194554359081, "learning_rate": 1.1924885389746672e-06, "loss": 0.3198, "step": 25511 }, { "epoch": 2.394144144144144, "grad_norm": 0.9471767492659873, "learning_rate": 1.1921346876937023e-06, "loss": 0.2753, "step": 25512 }, { "epoch": 2.394237987987988, "grad_norm": 1.39632458275147, "learning_rate": 1.1917808818143622e-06, "loss": 0.3404, "step": 25513 }, { "epoch": 2.394331831831832, "grad_norm": 0.9376113673805876, "learning_rate": 1.1914271213408685e-06, "loss": 0.2508, "step": 25514 }, { "epoch": 2.3944256756756754, "grad_norm": 1.0565047401104155, "learning_rate": 1.1910734062774376e-06, "loss": 0.2974, "step": 25515 }, { "epoch": 2.3945195195195197, "grad_norm": 1.2589386340328181, "learning_rate": 1.1907197366282853e-06, "loss": 0.3035, "step": 25516 }, { "epoch": 2.3946133633633635, "grad_norm": 1.0830858041286422, "learning_rate": 1.1903661123976313e-06, "loss": 0.2987, "step": 25517 }, { "epoch": 2.3947072072072073, "grad_norm": 1.486362644483739, "learning_rate": 1.19001253358969e-06, "loss": 0.324, "step": 25518 }, { "epoch": 2.394801051051051, "grad_norm": 1.2810651814207028, "learning_rate": 1.1896590002086767e-06, "loss": 0.3381, "step": 25519 }, { "epoch": 2.394894894894895, "grad_norm": 1.4142599532677096, "learning_rate": 1.1893055122588093e-06, "loss": 0.3493, "step": 25520 }, { "epoch": 2.3949887387387387, "grad_norm": 2.0517696217847208, "learning_rate": 1.1889520697443019e-06, "loss": 0.2926, "step": 25521 }, { "epoch": 2.3950825825825826, "grad_norm": 1.0180820060881208, "learning_rate": 1.1885986726693644e-06, "loss": 0.3126, "step": 25522 }, { "epoch": 2.3951764264264264, "grad_norm": 1.178445497862718, "learning_rate": 1.1882453210382155e-06, "loss": 0.2707, "step": 25523 }, { "epoch": 2.39527027027027, "grad_norm": 1.154855207778593, "learning_rate": 1.187892014855066e-06, "loss": 0.3065, "step": 25524 }, { "epoch": 2.395364114114114, "grad_norm": 1.1238806804719834, "learning_rate": 1.1875387541241268e-06, "loss": 0.3331, "step": 25525 }, { "epoch": 2.395457957957958, "grad_norm": 1.2115254311192243, "learning_rate": 1.187185538849614e-06, "loss": 0.3336, "step": 25526 }, { "epoch": 2.395551801801802, "grad_norm": 1.0120795008948127, "learning_rate": 1.186832369035736e-06, "loss": 0.3038, "step": 25527 }, { "epoch": 2.3956456456456454, "grad_norm": 0.9593898601284909, "learning_rate": 1.1864792446867035e-06, "loss": 0.3009, "step": 25528 }, { "epoch": 2.3957394894894897, "grad_norm": 1.2568285866872189, "learning_rate": 1.1861261658067291e-06, "loss": 0.2867, "step": 25529 }, { "epoch": 2.3958333333333335, "grad_norm": 1.2948550218386834, "learning_rate": 1.185773132400021e-06, "loss": 0.3241, "step": 25530 }, { "epoch": 2.3959271771771773, "grad_norm": 1.1802583835601275, "learning_rate": 1.1854201444707874e-06, "loss": 0.3335, "step": 25531 }, { "epoch": 2.396021021021021, "grad_norm": 1.0073352642012963, "learning_rate": 1.1850672020232402e-06, "loss": 0.3, "step": 25532 }, { "epoch": 2.396114864864865, "grad_norm": 1.2183793078561433, "learning_rate": 1.1847143050615856e-06, "loss": 0.3033, "step": 25533 }, { "epoch": 2.3962087087087087, "grad_norm": 1.0874652152860098, "learning_rate": 1.1843614535900312e-06, "loss": 0.3287, "step": 25534 }, { "epoch": 2.3963025525525525, "grad_norm": 2.0913071364117837, "learning_rate": 1.1840086476127844e-06, "loss": 0.2935, "step": 25535 }, { "epoch": 2.3963963963963963, "grad_norm": 0.9919947675194621, "learning_rate": 1.1836558871340514e-06, "loss": 0.2845, "step": 25536 }, { "epoch": 2.39649024024024, "grad_norm": 2.6626277653538732, "learning_rate": 1.1833031721580373e-06, "loss": 0.301, "step": 25537 }, { "epoch": 2.396584084084084, "grad_norm": 1.0872614215920862, "learning_rate": 1.1829505026889499e-06, "loss": 0.2817, "step": 25538 }, { "epoch": 2.3966779279279278, "grad_norm": 1.3673940235690052, "learning_rate": 1.1825978787309933e-06, "loss": 0.3405, "step": 25539 }, { "epoch": 2.3967717717717716, "grad_norm": 1.1461269451227807, "learning_rate": 1.1822453002883693e-06, "loss": 0.3399, "step": 25540 }, { "epoch": 2.3968656156156154, "grad_norm": 1.2252954673856722, "learning_rate": 1.1818927673652857e-06, "loss": 0.2988, "step": 25541 }, { "epoch": 2.3969594594594597, "grad_norm": 1.1041270545511648, "learning_rate": 1.1815402799659442e-06, "loss": 0.3127, "step": 25542 }, { "epoch": 2.3970533033033035, "grad_norm": 1.1088706712320486, "learning_rate": 1.1811878380945458e-06, "loss": 0.3291, "step": 25543 }, { "epoch": 2.3971471471471473, "grad_norm": 1.1403126805475696, "learning_rate": 1.1808354417552958e-06, "loss": 0.2935, "step": 25544 }, { "epoch": 2.397240990990991, "grad_norm": 0.9659552140328049, "learning_rate": 1.180483090952394e-06, "loss": 0.3274, "step": 25545 }, { "epoch": 2.397334834834835, "grad_norm": 1.3655503368983102, "learning_rate": 1.1801307856900423e-06, "loss": 0.3064, "step": 25546 }, { "epoch": 2.3974286786786787, "grad_norm": 1.0924082625223843, "learning_rate": 1.1797785259724405e-06, "loss": 0.3152, "step": 25547 }, { "epoch": 2.3975225225225225, "grad_norm": 1.0652464495554066, "learning_rate": 1.179426311803789e-06, "loss": 0.3004, "step": 25548 }, { "epoch": 2.3976163663663663, "grad_norm": 0.9976232665409983, "learning_rate": 1.1790741431882857e-06, "loss": 0.2957, "step": 25549 }, { "epoch": 2.39771021021021, "grad_norm": 1.053126696616382, "learning_rate": 1.1787220201301325e-06, "loss": 0.3168, "step": 25550 }, { "epoch": 2.397804054054054, "grad_norm": 1.6001747259611836, "learning_rate": 1.1783699426335265e-06, "loss": 0.3294, "step": 25551 }, { "epoch": 2.3978978978978978, "grad_norm": 1.1820785564890517, "learning_rate": 1.178017910702664e-06, "loss": 0.3375, "step": 25552 }, { "epoch": 2.3979917417417416, "grad_norm": 1.0028593468628682, "learning_rate": 1.1776659243417454e-06, "loss": 0.3589, "step": 25553 }, { "epoch": 2.3980855855855854, "grad_norm": 1.0682446464297088, "learning_rate": 1.1773139835549658e-06, "loss": 0.3275, "step": 25554 }, { "epoch": 2.3981794294294296, "grad_norm": 0.8794450195154454, "learning_rate": 1.17696208834652e-06, "loss": 0.3178, "step": 25555 }, { "epoch": 2.3982732732732734, "grad_norm": 7.494432199483368, "learning_rate": 1.1766102387206063e-06, "loss": 0.2637, "step": 25556 }, { "epoch": 2.3983671171171173, "grad_norm": 2.4640934920635793, "learning_rate": 1.1762584346814192e-06, "loss": 0.3405, "step": 25557 }, { "epoch": 2.398460960960961, "grad_norm": 1.1094974186837194, "learning_rate": 1.1759066762331523e-06, "loss": 0.2911, "step": 25558 }, { "epoch": 2.398554804804805, "grad_norm": 1.1488298814511997, "learning_rate": 1.1755549633800006e-06, "loss": 0.343, "step": 25559 }, { "epoch": 2.3986486486486487, "grad_norm": 1.2898843148867554, "learning_rate": 1.1752032961261572e-06, "loss": 0.3277, "step": 25560 }, { "epoch": 2.3987424924924925, "grad_norm": 1.087450975754689, "learning_rate": 1.1748516744758131e-06, "loss": 0.3297, "step": 25561 }, { "epoch": 2.3988363363363363, "grad_norm": 2.3109624991381343, "learning_rate": 1.1745000984331645e-06, "loss": 0.2952, "step": 25562 }, { "epoch": 2.39893018018018, "grad_norm": 1.0156241356693823, "learning_rate": 1.1741485680024017e-06, "loss": 0.2757, "step": 25563 }, { "epoch": 2.399024024024024, "grad_norm": 1.1531410305107288, "learning_rate": 1.1737970831877137e-06, "loss": 0.3477, "step": 25564 }, { "epoch": 2.3991178678678677, "grad_norm": 1.187319899126375, "learning_rate": 1.1734456439932956e-06, "loss": 0.3157, "step": 25565 }, { "epoch": 2.3992117117117115, "grad_norm": 1.2512635653842772, "learning_rate": 1.173094250423335e-06, "loss": 0.3319, "step": 25566 }, { "epoch": 2.3993055555555554, "grad_norm": 1.255073185805941, "learning_rate": 1.1727429024820202e-06, "loss": 0.3043, "step": 25567 }, { "epoch": 2.3993993993993996, "grad_norm": 1.7786725615771042, "learning_rate": 1.1723916001735448e-06, "loss": 0.2874, "step": 25568 }, { "epoch": 2.3994932432432434, "grad_norm": 1.7346817184044545, "learning_rate": 1.172040343502094e-06, "loss": 0.2936, "step": 25569 }, { "epoch": 2.3995870870870872, "grad_norm": 1.1592739517344879, "learning_rate": 1.1716891324718565e-06, "loss": 0.327, "step": 25570 }, { "epoch": 2.399680930930931, "grad_norm": 1.1133721218342427, "learning_rate": 1.1713379670870207e-06, "loss": 0.2875, "step": 25571 }, { "epoch": 2.399774774774775, "grad_norm": 1.1209483272404264, "learning_rate": 1.1709868473517727e-06, "loss": 0.3025, "step": 25572 }, { "epoch": 2.3998686186186187, "grad_norm": 0.9488882340254238, "learning_rate": 1.1706357732702977e-06, "loss": 0.2988, "step": 25573 }, { "epoch": 2.3999624624624625, "grad_norm": 1.1286233768248135, "learning_rate": 1.170284744846784e-06, "loss": 0.3316, "step": 25574 }, { "epoch": 2.4000563063063063, "grad_norm": 1.8355948334392573, "learning_rate": 1.1699337620854168e-06, "loss": 0.267, "step": 25575 }, { "epoch": 2.40015015015015, "grad_norm": 1.0716295914520981, "learning_rate": 1.1695828249903784e-06, "loss": 0.3217, "step": 25576 }, { "epoch": 2.400243993993994, "grad_norm": 1.174228612151725, "learning_rate": 1.1692319335658564e-06, "loss": 0.3237, "step": 25577 }, { "epoch": 2.4003378378378377, "grad_norm": 1.012543160250421, "learning_rate": 1.1688810878160329e-06, "loss": 0.3104, "step": 25578 }, { "epoch": 2.4004316816816815, "grad_norm": 1.107835735806633, "learning_rate": 1.1685302877450893e-06, "loss": 0.2888, "step": 25579 }, { "epoch": 2.4005255255255253, "grad_norm": 0.9507544006735714, "learning_rate": 1.1681795333572117e-06, "loss": 0.2621, "step": 25580 }, { "epoch": 2.4006193693693696, "grad_norm": 1.308650161465193, "learning_rate": 1.1678288246565806e-06, "loss": 0.333, "step": 25581 }, { "epoch": 2.4007132132132134, "grad_norm": 1.089424539498261, "learning_rate": 1.1674781616473768e-06, "loss": 0.2885, "step": 25582 }, { "epoch": 2.400807057057057, "grad_norm": 1.1032703010900091, "learning_rate": 1.1671275443337826e-06, "loss": 0.2883, "step": 25583 }, { "epoch": 2.400900900900901, "grad_norm": 3.1144803220671036, "learning_rate": 1.1667769727199773e-06, "loss": 0.3059, "step": 25584 }, { "epoch": 2.400994744744745, "grad_norm": 1.1024171863426278, "learning_rate": 1.1664264468101404e-06, "loss": 0.3048, "step": 25585 }, { "epoch": 2.4010885885885886, "grad_norm": 1.0723078456729764, "learning_rate": 1.1660759666084536e-06, "loss": 0.3011, "step": 25586 }, { "epoch": 2.4011824324324325, "grad_norm": 1.419670815495807, "learning_rate": 1.165725532119094e-06, "loss": 0.3234, "step": 25587 }, { "epoch": 2.4012762762762763, "grad_norm": 1.115783492013357, "learning_rate": 1.165375143346239e-06, "loss": 0.3378, "step": 25588 }, { "epoch": 2.40137012012012, "grad_norm": 1.039029415877996, "learning_rate": 1.1650248002940685e-06, "loss": 0.2998, "step": 25589 }, { "epoch": 2.401463963963964, "grad_norm": 1.0909160501007593, "learning_rate": 1.1646745029667588e-06, "loss": 0.2845, "step": 25590 }, { "epoch": 2.4015578078078077, "grad_norm": 0.9501987576128234, "learning_rate": 1.164324251368485e-06, "loss": 0.3088, "step": 25591 }, { "epoch": 2.4016516516516515, "grad_norm": 1.3348055001329855, "learning_rate": 1.163974045503426e-06, "loss": 0.318, "step": 25592 }, { "epoch": 2.4017454954954953, "grad_norm": 1.1308535283447922, "learning_rate": 1.1636238853757558e-06, "loss": 0.3157, "step": 25593 }, { "epoch": 2.4018393393393396, "grad_norm": 1.127475629856495, "learning_rate": 1.1632737709896497e-06, "loss": 0.3508, "step": 25594 }, { "epoch": 2.401933183183183, "grad_norm": 0.9073294776623233, "learning_rate": 1.1629237023492822e-06, "loss": 0.2987, "step": 25595 }, { "epoch": 2.402027027027027, "grad_norm": 1.2493086290398705, "learning_rate": 1.1625736794588265e-06, "loss": 0.3001, "step": 25596 }, { "epoch": 2.402120870870871, "grad_norm": 1.0937282635876708, "learning_rate": 1.1622237023224554e-06, "loss": 0.2945, "step": 25597 }, { "epoch": 2.402214714714715, "grad_norm": 1.1625476182274812, "learning_rate": 1.1618737709443445e-06, "loss": 0.3375, "step": 25598 }, { "epoch": 2.4023085585585586, "grad_norm": 1.2612550065168233, "learning_rate": 1.1615238853286638e-06, "loss": 0.3318, "step": 25599 }, { "epoch": 2.4024024024024024, "grad_norm": 2.201356439495697, "learning_rate": 1.1611740454795845e-06, "loss": 0.2936, "step": 25600 }, { "epoch": 2.4024962462462462, "grad_norm": 2.7848898555170094, "learning_rate": 1.1608242514012797e-06, "loss": 0.2828, "step": 25601 }, { "epoch": 2.40259009009009, "grad_norm": 1.289554518992591, "learning_rate": 1.1604745030979199e-06, "loss": 0.2944, "step": 25602 }, { "epoch": 2.402683933933934, "grad_norm": 1.1148790765178214, "learning_rate": 1.1601248005736726e-06, "loss": 0.3164, "step": 25603 }, { "epoch": 2.4027777777777777, "grad_norm": 1.2023494671933759, "learning_rate": 1.159775143832711e-06, "loss": 0.2989, "step": 25604 }, { "epoch": 2.4028716216216215, "grad_norm": 1.0867286634025546, "learning_rate": 1.159425532879202e-06, "loss": 0.3239, "step": 25605 }, { "epoch": 2.4029654654654653, "grad_norm": 1.1249687586656492, "learning_rate": 1.1590759677173147e-06, "loss": 0.3037, "step": 25606 }, { "epoch": 2.4030593093093096, "grad_norm": 1.1648827319026398, "learning_rate": 1.158726448351216e-06, "loss": 0.3051, "step": 25607 }, { "epoch": 2.403153153153153, "grad_norm": 1.2331236087115138, "learning_rate": 1.158376974785075e-06, "loss": 0.2991, "step": 25608 }, { "epoch": 2.403246996996997, "grad_norm": 1.149155116339462, "learning_rate": 1.158027547023055e-06, "loss": 0.3122, "step": 25609 }, { "epoch": 2.403340840840841, "grad_norm": 1.2967401533348277, "learning_rate": 1.157678165069327e-06, "loss": 0.2935, "step": 25610 }, { "epoch": 2.403434684684685, "grad_norm": 1.3244179649650973, "learning_rate": 1.1573288289280543e-06, "loss": 0.3238, "step": 25611 }, { "epoch": 2.4035285285285286, "grad_norm": 1.068481026971421, "learning_rate": 1.1569795386034004e-06, "loss": 0.3253, "step": 25612 }, { "epoch": 2.4036223723723724, "grad_norm": 2.1807144085456485, "learning_rate": 1.1566302940995332e-06, "loss": 0.3491, "step": 25613 }, { "epoch": 2.4037162162162162, "grad_norm": 1.0579459143979169, "learning_rate": 1.1562810954206156e-06, "loss": 0.3256, "step": 25614 }, { "epoch": 2.40381006006006, "grad_norm": 1.09638113124694, "learning_rate": 1.1559319425708094e-06, "loss": 0.3144, "step": 25615 }, { "epoch": 2.403903903903904, "grad_norm": 1.1603925013246674, "learning_rate": 1.1555828355542809e-06, "loss": 0.3712, "step": 25616 }, { "epoch": 2.4039977477477477, "grad_norm": 1.1534637739903548, "learning_rate": 1.1552337743751902e-06, "loss": 0.3254, "step": 25617 }, { "epoch": 2.4040915915915915, "grad_norm": 1.2181837602880607, "learning_rate": 1.1548847590377e-06, "loss": 0.3125, "step": 25618 }, { "epoch": 2.4041854354354353, "grad_norm": 1.1540286003170794, "learning_rate": 1.1545357895459708e-06, "loss": 0.2887, "step": 25619 }, { "epoch": 2.404279279279279, "grad_norm": 1.1681148406743271, "learning_rate": 1.1541868659041645e-06, "loss": 0.3025, "step": 25620 }, { "epoch": 2.404373123123123, "grad_norm": 1.4420053034707143, "learning_rate": 1.153837988116439e-06, "loss": 0.3039, "step": 25621 }, { "epoch": 2.404466966966967, "grad_norm": 1.2297675653170697, "learning_rate": 1.1534891561869576e-06, "loss": 0.3416, "step": 25622 }, { "epoch": 2.404560810810811, "grad_norm": 1.0813545419737605, "learning_rate": 1.153140370119878e-06, "loss": 0.2668, "step": 25623 }, { "epoch": 2.4046546546546548, "grad_norm": 1.3297423343914316, "learning_rate": 1.1527916299193565e-06, "loss": 0.3275, "step": 25624 }, { "epoch": 2.4047484984984986, "grad_norm": 1.036431392331172, "learning_rate": 1.152442935589555e-06, "loss": 0.32, "step": 25625 }, { "epoch": 2.4048423423423424, "grad_norm": 1.0694096634864427, "learning_rate": 1.1520942871346292e-06, "loss": 0.283, "step": 25626 }, { "epoch": 2.404936186186186, "grad_norm": 1.2107264306795844, "learning_rate": 1.1517456845587343e-06, "loss": 0.311, "step": 25627 }, { "epoch": 2.40503003003003, "grad_norm": 1.079860070929734, "learning_rate": 1.1513971278660302e-06, "loss": 0.2787, "step": 25628 }, { "epoch": 2.405123873873874, "grad_norm": 1.3767210747609695, "learning_rate": 1.151048617060671e-06, "loss": 0.3108, "step": 25629 }, { "epoch": 2.4052177177177176, "grad_norm": 1.1179008896912819, "learning_rate": 1.150700152146812e-06, "loss": 0.3255, "step": 25630 }, { "epoch": 2.4053115615615615, "grad_norm": 1.0257416744705738, "learning_rate": 1.1503517331286086e-06, "loss": 0.3223, "step": 25631 }, { "epoch": 2.4054054054054053, "grad_norm": 1.0358902833486827, "learning_rate": 1.1500033600102139e-06, "loss": 0.3453, "step": 25632 }, { "epoch": 2.405499249249249, "grad_norm": 1.1917646524530512, "learning_rate": 1.1496550327957812e-06, "loss": 0.3077, "step": 25633 }, { "epoch": 2.405593093093093, "grad_norm": 1.0502162232188466, "learning_rate": 1.1493067514894662e-06, "loss": 0.3236, "step": 25634 }, { "epoch": 2.405686936936937, "grad_norm": 1.5329087298789306, "learning_rate": 1.1489585160954197e-06, "loss": 0.2575, "step": 25635 }, { "epoch": 2.405780780780781, "grad_norm": 1.094403953582594, "learning_rate": 1.1486103266177933e-06, "loss": 0.3123, "step": 25636 }, { "epoch": 2.4058746246246248, "grad_norm": 1.0581435318829229, "learning_rate": 1.14826218306074e-06, "loss": 0.2822, "step": 25637 }, { "epoch": 2.4059684684684686, "grad_norm": 1.0668205714802783, "learning_rate": 1.1479140854284104e-06, "loss": 0.3289, "step": 25638 }, { "epoch": 2.4060623123123124, "grad_norm": 1.1215854880080156, "learning_rate": 1.1475660337249528e-06, "loss": 0.3325, "step": 25639 }, { "epoch": 2.406156156156156, "grad_norm": 1.2853314145179044, "learning_rate": 1.1472180279545202e-06, "loss": 0.2558, "step": 25640 }, { "epoch": 2.40625, "grad_norm": 1.1347444723761584, "learning_rate": 1.1468700681212608e-06, "loss": 0.3506, "step": 25641 }, { "epoch": 2.406343843843844, "grad_norm": 1.2742042675791487, "learning_rate": 1.146522154229322e-06, "loss": 0.3111, "step": 25642 }, { "epoch": 2.4064376876876876, "grad_norm": 1.3021835420221293, "learning_rate": 1.146174286282855e-06, "loss": 0.3148, "step": 25643 }, { "epoch": 2.4065315315315314, "grad_norm": 1.1426106627258423, "learning_rate": 1.1458264642860045e-06, "loss": 0.3539, "step": 25644 }, { "epoch": 2.4066253753753752, "grad_norm": 1.4534288067488867, "learning_rate": 1.1454786882429175e-06, "loss": 0.3656, "step": 25645 }, { "epoch": 2.406719219219219, "grad_norm": 1.2696385496101354, "learning_rate": 1.145130958157743e-06, "loss": 0.2878, "step": 25646 }, { "epoch": 2.406813063063063, "grad_norm": 1.1590519890231303, "learning_rate": 1.144783274034626e-06, "loss": 0.3382, "step": 25647 }, { "epoch": 2.406906906906907, "grad_norm": 1.6343572557045394, "learning_rate": 1.1444356358777104e-06, "loss": 0.3039, "step": 25648 }, { "epoch": 2.407000750750751, "grad_norm": 1.0397698114326885, "learning_rate": 1.1440880436911434e-06, "loss": 0.3353, "step": 25649 }, { "epoch": 2.4070945945945947, "grad_norm": 0.9858414152078785, "learning_rate": 1.143740497479069e-06, "loss": 0.3497, "step": 25650 }, { "epoch": 2.4071884384384385, "grad_norm": 1.2151557410076803, "learning_rate": 1.1433929972456286e-06, "loss": 0.2873, "step": 25651 }, { "epoch": 2.4072822822822824, "grad_norm": 1.040381358068519, "learning_rate": 1.1430455429949689e-06, "loss": 0.3291, "step": 25652 }, { "epoch": 2.407376126126126, "grad_norm": 1.2117114653179955, "learning_rate": 1.142698134731231e-06, "loss": 0.2744, "step": 25653 }, { "epoch": 2.40746996996997, "grad_norm": 1.054377631640731, "learning_rate": 1.1423507724585563e-06, "loss": 0.3178, "step": 25654 }, { "epoch": 2.407563813813814, "grad_norm": 0.9809095527376962, "learning_rate": 1.1420034561810883e-06, "loss": 0.2874, "step": 25655 }, { "epoch": 2.4076576576576576, "grad_norm": 0.9608428694821237, "learning_rate": 1.1416561859029674e-06, "loss": 0.3431, "step": 25656 }, { "epoch": 2.4077515015015014, "grad_norm": 1.1117037190530625, "learning_rate": 1.1413089616283341e-06, "loss": 0.3533, "step": 25657 }, { "epoch": 2.4078453453453452, "grad_norm": 1.335272123809622, "learning_rate": 1.1409617833613279e-06, "loss": 0.2985, "step": 25658 }, { "epoch": 2.407939189189189, "grad_norm": 1.1375587330661643, "learning_rate": 1.1406146511060884e-06, "loss": 0.3003, "step": 25659 }, { "epoch": 2.408033033033033, "grad_norm": 0.9638708707816015, "learning_rate": 1.1402675648667533e-06, "loss": 0.3198, "step": 25660 }, { "epoch": 2.408126876876877, "grad_norm": 1.0540550044648824, "learning_rate": 1.1399205246474644e-06, "loss": 0.3164, "step": 25661 }, { "epoch": 2.408220720720721, "grad_norm": 1.40291762501674, "learning_rate": 1.1395735304523564e-06, "loss": 0.3577, "step": 25662 }, { "epoch": 2.4083145645645647, "grad_norm": 1.2052144981774986, "learning_rate": 1.139226582285567e-06, "loss": 0.2943, "step": 25663 }, { "epoch": 2.4084084084084085, "grad_norm": 1.1445067113868426, "learning_rate": 1.1388796801512341e-06, "loss": 0.3328, "step": 25664 }, { "epoch": 2.4085022522522523, "grad_norm": 1.0768067374774621, "learning_rate": 1.1385328240534938e-06, "loss": 0.3138, "step": 25665 }, { "epoch": 2.408596096096096, "grad_norm": 1.160627591214106, "learning_rate": 1.138186013996479e-06, "loss": 0.3199, "step": 25666 }, { "epoch": 2.40868993993994, "grad_norm": 1.147880570473127, "learning_rate": 1.1378392499843288e-06, "loss": 0.3134, "step": 25667 }, { "epoch": 2.4087837837837838, "grad_norm": 1.1392285354102778, "learning_rate": 1.1374925320211755e-06, "loss": 0.2982, "step": 25668 }, { "epoch": 2.4088776276276276, "grad_norm": 1.008642544791957, "learning_rate": 1.1371458601111534e-06, "loss": 0.3281, "step": 25669 }, { "epoch": 2.4089714714714714, "grad_norm": 1.1841382583053688, "learning_rate": 1.1367992342583961e-06, "loss": 0.3362, "step": 25670 }, { "epoch": 2.409065315315315, "grad_norm": 1.2081078410879535, "learning_rate": 1.1364526544670362e-06, "loss": 0.3156, "step": 25671 }, { "epoch": 2.409159159159159, "grad_norm": 1.2804686470135211, "learning_rate": 1.1361061207412044e-06, "loss": 0.3139, "step": 25672 }, { "epoch": 2.409253003003003, "grad_norm": 1.2181802619229005, "learning_rate": 1.1357596330850357e-06, "loss": 0.3144, "step": 25673 }, { "epoch": 2.409346846846847, "grad_norm": 1.0908637533641965, "learning_rate": 1.1354131915026595e-06, "loss": 0.337, "step": 25674 }, { "epoch": 2.4094406906906904, "grad_norm": 1.0541270827407594, "learning_rate": 1.1350667959982053e-06, "loss": 0.3087, "step": 25675 }, { "epoch": 2.4095345345345347, "grad_norm": 1.0132716168360476, "learning_rate": 1.1347204465758055e-06, "loss": 0.2999, "step": 25676 }, { "epoch": 2.4096283783783785, "grad_norm": 1.050072365456503, "learning_rate": 1.1343741432395894e-06, "loss": 0.2935, "step": 25677 }, { "epoch": 2.4097222222222223, "grad_norm": 1.1222279394680306, "learning_rate": 1.1340278859936837e-06, "loss": 0.3044, "step": 25678 }, { "epoch": 2.409816066066066, "grad_norm": 0.9924146411157062, "learning_rate": 1.1336816748422202e-06, "loss": 0.3449, "step": 25679 }, { "epoch": 2.40990990990991, "grad_norm": 1.0726389312382074, "learning_rate": 1.1333355097893246e-06, "loss": 0.2995, "step": 25680 }, { "epoch": 2.4100037537537538, "grad_norm": 5.11865230959768, "learning_rate": 1.1329893908391253e-06, "loss": 0.3083, "step": 25681 }, { "epoch": 2.4100975975975976, "grad_norm": 1.598332220480977, "learning_rate": 1.132643317995748e-06, "loss": 0.3612, "step": 25682 }, { "epoch": 2.4101914414414414, "grad_norm": 1.080909208832341, "learning_rate": 1.13229729126332e-06, "loss": 0.3134, "step": 25683 }, { "epoch": 2.410285285285285, "grad_norm": 1.1277639290508576, "learning_rate": 1.1319513106459657e-06, "loss": 0.3152, "step": 25684 }, { "epoch": 2.410379129129129, "grad_norm": 1.3379894653509545, "learning_rate": 1.1316053761478118e-06, "loss": 0.3114, "step": 25685 }, { "epoch": 2.410472972972973, "grad_norm": 1.0607508113947106, "learning_rate": 1.1312594877729828e-06, "loss": 0.3068, "step": 25686 }, { "epoch": 2.410566816816817, "grad_norm": 2.013086430086557, "learning_rate": 1.1309136455256003e-06, "loss": 0.3324, "step": 25687 }, { "epoch": 2.4106606606606604, "grad_norm": 1.1157368378437278, "learning_rate": 1.1305678494097915e-06, "loss": 0.2893, "step": 25688 }, { "epoch": 2.4107545045045047, "grad_norm": 1.755496753042084, "learning_rate": 1.1302220994296776e-06, "loss": 0.3588, "step": 25689 }, { "epoch": 2.4108483483483485, "grad_norm": 1.150929725267548, "learning_rate": 1.1298763955893793e-06, "loss": 0.2912, "step": 25690 }, { "epoch": 2.4109421921921923, "grad_norm": 1.22608298931299, "learning_rate": 1.1295307378930221e-06, "loss": 0.2933, "step": 25691 }, { "epoch": 2.411036036036036, "grad_norm": 1.0762609203974431, "learning_rate": 1.1291851263447256e-06, "loss": 0.338, "step": 25692 }, { "epoch": 2.41112987987988, "grad_norm": 1.0934615232343081, "learning_rate": 1.1288395609486096e-06, "loss": 0.3292, "step": 25693 }, { "epoch": 2.4112237237237237, "grad_norm": 1.0996562375701284, "learning_rate": 1.1284940417087959e-06, "loss": 0.3785, "step": 25694 }, { "epoch": 2.4113175675675675, "grad_norm": 1.385646410334298, "learning_rate": 1.1281485686294031e-06, "loss": 0.3577, "step": 25695 }, { "epoch": 2.4114114114114114, "grad_norm": 1.4052641104619108, "learning_rate": 1.1278031417145497e-06, "loss": 0.321, "step": 25696 }, { "epoch": 2.411505255255255, "grad_norm": 1.013049094218584, "learning_rate": 1.1274577609683557e-06, "loss": 0.3148, "step": 25697 }, { "epoch": 2.411599099099099, "grad_norm": 1.2281772850200212, "learning_rate": 1.127112426394939e-06, "loss": 0.2969, "step": 25698 }, { "epoch": 2.411692942942943, "grad_norm": 1.2324031424545592, "learning_rate": 1.1267671379984152e-06, "loss": 0.3115, "step": 25699 }, { "epoch": 2.4117867867867866, "grad_norm": 2.9123478785586423, "learning_rate": 1.1264218957829043e-06, "loss": 0.3268, "step": 25700 }, { "epoch": 2.4118806306306304, "grad_norm": 1.1633392804812086, "learning_rate": 1.1260766997525208e-06, "loss": 0.3164, "step": 25701 }, { "epoch": 2.4119744744744747, "grad_norm": 1.2161644498171014, "learning_rate": 1.1257315499113796e-06, "loss": 0.314, "step": 25702 }, { "epoch": 2.4120683183183185, "grad_norm": 1.0940452590925536, "learning_rate": 1.1253864462635984e-06, "loss": 0.281, "step": 25703 }, { "epoch": 2.4121621621621623, "grad_norm": 1.1217104829099966, "learning_rate": 1.1250413888132906e-06, "loss": 0.3251, "step": 25704 }, { "epoch": 2.412256006006006, "grad_norm": 1.1916392488937635, "learning_rate": 1.1246963775645698e-06, "loss": 0.2967, "step": 25705 }, { "epoch": 2.41234984984985, "grad_norm": 1.1788410971507752, "learning_rate": 1.124351412521551e-06, "loss": 0.3529, "step": 25706 }, { "epoch": 2.4124436936936937, "grad_norm": 0.9411379709534071, "learning_rate": 1.124006493688346e-06, "loss": 0.3276, "step": 25707 }, { "epoch": 2.4125375375375375, "grad_norm": 1.348370465289583, "learning_rate": 1.1236616210690666e-06, "loss": 0.2761, "step": 25708 }, { "epoch": 2.4126313813813813, "grad_norm": 1.3134654206003118, "learning_rate": 1.1233167946678269e-06, "loss": 0.3283, "step": 25709 }, { "epoch": 2.412725225225225, "grad_norm": 1.1167393247283393, "learning_rate": 1.122972014488738e-06, "loss": 0.2846, "step": 25710 }, { "epoch": 2.412819069069069, "grad_norm": 1.0228771457497696, "learning_rate": 1.1226272805359083e-06, "loss": 0.3319, "step": 25711 }, { "epoch": 2.4129129129129128, "grad_norm": 1.11595707978295, "learning_rate": 1.1222825928134513e-06, "loss": 0.3049, "step": 25712 }, { "epoch": 2.4130067567567566, "grad_norm": 1.1088385237530696, "learning_rate": 1.1219379513254753e-06, "loss": 0.2926, "step": 25713 }, { "epoch": 2.4131006006006004, "grad_norm": 1.2574671695038517, "learning_rate": 1.1215933560760877e-06, "loss": 0.2861, "step": 25714 }, { "epoch": 2.4131944444444446, "grad_norm": 1.3032646441022917, "learning_rate": 1.121248807069401e-06, "loss": 0.2855, "step": 25715 }, { "epoch": 2.4132882882882885, "grad_norm": 1.035653493549417, "learning_rate": 1.1209043043095214e-06, "loss": 0.3345, "step": 25716 }, { "epoch": 2.4133821321321323, "grad_norm": 1.3061605454461105, "learning_rate": 1.1205598478005559e-06, "loss": 0.3071, "step": 25717 }, { "epoch": 2.413475975975976, "grad_norm": 2.151880283575593, "learning_rate": 1.1202154375466118e-06, "loss": 0.3287, "step": 25718 }, { "epoch": 2.41356981981982, "grad_norm": 1.1480175883428478, "learning_rate": 1.1198710735517959e-06, "loss": 0.3157, "step": 25719 }, { "epoch": 2.4136636636636637, "grad_norm": 1.368317137549603, "learning_rate": 1.1195267558202127e-06, "loss": 0.2767, "step": 25720 }, { "epoch": 2.4137575075075075, "grad_norm": 1.149199314458692, "learning_rate": 1.1191824843559696e-06, "loss": 0.31, "step": 25721 }, { "epoch": 2.4138513513513513, "grad_norm": 1.1150274474099924, "learning_rate": 1.1188382591631707e-06, "loss": 0.3042, "step": 25722 }, { "epoch": 2.413945195195195, "grad_norm": 1.0782855989340394, "learning_rate": 1.1184940802459187e-06, "loss": 0.3281, "step": 25723 }, { "epoch": 2.414039039039039, "grad_norm": 1.0138788647250614, "learning_rate": 1.1181499476083202e-06, "loss": 0.2993, "step": 25724 }, { "epoch": 2.4141328828828827, "grad_norm": 1.3450944451545932, "learning_rate": 1.1178058612544761e-06, "loss": 0.3096, "step": 25725 }, { "epoch": 2.4142267267267266, "grad_norm": 1.0891478278034314, "learning_rate": 1.1174618211884892e-06, "loss": 0.2766, "step": 25726 }, { "epoch": 2.4143205705705704, "grad_norm": 1.1701807535373139, "learning_rate": 1.1171178274144628e-06, "loss": 0.3155, "step": 25727 }, { "epoch": 2.4144144144144146, "grad_norm": 1.1119999274450136, "learning_rate": 1.1167738799364975e-06, "loss": 0.3317, "step": 25728 }, { "epoch": 2.4145082582582584, "grad_norm": 1.0603913200841617, "learning_rate": 1.1164299787586941e-06, "loss": 0.2913, "step": 25729 }, { "epoch": 2.4146021021021022, "grad_norm": 1.1557078545791553, "learning_rate": 1.1160861238851534e-06, "loss": 0.3194, "step": 25730 }, { "epoch": 2.414695945945946, "grad_norm": 1.1172080938550717, "learning_rate": 1.1157423153199748e-06, "loss": 0.3545, "step": 25731 }, { "epoch": 2.41478978978979, "grad_norm": 1.2493294398150343, "learning_rate": 1.1153985530672562e-06, "loss": 0.2828, "step": 25732 }, { "epoch": 2.4148836336336337, "grad_norm": 1.0820199741839247, "learning_rate": 1.1150548371310998e-06, "loss": 0.3067, "step": 25733 }, { "epoch": 2.4149774774774775, "grad_norm": 1.0679940505063565, "learning_rate": 1.1147111675156013e-06, "loss": 0.2699, "step": 25734 }, { "epoch": 2.4150713213213213, "grad_norm": 1.3144939529621236, "learning_rate": 1.1143675442248575e-06, "loss": 0.3313, "step": 25735 }, { "epoch": 2.415165165165165, "grad_norm": 1.2026538319111546, "learning_rate": 1.1140239672629682e-06, "loss": 0.346, "step": 25736 }, { "epoch": 2.415259009009009, "grad_norm": 1.3877445175337728, "learning_rate": 1.1136804366340287e-06, "loss": 0.3051, "step": 25737 }, { "epoch": 2.4153528528528527, "grad_norm": 1.1241726560623657, "learning_rate": 1.1133369523421333e-06, "loss": 0.3482, "step": 25738 }, { "epoch": 2.4154466966966965, "grad_norm": 1.034968449414773, "learning_rate": 1.11299351439138e-06, "loss": 0.3131, "step": 25739 }, { "epoch": 2.4155405405405403, "grad_norm": 1.3832739736298942, "learning_rate": 1.1126501227858626e-06, "loss": 0.3157, "step": 25740 }, { "epoch": 2.4156343843843846, "grad_norm": 1.088427236285294, "learning_rate": 1.1123067775296758e-06, "loss": 0.2858, "step": 25741 }, { "epoch": 2.4157282282282284, "grad_norm": 1.3252337644936636, "learning_rate": 1.1119634786269124e-06, "loss": 0.3432, "step": 25742 }, { "epoch": 2.4158220720720722, "grad_norm": 1.0554153335100613, "learning_rate": 1.111620226081666e-06, "loss": 0.3379, "step": 25743 }, { "epoch": 2.415915915915916, "grad_norm": 1.1381350752300663, "learning_rate": 1.1112770198980277e-06, "loss": 0.3465, "step": 25744 }, { "epoch": 2.41600975975976, "grad_norm": 1.270800094734081, "learning_rate": 1.110933860080093e-06, "loss": 0.2765, "step": 25745 }, { "epoch": 2.4161036036036037, "grad_norm": 1.0532594650987923, "learning_rate": 1.1105907466319515e-06, "loss": 0.3195, "step": 25746 }, { "epoch": 2.4161974474474475, "grad_norm": 1.1349895201217937, "learning_rate": 1.1102476795576927e-06, "loss": 0.3086, "step": 25747 }, { "epoch": 2.4162912912912913, "grad_norm": 2.0668590323316223, "learning_rate": 1.1099046588614098e-06, "loss": 0.31, "step": 25748 }, { "epoch": 2.416385135135135, "grad_norm": 1.1221780532211438, "learning_rate": 1.1095616845471918e-06, "loss": 0.3355, "step": 25749 }, { "epoch": 2.416478978978979, "grad_norm": 1.285131202371431, "learning_rate": 1.1092187566191264e-06, "loss": 0.3189, "step": 25750 }, { "epoch": 2.4165728228228227, "grad_norm": 1.178349325207567, "learning_rate": 1.108875875081305e-06, "loss": 0.3096, "step": 25751 }, { "epoch": 2.4166666666666665, "grad_norm": 1.301869309256014, "learning_rate": 1.1085330399378147e-06, "loss": 0.3334, "step": 25752 }, { "epoch": 2.4167605105105103, "grad_norm": 1.1130513301804683, "learning_rate": 1.108190251192743e-06, "loss": 0.3313, "step": 25753 }, { "epoch": 2.4168543543543546, "grad_norm": 0.9923085613654652, "learning_rate": 1.1078475088501767e-06, "loss": 0.3491, "step": 25754 }, { "epoch": 2.416948198198198, "grad_norm": 1.204600104023328, "learning_rate": 1.1075048129142025e-06, "loss": 0.3287, "step": 25755 }, { "epoch": 2.417042042042042, "grad_norm": 1.1252229182295033, "learning_rate": 1.107162163388905e-06, "loss": 0.3207, "step": 25756 }, { "epoch": 2.417135885885886, "grad_norm": 1.1450636128005152, "learning_rate": 1.1068195602783733e-06, "loss": 0.3282, "step": 25757 }, { "epoch": 2.41722972972973, "grad_norm": 1.1270306976832007, "learning_rate": 1.1064770035866896e-06, "loss": 0.3149, "step": 25758 }, { "epoch": 2.4173235735735736, "grad_norm": 1.670649103710613, "learning_rate": 1.106134493317938e-06, "loss": 0.3145, "step": 25759 }, { "epoch": 2.4174174174174174, "grad_norm": 1.3208562886649993, "learning_rate": 1.1057920294762037e-06, "loss": 0.3375, "step": 25760 }, { "epoch": 2.4175112612612613, "grad_norm": 1.0660486219025056, "learning_rate": 1.10544961206557e-06, "loss": 0.3197, "step": 25761 }, { "epoch": 2.417605105105105, "grad_norm": 1.0992546069976583, "learning_rate": 1.1051072410901177e-06, "loss": 0.3083, "step": 25762 }, { "epoch": 2.417698948948949, "grad_norm": 1.2646859829986687, "learning_rate": 1.104764916553931e-06, "loss": 0.3032, "step": 25763 }, { "epoch": 2.4177927927927927, "grad_norm": 1.2623469470636357, "learning_rate": 1.1044226384610906e-06, "loss": 0.2916, "step": 25764 }, { "epoch": 2.4178866366366365, "grad_norm": 1.015456829128786, "learning_rate": 1.1040804068156762e-06, "loss": 0.33, "step": 25765 }, { "epoch": 2.4179804804804803, "grad_norm": 1.3080575846530988, "learning_rate": 1.103738221621773e-06, "loss": 0.3039, "step": 25766 }, { "epoch": 2.4180743243243246, "grad_norm": 1.2215077994671713, "learning_rate": 1.1033960828834557e-06, "loss": 0.3296, "step": 25767 }, { "epoch": 2.418168168168168, "grad_norm": 1.1187047944805106, "learning_rate": 1.1030539906048044e-06, "loss": 0.3044, "step": 25768 }, { "epoch": 2.418262012012012, "grad_norm": 1.1074739392703754, "learning_rate": 1.1027119447899e-06, "loss": 0.3178, "step": 25769 }, { "epoch": 2.418355855855856, "grad_norm": 1.4309720366835945, "learning_rate": 1.1023699454428199e-06, "loss": 0.329, "step": 25770 }, { "epoch": 2.4184496996997, "grad_norm": 1.0050323088357744, "learning_rate": 1.1020279925676403e-06, "loss": 0.3012, "step": 25771 }, { "epoch": 2.4185435435435436, "grad_norm": 1.087917823599963, "learning_rate": 1.1016860861684415e-06, "loss": 0.3021, "step": 25772 }, { "epoch": 2.4186373873873874, "grad_norm": 1.1605520224282735, "learning_rate": 1.101344226249298e-06, "loss": 0.3152, "step": 25773 }, { "epoch": 2.4187312312312312, "grad_norm": 1.0519838481589152, "learning_rate": 1.1010024128142843e-06, "loss": 0.3287, "step": 25774 }, { "epoch": 2.418825075075075, "grad_norm": 1.223455112204064, "learning_rate": 1.1006606458674797e-06, "loss": 0.3474, "step": 25775 }, { "epoch": 2.418918918918919, "grad_norm": 1.209884957699974, "learning_rate": 1.100318925412957e-06, "loss": 0.3486, "step": 25776 }, { "epoch": 2.4190127627627627, "grad_norm": 0.9514700536903682, "learning_rate": 1.0999772514547892e-06, "loss": 0.3182, "step": 25777 }, { "epoch": 2.4191066066066065, "grad_norm": 1.0907333626757503, "learning_rate": 1.0996356239970535e-06, "loss": 0.322, "step": 25778 }, { "epoch": 2.4192004504504503, "grad_norm": 1.2740172165348733, "learning_rate": 1.099294043043821e-06, "loss": 0.3465, "step": 25779 }, { "epoch": 2.419294294294294, "grad_norm": 1.0611943714069414, "learning_rate": 1.0989525085991643e-06, "loss": 0.3507, "step": 25780 }, { "epoch": 2.419388138138138, "grad_norm": 1.2320650490879617, "learning_rate": 1.0986110206671563e-06, "loss": 0.2882, "step": 25781 }, { "epoch": 2.419481981981982, "grad_norm": 1.0631732761442108, "learning_rate": 1.0982695792518682e-06, "loss": 0.307, "step": 25782 }, { "epoch": 2.419575825825826, "grad_norm": 1.6917485390627578, "learning_rate": 1.0979281843573696e-06, "loss": 0.3118, "step": 25783 }, { "epoch": 2.41966966966967, "grad_norm": 0.9657199040334469, "learning_rate": 1.0975868359877345e-06, "loss": 0.3188, "step": 25784 }, { "epoch": 2.4197635135135136, "grad_norm": 1.141839589019693, "learning_rate": 1.0972455341470306e-06, "loss": 0.3199, "step": 25785 }, { "epoch": 2.4198573573573574, "grad_norm": 1.1830676886087015, "learning_rate": 1.0969042788393259e-06, "loss": 0.285, "step": 25786 }, { "epoch": 2.419951201201201, "grad_norm": 1.0737766038472156, "learning_rate": 1.096563070068692e-06, "loss": 0.2942, "step": 25787 }, { "epoch": 2.420045045045045, "grad_norm": 1.1124383922381809, "learning_rate": 1.0962219078391968e-06, "loss": 0.2964, "step": 25788 }, { "epoch": 2.420138888888889, "grad_norm": 1.1951272068794796, "learning_rate": 1.0958807921549053e-06, "loss": 0.3032, "step": 25789 }, { "epoch": 2.4202327327327327, "grad_norm": 1.1652215609645682, "learning_rate": 1.0955397230198878e-06, "loss": 0.3341, "step": 25790 }, { "epoch": 2.4203265765765765, "grad_norm": 1.2987268917633672, "learning_rate": 1.0951987004382102e-06, "loss": 0.2868, "step": 25791 }, { "epoch": 2.4204204204204203, "grad_norm": 1.1177433789242384, "learning_rate": 1.094857724413938e-06, "loss": 0.321, "step": 25792 }, { "epoch": 2.420514264264264, "grad_norm": 1.529666555792999, "learning_rate": 1.0945167949511365e-06, "loss": 0.3147, "step": 25793 }, { "epoch": 2.420608108108108, "grad_norm": 1.1925266407426198, "learning_rate": 1.0941759120538708e-06, "loss": 0.3238, "step": 25794 }, { "epoch": 2.420701951951952, "grad_norm": 1.1051452207211292, "learning_rate": 1.0938350757262044e-06, "loss": 0.307, "step": 25795 }, { "epoch": 2.420795795795796, "grad_norm": 1.2314053051901293, "learning_rate": 1.0934942859722031e-06, "loss": 0.3369, "step": 25796 }, { "epoch": 2.4208896396396398, "grad_norm": 1.2044862223773753, "learning_rate": 1.0931535427959295e-06, "loss": 0.3153, "step": 25797 }, { "epoch": 2.4209834834834836, "grad_norm": 1.0430489191816816, "learning_rate": 1.0928128462014447e-06, "loss": 0.2967, "step": 25798 }, { "epoch": 2.4210773273273274, "grad_norm": 1.0554375613174451, "learning_rate": 1.0924721961928136e-06, "loss": 0.3175, "step": 25799 }, { "epoch": 2.421171171171171, "grad_norm": 1.3594068359311722, "learning_rate": 1.0921315927740961e-06, "loss": 0.3297, "step": 25800 }, { "epoch": 2.421265015015015, "grad_norm": 1.1158183202679035, "learning_rate": 1.0917910359493523e-06, "loss": 0.3034, "step": 25801 }, { "epoch": 2.421358858858859, "grad_norm": 1.0139832511536035, "learning_rate": 1.0914505257226454e-06, "loss": 0.2897, "step": 25802 }, { "epoch": 2.4214527027027026, "grad_norm": 1.319018693601514, "learning_rate": 1.0911100620980336e-06, "loss": 0.3337, "step": 25803 }, { "epoch": 2.4215465465465464, "grad_norm": 1.0669996004151854, "learning_rate": 1.0907696450795767e-06, "loss": 0.3173, "step": 25804 }, { "epoch": 2.4216403903903903, "grad_norm": 1.0936274878849819, "learning_rate": 1.090429274671334e-06, "loss": 0.3154, "step": 25805 }, { "epoch": 2.421734234234234, "grad_norm": 0.9386383181576718, "learning_rate": 1.0900889508773628e-06, "loss": 0.3372, "step": 25806 }, { "epoch": 2.421828078078078, "grad_norm": 1.0490587834635323, "learning_rate": 1.0897486737017193e-06, "loss": 0.3036, "step": 25807 }, { "epoch": 2.421921921921922, "grad_norm": 1.477122808867744, "learning_rate": 1.0894084431484647e-06, "loss": 0.2929, "step": 25808 }, { "epoch": 2.422015765765766, "grad_norm": 1.290171234037296, "learning_rate": 1.0890682592216533e-06, "loss": 0.3217, "step": 25809 }, { "epoch": 2.4221096096096097, "grad_norm": 1.0280983553606036, "learning_rate": 1.0887281219253398e-06, "loss": 0.3035, "step": 25810 }, { "epoch": 2.4222034534534536, "grad_norm": 1.1174863024639425, "learning_rate": 1.0883880312635826e-06, "loss": 0.3215, "step": 25811 }, { "epoch": 2.4222972972972974, "grad_norm": 1.179415639607507, "learning_rate": 1.0880479872404353e-06, "loss": 0.2885, "step": 25812 }, { "epoch": 2.422391141141141, "grad_norm": 1.2303031436103091, "learning_rate": 1.0877079898599507e-06, "loss": 0.3278, "step": 25813 }, { "epoch": 2.422484984984985, "grad_norm": 1.058793699126431, "learning_rate": 1.0873680391261858e-06, "loss": 0.2834, "step": 25814 }, { "epoch": 2.422578828828829, "grad_norm": 1.0947274738935653, "learning_rate": 1.0870281350431921e-06, "loss": 0.3327, "step": 25815 }, { "epoch": 2.4226726726726726, "grad_norm": 1.277508961796798, "learning_rate": 1.0866882776150228e-06, "loss": 0.3269, "step": 25816 }, { "epoch": 2.4227665165165164, "grad_norm": 1.5394930069011348, "learning_rate": 1.0863484668457297e-06, "loss": 0.2756, "step": 25817 }, { "epoch": 2.4228603603603602, "grad_norm": 1.0937338039657425, "learning_rate": 1.0860087027393645e-06, "loss": 0.3123, "step": 25818 }, { "epoch": 2.422954204204204, "grad_norm": 1.1773391867359235, "learning_rate": 1.0856689852999764e-06, "loss": 0.3166, "step": 25819 }, { "epoch": 2.423048048048048, "grad_norm": 0.9966412803530008, "learning_rate": 1.085329314531619e-06, "loss": 0.3103, "step": 25820 }, { "epoch": 2.423141891891892, "grad_norm": 1.474877719627404, "learning_rate": 1.0849896904383417e-06, "loss": 0.314, "step": 25821 }, { "epoch": 2.423235735735736, "grad_norm": 1.0571756962351389, "learning_rate": 1.084650113024191e-06, "loss": 0.2969, "step": 25822 }, { "epoch": 2.4233295795795797, "grad_norm": 1.0493217106999757, "learning_rate": 1.0843105822932193e-06, "loss": 0.331, "step": 25823 }, { "epoch": 2.4234234234234235, "grad_norm": 1.0781988128744187, "learning_rate": 1.0839710982494733e-06, "loss": 0.3165, "step": 25824 }, { "epoch": 2.4235172672672673, "grad_norm": 1.175933927873114, "learning_rate": 1.0836316608969998e-06, "loss": 0.283, "step": 25825 }, { "epoch": 2.423611111111111, "grad_norm": 1.8223799628686441, "learning_rate": 1.083292270239848e-06, "loss": 0.2793, "step": 25826 }, { "epoch": 2.423704954954955, "grad_norm": 1.304245375668834, "learning_rate": 1.0829529262820636e-06, "loss": 0.3043, "step": 25827 }, { "epoch": 2.423798798798799, "grad_norm": 1.1883795396174006, "learning_rate": 1.0826136290276923e-06, "loss": 0.3094, "step": 25828 }, { "epoch": 2.4238926426426426, "grad_norm": 1.3278016265847332, "learning_rate": 1.08227437848078e-06, "loss": 0.3148, "step": 25829 }, { "epoch": 2.4239864864864864, "grad_norm": 1.5024149466971701, "learning_rate": 1.081935174645371e-06, "loss": 0.337, "step": 25830 }, { "epoch": 2.42408033033033, "grad_norm": 1.061115888964361, "learning_rate": 1.0815960175255092e-06, "loss": 0.3252, "step": 25831 }, { "epoch": 2.424174174174174, "grad_norm": 0.9965137382709297, "learning_rate": 1.08125690712524e-06, "loss": 0.3088, "step": 25832 }, { "epoch": 2.424268018018018, "grad_norm": 1.0899338585562317, "learning_rate": 1.0809178434486061e-06, "loss": 0.3291, "step": 25833 }, { "epoch": 2.424361861861862, "grad_norm": 2.3786498890499668, "learning_rate": 1.0805788264996492e-06, "loss": 0.3083, "step": 25834 }, { "epoch": 2.424455705705706, "grad_norm": 1.3558787027512211, "learning_rate": 1.0802398562824129e-06, "loss": 0.3085, "step": 25835 }, { "epoch": 2.4245495495495497, "grad_norm": 1.2450008246603324, "learning_rate": 1.0799009328009385e-06, "loss": 0.3658, "step": 25836 }, { "epoch": 2.4246433933933935, "grad_norm": 1.5346664220051907, "learning_rate": 1.0795620560592652e-06, "loss": 0.2878, "step": 25837 }, { "epoch": 2.4247372372372373, "grad_norm": 1.1933025323966544, "learning_rate": 1.079223226061436e-06, "loss": 0.3198, "step": 25838 }, { "epoch": 2.424831081081081, "grad_norm": 1.2110552240718417, "learning_rate": 1.0788844428114902e-06, "loss": 0.3825, "step": 25839 }, { "epoch": 2.424924924924925, "grad_norm": 1.5747702040177716, "learning_rate": 1.0785457063134663e-06, "loss": 0.2904, "step": 25840 }, { "epoch": 2.4250187687687688, "grad_norm": 1.2462649056772106, "learning_rate": 1.0782070165714037e-06, "loss": 0.3064, "step": 25841 }, { "epoch": 2.4251126126126126, "grad_norm": 1.0300500264148977, "learning_rate": 1.0778683735893404e-06, "loss": 0.3392, "step": 25842 }, { "epoch": 2.4252064564564564, "grad_norm": 1.2312065164861656, "learning_rate": 1.0775297773713127e-06, "loss": 0.3497, "step": 25843 }, { "epoch": 2.4253003003003, "grad_norm": 1.2542223573319355, "learning_rate": 1.0771912279213604e-06, "loss": 0.3307, "step": 25844 }, { "epoch": 2.425394144144144, "grad_norm": 1.1735441340046524, "learning_rate": 1.076852725243519e-06, "loss": 0.3123, "step": 25845 }, { "epoch": 2.425487987987988, "grad_norm": 1.1759944515691811, "learning_rate": 1.0765142693418229e-06, "loss": 0.3086, "step": 25846 }, { "epoch": 2.425581831831832, "grad_norm": 4.266230954595159, "learning_rate": 1.07617586022031e-06, "loss": 0.2905, "step": 25847 }, { "epoch": 2.4256756756756754, "grad_norm": 1.057145303476394, "learning_rate": 1.0758374978830144e-06, "loss": 0.3272, "step": 25848 }, { "epoch": 2.4257695195195197, "grad_norm": 1.2061413029736223, "learning_rate": 1.0754991823339683e-06, "loss": 0.3119, "step": 25849 }, { "epoch": 2.4258633633633635, "grad_norm": 1.07208848738041, "learning_rate": 1.0751609135772096e-06, "loss": 0.3191, "step": 25850 }, { "epoch": 2.4259572072072073, "grad_norm": 1.2010540575185764, "learning_rate": 1.0748226916167686e-06, "loss": 0.3349, "step": 25851 }, { "epoch": 2.426051051051051, "grad_norm": 1.143031410617319, "learning_rate": 1.0744845164566787e-06, "loss": 0.3287, "step": 25852 }, { "epoch": 2.426144894894895, "grad_norm": 10.532185121388608, "learning_rate": 1.0741463881009718e-06, "loss": 0.2896, "step": 25853 }, { "epoch": 2.4262387387387387, "grad_norm": 1.1012173964220335, "learning_rate": 1.0738083065536797e-06, "loss": 0.3315, "step": 25854 }, { "epoch": 2.4263325825825826, "grad_norm": 1.1684208763466444, "learning_rate": 1.0734702718188323e-06, "loss": 0.2981, "step": 25855 }, { "epoch": 2.4264264264264264, "grad_norm": 1.483178586037909, "learning_rate": 1.0731322839004615e-06, "loss": 0.314, "step": 25856 }, { "epoch": 2.42652027027027, "grad_norm": 1.4393569041897623, "learning_rate": 1.0727943428025971e-06, "loss": 0.2385, "step": 25857 }, { "epoch": 2.426614114114114, "grad_norm": 1.158843628595159, "learning_rate": 1.0724564485292665e-06, "loss": 0.327, "step": 25858 }, { "epoch": 2.426707957957958, "grad_norm": 1.2968743014521058, "learning_rate": 1.0721186010845014e-06, "loss": 0.3134, "step": 25859 }, { "epoch": 2.426801801801802, "grad_norm": 1.1071778225034685, "learning_rate": 1.0717808004723285e-06, "loss": 0.3081, "step": 25860 }, { "epoch": 2.4268956456456454, "grad_norm": 1.0159893443723451, "learning_rate": 1.071443046696774e-06, "loss": 0.3005, "step": 25861 }, { "epoch": 2.4269894894894897, "grad_norm": 1.4535798772215969, "learning_rate": 1.0711053397618682e-06, "loss": 0.3215, "step": 25862 }, { "epoch": 2.4270833333333335, "grad_norm": 1.0335058638258054, "learning_rate": 1.0707676796716354e-06, "loss": 0.3224, "step": 25863 }, { "epoch": 2.4271771771771773, "grad_norm": 1.2062795637759578, "learning_rate": 1.0704300664301021e-06, "loss": 0.3068, "step": 25864 }, { "epoch": 2.427271021021021, "grad_norm": 1.038446490515355, "learning_rate": 1.0700925000412937e-06, "loss": 0.3264, "step": 25865 }, { "epoch": 2.427364864864865, "grad_norm": 1.099813474620145, "learning_rate": 1.0697549805092344e-06, "loss": 0.3071, "step": 25866 }, { "epoch": 2.4274587087087087, "grad_norm": 1.3262412013899523, "learning_rate": 1.0694175078379483e-06, "loss": 0.3475, "step": 25867 }, { "epoch": 2.4275525525525525, "grad_norm": 1.2110748517868006, "learning_rate": 1.0690800820314612e-06, "loss": 0.2703, "step": 25868 }, { "epoch": 2.4276463963963963, "grad_norm": 1.1269132947854712, "learning_rate": 1.0687427030937947e-06, "loss": 0.3097, "step": 25869 }, { "epoch": 2.42774024024024, "grad_norm": 1.0919823739882737, "learning_rate": 1.0684053710289706e-06, "loss": 0.3167, "step": 25870 }, { "epoch": 2.427834084084084, "grad_norm": 1.154888748621594, "learning_rate": 1.0680680858410126e-06, "loss": 0.3218, "step": 25871 }, { "epoch": 2.4279279279279278, "grad_norm": 1.1375480403679148, "learning_rate": 1.067730847533942e-06, "loss": 0.3335, "step": 25872 }, { "epoch": 2.4280217717717716, "grad_norm": 0.9828675902816806, "learning_rate": 1.067393656111778e-06, "loss": 0.3111, "step": 25873 }, { "epoch": 2.4281156156156154, "grad_norm": 1.0985195406096573, "learning_rate": 1.0670565115785436e-06, "loss": 0.3428, "step": 25874 }, { "epoch": 2.4282094594594597, "grad_norm": 1.2845840723103619, "learning_rate": 1.0667194139382574e-06, "loss": 0.3516, "step": 25875 }, { "epoch": 2.4283033033033035, "grad_norm": 1.1963475417356377, "learning_rate": 1.0663823631949389e-06, "loss": 0.35, "step": 25876 }, { "epoch": 2.4283971471471473, "grad_norm": 1.6107509406056104, "learning_rate": 1.0660453593526054e-06, "loss": 0.324, "step": 25877 }, { "epoch": 2.428490990990991, "grad_norm": 1.1584165211887738, "learning_rate": 1.065708402415277e-06, "loss": 0.3092, "step": 25878 }, { "epoch": 2.428584834834835, "grad_norm": 1.417364663160592, "learning_rate": 1.0653714923869684e-06, "loss": 0.3172, "step": 25879 }, { "epoch": 2.4286786786786787, "grad_norm": 1.233808692250865, "learning_rate": 1.0650346292717005e-06, "loss": 0.3301, "step": 25880 }, { "epoch": 2.4287725225225225, "grad_norm": 1.3338482061566723, "learning_rate": 1.0646978130734875e-06, "loss": 0.3283, "step": 25881 }, { "epoch": 2.4288663663663663, "grad_norm": 1.0926889366560573, "learning_rate": 1.0643610437963441e-06, "loss": 0.3423, "step": 25882 }, { "epoch": 2.42896021021021, "grad_norm": 1.3216736073648994, "learning_rate": 1.064024321444289e-06, "loss": 0.3319, "step": 25883 }, { "epoch": 2.429054054054054, "grad_norm": 0.9998042666779188, "learning_rate": 1.0636876460213352e-06, "loss": 0.2828, "step": 25884 }, { "epoch": 2.4291478978978978, "grad_norm": 1.2837883652991404, "learning_rate": 1.063351017531495e-06, "loss": 0.3105, "step": 25885 }, { "epoch": 2.4292417417417416, "grad_norm": 1.4636189166500304, "learning_rate": 1.063014435978786e-06, "loss": 0.328, "step": 25886 }, { "epoch": 2.4293355855855854, "grad_norm": 1.3412060514297666, "learning_rate": 1.0626779013672184e-06, "loss": 0.3026, "step": 25887 }, { "epoch": 2.4294294294294296, "grad_norm": 1.1695682952571942, "learning_rate": 1.0623414137008047e-06, "loss": 0.3212, "step": 25888 }, { "epoch": 2.4295232732732734, "grad_norm": 1.1418657049245764, "learning_rate": 1.0620049729835602e-06, "loss": 0.2982, "step": 25889 }, { "epoch": 2.4296171171171173, "grad_norm": 1.1545413157928803, "learning_rate": 1.0616685792194931e-06, "loss": 0.3134, "step": 25890 }, { "epoch": 2.429710960960961, "grad_norm": 1.206651148179332, "learning_rate": 1.061332232412613e-06, "loss": 0.3162, "step": 25891 }, { "epoch": 2.429804804804805, "grad_norm": 1.2811630351467231, "learning_rate": 1.060995932566934e-06, "loss": 0.3021, "step": 25892 }, { "epoch": 2.4298986486486487, "grad_norm": 1.0849761697102092, "learning_rate": 1.0606596796864637e-06, "loss": 0.3153, "step": 25893 }, { "epoch": 2.4299924924924925, "grad_norm": 1.1226928405066485, "learning_rate": 1.0603234737752104e-06, "loss": 0.3678, "step": 25894 }, { "epoch": 2.4300863363363363, "grad_norm": 1.197681600348998, "learning_rate": 1.059987314837186e-06, "loss": 0.3035, "step": 25895 }, { "epoch": 2.43018018018018, "grad_norm": 1.502290359743292, "learning_rate": 1.0596512028763962e-06, "loss": 0.2658, "step": 25896 }, { "epoch": 2.430274024024024, "grad_norm": 1.0492319857121373, "learning_rate": 1.0593151378968475e-06, "loss": 0.3016, "step": 25897 }, { "epoch": 2.4303678678678677, "grad_norm": 1.1998497510840214, "learning_rate": 1.0589791199025495e-06, "loss": 0.2985, "step": 25898 }, { "epoch": 2.4304617117117115, "grad_norm": 1.171747345527032, "learning_rate": 1.058643148897507e-06, "loss": 0.3046, "step": 25899 }, { "epoch": 2.4305555555555554, "grad_norm": 1.115434723137179, "learning_rate": 1.058307224885725e-06, "loss": 0.2831, "step": 25900 }, { "epoch": 2.4306493993993996, "grad_norm": 0.9528635136812516, "learning_rate": 1.057971347871211e-06, "loss": 0.3029, "step": 25901 }, { "epoch": 2.4307432432432434, "grad_norm": 1.1610869914594446, "learning_rate": 1.057635517857969e-06, "loss": 0.3983, "step": 25902 }, { "epoch": 2.4308370870870872, "grad_norm": 1.2053464021719889, "learning_rate": 1.0572997348500026e-06, "loss": 0.3445, "step": 25903 }, { "epoch": 2.430930930930931, "grad_norm": 1.2270306052900397, "learning_rate": 1.0569639988513152e-06, "loss": 0.2826, "step": 25904 }, { "epoch": 2.431024774774775, "grad_norm": 1.0076842429130923, "learning_rate": 1.0566283098659108e-06, "loss": 0.2872, "step": 25905 }, { "epoch": 2.4311186186186187, "grad_norm": 1.145615803367446, "learning_rate": 1.0562926678977892e-06, "loss": 0.2338, "step": 25906 }, { "epoch": 2.4312124624624625, "grad_norm": 1.1635865854970482, "learning_rate": 1.0559570729509555e-06, "loss": 0.3301, "step": 25907 }, { "epoch": 2.4313063063063063, "grad_norm": 2.667142248487935, "learning_rate": 1.0556215250294104e-06, "loss": 0.3243, "step": 25908 }, { "epoch": 2.43140015015015, "grad_norm": 3.743828418596916, "learning_rate": 1.055286024137152e-06, "loss": 0.3387, "step": 25909 }, { "epoch": 2.431493993993994, "grad_norm": 1.1303730506505372, "learning_rate": 1.0549505702781848e-06, "loss": 0.2843, "step": 25910 }, { "epoch": 2.4315878378378377, "grad_norm": 1.0159178073668769, "learning_rate": 1.0546151634565055e-06, "loss": 0.2957, "step": 25911 }, { "epoch": 2.4316816816816815, "grad_norm": 1.3691164163768204, "learning_rate": 1.0542798036761133e-06, "loss": 0.3455, "step": 25912 }, { "epoch": 2.4317755255255253, "grad_norm": 1.2389589286943432, "learning_rate": 1.0539444909410085e-06, "loss": 0.2793, "step": 25913 }, { "epoch": 2.4318693693693696, "grad_norm": 0.9751104524004999, "learning_rate": 1.0536092252551878e-06, "loss": 0.3028, "step": 25914 }, { "epoch": 2.4319632132132134, "grad_norm": 1.3006537182400357, "learning_rate": 1.0532740066226487e-06, "loss": 0.2976, "step": 25915 }, { "epoch": 2.432057057057057, "grad_norm": 1.0935791951182507, "learning_rate": 1.052938835047388e-06, "loss": 0.3095, "step": 25916 }, { "epoch": 2.432150900900901, "grad_norm": 1.0440610848323182, "learning_rate": 1.0526037105334025e-06, "loss": 0.3579, "step": 25917 }, { "epoch": 2.432244744744745, "grad_norm": 1.1951987029769324, "learning_rate": 1.052268633084686e-06, "loss": 0.351, "step": 25918 }, { "epoch": 2.4323385885885886, "grad_norm": 1.8595121158774437, "learning_rate": 1.0519336027052362e-06, "loss": 0.2949, "step": 25919 }, { "epoch": 2.4324324324324325, "grad_norm": 0.9988977950211478, "learning_rate": 1.0515986193990474e-06, "loss": 0.283, "step": 25920 }, { "epoch": 2.4325262762762763, "grad_norm": 1.053357697797244, "learning_rate": 1.0512636831701112e-06, "loss": 0.3076, "step": 25921 }, { "epoch": 2.43262012012012, "grad_norm": 1.50402438384269, "learning_rate": 1.0509287940224244e-06, "loss": 0.2847, "step": 25922 }, { "epoch": 2.432713963963964, "grad_norm": 0.987062918313076, "learning_rate": 1.0505939519599784e-06, "loss": 0.2798, "step": 25923 }, { "epoch": 2.4328078078078077, "grad_norm": 1.227150380500243, "learning_rate": 1.0502591569867637e-06, "loss": 0.3477, "step": 25924 }, { "epoch": 2.4329016516516515, "grad_norm": 1.9489162716782553, "learning_rate": 1.0499244091067762e-06, "loss": 0.2715, "step": 25925 }, { "epoch": 2.4329954954954953, "grad_norm": 1.0972185559999195, "learning_rate": 1.049589708324004e-06, "loss": 0.3467, "step": 25926 }, { "epoch": 2.4330893393393396, "grad_norm": 1.2266555293172337, "learning_rate": 1.049255054642439e-06, "loss": 0.3125, "step": 25927 }, { "epoch": 2.433183183183183, "grad_norm": 1.7249833375590697, "learning_rate": 1.0489204480660708e-06, "loss": 0.3427, "step": 25928 }, { "epoch": 2.433277027027027, "grad_norm": 1.0301202014446338, "learning_rate": 1.0485858885988892e-06, "loss": 0.3018, "step": 25929 }, { "epoch": 2.433370870870871, "grad_norm": 1.0868719272718097, "learning_rate": 1.048251376244882e-06, "loss": 0.3333, "step": 25930 }, { "epoch": 2.433464714714715, "grad_norm": 1.088996984952445, "learning_rate": 1.04791691100804e-06, "loss": 0.3388, "step": 25931 }, { "epoch": 2.4335585585585586, "grad_norm": 1.0011348470063384, "learning_rate": 1.0475824928923496e-06, "loss": 0.3028, "step": 25932 }, { "epoch": 2.4336524024024024, "grad_norm": 1.1550565745793002, "learning_rate": 1.047248121901797e-06, "loss": 0.2987, "step": 25933 }, { "epoch": 2.4337462462462462, "grad_norm": 1.1472163789375183, "learning_rate": 1.0469137980403715e-06, "loss": 0.3252, "step": 25934 }, { "epoch": 2.43384009009009, "grad_norm": 0.962994827748022, "learning_rate": 1.0465795213120584e-06, "loss": 0.3143, "step": 25935 }, { "epoch": 2.433933933933934, "grad_norm": 1.292444550493805, "learning_rate": 1.046245291720841e-06, "loss": 0.2691, "step": 25936 }, { "epoch": 2.4340277777777777, "grad_norm": 1.1486301321938983, "learning_rate": 1.045911109270708e-06, "loss": 0.3048, "step": 25937 }, { "epoch": 2.4341216216216215, "grad_norm": 0.9872999680554538, "learning_rate": 1.0455769739656423e-06, "loss": 0.3233, "step": 25938 }, { "epoch": 2.4342154654654653, "grad_norm": 1.0330748834086927, "learning_rate": 1.0452428858096282e-06, "loss": 0.3062, "step": 25939 }, { "epoch": 2.4343093093093096, "grad_norm": 1.0388865531716256, "learning_rate": 1.044908844806648e-06, "loss": 0.3403, "step": 25940 }, { "epoch": 2.434403153153153, "grad_norm": 1.0484334807503208, "learning_rate": 1.044574850960685e-06, "loss": 0.3144, "step": 25941 }, { "epoch": 2.434496996996997, "grad_norm": 1.0469624726258142, "learning_rate": 1.0442409042757207e-06, "loss": 0.3444, "step": 25942 }, { "epoch": 2.434590840840841, "grad_norm": 1.5315029399022677, "learning_rate": 1.0439070047557387e-06, "loss": 0.2975, "step": 25943 }, { "epoch": 2.434684684684685, "grad_norm": 2.367719053903034, "learning_rate": 1.0435731524047194e-06, "loss": 0.3542, "step": 25944 }, { "epoch": 2.4347785285285286, "grad_norm": 1.1921026631733906, "learning_rate": 1.0432393472266416e-06, "loss": 0.3258, "step": 25945 }, { "epoch": 2.4348723723723724, "grad_norm": 1.3512502502327735, "learning_rate": 1.042905589225488e-06, "loss": 0.3004, "step": 25946 }, { "epoch": 2.4349662162162162, "grad_norm": 1.0506285097030246, "learning_rate": 1.042571878405237e-06, "loss": 0.3382, "step": 25947 }, { "epoch": 2.43506006006006, "grad_norm": 1.4052680304055345, "learning_rate": 1.0422382147698651e-06, "loss": 0.2945, "step": 25948 }, { "epoch": 2.435153903903904, "grad_norm": 1.12695333342857, "learning_rate": 1.041904598323355e-06, "loss": 0.2739, "step": 25949 }, { "epoch": 2.4352477477477477, "grad_norm": 1.1802247120648262, "learning_rate": 1.0415710290696818e-06, "loss": 0.3173, "step": 25950 }, { "epoch": 2.4353415915915915, "grad_norm": 1.1062060465473709, "learning_rate": 1.0412375070128233e-06, "loss": 0.3445, "step": 25951 }, { "epoch": 2.4354354354354353, "grad_norm": 1.5938455274696617, "learning_rate": 1.040904032156756e-06, "loss": 0.3159, "step": 25952 }, { "epoch": 2.435529279279279, "grad_norm": 1.0785611490482465, "learning_rate": 1.0405706045054558e-06, "loss": 0.3124, "step": 25953 }, { "epoch": 2.435623123123123, "grad_norm": 1.1373561100230847, "learning_rate": 1.0402372240628966e-06, "loss": 0.3249, "step": 25954 }, { "epoch": 2.435716966966967, "grad_norm": 1.1967744256766324, "learning_rate": 1.0399038908330566e-06, "loss": 0.3258, "step": 25955 }, { "epoch": 2.435810810810811, "grad_norm": 1.0352509824536373, "learning_rate": 1.0395706048199089e-06, "loss": 0.2775, "step": 25956 }, { "epoch": 2.4359046546546548, "grad_norm": 1.038519266353295, "learning_rate": 1.0392373660274252e-06, "loss": 0.3196, "step": 25957 }, { "epoch": 2.4359984984984986, "grad_norm": 1.0675243773531238, "learning_rate": 1.0389041744595823e-06, "loss": 0.3263, "step": 25958 }, { "epoch": 2.4360923423423424, "grad_norm": 1.1759255100958876, "learning_rate": 1.0385710301203505e-06, "loss": 0.2919, "step": 25959 }, { "epoch": 2.436186186186186, "grad_norm": 1.6479420734809354, "learning_rate": 1.0382379330137022e-06, "loss": 0.3256, "step": 25960 }, { "epoch": 2.43628003003003, "grad_norm": 1.3585306964756496, "learning_rate": 1.0379048831436094e-06, "loss": 0.3258, "step": 25961 }, { "epoch": 2.436373873873874, "grad_norm": 1.243584445072567, "learning_rate": 1.0375718805140438e-06, "loss": 0.3427, "step": 25962 }, { "epoch": 2.4364677177177176, "grad_norm": 1.022252489173281, "learning_rate": 1.037238925128975e-06, "loss": 0.3218, "step": 25963 }, { "epoch": 2.4365615615615615, "grad_norm": 1.159380406708595, "learning_rate": 1.0369060169923728e-06, "loss": 0.3429, "step": 25964 }, { "epoch": 2.4366554054054053, "grad_norm": 1.3366636656585293, "learning_rate": 1.0365731561082065e-06, "loss": 0.3041, "step": 25965 }, { "epoch": 2.436749249249249, "grad_norm": 1.1427909450835065, "learning_rate": 1.0362403424804434e-06, "loss": 0.3216, "step": 25966 }, { "epoch": 2.436843093093093, "grad_norm": 1.0497918370858752, "learning_rate": 1.0359075761130548e-06, "loss": 0.2788, "step": 25967 }, { "epoch": 2.436936936936937, "grad_norm": 1.1096983603261392, "learning_rate": 1.0355748570100067e-06, "loss": 0.3053, "step": 25968 }, { "epoch": 2.437030780780781, "grad_norm": 1.080123494085641, "learning_rate": 1.0352421851752648e-06, "loss": 0.3552, "step": 25969 }, { "epoch": 2.4371246246246248, "grad_norm": 1.1363454676457447, "learning_rate": 1.0349095606127985e-06, "loss": 0.3075, "step": 25970 }, { "epoch": 2.4372184684684686, "grad_norm": 1.0367421440053939, "learning_rate": 1.0345769833265717e-06, "loss": 0.3598, "step": 25971 }, { "epoch": 2.4373123123123124, "grad_norm": 1.081852657306575, "learning_rate": 1.0342444533205492e-06, "loss": 0.3337, "step": 25972 }, { "epoch": 2.437406156156156, "grad_norm": 1.022268053731492, "learning_rate": 1.0339119705986983e-06, "loss": 0.3052, "step": 25973 }, { "epoch": 2.4375, "grad_norm": 1.1504880035784084, "learning_rate": 1.0335795351649814e-06, "loss": 0.3641, "step": 25974 }, { "epoch": 2.437593843843844, "grad_norm": 1.1757129688121393, "learning_rate": 1.033247147023363e-06, "loss": 0.3161, "step": 25975 }, { "epoch": 2.4376876876876876, "grad_norm": 1.1651840134478584, "learning_rate": 1.0329148061778056e-06, "loss": 0.2826, "step": 25976 }, { "epoch": 2.4377815315315314, "grad_norm": 1.1024602338452207, "learning_rate": 1.0325825126322713e-06, "loss": 0.266, "step": 25977 }, { "epoch": 2.4378753753753752, "grad_norm": 1.272338390646246, "learning_rate": 1.032250266390722e-06, "loss": 0.3247, "step": 25978 }, { "epoch": 2.437969219219219, "grad_norm": 1.289923692014683, "learning_rate": 1.0319180674571206e-06, "loss": 0.2748, "step": 25979 }, { "epoch": 2.438063063063063, "grad_norm": 1.1851466968665185, "learning_rate": 1.0315859158354274e-06, "loss": 0.3294, "step": 25980 }, { "epoch": 2.438156906906907, "grad_norm": 1.634701581776499, "learning_rate": 1.0312538115296006e-06, "loss": 0.297, "step": 25981 }, { "epoch": 2.438250750750751, "grad_norm": 1.0638629858502502, "learning_rate": 1.0309217545436034e-06, "loss": 0.3138, "step": 25982 }, { "epoch": 2.4383445945945947, "grad_norm": 1.1486023127175111, "learning_rate": 1.0305897448813935e-06, "loss": 0.3311, "step": 25983 }, { "epoch": 2.4384384384384385, "grad_norm": 1.1718528557337249, "learning_rate": 1.0302577825469273e-06, "loss": 0.3309, "step": 25984 }, { "epoch": 2.4385322822822824, "grad_norm": 1.4664681323021205, "learning_rate": 1.0299258675441664e-06, "loss": 0.3088, "step": 25985 }, { "epoch": 2.438626126126126, "grad_norm": 1.0621076602846524, "learning_rate": 1.029593999877067e-06, "loss": 0.3264, "step": 25986 }, { "epoch": 2.43871996996997, "grad_norm": 0.9806975434197133, "learning_rate": 1.0292621795495854e-06, "loss": 0.3303, "step": 25987 }, { "epoch": 2.438813813813814, "grad_norm": 0.9979985270983954, "learning_rate": 1.028930406565678e-06, "loss": 0.3027, "step": 25988 }, { "epoch": 2.4389076576576576, "grad_norm": 1.2309430956144018, "learning_rate": 1.0285986809293009e-06, "loss": 0.366, "step": 25989 }, { "epoch": 2.4390015015015014, "grad_norm": 1.0282489159064556, "learning_rate": 1.0282670026444075e-06, "loss": 0.3266, "step": 25990 }, { "epoch": 2.4390953453453452, "grad_norm": 0.9940303718113853, "learning_rate": 1.027935371714956e-06, "loss": 0.3361, "step": 25991 }, { "epoch": 2.439189189189189, "grad_norm": 1.2214725332619498, "learning_rate": 1.027603788144898e-06, "loss": 0.3529, "step": 25992 }, { "epoch": 2.439283033033033, "grad_norm": 1.0902444259424233, "learning_rate": 1.0272722519381861e-06, "loss": 0.3005, "step": 25993 }, { "epoch": 2.439376876876877, "grad_norm": 1.1423284605681192, "learning_rate": 1.0269407630987766e-06, "loss": 0.3387, "step": 25994 }, { "epoch": 2.439470720720721, "grad_norm": 1.104039496714512, "learning_rate": 1.0266093216306195e-06, "loss": 0.3274, "step": 25995 }, { "epoch": 2.4395645645645647, "grad_norm": 1.135114861883745, "learning_rate": 1.0262779275376661e-06, "loss": 0.3497, "step": 25996 }, { "epoch": 2.4396584084084085, "grad_norm": 1.6016594390617926, "learning_rate": 1.02594658082387e-06, "loss": 0.3278, "step": 25997 }, { "epoch": 2.4397522522522523, "grad_norm": 1.1518313217365503, "learning_rate": 1.0256152814931803e-06, "loss": 0.3237, "step": 25998 }, { "epoch": 2.439846096096096, "grad_norm": 0.9639344765405273, "learning_rate": 1.0252840295495475e-06, "loss": 0.3234, "step": 25999 }, { "epoch": 2.43993993993994, "grad_norm": 1.1083504291346862, "learning_rate": 1.024952824996921e-06, "loss": 0.303, "step": 26000 }, { "epoch": 2.4400337837837838, "grad_norm": 1.225739498371768, "learning_rate": 1.0246216678392496e-06, "loss": 0.3075, "step": 26001 }, { "epoch": 2.4401276276276276, "grad_norm": 1.0840696833708798, "learning_rate": 1.0242905580804808e-06, "loss": 0.297, "step": 26002 }, { "epoch": 2.4402214714714714, "grad_norm": 1.1234342441484773, "learning_rate": 1.0239594957245653e-06, "loss": 0.2994, "step": 26003 }, { "epoch": 2.440315315315315, "grad_norm": 1.0029391585083591, "learning_rate": 1.023628480775448e-06, "loss": 0.2955, "step": 26004 }, { "epoch": 2.440409159159159, "grad_norm": 1.3452124241387844, "learning_rate": 1.0232975132370754e-06, "loss": 0.2797, "step": 26005 }, { "epoch": 2.440503003003003, "grad_norm": 1.1706033252299868, "learning_rate": 1.022966593113396e-06, "loss": 0.2789, "step": 26006 }, { "epoch": 2.440596846846847, "grad_norm": 1.0879076335245994, "learning_rate": 1.0226357204083537e-06, "loss": 0.2698, "step": 26007 }, { "epoch": 2.4406906906906904, "grad_norm": 1.357899918048651, "learning_rate": 1.0223048951258924e-06, "loss": 0.2867, "step": 26008 }, { "epoch": 2.4407845345345347, "grad_norm": 1.0146872091158303, "learning_rate": 1.0219741172699593e-06, "loss": 0.326, "step": 26009 }, { "epoch": 2.4408783783783785, "grad_norm": 1.1795531346922008, "learning_rate": 1.0216433868444975e-06, "loss": 0.334, "step": 26010 }, { "epoch": 2.4409722222222223, "grad_norm": 1.1830190781727297, "learning_rate": 1.0213127038534483e-06, "loss": 0.3341, "step": 26011 }, { "epoch": 2.441066066066066, "grad_norm": 1.1980687747589374, "learning_rate": 1.0209820683007581e-06, "loss": 0.2897, "step": 26012 }, { "epoch": 2.44115990990991, "grad_norm": 1.1121893838343557, "learning_rate": 1.020651480190366e-06, "loss": 0.3015, "step": 26013 }, { "epoch": 2.4412537537537538, "grad_norm": 1.0721311651406311, "learning_rate": 1.0203209395262138e-06, "loss": 0.281, "step": 26014 }, { "epoch": 2.4413475975975976, "grad_norm": 1.1846861246413132, "learning_rate": 1.019990446312244e-06, "loss": 0.3071, "step": 26015 }, { "epoch": 2.4414414414414414, "grad_norm": 1.1920590672703109, "learning_rate": 1.019660000552397e-06, "loss": 0.309, "step": 26016 }, { "epoch": 2.441535285285285, "grad_norm": 1.0796951390132128, "learning_rate": 1.0193296022506112e-06, "loss": 0.3501, "step": 26017 }, { "epoch": 2.441629129129129, "grad_norm": 1.0313300600479887, "learning_rate": 1.0189992514108277e-06, "loss": 0.2952, "step": 26018 }, { "epoch": 2.441722972972973, "grad_norm": 1.1370444098419143, "learning_rate": 1.018668948036985e-06, "loss": 0.2734, "step": 26019 }, { "epoch": 2.441816816816817, "grad_norm": 1.0535992453695995, "learning_rate": 1.0183386921330196e-06, "loss": 0.318, "step": 26020 }, { "epoch": 2.4419106606606604, "grad_norm": 1.0589669112777644, "learning_rate": 1.018008483702872e-06, "loss": 0.3011, "step": 26021 }, { "epoch": 2.4420045045045047, "grad_norm": 0.957917974012516, "learning_rate": 1.0176783227504777e-06, "loss": 0.3293, "step": 26022 }, { "epoch": 2.4420983483483485, "grad_norm": 1.1065573059219005, "learning_rate": 1.0173482092797727e-06, "loss": 0.3288, "step": 26023 }, { "epoch": 2.4421921921921923, "grad_norm": 1.1009542304579762, "learning_rate": 1.0170181432946946e-06, "loss": 0.3444, "step": 26024 }, { "epoch": 2.442286036036036, "grad_norm": 1.5042035682081372, "learning_rate": 1.0166881247991778e-06, "loss": 0.3099, "step": 26025 }, { "epoch": 2.44237987987988, "grad_norm": 0.9809058507338084, "learning_rate": 1.0163581537971574e-06, "loss": 0.3261, "step": 26026 }, { "epoch": 2.4424737237237237, "grad_norm": 1.2645621845830215, "learning_rate": 1.0160282302925677e-06, "loss": 0.314, "step": 26027 }, { "epoch": 2.4425675675675675, "grad_norm": 1.0225482158606995, "learning_rate": 1.0156983542893423e-06, "loss": 0.2766, "step": 26028 }, { "epoch": 2.4426614114114114, "grad_norm": 1.3406735008785546, "learning_rate": 1.015368525791413e-06, "loss": 0.2851, "step": 26029 }, { "epoch": 2.442755255255255, "grad_norm": 1.3302584836405758, "learning_rate": 1.0150387448027149e-06, "loss": 0.3114, "step": 26030 }, { "epoch": 2.442849099099099, "grad_norm": 1.083351500682853, "learning_rate": 1.014709011327179e-06, "loss": 0.3124, "step": 26031 }, { "epoch": 2.442942942942943, "grad_norm": 1.1822193040324718, "learning_rate": 1.0143793253687357e-06, "loss": 0.2653, "step": 26032 }, { "epoch": 2.4430367867867866, "grad_norm": 1.6785228057883257, "learning_rate": 1.0140496869313176e-06, "loss": 0.3115, "step": 26033 }, { "epoch": 2.4431306306306304, "grad_norm": 0.9748837867735033, "learning_rate": 1.0137200960188543e-06, "loss": 0.2968, "step": 26034 }, { "epoch": 2.4432244744744747, "grad_norm": 1.377502018467876, "learning_rate": 1.0133905526352738e-06, "loss": 0.3151, "step": 26035 }, { "epoch": 2.4433183183183185, "grad_norm": 1.4404572433162155, "learning_rate": 1.0130610567845089e-06, "loss": 0.3088, "step": 26036 }, { "epoch": 2.4434121621621623, "grad_norm": 1.0526986280127086, "learning_rate": 1.0127316084704859e-06, "loss": 0.3179, "step": 26037 }, { "epoch": 2.443506006006006, "grad_norm": 1.170612271639857, "learning_rate": 1.0124022076971334e-06, "loss": 0.3321, "step": 26038 }, { "epoch": 2.44359984984985, "grad_norm": 1.0373812375394686, "learning_rate": 1.0120728544683783e-06, "loss": 0.2925, "step": 26039 }, { "epoch": 2.4436936936936937, "grad_norm": 1.0881330422766942, "learning_rate": 1.0117435487881483e-06, "loss": 0.3213, "step": 26040 }, { "epoch": 2.4437875375375375, "grad_norm": 1.1280789174058434, "learning_rate": 1.011414290660368e-06, "loss": 0.3389, "step": 26041 }, { "epoch": 2.4438813813813813, "grad_norm": 1.0832520795074208, "learning_rate": 1.011085080088966e-06, "loss": 0.3158, "step": 26042 }, { "epoch": 2.443975225225225, "grad_norm": 1.092193831482461, "learning_rate": 1.0107559170778658e-06, "loss": 0.2931, "step": 26043 }, { "epoch": 2.444069069069069, "grad_norm": 1.034877124709923, "learning_rate": 1.0104268016309909e-06, "loss": 0.2714, "step": 26044 }, { "epoch": 2.4441629129129128, "grad_norm": 1.265728473590354, "learning_rate": 1.0100977337522682e-06, "loss": 0.318, "step": 26045 }, { "epoch": 2.4442567567567566, "grad_norm": 1.1244885780416434, "learning_rate": 1.0097687134456198e-06, "loss": 0.3273, "step": 26046 }, { "epoch": 2.4443506006006004, "grad_norm": 1.165909923296124, "learning_rate": 1.0094397407149676e-06, "loss": 0.2808, "step": 26047 }, { "epoch": 2.4444444444444446, "grad_norm": 1.584471984108598, "learning_rate": 1.009110815564236e-06, "loss": 0.3158, "step": 26048 }, { "epoch": 2.4445382882882885, "grad_norm": 1.0754107355287725, "learning_rate": 1.0087819379973457e-06, "loss": 0.3165, "step": 26049 }, { "epoch": 2.4446321321321323, "grad_norm": 1.082286370145787, "learning_rate": 1.0084531080182185e-06, "loss": 0.3673, "step": 26050 }, { "epoch": 2.444725975975976, "grad_norm": 1.3506116713571459, "learning_rate": 1.0081243256307744e-06, "loss": 0.3333, "step": 26051 }, { "epoch": 2.44481981981982, "grad_norm": 1.2796891514926116, "learning_rate": 1.0077955908389343e-06, "loss": 0.2972, "step": 26052 }, { "epoch": 2.4449136636636637, "grad_norm": 1.8021596052229658, "learning_rate": 1.007466903646615e-06, "loss": 0.2787, "step": 26053 }, { "epoch": 2.4450075075075075, "grad_norm": 1.1135745436079143, "learning_rate": 1.0071382640577399e-06, "loss": 0.315, "step": 26054 }, { "epoch": 2.4451013513513513, "grad_norm": 1.6002012633122566, "learning_rate": 1.0068096720762249e-06, "loss": 0.3145, "step": 26055 }, { "epoch": 2.445195195195195, "grad_norm": 1.1821674212447506, "learning_rate": 1.0064811277059866e-06, "loss": 0.3195, "step": 26056 }, { "epoch": 2.445289039039039, "grad_norm": 1.3136250965209162, "learning_rate": 1.0061526309509456e-06, "loss": 0.3234, "step": 26057 }, { "epoch": 2.4453828828828827, "grad_norm": 1.3617467389564577, "learning_rate": 1.0058241818150167e-06, "loss": 0.3233, "step": 26058 }, { "epoch": 2.4454767267267266, "grad_norm": 1.0477919506246671, "learning_rate": 1.0054957803021143e-06, "loss": 0.2672, "step": 26059 }, { "epoch": 2.4455705705705704, "grad_norm": 5.894061497609702, "learning_rate": 1.0051674264161576e-06, "loss": 0.2876, "step": 26060 }, { "epoch": 2.4456644144144146, "grad_norm": 0.9770038095804375, "learning_rate": 1.0048391201610596e-06, "loss": 0.3137, "step": 26061 }, { "epoch": 2.4457582582582584, "grad_norm": 1.073958399422393, "learning_rate": 1.0045108615407357e-06, "loss": 0.3273, "step": 26062 }, { "epoch": 2.4458521021021022, "grad_norm": 1.0031639906683567, "learning_rate": 1.0041826505590985e-06, "loss": 0.3348, "step": 26063 }, { "epoch": 2.445945945945946, "grad_norm": 1.0546610848500522, "learning_rate": 1.0038544872200617e-06, "loss": 0.3112, "step": 26064 }, { "epoch": 2.44603978978979, "grad_norm": 1.1108790128680517, "learning_rate": 1.0035263715275373e-06, "loss": 0.3, "step": 26065 }, { "epoch": 2.4461336336336337, "grad_norm": 1.1792700427368936, "learning_rate": 1.0031983034854393e-06, "loss": 0.3119, "step": 26066 }, { "epoch": 2.4462274774774775, "grad_norm": 0.9179284521086334, "learning_rate": 1.0028702830976784e-06, "loss": 0.2816, "step": 26067 }, { "epoch": 2.4463213213213213, "grad_norm": 1.3037583370476342, "learning_rate": 1.0025423103681642e-06, "loss": 0.2934, "step": 26068 }, { "epoch": 2.446415165165165, "grad_norm": 1.6805596202372417, "learning_rate": 1.0022143853008099e-06, "loss": 0.3181, "step": 26069 }, { "epoch": 2.446509009009009, "grad_norm": 1.1624690190420082, "learning_rate": 1.0018865078995238e-06, "loss": 0.3069, "step": 26070 }, { "epoch": 2.4466028528528527, "grad_norm": 1.0725413884585433, "learning_rate": 1.001558678168214e-06, "loss": 0.3331, "step": 26071 }, { "epoch": 2.4466966966966965, "grad_norm": 2.04672245236052, "learning_rate": 1.0012308961107924e-06, "loss": 0.3734, "step": 26072 }, { "epoch": 2.4467905405405403, "grad_norm": 1.1330650660726629, "learning_rate": 1.0009031617311654e-06, "loss": 0.3291, "step": 26073 }, { "epoch": 2.4468843843843846, "grad_norm": 1.4059663010623602, "learning_rate": 1.0005754750332408e-06, "loss": 0.2819, "step": 26074 }, { "epoch": 2.4469782282282284, "grad_norm": 1.179380125938929, "learning_rate": 1.000247836020925e-06, "loss": 0.3136, "step": 26075 }, { "epoch": 2.4470720720720722, "grad_norm": 1.3518215534503004, "learning_rate": 9.999202446981255e-07, "loss": 0.2783, "step": 26076 }, { "epoch": 2.447165915915916, "grad_norm": 1.132189213180224, "learning_rate": 9.995927010687463e-07, "loss": 0.2637, "step": 26077 }, { "epoch": 2.44725975975976, "grad_norm": 2.9546071541724563, "learning_rate": 9.992652051366952e-07, "loss": 0.2732, "step": 26078 }, { "epoch": 2.4473536036036037, "grad_norm": 1.1482039607308117, "learning_rate": 9.989377569058762e-07, "loss": 0.3418, "step": 26079 }, { "epoch": 2.4474474474474475, "grad_norm": 1.1122542355657254, "learning_rate": 9.986103563801913e-07, "loss": 0.3218, "step": 26080 }, { "epoch": 2.4475412912912913, "grad_norm": 1.0732718879281762, "learning_rate": 9.982830035635483e-07, "loss": 0.3215, "step": 26081 }, { "epoch": 2.447635135135135, "grad_norm": 1.4594206869561739, "learning_rate": 9.97955698459847e-07, "loss": 0.3201, "step": 26082 }, { "epoch": 2.447728978978979, "grad_norm": 1.1359480205657964, "learning_rate": 9.976284410729902e-07, "loss": 0.2943, "step": 26083 }, { "epoch": 2.4478228228228227, "grad_norm": 1.2849044538857228, "learning_rate": 9.973012314068819e-07, "loss": 0.3272, "step": 26084 }, { "epoch": 2.4479166666666665, "grad_norm": 1.266049231327514, "learning_rate": 9.969740694654224e-07, "loss": 0.3062, "step": 26085 }, { "epoch": 2.4480105105105103, "grad_norm": 1.0972277254733656, "learning_rate": 9.966469552525116e-07, "loss": 0.3145, "step": 26086 }, { "epoch": 2.4481043543543546, "grad_norm": 1.1717239043841094, "learning_rate": 9.96319888772051e-07, "loss": 0.3042, "step": 26087 }, { "epoch": 2.448198198198198, "grad_norm": 1.1110216217948115, "learning_rate": 9.959928700279386e-07, "loss": 0.2846, "step": 26088 }, { "epoch": 2.448292042042042, "grad_norm": 1.051891743176221, "learning_rate": 9.95665899024074e-07, "loss": 0.3427, "step": 26089 }, { "epoch": 2.448385885885886, "grad_norm": 1.159527409048411, "learning_rate": 9.953389757643573e-07, "loss": 0.3007, "step": 26090 }, { "epoch": 2.44847972972973, "grad_norm": 1.2238216094440615, "learning_rate": 9.950121002526852e-07, "loss": 0.2937, "step": 26091 }, { "epoch": 2.4485735735735736, "grad_norm": 2.1364987892757985, "learning_rate": 9.946852724929534e-07, "loss": 0.2571, "step": 26092 }, { "epoch": 2.4486674174174174, "grad_norm": 1.0519406658322812, "learning_rate": 9.943584924890626e-07, "loss": 0.3061, "step": 26093 }, { "epoch": 2.4487612612612613, "grad_norm": 1.1563440700584473, "learning_rate": 9.940317602449068e-07, "loss": 0.3356, "step": 26094 }, { "epoch": 2.448855105105105, "grad_norm": 1.0093735547984868, "learning_rate": 9.9370507576438e-07, "loss": 0.303, "step": 26095 }, { "epoch": 2.448948948948949, "grad_norm": 1.041449205421993, "learning_rate": 9.933784390513807e-07, "loss": 0.3293, "step": 26096 }, { "epoch": 2.4490427927927927, "grad_norm": 1.1725821905379281, "learning_rate": 9.930518501098018e-07, "loss": 0.3211, "step": 26097 }, { "epoch": 2.4491366366366365, "grad_norm": 1.4262027278269693, "learning_rate": 9.927253089435373e-07, "loss": 0.2782, "step": 26098 }, { "epoch": 2.4492304804804803, "grad_norm": 1.2117649938435318, "learning_rate": 9.923988155564806e-07, "loss": 0.3092, "step": 26099 }, { "epoch": 2.4493243243243246, "grad_norm": 1.0333568460911633, "learning_rate": 9.920723699525248e-07, "loss": 0.3255, "step": 26100 }, { "epoch": 2.449418168168168, "grad_norm": 1.1327173822178482, "learning_rate": 9.917459721355599e-07, "loss": 0.3331, "step": 26101 }, { "epoch": 2.449512012012012, "grad_norm": 1.1698126311504358, "learning_rate": 9.914196221094813e-07, "loss": 0.3139, "step": 26102 }, { "epoch": 2.449605855855856, "grad_norm": 1.1166436866508898, "learning_rate": 9.91093319878178e-07, "loss": 0.2967, "step": 26103 }, { "epoch": 2.4496996996997, "grad_norm": 1.085776401032106, "learning_rate": 9.907670654455397e-07, "loss": 0.2994, "step": 26104 }, { "epoch": 2.4497935435435436, "grad_norm": 1.124501263838658, "learning_rate": 9.90440858815459e-07, "loss": 0.3137, "step": 26105 }, { "epoch": 2.4498873873873874, "grad_norm": 2.3379926590060855, "learning_rate": 9.901146999918238e-07, "loss": 0.3169, "step": 26106 }, { "epoch": 2.4499812312312312, "grad_norm": 1.2605808637640685, "learning_rate": 9.897885889785214e-07, "loss": 0.2998, "step": 26107 }, { "epoch": 2.450075075075075, "grad_norm": 1.0893705847359982, "learning_rate": 9.894625257794432e-07, "loss": 0.338, "step": 26108 }, { "epoch": 2.450168918918919, "grad_norm": 1.145973242313252, "learning_rate": 9.89136510398475e-07, "loss": 0.3167, "step": 26109 }, { "epoch": 2.4502627627627627, "grad_norm": 1.2594879783639552, "learning_rate": 9.888105428395046e-07, "loss": 0.3118, "step": 26110 }, { "epoch": 2.4503566066066065, "grad_norm": 1.1349438632270685, "learning_rate": 9.884846231064182e-07, "loss": 0.276, "step": 26111 }, { "epoch": 2.4504504504504503, "grad_norm": 1.1840805206612384, "learning_rate": 9.881587512031017e-07, "loss": 0.2911, "step": 26112 }, { "epoch": 2.450544294294294, "grad_norm": 1.3179404990268992, "learning_rate": 9.878329271334392e-07, "loss": 0.3479, "step": 26113 }, { "epoch": 2.450638138138138, "grad_norm": 1.1870362717142242, "learning_rate": 9.87507150901318e-07, "loss": 0.3142, "step": 26114 }, { "epoch": 2.450731981981982, "grad_norm": 1.1222378035277636, "learning_rate": 9.871814225106213e-07, "loss": 0.312, "step": 26115 }, { "epoch": 2.450825825825826, "grad_norm": 1.0829742982868917, "learning_rate": 9.868557419652318e-07, "loss": 0.3074, "step": 26116 }, { "epoch": 2.45091966966967, "grad_norm": 1.2542439425690646, "learning_rate": 9.865301092690349e-07, "loss": 0.2747, "step": 26117 }, { "epoch": 2.4510135135135136, "grad_norm": 1.078291847681543, "learning_rate": 9.862045244259115e-07, "loss": 0.2742, "step": 26118 }, { "epoch": 2.4511073573573574, "grad_norm": 1.2445139183404945, "learning_rate": 9.85878987439743e-07, "loss": 0.3147, "step": 26119 }, { "epoch": 2.451201201201201, "grad_norm": 1.1634211381383701, "learning_rate": 9.855534983144134e-07, "loss": 0.3566, "step": 26120 }, { "epoch": 2.451295045045045, "grad_norm": 1.0210733034008301, "learning_rate": 9.852280570538013e-07, "loss": 0.2931, "step": 26121 }, { "epoch": 2.451388888888889, "grad_norm": 1.1450616096835706, "learning_rate": 9.849026636617876e-07, "loss": 0.3628, "step": 26122 }, { "epoch": 2.4514827327327327, "grad_norm": 1.481203730232807, "learning_rate": 9.845773181422524e-07, "loss": 0.2906, "step": 26123 }, { "epoch": 2.4515765765765765, "grad_norm": 1.4252988571722145, "learning_rate": 9.84252020499074e-07, "loss": 0.295, "step": 26124 }, { "epoch": 2.4516704204204203, "grad_norm": 1.272762671026381, "learning_rate": 9.839267707361304e-07, "loss": 0.3317, "step": 26125 }, { "epoch": 2.451764264264264, "grad_norm": 1.067820018745505, "learning_rate": 9.83601568857302e-07, "loss": 0.3278, "step": 26126 }, { "epoch": 2.451858108108108, "grad_norm": 1.2261247561929358, "learning_rate": 9.832764148664643e-07, "loss": 0.3457, "step": 26127 }, { "epoch": 2.451951951951952, "grad_norm": 1.0560662683157207, "learning_rate": 9.829513087674935e-07, "loss": 0.2615, "step": 26128 }, { "epoch": 2.452045795795796, "grad_norm": 1.011346970775419, "learning_rate": 9.826262505642686e-07, "loss": 0.2926, "step": 26129 }, { "epoch": 2.4521396396396398, "grad_norm": 1.4410310125481909, "learning_rate": 9.823012402606635e-07, "loss": 0.3363, "step": 26130 }, { "epoch": 2.4522334834834836, "grad_norm": 0.9853823491702859, "learning_rate": 9.819762778605523e-07, "loss": 0.3224, "step": 26131 }, { "epoch": 2.4523273273273274, "grad_norm": 1.3377364483691052, "learning_rate": 9.816513633678121e-07, "loss": 0.3093, "step": 26132 }, { "epoch": 2.452421171171171, "grad_norm": 1.087673768454005, "learning_rate": 9.813264967863157e-07, "loss": 0.342, "step": 26133 }, { "epoch": 2.452515015015015, "grad_norm": 1.1530622048561514, "learning_rate": 9.810016781199366e-07, "loss": 0.2802, "step": 26134 }, { "epoch": 2.452608858858859, "grad_norm": 1.1211443071373264, "learning_rate": 9.806769073725475e-07, "loss": 0.2922, "step": 26135 }, { "epoch": 2.4527027027027026, "grad_norm": 1.2828826731507068, "learning_rate": 9.8035218454802e-07, "loss": 0.2914, "step": 26136 }, { "epoch": 2.4527965465465464, "grad_norm": 1.3009108559622515, "learning_rate": 9.80027509650226e-07, "loss": 0.3259, "step": 26137 }, { "epoch": 2.4528903903903903, "grad_norm": 1.0745578187867428, "learning_rate": 9.797028826830385e-07, "loss": 0.3152, "step": 26138 }, { "epoch": 2.452984234234234, "grad_norm": 1.026056383049138, "learning_rate": 9.793783036503263e-07, "loss": 0.3038, "step": 26139 }, { "epoch": 2.453078078078078, "grad_norm": 1.1276196141455994, "learning_rate": 9.790537725559584e-07, "loss": 0.2663, "step": 26140 }, { "epoch": 2.453171921921922, "grad_norm": 1.3814355395756743, "learning_rate": 9.78729289403807e-07, "loss": 0.2783, "step": 26141 }, { "epoch": 2.453265765765766, "grad_norm": 1.1085466899190053, "learning_rate": 9.784048541977397e-07, "loss": 0.2704, "step": 26142 }, { "epoch": 2.4533596096096097, "grad_norm": 1.1091965055624227, "learning_rate": 9.780804669416233e-07, "loss": 0.3146, "step": 26143 }, { "epoch": 2.4534534534534536, "grad_norm": 1.2297817325126763, "learning_rate": 9.777561276393277e-07, "loss": 0.2793, "step": 26144 }, { "epoch": 2.4535472972972974, "grad_norm": 1.0810812186334722, "learning_rate": 9.774318362947198e-07, "loss": 0.2966, "step": 26145 }, { "epoch": 2.453641141141141, "grad_norm": 1.4109470016004462, "learning_rate": 9.77107592911664e-07, "loss": 0.2765, "step": 26146 }, { "epoch": 2.453734984984985, "grad_norm": 1.1183542350785387, "learning_rate": 9.767833974940293e-07, "loss": 0.3457, "step": 26147 }, { "epoch": 2.453828828828829, "grad_norm": 1.4801231213520223, "learning_rate": 9.764592500456793e-07, "loss": 0.2878, "step": 26148 }, { "epoch": 2.4539226726726726, "grad_norm": 1.1419789234511084, "learning_rate": 9.761351505704797e-07, "loss": 0.3178, "step": 26149 }, { "epoch": 2.4540165165165164, "grad_norm": 2.812196712707521, "learning_rate": 9.758110990722936e-07, "loss": 0.343, "step": 26150 }, { "epoch": 2.4541103603603602, "grad_norm": 1.2468471177246976, "learning_rate": 9.75487095554986e-07, "loss": 0.3146, "step": 26151 }, { "epoch": 2.454204204204204, "grad_norm": 1.4104658741060312, "learning_rate": 9.751631400224177e-07, "loss": 0.2684, "step": 26152 }, { "epoch": 2.454298048048048, "grad_norm": 1.149875481994468, "learning_rate": 9.748392324784544e-07, "loss": 0.2603, "step": 26153 }, { "epoch": 2.454391891891892, "grad_norm": 1.3876987975177115, "learning_rate": 9.745153729269569e-07, "loss": 0.3309, "step": 26154 }, { "epoch": 2.454485735735736, "grad_norm": 1.1883501939804517, "learning_rate": 9.741915613717851e-07, "loss": 0.2926, "step": 26155 }, { "epoch": 2.4545795795795797, "grad_norm": 1.1883483769195005, "learning_rate": 9.73867797816802e-07, "loss": 0.2896, "step": 26156 }, { "epoch": 2.4546734234234235, "grad_norm": 0.9749930539924113, "learning_rate": 9.735440822658677e-07, "loss": 0.3148, "step": 26157 }, { "epoch": 2.4547672672672673, "grad_norm": 1.0725379795802037, "learning_rate": 9.73220414722839e-07, "loss": 0.3103, "step": 26158 }, { "epoch": 2.454861111111111, "grad_norm": 1.0935643472884193, "learning_rate": 9.728967951915791e-07, "loss": 0.3046, "step": 26159 }, { "epoch": 2.454954954954955, "grad_norm": 1.1612633585397687, "learning_rate": 9.725732236759443e-07, "loss": 0.2915, "step": 26160 }, { "epoch": 2.455048798798799, "grad_norm": 1.1113041437169195, "learning_rate": 9.722497001797936e-07, "loss": 0.3114, "step": 26161 }, { "epoch": 2.4551426426426426, "grad_norm": 1.3371563534111246, "learning_rate": 9.719262247069833e-07, "loss": 0.2903, "step": 26162 }, { "epoch": 2.4552364864864864, "grad_norm": 1.3683073584328889, "learning_rate": 9.716027972613707e-07, "loss": 0.3134, "step": 26163 }, { "epoch": 2.45533033033033, "grad_norm": 0.9983695329404859, "learning_rate": 9.71279417846811e-07, "loss": 0.3273, "step": 26164 }, { "epoch": 2.455424174174174, "grad_norm": 1.2392297042371165, "learning_rate": 9.709560864671618e-07, "loss": 0.3149, "step": 26165 }, { "epoch": 2.455518018018018, "grad_norm": 0.9673603836965171, "learning_rate": 9.70632803126278e-07, "loss": 0.2323, "step": 26166 }, { "epoch": 2.455611861861862, "grad_norm": 1.0682886305480357, "learning_rate": 9.703095678280117e-07, "loss": 0.3123, "step": 26167 }, { "epoch": 2.455705705705706, "grad_norm": 1.2907793880622933, "learning_rate": 9.699863805762206e-07, "loss": 0.3046, "step": 26168 }, { "epoch": 2.4557995495495497, "grad_norm": 1.0209942936478624, "learning_rate": 9.696632413747553e-07, "loss": 0.3024, "step": 26169 }, { "epoch": 2.4558933933933935, "grad_norm": 1.3823908517863464, "learning_rate": 9.693401502274691e-07, "loss": 0.319, "step": 26170 }, { "epoch": 2.4559872372372373, "grad_norm": 1.0609485650557011, "learning_rate": 9.690171071382153e-07, "loss": 0.3141, "step": 26171 }, { "epoch": 2.456081081081081, "grad_norm": 1.115064540198168, "learning_rate": 9.68694112110845e-07, "loss": 0.2917, "step": 26172 }, { "epoch": 2.456174924924925, "grad_norm": 1.5029739479350985, "learning_rate": 9.683711651492095e-07, "loss": 0.3571, "step": 26173 }, { "epoch": 2.4562687687687688, "grad_norm": 1.050252100533827, "learning_rate": 9.680482662571595e-07, "loss": 0.3216, "step": 26174 }, { "epoch": 2.4563626126126126, "grad_norm": 1.2664084891857472, "learning_rate": 9.67725415438544e-07, "loss": 0.2827, "step": 26175 }, { "epoch": 2.4564564564564564, "grad_norm": 1.5629945937511094, "learning_rate": 9.674026126972119e-07, "loss": 0.3249, "step": 26176 }, { "epoch": 2.4565503003003, "grad_norm": 1.214363636873272, "learning_rate": 9.670798580370139e-07, "loss": 0.2891, "step": 26177 }, { "epoch": 2.456644144144144, "grad_norm": 1.0476276642292612, "learning_rate": 9.66757151461798e-07, "loss": 0.3069, "step": 26178 }, { "epoch": 2.456737987987988, "grad_norm": 1.031391932536795, "learning_rate": 9.664344929754094e-07, "loss": 0.2847, "step": 26179 }, { "epoch": 2.456831831831832, "grad_norm": 1.1172230515584067, "learning_rate": 9.661118825816989e-07, "loss": 0.3052, "step": 26180 }, { "epoch": 2.4569256756756754, "grad_norm": 1.0593512822558084, "learning_rate": 9.657893202845109e-07, "loss": 0.3249, "step": 26181 }, { "epoch": 2.4570195195195197, "grad_norm": 1.0119236573150001, "learning_rate": 9.654668060876903e-07, "loss": 0.328, "step": 26182 }, { "epoch": 2.4571133633633635, "grad_norm": 1.1649879870598754, "learning_rate": 9.651443399950849e-07, "loss": 0.3316, "step": 26183 }, { "epoch": 2.4572072072072073, "grad_norm": 1.2210857910450632, "learning_rate": 9.648219220105387e-07, "loss": 0.3141, "step": 26184 }, { "epoch": 2.457301051051051, "grad_norm": 0.9516495649601316, "learning_rate": 9.644995521378958e-07, "loss": 0.2997, "step": 26185 }, { "epoch": 2.457394894894895, "grad_norm": 1.2423450792843744, "learning_rate": 9.641772303809999e-07, "loss": 0.3453, "step": 26186 }, { "epoch": 2.4574887387387387, "grad_norm": 0.9657399611843613, "learning_rate": 9.638549567436933e-07, "loss": 0.32, "step": 26187 }, { "epoch": 2.4575825825825826, "grad_norm": 1.1045127719248082, "learning_rate": 9.635327312298183e-07, "loss": 0.3406, "step": 26188 }, { "epoch": 2.4576764264264264, "grad_norm": 2.03340308758234, "learning_rate": 9.632105538432185e-07, "loss": 0.2685, "step": 26189 }, { "epoch": 2.45777027027027, "grad_norm": 0.9433634035151707, "learning_rate": 9.62888424587734e-07, "loss": 0.2915, "step": 26190 }, { "epoch": 2.457864114114114, "grad_norm": 1.1415598571340109, "learning_rate": 9.625663434672055e-07, "loss": 0.3571, "step": 26191 }, { "epoch": 2.457957957957958, "grad_norm": 1.7375374313499508, "learning_rate": 9.622443104854745e-07, "loss": 0.3294, "step": 26192 }, { "epoch": 2.458051801801802, "grad_norm": 1.3500394975631143, "learning_rate": 9.6192232564638e-07, "loss": 0.3186, "step": 26193 }, { "epoch": 2.4581456456456454, "grad_norm": 1.1472620232465638, "learning_rate": 9.616003889537595e-07, "loss": 0.3297, "step": 26194 }, { "epoch": 2.4582394894894897, "grad_norm": 1.369808559356996, "learning_rate": 9.612785004114538e-07, "loss": 0.2897, "step": 26195 }, { "epoch": 2.4583333333333335, "grad_norm": 0.9927315108881027, "learning_rate": 9.609566600233e-07, "loss": 0.3041, "step": 26196 }, { "epoch": 2.4584271771771773, "grad_norm": 2.039213344678113, "learning_rate": 9.60634867793136e-07, "loss": 0.324, "step": 26197 }, { "epoch": 2.458521021021021, "grad_norm": 1.1431650348681133, "learning_rate": 9.60313123724797e-07, "loss": 0.3227, "step": 26198 }, { "epoch": 2.458614864864865, "grad_norm": 1.011466796192879, "learning_rate": 9.599914278221207e-07, "loss": 0.3412, "step": 26199 }, { "epoch": 2.4587087087087087, "grad_norm": 2.0085698966004495, "learning_rate": 9.596697800889403e-07, "loss": 0.3111, "step": 26200 }, { "epoch": 2.4588025525525525, "grad_norm": 1.237271086700616, "learning_rate": 9.593481805290944e-07, "loss": 0.3173, "step": 26201 }, { "epoch": 2.4588963963963963, "grad_norm": 1.0914649309842421, "learning_rate": 9.590266291464156e-07, "loss": 0.3157, "step": 26202 }, { "epoch": 2.45899024024024, "grad_norm": 1.1243574009534847, "learning_rate": 9.587051259447362e-07, "loss": 0.3175, "step": 26203 }, { "epoch": 2.459084084084084, "grad_norm": 1.3509425883900337, "learning_rate": 9.583836709278927e-07, "loss": 0.3053, "step": 26204 }, { "epoch": 2.4591779279279278, "grad_norm": 1.3049315395883587, "learning_rate": 9.580622640997166e-07, "loss": 0.3292, "step": 26205 }, { "epoch": 2.4592717717717716, "grad_norm": 1.135910287323556, "learning_rate": 9.577409054640385e-07, "loss": 0.3178, "step": 26206 }, { "epoch": 2.4593656156156154, "grad_norm": 2.74103588533914, "learning_rate": 9.574195950246922e-07, "loss": 0.3325, "step": 26207 }, { "epoch": 2.4594594594594597, "grad_norm": 0.985943126066421, "learning_rate": 9.570983327855083e-07, "loss": 0.3362, "step": 26208 }, { "epoch": 2.4595533033033035, "grad_norm": 1.2457977834838083, "learning_rate": 9.567771187503167e-07, "loss": 0.3412, "step": 26209 }, { "epoch": 2.4596471471471473, "grad_norm": 1.11640841854362, "learning_rate": 9.564559529229472e-07, "loss": 0.3114, "step": 26210 }, { "epoch": 2.459740990990991, "grad_norm": 1.115977510746835, "learning_rate": 9.561348353072297e-07, "loss": 0.326, "step": 26211 }, { "epoch": 2.459834834834835, "grad_norm": 1.0887162808798863, "learning_rate": 9.558137659069911e-07, "loss": 0.2775, "step": 26212 }, { "epoch": 2.4599286786786787, "grad_norm": 1.0102521406767546, "learning_rate": 9.55492744726062e-07, "loss": 0.3135, "step": 26213 }, { "epoch": 2.4600225225225225, "grad_norm": 1.236137094142779, "learning_rate": 9.551717717682684e-07, "loss": 0.2924, "step": 26214 }, { "epoch": 2.4601163663663663, "grad_norm": 1.0331878798470575, "learning_rate": 9.548508470374374e-07, "loss": 0.3317, "step": 26215 }, { "epoch": 2.46021021021021, "grad_norm": 1.0797576295154445, "learning_rate": 9.545299705373967e-07, "loss": 0.2688, "step": 26216 }, { "epoch": 2.460304054054054, "grad_norm": 1.3411035153759068, "learning_rate": 9.542091422719717e-07, "loss": 0.3068, "step": 26217 }, { "epoch": 2.4603978978978978, "grad_norm": 1.2354774196767768, "learning_rate": 9.538883622449857e-07, "loss": 0.3197, "step": 26218 }, { "epoch": 2.4604917417417416, "grad_norm": 1.211129244002622, "learning_rate": 9.535676304602659e-07, "loss": 0.2814, "step": 26219 }, { "epoch": 2.4605855855855854, "grad_norm": 1.13770925152452, "learning_rate": 9.53246946921636e-07, "loss": 0.2852, "step": 26220 }, { "epoch": 2.4606794294294296, "grad_norm": 1.2281509212534378, "learning_rate": 9.529263116329191e-07, "loss": 0.3197, "step": 26221 }, { "epoch": 2.4607732732732734, "grad_norm": 0.9344217194931627, "learning_rate": 9.526057245979375e-07, "loss": 0.2996, "step": 26222 }, { "epoch": 2.4608671171171173, "grad_norm": 2.321359519783709, "learning_rate": 9.522851858205146e-07, "loss": 0.3427, "step": 26223 }, { "epoch": 2.460960960960961, "grad_norm": 1.105916739098212, "learning_rate": 9.519646953044703e-07, "loss": 0.2895, "step": 26224 }, { "epoch": 2.461054804804805, "grad_norm": 1.087179975800877, "learning_rate": 9.516442530536291e-07, "loss": 0.28, "step": 26225 }, { "epoch": 2.4611486486486487, "grad_norm": 1.1219728470449517, "learning_rate": 9.513238590718094e-07, "loss": 0.3668, "step": 26226 }, { "epoch": 2.4612424924924925, "grad_norm": 1.016825262724427, "learning_rate": 9.510035133628309e-07, "loss": 0.3579, "step": 26227 }, { "epoch": 2.4613363363363363, "grad_norm": 2.2757621132165307, "learning_rate": 9.506832159305152e-07, "loss": 0.3041, "step": 26228 }, { "epoch": 2.46143018018018, "grad_norm": 1.0890210635390292, "learning_rate": 9.503629667786801e-07, "loss": 0.2752, "step": 26229 }, { "epoch": 2.461524024024024, "grad_norm": 1.0734101765961352, "learning_rate": 9.500427659111428e-07, "loss": 0.3462, "step": 26230 }, { "epoch": 2.4616178678678677, "grad_norm": 1.022863090287369, "learning_rate": 9.497226133317234e-07, "loss": 0.3318, "step": 26231 }, { "epoch": 2.4617117117117115, "grad_norm": 2.318401734796897, "learning_rate": 9.494025090442383e-07, "loss": 0.3035, "step": 26232 }, { "epoch": 2.4618055555555554, "grad_norm": 0.9895792292769214, "learning_rate": 9.490824530525034e-07, "loss": 0.3031, "step": 26233 }, { "epoch": 2.4618993993993996, "grad_norm": 1.2112290566417463, "learning_rate": 9.487624453603356e-07, "loss": 0.2984, "step": 26234 }, { "epoch": 2.4619932432432434, "grad_norm": 1.0491629306998218, "learning_rate": 9.484424859715496e-07, "loss": 0.2915, "step": 26235 }, { "epoch": 2.4620870870870872, "grad_norm": 0.9664593014019538, "learning_rate": 9.481225748899592e-07, "loss": 0.3082, "step": 26236 }, { "epoch": 2.462180930930931, "grad_norm": 1.1791368694006192, "learning_rate": 9.478027121193817e-07, "loss": 0.3329, "step": 26237 }, { "epoch": 2.462274774774775, "grad_norm": 1.2000186843911005, "learning_rate": 9.474828976636297e-07, "loss": 0.3108, "step": 26238 }, { "epoch": 2.4623686186186187, "grad_norm": 1.0942073934402172, "learning_rate": 9.471631315265145e-07, "loss": 0.3271, "step": 26239 }, { "epoch": 2.4624624624624625, "grad_norm": 1.0068147214652308, "learning_rate": 9.468434137118515e-07, "loss": 0.3302, "step": 26240 }, { "epoch": 2.4625563063063063, "grad_norm": 1.3953846850000728, "learning_rate": 9.465237442234521e-07, "loss": 0.2931, "step": 26241 }, { "epoch": 2.46265015015015, "grad_norm": 1.0970845309149542, "learning_rate": 9.462041230651254e-07, "loss": 0.3131, "step": 26242 }, { "epoch": 2.462743993993994, "grad_norm": 1.20455040680898, "learning_rate": 9.458845502406849e-07, "loss": 0.2836, "step": 26243 }, { "epoch": 2.4628378378378377, "grad_norm": 1.0702123529150074, "learning_rate": 9.455650257539406e-07, "loss": 0.3247, "step": 26244 }, { "epoch": 2.4629316816816815, "grad_norm": 1.1810498576357418, "learning_rate": 9.452455496087015e-07, "loss": 0.3167, "step": 26245 }, { "epoch": 2.4630255255255253, "grad_norm": 1.0266221749300877, "learning_rate": 9.449261218087769e-07, "loss": 0.293, "step": 26246 }, { "epoch": 2.4631193693693696, "grad_norm": 1.0387518431617773, "learning_rate": 9.446067423579757e-07, "loss": 0.3456, "step": 26247 }, { "epoch": 2.4632132132132134, "grad_norm": 2.637179420687378, "learning_rate": 9.442874112601041e-07, "loss": 0.2935, "step": 26248 }, { "epoch": 2.463307057057057, "grad_norm": 1.177998634844525, "learning_rate": 9.439681285189722e-07, "loss": 0.2802, "step": 26249 }, { "epoch": 2.463400900900901, "grad_norm": 1.3928893888914389, "learning_rate": 9.436488941383859e-07, "loss": 0.326, "step": 26250 }, { "epoch": 2.463494744744745, "grad_norm": 1.1627550979825485, "learning_rate": 9.433297081221499e-07, "loss": 0.339, "step": 26251 }, { "epoch": 2.4635885885885886, "grad_norm": 1.5780927350354792, "learning_rate": 9.430105704740727e-07, "loss": 0.2939, "step": 26252 }, { "epoch": 2.4636824324324325, "grad_norm": 1.0360007264255922, "learning_rate": 9.426914811979576e-07, "loss": 0.2943, "step": 26253 }, { "epoch": 2.4637762762762763, "grad_norm": 0.941743139833291, "learning_rate": 9.423724402976081e-07, "loss": 0.2859, "step": 26254 }, { "epoch": 2.46387012012012, "grad_norm": 1.1508798291167697, "learning_rate": 9.420534477768311e-07, "loss": 0.3363, "step": 26255 }, { "epoch": 2.463963963963964, "grad_norm": 1.2085090040329383, "learning_rate": 9.417345036394288e-07, "loss": 0.2976, "step": 26256 }, { "epoch": 2.4640578078078077, "grad_norm": 0.9992981123774441, "learning_rate": 9.414156078892029e-07, "loss": 0.295, "step": 26257 }, { "epoch": 2.4641516516516515, "grad_norm": 1.086025555077077, "learning_rate": 9.41096760529957e-07, "loss": 0.3355, "step": 26258 }, { "epoch": 2.4642454954954953, "grad_norm": 1.1570415105941525, "learning_rate": 9.407779615654916e-07, "loss": 0.325, "step": 26259 }, { "epoch": 2.4643393393393396, "grad_norm": 1.227043674783908, "learning_rate": 9.404592109996075e-07, "loss": 0.3236, "step": 26260 }, { "epoch": 2.464433183183183, "grad_norm": 1.2227980019253313, "learning_rate": 9.401405088361071e-07, "loss": 0.3143, "step": 26261 }, { "epoch": 2.464527027027027, "grad_norm": 1.2319158068304674, "learning_rate": 9.398218550787891e-07, "loss": 0.3233, "step": 26262 }, { "epoch": 2.464620870870871, "grad_norm": 1.676674317129403, "learning_rate": 9.395032497314516e-07, "loss": 0.3326, "step": 26263 }, { "epoch": 2.464714714714715, "grad_norm": 1.1430136247374802, "learning_rate": 9.391846927978965e-07, "loss": 0.3006, "step": 26264 }, { "epoch": 2.4648085585585586, "grad_norm": 1.079256602612386, "learning_rate": 9.388661842819196e-07, "loss": 0.3239, "step": 26265 }, { "epoch": 2.4649024024024024, "grad_norm": 1.0892175286762578, "learning_rate": 9.385477241873187e-07, "loss": 0.3341, "step": 26266 }, { "epoch": 2.4649962462462462, "grad_norm": 0.9827920793822769, "learning_rate": 9.38229312517892e-07, "loss": 0.2903, "step": 26267 }, { "epoch": 2.46509009009009, "grad_norm": 1.1708568901880332, "learning_rate": 9.379109492774352e-07, "loss": 0.2917, "step": 26268 }, { "epoch": 2.465183933933934, "grad_norm": 1.2529431309588717, "learning_rate": 9.375926344697434e-07, "loss": 0.3434, "step": 26269 }, { "epoch": 2.4652777777777777, "grad_norm": 1.1873948255807243, "learning_rate": 9.372743680986135e-07, "loss": 0.3203, "step": 26270 }, { "epoch": 2.4653716216216215, "grad_norm": 1.1151170062028135, "learning_rate": 9.369561501678398e-07, "loss": 0.3544, "step": 26271 }, { "epoch": 2.4654654654654653, "grad_norm": 1.2030865678382034, "learning_rate": 9.36637980681216e-07, "loss": 0.3076, "step": 26272 }, { "epoch": 2.4655593093093096, "grad_norm": 1.3237230795979227, "learning_rate": 9.36319859642536e-07, "loss": 0.2783, "step": 26273 }, { "epoch": 2.465653153153153, "grad_norm": 1.6744352902612851, "learning_rate": 9.360017870555921e-07, "loss": 0.3363, "step": 26274 }, { "epoch": 2.465746996996997, "grad_norm": 1.3789954181211541, "learning_rate": 9.356837629241761e-07, "loss": 0.3145, "step": 26275 }, { "epoch": 2.465840840840841, "grad_norm": 1.1195395275547049, "learning_rate": 9.353657872520827e-07, "loss": 0.316, "step": 26276 }, { "epoch": 2.465934684684685, "grad_norm": 1.0618184234596861, "learning_rate": 9.350478600431007e-07, "loss": 0.2955, "step": 26277 }, { "epoch": 2.4660285285285286, "grad_norm": 1.142468601335425, "learning_rate": 9.347299813010207e-07, "loss": 0.2794, "step": 26278 }, { "epoch": 2.4661223723723724, "grad_norm": 1.1188205456761167, "learning_rate": 9.34412151029635e-07, "loss": 0.3216, "step": 26279 }, { "epoch": 2.4662162162162162, "grad_norm": 1.1143511333174363, "learning_rate": 9.340943692327309e-07, "loss": 0.3093, "step": 26280 }, { "epoch": 2.46631006006006, "grad_norm": 2.0779212071627646, "learning_rate": 9.337766359140976e-07, "loss": 0.2732, "step": 26281 }, { "epoch": 2.466403903903904, "grad_norm": 1.1860071424090106, "learning_rate": 9.334589510775249e-07, "loss": 0.3143, "step": 26282 }, { "epoch": 2.4664977477477477, "grad_norm": 1.125207824544484, "learning_rate": 9.331413147268004e-07, "loss": 0.3008, "step": 26283 }, { "epoch": 2.4665915915915915, "grad_norm": 0.9798558983740179, "learning_rate": 9.328237268657103e-07, "loss": 0.3105, "step": 26284 }, { "epoch": 2.4666854354354353, "grad_norm": 0.9482835524363639, "learning_rate": 9.325061874980413e-07, "loss": 0.2745, "step": 26285 }, { "epoch": 2.466779279279279, "grad_norm": 1.3294511964074933, "learning_rate": 9.321886966275801e-07, "loss": 0.3004, "step": 26286 }, { "epoch": 2.466873123123123, "grad_norm": 1.7163596274562416, "learning_rate": 9.318712542581105e-07, "loss": 0.2328, "step": 26287 }, { "epoch": 2.466966966966967, "grad_norm": 0.9552771222631851, "learning_rate": 9.315538603934193e-07, "loss": 0.3081, "step": 26288 }, { "epoch": 2.467060810810811, "grad_norm": 1.1507962090250345, "learning_rate": 9.312365150372909e-07, "loss": 0.3276, "step": 26289 }, { "epoch": 2.4671546546546548, "grad_norm": 1.0882770987064128, "learning_rate": 9.309192181935073e-07, "loss": 0.3002, "step": 26290 }, { "epoch": 2.4672484984984986, "grad_norm": 1.0720404679456337, "learning_rate": 9.306019698658536e-07, "loss": 0.3081, "step": 26291 }, { "epoch": 2.4673423423423424, "grad_norm": 1.0858474214036662, "learning_rate": 9.302847700581119e-07, "loss": 0.3427, "step": 26292 }, { "epoch": 2.467436186186186, "grad_norm": 1.0501941219099047, "learning_rate": 9.299676187740625e-07, "loss": 0.3106, "step": 26293 }, { "epoch": 2.46753003003003, "grad_norm": 1.0973420981297592, "learning_rate": 9.296505160174896e-07, "loss": 0.3326, "step": 26294 }, { "epoch": 2.467623873873874, "grad_norm": 1.0383486645471498, "learning_rate": 9.293334617921723e-07, "loss": 0.2893, "step": 26295 }, { "epoch": 2.4677177177177176, "grad_norm": 1.0842244236183811, "learning_rate": 9.290164561018916e-07, "loss": 0.3486, "step": 26296 }, { "epoch": 2.4678115615615615, "grad_norm": 1.1748108125403436, "learning_rate": 9.286994989504261e-07, "loss": 0.2774, "step": 26297 }, { "epoch": 2.4679054054054053, "grad_norm": 1.3419423046127974, "learning_rate": 9.283825903415566e-07, "loss": 0.3237, "step": 26298 }, { "epoch": 2.467999249249249, "grad_norm": 0.9982265916682931, "learning_rate": 9.280657302790585e-07, "loss": 0.2683, "step": 26299 }, { "epoch": 2.468093093093093, "grad_norm": 0.9834257075801801, "learning_rate": 9.277489187667138e-07, "loss": 0.305, "step": 26300 }, { "epoch": 2.468186936936937, "grad_norm": 1.0502249499020697, "learning_rate": 9.274321558082983e-07, "loss": 0.3277, "step": 26301 }, { "epoch": 2.468280780780781, "grad_norm": 1.1259839297465322, "learning_rate": 9.271154414075867e-07, "loss": 0.3226, "step": 26302 }, { "epoch": 2.4683746246246248, "grad_norm": 1.387661041280819, "learning_rate": 9.267987755683583e-07, "loss": 0.3168, "step": 26303 }, { "epoch": 2.4684684684684686, "grad_norm": 1.9863131228070632, "learning_rate": 9.264821582943878e-07, "loss": 0.3023, "step": 26304 }, { "epoch": 2.4685623123123124, "grad_norm": 1.1101389527644872, "learning_rate": 9.261655895894483e-07, "loss": 0.3894, "step": 26305 }, { "epoch": 2.468656156156156, "grad_norm": 1.0886085319201615, "learning_rate": 9.258490694573174e-07, "loss": 0.2933, "step": 26306 }, { "epoch": 2.46875, "grad_norm": 1.1032293172884895, "learning_rate": 9.255325979017676e-07, "loss": 0.3462, "step": 26307 }, { "epoch": 2.468843843843844, "grad_norm": 1.3463464653387873, "learning_rate": 9.252161749265726e-07, "loss": 0.2871, "step": 26308 }, { "epoch": 2.4689376876876876, "grad_norm": 1.104884203345053, "learning_rate": 9.248998005355042e-07, "loss": 0.3228, "step": 26309 }, { "epoch": 2.4690315315315314, "grad_norm": 1.137671254711742, "learning_rate": 9.245834747323351e-07, "loss": 0.3224, "step": 26310 }, { "epoch": 2.4691253753753752, "grad_norm": 0.9829764192040192, "learning_rate": 9.242671975208356e-07, "loss": 0.2802, "step": 26311 }, { "epoch": 2.469219219219219, "grad_norm": 0.9039785700828834, "learning_rate": 9.239509689047793e-07, "loss": 0.2813, "step": 26312 }, { "epoch": 2.469313063063063, "grad_norm": 0.9549138947564106, "learning_rate": 9.236347888879355e-07, "loss": 0.2878, "step": 26313 }, { "epoch": 2.469406906906907, "grad_norm": 2.01552556195168, "learning_rate": 9.233186574740721e-07, "loss": 0.3023, "step": 26314 }, { "epoch": 2.469500750750751, "grad_norm": 1.0596101489342185, "learning_rate": 9.230025746669619e-07, "loss": 0.2985, "step": 26315 }, { "epoch": 2.4695945945945947, "grad_norm": 1.0252501572285422, "learning_rate": 9.226865404703716e-07, "loss": 0.2711, "step": 26316 }, { "epoch": 2.4696884384384385, "grad_norm": 1.0727869140301602, "learning_rate": 9.223705548880685e-07, "loss": 0.2901, "step": 26317 }, { "epoch": 2.4697822822822824, "grad_norm": 1.0534312760732791, "learning_rate": 9.220546179238227e-07, "loss": 0.323, "step": 26318 }, { "epoch": 2.469876126126126, "grad_norm": 1.1344476058201551, "learning_rate": 9.21738729581399e-07, "loss": 0.2937, "step": 26319 }, { "epoch": 2.46996996996997, "grad_norm": 1.1523310690244386, "learning_rate": 9.214228898645649e-07, "loss": 0.2866, "step": 26320 }, { "epoch": 2.470063813813814, "grad_norm": 1.0636367342897401, "learning_rate": 9.211070987770854e-07, "loss": 0.3121, "step": 26321 }, { "epoch": 2.4701576576576576, "grad_norm": 1.143096127512659, "learning_rate": 9.207913563227261e-07, "loss": 0.2806, "step": 26322 }, { "epoch": 2.4702515015015014, "grad_norm": 1.1068287266972263, "learning_rate": 9.204756625052502e-07, "loss": 0.3104, "step": 26323 }, { "epoch": 2.4703453453453452, "grad_norm": 1.4552780321919025, "learning_rate": 9.201600173284248e-07, "loss": 0.3199, "step": 26324 }, { "epoch": 2.470439189189189, "grad_norm": 1.4227180718793173, "learning_rate": 9.198444207960117e-07, "loss": 0.2984, "step": 26325 }, { "epoch": 2.470533033033033, "grad_norm": 1.4146986772334038, "learning_rate": 9.195288729117718e-07, "loss": 0.297, "step": 26326 }, { "epoch": 2.470626876876877, "grad_norm": 1.1776305878453712, "learning_rate": 9.192133736794711e-07, "loss": 0.2937, "step": 26327 }, { "epoch": 2.470720720720721, "grad_norm": 1.328858891736945, "learning_rate": 9.188979231028694e-07, "loss": 0.3205, "step": 26328 }, { "epoch": 2.4708145645645647, "grad_norm": 1.1687162906214397, "learning_rate": 9.185825211857274e-07, "loss": 0.3046, "step": 26329 }, { "epoch": 2.4709084084084085, "grad_norm": 1.1590932133038492, "learning_rate": 9.182671679318073e-07, "loss": 0.2837, "step": 26330 }, { "epoch": 2.4710022522522523, "grad_norm": 1.1208406200942955, "learning_rate": 9.179518633448681e-07, "loss": 0.3511, "step": 26331 }, { "epoch": 2.471096096096096, "grad_norm": 1.2844262644750168, "learning_rate": 9.176366074286691e-07, "loss": 0.3139, "step": 26332 }, { "epoch": 2.47118993993994, "grad_norm": 1.4540892687868232, "learning_rate": 9.173214001869696e-07, "loss": 0.311, "step": 26333 }, { "epoch": 2.4712837837837838, "grad_norm": 1.0926898451596718, "learning_rate": 9.17006241623527e-07, "loss": 0.3109, "step": 26334 }, { "epoch": 2.4713776276276276, "grad_norm": 1.1766920345373875, "learning_rate": 9.16691131742099e-07, "loss": 0.3227, "step": 26335 }, { "epoch": 2.4714714714714714, "grad_norm": 1.6332529329285008, "learning_rate": 9.163760705464442e-07, "loss": 0.3167, "step": 26336 }, { "epoch": 2.471565315315315, "grad_norm": 1.9972302149815702, "learning_rate": 9.160610580403179e-07, "loss": 0.2934, "step": 26337 }, { "epoch": 2.471659159159159, "grad_norm": 1.3773786466284692, "learning_rate": 9.157460942274748e-07, "loss": 0.3269, "step": 26338 }, { "epoch": 2.471753003003003, "grad_norm": 1.1220935057923966, "learning_rate": 9.154311791116738e-07, "loss": 0.2602, "step": 26339 }, { "epoch": 2.471846846846847, "grad_norm": 1.3419152217399075, "learning_rate": 9.151163126966667e-07, "loss": 0.3114, "step": 26340 }, { "epoch": 2.4719406906906904, "grad_norm": 1.1070162096648362, "learning_rate": 9.148014949862077e-07, "loss": 0.325, "step": 26341 }, { "epoch": 2.4720345345345347, "grad_norm": 1.0858662866175541, "learning_rate": 9.144867259840528e-07, "loss": 0.3614, "step": 26342 }, { "epoch": 2.4721283783783785, "grad_norm": 1.099645478812008, "learning_rate": 9.141720056939529e-07, "loss": 0.3417, "step": 26343 }, { "epoch": 2.4722222222222223, "grad_norm": 1.0221987653143405, "learning_rate": 9.138573341196616e-07, "loss": 0.3013, "step": 26344 }, { "epoch": 2.472316066066066, "grad_norm": 1.0486863377603988, "learning_rate": 9.135427112649304e-07, "loss": 0.2943, "step": 26345 }, { "epoch": 2.47240990990991, "grad_norm": 1.1998522389736577, "learning_rate": 9.1322813713351e-07, "loss": 0.3098, "step": 26346 }, { "epoch": 2.4725037537537538, "grad_norm": 1.0905336514513644, "learning_rate": 9.129136117291498e-07, "loss": 0.3117, "step": 26347 }, { "epoch": 2.4725975975975976, "grad_norm": 1.5127814654495602, "learning_rate": 9.125991350556035e-07, "loss": 0.3244, "step": 26348 }, { "epoch": 2.4726914414414414, "grad_norm": 1.053828652782168, "learning_rate": 9.122847071166186e-07, "loss": 0.2765, "step": 26349 }, { "epoch": 2.472785285285285, "grad_norm": 1.1948543404109577, "learning_rate": 9.11970327915943e-07, "loss": 0.2705, "step": 26350 }, { "epoch": 2.472879129129129, "grad_norm": 1.4620077033382028, "learning_rate": 9.116559974573274e-07, "loss": 0.3052, "step": 26351 }, { "epoch": 2.472972972972973, "grad_norm": 1.1554490482417934, "learning_rate": 9.113417157445193e-07, "loss": 0.3018, "step": 26352 }, { "epoch": 2.473066816816817, "grad_norm": 1.0184092189458762, "learning_rate": 9.110274827812632e-07, "loss": 0.3465, "step": 26353 }, { "epoch": 2.4731606606606604, "grad_norm": 1.3413079179904692, "learning_rate": 9.107132985713086e-07, "loss": 0.3209, "step": 26354 }, { "epoch": 2.4732545045045047, "grad_norm": 0.9810575225124221, "learning_rate": 9.103991631184012e-07, "loss": 0.2999, "step": 26355 }, { "epoch": 2.4733483483483485, "grad_norm": 1.2372382534925705, "learning_rate": 9.100850764262859e-07, "loss": 0.3069, "step": 26356 }, { "epoch": 2.4734421921921923, "grad_norm": 1.180376452534911, "learning_rate": 9.097710384987074e-07, "loss": 0.3247, "step": 26357 }, { "epoch": 2.473536036036036, "grad_norm": 1.1310300310276742, "learning_rate": 9.0945704933941e-07, "loss": 0.2857, "step": 26358 }, { "epoch": 2.47362987987988, "grad_norm": 1.2873264439097105, "learning_rate": 9.091431089521368e-07, "loss": 0.2618, "step": 26359 }, { "epoch": 2.4737237237237237, "grad_norm": 1.1737633774290401, "learning_rate": 9.088292173406327e-07, "loss": 0.3535, "step": 26360 }, { "epoch": 2.4738175675675675, "grad_norm": 1.1030934781737252, "learning_rate": 9.085153745086395e-07, "loss": 0.3248, "step": 26361 }, { "epoch": 2.4739114114114114, "grad_norm": 1.0017975900363676, "learning_rate": 9.082015804598976e-07, "loss": 0.3511, "step": 26362 }, { "epoch": 2.474005255255255, "grad_norm": 1.393252783428483, "learning_rate": 9.078878351981512e-07, "loss": 0.3221, "step": 26363 }, { "epoch": 2.474099099099099, "grad_norm": 1.9326249389051093, "learning_rate": 9.075741387271398e-07, "loss": 0.3063, "step": 26364 }, { "epoch": 2.474192942942943, "grad_norm": 1.2334367183448312, "learning_rate": 9.072604910506022e-07, "loss": 0.3067, "step": 26365 }, { "epoch": 2.4742867867867866, "grad_norm": 1.1357133186621318, "learning_rate": 9.069468921722813e-07, "loss": 0.2981, "step": 26366 }, { "epoch": 2.4743806306306304, "grad_norm": 1.1552699155102608, "learning_rate": 9.06633342095914e-07, "loss": 0.315, "step": 26367 }, { "epoch": 2.4744744744744747, "grad_norm": 1.4958155389368606, "learning_rate": 9.063198408252389e-07, "loss": 0.318, "step": 26368 }, { "epoch": 2.4745683183183185, "grad_norm": 1.4459925180322601, "learning_rate": 9.060063883639942e-07, "loss": 0.3285, "step": 26369 }, { "epoch": 2.4746621621621623, "grad_norm": 1.1360977953656828, "learning_rate": 9.056929847159168e-07, "loss": 0.356, "step": 26370 }, { "epoch": 2.474756006006006, "grad_norm": 1.1404171310545643, "learning_rate": 9.053796298847433e-07, "loss": 0.3527, "step": 26371 }, { "epoch": 2.47484984984985, "grad_norm": 0.9372259980046902, "learning_rate": 9.050663238742113e-07, "loss": 0.2982, "step": 26372 }, { "epoch": 2.4749436936936937, "grad_norm": 1.1386631901642963, "learning_rate": 9.047530666880556e-07, "loss": 0.3208, "step": 26373 }, { "epoch": 2.4750375375375375, "grad_norm": 1.146215227471013, "learning_rate": 9.044398583300096e-07, "loss": 0.2811, "step": 26374 }, { "epoch": 2.4751313813813813, "grad_norm": 2.07259459131197, "learning_rate": 9.041266988038106e-07, "loss": 0.3138, "step": 26375 }, { "epoch": 2.475225225225225, "grad_norm": 1.2873875663257144, "learning_rate": 9.03813588113191e-07, "loss": 0.2829, "step": 26376 }, { "epoch": 2.475319069069069, "grad_norm": 1.1624720296943047, "learning_rate": 9.035005262618824e-07, "loss": 0.3107, "step": 26377 }, { "epoch": 2.4754129129129128, "grad_norm": 1.140637599206667, "learning_rate": 9.03187513253621e-07, "loss": 0.3329, "step": 26378 }, { "epoch": 2.4755067567567566, "grad_norm": 0.9542868329577315, "learning_rate": 9.02874549092137e-07, "loss": 0.3093, "step": 26379 }, { "epoch": 2.4756006006006004, "grad_norm": 1.0950813935033958, "learning_rate": 9.025616337811615e-07, "loss": 0.3265, "step": 26380 }, { "epoch": 2.4756944444444446, "grad_norm": 1.2378937529269742, "learning_rate": 9.022487673244262e-07, "loss": 0.3265, "step": 26381 }, { "epoch": 2.4757882882882885, "grad_norm": 1.6493731524292385, "learning_rate": 9.019359497256613e-07, "loss": 0.2923, "step": 26382 }, { "epoch": 2.4758821321321323, "grad_norm": 1.1512405727761175, "learning_rate": 9.01623180988595e-07, "loss": 0.3296, "step": 26383 }, { "epoch": 2.475975975975976, "grad_norm": 1.2586600736463454, "learning_rate": 9.013104611169587e-07, "loss": 0.3027, "step": 26384 }, { "epoch": 2.47606981981982, "grad_norm": 1.8429982649299996, "learning_rate": 9.009977901144806e-07, "loss": 0.2874, "step": 26385 }, { "epoch": 2.4761636636636637, "grad_norm": 1.1026742166075536, "learning_rate": 9.006851679848871e-07, "loss": 0.3085, "step": 26386 }, { "epoch": 2.4762575075075075, "grad_norm": 1.2332157239998707, "learning_rate": 9.00372594731908e-07, "loss": 0.314, "step": 26387 }, { "epoch": 2.4763513513513513, "grad_norm": 1.0862780853737481, "learning_rate": 9.000600703592688e-07, "loss": 0.34, "step": 26388 }, { "epoch": 2.476445195195195, "grad_norm": 1.2319904699763888, "learning_rate": 8.997475948706946e-07, "loss": 0.2883, "step": 26389 }, { "epoch": 2.476539039039039, "grad_norm": 1.0676807752522925, "learning_rate": 8.994351682699137e-07, "loss": 0.2963, "step": 26390 }, { "epoch": 2.4766328828828827, "grad_norm": 1.1053920775440977, "learning_rate": 8.991227905606503e-07, "loss": 0.292, "step": 26391 }, { "epoch": 2.4767267267267266, "grad_norm": 1.1299791405514346, "learning_rate": 8.988104617466264e-07, "loss": 0.2997, "step": 26392 }, { "epoch": 2.4768205705705704, "grad_norm": 1.2303044704820358, "learning_rate": 8.984981818315703e-07, "loss": 0.3203, "step": 26393 }, { "epoch": 2.4769144144144146, "grad_norm": 0.9852855195715495, "learning_rate": 8.981859508192037e-07, "loss": 0.3079, "step": 26394 }, { "epoch": 2.4770082582582584, "grad_norm": 5.941089412926619, "learning_rate": 8.978737687132466e-07, "loss": 0.344, "step": 26395 }, { "epoch": 2.4771021021021022, "grad_norm": 1.2116464171768586, "learning_rate": 8.975616355174243e-07, "loss": 0.2801, "step": 26396 }, { "epoch": 2.477195945945946, "grad_norm": 1.119227669288798, "learning_rate": 8.972495512354578e-07, "loss": 0.3243, "step": 26397 }, { "epoch": 2.47728978978979, "grad_norm": 1.2589071924999304, "learning_rate": 8.969375158710664e-07, "loss": 0.3461, "step": 26398 }, { "epoch": 2.4773836336336337, "grad_norm": 1.1238227273497159, "learning_rate": 8.96625529427973e-07, "loss": 0.3338, "step": 26399 }, { "epoch": 2.4774774774774775, "grad_norm": 1.3737747449634419, "learning_rate": 8.963135919098964e-07, "loss": 0.3069, "step": 26400 }, { "epoch": 2.4775713213213213, "grad_norm": 1.033334627346628, "learning_rate": 8.960017033205542e-07, "loss": 0.2951, "step": 26401 }, { "epoch": 2.477665165165165, "grad_norm": 0.9490082411673881, "learning_rate": 8.956898636636686e-07, "loss": 0.2677, "step": 26402 }, { "epoch": 2.477759009009009, "grad_norm": 1.0351502161174222, "learning_rate": 8.953780729429556e-07, "loss": 0.2907, "step": 26403 }, { "epoch": 2.4778528528528527, "grad_norm": 1.2458475123432677, "learning_rate": 8.950663311621316e-07, "loss": 0.3308, "step": 26404 }, { "epoch": 2.4779466966966965, "grad_norm": 1.1441673756023825, "learning_rate": 8.947546383249162e-07, "loss": 0.2757, "step": 26405 }, { "epoch": 2.4780405405405403, "grad_norm": 1.2662155403892303, "learning_rate": 8.944429944350241e-07, "loss": 0.298, "step": 26406 }, { "epoch": 2.4781343843843846, "grad_norm": 2.1294090392084715, "learning_rate": 8.94131399496172e-07, "loss": 0.2918, "step": 26407 }, { "epoch": 2.4782282282282284, "grad_norm": 1.00078687340254, "learning_rate": 8.93819853512074e-07, "loss": 0.3223, "step": 26408 }, { "epoch": 2.4783220720720722, "grad_norm": 1.1416407447594883, "learning_rate": 8.935083564864449e-07, "loss": 0.3159, "step": 26409 }, { "epoch": 2.478415915915916, "grad_norm": 1.0536680777637901, "learning_rate": 8.93196908422998e-07, "loss": 0.2987, "step": 26410 }, { "epoch": 2.47850975975976, "grad_norm": 1.338592618345122, "learning_rate": 8.92885509325449e-07, "loss": 0.2672, "step": 26411 }, { "epoch": 2.4786036036036037, "grad_norm": 0.9718260073286259, "learning_rate": 8.925741591975096e-07, "loss": 0.3195, "step": 26412 }, { "epoch": 2.4786974474474475, "grad_norm": 1.1373853137806202, "learning_rate": 8.922628580428904e-07, "loss": 0.3003, "step": 26413 }, { "epoch": 2.4787912912912913, "grad_norm": 1.2940212715527633, "learning_rate": 8.919516058653061e-07, "loss": 0.3047, "step": 26414 }, { "epoch": 2.478885135135135, "grad_norm": 1.3302962052443539, "learning_rate": 8.916404026684661e-07, "loss": 0.2751, "step": 26415 }, { "epoch": 2.478978978978979, "grad_norm": 1.1660552897789207, "learning_rate": 8.913292484560804e-07, "loss": 0.2703, "step": 26416 }, { "epoch": 2.4790728228228227, "grad_norm": 1.1991771026218367, "learning_rate": 8.910181432318604e-07, "loss": 0.3208, "step": 26417 }, { "epoch": 2.4791666666666665, "grad_norm": 2.0226305858435962, "learning_rate": 8.907070869995143e-07, "loss": 0.3087, "step": 26418 }, { "epoch": 2.4792605105105103, "grad_norm": 1.1290770351555237, "learning_rate": 8.90396079762752e-07, "loss": 0.3432, "step": 26419 }, { "epoch": 2.4793543543543546, "grad_norm": 1.2065218429396212, "learning_rate": 8.900851215252804e-07, "loss": 0.3567, "step": 26420 }, { "epoch": 2.479448198198198, "grad_norm": 1.2000006981271376, "learning_rate": 8.897742122908077e-07, "loss": 0.3291, "step": 26421 }, { "epoch": 2.479542042042042, "grad_norm": 1.8504469104462045, "learning_rate": 8.894633520630396e-07, "loss": 0.2955, "step": 26422 }, { "epoch": 2.479635885885886, "grad_norm": 1.4430911386878047, "learning_rate": 8.891525408456852e-07, "loss": 0.3118, "step": 26423 }, { "epoch": 2.47972972972973, "grad_norm": 1.1969660260853412, "learning_rate": 8.888417786424486e-07, "loss": 0.3159, "step": 26424 }, { "epoch": 2.4798235735735736, "grad_norm": 1.1431065019302291, "learning_rate": 8.88531065457034e-07, "loss": 0.3294, "step": 26425 }, { "epoch": 2.4799174174174174, "grad_norm": 1.1984262610167775, "learning_rate": 8.882204012931489e-07, "loss": 0.3274, "step": 26426 }, { "epoch": 2.4800112612612613, "grad_norm": 1.2754388775316412, "learning_rate": 8.879097861544961e-07, "loss": 0.2999, "step": 26427 }, { "epoch": 2.480105105105105, "grad_norm": 0.9539166353314316, "learning_rate": 8.875992200447769e-07, "loss": 0.324, "step": 26428 }, { "epoch": 2.480198948948949, "grad_norm": 1.1423515650492309, "learning_rate": 8.872887029676979e-07, "loss": 0.3453, "step": 26429 }, { "epoch": 2.4802927927927927, "grad_norm": 1.1127220217260716, "learning_rate": 8.869782349269601e-07, "loss": 0.3224, "step": 26430 }, { "epoch": 2.4803866366366365, "grad_norm": 1.7722112750846837, "learning_rate": 8.866678159262643e-07, "loss": 0.2588, "step": 26431 }, { "epoch": 2.4804804804804803, "grad_norm": 1.1251101260364875, "learning_rate": 8.863574459693125e-07, "loss": 0.3137, "step": 26432 }, { "epoch": 2.4805743243243246, "grad_norm": 1.0655481574177843, "learning_rate": 8.860471250598046e-07, "loss": 0.3575, "step": 26433 }, { "epoch": 2.480668168168168, "grad_norm": 1.090991186821609, "learning_rate": 8.857368532014404e-07, "loss": 0.3619, "step": 26434 }, { "epoch": 2.480762012012012, "grad_norm": 0.9892721763095524, "learning_rate": 8.854266303979209e-07, "loss": 0.3018, "step": 26435 }, { "epoch": 2.480855855855856, "grad_norm": 1.223121376267189, "learning_rate": 8.851164566529441e-07, "loss": 0.3001, "step": 26436 }, { "epoch": 2.4809496996997, "grad_norm": 1.1275598908276891, "learning_rate": 8.848063319702067e-07, "loss": 0.3185, "step": 26437 }, { "epoch": 2.4810435435435436, "grad_norm": 1.256923169280822, "learning_rate": 8.844962563534088e-07, "loss": 0.318, "step": 26438 }, { "epoch": 2.4811373873873874, "grad_norm": 1.1810210100211855, "learning_rate": 8.841862298062465e-07, "loss": 0.3381, "step": 26439 }, { "epoch": 2.4812312312312312, "grad_norm": 1.7987056270732191, "learning_rate": 8.838762523324152e-07, "loss": 0.3177, "step": 26440 }, { "epoch": 2.481325075075075, "grad_norm": 1.1975198099974214, "learning_rate": 8.835663239356129e-07, "loss": 0.3461, "step": 26441 }, { "epoch": 2.481418918918919, "grad_norm": 1.4072702415219909, "learning_rate": 8.832564446195335e-07, "loss": 0.3165, "step": 26442 }, { "epoch": 2.4815127627627627, "grad_norm": 1.001581823385182, "learning_rate": 8.829466143878723e-07, "loss": 0.3435, "step": 26443 }, { "epoch": 2.4816066066066065, "grad_norm": 1.3272510387129794, "learning_rate": 8.826368332443231e-07, "loss": 0.3112, "step": 26444 }, { "epoch": 2.4817004504504503, "grad_norm": 2.6998009371208624, "learning_rate": 8.823271011925794e-07, "loss": 0.2844, "step": 26445 }, { "epoch": 2.481794294294294, "grad_norm": 1.9453755861912185, "learning_rate": 8.820174182363334e-07, "loss": 0.3312, "step": 26446 }, { "epoch": 2.481888138138138, "grad_norm": 1.2292937824174905, "learning_rate": 8.817077843792793e-07, "loss": 0.342, "step": 26447 }, { "epoch": 2.481981981981982, "grad_norm": 1.1104430144752429, "learning_rate": 8.813981996251081e-07, "loss": 0.3282, "step": 26448 }, { "epoch": 2.482075825825826, "grad_norm": 1.3104526633905966, "learning_rate": 8.810886639775096e-07, "loss": 0.2784, "step": 26449 }, { "epoch": 2.48216966966967, "grad_norm": 1.3783282203530487, "learning_rate": 8.807791774401775e-07, "loss": 0.2948, "step": 26450 }, { "epoch": 2.4822635135135136, "grad_norm": 1.090803346245626, "learning_rate": 8.804697400167994e-07, "loss": 0.3346, "step": 26451 }, { "epoch": 2.4823573573573574, "grad_norm": 1.546889070126777, "learning_rate": 8.801603517110646e-07, "loss": 0.3017, "step": 26452 }, { "epoch": 2.482451201201201, "grad_norm": 1.0935730996648938, "learning_rate": 8.798510125266635e-07, "loss": 0.3861, "step": 26453 }, { "epoch": 2.482545045045045, "grad_norm": 1.0329173629133923, "learning_rate": 8.795417224672842e-07, "loss": 0.3384, "step": 26454 }, { "epoch": 2.482638888888889, "grad_norm": 1.5258930562211628, "learning_rate": 8.792324815366138e-07, "loss": 0.3329, "step": 26455 }, { "epoch": 2.4827327327327327, "grad_norm": 1.2500710025411108, "learning_rate": 8.789232897383393e-07, "loss": 0.3088, "step": 26456 }, { "epoch": 2.4828265765765765, "grad_norm": 1.0856765415592258, "learning_rate": 8.786141470761477e-07, "loss": 0.2844, "step": 26457 }, { "epoch": 2.4829204204204203, "grad_norm": 1.0245296003419881, "learning_rate": 8.783050535537236e-07, "loss": 0.2887, "step": 26458 }, { "epoch": 2.483014264264264, "grad_norm": 1.0401147745552068, "learning_rate": 8.779960091747541e-07, "loss": 0.3105, "step": 26459 }, { "epoch": 2.483108108108108, "grad_norm": 0.9789385377216601, "learning_rate": 8.776870139429238e-07, "loss": 0.2995, "step": 26460 }, { "epoch": 2.483201951951952, "grad_norm": 1.856961641612599, "learning_rate": 8.773780678619148e-07, "loss": 0.2964, "step": 26461 }, { "epoch": 2.483295795795796, "grad_norm": 0.9527486767434827, "learning_rate": 8.77069170935414e-07, "loss": 0.2806, "step": 26462 }, { "epoch": 2.4833896396396398, "grad_norm": 1.1621179241123667, "learning_rate": 8.767603231671023e-07, "loss": 0.2716, "step": 26463 }, { "epoch": 2.4834834834834836, "grad_norm": 1.2143004905641877, "learning_rate": 8.764515245606614e-07, "loss": 0.3198, "step": 26464 }, { "epoch": 2.4835773273273274, "grad_norm": 1.0685910953622015, "learning_rate": 8.761427751197755e-07, "loss": 0.313, "step": 26465 }, { "epoch": 2.483671171171171, "grad_norm": 1.0400541341277003, "learning_rate": 8.758340748481248e-07, "loss": 0.3167, "step": 26466 }, { "epoch": 2.483765015015015, "grad_norm": 0.994611686719718, "learning_rate": 8.755254237493899e-07, "loss": 0.3279, "step": 26467 }, { "epoch": 2.483858858858859, "grad_norm": 1.003501208979328, "learning_rate": 8.752168218272505e-07, "loss": 0.2633, "step": 26468 }, { "epoch": 2.4839527027027026, "grad_norm": 1.6049315492899399, "learning_rate": 8.749082690853866e-07, "loss": 0.3332, "step": 26469 }, { "epoch": 2.4840465465465464, "grad_norm": 0.9960168930050568, "learning_rate": 8.745997655274757e-07, "loss": 0.3363, "step": 26470 }, { "epoch": 2.4841403903903903, "grad_norm": 1.2668321734259764, "learning_rate": 8.742913111571983e-07, "loss": 0.3265, "step": 26471 }, { "epoch": 2.484234234234234, "grad_norm": 1.3098015091712938, "learning_rate": 8.739829059782312e-07, "loss": 0.3575, "step": 26472 }, { "epoch": 2.484328078078078, "grad_norm": 1.0946518838522727, "learning_rate": 8.736745499942507e-07, "loss": 0.2985, "step": 26473 }, { "epoch": 2.484421921921922, "grad_norm": 1.0012510199505549, "learning_rate": 8.733662432089351e-07, "loss": 0.3316, "step": 26474 }, { "epoch": 2.484515765765766, "grad_norm": 1.0611501060064181, "learning_rate": 8.730579856259592e-07, "loss": 0.3409, "step": 26475 }, { "epoch": 2.4846096096096097, "grad_norm": 1.0814054977300107, "learning_rate": 8.727497772489979e-07, "loss": 0.3522, "step": 26476 }, { "epoch": 2.4847034534534536, "grad_norm": 1.0525814956902684, "learning_rate": 8.72441618081728e-07, "loss": 0.2733, "step": 26477 }, { "epoch": 2.4847972972972974, "grad_norm": 1.273615407984104, "learning_rate": 8.72133508127822e-07, "loss": 0.3279, "step": 26478 }, { "epoch": 2.484891141141141, "grad_norm": 1.1720637521369184, "learning_rate": 8.718254473909544e-07, "loss": 0.2848, "step": 26479 }, { "epoch": 2.484984984984985, "grad_norm": 1.0696791091929445, "learning_rate": 8.715174358747974e-07, "loss": 0.3263, "step": 26480 }, { "epoch": 2.485078828828829, "grad_norm": 1.2035341407457016, "learning_rate": 8.712094735830245e-07, "loss": 0.2832, "step": 26481 }, { "epoch": 2.4851726726726726, "grad_norm": 1.4707092374238553, "learning_rate": 8.709015605193056e-07, "loss": 0.3284, "step": 26482 }, { "epoch": 2.4852665165165164, "grad_norm": 1.1384183807163855, "learning_rate": 8.705936966873146e-07, "loss": 0.3172, "step": 26483 }, { "epoch": 2.4853603603603602, "grad_norm": 1.1919581364476366, "learning_rate": 8.702858820907206e-07, "loss": 0.3352, "step": 26484 }, { "epoch": 2.485454204204204, "grad_norm": 1.0730833427993316, "learning_rate": 8.699781167331934e-07, "loss": 0.3213, "step": 26485 }, { "epoch": 2.485548048048048, "grad_norm": 1.1442470015640138, "learning_rate": 8.696704006184037e-07, "loss": 0.3177, "step": 26486 }, { "epoch": 2.485641891891892, "grad_norm": 1.3166958487913032, "learning_rate": 8.693627337500205e-07, "loss": 0.3615, "step": 26487 }, { "epoch": 2.485735735735736, "grad_norm": 1.1277217409004743, "learning_rate": 8.690551161317101e-07, "loss": 0.3601, "step": 26488 }, { "epoch": 2.4858295795795797, "grad_norm": 1.0585435820891536, "learning_rate": 8.687475477671431e-07, "loss": 0.2942, "step": 26489 }, { "epoch": 2.4859234234234235, "grad_norm": 0.9739169049403786, "learning_rate": 8.684400286599854e-07, "loss": 0.282, "step": 26490 }, { "epoch": 2.4860172672672673, "grad_norm": 1.3490836460097488, "learning_rate": 8.681325588139033e-07, "loss": 0.3196, "step": 26491 }, { "epoch": 2.486111111111111, "grad_norm": 1.4652411842917592, "learning_rate": 8.678251382325631e-07, "loss": 0.3105, "step": 26492 }, { "epoch": 2.486204954954955, "grad_norm": 1.0223985970255025, "learning_rate": 8.675177669196299e-07, "loss": 0.3347, "step": 26493 }, { "epoch": 2.486298798798799, "grad_norm": 1.143332186006192, "learning_rate": 8.672104448787677e-07, "loss": 0.3231, "step": 26494 }, { "epoch": 2.4863926426426426, "grad_norm": 1.3307974441034176, "learning_rate": 8.669031721136428e-07, "loss": 0.3176, "step": 26495 }, { "epoch": 2.4864864864864864, "grad_norm": 1.3414059939446463, "learning_rate": 8.665959486279179e-07, "loss": 0.2875, "step": 26496 }, { "epoch": 2.48658033033033, "grad_norm": 1.1731745235225752, "learning_rate": 8.662887744252551e-07, "loss": 0.2784, "step": 26497 }, { "epoch": 2.486674174174174, "grad_norm": 1.2202185299301316, "learning_rate": 8.659816495093188e-07, "loss": 0.3157, "step": 26498 }, { "epoch": 2.486768018018018, "grad_norm": 1.1844243106368597, "learning_rate": 8.656745738837696e-07, "loss": 0.3148, "step": 26499 }, { "epoch": 2.486861861861862, "grad_norm": 1.1075055027385363, "learning_rate": 8.653675475522677e-07, "loss": 0.3239, "step": 26500 }, { "epoch": 2.486955705705706, "grad_norm": 1.1114049228734373, "learning_rate": 8.650605705184767e-07, "loss": 0.3267, "step": 26501 }, { "epoch": 2.4870495495495497, "grad_norm": 1.3014193182898117, "learning_rate": 8.647536427860553e-07, "loss": 0.3253, "step": 26502 }, { "epoch": 2.4871433933933935, "grad_norm": 1.1106144153182047, "learning_rate": 8.644467643586624e-07, "loss": 0.3247, "step": 26503 }, { "epoch": 2.4872372372372373, "grad_norm": 1.4600493972668158, "learning_rate": 8.641399352399577e-07, "loss": 0.3699, "step": 26504 }, { "epoch": 2.487331081081081, "grad_norm": 1.0976200858845728, "learning_rate": 8.638331554335993e-07, "loss": 0.3166, "step": 26505 }, { "epoch": 2.487424924924925, "grad_norm": 1.1055670265559332, "learning_rate": 8.63526424943244e-07, "loss": 0.3273, "step": 26506 }, { "epoch": 2.4875187687687688, "grad_norm": 0.9407502889742786, "learning_rate": 8.632197437725509e-07, "loss": 0.3277, "step": 26507 }, { "epoch": 2.4876126126126126, "grad_norm": 1.1615154405723254, "learning_rate": 8.62913111925176e-07, "loss": 0.3213, "step": 26508 }, { "epoch": 2.4877064564564564, "grad_norm": 1.328810785078276, "learning_rate": 8.626065294047736e-07, "loss": 0.317, "step": 26509 }, { "epoch": 2.4878003003003, "grad_norm": 1.0196374194848525, "learning_rate": 8.622999962150019e-07, "loss": 0.3203, "step": 26510 }, { "epoch": 2.487894144144144, "grad_norm": 1.1117441179584697, "learning_rate": 8.619935123595141e-07, "loss": 0.2886, "step": 26511 }, { "epoch": 2.487987987987988, "grad_norm": 1.084392176815569, "learning_rate": 8.616870778419634e-07, "loss": 0.3113, "step": 26512 }, { "epoch": 2.488081831831832, "grad_norm": 1.168220964673389, "learning_rate": 8.613806926660057e-07, "loss": 0.2877, "step": 26513 }, { "epoch": 2.4881756756756754, "grad_norm": 1.0720162879289004, "learning_rate": 8.610743568352936e-07, "loss": 0.3129, "step": 26514 }, { "epoch": 2.4882695195195197, "grad_norm": 1.0124250355339657, "learning_rate": 8.607680703534776e-07, "loss": 0.308, "step": 26515 }, { "epoch": 2.4883633633633635, "grad_norm": 1.1036703174523055, "learning_rate": 8.60461833224212e-07, "loss": 0.2932, "step": 26516 }, { "epoch": 2.4884572072072073, "grad_norm": 1.1669435335744953, "learning_rate": 8.601556454511489e-07, "loss": 0.2719, "step": 26517 }, { "epoch": 2.488551051051051, "grad_norm": 1.0765487790958645, "learning_rate": 8.598495070379348e-07, "loss": 0.3269, "step": 26518 }, { "epoch": 2.488644894894895, "grad_norm": 1.2338476414120065, "learning_rate": 8.595434179882234e-07, "loss": 0.3369, "step": 26519 }, { "epoch": 2.4887387387387387, "grad_norm": 0.9728296627269931, "learning_rate": 8.592373783056629e-07, "loss": 0.2948, "step": 26520 }, { "epoch": 2.4888325825825826, "grad_norm": 1.4428472566900592, "learning_rate": 8.589313879939016e-07, "loss": 0.282, "step": 26521 }, { "epoch": 2.4889264264264264, "grad_norm": 1.0747185789216143, "learning_rate": 8.586254470565896e-07, "loss": 0.3221, "step": 26522 }, { "epoch": 2.48902027027027, "grad_norm": 0.9861434739432577, "learning_rate": 8.583195554973739e-07, "loss": 0.3434, "step": 26523 }, { "epoch": 2.489114114114114, "grad_norm": 1.2734446001666577, "learning_rate": 8.580137133199007e-07, "loss": 0.3171, "step": 26524 }, { "epoch": 2.489207957957958, "grad_norm": 1.8576217825540722, "learning_rate": 8.577079205278183e-07, "loss": 0.3262, "step": 26525 }, { "epoch": 2.489301801801802, "grad_norm": 1.3272455409885024, "learning_rate": 8.574021771247721e-07, "loss": 0.3124, "step": 26526 }, { "epoch": 2.4893956456456454, "grad_norm": 1.1372519976674975, "learning_rate": 8.570964831144057e-07, "loss": 0.3288, "step": 26527 }, { "epoch": 2.4894894894894897, "grad_norm": 1.0212605558213093, "learning_rate": 8.567908385003676e-07, "loss": 0.3276, "step": 26528 }, { "epoch": 2.4895833333333335, "grad_norm": 1.1587944413964786, "learning_rate": 8.564852432862992e-07, "loss": 0.308, "step": 26529 }, { "epoch": 2.4896771771771773, "grad_norm": 1.0639225281933047, "learning_rate": 8.561796974758452e-07, "loss": 0.2955, "step": 26530 }, { "epoch": 2.489771021021021, "grad_norm": 1.2646764768469143, "learning_rate": 8.558742010726484e-07, "loss": 0.3146, "step": 26531 }, { "epoch": 2.489864864864865, "grad_norm": 1.020330405824279, "learning_rate": 8.555687540803509e-07, "loss": 0.3063, "step": 26532 }, { "epoch": 2.4899587087087087, "grad_norm": 1.0313596674770327, "learning_rate": 8.552633565025941e-07, "loss": 0.2736, "step": 26533 }, { "epoch": 2.4900525525525525, "grad_norm": 1.3879668770032974, "learning_rate": 8.549580083430209e-07, "loss": 0.3181, "step": 26534 }, { "epoch": 2.4901463963963963, "grad_norm": 1.0301439604237155, "learning_rate": 8.546527096052715e-07, "loss": 0.3083, "step": 26535 }, { "epoch": 2.49024024024024, "grad_norm": 1.1508696012517625, "learning_rate": 8.543474602929847e-07, "loss": 0.2862, "step": 26536 }, { "epoch": 2.490334084084084, "grad_norm": 1.1426148317342926, "learning_rate": 8.540422604098019e-07, "loss": 0.2962, "step": 26537 }, { "epoch": 2.4904279279279278, "grad_norm": 1.2370862564472946, "learning_rate": 8.537371099593616e-07, "loss": 0.3036, "step": 26538 }, { "epoch": 2.4905217717717716, "grad_norm": 1.2724937861286538, "learning_rate": 8.534320089453002e-07, "loss": 0.3432, "step": 26539 }, { "epoch": 2.4906156156156154, "grad_norm": 1.6843769701471727, "learning_rate": 8.53126957371258e-07, "loss": 0.312, "step": 26540 }, { "epoch": 2.4907094594594597, "grad_norm": 1.2579143294633102, "learning_rate": 8.528219552408717e-07, "loss": 0.2798, "step": 26541 }, { "epoch": 2.4908033033033035, "grad_norm": 1.551681927005755, "learning_rate": 8.525170025577767e-07, "loss": 0.298, "step": 26542 }, { "epoch": 2.4908971471471473, "grad_norm": 1.0145395813500278, "learning_rate": 8.5221209932561e-07, "loss": 0.3111, "step": 26543 }, { "epoch": 2.490990990990991, "grad_norm": 1.2378495974176573, "learning_rate": 8.519072455480066e-07, "loss": 0.3183, "step": 26544 }, { "epoch": 2.491084834834835, "grad_norm": 1.1147127102183816, "learning_rate": 8.516024412286e-07, "loss": 0.3244, "step": 26545 }, { "epoch": 2.4911786786786787, "grad_norm": 0.9671328174548814, "learning_rate": 8.512976863710271e-07, "loss": 0.2559, "step": 26546 }, { "epoch": 2.4912725225225225, "grad_norm": 2.233099218557428, "learning_rate": 8.509929809789202e-07, "loss": 0.3632, "step": 26547 }, { "epoch": 2.4913663663663663, "grad_norm": 1.072013509812993, "learning_rate": 8.506883250559106e-07, "loss": 0.3209, "step": 26548 }, { "epoch": 2.49146021021021, "grad_norm": 1.3486718243476292, "learning_rate": 8.50383718605634e-07, "loss": 0.3512, "step": 26549 }, { "epoch": 2.491554054054054, "grad_norm": 1.249616406951724, "learning_rate": 8.500791616317211e-07, "loss": 0.2875, "step": 26550 }, { "epoch": 2.4916478978978978, "grad_norm": 2.058034714765193, "learning_rate": 8.49774654137801e-07, "loss": 0.3284, "step": 26551 }, { "epoch": 2.4917417417417416, "grad_norm": 0.9849123354301209, "learning_rate": 8.494701961275075e-07, "loss": 0.3339, "step": 26552 }, { "epoch": 2.4918355855855854, "grad_norm": 1.3480128175701553, "learning_rate": 8.491657876044695e-07, "loss": 0.363, "step": 26553 }, { "epoch": 2.4919294294294296, "grad_norm": 1.1730235867646297, "learning_rate": 8.488614285723162e-07, "loss": 0.3234, "step": 26554 }, { "epoch": 2.4920232732732734, "grad_norm": 1.396814796831658, "learning_rate": 8.485571190346764e-07, "loss": 0.2812, "step": 26555 }, { "epoch": 2.4921171171171173, "grad_norm": 1.0774572248036418, "learning_rate": 8.482528589951789e-07, "loss": 0.2772, "step": 26556 }, { "epoch": 2.492210960960961, "grad_norm": 1.9461479790179583, "learning_rate": 8.4794864845745e-07, "loss": 0.31, "step": 26557 }, { "epoch": 2.492304804804805, "grad_norm": 1.0800978191656594, "learning_rate": 8.476444874251194e-07, "loss": 0.2913, "step": 26558 }, { "epoch": 2.4923986486486487, "grad_norm": 1.0416659274822837, "learning_rate": 8.473403759018123e-07, "loss": 0.369, "step": 26559 }, { "epoch": 2.4924924924924925, "grad_norm": 1.7854495957450047, "learning_rate": 8.47036313891153e-07, "loss": 0.3305, "step": 26560 }, { "epoch": 2.4925863363363363, "grad_norm": 1.5074017631739474, "learning_rate": 8.467323013967699e-07, "loss": 0.343, "step": 26561 }, { "epoch": 2.49268018018018, "grad_norm": 1.0902892035064902, "learning_rate": 8.464283384222865e-07, "loss": 0.2782, "step": 26562 }, { "epoch": 2.492774024024024, "grad_norm": 1.1004969973479357, "learning_rate": 8.461244249713257e-07, "loss": 0.3353, "step": 26563 }, { "epoch": 2.4928678678678677, "grad_norm": 1.050205699884869, "learning_rate": 8.458205610475134e-07, "loss": 0.3218, "step": 26564 }, { "epoch": 2.4929617117117115, "grad_norm": 1.1152523226206297, "learning_rate": 8.455167466544717e-07, "loss": 0.2996, "step": 26565 }, { "epoch": 2.4930555555555554, "grad_norm": 1.1314590293291231, "learning_rate": 8.452129817958221e-07, "loss": 0.3131, "step": 26566 }, { "epoch": 2.4931493993993996, "grad_norm": 1.0654783798315612, "learning_rate": 8.449092664751879e-07, "loss": 0.3312, "step": 26567 }, { "epoch": 2.4932432432432434, "grad_norm": 1.0786229115993595, "learning_rate": 8.446056006961889e-07, "loss": 0.3265, "step": 26568 }, { "epoch": 2.4933370870870872, "grad_norm": 1.2090669899195639, "learning_rate": 8.443019844624456e-07, "loss": 0.3332, "step": 26569 }, { "epoch": 2.493430930930931, "grad_norm": 1.1342408533515664, "learning_rate": 8.4399841777758e-07, "loss": 0.2916, "step": 26570 }, { "epoch": 2.493524774774775, "grad_norm": 1.178631128995068, "learning_rate": 8.436949006452105e-07, "loss": 0.3434, "step": 26571 }, { "epoch": 2.4936186186186187, "grad_norm": 1.007327151739154, "learning_rate": 8.433914330689546e-07, "loss": 0.2775, "step": 26572 }, { "epoch": 2.4937124624624625, "grad_norm": 1.5578513424600906, "learning_rate": 8.430880150524329e-07, "loss": 0.2842, "step": 26573 }, { "epoch": 2.4938063063063063, "grad_norm": 1.0905044041401653, "learning_rate": 8.427846465992617e-07, "loss": 0.3304, "step": 26574 }, { "epoch": 2.49390015015015, "grad_norm": 1.2677663284868559, "learning_rate": 8.424813277130578e-07, "loss": 0.2981, "step": 26575 }, { "epoch": 2.493993993993994, "grad_norm": 1.4110052090384992, "learning_rate": 8.421780583974393e-07, "loss": 0.3113, "step": 26576 }, { "epoch": 2.4940878378378377, "grad_norm": 1.095601445976981, "learning_rate": 8.418748386560215e-07, "loss": 0.3281, "step": 26577 }, { "epoch": 2.4941816816816815, "grad_norm": 1.2678792382673336, "learning_rate": 8.415716684924192e-07, "loss": 0.3272, "step": 26578 }, { "epoch": 2.4942755255255253, "grad_norm": 1.0735465220026372, "learning_rate": 8.412685479102473e-07, "loss": 0.3544, "step": 26579 }, { "epoch": 2.4943693693693696, "grad_norm": 1.0638329559696031, "learning_rate": 8.409654769131198e-07, "loss": 0.3121, "step": 26580 }, { "epoch": 2.4944632132132134, "grad_norm": 1.1053703362137668, "learning_rate": 8.406624555046489e-07, "loss": 0.3325, "step": 26581 }, { "epoch": 2.494557057057057, "grad_norm": 1.1278946365237787, "learning_rate": 8.40359483688451e-07, "loss": 0.322, "step": 26582 }, { "epoch": 2.494650900900901, "grad_norm": 1.1272749519975878, "learning_rate": 8.40056561468136e-07, "loss": 0.3275, "step": 26583 }, { "epoch": 2.494744744744745, "grad_norm": 1.0289411105834523, "learning_rate": 8.397536888473151e-07, "loss": 0.291, "step": 26584 }, { "epoch": 2.4948385885885886, "grad_norm": 1.0545276169453563, "learning_rate": 8.394508658296019e-07, "loss": 0.3072, "step": 26585 }, { "epoch": 2.4949324324324325, "grad_norm": 0.9948572445917664, "learning_rate": 8.391480924186057e-07, "loss": 0.2966, "step": 26586 }, { "epoch": 2.4950262762762763, "grad_norm": 1.1295167553485723, "learning_rate": 8.388453686179354e-07, "loss": 0.3531, "step": 26587 }, { "epoch": 2.49512012012012, "grad_norm": 1.188247244428244, "learning_rate": 8.385426944312031e-07, "loss": 0.3085, "step": 26588 }, { "epoch": 2.495213963963964, "grad_norm": 1.2499775240600473, "learning_rate": 8.382400698620152e-07, "loss": 0.3381, "step": 26589 }, { "epoch": 2.4953078078078077, "grad_norm": 1.10149925475982, "learning_rate": 8.379374949139813e-07, "loss": 0.3017, "step": 26590 }, { "epoch": 2.4954016516516515, "grad_norm": 1.162288401603339, "learning_rate": 8.376349695907082e-07, "loss": 0.3206, "step": 26591 }, { "epoch": 2.4954954954954953, "grad_norm": 1.0609169106120868, "learning_rate": 8.373324938958032e-07, "loss": 0.2848, "step": 26592 }, { "epoch": 2.4955893393393396, "grad_norm": 1.602597492579478, "learning_rate": 8.370300678328719e-07, "loss": 0.3135, "step": 26593 }, { "epoch": 2.495683183183183, "grad_norm": 1.9289217098178923, "learning_rate": 8.367276914055223e-07, "loss": 0.3155, "step": 26594 }, { "epoch": 2.495777027027027, "grad_norm": 1.1251039698214411, "learning_rate": 8.364253646173581e-07, "loss": 0.25, "step": 26595 }, { "epoch": 2.495870870870871, "grad_norm": 1.151901731866429, "learning_rate": 8.361230874719833e-07, "loss": 0.3144, "step": 26596 }, { "epoch": 2.495964714714715, "grad_norm": 1.291957051719818, "learning_rate": 8.358208599730039e-07, "loss": 0.3149, "step": 26597 }, { "epoch": 2.4960585585585586, "grad_norm": 1.4715465384221493, "learning_rate": 8.35518682124023e-07, "loss": 0.2905, "step": 26598 }, { "epoch": 2.4961524024024024, "grad_norm": 1.1396680476838235, "learning_rate": 8.35216553928641e-07, "loss": 0.3195, "step": 26599 }, { "epoch": 2.4962462462462462, "grad_norm": 1.0386000390571657, "learning_rate": 8.349144753904642e-07, "loss": 0.33, "step": 26600 }, { "epoch": 2.49634009009009, "grad_norm": 1.049689982148703, "learning_rate": 8.346124465130923e-07, "loss": 0.3157, "step": 26601 }, { "epoch": 2.496433933933934, "grad_norm": 1.038064899508303, "learning_rate": 8.343104673001256e-07, "loss": 0.2948, "step": 26602 }, { "epoch": 2.4965277777777777, "grad_norm": 1.1077960235565527, "learning_rate": 8.340085377551665e-07, "loss": 0.2978, "step": 26603 }, { "epoch": 2.4966216216216215, "grad_norm": 1.111404227697693, "learning_rate": 8.337066578818131e-07, "loss": 0.327, "step": 26604 }, { "epoch": 2.4967154654654653, "grad_norm": 1.403510500804456, "learning_rate": 8.334048276836648e-07, "loss": 0.322, "step": 26605 }, { "epoch": 2.4968093093093096, "grad_norm": 1.8374748246716912, "learning_rate": 8.33103047164322e-07, "loss": 0.2802, "step": 26606 }, { "epoch": 2.496903153153153, "grad_norm": 1.2351744735579673, "learning_rate": 8.328013163273824e-07, "loss": 0.3078, "step": 26607 }, { "epoch": 2.496996996996997, "grad_norm": 1.2378029520444016, "learning_rate": 8.32499635176442e-07, "loss": 0.2871, "step": 26608 }, { "epoch": 2.497090840840841, "grad_norm": 1.1217153923447516, "learning_rate": 8.321980037150995e-07, "loss": 0.3332, "step": 26609 }, { "epoch": 2.497184684684685, "grad_norm": 1.215975734676631, "learning_rate": 8.31896421946951e-07, "loss": 0.3211, "step": 26610 }, { "epoch": 2.4972785285285286, "grad_norm": 1.1857167600547314, "learning_rate": 8.31594889875591e-07, "loss": 0.3136, "step": 26611 }, { "epoch": 2.4973723723723724, "grad_norm": 1.8233434402692228, "learning_rate": 8.312934075046169e-07, "loss": 0.3398, "step": 26612 }, { "epoch": 2.4974662162162162, "grad_norm": 1.3694966538448035, "learning_rate": 8.309919748376222e-07, "loss": 0.304, "step": 26613 }, { "epoch": 2.49756006006006, "grad_norm": 1.0690920200791922, "learning_rate": 8.30690591878201e-07, "loss": 0.329, "step": 26614 }, { "epoch": 2.497653903903904, "grad_norm": 1.934168315900444, "learning_rate": 8.303892586299462e-07, "loss": 0.3124, "step": 26615 }, { "epoch": 2.4977477477477477, "grad_norm": 1.0940208687170832, "learning_rate": 8.300879750964513e-07, "loss": 0.3202, "step": 26616 }, { "epoch": 2.4978415915915915, "grad_norm": 1.0469201595717237, "learning_rate": 8.297867412813066e-07, "loss": 0.3202, "step": 26617 }, { "epoch": 2.4979354354354353, "grad_norm": 1.1656312525152581, "learning_rate": 8.294855571881071e-07, "loss": 0.288, "step": 26618 }, { "epoch": 2.498029279279279, "grad_norm": 1.1550840151983601, "learning_rate": 8.291844228204416e-07, "loss": 0.2743, "step": 26619 }, { "epoch": 2.498123123123123, "grad_norm": 1.0855976709471873, "learning_rate": 8.288833381819006e-07, "loss": 0.3303, "step": 26620 }, { "epoch": 2.498216966966967, "grad_norm": 1.1157770220582528, "learning_rate": 8.285823032760748e-07, "loss": 0.3072, "step": 26621 }, { "epoch": 2.498310810810811, "grad_norm": 1.1875672593189588, "learning_rate": 8.282813181065541e-07, "loss": 0.3034, "step": 26622 }, { "epoch": 2.4984046546546548, "grad_norm": 1.6521302620376874, "learning_rate": 8.279803826769245e-07, "loss": 0.3043, "step": 26623 }, { "epoch": 2.4984984984984986, "grad_norm": 1.0292762372054636, "learning_rate": 8.27679496990777e-07, "loss": 0.2876, "step": 26624 }, { "epoch": 2.4985923423423424, "grad_norm": 1.0107201790945726, "learning_rate": 8.273786610516981e-07, "loss": 0.2876, "step": 26625 }, { "epoch": 2.498686186186186, "grad_norm": 1.6382163593614223, "learning_rate": 8.27077874863274e-07, "loss": 0.3394, "step": 26626 }, { "epoch": 2.49878003003003, "grad_norm": 1.1198746081340958, "learning_rate": 8.267771384290923e-07, "loss": 0.3231, "step": 26627 }, { "epoch": 2.498873873873874, "grad_norm": 1.0086563976262743, "learning_rate": 8.264764517527374e-07, "loss": 0.322, "step": 26628 }, { "epoch": 2.4989677177177176, "grad_norm": 1.133950872615837, "learning_rate": 8.261758148377941e-07, "loss": 0.3189, "step": 26629 }, { "epoch": 2.4990615615615615, "grad_norm": 1.6658268071620297, "learning_rate": 8.258752276878485e-07, "loss": 0.2917, "step": 26630 }, { "epoch": 2.4991554054054053, "grad_norm": 1.1716180457226941, "learning_rate": 8.255746903064843e-07, "loss": 0.294, "step": 26631 }, { "epoch": 2.499249249249249, "grad_norm": 1.0299746498297564, "learning_rate": 8.252742026972832e-07, "loss": 0.3118, "step": 26632 }, { "epoch": 2.499343093093093, "grad_norm": 1.049560186585623, "learning_rate": 8.249737648638301e-07, "loss": 0.3033, "step": 26633 }, { "epoch": 2.499436936936937, "grad_norm": 1.295919023582718, "learning_rate": 8.246733768097059e-07, "loss": 0.3346, "step": 26634 }, { "epoch": 2.499530780780781, "grad_norm": 1.0932117766170617, "learning_rate": 8.243730385384918e-07, "loss": 0.2952, "step": 26635 }, { "epoch": 2.4996246246246248, "grad_norm": 1.262809598790646, "learning_rate": 8.240727500537704e-07, "loss": 0.3646, "step": 26636 }, { "epoch": 2.4997184684684686, "grad_norm": 1.2866161633830495, "learning_rate": 8.237725113591211e-07, "loss": 0.3355, "step": 26637 }, { "epoch": 2.4998123123123124, "grad_norm": 1.1047010823390646, "learning_rate": 8.23472322458122e-07, "loss": 0.2873, "step": 26638 }, { "epoch": 2.499906156156156, "grad_norm": 1.389151288787715, "learning_rate": 8.23172183354356e-07, "loss": 0.3127, "step": 26639 }, { "epoch": 2.5, "grad_norm": 1.1476686990867382, "learning_rate": 8.228720940514002e-07, "loss": 0.2762, "step": 26640 }, { "epoch": 2.500093843843844, "grad_norm": 2.590185649246413, "learning_rate": 8.225720545528298e-07, "loss": 0.3264, "step": 26641 }, { "epoch": 2.5001876876876876, "grad_norm": 1.703523792728949, "learning_rate": 8.222720648622257e-07, "loss": 0.3404, "step": 26642 }, { "epoch": 2.5002815315315314, "grad_norm": 1.1240099523934945, "learning_rate": 8.219721249831636e-07, "loss": 0.3202, "step": 26643 }, { "epoch": 2.5003753753753752, "grad_norm": 1.0859140883807052, "learning_rate": 8.21672234919218e-07, "loss": 0.3153, "step": 26644 }, { "epoch": 2.5004692192192195, "grad_norm": 1.147993778442882, "learning_rate": 8.213723946739677e-07, "loss": 0.3294, "step": 26645 }, { "epoch": 2.500563063063063, "grad_norm": 1.1998600599107585, "learning_rate": 8.210726042509859e-07, "loss": 0.3209, "step": 26646 }, { "epoch": 2.500656906906907, "grad_norm": 1.0596104126552035, "learning_rate": 8.207728636538459e-07, "loss": 0.3368, "step": 26647 }, { "epoch": 2.5007507507507505, "grad_norm": 1.149456260496205, "learning_rate": 8.204731728861248e-07, "loss": 0.3339, "step": 26648 }, { "epoch": 2.5008445945945947, "grad_norm": 1.1995721791409384, "learning_rate": 8.201735319513931e-07, "loss": 0.2877, "step": 26649 }, { "epoch": 2.5009384384384385, "grad_norm": 1.2472572282765186, "learning_rate": 8.198739408532236e-07, "loss": 0.3067, "step": 26650 }, { "epoch": 2.5010322822822824, "grad_norm": 1.2439488093432534, "learning_rate": 8.195743995951905e-07, "loss": 0.3299, "step": 26651 }, { "epoch": 2.501126126126126, "grad_norm": 1.1473179469805341, "learning_rate": 8.192749081808637e-07, "loss": 0.3381, "step": 26652 }, { "epoch": 2.50121996996997, "grad_norm": 1.4641819508130436, "learning_rate": 8.189754666138139e-07, "loss": 0.3192, "step": 26653 }, { "epoch": 2.501313813813814, "grad_norm": 1.1563764720914906, "learning_rate": 8.18676074897612e-07, "loss": 0.3111, "step": 26654 }, { "epoch": 2.5014076576576576, "grad_norm": 2.121118047806217, "learning_rate": 8.183767330358272e-07, "loss": 0.31, "step": 26655 }, { "epoch": 2.5015015015015014, "grad_norm": 1.0382121143393002, "learning_rate": 8.180774410320274e-07, "loss": 0.2718, "step": 26656 }, { "epoch": 2.5015953453453452, "grad_norm": 1.0121590655959414, "learning_rate": 8.177781988897837e-07, "loss": 0.301, "step": 26657 }, { "epoch": 2.501689189189189, "grad_norm": 1.2198979768342653, "learning_rate": 8.174790066126631e-07, "loss": 0.3485, "step": 26658 }, { "epoch": 2.501783033033033, "grad_norm": 2.3241560640662833, "learning_rate": 8.171798642042311e-07, "loss": 0.2957, "step": 26659 }, { "epoch": 2.501876876876877, "grad_norm": 1.2074240692071887, "learning_rate": 8.168807716680571e-07, "loss": 0.3635, "step": 26660 }, { "epoch": 2.5019707207207205, "grad_norm": 1.0942406011119152, "learning_rate": 8.16581729007706e-07, "loss": 0.3059, "step": 26661 }, { "epoch": 2.5020645645645647, "grad_norm": 1.234485915674882, "learning_rate": 8.162827362267422e-07, "loss": 0.311, "step": 26662 }, { "epoch": 2.5021584084084085, "grad_norm": 1.1835665778314586, "learning_rate": 8.159837933287329e-07, "loss": 0.2968, "step": 26663 }, { "epoch": 2.5022522522522523, "grad_norm": 1.0273248033199776, "learning_rate": 8.156849003172412e-07, "loss": 0.2777, "step": 26664 }, { "epoch": 2.502346096096096, "grad_norm": 1.0727838390423141, "learning_rate": 8.15386057195831e-07, "loss": 0.3147, "step": 26665 }, { "epoch": 2.50243993993994, "grad_norm": 1.3010806074471397, "learning_rate": 8.150872639680652e-07, "loss": 0.3074, "step": 26666 }, { "epoch": 2.5025337837837838, "grad_norm": 1.1793167960464934, "learning_rate": 8.147885206375067e-07, "loss": 0.2953, "step": 26667 }, { "epoch": 2.5026276276276276, "grad_norm": 0.9781766373610428, "learning_rate": 8.144898272077156e-07, "loss": 0.3173, "step": 26668 }, { "epoch": 2.5027214714714714, "grad_norm": 1.324121988286081, "learning_rate": 8.141911836822569e-07, "loss": 0.3157, "step": 26669 }, { "epoch": 2.502815315315315, "grad_norm": 1.177080825661662, "learning_rate": 8.138925900646888e-07, "loss": 0.3217, "step": 26670 }, { "epoch": 2.502909159159159, "grad_norm": 1.2019842432394474, "learning_rate": 8.135940463585707e-07, "loss": 0.319, "step": 26671 }, { "epoch": 2.503003003003003, "grad_norm": 0.9889007425079576, "learning_rate": 8.132955525674651e-07, "loss": 0.3203, "step": 26672 }, { "epoch": 2.503096846846847, "grad_norm": 1.01245242226296, "learning_rate": 8.12997108694929e-07, "loss": 0.2961, "step": 26673 }, { "epoch": 2.5031906906906904, "grad_norm": 1.1948651553829803, "learning_rate": 8.126987147445203e-07, "loss": 0.2956, "step": 26674 }, { "epoch": 2.5032845345345347, "grad_norm": 1.1425074530836732, "learning_rate": 8.124003707197986e-07, "loss": 0.3307, "step": 26675 }, { "epoch": 2.5033783783783785, "grad_norm": 1.1898646307929583, "learning_rate": 8.121020766243199e-07, "loss": 0.3522, "step": 26676 }, { "epoch": 2.5034722222222223, "grad_norm": 1.148432689995968, "learning_rate": 8.118038324616411e-07, "loss": 0.3311, "step": 26677 }, { "epoch": 2.503566066066066, "grad_norm": 1.3748643721183789, "learning_rate": 8.115056382353187e-07, "loss": 0.351, "step": 26678 }, { "epoch": 2.50365990990991, "grad_norm": 1.3865794082341325, "learning_rate": 8.112074939489073e-07, "loss": 0.2869, "step": 26679 }, { "epoch": 2.5037537537537538, "grad_norm": 1.1079864272437823, "learning_rate": 8.109093996059603e-07, "loss": 0.2963, "step": 26680 }, { "epoch": 2.5038475975975976, "grad_norm": 1.0962573070841664, "learning_rate": 8.106113552100353e-07, "loss": 0.2963, "step": 26681 }, { "epoch": 2.5039414414414414, "grad_norm": 1.1117362966472557, "learning_rate": 8.103133607646841e-07, "loss": 0.3075, "step": 26682 }, { "epoch": 2.504035285285285, "grad_norm": 0.932696894318467, "learning_rate": 8.100154162734585e-07, "loss": 0.3286, "step": 26683 }, { "epoch": 2.504129129129129, "grad_norm": 1.0388605528790535, "learning_rate": 8.097175217399133e-07, "loss": 0.3067, "step": 26684 }, { "epoch": 2.504222972972973, "grad_norm": 1.0774234857446856, "learning_rate": 8.094196771675994e-07, "loss": 0.26, "step": 26685 }, { "epoch": 2.504316816816817, "grad_norm": 1.2611082272466931, "learning_rate": 8.091218825600666e-07, "loss": 0.3179, "step": 26686 }, { "epoch": 2.5044106606606604, "grad_norm": 1.2083574611382253, "learning_rate": 8.088241379208677e-07, "loss": 0.3148, "step": 26687 }, { "epoch": 2.5045045045045047, "grad_norm": 1.1229326190033524, "learning_rate": 8.085264432535522e-07, "loss": 0.2847, "step": 26688 }, { "epoch": 2.5045983483483485, "grad_norm": 1.2501078832091812, "learning_rate": 8.082287985616694e-07, "loss": 0.2944, "step": 26689 }, { "epoch": 2.5046921921921923, "grad_norm": 1.0510535951339741, "learning_rate": 8.07931203848768e-07, "loss": 0.2413, "step": 26690 }, { "epoch": 2.504786036036036, "grad_norm": 1.722793434324511, "learning_rate": 8.076336591183959e-07, "loss": 0.2874, "step": 26691 }, { "epoch": 2.50487987987988, "grad_norm": 1.8557929711421257, "learning_rate": 8.073361643741001e-07, "loss": 0.2909, "step": 26692 }, { "epoch": 2.5049737237237237, "grad_norm": 1.2123014511134123, "learning_rate": 8.070387196194301e-07, "loss": 0.3043, "step": 26693 }, { "epoch": 2.5050675675675675, "grad_norm": 0.924256147614552, "learning_rate": 8.067413248579309e-07, "loss": 0.282, "step": 26694 }, { "epoch": 2.5051614114114114, "grad_norm": 1.1611774396769037, "learning_rate": 8.064439800931467e-07, "loss": 0.3407, "step": 26695 }, { "epoch": 2.505255255255255, "grad_norm": 1.129869425935682, "learning_rate": 8.06146685328626e-07, "loss": 0.2959, "step": 26696 }, { "epoch": 2.505349099099099, "grad_norm": 1.1610642103054083, "learning_rate": 8.058494405679118e-07, "loss": 0.3021, "step": 26697 }, { "epoch": 2.505442942942943, "grad_norm": 1.1496975872587554, "learning_rate": 8.055522458145476e-07, "loss": 0.3395, "step": 26698 }, { "epoch": 2.505536786786787, "grad_norm": 1.5155014239041873, "learning_rate": 8.052551010720783e-07, "loss": 0.322, "step": 26699 }, { "epoch": 2.5056306306306304, "grad_norm": 1.0738175731095139, "learning_rate": 8.049580063440465e-07, "loss": 0.3151, "step": 26700 }, { "epoch": 2.5057244744744747, "grad_norm": 1.0360455797911625, "learning_rate": 8.046609616339939e-07, "loss": 0.3231, "step": 26701 }, { "epoch": 2.5058183183183185, "grad_norm": 1.0181935715858323, "learning_rate": 8.043639669454622e-07, "loss": 0.3066, "step": 26702 }, { "epoch": 2.5059121621621623, "grad_norm": 1.196173217250051, "learning_rate": 8.040670222819929e-07, "loss": 0.3761, "step": 26703 }, { "epoch": 2.506006006006006, "grad_norm": 1.3661397976630707, "learning_rate": 8.037701276471254e-07, "loss": 0.319, "step": 26704 }, { "epoch": 2.50609984984985, "grad_norm": 1.1390995171747487, "learning_rate": 8.034732830444014e-07, "loss": 0.2827, "step": 26705 }, { "epoch": 2.5061936936936937, "grad_norm": 1.1449818627046795, "learning_rate": 8.031764884773596e-07, "loss": 0.3031, "step": 26706 }, { "epoch": 2.5062875375375375, "grad_norm": 1.0833600093225269, "learning_rate": 8.028797439495372e-07, "loss": 0.3, "step": 26707 }, { "epoch": 2.5063813813813813, "grad_norm": 1.7671804254270032, "learning_rate": 8.025830494644748e-07, "loss": 0.2861, "step": 26708 }, { "epoch": 2.506475225225225, "grad_norm": 1.17219002536401, "learning_rate": 8.022864050257085e-07, "loss": 0.3047, "step": 26709 }, { "epoch": 2.506569069069069, "grad_norm": 1.3594236271057696, "learning_rate": 8.019898106367746e-07, "loss": 0.3355, "step": 26710 }, { "epoch": 2.5066629129129128, "grad_norm": 1.1782310018625506, "learning_rate": 8.016932663012112e-07, "loss": 0.3173, "step": 26711 }, { "epoch": 2.506756756756757, "grad_norm": 1.325923214498488, "learning_rate": 8.01396772022553e-07, "loss": 0.3273, "step": 26712 }, { "epoch": 2.5068506006006004, "grad_norm": 1.1975361974245633, "learning_rate": 8.011003278043356e-07, "loss": 0.2895, "step": 26713 }, { "epoch": 2.5069444444444446, "grad_norm": 1.2106110654059488, "learning_rate": 8.008039336500928e-07, "loss": 0.3406, "step": 26714 }, { "epoch": 2.5070382882882885, "grad_norm": 1.230344641753546, "learning_rate": 8.005075895633591e-07, "loss": 0.3056, "step": 26715 }, { "epoch": 2.5071321321321323, "grad_norm": 1.1219371062002408, "learning_rate": 8.002112955476665e-07, "loss": 0.2829, "step": 26716 }, { "epoch": 2.507225975975976, "grad_norm": 1.0344271211546279, "learning_rate": 7.999150516065502e-07, "loss": 0.3095, "step": 26717 }, { "epoch": 2.50731981981982, "grad_norm": 1.1338798303699362, "learning_rate": 7.99618857743541e-07, "loss": 0.2985, "step": 26718 }, { "epoch": 2.5074136636636637, "grad_norm": 1.556590504720368, "learning_rate": 7.99322713962169e-07, "loss": 0.309, "step": 26719 }, { "epoch": 2.5075075075075075, "grad_norm": 1.2398548013309794, "learning_rate": 7.99026620265968e-07, "loss": 0.3069, "step": 26720 }, { "epoch": 2.5076013513513513, "grad_norm": 1.1867237408929996, "learning_rate": 7.987305766584675e-07, "loss": 0.3451, "step": 26721 }, { "epoch": 2.507695195195195, "grad_norm": 1.6048054405099534, "learning_rate": 7.98434583143195e-07, "loss": 0.307, "step": 26722 }, { "epoch": 2.507789039039039, "grad_norm": 1.1424069417140714, "learning_rate": 7.981386397236828e-07, "loss": 0.3141, "step": 26723 }, { "epoch": 2.5078828828828827, "grad_norm": 1.1230414630253152, "learning_rate": 7.978427464034583e-07, "loss": 0.3174, "step": 26724 }, { "epoch": 2.507976726726727, "grad_norm": 1.1121555037943784, "learning_rate": 7.975469031860494e-07, "loss": 0.302, "step": 26725 }, { "epoch": 2.5080705705705704, "grad_norm": 1.1339113936988392, "learning_rate": 7.97251110074983e-07, "loss": 0.3629, "step": 26726 }, { "epoch": 2.5081644144144146, "grad_norm": 1.1952663700743187, "learning_rate": 7.969553670737868e-07, "loss": 0.28, "step": 26727 }, { "epoch": 2.508258258258258, "grad_norm": 1.1519633775589222, "learning_rate": 7.96659674185985e-07, "loss": 0.2808, "step": 26728 }, { "epoch": 2.5083521021021022, "grad_norm": 1.0222819912923673, "learning_rate": 7.963640314151055e-07, "loss": 0.2732, "step": 26729 }, { "epoch": 2.508445945945946, "grad_norm": 1.024761550614735, "learning_rate": 7.960684387646728e-07, "loss": 0.3224, "step": 26730 }, { "epoch": 2.50853978978979, "grad_norm": 1.409408572286302, "learning_rate": 7.957728962382094e-07, "loss": 0.3445, "step": 26731 }, { "epoch": 2.5086336336336337, "grad_norm": 1.9175052232441896, "learning_rate": 7.954774038392415e-07, "loss": 0.2826, "step": 26732 }, { "epoch": 2.5087274774774775, "grad_norm": 1.2630050590639235, "learning_rate": 7.951819615712919e-07, "loss": 0.3329, "step": 26733 }, { "epoch": 2.5088213213213213, "grad_norm": 1.0246849649572616, "learning_rate": 7.948865694378816e-07, "loss": 0.302, "step": 26734 }, { "epoch": 2.508915165165165, "grad_norm": 1.1377471535423789, "learning_rate": 7.945912274425344e-07, "loss": 0.3418, "step": 26735 }, { "epoch": 2.509009009009009, "grad_norm": 1.1227520853872817, "learning_rate": 7.942959355887708e-07, "loss": 0.3187, "step": 26736 }, { "epoch": 2.5091028528528527, "grad_norm": 1.0601275687233966, "learning_rate": 7.94000693880112e-07, "loss": 0.3381, "step": 26737 }, { "epoch": 2.5091966966966965, "grad_norm": 1.096748875215641, "learning_rate": 7.937055023200779e-07, "loss": 0.2746, "step": 26738 }, { "epoch": 2.5092905405405403, "grad_norm": 1.338106174919057, "learning_rate": 7.934103609121885e-07, "loss": 0.3312, "step": 26739 }, { "epoch": 2.5093843843843846, "grad_norm": 1.016535791055135, "learning_rate": 7.931152696599608e-07, "loss": 0.3598, "step": 26740 }, { "epoch": 2.509478228228228, "grad_norm": 1.1048493016486076, "learning_rate": 7.928202285669162e-07, "loss": 0.3225, "step": 26741 }, { "epoch": 2.5095720720720722, "grad_norm": 1.081047286181831, "learning_rate": 7.925252376365711e-07, "loss": 0.2994, "step": 26742 }, { "epoch": 2.509665915915916, "grad_norm": 1.036060139527162, "learning_rate": 7.922302968724416e-07, "loss": 0.302, "step": 26743 }, { "epoch": 2.50975975975976, "grad_norm": 1.1739038886437654, "learning_rate": 7.919354062780471e-07, "loss": 0.2814, "step": 26744 }, { "epoch": 2.5098536036036037, "grad_norm": 0.9567077164649871, "learning_rate": 7.916405658569016e-07, "loss": 0.3163, "step": 26745 }, { "epoch": 2.5099474474474475, "grad_norm": 1.0530048849728255, "learning_rate": 7.913457756125198e-07, "loss": 0.3152, "step": 26746 }, { "epoch": 2.5100412912912913, "grad_norm": 1.427859116284389, "learning_rate": 7.910510355484191e-07, "loss": 0.3005, "step": 26747 }, { "epoch": 2.510135135135135, "grad_norm": 1.061323717696895, "learning_rate": 7.907563456681117e-07, "loss": 0.3279, "step": 26748 }, { "epoch": 2.510228978978979, "grad_norm": 1.0629733362579241, "learning_rate": 7.904617059751124e-07, "loss": 0.3013, "step": 26749 }, { "epoch": 2.5103228228228227, "grad_norm": 1.66558736281131, "learning_rate": 7.90167116472933e-07, "loss": 0.2867, "step": 26750 }, { "epoch": 2.5104166666666665, "grad_norm": 1.088487855990473, "learning_rate": 7.898725771650872e-07, "loss": 0.2682, "step": 26751 }, { "epoch": 2.5105105105105103, "grad_norm": 1.0761800769730028, "learning_rate": 7.895780880550846e-07, "loss": 0.3101, "step": 26752 }, { "epoch": 2.5106043543543546, "grad_norm": 1.2529124982574043, "learning_rate": 7.89283649146439e-07, "loss": 0.3458, "step": 26753 }, { "epoch": 2.510698198198198, "grad_norm": 1.0824758616393924, "learning_rate": 7.889892604426603e-07, "loss": 0.2756, "step": 26754 }, { "epoch": 2.510792042042042, "grad_norm": 1.101778384382268, "learning_rate": 7.886949219472572e-07, "loss": 0.3783, "step": 26755 }, { "epoch": 2.510885885885886, "grad_norm": 1.237871114914634, "learning_rate": 7.884006336637412e-07, "loss": 0.2756, "step": 26756 }, { "epoch": 2.51097972972973, "grad_norm": 2.001498467992695, "learning_rate": 7.881063955956203e-07, "loss": 0.2899, "step": 26757 }, { "epoch": 2.5110735735735736, "grad_norm": 1.456564205836946, "learning_rate": 7.878122077464012e-07, "loss": 0.2918, "step": 26758 }, { "epoch": 2.5111674174174174, "grad_norm": 1.143870892881811, "learning_rate": 7.875180701195939e-07, "loss": 0.2895, "step": 26759 }, { "epoch": 2.5112612612612613, "grad_norm": 1.2672431043703056, "learning_rate": 7.872239827187045e-07, "loss": 0.3015, "step": 26760 }, { "epoch": 2.511355105105105, "grad_norm": 1.0790611967413148, "learning_rate": 7.869299455472385e-07, "loss": 0.2822, "step": 26761 }, { "epoch": 2.511448948948949, "grad_norm": 1.1481798916458252, "learning_rate": 7.866359586087036e-07, "loss": 0.3121, "step": 26762 }, { "epoch": 2.5115427927927927, "grad_norm": 1.3816856077222308, "learning_rate": 7.863420219066054e-07, "loss": 0.3014, "step": 26763 }, { "epoch": 2.5116366366366365, "grad_norm": 1.197931516203753, "learning_rate": 7.860481354444443e-07, "loss": 0.3225, "step": 26764 }, { "epoch": 2.5117304804804803, "grad_norm": 1.0160602258205589, "learning_rate": 7.857542992257289e-07, "loss": 0.3111, "step": 26765 }, { "epoch": 2.5118243243243246, "grad_norm": 1.0906636968247254, "learning_rate": 7.854605132539605e-07, "loss": 0.3528, "step": 26766 }, { "epoch": 2.511918168168168, "grad_norm": 1.559473290389111, "learning_rate": 7.851667775326416e-07, "loss": 0.3217, "step": 26767 }, { "epoch": 2.512012012012012, "grad_norm": 1.0168786671956627, "learning_rate": 7.848730920652769e-07, "loss": 0.3125, "step": 26768 }, { "epoch": 2.512105855855856, "grad_norm": 6.976777550590336, "learning_rate": 7.845794568553655e-07, "loss": 0.2958, "step": 26769 }, { "epoch": 2.5121996996997, "grad_norm": 1.057386291421555, "learning_rate": 7.842858719064089e-07, "loss": 0.3148, "step": 26770 }, { "epoch": 2.5122935435435436, "grad_norm": 1.0542828363204741, "learning_rate": 7.839923372219088e-07, "loss": 0.3076, "step": 26771 }, { "epoch": 2.5123873873873874, "grad_norm": 12.683161612643179, "learning_rate": 7.836988528053646e-07, "loss": 0.2716, "step": 26772 }, { "epoch": 2.5124812312312312, "grad_norm": 1.1377798364332568, "learning_rate": 7.834054186602736e-07, "loss": 0.2898, "step": 26773 }, { "epoch": 2.512575075075075, "grad_norm": 1.561146825022407, "learning_rate": 7.831120347901377e-07, "loss": 0.3365, "step": 26774 }, { "epoch": 2.512668918918919, "grad_norm": 1.2273069274692596, "learning_rate": 7.828187011984528e-07, "loss": 0.317, "step": 26775 }, { "epoch": 2.5127627627627627, "grad_norm": 1.1113091979721097, "learning_rate": 7.825254178887176e-07, "loss": 0.3144, "step": 26776 }, { "epoch": 2.5128566066066065, "grad_norm": 1.1779215452025145, "learning_rate": 7.822321848644277e-07, "loss": 0.3103, "step": 26777 }, { "epoch": 2.5129504504504503, "grad_norm": 1.1254053629460685, "learning_rate": 7.819390021290796e-07, "loss": 0.3179, "step": 26778 }, { "epoch": 2.5130442942942945, "grad_norm": 1.1089462297714399, "learning_rate": 7.816458696861684e-07, "loss": 0.3073, "step": 26779 }, { "epoch": 2.513138138138138, "grad_norm": 1.0963109480484459, "learning_rate": 7.813527875391913e-07, "loss": 0.337, "step": 26780 }, { "epoch": 2.513231981981982, "grad_norm": 1.0948416516155206, "learning_rate": 7.810597556916411e-07, "loss": 0.313, "step": 26781 }, { "epoch": 2.513325825825826, "grad_norm": 1.1654842190716188, "learning_rate": 7.80766774147011e-07, "loss": 0.309, "step": 26782 }, { "epoch": 2.51341966966967, "grad_norm": 1.0229711654270066, "learning_rate": 7.804738429087965e-07, "loss": 0.3272, "step": 26783 }, { "epoch": 2.5135135135135136, "grad_norm": 1.4305055680263028, "learning_rate": 7.801809619804884e-07, "loss": 0.3109, "step": 26784 }, { "epoch": 2.5136073573573574, "grad_norm": 3.2237892283224285, "learning_rate": 7.798881313655788e-07, "loss": 0.2962, "step": 26785 }, { "epoch": 2.513701201201201, "grad_norm": 1.1255071798365504, "learning_rate": 7.795953510675608e-07, "loss": 0.316, "step": 26786 }, { "epoch": 2.513795045045045, "grad_norm": 1.1922110733379867, "learning_rate": 7.793026210899241e-07, "loss": 0.3159, "step": 26787 }, { "epoch": 2.513888888888889, "grad_norm": 1.7255070450702115, "learning_rate": 7.790099414361585e-07, "loss": 0.3232, "step": 26788 }, { "epoch": 2.5139827327327327, "grad_norm": 1.0938287033577458, "learning_rate": 7.787173121097547e-07, "loss": 0.299, "step": 26789 }, { "epoch": 2.5140765765765765, "grad_norm": 1.2757546983888297, "learning_rate": 7.784247331142009e-07, "loss": 0.3498, "step": 26790 }, { "epoch": 2.5141704204204203, "grad_norm": 1.9830559089164572, "learning_rate": 7.781322044529849e-07, "loss": 0.2632, "step": 26791 }, { "epoch": 2.5142642642642645, "grad_norm": 2.1261189624653998, "learning_rate": 7.778397261295967e-07, "loss": 0.2939, "step": 26792 }, { "epoch": 2.514358108108108, "grad_norm": 1.0522612219885306, "learning_rate": 7.775472981475223e-07, "loss": 0.2972, "step": 26793 }, { "epoch": 2.514451951951952, "grad_norm": 1.174301588003422, "learning_rate": 7.772549205102469e-07, "loss": 0.3353, "step": 26794 }, { "epoch": 2.514545795795796, "grad_norm": 1.1603293066424187, "learning_rate": 7.769625932212599e-07, "loss": 0.3473, "step": 26795 }, { "epoch": 2.5146396396396398, "grad_norm": 1.1870090366726087, "learning_rate": 7.766703162840445e-07, "loss": 0.2896, "step": 26796 }, { "epoch": 2.5147334834834836, "grad_norm": 1.054231718893391, "learning_rate": 7.763780897020851e-07, "loss": 0.3104, "step": 26797 }, { "epoch": 2.5148273273273274, "grad_norm": 1.3698488995554337, "learning_rate": 7.760859134788673e-07, "loss": 0.2895, "step": 26798 }, { "epoch": 2.514921171171171, "grad_norm": 1.4739909015071126, "learning_rate": 7.75793787617875e-07, "loss": 0.33, "step": 26799 }, { "epoch": 2.515015015015015, "grad_norm": 1.1508756064125718, "learning_rate": 7.755017121225905e-07, "loss": 0.2831, "step": 26800 }, { "epoch": 2.515108858858859, "grad_norm": 1.09050362459387, "learning_rate": 7.752096869964959e-07, "loss": 0.2675, "step": 26801 }, { "epoch": 2.5152027027027026, "grad_norm": 0.9568097437212811, "learning_rate": 7.749177122430734e-07, "loss": 0.3153, "step": 26802 }, { "epoch": 2.5152965465465464, "grad_norm": 1.1924980548335091, "learning_rate": 7.746257878658036e-07, "loss": 0.3049, "step": 26803 }, { "epoch": 2.5153903903903903, "grad_norm": 1.4049085686705969, "learning_rate": 7.743339138681688e-07, "loss": 0.2718, "step": 26804 }, { "epoch": 2.5154842342342345, "grad_norm": 1.449949620968994, "learning_rate": 7.740420902536477e-07, "loss": 0.3105, "step": 26805 }, { "epoch": 2.515578078078078, "grad_norm": 1.2785564491103134, "learning_rate": 7.737503170257193e-07, "loss": 0.3124, "step": 26806 }, { "epoch": 2.515671921921922, "grad_norm": 1.2125562597887807, "learning_rate": 7.734585941878647e-07, "loss": 0.3398, "step": 26807 }, { "epoch": 2.5157657657657655, "grad_norm": 1.2071572919068854, "learning_rate": 7.731669217435606e-07, "loss": 0.3093, "step": 26808 }, { "epoch": 2.5158596096096097, "grad_norm": 1.0729901196171727, "learning_rate": 7.728752996962829e-07, "loss": 0.3009, "step": 26809 }, { "epoch": 2.5159534534534536, "grad_norm": 1.17647853771203, "learning_rate": 7.725837280495124e-07, "loss": 0.3171, "step": 26810 }, { "epoch": 2.5160472972972974, "grad_norm": 1.2594690649923899, "learning_rate": 7.722922068067235e-07, "loss": 0.2935, "step": 26811 }, { "epoch": 2.516141141141141, "grad_norm": 1.0680559447480689, "learning_rate": 7.720007359713921e-07, "loss": 0.2977, "step": 26812 }, { "epoch": 2.516234984984985, "grad_norm": 1.2079807102492621, "learning_rate": 7.717093155469934e-07, "loss": 0.3095, "step": 26813 }, { "epoch": 2.516328828828829, "grad_norm": 1.045993347241137, "learning_rate": 7.714179455370025e-07, "loss": 0.2851, "step": 26814 }, { "epoch": 2.5164226726726726, "grad_norm": 1.0404036864439634, "learning_rate": 7.711266259448919e-07, "loss": 0.364, "step": 26815 }, { "epoch": 2.5165165165165164, "grad_norm": 1.4368369771826954, "learning_rate": 7.708353567741372e-07, "loss": 0.2803, "step": 26816 }, { "epoch": 2.5166103603603602, "grad_norm": 1.3286985641402247, "learning_rate": 7.705441380282102e-07, "loss": 0.3352, "step": 26817 }, { "epoch": 2.516704204204204, "grad_norm": 1.168614506579954, "learning_rate": 7.702529697105826e-07, "loss": 0.319, "step": 26818 }, { "epoch": 2.516798048048048, "grad_norm": 0.984436278253288, "learning_rate": 7.699618518247276e-07, "loss": 0.2795, "step": 26819 }, { "epoch": 2.516891891891892, "grad_norm": 1.0907501500406156, "learning_rate": 7.696707843741147e-07, "loss": 0.3497, "step": 26820 }, { "epoch": 2.5169857357357355, "grad_norm": 1.1474769019626525, "learning_rate": 7.693797673622144e-07, "loss": 0.3114, "step": 26821 }, { "epoch": 2.5170795795795797, "grad_norm": 1.0512402463092825, "learning_rate": 7.690888007924979e-07, "loss": 0.3211, "step": 26822 }, { "epoch": 2.5171734234234235, "grad_norm": 0.9869865237666103, "learning_rate": 7.68797884668434e-07, "loss": 0.3198, "step": 26823 }, { "epoch": 2.5172672672672673, "grad_norm": 1.149976693210481, "learning_rate": 7.685070189934906e-07, "loss": 0.3289, "step": 26824 }, { "epoch": 2.517361111111111, "grad_norm": 1.5320678180990805, "learning_rate": 7.682162037711361e-07, "loss": 0.2965, "step": 26825 }, { "epoch": 2.517454954954955, "grad_norm": 1.0776548245471271, "learning_rate": 7.679254390048375e-07, "loss": 0.2748, "step": 26826 }, { "epoch": 2.517548798798799, "grad_norm": 1.1160267739684542, "learning_rate": 7.676347246980615e-07, "loss": 0.3166, "step": 26827 }, { "epoch": 2.5176426426426426, "grad_norm": 1.5416770876087877, "learning_rate": 7.673440608542754e-07, "loss": 0.3309, "step": 26828 }, { "epoch": 2.5177364864864864, "grad_norm": 2.0978436534926557, "learning_rate": 7.670534474769442e-07, "loss": 0.3323, "step": 26829 }, { "epoch": 2.51783033033033, "grad_norm": 1.1985182527340852, "learning_rate": 7.667628845695324e-07, "loss": 0.2973, "step": 26830 }, { "epoch": 2.517924174174174, "grad_norm": 0.9956655931172522, "learning_rate": 7.664723721355055e-07, "loss": 0.2915, "step": 26831 }, { "epoch": 2.518018018018018, "grad_norm": 2.858969107513336, "learning_rate": 7.661819101783269e-07, "loss": 0.2866, "step": 26832 }, { "epoch": 2.518111861861862, "grad_norm": 1.1894427044042712, "learning_rate": 7.658914987014587e-07, "loss": 0.2832, "step": 26833 }, { "epoch": 2.5182057057057055, "grad_norm": 1.1698509422888057, "learning_rate": 7.656011377083655e-07, "loss": 0.3523, "step": 26834 }, { "epoch": 2.5182995495495497, "grad_norm": 1.0115905617734493, "learning_rate": 7.653108272025084e-07, "loss": 0.2749, "step": 26835 }, { "epoch": 2.5183933933933935, "grad_norm": 1.1866193776319034, "learning_rate": 7.650205671873484e-07, "loss": 0.3194, "step": 26836 }, { "epoch": 2.5184872372372373, "grad_norm": 1.1165711516612715, "learning_rate": 7.647303576663467e-07, "loss": 0.2738, "step": 26837 }, { "epoch": 2.518581081081081, "grad_norm": 1.1233535142521966, "learning_rate": 7.644401986429639e-07, "loss": 0.3238, "step": 26838 }, { "epoch": 2.518674924924925, "grad_norm": 1.0838730324671062, "learning_rate": 7.641500901206572e-07, "loss": 0.3228, "step": 26839 }, { "epoch": 2.5187687687687688, "grad_norm": 13.733964302387825, "learning_rate": 7.638600321028894e-07, "loss": 0.3165, "step": 26840 }, { "epoch": 2.5188626126126126, "grad_norm": 1.2767387300271476, "learning_rate": 7.635700245931166e-07, "loss": 0.2898, "step": 26841 }, { "epoch": 2.5189564564564564, "grad_norm": 1.1340814574523894, "learning_rate": 7.63280067594796e-07, "loss": 0.3023, "step": 26842 }, { "epoch": 2.5190503003003, "grad_norm": 1.0989623805683628, "learning_rate": 7.629901611113866e-07, "loss": 0.3314, "step": 26843 }, { "epoch": 2.519144144144144, "grad_norm": 1.105237665018455, "learning_rate": 7.627003051463444e-07, "loss": 0.3045, "step": 26844 }, { "epoch": 2.519237987987988, "grad_norm": 1.1369194797056854, "learning_rate": 7.624104997031239e-07, "loss": 0.3337, "step": 26845 }, { "epoch": 2.519331831831832, "grad_norm": 1.2273866413443937, "learning_rate": 7.621207447851831e-07, "loss": 0.2929, "step": 26846 }, { "epoch": 2.5194256756756754, "grad_norm": 1.0196512092274101, "learning_rate": 7.618310403959755e-07, "loss": 0.3112, "step": 26847 }, { "epoch": 2.5195195195195197, "grad_norm": 1.1449788850458165, "learning_rate": 7.615413865389553e-07, "loss": 0.2706, "step": 26848 }, { "epoch": 2.5196133633633635, "grad_norm": 1.1775331227514572, "learning_rate": 7.612517832175753e-07, "loss": 0.2818, "step": 26849 }, { "epoch": 2.5197072072072073, "grad_norm": 1.6664608046238905, "learning_rate": 7.609622304352898e-07, "loss": 0.295, "step": 26850 }, { "epoch": 2.519801051051051, "grad_norm": 1.260549201776436, "learning_rate": 7.606727281955495e-07, "loss": 0.3102, "step": 26851 }, { "epoch": 2.519894894894895, "grad_norm": 1.1663618544983405, "learning_rate": 7.603832765018077e-07, "loss": 0.3064, "step": 26852 }, { "epoch": 2.5199887387387387, "grad_norm": 1.1269470520359481, "learning_rate": 7.600938753575154e-07, "loss": 0.2855, "step": 26853 }, { "epoch": 2.5200825825825826, "grad_norm": 1.0899566064525028, "learning_rate": 7.598045247661217e-07, "loss": 0.3436, "step": 26854 }, { "epoch": 2.5201764264264264, "grad_norm": 1.1476001500958244, "learning_rate": 7.595152247310788e-07, "loss": 0.2983, "step": 26855 }, { "epoch": 2.52027027027027, "grad_norm": 1.1242963784928846, "learning_rate": 7.59225975255835e-07, "loss": 0.3064, "step": 26856 }, { "epoch": 2.520364114114114, "grad_norm": 1.3227656457340597, "learning_rate": 7.589367763438383e-07, "loss": 0.3266, "step": 26857 }, { "epoch": 2.520457957957958, "grad_norm": 1.3031393697715339, "learning_rate": 7.586476279985383e-07, "loss": 0.321, "step": 26858 }, { "epoch": 2.520551801801802, "grad_norm": 1.0438607838048435, "learning_rate": 7.583585302233815e-07, "loss": 0.3304, "step": 26859 }, { "epoch": 2.5206456456456454, "grad_norm": 1.0219106006939027, "learning_rate": 7.580694830218155e-07, "loss": 0.3411, "step": 26860 }, { "epoch": 2.5207394894894897, "grad_norm": 1.2497934149137768, "learning_rate": 7.577804863972865e-07, "loss": 0.3043, "step": 26861 }, { "epoch": 2.5208333333333335, "grad_norm": 1.0920186840345953, "learning_rate": 7.574915403532396e-07, "loss": 0.2942, "step": 26862 }, { "epoch": 2.5209271771771773, "grad_norm": 1.2033345401629827, "learning_rate": 7.572026448931197e-07, "loss": 0.3441, "step": 26863 }, { "epoch": 2.521021021021021, "grad_norm": 1.093245967942084, "learning_rate": 7.569138000203729e-07, "loss": 0.3183, "step": 26864 }, { "epoch": 2.521114864864865, "grad_norm": 1.1603475021185206, "learning_rate": 7.566250057384422e-07, "loss": 0.3255, "step": 26865 }, { "epoch": 2.5212087087087087, "grad_norm": 1.1665400276075066, "learning_rate": 7.563362620507697e-07, "loss": 0.3591, "step": 26866 }, { "epoch": 2.5213025525525525, "grad_norm": 1.260033815466797, "learning_rate": 7.560475689608005e-07, "loss": 0.3379, "step": 26867 }, { "epoch": 2.5213963963963963, "grad_norm": 1.288093423999799, "learning_rate": 7.557589264719761e-07, "loss": 0.3125, "step": 26868 }, { "epoch": 2.52149024024024, "grad_norm": 1.159146746724594, "learning_rate": 7.554703345877362e-07, "loss": 0.2919, "step": 26869 }, { "epoch": 2.521584084084084, "grad_norm": 1.3428225545074757, "learning_rate": 7.551817933115241e-07, "loss": 0.3124, "step": 26870 }, { "epoch": 2.5216779279279278, "grad_norm": 1.508483363941284, "learning_rate": 7.548933026467791e-07, "loss": 0.2883, "step": 26871 }, { "epoch": 2.521771771771772, "grad_norm": 1.1921633494299129, "learning_rate": 7.54604862596941e-07, "loss": 0.2732, "step": 26872 }, { "epoch": 2.5218656156156154, "grad_norm": 1.2151179341729081, "learning_rate": 7.543164731654485e-07, "loss": 0.3416, "step": 26873 }, { "epoch": 2.5219594594594597, "grad_norm": 1.2105422688677703, "learning_rate": 7.540281343557404e-07, "loss": 0.3524, "step": 26874 }, { "epoch": 2.5220533033033035, "grad_norm": 1.048004432100062, "learning_rate": 7.537398461712536e-07, "loss": 0.2952, "step": 26875 }, { "epoch": 2.5221471471471473, "grad_norm": 1.06456150322786, "learning_rate": 7.534516086154275e-07, "loss": 0.3164, "step": 26876 }, { "epoch": 2.522240990990991, "grad_norm": 1.1187830804627035, "learning_rate": 7.531634216916977e-07, "loss": 0.3082, "step": 26877 }, { "epoch": 2.522334834834835, "grad_norm": 3.171818913743871, "learning_rate": 7.528752854034987e-07, "loss": 0.2962, "step": 26878 }, { "epoch": 2.5224286786786787, "grad_norm": 0.930050236284087, "learning_rate": 7.525871997542694e-07, "loss": 0.3022, "step": 26879 }, { "epoch": 2.5225225225225225, "grad_norm": 0.9536101467679808, "learning_rate": 7.52299164747442e-07, "loss": 0.2982, "step": 26880 }, { "epoch": 2.5226163663663663, "grad_norm": 1.123155139863817, "learning_rate": 7.52011180386451e-07, "loss": 0.2759, "step": 26881 }, { "epoch": 2.52271021021021, "grad_norm": 1.96877682617264, "learning_rate": 7.51723246674732e-07, "loss": 0.3359, "step": 26882 }, { "epoch": 2.522804054054054, "grad_norm": 1.213073349662629, "learning_rate": 7.514353636157163e-07, "loss": 0.3098, "step": 26883 }, { "epoch": 2.5228978978978978, "grad_norm": 1.0338539082031633, "learning_rate": 7.511475312128358e-07, "loss": 0.2784, "step": 26884 }, { "epoch": 2.522991741741742, "grad_norm": 1.303233329839444, "learning_rate": 7.508597494695253e-07, "loss": 0.2695, "step": 26885 }, { "epoch": 2.5230855855855854, "grad_norm": 1.1252989652681844, "learning_rate": 7.505720183892134e-07, "loss": 0.3293, "step": 26886 }, { "epoch": 2.5231794294294296, "grad_norm": 1.135813395924482, "learning_rate": 7.502843379753299e-07, "loss": 0.3145, "step": 26887 }, { "epoch": 2.523273273273273, "grad_norm": 1.0387896683898223, "learning_rate": 7.499967082313081e-07, "loss": 0.2723, "step": 26888 }, { "epoch": 2.5233671171171173, "grad_norm": 1.1372659205502424, "learning_rate": 7.497091291605752e-07, "loss": 0.3297, "step": 26889 }, { "epoch": 2.523460960960961, "grad_norm": 1.1017144289924834, "learning_rate": 7.4942160076656e-07, "loss": 0.2701, "step": 26890 }, { "epoch": 2.523554804804805, "grad_norm": 1.1212404137750167, "learning_rate": 7.491341230526922e-07, "loss": 0.3222, "step": 26891 }, { "epoch": 2.5236486486486487, "grad_norm": 1.3882154279363246, "learning_rate": 7.488466960223983e-07, "loss": 0.3206, "step": 26892 }, { "epoch": 2.5237424924924925, "grad_norm": 0.9028523256549946, "learning_rate": 7.485593196791047e-07, "loss": 0.3052, "step": 26893 }, { "epoch": 2.5238363363363363, "grad_norm": 1.1094778374008496, "learning_rate": 7.482719940262395e-07, "loss": 0.314, "step": 26894 }, { "epoch": 2.52393018018018, "grad_norm": 1.2352721829335, "learning_rate": 7.479847190672279e-07, "loss": 0.2931, "step": 26895 }, { "epoch": 2.524024024024024, "grad_norm": 1.0940732191624942, "learning_rate": 7.476974948054938e-07, "loss": 0.3114, "step": 26896 }, { "epoch": 2.5241178678678677, "grad_norm": 1.0074398191660499, "learning_rate": 7.474103212444644e-07, "loss": 0.3327, "step": 26897 }, { "epoch": 2.5242117117117115, "grad_norm": 1.1852275028693193, "learning_rate": 7.471231983875621e-07, "loss": 0.3147, "step": 26898 }, { "epoch": 2.5243055555555554, "grad_norm": 1.0230723063941793, "learning_rate": 7.468361262382101e-07, "loss": 0.3203, "step": 26899 }, { "epoch": 2.5243993993993996, "grad_norm": 1.0862861534056425, "learning_rate": 7.465491047998319e-07, "loss": 0.2907, "step": 26900 }, { "epoch": 2.524493243243243, "grad_norm": 1.2933720315951598, "learning_rate": 7.462621340758492e-07, "loss": 0.3168, "step": 26901 }, { "epoch": 2.5245870870870872, "grad_norm": 1.0949430345547437, "learning_rate": 7.459752140696825e-07, "loss": 0.3264, "step": 26902 }, { "epoch": 2.524680930930931, "grad_norm": 1.048090632406886, "learning_rate": 7.456883447847551e-07, "loss": 0.3418, "step": 26903 }, { "epoch": 2.524774774774775, "grad_norm": 1.238505481446539, "learning_rate": 7.454015262244868e-07, "loss": 0.3276, "step": 26904 }, { "epoch": 2.5248686186186187, "grad_norm": 1.0501608989300637, "learning_rate": 7.451147583922952e-07, "loss": 0.2938, "step": 26905 }, { "epoch": 2.5249624624624625, "grad_norm": 1.0971408021648619, "learning_rate": 7.44828041291602e-07, "loss": 0.3448, "step": 26906 }, { "epoch": 2.5250563063063063, "grad_norm": 1.1094996666969958, "learning_rate": 7.445413749258251e-07, "loss": 0.3211, "step": 26907 }, { "epoch": 2.52515015015015, "grad_norm": 1.3010406188105186, "learning_rate": 7.442547592983812e-07, "loss": 0.3208, "step": 26908 }, { "epoch": 2.525243993993994, "grad_norm": 1.0540512261440076, "learning_rate": 7.439681944126903e-07, "loss": 0.3015, "step": 26909 }, { "epoch": 2.5253378378378377, "grad_norm": 2.033871515652576, "learning_rate": 7.436816802721669e-07, "loss": 0.3214, "step": 26910 }, { "epoch": 2.5254316816816815, "grad_norm": 1.0698529660983598, "learning_rate": 7.433952168802277e-07, "loss": 0.297, "step": 26911 }, { "epoch": 2.5255255255255253, "grad_norm": 1.0194211062628584, "learning_rate": 7.431088042402884e-07, "loss": 0.341, "step": 26912 }, { "epoch": 2.5256193693693696, "grad_norm": 1.7199332421881874, "learning_rate": 7.42822442355764e-07, "loss": 0.26, "step": 26913 }, { "epoch": 2.525713213213213, "grad_norm": 1.0288129039596374, "learning_rate": 7.425361312300678e-07, "loss": 0.2302, "step": 26914 }, { "epoch": 2.525807057057057, "grad_norm": 1.0856866067154676, "learning_rate": 7.422498708666148e-07, "loss": 0.3299, "step": 26915 }, { "epoch": 2.525900900900901, "grad_norm": 1.010228618301667, "learning_rate": 7.41963661268818e-07, "loss": 0.3312, "step": 26916 }, { "epoch": 2.525994744744745, "grad_norm": 1.1376389542943264, "learning_rate": 7.416775024400885e-07, "loss": 0.3035, "step": 26917 }, { "epoch": 2.5260885885885886, "grad_norm": 0.956467399736435, "learning_rate": 7.413913943838407e-07, "loss": 0.2935, "step": 26918 }, { "epoch": 2.5261824324324325, "grad_norm": 1.1442173240566764, "learning_rate": 7.411053371034843e-07, "loss": 0.3337, "step": 26919 }, { "epoch": 2.5262762762762763, "grad_norm": 0.9842642469360493, "learning_rate": 7.408193306024292e-07, "loss": 0.2906, "step": 26920 }, { "epoch": 2.52637012012012, "grad_norm": 1.1146316611341769, "learning_rate": 7.405333748840877e-07, "loss": 0.2856, "step": 26921 }, { "epoch": 2.526463963963964, "grad_norm": 1.2454614282264787, "learning_rate": 7.402474699518675e-07, "loss": 0.3326, "step": 26922 }, { "epoch": 2.5265578078078077, "grad_norm": 1.0772269083014459, "learning_rate": 7.399616158091788e-07, "loss": 0.3003, "step": 26923 }, { "epoch": 2.5266516516516515, "grad_norm": 1.0604721768335788, "learning_rate": 7.396758124594289e-07, "loss": 0.304, "step": 26924 }, { "epoch": 2.5267454954954953, "grad_norm": 1.1194527345855485, "learning_rate": 7.393900599060255e-07, "loss": 0.3164, "step": 26925 }, { "epoch": 2.5268393393393396, "grad_norm": 1.0326439817176767, "learning_rate": 7.39104358152375e-07, "loss": 0.3045, "step": 26926 }, { "epoch": 2.526933183183183, "grad_norm": 0.9972983338890287, "learning_rate": 7.388187072018854e-07, "loss": 0.3041, "step": 26927 }, { "epoch": 2.527027027027027, "grad_norm": 1.2843352432086481, "learning_rate": 7.385331070579621e-07, "loss": 0.3249, "step": 26928 }, { "epoch": 2.527120870870871, "grad_norm": 0.9663041826065989, "learning_rate": 7.382475577240089e-07, "loss": 0.3353, "step": 26929 }, { "epoch": 2.527214714714715, "grad_norm": 1.209282960799061, "learning_rate": 7.379620592034332e-07, "loss": 0.3156, "step": 26930 }, { "epoch": 2.5273085585585586, "grad_norm": 1.0031120517430527, "learning_rate": 7.37676611499637e-07, "loss": 0.2944, "step": 26931 }, { "epoch": 2.5274024024024024, "grad_norm": 1.3909470669296111, "learning_rate": 7.373912146160234e-07, "loss": 0.309, "step": 26932 }, { "epoch": 2.5274962462462462, "grad_norm": 1.1049159300685085, "learning_rate": 7.371058685559973e-07, "loss": 0.3073, "step": 26933 }, { "epoch": 2.52759009009009, "grad_norm": 1.1424944308446259, "learning_rate": 7.368205733229589e-07, "loss": 0.2956, "step": 26934 }, { "epoch": 2.527683933933934, "grad_norm": 1.1231661790106229, "learning_rate": 7.365353289203115e-07, "loss": 0.2996, "step": 26935 }, { "epoch": 2.5277777777777777, "grad_norm": 1.1329037581892931, "learning_rate": 7.362501353514545e-07, "loss": 0.275, "step": 26936 }, { "epoch": 2.5278716216216215, "grad_norm": 1.139277220500988, "learning_rate": 7.359649926197892e-07, "loss": 0.3078, "step": 26937 }, { "epoch": 2.5279654654654653, "grad_norm": 1.1115221381846105, "learning_rate": 7.356799007287141e-07, "loss": 0.3368, "step": 26938 }, { "epoch": 2.5280593093093096, "grad_norm": 1.0900666849243008, "learning_rate": 7.353948596816301e-07, "loss": 0.3581, "step": 26939 }, { "epoch": 2.528153153153153, "grad_norm": 1.3153771759339605, "learning_rate": 7.351098694819352e-07, "loss": 0.3312, "step": 26940 }, { "epoch": 2.528246996996997, "grad_norm": 1.1127849371479934, "learning_rate": 7.348249301330268e-07, "loss": 0.315, "step": 26941 }, { "epoch": 2.528340840840841, "grad_norm": 1.3287551276905696, "learning_rate": 7.345400416383031e-07, "loss": 0.313, "step": 26942 }, { "epoch": 2.528434684684685, "grad_norm": 1.2607382459035228, "learning_rate": 7.342552040011608e-07, "loss": 0.3401, "step": 26943 }, { "epoch": 2.5285285285285286, "grad_norm": 1.0568539132746584, "learning_rate": 7.339704172249951e-07, "loss": 0.3109, "step": 26944 }, { "epoch": 2.5286223723723724, "grad_norm": 1.3925637053465452, "learning_rate": 7.336856813132026e-07, "loss": 0.3479, "step": 26945 }, { "epoch": 2.5287162162162162, "grad_norm": 1.4574253632142695, "learning_rate": 7.334009962691779e-07, "loss": 0.2693, "step": 26946 }, { "epoch": 2.52881006006006, "grad_norm": 2.226438672241125, "learning_rate": 7.331163620963156e-07, "loss": 0.3186, "step": 26947 }, { "epoch": 2.528903903903904, "grad_norm": 1.089859189594778, "learning_rate": 7.328317787980094e-07, "loss": 0.2721, "step": 26948 }, { "epoch": 2.5289977477477477, "grad_norm": 1.0137358338412674, "learning_rate": 7.325472463776517e-07, "loss": 0.3026, "step": 26949 }, { "epoch": 2.5290915915915915, "grad_norm": 1.4608916362412436, "learning_rate": 7.322627648386344e-07, "loss": 0.2941, "step": 26950 }, { "epoch": 2.5291854354354353, "grad_norm": 0.9422378401049206, "learning_rate": 7.319783341843511e-07, "loss": 0.3058, "step": 26951 }, { "epoch": 2.5292792792792795, "grad_norm": 1.5701380177997795, "learning_rate": 7.316939544181928e-07, "loss": 0.3015, "step": 26952 }, { "epoch": 2.529373123123123, "grad_norm": 1.1083240440895847, "learning_rate": 7.314096255435487e-07, "loss": 0.3397, "step": 26953 }, { "epoch": 2.529466966966967, "grad_norm": 1.151870633275076, "learning_rate": 7.311253475638114e-07, "loss": 0.3178, "step": 26954 }, { "epoch": 2.529560810810811, "grad_norm": 1.2188030908606622, "learning_rate": 7.308411204823685e-07, "loss": 0.3383, "step": 26955 }, { "epoch": 2.5296546546546548, "grad_norm": 1.94929213264949, "learning_rate": 7.305569443026089e-07, "loss": 0.3056, "step": 26956 }, { "epoch": 2.5297484984984986, "grad_norm": 1.18856536301268, "learning_rate": 7.302728190279218e-07, "loss": 0.289, "step": 26957 }, { "epoch": 2.5298423423423424, "grad_norm": 1.3006725365957286, "learning_rate": 7.299887446616949e-07, "loss": 0.3452, "step": 26958 }, { "epoch": 2.529936186186186, "grad_norm": 1.2568819391483081, "learning_rate": 7.297047212073144e-07, "loss": 0.2955, "step": 26959 }, { "epoch": 2.53003003003003, "grad_norm": 1.1420489364243076, "learning_rate": 7.294207486681671e-07, "loss": 0.3277, "step": 26960 }, { "epoch": 2.530123873873874, "grad_norm": 0.9853282117619219, "learning_rate": 7.29136827047639e-07, "loss": 0.3379, "step": 26961 }, { "epoch": 2.5302177177177176, "grad_norm": 1.3783642695771783, "learning_rate": 7.288529563491142e-07, "loss": 0.2893, "step": 26962 }, { "epoch": 2.5303115615615615, "grad_norm": 1.2254877082116449, "learning_rate": 7.28569136575979e-07, "loss": 0.2999, "step": 26963 }, { "epoch": 2.5304054054054053, "grad_norm": 1.2044216025006744, "learning_rate": 7.28285367731617e-07, "loss": 0.2971, "step": 26964 }, { "epoch": 2.5304992492492495, "grad_norm": 16.123899117891636, "learning_rate": 7.280016498194104e-07, "loss": 0.2989, "step": 26965 }, { "epoch": 2.530593093093093, "grad_norm": 0.9815257634917913, "learning_rate": 7.277179828427433e-07, "loss": 0.3025, "step": 26966 }, { "epoch": 2.530686936936937, "grad_norm": 1.0109638542728914, "learning_rate": 7.274343668049983e-07, "loss": 0.2862, "step": 26967 }, { "epoch": 2.5307807807807805, "grad_norm": 1.0040282780265513, "learning_rate": 7.271508017095547e-07, "loss": 0.3083, "step": 26968 }, { "epoch": 2.5308746246246248, "grad_norm": 1.780557219007505, "learning_rate": 7.268672875597959e-07, "loss": 0.3132, "step": 26969 }, { "epoch": 2.5309684684684686, "grad_norm": 1.0754189785841504, "learning_rate": 7.265838243591017e-07, "loss": 0.2824, "step": 26970 }, { "epoch": 2.5310623123123124, "grad_norm": 1.139034499824348, "learning_rate": 7.263004121108514e-07, "loss": 0.3028, "step": 26971 }, { "epoch": 2.531156156156156, "grad_norm": 1.547882961410233, "learning_rate": 7.260170508184244e-07, "loss": 0.3004, "step": 26972 }, { "epoch": 2.53125, "grad_norm": 1.002494840378588, "learning_rate": 7.257337404851989e-07, "loss": 0.3187, "step": 26973 }, { "epoch": 2.531343843843844, "grad_norm": 1.3682350440042341, "learning_rate": 7.254504811145524e-07, "loss": 0.3235, "step": 26974 }, { "epoch": 2.5314376876876876, "grad_norm": 1.374456590308725, "learning_rate": 7.251672727098635e-07, "loss": 0.3438, "step": 26975 }, { "epoch": 2.5315315315315314, "grad_norm": 1.1275454744877207, "learning_rate": 7.248841152745085e-07, "loss": 0.3176, "step": 26976 }, { "epoch": 2.5316253753753752, "grad_norm": 1.3358252709324556, "learning_rate": 7.246010088118621e-07, "loss": 0.3225, "step": 26977 }, { "epoch": 2.5317192192192195, "grad_norm": 0.973560458510545, "learning_rate": 7.243179533253025e-07, "loss": 0.2885, "step": 26978 }, { "epoch": 2.531813063063063, "grad_norm": 1.1869122568637833, "learning_rate": 7.24034948818203e-07, "loss": 0.2785, "step": 26979 }, { "epoch": 2.531906906906907, "grad_norm": 1.2253489899085086, "learning_rate": 7.237519952939365e-07, "loss": 0.3288, "step": 26980 }, { "epoch": 2.5320007507507505, "grad_norm": 1.0076088084732602, "learning_rate": 7.234690927558802e-07, "loss": 0.3348, "step": 26981 }, { "epoch": 2.5320945945945947, "grad_norm": 1.157215269648986, "learning_rate": 7.231862412074048e-07, "loss": 0.33, "step": 26982 }, { "epoch": 2.5321884384384385, "grad_norm": 1.0998854821833735, "learning_rate": 7.229034406518831e-07, "loss": 0.3532, "step": 26983 }, { "epoch": 2.5322822822822824, "grad_norm": 0.9814654604516296, "learning_rate": 7.22620691092687e-07, "loss": 0.2604, "step": 26984 }, { "epoch": 2.532376126126126, "grad_norm": 1.2039947481855529, "learning_rate": 7.22337992533188e-07, "loss": 0.2786, "step": 26985 }, { "epoch": 2.53246996996997, "grad_norm": 1.06587102215851, "learning_rate": 7.220553449767553e-07, "loss": 0.3238, "step": 26986 }, { "epoch": 2.532563813813814, "grad_norm": 1.0378656854303148, "learning_rate": 7.217727484267617e-07, "loss": 0.3144, "step": 26987 }, { "epoch": 2.5326576576576576, "grad_norm": 1.0549190285940786, "learning_rate": 7.21490202886575e-07, "loss": 0.2957, "step": 26988 }, { "epoch": 2.5327515015015014, "grad_norm": 1.2746862015056069, "learning_rate": 7.212077083595625e-07, "loss": 0.358, "step": 26989 }, { "epoch": 2.5328453453453452, "grad_norm": 0.986946513539934, "learning_rate": 7.209252648490961e-07, "loss": 0.2879, "step": 26990 }, { "epoch": 2.532939189189189, "grad_norm": 1.1355194336124539, "learning_rate": 7.206428723585407e-07, "loss": 0.2615, "step": 26991 }, { "epoch": 2.533033033033033, "grad_norm": 1.061547541274227, "learning_rate": 7.203605308912631e-07, "loss": 0.325, "step": 26992 }, { "epoch": 2.533126876876877, "grad_norm": 1.2536448320615803, "learning_rate": 7.200782404506318e-07, "loss": 0.2886, "step": 26993 }, { "epoch": 2.5332207207207205, "grad_norm": 1.4418315196968292, "learning_rate": 7.197960010400113e-07, "loss": 0.3346, "step": 26994 }, { "epoch": 2.5333145645645647, "grad_norm": 1.0674817586888985, "learning_rate": 7.195138126627665e-07, "loss": 0.2881, "step": 26995 }, { "epoch": 2.5334084084084085, "grad_norm": 1.2367526582518193, "learning_rate": 7.192316753222627e-07, "loss": 0.2482, "step": 26996 }, { "epoch": 2.5335022522522523, "grad_norm": 1.303008077437428, "learning_rate": 7.189495890218634e-07, "loss": 0.2792, "step": 26997 }, { "epoch": 2.533596096096096, "grad_norm": 1.3188694320015997, "learning_rate": 7.186675537649307e-07, "loss": 0.3317, "step": 26998 }, { "epoch": 2.53368993993994, "grad_norm": 1.0833240046342285, "learning_rate": 7.183855695548297e-07, "loss": 0.2942, "step": 26999 }, { "epoch": 2.5337837837837838, "grad_norm": 1.064681616730449, "learning_rate": 7.181036363949217e-07, "loss": 0.3244, "step": 27000 }, { "epoch": 2.5338776276276276, "grad_norm": 1.297888462089285, "learning_rate": 7.178217542885668e-07, "loss": 0.3234, "step": 27001 }, { "epoch": 2.5339714714714714, "grad_norm": 1.1298437228900418, "learning_rate": 7.175399232391284e-07, "loss": 0.2991, "step": 27002 }, { "epoch": 2.534065315315315, "grad_norm": 1.0362165100142817, "learning_rate": 7.172581432499653e-07, "loss": 0.3038, "step": 27003 }, { "epoch": 2.534159159159159, "grad_norm": 1.1840905664090464, "learning_rate": 7.169764143244368e-07, "loss": 0.3528, "step": 27004 }, { "epoch": 2.534253003003003, "grad_norm": 1.6695063707025686, "learning_rate": 7.166947364659033e-07, "loss": 0.2767, "step": 27005 }, { "epoch": 2.534346846846847, "grad_norm": 3.5690384214173, "learning_rate": 7.164131096777221e-07, "loss": 0.3191, "step": 27006 }, { "epoch": 2.5344406906906904, "grad_norm": 1.0620658015000304, "learning_rate": 7.161315339632508e-07, "loss": 0.3379, "step": 27007 }, { "epoch": 2.5345345345345347, "grad_norm": 1.3324708525722762, "learning_rate": 7.158500093258497e-07, "loss": 0.2885, "step": 27008 }, { "epoch": 2.5346283783783785, "grad_norm": 1.2410660453627937, "learning_rate": 7.155685357688724e-07, "loss": 0.315, "step": 27009 }, { "epoch": 2.5347222222222223, "grad_norm": 1.1748174345756153, "learning_rate": 7.15287113295674e-07, "loss": 0.3626, "step": 27010 }, { "epoch": 2.534816066066066, "grad_norm": 1.0804149322592813, "learning_rate": 7.150057419096129e-07, "loss": 0.3012, "step": 27011 }, { "epoch": 2.53490990990991, "grad_norm": 1.0423925834268155, "learning_rate": 7.147244216140431e-07, "loss": 0.3248, "step": 27012 }, { "epoch": 2.5350037537537538, "grad_norm": 1.0592886758967053, "learning_rate": 7.144431524123168e-07, "loss": 0.3122, "step": 27013 }, { "epoch": 2.5350975975975976, "grad_norm": 0.9768328382255473, "learning_rate": 7.141619343077904e-07, "loss": 0.3007, "step": 27014 }, { "epoch": 2.5351914414414414, "grad_norm": 1.0583729603143393, "learning_rate": 7.138807673038156e-07, "loss": 0.3634, "step": 27015 }, { "epoch": 2.535285285285285, "grad_norm": 1.1108508331848113, "learning_rate": 7.135996514037436e-07, "loss": 0.3167, "step": 27016 }, { "epoch": 2.535379129129129, "grad_norm": 1.0714585409338875, "learning_rate": 7.133185866109288e-07, "loss": 0.3056, "step": 27017 }, { "epoch": 2.535472972972973, "grad_norm": 1.2492679762853978, "learning_rate": 7.130375729287209e-07, "loss": 0.2871, "step": 27018 }, { "epoch": 2.535566816816817, "grad_norm": 1.1867963864880446, "learning_rate": 7.127566103604694e-07, "loss": 0.2891, "step": 27019 }, { "epoch": 2.5356606606606604, "grad_norm": 0.9518570977929234, "learning_rate": 7.124756989095266e-07, "loss": 0.319, "step": 27020 }, { "epoch": 2.5357545045045047, "grad_norm": 1.281677376978671, "learning_rate": 7.12194838579241e-07, "loss": 0.3383, "step": 27021 }, { "epoch": 2.5358483483483485, "grad_norm": 1.3108968442365028, "learning_rate": 7.11914029372961e-07, "loss": 0.3021, "step": 27022 }, { "epoch": 2.5359421921921923, "grad_norm": 1.3020094991499231, "learning_rate": 7.116332712940338e-07, "loss": 0.298, "step": 27023 }, { "epoch": 2.536036036036036, "grad_norm": 1.0831533793941783, "learning_rate": 7.113525643458086e-07, "loss": 0.3188, "step": 27024 }, { "epoch": 2.53612987987988, "grad_norm": 1.1149606904294547, "learning_rate": 7.110719085316303e-07, "loss": 0.2831, "step": 27025 }, { "epoch": 2.5362237237237237, "grad_norm": 1.1210228199257544, "learning_rate": 7.107913038548475e-07, "loss": 0.3197, "step": 27026 }, { "epoch": 2.5363175675675675, "grad_norm": 1.176039491683999, "learning_rate": 7.105107503188047e-07, "loss": 0.2922, "step": 27027 }, { "epoch": 2.5364114114114114, "grad_norm": 1.0982321516187157, "learning_rate": 7.102302479268458e-07, "loss": 0.3586, "step": 27028 }, { "epoch": 2.536505255255255, "grad_norm": 1.3669973666342132, "learning_rate": 7.099497966823183e-07, "loss": 0.3266, "step": 27029 }, { "epoch": 2.536599099099099, "grad_norm": 1.1539248708410177, "learning_rate": 7.096693965885637e-07, "loss": 0.2773, "step": 27030 }, { "epoch": 2.536692942942943, "grad_norm": 1.1055590384066376, "learning_rate": 7.093890476489246e-07, "loss": 0.3168, "step": 27031 }, { "epoch": 2.536786786786787, "grad_norm": 1.1143478143478174, "learning_rate": 7.091087498667465e-07, "loss": 0.3049, "step": 27032 }, { "epoch": 2.5368806306306304, "grad_norm": 1.1561355592769338, "learning_rate": 7.08828503245369e-07, "loss": 0.3421, "step": 27033 }, { "epoch": 2.5369744744744747, "grad_norm": 1.0775074654439039, "learning_rate": 7.08548307788135e-07, "loss": 0.3131, "step": 27034 }, { "epoch": 2.5370683183183185, "grad_norm": 1.117237243063089, "learning_rate": 7.082681634983845e-07, "loss": 0.3403, "step": 27035 }, { "epoch": 2.5371621621621623, "grad_norm": 1.1067249650798532, "learning_rate": 7.079880703794578e-07, "loss": 0.3116, "step": 27036 }, { "epoch": 2.537256006006006, "grad_norm": 1.3762646929676454, "learning_rate": 7.077080284346932e-07, "loss": 0.2884, "step": 27037 }, { "epoch": 2.53734984984985, "grad_norm": 1.066506197795923, "learning_rate": 7.07428037667432e-07, "loss": 0.2665, "step": 27038 }, { "epoch": 2.5374436936936937, "grad_norm": 1.2589698357583112, "learning_rate": 7.071480980810119e-07, "loss": 0.3067, "step": 27039 }, { "epoch": 2.5375375375375375, "grad_norm": 1.0798962237870076, "learning_rate": 7.068682096787688e-07, "loss": 0.2998, "step": 27040 }, { "epoch": 2.5376313813813813, "grad_norm": 1.2511075668138727, "learning_rate": 7.065883724640427e-07, "loss": 0.2698, "step": 27041 }, { "epoch": 2.537725225225225, "grad_norm": 1.0955629398081503, "learning_rate": 7.063085864401681e-07, "loss": 0.3175, "step": 27042 }, { "epoch": 2.537819069069069, "grad_norm": 0.9641492288854081, "learning_rate": 7.060288516104813e-07, "loss": 0.2893, "step": 27043 }, { "epoch": 2.5379129129129128, "grad_norm": 1.5716551169667388, "learning_rate": 7.057491679783185e-07, "loss": 0.3273, "step": 27044 }, { "epoch": 2.538006756756757, "grad_norm": 1.2015702862985425, "learning_rate": 7.054695355470136e-07, "loss": 0.3257, "step": 27045 }, { "epoch": 2.5381006006006004, "grad_norm": 1.0706384734438872, "learning_rate": 7.051899543199015e-07, "loss": 0.334, "step": 27046 }, { "epoch": 2.5381944444444446, "grad_norm": 1.132028788358314, "learning_rate": 7.049104243003146e-07, "loss": 0.3235, "step": 27047 }, { "epoch": 2.5382882882882885, "grad_norm": 1.2286869018230882, "learning_rate": 7.046309454915862e-07, "loss": 0.3106, "step": 27048 }, { "epoch": 2.5383821321321323, "grad_norm": 1.1750050190247903, "learning_rate": 7.043515178970478e-07, "loss": 0.3056, "step": 27049 }, { "epoch": 2.538475975975976, "grad_norm": 1.2116570303240253, "learning_rate": 7.040721415200324e-07, "loss": 0.2813, "step": 27050 }, { "epoch": 2.53856981981982, "grad_norm": 1.11325775601858, "learning_rate": 7.037928163638708e-07, "loss": 0.3181, "step": 27051 }, { "epoch": 2.5386636636636637, "grad_norm": 1.1041670655938058, "learning_rate": 7.035135424318917e-07, "loss": 0.2831, "step": 27052 }, { "epoch": 2.5387575075075075, "grad_norm": 1.148203225969148, "learning_rate": 7.032343197274277e-07, "loss": 0.3015, "step": 27053 }, { "epoch": 2.5388513513513513, "grad_norm": 1.2009052922599799, "learning_rate": 7.029551482538067e-07, "loss": 0.3221, "step": 27054 }, { "epoch": 2.538945195195195, "grad_norm": 1.257411828080279, "learning_rate": 7.026760280143563e-07, "loss": 0.3262, "step": 27055 }, { "epoch": 2.539039039039039, "grad_norm": 1.1741173456804155, "learning_rate": 7.023969590124058e-07, "loss": 0.3168, "step": 27056 }, { "epoch": 2.5391328828828827, "grad_norm": 1.1307063130490058, "learning_rate": 7.021179412512824e-07, "loss": 0.3232, "step": 27057 }, { "epoch": 2.539226726726727, "grad_norm": 1.1214750827909796, "learning_rate": 7.018389747343129e-07, "loss": 0.2858, "step": 27058 }, { "epoch": 2.5393205705705704, "grad_norm": 1.5497427647956905, "learning_rate": 7.01560059464823e-07, "loss": 0.32, "step": 27059 }, { "epoch": 2.5394144144144146, "grad_norm": 1.088219149678628, "learning_rate": 7.012811954461385e-07, "loss": 0.353, "step": 27060 }, { "epoch": 2.539508258258258, "grad_norm": 1.3480330614085776, "learning_rate": 7.010023826815832e-07, "loss": 0.3015, "step": 27061 }, { "epoch": 2.5396021021021022, "grad_norm": 1.1592512125619643, "learning_rate": 7.007236211744833e-07, "loss": 0.284, "step": 27062 }, { "epoch": 2.539695945945946, "grad_norm": 0.9132303658899666, "learning_rate": 7.004449109281614e-07, "loss": 0.3297, "step": 27063 }, { "epoch": 2.53978978978979, "grad_norm": 1.245909179688575, "learning_rate": 7.001662519459401e-07, "loss": 0.3208, "step": 27064 }, { "epoch": 2.5398836336336337, "grad_norm": 1.0591182517732736, "learning_rate": 6.998876442311442e-07, "loss": 0.3237, "step": 27065 }, { "epoch": 2.5399774774774775, "grad_norm": 1.1334355459738703, "learning_rate": 6.996090877870932e-07, "loss": 0.3293, "step": 27066 }, { "epoch": 2.5400713213213213, "grad_norm": 1.2658002539541924, "learning_rate": 6.993305826171087e-07, "loss": 0.3081, "step": 27067 }, { "epoch": 2.540165165165165, "grad_norm": 1.5134980599897232, "learning_rate": 6.990521287245128e-07, "loss": 0.3046, "step": 27068 }, { "epoch": 2.540259009009009, "grad_norm": 1.0623334153269404, "learning_rate": 6.987737261126243e-07, "loss": 0.2961, "step": 27069 }, { "epoch": 2.5403528528528527, "grad_norm": 1.1562249687370003, "learning_rate": 6.984953747847628e-07, "loss": 0.3241, "step": 27070 }, { "epoch": 2.5404466966966965, "grad_norm": 1.2144855099509293, "learning_rate": 6.982170747442473e-07, "loss": 0.3239, "step": 27071 }, { "epoch": 2.5405405405405403, "grad_norm": 1.1774202718855895, "learning_rate": 6.979388259943964e-07, "loss": 0.296, "step": 27072 }, { "epoch": 2.5406343843843846, "grad_norm": 1.3366202768314457, "learning_rate": 6.976606285385257e-07, "loss": 0.2785, "step": 27073 }, { "epoch": 2.540728228228228, "grad_norm": 1.2286234071175424, "learning_rate": 6.973824823799547e-07, "loss": 0.2786, "step": 27074 }, { "epoch": 2.5408220720720722, "grad_norm": 1.1194172500921982, "learning_rate": 6.971043875219991e-07, "loss": 0.3122, "step": 27075 }, { "epoch": 2.540915915915916, "grad_norm": 1.074292645752182, "learning_rate": 6.968263439679728e-07, "loss": 0.3041, "step": 27076 }, { "epoch": 2.54100975975976, "grad_norm": 0.9995963750716822, "learning_rate": 6.965483517211935e-07, "loss": 0.3082, "step": 27077 }, { "epoch": 2.5411036036036037, "grad_norm": 1.0843206089436284, "learning_rate": 6.962704107849749e-07, "loss": 0.3398, "step": 27078 }, { "epoch": 2.5411974474474475, "grad_norm": 1.0299010515341185, "learning_rate": 6.959925211626301e-07, "loss": 0.3093, "step": 27079 }, { "epoch": 2.5412912912912913, "grad_norm": 1.1996876999051127, "learning_rate": 6.957146828574734e-07, "loss": 0.3182, "step": 27080 }, { "epoch": 2.541385135135135, "grad_norm": 1.0751902616578661, "learning_rate": 6.954368958728175e-07, "loss": 0.328, "step": 27081 }, { "epoch": 2.541478978978979, "grad_norm": 1.1149968209104117, "learning_rate": 6.951591602119739e-07, "loss": 0.3148, "step": 27082 }, { "epoch": 2.5415728228228227, "grad_norm": 1.2686396666882125, "learning_rate": 6.948814758782546e-07, "loss": 0.3246, "step": 27083 }, { "epoch": 2.5416666666666665, "grad_norm": 0.904186111248788, "learning_rate": 6.946038428749696e-07, "loss": 0.3054, "step": 27084 }, { "epoch": 2.5417605105105103, "grad_norm": 1.0878337871543167, "learning_rate": 6.943262612054291e-07, "loss": 0.3444, "step": 27085 }, { "epoch": 2.5418543543543546, "grad_norm": 1.0551031601822012, "learning_rate": 6.940487308729443e-07, "loss": 0.3253, "step": 27086 }, { "epoch": 2.541948198198198, "grad_norm": 1.021727679279667, "learning_rate": 6.937712518808231e-07, "loss": 0.3298, "step": 27087 }, { "epoch": 2.542042042042042, "grad_norm": 1.1463255981574185, "learning_rate": 6.93493824232373e-07, "loss": 0.2897, "step": 27088 }, { "epoch": 2.542135885885886, "grad_norm": 1.341992126327147, "learning_rate": 6.932164479309045e-07, "loss": 0.3342, "step": 27089 }, { "epoch": 2.54222972972973, "grad_norm": 1.1492125383713494, "learning_rate": 6.929391229797228e-07, "loss": 0.3402, "step": 27090 }, { "epoch": 2.5423235735735736, "grad_norm": 1.0681115877538938, "learning_rate": 6.926618493821336e-07, "loss": 0.3096, "step": 27091 }, { "epoch": 2.5424174174174174, "grad_norm": 1.2168248554401713, "learning_rate": 6.923846271414453e-07, "loss": 0.3287, "step": 27092 }, { "epoch": 2.5425112612612613, "grad_norm": 1.5074506738355173, "learning_rate": 6.92107456260962e-07, "loss": 0.3361, "step": 27093 }, { "epoch": 2.542605105105105, "grad_norm": 1.1773493971380125, "learning_rate": 6.918303367439888e-07, "loss": 0.2907, "step": 27094 }, { "epoch": 2.542698948948949, "grad_norm": 0.978434299303415, "learning_rate": 6.915532685938292e-07, "loss": 0.3203, "step": 27095 }, { "epoch": 2.5427927927927927, "grad_norm": 1.4761372315986334, "learning_rate": 6.912762518137872e-07, "loss": 0.3303, "step": 27096 }, { "epoch": 2.5428866366366365, "grad_norm": 1.239681224107646, "learning_rate": 6.909992864071646e-07, "loss": 0.3083, "step": 27097 }, { "epoch": 2.5429804804804803, "grad_norm": 1.1128065481945026, "learning_rate": 6.907223723772655e-07, "loss": 0.3316, "step": 27098 }, { "epoch": 2.5430743243243246, "grad_norm": 1.0938337503772262, "learning_rate": 6.90445509727391e-07, "loss": 0.3173, "step": 27099 }, { "epoch": 2.543168168168168, "grad_norm": 1.0119621724795387, "learning_rate": 6.901686984608403e-07, "loss": 0.3042, "step": 27100 }, { "epoch": 2.543262012012012, "grad_norm": 1.634221707453168, "learning_rate": 6.898919385809171e-07, "loss": 0.2729, "step": 27101 }, { "epoch": 2.543355855855856, "grad_norm": 1.2369636767647634, "learning_rate": 6.896152300909193e-07, "loss": 0.3059, "step": 27102 }, { "epoch": 2.5434496996997, "grad_norm": 1.116828663616499, "learning_rate": 6.893385729941454e-07, "loss": 0.3167, "step": 27103 }, { "epoch": 2.5435435435435436, "grad_norm": 1.1072525506157713, "learning_rate": 6.890619672938964e-07, "loss": 0.2967, "step": 27104 }, { "epoch": 2.5436373873873874, "grad_norm": 1.1553107927566826, "learning_rate": 6.887854129934684e-07, "loss": 0.3247, "step": 27105 }, { "epoch": 2.5437312312312312, "grad_norm": 1.2565057523611878, "learning_rate": 6.885089100961595e-07, "loss": 0.3349, "step": 27106 }, { "epoch": 2.543825075075075, "grad_norm": 1.1319614773278486, "learning_rate": 6.882324586052663e-07, "loss": 0.2989, "step": 27107 }, { "epoch": 2.543918918918919, "grad_norm": 1.173054786556419, "learning_rate": 6.879560585240852e-07, "loss": 0.2854, "step": 27108 }, { "epoch": 2.5440127627627627, "grad_norm": 1.1016739992545976, "learning_rate": 6.876797098559101e-07, "loss": 0.3175, "step": 27109 }, { "epoch": 2.5441066066066065, "grad_norm": 1.119389656271312, "learning_rate": 6.874034126040385e-07, "loss": 0.2882, "step": 27110 }, { "epoch": 2.5442004504504503, "grad_norm": 1.1135158015561166, "learning_rate": 6.871271667717638e-07, "loss": 0.3078, "step": 27111 }, { "epoch": 2.5442942942942945, "grad_norm": 1.0849645213442478, "learning_rate": 6.868509723623784e-07, "loss": 0.2754, "step": 27112 }, { "epoch": 2.544388138138138, "grad_norm": 1.0328644048735203, "learning_rate": 6.865748293791775e-07, "loss": 0.2597, "step": 27113 }, { "epoch": 2.544481981981982, "grad_norm": 1.1419572681366408, "learning_rate": 6.862987378254526e-07, "loss": 0.2624, "step": 27114 }, { "epoch": 2.544575825825826, "grad_norm": 1.237255190508612, "learning_rate": 6.860226977044943e-07, "loss": 0.3366, "step": 27115 }, { "epoch": 2.54466966966967, "grad_norm": 1.4579662098272579, "learning_rate": 6.857467090195963e-07, "loss": 0.2569, "step": 27116 }, { "epoch": 2.5447635135135136, "grad_norm": 1.0683291326350095, "learning_rate": 6.854707717740477e-07, "loss": 0.2833, "step": 27117 }, { "epoch": 2.5448573573573574, "grad_norm": 1.3194299742451085, "learning_rate": 6.851948859711394e-07, "loss": 0.2791, "step": 27118 }, { "epoch": 2.544951201201201, "grad_norm": 1.0381742102242548, "learning_rate": 6.849190516141602e-07, "loss": 0.33, "step": 27119 }, { "epoch": 2.545045045045045, "grad_norm": 0.9709050875097303, "learning_rate": 6.846432687063986e-07, "loss": 0.3081, "step": 27120 }, { "epoch": 2.545138888888889, "grad_norm": 1.0643609753037426, "learning_rate": 6.843675372511427e-07, "loss": 0.3871, "step": 27121 }, { "epoch": 2.5452327327327327, "grad_norm": 0.997605804255727, "learning_rate": 6.840918572516814e-07, "loss": 0.3032, "step": 27122 }, { "epoch": 2.5453265765765765, "grad_norm": 1.785848341170832, "learning_rate": 6.838162287113009e-07, "loss": 0.3209, "step": 27123 }, { "epoch": 2.5454204204204203, "grad_norm": 1.1510282221631423, "learning_rate": 6.83540651633286e-07, "loss": 0.3065, "step": 27124 }, { "epoch": 2.5455142642642645, "grad_norm": 1.152060599836721, "learning_rate": 6.832651260209255e-07, "loss": 0.3209, "step": 27125 }, { "epoch": 2.545608108108108, "grad_norm": 1.0854694899995883, "learning_rate": 6.829896518775025e-07, "loss": 0.3153, "step": 27126 }, { "epoch": 2.545701951951952, "grad_norm": 1.2986210233541469, "learning_rate": 6.827142292063016e-07, "loss": 0.2782, "step": 27127 }, { "epoch": 2.545795795795796, "grad_norm": 1.467137224578542, "learning_rate": 6.824388580106079e-07, "loss": 0.2752, "step": 27128 }, { "epoch": 2.5458896396396398, "grad_norm": 1.0116992342970224, "learning_rate": 6.821635382937037e-07, "loss": 0.3183, "step": 27129 }, { "epoch": 2.5459834834834836, "grad_norm": 1.072930148092884, "learning_rate": 6.818882700588703e-07, "loss": 0.3168, "step": 27130 }, { "epoch": 2.5460773273273274, "grad_norm": 1.4506758144515455, "learning_rate": 6.81613053309394e-07, "loss": 0.3284, "step": 27131 }, { "epoch": 2.546171171171171, "grad_norm": 1.3100626288287018, "learning_rate": 6.813378880485522e-07, "loss": 0.3047, "step": 27132 }, { "epoch": 2.546265015015015, "grad_norm": 1.0095245279076228, "learning_rate": 6.810627742796261e-07, "loss": 0.2864, "step": 27133 }, { "epoch": 2.546358858858859, "grad_norm": 1.0217775296313094, "learning_rate": 6.807877120058976e-07, "loss": 0.3089, "step": 27134 }, { "epoch": 2.5464527027027026, "grad_norm": 1.1093953502061578, "learning_rate": 6.805127012306461e-07, "loss": 0.2659, "step": 27135 }, { "epoch": 2.5465465465465464, "grad_norm": 1.0218854785655807, "learning_rate": 6.802377419571481e-07, "loss": 0.3188, "step": 27136 }, { "epoch": 2.5466403903903903, "grad_norm": 0.9859480668448507, "learning_rate": 6.79962834188686e-07, "loss": 0.2158, "step": 27137 }, { "epoch": 2.5467342342342345, "grad_norm": 1.3448531279592126, "learning_rate": 6.796879779285348e-07, "loss": 0.2781, "step": 27138 }, { "epoch": 2.546828078078078, "grad_norm": 1.0241089756188604, "learning_rate": 6.794131731799714e-07, "loss": 0.3505, "step": 27139 }, { "epoch": 2.546921921921922, "grad_norm": 1.12128147517795, "learning_rate": 6.791384199462747e-07, "loss": 0.2985, "step": 27140 }, { "epoch": 2.5470157657657655, "grad_norm": 1.0737186652661825, "learning_rate": 6.788637182307189e-07, "loss": 0.3158, "step": 27141 }, { "epoch": 2.5471096096096097, "grad_norm": 1.07462692221711, "learning_rate": 6.785890680365781e-07, "loss": 0.2996, "step": 27142 }, { "epoch": 2.5472034534534536, "grad_norm": 1.1541669426603545, "learning_rate": 6.783144693671301e-07, "loss": 0.3028, "step": 27143 }, { "epoch": 2.5472972972972974, "grad_norm": 1.1430489286012109, "learning_rate": 6.780399222256467e-07, "loss": 0.2636, "step": 27144 }, { "epoch": 2.547391141141141, "grad_norm": 1.1649373282643658, "learning_rate": 6.777654266154027e-07, "loss": 0.3468, "step": 27145 }, { "epoch": 2.547484984984985, "grad_norm": 1.3106557648322923, "learning_rate": 6.774909825396698e-07, "loss": 0.3167, "step": 27146 }, { "epoch": 2.547578828828829, "grad_norm": 1.2870342251367066, "learning_rate": 6.772165900017208e-07, "loss": 0.2963, "step": 27147 }, { "epoch": 2.5476726726726726, "grad_norm": 1.1551680490559486, "learning_rate": 6.769422490048255e-07, "loss": 0.2929, "step": 27148 }, { "epoch": 2.5477665165165164, "grad_norm": 1.136332336128608, "learning_rate": 6.766679595522585e-07, "loss": 0.3268, "step": 27149 }, { "epoch": 2.5478603603603602, "grad_norm": 1.084626927697819, "learning_rate": 6.763937216472876e-07, "loss": 0.3202, "step": 27150 }, { "epoch": 2.547954204204204, "grad_norm": 1.0033617408829438, "learning_rate": 6.761195352931827e-07, "loss": 0.2968, "step": 27151 }, { "epoch": 2.548048048048048, "grad_norm": 1.7850700173883183, "learning_rate": 6.758454004932142e-07, "loss": 0.2927, "step": 27152 }, { "epoch": 2.548141891891892, "grad_norm": 1.0472898492087859, "learning_rate": 6.7557131725065e-07, "loss": 0.3267, "step": 27153 }, { "epoch": 2.5482357357357355, "grad_norm": 1.5080629969929624, "learning_rate": 6.752972855687567e-07, "loss": 0.3093, "step": 27154 }, { "epoch": 2.5483295795795797, "grad_norm": 1.047919871523142, "learning_rate": 6.750233054508038e-07, "loss": 0.2711, "step": 27155 }, { "epoch": 2.5484234234234235, "grad_norm": 1.4404218529930815, "learning_rate": 6.747493769000574e-07, "loss": 0.3108, "step": 27156 }, { "epoch": 2.5485172672672673, "grad_norm": 1.235560430872758, "learning_rate": 6.744754999197834e-07, "loss": 0.2835, "step": 27157 }, { "epoch": 2.548611111111111, "grad_norm": 1.0202675225860443, "learning_rate": 6.742016745132468e-07, "loss": 0.3183, "step": 27158 }, { "epoch": 2.548704954954955, "grad_norm": 0.9753291717055725, "learning_rate": 6.739279006837129e-07, "loss": 0.2702, "step": 27159 }, { "epoch": 2.548798798798799, "grad_norm": 1.2075302971380846, "learning_rate": 6.736541784344442e-07, "loss": 0.3242, "step": 27160 }, { "epoch": 2.5488926426426426, "grad_norm": 1.06160756957121, "learning_rate": 6.733805077687078e-07, "loss": 0.2843, "step": 27161 }, { "epoch": 2.5489864864864864, "grad_norm": 1.2369650013217828, "learning_rate": 6.731068886897641e-07, "loss": 0.3119, "step": 27162 }, { "epoch": 2.54908033033033, "grad_norm": 1.1504380943052992, "learning_rate": 6.728333212008753e-07, "loss": 0.2966, "step": 27163 }, { "epoch": 2.549174174174174, "grad_norm": 1.1897494842591205, "learning_rate": 6.725598053053056e-07, "loss": 0.3195, "step": 27164 }, { "epoch": 2.549268018018018, "grad_norm": 1.3362331775892997, "learning_rate": 6.722863410063146e-07, "loss": 0.3164, "step": 27165 }, { "epoch": 2.549361861861862, "grad_norm": 0.9066643648545968, "learning_rate": 6.720129283071613e-07, "loss": 0.2775, "step": 27166 }, { "epoch": 2.5494557057057055, "grad_norm": 1.3001215273960667, "learning_rate": 6.717395672111088e-07, "loss": 0.3431, "step": 27167 }, { "epoch": 2.5495495495495497, "grad_norm": 1.017889450058921, "learning_rate": 6.714662577214148e-07, "loss": 0.3125, "step": 27168 }, { "epoch": 2.5496433933933935, "grad_norm": 1.2736345901118393, "learning_rate": 6.711929998413374e-07, "loss": 0.3102, "step": 27169 }, { "epoch": 2.5497372372372373, "grad_norm": 1.0938929420197727, "learning_rate": 6.709197935741363e-07, "loss": 0.2815, "step": 27170 }, { "epoch": 2.549831081081081, "grad_norm": 1.2668249407641694, "learning_rate": 6.706466389230676e-07, "loss": 0.3168, "step": 27171 }, { "epoch": 2.549924924924925, "grad_norm": 1.1704851482049121, "learning_rate": 6.703735358913871e-07, "loss": 0.3157, "step": 27172 }, { "epoch": 2.5500187687687688, "grad_norm": 1.2611613346932, "learning_rate": 6.701004844823538e-07, "loss": 0.2956, "step": 27173 }, { "epoch": 2.5501126126126126, "grad_norm": 1.170305891236884, "learning_rate": 6.698274846992225e-07, "loss": 0.3419, "step": 27174 }, { "epoch": 2.5502064564564564, "grad_norm": 1.602415804790908, "learning_rate": 6.695545365452461e-07, "loss": 0.2798, "step": 27175 }, { "epoch": 2.5503003003003, "grad_norm": 0.9596865441041241, "learning_rate": 6.692816400236818e-07, "loss": 0.2865, "step": 27176 }, { "epoch": 2.550394144144144, "grad_norm": 1.0750878308701848, "learning_rate": 6.690087951377827e-07, "loss": 0.27, "step": 27177 }, { "epoch": 2.550487987987988, "grad_norm": 1.1804413425981355, "learning_rate": 6.687360018907996e-07, "loss": 0.3071, "step": 27178 }, { "epoch": 2.550581831831832, "grad_norm": 1.170780490490424, "learning_rate": 6.684632602859887e-07, "loss": 0.282, "step": 27179 }, { "epoch": 2.5506756756756754, "grad_norm": 1.0194775308997044, "learning_rate": 6.681905703265995e-07, "loss": 0.2951, "step": 27180 }, { "epoch": 2.5507695195195197, "grad_norm": 1.2962448390564763, "learning_rate": 6.679179320158847e-07, "loss": 0.3276, "step": 27181 }, { "epoch": 2.5508633633633635, "grad_norm": 1.113281634949305, "learning_rate": 6.676453453570936e-07, "loss": 0.355, "step": 27182 }, { "epoch": 2.5509572072072073, "grad_norm": 1.1840517261433223, "learning_rate": 6.673728103534771e-07, "loss": 0.2882, "step": 27183 }, { "epoch": 2.551051051051051, "grad_norm": 1.0831679918995722, "learning_rate": 6.671003270082837e-07, "loss": 0.3439, "step": 27184 }, { "epoch": 2.551144894894895, "grad_norm": 1.2278294346791951, "learning_rate": 6.668278953247636e-07, "loss": 0.295, "step": 27185 }, { "epoch": 2.5512387387387387, "grad_norm": 1.1624788660994885, "learning_rate": 6.665555153061648e-07, "loss": 0.2893, "step": 27186 }, { "epoch": 2.5513325825825826, "grad_norm": 1.1597695139515447, "learning_rate": 6.662831869557335e-07, "loss": 0.2923, "step": 27187 }, { "epoch": 2.5514264264264264, "grad_norm": 1.236735753762138, "learning_rate": 6.660109102767192e-07, "loss": 0.3051, "step": 27188 }, { "epoch": 2.55152027027027, "grad_norm": 1.0522999877415027, "learning_rate": 6.657386852723668e-07, "loss": 0.3276, "step": 27189 }, { "epoch": 2.551614114114114, "grad_norm": 1.3096834963531065, "learning_rate": 6.654665119459209e-07, "loss": 0.3384, "step": 27190 }, { "epoch": 2.551707957957958, "grad_norm": 1.08996832115753, "learning_rate": 6.651943903006286e-07, "loss": 0.3238, "step": 27191 }, { "epoch": 2.551801801801802, "grad_norm": 2.015363608186389, "learning_rate": 6.649223203397343e-07, "loss": 0.3216, "step": 27192 }, { "epoch": 2.5518956456456454, "grad_norm": 1.2064339551644097, "learning_rate": 6.646503020664813e-07, "loss": 0.2951, "step": 27193 }, { "epoch": 2.5519894894894897, "grad_norm": 1.1446545656524258, "learning_rate": 6.643783354841132e-07, "loss": 0.3078, "step": 27194 }, { "epoch": 2.5520833333333335, "grad_norm": 1.0483220568791318, "learning_rate": 6.641064205958719e-07, "loss": 0.3133, "step": 27195 }, { "epoch": 2.5521771771771773, "grad_norm": 1.1750049525929203, "learning_rate": 6.638345574049998e-07, "loss": 0.2807, "step": 27196 }, { "epoch": 2.552271021021021, "grad_norm": 1.0702414056437408, "learning_rate": 6.635627459147392e-07, "loss": 0.2983, "step": 27197 }, { "epoch": 2.552364864864865, "grad_norm": 1.2050792897186782, "learning_rate": 6.632909861283305e-07, "loss": 0.32, "step": 27198 }, { "epoch": 2.5524587087087087, "grad_norm": 1.1788817980307917, "learning_rate": 6.630192780490125e-07, "loss": 0.3326, "step": 27199 }, { "epoch": 2.5525525525525525, "grad_norm": 1.235977817618701, "learning_rate": 6.627476216800271e-07, "loss": 0.2735, "step": 27200 }, { "epoch": 2.5526463963963963, "grad_norm": 1.0865611923070317, "learning_rate": 6.624760170246125e-07, "loss": 0.3001, "step": 27201 }, { "epoch": 2.55274024024024, "grad_norm": 1.671518145714334, "learning_rate": 6.622044640860054e-07, "loss": 0.29, "step": 27202 }, { "epoch": 2.552834084084084, "grad_norm": 1.2024499295715403, "learning_rate": 6.619329628674464e-07, "loss": 0.301, "step": 27203 }, { "epoch": 2.5529279279279278, "grad_norm": 1.0883735448003498, "learning_rate": 6.61661513372171e-07, "loss": 0.3092, "step": 27204 }, { "epoch": 2.553021771771772, "grad_norm": 2.648755849372813, "learning_rate": 6.613901156034164e-07, "loss": 0.2721, "step": 27205 }, { "epoch": 2.5531156156156154, "grad_norm": 1.2038645026097863, "learning_rate": 6.611187695644178e-07, "loss": 0.3113, "step": 27206 }, { "epoch": 2.5532094594594597, "grad_norm": 1.1868724329172895, "learning_rate": 6.60847475258411e-07, "loss": 0.3123, "step": 27207 }, { "epoch": 2.5533033033033035, "grad_norm": 1.4333487600778332, "learning_rate": 6.605762326886289e-07, "loss": 0.2824, "step": 27208 }, { "epoch": 2.5533971471471473, "grad_norm": 1.0862201160847065, "learning_rate": 6.603050418583085e-07, "loss": 0.2999, "step": 27209 }, { "epoch": 2.553490990990991, "grad_norm": 1.0710754365138102, "learning_rate": 6.60033902770682e-07, "loss": 0.2848, "step": 27210 }, { "epoch": 2.553584834834835, "grad_norm": 1.0701794115921877, "learning_rate": 6.597628154289809e-07, "loss": 0.326, "step": 27211 }, { "epoch": 2.5536786786786787, "grad_norm": 1.037962942389748, "learning_rate": 6.594917798364392e-07, "loss": 0.3051, "step": 27212 }, { "epoch": 2.5537725225225225, "grad_norm": 1.0814391188731431, "learning_rate": 6.592207959962882e-07, "loss": 0.2497, "step": 27213 }, { "epoch": 2.5538663663663663, "grad_norm": 1.0465754215601768, "learning_rate": 6.589498639117575e-07, "loss": 0.2776, "step": 27214 }, { "epoch": 2.55396021021021, "grad_norm": 1.3346850138200208, "learning_rate": 6.586789835860796e-07, "loss": 0.2956, "step": 27215 }, { "epoch": 2.554054054054054, "grad_norm": 1.0695609299598536, "learning_rate": 6.58408155022483e-07, "loss": 0.2901, "step": 27216 }, { "epoch": 2.5541478978978978, "grad_norm": 1.7513474443448767, "learning_rate": 6.581373782241968e-07, "loss": 0.3157, "step": 27217 }, { "epoch": 2.554241741741742, "grad_norm": 1.2579626359261553, "learning_rate": 6.5786665319445e-07, "loss": 0.3057, "step": 27218 }, { "epoch": 2.5543355855855854, "grad_norm": 1.003840759405607, "learning_rate": 6.575959799364701e-07, "loss": 0.2875, "step": 27219 }, { "epoch": 2.5544294294294296, "grad_norm": 1.6300002269798104, "learning_rate": 6.573253584534828e-07, "loss": 0.3156, "step": 27220 }, { "epoch": 2.554523273273273, "grad_norm": 1.1646582332543856, "learning_rate": 6.570547887487183e-07, "loss": 0.3474, "step": 27221 }, { "epoch": 2.5546171171171173, "grad_norm": 0.9945183808733714, "learning_rate": 6.567842708254002e-07, "loss": 0.3295, "step": 27222 }, { "epoch": 2.554710960960961, "grad_norm": 0.9149512519885893, "learning_rate": 6.565138046867531e-07, "loss": 0.2998, "step": 27223 }, { "epoch": 2.554804804804805, "grad_norm": 1.047398701379021, "learning_rate": 6.562433903360043e-07, "loss": 0.3087, "step": 27224 }, { "epoch": 2.5548986486486487, "grad_norm": 1.2630909766082907, "learning_rate": 6.559730277763771e-07, "loss": 0.2965, "step": 27225 }, { "epoch": 2.5549924924924925, "grad_norm": 1.2961882231765063, "learning_rate": 6.557027170110935e-07, "loss": 0.3128, "step": 27226 }, { "epoch": 2.5550863363363363, "grad_norm": 1.0392059157335687, "learning_rate": 6.554324580433785e-07, "loss": 0.2987, "step": 27227 }, { "epoch": 2.55518018018018, "grad_norm": 1.5858535789828987, "learning_rate": 6.551622508764544e-07, "loss": 0.2793, "step": 27228 }, { "epoch": 2.555274024024024, "grad_norm": 1.5474643467594462, "learning_rate": 6.548920955135412e-07, "loss": 0.3342, "step": 27229 }, { "epoch": 2.5553678678678677, "grad_norm": 1.120799612808725, "learning_rate": 6.546219919578617e-07, "loss": 0.2983, "step": 27230 }, { "epoch": 2.5554617117117115, "grad_norm": 1.449338632719687, "learning_rate": 6.543519402126352e-07, "loss": 0.3009, "step": 27231 }, { "epoch": 2.5555555555555554, "grad_norm": 1.8339073033021902, "learning_rate": 6.54081940281081e-07, "loss": 0.3074, "step": 27232 }, { "epoch": 2.5556493993993996, "grad_norm": 1.1841556495066081, "learning_rate": 6.538119921664205e-07, "loss": 0.3445, "step": 27233 }, { "epoch": 2.555743243243243, "grad_norm": 1.3075025654945434, "learning_rate": 6.535420958718708e-07, "loss": 0.3251, "step": 27234 }, { "epoch": 2.5558370870870872, "grad_norm": 1.1108277252260261, "learning_rate": 6.532722514006495e-07, "loss": 0.3085, "step": 27235 }, { "epoch": 2.555930930930931, "grad_norm": 1.296136756257657, "learning_rate": 6.530024587559758e-07, "loss": 0.3435, "step": 27236 }, { "epoch": 2.556024774774775, "grad_norm": 1.1277642277639388, "learning_rate": 6.527327179410647e-07, "loss": 0.3514, "step": 27237 }, { "epoch": 2.5561186186186187, "grad_norm": 1.0688491575187258, "learning_rate": 6.524630289591327e-07, "loss": 0.3439, "step": 27238 }, { "epoch": 2.5562124624624625, "grad_norm": 1.1468777460854096, "learning_rate": 6.521933918133966e-07, "loss": 0.302, "step": 27239 }, { "epoch": 2.5563063063063063, "grad_norm": 1.136032573906156, "learning_rate": 6.519238065070705e-07, "loss": 0.3418, "step": 27240 }, { "epoch": 2.55640015015015, "grad_norm": 1.0921541971152027, "learning_rate": 6.516542730433684e-07, "loss": 0.311, "step": 27241 }, { "epoch": 2.556493993993994, "grad_norm": 1.13028540482799, "learning_rate": 6.513847914255045e-07, "loss": 0.3376, "step": 27242 }, { "epoch": 2.5565878378378377, "grad_norm": 1.0886378664255012, "learning_rate": 6.511153616566912e-07, "loss": 0.3151, "step": 27243 }, { "epoch": 2.5566816816816815, "grad_norm": 1.0318828069820825, "learning_rate": 6.508459837401404e-07, "loss": 0.2807, "step": 27244 }, { "epoch": 2.5567755255255253, "grad_norm": 1.4249397604080094, "learning_rate": 6.505766576790657e-07, "loss": 0.2974, "step": 27245 }, { "epoch": 2.5568693693693696, "grad_norm": 1.5397914714561058, "learning_rate": 6.503073834766777e-07, "loss": 0.3205, "step": 27246 }, { "epoch": 2.556963213213213, "grad_norm": 1.2628769370075332, "learning_rate": 6.500381611361856e-07, "loss": 0.338, "step": 27247 }, { "epoch": 2.557057057057057, "grad_norm": 1.1392962241128894, "learning_rate": 6.497689906608013e-07, "loss": 0.3343, "step": 27248 }, { "epoch": 2.557150900900901, "grad_norm": 0.9946297537012768, "learning_rate": 6.494998720537332e-07, "loss": 0.2877, "step": 27249 }, { "epoch": 2.557244744744745, "grad_norm": 1.0789771395754229, "learning_rate": 6.492308053181895e-07, "loss": 0.3155, "step": 27250 }, { "epoch": 2.5573385885885886, "grad_norm": 1.0565358791468251, "learning_rate": 6.489617904573797e-07, "loss": 0.2987, "step": 27251 }, { "epoch": 2.5574324324324325, "grad_norm": 1.1856065054644962, "learning_rate": 6.486928274745108e-07, "loss": 0.2923, "step": 27252 }, { "epoch": 2.5575262762762763, "grad_norm": 1.1214001609505475, "learning_rate": 6.484239163727879e-07, "loss": 0.3303, "step": 27253 }, { "epoch": 2.55762012012012, "grad_norm": 1.394810621702738, "learning_rate": 6.481550571554212e-07, "loss": 0.3312, "step": 27254 }, { "epoch": 2.557713963963964, "grad_norm": 1.0449748965979708, "learning_rate": 6.478862498256133e-07, "loss": 0.2911, "step": 27255 }, { "epoch": 2.5578078078078077, "grad_norm": 0.9274669500586079, "learning_rate": 6.476174943865681e-07, "loss": 0.3317, "step": 27256 }, { "epoch": 2.5579016516516515, "grad_norm": 1.0925328165290433, "learning_rate": 6.473487908414932e-07, "loss": 0.325, "step": 27257 }, { "epoch": 2.5579954954954953, "grad_norm": 1.1126655317487795, "learning_rate": 6.47080139193591e-07, "loss": 0.2726, "step": 27258 }, { "epoch": 2.5580893393393396, "grad_norm": 1.11821466828128, "learning_rate": 6.468115394460634e-07, "loss": 0.307, "step": 27259 }, { "epoch": 2.558183183183183, "grad_norm": 1.0607598153827265, "learning_rate": 6.465429916021155e-07, "loss": 0.3183, "step": 27260 }, { "epoch": 2.558277027027027, "grad_norm": 1.180530029223343, "learning_rate": 6.462744956649475e-07, "loss": 0.2925, "step": 27261 }, { "epoch": 2.558370870870871, "grad_norm": 0.952114318943869, "learning_rate": 6.460060516377603e-07, "loss": 0.2954, "step": 27262 }, { "epoch": 2.558464714714715, "grad_norm": 1.0901938055370592, "learning_rate": 6.457376595237563e-07, "loss": 0.3051, "step": 27263 }, { "epoch": 2.5585585585585586, "grad_norm": 1.110017813963609, "learning_rate": 6.454693193261347e-07, "loss": 0.3081, "step": 27264 }, { "epoch": 2.5586524024024024, "grad_norm": 1.1277367177456472, "learning_rate": 6.452010310480938e-07, "loss": 0.3212, "step": 27265 }, { "epoch": 2.5587462462462462, "grad_norm": 0.936249989967207, "learning_rate": 6.449327946928346e-07, "loss": 0.3048, "step": 27266 }, { "epoch": 2.55884009009009, "grad_norm": 1.2097737475862906, "learning_rate": 6.44664610263554e-07, "loss": 0.3247, "step": 27267 }, { "epoch": 2.558933933933934, "grad_norm": 1.0951956689313775, "learning_rate": 6.443964777634503e-07, "loss": 0.2836, "step": 27268 }, { "epoch": 2.5590277777777777, "grad_norm": 1.0431456610611336, "learning_rate": 6.441283971957202e-07, "loss": 0.276, "step": 27269 }, { "epoch": 2.5591216216216215, "grad_norm": 1.2606143422288787, "learning_rate": 6.438603685635592e-07, "loss": 0.3153, "step": 27270 }, { "epoch": 2.5592154654654653, "grad_norm": 1.3932692439517869, "learning_rate": 6.435923918701631e-07, "loss": 0.3205, "step": 27271 }, { "epoch": 2.5593093093093096, "grad_norm": 1.1172359405837788, "learning_rate": 6.433244671187283e-07, "loss": 0.3482, "step": 27272 }, { "epoch": 2.559403153153153, "grad_norm": 1.370348477988784, "learning_rate": 6.43056594312449e-07, "loss": 0.2934, "step": 27273 }, { "epoch": 2.559496996996997, "grad_norm": 1.279061056389946, "learning_rate": 6.427887734545174e-07, "loss": 0.3041, "step": 27274 }, { "epoch": 2.559590840840841, "grad_norm": 1.601051250110345, "learning_rate": 6.425210045481295e-07, "loss": 0.3377, "step": 27275 }, { "epoch": 2.559684684684685, "grad_norm": 1.8255185396684745, "learning_rate": 6.422532875964765e-07, "loss": 0.3217, "step": 27276 }, { "epoch": 2.5597785285285286, "grad_norm": 1.2406843410114141, "learning_rate": 6.419856226027487e-07, "loss": 0.2987, "step": 27277 }, { "epoch": 2.5598723723723724, "grad_norm": 1.3367841937339955, "learning_rate": 6.417180095701409e-07, "loss": 0.3432, "step": 27278 }, { "epoch": 2.5599662162162162, "grad_norm": 1.2785340319811616, "learning_rate": 6.414504485018425e-07, "loss": 0.3186, "step": 27279 }, { "epoch": 2.56006006006006, "grad_norm": 1.4381456670959518, "learning_rate": 6.411829394010427e-07, "loss": 0.302, "step": 27280 }, { "epoch": 2.560153903903904, "grad_norm": 1.0596486550944058, "learning_rate": 6.409154822709318e-07, "loss": 0.2883, "step": 27281 }, { "epoch": 2.5602477477477477, "grad_norm": 0.9838693569963511, "learning_rate": 6.406480771146989e-07, "loss": 0.2752, "step": 27282 }, { "epoch": 2.5603415915915915, "grad_norm": 1.0611481235880207, "learning_rate": 6.403807239355308e-07, "loss": 0.2812, "step": 27283 }, { "epoch": 2.5604354354354353, "grad_norm": 1.233723443901612, "learning_rate": 6.401134227366179e-07, "loss": 0.3287, "step": 27284 }, { "epoch": 2.5605292792792795, "grad_norm": 1.0739563013778675, "learning_rate": 6.398461735211453e-07, "loss": 0.298, "step": 27285 }, { "epoch": 2.560623123123123, "grad_norm": 0.9493666773409325, "learning_rate": 6.395789762922994e-07, "loss": 0.3007, "step": 27286 }, { "epoch": 2.560716966966967, "grad_norm": 1.4098902798126802, "learning_rate": 6.39311831053267e-07, "loss": 0.296, "step": 27287 }, { "epoch": 2.560810810810811, "grad_norm": 1.1346823271250035, "learning_rate": 6.390447378072329e-07, "loss": 0.3175, "step": 27288 }, { "epoch": 2.5609046546546548, "grad_norm": 1.2698315263805942, "learning_rate": 6.38777696557381e-07, "loss": 0.319, "step": 27289 }, { "epoch": 2.5609984984984986, "grad_norm": 1.0604814786817367, "learning_rate": 6.385107073068969e-07, "loss": 0.3294, "step": 27290 }, { "epoch": 2.5610923423423424, "grad_norm": 1.43837871260112, "learning_rate": 6.382437700589628e-07, "loss": 0.33, "step": 27291 }, { "epoch": 2.561186186186186, "grad_norm": 1.0910745008646916, "learning_rate": 6.379768848167617e-07, "loss": 0.271, "step": 27292 }, { "epoch": 2.56128003003003, "grad_norm": 1.0506546101451562, "learning_rate": 6.377100515834756e-07, "loss": 0.2683, "step": 27293 }, { "epoch": 2.561373873873874, "grad_norm": 1.295750049605996, "learning_rate": 6.374432703622857e-07, "loss": 0.3342, "step": 27294 }, { "epoch": 2.5614677177177176, "grad_norm": 1.2314683204213939, "learning_rate": 6.371765411563724e-07, "loss": 0.266, "step": 27295 }, { "epoch": 2.5615615615615615, "grad_norm": 1.4788650053039176, "learning_rate": 6.369098639689175e-07, "loss": 0.3184, "step": 27296 }, { "epoch": 2.5616554054054053, "grad_norm": 2.1800171752614466, "learning_rate": 6.366432388030997e-07, "loss": 0.3138, "step": 27297 }, { "epoch": 2.5617492492492495, "grad_norm": 1.0357670794416904, "learning_rate": 6.363766656620973e-07, "loss": 0.2755, "step": 27298 }, { "epoch": 2.561843093093093, "grad_norm": 1.2110957087632017, "learning_rate": 6.361101445490903e-07, "loss": 0.3011, "step": 27299 }, { "epoch": 2.561936936936937, "grad_norm": 1.0547700324252915, "learning_rate": 6.358436754672559e-07, "loss": 0.28, "step": 27300 }, { "epoch": 2.5620307807807805, "grad_norm": 1.1051669031718292, "learning_rate": 6.3557725841977e-07, "loss": 0.3273, "step": 27301 }, { "epoch": 2.5621246246246248, "grad_norm": 1.6254189388883387, "learning_rate": 6.353108934098113e-07, "loss": 0.3202, "step": 27302 }, { "epoch": 2.5622184684684686, "grad_norm": 1.4765032922332586, "learning_rate": 6.350445804405542e-07, "loss": 0.3137, "step": 27303 }, { "epoch": 2.5623123123123124, "grad_norm": 1.1169589565990343, "learning_rate": 6.347783195151741e-07, "loss": 0.3122, "step": 27304 }, { "epoch": 2.562406156156156, "grad_norm": 1.1501471981264666, "learning_rate": 6.345121106368463e-07, "loss": 0.2676, "step": 27305 }, { "epoch": 2.5625, "grad_norm": 1.0924678400437706, "learning_rate": 6.342459538087442e-07, "loss": 0.3324, "step": 27306 }, { "epoch": 2.562593843843844, "grad_norm": 1.2110347159045245, "learning_rate": 6.339798490340399e-07, "loss": 0.3137, "step": 27307 }, { "epoch": 2.5626876876876876, "grad_norm": 1.190162282631156, "learning_rate": 6.337137963159095e-07, "loss": 0.3278, "step": 27308 }, { "epoch": 2.5627815315315314, "grad_norm": 1.2413677902523041, "learning_rate": 6.33447795657523e-07, "loss": 0.2977, "step": 27309 }, { "epoch": 2.5628753753753752, "grad_norm": 1.215229476585528, "learning_rate": 6.331818470620516e-07, "loss": 0.3267, "step": 27310 }, { "epoch": 2.5629692192192195, "grad_norm": 1.185417106747728, "learning_rate": 6.329159505326676e-07, "loss": 0.3542, "step": 27311 }, { "epoch": 2.563063063063063, "grad_norm": 1.187863830592877, "learning_rate": 6.326501060725409e-07, "loss": 0.3121, "step": 27312 }, { "epoch": 2.563156906906907, "grad_norm": 1.0557996020660092, "learning_rate": 6.323843136848401e-07, "loss": 0.3054, "step": 27313 }, { "epoch": 2.5632507507507505, "grad_norm": 1.0239686822338354, "learning_rate": 6.321185733727359e-07, "loss": 0.309, "step": 27314 }, { "epoch": 2.5633445945945947, "grad_norm": 1.2173412413016418, "learning_rate": 6.318528851393962e-07, "loss": 0.3322, "step": 27315 }, { "epoch": 2.5634384384384385, "grad_norm": 0.8999978890354823, "learning_rate": 6.315872489879887e-07, "loss": 0.3104, "step": 27316 }, { "epoch": 2.5635322822822824, "grad_norm": 1.1852318054624755, "learning_rate": 6.313216649216808e-07, "loss": 0.3279, "step": 27317 }, { "epoch": 2.563626126126126, "grad_norm": 0.9956259531142728, "learning_rate": 6.310561329436382e-07, "loss": 0.3205, "step": 27318 }, { "epoch": 2.56371996996997, "grad_norm": 1.183286407445259, "learning_rate": 6.307906530570274e-07, "loss": 0.3486, "step": 27319 }, { "epoch": 2.563813813813814, "grad_norm": 1.1028154398306058, "learning_rate": 6.30525225265014e-07, "loss": 0.3112, "step": 27320 }, { "epoch": 2.5639076576576576, "grad_norm": 1.2262012107477724, "learning_rate": 6.30259849570763e-07, "loss": 0.31, "step": 27321 }, { "epoch": 2.5640015015015014, "grad_norm": 1.011928923540362, "learning_rate": 6.29994525977437e-07, "loss": 0.2941, "step": 27322 }, { "epoch": 2.5640953453453452, "grad_norm": 0.923853426318496, "learning_rate": 6.297292544882016e-07, "loss": 0.3234, "step": 27323 }, { "epoch": 2.564189189189189, "grad_norm": 1.2062424210709068, "learning_rate": 6.294640351062187e-07, "loss": 0.2886, "step": 27324 }, { "epoch": 2.564283033033033, "grad_norm": 1.2195984395637023, "learning_rate": 6.291988678346495e-07, "loss": 0.3499, "step": 27325 }, { "epoch": 2.564376876876877, "grad_norm": 1.099247894208592, "learning_rate": 6.289337526766575e-07, "loss": 0.3043, "step": 27326 }, { "epoch": 2.5644707207207205, "grad_norm": 3.27656893657617, "learning_rate": 6.286686896354033e-07, "loss": 0.3158, "step": 27327 }, { "epoch": 2.5645645645645647, "grad_norm": 1.9925292055089594, "learning_rate": 6.284036787140463e-07, "loss": 0.3097, "step": 27328 }, { "epoch": 2.5646584084084085, "grad_norm": 1.2204431037588246, "learning_rate": 6.281387199157468e-07, "loss": 0.2971, "step": 27329 }, { "epoch": 2.5647522522522523, "grad_norm": 1.0098419725223127, "learning_rate": 6.27873813243664e-07, "loss": 0.3362, "step": 27330 }, { "epoch": 2.564846096096096, "grad_norm": 1.0496668447706499, "learning_rate": 6.276089587009554e-07, "loss": 0.323, "step": 27331 }, { "epoch": 2.56493993993994, "grad_norm": 1.2187531898585722, "learning_rate": 6.273441562907806e-07, "loss": 0.3234, "step": 27332 }, { "epoch": 2.5650337837837838, "grad_norm": 1.0811764772892114, "learning_rate": 6.27079406016296e-07, "loss": 0.3028, "step": 27333 }, { "epoch": 2.5651276276276276, "grad_norm": 1.4225285126363383, "learning_rate": 6.268147078806575e-07, "loss": 0.2868, "step": 27334 }, { "epoch": 2.5652214714714714, "grad_norm": 1.2187665604661333, "learning_rate": 6.265500618870229e-07, "loss": 0.3442, "step": 27335 }, { "epoch": 2.565315315315315, "grad_norm": 1.4083798898510236, "learning_rate": 6.262854680385466e-07, "loss": 0.3248, "step": 27336 }, { "epoch": 2.565409159159159, "grad_norm": 0.9975481153847432, "learning_rate": 6.260209263383821e-07, "loss": 0.3389, "step": 27337 }, { "epoch": 2.565503003003003, "grad_norm": 1.1528379177932093, "learning_rate": 6.257564367896862e-07, "loss": 0.3223, "step": 27338 }, { "epoch": 2.565596846846847, "grad_norm": 1.1455943533882673, "learning_rate": 6.25491999395611e-07, "loss": 0.3172, "step": 27339 }, { "epoch": 2.5656906906906904, "grad_norm": 1.1485428907154802, "learning_rate": 6.2522761415931e-07, "loss": 0.3261, "step": 27340 }, { "epoch": 2.5657845345345347, "grad_norm": 0.9657983226010055, "learning_rate": 6.249632810839346e-07, "loss": 0.2993, "step": 27341 }, { "epoch": 2.5658783783783785, "grad_norm": 1.1803807001321032, "learning_rate": 6.246990001726372e-07, "loss": 0.3233, "step": 27342 }, { "epoch": 2.5659722222222223, "grad_norm": 1.2314945900663317, "learning_rate": 6.24434771428567e-07, "loss": 0.3117, "step": 27343 }, { "epoch": 2.566066066066066, "grad_norm": 1.0732605923150351, "learning_rate": 6.241705948548777e-07, "loss": 0.302, "step": 27344 }, { "epoch": 2.56615990990991, "grad_norm": 1.0646441980000354, "learning_rate": 6.239064704547171e-07, "loss": 0.3115, "step": 27345 }, { "epoch": 2.5662537537537538, "grad_norm": 1.0560579205162637, "learning_rate": 6.236423982312334e-07, "loss": 0.3127, "step": 27346 }, { "epoch": 2.5663475975975976, "grad_norm": 1.0164088938403577, "learning_rate": 6.233783781875774e-07, "loss": 0.2939, "step": 27347 }, { "epoch": 2.5664414414414414, "grad_norm": 1.0147270432671378, "learning_rate": 6.231144103268966e-07, "loss": 0.3244, "step": 27348 }, { "epoch": 2.566535285285285, "grad_norm": 1.156119525378847, "learning_rate": 6.228504946523367e-07, "loss": 0.3418, "step": 27349 }, { "epoch": 2.566629129129129, "grad_norm": 1.1615489409955804, "learning_rate": 6.225866311670464e-07, "loss": 0.3315, "step": 27350 }, { "epoch": 2.566722972972973, "grad_norm": 1.5348630030239812, "learning_rate": 6.22322819874171e-07, "loss": 0.2992, "step": 27351 }, { "epoch": 2.566816816816817, "grad_norm": 1.3834689910474631, "learning_rate": 6.220590607768556e-07, "loss": 0.3164, "step": 27352 }, { "epoch": 2.5669106606606604, "grad_norm": 1.3046436189365431, "learning_rate": 6.217953538782456e-07, "loss": 0.3285, "step": 27353 }, { "epoch": 2.5670045045045047, "grad_norm": 1.730233628395172, "learning_rate": 6.215316991814845e-07, "loss": 0.2738, "step": 27354 }, { "epoch": 2.5670983483483485, "grad_norm": 1.357503346386906, "learning_rate": 6.212680966897155e-07, "loss": 0.3024, "step": 27355 }, { "epoch": 2.5671921921921923, "grad_norm": 1.2930199921962044, "learning_rate": 6.210045464060837e-07, "loss": 0.3033, "step": 27356 }, { "epoch": 2.567286036036036, "grad_norm": 1.119049960220361, "learning_rate": 6.207410483337295e-07, "loss": 0.312, "step": 27357 }, { "epoch": 2.56737987987988, "grad_norm": 1.2110996438740265, "learning_rate": 6.204776024757942e-07, "loss": 0.3031, "step": 27358 }, { "epoch": 2.5674737237237237, "grad_norm": 1.3077202115167192, "learning_rate": 6.202142088354213e-07, "loss": 0.3184, "step": 27359 }, { "epoch": 2.5675675675675675, "grad_norm": 1.0500439193563271, "learning_rate": 6.199508674157495e-07, "loss": 0.3177, "step": 27360 }, { "epoch": 2.5676614114114114, "grad_norm": 1.3997341937201935, "learning_rate": 6.19687578219918e-07, "loss": 0.286, "step": 27361 }, { "epoch": 2.567755255255255, "grad_norm": 1.1206590778471857, "learning_rate": 6.194243412510687e-07, "loss": 0.3195, "step": 27362 }, { "epoch": 2.567849099099099, "grad_norm": 1.0527711255877867, "learning_rate": 6.191611565123379e-07, "loss": 0.3334, "step": 27363 }, { "epoch": 2.567942942942943, "grad_norm": 1.0147981119749832, "learning_rate": 6.188980240068643e-07, "loss": 0.3389, "step": 27364 }, { "epoch": 2.568036786786787, "grad_norm": 1.1546703589741922, "learning_rate": 6.186349437377853e-07, "loss": 0.3388, "step": 27365 }, { "epoch": 2.5681306306306304, "grad_norm": 1.5730251181635355, "learning_rate": 6.183719157082374e-07, "loss": 0.326, "step": 27366 }, { "epoch": 2.5682244744744747, "grad_norm": 1.3700865996614946, "learning_rate": 6.181089399213558e-07, "loss": 0.3497, "step": 27367 }, { "epoch": 2.5683183183183185, "grad_norm": 1.055764951403731, "learning_rate": 6.17846016380278e-07, "loss": 0.315, "step": 27368 }, { "epoch": 2.5684121621621623, "grad_norm": 1.064811914448483, "learning_rate": 6.175831450881381e-07, "loss": 0.3149, "step": 27369 }, { "epoch": 2.568506006006006, "grad_norm": 1.0019721707419074, "learning_rate": 6.173203260480693e-07, "loss": 0.3676, "step": 27370 }, { "epoch": 2.56859984984985, "grad_norm": 1.072488424642232, "learning_rate": 6.170575592632067e-07, "loss": 0.2937, "step": 27371 }, { "epoch": 2.5686936936936937, "grad_norm": 1.1781648196648402, "learning_rate": 6.167948447366829e-07, "loss": 0.281, "step": 27372 }, { "epoch": 2.5687875375375375, "grad_norm": 1.0360515325108441, "learning_rate": 6.165321824716286e-07, "loss": 0.3182, "step": 27373 }, { "epoch": 2.5688813813813813, "grad_norm": 1.1288271171857747, "learning_rate": 6.162695724711787e-07, "loss": 0.326, "step": 27374 }, { "epoch": 2.568975225225225, "grad_norm": 1.1964597186007733, "learning_rate": 6.160070147384622e-07, "loss": 0.3035, "step": 27375 }, { "epoch": 2.569069069069069, "grad_norm": 1.05252985871328, "learning_rate": 6.15744509276609e-07, "loss": 0.3269, "step": 27376 }, { "epoch": 2.5691629129129128, "grad_norm": 1.1643430282028677, "learning_rate": 6.15482056088752e-07, "loss": 0.3199, "step": 27377 }, { "epoch": 2.569256756756757, "grad_norm": 1.3904649761128225, "learning_rate": 6.152196551780176e-07, "loss": 0.3023, "step": 27378 }, { "epoch": 2.5693506006006004, "grad_norm": 0.9664557947109749, "learning_rate": 6.149573065475344e-07, "loss": 0.3132, "step": 27379 }, { "epoch": 2.5694444444444446, "grad_norm": 1.0731327758640428, "learning_rate": 6.146950102004324e-07, "loss": 0.3476, "step": 27380 }, { "epoch": 2.5695382882882885, "grad_norm": 1.239462757600611, "learning_rate": 6.144327661398375e-07, "loss": 0.3257, "step": 27381 }, { "epoch": 2.5696321321321323, "grad_norm": 1.1921865890820045, "learning_rate": 6.141705743688764e-07, "loss": 0.3226, "step": 27382 }, { "epoch": 2.569725975975976, "grad_norm": 1.3266553044044034, "learning_rate": 6.139084348906765e-07, "loss": 0.3489, "step": 27383 }, { "epoch": 2.56981981981982, "grad_norm": 1.0267732151941964, "learning_rate": 6.136463477083627e-07, "loss": 0.3035, "step": 27384 }, { "epoch": 2.5699136636636637, "grad_norm": 1.0879275506331625, "learning_rate": 6.133843128250582e-07, "loss": 0.3377, "step": 27385 }, { "epoch": 2.5700075075075075, "grad_norm": 1.0344222559321088, "learning_rate": 6.131223302438899e-07, "loss": 0.3262, "step": 27386 }, { "epoch": 2.5701013513513513, "grad_norm": 1.0205921769984683, "learning_rate": 6.128603999679811e-07, "loss": 0.2973, "step": 27387 }, { "epoch": 2.570195195195195, "grad_norm": 3.1775877816143594, "learning_rate": 6.125985220004521e-07, "loss": 0.3159, "step": 27388 }, { "epoch": 2.570289039039039, "grad_norm": 1.0430391332708402, "learning_rate": 6.123366963444289e-07, "loss": 0.2836, "step": 27389 }, { "epoch": 2.5703828828828827, "grad_norm": 1.2296492401498853, "learning_rate": 6.120749230030315e-07, "loss": 0.3431, "step": 27390 }, { "epoch": 2.570476726726727, "grad_norm": 1.085713934365538, "learning_rate": 6.118132019793816e-07, "loss": 0.2842, "step": 27391 }, { "epoch": 2.5705705705705704, "grad_norm": 1.4925413980958, "learning_rate": 6.115515332765992e-07, "loss": 0.3216, "step": 27392 }, { "epoch": 2.5706644144144146, "grad_norm": 1.1792939486530725, "learning_rate": 6.112899168978042e-07, "loss": 0.2935, "step": 27393 }, { "epoch": 2.570758258258258, "grad_norm": 1.116831444361736, "learning_rate": 6.110283528461148e-07, "loss": 0.3201, "step": 27394 }, { "epoch": 2.5708521021021022, "grad_norm": 1.2305425172089557, "learning_rate": 6.107668411246521e-07, "loss": 0.2942, "step": 27395 }, { "epoch": 2.570945945945946, "grad_norm": 1.0272083669032717, "learning_rate": 6.105053817365331e-07, "loss": 0.3193, "step": 27396 }, { "epoch": 2.57103978978979, "grad_norm": 1.1268200837994515, "learning_rate": 6.102439746848737e-07, "loss": 0.3008, "step": 27397 }, { "epoch": 2.5711336336336337, "grad_norm": 1.0752079857032777, "learning_rate": 6.099826199727938e-07, "loss": 0.2942, "step": 27398 }, { "epoch": 2.5712274774774775, "grad_norm": 1.2971893405778945, "learning_rate": 6.097213176034067e-07, "loss": 0.3114, "step": 27399 }, { "epoch": 2.5713213213213213, "grad_norm": 1.070034064546686, "learning_rate": 6.09460067579829e-07, "loss": 0.2709, "step": 27400 }, { "epoch": 2.571415165165165, "grad_norm": 1.5653877219460746, "learning_rate": 6.09198869905176e-07, "loss": 0.3189, "step": 27401 }, { "epoch": 2.571509009009009, "grad_norm": 1.0759160922164674, "learning_rate": 6.089377245825623e-07, "loss": 0.2852, "step": 27402 }, { "epoch": 2.5716028528528527, "grad_norm": 1.0363901994573443, "learning_rate": 6.086766316151005e-07, "loss": 0.3182, "step": 27403 }, { "epoch": 2.5716966966966965, "grad_norm": 1.6053630017373441, "learning_rate": 6.084155910059036e-07, "loss": 0.2805, "step": 27404 }, { "epoch": 2.5717905405405403, "grad_norm": 1.7132167512381793, "learning_rate": 6.081546027580854e-07, "loss": 0.3323, "step": 27405 }, { "epoch": 2.5718843843843846, "grad_norm": 1.408549075417352, "learning_rate": 6.078936668747548e-07, "loss": 0.3032, "step": 27406 }, { "epoch": 2.571978228228228, "grad_norm": 1.1500657130054657, "learning_rate": 6.076327833590268e-07, "loss": 0.2799, "step": 27407 }, { "epoch": 2.5720720720720722, "grad_norm": 1.164011913514088, "learning_rate": 6.073719522140098e-07, "loss": 0.3071, "step": 27408 }, { "epoch": 2.572165915915916, "grad_norm": 1.1443708846604848, "learning_rate": 6.071111734428126e-07, "loss": 0.2961, "step": 27409 }, { "epoch": 2.57225975975976, "grad_norm": 1.0565027710469161, "learning_rate": 6.06850447048547e-07, "loss": 0.3394, "step": 27410 }, { "epoch": 2.5723536036036037, "grad_norm": 1.0090360617079206, "learning_rate": 6.065897730343213e-07, "loss": 0.3299, "step": 27411 }, { "epoch": 2.5724474474474475, "grad_norm": 1.2202815636986222, "learning_rate": 6.06329151403241e-07, "loss": 0.2934, "step": 27412 }, { "epoch": 2.5725412912912913, "grad_norm": 1.1250689313846411, "learning_rate": 6.060685821584167e-07, "loss": 0.3388, "step": 27413 }, { "epoch": 2.572635135135135, "grad_norm": 1.1247891423447423, "learning_rate": 6.058080653029535e-07, "loss": 0.2992, "step": 27414 }, { "epoch": 2.572728978978979, "grad_norm": 1.0164929268518124, "learning_rate": 6.055476008399586e-07, "loss": 0.3171, "step": 27415 }, { "epoch": 2.5728228228228227, "grad_norm": 1.0960481854464212, "learning_rate": 6.052871887725359e-07, "loss": 0.3231, "step": 27416 }, { "epoch": 2.5729166666666665, "grad_norm": 1.8528615945009008, "learning_rate": 6.05026829103792e-07, "loss": 0.2852, "step": 27417 }, { "epoch": 2.5730105105105103, "grad_norm": 1.140965221848404, "learning_rate": 6.047665218368293e-07, "loss": 0.2981, "step": 27418 }, { "epoch": 2.5731043543543546, "grad_norm": 1.1532020208660068, "learning_rate": 6.045062669747537e-07, "loss": 0.3124, "step": 27419 }, { "epoch": 2.573198198198198, "grad_norm": 1.164954976573332, "learning_rate": 6.042460645206671e-07, "loss": 0.2891, "step": 27420 }, { "epoch": 2.573292042042042, "grad_norm": 1.1031955338385364, "learning_rate": 6.039859144776711e-07, "loss": 0.3117, "step": 27421 }, { "epoch": 2.573385885885886, "grad_norm": 1.209302876134903, "learning_rate": 6.037258168488691e-07, "loss": 0.3093, "step": 27422 }, { "epoch": 2.57347972972973, "grad_norm": 1.153885833904524, "learning_rate": 6.034657716373621e-07, "loss": 0.2909, "step": 27423 }, { "epoch": 2.5735735735735736, "grad_norm": 1.257037688332334, "learning_rate": 6.032057788462487e-07, "loss": 0.3349, "step": 27424 }, { "epoch": 2.5736674174174174, "grad_norm": 1.0753075545934154, "learning_rate": 6.029458384786313e-07, "loss": 0.2844, "step": 27425 }, { "epoch": 2.5737612612612613, "grad_norm": 1.2536707227963197, "learning_rate": 6.026859505376087e-07, "loss": 0.2881, "step": 27426 }, { "epoch": 2.573855105105105, "grad_norm": 1.1529848025861285, "learning_rate": 6.024261150262784e-07, "loss": 0.311, "step": 27427 }, { "epoch": 2.573948948948949, "grad_norm": 1.376346852254514, "learning_rate": 6.021663319477394e-07, "loss": 0.3093, "step": 27428 }, { "epoch": 2.5740427927927927, "grad_norm": 1.5142482430010933, "learning_rate": 6.019066013050889e-07, "loss": 0.3017, "step": 27429 }, { "epoch": 2.5741366366366365, "grad_norm": 1.1091262424834003, "learning_rate": 6.016469231014227e-07, "loss": 0.2903, "step": 27430 }, { "epoch": 2.5742304804804803, "grad_norm": 1.0846913681203174, "learning_rate": 6.013872973398388e-07, "loss": 0.3294, "step": 27431 }, { "epoch": 2.5743243243243246, "grad_norm": 1.2292235012556325, "learning_rate": 6.011277240234314e-07, "loss": 0.3571, "step": 27432 }, { "epoch": 2.574418168168168, "grad_norm": 1.1939425134928219, "learning_rate": 6.008682031552953e-07, "loss": 0.3196, "step": 27433 }, { "epoch": 2.574512012012012, "grad_norm": 1.0816077505431425, "learning_rate": 6.006087347385265e-07, "loss": 0.3081, "step": 27434 }, { "epoch": 2.574605855855856, "grad_norm": 1.2636278730834387, "learning_rate": 6.003493187762171e-07, "loss": 0.3208, "step": 27435 }, { "epoch": 2.5746996996997, "grad_norm": 1.0907914031956967, "learning_rate": 6.0008995527146e-07, "loss": 0.3299, "step": 27436 }, { "epoch": 2.5747935435435436, "grad_norm": 1.189788384271736, "learning_rate": 5.998306442273488e-07, "loss": 0.307, "step": 27437 }, { "epoch": 2.5748873873873874, "grad_norm": 1.142863792655679, "learning_rate": 5.995713856469753e-07, "loss": 0.266, "step": 27438 }, { "epoch": 2.5749812312312312, "grad_norm": 1.0841227488671143, "learning_rate": 5.993121795334294e-07, "loss": 0.2923, "step": 27439 }, { "epoch": 2.575075075075075, "grad_norm": 1.2660063871915392, "learning_rate": 5.990530258898025e-07, "loss": 0.3494, "step": 27440 }, { "epoch": 2.575168918918919, "grad_norm": 1.3122426590754168, "learning_rate": 5.987939247191848e-07, "loss": 0.2393, "step": 27441 }, { "epoch": 2.5752627627627627, "grad_norm": 1.0023690561630263, "learning_rate": 5.985348760246634e-07, "loss": 0.2815, "step": 27442 }, { "epoch": 2.5753566066066065, "grad_norm": 1.1269365330176813, "learning_rate": 5.982758798093303e-07, "loss": 0.3377, "step": 27443 }, { "epoch": 2.5754504504504503, "grad_norm": 1.1078425154066602, "learning_rate": 5.980169360762717e-07, "loss": 0.3297, "step": 27444 }, { "epoch": 2.5755442942942945, "grad_norm": 1.4755366591964385, "learning_rate": 5.977580448285742e-07, "loss": 0.3536, "step": 27445 }, { "epoch": 2.575638138138138, "grad_norm": 1.2223524861745227, "learning_rate": 5.974992060693269e-07, "loss": 0.2884, "step": 27446 }, { "epoch": 2.575731981981982, "grad_norm": 0.9865586441795139, "learning_rate": 5.972404198016147e-07, "loss": 0.3215, "step": 27447 }, { "epoch": 2.575825825825826, "grad_norm": 1.505438579872572, "learning_rate": 5.969816860285216e-07, "loss": 0.2867, "step": 27448 }, { "epoch": 2.57591966966967, "grad_norm": 1.2391322729906624, "learning_rate": 5.967230047531353e-07, "loss": 0.2781, "step": 27449 }, { "epoch": 2.5760135135135136, "grad_norm": 1.1324770786117087, "learning_rate": 5.964643759785393e-07, "loss": 0.3199, "step": 27450 }, { "epoch": 2.5761073573573574, "grad_norm": 1.116879032556832, "learning_rate": 5.962057997078164e-07, "loss": 0.3357, "step": 27451 }, { "epoch": 2.576201201201201, "grad_norm": 1.0835937325128422, "learning_rate": 5.959472759440505e-07, "loss": 0.3127, "step": 27452 }, { "epoch": 2.576295045045045, "grad_norm": 1.2323944271866332, "learning_rate": 5.956888046903231e-07, "loss": 0.2884, "step": 27453 }, { "epoch": 2.576388888888889, "grad_norm": 1.4242921941650462, "learning_rate": 5.954303859497151e-07, "loss": 0.3326, "step": 27454 }, { "epoch": 2.5764827327327327, "grad_norm": 0.9737949938461365, "learning_rate": 5.951720197253108e-07, "loss": 0.3201, "step": 27455 }, { "epoch": 2.5765765765765765, "grad_norm": 1.2151492133921296, "learning_rate": 5.949137060201887e-07, "loss": 0.3132, "step": 27456 }, { "epoch": 2.5766704204204203, "grad_norm": 1.5896231188523609, "learning_rate": 5.946554448374275e-07, "loss": 0.3215, "step": 27457 }, { "epoch": 2.5767642642642645, "grad_norm": 1.0766467293259812, "learning_rate": 5.943972361801093e-07, "loss": 0.271, "step": 27458 }, { "epoch": 2.576858108108108, "grad_norm": 0.9581443064366827, "learning_rate": 5.941390800513114e-07, "loss": 0.3213, "step": 27459 }, { "epoch": 2.576951951951952, "grad_norm": 1.060290180963461, "learning_rate": 5.938809764541103e-07, "loss": 0.2993, "step": 27460 }, { "epoch": 2.577045795795796, "grad_norm": 1.2064811473183004, "learning_rate": 5.936229253915865e-07, "loss": 0.3107, "step": 27461 }, { "epoch": 2.5771396396396398, "grad_norm": 1.0038167981122326, "learning_rate": 5.933649268668151e-07, "loss": 0.3299, "step": 27462 }, { "epoch": 2.5772334834834836, "grad_norm": 3.345534979480092, "learning_rate": 5.931069808828721e-07, "loss": 0.2701, "step": 27463 }, { "epoch": 2.5773273273273274, "grad_norm": 1.1871463361904093, "learning_rate": 5.928490874428333e-07, "loss": 0.3237, "step": 27464 }, { "epoch": 2.577421171171171, "grad_norm": 1.378207256660446, "learning_rate": 5.925912465497735e-07, "loss": 0.3116, "step": 27465 }, { "epoch": 2.577515015015015, "grad_norm": 1.1703160399066233, "learning_rate": 5.923334582067658e-07, "loss": 0.3435, "step": 27466 }, { "epoch": 2.577608858858859, "grad_norm": 1.1405475396712408, "learning_rate": 5.92075722416886e-07, "loss": 0.3191, "step": 27467 }, { "epoch": 2.5777027027027026, "grad_norm": 1.172266507764723, "learning_rate": 5.918180391832068e-07, "loss": 0.3292, "step": 27468 }, { "epoch": 2.5777965465465464, "grad_norm": 1.1736664198593967, "learning_rate": 5.915604085087983e-07, "loss": 0.3063, "step": 27469 }, { "epoch": 2.5778903903903903, "grad_norm": 1.1839527422754603, "learning_rate": 5.913028303967349e-07, "loss": 0.3001, "step": 27470 }, { "epoch": 2.5779842342342345, "grad_norm": 1.2161202697287672, "learning_rate": 5.91045304850087e-07, "loss": 0.2878, "step": 27471 }, { "epoch": 2.578078078078078, "grad_norm": 1.3417869156594056, "learning_rate": 5.90787831871924e-07, "loss": 0.345, "step": 27472 }, { "epoch": 2.578171921921922, "grad_norm": 1.0100837586055957, "learning_rate": 5.905304114653171e-07, "loss": 0.2826, "step": 27473 }, { "epoch": 2.5782657657657655, "grad_norm": 1.3459067710927402, "learning_rate": 5.902730436333354e-07, "loss": 0.3372, "step": 27474 }, { "epoch": 2.5783596096096097, "grad_norm": 1.1062226433153055, "learning_rate": 5.900157283790475e-07, "loss": 0.2867, "step": 27475 }, { "epoch": 2.5784534534534536, "grad_norm": 1.3626792545295099, "learning_rate": 5.897584657055206e-07, "loss": 0.2906, "step": 27476 }, { "epoch": 2.5785472972972974, "grad_norm": 1.1637860966849958, "learning_rate": 5.895012556158225e-07, "loss": 0.3, "step": 27477 }, { "epoch": 2.578641141141141, "grad_norm": 1.6680931424037913, "learning_rate": 5.892440981130193e-07, "loss": 0.3456, "step": 27478 }, { "epoch": 2.578734984984985, "grad_norm": 1.4013596037748226, "learning_rate": 5.88986993200179e-07, "loss": 0.3136, "step": 27479 }, { "epoch": 2.578828828828829, "grad_norm": 1.3458042778778327, "learning_rate": 5.887299408803654e-07, "loss": 0.3057, "step": 27480 }, { "epoch": 2.5789226726726726, "grad_norm": 1.2362434611904303, "learning_rate": 5.884729411566431e-07, "loss": 0.2689, "step": 27481 }, { "epoch": 2.5790165165165164, "grad_norm": 1.0895796935144504, "learning_rate": 5.882159940320781e-07, "loss": 0.2936, "step": 27482 }, { "epoch": 2.5791103603603602, "grad_norm": 1.336337422265498, "learning_rate": 5.879590995097334e-07, "loss": 0.3458, "step": 27483 }, { "epoch": 2.579204204204204, "grad_norm": 1.135545750439607, "learning_rate": 5.877022575926705e-07, "loss": 0.2927, "step": 27484 }, { "epoch": 2.579298048048048, "grad_norm": 1.0454261865292835, "learning_rate": 5.874454682839537e-07, "loss": 0.3546, "step": 27485 }, { "epoch": 2.579391891891892, "grad_norm": 1.34334305413543, "learning_rate": 5.871887315866436e-07, "loss": 0.336, "step": 27486 }, { "epoch": 2.5794857357357355, "grad_norm": 1.1109707774652013, "learning_rate": 5.869320475038021e-07, "loss": 0.3055, "step": 27487 }, { "epoch": 2.5795795795795797, "grad_norm": 1.2178207909275662, "learning_rate": 5.866754160384891e-07, "loss": 0.3468, "step": 27488 }, { "epoch": 2.5796734234234235, "grad_norm": 1.133559859187336, "learning_rate": 5.864188371937646e-07, "loss": 0.3154, "step": 27489 }, { "epoch": 2.5797672672672673, "grad_norm": 1.0436834074077421, "learning_rate": 5.861623109726866e-07, "loss": 0.3038, "step": 27490 }, { "epoch": 2.579861111111111, "grad_norm": 1.1094434655649914, "learning_rate": 5.859058373783161e-07, "loss": 0.3285, "step": 27491 }, { "epoch": 2.579954954954955, "grad_norm": 1.2750660675862517, "learning_rate": 5.856494164137095e-07, "loss": 0.3444, "step": 27492 }, { "epoch": 2.580048798798799, "grad_norm": 1.0857542936287015, "learning_rate": 5.853930480819237e-07, "loss": 0.3006, "step": 27493 }, { "epoch": 2.5801426426426426, "grad_norm": 1.1571857940645485, "learning_rate": 5.851367323860169e-07, "loss": 0.2964, "step": 27494 }, { "epoch": 2.5802364864864864, "grad_norm": 0.9739213519611359, "learning_rate": 5.848804693290444e-07, "loss": 0.278, "step": 27495 }, { "epoch": 2.58033033033033, "grad_norm": 1.1164450600792195, "learning_rate": 5.846242589140611e-07, "loss": 0.3265, "step": 27496 }, { "epoch": 2.580424174174174, "grad_norm": 1.0408577832392532, "learning_rate": 5.843681011441233e-07, "loss": 0.3054, "step": 27497 }, { "epoch": 2.580518018018018, "grad_norm": 1.2524902101234179, "learning_rate": 5.841119960222841e-07, "loss": 0.3121, "step": 27498 }, { "epoch": 2.580611861861862, "grad_norm": 1.0106607358147845, "learning_rate": 5.838559435515973e-07, "loss": 0.3157, "step": 27499 }, { "epoch": 2.5807057057057055, "grad_norm": 1.0538801374018403, "learning_rate": 5.835999437351159e-07, "loss": 0.2836, "step": 27500 }, { "epoch": 2.5807995495495497, "grad_norm": 1.2209678519734872, "learning_rate": 5.833439965758924e-07, "loss": 0.2956, "step": 27501 }, { "epoch": 2.5808933933933935, "grad_norm": 1.1391903364004183, "learning_rate": 5.830881020769769e-07, "loss": 0.3218, "step": 27502 }, { "epoch": 2.5809872372372373, "grad_norm": 13.970627157328204, "learning_rate": 5.828322602414227e-07, "loss": 0.3338, "step": 27503 }, { "epoch": 2.581081081081081, "grad_norm": 1.049591146249159, "learning_rate": 5.825764710722798e-07, "loss": 0.3519, "step": 27504 }, { "epoch": 2.581174924924925, "grad_norm": 0.9709546775507529, "learning_rate": 5.823207345725962e-07, "loss": 0.3296, "step": 27505 }, { "epoch": 2.5812687687687688, "grad_norm": 1.0596385925805472, "learning_rate": 5.820650507454234e-07, "loss": 0.3458, "step": 27506 }, { "epoch": 2.5813626126126126, "grad_norm": 1.0988054126305746, "learning_rate": 5.81809419593809e-07, "loss": 0.3272, "step": 27507 }, { "epoch": 2.5814564564564564, "grad_norm": 1.5936260608629185, "learning_rate": 5.815538411208e-07, "loss": 0.2984, "step": 27508 }, { "epoch": 2.5815503003003, "grad_norm": 1.2815231990451998, "learning_rate": 5.812983153294455e-07, "loss": 0.3099, "step": 27509 }, { "epoch": 2.581644144144144, "grad_norm": 1.0491316234295893, "learning_rate": 5.810428422227915e-07, "loss": 0.2871, "step": 27510 }, { "epoch": 2.581737987987988, "grad_norm": 2.3451590717687054, "learning_rate": 5.807874218038822e-07, "loss": 0.3026, "step": 27511 }, { "epoch": 2.581831831831832, "grad_norm": 1.1595751530411695, "learning_rate": 5.805320540757664e-07, "loss": 0.3102, "step": 27512 }, { "epoch": 2.5819256756756754, "grad_norm": 1.17028632334697, "learning_rate": 5.802767390414865e-07, "loss": 0.311, "step": 27513 }, { "epoch": 2.5820195195195197, "grad_norm": 1.3007785093387545, "learning_rate": 5.800214767040874e-07, "loss": 0.3508, "step": 27514 }, { "epoch": 2.5821133633633635, "grad_norm": 1.2398267946676456, "learning_rate": 5.797662670666121e-07, "loss": 0.3011, "step": 27515 }, { "epoch": 2.5822072072072073, "grad_norm": 2.1497383967636425, "learning_rate": 5.795111101321044e-07, "loss": 0.3353, "step": 27516 }, { "epoch": 2.582301051051051, "grad_norm": 1.251339356057588, "learning_rate": 5.792560059036046e-07, "loss": 0.2996, "step": 27517 }, { "epoch": 2.582394894894895, "grad_norm": 1.1453787355589802, "learning_rate": 5.790009543841568e-07, "loss": 0.3063, "step": 27518 }, { "epoch": 2.5824887387387387, "grad_norm": 1.0656418028519856, "learning_rate": 5.787459555768004e-07, "loss": 0.3184, "step": 27519 }, { "epoch": 2.5825825825825826, "grad_norm": 1.0740808229267473, "learning_rate": 5.784910094845764e-07, "loss": 0.2961, "step": 27520 }, { "epoch": 2.5826764264264264, "grad_norm": 1.2379250664416028, "learning_rate": 5.782361161105248e-07, "loss": 0.297, "step": 27521 }, { "epoch": 2.58277027027027, "grad_norm": 1.1760532889763033, "learning_rate": 5.779812754576847e-07, "loss": 0.3389, "step": 27522 }, { "epoch": 2.582864114114114, "grad_norm": 1.0961208988169113, "learning_rate": 5.777264875290928e-07, "loss": 0.3328, "step": 27523 }, { "epoch": 2.582957957957958, "grad_norm": 1.0622979317985028, "learning_rate": 5.774717523277901e-07, "loss": 0.3176, "step": 27524 }, { "epoch": 2.583051801801802, "grad_norm": 1.1886204971465186, "learning_rate": 5.772170698568119e-07, "loss": 0.3154, "step": 27525 }, { "epoch": 2.5831456456456454, "grad_norm": 2.3655135846843867, "learning_rate": 5.769624401191948e-07, "loss": 0.2917, "step": 27526 }, { "epoch": 2.5832394894894897, "grad_norm": 1.0322814466624004, "learning_rate": 5.767078631179757e-07, "loss": 0.3287, "step": 27527 }, { "epoch": 2.5833333333333335, "grad_norm": 1.0067379214675005, "learning_rate": 5.764533388561888e-07, "loss": 0.3062, "step": 27528 }, { "epoch": 2.5834271771771773, "grad_norm": 1.0513093581704784, "learning_rate": 5.761988673368685e-07, "loss": 0.3431, "step": 27529 }, { "epoch": 2.583521021021021, "grad_norm": 1.4545238806728824, "learning_rate": 5.759444485630511e-07, "loss": 0.2911, "step": 27530 }, { "epoch": 2.583614864864865, "grad_norm": 1.25289331103755, "learning_rate": 5.756900825377687e-07, "loss": 0.3274, "step": 27531 }, { "epoch": 2.5837087087087087, "grad_norm": 1.1267938404176343, "learning_rate": 5.754357692640528e-07, "loss": 0.2707, "step": 27532 }, { "epoch": 2.5838025525525525, "grad_norm": 1.1067954842156127, "learning_rate": 5.75181508744938e-07, "loss": 0.3391, "step": 27533 }, { "epoch": 2.5838963963963963, "grad_norm": 1.210283607187558, "learning_rate": 5.749273009834549e-07, "loss": 0.3531, "step": 27534 }, { "epoch": 2.58399024024024, "grad_norm": 1.046762013801929, "learning_rate": 5.746731459826333e-07, "loss": 0.3734, "step": 27535 }, { "epoch": 2.584084084084084, "grad_norm": 1.3911547862041718, "learning_rate": 5.744190437455061e-07, "loss": 0.3281, "step": 27536 }, { "epoch": 2.5841779279279278, "grad_norm": 1.433744163293227, "learning_rate": 5.74164994275101e-07, "loss": 0.3107, "step": 27537 }, { "epoch": 2.584271771771772, "grad_norm": 1.1360063413801282, "learning_rate": 5.739109975744478e-07, "loss": 0.3205, "step": 27538 }, { "epoch": 2.5843656156156154, "grad_norm": 1.0934608525486638, "learning_rate": 5.736570536465746e-07, "loss": 0.2962, "step": 27539 }, { "epoch": 2.5844594594594597, "grad_norm": 1.0843644608584186, "learning_rate": 5.734031624945091e-07, "loss": 0.2975, "step": 27540 }, { "epoch": 2.5845533033033035, "grad_norm": 0.9180348810101588, "learning_rate": 5.731493241212782e-07, "loss": 0.3359, "step": 27541 }, { "epoch": 2.5846471471471473, "grad_norm": 0.8839103273368527, "learning_rate": 5.728955385299095e-07, "loss": 0.3364, "step": 27542 }, { "epoch": 2.584740990990991, "grad_norm": 1.0849831204066693, "learning_rate": 5.726418057234285e-07, "loss": 0.2815, "step": 27543 }, { "epoch": 2.584834834834835, "grad_norm": 1.0673095769472614, "learning_rate": 5.723881257048591e-07, "loss": 0.3257, "step": 27544 }, { "epoch": 2.5849286786786787, "grad_norm": 1.042660324556577, "learning_rate": 5.721344984772287e-07, "loss": 0.2954, "step": 27545 }, { "epoch": 2.5850225225225225, "grad_norm": 1.356562314486363, "learning_rate": 5.718809240435591e-07, "loss": 0.2794, "step": 27546 }, { "epoch": 2.5851163663663663, "grad_norm": 1.552688258664921, "learning_rate": 5.716274024068741e-07, "loss": 0.2743, "step": 27547 }, { "epoch": 2.58521021021021, "grad_norm": 1.1615095489301575, "learning_rate": 5.713739335701973e-07, "loss": 0.2869, "step": 27548 }, { "epoch": 2.585304054054054, "grad_norm": 1.1655176119327553, "learning_rate": 5.711205175365503e-07, "loss": 0.3092, "step": 27549 }, { "epoch": 2.5853978978978978, "grad_norm": 1.1651883876956655, "learning_rate": 5.70867154308955e-07, "loss": 0.2743, "step": 27550 }, { "epoch": 2.585491741741742, "grad_norm": 1.1061853216128184, "learning_rate": 5.706138438904319e-07, "loss": 0.2987, "step": 27551 }, { "epoch": 2.5855855855855854, "grad_norm": 1.1904205898761866, "learning_rate": 5.703605862840006e-07, "loss": 0.3215, "step": 27552 }, { "epoch": 2.5856794294294296, "grad_norm": 1.3404121141262622, "learning_rate": 5.701073814926811e-07, "loss": 0.2796, "step": 27553 }, { "epoch": 2.585773273273273, "grad_norm": 1.0883436684440235, "learning_rate": 5.698542295194931e-07, "loss": 0.3163, "step": 27554 }, { "epoch": 2.5858671171171173, "grad_norm": 1.1194553796911155, "learning_rate": 5.696011303674547e-07, "loss": 0.2557, "step": 27555 }, { "epoch": 2.585960960960961, "grad_norm": 1.4899574603967032, "learning_rate": 5.693480840395826e-07, "loss": 0.329, "step": 27556 }, { "epoch": 2.586054804804805, "grad_norm": 1.146624877869727, "learning_rate": 5.690950905388959e-07, "loss": 0.329, "step": 27557 }, { "epoch": 2.5861486486486487, "grad_norm": 1.1489842812777529, "learning_rate": 5.688421498684094e-07, "loss": 0.2998, "step": 27558 }, { "epoch": 2.5862424924924925, "grad_norm": 1.0783331847121982, "learning_rate": 5.68589262031139e-07, "loss": 0.317, "step": 27559 }, { "epoch": 2.5863363363363363, "grad_norm": 1.224950481622873, "learning_rate": 5.683364270301011e-07, "loss": 0.3167, "step": 27560 }, { "epoch": 2.58643018018018, "grad_norm": 1.0987307231961432, "learning_rate": 5.680836448683096e-07, "loss": 0.2634, "step": 27561 }, { "epoch": 2.586524024024024, "grad_norm": 1.2978582130413507, "learning_rate": 5.678309155487783e-07, "loss": 0.2911, "step": 27562 }, { "epoch": 2.5866178678678677, "grad_norm": 1.24382385706058, "learning_rate": 5.67578239074521e-07, "loss": 0.3216, "step": 27563 }, { "epoch": 2.5867117117117115, "grad_norm": 1.2663021955544504, "learning_rate": 5.673256154485496e-07, "loss": 0.287, "step": 27564 }, { "epoch": 2.5868055555555554, "grad_norm": 1.1221743462210625, "learning_rate": 5.670730446738759e-07, "loss": 0.2974, "step": 27565 }, { "epoch": 2.5868993993993996, "grad_norm": 1.0199527865246452, "learning_rate": 5.668205267535126e-07, "loss": 0.3366, "step": 27566 }, { "epoch": 2.586993243243243, "grad_norm": 0.9828330972755331, "learning_rate": 5.665680616904706e-07, "loss": 0.299, "step": 27567 }, { "epoch": 2.5870870870870872, "grad_norm": 1.078643792028083, "learning_rate": 5.663156494877576e-07, "loss": 0.2673, "step": 27568 }, { "epoch": 2.587180930930931, "grad_norm": 1.1428954926601642, "learning_rate": 5.660632901483864e-07, "loss": 0.3303, "step": 27569 }, { "epoch": 2.587274774774775, "grad_norm": 1.0657134381052957, "learning_rate": 5.658109836753644e-07, "loss": 0.3158, "step": 27570 }, { "epoch": 2.5873686186186187, "grad_norm": 2.1052416237132388, "learning_rate": 5.655587300716986e-07, "loss": 0.3183, "step": 27571 }, { "epoch": 2.5874624624624625, "grad_norm": 1.0205135244456347, "learning_rate": 5.653065293403992e-07, "loss": 0.3185, "step": 27572 }, { "epoch": 2.5875563063063063, "grad_norm": 1.0639547135785279, "learning_rate": 5.650543814844723e-07, "loss": 0.3442, "step": 27573 }, { "epoch": 2.58765015015015, "grad_norm": 1.2153311717188007, "learning_rate": 5.648022865069236e-07, "loss": 0.3024, "step": 27574 }, { "epoch": 2.587743993993994, "grad_norm": 1.3097280778936526, "learning_rate": 5.645502444107592e-07, "loss": 0.2971, "step": 27575 }, { "epoch": 2.5878378378378377, "grad_norm": 1.0678370870851226, "learning_rate": 5.642982551989845e-07, "loss": 0.2697, "step": 27576 }, { "epoch": 2.5879316816816815, "grad_norm": 1.1038247094944058, "learning_rate": 5.640463188746026e-07, "loss": 0.2882, "step": 27577 }, { "epoch": 2.5880255255255253, "grad_norm": 1.3438710150082904, "learning_rate": 5.637944354406194e-07, "loss": 0.2953, "step": 27578 }, { "epoch": 2.5881193693693696, "grad_norm": 1.81659711182389, "learning_rate": 5.635426049000376e-07, "loss": 0.2977, "step": 27579 }, { "epoch": 2.588213213213213, "grad_norm": 1.0424970688778366, "learning_rate": 5.632908272558585e-07, "loss": 0.3384, "step": 27580 }, { "epoch": 2.588307057057057, "grad_norm": 1.3507200985233168, "learning_rate": 5.630391025110854e-07, "loss": 0.2695, "step": 27581 }, { "epoch": 2.588400900900901, "grad_norm": 1.0692981929042846, "learning_rate": 5.627874306687198e-07, "loss": 0.326, "step": 27582 }, { "epoch": 2.588494744744745, "grad_norm": 1.1191941239801497, "learning_rate": 5.625358117317609e-07, "loss": 0.3203, "step": 27583 }, { "epoch": 2.5885885885885886, "grad_norm": 1.2596235940300866, "learning_rate": 5.622842457032107e-07, "loss": 0.3532, "step": 27584 }, { "epoch": 2.5886824324324325, "grad_norm": 1.3270482619269726, "learning_rate": 5.620327325860675e-07, "loss": 0.3072, "step": 27585 }, { "epoch": 2.5887762762762763, "grad_norm": 1.2279258574754752, "learning_rate": 5.61781272383331e-07, "loss": 0.2738, "step": 27586 }, { "epoch": 2.58887012012012, "grad_norm": 1.2956446119388163, "learning_rate": 5.615298650979978e-07, "loss": 0.308, "step": 27587 }, { "epoch": 2.588963963963964, "grad_norm": 1.0753447650508652, "learning_rate": 5.612785107330671e-07, "loss": 0.3199, "step": 27588 }, { "epoch": 2.5890578078078077, "grad_norm": 1.0130308250255022, "learning_rate": 5.610272092915336e-07, "loss": 0.3228, "step": 27589 }, { "epoch": 2.5891516516516515, "grad_norm": 1.4500732423197744, "learning_rate": 5.607759607763969e-07, "loss": 0.3171, "step": 27590 }, { "epoch": 2.5892454954954953, "grad_norm": 1.122174059382631, "learning_rate": 5.605247651906504e-07, "loss": 0.3429, "step": 27591 }, { "epoch": 2.5893393393393396, "grad_norm": 1.004933474898424, "learning_rate": 5.602736225372884e-07, "loss": 0.3013, "step": 27592 }, { "epoch": 2.589433183183183, "grad_norm": 1.0412511259043091, "learning_rate": 5.600225328193076e-07, "loss": 0.3305, "step": 27593 }, { "epoch": 2.589527027027027, "grad_norm": 1.103386630193941, "learning_rate": 5.59771496039701e-07, "loss": 0.288, "step": 27594 }, { "epoch": 2.589620870870871, "grad_norm": 1.0224743702053993, "learning_rate": 5.595205122014602e-07, "loss": 0.3113, "step": 27595 }, { "epoch": 2.589714714714715, "grad_norm": 1.0019935559194066, "learning_rate": 5.592695813075805e-07, "loss": 0.3015, "step": 27596 }, { "epoch": 2.5898085585585586, "grad_norm": 1.1846553323040288, "learning_rate": 5.590187033610517e-07, "loss": 0.2845, "step": 27597 }, { "epoch": 2.5899024024024024, "grad_norm": 1.4419679405138222, "learning_rate": 5.587678783648654e-07, "loss": 0.2986, "step": 27598 }, { "epoch": 2.5899962462462462, "grad_norm": 1.3654565354470365, "learning_rate": 5.58517106322013e-07, "loss": 0.2982, "step": 27599 }, { "epoch": 2.59009009009009, "grad_norm": 1.2896356152493438, "learning_rate": 5.582663872354832e-07, "loss": 0.2909, "step": 27600 }, { "epoch": 2.590183933933934, "grad_norm": 1.0679463848933588, "learning_rate": 5.580157211082654e-07, "loss": 0.3099, "step": 27601 }, { "epoch": 2.5902777777777777, "grad_norm": 1.2543064911379258, "learning_rate": 5.577651079433499e-07, "loss": 0.3034, "step": 27602 }, { "epoch": 2.5903716216216215, "grad_norm": 2.013068487474897, "learning_rate": 5.575145477437238e-07, "loss": 0.3614, "step": 27603 }, { "epoch": 2.5904654654654653, "grad_norm": 1.0919996506109437, "learning_rate": 5.572640405123736e-07, "loss": 0.2945, "step": 27604 }, { "epoch": 2.5905593093093096, "grad_norm": 1.288587172904507, "learning_rate": 5.570135862522885e-07, "loss": 0.2674, "step": 27605 }, { "epoch": 2.590653153153153, "grad_norm": 1.3402579915932038, "learning_rate": 5.567631849664529e-07, "loss": 0.2916, "step": 27606 }, { "epoch": 2.590746996996997, "grad_norm": 1.0938456125595568, "learning_rate": 5.565128366578515e-07, "loss": 0.3067, "step": 27607 }, { "epoch": 2.590840840840841, "grad_norm": 1.0382575589564338, "learning_rate": 5.56262541329472e-07, "loss": 0.3132, "step": 27608 }, { "epoch": 2.590934684684685, "grad_norm": 1.6492311176892598, "learning_rate": 5.560122989842976e-07, "loss": 0.3379, "step": 27609 }, { "epoch": 2.5910285285285286, "grad_norm": 1.1292113323075184, "learning_rate": 5.557621096253108e-07, "loss": 0.3028, "step": 27610 }, { "epoch": 2.5911223723723724, "grad_norm": 1.0406603478538008, "learning_rate": 5.555119732554959e-07, "loss": 0.3376, "step": 27611 }, { "epoch": 2.5912162162162162, "grad_norm": 1.1112500751763914, "learning_rate": 5.552618898778345e-07, "loss": 0.2915, "step": 27612 }, { "epoch": 2.59131006006006, "grad_norm": 1.2344951625792846, "learning_rate": 5.55011859495308e-07, "loss": 0.3363, "step": 27613 }, { "epoch": 2.591403903903904, "grad_norm": 1.0370520529792802, "learning_rate": 5.547618821108991e-07, "loss": 0.3027, "step": 27614 }, { "epoch": 2.5914977477477477, "grad_norm": 1.1225601418485285, "learning_rate": 5.54511957727587e-07, "loss": 0.3085, "step": 27615 }, { "epoch": 2.5915915915915915, "grad_norm": 1.060899394851762, "learning_rate": 5.542620863483517e-07, "loss": 0.2936, "step": 27616 }, { "epoch": 2.5916854354354353, "grad_norm": 1.0958170548115598, "learning_rate": 5.54012267976174e-07, "loss": 0.3307, "step": 27617 }, { "epoch": 2.5917792792792795, "grad_norm": 1.2048109028620373, "learning_rate": 5.537625026140303e-07, "loss": 0.3157, "step": 27618 }, { "epoch": 2.591873123123123, "grad_norm": 1.2070997956342207, "learning_rate": 5.535127902648996e-07, "loss": 0.324, "step": 27619 }, { "epoch": 2.591966966966967, "grad_norm": 1.2225003054373695, "learning_rate": 5.532631309317594e-07, "loss": 0.2991, "step": 27620 }, { "epoch": 2.592060810810811, "grad_norm": 1.062601660651253, "learning_rate": 5.530135246175866e-07, "loss": 0.3189, "step": 27621 }, { "epoch": 2.5921546546546548, "grad_norm": 2.1135505956736536, "learning_rate": 5.527639713253569e-07, "loss": 0.3078, "step": 27622 }, { "epoch": 2.5922484984984986, "grad_norm": 1.0466787600954708, "learning_rate": 5.52514471058046e-07, "loss": 0.2949, "step": 27623 }, { "epoch": 2.5923423423423424, "grad_norm": 1.0773772875480923, "learning_rate": 5.522650238186284e-07, "loss": 0.3353, "step": 27624 }, { "epoch": 2.592436186186186, "grad_norm": 1.0793876914939142, "learning_rate": 5.520156296100771e-07, "loss": 0.3336, "step": 27625 }, { "epoch": 2.59253003003003, "grad_norm": 0.99689890845478, "learning_rate": 5.517662884353681e-07, "loss": 0.3309, "step": 27626 }, { "epoch": 2.592623873873874, "grad_norm": 10.70464663672515, "learning_rate": 5.515170002974735e-07, "loss": 0.2905, "step": 27627 }, { "epoch": 2.5927177177177176, "grad_norm": 1.0411694229959916, "learning_rate": 5.512677651993637e-07, "loss": 0.3264, "step": 27628 }, { "epoch": 2.5928115615615615, "grad_norm": 1.0844463999649323, "learning_rate": 5.510185831440135e-07, "loss": 0.3095, "step": 27629 }, { "epoch": 2.5929054054054053, "grad_norm": 1.9452183532571232, "learning_rate": 5.507694541343922e-07, "loss": 0.2844, "step": 27630 }, { "epoch": 2.5929992492492495, "grad_norm": 1.0652912657444302, "learning_rate": 5.505203781734691e-07, "loss": 0.3266, "step": 27631 }, { "epoch": 2.593093093093093, "grad_norm": 1.11120555880951, "learning_rate": 5.502713552642164e-07, "loss": 0.3114, "step": 27632 }, { "epoch": 2.593186936936937, "grad_norm": 1.2902342132581852, "learning_rate": 5.500223854096015e-07, "loss": 0.3204, "step": 27633 }, { "epoch": 2.5932807807807805, "grad_norm": 1.1248200622484341, "learning_rate": 5.497734686125933e-07, "loss": 0.3121, "step": 27634 }, { "epoch": 2.5933746246246248, "grad_norm": 1.269584145898126, "learning_rate": 5.495246048761605e-07, "loss": 0.3062, "step": 27635 }, { "epoch": 2.5934684684684686, "grad_norm": 1.2163795828913704, "learning_rate": 5.492757942032689e-07, "loss": 0.3168, "step": 27636 }, { "epoch": 2.5935623123123124, "grad_norm": 1.2220576336048052, "learning_rate": 5.49027036596887e-07, "loss": 0.3558, "step": 27637 }, { "epoch": 2.593656156156156, "grad_norm": 1.6002578205361397, "learning_rate": 5.487783320599787e-07, "loss": 0.2963, "step": 27638 }, { "epoch": 2.59375, "grad_norm": 1.210517338227598, "learning_rate": 5.485296805955103e-07, "loss": 0.3648, "step": 27639 }, { "epoch": 2.593843843843844, "grad_norm": 1.140550862964738, "learning_rate": 5.482810822064461e-07, "loss": 0.2883, "step": 27640 }, { "epoch": 2.5939376876876876, "grad_norm": 0.9608932832055713, "learning_rate": 5.480325368957512e-07, "loss": 0.2935, "step": 27641 }, { "epoch": 2.5940315315315314, "grad_norm": 1.2272865738145577, "learning_rate": 5.477840446663879e-07, "loss": 0.2729, "step": 27642 }, { "epoch": 2.5941253753753752, "grad_norm": 1.202157949973227, "learning_rate": 5.475356055213187e-07, "loss": 0.268, "step": 27643 }, { "epoch": 2.5942192192192195, "grad_norm": 1.004518218203975, "learning_rate": 5.472872194635076e-07, "loss": 0.3219, "step": 27644 }, { "epoch": 2.594313063063063, "grad_norm": 1.184471204115503, "learning_rate": 5.470388864959148e-07, "loss": 0.3298, "step": 27645 }, { "epoch": 2.594406906906907, "grad_norm": 1.1420386407367646, "learning_rate": 5.467906066215006e-07, "loss": 0.2837, "step": 27646 }, { "epoch": 2.5945007507507505, "grad_norm": 1.228006378314257, "learning_rate": 5.465423798432273e-07, "loss": 0.2829, "step": 27647 }, { "epoch": 2.5945945945945947, "grad_norm": 1.0675332706845984, "learning_rate": 5.46294206164053e-07, "loss": 0.2756, "step": 27648 }, { "epoch": 2.5946884384384385, "grad_norm": 1.1872496135989608, "learning_rate": 5.460460855869376e-07, "loss": 0.3372, "step": 27649 }, { "epoch": 2.5947822822822824, "grad_norm": 0.9917722356261252, "learning_rate": 5.457980181148386e-07, "loss": 0.2764, "step": 27650 }, { "epoch": 2.594876126126126, "grad_norm": 1.1933590798621558, "learning_rate": 5.455500037507139e-07, "loss": 0.3163, "step": 27651 }, { "epoch": 2.59496996996997, "grad_norm": 0.9879549614764903, "learning_rate": 5.453020424975197e-07, "loss": 0.2873, "step": 27652 }, { "epoch": 2.595063813813814, "grad_norm": 1.222216041180303, "learning_rate": 5.450541343582149e-07, "loss": 0.3228, "step": 27653 }, { "epoch": 2.5951576576576576, "grad_norm": 1.1096968237632836, "learning_rate": 5.448062793357539e-07, "loss": 0.3243, "step": 27654 }, { "epoch": 2.5952515015015014, "grad_norm": 1.2076504413609908, "learning_rate": 5.445584774330909e-07, "loss": 0.2881, "step": 27655 }, { "epoch": 2.5953453453453452, "grad_norm": 1.1160337657238322, "learning_rate": 5.443107286531823e-07, "loss": 0.344, "step": 27656 }, { "epoch": 2.595439189189189, "grad_norm": 1.347563732900699, "learning_rate": 5.440630329989815e-07, "loss": 0.3116, "step": 27657 }, { "epoch": 2.595533033033033, "grad_norm": 1.0899702452326687, "learning_rate": 5.438153904734406e-07, "loss": 0.3053, "step": 27658 }, { "epoch": 2.595626876876877, "grad_norm": 1.9727226482222369, "learning_rate": 5.435678010795142e-07, "loss": 0.3328, "step": 27659 }, { "epoch": 2.5957207207207205, "grad_norm": 1.161162747560495, "learning_rate": 5.433202648201535e-07, "loss": 0.3412, "step": 27660 }, { "epoch": 2.5958145645645647, "grad_norm": 1.2724017753341998, "learning_rate": 5.4307278169831e-07, "loss": 0.3014, "step": 27661 }, { "epoch": 2.5959084084084085, "grad_norm": 1.3147449896625834, "learning_rate": 5.428253517169341e-07, "loss": 0.3028, "step": 27662 }, { "epoch": 2.5960022522522523, "grad_norm": 1.052612015456598, "learning_rate": 5.425779748789756e-07, "loss": 0.3231, "step": 27663 }, { "epoch": 2.596096096096096, "grad_norm": 1.1165145314893383, "learning_rate": 5.423306511873843e-07, "loss": 0.342, "step": 27664 }, { "epoch": 2.59618993993994, "grad_norm": 1.0090294546770633, "learning_rate": 5.420833806451097e-07, "loss": 0.3234, "step": 27665 }, { "epoch": 2.5962837837837838, "grad_norm": 1.0358863592025376, "learning_rate": 5.418361632551e-07, "loss": 0.2875, "step": 27666 }, { "epoch": 2.5963776276276276, "grad_norm": 1.2020707246023203, "learning_rate": 5.415889990203016e-07, "loss": 0.3011, "step": 27667 }, { "epoch": 2.5964714714714714, "grad_norm": 1.1767659794387055, "learning_rate": 5.413418879436627e-07, "loss": 0.3053, "step": 27668 }, { "epoch": 2.596565315315315, "grad_norm": 1.1298480548798098, "learning_rate": 5.410948300281293e-07, "loss": 0.2856, "step": 27669 }, { "epoch": 2.596659159159159, "grad_norm": 1.146189995629039, "learning_rate": 5.408478252766464e-07, "loss": 0.317, "step": 27670 }, { "epoch": 2.596753003003003, "grad_norm": 1.1640915818405124, "learning_rate": 5.406008736921609e-07, "loss": 0.3003, "step": 27671 }, { "epoch": 2.596846846846847, "grad_norm": 1.225346905093897, "learning_rate": 5.403539752776154e-07, "loss": 0.332, "step": 27672 }, { "epoch": 2.5969406906906904, "grad_norm": 1.1919573409986404, "learning_rate": 5.401071300359545e-07, "loss": 0.3417, "step": 27673 }, { "epoch": 2.5970345345345347, "grad_norm": 1.1206294958691667, "learning_rate": 5.398603379701212e-07, "loss": 0.3202, "step": 27674 }, { "epoch": 2.5971283783783785, "grad_norm": 1.0865364249045044, "learning_rate": 5.396135990830581e-07, "loss": 0.2726, "step": 27675 }, { "epoch": 2.5972222222222223, "grad_norm": 2.836509455912566, "learning_rate": 5.393669133777063e-07, "loss": 0.3303, "step": 27676 }, { "epoch": 2.597316066066066, "grad_norm": 1.5763242307695489, "learning_rate": 5.391202808570084e-07, "loss": 0.3955, "step": 27677 }, { "epoch": 2.59740990990991, "grad_norm": 1.1834344482439016, "learning_rate": 5.388737015239043e-07, "loss": 0.2904, "step": 27678 }, { "epoch": 2.5975037537537538, "grad_norm": 2.5272081240004276, "learning_rate": 5.386271753813332e-07, "loss": 0.2934, "step": 27679 }, { "epoch": 2.5975975975975976, "grad_norm": 1.183339919433598, "learning_rate": 5.383807024322369e-07, "loss": 0.227, "step": 27680 }, { "epoch": 2.5976914414414414, "grad_norm": 1.080669058134158, "learning_rate": 5.381342826795521e-07, "loss": 0.2853, "step": 27681 }, { "epoch": 2.597785285285285, "grad_norm": 1.4060554663786728, "learning_rate": 5.378879161262162e-07, "loss": 0.2854, "step": 27682 }, { "epoch": 2.597879129129129, "grad_norm": 1.1913567177222928, "learning_rate": 5.376416027751691e-07, "loss": 0.3126, "step": 27683 }, { "epoch": 2.597972972972973, "grad_norm": 0.9982580203114784, "learning_rate": 5.373953426293465e-07, "loss": 0.323, "step": 27684 }, { "epoch": 2.598066816816817, "grad_norm": 1.0426001462663472, "learning_rate": 5.371491356916842e-07, "loss": 0.305, "step": 27685 }, { "epoch": 2.5981606606606604, "grad_norm": 1.290940687806986, "learning_rate": 5.369029819651184e-07, "loss": 0.3135, "step": 27686 }, { "epoch": 2.5982545045045047, "grad_norm": 1.007992169411238, "learning_rate": 5.36656881452583e-07, "loss": 0.3204, "step": 27687 }, { "epoch": 2.5983483483483485, "grad_norm": 1.0210093517185823, "learning_rate": 5.364108341570124e-07, "loss": 0.3147, "step": 27688 }, { "epoch": 2.5984421921921923, "grad_norm": 1.1335912680175295, "learning_rate": 5.361648400813413e-07, "loss": 0.3505, "step": 27689 }, { "epoch": 2.598536036036036, "grad_norm": 0.9772557433863205, "learning_rate": 5.359188992285025e-07, "loss": 0.3143, "step": 27690 }, { "epoch": 2.59862987987988, "grad_norm": 1.0495656177248773, "learning_rate": 5.356730116014269e-07, "loss": 0.3105, "step": 27691 }, { "epoch": 2.5987237237237237, "grad_norm": 1.20641967074651, "learning_rate": 5.354271772030489e-07, "loss": 0.353, "step": 27692 }, { "epoch": 2.5988175675675675, "grad_norm": 1.9893804120674736, "learning_rate": 5.351813960362973e-07, "loss": 0.3325, "step": 27693 }, { "epoch": 2.5989114114114114, "grad_norm": 1.1290037394277486, "learning_rate": 5.34935668104103e-07, "loss": 0.3168, "step": 27694 }, { "epoch": 2.599005255255255, "grad_norm": 1.5491743076933269, "learning_rate": 5.346899934093969e-07, "loss": 0.284, "step": 27695 }, { "epoch": 2.599099099099099, "grad_norm": 1.3868973982175834, "learning_rate": 5.344443719551079e-07, "loss": 0.3143, "step": 27696 }, { "epoch": 2.599192942942943, "grad_norm": 1.0039886007812733, "learning_rate": 5.341988037441642e-07, "loss": 0.3173, "step": 27697 }, { "epoch": 2.599286786786787, "grad_norm": 1.0584580534394552, "learning_rate": 5.339532887794935e-07, "loss": 0.3032, "step": 27698 }, { "epoch": 2.5993806306306304, "grad_norm": 1.1533738546021959, "learning_rate": 5.337078270640234e-07, "loss": 0.3275, "step": 27699 }, { "epoch": 2.5994744744744747, "grad_norm": 1.2604154819177469, "learning_rate": 5.334624186006799e-07, "loss": 0.3332, "step": 27700 }, { "epoch": 2.5995683183183185, "grad_norm": 1.0904638216985059, "learning_rate": 5.3321706339239e-07, "loss": 0.3209, "step": 27701 }, { "epoch": 2.5996621621621623, "grad_norm": 1.624891203092066, "learning_rate": 5.329717614420793e-07, "loss": 0.2869, "step": 27702 }, { "epoch": 2.599756006006006, "grad_norm": 1.1236685743881696, "learning_rate": 5.32726512752671e-07, "loss": 0.2924, "step": 27703 }, { "epoch": 2.59984984984985, "grad_norm": 1.2328988048334604, "learning_rate": 5.32481317327091e-07, "loss": 0.3543, "step": 27704 }, { "epoch": 2.5999436936936937, "grad_norm": 0.972252562096267, "learning_rate": 5.322361751682619e-07, "loss": 0.3077, "step": 27705 }, { "epoch": 2.6000375375375375, "grad_norm": 0.9973979544113631, "learning_rate": 5.319910862791062e-07, "loss": 0.3105, "step": 27706 }, { "epoch": 2.6001313813813813, "grad_norm": 1.6155189235100593, "learning_rate": 5.317460506625472e-07, "loss": 0.3646, "step": 27707 }, { "epoch": 2.600225225225225, "grad_norm": 1.0995438402966176, "learning_rate": 5.31501068321506e-07, "loss": 0.3089, "step": 27708 }, { "epoch": 2.600319069069069, "grad_norm": 1.0115924915163612, "learning_rate": 5.312561392589038e-07, "loss": 0.3312, "step": 27709 }, { "epoch": 2.6004129129129128, "grad_norm": 1.198637042825267, "learning_rate": 5.310112634776604e-07, "loss": 0.2742, "step": 27710 }, { "epoch": 2.600506756756757, "grad_norm": 1.231814031523242, "learning_rate": 5.307664409806956e-07, "loss": 0.2436, "step": 27711 }, { "epoch": 2.6006006006006004, "grad_norm": 1.1332391155927004, "learning_rate": 5.305216717709278e-07, "loss": 0.3046, "step": 27712 }, { "epoch": 2.6006944444444446, "grad_norm": 1.236708927720521, "learning_rate": 5.302769558512766e-07, "loss": 0.3071, "step": 27713 }, { "epoch": 2.6007882882882885, "grad_norm": 1.189316642093057, "learning_rate": 5.300322932246599e-07, "loss": 0.3118, "step": 27714 }, { "epoch": 2.6008821321321323, "grad_norm": 0.9485961248199624, "learning_rate": 5.297876838939931e-07, "loss": 0.3278, "step": 27715 }, { "epoch": 2.600975975975976, "grad_norm": 1.134546597510466, "learning_rate": 5.295431278621949e-07, "loss": 0.2992, "step": 27716 }, { "epoch": 2.60106981981982, "grad_norm": 1.16228046372232, "learning_rate": 5.292986251321796e-07, "loss": 0.3232, "step": 27717 }, { "epoch": 2.6011636636636637, "grad_norm": 1.0750724181511315, "learning_rate": 5.290541757068624e-07, "loss": 0.3291, "step": 27718 }, { "epoch": 2.6012575075075075, "grad_norm": 1.098877292565786, "learning_rate": 5.288097795891595e-07, "loss": 0.322, "step": 27719 }, { "epoch": 2.6013513513513513, "grad_norm": 1.0550312105614776, "learning_rate": 5.285654367819837e-07, "loss": 0.3264, "step": 27720 }, { "epoch": 2.601445195195195, "grad_norm": 2.4162463437100503, "learning_rate": 5.283211472882487e-07, "loss": 0.3227, "step": 27721 }, { "epoch": 2.601539039039039, "grad_norm": 1.0531488306131627, "learning_rate": 5.280769111108669e-07, "loss": 0.3357, "step": 27722 }, { "epoch": 2.6016328828828827, "grad_norm": 1.2792241807799933, "learning_rate": 5.278327282527502e-07, "loss": 0.3215, "step": 27723 }, { "epoch": 2.601726726726727, "grad_norm": 1.1228452071840862, "learning_rate": 5.275885987168089e-07, "loss": 0.3218, "step": 27724 }, { "epoch": 2.6018205705705704, "grad_norm": 1.945506507694668, "learning_rate": 5.273445225059564e-07, "loss": 0.2905, "step": 27725 }, { "epoch": 2.6019144144144146, "grad_norm": 1.1226006853121926, "learning_rate": 5.271004996231021e-07, "loss": 0.3183, "step": 27726 }, { "epoch": 2.602008258258258, "grad_norm": 1.1426304832763086, "learning_rate": 5.268565300711532e-07, "loss": 0.31, "step": 27727 }, { "epoch": 2.6021021021021022, "grad_norm": 1.099947537344518, "learning_rate": 5.26612613853022e-07, "loss": 0.2866, "step": 27728 }, { "epoch": 2.602195945945946, "grad_norm": 1.043356796448041, "learning_rate": 5.263687509716148e-07, "loss": 0.2725, "step": 27729 }, { "epoch": 2.60228978978979, "grad_norm": 1.1639038656916496, "learning_rate": 5.261249414298381e-07, "loss": 0.3045, "step": 27730 }, { "epoch": 2.6023836336336337, "grad_norm": 0.9680203076352756, "learning_rate": 5.258811852306017e-07, "loss": 0.322, "step": 27731 }, { "epoch": 2.6024774774774775, "grad_norm": 1.091378842136182, "learning_rate": 5.256374823768107e-07, "loss": 0.3002, "step": 27732 }, { "epoch": 2.6025713213213213, "grad_norm": 1.129061887882372, "learning_rate": 5.253938328713704e-07, "loss": 0.3271, "step": 27733 }, { "epoch": 2.602665165165165, "grad_norm": 1.1355392132222863, "learning_rate": 5.251502367171857e-07, "loss": 0.3564, "step": 27734 }, { "epoch": 2.602759009009009, "grad_norm": 1.1207928283116502, "learning_rate": 5.249066939171621e-07, "loss": 0.3029, "step": 27735 }, { "epoch": 2.6028528528528527, "grad_norm": 0.9342490837050866, "learning_rate": 5.246632044742012e-07, "loss": 0.2773, "step": 27736 }, { "epoch": 2.6029466966966965, "grad_norm": 1.3078666037030349, "learning_rate": 5.244197683912083e-07, "loss": 0.264, "step": 27737 }, { "epoch": 2.6030405405405403, "grad_norm": 1.1106655672875752, "learning_rate": 5.241763856710858e-07, "loss": 0.2664, "step": 27738 }, { "epoch": 2.6031343843843846, "grad_norm": 1.047839216383118, "learning_rate": 5.239330563167338e-07, "loss": 0.302, "step": 27739 }, { "epoch": 2.603228228228228, "grad_norm": 1.1021819948370872, "learning_rate": 5.236897803310559e-07, "loss": 0.3591, "step": 27740 }, { "epoch": 2.6033220720720722, "grad_norm": 1.3395661202098965, "learning_rate": 5.234465577169517e-07, "loss": 0.2948, "step": 27741 }, { "epoch": 2.603415915915916, "grad_norm": 1.0279105286166104, "learning_rate": 5.232033884773203e-07, "loss": 0.2965, "step": 27742 }, { "epoch": 2.60350975975976, "grad_norm": 1.0515818226142313, "learning_rate": 5.229602726150618e-07, "loss": 0.304, "step": 27743 }, { "epoch": 2.6036036036036037, "grad_norm": 1.0730874365343523, "learning_rate": 5.227172101330757e-07, "loss": 0.2982, "step": 27744 }, { "epoch": 2.6036974474474475, "grad_norm": 1.0600543641584665, "learning_rate": 5.224742010342593e-07, "loss": 0.3262, "step": 27745 }, { "epoch": 2.6037912912912913, "grad_norm": 1.1340830841417937, "learning_rate": 5.222312453215095e-07, "loss": 0.3204, "step": 27746 }, { "epoch": 2.603885135135135, "grad_norm": 1.0843740109992883, "learning_rate": 5.219883429977235e-07, "loss": 0.3074, "step": 27747 }, { "epoch": 2.603978978978979, "grad_norm": 0.9813627192627026, "learning_rate": 5.217454940657968e-07, "loss": 0.3049, "step": 27748 }, { "epoch": 2.6040728228228227, "grad_norm": 1.0945013732271058, "learning_rate": 5.21502698528627e-07, "loss": 0.3009, "step": 27749 }, { "epoch": 2.6041666666666665, "grad_norm": 1.1101132651236447, "learning_rate": 5.212599563891069e-07, "loss": 0.2481, "step": 27750 }, { "epoch": 2.6042605105105103, "grad_norm": 1.1437154413636443, "learning_rate": 5.210172676501307e-07, "loss": 0.294, "step": 27751 }, { "epoch": 2.6043543543543546, "grad_norm": 1.827381603974264, "learning_rate": 5.20774632314594e-07, "loss": 0.3502, "step": 27752 }, { "epoch": 2.604448198198198, "grad_norm": 1.2649546963269664, "learning_rate": 5.205320503853884e-07, "loss": 0.306, "step": 27753 }, { "epoch": 2.604542042042042, "grad_norm": 1.1512198676817984, "learning_rate": 5.202895218654052e-07, "loss": 0.3226, "step": 27754 }, { "epoch": 2.604635885885886, "grad_norm": 1.073299769374193, "learning_rate": 5.200470467575386e-07, "loss": 0.3199, "step": 27755 }, { "epoch": 2.60472972972973, "grad_norm": 0.9886261818961346, "learning_rate": 5.198046250646782e-07, "loss": 0.3528, "step": 27756 }, { "epoch": 2.6048235735735736, "grad_norm": 1.1029017959958207, "learning_rate": 5.195622567897135e-07, "loss": 0.3162, "step": 27757 }, { "epoch": 2.6049174174174174, "grad_norm": 1.246618782381246, "learning_rate": 5.193199419355366e-07, "loss": 0.2796, "step": 27758 }, { "epoch": 2.6050112612612613, "grad_norm": 1.8346891601645106, "learning_rate": 5.190776805050357e-07, "loss": 0.3195, "step": 27759 }, { "epoch": 2.605105105105105, "grad_norm": 1.0418782549469574, "learning_rate": 5.188354725010974e-07, "loss": 0.375, "step": 27760 }, { "epoch": 2.605198948948949, "grad_norm": 1.2173444908743853, "learning_rate": 5.185933179266122e-07, "loss": 0.3352, "step": 27761 }, { "epoch": 2.6052927927927927, "grad_norm": 1.3433047399291975, "learning_rate": 5.183512167844668e-07, "loss": 0.2889, "step": 27762 }, { "epoch": 2.6053866366366365, "grad_norm": 1.05307712418368, "learning_rate": 5.181091690775453e-07, "loss": 0.2869, "step": 27763 }, { "epoch": 2.6054804804804803, "grad_norm": 1.138021399972759, "learning_rate": 5.178671748087372e-07, "loss": 0.3606, "step": 27764 }, { "epoch": 2.6055743243243246, "grad_norm": 1.1262189336041206, "learning_rate": 5.176252339809263e-07, "loss": 0.3123, "step": 27765 }, { "epoch": 2.605668168168168, "grad_norm": 1.2328171914482502, "learning_rate": 5.173833465969963e-07, "loss": 0.319, "step": 27766 }, { "epoch": 2.605762012012012, "grad_norm": 1.0437140522540662, "learning_rate": 5.171415126598334e-07, "loss": 0.286, "step": 27767 }, { "epoch": 2.605855855855856, "grad_norm": 1.0824779911896565, "learning_rate": 5.168997321723196e-07, "loss": 0.3279, "step": 27768 }, { "epoch": 2.6059496996997, "grad_norm": 0.9605604137415193, "learning_rate": 5.166580051373365e-07, "loss": 0.2903, "step": 27769 }, { "epoch": 2.6060435435435436, "grad_norm": 1.205909449519436, "learning_rate": 5.164163315577692e-07, "loss": 0.3132, "step": 27770 }, { "epoch": 2.6061373873873874, "grad_norm": 1.2034000781618737, "learning_rate": 5.161747114364973e-07, "loss": 0.3508, "step": 27771 }, { "epoch": 2.6062312312312312, "grad_norm": 1.276946293837834, "learning_rate": 5.159331447764026e-07, "loss": 0.3007, "step": 27772 }, { "epoch": 2.606325075075075, "grad_norm": 0.938849061169814, "learning_rate": 5.156916315803646e-07, "loss": 0.34, "step": 27773 }, { "epoch": 2.606418918918919, "grad_norm": 1.0345261038734097, "learning_rate": 5.154501718512628e-07, "loss": 0.3322, "step": 27774 }, { "epoch": 2.6065127627627627, "grad_norm": 3.6964483784606084, "learning_rate": 5.152087655919752e-07, "loss": 0.2893, "step": 27775 }, { "epoch": 2.6066066066066065, "grad_norm": 1.226107093389688, "learning_rate": 5.149674128053827e-07, "loss": 0.3161, "step": 27776 }, { "epoch": 2.6067004504504503, "grad_norm": 1.047412746077945, "learning_rate": 5.147261134943616e-07, "loss": 0.2953, "step": 27777 }, { "epoch": 2.6067942942942945, "grad_norm": 1.23941416214647, "learning_rate": 5.144848676617881e-07, "loss": 0.2934, "step": 27778 }, { "epoch": 2.606888138138138, "grad_norm": 1.3169760548262495, "learning_rate": 5.142436753105401e-07, "loss": 0.2928, "step": 27779 }, { "epoch": 2.606981981981982, "grad_norm": 1.3125763552061378, "learning_rate": 5.140025364434925e-07, "loss": 0.3268, "step": 27780 }, { "epoch": 2.607075825825826, "grad_norm": 1.0560058885283952, "learning_rate": 5.137614510635203e-07, "loss": 0.3321, "step": 27781 }, { "epoch": 2.60716966966967, "grad_norm": 0.9939421822797433, "learning_rate": 5.135204191734988e-07, "loss": 0.2957, "step": 27782 }, { "epoch": 2.6072635135135136, "grad_norm": 1.1630827579336764, "learning_rate": 5.132794407763014e-07, "loss": 0.295, "step": 27783 }, { "epoch": 2.6073573573573574, "grad_norm": 1.2304137414846437, "learning_rate": 5.130385158748014e-07, "loss": 0.284, "step": 27784 }, { "epoch": 2.607451201201201, "grad_norm": 1.180874391818112, "learning_rate": 5.127976444718713e-07, "loss": 0.3327, "step": 27785 }, { "epoch": 2.607545045045045, "grad_norm": 2.646847520720903, "learning_rate": 5.125568265703828e-07, "loss": 0.2887, "step": 27786 }, { "epoch": 2.607638888888889, "grad_norm": 1.8546681373079807, "learning_rate": 5.123160621732071e-07, "loss": 0.3216, "step": 27787 }, { "epoch": 2.6077327327327327, "grad_norm": 1.0899684325830679, "learning_rate": 5.120753512832155e-07, "loss": 0.2897, "step": 27788 }, { "epoch": 2.6078265765765765, "grad_norm": 1.5849389206840792, "learning_rate": 5.118346939032776e-07, "loss": 0.3012, "step": 27789 }, { "epoch": 2.6079204204204203, "grad_norm": 1.5937047749818576, "learning_rate": 5.115940900362621e-07, "loss": 0.3193, "step": 27790 }, { "epoch": 2.6080142642642645, "grad_norm": 1.247874791121881, "learning_rate": 5.113535396850394e-07, "loss": 0.2892, "step": 27791 }, { "epoch": 2.608108108108108, "grad_norm": 1.234187075363536, "learning_rate": 5.111130428524769e-07, "loss": 0.3632, "step": 27792 }, { "epoch": 2.608201951951952, "grad_norm": 1.091593611078443, "learning_rate": 5.10872599541441e-07, "loss": 0.3116, "step": 27793 }, { "epoch": 2.608295795795796, "grad_norm": 1.180017345882519, "learning_rate": 5.106322097548006e-07, "loss": 0.2462, "step": 27794 }, { "epoch": 2.6083896396396398, "grad_norm": 0.9802105391736605, "learning_rate": 5.1039187349542e-07, "loss": 0.3007, "step": 27795 }, { "epoch": 2.6084834834834836, "grad_norm": 1.138049020151366, "learning_rate": 5.101515907661658e-07, "loss": 0.2891, "step": 27796 }, { "epoch": 2.6085773273273274, "grad_norm": 1.808329755181149, "learning_rate": 5.09911361569903e-07, "loss": 0.3102, "step": 27797 }, { "epoch": 2.608671171171171, "grad_norm": 1.069412949816256, "learning_rate": 5.096711859094949e-07, "loss": 0.3044, "step": 27798 }, { "epoch": 2.608765015015015, "grad_norm": 1.2038328012909376, "learning_rate": 5.094310637878053e-07, "loss": 0.2841, "step": 27799 }, { "epoch": 2.608858858858859, "grad_norm": 1.1275608587495076, "learning_rate": 5.091909952076984e-07, "loss": 0.287, "step": 27800 }, { "epoch": 2.6089527027027026, "grad_norm": 1.5814271197955203, "learning_rate": 5.089509801720354e-07, "loss": 0.3075, "step": 27801 }, { "epoch": 2.6090465465465464, "grad_norm": 0.9698439845320064, "learning_rate": 5.08711018683678e-07, "loss": 0.3003, "step": 27802 }, { "epoch": 2.6091403903903903, "grad_norm": 1.1386861174642966, "learning_rate": 5.084711107454887e-07, "loss": 0.3441, "step": 27803 }, { "epoch": 2.6092342342342345, "grad_norm": 1.3767255268074894, "learning_rate": 5.08231256360327e-07, "loss": 0.3268, "step": 27804 }, { "epoch": 2.609328078078078, "grad_norm": 1.2443749799979462, "learning_rate": 5.079914555310511e-07, "loss": 0.3209, "step": 27805 }, { "epoch": 2.609421921921922, "grad_norm": 1.026877369564644, "learning_rate": 5.077517082605232e-07, "loss": 0.2883, "step": 27806 }, { "epoch": 2.6095157657657655, "grad_norm": 1.038462539405323, "learning_rate": 5.075120145516005e-07, "loss": 0.2942, "step": 27807 }, { "epoch": 2.6096096096096097, "grad_norm": 1.1995458835722634, "learning_rate": 5.072723744071406e-07, "loss": 0.2888, "step": 27808 }, { "epoch": 2.6097034534534536, "grad_norm": 1.092548937159835, "learning_rate": 5.070327878300007e-07, "loss": 0.2869, "step": 27809 }, { "epoch": 2.6097972972972974, "grad_norm": 1.2111641359749665, "learning_rate": 5.067932548230381e-07, "loss": 0.2764, "step": 27810 }, { "epoch": 2.609891141141141, "grad_norm": 1.06644375690389, "learning_rate": 5.06553775389107e-07, "loss": 0.3536, "step": 27811 }, { "epoch": 2.609984984984985, "grad_norm": 1.2676722478365032, "learning_rate": 5.063143495310657e-07, "loss": 0.3539, "step": 27812 }, { "epoch": 2.610078828828829, "grad_norm": 1.130494890348577, "learning_rate": 5.060749772517664e-07, "loss": 0.3045, "step": 27813 }, { "epoch": 2.6101726726726726, "grad_norm": 1.315625790970672, "learning_rate": 5.058356585540636e-07, "loss": 0.3235, "step": 27814 }, { "epoch": 2.6102665165165164, "grad_norm": 1.029791669520072, "learning_rate": 5.055963934408121e-07, "loss": 0.2861, "step": 27815 }, { "epoch": 2.6103603603603602, "grad_norm": 0.9245891908640573, "learning_rate": 5.053571819148639e-07, "loss": 0.3244, "step": 27816 }, { "epoch": 2.610454204204204, "grad_norm": 1.4141393365371784, "learning_rate": 5.051180239790698e-07, "loss": 0.3226, "step": 27817 }, { "epoch": 2.610548048048048, "grad_norm": 1.1803665602119298, "learning_rate": 5.048789196362835e-07, "loss": 0.3158, "step": 27818 }, { "epoch": 2.610641891891892, "grad_norm": 1.101931550048038, "learning_rate": 5.04639868889355e-07, "loss": 0.3322, "step": 27819 }, { "epoch": 2.6107357357357355, "grad_norm": 0.992521064395895, "learning_rate": 5.044008717411342e-07, "loss": 0.3124, "step": 27820 }, { "epoch": 2.6108295795795797, "grad_norm": 2.6389054355716195, "learning_rate": 5.04161928194471e-07, "loss": 0.3128, "step": 27821 }, { "epoch": 2.6109234234234235, "grad_norm": 1.1532466824532248, "learning_rate": 5.039230382522147e-07, "loss": 0.3051, "step": 27822 }, { "epoch": 2.6110172672672673, "grad_norm": 1.0538977131392622, "learning_rate": 5.036842019172117e-07, "loss": 0.2929, "step": 27823 }, { "epoch": 2.611111111111111, "grad_norm": 1.1180883500510543, "learning_rate": 5.034454191923122e-07, "loss": 0.2989, "step": 27824 }, { "epoch": 2.611204954954955, "grad_norm": 1.4699568019320515, "learning_rate": 5.032066900803623e-07, "loss": 0.3428, "step": 27825 }, { "epoch": 2.611298798798799, "grad_norm": 1.0488773907949145, "learning_rate": 5.029680145842075e-07, "loss": 0.3165, "step": 27826 }, { "epoch": 2.6113926426426426, "grad_norm": 1.083340224242839, "learning_rate": 5.027293927066951e-07, "loss": 0.3181, "step": 27827 }, { "epoch": 2.6114864864864864, "grad_norm": 1.6137990176261823, "learning_rate": 5.024908244506693e-07, "loss": 0.309, "step": 27828 }, { "epoch": 2.61158033033033, "grad_norm": 1.091797547123959, "learning_rate": 5.022523098189736e-07, "loss": 0.3388, "step": 27829 }, { "epoch": 2.611674174174174, "grad_norm": 1.667581686386829, "learning_rate": 5.020138488144543e-07, "loss": 0.3293, "step": 27830 }, { "epoch": 2.611768018018018, "grad_norm": 3.204663310427059, "learning_rate": 5.017754414399534e-07, "loss": 0.3077, "step": 27831 }, { "epoch": 2.611861861861862, "grad_norm": 1.1407259817095758, "learning_rate": 5.015370876983128e-07, "loss": 0.369, "step": 27832 }, { "epoch": 2.6119557057057055, "grad_norm": 1.2285531175397126, "learning_rate": 5.012987875923753e-07, "loss": 0.271, "step": 27833 }, { "epoch": 2.6120495495495497, "grad_norm": 1.3537196650596832, "learning_rate": 5.010605411249819e-07, "loss": 0.3131, "step": 27834 }, { "epoch": 2.6121433933933935, "grad_norm": 1.4755640691956027, "learning_rate": 5.008223482989716e-07, "loss": 0.3176, "step": 27835 }, { "epoch": 2.6122372372372373, "grad_norm": 1.1268704315794997, "learning_rate": 5.005842091171876e-07, "loss": 0.284, "step": 27836 }, { "epoch": 2.612331081081081, "grad_norm": 1.0350259330726646, "learning_rate": 5.003461235824675e-07, "loss": 0.3053, "step": 27837 }, { "epoch": 2.612424924924925, "grad_norm": 1.0892595514091177, "learning_rate": 5.001080916976492e-07, "loss": 0.2887, "step": 27838 }, { "epoch": 2.6125187687687688, "grad_norm": 1.1514145859770053, "learning_rate": 4.998701134655726e-07, "loss": 0.2829, "step": 27839 }, { "epoch": 2.6126126126126126, "grad_norm": 1.3238554265349611, "learning_rate": 4.996321888890743e-07, "loss": 0.3438, "step": 27840 }, { "epoch": 2.6127064564564564, "grad_norm": 1.6723509266902745, "learning_rate": 4.993943179709898e-07, "loss": 0.3207, "step": 27841 }, { "epoch": 2.6128003003003, "grad_norm": 1.2017361153578654, "learning_rate": 4.99156500714158e-07, "loss": 0.3208, "step": 27842 }, { "epoch": 2.612894144144144, "grad_norm": 1.1137820439777346, "learning_rate": 4.989187371214127e-07, "loss": 0.2817, "step": 27843 }, { "epoch": 2.612987987987988, "grad_norm": 1.1911259071091076, "learning_rate": 4.986810271955889e-07, "loss": 0.2774, "step": 27844 }, { "epoch": 2.613081831831832, "grad_norm": 1.1202441784700772, "learning_rate": 4.984433709395215e-07, "loss": 0.3113, "step": 27845 }, { "epoch": 2.6131756756756754, "grad_norm": 1.005848342242678, "learning_rate": 4.982057683560432e-07, "loss": 0.308, "step": 27846 }, { "epoch": 2.6132695195195197, "grad_norm": 1.113061745221305, "learning_rate": 4.979682194479863e-07, "loss": 0.3379, "step": 27847 }, { "epoch": 2.6133633633633635, "grad_norm": 1.8634174584313496, "learning_rate": 4.977307242181851e-07, "loss": 0.2981, "step": 27848 }, { "epoch": 2.6134572072072073, "grad_norm": 1.4193362481340521, "learning_rate": 4.974932826694706e-07, "loss": 0.297, "step": 27849 }, { "epoch": 2.613551051051051, "grad_norm": 1.146787178281457, "learning_rate": 4.972558948046729e-07, "loss": 0.3001, "step": 27850 }, { "epoch": 2.613644894894895, "grad_norm": 2.0318886018927578, "learning_rate": 4.970185606266237e-07, "loss": 0.3121, "step": 27851 }, { "epoch": 2.6137387387387387, "grad_norm": 1.0184876612223799, "learning_rate": 4.967812801381521e-07, "loss": 0.3009, "step": 27852 }, { "epoch": 2.6138325825825826, "grad_norm": 1.1449268428363089, "learning_rate": 4.965440533420868e-07, "loss": 0.3267, "step": 27853 }, { "epoch": 2.6139264264264264, "grad_norm": 1.8607834087609119, "learning_rate": 4.963068802412574e-07, "loss": 0.3321, "step": 27854 }, { "epoch": 2.61402027027027, "grad_norm": 1.1341756433593917, "learning_rate": 4.960697608384907e-07, "loss": 0.3071, "step": 27855 }, { "epoch": 2.614114114114114, "grad_norm": 1.295819007704204, "learning_rate": 4.958326951366149e-07, "loss": 0.3262, "step": 27856 }, { "epoch": 2.614207957957958, "grad_norm": 1.2622986646599037, "learning_rate": 4.955956831384562e-07, "loss": 0.2991, "step": 27857 }, { "epoch": 2.614301801801802, "grad_norm": 1.266380407399827, "learning_rate": 4.953587248468394e-07, "loss": 0.29, "step": 27858 }, { "epoch": 2.6143956456456454, "grad_norm": 1.1215642076919472, "learning_rate": 4.951218202645907e-07, "loss": 0.3579, "step": 27859 }, { "epoch": 2.6144894894894897, "grad_norm": 1.112791459656151, "learning_rate": 4.948849693945352e-07, "loss": 0.3067, "step": 27860 }, { "epoch": 2.6145833333333335, "grad_norm": 1.1298818179362287, "learning_rate": 4.946481722394964e-07, "loss": 0.3153, "step": 27861 }, { "epoch": 2.6146771771771773, "grad_norm": 0.9948889121202842, "learning_rate": 4.944114288022972e-07, "loss": 0.2828, "step": 27862 }, { "epoch": 2.614771021021021, "grad_norm": 1.0781830468250901, "learning_rate": 4.941747390857615e-07, "loss": 0.3017, "step": 27863 }, { "epoch": 2.614864864864865, "grad_norm": 1.0332656459311416, "learning_rate": 4.939381030927104e-07, "loss": 0.3639, "step": 27864 }, { "epoch": 2.6149587087087087, "grad_norm": 1.1443036407010765, "learning_rate": 4.93701520825965e-07, "loss": 0.2851, "step": 27865 }, { "epoch": 2.6150525525525525, "grad_norm": 1.0554755524783992, "learning_rate": 4.934649922883472e-07, "loss": 0.3177, "step": 27866 }, { "epoch": 2.6151463963963963, "grad_norm": 1.16622545231943, "learning_rate": 4.932285174826773e-07, "loss": 0.3427, "step": 27867 }, { "epoch": 2.61524024024024, "grad_norm": 1.1097504261354783, "learning_rate": 4.929920964117741e-07, "loss": 0.3085, "step": 27868 }, { "epoch": 2.615334084084084, "grad_norm": 1.0234104000194615, "learning_rate": 4.927557290784562e-07, "loss": 0.3412, "step": 27869 }, { "epoch": 2.6154279279279278, "grad_norm": 1.1655041775123283, "learning_rate": 4.925194154855429e-07, "loss": 0.3152, "step": 27870 }, { "epoch": 2.615521771771772, "grad_norm": 1.0691747268318592, "learning_rate": 4.922831556358492e-07, "loss": 0.2977, "step": 27871 }, { "epoch": 2.6156156156156154, "grad_norm": 1.043821316650287, "learning_rate": 4.920469495321955e-07, "loss": 0.2888, "step": 27872 }, { "epoch": 2.6157094594594597, "grad_norm": 1.1016131397827975, "learning_rate": 4.918107971773966e-07, "loss": 0.3228, "step": 27873 }, { "epoch": 2.6158033033033035, "grad_norm": 1.1279564673704052, "learning_rate": 4.915746985742676e-07, "loss": 0.2882, "step": 27874 }, { "epoch": 2.6158971471471473, "grad_norm": 1.3790680415163918, "learning_rate": 4.913386537256243e-07, "loss": 0.3481, "step": 27875 }, { "epoch": 2.615990990990991, "grad_norm": 1.9865796337397603, "learning_rate": 4.911026626342813e-07, "loss": 0.3251, "step": 27876 }, { "epoch": 2.616084834834835, "grad_norm": 1.2792610677388823, "learning_rate": 4.908667253030508e-07, "loss": 0.2884, "step": 27877 }, { "epoch": 2.6161786786786787, "grad_norm": 0.9513363297590954, "learning_rate": 4.906308417347477e-07, "loss": 0.2866, "step": 27878 }, { "epoch": 2.6162725225225225, "grad_norm": 1.136665955409312, "learning_rate": 4.903950119321843e-07, "loss": 0.3097, "step": 27879 }, { "epoch": 2.6163663663663663, "grad_norm": 1.3532196267561352, "learning_rate": 4.901592358981705e-07, "loss": 0.2936, "step": 27880 }, { "epoch": 2.61646021021021, "grad_norm": 1.2640400303727863, "learning_rate": 4.899235136355202e-07, "loss": 0.3429, "step": 27881 }, { "epoch": 2.616554054054054, "grad_norm": 1.0571066155321833, "learning_rate": 4.896878451470432e-07, "loss": 0.2696, "step": 27882 }, { "epoch": 2.6166478978978978, "grad_norm": 1.1768170296430513, "learning_rate": 4.894522304355476e-07, "loss": 0.3127, "step": 27883 }, { "epoch": 2.616741741741742, "grad_norm": 1.2045329187499048, "learning_rate": 4.892166695038448e-07, "loss": 0.2982, "step": 27884 }, { "epoch": 2.6168355855855854, "grad_norm": 1.038250995141407, "learning_rate": 4.889811623547419e-07, "loss": 0.3403, "step": 27885 }, { "epoch": 2.6169294294294296, "grad_norm": 1.0468984269385624, "learning_rate": 4.887457089910469e-07, "loss": 0.3038, "step": 27886 }, { "epoch": 2.617023273273273, "grad_norm": 1.0198380514140164, "learning_rate": 4.885103094155691e-07, "loss": 0.3116, "step": 27887 }, { "epoch": 2.6171171171171173, "grad_norm": 1.238593384474987, "learning_rate": 4.882749636311135e-07, "loss": 0.3038, "step": 27888 }, { "epoch": 2.617210960960961, "grad_norm": 1.1013682712003832, "learning_rate": 4.880396716404856e-07, "loss": 0.3068, "step": 27889 }, { "epoch": 2.617304804804805, "grad_norm": 1.3271934072402785, "learning_rate": 4.878044334464927e-07, "loss": 0.363, "step": 27890 }, { "epoch": 2.6173986486486487, "grad_norm": 1.1747611125325634, "learning_rate": 4.875692490519385e-07, "loss": 0.3218, "step": 27891 }, { "epoch": 2.6174924924924925, "grad_norm": 1.111022356348044, "learning_rate": 4.873341184596259e-07, "loss": 0.2891, "step": 27892 }, { "epoch": 2.6175863363363363, "grad_norm": 1.0447884389483297, "learning_rate": 4.870990416723614e-07, "loss": 0.3217, "step": 27893 }, { "epoch": 2.61768018018018, "grad_norm": 1.1463650445019948, "learning_rate": 4.868640186929458e-07, "loss": 0.2892, "step": 27894 }, { "epoch": 2.617774024024024, "grad_norm": 1.0663844428556244, "learning_rate": 4.866290495241815e-07, "loss": 0.3046, "step": 27895 }, { "epoch": 2.6178678678678677, "grad_norm": 1.2646021052327774, "learning_rate": 4.863941341688705e-07, "loss": 0.3353, "step": 27896 }, { "epoch": 2.6179617117117115, "grad_norm": 1.2855992776988034, "learning_rate": 4.861592726298136e-07, "loss": 0.3226, "step": 27897 }, { "epoch": 2.6180555555555554, "grad_norm": 1.088776901451168, "learning_rate": 4.859244649098099e-07, "loss": 0.3291, "step": 27898 }, { "epoch": 2.6181493993993996, "grad_norm": 1.1159295320388067, "learning_rate": 4.85689711011661e-07, "loss": 0.3255, "step": 27899 }, { "epoch": 2.618243243243243, "grad_norm": 1.1418444525140237, "learning_rate": 4.854550109381645e-07, "loss": 0.3011, "step": 27900 }, { "epoch": 2.6183370870870872, "grad_norm": 1.0414682346015693, "learning_rate": 4.852203646921189e-07, "loss": 0.275, "step": 27901 }, { "epoch": 2.618430930930931, "grad_norm": 1.3794113425649341, "learning_rate": 4.84985772276323e-07, "loss": 0.3116, "step": 27902 }, { "epoch": 2.618524774774775, "grad_norm": 1.1531750543124784, "learning_rate": 4.847512336935734e-07, "loss": 0.3114, "step": 27903 }, { "epoch": 2.6186186186186187, "grad_norm": 2.1130051671061465, "learning_rate": 4.845167489466652e-07, "loss": 0.3644, "step": 27904 }, { "epoch": 2.6187124624624625, "grad_norm": 1.1038588058490364, "learning_rate": 4.84282318038396e-07, "loss": 0.3365, "step": 27905 }, { "epoch": 2.6188063063063063, "grad_norm": 2.1176855573678957, "learning_rate": 4.840479409715598e-07, "loss": 0.2999, "step": 27906 }, { "epoch": 2.61890015015015, "grad_norm": 1.1387955813539785, "learning_rate": 4.838136177489522e-07, "loss": 0.3286, "step": 27907 }, { "epoch": 2.618993993993994, "grad_norm": 1.074931842022865, "learning_rate": 4.835793483733658e-07, "loss": 0.2876, "step": 27908 }, { "epoch": 2.6190878378378377, "grad_norm": 1.1397199741674968, "learning_rate": 4.833451328475947e-07, "loss": 0.3155, "step": 27909 }, { "epoch": 2.6191816816816815, "grad_norm": 1.0913442520671068, "learning_rate": 4.831109711744303e-07, "loss": 0.3, "step": 27910 }, { "epoch": 2.6192755255255253, "grad_norm": 1.0003213594531128, "learning_rate": 4.828768633566661e-07, "loss": 0.3083, "step": 27911 }, { "epoch": 2.6193693693693696, "grad_norm": 1.214306234944119, "learning_rate": 4.826428093970931e-07, "loss": 0.3379, "step": 27912 }, { "epoch": 2.619463213213213, "grad_norm": 1.2326460871810825, "learning_rate": 4.824088092985002e-07, "loss": 0.3211, "step": 27913 }, { "epoch": 2.619557057057057, "grad_norm": 1.2903377601604293, "learning_rate": 4.821748630636796e-07, "loss": 0.3116, "step": 27914 }, { "epoch": 2.619650900900901, "grad_norm": 1.1266593306956416, "learning_rate": 4.819409706954204e-07, "loss": 0.3116, "step": 27915 }, { "epoch": 2.619744744744745, "grad_norm": 1.1033372869515428, "learning_rate": 4.817071321965094e-07, "loss": 0.2734, "step": 27916 }, { "epoch": 2.6198385885885886, "grad_norm": 1.2344991826868736, "learning_rate": 4.81473347569737e-07, "loss": 0.3245, "step": 27917 }, { "epoch": 2.6199324324324325, "grad_norm": 1.145322822485109, "learning_rate": 4.812396168178896e-07, "loss": 0.3321, "step": 27918 }, { "epoch": 2.6200262762762763, "grad_norm": 6.530497940245756, "learning_rate": 4.810059399437545e-07, "loss": 0.3354, "step": 27919 }, { "epoch": 2.62012012012012, "grad_norm": 1.2260110235420147, "learning_rate": 4.807723169501171e-07, "loss": 0.293, "step": 27920 }, { "epoch": 2.620213963963964, "grad_norm": 1.0375712580250072, "learning_rate": 4.805387478397633e-07, "loss": 0.3103, "step": 27921 }, { "epoch": 2.6203078078078077, "grad_norm": 1.2150305661764587, "learning_rate": 4.803052326154767e-07, "loss": 0.2947, "step": 27922 }, { "epoch": 2.6204016516516515, "grad_norm": 1.0374360775001301, "learning_rate": 4.80071771280044e-07, "loss": 0.3137, "step": 27923 }, { "epoch": 2.6204954954954953, "grad_norm": 1.5416984224835129, "learning_rate": 4.798383638362474e-07, "loss": 0.2826, "step": 27924 }, { "epoch": 2.6205893393393396, "grad_norm": 1.6334787901481542, "learning_rate": 4.796050102868693e-07, "loss": 0.3107, "step": 27925 }, { "epoch": 2.620683183183183, "grad_norm": 1.1590551374867093, "learning_rate": 4.793717106346934e-07, "loss": 0.3305, "step": 27926 }, { "epoch": 2.620777027027027, "grad_norm": 1.9995354695718732, "learning_rate": 4.791384648825009e-07, "loss": 0.319, "step": 27927 }, { "epoch": 2.620870870870871, "grad_norm": 1.3382684263993547, "learning_rate": 4.789052730330712e-07, "loss": 0.3042, "step": 27928 }, { "epoch": 2.620964714714715, "grad_norm": 1.0188293139117253, "learning_rate": 4.786721350891876e-07, "loss": 0.3147, "step": 27929 }, { "epoch": 2.6210585585585586, "grad_norm": 1.1250056079207142, "learning_rate": 4.78439051053628e-07, "loss": 0.3134, "step": 27930 }, { "epoch": 2.6211524024024024, "grad_norm": 1.2073298948017592, "learning_rate": 4.782060209291712e-07, "loss": 0.2858, "step": 27931 }, { "epoch": 2.6212462462462462, "grad_norm": 1.1681868151075319, "learning_rate": 4.779730447185971e-07, "loss": 0.3494, "step": 27932 }, { "epoch": 2.62134009009009, "grad_norm": 1.1347241182437153, "learning_rate": 4.777401224246819e-07, "loss": 0.2885, "step": 27933 }, { "epoch": 2.621433933933934, "grad_norm": 1.2417375708976899, "learning_rate": 4.775072540502029e-07, "loss": 0.2669, "step": 27934 }, { "epoch": 2.6215277777777777, "grad_norm": 1.2260730932864234, "learning_rate": 4.772744395979378e-07, "loss": 0.305, "step": 27935 }, { "epoch": 2.6216216216216215, "grad_norm": 1.126721066218439, "learning_rate": 4.770416790706617e-07, "loss": 0.3023, "step": 27936 }, { "epoch": 2.6217154654654653, "grad_norm": 1.0643772288626145, "learning_rate": 4.7680897247114945e-07, "loss": 0.3069, "step": 27937 }, { "epoch": 2.6218093093093096, "grad_norm": 0.9362927302795809, "learning_rate": 4.7657631980217724e-07, "loss": 0.2855, "step": 27938 }, { "epoch": 2.621903153153153, "grad_norm": 1.0625110154295034, "learning_rate": 4.763437210665178e-07, "loss": 0.2953, "step": 27939 }, { "epoch": 2.621996996996997, "grad_norm": 1.204834689493274, "learning_rate": 4.7611117626694346e-07, "loss": 0.3416, "step": 27940 }, { "epoch": 2.622090840840841, "grad_norm": 1.0745676736242182, "learning_rate": 4.7587868540622916e-07, "loss": 0.3197, "step": 27941 }, { "epoch": 2.622184684684685, "grad_norm": 1.0726454461655086, "learning_rate": 4.756462484871455e-07, "loss": 0.2981, "step": 27942 }, { "epoch": 2.6222785285285286, "grad_norm": 1.5239087365071418, "learning_rate": 4.75413865512464e-07, "loss": 0.2971, "step": 27943 }, { "epoch": 2.6223723723723724, "grad_norm": 1.140165764443954, "learning_rate": 4.75181536484956e-07, "loss": 0.3083, "step": 27944 }, { "epoch": 2.6224662162162162, "grad_norm": 1.1674447169049433, "learning_rate": 4.7494926140739084e-07, "loss": 0.3139, "step": 27945 }, { "epoch": 2.62256006006006, "grad_norm": 1.1912544951742627, "learning_rate": 4.747170402825374e-07, "loss": 0.2906, "step": 27946 }, { "epoch": 2.622653903903904, "grad_norm": 1.0966850611155345, "learning_rate": 4.744848731131657e-07, "loss": 0.2982, "step": 27947 }, { "epoch": 2.6227477477477477, "grad_norm": 1.1880954009265088, "learning_rate": 4.742527599020441e-07, "loss": 0.321, "step": 27948 }, { "epoch": 2.6228415915915915, "grad_norm": 1.3173829305189835, "learning_rate": 4.740207006519382e-07, "loss": 0.3225, "step": 27949 }, { "epoch": 2.6229354354354353, "grad_norm": 1.4291736113310367, "learning_rate": 4.737886953656173e-07, "loss": 0.3337, "step": 27950 }, { "epoch": 2.6230292792792795, "grad_norm": 1.2803665051314452, "learning_rate": 4.7355674404584604e-07, "loss": 0.3452, "step": 27951 }, { "epoch": 2.623123123123123, "grad_norm": 2.5754003973233095, "learning_rate": 4.733248466953899e-07, "loss": 0.3159, "step": 27952 }, { "epoch": 2.623216966966967, "grad_norm": 1.1219888402024736, "learning_rate": 4.730930033170156e-07, "loss": 0.3062, "step": 27953 }, { "epoch": 2.623310810810811, "grad_norm": 1.2511337795945503, "learning_rate": 4.728612139134858e-07, "loss": 0.3639, "step": 27954 }, { "epoch": 2.6234046546546548, "grad_norm": 1.245167425054669, "learning_rate": 4.726294784875646e-07, "loss": 0.3109, "step": 27955 }, { "epoch": 2.6234984984984986, "grad_norm": 1.1836533851095246, "learning_rate": 4.7239779704201513e-07, "loss": 0.3064, "step": 27956 }, { "epoch": 2.6235923423423424, "grad_norm": 1.1300850786389887, "learning_rate": 4.721661695795993e-07, "loss": 0.3154, "step": 27957 }, { "epoch": 2.623686186186186, "grad_norm": 1.1535824177621548, "learning_rate": 4.719345961030786e-07, "loss": 0.3178, "step": 27958 }, { "epoch": 2.62378003003003, "grad_norm": 1.265996534612694, "learning_rate": 4.717030766152153e-07, "loss": 0.2935, "step": 27959 }, { "epoch": 2.623873873873874, "grad_norm": 1.2514859737801165, "learning_rate": 4.7147161111876947e-07, "loss": 0.2961, "step": 27960 }, { "epoch": 2.6239677177177176, "grad_norm": 1.211588825586613, "learning_rate": 4.712401996164995e-07, "loss": 0.3326, "step": 27961 }, { "epoch": 2.6240615615615615, "grad_norm": 1.2780732486525295, "learning_rate": 4.7100884211116637e-07, "loss": 0.2894, "step": 27962 }, { "epoch": 2.6241554054054053, "grad_norm": 1.0290428432768226, "learning_rate": 4.7077753860552797e-07, "loss": 0.3192, "step": 27963 }, { "epoch": 2.6242492492492495, "grad_norm": 1.0844836296004232, "learning_rate": 4.7054628910234104e-07, "loss": 0.3049, "step": 27964 }, { "epoch": 2.624343093093093, "grad_norm": 1.1001280048127082, "learning_rate": 4.7031509360436487e-07, "loss": 0.304, "step": 27965 }, { "epoch": 2.624436936936937, "grad_norm": 1.0042138893337298, "learning_rate": 4.7008395211435466e-07, "loss": 0.3326, "step": 27966 }, { "epoch": 2.6245307807807805, "grad_norm": 0.9244966070687581, "learning_rate": 4.69852864635067e-07, "loss": 0.3085, "step": 27967 }, { "epoch": 2.6246246246246248, "grad_norm": 1.3857202279885754, "learning_rate": 4.696218311692563e-07, "loss": 0.3053, "step": 27968 }, { "epoch": 2.6247184684684686, "grad_norm": 1.1521735428489452, "learning_rate": 4.693908517196782e-07, "loss": 0.2934, "step": 27969 }, { "epoch": 2.6248123123123124, "grad_norm": 1.2961932884526721, "learning_rate": 4.6915992628908504e-07, "loss": 0.2888, "step": 27970 }, { "epoch": 2.624906156156156, "grad_norm": 1.4150377783250683, "learning_rate": 4.689290548802322e-07, "loss": 0.2661, "step": 27971 }, { "epoch": 2.625, "grad_norm": 1.2311557194589704, "learning_rate": 4.6869823749587206e-07, "loss": 0.3211, "step": 27972 }, { "epoch": 2.625093843843844, "grad_norm": 1.2167674602203602, "learning_rate": 4.684674741387546e-07, "loss": 0.3257, "step": 27973 }, { "epoch": 2.6251876876876876, "grad_norm": 1.2649331419545589, "learning_rate": 4.6823676481163425e-07, "loss": 0.3188, "step": 27974 }, { "epoch": 2.6252815315315314, "grad_norm": 1.3079651443720826, "learning_rate": 4.6800610951726e-07, "loss": 0.3102, "step": 27975 }, { "epoch": 2.6253753753753752, "grad_norm": 1.1564282242155044, "learning_rate": 4.6777550825838124e-07, "loss": 0.2929, "step": 27976 }, { "epoch": 2.6254692192192195, "grad_norm": 1.2034907110045505, "learning_rate": 4.6754496103774915e-07, "loss": 0.3345, "step": 27977 }, { "epoch": 2.625563063063063, "grad_norm": 0.9941855349022654, "learning_rate": 4.6731446785811264e-07, "loss": 0.2794, "step": 27978 }, { "epoch": 2.625656906906907, "grad_norm": 1.133542772844835, "learning_rate": 4.670840287222189e-07, "loss": 0.3282, "step": 27979 }, { "epoch": 2.6257507507507505, "grad_norm": 1.3904104308636254, "learning_rate": 4.6685364363281583e-07, "loss": 0.2882, "step": 27980 }, { "epoch": 2.6258445945945947, "grad_norm": 1.164184272723228, "learning_rate": 4.6662331259265003e-07, "loss": 0.2707, "step": 27981 }, { "epoch": 2.6259384384384385, "grad_norm": 1.086466285324316, "learning_rate": 4.6639303560446715e-07, "loss": 0.3007, "step": 27982 }, { "epoch": 2.6260322822822824, "grad_norm": 1.1216269091422626, "learning_rate": 4.661628126710149e-07, "loss": 0.326, "step": 27983 }, { "epoch": 2.626126126126126, "grad_norm": 1.0945017867394777, "learning_rate": 4.6593264379503677e-07, "loss": 0.3035, "step": 27984 }, { "epoch": 2.62621996996997, "grad_norm": 1.1260985071896958, "learning_rate": 4.657025289792766e-07, "loss": 0.2984, "step": 27985 }, { "epoch": 2.626313813813814, "grad_norm": 1.113818305096689, "learning_rate": 4.654724682264794e-07, "loss": 0.263, "step": 27986 }, { "epoch": 2.6264076576576576, "grad_norm": 2.8683446011058424, "learning_rate": 4.65242461539388e-07, "loss": 0.3152, "step": 27987 }, { "epoch": 2.6265015015015014, "grad_norm": 1.0412005120484906, "learning_rate": 4.6501250892074355e-07, "loss": 0.3085, "step": 27988 }, { "epoch": 2.6265953453453452, "grad_norm": 1.0458759321030136, "learning_rate": 4.6478261037328943e-07, "loss": 0.2975, "step": 27989 }, { "epoch": 2.626689189189189, "grad_norm": 1.2674061780780759, "learning_rate": 4.645527658997662e-07, "loss": 0.2849, "step": 27990 }, { "epoch": 2.626783033033033, "grad_norm": 1.082423035713983, "learning_rate": 4.643229755029144e-07, "loss": 0.3214, "step": 27991 }, { "epoch": 2.626876876876877, "grad_norm": 1.164959367295011, "learning_rate": 4.64093239185473e-07, "loss": 0.299, "step": 27992 }, { "epoch": 2.6269707207207205, "grad_norm": 1.0787598395274565, "learning_rate": 4.6386355695018203e-07, "loss": 0.3201, "step": 27993 }, { "epoch": 2.6270645645645647, "grad_norm": 9.966556848732813, "learning_rate": 4.6363392879977876e-07, "loss": 0.2833, "step": 27994 }, { "epoch": 2.6271584084084085, "grad_norm": 1.4373906579280176, "learning_rate": 4.634043547370032e-07, "loss": 0.2962, "step": 27995 }, { "epoch": 2.6272522522522523, "grad_norm": 1.1395214552093091, "learning_rate": 4.631748347645909e-07, "loss": 0.3225, "step": 27996 }, { "epoch": 2.627346096096096, "grad_norm": 1.109528105882487, "learning_rate": 4.6294536888527863e-07, "loss": 0.3244, "step": 27997 }, { "epoch": 2.62743993993994, "grad_norm": 1.422860209428316, "learning_rate": 4.627159571018036e-07, "loss": 0.3208, "step": 27998 }, { "epoch": 2.6275337837837838, "grad_norm": 1.081757549438019, "learning_rate": 4.624865994168998e-07, "loss": 0.2792, "step": 27999 }, { "epoch": 2.6276276276276276, "grad_norm": 1.117311040249167, "learning_rate": 4.622572958333016e-07, "loss": 0.3168, "step": 28000 }, { "epoch": 2.6277214714714714, "grad_norm": 1.2543955090735908, "learning_rate": 4.6202804635374464e-07, "loss": 0.2722, "step": 28001 }, { "epoch": 2.627815315315315, "grad_norm": 1.1315370118033083, "learning_rate": 4.617988509809618e-07, "loss": 0.3359, "step": 28002 }, { "epoch": 2.627909159159159, "grad_norm": 1.0544902895823072, "learning_rate": 4.615697097176836e-07, "loss": 0.2797, "step": 28003 }, { "epoch": 2.628003003003003, "grad_norm": 1.147113736304182, "learning_rate": 4.6134062256664557e-07, "loss": 0.3111, "step": 28004 }, { "epoch": 2.628096846846847, "grad_norm": 1.1671325837666524, "learning_rate": 4.6111158953057786e-07, "loss": 0.3076, "step": 28005 }, { "epoch": 2.6281906906906904, "grad_norm": 2.0003739635304183, "learning_rate": 4.608826106122094e-07, "loss": 0.3383, "step": 28006 }, { "epoch": 2.6282845345345347, "grad_norm": 1.2248514863609279, "learning_rate": 4.6065368581427237e-07, "loss": 0.3006, "step": 28007 }, { "epoch": 2.6283783783783785, "grad_norm": 1.4121364673721513, "learning_rate": 4.604248151394963e-07, "loss": 0.2977, "step": 28008 }, { "epoch": 2.6284722222222223, "grad_norm": 1.184378957316001, "learning_rate": 4.601959985906079e-07, "loss": 0.2909, "step": 28009 }, { "epoch": 2.628566066066066, "grad_norm": 0.932809477448609, "learning_rate": 4.599672361703383e-07, "loss": 0.3248, "step": 28010 }, { "epoch": 2.62865990990991, "grad_norm": 1.227141461610615, "learning_rate": 4.5973852788141313e-07, "loss": 0.312, "step": 28011 }, { "epoch": 2.6287537537537538, "grad_norm": 1.131362050699408, "learning_rate": 4.5950987372655964e-07, "loss": 0.334, "step": 28012 }, { "epoch": 2.6288475975975976, "grad_norm": 1.1089431179977802, "learning_rate": 4.592812737085045e-07, "loss": 0.2949, "step": 28013 }, { "epoch": 2.6289414414414414, "grad_norm": 0.9785421289293289, "learning_rate": 4.590527278299739e-07, "loss": 0.3533, "step": 28014 }, { "epoch": 2.629035285285285, "grad_norm": 1.1604539801777414, "learning_rate": 4.588242360936906e-07, "loss": 0.278, "step": 28015 }, { "epoch": 2.629129129129129, "grad_norm": 1.0113744097679696, "learning_rate": 4.5859579850238143e-07, "loss": 0.3248, "step": 28016 }, { "epoch": 2.629222972972973, "grad_norm": 1.2027102567028845, "learning_rate": 4.5836741505876915e-07, "loss": 0.3146, "step": 28017 }, { "epoch": 2.629316816816817, "grad_norm": 1.1899757897070982, "learning_rate": 4.5813908576557656e-07, "loss": 0.3091, "step": 28018 }, { "epoch": 2.6294106606606604, "grad_norm": 1.3787652333455107, "learning_rate": 4.5791081062552587e-07, "loss": 0.3782, "step": 28019 }, { "epoch": 2.6295045045045047, "grad_norm": 1.2357244897574473, "learning_rate": 4.5768258964134e-07, "loss": 0.2641, "step": 28020 }, { "epoch": 2.6295983483483485, "grad_norm": 1.2629002087295713, "learning_rate": 4.5745442281573737e-07, "loss": 0.2953, "step": 28021 }, { "epoch": 2.6296921921921923, "grad_norm": 1.4936305202239202, "learning_rate": 4.572263101514418e-07, "loss": 0.3059, "step": 28022 }, { "epoch": 2.629786036036036, "grad_norm": 1.1793927619276432, "learning_rate": 4.5699825165117176e-07, "loss": 0.3216, "step": 28023 }, { "epoch": 2.62987987987988, "grad_norm": 1.20388013772645, "learning_rate": 4.5677024731764495e-07, "loss": 0.3169, "step": 28024 }, { "epoch": 2.6299737237237237, "grad_norm": 1.0970703927689462, "learning_rate": 4.565422971535821e-07, "loss": 0.3127, "step": 28025 }, { "epoch": 2.6300675675675675, "grad_norm": 1.1082978608294747, "learning_rate": 4.5631440116170046e-07, "loss": 0.3006, "step": 28026 }, { "epoch": 2.6301614114114114, "grad_norm": 1.217041851883671, "learning_rate": 4.560865593447156e-07, "loss": 0.3675, "step": 28027 }, { "epoch": 2.630255255255255, "grad_norm": 1.1011652970053913, "learning_rate": 4.55858771705347e-07, "loss": 0.3252, "step": 28028 }, { "epoch": 2.630349099099099, "grad_norm": 1.0476973832315504, "learning_rate": 4.556310382463086e-07, "loss": 0.275, "step": 28029 }, { "epoch": 2.630442942942943, "grad_norm": 1.2662716531221867, "learning_rate": 4.5540335897031607e-07, "loss": 0.3287, "step": 28030 }, { "epoch": 2.630536786786787, "grad_norm": 1.4623658335682554, "learning_rate": 4.551757338800844e-07, "loss": 0.3079, "step": 28031 }, { "epoch": 2.6306306306306304, "grad_norm": 1.0675258726768015, "learning_rate": 4.5494816297832754e-07, "loss": 0.2961, "step": 28032 }, { "epoch": 2.6307244744744747, "grad_norm": 1.1338930698268705, "learning_rate": 4.547206462677578e-07, "loss": 0.3121, "step": 28033 }, { "epoch": 2.6308183183183185, "grad_norm": 1.3252187087431309, "learning_rate": 4.544931837510891e-07, "loss": 0.2647, "step": 28034 }, { "epoch": 2.6309121621621623, "grad_norm": 1.6594434602970123, "learning_rate": 4.5426577543103366e-07, "loss": 0.3262, "step": 28035 }, { "epoch": 2.631006006006006, "grad_norm": 1.0138080035314843, "learning_rate": 4.540384213103011e-07, "loss": 0.3154, "step": 28036 }, { "epoch": 2.63109984984985, "grad_norm": 1.3709654856074722, "learning_rate": 4.5381112139160467e-07, "loss": 0.3002, "step": 28037 }, { "epoch": 2.6311936936936937, "grad_norm": 1.101302143246684, "learning_rate": 4.535838756776534e-07, "loss": 0.3015, "step": 28038 }, { "epoch": 2.6312875375375375, "grad_norm": 1.213912286118087, "learning_rate": 4.533566841711556e-07, "loss": 0.344, "step": 28039 }, { "epoch": 2.6313813813813813, "grad_norm": 1.62558129155718, "learning_rate": 4.53129546874822e-07, "loss": 0.2863, "step": 28040 }, { "epoch": 2.631475225225225, "grad_norm": 1.168666207486838, "learning_rate": 4.5290246379135983e-07, "loss": 0.2936, "step": 28041 }, { "epoch": 2.631569069069069, "grad_norm": 1.1629634422509385, "learning_rate": 4.526754349234769e-07, "loss": 0.2794, "step": 28042 }, { "epoch": 2.6316629129129128, "grad_norm": 1.2147604401361596, "learning_rate": 4.524484602738799e-07, "loss": 0.2902, "step": 28043 }, { "epoch": 2.631756756756757, "grad_norm": 0.9859626886382148, "learning_rate": 4.5222153984527507e-07, "loss": 0.2746, "step": 28044 }, { "epoch": 2.6318506006006004, "grad_norm": 1.1055318602276245, "learning_rate": 4.519946736403674e-07, "loss": 0.2713, "step": 28045 }, { "epoch": 2.6319444444444446, "grad_norm": 1.1040733678876486, "learning_rate": 4.517678616618631e-07, "loss": 0.3185, "step": 28046 }, { "epoch": 2.6320382882882885, "grad_norm": 1.1394513110209628, "learning_rate": 4.515411039124662e-07, "loss": 0.3288, "step": 28047 }, { "epoch": 2.6321321321321323, "grad_norm": 1.2044744494147688, "learning_rate": 4.5131440039487883e-07, "loss": 0.3055, "step": 28048 }, { "epoch": 2.632225975975976, "grad_norm": 1.4850374683372731, "learning_rate": 4.510877511118067e-07, "loss": 0.3101, "step": 28049 }, { "epoch": 2.63231981981982, "grad_norm": 1.0446971917801295, "learning_rate": 4.5086115606594984e-07, "loss": 0.3021, "step": 28050 }, { "epoch": 2.6324136636636637, "grad_norm": 1.084842578125704, "learning_rate": 4.506346152600105e-07, "loss": 0.2974, "step": 28051 }, { "epoch": 2.6325075075075075, "grad_norm": 1.1192791949268015, "learning_rate": 4.5040812869669106e-07, "loss": 0.3052, "step": 28052 }, { "epoch": 2.6326013513513513, "grad_norm": 1.0159414964709896, "learning_rate": 4.5018169637869036e-07, "loss": 0.2781, "step": 28053 }, { "epoch": 2.632695195195195, "grad_norm": 1.1002892704327547, "learning_rate": 4.499553183087091e-07, "loss": 0.3221, "step": 28054 }, { "epoch": 2.632789039039039, "grad_norm": 1.487536997028776, "learning_rate": 4.497289944894462e-07, "loss": 0.3328, "step": 28055 }, { "epoch": 2.6328828828828827, "grad_norm": 0.9964630386288309, "learning_rate": 4.4950272492360004e-07, "loss": 0.345, "step": 28056 }, { "epoch": 2.632976726726727, "grad_norm": 1.127675119428808, "learning_rate": 4.492765096138674e-07, "loss": 0.3203, "step": 28057 }, { "epoch": 2.6330705705705704, "grad_norm": 1.283902845432749, "learning_rate": 4.4905034856294774e-07, "loss": 0.3103, "step": 28058 }, { "epoch": 2.6331644144144146, "grad_norm": 1.0905967998141683, "learning_rate": 4.488242417735361e-07, "loss": 0.3171, "step": 28059 }, { "epoch": 2.633258258258258, "grad_norm": 1.2037635148630188, "learning_rate": 4.4859818924832764e-07, "loss": 0.3566, "step": 28060 }, { "epoch": 2.6333521021021022, "grad_norm": 1.1830253419064016, "learning_rate": 4.4837219099002014e-07, "loss": 0.3295, "step": 28061 }, { "epoch": 2.633445945945946, "grad_norm": 1.3193778474347644, "learning_rate": 4.4814624700130596e-07, "loss": 0.3477, "step": 28062 }, { "epoch": 2.63353978978979, "grad_norm": 1.1500421299905716, "learning_rate": 4.479203572848795e-07, "loss": 0.3122, "step": 28063 }, { "epoch": 2.6336336336336337, "grad_norm": 1.0598343209145173, "learning_rate": 4.4769452184343475e-07, "loss": 0.2745, "step": 28064 }, { "epoch": 2.6337274774774775, "grad_norm": 1.288329611341109, "learning_rate": 4.4746874067966464e-07, "loss": 0.3225, "step": 28065 }, { "epoch": 2.6338213213213213, "grad_norm": 1.2215309618890182, "learning_rate": 4.4724301379625967e-07, "loss": 0.3087, "step": 28066 }, { "epoch": 2.633915165165165, "grad_norm": 1.11744053028907, "learning_rate": 4.4701734119591277e-07, "loss": 0.2962, "step": 28067 }, { "epoch": 2.634009009009009, "grad_norm": 0.9419635222341234, "learning_rate": 4.46791722881314e-07, "loss": 0.2875, "step": 28068 }, { "epoch": 2.6341028528528527, "grad_norm": 1.0178917804776093, "learning_rate": 4.4656615885515175e-07, "loss": 0.2763, "step": 28069 }, { "epoch": 2.6341966966966965, "grad_norm": 1.162948960668023, "learning_rate": 4.463406491201189e-07, "loss": 0.319, "step": 28070 }, { "epoch": 2.6342905405405403, "grad_norm": 1.0223825158966975, "learning_rate": 4.461151936789021e-07, "loss": 0.3286, "step": 28071 }, { "epoch": 2.6343843843843846, "grad_norm": 1.0451624041432017, "learning_rate": 4.4588979253418876e-07, "loss": 0.3384, "step": 28072 }, { "epoch": 2.634478228228228, "grad_norm": 1.2125108378622889, "learning_rate": 4.456644456886683e-07, "loss": 0.3143, "step": 28073 }, { "epoch": 2.6345720720720722, "grad_norm": 1.1205532054442853, "learning_rate": 4.4543915314502696e-07, "loss": 0.3101, "step": 28074 }, { "epoch": 2.634665915915916, "grad_norm": 1.1072400790118404, "learning_rate": 4.4521391490594924e-07, "loss": 0.3242, "step": 28075 }, { "epoch": 2.63475975975976, "grad_norm": 1.033777435531111, "learning_rate": 4.4498873097412353e-07, "loss": 0.3007, "step": 28076 }, { "epoch": 2.6348536036036037, "grad_norm": 1.241390337155994, "learning_rate": 4.4476360135223273e-07, "loss": 0.3521, "step": 28077 }, { "epoch": 2.6349474474474475, "grad_norm": 1.1572198779124565, "learning_rate": 4.4453852604296186e-07, "loss": 0.2911, "step": 28078 }, { "epoch": 2.6350412912912913, "grad_norm": 1.2528655281502628, "learning_rate": 4.443135050489944e-07, "loss": 0.3188, "step": 28079 }, { "epoch": 2.635135135135135, "grad_norm": 1.079653192999875, "learning_rate": 4.4408853837301313e-07, "loss": 0.3012, "step": 28080 }, { "epoch": 2.635228978978979, "grad_norm": 1.0829204334126181, "learning_rate": 4.4386362601769984e-07, "loss": 0.303, "step": 28081 }, { "epoch": 2.6353228228228227, "grad_norm": 1.1658797072769238, "learning_rate": 4.4363876798573734e-07, "loss": 0.2655, "step": 28082 }, { "epoch": 2.6354166666666665, "grad_norm": 1.1319619548574515, "learning_rate": 4.4341396427980574e-07, "loss": 0.3109, "step": 28083 }, { "epoch": 2.6355105105105103, "grad_norm": 1.287546366122582, "learning_rate": 4.431892149025857e-07, "loss": 0.3103, "step": 28084 }, { "epoch": 2.6356043543543546, "grad_norm": 1.088324736932388, "learning_rate": 4.429645198567573e-07, "loss": 0.309, "step": 28085 }, { "epoch": 2.635698198198198, "grad_norm": 1.094545864184615, "learning_rate": 4.4273987914499885e-07, "loss": 0.3057, "step": 28086 }, { "epoch": 2.635792042042042, "grad_norm": 1.2146873420687567, "learning_rate": 4.4251529276998883e-07, "loss": 0.326, "step": 28087 }, { "epoch": 2.635885885885886, "grad_norm": 1.1605394245757705, "learning_rate": 4.4229076073440623e-07, "loss": 0.2908, "step": 28088 }, { "epoch": 2.63597972972973, "grad_norm": 1.0776275107719944, "learning_rate": 4.4206628304092726e-07, "loss": 0.2773, "step": 28089 }, { "epoch": 2.6360735735735736, "grad_norm": 1.0419411976803978, "learning_rate": 4.418418596922286e-07, "loss": 0.3167, "step": 28090 }, { "epoch": 2.6361674174174174, "grad_norm": 35.111348372498284, "learning_rate": 4.4161749069098535e-07, "loss": 0.3057, "step": 28091 }, { "epoch": 2.6362612612612613, "grad_norm": 1.1185178321878957, "learning_rate": 4.4139317603987375e-07, "loss": 0.3025, "step": 28092 }, { "epoch": 2.636355105105105, "grad_norm": 1.0424164339413795, "learning_rate": 4.411689157415672e-07, "loss": 0.3357, "step": 28093 }, { "epoch": 2.636448948948949, "grad_norm": 1.063061134591794, "learning_rate": 4.4094470979874017e-07, "loss": 0.3444, "step": 28094 }, { "epoch": 2.6365427927927927, "grad_norm": 1.3659501047743747, "learning_rate": 4.4072055821406667e-07, "loss": 0.2789, "step": 28095 }, { "epoch": 2.6366366366366365, "grad_norm": 1.057911914005939, "learning_rate": 4.404964609902174e-07, "loss": 0.3415, "step": 28096 }, { "epoch": 2.6367304804804803, "grad_norm": 1.1072114091747987, "learning_rate": 4.4027241812986677e-07, "loss": 0.3172, "step": 28097 }, { "epoch": 2.6368243243243246, "grad_norm": 2.062270752021441, "learning_rate": 4.400484296356844e-07, "loss": 0.3174, "step": 28098 }, { "epoch": 2.636918168168168, "grad_norm": 1.2092046772994083, "learning_rate": 4.3982449551034035e-07, "loss": 0.3305, "step": 28099 }, { "epoch": 2.637012012012012, "grad_norm": 1.0141642595312919, "learning_rate": 4.3960061575650635e-07, "loss": 0.3108, "step": 28100 }, { "epoch": 2.637105855855856, "grad_norm": 1.0403633567757784, "learning_rate": 4.393767903768509e-07, "loss": 0.3292, "step": 28101 }, { "epoch": 2.6371996996997, "grad_norm": 1.075274274736265, "learning_rate": 4.3915301937404287e-07, "loss": 0.2917, "step": 28102 }, { "epoch": 2.6372935435435436, "grad_norm": 0.9764009003886372, "learning_rate": 4.3892930275075017e-07, "loss": 0.2905, "step": 28103 }, { "epoch": 2.6373873873873874, "grad_norm": 0.9066958259454947, "learning_rate": 4.3870564050964014e-07, "loss": 0.3214, "step": 28104 }, { "epoch": 2.6374812312312312, "grad_norm": 1.5709176805033451, "learning_rate": 4.38482032653379e-07, "loss": 0.3246, "step": 28105 }, { "epoch": 2.637575075075075, "grad_norm": 1.1738704294950792, "learning_rate": 4.382584791846339e-07, "loss": 0.319, "step": 28106 }, { "epoch": 2.637668918918919, "grad_norm": 1.1561339847204797, "learning_rate": 4.3803498010607015e-07, "loss": 0.3098, "step": 28107 }, { "epoch": 2.6377627627627627, "grad_norm": 1.1747061296224648, "learning_rate": 4.3781153542035157e-07, "loss": 0.3022, "step": 28108 }, { "epoch": 2.6378566066066065, "grad_norm": 1.27518957726813, "learning_rate": 4.3758814513014337e-07, "loss": 0.3106, "step": 28109 }, { "epoch": 2.6379504504504503, "grad_norm": 1.0300811010965323, "learning_rate": 4.3736480923810887e-07, "loss": 0.3675, "step": 28110 }, { "epoch": 2.6380442942942945, "grad_norm": 1.2067483355772644, "learning_rate": 4.371415277469099e-07, "loss": 0.3167, "step": 28111 }, { "epoch": 2.638138138138138, "grad_norm": 1.1129607862369724, "learning_rate": 4.369183006592104e-07, "loss": 0.3395, "step": 28112 }, { "epoch": 2.638231981981982, "grad_norm": 1.0187003670189874, "learning_rate": 4.36695127977671e-07, "loss": 0.3195, "step": 28113 }, { "epoch": 2.638325825825826, "grad_norm": 1.4788401154050768, "learning_rate": 4.3647200970495294e-07, "loss": 0.3233, "step": 28114 }, { "epoch": 2.63841966966967, "grad_norm": 1.065765701370273, "learning_rate": 4.362489458437158e-07, "loss": 0.3078, "step": 28115 }, { "epoch": 2.6385135135135136, "grad_norm": 1.2593059576983452, "learning_rate": 4.360259363966196e-07, "loss": 0.305, "step": 28116 }, { "epoch": 2.6386073573573574, "grad_norm": 1.128553533018714, "learning_rate": 4.3580298136632284e-07, "loss": 0.3163, "step": 28117 }, { "epoch": 2.638701201201201, "grad_norm": 1.0806449731953178, "learning_rate": 4.3558008075548495e-07, "loss": 0.326, "step": 28118 }, { "epoch": 2.638795045045045, "grad_norm": 1.0131438375733786, "learning_rate": 4.353572345667628e-07, "loss": 0.2766, "step": 28119 }, { "epoch": 2.638888888888889, "grad_norm": 1.4213559852474749, "learning_rate": 4.3513444280281256e-07, "loss": 0.3308, "step": 28120 }, { "epoch": 2.6389827327327327, "grad_norm": 1.2841769188176244, "learning_rate": 4.3491170546629324e-07, "loss": 0.2825, "step": 28121 }, { "epoch": 2.6390765765765765, "grad_norm": 1.0795511030098348, "learning_rate": 4.346890225598582e-07, "loss": 0.3513, "step": 28122 }, { "epoch": 2.6391704204204203, "grad_norm": 1.0049785780967877, "learning_rate": 4.344663940861626e-07, "loss": 0.3676, "step": 28123 }, { "epoch": 2.6392642642642645, "grad_norm": 1.1058036805890545, "learning_rate": 4.342438200478627e-07, "loss": 0.3034, "step": 28124 }, { "epoch": 2.639358108108108, "grad_norm": 1.4318512618975427, "learning_rate": 4.340213004476107e-07, "loss": 0.3368, "step": 28125 }, { "epoch": 2.639451951951952, "grad_norm": 1.3020795271632681, "learning_rate": 4.3379883528805954e-07, "loss": 0.3273, "step": 28126 }, { "epoch": 2.639545795795796, "grad_norm": 1.086940019518884, "learning_rate": 4.3357642457186264e-07, "loss": 0.3427, "step": 28127 }, { "epoch": 2.6396396396396398, "grad_norm": 1.0609069353499272, "learning_rate": 4.3335406830167294e-07, "loss": 0.3387, "step": 28128 }, { "epoch": 2.6397334834834836, "grad_norm": 1.0448834385588748, "learning_rate": 4.331317664801382e-07, "loss": 0.3079, "step": 28129 }, { "epoch": 2.6398273273273274, "grad_norm": 1.5945687085154334, "learning_rate": 4.329095191099114e-07, "loss": 0.2767, "step": 28130 }, { "epoch": 2.639921171171171, "grad_norm": 0.9971783742710157, "learning_rate": 4.326873261936426e-07, "loss": 0.2613, "step": 28131 }, { "epoch": 2.640015015015015, "grad_norm": 1.0429107177436403, "learning_rate": 4.3246518773397916e-07, "loss": 0.3246, "step": 28132 }, { "epoch": 2.640108858858859, "grad_norm": 1.09681118453853, "learning_rate": 4.322431037335717e-07, "loss": 0.2525, "step": 28133 }, { "epoch": 2.6402027027027026, "grad_norm": 1.188282013733248, "learning_rate": 4.32021074195067e-07, "loss": 0.302, "step": 28134 }, { "epoch": 2.6402965465465464, "grad_norm": 1.2172595212795208, "learning_rate": 4.3179909912111184e-07, "loss": 0.3076, "step": 28135 }, { "epoch": 2.6403903903903903, "grad_norm": 1.054546432399332, "learning_rate": 4.3157717851435524e-07, "loss": 0.2982, "step": 28136 }, { "epoch": 2.6404842342342345, "grad_norm": 1.1171593159386368, "learning_rate": 4.313553123774411e-07, "loss": 0.2958, "step": 28137 }, { "epoch": 2.640578078078078, "grad_norm": 1.0159376516190108, "learning_rate": 4.311335007130141e-07, "loss": 0.3177, "step": 28138 }, { "epoch": 2.640671921921922, "grad_norm": 1.0850653504179533, "learning_rate": 4.3091174352372143e-07, "loss": 0.3233, "step": 28139 }, { "epoch": 2.6407657657657655, "grad_norm": 1.027114126491587, "learning_rate": 4.306900408122061e-07, "loss": 0.2667, "step": 28140 }, { "epoch": 2.6408596096096097, "grad_norm": 1.331357370837318, "learning_rate": 4.304683925811104e-07, "loss": 0.2992, "step": 28141 }, { "epoch": 2.6409534534534536, "grad_norm": 1.094866059954741, "learning_rate": 4.302467988330783e-07, "loss": 0.3189, "step": 28142 }, { "epoch": 2.6410472972972974, "grad_norm": 1.1793143689135352, "learning_rate": 4.300252595707516e-07, "loss": 0.3308, "step": 28143 }, { "epoch": 2.641141141141141, "grad_norm": 1.4293900699895357, "learning_rate": 4.2980377479677036e-07, "loss": 0.3261, "step": 28144 }, { "epoch": 2.641234984984985, "grad_norm": 0.9135130047952321, "learning_rate": 4.2958234451377757e-07, "loss": 0.2887, "step": 28145 }, { "epoch": 2.641328828828829, "grad_norm": 1.3941270833074362, "learning_rate": 4.293609687244127e-07, "loss": 0.3486, "step": 28146 }, { "epoch": 2.6414226726726726, "grad_norm": 1.0977736463449992, "learning_rate": 4.2913964743131366e-07, "loss": 0.2505, "step": 28147 }, { "epoch": 2.6415165165165164, "grad_norm": 0.9501845925365409, "learning_rate": 4.2891838063712167e-07, "loss": 0.2847, "step": 28148 }, { "epoch": 2.6416103603603602, "grad_norm": 1.1943431016421338, "learning_rate": 4.286971683444735e-07, "loss": 0.2804, "step": 28149 }, { "epoch": 2.641704204204204, "grad_norm": 1.058630584287281, "learning_rate": 4.284760105560065e-07, "loss": 0.2992, "step": 28150 }, { "epoch": 2.641798048048048, "grad_norm": 1.4328280095186388, "learning_rate": 4.282549072743586e-07, "loss": 0.3466, "step": 28151 }, { "epoch": 2.641891891891892, "grad_norm": 1.1056693009899847, "learning_rate": 4.2803385850216594e-07, "loss": 0.3513, "step": 28152 }, { "epoch": 2.6419857357357355, "grad_norm": 1.1922290998991263, "learning_rate": 4.2781286424206315e-07, "loss": 0.3167, "step": 28153 }, { "epoch": 2.6420795795795797, "grad_norm": 1.1211299576807718, "learning_rate": 4.275919244966864e-07, "loss": 0.2752, "step": 28154 }, { "epoch": 2.6421734234234235, "grad_norm": 0.9728318104314442, "learning_rate": 4.2737103926866864e-07, "loss": 0.3246, "step": 28155 }, { "epoch": 2.6422672672672673, "grad_norm": 1.1401550573558485, "learning_rate": 4.2715020856064324e-07, "loss": 0.3053, "step": 28156 }, { "epoch": 2.642361111111111, "grad_norm": 1.0826803159269336, "learning_rate": 4.269294323752454e-07, "loss": 0.2802, "step": 28157 }, { "epoch": 2.642454954954955, "grad_norm": 1.103179363229373, "learning_rate": 4.267087107151058e-07, "loss": 0.313, "step": 28158 }, { "epoch": 2.642548798798799, "grad_norm": 1.6594510481181262, "learning_rate": 4.2648804358285613e-07, "loss": 0.3326, "step": 28159 }, { "epoch": 2.6426426426426426, "grad_norm": 0.9519933978035272, "learning_rate": 4.2626743098112823e-07, "loss": 0.2765, "step": 28160 }, { "epoch": 2.6427364864864864, "grad_norm": 1.1507929436583755, "learning_rate": 4.260468729125522e-07, "loss": 0.332, "step": 28161 }, { "epoch": 2.64283033033033, "grad_norm": 1.4874480294256478, "learning_rate": 4.2582636937975653e-07, "loss": 0.3188, "step": 28162 }, { "epoch": 2.642924174174174, "grad_norm": 1.116674366775452, "learning_rate": 4.256059203853724e-07, "loss": 0.2936, "step": 28163 }, { "epoch": 2.643018018018018, "grad_norm": 1.1365842153899035, "learning_rate": 4.253855259320272e-07, "loss": 0.3126, "step": 28164 }, { "epoch": 2.643111861861862, "grad_norm": 1.0532928808847497, "learning_rate": 4.2516518602234933e-07, "loss": 0.3224, "step": 28165 }, { "epoch": 2.6432057057057055, "grad_norm": 1.3226497655258354, "learning_rate": 4.2494490065896456e-07, "loss": 0.2658, "step": 28166 }, { "epoch": 2.6432995495495497, "grad_norm": 1.1213815266253904, "learning_rate": 4.247246698445007e-07, "loss": 0.3449, "step": 28167 }, { "epoch": 2.6433933933933935, "grad_norm": 1.1927816352460978, "learning_rate": 4.245044935815823e-07, "loss": 0.2742, "step": 28168 }, { "epoch": 2.6434872372372373, "grad_norm": 1.112458995437741, "learning_rate": 4.2428437187283565e-07, "loss": 0.3138, "step": 28169 }, { "epoch": 2.643581081081081, "grad_norm": 1.593882625952862, "learning_rate": 4.2406430472088533e-07, "loss": 0.3256, "step": 28170 }, { "epoch": 2.643674924924925, "grad_norm": 1.1813002727305684, "learning_rate": 4.238442921283542e-07, "loss": 0.2938, "step": 28171 }, { "epoch": 2.6437687687687688, "grad_norm": 1.375223467827365, "learning_rate": 4.2362433409786685e-07, "loss": 0.2727, "step": 28172 }, { "epoch": 2.6438626126126126, "grad_norm": 1.0735119208717145, "learning_rate": 4.2340443063204505e-07, "loss": 0.2773, "step": 28173 }, { "epoch": 2.6439564564564564, "grad_norm": 1.1630560565044772, "learning_rate": 4.2318458173351005e-07, "loss": 0.3065, "step": 28174 }, { "epoch": 2.6440503003003, "grad_norm": 1.7415128139907785, "learning_rate": 4.229647874048848e-07, "loss": 0.3247, "step": 28175 }, { "epoch": 2.644144144144144, "grad_norm": 1.0617446550302279, "learning_rate": 4.227450476487888e-07, "loss": 0.3102, "step": 28176 }, { "epoch": 2.644237987987988, "grad_norm": 1.336363160816333, "learning_rate": 4.225253624678427e-07, "loss": 0.3312, "step": 28177 }, { "epoch": 2.644331831831832, "grad_norm": 1.144085954298856, "learning_rate": 4.2230573186466563e-07, "loss": 0.2631, "step": 28178 }, { "epoch": 2.6444256756756754, "grad_norm": 1.6556637410910457, "learning_rate": 4.220861558418754e-07, "loss": 0.3311, "step": 28179 }, { "epoch": 2.6445195195195197, "grad_norm": 0.9996728881303812, "learning_rate": 4.2186663440209053e-07, "loss": 0.3443, "step": 28180 }, { "epoch": 2.6446133633633635, "grad_norm": 1.2839951253951185, "learning_rate": 4.2164716754792943e-07, "loss": 0.2959, "step": 28181 }, { "epoch": 2.6447072072072073, "grad_norm": 1.1462161494667553, "learning_rate": 4.214277552820073e-07, "loss": 0.3407, "step": 28182 }, { "epoch": 2.644801051051051, "grad_norm": 2.5752565362152833, "learning_rate": 4.212083976069409e-07, "loss": 0.341, "step": 28183 }, { "epoch": 2.644894894894895, "grad_norm": 1.0226374532924123, "learning_rate": 4.209890945253459e-07, "loss": 0.2715, "step": 28184 }, { "epoch": 2.6449887387387387, "grad_norm": 1.3849037341982835, "learning_rate": 4.207698460398374e-07, "loss": 0.3106, "step": 28185 }, { "epoch": 2.6450825825825826, "grad_norm": 1.1766466128598645, "learning_rate": 4.205506521530273e-07, "loss": 0.3111, "step": 28186 }, { "epoch": 2.6451764264264264, "grad_norm": 1.132502175423309, "learning_rate": 4.2033151286753224e-07, "loss": 0.297, "step": 28187 }, { "epoch": 2.64527027027027, "grad_norm": 1.2333483859776315, "learning_rate": 4.2011242818596364e-07, "loss": 0.3213, "step": 28188 }, { "epoch": 2.645364114114114, "grad_norm": 1.1751678266368037, "learning_rate": 4.1989339811093323e-07, "loss": 0.2864, "step": 28189 }, { "epoch": 2.645457957957958, "grad_norm": 1.106590030121008, "learning_rate": 4.1967442264505275e-07, "loss": 0.3223, "step": 28190 }, { "epoch": 2.645551801801802, "grad_norm": 1.1180087113527468, "learning_rate": 4.1945550179093354e-07, "loss": 0.237, "step": 28191 }, { "epoch": 2.6456456456456454, "grad_norm": 1.242332184376412, "learning_rate": 4.1923663555118457e-07, "loss": 0.3409, "step": 28192 }, { "epoch": 2.6457394894894897, "grad_norm": 1.3688774973863578, "learning_rate": 4.1901782392841703e-07, "loss": 0.3245, "step": 28193 }, { "epoch": 2.6458333333333335, "grad_norm": 1.2307226662517383, "learning_rate": 4.187990669252395e-07, "loss": 0.3212, "step": 28194 }, { "epoch": 2.6459271771771773, "grad_norm": 1.1879995660221758, "learning_rate": 4.185803645442588e-07, "loss": 0.3026, "step": 28195 }, { "epoch": 2.646021021021021, "grad_norm": 1.0945424257620897, "learning_rate": 4.1836171678808435e-07, "loss": 0.3179, "step": 28196 }, { "epoch": 2.646114864864865, "grad_norm": 1.3912673626449874, "learning_rate": 4.181431236593225e-07, "loss": 0.3311, "step": 28197 }, { "epoch": 2.6462087087087087, "grad_norm": 1.1402359723088393, "learning_rate": 4.179245851605784e-07, "loss": 0.2958, "step": 28198 }, { "epoch": 2.6463025525525525, "grad_norm": 1.276814892056982, "learning_rate": 4.1770610129446e-07, "loss": 0.2797, "step": 28199 }, { "epoch": 2.6463963963963963, "grad_norm": 1.0730352979288564, "learning_rate": 4.1748767206357066e-07, "loss": 0.2774, "step": 28200 }, { "epoch": 2.64649024024024, "grad_norm": 1.1848642694529397, "learning_rate": 4.172692974705156e-07, "loss": 0.3164, "step": 28201 }, { "epoch": 2.646584084084084, "grad_norm": 1.3411679592434267, "learning_rate": 4.170509775178977e-07, "loss": 0.3361, "step": 28202 }, { "epoch": 2.6466779279279278, "grad_norm": 1.1119613621780438, "learning_rate": 4.1683271220832045e-07, "loss": 0.2758, "step": 28203 }, { "epoch": 2.646771771771772, "grad_norm": 1.1251978610901985, "learning_rate": 4.166145015443851e-07, "loss": 0.3132, "step": 28204 }, { "epoch": 2.6468656156156154, "grad_norm": 1.4671249290015123, "learning_rate": 4.163963455286957e-07, "loss": 0.3193, "step": 28205 }, { "epoch": 2.6469594594594597, "grad_norm": 0.9411299131870338, "learning_rate": 4.161782441638518e-07, "loss": 0.3074, "step": 28206 }, { "epoch": 2.6470533033033035, "grad_norm": 1.1024709295773387, "learning_rate": 4.1596019745245355e-07, "loss": 0.3109, "step": 28207 }, { "epoch": 2.6471471471471473, "grad_norm": 1.2252265489913048, "learning_rate": 4.1574220539710217e-07, "loss": 0.3153, "step": 28208 }, { "epoch": 2.647240990990991, "grad_norm": 1.2455158600310758, "learning_rate": 4.1552426800039623e-07, "loss": 0.2975, "step": 28209 }, { "epoch": 2.647334834834835, "grad_norm": 1.3785498225553252, "learning_rate": 4.1530638526493305e-07, "loss": 0.321, "step": 28210 }, { "epoch": 2.6474286786786787, "grad_norm": 1.1809323293015852, "learning_rate": 4.150885571933122e-07, "loss": 0.3222, "step": 28211 }, { "epoch": 2.6475225225225225, "grad_norm": 1.4145676944757826, "learning_rate": 4.1487078378812995e-07, "loss": 0.3036, "step": 28212 }, { "epoch": 2.6476163663663663, "grad_norm": 1.4073705683050917, "learning_rate": 4.146530650519831e-07, "loss": 0.3147, "step": 28213 }, { "epoch": 2.64771021021021, "grad_norm": 0.9760489638197145, "learning_rate": 4.144354009874674e-07, "loss": 0.2659, "step": 28214 }, { "epoch": 2.647804054054054, "grad_norm": 1.0314979392623063, "learning_rate": 4.142177915971779e-07, "loss": 0.2807, "step": 28215 }, { "epoch": 2.6478978978978978, "grad_norm": 1.1091931794661047, "learning_rate": 4.140002368837087e-07, "loss": 0.2888, "step": 28216 }, { "epoch": 2.647991741741742, "grad_norm": 1.1539749217761337, "learning_rate": 4.1378273684965554e-07, "loss": 0.3651, "step": 28217 }, { "epoch": 2.6480855855855854, "grad_norm": 1.305079088174151, "learning_rate": 4.135652914976107e-07, "loss": 0.3045, "step": 28218 }, { "epoch": 2.6481794294294296, "grad_norm": 1.352404390527223, "learning_rate": 4.1334790083016554e-07, "loss": 0.2952, "step": 28219 }, { "epoch": 2.648273273273273, "grad_norm": 1.1671377602371855, "learning_rate": 4.13130564849914e-07, "loss": 0.3056, "step": 28220 }, { "epoch": 2.6483671171171173, "grad_norm": 1.1390813569761966, "learning_rate": 4.1291328355944626e-07, "loss": 0.3079, "step": 28221 }, { "epoch": 2.648460960960961, "grad_norm": 1.234325862193301, "learning_rate": 4.126960569613531e-07, "loss": 0.3459, "step": 28222 }, { "epoch": 2.648554804804805, "grad_norm": 1.0578188555338308, "learning_rate": 4.124788850582251e-07, "loss": 0.3084, "step": 28223 }, { "epoch": 2.6486486486486487, "grad_norm": 1.0475353741151328, "learning_rate": 4.122617678526514e-07, "loss": 0.3198, "step": 28224 }, { "epoch": 2.6487424924924925, "grad_norm": 1.1772593107955003, "learning_rate": 4.12044705347221e-07, "loss": 0.3208, "step": 28225 }, { "epoch": 2.6488363363363363, "grad_norm": 1.2497153750635068, "learning_rate": 4.1182769754452123e-07, "loss": 0.3399, "step": 28226 }, { "epoch": 2.64893018018018, "grad_norm": 4.274522863731777, "learning_rate": 4.1161074444713955e-07, "loss": 0.3183, "step": 28227 }, { "epoch": 2.649024024024024, "grad_norm": 1.0654215955598194, "learning_rate": 4.113938460576622e-07, "loss": 0.2977, "step": 28228 }, { "epoch": 2.6491178678678677, "grad_norm": 1.4199055701040308, "learning_rate": 4.111770023786771e-07, "loss": 0.3415, "step": 28229 }, { "epoch": 2.6492117117117115, "grad_norm": 0.8701915925650513, "learning_rate": 4.109602134127688e-07, "loss": 0.2945, "step": 28230 }, { "epoch": 2.6493055555555554, "grad_norm": 1.2481592767111698, "learning_rate": 4.107434791625209e-07, "loss": 0.3427, "step": 28231 }, { "epoch": 2.6493993993993996, "grad_norm": 1.2548607109274932, "learning_rate": 4.1052679963051957e-07, "loss": 0.2997, "step": 28232 }, { "epoch": 2.649493243243243, "grad_norm": 1.0706870471495096, "learning_rate": 4.1031017481934775e-07, "loss": 0.3114, "step": 28233 }, { "epoch": 2.6495870870870872, "grad_norm": 1.0780315254775887, "learning_rate": 4.100936047315862e-07, "loss": 0.3287, "step": 28234 }, { "epoch": 2.649680930930931, "grad_norm": 1.376127915578353, "learning_rate": 4.098770893698206e-07, "loss": 0.33, "step": 28235 }, { "epoch": 2.649774774774775, "grad_norm": 1.187657553502338, "learning_rate": 4.0966062873663e-07, "loss": 0.3465, "step": 28236 }, { "epoch": 2.6498686186186187, "grad_norm": 1.0885643018888007, "learning_rate": 4.094442228345963e-07, "loss": 0.3248, "step": 28237 }, { "epoch": 2.6499624624624625, "grad_norm": 1.3567075640751434, "learning_rate": 4.0922787166629896e-07, "loss": 0.2876, "step": 28238 }, { "epoch": 2.6500563063063063, "grad_norm": 1.2260213886783404, "learning_rate": 4.090115752343188e-07, "loss": 0.3269, "step": 28239 }, { "epoch": 2.65015015015015, "grad_norm": 1.3664102880979112, "learning_rate": 4.087953335412326e-07, "loss": 0.2951, "step": 28240 }, { "epoch": 2.650243993993994, "grad_norm": 1.3551469926405857, "learning_rate": 4.085791465896205e-07, "loss": 0.2938, "step": 28241 }, { "epoch": 2.6503378378378377, "grad_norm": 0.9388478874702794, "learning_rate": 4.0836301438206047e-07, "loss": 0.2793, "step": 28242 }, { "epoch": 2.6504316816816815, "grad_norm": 1.098679113380686, "learning_rate": 4.0814693692112717e-07, "loss": 0.2996, "step": 28243 }, { "epoch": 2.6505255255255253, "grad_norm": 1.1157964909341904, "learning_rate": 4.07930914209399e-07, "loss": 0.3237, "step": 28244 }, { "epoch": 2.6506193693693696, "grad_norm": 1.0789164145330608, "learning_rate": 4.077149462494517e-07, "loss": 0.2687, "step": 28245 }, { "epoch": 2.650713213213213, "grad_norm": 1.219920098911506, "learning_rate": 4.074990330438583e-07, "loss": 0.3323, "step": 28246 }, { "epoch": 2.650807057057057, "grad_norm": 1.26748251842973, "learning_rate": 4.072831745951955e-07, "loss": 0.3338, "step": 28247 }, { "epoch": 2.650900900900901, "grad_norm": 1.1567065286125258, "learning_rate": 4.070673709060358e-07, "loss": 0.2992, "step": 28248 }, { "epoch": 2.650994744744745, "grad_norm": 1.1790257382086613, "learning_rate": 4.0685162197895147e-07, "loss": 0.3289, "step": 28249 }, { "epoch": 2.6510885885885886, "grad_norm": 1.1783747157689182, "learning_rate": 4.0663592781651773e-07, "loss": 0.3449, "step": 28250 }, { "epoch": 2.6511824324324325, "grad_norm": 1.2581242821958267, "learning_rate": 4.0642028842130365e-07, "loss": 0.3288, "step": 28251 }, { "epoch": 2.6512762762762763, "grad_norm": 1.0582739786997113, "learning_rate": 4.0620470379588e-07, "loss": 0.3226, "step": 28252 }, { "epoch": 2.65137012012012, "grad_norm": 1.083174592945716, "learning_rate": 4.05989173942819e-07, "loss": 0.3242, "step": 28253 }, { "epoch": 2.651463963963964, "grad_norm": 1.0655204785524943, "learning_rate": 4.0577369886469044e-07, "loss": 0.3223, "step": 28254 }, { "epoch": 2.6515578078078077, "grad_norm": 1.0248231897908633, "learning_rate": 4.0555827856406104e-07, "loss": 0.3136, "step": 28255 }, { "epoch": 2.6516516516516515, "grad_norm": 1.1811923970825613, "learning_rate": 4.053429130435027e-07, "loss": 0.3187, "step": 28256 }, { "epoch": 2.6517454954954953, "grad_norm": 4.2280499865880286, "learning_rate": 4.051276023055811e-07, "loss": 0.3073, "step": 28257 }, { "epoch": 2.6518393393393396, "grad_norm": 1.3228593612359756, "learning_rate": 4.0491234635286316e-07, "loss": 0.2972, "step": 28258 }, { "epoch": 2.651933183183183, "grad_norm": 2.5405100272854844, "learning_rate": 4.0469714518791726e-07, "loss": 0.3052, "step": 28259 }, { "epoch": 2.652027027027027, "grad_norm": 1.2944904840840847, "learning_rate": 4.044819988133075e-07, "loss": 0.3165, "step": 28260 }, { "epoch": 2.652120870870871, "grad_norm": 1.2187538383016174, "learning_rate": 4.042669072315991e-07, "loss": 0.2831, "step": 28261 }, { "epoch": 2.652214714714715, "grad_norm": 1.4034528804723072, "learning_rate": 4.040518704453583e-07, "loss": 0.2923, "step": 28262 }, { "epoch": 2.6523085585585586, "grad_norm": 1.1188807321371932, "learning_rate": 4.0383688845714806e-07, "loss": 0.2999, "step": 28263 }, { "epoch": 2.6524024024024024, "grad_norm": 1.2591102878158467, "learning_rate": 4.0362196126953137e-07, "loss": 0.308, "step": 28264 }, { "epoch": 2.6524962462462462, "grad_norm": 0.9833267294847613, "learning_rate": 4.034070888850711e-07, "loss": 0.2669, "step": 28265 }, { "epoch": 2.65259009009009, "grad_norm": 1.2040388771298125, "learning_rate": 4.0319227130632907e-07, "loss": 0.2719, "step": 28266 }, { "epoch": 2.652683933933934, "grad_norm": 1.329046356725736, "learning_rate": 4.029775085358656e-07, "loss": 0.3153, "step": 28267 }, { "epoch": 2.6527777777777777, "grad_norm": 1.0801623910157094, "learning_rate": 4.027628005762435e-07, "loss": 0.3155, "step": 28268 }, { "epoch": 2.6528716216216215, "grad_norm": 2.102916001106406, "learning_rate": 4.0254814743002133e-07, "loss": 0.3057, "step": 28269 }, { "epoch": 2.6529654654654653, "grad_norm": 1.0105236746700026, "learning_rate": 4.0233354909975765e-07, "loss": 0.3112, "step": 28270 }, { "epoch": 2.6530593093093096, "grad_norm": 1.214536186600388, "learning_rate": 4.0211900558801307e-07, "loss": 0.3518, "step": 28271 }, { "epoch": 2.653153153153153, "grad_norm": 1.4111013867025093, "learning_rate": 4.019045168973451e-07, "loss": 0.323, "step": 28272 }, { "epoch": 2.653246996996997, "grad_norm": 1.555958640943119, "learning_rate": 4.016900830303094e-07, "loss": 0.3194, "step": 28273 }, { "epoch": 2.653340840840841, "grad_norm": 1.0051877005334633, "learning_rate": 4.0147570398946566e-07, "loss": 0.3065, "step": 28274 }, { "epoch": 2.653434684684685, "grad_norm": 1.018415835380366, "learning_rate": 4.012613797773679e-07, "loss": 0.3106, "step": 28275 }, { "epoch": 2.6535285285285286, "grad_norm": 1.2957702722358255, "learning_rate": 4.0104711039657183e-07, "loss": 0.3504, "step": 28276 }, { "epoch": 2.6536223723723724, "grad_norm": 1.1305132283762633, "learning_rate": 4.0083289584963216e-07, "loss": 0.2973, "step": 28277 }, { "epoch": 2.6537162162162162, "grad_norm": 1.1727929183883476, "learning_rate": 4.006187361391034e-07, "loss": 0.3441, "step": 28278 }, { "epoch": 2.65381006006006, "grad_norm": 1.2256523310710017, "learning_rate": 4.0040463126753803e-07, "loss": 0.2931, "step": 28279 }, { "epoch": 2.653903903903904, "grad_norm": 1.0563999040299188, "learning_rate": 4.001905812374901e-07, "loss": 0.3035, "step": 28280 }, { "epoch": 2.6539977477477477, "grad_norm": 1.2286467483225332, "learning_rate": 3.999765860515115e-07, "loss": 0.3566, "step": 28281 }, { "epoch": 2.6540915915915915, "grad_norm": 1.3103846944604014, "learning_rate": 3.9976264571215284e-07, "loss": 0.2729, "step": 28282 }, { "epoch": 2.6541854354354353, "grad_norm": 1.2730090552432831, "learning_rate": 3.995487602219661e-07, "loss": 0.3071, "step": 28283 }, { "epoch": 2.6542792792792795, "grad_norm": 0.9887613820975839, "learning_rate": 3.9933492958350083e-07, "loss": 0.3189, "step": 28284 }, { "epoch": 2.654373123123123, "grad_norm": 0.9024715376908327, "learning_rate": 3.991211537993062e-07, "loss": 0.2517, "step": 28285 }, { "epoch": 2.654466966966967, "grad_norm": 1.1464744686945478, "learning_rate": 3.9890743287193224e-07, "loss": 0.3058, "step": 28286 }, { "epoch": 2.654560810810811, "grad_norm": 1.131921511414042, "learning_rate": 3.986937668039259e-07, "loss": 0.2794, "step": 28287 }, { "epoch": 2.6546546546546548, "grad_norm": 1.0376340481527568, "learning_rate": 3.9848015559783626e-07, "loss": 0.3177, "step": 28288 }, { "epoch": 2.6547484984984986, "grad_norm": 1.1684365700600787, "learning_rate": 3.982665992562085e-07, "loss": 0.2869, "step": 28289 }, { "epoch": 2.6548423423423424, "grad_norm": 1.050095800874325, "learning_rate": 3.980530977815905e-07, "loss": 0.3275, "step": 28290 }, { "epoch": 2.654936186186186, "grad_norm": 0.9900334513697393, "learning_rate": 3.9783965117652534e-07, "loss": 0.3019, "step": 28291 }, { "epoch": 2.65503003003003, "grad_norm": 2.144669454878662, "learning_rate": 3.976262594435609e-07, "loss": 0.3017, "step": 28292 }, { "epoch": 2.655123873873874, "grad_norm": 1.0720272171033014, "learning_rate": 3.9741292258524023e-07, "loss": 0.3098, "step": 28293 }, { "epoch": 2.6552177177177176, "grad_norm": 1.2468474460446966, "learning_rate": 3.9719964060410623e-07, "loss": 0.3147, "step": 28294 }, { "epoch": 2.6553115615615615, "grad_norm": 1.4284359422175956, "learning_rate": 3.969864135027035e-07, "loss": 0.2778, "step": 28295 }, { "epoch": 2.6554054054054053, "grad_norm": 1.2899857596473323, "learning_rate": 3.967732412835734e-07, "loss": 0.2975, "step": 28296 }, { "epoch": 2.6554992492492495, "grad_norm": 1.1870221327484285, "learning_rate": 3.965601239492567e-07, "loss": 0.3286, "step": 28297 }, { "epoch": 2.655593093093093, "grad_norm": 1.0280291711216947, "learning_rate": 3.963470615022963e-07, "loss": 0.3405, "step": 28298 }, { "epoch": 2.655686936936937, "grad_norm": 1.0370305847677048, "learning_rate": 3.9613405394523183e-07, "loss": 0.3259, "step": 28299 }, { "epoch": 2.6557807807807805, "grad_norm": 1.0786703374745104, "learning_rate": 3.95921101280603e-07, "loss": 0.3187, "step": 28300 }, { "epoch": 2.6558746246246248, "grad_norm": 1.0669629037963613, "learning_rate": 3.9570820351094884e-07, "loss": 0.2908, "step": 28301 }, { "epoch": 2.6559684684684686, "grad_norm": 1.070683536212411, "learning_rate": 3.9549536063880733e-07, "loss": 0.278, "step": 28302 }, { "epoch": 2.6560623123123124, "grad_norm": 1.0416978851768455, "learning_rate": 3.952825726667159e-07, "loss": 0.3446, "step": 28303 }, { "epoch": 2.656156156156156, "grad_norm": 1.1555668815984494, "learning_rate": 3.95069839597213e-07, "loss": 0.2645, "step": 28304 }, { "epoch": 2.65625, "grad_norm": 1.4672660521722714, "learning_rate": 3.9485716143283447e-07, "loss": 0.3142, "step": 28305 }, { "epoch": 2.656343843843844, "grad_norm": 1.5862051261196324, "learning_rate": 3.946445381761149e-07, "loss": 0.3261, "step": 28306 }, { "epoch": 2.6564376876876876, "grad_norm": 0.9803471476304733, "learning_rate": 3.9443196982959176e-07, "loss": 0.3328, "step": 28307 }, { "epoch": 2.6565315315315314, "grad_norm": 0.9088559589114135, "learning_rate": 3.942194563957985e-07, "loss": 0.257, "step": 28308 }, { "epoch": 2.6566253753753752, "grad_norm": 1.1411100796393485, "learning_rate": 3.940069978772676e-07, "loss": 0.3168, "step": 28309 }, { "epoch": 2.6567192192192195, "grad_norm": 1.3010710349515702, "learning_rate": 3.937945942765337e-07, "loss": 0.3333, "step": 28310 }, { "epoch": 2.656813063063063, "grad_norm": 1.3712172717479993, "learning_rate": 3.9358224559612966e-07, "loss": 0.2998, "step": 28311 }, { "epoch": 2.656906906906907, "grad_norm": 1.0966312178425477, "learning_rate": 3.933699518385864e-07, "loss": 0.3237, "step": 28312 }, { "epoch": 2.6570007507507505, "grad_norm": 1.1297277123979417, "learning_rate": 3.9315771300643514e-07, "loss": 0.2973, "step": 28313 }, { "epoch": 2.6570945945945947, "grad_norm": 1.244543306107624, "learning_rate": 3.929455291022072e-07, "loss": 0.3375, "step": 28314 }, { "epoch": 2.6571884384384385, "grad_norm": 1.1635814994941303, "learning_rate": 3.927334001284305e-07, "loss": 0.3087, "step": 28315 }, { "epoch": 2.6572822822822824, "grad_norm": 0.9350649938836111, "learning_rate": 3.9252132608763706e-07, "loss": 0.2926, "step": 28316 }, { "epoch": 2.657376126126126, "grad_norm": 1.2378286949402306, "learning_rate": 3.9230930698235416e-07, "loss": 0.2891, "step": 28317 }, { "epoch": 2.65746996996997, "grad_norm": 1.1860655024788644, "learning_rate": 3.9209734281510813e-07, "loss": 0.3381, "step": 28318 }, { "epoch": 2.657563813813814, "grad_norm": 1.0913288164211912, "learning_rate": 3.9188543358842924e-07, "loss": 0.317, "step": 28319 }, { "epoch": 2.6576576576576576, "grad_norm": 1.201954699918804, "learning_rate": 3.9167357930484263e-07, "loss": 0.344, "step": 28320 }, { "epoch": 2.6577515015015014, "grad_norm": 2.7857929264081376, "learning_rate": 3.9146177996687353e-07, "loss": 0.3611, "step": 28321 }, { "epoch": 2.6578453453453452, "grad_norm": 1.1660018798250815, "learning_rate": 3.9125003557704875e-07, "loss": 0.3003, "step": 28322 }, { "epoch": 2.657939189189189, "grad_norm": 1.1539998734507806, "learning_rate": 3.9103834613789196e-07, "loss": 0.2943, "step": 28323 }, { "epoch": 2.658033033033033, "grad_norm": 0.9060306578054602, "learning_rate": 3.9082671165192764e-07, "loss": 0.3029, "step": 28324 }, { "epoch": 2.658126876876877, "grad_norm": 1.1376322395919882, "learning_rate": 3.906151321216789e-07, "loss": 0.2909, "step": 28325 }, { "epoch": 2.6582207207207205, "grad_norm": 1.7096934585799397, "learning_rate": 3.9040360754966865e-07, "loss": 0.3439, "step": 28326 }, { "epoch": 2.6583145645645647, "grad_norm": 1.122629563265167, "learning_rate": 3.901921379384177e-07, "loss": 0.318, "step": 28327 }, { "epoch": 2.6584084084084085, "grad_norm": 1.061812217811833, "learning_rate": 3.89980723290449e-07, "loss": 0.3048, "step": 28328 }, { "epoch": 2.6585022522522523, "grad_norm": 1.2820558032033842, "learning_rate": 3.897693636082828e-07, "loss": 0.3178, "step": 28329 }, { "epoch": 2.658596096096096, "grad_norm": 1.1157762770256257, "learning_rate": 3.8955805889443863e-07, "loss": 0.2949, "step": 28330 }, { "epoch": 2.65868993993994, "grad_norm": 1.5835412866757181, "learning_rate": 3.8934680915143683e-07, "loss": 0.3058, "step": 28331 }, { "epoch": 2.6587837837837838, "grad_norm": 0.9908859392292124, "learning_rate": 3.8913561438179595e-07, "loss": 0.2995, "step": 28332 }, { "epoch": 2.6588776276276276, "grad_norm": 1.1650685845285342, "learning_rate": 3.889244745880327e-07, "loss": 0.2815, "step": 28333 }, { "epoch": 2.6589714714714714, "grad_norm": 1.1432420641348975, "learning_rate": 3.8871338977266637e-07, "loss": 0.3104, "step": 28334 }, { "epoch": 2.659065315315315, "grad_norm": 1.495766924021479, "learning_rate": 3.885023599382132e-07, "loss": 0.2887, "step": 28335 }, { "epoch": 2.659159159159159, "grad_norm": 1.2848597322950897, "learning_rate": 3.882913850871889e-07, "loss": 0.2872, "step": 28336 }, { "epoch": 2.659253003003003, "grad_norm": 1.0306236586388415, "learning_rate": 3.8808046522210984e-07, "loss": 0.3483, "step": 28337 }, { "epoch": 2.659346846846847, "grad_norm": 1.5505250012974428, "learning_rate": 3.8786960034548957e-07, "loss": 0.3267, "step": 28338 }, { "epoch": 2.6594406906906904, "grad_norm": 0.9728628122579092, "learning_rate": 3.8765879045984225e-07, "loss": 0.3174, "step": 28339 }, { "epoch": 2.6595345345345347, "grad_norm": 1.083337651223946, "learning_rate": 3.87448035567683e-07, "loss": 0.3147, "step": 28340 }, { "epoch": 2.6596283783783785, "grad_norm": 1.0783073608601217, "learning_rate": 3.872373356715231e-07, "loss": 0.2995, "step": 28341 }, { "epoch": 2.6597222222222223, "grad_norm": 1.1236245321746, "learning_rate": 3.8702669077387514e-07, "loss": 0.2957, "step": 28342 }, { "epoch": 2.659816066066066, "grad_norm": 1.5654521473056384, "learning_rate": 3.8681610087725144e-07, "loss": 0.3254, "step": 28343 }, { "epoch": 2.65990990990991, "grad_norm": 1.2718463617536249, "learning_rate": 3.8660556598416223e-07, "loss": 0.2776, "step": 28344 }, { "epoch": 2.6600037537537538, "grad_norm": 0.956634008480094, "learning_rate": 3.863950860971172e-07, "loss": 0.2844, "step": 28345 }, { "epoch": 2.6600975975975976, "grad_norm": 1.2511169941611142, "learning_rate": 3.861846612186271e-07, "loss": 0.3125, "step": 28346 }, { "epoch": 2.6601914414414414, "grad_norm": 0.9282216629217781, "learning_rate": 3.8597429135120046e-07, "loss": 0.2945, "step": 28347 }, { "epoch": 2.660285285285285, "grad_norm": 1.413640583674313, "learning_rate": 3.857639764973459e-07, "loss": 0.2849, "step": 28348 }, { "epoch": 2.660379129129129, "grad_norm": 1.1932251017021436, "learning_rate": 3.8555371665956967e-07, "loss": 0.3073, "step": 28349 }, { "epoch": 2.660472972972973, "grad_norm": 1.405998787608968, "learning_rate": 3.853435118403798e-07, "loss": 0.3377, "step": 28350 }, { "epoch": 2.660566816816817, "grad_norm": 1.281508197487766, "learning_rate": 3.85133362042282e-07, "loss": 0.3413, "step": 28351 }, { "epoch": 2.6606606606606604, "grad_norm": 1.0329040330055612, "learning_rate": 3.849232672677827e-07, "loss": 0.3286, "step": 28352 }, { "epoch": 2.6607545045045047, "grad_norm": 1.0483756531820252, "learning_rate": 3.8471322751938654e-07, "loss": 0.3616, "step": 28353 }, { "epoch": 2.6608483483483485, "grad_norm": 1.171133878688721, "learning_rate": 3.845032427995965e-07, "loss": 0.3363, "step": 28354 }, { "epoch": 2.6609421921921923, "grad_norm": 1.0678004707337276, "learning_rate": 3.8429331311091887e-07, "loss": 0.2952, "step": 28355 }, { "epoch": 2.661036036036036, "grad_norm": 1.2477252516571302, "learning_rate": 3.8408343845585504e-07, "loss": 0.3399, "step": 28356 }, { "epoch": 2.66112987987988, "grad_norm": 1.141511696503016, "learning_rate": 3.838736188369069e-07, "loss": 0.3092, "step": 28357 }, { "epoch": 2.6612237237237237, "grad_norm": 1.4340201316940195, "learning_rate": 3.836638542565779e-07, "loss": 0.2956, "step": 28358 }, { "epoch": 2.6613175675675675, "grad_norm": 1.044571465645545, "learning_rate": 3.834541447173679e-07, "loss": 0.3128, "step": 28359 }, { "epoch": 2.6614114114114114, "grad_norm": 1.2504384441726173, "learning_rate": 3.8324449022177744e-07, "loss": 0.3076, "step": 28360 }, { "epoch": 2.661505255255255, "grad_norm": 1.540011910251976, "learning_rate": 3.830348907723064e-07, "loss": 0.3596, "step": 28361 }, { "epoch": 2.661599099099099, "grad_norm": 1.1614083892532474, "learning_rate": 3.8282534637145375e-07, "loss": 0.315, "step": 28362 }, { "epoch": 2.661692942942943, "grad_norm": 1.1734815190376087, "learning_rate": 3.82615857021717e-07, "loss": 0.3119, "step": 28363 }, { "epoch": 2.661786786786787, "grad_norm": 1.9242549260123345, "learning_rate": 3.8240642272559583e-07, "loss": 0.3117, "step": 28364 }, { "epoch": 2.6618806306306304, "grad_norm": 1.0412180790374805, "learning_rate": 3.821970434855859e-07, "loss": 0.3529, "step": 28365 }, { "epoch": 2.6619744744744747, "grad_norm": 1.0509971064672448, "learning_rate": 3.8198771930418375e-07, "loss": 0.2876, "step": 28366 }, { "epoch": 2.6620683183183185, "grad_norm": 1.118536900249332, "learning_rate": 3.8177845018388615e-07, "loss": 0.294, "step": 28367 }, { "epoch": 2.6621621621621623, "grad_norm": 1.1035296559265206, "learning_rate": 3.8156923612718774e-07, "loss": 0.3238, "step": 28368 }, { "epoch": 2.662256006006006, "grad_norm": 1.0178512758100449, "learning_rate": 3.813600771365816e-07, "loss": 0.3301, "step": 28369 }, { "epoch": 2.66234984984985, "grad_norm": 1.11841155340434, "learning_rate": 3.8115097321456406e-07, "loss": 0.284, "step": 28370 }, { "epoch": 2.6624436936936937, "grad_norm": 1.2131222638931694, "learning_rate": 3.8094192436362697e-07, "loss": 0.2768, "step": 28371 }, { "epoch": 2.6625375375375375, "grad_norm": 1.049836766124888, "learning_rate": 3.8073293058626224e-07, "loss": 0.3194, "step": 28372 }, { "epoch": 2.6626313813813813, "grad_norm": 1.1066096380081194, "learning_rate": 3.8052399188496403e-07, "loss": 0.3062, "step": 28373 }, { "epoch": 2.662725225225225, "grad_norm": 1.8707170752505113, "learning_rate": 3.803151082622214e-07, "loss": 0.3349, "step": 28374 }, { "epoch": 2.662819069069069, "grad_norm": 1.1161440666640778, "learning_rate": 3.8010627972052406e-07, "loss": 0.277, "step": 28375 }, { "epoch": 2.6629129129129128, "grad_norm": 1.433867397847311, "learning_rate": 3.7989750626236444e-07, "loss": 0.2909, "step": 28376 }, { "epoch": 2.663006756756757, "grad_norm": 1.083427568022807, "learning_rate": 3.7968878789023054e-07, "loss": 0.2944, "step": 28377 }, { "epoch": 2.6631006006006004, "grad_norm": 1.0858056434422325, "learning_rate": 3.794801246066104e-07, "loss": 0.3291, "step": 28378 }, { "epoch": 2.6631944444444446, "grad_norm": 1.1455569075296477, "learning_rate": 3.792715164139932e-07, "loss": 0.3683, "step": 28379 }, { "epoch": 2.6632882882882885, "grad_norm": 1.325292088066211, "learning_rate": 3.790629633148657e-07, "loss": 0.328, "step": 28380 }, { "epoch": 2.6633821321321323, "grad_norm": 1.1776837437381513, "learning_rate": 3.7885446531171324e-07, "loss": 0.2786, "step": 28381 }, { "epoch": 2.663475975975976, "grad_norm": 1.1292809742627108, "learning_rate": 3.786460224070237e-07, "loss": 0.3158, "step": 28382 }, { "epoch": 2.66356981981982, "grad_norm": 0.9894042057745199, "learning_rate": 3.784376346032814e-07, "loss": 0.2999, "step": 28383 }, { "epoch": 2.6636636636636637, "grad_norm": 1.0671427167808583, "learning_rate": 3.782293019029709e-07, "loss": 0.3282, "step": 28384 }, { "epoch": 2.6637575075075075, "grad_norm": 1.1281648149122747, "learning_rate": 3.7802102430857633e-07, "loss": 0.3646, "step": 28385 }, { "epoch": 2.6638513513513513, "grad_norm": 1.2733135804889826, "learning_rate": 3.778128018225818e-07, "loss": 0.3328, "step": 28386 }, { "epoch": 2.663945195195195, "grad_norm": 1.2446527778693097, "learning_rate": 3.7760463444746873e-07, "loss": 0.2631, "step": 28387 }, { "epoch": 2.664039039039039, "grad_norm": 1.2431519772670978, "learning_rate": 3.773965221857201e-07, "loss": 0.3007, "step": 28388 }, { "epoch": 2.6641328828828827, "grad_norm": 1.4044321295390916, "learning_rate": 3.7718846503981663e-07, "loss": 0.3224, "step": 28389 }, { "epoch": 2.664226726726727, "grad_norm": 0.9492903745284764, "learning_rate": 3.769804630122381e-07, "loss": 0.2846, "step": 28390 }, { "epoch": 2.6643205705705704, "grad_norm": 1.2423497229355935, "learning_rate": 3.7677251610546696e-07, "loss": 0.305, "step": 28391 }, { "epoch": 2.6644144144144146, "grad_norm": 1.14462719603077, "learning_rate": 3.765646243219806e-07, "loss": 0.2854, "step": 28392 }, { "epoch": 2.664508258258258, "grad_norm": 1.284481435534178, "learning_rate": 3.7635678766425823e-07, "loss": 0.3179, "step": 28393 }, { "epoch": 2.6646021021021022, "grad_norm": 1.4182799964416573, "learning_rate": 3.7614900613477833e-07, "loss": 0.3046, "step": 28394 }, { "epoch": 2.664695945945946, "grad_norm": 1.1500391509683958, "learning_rate": 3.759412797360179e-07, "loss": 0.2871, "step": 28395 }, { "epoch": 2.66478978978979, "grad_norm": 1.3792659897771697, "learning_rate": 3.7573360847045327e-07, "loss": 0.3244, "step": 28396 }, { "epoch": 2.6648836336336337, "grad_norm": 1.1325905815531732, "learning_rate": 3.7552599234056187e-07, "loss": 0.2715, "step": 28397 }, { "epoch": 2.6649774774774775, "grad_norm": 1.2220841565765905, "learning_rate": 3.753184313488184e-07, "loss": 0.3114, "step": 28398 }, { "epoch": 2.6650713213213213, "grad_norm": 1.391821843374125, "learning_rate": 3.7511092549769755e-07, "loss": 0.3064, "step": 28399 }, { "epoch": 2.665165165165165, "grad_norm": 1.1609310265791961, "learning_rate": 3.7490347478967347e-07, "loss": 0.3507, "step": 28400 }, { "epoch": 2.665259009009009, "grad_norm": 1.2519780261288087, "learning_rate": 3.746960792272197e-07, "loss": 0.3117, "step": 28401 }, { "epoch": 2.6653528528528527, "grad_norm": 1.386761110123926, "learning_rate": 3.7448873881280825e-07, "loss": 0.324, "step": 28402 }, { "epoch": 2.6654466966966965, "grad_norm": 1.1179718921795754, "learning_rate": 3.74281453548912e-07, "loss": 0.2872, "step": 28403 }, { "epoch": 2.6655405405405403, "grad_norm": 1.035285893303274, "learning_rate": 3.7407422343800294e-07, "loss": 0.3209, "step": 28404 }, { "epoch": 2.6656343843843846, "grad_norm": 1.0568470706821296, "learning_rate": 3.7386704848255074e-07, "loss": 0.3137, "step": 28405 }, { "epoch": 2.665728228228228, "grad_norm": 1.0549454471037139, "learning_rate": 3.7365992868502675e-07, "loss": 0.3193, "step": 28406 }, { "epoch": 2.6658220720720722, "grad_norm": 1.054579722329484, "learning_rate": 3.7345286404790015e-07, "loss": 0.3124, "step": 28407 }, { "epoch": 2.665915915915916, "grad_norm": 1.294487486565802, "learning_rate": 3.732458545736384e-07, "loss": 0.2707, "step": 28408 }, { "epoch": 2.66600975975976, "grad_norm": 1.119539514785893, "learning_rate": 3.7303890026471166e-07, "loss": 0.3458, "step": 28409 }, { "epoch": 2.6661036036036037, "grad_norm": 1.089510549254912, "learning_rate": 3.7283200112358697e-07, "loss": 0.3527, "step": 28410 }, { "epoch": 2.6661974474474475, "grad_norm": 1.1484104277587655, "learning_rate": 3.726251571527306e-07, "loss": 0.3048, "step": 28411 }, { "epoch": 2.6662912912912913, "grad_norm": 1.077093262415791, "learning_rate": 3.7241836835460955e-07, "loss": 0.3015, "step": 28412 }, { "epoch": 2.666385135135135, "grad_norm": 1.3653136128774712, "learning_rate": 3.72211634731689e-07, "loss": 0.3178, "step": 28413 }, { "epoch": 2.666478978978979, "grad_norm": 1.7208600278996842, "learning_rate": 3.7200495628643253e-07, "loss": 0.3123, "step": 28414 }, { "epoch": 2.6665728228228227, "grad_norm": 1.2051947665537648, "learning_rate": 3.717983330213065e-07, "loss": 0.312, "step": 28415 }, { "epoch": 2.6666666666666665, "grad_norm": 1.0460649990376758, "learning_rate": 3.7159176493877345e-07, "loss": 0.2869, "step": 28416 }, { "epoch": 2.6667605105105103, "grad_norm": 1.1660328588676414, "learning_rate": 3.713852520412964e-07, "loss": 0.3086, "step": 28417 }, { "epoch": 2.6668543543543546, "grad_norm": 1.2611806181105996, "learning_rate": 3.7117879433133773e-07, "loss": 0.2853, "step": 28418 }, { "epoch": 2.666948198198198, "grad_norm": 1.1244721457393079, "learning_rate": 3.7097239181135945e-07, "loss": 0.3279, "step": 28419 }, { "epoch": 2.667042042042042, "grad_norm": 1.1769822228706854, "learning_rate": 3.707660444838218e-07, "loss": 0.2803, "step": 28420 }, { "epoch": 2.667135885885886, "grad_norm": 1.0523802117017707, "learning_rate": 3.705597523511856e-07, "loss": 0.2997, "step": 28421 }, { "epoch": 2.66722972972973, "grad_norm": 1.4271240413078834, "learning_rate": 3.7035351541591046e-07, "loss": 0.2754, "step": 28422 }, { "epoch": 2.6673235735735736, "grad_norm": 1.1697544284464003, "learning_rate": 3.7014733368045564e-07, "loss": 0.2925, "step": 28423 }, { "epoch": 2.6674174174174174, "grad_norm": 1.1634939271462859, "learning_rate": 3.6994120714727857e-07, "loss": 0.2975, "step": 28424 }, { "epoch": 2.6675112612612613, "grad_norm": 1.0884409829022326, "learning_rate": 3.697351358188378e-07, "loss": 0.3097, "step": 28425 }, { "epoch": 2.667605105105105, "grad_norm": 1.2060354899312264, "learning_rate": 3.6952911969758863e-07, "loss": 0.367, "step": 28426 }, { "epoch": 2.667698948948949, "grad_norm": 1.2811454250099432, "learning_rate": 3.6932315878599024e-07, "loss": 0.286, "step": 28427 }, { "epoch": 2.6677927927927927, "grad_norm": 0.9883170648456545, "learning_rate": 3.6911725308649615e-07, "loss": 0.3285, "step": 28428 }, { "epoch": 2.6678866366366365, "grad_norm": 1.1725379382342684, "learning_rate": 3.689114026015617e-07, "loss": 0.2992, "step": 28429 }, { "epoch": 2.6679804804804803, "grad_norm": 1.1275083047610401, "learning_rate": 3.687056073336426e-07, "loss": 0.3368, "step": 28430 }, { "epoch": 2.6680743243243246, "grad_norm": 1.1492175243635276, "learning_rate": 3.684998672851914e-07, "loss": 0.3037, "step": 28431 }, { "epoch": 2.668168168168168, "grad_norm": 0.9251701654408194, "learning_rate": 3.6829418245866056e-07, "loss": 0.3076, "step": 28432 }, { "epoch": 2.668262012012012, "grad_norm": 1.159203455343979, "learning_rate": 3.6808855285650367e-07, "loss": 0.3186, "step": 28433 }, { "epoch": 2.668355855855856, "grad_norm": 1.0162883809689593, "learning_rate": 3.678829784811727e-07, "loss": 0.2924, "step": 28434 }, { "epoch": 2.6684496996997, "grad_norm": 1.1187138443175968, "learning_rate": 3.676774593351179e-07, "loss": 0.2679, "step": 28435 }, { "epoch": 2.6685435435435436, "grad_norm": 1.121473033250832, "learning_rate": 3.6747199542079006e-07, "loss": 0.3232, "step": 28436 }, { "epoch": 2.6686373873873874, "grad_norm": 1.0464450778375116, "learning_rate": 3.6726658674063887e-07, "loss": 0.3066, "step": 28437 }, { "epoch": 2.6687312312312312, "grad_norm": 1.4008424666407098, "learning_rate": 3.670612332971124e-07, "loss": 0.321, "step": 28438 }, { "epoch": 2.668825075075075, "grad_norm": 1.2476932158745855, "learning_rate": 3.668559350926609e-07, "loss": 0.2939, "step": 28439 }, { "epoch": 2.668918918918919, "grad_norm": 4.088294476615442, "learning_rate": 3.666506921297319e-07, "loss": 0.3359, "step": 28440 }, { "epoch": 2.6690127627627627, "grad_norm": 1.1103871680089075, "learning_rate": 3.6644550441077064e-07, "loss": 0.2956, "step": 28441 }, { "epoch": 2.6691066066066065, "grad_norm": 1.180931574697217, "learning_rate": 3.662403719382257e-07, "loss": 0.2935, "step": 28442 }, { "epoch": 2.6692004504504503, "grad_norm": 1.4566688252103837, "learning_rate": 3.660352947145429e-07, "loss": 0.2994, "step": 28443 }, { "epoch": 2.6692942942942945, "grad_norm": 1.093921938717284, "learning_rate": 3.658302727421653e-07, "loss": 0.261, "step": 28444 }, { "epoch": 2.669388138138138, "grad_norm": 1.400071950799449, "learning_rate": 3.656253060235393e-07, "loss": 0.2953, "step": 28445 }, { "epoch": 2.669481981981982, "grad_norm": 0.9803790488685004, "learning_rate": 3.65420394561109e-07, "loss": 0.3347, "step": 28446 }, { "epoch": 2.669575825825826, "grad_norm": 1.080146470461909, "learning_rate": 3.652155383573164e-07, "loss": 0.2964, "step": 28447 }, { "epoch": 2.66966966966967, "grad_norm": 1.1721343994318174, "learning_rate": 3.6501073741460445e-07, "loss": 0.354, "step": 28448 }, { "epoch": 2.6697635135135136, "grad_norm": 1.1303084890516972, "learning_rate": 3.6480599173541466e-07, "loss": 0.3255, "step": 28449 }, { "epoch": 2.6698573573573574, "grad_norm": 1.1370939490547203, "learning_rate": 3.646013013221883e-07, "loss": 0.375, "step": 28450 }, { "epoch": 2.669951201201201, "grad_norm": 1.8386066274229875, "learning_rate": 3.643966661773668e-07, "loss": 0.3136, "step": 28451 }, { "epoch": 2.670045045045045, "grad_norm": 1.1928107684562934, "learning_rate": 3.6419208630338934e-07, "loss": 0.3345, "step": 28452 }, { "epoch": 2.670138888888889, "grad_norm": 1.2527948889540892, "learning_rate": 3.6398756170269447e-07, "loss": 0.3098, "step": 28453 }, { "epoch": 2.6702327327327327, "grad_norm": 1.8202424584050105, "learning_rate": 3.637830923777219e-07, "loss": 0.3193, "step": 28454 }, { "epoch": 2.6703265765765765, "grad_norm": 1.1118713251652137, "learning_rate": 3.635786783309098e-07, "loss": 0.325, "step": 28455 }, { "epoch": 2.6704204204204203, "grad_norm": 1.1825103213879038, "learning_rate": 3.6337431956469384e-07, "loss": 0.3083, "step": 28456 }, { "epoch": 2.6705142642642645, "grad_norm": 1.173992795516608, "learning_rate": 3.6317001608151215e-07, "loss": 0.3093, "step": 28457 }, { "epoch": 2.670608108108108, "grad_norm": 1.2846436824157936, "learning_rate": 3.629657678838e-07, "loss": 0.2937, "step": 28458 }, { "epoch": 2.670701951951952, "grad_norm": 1.1652984520700658, "learning_rate": 3.627615749739932e-07, "loss": 0.2886, "step": 28459 }, { "epoch": 2.670795795795796, "grad_norm": 1.1871026699527467, "learning_rate": 3.6255743735452596e-07, "loss": 0.2914, "step": 28460 }, { "epoch": 2.6708896396396398, "grad_norm": 1.1310296469939938, "learning_rate": 3.6235335502783187e-07, "loss": 0.3086, "step": 28461 }, { "epoch": 2.6709834834834836, "grad_norm": 1.1585781273874427, "learning_rate": 3.621493279963434e-07, "loss": 0.3367, "step": 28462 }, { "epoch": 2.6710773273273274, "grad_norm": 2.5763979695741166, "learning_rate": 3.6194535626249585e-07, "loss": 0.317, "step": 28463 }, { "epoch": 2.671171171171171, "grad_norm": 1.1832340481787513, "learning_rate": 3.6174143982871945e-07, "loss": 0.3176, "step": 28464 }, { "epoch": 2.671265015015015, "grad_norm": 1.5012028263100723, "learning_rate": 3.6153757869744454e-07, "loss": 0.2926, "step": 28465 }, { "epoch": 2.671358858858859, "grad_norm": 1.1996700742104074, "learning_rate": 3.613337728711042e-07, "loss": 0.3144, "step": 28466 }, { "epoch": 2.6714527027027026, "grad_norm": 1.656610340797542, "learning_rate": 3.6113002235212746e-07, "loss": 0.3246, "step": 28467 }, { "epoch": 2.6715465465465464, "grad_norm": 1.1344491209607275, "learning_rate": 3.6092632714294253e-07, "loss": 0.3543, "step": 28468 }, { "epoch": 2.6716403903903903, "grad_norm": 1.1738531471298341, "learning_rate": 3.607226872459796e-07, "loss": 0.338, "step": 28469 }, { "epoch": 2.6717342342342345, "grad_norm": 1.2419516970477873, "learning_rate": 3.6051910266366564e-07, "loss": 0.303, "step": 28470 }, { "epoch": 2.671828078078078, "grad_norm": 1.2872807933848711, "learning_rate": 3.603155733984293e-07, "loss": 0.3083, "step": 28471 }, { "epoch": 2.671921921921922, "grad_norm": 1.0896533044422834, "learning_rate": 3.6011209945269576e-07, "loss": 0.3233, "step": 28472 }, { "epoch": 2.6720157657657655, "grad_norm": 1.162741271146107, "learning_rate": 3.5990868082889207e-07, "loss": 0.304, "step": 28473 }, { "epoch": 2.6721096096096097, "grad_norm": 1.1529949937324313, "learning_rate": 3.597053175294424e-07, "loss": 0.263, "step": 28474 }, { "epoch": 2.6722034534534536, "grad_norm": 1.562548556098041, "learning_rate": 3.5950200955677306e-07, "loss": 0.2771, "step": 28475 }, { "epoch": 2.6722972972972974, "grad_norm": 1.099603648074892, "learning_rate": 3.5929875691330775e-07, "loss": 0.3431, "step": 28476 }, { "epoch": 2.672391141141141, "grad_norm": 1.2604466586295153, "learning_rate": 3.5909555960146837e-07, "loss": 0.303, "step": 28477 }, { "epoch": 2.672484984984985, "grad_norm": 1.9298564008328292, "learning_rate": 3.588924176236802e-07, "loss": 0.3343, "step": 28478 }, { "epoch": 2.672578828828829, "grad_norm": 1.1185447003378344, "learning_rate": 3.5868933098236356e-07, "loss": 0.3561, "step": 28479 }, { "epoch": 2.6726726726726726, "grad_norm": 1.2438590216703347, "learning_rate": 3.584862996799393e-07, "loss": 0.2767, "step": 28480 }, { "epoch": 2.6727665165165164, "grad_norm": 1.300892785208237, "learning_rate": 3.582833237188305e-07, "loss": 0.3176, "step": 28481 }, { "epoch": 2.6728603603603602, "grad_norm": 1.2770632160837956, "learning_rate": 3.580804031014551e-07, "loss": 0.3059, "step": 28482 }, { "epoch": 2.672954204204204, "grad_norm": 1.1195258047483374, "learning_rate": 3.5787753783023405e-07, "loss": 0.3297, "step": 28483 }, { "epoch": 2.673048048048048, "grad_norm": 1.104386553396259, "learning_rate": 3.576747279075854e-07, "loss": 0.3017, "step": 28484 }, { "epoch": 2.673141891891892, "grad_norm": 1.18769886020287, "learning_rate": 3.574719733359272e-07, "loss": 0.2894, "step": 28485 }, { "epoch": 2.6732357357357355, "grad_norm": 1.0890951849389403, "learning_rate": 3.572692741176764e-07, "loss": 0.3096, "step": 28486 }, { "epoch": 2.6733295795795797, "grad_norm": 1.0979342872482905, "learning_rate": 3.570666302552511e-07, "loss": 0.325, "step": 28487 }, { "epoch": 2.6734234234234235, "grad_norm": 0.9716238779595994, "learning_rate": 3.568640417510666e-07, "loss": 0.2904, "step": 28488 }, { "epoch": 2.6735172672672673, "grad_norm": 1.1186022934948932, "learning_rate": 3.566615086075381e-07, "loss": 0.3197, "step": 28489 }, { "epoch": 2.673611111111111, "grad_norm": 1.0641250386224894, "learning_rate": 3.564590308270821e-07, "loss": 0.2961, "step": 28490 }, { "epoch": 2.673704954954955, "grad_norm": 1.2865363744685725, "learning_rate": 3.562566084121111e-07, "loss": 0.2778, "step": 28491 }, { "epoch": 2.673798798798799, "grad_norm": 1.4187281899075088, "learning_rate": 3.5605424136503805e-07, "loss": 0.3634, "step": 28492 }, { "epoch": 2.6738926426426426, "grad_norm": 1.4606306378672345, "learning_rate": 3.5585192968827786e-07, "loss": 0.325, "step": 28493 }, { "epoch": 2.6739864864864864, "grad_norm": 1.1941490130681076, "learning_rate": 3.5564967338424185e-07, "loss": 0.2949, "step": 28494 }, { "epoch": 2.67408033033033, "grad_norm": 1.136350460173033, "learning_rate": 3.5544747245534084e-07, "loss": 0.2962, "step": 28495 }, { "epoch": 2.674174174174174, "grad_norm": 1.370500270045348, "learning_rate": 3.552453269039874e-07, "loss": 0.2954, "step": 28496 }, { "epoch": 2.674268018018018, "grad_norm": 1.0493434231681633, "learning_rate": 3.5504323673259013e-07, "loss": 0.3399, "step": 28497 }, { "epoch": 2.674361861861862, "grad_norm": 1.0859075117573824, "learning_rate": 3.548412019435582e-07, "loss": 0.2798, "step": 28498 }, { "epoch": 2.6744557057057055, "grad_norm": 1.4184256634110193, "learning_rate": 3.5463922253930195e-07, "loss": 0.3391, "step": 28499 }, { "epoch": 2.6745495495495497, "grad_norm": 1.051713521414984, "learning_rate": 3.544372985222289e-07, "loss": 0.3001, "step": 28500 }, { "epoch": 2.6746433933933935, "grad_norm": 1.22628928782042, "learning_rate": 3.542354298947459e-07, "loss": 0.2565, "step": 28501 }, { "epoch": 2.6747372372372373, "grad_norm": 1.1190147359627607, "learning_rate": 3.540336166592617e-07, "loss": 0.2695, "step": 28502 }, { "epoch": 2.674831081081081, "grad_norm": 1.169553801447489, "learning_rate": 3.5383185881818105e-07, "loss": 0.2995, "step": 28503 }, { "epoch": 2.674924924924925, "grad_norm": 0.9870415964661156, "learning_rate": 3.5363015637390973e-07, "loss": 0.3174, "step": 28504 }, { "epoch": 2.6750187687687688, "grad_norm": 1.4192560418518552, "learning_rate": 3.534285093288531e-07, "loss": 0.2796, "step": 28505 }, { "epoch": 2.6751126126126126, "grad_norm": 0.9391588453116475, "learning_rate": 3.532269176854158e-07, "loss": 0.2694, "step": 28506 }, { "epoch": 2.6752064564564564, "grad_norm": 1.5311493199161006, "learning_rate": 3.530253814459994e-07, "loss": 0.2915, "step": 28507 }, { "epoch": 2.6753003003003, "grad_norm": 0.8582043280583425, "learning_rate": 3.528239006130096e-07, "loss": 0.2834, "step": 28508 }, { "epoch": 2.675394144144144, "grad_norm": 1.1570010894998377, "learning_rate": 3.526224751888474e-07, "loss": 0.2732, "step": 28509 }, { "epoch": 2.675487987987988, "grad_norm": 1.3573238387323732, "learning_rate": 3.524211051759141e-07, "loss": 0.3057, "step": 28510 }, { "epoch": 2.675581831831832, "grad_norm": 1.1253844977947614, "learning_rate": 3.5221979057661116e-07, "loss": 0.3045, "step": 28511 }, { "epoch": 2.6756756756756754, "grad_norm": 1.0271385763753265, "learning_rate": 3.520185313933389e-07, "loss": 0.3374, "step": 28512 }, { "epoch": 2.6757695195195197, "grad_norm": 1.1130491293064286, "learning_rate": 3.518173276284953e-07, "loss": 0.3107, "step": 28513 }, { "epoch": 2.6758633633633635, "grad_norm": 1.7106956803562003, "learning_rate": 3.5161617928448245e-07, "loss": 0.3575, "step": 28514 }, { "epoch": 2.6759572072072073, "grad_norm": 1.7599081566543593, "learning_rate": 3.5141508636369617e-07, "loss": 0.3136, "step": 28515 }, { "epoch": 2.676051051051051, "grad_norm": 1.1718900941558095, "learning_rate": 3.512140488685345e-07, "loss": 0.3307, "step": 28516 }, { "epoch": 2.676144894894895, "grad_norm": 1.2041527846849587, "learning_rate": 3.510130668013956e-07, "loss": 0.3575, "step": 28517 }, { "epoch": 2.6762387387387387, "grad_norm": 1.0893351426614897, "learning_rate": 3.508121401646747e-07, "loss": 0.291, "step": 28518 }, { "epoch": 2.6763325825825826, "grad_norm": 1.285630994248451, "learning_rate": 3.506112689607677e-07, "loss": 0.3529, "step": 28519 }, { "epoch": 2.6764264264264264, "grad_norm": 1.0643205215442602, "learning_rate": 3.504104531920699e-07, "loss": 0.2946, "step": 28520 }, { "epoch": 2.67652027027027, "grad_norm": 1.2747253263925737, "learning_rate": 3.5020969286097607e-07, "loss": 0.3073, "step": 28521 }, { "epoch": 2.676614114114114, "grad_norm": 0.9870163965582786, "learning_rate": 3.500089879698787e-07, "loss": 0.2681, "step": 28522 }, { "epoch": 2.676707957957958, "grad_norm": 1.287949557984728, "learning_rate": 3.49808338521172e-07, "loss": 0.2605, "step": 28523 }, { "epoch": 2.676801801801802, "grad_norm": 0.9684169835041445, "learning_rate": 3.4960774451724743e-07, "loss": 0.3174, "step": 28524 }, { "epoch": 2.6768956456456454, "grad_norm": 1.099255640766736, "learning_rate": 3.494072059604964e-07, "loss": 0.3067, "step": 28525 }, { "epoch": 2.6769894894894897, "grad_norm": 1.3004208892761453, "learning_rate": 3.492067228533114e-07, "loss": 0.3431, "step": 28526 }, { "epoch": 2.6770833333333335, "grad_norm": 1.1921674855191067, "learning_rate": 3.490062951980822e-07, "loss": 0.303, "step": 28527 }, { "epoch": 2.6771771771771773, "grad_norm": 1.168838292633677, "learning_rate": 3.488059229971974e-07, "loss": 0.3102, "step": 28528 }, { "epoch": 2.677271021021021, "grad_norm": 1.159481011697431, "learning_rate": 3.486056062530474e-07, "loss": 0.3285, "step": 28529 }, { "epoch": 2.677364864864865, "grad_norm": 1.1597313059046985, "learning_rate": 3.484053449680208e-07, "loss": 0.2645, "step": 28530 }, { "epoch": 2.6774587087087087, "grad_norm": 1.0671068879313685, "learning_rate": 3.4820513914450346e-07, "loss": 0.3005, "step": 28531 }, { "epoch": 2.6775525525525525, "grad_norm": 1.0692376203001623, "learning_rate": 3.4800498878488464e-07, "loss": 0.2788, "step": 28532 }, { "epoch": 2.6776463963963963, "grad_norm": 1.8136883229289884, "learning_rate": 3.4780489389155013e-07, "loss": 0.2865, "step": 28533 }, { "epoch": 2.67774024024024, "grad_norm": 1.1679908593586206, "learning_rate": 3.476048544668853e-07, "loss": 0.3128, "step": 28534 }, { "epoch": 2.677834084084084, "grad_norm": 1.0086964475411722, "learning_rate": 3.474048705132749e-07, "loss": 0.2735, "step": 28535 }, { "epoch": 2.6779279279279278, "grad_norm": 1.1496522484592246, "learning_rate": 3.472049420331042e-07, "loss": 0.31, "step": 28536 }, { "epoch": 2.678021771771772, "grad_norm": 1.3873197229968581, "learning_rate": 3.4700506902875575e-07, "loss": 0.273, "step": 28537 }, { "epoch": 2.6781156156156154, "grad_norm": 1.3274953377617054, "learning_rate": 3.4680525150261437e-07, "loss": 0.2605, "step": 28538 }, { "epoch": 2.6782094594594597, "grad_norm": 1.230866048942093, "learning_rate": 3.466054894570614e-07, "loss": 0.3191, "step": 28539 }, { "epoch": 2.6783033033033035, "grad_norm": 1.22000601766249, "learning_rate": 3.464057828944784e-07, "loss": 0.2792, "step": 28540 }, { "epoch": 2.6783971471471473, "grad_norm": 2.8740786690727083, "learning_rate": 3.4620613181724716e-07, "loss": 0.3104, "step": 28541 }, { "epoch": 2.678490990990991, "grad_norm": 1.0728277944922855, "learning_rate": 3.4600653622774873e-07, "loss": 0.2912, "step": 28542 }, { "epoch": 2.678584834834835, "grad_norm": 1.1674411630499495, "learning_rate": 3.4580699612836054e-07, "loss": 0.3182, "step": 28543 }, { "epoch": 2.6786786786786787, "grad_norm": 1.4074291196863828, "learning_rate": 3.4560751152146467e-07, "loss": 0.2827, "step": 28544 }, { "epoch": 2.6787725225225225, "grad_norm": 1.2674842038322602, "learning_rate": 3.4540808240943804e-07, "loss": 0.3248, "step": 28545 }, { "epoch": 2.6788663663663663, "grad_norm": 1.1259129481460421, "learning_rate": 3.452087087946587e-07, "loss": 0.3192, "step": 28546 }, { "epoch": 2.67896021021021, "grad_norm": 0.9107834013362645, "learning_rate": 3.450093906795038e-07, "loss": 0.3045, "step": 28547 }, { "epoch": 2.679054054054054, "grad_norm": 1.1927642198757378, "learning_rate": 3.448101280663496e-07, "loss": 0.2931, "step": 28548 }, { "epoch": 2.6791478978978978, "grad_norm": 1.0543859146585692, "learning_rate": 3.4461092095757156e-07, "loss": 0.3094, "step": 28549 }, { "epoch": 2.679241741741742, "grad_norm": 1.2503210830487106, "learning_rate": 3.44411769355546e-07, "loss": 0.2659, "step": 28550 }, { "epoch": 2.6793355855855854, "grad_norm": 1.289732043673466, "learning_rate": 3.4421267326264664e-07, "loss": 0.2876, "step": 28551 }, { "epoch": 2.6794294294294296, "grad_norm": 1.2518184058271704, "learning_rate": 3.440136326812471e-07, "loss": 0.3428, "step": 28552 }, { "epoch": 2.679523273273273, "grad_norm": 1.0432248253188852, "learning_rate": 3.438146476137216e-07, "loss": 0.2934, "step": 28553 }, { "epoch": 2.6796171171171173, "grad_norm": 1.0488003126958805, "learning_rate": 3.436157180624422e-07, "loss": 0.3397, "step": 28554 }, { "epoch": 2.679710960960961, "grad_norm": 1.1580201715976974, "learning_rate": 3.434168440297797e-07, "loss": 0.268, "step": 28555 }, { "epoch": 2.679804804804805, "grad_norm": 1.2525102068179614, "learning_rate": 3.4321802551810724e-07, "loss": 0.2895, "step": 28556 }, { "epoch": 2.6798986486486487, "grad_norm": 0.9358857919411012, "learning_rate": 3.430192625297946e-07, "loss": 0.3167, "step": 28557 }, { "epoch": 2.6799924924924925, "grad_norm": 1.2101205337467429, "learning_rate": 3.428205550672109e-07, "loss": 0.3079, "step": 28558 }, { "epoch": 2.6800863363363363, "grad_norm": 1.1230147625428504, "learning_rate": 3.4262190313272546e-07, "loss": 0.2578, "step": 28559 }, { "epoch": 2.68018018018018, "grad_norm": 1.0595356013656367, "learning_rate": 3.4242330672870805e-07, "loss": 0.2904, "step": 28560 }, { "epoch": 2.680274024024024, "grad_norm": 1.568486634688738, "learning_rate": 3.422247658575245e-07, "loss": 0.3243, "step": 28561 }, { "epoch": 2.6803678678678677, "grad_norm": 1.145841146948625, "learning_rate": 3.420262805215441e-07, "loss": 0.3334, "step": 28562 }, { "epoch": 2.6804617117117115, "grad_norm": 1.125310683572379, "learning_rate": 3.4182785072313264e-07, "loss": 0.3197, "step": 28563 }, { "epoch": 2.6805555555555554, "grad_norm": 1.079785454036457, "learning_rate": 3.4162947646465493e-07, "loss": 0.3337, "step": 28564 }, { "epoch": 2.6806493993993996, "grad_norm": 1.2364393539518024, "learning_rate": 3.4143115774847857e-07, "loss": 0.3082, "step": 28565 }, { "epoch": 2.680743243243243, "grad_norm": 1.5086865126071147, "learning_rate": 3.412328945769661e-07, "loss": 0.3243, "step": 28566 }, { "epoch": 2.6808370870870872, "grad_norm": 1.1554563882315059, "learning_rate": 3.4103468695248165e-07, "loss": 0.3129, "step": 28567 }, { "epoch": 2.680930930930931, "grad_norm": 1.7208251105546852, "learning_rate": 3.408365348773901e-07, "loss": 0.3316, "step": 28568 }, { "epoch": 2.681024774774775, "grad_norm": 1.2699534556545968, "learning_rate": 3.406384383540523e-07, "loss": 0.3231, "step": 28569 }, { "epoch": 2.6811186186186187, "grad_norm": 1.0895864003229103, "learning_rate": 3.404403973848314e-07, "loss": 0.3146, "step": 28570 }, { "epoch": 2.6812124624624625, "grad_norm": 1.119800621440231, "learning_rate": 3.402424119720876e-07, "loss": 0.2993, "step": 28571 }, { "epoch": 2.6813063063063063, "grad_norm": 1.1586558356821426, "learning_rate": 3.40044482118182e-07, "loss": 0.3218, "step": 28572 }, { "epoch": 2.68140015015015, "grad_norm": 1.2743640763268596, "learning_rate": 3.398466078254736e-07, "loss": 0.3226, "step": 28573 }, { "epoch": 2.681493993993994, "grad_norm": 1.3837247255092058, "learning_rate": 3.396487890963235e-07, "loss": 0.3028, "step": 28574 }, { "epoch": 2.6815878378378377, "grad_norm": 1.252708748592869, "learning_rate": 3.3945102593308964e-07, "loss": 0.3092, "step": 28575 }, { "epoch": 2.6816816816816815, "grad_norm": 1.5584233325968113, "learning_rate": 3.3925331833812855e-07, "loss": 0.3046, "step": 28576 }, { "epoch": 2.6817755255255253, "grad_norm": 1.3005884584732224, "learning_rate": 3.3905566631379947e-07, "loss": 0.2824, "step": 28577 }, { "epoch": 2.6818693693693696, "grad_norm": 1.245240808001096, "learning_rate": 3.388580698624583e-07, "loss": 0.3071, "step": 28578 }, { "epoch": 2.681963213213213, "grad_norm": 1.5041883811551342, "learning_rate": 3.386605289864603e-07, "loss": 0.3236, "step": 28579 }, { "epoch": 2.682057057057057, "grad_norm": 1.0565443744790706, "learning_rate": 3.3846304368816197e-07, "loss": 0.3491, "step": 28580 }, { "epoch": 2.682150900900901, "grad_norm": 1.114802565458857, "learning_rate": 3.382656139699175e-07, "loss": 0.3125, "step": 28581 }, { "epoch": 2.682244744744745, "grad_norm": 0.968062957720198, "learning_rate": 3.380682398340812e-07, "loss": 0.3578, "step": 28582 }, { "epoch": 2.6823385885885886, "grad_norm": 1.8394197540139439, "learning_rate": 3.3787092128300557e-07, "loss": 0.3473, "step": 28583 }, { "epoch": 2.6824324324324325, "grad_norm": 1.1004500361771135, "learning_rate": 3.3767365831904376e-07, "loss": 0.2789, "step": 28584 }, { "epoch": 2.6825262762762763, "grad_norm": 1.2295899498132281, "learning_rate": 3.374764509445466e-07, "loss": 0.2797, "step": 28585 }, { "epoch": 2.68262012012012, "grad_norm": 1.0642596617317628, "learning_rate": 3.372792991618673e-07, "loss": 0.3278, "step": 28586 }, { "epoch": 2.682713963963964, "grad_norm": 0.9752953010321069, "learning_rate": 3.3708220297335557e-07, "loss": 0.3428, "step": 28587 }, { "epoch": 2.6828078078078077, "grad_norm": 1.107900274909928, "learning_rate": 3.3688516238136125e-07, "loss": 0.2691, "step": 28588 }, { "epoch": 2.6829016516516515, "grad_norm": 1.2493569709393286, "learning_rate": 3.366881773882347e-07, "loss": 0.3139, "step": 28589 }, { "epoch": 2.6829954954954953, "grad_norm": 1.2764423259645212, "learning_rate": 3.364912479963234e-07, "loss": 0.3357, "step": 28590 }, { "epoch": 2.6830893393393396, "grad_norm": 1.189444639000704, "learning_rate": 3.3629437420797495e-07, "loss": 0.3481, "step": 28591 }, { "epoch": 2.683183183183183, "grad_norm": 1.0373658872133467, "learning_rate": 3.360975560255392e-07, "loss": 0.2857, "step": 28592 }, { "epoch": 2.683277027027027, "grad_norm": 0.9914246093722638, "learning_rate": 3.359007934513603e-07, "loss": 0.3009, "step": 28593 }, { "epoch": 2.683370870870871, "grad_norm": 1.0645085603209679, "learning_rate": 3.3570408648778586e-07, "loss": 0.2782, "step": 28594 }, { "epoch": 2.683464714714715, "grad_norm": 1.3090150889007188, "learning_rate": 3.3550743513716013e-07, "loss": 0.3096, "step": 28595 }, { "epoch": 2.6835585585585586, "grad_norm": 1.2563442624108003, "learning_rate": 3.353108394018284e-07, "loss": 0.3423, "step": 28596 }, { "epoch": 2.6836524024024024, "grad_norm": 1.0154102027031788, "learning_rate": 3.351142992841333e-07, "loss": 0.3377, "step": 28597 }, { "epoch": 2.6837462462462462, "grad_norm": 1.0612522823841704, "learning_rate": 3.349178147864207e-07, "loss": 0.2561, "step": 28598 }, { "epoch": 2.68384009009009, "grad_norm": 1.1518618944133983, "learning_rate": 3.347213859110321e-07, "loss": 0.269, "step": 28599 }, { "epoch": 2.683933933933934, "grad_norm": 1.193922323919933, "learning_rate": 3.345250126603083e-07, "loss": 0.3199, "step": 28600 }, { "epoch": 2.6840277777777777, "grad_norm": 1.2928648357884518, "learning_rate": 3.3432869503659313e-07, "loss": 0.3148, "step": 28601 }, { "epoch": 2.6841216216216215, "grad_norm": 1.118906260299982, "learning_rate": 3.341324330422258e-07, "loss": 0.3287, "step": 28602 }, { "epoch": 2.6842154654654653, "grad_norm": 1.3721979316543507, "learning_rate": 3.3393622667954595e-07, "loss": 0.2986, "step": 28603 }, { "epoch": 2.6843093093093096, "grad_norm": 1.3350700481161728, "learning_rate": 3.337400759508941e-07, "loss": 0.3215, "step": 28604 }, { "epoch": 2.684403153153153, "grad_norm": 0.9757296716590641, "learning_rate": 3.3354398085860886e-07, "loss": 0.3231, "step": 28605 }, { "epoch": 2.684496996996997, "grad_norm": 0.9970685716407517, "learning_rate": 3.333479414050278e-07, "loss": 0.277, "step": 28606 }, { "epoch": 2.684590840840841, "grad_norm": 1.3139105715358834, "learning_rate": 3.3315195759248853e-07, "loss": 0.2892, "step": 28607 }, { "epoch": 2.684684684684685, "grad_norm": 1.2812564001957145, "learning_rate": 3.329560294233275e-07, "loss": 0.3248, "step": 28608 }, { "epoch": 2.6847785285285286, "grad_norm": 1.6214981430056474, "learning_rate": 3.327601568998806e-07, "loss": 0.3021, "step": 28609 }, { "epoch": 2.6848723723723724, "grad_norm": 1.2733198781211568, "learning_rate": 3.3256434002448436e-07, "loss": 0.2991, "step": 28610 }, { "epoch": 2.6849662162162162, "grad_norm": 1.2829399202228629, "learning_rate": 3.323685787994724e-07, "loss": 0.3214, "step": 28611 }, { "epoch": 2.68506006006006, "grad_norm": 1.2269338583524205, "learning_rate": 3.3217287322717893e-07, "loss": 0.298, "step": 28612 }, { "epoch": 2.685153903903904, "grad_norm": 1.5794843883095242, "learning_rate": 3.319772233099383e-07, "loss": 0.3292, "step": 28613 }, { "epoch": 2.6852477477477477, "grad_norm": 1.353255117359119, "learning_rate": 3.3178162905008247e-07, "loss": 0.3241, "step": 28614 }, { "epoch": 2.6853415915915915, "grad_norm": 1.0805544236830982, "learning_rate": 3.315860904499429e-07, "loss": 0.2996, "step": 28615 }, { "epoch": 2.6854354354354353, "grad_norm": 1.0436375358798908, "learning_rate": 3.3139060751185283e-07, "loss": 0.3379, "step": 28616 }, { "epoch": 2.6855292792792795, "grad_norm": 1.117278461546387, "learning_rate": 3.311951802381419e-07, "loss": 0.2875, "step": 28617 }, { "epoch": 2.685623123123123, "grad_norm": 1.0721338242545573, "learning_rate": 3.3099980863113947e-07, "loss": 0.3536, "step": 28618 }, { "epoch": 2.685716966966967, "grad_norm": 1.029744906031215, "learning_rate": 3.3080449269317747e-07, "loss": 0.3164, "step": 28619 }, { "epoch": 2.685810810810811, "grad_norm": 1.4294167682816648, "learning_rate": 3.306092324265825e-07, "loss": 0.3243, "step": 28620 }, { "epoch": 2.6859046546546548, "grad_norm": 1.0944186375159333, "learning_rate": 3.30414027833682e-07, "loss": 0.3148, "step": 28621 }, { "epoch": 2.6859984984984986, "grad_norm": 1.0897541366573384, "learning_rate": 3.302188789168059e-07, "loss": 0.3199, "step": 28622 }, { "epoch": 2.6860923423423424, "grad_norm": 1.1067390640296482, "learning_rate": 3.3002378567827953e-07, "loss": 0.318, "step": 28623 }, { "epoch": 2.686186186186186, "grad_norm": 1.2779034168697412, "learning_rate": 3.298287481204282e-07, "loss": 0.3283, "step": 28624 }, { "epoch": 2.68628003003003, "grad_norm": 1.0305312175389245, "learning_rate": 3.2963376624557896e-07, "loss": 0.3054, "step": 28625 }, { "epoch": 2.686373873873874, "grad_norm": 1.089576463205681, "learning_rate": 3.2943884005605663e-07, "loss": 0.3392, "step": 28626 }, { "epoch": 2.6864677177177176, "grad_norm": 1.2471667885431468, "learning_rate": 3.2924396955418327e-07, "loss": 0.3016, "step": 28627 }, { "epoch": 2.6865615615615615, "grad_norm": 1.2404760793628604, "learning_rate": 3.2904915474228525e-07, "loss": 0.3018, "step": 28628 }, { "epoch": 2.6866554054054053, "grad_norm": 1.0063851066492013, "learning_rate": 3.288543956226831e-07, "loss": 0.2731, "step": 28629 }, { "epoch": 2.6867492492492495, "grad_norm": 1.218441716813295, "learning_rate": 3.286596921976998e-07, "loss": 0.3175, "step": 28630 }, { "epoch": 2.686843093093093, "grad_norm": 1.04349094150469, "learning_rate": 3.284650444696569e-07, "loss": 0.316, "step": 28631 }, { "epoch": 2.686936936936937, "grad_norm": 1.3403702082674689, "learning_rate": 3.282704524408753e-07, "loss": 0.309, "step": 28632 }, { "epoch": 2.6870307807807805, "grad_norm": 0.9893515508921867, "learning_rate": 3.2807591611367486e-07, "loss": 0.3006, "step": 28633 }, { "epoch": 2.6871246246246248, "grad_norm": 1.0475011082759143, "learning_rate": 3.2788143549037534e-07, "loss": 0.3479, "step": 28634 }, { "epoch": 2.6872184684684686, "grad_norm": 1.2203389565076697, "learning_rate": 3.2768701057329545e-07, "loss": 0.3328, "step": 28635 }, { "epoch": 2.6873123123123124, "grad_norm": 1.0519920163994312, "learning_rate": 3.274926413647522e-07, "loss": 0.2873, "step": 28636 }, { "epoch": 2.687406156156156, "grad_norm": 0.9506460950863939, "learning_rate": 3.2729832786706495e-07, "loss": 0.2876, "step": 28637 }, { "epoch": 2.6875, "grad_norm": 1.1250283482616419, "learning_rate": 3.271040700825495e-07, "loss": 0.3438, "step": 28638 }, { "epoch": 2.687593843843844, "grad_norm": 1.1492028235715896, "learning_rate": 3.269098680135213e-07, "loss": 0.3269, "step": 28639 }, { "epoch": 2.6876876876876876, "grad_norm": 1.0780083003488838, "learning_rate": 3.267157216622979e-07, "loss": 0.2936, "step": 28640 }, { "epoch": 2.6877815315315314, "grad_norm": 1.176303908988933, "learning_rate": 3.265216310311931e-07, "loss": 0.3111, "step": 28641 }, { "epoch": 2.6878753753753752, "grad_norm": 1.4405490899527538, "learning_rate": 3.263275961225199e-07, "loss": 0.2661, "step": 28642 }, { "epoch": 2.6879692192192195, "grad_norm": 1.4669325905567046, "learning_rate": 3.2613361693859367e-07, "loss": 0.3124, "step": 28643 }, { "epoch": 2.688063063063063, "grad_norm": 1.092625367480161, "learning_rate": 3.25939693481726e-07, "loss": 0.3454, "step": 28644 }, { "epoch": 2.688156906906907, "grad_norm": 1.216231572600267, "learning_rate": 3.2574582575423007e-07, "loss": 0.3228, "step": 28645 }, { "epoch": 2.6882507507507505, "grad_norm": 1.3834789534218221, "learning_rate": 3.2555201375841615e-07, "loss": 0.3289, "step": 28646 }, { "epoch": 2.6883445945945947, "grad_norm": 1.2552859625661452, "learning_rate": 3.2535825749659633e-07, "loss": 0.3125, "step": 28647 }, { "epoch": 2.6884384384384385, "grad_norm": 1.2170964552724681, "learning_rate": 3.2516455697107927e-07, "loss": 0.3538, "step": 28648 }, { "epoch": 2.6885322822822824, "grad_norm": 1.2432146227367498, "learning_rate": 3.24970912184176e-07, "loss": 0.318, "step": 28649 }, { "epoch": 2.688626126126126, "grad_norm": 1.1690607680008824, "learning_rate": 3.247773231381951e-07, "loss": 0.3389, "step": 28650 }, { "epoch": 2.68871996996997, "grad_norm": 1.1881969775510595, "learning_rate": 3.2458378983544315e-07, "loss": 0.3105, "step": 28651 }, { "epoch": 2.688813813813814, "grad_norm": 1.4206538433137155, "learning_rate": 3.243903122782299e-07, "loss": 0.315, "step": 28652 }, { "epoch": 2.6889076576576576, "grad_norm": 1.2332792312636116, "learning_rate": 3.241968904688614e-07, "loss": 0.3377, "step": 28653 }, { "epoch": 2.6890015015015014, "grad_norm": 1.1540362813175864, "learning_rate": 3.240035244096429e-07, "loss": 0.348, "step": 28654 }, { "epoch": 2.6890953453453452, "grad_norm": 1.0948494704481426, "learning_rate": 3.238102141028815e-07, "loss": 0.3139, "step": 28655 }, { "epoch": 2.689189189189189, "grad_norm": 1.1449831692553925, "learning_rate": 3.236169595508809e-07, "loss": 0.3165, "step": 28656 }, { "epoch": 2.689283033033033, "grad_norm": 1.092319536357745, "learning_rate": 3.2342376075594594e-07, "loss": 0.3213, "step": 28657 }, { "epoch": 2.689376876876877, "grad_norm": 1.1337938028018213, "learning_rate": 3.232306177203798e-07, "loss": 0.2637, "step": 28658 }, { "epoch": 2.6894707207207205, "grad_norm": 1.1266448089350252, "learning_rate": 3.2303753044648557e-07, "loss": 0.3236, "step": 28659 }, { "epoch": 2.6895645645645647, "grad_norm": 1.8610409519370539, "learning_rate": 3.2284449893656476e-07, "loss": 0.2935, "step": 28660 }, { "epoch": 2.6896584084084085, "grad_norm": 1.0826135952551883, "learning_rate": 3.2265152319292e-07, "loss": 0.3072, "step": 28661 }, { "epoch": 2.6897522522522523, "grad_norm": 1.4174999000203972, "learning_rate": 3.2245860321785174e-07, "loss": 0.329, "step": 28662 }, { "epoch": 2.689846096096096, "grad_norm": 1.0068962310778298, "learning_rate": 3.222657390136591e-07, "loss": 0.2969, "step": 28663 }, { "epoch": 2.68993993993994, "grad_norm": 1.2555603255542207, "learning_rate": 3.220729305826431e-07, "loss": 0.2783, "step": 28664 }, { "epoch": 2.6900337837837838, "grad_norm": 1.1613487962216564, "learning_rate": 3.2188017792710247e-07, "loss": 0.3237, "step": 28665 }, { "epoch": 2.6901276276276276, "grad_norm": 1.2049661784267682, "learning_rate": 3.216874810493342e-07, "loss": 0.2942, "step": 28666 }, { "epoch": 2.6902214714714714, "grad_norm": 1.1843719434355728, "learning_rate": 3.214948399516377e-07, "loss": 0.3133, "step": 28667 }, { "epoch": 2.690315315315315, "grad_norm": 1.0911674703834622, "learning_rate": 3.213022546363087e-07, "loss": 0.3345, "step": 28668 }, { "epoch": 2.690409159159159, "grad_norm": 1.0978911900554877, "learning_rate": 3.2110972510564333e-07, "loss": 0.287, "step": 28669 }, { "epoch": 2.690503003003003, "grad_norm": 1.0414167151768592, "learning_rate": 3.2091725136193794e-07, "loss": 0.316, "step": 28670 }, { "epoch": 2.690596846846847, "grad_norm": 1.1018217147865794, "learning_rate": 3.207248334074864e-07, "loss": 0.3458, "step": 28671 }, { "epoch": 2.6906906906906904, "grad_norm": 1.0788168824138313, "learning_rate": 3.2053247124458287e-07, "loss": 0.3054, "step": 28672 }, { "epoch": 2.6907845345345347, "grad_norm": 1.3849820850874401, "learning_rate": 3.2034016487552166e-07, "loss": 0.3222, "step": 28673 }, { "epoch": 2.6908783783783785, "grad_norm": 1.1811773754398662, "learning_rate": 3.20147914302596e-07, "loss": 0.3035, "step": 28674 }, { "epoch": 2.6909722222222223, "grad_norm": 1.0944264828601205, "learning_rate": 3.1995571952809666e-07, "loss": 0.3425, "step": 28675 }, { "epoch": 2.691066066066066, "grad_norm": 1.0593114238857053, "learning_rate": 3.19763580554317e-07, "loss": 0.3586, "step": 28676 }, { "epoch": 2.69115990990991, "grad_norm": 1.1030076624064216, "learning_rate": 3.1957149738354676e-07, "loss": 0.3621, "step": 28677 }, { "epoch": 2.6912537537537538, "grad_norm": 1.2943360344025505, "learning_rate": 3.1937947001807576e-07, "loss": 0.3103, "step": 28678 }, { "epoch": 2.6913475975975976, "grad_norm": 1.0822328357714903, "learning_rate": 3.191874984601945e-07, "loss": 0.2821, "step": 28679 }, { "epoch": 2.6914414414414414, "grad_norm": 1.1743885486902423, "learning_rate": 3.1899558271219213e-07, "loss": 0.3283, "step": 28680 }, { "epoch": 2.691535285285285, "grad_norm": 1.0964971241839692, "learning_rate": 3.188037227763563e-07, "loss": 0.3338, "step": 28681 }, { "epoch": 2.691629129129129, "grad_norm": 1.3035252656801426, "learning_rate": 3.1861191865497467e-07, "loss": 0.3139, "step": 28682 }, { "epoch": 2.691722972972973, "grad_norm": 1.1238508011691852, "learning_rate": 3.1842017035033367e-07, "loss": 0.2819, "step": 28683 }, { "epoch": 2.691816816816817, "grad_norm": 1.0906891339424478, "learning_rate": 3.182284778647199e-07, "loss": 0.3268, "step": 28684 }, { "epoch": 2.6919106606606604, "grad_norm": 1.5154594193885278, "learning_rate": 3.180368412004192e-07, "loss": 0.332, "step": 28685 }, { "epoch": 2.6920045045045047, "grad_norm": 1.0332495138654352, "learning_rate": 3.1784526035971654e-07, "loss": 0.2911, "step": 28686 }, { "epoch": 2.6920983483483485, "grad_norm": 1.0779088242733292, "learning_rate": 3.1765373534489497e-07, "loss": 0.3171, "step": 28687 }, { "epoch": 2.6921921921921923, "grad_norm": 1.8554403010745812, "learning_rate": 3.1746226615823994e-07, "loss": 0.3046, "step": 28688 }, { "epoch": 2.692286036036036, "grad_norm": 1.1506784357226494, "learning_rate": 3.1727085280203353e-07, "loss": 0.3112, "step": 28689 }, { "epoch": 2.69237987987988, "grad_norm": 1.3001607506163682, "learning_rate": 3.1707949527855663e-07, "loss": 0.3445, "step": 28690 }, { "epoch": 2.6924737237237237, "grad_norm": 1.1602734581461522, "learning_rate": 3.168881935900936e-07, "loss": 0.3686, "step": 28691 }, { "epoch": 2.6925675675675675, "grad_norm": 1.1083901315626312, "learning_rate": 3.1669694773892313e-07, "loss": 0.3373, "step": 28692 }, { "epoch": 2.6926614114114114, "grad_norm": 1.2736135537950797, "learning_rate": 3.1650575772732616e-07, "loss": 0.2756, "step": 28693 }, { "epoch": 2.692755255255255, "grad_norm": 1.1577352720199958, "learning_rate": 3.1631462355758256e-07, "loss": 0.3463, "step": 28694 }, { "epoch": 2.692849099099099, "grad_norm": 0.9544688296417788, "learning_rate": 3.161235452319711e-07, "loss": 0.2933, "step": 28695 }, { "epoch": 2.692942942942943, "grad_norm": 1.8826598531853793, "learning_rate": 3.1593252275276876e-07, "loss": 0.2883, "step": 28696 }, { "epoch": 2.693036786786787, "grad_norm": 1.2028941177241723, "learning_rate": 3.157415561222549e-07, "loss": 0.349, "step": 28697 }, { "epoch": 2.6931306306306304, "grad_norm": 1.4426639824922027, "learning_rate": 3.15550645342706e-07, "loss": 0.3425, "step": 28698 }, { "epoch": 2.6932244744744747, "grad_norm": 1.0431503549474743, "learning_rate": 3.1535979041639686e-07, "loss": 0.2819, "step": 28699 }, { "epoch": 2.6933183183183185, "grad_norm": 1.0658294255681604, "learning_rate": 3.151689913456057e-07, "loss": 0.3564, "step": 28700 }, { "epoch": 2.6934121621621623, "grad_norm": 1.2100866263152714, "learning_rate": 3.149782481326058e-07, "loss": 0.2828, "step": 28701 }, { "epoch": 2.693506006006006, "grad_norm": 1.073799157746374, "learning_rate": 3.1478756077967076e-07, "loss": 0.3192, "step": 28702 }, { "epoch": 2.69359984984985, "grad_norm": 1.225235625084304, "learning_rate": 3.1459692928907604e-07, "loss": 0.2877, "step": 28703 }, { "epoch": 2.6936936936936937, "grad_norm": 1.0913137321683382, "learning_rate": 3.144063536630937e-07, "loss": 0.3709, "step": 28704 }, { "epoch": 2.6937875375375375, "grad_norm": 1.0578753735612056, "learning_rate": 3.142158339039952e-07, "loss": 0.3172, "step": 28705 }, { "epoch": 2.6938813813813813, "grad_norm": 1.1407742965975376, "learning_rate": 3.1402537001405334e-07, "loss": 0.2936, "step": 28706 }, { "epoch": 2.693975225225225, "grad_norm": 1.0751825894344464, "learning_rate": 3.138349619955383e-07, "loss": 0.3373, "step": 28707 }, { "epoch": 2.694069069069069, "grad_norm": 1.2409986633666474, "learning_rate": 3.136446098507201e-07, "loss": 0.3056, "step": 28708 }, { "epoch": 2.6941629129129128, "grad_norm": 1.1244055111725206, "learning_rate": 3.1345431358186904e-07, "loss": 0.2918, "step": 28709 }, { "epoch": 2.694256756756757, "grad_norm": 1.029379361041821, "learning_rate": 3.132640731912545e-07, "loss": 0.2995, "step": 28710 }, { "epoch": 2.6943506006006004, "grad_norm": 1.4901376824011923, "learning_rate": 3.130738886811424e-07, "loss": 0.3162, "step": 28711 }, { "epoch": 2.6944444444444446, "grad_norm": 1.1855096021799472, "learning_rate": 3.128837600538032e-07, "loss": 0.3068, "step": 28712 }, { "epoch": 2.6945382882882885, "grad_norm": 1.3419711349382724, "learning_rate": 3.1269368731150273e-07, "loss": 0.3412, "step": 28713 }, { "epoch": 2.6946321321321323, "grad_norm": 1.100198581560175, "learning_rate": 3.1250367045650597e-07, "loss": 0.3467, "step": 28714 }, { "epoch": 2.694725975975976, "grad_norm": 1.4347615155018703, "learning_rate": 3.123137094910805e-07, "loss": 0.3261, "step": 28715 }, { "epoch": 2.69481981981982, "grad_norm": 0.9791441580404517, "learning_rate": 3.1212380441749015e-07, "loss": 0.3193, "step": 28716 }, { "epoch": 2.6949136636636637, "grad_norm": 0.9255966378665628, "learning_rate": 3.1193395523799965e-07, "loss": 0.3088, "step": 28717 }, { "epoch": 2.6950075075075075, "grad_norm": 1.1019512785261505, "learning_rate": 3.117441619548722e-07, "loss": 0.3046, "step": 28718 }, { "epoch": 2.6951013513513513, "grad_norm": 1.2206973457771721, "learning_rate": 3.115544245703711e-07, "loss": 0.2847, "step": 28719 }, { "epoch": 2.695195195195195, "grad_norm": 1.0574870992652474, "learning_rate": 3.113647430867578e-07, "loss": 0.317, "step": 28720 }, { "epoch": 2.695289039039039, "grad_norm": 1.3496549532671962, "learning_rate": 3.111751175062949e-07, "loss": 0.293, "step": 28721 }, { "epoch": 2.6953828828828827, "grad_norm": 3.0245304251167293, "learning_rate": 3.1098554783124293e-07, "loss": 0.3306, "step": 28722 }, { "epoch": 2.695476726726727, "grad_norm": 1.1705351252991854, "learning_rate": 3.107960340638616e-07, "loss": 0.305, "step": 28723 }, { "epoch": 2.6955705705705704, "grad_norm": 0.9501979647240386, "learning_rate": 3.106065762064114e-07, "loss": 0.3067, "step": 28724 }, { "epoch": 2.6956644144144146, "grad_norm": 1.1385759695195778, "learning_rate": 3.104171742611517e-07, "loss": 0.2974, "step": 28725 }, { "epoch": 2.695758258258258, "grad_norm": 1.020311413215941, "learning_rate": 3.102278282303384e-07, "loss": 0.3139, "step": 28726 }, { "epoch": 2.6958521021021022, "grad_norm": 0.9842726071978883, "learning_rate": 3.1003853811623185e-07, "loss": 0.3077, "step": 28727 }, { "epoch": 2.695945945945946, "grad_norm": 1.0513669269354478, "learning_rate": 3.098493039210876e-07, "loss": 0.2878, "step": 28728 }, { "epoch": 2.69603978978979, "grad_norm": 1.1316878503864005, "learning_rate": 3.096601256471621e-07, "loss": 0.333, "step": 28729 }, { "epoch": 2.6961336336336337, "grad_norm": 1.0862806434260721, "learning_rate": 3.094710032967113e-07, "loss": 0.2861, "step": 28730 }, { "epoch": 2.6962274774774775, "grad_norm": 1.1199763127491733, "learning_rate": 3.0928193687198904e-07, "loss": 0.358, "step": 28731 }, { "epoch": 2.6963213213213213, "grad_norm": 1.1288873139779034, "learning_rate": 3.090929263752501e-07, "loss": 0.3222, "step": 28732 }, { "epoch": 2.696415165165165, "grad_norm": 1.064158122213819, "learning_rate": 3.089039718087489e-07, "loss": 0.3265, "step": 28733 }, { "epoch": 2.696509009009009, "grad_norm": 1.0826876423762286, "learning_rate": 3.087150731747379e-07, "loss": 0.3263, "step": 28734 }, { "epoch": 2.6966028528528527, "grad_norm": 1.5475911301148824, "learning_rate": 3.0852623047546824e-07, "loss": 0.3328, "step": 28735 }, { "epoch": 2.6966966966966965, "grad_norm": 2.10834999559158, "learning_rate": 3.0833744371319365e-07, "loss": 0.2823, "step": 28736 }, { "epoch": 2.6967905405405403, "grad_norm": 1.049192532187116, "learning_rate": 3.0814871289016334e-07, "loss": 0.3346, "step": 28737 }, { "epoch": 2.6968843843843846, "grad_norm": 1.2051007745122886, "learning_rate": 3.079600380086278e-07, "loss": 0.2785, "step": 28738 }, { "epoch": 2.696978228228228, "grad_norm": 1.109467625165313, "learning_rate": 3.077714190708375e-07, "loss": 0.2868, "step": 28739 }, { "epoch": 2.6970720720720722, "grad_norm": 0.9230054645357547, "learning_rate": 3.075828560790406e-07, "loss": 0.3244, "step": 28740 }, { "epoch": 2.697165915915916, "grad_norm": 1.202236977855959, "learning_rate": 3.073943490354858e-07, "loss": 0.3246, "step": 28741 }, { "epoch": 2.69725975975976, "grad_norm": 1.1039197051312288, "learning_rate": 3.072058979424208e-07, "loss": 0.339, "step": 28742 }, { "epoch": 2.6973536036036037, "grad_norm": 1.0449651403217768, "learning_rate": 3.0701750280209165e-07, "loss": 0.3143, "step": 28743 }, { "epoch": 2.6974474474474475, "grad_norm": 0.9771324760232452, "learning_rate": 3.0682916361674474e-07, "loss": 0.2905, "step": 28744 }, { "epoch": 2.6975412912912913, "grad_norm": 1.122602386078916, "learning_rate": 3.066408803886267e-07, "loss": 0.2778, "step": 28745 }, { "epoch": 2.697635135135135, "grad_norm": 1.2803417214139203, "learning_rate": 3.0645265311998183e-07, "loss": 0.3157, "step": 28746 }, { "epoch": 2.697728978978979, "grad_norm": 1.1727895241325974, "learning_rate": 3.062644818130539e-07, "loss": 0.2893, "step": 28747 }, { "epoch": 2.6978228228228227, "grad_norm": 1.4239326668865788, "learning_rate": 3.060763664700872e-07, "loss": 0.2928, "step": 28748 }, { "epoch": 2.6979166666666665, "grad_norm": 1.3115701962209119, "learning_rate": 3.0588830709332497e-07, "loss": 0.3207, "step": 28749 }, { "epoch": 2.6980105105105103, "grad_norm": 1.0717324259270258, "learning_rate": 3.057003036850076e-07, "loss": 0.3372, "step": 28750 }, { "epoch": 2.6981043543543546, "grad_norm": 1.0994631976798885, "learning_rate": 3.0551235624737884e-07, "loss": 0.3293, "step": 28751 }, { "epoch": 2.698198198198198, "grad_norm": 4.978755576584954, "learning_rate": 3.0532446478267917e-07, "loss": 0.2968, "step": 28752 }, { "epoch": 2.698292042042042, "grad_norm": 1.0535577615944387, "learning_rate": 3.0513662929314735e-07, "loss": 0.3279, "step": 28753 }, { "epoch": 2.698385885885886, "grad_norm": 1.3166839893350877, "learning_rate": 3.049488497810249e-07, "loss": 0.3049, "step": 28754 }, { "epoch": 2.69847972972973, "grad_norm": 1.143065030630126, "learning_rate": 3.0476112624855006e-07, "loss": 0.2798, "step": 28755 }, { "epoch": 2.6985735735735736, "grad_norm": 1.3818490860461705, "learning_rate": 3.0457345869796095e-07, "loss": 0.3225, "step": 28756 }, { "epoch": 2.6986674174174174, "grad_norm": 1.3221935902708808, "learning_rate": 3.043858471314948e-07, "loss": 0.3224, "step": 28757 }, { "epoch": 2.6987612612612613, "grad_norm": 1.1677495257914596, "learning_rate": 3.0419829155138915e-07, "loss": 0.3199, "step": 28758 }, { "epoch": 2.698855105105105, "grad_norm": 1.32219934963014, "learning_rate": 3.040107919598789e-07, "loss": 0.3322, "step": 28759 }, { "epoch": 2.698948948948949, "grad_norm": 1.2734627361340385, "learning_rate": 3.0382334835920126e-07, "loss": 0.3364, "step": 28760 }, { "epoch": 2.6990427927927927, "grad_norm": 2.0756255197621702, "learning_rate": 3.0363596075159096e-07, "loss": 0.3408, "step": 28761 }, { "epoch": 2.6991366366366365, "grad_norm": 1.2480389239500636, "learning_rate": 3.0344862913928073e-07, "loss": 0.3204, "step": 28762 }, { "epoch": 2.6992304804804803, "grad_norm": 1.0715028678182534, "learning_rate": 3.032613535245066e-07, "loss": 0.315, "step": 28763 }, { "epoch": 2.6993243243243246, "grad_norm": 1.4609033590956837, "learning_rate": 3.030741339094995e-07, "loss": 0.3199, "step": 28764 }, { "epoch": 2.699418168168168, "grad_norm": 1.3356013470079644, "learning_rate": 3.028869702964921e-07, "loss": 0.3264, "step": 28765 }, { "epoch": 2.699512012012012, "grad_norm": 1.2289379307248933, "learning_rate": 3.0269986268771643e-07, "loss": 0.332, "step": 28766 }, { "epoch": 2.699605855855856, "grad_norm": 6.498565244929598, "learning_rate": 3.025128110854031e-07, "loss": 0.2988, "step": 28767 }, { "epoch": 2.6996996996997, "grad_norm": 1.1555035897988999, "learning_rate": 3.02325815491783e-07, "loss": 0.2601, "step": 28768 }, { "epoch": 2.6997935435435436, "grad_norm": 1.1505807836793884, "learning_rate": 3.0213887590908433e-07, "loss": 0.3341, "step": 28769 }, { "epoch": 2.6998873873873874, "grad_norm": 0.9916765185830289, "learning_rate": 3.0195199233953697e-07, "loss": 0.3161, "step": 28770 }, { "epoch": 2.6999812312312312, "grad_norm": 1.3148323720915807, "learning_rate": 3.0176516478536865e-07, "loss": 0.323, "step": 28771 }, { "epoch": 2.700075075075075, "grad_norm": 1.2343913749846736, "learning_rate": 3.0157839324880754e-07, "loss": 0.3097, "step": 28772 }, { "epoch": 2.700168918918919, "grad_norm": 1.0319543386945766, "learning_rate": 3.013916777320802e-07, "loss": 0.2669, "step": 28773 }, { "epoch": 2.7002627627627627, "grad_norm": 1.1194717918294648, "learning_rate": 3.0120501823741266e-07, "loss": 0.3027, "step": 28774 }, { "epoch": 2.7003566066066065, "grad_norm": 1.207636094760252, "learning_rate": 3.0101841476703084e-07, "loss": 0.347, "step": 28775 }, { "epoch": 2.7004504504504503, "grad_norm": 1.2175543510755817, "learning_rate": 3.008318673231597e-07, "loss": 0.2764, "step": 28776 }, { "epoch": 2.7005442942942945, "grad_norm": 1.0845984105145474, "learning_rate": 3.0064537590802243e-07, "loss": 0.2957, "step": 28777 }, { "epoch": 2.700638138138138, "grad_norm": 1.8890873199381646, "learning_rate": 3.004589405238445e-07, "loss": 0.3171, "step": 28778 }, { "epoch": 2.700731981981982, "grad_norm": 1.0308940319292057, "learning_rate": 3.002725611728474e-07, "loss": 0.3267, "step": 28779 }, { "epoch": 2.700825825825826, "grad_norm": 1.0335040438727487, "learning_rate": 3.0008623785725387e-07, "loss": 0.3023, "step": 28780 }, { "epoch": 2.70091966966967, "grad_norm": 1.2451225018413923, "learning_rate": 2.998999705792849e-07, "loss": 0.2873, "step": 28781 }, { "epoch": 2.7010135135135136, "grad_norm": 1.2904601158910294, "learning_rate": 2.99713759341162e-07, "loss": 0.3032, "step": 28782 }, { "epoch": 2.7011073573573574, "grad_norm": 1.2466581535521963, "learning_rate": 2.995276041451045e-07, "loss": 0.3218, "step": 28783 }, { "epoch": 2.701201201201201, "grad_norm": 1.0717099177164933, "learning_rate": 2.9934150499333347e-07, "loss": 0.3303, "step": 28784 }, { "epoch": 2.701295045045045, "grad_norm": 1.162637104523928, "learning_rate": 2.9915546188806654e-07, "loss": 0.2846, "step": 28785 }, { "epoch": 2.701388888888889, "grad_norm": 1.0723674665170129, "learning_rate": 2.9896947483152195e-07, "loss": 0.2823, "step": 28786 }, { "epoch": 2.7014827327327327, "grad_norm": 1.984272335279839, "learning_rate": 2.987835438259179e-07, "loss": 0.374, "step": 28787 }, { "epoch": 2.7015765765765765, "grad_norm": 1.1220687099522317, "learning_rate": 2.9859766887347154e-07, "loss": 0.2719, "step": 28788 }, { "epoch": 2.7016704204204203, "grad_norm": 1.1483631039972635, "learning_rate": 2.984118499763972e-07, "loss": 0.3306, "step": 28789 }, { "epoch": 2.7017642642642645, "grad_norm": 1.0910499211068145, "learning_rate": 2.982260871369125e-07, "loss": 0.3275, "step": 28790 }, { "epoch": 2.701858108108108, "grad_norm": 1.1133256352209755, "learning_rate": 2.980403803572318e-07, "loss": 0.3355, "step": 28791 }, { "epoch": 2.701951951951952, "grad_norm": 1.0799201075662572, "learning_rate": 2.9785472963956894e-07, "loss": 0.265, "step": 28792 }, { "epoch": 2.702045795795796, "grad_norm": 1.4134417277907583, "learning_rate": 2.976691349861377e-07, "loss": 0.2733, "step": 28793 }, { "epoch": 2.7021396396396398, "grad_norm": 1.1380073460177167, "learning_rate": 2.974835963991507e-07, "loss": 0.3352, "step": 28794 }, { "epoch": 2.7022334834834836, "grad_norm": 1.396657059485122, "learning_rate": 2.9729811388081895e-07, "loss": 0.3486, "step": 28795 }, { "epoch": 2.7023273273273274, "grad_norm": 1.124642574707397, "learning_rate": 2.971126874333569e-07, "loss": 0.3229, "step": 28796 }, { "epoch": 2.702421171171171, "grad_norm": 1.2829545202684616, "learning_rate": 2.9692731705897314e-07, "loss": 0.3011, "step": 28797 }, { "epoch": 2.702515015015015, "grad_norm": 1.1548962144200017, "learning_rate": 2.9674200275987776e-07, "loss": 0.3217, "step": 28798 }, { "epoch": 2.702608858858859, "grad_norm": 1.37460828223249, "learning_rate": 2.965567445382822e-07, "loss": 0.2989, "step": 28799 }, { "epoch": 2.7027027027027026, "grad_norm": 1.047637184103216, "learning_rate": 2.963715423963942e-07, "loss": 0.3368, "step": 28800 }, { "epoch": 2.7027965465465464, "grad_norm": 1.2298244974609878, "learning_rate": 2.961863963364209e-07, "loss": 0.3008, "step": 28801 }, { "epoch": 2.7028903903903903, "grad_norm": 1.0033441819631161, "learning_rate": 2.96001306360571e-07, "loss": 0.3068, "step": 28802 }, { "epoch": 2.7029842342342345, "grad_norm": 0.9595232422398188, "learning_rate": 2.9581627247105174e-07, "loss": 0.3054, "step": 28803 }, { "epoch": 2.703078078078078, "grad_norm": 1.2391011300922177, "learning_rate": 2.95631294670069e-07, "loss": 0.2921, "step": 28804 }, { "epoch": 2.703171921921922, "grad_norm": 1.1264819905302834, "learning_rate": 2.954463729598273e-07, "loss": 0.318, "step": 28805 }, { "epoch": 2.7032657657657655, "grad_norm": 1.1862177848479578, "learning_rate": 2.9526150734253247e-07, "loss": 0.2891, "step": 28806 }, { "epoch": 2.7033596096096097, "grad_norm": 1.121041960538604, "learning_rate": 2.9507669782038783e-07, "loss": 0.3367, "step": 28807 }, { "epoch": 2.7034534534534536, "grad_norm": 1.1604193459949999, "learning_rate": 2.948919443955983e-07, "loss": 0.3265, "step": 28808 }, { "epoch": 2.7035472972972974, "grad_norm": 1.2046572032157166, "learning_rate": 2.9470724707036545e-07, "loss": 0.3144, "step": 28809 }, { "epoch": 2.703641141141141, "grad_norm": 1.0552810818641687, "learning_rate": 2.9452260584689143e-07, "loss": 0.3156, "step": 28810 }, { "epoch": 2.703734984984985, "grad_norm": 1.1568014108390043, "learning_rate": 2.9433802072737884e-07, "loss": 0.3378, "step": 28811 }, { "epoch": 2.703828828828829, "grad_norm": 3.1043656050021387, "learning_rate": 2.9415349171402816e-07, "loss": 0.2835, "step": 28812 }, { "epoch": 2.7039226726726726, "grad_norm": 1.9725667291949358, "learning_rate": 2.939690188090383e-07, "loss": 0.3195, "step": 28813 }, { "epoch": 2.7040165165165164, "grad_norm": 1.2880174036283225, "learning_rate": 2.9378460201461015e-07, "loss": 0.3565, "step": 28814 }, { "epoch": 2.7041103603603602, "grad_norm": 1.6102230874193972, "learning_rate": 2.936002413329425e-07, "loss": 0.3138, "step": 28815 }, { "epoch": 2.704204204204204, "grad_norm": 1.513795063927112, "learning_rate": 2.934159367662326e-07, "loss": 0.3764, "step": 28816 }, { "epoch": 2.704298048048048, "grad_norm": 1.244496912646874, "learning_rate": 2.932316883166786e-07, "loss": 0.3464, "step": 28817 }, { "epoch": 2.704391891891892, "grad_norm": 1.1123947213296093, "learning_rate": 2.930474959864776e-07, "loss": 0.3131, "step": 28818 }, { "epoch": 2.7044857357357355, "grad_norm": 1.2284916006040763, "learning_rate": 2.928633597778241e-07, "loss": 0.3128, "step": 28819 }, { "epoch": 2.7045795795795797, "grad_norm": 1.97467446804282, "learning_rate": 2.9267927969291565e-07, "loss": 0.3011, "step": 28820 }, { "epoch": 2.7046734234234235, "grad_norm": 1.1071661551009069, "learning_rate": 2.924952557339461e-07, "loss": 0.3193, "step": 28821 }, { "epoch": 2.7047672672672673, "grad_norm": 1.2793136410846533, "learning_rate": 2.923112879031087e-07, "loss": 0.3141, "step": 28822 }, { "epoch": 2.704861111111111, "grad_norm": 1.533332604853635, "learning_rate": 2.921273762025989e-07, "loss": 0.2979, "step": 28823 }, { "epoch": 2.704954954954955, "grad_norm": 1.109297337865437, "learning_rate": 2.9194352063460827e-07, "loss": 0.3139, "step": 28824 }, { "epoch": 2.705048798798799, "grad_norm": 1.2106047964545155, "learning_rate": 2.91759721201329e-07, "loss": 0.3284, "step": 28825 }, { "epoch": 2.7051426426426426, "grad_norm": 1.1884515133595401, "learning_rate": 2.915759779049526e-07, "loss": 0.2827, "step": 28826 }, { "epoch": 2.7052364864864864, "grad_norm": 1.469365401936719, "learning_rate": 2.913922907476707e-07, "loss": 0.2987, "step": 28827 }, { "epoch": 2.70533033033033, "grad_norm": 1.1656181244969244, "learning_rate": 2.912086597316721e-07, "loss": 0.3197, "step": 28828 }, { "epoch": 2.705424174174174, "grad_norm": 2.18107032459724, "learning_rate": 2.9102508485914725e-07, "loss": 0.3317, "step": 28829 }, { "epoch": 2.705518018018018, "grad_norm": 1.1008761003900682, "learning_rate": 2.908415661322844e-07, "loss": 0.3152, "step": 28830 }, { "epoch": 2.705611861861862, "grad_norm": 1.8594399511452624, "learning_rate": 2.9065810355327074e-07, "loss": 0.3026, "step": 28831 }, { "epoch": 2.7057057057057055, "grad_norm": 1.1346461539329207, "learning_rate": 2.9047469712429613e-07, "loss": 0.3105, "step": 28832 }, { "epoch": 2.7057995495495497, "grad_norm": 1.3095795065113491, "learning_rate": 2.9029134684754545e-07, "loss": 0.3458, "step": 28833 }, { "epoch": 2.7058933933933935, "grad_norm": 0.9734765664641177, "learning_rate": 2.901080527252048e-07, "loss": 0.3318, "step": 28834 }, { "epoch": 2.7059872372372373, "grad_norm": 2.3960925041768237, "learning_rate": 2.899248147594613e-07, "loss": 0.3483, "step": 28835 }, { "epoch": 2.706081081081081, "grad_norm": 1.0479244733528563, "learning_rate": 2.8974163295249823e-07, "loss": 0.2974, "step": 28836 }, { "epoch": 2.706174924924925, "grad_norm": 1.0576173911213382, "learning_rate": 2.895585073064988e-07, "loss": 0.3195, "step": 28837 }, { "epoch": 2.7062687687687688, "grad_norm": 1.1230281499411798, "learning_rate": 2.893754378236491e-07, "loss": 0.3167, "step": 28838 }, { "epoch": 2.7063626126126126, "grad_norm": 1.5847902647885461, "learning_rate": 2.8919242450613005e-07, "loss": 0.2889, "step": 28839 }, { "epoch": 2.7064564564564564, "grad_norm": 1.2093811361281628, "learning_rate": 2.8900946735612446e-07, "loss": 0.3591, "step": 28840 }, { "epoch": 2.7065503003003, "grad_norm": 1.1862368682423488, "learning_rate": 2.888265663758133e-07, "loss": 0.3118, "step": 28841 }, { "epoch": 2.706644144144144, "grad_norm": 1.171925891811707, "learning_rate": 2.8864372156737763e-07, "loss": 0.291, "step": 28842 }, { "epoch": 2.706737987987988, "grad_norm": 1.2349608337053073, "learning_rate": 2.884609329329963e-07, "loss": 0.3087, "step": 28843 }, { "epoch": 2.706831831831832, "grad_norm": 0.961254977412909, "learning_rate": 2.8827820047485077e-07, "loss": 0.2876, "step": 28844 }, { "epoch": 2.7069256756756754, "grad_norm": 1.084728391987619, "learning_rate": 2.8809552419511887e-07, "loss": 0.3334, "step": 28845 }, { "epoch": 2.7070195195195197, "grad_norm": 1.4093889705137945, "learning_rate": 2.879129040959777e-07, "loss": 0.3144, "step": 28846 }, { "epoch": 2.7071133633633635, "grad_norm": 1.0252039803413089, "learning_rate": 2.877303401796067e-07, "loss": 0.3252, "step": 28847 }, { "epoch": 2.7072072072072073, "grad_norm": 1.1105770103812302, "learning_rate": 2.875478324481806e-07, "loss": 0.2734, "step": 28848 }, { "epoch": 2.707301051051051, "grad_norm": 1.171750815109374, "learning_rate": 2.873653809038762e-07, "loss": 0.276, "step": 28849 }, { "epoch": 2.707394894894895, "grad_norm": 1.0020247075669488, "learning_rate": 2.871829855488695e-07, "loss": 0.3636, "step": 28850 }, { "epoch": 2.7074887387387387, "grad_norm": 1.1640378436466383, "learning_rate": 2.8700064638533475e-07, "loss": 0.3069, "step": 28851 }, { "epoch": 2.7075825825825826, "grad_norm": 1.240546824660005, "learning_rate": 2.8681836341544643e-07, "loss": 0.3226, "step": 28852 }, { "epoch": 2.7076764264264264, "grad_norm": 1.2295480737053261, "learning_rate": 2.8663613664137726e-07, "loss": 0.2931, "step": 28853 }, { "epoch": 2.70777027027027, "grad_norm": 4.675861571812093, "learning_rate": 2.8645396606529985e-07, "loss": 0.294, "step": 28854 }, { "epoch": 2.707864114114114, "grad_norm": 1.106362962391197, "learning_rate": 2.8627185168938585e-07, "loss": 0.3186, "step": 28855 }, { "epoch": 2.707957957957958, "grad_norm": 1.1359481379645848, "learning_rate": 2.860897935158086e-07, "loss": 0.3349, "step": 28856 }, { "epoch": 2.708051801801802, "grad_norm": 1.0446599404770331, "learning_rate": 2.8590779154673677e-07, "loss": 0.2751, "step": 28857 }, { "epoch": 2.7081456456456454, "grad_norm": 1.0700531235615045, "learning_rate": 2.8572584578434094e-07, "loss": 0.2863, "step": 28858 }, { "epoch": 2.7082394894894897, "grad_norm": 1.5467730054887907, "learning_rate": 2.8554395623079155e-07, "loss": 0.3185, "step": 28859 }, { "epoch": 2.7083333333333335, "grad_norm": 1.0914632904564465, "learning_rate": 2.8536212288825585e-07, "loss": 0.2976, "step": 28860 }, { "epoch": 2.7084271771771773, "grad_norm": 1.2684334684404488, "learning_rate": 2.8518034575890197e-07, "loss": 0.3566, "step": 28861 }, { "epoch": 2.708521021021021, "grad_norm": 1.2500815531773126, "learning_rate": 2.849986248448988e-07, "loss": 0.3218, "step": 28862 }, { "epoch": 2.708614864864865, "grad_norm": 0.9797183750597867, "learning_rate": 2.848169601484119e-07, "loss": 0.3304, "step": 28863 }, { "epoch": 2.7087087087087087, "grad_norm": 1.189934897239214, "learning_rate": 2.846353516716066e-07, "loss": 0.3025, "step": 28864 }, { "epoch": 2.7088025525525525, "grad_norm": 1.2709750979828582, "learning_rate": 2.8445379941664963e-07, "loss": 0.3118, "step": 28865 }, { "epoch": 2.7088963963963963, "grad_norm": 0.9972726672067285, "learning_rate": 2.842723033857048e-07, "loss": 0.3443, "step": 28866 }, { "epoch": 2.70899024024024, "grad_norm": 1.0850210875708775, "learning_rate": 2.8409086358093586e-07, "loss": 0.2837, "step": 28867 }, { "epoch": 2.709084084084084, "grad_norm": 0.9780064565001576, "learning_rate": 2.8390948000450725e-07, "loss": 0.3387, "step": 28868 }, { "epoch": 2.7091779279279278, "grad_norm": 1.0985635729501038, "learning_rate": 2.8372815265858054e-07, "loss": 0.3208, "step": 28869 }, { "epoch": 2.709271771771772, "grad_norm": 1.0523558696809883, "learning_rate": 2.835468815453174e-07, "loss": 0.2638, "step": 28870 }, { "epoch": 2.7093656156156154, "grad_norm": 1.1009957444813698, "learning_rate": 2.8336566666688103e-07, "loss": 0.3189, "step": 28871 }, { "epoch": 2.7094594594594597, "grad_norm": 1.0847107338448383, "learning_rate": 2.8318450802543086e-07, "loss": 0.3432, "step": 28872 }, { "epoch": 2.7095533033033035, "grad_norm": 1.186217785162928, "learning_rate": 2.8300340562312624e-07, "loss": 0.3021, "step": 28873 }, { "epoch": 2.7096471471471473, "grad_norm": 1.3510805595361615, "learning_rate": 2.8282235946212764e-07, "loss": 0.3201, "step": 28874 }, { "epoch": 2.709740990990991, "grad_norm": 1.0979423232036625, "learning_rate": 2.826413695445934e-07, "loss": 0.2942, "step": 28875 }, { "epoch": 2.709834834834835, "grad_norm": 1.0563136848226815, "learning_rate": 2.824604358726807e-07, "loss": 0.3237, "step": 28876 }, { "epoch": 2.7099286786786787, "grad_norm": 1.0930492752073444, "learning_rate": 2.8227955844854825e-07, "loss": 0.3122, "step": 28877 }, { "epoch": 2.7100225225225225, "grad_norm": 1.2110006274279237, "learning_rate": 2.820987372743517e-07, "loss": 0.2945, "step": 28878 }, { "epoch": 2.7101163663663663, "grad_norm": 1.3152654609482044, "learning_rate": 2.8191797235224694e-07, "loss": 0.3021, "step": 28879 }, { "epoch": 2.71021021021021, "grad_norm": 1.0804616833023375, "learning_rate": 2.817372636843896e-07, "loss": 0.31, "step": 28880 }, { "epoch": 2.710304054054054, "grad_norm": 1.1932266382297674, "learning_rate": 2.8155661127293453e-07, "loss": 0.3067, "step": 28881 }, { "epoch": 2.7103978978978978, "grad_norm": 1.098411411448592, "learning_rate": 2.81376015120034e-07, "loss": 0.2813, "step": 28882 }, { "epoch": 2.710491741741742, "grad_norm": 1.0237089028598279, "learning_rate": 2.81195475227844e-07, "loss": 0.2743, "step": 28883 }, { "epoch": 2.7105855855855854, "grad_norm": 1.0585306716546607, "learning_rate": 2.8101499159851496e-07, "loss": 0.2779, "step": 28884 }, { "epoch": 2.7106794294294296, "grad_norm": 1.6534710144873217, "learning_rate": 2.808345642341992e-07, "loss": 0.3392, "step": 28885 }, { "epoch": 2.710773273273273, "grad_norm": 1.0136315590514955, "learning_rate": 2.8065419313704933e-07, "loss": 0.2718, "step": 28886 }, { "epoch": 2.7108671171171173, "grad_norm": 1.1725650660606914, "learning_rate": 2.804738783092148e-07, "loss": 0.3334, "step": 28887 }, { "epoch": 2.710960960960961, "grad_norm": 1.0251433516271948, "learning_rate": 2.80293619752845e-07, "loss": 0.2918, "step": 28888 }, { "epoch": 2.711054804804805, "grad_norm": 1.3294553541489114, "learning_rate": 2.8011341747009036e-07, "loss": 0.2996, "step": 28889 }, { "epoch": 2.7111486486486487, "grad_norm": 1.079662244696671, "learning_rate": 2.799332714630992e-07, "loss": 0.2853, "step": 28890 }, { "epoch": 2.7112424924924925, "grad_norm": 1.4944585866206914, "learning_rate": 2.797531817340188e-07, "loss": 0.2856, "step": 28891 }, { "epoch": 2.7113363363363363, "grad_norm": 1.1260386924167651, "learning_rate": 2.7957314828499725e-07, "loss": 0.3342, "step": 28892 }, { "epoch": 2.71143018018018, "grad_norm": 1.0527150393359828, "learning_rate": 2.793931711181802e-07, "loss": 0.2843, "step": 28893 }, { "epoch": 2.711524024024024, "grad_norm": 1.1310551512730866, "learning_rate": 2.7921325023571313e-07, "loss": 0.2979, "step": 28894 }, { "epoch": 2.7116178678678677, "grad_norm": 1.2862938627863016, "learning_rate": 2.7903338563974315e-07, "loss": 0.3242, "step": 28895 }, { "epoch": 2.7117117117117115, "grad_norm": 1.2010596300719822, "learning_rate": 2.7885357733241357e-07, "loss": 0.2842, "step": 28896 }, { "epoch": 2.7118055555555554, "grad_norm": 1.1218249694883666, "learning_rate": 2.7867382531586774e-07, "loss": 0.3509, "step": 28897 }, { "epoch": 2.7118993993993996, "grad_norm": 1.140829659170755, "learning_rate": 2.784941295922505e-07, "loss": 0.272, "step": 28898 }, { "epoch": 2.711993243243243, "grad_norm": 1.1454521171339418, "learning_rate": 2.7831449016370303e-07, "loss": 0.3018, "step": 28899 }, { "epoch": 2.7120870870870872, "grad_norm": 1.2454122611847436, "learning_rate": 2.7813490703236743e-07, "loss": 0.2956, "step": 28900 }, { "epoch": 2.712180930930931, "grad_norm": 1.3067232055059899, "learning_rate": 2.779553802003854e-07, "loss": 0.3097, "step": 28901 }, { "epoch": 2.712274774774775, "grad_norm": 0.9848326675324048, "learning_rate": 2.777759096698973e-07, "loss": 0.3319, "step": 28902 }, { "epoch": 2.7123686186186187, "grad_norm": 1.156289258934958, "learning_rate": 2.775964954430427e-07, "loss": 0.2743, "step": 28903 }, { "epoch": 2.7124624624624625, "grad_norm": 1.5256479091881687, "learning_rate": 2.7741713752196086e-07, "loss": 0.3134, "step": 28904 }, { "epoch": 2.7125563063063063, "grad_norm": 1.2182647551042978, "learning_rate": 2.7723783590879017e-07, "loss": 0.35, "step": 28905 }, { "epoch": 2.71265015015015, "grad_norm": 1.120875620747555, "learning_rate": 2.770585906056683e-07, "loss": 0.3399, "step": 28906 }, { "epoch": 2.712743993993994, "grad_norm": 1.1814239552895718, "learning_rate": 2.76879401614733e-07, "loss": 0.3007, "step": 28907 }, { "epoch": 2.7128378378378377, "grad_norm": 1.3189953672831227, "learning_rate": 2.7670026893812097e-07, "loss": 0.2917, "step": 28908 }, { "epoch": 2.7129316816816815, "grad_norm": 1.270967585363875, "learning_rate": 2.765211925779665e-07, "loss": 0.3043, "step": 28909 }, { "epoch": 2.7130255255255253, "grad_norm": 1.297683482034782, "learning_rate": 2.7634217253640626e-07, "loss": 0.3384, "step": 28910 }, { "epoch": 2.7131193693693696, "grad_norm": 1.2424496892607706, "learning_rate": 2.7616320881557467e-07, "loss": 0.3184, "step": 28911 }, { "epoch": 2.713213213213213, "grad_norm": 1.2254072746085931, "learning_rate": 2.759843014176039e-07, "loss": 0.3248, "step": 28912 }, { "epoch": 2.713307057057057, "grad_norm": 1.090426135499747, "learning_rate": 2.758054503446295e-07, "loss": 0.2738, "step": 28913 }, { "epoch": 2.713400900900901, "grad_norm": 1.0015673790269854, "learning_rate": 2.7562665559878244e-07, "loss": 0.279, "step": 28914 }, { "epoch": 2.713494744744745, "grad_norm": 1.264573084743027, "learning_rate": 2.75447917182195e-07, "loss": 0.3377, "step": 28915 }, { "epoch": 2.7135885885885886, "grad_norm": 1.172007155634445, "learning_rate": 2.7526923509699764e-07, "loss": 0.3251, "step": 28916 }, { "epoch": 2.7136824324324325, "grad_norm": 1.460375923297774, "learning_rate": 2.7509060934532204e-07, "loss": 0.3153, "step": 28917 }, { "epoch": 2.7137762762762763, "grad_norm": 1.2200037366678982, "learning_rate": 2.7491203992929594e-07, "loss": 0.333, "step": 28918 }, { "epoch": 2.71387012012012, "grad_norm": 1.2992164011677012, "learning_rate": 2.747335268510504e-07, "loss": 0.3247, "step": 28919 }, { "epoch": 2.713963963963964, "grad_norm": 1.116129365155539, "learning_rate": 2.745550701127131e-07, "loss": 0.3002, "step": 28920 }, { "epoch": 2.7140578078078077, "grad_norm": 1.0514152848528107, "learning_rate": 2.743766697164113e-07, "loss": 0.2963, "step": 28921 }, { "epoch": 2.7141516516516515, "grad_norm": 2.852783873759622, "learning_rate": 2.7419832566427386e-07, "loss": 0.289, "step": 28922 }, { "epoch": 2.7142454954954953, "grad_norm": 1.2400953964688526, "learning_rate": 2.7402003795842513e-07, "loss": 0.3142, "step": 28923 }, { "epoch": 2.7143393393393396, "grad_norm": 1.1504165531823003, "learning_rate": 2.738418066009912e-07, "loss": 0.3173, "step": 28924 }, { "epoch": 2.714433183183183, "grad_norm": 1.1225269127329665, "learning_rate": 2.736636315940988e-07, "loss": 0.3333, "step": 28925 }, { "epoch": 2.714527027027027, "grad_norm": 1.0861575445457012, "learning_rate": 2.734855129398706e-07, "loss": 0.2982, "step": 28926 }, { "epoch": 2.714620870870871, "grad_norm": 0.9842311921788209, "learning_rate": 2.733074506404315e-07, "loss": 0.3562, "step": 28927 }, { "epoch": 2.714714714714715, "grad_norm": 1.0597772693550787, "learning_rate": 2.731294446979038e-07, "loss": 0.3316, "step": 28928 }, { "epoch": 2.7148085585585586, "grad_norm": 1.0931856909482454, "learning_rate": 2.729514951144097e-07, "loss": 0.2842, "step": 28929 }, { "epoch": 2.7149024024024024, "grad_norm": 1.3856943032511284, "learning_rate": 2.727736018920707e-07, "loss": 0.2929, "step": 28930 }, { "epoch": 2.7149962462462462, "grad_norm": 1.1524663607405672, "learning_rate": 2.725957650330091e-07, "loss": 0.2853, "step": 28931 }, { "epoch": 2.71509009009009, "grad_norm": 1.1265062978903368, "learning_rate": 2.724179845393449e-07, "loss": 0.3008, "step": 28932 }, { "epoch": 2.715183933933934, "grad_norm": 0.9855796345150409, "learning_rate": 2.722402604131963e-07, "loss": 0.2963, "step": 28933 }, { "epoch": 2.7152777777777777, "grad_norm": 1.0159225742653557, "learning_rate": 2.7206259265668447e-07, "loss": 0.3056, "step": 28934 }, { "epoch": 2.7153716216216215, "grad_norm": 0.969795846276827, "learning_rate": 2.718849812719271e-07, "loss": 0.273, "step": 28935 }, { "epoch": 2.7154654654654653, "grad_norm": 1.1373606154144305, "learning_rate": 2.717074262610403e-07, "loss": 0.2869, "step": 28936 }, { "epoch": 2.7155593093093096, "grad_norm": 0.9830998859240702, "learning_rate": 2.715299276261435e-07, "loss": 0.2897, "step": 28937 }, { "epoch": 2.715653153153153, "grad_norm": 1.6909192640738375, "learning_rate": 2.7135248536935165e-07, "loss": 0.2916, "step": 28938 }, { "epoch": 2.715746996996997, "grad_norm": 1.1144117515161533, "learning_rate": 2.711750994927803e-07, "loss": 0.331, "step": 28939 }, { "epoch": 2.715840840840841, "grad_norm": 1.117914318015547, "learning_rate": 2.7099776999854553e-07, "loss": 0.3292, "step": 28940 }, { "epoch": 2.715934684684685, "grad_norm": 1.3721504102144324, "learning_rate": 2.708204968887601e-07, "loss": 0.3039, "step": 28941 }, { "epoch": 2.7160285285285286, "grad_norm": 1.2584050156546531, "learning_rate": 2.7064328016553843e-07, "loss": 0.3004, "step": 28942 }, { "epoch": 2.7161223723723724, "grad_norm": 1.0249084398471064, "learning_rate": 2.704661198309938e-07, "loss": 0.3072, "step": 28943 }, { "epoch": 2.7162162162162162, "grad_norm": 0.9526715990526864, "learning_rate": 2.70289015887239e-07, "loss": 0.3042, "step": 28944 }, { "epoch": 2.71631006006006, "grad_norm": 1.0693448164813377, "learning_rate": 2.7011196833638343e-07, "loss": 0.3202, "step": 28945 }, { "epoch": 2.716403903903904, "grad_norm": 1.1448378734658, "learning_rate": 2.699349771805404e-07, "loss": 0.2845, "step": 28946 }, { "epoch": 2.7164977477477477, "grad_norm": 1.0582367150830525, "learning_rate": 2.697580424218199e-07, "loss": 0.3328, "step": 28947 }, { "epoch": 2.7165915915915915, "grad_norm": 1.1479575323255173, "learning_rate": 2.695811640623297e-07, "loss": 0.3223, "step": 28948 }, { "epoch": 2.7166854354354353, "grad_norm": 1.6402942693950946, "learning_rate": 2.6940434210418086e-07, "loss": 0.3307, "step": 28949 }, { "epoch": 2.7167792792792795, "grad_norm": 1.1238860362965506, "learning_rate": 2.6922757654948063e-07, "loss": 0.3118, "step": 28950 }, { "epoch": 2.716873123123123, "grad_norm": 1.3129007920238884, "learning_rate": 2.6905086740033727e-07, "loss": 0.2835, "step": 28951 }, { "epoch": 2.716966966966967, "grad_norm": 1.070500767348093, "learning_rate": 2.688742146588569e-07, "loss": 0.3186, "step": 28952 }, { "epoch": 2.717060810810811, "grad_norm": 2.797104198959177, "learning_rate": 2.6869761832714615e-07, "loss": 0.322, "step": 28953 }, { "epoch": 2.7171546546546548, "grad_norm": 1.2673596993115819, "learning_rate": 2.6852107840730947e-07, "loss": 0.3086, "step": 28954 }, { "epoch": 2.7172484984984986, "grad_norm": 1.2771846843460333, "learning_rate": 2.6834459490145404e-07, "loss": 0.3033, "step": 28955 }, { "epoch": 2.7173423423423424, "grad_norm": 1.1328862338221801, "learning_rate": 2.6816816781168263e-07, "loss": 0.312, "step": 28956 }, { "epoch": 2.717436186186186, "grad_norm": 1.279819763842065, "learning_rate": 2.679917971400986e-07, "loss": 0.3347, "step": 28957 }, { "epoch": 2.71753003003003, "grad_norm": 1.0516559892576063, "learning_rate": 2.6781548288880575e-07, "loss": 0.3101, "step": 28958 }, { "epoch": 2.717623873873874, "grad_norm": 1.4535181071547225, "learning_rate": 2.676392250599058e-07, "loss": 0.3154, "step": 28959 }, { "epoch": 2.7177177177177176, "grad_norm": 0.9886194441467557, "learning_rate": 2.674630236554998e-07, "loss": 0.3035, "step": 28960 }, { "epoch": 2.7178115615615615, "grad_norm": 1.2188004875662186, "learning_rate": 2.6728687867769e-07, "loss": 0.3015, "step": 28961 }, { "epoch": 2.7179054054054053, "grad_norm": 1.386800656779437, "learning_rate": 2.6711079012857523e-07, "loss": 0.316, "step": 28962 }, { "epoch": 2.7179992492492495, "grad_norm": 1.0815524502415335, "learning_rate": 2.6693475801025606e-07, "loss": 0.2983, "step": 28963 }, { "epoch": 2.718093093093093, "grad_norm": 0.9405139277882077, "learning_rate": 2.6675878232483075e-07, "loss": 0.2549, "step": 28964 }, { "epoch": 2.718186936936937, "grad_norm": 1.2822693477853389, "learning_rate": 2.665828630743977e-07, "loss": 0.287, "step": 28965 }, { "epoch": 2.7182807807807805, "grad_norm": 1.9675930377710915, "learning_rate": 2.6640700026105305e-07, "loss": 0.269, "step": 28966 }, { "epoch": 2.7183746246246248, "grad_norm": 1.3393602735680659, "learning_rate": 2.662311938868961e-07, "loss": 0.3075, "step": 28967 }, { "epoch": 2.7184684684684686, "grad_norm": 0.9151605147233401, "learning_rate": 2.660554439540214e-07, "loss": 0.2978, "step": 28968 }, { "epoch": 2.7185623123123124, "grad_norm": 1.0327887440517296, "learning_rate": 2.6587975046452387e-07, "loss": 0.2862, "step": 28969 }, { "epoch": 2.718656156156156, "grad_norm": 1.1136975768350292, "learning_rate": 2.657041134205007e-07, "loss": 0.2869, "step": 28970 }, { "epoch": 2.71875, "grad_norm": 1.163127706169145, "learning_rate": 2.6552853282404367e-07, "loss": 0.2992, "step": 28971 }, { "epoch": 2.718843843843844, "grad_norm": 1.2182271820562736, "learning_rate": 2.6535300867724713e-07, "loss": 0.3308, "step": 28972 }, { "epoch": 2.7189376876876876, "grad_norm": 1.0296417489971825, "learning_rate": 2.651775409822044e-07, "loss": 0.3228, "step": 28973 }, { "epoch": 2.7190315315315314, "grad_norm": 1.183927745789917, "learning_rate": 2.6500212974100713e-07, "loss": 0.3208, "step": 28974 }, { "epoch": 2.7191253753753752, "grad_norm": 1.0805741364016668, "learning_rate": 2.6482677495574705e-07, "loss": 0.2995, "step": 28975 }, { "epoch": 2.7192192192192195, "grad_norm": 1.6792454364255336, "learning_rate": 2.646514766285141e-07, "loss": 0.2778, "step": 28976 }, { "epoch": 2.719313063063063, "grad_norm": 1.2898072309044233, "learning_rate": 2.644762347613994e-07, "loss": 0.3478, "step": 28977 }, { "epoch": 2.719406906906907, "grad_norm": 1.25091764908834, "learning_rate": 2.643010493564907e-07, "loss": 0.3362, "step": 28978 }, { "epoch": 2.7195007507507505, "grad_norm": 1.1265790591445906, "learning_rate": 2.641259204158786e-07, "loss": 0.3298, "step": 28979 }, { "epoch": 2.7195945945945947, "grad_norm": 1.002506242019578, "learning_rate": 2.6395084794165085e-07, "loss": 0.3102, "step": 28980 }, { "epoch": 2.7196884384384385, "grad_norm": 1.0194958614036642, "learning_rate": 2.637758319358941e-07, "loss": 0.332, "step": 28981 }, { "epoch": 2.7197822822822824, "grad_norm": 1.2776720203902108, "learning_rate": 2.6360087240069554e-07, "loss": 0.3105, "step": 28982 }, { "epoch": 2.719876126126126, "grad_norm": 1.1793655720435037, "learning_rate": 2.634259693381419e-07, "loss": 0.3519, "step": 28983 }, { "epoch": 2.71996996996997, "grad_norm": 1.2262990672901284, "learning_rate": 2.6325112275031706e-07, "loss": 0.2974, "step": 28984 }, { "epoch": 2.720063813813814, "grad_norm": 1.3114080598931723, "learning_rate": 2.630763326393071e-07, "loss": 0.3578, "step": 28985 }, { "epoch": 2.7201576576576576, "grad_norm": 1.1092144403767727, "learning_rate": 2.6290159900719537e-07, "loss": 0.2886, "step": 28986 }, { "epoch": 2.7202515015015014, "grad_norm": 1.1457638883019652, "learning_rate": 2.6272692185606573e-07, "loss": 0.3156, "step": 28987 }, { "epoch": 2.7203453453453452, "grad_norm": 1.1855636203257, "learning_rate": 2.6255230118800046e-07, "loss": 0.3453, "step": 28988 }, { "epoch": 2.720439189189189, "grad_norm": 1.1346504800602346, "learning_rate": 2.623777370050812e-07, "loss": 0.2899, "step": 28989 }, { "epoch": 2.720533033033033, "grad_norm": 1.2004879858346529, "learning_rate": 2.6220322930938955e-07, "loss": 0.2721, "step": 28990 }, { "epoch": 2.720626876876877, "grad_norm": 1.1476302314453852, "learning_rate": 2.620287781030073e-07, "loss": 0.2785, "step": 28991 }, { "epoch": 2.7207207207207205, "grad_norm": 0.9997256788621968, "learning_rate": 2.618543833880133e-07, "loss": 0.3607, "step": 28992 }, { "epoch": 2.7208145645645647, "grad_norm": 1.1476582679089278, "learning_rate": 2.616800451664864e-07, "loss": 0.3553, "step": 28993 }, { "epoch": 2.7209084084084085, "grad_norm": 1.3283562606821064, "learning_rate": 2.615057634405066e-07, "loss": 0.3041, "step": 28994 }, { "epoch": 2.7210022522522523, "grad_norm": 1.1480678721315365, "learning_rate": 2.613315382121512e-07, "loss": 0.3402, "step": 28995 }, { "epoch": 2.721096096096096, "grad_norm": 1.6113471955301268, "learning_rate": 2.6115736948349734e-07, "loss": 0.3233, "step": 28996 }, { "epoch": 2.72118993993994, "grad_norm": 1.2120460840630212, "learning_rate": 2.6098325725662235e-07, "loss": 0.2863, "step": 28997 }, { "epoch": 2.7212837837837838, "grad_norm": 1.0656301868479192, "learning_rate": 2.6080920153360167e-07, "loss": 0.3095, "step": 28998 }, { "epoch": 2.7213776276276276, "grad_norm": 1.1275449709941692, "learning_rate": 2.6063520231650984e-07, "loss": 0.3231, "step": 28999 }, { "epoch": 2.7214714714714714, "grad_norm": 1.2628812915592385, "learning_rate": 2.6046125960742295e-07, "loss": 0.3125, "step": 29000 }, { "epoch": 2.721565315315315, "grad_norm": 1.048851153557905, "learning_rate": 2.602873734084155e-07, "loss": 0.3116, "step": 29001 }, { "epoch": 2.721659159159159, "grad_norm": 1.0548532153947323, "learning_rate": 2.601135437215574e-07, "loss": 0.3165, "step": 29002 }, { "epoch": 2.721753003003003, "grad_norm": 1.1331763568461086, "learning_rate": 2.5993977054892483e-07, "loss": 0.2942, "step": 29003 }, { "epoch": 2.721846846846847, "grad_norm": 1.0296800811927955, "learning_rate": 2.597660538925878e-07, "loss": 0.289, "step": 29004 }, { "epoch": 2.7219406906906904, "grad_norm": 1.1501313168071565, "learning_rate": 2.595923937546174e-07, "loss": 0.3195, "step": 29005 }, { "epoch": 2.7220345345345347, "grad_norm": 1.245215666785362, "learning_rate": 2.594187901370854e-07, "loss": 0.3255, "step": 29006 }, { "epoch": 2.7221283783783785, "grad_norm": 1.2931125105324792, "learning_rate": 2.592452430420611e-07, "loss": 0.3121, "step": 29007 }, { "epoch": 2.7222222222222223, "grad_norm": 1.1378582831817452, "learning_rate": 2.5907175247161297e-07, "loss": 0.2937, "step": 29008 }, { "epoch": 2.722316066066066, "grad_norm": 2.432647522102214, "learning_rate": 2.588983184278115e-07, "loss": 0.2976, "step": 29009 }, { "epoch": 2.72240990990991, "grad_norm": 1.410803808051816, "learning_rate": 2.5872494091272295e-07, "loss": 0.2967, "step": 29010 }, { "epoch": 2.7225037537537538, "grad_norm": 1.3574129826170784, "learning_rate": 2.585516199284144e-07, "loss": 0.3087, "step": 29011 }, { "epoch": 2.7225975975975976, "grad_norm": 1.2614997614203687, "learning_rate": 2.583783554769537e-07, "loss": 0.293, "step": 29012 }, { "epoch": 2.7226914414414414, "grad_norm": 1.220521433216557, "learning_rate": 2.5820514756040526e-07, "loss": 0.3125, "step": 29013 }, { "epoch": 2.722785285285285, "grad_norm": 0.9736342531850141, "learning_rate": 2.5803199618083585e-07, "loss": 0.3118, "step": 29014 }, { "epoch": 2.722879129129129, "grad_norm": 3.1080057859767156, "learning_rate": 2.5785890134030877e-07, "loss": 0.2903, "step": 29015 }, { "epoch": 2.722972972972973, "grad_norm": 1.0426724576962778, "learning_rate": 2.576858630408879e-07, "loss": 0.3073, "step": 29016 }, { "epoch": 2.723066816816817, "grad_norm": 1.2453987436328098, "learning_rate": 2.575128812846356e-07, "loss": 0.3042, "step": 29017 }, { "epoch": 2.7231606606606604, "grad_norm": 1.4405949181420017, "learning_rate": 2.5733995607361674e-07, "loss": 0.319, "step": 29018 }, { "epoch": 2.7232545045045047, "grad_norm": 1.1945825085388646, "learning_rate": 2.571670874098914e-07, "loss": 0.2944, "step": 29019 }, { "epoch": 2.7233483483483485, "grad_norm": 1.116922455980751, "learning_rate": 2.5699427529552066e-07, "loss": 0.3256, "step": 29020 }, { "epoch": 2.7234421921921923, "grad_norm": 0.9963523650063121, "learning_rate": 2.5682151973256576e-07, "loss": 0.3021, "step": 29021 }, { "epoch": 2.723536036036036, "grad_norm": 0.9428017187689721, "learning_rate": 2.5664882072308664e-07, "loss": 0.2799, "step": 29022 }, { "epoch": 2.72362987987988, "grad_norm": 1.3617127175034287, "learning_rate": 2.564761782691405e-07, "loss": 0.2977, "step": 29023 }, { "epoch": 2.7237237237237237, "grad_norm": 4.547206199430773, "learning_rate": 2.5630359237278856e-07, "loss": 0.3013, "step": 29024 }, { "epoch": 2.7238175675675675, "grad_norm": 1.24101392625162, "learning_rate": 2.5613106303608693e-07, "loss": 0.2866, "step": 29025 }, { "epoch": 2.7239114114114114, "grad_norm": 1.07170086332298, "learning_rate": 2.559585902610928e-07, "loss": 0.3083, "step": 29026 }, { "epoch": 2.724005255255255, "grad_norm": 1.1832574207821458, "learning_rate": 2.557861740498624e-07, "loss": 0.2822, "step": 29027 }, { "epoch": 2.724099099099099, "grad_norm": 1.6783404199210201, "learning_rate": 2.556138144044523e-07, "loss": 0.3495, "step": 29028 }, { "epoch": 2.724192942942943, "grad_norm": 1.2795332272374607, "learning_rate": 2.5544151132691654e-07, "loss": 0.3628, "step": 29029 }, { "epoch": 2.724286786786787, "grad_norm": 1.2190236623883903, "learning_rate": 2.552692648193106e-07, "loss": 0.3191, "step": 29030 }, { "epoch": 2.7243806306306304, "grad_norm": 1.145554716851111, "learning_rate": 2.5509707488368727e-07, "loss": 0.299, "step": 29031 }, { "epoch": 2.7244744744744747, "grad_norm": 1.1663173629141126, "learning_rate": 2.5492494152209945e-07, "loss": 0.2879, "step": 29032 }, { "epoch": 2.7245683183183185, "grad_norm": 1.052696683564931, "learning_rate": 2.54752864736601e-07, "loss": 0.3502, "step": 29033 }, { "epoch": 2.7246621621621623, "grad_norm": 1.4813919591180422, "learning_rate": 2.545808445292425e-07, "loss": 0.282, "step": 29034 }, { "epoch": 2.724756006006006, "grad_norm": 1.1976966092029626, "learning_rate": 2.544088809020745e-07, "loss": 0.261, "step": 29035 }, { "epoch": 2.72484984984985, "grad_norm": 1.422773892658008, "learning_rate": 2.5423697385714817e-07, "loss": 0.3344, "step": 29036 }, { "epoch": 2.7249436936936937, "grad_norm": 1.4810346657175162, "learning_rate": 2.540651233965136e-07, "loss": 0.3226, "step": 29037 }, { "epoch": 2.7250375375375375, "grad_norm": 1.2321236763736125, "learning_rate": 2.538933295222185e-07, "loss": 0.293, "step": 29038 }, { "epoch": 2.7251313813813813, "grad_norm": 1.15678173973002, "learning_rate": 2.537215922363123e-07, "loss": 0.3098, "step": 29039 }, { "epoch": 2.725225225225225, "grad_norm": 1.4717038142808596, "learning_rate": 2.5354991154084185e-07, "loss": 0.3525, "step": 29040 }, { "epoch": 2.725319069069069, "grad_norm": 1.1299417417693707, "learning_rate": 2.5337828743785377e-07, "loss": 0.3068, "step": 29041 }, { "epoch": 2.7254129129129128, "grad_norm": 0.9820215659852739, "learning_rate": 2.532067199293958e-07, "loss": 0.2944, "step": 29042 }, { "epoch": 2.725506756756757, "grad_norm": 1.3422556084776267, "learning_rate": 2.5303520901751254e-07, "loss": 0.3284, "step": 29043 }, { "epoch": 2.7256006006006004, "grad_norm": 1.311465285015439, "learning_rate": 2.5286375470424894e-07, "loss": 0.2803, "step": 29044 }, { "epoch": 2.7256944444444446, "grad_norm": 1.3078046417936633, "learning_rate": 2.5269235699165005e-07, "loss": 0.2998, "step": 29045 }, { "epoch": 2.7257882882882885, "grad_norm": 1.0171726082090295, "learning_rate": 2.525210158817587e-07, "loss": 0.2926, "step": 29046 }, { "epoch": 2.7258821321321323, "grad_norm": 1.2669735642719497, "learning_rate": 2.5234973137661767e-07, "loss": 0.3019, "step": 29047 }, { "epoch": 2.725975975975976, "grad_norm": 1.6534125641978263, "learning_rate": 2.521785034782698e-07, "loss": 0.3182, "step": 29048 }, { "epoch": 2.72606981981982, "grad_norm": 1.1278637022860927, "learning_rate": 2.5200733218875674e-07, "loss": 0.2818, "step": 29049 }, { "epoch": 2.7261636636636637, "grad_norm": 1.3563859111892056, "learning_rate": 2.5183621751011913e-07, "loss": 0.3327, "step": 29050 }, { "epoch": 2.7262575075075075, "grad_norm": 1.079465768124115, "learning_rate": 2.5166515944439694e-07, "loss": 0.2858, "step": 29051 }, { "epoch": 2.7263513513513513, "grad_norm": 1.1695987015745073, "learning_rate": 2.5149415799362976e-07, "loss": 0.3111, "step": 29052 }, { "epoch": 2.726445195195195, "grad_norm": 1.1782725509127594, "learning_rate": 2.513232131598559e-07, "loss": 0.3004, "step": 29053 }, { "epoch": 2.726539039039039, "grad_norm": 1.2379960675911723, "learning_rate": 2.511523249451153e-07, "loss": 0.3436, "step": 29054 }, { "epoch": 2.7266328828828827, "grad_norm": 1.1635146853431442, "learning_rate": 2.509814933514443e-07, "loss": 0.2941, "step": 29055 }, { "epoch": 2.726726726726727, "grad_norm": 1.2283692369585861, "learning_rate": 2.508107183808789e-07, "loss": 0.301, "step": 29056 }, { "epoch": 2.7268205705705704, "grad_norm": 1.2902785654517686, "learning_rate": 2.506400000354575e-07, "loss": 0.3178, "step": 29057 }, { "epoch": 2.7269144144144146, "grad_norm": 1.105408925429165, "learning_rate": 2.504693383172141e-07, "loss": 0.2878, "step": 29058 }, { "epoch": 2.727008258258258, "grad_norm": 1.343585809627602, "learning_rate": 2.502987332281831e-07, "loss": 0.3099, "step": 29059 }, { "epoch": 2.7271021021021022, "grad_norm": 1.4545019907998227, "learning_rate": 2.5012818477039955e-07, "loss": 0.3451, "step": 29060 }, { "epoch": 2.727195945945946, "grad_norm": 1.0924074267362962, "learning_rate": 2.4995769294589746e-07, "loss": 0.2983, "step": 29061 }, { "epoch": 2.72728978978979, "grad_norm": 1.27782853124386, "learning_rate": 2.4978725775670844e-07, "loss": 0.323, "step": 29062 }, { "epoch": 2.7273836336336337, "grad_norm": 1.1505449175333304, "learning_rate": 2.4961687920486475e-07, "loss": 0.2529, "step": 29063 }, { "epoch": 2.7274774774774775, "grad_norm": 1.1481924497347067, "learning_rate": 2.4944655729239874e-07, "loss": 0.2979, "step": 29064 }, { "epoch": 2.7275713213213213, "grad_norm": 1.1188912654318073, "learning_rate": 2.492762920213393e-07, "loss": 0.3403, "step": 29065 }, { "epoch": 2.727665165165165, "grad_norm": 1.8794334993415724, "learning_rate": 2.4910608339371867e-07, "loss": 0.3188, "step": 29066 }, { "epoch": 2.727759009009009, "grad_norm": 1.0598930444689336, "learning_rate": 2.489359314115652e-07, "loss": 0.2835, "step": 29067 }, { "epoch": 2.7278528528528527, "grad_norm": 0.9746764780166358, "learning_rate": 2.487658360769074e-07, "loss": 0.3458, "step": 29068 }, { "epoch": 2.7279466966966965, "grad_norm": 1.364280598434089, "learning_rate": 2.4859579739177406e-07, "loss": 0.3316, "step": 29069 }, { "epoch": 2.7280405405405403, "grad_norm": 1.2187427739504724, "learning_rate": 2.484258153581925e-07, "loss": 0.3357, "step": 29070 }, { "epoch": 2.7281343843843846, "grad_norm": 1.1052910545457026, "learning_rate": 2.4825588997818837e-07, "loss": 0.2957, "step": 29071 }, { "epoch": 2.728228228228228, "grad_norm": 1.2603072810941724, "learning_rate": 2.4808602125378945e-07, "loss": 0.3094, "step": 29072 }, { "epoch": 2.7283220720720722, "grad_norm": 1.324387970355655, "learning_rate": 2.4791620918701966e-07, "loss": 0.3071, "step": 29073 }, { "epoch": 2.728415915915916, "grad_norm": 1.2467213130112078, "learning_rate": 2.477464537799046e-07, "loss": 0.3206, "step": 29074 }, { "epoch": 2.72850975975976, "grad_norm": 1.2880724858274653, "learning_rate": 2.4757675503446766e-07, "loss": 0.3627, "step": 29075 }, { "epoch": 2.7286036036036037, "grad_norm": 1.3773704540101197, "learning_rate": 2.474071129527328e-07, "loss": 0.3153, "step": 29076 }, { "epoch": 2.7286974474474475, "grad_norm": 1.1149266174725858, "learning_rate": 2.4723752753672117e-07, "loss": 0.3532, "step": 29077 }, { "epoch": 2.7287912912912913, "grad_norm": 1.298010291260985, "learning_rate": 2.4706799878845664e-07, "loss": 0.308, "step": 29078 }, { "epoch": 2.728885135135135, "grad_norm": 1.3208261311702596, "learning_rate": 2.4689852670995996e-07, "loss": 0.2972, "step": 29079 }, { "epoch": 2.728978978978979, "grad_norm": 1.2754211712720667, "learning_rate": 2.467291113032505e-07, "loss": 0.2593, "step": 29080 }, { "epoch": 2.7290728228228227, "grad_norm": 1.0219631556487614, "learning_rate": 2.4655975257035e-07, "loss": 0.3177, "step": 29081 }, { "epoch": 2.7291666666666665, "grad_norm": 1.2052431405205604, "learning_rate": 2.4639045051327747e-07, "loss": 0.3665, "step": 29082 }, { "epoch": 2.7292605105105103, "grad_norm": 1.1157422351190078, "learning_rate": 2.4622120513405013e-07, "loss": 0.36, "step": 29083 }, { "epoch": 2.7293543543543546, "grad_norm": 1.1769581820255002, "learning_rate": 2.460520164346875e-07, "loss": 0.3598, "step": 29084 }, { "epoch": 2.729448198198198, "grad_norm": 1.267844439186522, "learning_rate": 2.458828844172062e-07, "loss": 0.3149, "step": 29085 }, { "epoch": 2.729542042042042, "grad_norm": 1.8980315903082643, "learning_rate": 2.4571380908362253e-07, "loss": 0.3062, "step": 29086 }, { "epoch": 2.729635885885886, "grad_norm": 1.1533967926857445, "learning_rate": 2.4554479043595317e-07, "loss": 0.2887, "step": 29087 }, { "epoch": 2.72972972972973, "grad_norm": 1.216131924838236, "learning_rate": 2.4537582847621256e-07, "loss": 0.309, "step": 29088 }, { "epoch": 2.7298235735735736, "grad_norm": 0.972595412668852, "learning_rate": 2.452069232064147e-07, "loss": 0.2915, "step": 29089 }, { "epoch": 2.7299174174174174, "grad_norm": 1.3555875776749347, "learning_rate": 2.450380746285752e-07, "loss": 0.2762, "step": 29090 }, { "epoch": 2.7300112612612613, "grad_norm": 1.3003794030284395, "learning_rate": 2.448692827447063e-07, "loss": 0.3328, "step": 29091 }, { "epoch": 2.730105105105105, "grad_norm": 1.0127113975862165, "learning_rate": 2.447005475568204e-07, "loss": 0.3166, "step": 29092 }, { "epoch": 2.730198948948949, "grad_norm": 1.2153906884839993, "learning_rate": 2.445318690669296e-07, "loss": 0.3406, "step": 29093 }, { "epoch": 2.7302927927927927, "grad_norm": 1.1621444709577622, "learning_rate": 2.4436324727704465e-07, "loss": 0.3519, "step": 29094 }, { "epoch": 2.7303866366366365, "grad_norm": 1.1810036386890386, "learning_rate": 2.441946821891766e-07, "loss": 0.3477, "step": 29095 }, { "epoch": 2.7304804804804803, "grad_norm": 1.0165801668009946, "learning_rate": 2.4402617380533567e-07, "loss": 0.2849, "step": 29096 }, { "epoch": 2.7305743243243246, "grad_norm": 1.1911753641384424, "learning_rate": 2.4385772212753014e-07, "loss": 0.3136, "step": 29097 }, { "epoch": 2.730668168168168, "grad_norm": 1.1673261295444053, "learning_rate": 2.4368932715776897e-07, "loss": 0.3076, "step": 29098 }, { "epoch": 2.730762012012012, "grad_norm": 1.092861126211265, "learning_rate": 2.435209888980594e-07, "loss": 0.3253, "step": 29099 }, { "epoch": 2.730855855855856, "grad_norm": 1.231679438170083, "learning_rate": 2.4335270735040826e-07, "loss": 0.2949, "step": 29100 }, { "epoch": 2.7309496996997, "grad_norm": 1.0516824551939141, "learning_rate": 2.4318448251682334e-07, "loss": 0.3609, "step": 29101 }, { "epoch": 2.7310435435435436, "grad_norm": 1.3535351026936404, "learning_rate": 2.4301631439930974e-07, "loss": 0.2865, "step": 29102 }, { "epoch": 2.7311373873873874, "grad_norm": 1.052385004063358, "learning_rate": 2.4284820299987186e-07, "loss": 0.3022, "step": 29103 }, { "epoch": 2.7312312312312312, "grad_norm": 1.1281226926236103, "learning_rate": 2.426801483205155e-07, "loss": 0.2876, "step": 29104 }, { "epoch": 2.731325075075075, "grad_norm": 1.0097546060066822, "learning_rate": 2.425121503632433e-07, "loss": 0.3262, "step": 29105 }, { "epoch": 2.731418918918919, "grad_norm": 1.1679760607616108, "learning_rate": 2.423442091300582e-07, "loss": 0.3071, "step": 29106 }, { "epoch": 2.7315127627627627, "grad_norm": 0.9967931319232568, "learning_rate": 2.4217632462296417e-07, "loss": 0.2805, "step": 29107 }, { "epoch": 2.7316066066066065, "grad_norm": 1.102298920801871, "learning_rate": 2.420084968439612e-07, "loss": 0.3547, "step": 29108 }, { "epoch": 2.7317004504504503, "grad_norm": 1.0213265294560994, "learning_rate": 2.4184072579505004e-07, "loss": 0.2815, "step": 29109 }, { "epoch": 2.7317942942942945, "grad_norm": 1.0016604675173828, "learning_rate": 2.41673011478234e-07, "loss": 0.3471, "step": 29110 }, { "epoch": 2.731888138138138, "grad_norm": 1.101577583274466, "learning_rate": 2.415053538955092e-07, "loss": 0.3051, "step": 29111 }, { "epoch": 2.731981981981982, "grad_norm": 1.153287481076474, "learning_rate": 2.4133775304887583e-07, "loss": 0.312, "step": 29112 }, { "epoch": 2.732075825825826, "grad_norm": 1.0348449865496416, "learning_rate": 2.411702089403334e-07, "loss": 0.3151, "step": 29113 }, { "epoch": 2.73216966966967, "grad_norm": 1.2456655022267702, "learning_rate": 2.4100272157187797e-07, "loss": 0.2902, "step": 29114 }, { "epoch": 2.7322635135135136, "grad_norm": 1.0654347157571389, "learning_rate": 2.408352909455069e-07, "loss": 0.3379, "step": 29115 }, { "epoch": 2.7323573573573574, "grad_norm": 1.110278458066382, "learning_rate": 2.406679170632176e-07, "loss": 0.3257, "step": 29116 }, { "epoch": 2.732451201201201, "grad_norm": 1.0055344996585742, "learning_rate": 2.405005999270049e-07, "loss": 0.3063, "step": 29117 }, { "epoch": 2.732545045045045, "grad_norm": 1.2111990951026068, "learning_rate": 2.4033333953886296e-07, "loss": 0.2993, "step": 29118 }, { "epoch": 2.732638888888889, "grad_norm": 1.308266250616899, "learning_rate": 2.401661359007873e-07, "loss": 0.3042, "step": 29119 }, { "epoch": 2.7327327327327327, "grad_norm": 1.2590835058038756, "learning_rate": 2.3999898901477137e-07, "loss": 0.3559, "step": 29120 }, { "epoch": 2.7328265765765765, "grad_norm": 1.0769918939175374, "learning_rate": 2.398318988828069e-07, "loss": 0.3354, "step": 29121 }, { "epoch": 2.7329204204204203, "grad_norm": 1.104329441588707, "learning_rate": 2.3966486550688726e-07, "loss": 0.3486, "step": 29122 }, { "epoch": 2.7330142642642645, "grad_norm": 1.0583158339708614, "learning_rate": 2.394978888890037e-07, "loss": 0.3219, "step": 29123 }, { "epoch": 2.733108108108108, "grad_norm": 1.1759594014469585, "learning_rate": 2.393309690311474e-07, "loss": 0.3165, "step": 29124 }, { "epoch": 2.733201951951952, "grad_norm": 1.3734811073871578, "learning_rate": 2.391641059353084e-07, "loss": 0.3094, "step": 29125 }, { "epoch": 2.733295795795796, "grad_norm": 1.0296788250169702, "learning_rate": 2.389972996034756e-07, "loss": 0.2874, "step": 29126 }, { "epoch": 2.7333896396396398, "grad_norm": 1.159786801455358, "learning_rate": 2.3883055003763813e-07, "loss": 0.3562, "step": 29127 }, { "epoch": 2.7334834834834836, "grad_norm": 1.1269820519020581, "learning_rate": 2.386638572397848e-07, "loss": 0.3156, "step": 29128 }, { "epoch": 2.7335773273273274, "grad_norm": 1.140167463886259, "learning_rate": 2.38497221211903e-07, "loss": 0.307, "step": 29129 }, { "epoch": 2.733671171171171, "grad_norm": 1.1089348787777216, "learning_rate": 2.3833064195597834e-07, "loss": 0.282, "step": 29130 }, { "epoch": 2.733765015015015, "grad_norm": 1.21573650290865, "learning_rate": 2.381641194739981e-07, "loss": 0.3044, "step": 29131 }, { "epoch": 2.733858858858859, "grad_norm": 1.050900335950213, "learning_rate": 2.379976537679479e-07, "loss": 0.334, "step": 29132 }, { "epoch": 2.7339527027027026, "grad_norm": 1.0187686051272329, "learning_rate": 2.3783124483981124e-07, "loss": 0.3074, "step": 29133 }, { "epoch": 2.7340465465465464, "grad_norm": 0.9309646423640511, "learning_rate": 2.3766489269157422e-07, "loss": 0.3302, "step": 29134 }, { "epoch": 2.7341403903903903, "grad_norm": 1.5024051435173127, "learning_rate": 2.374985973252192e-07, "loss": 0.3006, "step": 29135 }, { "epoch": 2.7342342342342345, "grad_norm": 1.0466711880941073, "learning_rate": 2.373323587427284e-07, "loss": 0.311, "step": 29136 }, { "epoch": 2.734328078078078, "grad_norm": 1.2194424012795004, "learning_rate": 2.371661769460848e-07, "loss": 0.3512, "step": 29137 }, { "epoch": 2.734421921921922, "grad_norm": 1.1002743365509366, "learning_rate": 2.3700005193726895e-07, "loss": 0.3289, "step": 29138 }, { "epoch": 2.7345157657657655, "grad_norm": 1.6781920330849904, "learning_rate": 2.3683398371826205e-07, "loss": 0.2814, "step": 29139 }, { "epoch": 2.7346096096096097, "grad_norm": 1.6046147965232982, "learning_rate": 2.3666797229104422e-07, "loss": 0.3172, "step": 29140 }, { "epoch": 2.7347034534534536, "grad_norm": 0.9621023015823957, "learning_rate": 2.365020176575955e-07, "loss": 0.3019, "step": 29141 }, { "epoch": 2.7347972972972974, "grad_norm": 1.1239557690523614, "learning_rate": 2.3633611981989268e-07, "loss": 0.3084, "step": 29142 }, { "epoch": 2.734891141141141, "grad_norm": 1.3829843366321852, "learning_rate": 2.3617027877991584e-07, "loss": 0.3149, "step": 29143 }, { "epoch": 2.734984984984985, "grad_norm": 1.0866391477417, "learning_rate": 2.3600449453964114e-07, "loss": 0.2985, "step": 29144 }, { "epoch": 2.735078828828829, "grad_norm": 1.1507942863474054, "learning_rate": 2.358387671010448e-07, "loss": 0.314, "step": 29145 }, { "epoch": 2.7351726726726726, "grad_norm": 1.0372665951095346, "learning_rate": 2.356730964661047e-07, "loss": 0.2849, "step": 29146 }, { "epoch": 2.7352665165165164, "grad_norm": 1.1021644176332683, "learning_rate": 2.3550748263679478e-07, "loss": 0.3324, "step": 29147 }, { "epoch": 2.7353603603603602, "grad_norm": 1.2640582257821589, "learning_rate": 2.353419256150896e-07, "loss": 0.3003, "step": 29148 }, { "epoch": 2.735454204204204, "grad_norm": 1.2884503323099066, "learning_rate": 2.3517642540296372e-07, "loss": 0.305, "step": 29149 }, { "epoch": 2.735548048048048, "grad_norm": 1.1126643692329405, "learning_rate": 2.3501098200238993e-07, "loss": 0.2858, "step": 29150 }, { "epoch": 2.735641891891892, "grad_norm": 1.4397644955846576, "learning_rate": 2.3484559541534056e-07, "loss": 0.3159, "step": 29151 }, { "epoch": 2.7357357357357355, "grad_norm": 1.112501884200802, "learning_rate": 2.3468026564378855e-07, "loss": 0.3447, "step": 29152 }, { "epoch": 2.7358295795795797, "grad_norm": 1.105497733678152, "learning_rate": 2.3451499268970502e-07, "loss": 0.2859, "step": 29153 }, { "epoch": 2.7359234234234235, "grad_norm": 1.2014512352892872, "learning_rate": 2.3434977655505898e-07, "loss": 0.3038, "step": 29154 }, { "epoch": 2.7360172672672673, "grad_norm": 1.1733205548146415, "learning_rate": 2.3418461724182218e-07, "loss": 0.3358, "step": 29155 }, { "epoch": 2.736111111111111, "grad_norm": 1.2991708848776908, "learning_rate": 2.3401951475196304e-07, "loss": 0.3527, "step": 29156 }, { "epoch": 2.736204954954955, "grad_norm": 1.0160316430149572, "learning_rate": 2.3385446908744946e-07, "loss": 0.3398, "step": 29157 }, { "epoch": 2.736298798798799, "grad_norm": 1.11434901782263, "learning_rate": 2.336894802502504e-07, "loss": 0.2704, "step": 29158 }, { "epoch": 2.7363926426426426, "grad_norm": 0.918282046445407, "learning_rate": 2.3352454824233316e-07, "loss": 0.3064, "step": 29159 }, { "epoch": 2.7364864864864864, "grad_norm": 1.1424713337335, "learning_rate": 2.3335967306566342e-07, "loss": 0.3199, "step": 29160 }, { "epoch": 2.73658033033033, "grad_norm": 1.9657992615947322, "learning_rate": 2.331948547222074e-07, "loss": 0.3602, "step": 29161 }, { "epoch": 2.736674174174174, "grad_norm": 1.5542730005375391, "learning_rate": 2.3303009321393012e-07, "loss": 0.3007, "step": 29162 }, { "epoch": 2.736768018018018, "grad_norm": 1.218797338001589, "learning_rate": 2.3286538854279506e-07, "loss": 0.2988, "step": 29163 }, { "epoch": 2.736861861861862, "grad_norm": 1.1933749167230656, "learning_rate": 2.3270074071076842e-07, "loss": 0.3081, "step": 29164 }, { "epoch": 2.7369557057057055, "grad_norm": 1.099580046722955, "learning_rate": 2.3253614971981143e-07, "loss": 0.326, "step": 29165 }, { "epoch": 2.7370495495495497, "grad_norm": 0.933286466507943, "learning_rate": 2.3237161557188636e-07, "loss": 0.3026, "step": 29166 }, { "epoch": 2.7371433933933935, "grad_norm": 1.0483046523279922, "learning_rate": 2.3220713826895668e-07, "loss": 0.352, "step": 29167 }, { "epoch": 2.7372372372372373, "grad_norm": 1.083094449971172, "learning_rate": 2.3204271781298192e-07, "loss": 0.3591, "step": 29168 }, { "epoch": 2.737331081081081, "grad_norm": 1.1453435608667277, "learning_rate": 2.3187835420592276e-07, "loss": 0.3308, "step": 29169 }, { "epoch": 2.737424924924925, "grad_norm": 1.1706371604368357, "learning_rate": 2.317140474497398e-07, "loss": 0.3421, "step": 29170 }, { "epoch": 2.7375187687687688, "grad_norm": 1.0878807775852637, "learning_rate": 2.3154979754639095e-07, "loss": 0.3048, "step": 29171 }, { "epoch": 2.7376126126126126, "grad_norm": 1.7015803865652417, "learning_rate": 2.3138560449783575e-07, "loss": 0.2897, "step": 29172 }, { "epoch": 2.7377064564564564, "grad_norm": 1.1134375787608828, "learning_rate": 2.3122146830603042e-07, "loss": 0.3041, "step": 29173 }, { "epoch": 2.7378003003003, "grad_norm": 1.082389248409805, "learning_rate": 2.310573889729334e-07, "loss": 0.298, "step": 29174 }, { "epoch": 2.737894144144144, "grad_norm": 1.1071206605226274, "learning_rate": 2.3089336650049976e-07, "loss": 0.2999, "step": 29175 }, { "epoch": 2.737987987987988, "grad_norm": 1.20159214516792, "learning_rate": 2.307294008906863e-07, "loss": 0.286, "step": 29176 }, { "epoch": 2.738081831831832, "grad_norm": 1.2704175079929918, "learning_rate": 2.3056549214544753e-07, "loss": 0.3067, "step": 29177 }, { "epoch": 2.7381756756756754, "grad_norm": 1.1246278285133557, "learning_rate": 2.3040164026673694e-07, "loss": 0.305, "step": 29178 }, { "epoch": 2.7382695195195197, "grad_norm": 1.2690051057266822, "learning_rate": 2.302378452565096e-07, "loss": 0.306, "step": 29179 }, { "epoch": 2.7383633633633635, "grad_norm": 1.1789903104894965, "learning_rate": 2.3007410711671785e-07, "loss": 0.3252, "step": 29180 }, { "epoch": 2.7384572072072073, "grad_norm": 1.107950100038577, "learning_rate": 2.2991042584931346e-07, "loss": 0.3197, "step": 29181 }, { "epoch": 2.738551051051051, "grad_norm": 1.2587953822749918, "learning_rate": 2.2974680145624929e-07, "loss": 0.3169, "step": 29182 }, { "epoch": 2.738644894894895, "grad_norm": 1.1435378621519439, "learning_rate": 2.295832339394749e-07, "loss": 0.297, "step": 29183 }, { "epoch": 2.7387387387387387, "grad_norm": 1.4278924970846996, "learning_rate": 2.2941972330094154e-07, "loss": 0.3029, "step": 29184 }, { "epoch": 2.7388325825825826, "grad_norm": 1.2986301324522591, "learning_rate": 2.292562695425976e-07, "loss": 0.3203, "step": 29185 }, { "epoch": 2.7389264264264264, "grad_norm": 1.0589224958152672, "learning_rate": 2.2909287266639323e-07, "loss": 0.3053, "step": 29186 }, { "epoch": 2.73902027027027, "grad_norm": 1.133121742226231, "learning_rate": 2.289295326742752e-07, "loss": 0.3, "step": 29187 }, { "epoch": 2.739114114114114, "grad_norm": 0.9505506695180993, "learning_rate": 2.2876624956819248e-07, "loss": 0.3269, "step": 29188 }, { "epoch": 2.739207957957958, "grad_norm": 1.1787420397732493, "learning_rate": 2.2860302335009188e-07, "loss": 0.2865, "step": 29189 }, { "epoch": 2.739301801801802, "grad_norm": 1.0446531238585295, "learning_rate": 2.2843985402191737e-07, "loss": 0.257, "step": 29190 }, { "epoch": 2.7393956456456454, "grad_norm": 1.2186721275987595, "learning_rate": 2.2827674158561742e-07, "loss": 0.2775, "step": 29191 }, { "epoch": 2.7394894894894897, "grad_norm": 1.0218709158622383, "learning_rate": 2.2811368604313544e-07, "loss": 0.3848, "step": 29192 }, { "epoch": 2.7395833333333335, "grad_norm": 1.6356055680192785, "learning_rate": 2.2795068739641436e-07, "loss": 0.3197, "step": 29193 }, { "epoch": 2.7396771771771773, "grad_norm": 1.0719455781837244, "learning_rate": 2.2778774564740036e-07, "loss": 0.3191, "step": 29194 }, { "epoch": 2.739771021021021, "grad_norm": 1.063042596262836, "learning_rate": 2.2762486079803415e-07, "loss": 0.3261, "step": 29195 }, { "epoch": 2.739864864864865, "grad_norm": 1.0434580081893554, "learning_rate": 2.274620328502586e-07, "loss": 0.3224, "step": 29196 }, { "epoch": 2.7399587087087087, "grad_norm": 1.1481967392128127, "learning_rate": 2.2729926180601492e-07, "loss": 0.3047, "step": 29197 }, { "epoch": 2.7400525525525525, "grad_norm": 1.0825611317986468, "learning_rate": 2.271365476672438e-07, "loss": 0.3035, "step": 29198 }, { "epoch": 2.7401463963963963, "grad_norm": 1.2312674278472484, "learning_rate": 2.269738904358848e-07, "loss": 0.2979, "step": 29199 }, { "epoch": 2.74024024024024, "grad_norm": 1.14261311363387, "learning_rate": 2.268112901138786e-07, "loss": 0.2824, "step": 29200 }, { "epoch": 2.740334084084084, "grad_norm": 1.025113721353821, "learning_rate": 2.266487467031625e-07, "loss": 0.3186, "step": 29201 }, { "epoch": 2.7404279279279278, "grad_norm": 1.1760184396091506, "learning_rate": 2.2648626020567556e-07, "loss": 0.3101, "step": 29202 }, { "epoch": 2.740521771771772, "grad_norm": 1.1073350532888377, "learning_rate": 2.263238306233545e-07, "loss": 0.2689, "step": 29203 }, { "epoch": 2.7406156156156154, "grad_norm": 1.2888528708526645, "learning_rate": 2.261614579581367e-07, "loss": 0.2994, "step": 29204 }, { "epoch": 2.7407094594594597, "grad_norm": 1.138162637579082, "learning_rate": 2.2599914221195673e-07, "loss": 0.3423, "step": 29205 }, { "epoch": 2.7408033033033035, "grad_norm": 1.0816694125802984, "learning_rate": 2.2583688338675192e-07, "loss": 0.3054, "step": 29206 }, { "epoch": 2.7408971471471473, "grad_norm": 3.058508761002793, "learning_rate": 2.256746814844557e-07, "loss": 0.3184, "step": 29207 }, { "epoch": 2.740990990990991, "grad_norm": 1.3142173204387355, "learning_rate": 2.255125365070021e-07, "loss": 0.2967, "step": 29208 }, { "epoch": 2.741084834834835, "grad_norm": 1.107635273775765, "learning_rate": 2.253504484563246e-07, "loss": 0.3134, "step": 29209 }, { "epoch": 2.7411786786786787, "grad_norm": 1.3563115619777768, "learning_rate": 2.2518841733435548e-07, "loss": 0.2654, "step": 29210 }, { "epoch": 2.7412725225225225, "grad_norm": 0.9755772622206983, "learning_rate": 2.2502644314302602e-07, "loss": 0.3309, "step": 29211 }, { "epoch": 2.7413663663663663, "grad_norm": 1.000060643225037, "learning_rate": 2.248645258842691e-07, "loss": 0.3026, "step": 29212 }, { "epoch": 2.74146021021021, "grad_norm": 1.2761715684753878, "learning_rate": 2.2470266556001485e-07, "loss": 0.3061, "step": 29213 }, { "epoch": 2.741554054054054, "grad_norm": 2.61418882378719, "learning_rate": 2.2454086217219118e-07, "loss": 0.3081, "step": 29214 }, { "epoch": 2.7416478978978978, "grad_norm": 1.0202260873285762, "learning_rate": 2.243791157227304e-07, "loss": 0.3484, "step": 29215 }, { "epoch": 2.741741741741742, "grad_norm": 1.0570594218386589, "learning_rate": 2.242174262135588e-07, "loss": 0.3105, "step": 29216 }, { "epoch": 2.7418355855855854, "grad_norm": 1.0945426506193625, "learning_rate": 2.240557936466048e-07, "loss": 0.2986, "step": 29217 }, { "epoch": 2.7419294294294296, "grad_norm": 1.0913431646863836, "learning_rate": 2.2389421802379575e-07, "loss": 0.3156, "step": 29218 }, { "epoch": 2.742023273273273, "grad_norm": 1.0758555239242384, "learning_rate": 2.2373269934705844e-07, "loss": 0.3257, "step": 29219 }, { "epoch": 2.7421171171171173, "grad_norm": 1.0490115970860954, "learning_rate": 2.2357123761831856e-07, "loss": 0.2833, "step": 29220 }, { "epoch": 2.742210960960961, "grad_norm": 0.980559296268486, "learning_rate": 2.2340983283950014e-07, "loss": 0.332, "step": 29221 }, { "epoch": 2.742304804804805, "grad_norm": 1.132682760813659, "learning_rate": 2.232484850125294e-07, "loss": 0.3207, "step": 29222 }, { "epoch": 2.7423986486486487, "grad_norm": 1.3017664579471568, "learning_rate": 2.230871941393281e-07, "loss": 0.2949, "step": 29223 }, { "epoch": 2.7424924924924925, "grad_norm": 1.1105494243883967, "learning_rate": 2.229259602218209e-07, "loss": 0.3042, "step": 29224 }, { "epoch": 2.7425863363363363, "grad_norm": 1.3537661624966406, "learning_rate": 2.227647832619295e-07, "loss": 0.3177, "step": 29225 }, { "epoch": 2.74268018018018, "grad_norm": 1.0421272463791713, "learning_rate": 2.2260366326157579e-07, "loss": 0.2963, "step": 29226 }, { "epoch": 2.742774024024024, "grad_norm": 1.1730799762840793, "learning_rate": 2.2244260022268095e-07, "loss": 0.3077, "step": 29227 }, { "epoch": 2.7428678678678677, "grad_norm": 1.0532536191628876, "learning_rate": 2.2228159414716566e-07, "loss": 0.3328, "step": 29228 }, { "epoch": 2.7429617117117115, "grad_norm": 1.2267048526597537, "learning_rate": 2.2212064503694841e-07, "loss": 0.3222, "step": 29229 }, { "epoch": 2.7430555555555554, "grad_norm": 1.213613927465715, "learning_rate": 2.2195975289394987e-07, "loss": 0.3224, "step": 29230 }, { "epoch": 2.7431493993993996, "grad_norm": 1.3241398962643214, "learning_rate": 2.2179891772008743e-07, "loss": 0.3656, "step": 29231 }, { "epoch": 2.743243243243243, "grad_norm": 1.1062613326753736, "learning_rate": 2.2163813951727896e-07, "loss": 0.2973, "step": 29232 }, { "epoch": 2.7433370870870872, "grad_norm": 1.1070402956423449, "learning_rate": 2.214774182874413e-07, "loss": 0.2705, "step": 29233 }, { "epoch": 2.743430930930931, "grad_norm": 1.157437178167197, "learning_rate": 2.2131675403249065e-07, "loss": 0.2903, "step": 29234 }, { "epoch": 2.743524774774775, "grad_norm": 1.1756478643060086, "learning_rate": 2.2115614675434216e-07, "loss": 0.2338, "step": 29235 }, { "epoch": 2.7436186186186187, "grad_norm": 1.1436584804502055, "learning_rate": 2.2099559645491209e-07, "loss": 0.3055, "step": 29236 }, { "epoch": 2.7437124624624625, "grad_norm": 1.1173257410286737, "learning_rate": 2.208351031361139e-07, "loss": 0.2905, "step": 29237 }, { "epoch": 2.7438063063063063, "grad_norm": 1.0473246757009147, "learning_rate": 2.2067466679986048e-07, "loss": 0.3437, "step": 29238 }, { "epoch": 2.74390015015015, "grad_norm": 1.2850580155929139, "learning_rate": 2.2051428744806647e-07, "loss": 0.2974, "step": 29239 }, { "epoch": 2.743993993993994, "grad_norm": 1.0501145340531097, "learning_rate": 2.2035396508264307e-07, "loss": 0.3311, "step": 29240 }, { "epoch": 2.7440878378378377, "grad_norm": 1.0106863831039514, "learning_rate": 2.2019369970550098e-07, "loss": 0.3097, "step": 29241 }, { "epoch": 2.7441816816816815, "grad_norm": 1.2768387520988382, "learning_rate": 2.2003349131855255e-07, "loss": 0.2949, "step": 29242 }, { "epoch": 2.7442755255255253, "grad_norm": 1.3170051891729875, "learning_rate": 2.1987333992370741e-07, "loss": 0.3073, "step": 29243 }, { "epoch": 2.7443693693693696, "grad_norm": 1.2000237378276815, "learning_rate": 2.1971324552287454e-07, "loss": 0.2506, "step": 29244 }, { "epoch": 2.744463213213213, "grad_norm": 1.274170636942031, "learning_rate": 2.195532081179641e-07, "loss": 0.2797, "step": 29245 }, { "epoch": 2.744557057057057, "grad_norm": 1.1597762195504464, "learning_rate": 2.1939322771088345e-07, "loss": 0.2932, "step": 29246 }, { "epoch": 2.744650900900901, "grad_norm": 1.1219263356244151, "learning_rate": 2.192333043035394e-07, "loss": 0.3414, "step": 29247 }, { "epoch": 2.744744744744745, "grad_norm": 1.0748058998164758, "learning_rate": 2.1907343789783985e-07, "loss": 0.3312, "step": 29248 }, { "epoch": 2.7448385885885886, "grad_norm": 1.0921179554538651, "learning_rate": 2.1891362849568998e-07, "loss": 0.3022, "step": 29249 }, { "epoch": 2.7449324324324325, "grad_norm": 1.173809204161357, "learning_rate": 2.1875387609899544e-07, "loss": 0.3299, "step": 29250 }, { "epoch": 2.7450262762762763, "grad_norm": 2.02923787560437, "learning_rate": 2.1859418070966143e-07, "loss": 0.3087, "step": 29251 }, { "epoch": 2.74512012012012, "grad_norm": 0.9893187781954089, "learning_rate": 2.1843454232959194e-07, "loss": 0.3389, "step": 29252 }, { "epoch": 2.745213963963964, "grad_norm": 1.1513249195445747, "learning_rate": 2.1827496096068934e-07, "loss": 0.3285, "step": 29253 }, { "epoch": 2.7453078078078077, "grad_norm": 1.2044734228604062, "learning_rate": 2.181154366048577e-07, "loss": 0.3176, "step": 29254 }, { "epoch": 2.7454016516516515, "grad_norm": 1.2506101596541717, "learning_rate": 2.1795596926399875e-07, "loss": 0.3125, "step": 29255 }, { "epoch": 2.7454954954954953, "grad_norm": 1.1364998146129381, "learning_rate": 2.1779655894001328e-07, "loss": 0.2825, "step": 29256 }, { "epoch": 2.7455893393393396, "grad_norm": 0.9858244839329603, "learning_rate": 2.176372056348025e-07, "loss": 0.2851, "step": 29257 }, { "epoch": 2.745683183183183, "grad_norm": 1.1002630479079392, "learning_rate": 2.1747790935026602e-07, "loss": 0.2951, "step": 29258 }, { "epoch": 2.745777027027027, "grad_norm": 1.1030034054724676, "learning_rate": 2.1731867008830343e-07, "loss": 0.3512, "step": 29259 }, { "epoch": 2.745870870870871, "grad_norm": 0.9690312089611188, "learning_rate": 2.171594878508132e-07, "loss": 0.2743, "step": 29260 }, { "epoch": 2.745964714714715, "grad_norm": 1.2663841550134187, "learning_rate": 2.1700036263969383e-07, "loss": 0.3144, "step": 29261 }, { "epoch": 2.7460585585585586, "grad_norm": 1.0763564917401158, "learning_rate": 2.1684129445684098e-07, "loss": 0.3145, "step": 29262 }, { "epoch": 2.7461524024024024, "grad_norm": 1.617703648203295, "learning_rate": 2.1668228330415265e-07, "loss": 0.3309, "step": 29263 }, { "epoch": 2.7462462462462462, "grad_norm": 0.965018866477953, "learning_rate": 2.1652332918352503e-07, "loss": 0.2907, "step": 29264 }, { "epoch": 2.74634009009009, "grad_norm": 1.2254137229043645, "learning_rate": 2.1636443209685165e-07, "loss": 0.2913, "step": 29265 }, { "epoch": 2.746433933933934, "grad_norm": 1.14530933137318, "learning_rate": 2.1620559204602875e-07, "loss": 0.2932, "step": 29266 }, { "epoch": 2.7465277777777777, "grad_norm": 1.4876354444534097, "learning_rate": 2.1604680903294984e-07, "loss": 0.2962, "step": 29267 }, { "epoch": 2.7466216216216215, "grad_norm": 1.0330826084941318, "learning_rate": 2.1588808305950726e-07, "loss": 0.3018, "step": 29268 }, { "epoch": 2.7467154654654653, "grad_norm": 1.4882549294383456, "learning_rate": 2.157294141275945e-07, "loss": 0.276, "step": 29269 }, { "epoch": 2.7468093093093096, "grad_norm": 1.2043395305119968, "learning_rate": 2.1557080223910344e-07, "loss": 0.2987, "step": 29270 }, { "epoch": 2.746903153153153, "grad_norm": 1.4611147509506972, "learning_rate": 2.154122473959247e-07, "loss": 0.2883, "step": 29271 }, { "epoch": 2.746996996996997, "grad_norm": 1.24760537377025, "learning_rate": 2.1525374959994848e-07, "loss": 0.3321, "step": 29272 }, { "epoch": 2.747090840840841, "grad_norm": 1.0661620758430743, "learning_rate": 2.150953088530655e-07, "loss": 0.3354, "step": 29273 }, { "epoch": 2.747184684684685, "grad_norm": 1.2180354810505054, "learning_rate": 2.1493692515716313e-07, "loss": 0.2774, "step": 29274 }, { "epoch": 2.7472785285285286, "grad_norm": 1.034421595673522, "learning_rate": 2.1477859851413208e-07, "loss": 0.2971, "step": 29275 }, { "epoch": 2.7473723723723724, "grad_norm": 1.1869168872418276, "learning_rate": 2.146203289258586e-07, "loss": 0.3421, "step": 29276 }, { "epoch": 2.7474662162162162, "grad_norm": 1.0694043875765562, "learning_rate": 2.1446211639422954e-07, "loss": 0.3287, "step": 29277 }, { "epoch": 2.74756006006006, "grad_norm": 1.251669977910051, "learning_rate": 2.1430396092113282e-07, "loss": 0.283, "step": 29278 }, { "epoch": 2.747653903903904, "grad_norm": 1.3221352957184815, "learning_rate": 2.1414586250845304e-07, "loss": 0.2866, "step": 29279 }, { "epoch": 2.7477477477477477, "grad_norm": 1.3807276197966518, "learning_rate": 2.1398782115807425e-07, "loss": 0.3325, "step": 29280 }, { "epoch": 2.7478415915915915, "grad_norm": 1.2070233805803086, "learning_rate": 2.138298368718833e-07, "loss": 0.3118, "step": 29281 }, { "epoch": 2.7479354354354353, "grad_norm": 1.158592455791032, "learning_rate": 2.1367190965176255e-07, "loss": 0.3148, "step": 29282 }, { "epoch": 2.7480292792792795, "grad_norm": 1.4706007373289716, "learning_rate": 2.1351403949959438e-07, "loss": 0.2975, "step": 29283 }, { "epoch": 2.748123123123123, "grad_norm": 1.4338929428236418, "learning_rate": 2.1335622641726227e-07, "loss": 0.3478, "step": 29284 }, { "epoch": 2.748216966966967, "grad_norm": 1.0444923012996798, "learning_rate": 2.1319847040664698e-07, "loss": 0.3356, "step": 29285 }, { "epoch": 2.748310810810811, "grad_norm": 2.344205094023712, "learning_rate": 2.1304077146962865e-07, "loss": 0.319, "step": 29286 }, { "epoch": 2.7484046546546548, "grad_norm": 1.0288673026728052, "learning_rate": 2.1288312960808964e-07, "loss": 0.3043, "step": 29287 }, { "epoch": 2.7484984984984986, "grad_norm": 1.4805978381686848, "learning_rate": 2.127255448239085e-07, "loss": 0.3287, "step": 29288 }, { "epoch": 2.7485923423423424, "grad_norm": 1.2305473605284973, "learning_rate": 2.125680171189637e-07, "loss": 0.3075, "step": 29289 }, { "epoch": 2.748686186186186, "grad_norm": 1.1267364234749588, "learning_rate": 2.1241054649513425e-07, "loss": 0.2961, "step": 29290 }, { "epoch": 2.74878003003003, "grad_norm": 1.8643582418609976, "learning_rate": 2.1225313295429706e-07, "loss": 0.3162, "step": 29291 }, { "epoch": 2.748873873873874, "grad_norm": 1.4176842654479325, "learning_rate": 2.1209577649832892e-07, "loss": 0.3066, "step": 29292 }, { "epoch": 2.7489677177177176, "grad_norm": 1.1469545623772475, "learning_rate": 2.119384771291072e-07, "loss": 0.3125, "step": 29293 }, { "epoch": 2.7490615615615615, "grad_norm": 1.1200153436968334, "learning_rate": 2.11781234848506e-07, "loss": 0.3323, "step": 29294 }, { "epoch": 2.7491554054054053, "grad_norm": 1.0464175998872023, "learning_rate": 2.11624049658401e-07, "loss": 0.326, "step": 29295 }, { "epoch": 2.7492492492492495, "grad_norm": 1.3459914104621629, "learning_rate": 2.1146692156066572e-07, "loss": 0.3278, "step": 29296 }, { "epoch": 2.749343093093093, "grad_norm": 1.016026418819914, "learning_rate": 2.1130985055717424e-07, "loss": 0.3114, "step": 29297 }, { "epoch": 2.749436936936937, "grad_norm": 1.07409174475336, "learning_rate": 2.1115283664979836e-07, "loss": 0.3454, "step": 29298 }, { "epoch": 2.7495307807807805, "grad_norm": 1.0934283063437518, "learning_rate": 2.109958798404116e-07, "loss": 0.2963, "step": 29299 }, { "epoch": 2.7496246246246248, "grad_norm": 1.1353603223451554, "learning_rate": 2.1083898013088412e-07, "loss": 0.2879, "step": 29300 }, { "epoch": 2.7497184684684686, "grad_norm": 1.2993524850094362, "learning_rate": 2.1068213752308718e-07, "loss": 0.3036, "step": 29301 }, { "epoch": 2.7498123123123124, "grad_norm": 1.1108462633089007, "learning_rate": 2.1052535201889103e-07, "loss": 0.3266, "step": 29302 }, { "epoch": 2.749906156156156, "grad_norm": 1.0074758048502779, "learning_rate": 2.1036862362016463e-07, "loss": 0.3069, "step": 29303 }, { "epoch": 2.75, "grad_norm": 1.1542861627552574, "learning_rate": 2.1021195232877657e-07, "loss": 0.32, "step": 29304 }, { "epoch": 2.750093843843844, "grad_norm": 1.10883144311502, "learning_rate": 2.100553381465953e-07, "loss": 0.319, "step": 29305 }, { "epoch": 2.7501876876876876, "grad_norm": 1.0208676368722711, "learning_rate": 2.0989878107548823e-07, "loss": 0.3058, "step": 29306 }, { "epoch": 2.7502815315315314, "grad_norm": 1.2841346642482718, "learning_rate": 2.0974228111732165e-07, "loss": 0.278, "step": 29307 }, { "epoch": 2.7503753753753752, "grad_norm": 1.2200268200471367, "learning_rate": 2.0958583827396185e-07, "loss": 0.3307, "step": 29308 }, { "epoch": 2.7504692192192195, "grad_norm": 1.2289561262073045, "learning_rate": 2.0942945254727398e-07, "loss": 0.3212, "step": 29309 }, { "epoch": 2.750563063063063, "grad_norm": 1.2865317330150354, "learning_rate": 2.0927312393912157e-07, "loss": 0.3257, "step": 29310 }, { "epoch": 2.750656906906907, "grad_norm": 1.0863500676903846, "learning_rate": 2.0911685245137036e-07, "loss": 0.3048, "step": 29311 }, { "epoch": 2.7507507507507505, "grad_norm": 1.0770967500499489, "learning_rate": 2.0896063808588273e-07, "loss": 0.3242, "step": 29312 }, { "epoch": 2.7508445945945947, "grad_norm": 1.0546694027184962, "learning_rate": 2.0880448084452053e-07, "loss": 0.3016, "step": 29313 }, { "epoch": 2.7509384384384385, "grad_norm": 1.2075092966740935, "learning_rate": 2.0864838072914727e-07, "loss": 0.2998, "step": 29314 }, { "epoch": 2.7510322822822824, "grad_norm": 1.535755361877288, "learning_rate": 2.0849233774162313e-07, "loss": 0.3146, "step": 29315 }, { "epoch": 2.751126126126126, "grad_norm": 1.4646266110127808, "learning_rate": 2.083363518838083e-07, "loss": 0.318, "step": 29316 }, { "epoch": 2.75121996996997, "grad_norm": 1.8833766922406825, "learning_rate": 2.0818042315756404e-07, "loss": 0.3153, "step": 29317 }, { "epoch": 2.751313813813814, "grad_norm": 1.0451963295242555, "learning_rate": 2.0802455156474778e-07, "loss": 0.2971, "step": 29318 }, { "epoch": 2.7514076576576576, "grad_norm": 1.074704401900517, "learning_rate": 2.0786873710721912e-07, "loss": 0.2751, "step": 29319 }, { "epoch": 2.7515015015015014, "grad_norm": 1.365097757086315, "learning_rate": 2.0771297978683546e-07, "loss": 0.3374, "step": 29320 }, { "epoch": 2.7515953453453452, "grad_norm": 0.9586021888077187, "learning_rate": 2.0755727960545424e-07, "loss": 0.3131, "step": 29321 }, { "epoch": 2.751689189189189, "grad_norm": 1.0848718408337308, "learning_rate": 2.0740163656493063e-07, "loss": 0.3222, "step": 29322 }, { "epoch": 2.751783033033033, "grad_norm": 1.1303061535815038, "learning_rate": 2.0724605066712256e-07, "loss": 0.3548, "step": 29323 }, { "epoch": 2.751876876876877, "grad_norm": 1.3522549150861314, "learning_rate": 2.0709052191388356e-07, "loss": 0.3392, "step": 29324 }, { "epoch": 2.7519707207207205, "grad_norm": 1.4384172897932124, "learning_rate": 2.0693505030706772e-07, "loss": 0.3109, "step": 29325 }, { "epoch": 2.7520645645645647, "grad_norm": 2.8631712539273035, "learning_rate": 2.0677963584852966e-07, "loss": 0.2815, "step": 29326 }, { "epoch": 2.7521584084084085, "grad_norm": 1.2470806977563071, "learning_rate": 2.0662427854012235e-07, "loss": 0.3013, "step": 29327 }, { "epoch": 2.7522522522522523, "grad_norm": 0.996595911779315, "learning_rate": 2.064689783836976e-07, "loss": 0.2921, "step": 29328 }, { "epoch": 2.752346096096096, "grad_norm": 1.3564098885252691, "learning_rate": 2.0631373538110789e-07, "loss": 0.2893, "step": 29329 }, { "epoch": 2.75243993993994, "grad_norm": 1.2456274572656958, "learning_rate": 2.0615854953420334e-07, "loss": 0.3334, "step": 29330 }, { "epoch": 2.7525337837837838, "grad_norm": 1.0819183658558356, "learning_rate": 2.0600342084483528e-07, "loss": 0.2845, "step": 29331 }, { "epoch": 2.7526276276276276, "grad_norm": 1.0974451250149209, "learning_rate": 2.058483493148522e-07, "loss": 0.369, "step": 29332 }, { "epoch": 2.7527214714714714, "grad_norm": 1.664798270318452, "learning_rate": 2.0569333494610323e-07, "loss": 0.314, "step": 29333 }, { "epoch": 2.752815315315315, "grad_norm": 1.1233171758287024, "learning_rate": 2.0553837774043683e-07, "loss": 0.3301, "step": 29334 }, { "epoch": 2.752909159159159, "grad_norm": 1.3251834973640668, "learning_rate": 2.05383477699701e-07, "loss": 0.3167, "step": 29335 }, { "epoch": 2.753003003003003, "grad_norm": 1.4095131139508434, "learning_rate": 2.0522863482574207e-07, "loss": 0.314, "step": 29336 }, { "epoch": 2.753096846846847, "grad_norm": 0.990383435368593, "learning_rate": 2.050738491204063e-07, "loss": 0.3167, "step": 29337 }, { "epoch": 2.7531906906906904, "grad_norm": 0.9525409659518282, "learning_rate": 2.0491912058553942e-07, "loss": 0.3041, "step": 29338 }, { "epoch": 2.7532845345345347, "grad_norm": 1.6318118084277635, "learning_rate": 2.0476444922298667e-07, "loss": 0.285, "step": 29339 }, { "epoch": 2.7533783783783785, "grad_norm": 1.0264418099139354, "learning_rate": 2.0460983503459099e-07, "loss": 0.3328, "step": 29340 }, { "epoch": 2.7534722222222223, "grad_norm": 1.224163906672792, "learning_rate": 2.04455278022197e-07, "loss": 0.2919, "step": 29341 }, { "epoch": 2.753566066066066, "grad_norm": 1.1649518482289296, "learning_rate": 2.0430077818764717e-07, "loss": 0.3175, "step": 29342 }, { "epoch": 2.75365990990991, "grad_norm": 1.2516925159235284, "learning_rate": 2.0414633553278383e-07, "loss": 0.3301, "step": 29343 }, { "epoch": 2.7537537537537538, "grad_norm": 1.1854542962173849, "learning_rate": 2.039919500594484e-07, "loss": 0.335, "step": 29344 }, { "epoch": 2.7538475975975976, "grad_norm": 1.2622855343087906, "learning_rate": 2.0383762176948097e-07, "loss": 0.2994, "step": 29345 }, { "epoch": 2.7539414414414414, "grad_norm": 1.0711881861598456, "learning_rate": 2.0368335066472177e-07, "loss": 0.2535, "step": 29346 }, { "epoch": 2.754035285285285, "grad_norm": 1.3843041794043276, "learning_rate": 2.0352913674701102e-07, "loss": 0.3209, "step": 29347 }, { "epoch": 2.754129129129129, "grad_norm": 1.04024308479707, "learning_rate": 2.0337498001818668e-07, "loss": 0.3069, "step": 29348 }, { "epoch": 2.754222972972973, "grad_norm": 1.1722705664957664, "learning_rate": 2.0322088048008616e-07, "loss": 0.3042, "step": 29349 }, { "epoch": 2.754316816816817, "grad_norm": 1.0219637837045192, "learning_rate": 2.0306683813454853e-07, "loss": 0.2906, "step": 29350 }, { "epoch": 2.7544106606606604, "grad_norm": 1.239721313229979, "learning_rate": 2.0291285298340957e-07, "loss": 0.2937, "step": 29351 }, { "epoch": 2.7545045045045047, "grad_norm": 1.1504261460068204, "learning_rate": 2.0275892502850503e-07, "loss": 0.2796, "step": 29352 }, { "epoch": 2.7545983483483485, "grad_norm": 1.3025242046001964, "learning_rate": 2.0260505427167064e-07, "loss": 0.3031, "step": 29353 }, { "epoch": 2.7546921921921923, "grad_norm": 1.0992227766731222, "learning_rate": 2.0245124071474055e-07, "loss": 0.3373, "step": 29354 }, { "epoch": 2.754786036036036, "grad_norm": 1.1297657995271138, "learning_rate": 2.0229748435954932e-07, "loss": 0.2712, "step": 29355 }, { "epoch": 2.75487987987988, "grad_norm": 1.048113393413947, "learning_rate": 2.0214378520793e-07, "loss": 0.3678, "step": 29356 }, { "epoch": 2.7549737237237237, "grad_norm": 0.969406509883115, "learning_rate": 2.019901432617144e-07, "loss": 0.2974, "step": 29357 }, { "epoch": 2.7550675675675675, "grad_norm": 0.8864721350746412, "learning_rate": 2.0183655852273497e-07, "loss": 0.303, "step": 29358 }, { "epoch": 2.7551614114114114, "grad_norm": 1.5671480200771202, "learning_rate": 2.016830309928236e-07, "loss": 0.272, "step": 29359 }, { "epoch": 2.755255255255255, "grad_norm": 1.060939365156094, "learning_rate": 2.0152956067380992e-07, "loss": 0.296, "step": 29360 }, { "epoch": 2.755349099099099, "grad_norm": 1.0968125263318884, "learning_rate": 2.01376147567523e-07, "loss": 0.329, "step": 29361 }, { "epoch": 2.755442942942943, "grad_norm": 1.0801051114102724, "learning_rate": 2.0122279167579418e-07, "loss": 0.3176, "step": 29362 }, { "epoch": 2.755536786786787, "grad_norm": 1.1822491949948848, "learning_rate": 2.0106949300045086e-07, "loss": 0.3306, "step": 29363 }, { "epoch": 2.7556306306306304, "grad_norm": 1.190690186551907, "learning_rate": 2.0091625154331995e-07, "loss": 0.3034, "step": 29364 }, { "epoch": 2.7557244744744747, "grad_norm": 1.1615678176099438, "learning_rate": 2.007630673062305e-07, "loss": 0.2855, "step": 29365 }, { "epoch": 2.7558183183183185, "grad_norm": 1.1672338697376334, "learning_rate": 2.006099402910072e-07, "loss": 0.336, "step": 29366 }, { "epoch": 2.7559121621621623, "grad_norm": 1.0244304368773829, "learning_rate": 2.0045687049947583e-07, "loss": 0.3175, "step": 29367 }, { "epoch": 2.756006006006006, "grad_norm": 2.323792341371027, "learning_rate": 2.0030385793346319e-07, "loss": 0.328, "step": 29368 }, { "epoch": 2.75609984984985, "grad_norm": 1.1100304711636484, "learning_rate": 2.001509025947923e-07, "loss": 0.2629, "step": 29369 }, { "epoch": 2.7561936936936937, "grad_norm": 1.1582840534112857, "learning_rate": 1.9999800448528728e-07, "loss": 0.31, "step": 29370 }, { "epoch": 2.7562875375375375, "grad_norm": 1.053896796190198, "learning_rate": 1.9984516360677053e-07, "loss": 0.2972, "step": 29371 }, { "epoch": 2.7563813813813813, "grad_norm": 1.1838464064343786, "learning_rate": 1.9969237996106506e-07, "loss": 0.3111, "step": 29372 }, { "epoch": 2.756475225225225, "grad_norm": 1.0716748506111182, "learning_rate": 1.995396535499916e-07, "loss": 0.287, "step": 29373 }, { "epoch": 2.756569069069069, "grad_norm": 1.176428623241044, "learning_rate": 1.9938698437537264e-07, "loss": 0.2623, "step": 29374 }, { "epoch": 2.7566629129129128, "grad_norm": 1.2757452027422291, "learning_rate": 1.9923437243902776e-07, "loss": 0.2964, "step": 29375 }, { "epoch": 2.756756756756757, "grad_norm": 2.226083413739269, "learning_rate": 1.9908181774277559e-07, "loss": 0.2823, "step": 29376 }, { "epoch": 2.7568506006006004, "grad_norm": 3.1207015787138874, "learning_rate": 1.989293202884368e-07, "loss": 0.303, "step": 29377 }, { "epoch": 2.7569444444444446, "grad_norm": 1.293890268858489, "learning_rate": 1.9877688007782835e-07, "loss": 0.2935, "step": 29378 }, { "epoch": 2.7570382882882885, "grad_norm": 1.1407917683984905, "learning_rate": 1.9862449711276822e-07, "loss": 0.3245, "step": 29379 }, { "epoch": 2.7571321321321323, "grad_norm": 1.269425331590617, "learning_rate": 1.9847217139507379e-07, "loss": 0.2865, "step": 29380 }, { "epoch": 2.757225975975976, "grad_norm": 1.2914172200182565, "learning_rate": 1.9831990292656035e-07, "loss": 0.3242, "step": 29381 }, { "epoch": 2.75731981981982, "grad_norm": 1.0365177811383954, "learning_rate": 1.9816769170904416e-07, "loss": 0.3491, "step": 29382 }, { "epoch": 2.7574136636636637, "grad_norm": 1.725664542926748, "learning_rate": 1.9801553774433934e-07, "loss": 0.3003, "step": 29383 }, { "epoch": 2.7575075075075075, "grad_norm": 1.0298798741736124, "learning_rate": 1.9786344103426115e-07, "loss": 0.3304, "step": 29384 }, { "epoch": 2.7576013513513513, "grad_norm": 1.2730263916802236, "learning_rate": 1.9771140158062085e-07, "loss": 0.2999, "step": 29385 }, { "epoch": 2.757695195195195, "grad_norm": 0.9309951978909136, "learning_rate": 1.9755941938523425e-07, "loss": 0.3176, "step": 29386 }, { "epoch": 2.757789039039039, "grad_norm": 1.4406272592309308, "learning_rate": 1.9740749444991103e-07, "loss": 0.3119, "step": 29387 }, { "epoch": 2.7578828828828827, "grad_norm": 1.0054444932910547, "learning_rate": 1.9725562677646358e-07, "loss": 0.3207, "step": 29388 }, { "epoch": 2.757976726726727, "grad_norm": 0.8848825598577508, "learning_rate": 1.9710381636670273e-07, "loss": 0.3092, "step": 29389 }, { "epoch": 2.7580705705705704, "grad_norm": 1.4181806996091428, "learning_rate": 1.9695206322243865e-07, "loss": 0.3239, "step": 29390 }, { "epoch": 2.7581644144144146, "grad_norm": 1.2129201800976335, "learning_rate": 1.9680036734547935e-07, "loss": 0.2854, "step": 29391 }, { "epoch": 2.758258258258258, "grad_norm": 1.3622944316298011, "learning_rate": 1.9664872873763562e-07, "loss": 0.2915, "step": 29392 }, { "epoch": 2.7583521021021022, "grad_norm": 1.1401596106832295, "learning_rate": 1.9649714740071434e-07, "loss": 0.2884, "step": 29393 }, { "epoch": 2.758445945945946, "grad_norm": 1.36117596441351, "learning_rate": 1.9634562333652242e-07, "loss": 0.2857, "step": 29394 }, { "epoch": 2.75853978978979, "grad_norm": 1.241039229837044, "learning_rate": 1.961941565468678e-07, "loss": 0.2897, "step": 29395 }, { "epoch": 2.7586336336336337, "grad_norm": 1.1537424089293047, "learning_rate": 1.960427470335552e-07, "loss": 0.3126, "step": 29396 }, { "epoch": 2.7587274774774775, "grad_norm": 1.02518513847687, "learning_rate": 1.9589139479838982e-07, "loss": 0.3077, "step": 29397 }, { "epoch": 2.7588213213213213, "grad_norm": 1.118400932275727, "learning_rate": 1.9574009984317688e-07, "loss": 0.356, "step": 29398 }, { "epoch": 2.758915165165165, "grad_norm": 0.9899576353846655, "learning_rate": 1.9558886216972052e-07, "loss": 0.3395, "step": 29399 }, { "epoch": 2.759009009009009, "grad_norm": 1.0427227542758672, "learning_rate": 1.9543768177982315e-07, "loss": 0.2854, "step": 29400 }, { "epoch": 2.7591028528528527, "grad_norm": 1.1057745596521948, "learning_rate": 1.9528655867528834e-07, "loss": 0.3216, "step": 29401 }, { "epoch": 2.7591966966966965, "grad_norm": 1.110556975755071, "learning_rate": 1.9513549285791744e-07, "loss": 0.3005, "step": 29402 }, { "epoch": 2.7592905405405403, "grad_norm": 2.3160180308422076, "learning_rate": 1.9498448432951067e-07, "loss": 0.3115, "step": 29403 }, { "epoch": 2.7593843843843846, "grad_norm": 1.203257358908401, "learning_rate": 1.9483353309187047e-07, "loss": 0.3193, "step": 29404 }, { "epoch": 2.759478228228228, "grad_norm": 1.1018598944585158, "learning_rate": 1.9468263914679487e-07, "loss": 0.313, "step": 29405 }, { "epoch": 2.7595720720720722, "grad_norm": 1.3409460903252441, "learning_rate": 1.9453180249608404e-07, "loss": 0.3428, "step": 29406 }, { "epoch": 2.759665915915916, "grad_norm": 1.2801820442172918, "learning_rate": 1.9438102314153661e-07, "loss": 0.2981, "step": 29407 }, { "epoch": 2.75975975975976, "grad_norm": 1.4346666157084722, "learning_rate": 1.9423030108494946e-07, "loss": 0.292, "step": 29408 }, { "epoch": 2.7598536036036037, "grad_norm": 1.0898067860473073, "learning_rate": 1.9407963632811945e-07, "loss": 0.2966, "step": 29409 }, { "epoch": 2.7599474474474475, "grad_norm": 1.0425669361523149, "learning_rate": 1.9392902887284404e-07, "loss": 0.3178, "step": 29410 }, { "epoch": 2.7600412912912913, "grad_norm": 1.2470813106978769, "learning_rate": 1.937784787209185e-07, "loss": 0.2971, "step": 29411 }, { "epoch": 2.760135135135135, "grad_norm": 1.413269141679914, "learning_rate": 1.9362798587413688e-07, "loss": 0.3267, "step": 29412 }, { "epoch": 2.760228978978979, "grad_norm": 1.7640760478048003, "learning_rate": 1.934775503342956e-07, "loss": 0.3363, "step": 29413 }, { "epoch": 2.7603228228228227, "grad_norm": 1.1948963636064163, "learning_rate": 1.9332717210318707e-07, "loss": 0.3242, "step": 29414 }, { "epoch": 2.7604166666666665, "grad_norm": 1.2141408706222896, "learning_rate": 1.9317685118260376e-07, "loss": 0.3657, "step": 29415 }, { "epoch": 2.7605105105105103, "grad_norm": 1.054728105574857, "learning_rate": 1.9302658757433922e-07, "loss": 0.3168, "step": 29416 }, { "epoch": 2.7606043543543546, "grad_norm": 1.1031982350851481, "learning_rate": 1.9287638128018428e-07, "loss": 0.3035, "step": 29417 }, { "epoch": 2.760698198198198, "grad_norm": 1.3042916184154014, "learning_rate": 1.927262323019302e-07, "loss": 0.2981, "step": 29418 }, { "epoch": 2.760792042042042, "grad_norm": 1.0433615695837948, "learning_rate": 1.9257614064136675e-07, "loss": 0.2974, "step": 29419 }, { "epoch": 2.760885885885886, "grad_norm": 1.6577922779271677, "learning_rate": 1.9242610630028413e-07, "loss": 0.3201, "step": 29420 }, { "epoch": 2.76097972972973, "grad_norm": 1.1940506334994343, "learning_rate": 1.922761292804698e-07, "loss": 0.2905, "step": 29421 }, { "epoch": 2.7610735735735736, "grad_norm": 1.155285398506181, "learning_rate": 1.9212620958371396e-07, "loss": 0.3375, "step": 29422 }, { "epoch": 2.7611674174174174, "grad_norm": 1.2414448132326807, "learning_rate": 1.9197634721180304e-07, "loss": 0.3264, "step": 29423 }, { "epoch": 2.7612612612612613, "grad_norm": 1.096529319205085, "learning_rate": 1.9182654216652387e-07, "loss": 0.3021, "step": 29424 }, { "epoch": 2.761355105105105, "grad_norm": 1.2243573601352369, "learning_rate": 1.9167679444966282e-07, "loss": 0.345, "step": 29425 }, { "epoch": 2.761448948948949, "grad_norm": 1.368236867975912, "learning_rate": 1.9152710406300511e-07, "loss": 0.3294, "step": 29426 }, { "epoch": 2.7615427927927927, "grad_norm": 1.2256387250562588, "learning_rate": 1.913774710083355e-07, "loss": 0.3033, "step": 29427 }, { "epoch": 2.7616366366366365, "grad_norm": 1.0541142096433735, "learning_rate": 1.9122789528743858e-07, "loss": 0.3245, "step": 29428 }, { "epoch": 2.7617304804804803, "grad_norm": 1.1305310576713017, "learning_rate": 1.910783769020974e-07, "loss": 0.303, "step": 29429 }, { "epoch": 2.7618243243243246, "grad_norm": 1.163566371716571, "learning_rate": 1.9092891585409502e-07, "loss": 0.3199, "step": 29430 }, { "epoch": 2.761918168168168, "grad_norm": 1.0562526037885491, "learning_rate": 1.9077951214521329e-07, "loss": 0.3057, "step": 29431 }, { "epoch": 2.762012012012012, "grad_norm": 1.336666897426764, "learning_rate": 1.9063016577723304e-07, "loss": 0.3201, "step": 29432 }, { "epoch": 2.762105855855856, "grad_norm": 1.2052854865924665, "learning_rate": 1.904808767519345e-07, "loss": 0.31, "step": 29433 }, { "epoch": 2.7621996996997, "grad_norm": 1.041520129245628, "learning_rate": 1.9033164507109957e-07, "loss": 0.2907, "step": 29434 }, { "epoch": 2.7622935435435436, "grad_norm": 1.274948549247455, "learning_rate": 1.9018247073650686e-07, "loss": 0.3144, "step": 29435 }, { "epoch": 2.7623873873873874, "grad_norm": 0.9693389301386007, "learning_rate": 1.9003335374993325e-07, "loss": 0.2898, "step": 29436 }, { "epoch": 2.7624812312312312, "grad_norm": 1.0152651686760372, "learning_rate": 1.89884294113159e-07, "loss": 0.3003, "step": 29437 }, { "epoch": 2.762575075075075, "grad_norm": 2.444518694659236, "learning_rate": 1.8973529182796046e-07, "loss": 0.3033, "step": 29438 }, { "epoch": 2.762668918918919, "grad_norm": 1.2453614664879094, "learning_rate": 1.8958634689611345e-07, "loss": 0.3315, "step": 29439 }, { "epoch": 2.7627627627627627, "grad_norm": 1.211976923296307, "learning_rate": 1.894374593193954e-07, "loss": 0.2997, "step": 29440 }, { "epoch": 2.7628566066066065, "grad_norm": 1.1240446524014298, "learning_rate": 1.8928862909958045e-07, "loss": 0.309, "step": 29441 }, { "epoch": 2.7629504504504503, "grad_norm": 1.105131457013941, "learning_rate": 1.891398562384439e-07, "loss": 0.3107, "step": 29442 }, { "epoch": 2.7630442942942945, "grad_norm": 1.2088809618742133, "learning_rate": 1.8899114073775872e-07, "loss": 0.3121, "step": 29443 }, { "epoch": 2.763138138138138, "grad_norm": 1.1935477211007883, "learning_rate": 1.8884248259929793e-07, "loss": 0.3194, "step": 29444 }, { "epoch": 2.763231981981982, "grad_norm": 1.1023423306810232, "learning_rate": 1.8869388182483462e-07, "loss": 0.3315, "step": 29445 }, { "epoch": 2.763325825825826, "grad_norm": 1.031093448173742, "learning_rate": 1.8854533841614065e-07, "loss": 0.3087, "step": 29446 }, { "epoch": 2.76341966966967, "grad_norm": 1.1758809692323755, "learning_rate": 1.8839685237498684e-07, "loss": 0.3085, "step": 29447 }, { "epoch": 2.7635135135135136, "grad_norm": 1.1652612171001475, "learning_rate": 1.8824842370314289e-07, "loss": 0.297, "step": 29448 }, { "epoch": 2.7636073573573574, "grad_norm": 1.074525410005316, "learning_rate": 1.8810005240238017e-07, "loss": 0.3044, "step": 29449 }, { "epoch": 2.763701201201201, "grad_norm": 1.2038326600856346, "learning_rate": 1.879517384744667e-07, "loss": 0.29, "step": 29450 }, { "epoch": 2.763795045045045, "grad_norm": 1.6539291580068562, "learning_rate": 1.878034819211705e-07, "loss": 0.3162, "step": 29451 }, { "epoch": 2.763888888888889, "grad_norm": 1.0634902038626546, "learning_rate": 1.876552827442607e-07, "loss": 0.3236, "step": 29452 }, { "epoch": 2.7639827327327327, "grad_norm": 1.1345005627271114, "learning_rate": 1.875071409455026e-07, "loss": 0.2779, "step": 29453 }, { "epoch": 2.7640765765765765, "grad_norm": 1.4688694357820873, "learning_rate": 1.8735905652666364e-07, "loss": 0.3287, "step": 29454 }, { "epoch": 2.7641704204204203, "grad_norm": 1.0817356220216245, "learning_rate": 1.872110294895091e-07, "loss": 0.276, "step": 29455 }, { "epoch": 2.7642642642642645, "grad_norm": 1.1598923633568636, "learning_rate": 1.870630598358042e-07, "loss": 0.3022, "step": 29456 }, { "epoch": 2.764358108108108, "grad_norm": 1.2460603180245198, "learning_rate": 1.86915147567312e-07, "loss": 0.3, "step": 29457 }, { "epoch": 2.764451951951952, "grad_norm": 1.1011055075964125, "learning_rate": 1.8676729268579718e-07, "loss": 0.3075, "step": 29458 }, { "epoch": 2.764545795795796, "grad_norm": 3.447674389767087, "learning_rate": 1.866194951930228e-07, "loss": 0.3258, "step": 29459 }, { "epoch": 2.7646396396396398, "grad_norm": 1.036810656554129, "learning_rate": 1.8647175509075022e-07, "loss": 0.2819, "step": 29460 }, { "epoch": 2.7647334834834836, "grad_norm": 1.0708565827623289, "learning_rate": 1.863240723807419e-07, "loss": 0.3752, "step": 29461 }, { "epoch": 2.7648273273273274, "grad_norm": 1.1227774005763616, "learning_rate": 1.8617644706475812e-07, "loss": 0.2841, "step": 29462 }, { "epoch": 2.764921171171171, "grad_norm": 1.0604606337458533, "learning_rate": 1.8602887914455858e-07, "loss": 0.3046, "step": 29463 }, { "epoch": 2.765015015015015, "grad_norm": 1.2034272589808177, "learning_rate": 1.8588136862190353e-07, "loss": 0.3141, "step": 29464 }, { "epoch": 2.765108858858859, "grad_norm": 1.3363518906082505, "learning_rate": 1.8573391549855213e-07, "loss": 0.2759, "step": 29465 }, { "epoch": 2.7652027027027026, "grad_norm": 1.025629800788479, "learning_rate": 1.855865197762613e-07, "loss": 0.3244, "step": 29466 }, { "epoch": 2.7652965465465464, "grad_norm": 1.0950816961765981, "learning_rate": 1.8543918145678908e-07, "loss": 0.3256, "step": 29467 }, { "epoch": 2.7653903903903903, "grad_norm": 1.0498710981994548, "learning_rate": 1.852919005418924e-07, "loss": 0.326, "step": 29468 }, { "epoch": 2.7654842342342345, "grad_norm": 1.2880619201411883, "learning_rate": 1.851446770333265e-07, "loss": 0.3134, "step": 29469 }, { "epoch": 2.765578078078078, "grad_norm": 1.2204369449266126, "learning_rate": 1.8499751093284778e-07, "loss": 0.3282, "step": 29470 }, { "epoch": 2.765671921921922, "grad_norm": 1.0936073873788104, "learning_rate": 1.8485040224221041e-07, "loss": 0.3482, "step": 29471 }, { "epoch": 2.7657657657657655, "grad_norm": 1.2613312768076843, "learning_rate": 1.8470335096316795e-07, "loss": 0.3267, "step": 29472 }, { "epoch": 2.7658596096096097, "grad_norm": 1.052778395034805, "learning_rate": 1.8455635709747454e-07, "loss": 0.3213, "step": 29473 }, { "epoch": 2.7659534534534536, "grad_norm": 1.0618403795760538, "learning_rate": 1.8440942064688272e-07, "loss": 0.3132, "step": 29474 }, { "epoch": 2.7660472972972974, "grad_norm": 1.4068361708138644, "learning_rate": 1.8426254161314326e-07, "loss": 0.341, "step": 29475 }, { "epoch": 2.766141141141141, "grad_norm": 1.3249417933664012, "learning_rate": 1.8411571999800926e-07, "loss": 0.3254, "step": 29476 }, { "epoch": 2.766234984984985, "grad_norm": 1.134612470989963, "learning_rate": 1.8396895580322982e-07, "loss": 0.296, "step": 29477 }, { "epoch": 2.766328828828829, "grad_norm": 1.1153342362701737, "learning_rate": 1.8382224903055578e-07, "loss": 0.2755, "step": 29478 }, { "epoch": 2.7664226726726726, "grad_norm": 1.0174408227280183, "learning_rate": 1.8367559968173575e-07, "loss": 0.3096, "step": 29479 }, { "epoch": 2.7665165165165164, "grad_norm": 1.1997101242117607, "learning_rate": 1.8352900775851835e-07, "loss": 0.3017, "step": 29480 }, { "epoch": 2.7666103603603602, "grad_norm": 1.1879658948972571, "learning_rate": 1.8338247326265045e-07, "loss": 0.2687, "step": 29481 }, { "epoch": 2.766704204204204, "grad_norm": 1.138126087366231, "learning_rate": 1.832359961958813e-07, "loss": 0.3548, "step": 29482 }, { "epoch": 2.766798048048048, "grad_norm": 1.1662674568603666, "learning_rate": 1.8308957655995607e-07, "loss": 0.2599, "step": 29483 }, { "epoch": 2.766891891891892, "grad_norm": 0.9444689586882805, "learning_rate": 1.8294321435662065e-07, "loss": 0.3472, "step": 29484 }, { "epoch": 2.7669857357357355, "grad_norm": 1.1187593470342578, "learning_rate": 1.827969095876203e-07, "loss": 0.3166, "step": 29485 }, { "epoch": 2.7670795795795797, "grad_norm": 1.1929957659933579, "learning_rate": 1.8265066225469974e-07, "loss": 0.3302, "step": 29486 }, { "epoch": 2.7671734234234235, "grad_norm": 1.1533915973923015, "learning_rate": 1.8250447235960145e-07, "loss": 0.2931, "step": 29487 }, { "epoch": 2.7672672672672673, "grad_norm": 1.0214480238772152, "learning_rate": 1.823583399040696e-07, "loss": 0.3001, "step": 29488 }, { "epoch": 2.767361111111111, "grad_norm": 0.9891774266892216, "learning_rate": 1.8221226488984667e-07, "loss": 0.3308, "step": 29489 }, { "epoch": 2.767454954954955, "grad_norm": 1.0179875891911796, "learning_rate": 1.8206624731867294e-07, "loss": 0.3165, "step": 29490 }, { "epoch": 2.767548798798799, "grad_norm": 1.811672412485986, "learning_rate": 1.819202871922915e-07, "loss": 0.3122, "step": 29491 }, { "epoch": 2.7676426426426426, "grad_norm": 2.0296972699000713, "learning_rate": 1.81774384512442e-07, "loss": 0.2902, "step": 29492 }, { "epoch": 2.7677364864864864, "grad_norm": 1.884362468942009, "learning_rate": 1.8162853928086256e-07, "loss": 0.3247, "step": 29493 }, { "epoch": 2.76783033033033, "grad_norm": 1.2083832903236438, "learning_rate": 1.814827514992934e-07, "loss": 0.2993, "step": 29494 }, { "epoch": 2.767924174174174, "grad_norm": 1.1003913223030464, "learning_rate": 1.813370211694726e-07, "loss": 0.3173, "step": 29495 }, { "epoch": 2.768018018018018, "grad_norm": 1.1387002410482054, "learning_rate": 1.8119134829313657e-07, "loss": 0.3012, "step": 29496 }, { "epoch": 2.768111861861862, "grad_norm": 1.1144473064261393, "learning_rate": 1.8104573287202442e-07, "loss": 0.3349, "step": 29497 }, { "epoch": 2.7682057057057055, "grad_norm": 0.9740568052482145, "learning_rate": 1.8090017490787094e-07, "loss": 0.3101, "step": 29498 }, { "epoch": 2.7682995495495497, "grad_norm": 0.9930600874909723, "learning_rate": 1.8075467440241134e-07, "loss": 0.3273, "step": 29499 }, { "epoch": 2.7683933933933935, "grad_norm": 1.0715117044622215, "learning_rate": 1.806092313573815e-07, "loss": 0.3098, "step": 29500 }, { "epoch": 2.7684872372372373, "grad_norm": 1.4596674208027867, "learning_rate": 1.8046384577451503e-07, "loss": 0.3287, "step": 29501 }, { "epoch": 2.768581081081081, "grad_norm": 1.0574588805457148, "learning_rate": 1.8031851765554499e-07, "loss": 0.329, "step": 29502 }, { "epoch": 2.768674924924925, "grad_norm": 1.1155478552705116, "learning_rate": 1.8017324700220496e-07, "loss": 0.332, "step": 29503 }, { "epoch": 2.7687687687687688, "grad_norm": 0.9282701327754846, "learning_rate": 1.8002803381622635e-07, "loss": 0.3117, "step": 29504 }, { "epoch": 2.7688626126126126, "grad_norm": 1.0387212867635303, "learning_rate": 1.7988287809934114e-07, "loss": 0.3206, "step": 29505 }, { "epoch": 2.7689564564564564, "grad_norm": 1.0883765684116207, "learning_rate": 1.79737779853279e-07, "loss": 0.3021, "step": 29506 }, { "epoch": 2.7690503003003, "grad_norm": 1.0839892133929507, "learning_rate": 1.7959273907977138e-07, "loss": 0.3132, "step": 29507 }, { "epoch": 2.769144144144144, "grad_norm": 1.1512191504435823, "learning_rate": 1.7944775578054575e-07, "loss": 0.3138, "step": 29508 }, { "epoch": 2.769237987987988, "grad_norm": 2.3115009429843414, "learning_rate": 1.7930282995733296e-07, "loss": 0.3276, "step": 29509 }, { "epoch": 2.769331831831832, "grad_norm": 1.0531112499246256, "learning_rate": 1.791579616118594e-07, "loss": 0.3529, "step": 29510 }, { "epoch": 2.7694256756756754, "grad_norm": 1.1724506706100237, "learning_rate": 1.7901315074585256e-07, "loss": 0.3004, "step": 29511 }, { "epoch": 2.7695195195195197, "grad_norm": 1.3315797015021082, "learning_rate": 1.7886839736104e-07, "loss": 0.287, "step": 29512 }, { "epoch": 2.7696133633633635, "grad_norm": 1.3917715326402342, "learning_rate": 1.7872370145914642e-07, "loss": 0.2674, "step": 29513 }, { "epoch": 2.7697072072072073, "grad_norm": 1.2824375903724152, "learning_rate": 1.7857906304189765e-07, "loss": 0.2927, "step": 29514 }, { "epoch": 2.769801051051051, "grad_norm": 1.6577818103133706, "learning_rate": 1.7843448211101843e-07, "loss": 0.314, "step": 29515 }, { "epoch": 2.769894894894895, "grad_norm": 1.1727172789006826, "learning_rate": 1.782899586682324e-07, "loss": 0.3134, "step": 29516 }, { "epoch": 2.7699887387387387, "grad_norm": 1.1752510398395375, "learning_rate": 1.781454927152626e-07, "loss": 0.3158, "step": 29517 }, { "epoch": 2.7700825825825826, "grad_norm": 0.9956571281734874, "learning_rate": 1.7800108425383155e-07, "loss": 0.28, "step": 29518 }, { "epoch": 2.7701764264264264, "grad_norm": 1.081990268603463, "learning_rate": 1.778567332856612e-07, "loss": 0.2934, "step": 29519 }, { "epoch": 2.77027027027027, "grad_norm": 1.075320051602668, "learning_rate": 1.7771243981247189e-07, "loss": 0.3322, "step": 29520 }, { "epoch": 2.770364114114114, "grad_norm": 1.2500464002865757, "learning_rate": 1.775682038359855e-07, "loss": 0.3445, "step": 29521 }, { "epoch": 2.770457957957958, "grad_norm": 1.0055736457244386, "learning_rate": 1.7742402535792068e-07, "loss": 0.3117, "step": 29522 }, { "epoch": 2.770551801801802, "grad_norm": 1.0948136819629704, "learning_rate": 1.7727990437999609e-07, "loss": 0.3256, "step": 29523 }, { "epoch": 2.7706456456456454, "grad_norm": 1.2438261244806863, "learning_rate": 1.77135840903932e-07, "loss": 0.2954, "step": 29524 }, { "epoch": 2.7707394894894897, "grad_norm": 1.2165344130757676, "learning_rate": 1.7699183493144423e-07, "loss": 0.2552, "step": 29525 }, { "epoch": 2.7708333333333335, "grad_norm": 1.162942712184235, "learning_rate": 1.7684788646425032e-07, "loss": 0.2224, "step": 29526 }, { "epoch": 2.7709271771771773, "grad_norm": 1.061293798270913, "learning_rate": 1.7670399550406724e-07, "loss": 0.2959, "step": 29527 }, { "epoch": 2.771021021021021, "grad_norm": 1.1131151156018642, "learning_rate": 1.7656016205260974e-07, "loss": 0.3514, "step": 29528 }, { "epoch": 2.771114864864865, "grad_norm": 1.105373108022214, "learning_rate": 1.764163861115936e-07, "loss": 0.3363, "step": 29529 }, { "epoch": 2.7712087087087087, "grad_norm": 1.4868980563127503, "learning_rate": 1.7627266768273255e-07, "loss": 0.324, "step": 29530 }, { "epoch": 2.7713025525525525, "grad_norm": 1.1339219550940711, "learning_rate": 1.7612900676774015e-07, "loss": 0.325, "step": 29531 }, { "epoch": 2.7713963963963963, "grad_norm": 1.1556763785371984, "learning_rate": 1.7598540336832836e-07, "loss": 0.3281, "step": 29532 }, { "epoch": 2.77149024024024, "grad_norm": 1.2009638212846492, "learning_rate": 1.7584185748621142e-07, "loss": 0.2628, "step": 29533 }, { "epoch": 2.771584084084084, "grad_norm": 1.02234910594648, "learning_rate": 1.7569836912309957e-07, "loss": 0.3014, "step": 29534 }, { "epoch": 2.7716779279279278, "grad_norm": 1.2194950211065363, "learning_rate": 1.7555493828070314e-07, "loss": 0.2993, "step": 29535 }, { "epoch": 2.771771771771772, "grad_norm": 1.1970011665702969, "learning_rate": 1.7541156496073353e-07, "loss": 0.2937, "step": 29536 }, { "epoch": 2.7718656156156154, "grad_norm": 1.4902107131456792, "learning_rate": 1.7526824916489993e-07, "loss": 0.3345, "step": 29537 }, { "epoch": 2.7719594594594597, "grad_norm": 0.8445815232193882, "learning_rate": 1.7512499089491042e-07, "loss": 0.3072, "step": 29538 }, { "epoch": 2.7720533033033035, "grad_norm": 1.0891833241927529, "learning_rate": 1.7498179015247364e-07, "loss": 0.3206, "step": 29539 }, { "epoch": 2.7721471471471473, "grad_norm": 1.4784998222966874, "learning_rate": 1.7483864693929708e-07, "loss": 0.2979, "step": 29540 }, { "epoch": 2.772240990990991, "grad_norm": 1.3268279506869907, "learning_rate": 1.746955612570872e-07, "loss": 0.3194, "step": 29541 }, { "epoch": 2.772334834834835, "grad_norm": 1.0059448338622148, "learning_rate": 1.7455253310755038e-07, "loss": 0.3343, "step": 29542 }, { "epoch": 2.7724286786786787, "grad_norm": 1.1624719721570798, "learning_rate": 1.7440956249239082e-07, "loss": 0.2921, "step": 29543 }, { "epoch": 2.7725225225225225, "grad_norm": 1.12777818791778, "learning_rate": 1.742666494133144e-07, "loss": 0.2969, "step": 29544 }, { "epoch": 2.7726163663663663, "grad_norm": 0.9290323389480332, "learning_rate": 1.7412379387202473e-07, "loss": 0.2979, "step": 29545 }, { "epoch": 2.77271021021021, "grad_norm": 1.0965297538334173, "learning_rate": 1.7398099587022488e-07, "loss": 0.2924, "step": 29546 }, { "epoch": 2.772804054054054, "grad_norm": 1.2908436397632759, "learning_rate": 1.7383825540961684e-07, "loss": 0.265, "step": 29547 }, { "epoch": 2.7728978978978978, "grad_norm": 1.2349447034464751, "learning_rate": 1.7369557249190428e-07, "loss": 0.3221, "step": 29548 }, { "epoch": 2.772991741741742, "grad_norm": 1.0248854773944704, "learning_rate": 1.7355294711878746e-07, "loss": 0.3129, "step": 29549 }, { "epoch": 2.7730855855855854, "grad_norm": 1.240672068560369, "learning_rate": 1.7341037929196613e-07, "loss": 0.2889, "step": 29550 }, { "epoch": 2.7731794294294296, "grad_norm": 1.2136447284035041, "learning_rate": 1.7326786901314173e-07, "loss": 0.3292, "step": 29551 }, { "epoch": 2.773273273273273, "grad_norm": 1.1070485706262996, "learning_rate": 1.7312541628401235e-07, "loss": 0.305, "step": 29552 }, { "epoch": 2.7733671171171173, "grad_norm": 1.1872066531041894, "learning_rate": 1.7298302110627718e-07, "loss": 0.3103, "step": 29553 }, { "epoch": 2.773460960960961, "grad_norm": 0.9586588477758854, "learning_rate": 1.728406834816332e-07, "loss": 0.3138, "step": 29554 }, { "epoch": 2.773554804804805, "grad_norm": 1.2628986230127321, "learning_rate": 1.726984034117779e-07, "loss": 0.3101, "step": 29555 }, { "epoch": 2.7736486486486487, "grad_norm": 1.1397020667771374, "learning_rate": 1.725561808984072e-07, "loss": 0.3338, "step": 29556 }, { "epoch": 2.7737424924924925, "grad_norm": 1.1128519214487576, "learning_rate": 1.7241401594321748e-07, "loss": 0.3245, "step": 29557 }, { "epoch": 2.7738363363363363, "grad_norm": 1.1767631684007651, "learning_rate": 1.722719085479041e-07, "loss": 0.2783, "step": 29558 }, { "epoch": 2.77393018018018, "grad_norm": 1.0590088213962947, "learning_rate": 1.7212985871416011e-07, "loss": 0.3217, "step": 29559 }, { "epoch": 2.774024024024024, "grad_norm": 1.3543289853019136, "learning_rate": 1.7198786644368082e-07, "loss": 0.3279, "step": 29560 }, { "epoch": 2.7741178678678677, "grad_norm": 1.1765411623555047, "learning_rate": 1.7184593173815822e-07, "loss": 0.3232, "step": 29561 }, { "epoch": 2.7742117117117115, "grad_norm": 1.1809797457091218, "learning_rate": 1.717040545992843e-07, "loss": 0.3117, "step": 29562 }, { "epoch": 2.7743055555555554, "grad_norm": 1.1092282413125678, "learning_rate": 1.7156223502875157e-07, "loss": 0.3166, "step": 29563 }, { "epoch": 2.7743993993993996, "grad_norm": 1.1489722394352313, "learning_rate": 1.7142047302825094e-07, "loss": 0.3288, "step": 29564 }, { "epoch": 2.774493243243243, "grad_norm": 1.219962305594971, "learning_rate": 1.7127876859947212e-07, "loss": 0.3374, "step": 29565 }, { "epoch": 2.7745870870870872, "grad_norm": 1.183970146373802, "learning_rate": 1.711371217441049e-07, "loss": 0.3325, "step": 29566 }, { "epoch": 2.774680930930931, "grad_norm": 1.0632193314589424, "learning_rate": 1.7099553246383794e-07, "loss": 0.2758, "step": 29567 }, { "epoch": 2.774774774774775, "grad_norm": 1.067925051122314, "learning_rate": 1.708540007603593e-07, "loss": 0.3281, "step": 29568 }, { "epoch": 2.7748686186186187, "grad_norm": 1.0711180687598065, "learning_rate": 1.707125266353571e-07, "loss": 0.3326, "step": 29569 }, { "epoch": 2.7749624624624625, "grad_norm": 0.9839516936928965, "learning_rate": 1.705711100905183e-07, "loss": 0.2979, "step": 29570 }, { "epoch": 2.7750563063063063, "grad_norm": 1.153740718248161, "learning_rate": 1.7042975112752713e-07, "loss": 0.3007, "step": 29571 }, { "epoch": 2.77515015015015, "grad_norm": 1.2187481058633995, "learning_rate": 1.702884497480717e-07, "loss": 0.3358, "step": 29572 }, { "epoch": 2.775243993993994, "grad_norm": 1.1521613039924585, "learning_rate": 1.701472059538356e-07, "loss": 0.34, "step": 29573 }, { "epoch": 2.7753378378378377, "grad_norm": 1.170710839807351, "learning_rate": 1.70006019746502e-07, "loss": 0.3332, "step": 29574 }, { "epoch": 2.7754316816816815, "grad_norm": 1.1800472823597543, "learning_rate": 1.6986489112775506e-07, "loss": 0.2894, "step": 29575 }, { "epoch": 2.7755255255255253, "grad_norm": 0.9588421469158204, "learning_rate": 1.6972382009927846e-07, "loss": 0.2889, "step": 29576 }, { "epoch": 2.7756193693693696, "grad_norm": 1.0710831776458571, "learning_rate": 1.6958280666275251e-07, "loss": 0.316, "step": 29577 }, { "epoch": 2.775713213213213, "grad_norm": 1.1769176602282287, "learning_rate": 1.6944185081985976e-07, "loss": 0.3191, "step": 29578 }, { "epoch": 2.775807057057057, "grad_norm": 0.9869210962620006, "learning_rate": 1.6930095257227997e-07, "loss": 0.2858, "step": 29579 }, { "epoch": 2.775900900900901, "grad_norm": 1.031768912509125, "learning_rate": 1.6916011192169347e-07, "loss": 0.3043, "step": 29580 }, { "epoch": 2.775994744744745, "grad_norm": 1.1073482096338443, "learning_rate": 1.6901932886977945e-07, "loss": 0.3259, "step": 29581 }, { "epoch": 2.7760885885885886, "grad_norm": 1.1779303916654378, "learning_rate": 1.688786034182166e-07, "loss": 0.3503, "step": 29582 }, { "epoch": 2.7761824324324325, "grad_norm": 1.0274632475393004, "learning_rate": 1.6873793556868245e-07, "loss": 0.3541, "step": 29583 }, { "epoch": 2.7762762762762763, "grad_norm": 1.1399102536532133, "learning_rate": 1.685973253228551e-07, "loss": 0.2833, "step": 29584 }, { "epoch": 2.77637012012012, "grad_norm": 1.225424070292987, "learning_rate": 1.6845677268241046e-07, "loss": 0.3107, "step": 29585 }, { "epoch": 2.776463963963964, "grad_norm": 1.2025597484836632, "learning_rate": 1.683162776490238e-07, "loss": 0.3211, "step": 29586 }, { "epoch": 2.7765578078078077, "grad_norm": 1.0778548340389045, "learning_rate": 1.6817584022437105e-07, "loss": 0.3025, "step": 29587 }, { "epoch": 2.7766516516516515, "grad_norm": 0.9625292864875922, "learning_rate": 1.6803546041012698e-07, "loss": 0.2613, "step": 29588 }, { "epoch": 2.7767454954954953, "grad_norm": 1.5257429998475083, "learning_rate": 1.678951382079641e-07, "loss": 0.2589, "step": 29589 }, { "epoch": 2.7768393393393396, "grad_norm": 1.1485115862452326, "learning_rate": 1.6775487361955666e-07, "loss": 0.3004, "step": 29590 }, { "epoch": 2.776933183183183, "grad_norm": 1.3977993293583189, "learning_rate": 1.6761466664657666e-07, "loss": 0.3203, "step": 29591 }, { "epoch": 2.777027027027027, "grad_norm": 1.1936473328289843, "learning_rate": 1.6747451729069496e-07, "loss": 0.3124, "step": 29592 }, { "epoch": 2.777120870870871, "grad_norm": 1.1124784821898124, "learning_rate": 1.6733442555358414e-07, "loss": 0.264, "step": 29593 }, { "epoch": 2.777214714714715, "grad_norm": 1.2403945031199417, "learning_rate": 1.671943914369134e-07, "loss": 0.3171, "step": 29594 }, { "epoch": 2.7773085585585586, "grad_norm": 1.4864655610383049, "learning_rate": 1.6705441494235253e-07, "loss": 0.3195, "step": 29595 }, { "epoch": 2.7774024024024024, "grad_norm": 2.918903617465059, "learning_rate": 1.6691449607157072e-07, "loss": 0.3145, "step": 29596 }, { "epoch": 2.7774962462462462, "grad_norm": 1.1476838552495072, "learning_rate": 1.6677463482623668e-07, "loss": 0.2929, "step": 29597 }, { "epoch": 2.77759009009009, "grad_norm": 1.2005590761781884, "learning_rate": 1.6663483120801682e-07, "loss": 0.3212, "step": 29598 }, { "epoch": 2.777683933933934, "grad_norm": 1.677376672709943, "learning_rate": 1.6649508521857926e-07, "loss": 0.2988, "step": 29599 }, { "epoch": 2.7777777777777777, "grad_norm": 1.1156089543458347, "learning_rate": 1.663553968595899e-07, "loss": 0.3215, "step": 29600 }, { "epoch": 2.7778716216216215, "grad_norm": 1.131497008504706, "learning_rate": 1.662157661327135e-07, "loss": 0.352, "step": 29601 }, { "epoch": 2.7779654654654653, "grad_norm": 1.2393525415689142, "learning_rate": 1.6607619303961597e-07, "loss": 0.3566, "step": 29602 }, { "epoch": 2.7780593093093096, "grad_norm": 1.0258570209233668, "learning_rate": 1.6593667758196042e-07, "loss": 0.3168, "step": 29603 }, { "epoch": 2.778153153153153, "grad_norm": 1.4646358906201393, "learning_rate": 1.657972197614105e-07, "loss": 0.3025, "step": 29604 }, { "epoch": 2.778246996996997, "grad_norm": 0.9436557625439941, "learning_rate": 1.656578195796299e-07, "loss": 0.3116, "step": 29605 }, { "epoch": 2.778340840840841, "grad_norm": 1.4346704101678298, "learning_rate": 1.6551847703828006e-07, "loss": 0.3378, "step": 29606 }, { "epoch": 2.778434684684685, "grad_norm": 0.9521387703003733, "learning_rate": 1.6537919213902187e-07, "loss": 0.2955, "step": 29607 }, { "epoch": 2.7785285285285286, "grad_norm": 1.0762146906375651, "learning_rate": 1.652399648835168e-07, "loss": 0.3004, "step": 29608 }, { "epoch": 2.7786223723723724, "grad_norm": 1.1137307942970147, "learning_rate": 1.6510079527342516e-07, "loss": 0.3375, "step": 29609 }, { "epoch": 2.7787162162162162, "grad_norm": 1.7493121714804647, "learning_rate": 1.6496168331040508e-07, "loss": 0.3328, "step": 29610 }, { "epoch": 2.77881006006006, "grad_norm": 1.1613389707238937, "learning_rate": 1.6482262899611635e-07, "loss": 0.2781, "step": 29611 }, { "epoch": 2.778903903903904, "grad_norm": 1.150371666632404, "learning_rate": 1.6468363233221652e-07, "loss": 0.33, "step": 29612 }, { "epoch": 2.7789977477477477, "grad_norm": 1.0353107930723828, "learning_rate": 1.6454469332036204e-07, "loss": 0.2784, "step": 29613 }, { "epoch": 2.7790915915915915, "grad_norm": 1.094192115202808, "learning_rate": 1.6440581196221106e-07, "loss": 0.3022, "step": 29614 }, { "epoch": 2.7791854354354353, "grad_norm": 1.5064550728554356, "learning_rate": 1.6426698825941944e-07, "loss": 0.3221, "step": 29615 }, { "epoch": 2.7792792792792795, "grad_norm": 1.3374494197786029, "learning_rate": 1.6412822221364033e-07, "loss": 0.3084, "step": 29616 }, { "epoch": 2.779373123123123, "grad_norm": 0.9505605357321357, "learning_rate": 1.6398951382653018e-07, "loss": 0.3184, "step": 29617 }, { "epoch": 2.779466966966967, "grad_norm": 1.03487279599878, "learning_rate": 1.6385086309974208e-07, "loss": 0.3275, "step": 29618 }, { "epoch": 2.779560810810811, "grad_norm": 1.084826881226315, "learning_rate": 1.6371227003492862e-07, "loss": 0.3477, "step": 29619 }, { "epoch": 2.7796546546546548, "grad_norm": 1.2146805247848689, "learning_rate": 1.6357373463374404e-07, "loss": 0.3524, "step": 29620 }, { "epoch": 2.7797484984984986, "grad_norm": 5.312013559838192, "learning_rate": 1.6343525689783868e-07, "loss": 0.2954, "step": 29621 }, { "epoch": 2.7798423423423424, "grad_norm": 1.0774932237604884, "learning_rate": 1.6329683682886343e-07, "loss": 0.3244, "step": 29622 }, { "epoch": 2.779936186186186, "grad_norm": 1.8884595484938247, "learning_rate": 1.6315847442846977e-07, "loss": 0.3103, "step": 29623 }, { "epoch": 2.78003003003003, "grad_norm": 1.3116410398785308, "learning_rate": 1.6302016969830692e-07, "loss": 0.3025, "step": 29624 }, { "epoch": 2.780123873873874, "grad_norm": 1.1577873268809928, "learning_rate": 1.6288192264002357e-07, "loss": 0.2981, "step": 29625 }, { "epoch": 2.7802177177177176, "grad_norm": 1.0733872737703383, "learning_rate": 1.6274373325526895e-07, "loss": 0.3235, "step": 29626 }, { "epoch": 2.7803115615615615, "grad_norm": 1.2444273022023085, "learning_rate": 1.626056015456895e-07, "loss": 0.351, "step": 29627 }, { "epoch": 2.7804054054054053, "grad_norm": 1.198150462352154, "learning_rate": 1.6246752751293338e-07, "loss": 0.3003, "step": 29628 }, { "epoch": 2.7804992492492495, "grad_norm": 1.124860409314397, "learning_rate": 1.6232951115864593e-07, "loss": 0.2957, "step": 29629 }, { "epoch": 2.780593093093093, "grad_norm": 0.9859552444951533, "learning_rate": 1.6219155248447361e-07, "loss": 0.3345, "step": 29630 }, { "epoch": 2.780686936936937, "grad_norm": 1.22622381362541, "learning_rate": 1.6205365149205953e-07, "loss": 0.3222, "step": 29631 }, { "epoch": 2.7807807807807805, "grad_norm": 0.9651578673338284, "learning_rate": 1.619158081830502e-07, "loss": 0.3047, "step": 29632 }, { "epoch": 2.7808746246246248, "grad_norm": 1.325590801776612, "learning_rate": 1.6177802255908815e-07, "loss": 0.3056, "step": 29633 }, { "epoch": 2.7809684684684686, "grad_norm": 1.7006042149449148, "learning_rate": 1.616402946218154e-07, "loss": 0.3386, "step": 29634 }, { "epoch": 2.7810623123123124, "grad_norm": 1.2934142527673458, "learning_rate": 1.6150262437287513e-07, "loss": 0.3003, "step": 29635 }, { "epoch": 2.781156156156156, "grad_norm": 1.2895903716808141, "learning_rate": 1.6136501181390872e-07, "loss": 0.3037, "step": 29636 }, { "epoch": 2.78125, "grad_norm": 1.1313266388118763, "learning_rate": 1.6122745694655662e-07, "loss": 0.29, "step": 29637 }, { "epoch": 2.781343843843844, "grad_norm": 1.1361520307499984, "learning_rate": 1.610899597724591e-07, "loss": 0.2923, "step": 29638 }, { "epoch": 2.7814376876876876, "grad_norm": 1.0385089584443195, "learning_rate": 1.6095252029325546e-07, "loss": 0.295, "step": 29639 }, { "epoch": 2.7815315315315314, "grad_norm": 1.2063367909340832, "learning_rate": 1.6081513851058495e-07, "loss": 0.3208, "step": 29640 }, { "epoch": 2.7816253753753752, "grad_norm": 1.161391951856904, "learning_rate": 1.6067781442608454e-07, "loss": 0.2792, "step": 29641 }, { "epoch": 2.7817192192192195, "grad_norm": 1.3508398171870102, "learning_rate": 1.6054054804139242e-07, "loss": 0.3175, "step": 29642 }, { "epoch": 2.781813063063063, "grad_norm": 1.2448960350210256, "learning_rate": 1.6040333935814445e-07, "loss": 0.3249, "step": 29643 }, { "epoch": 2.781906906906907, "grad_norm": 1.0853951945755675, "learning_rate": 1.6026618837797713e-07, "loss": 0.2595, "step": 29644 }, { "epoch": 2.7820007507507505, "grad_norm": 1.2231373792348565, "learning_rate": 1.601290951025264e-07, "loss": 0.3284, "step": 29645 }, { "epoch": 2.7820945945945947, "grad_norm": 1.2017264648694288, "learning_rate": 1.599920595334248e-07, "loss": 0.3484, "step": 29646 }, { "epoch": 2.7821884384384385, "grad_norm": 1.69024067782429, "learning_rate": 1.598550816723088e-07, "loss": 0.3036, "step": 29647 }, { "epoch": 2.7822822822822824, "grad_norm": 1.2255909678856254, "learning_rate": 1.5971816152080988e-07, "loss": 0.3442, "step": 29648 }, { "epoch": 2.782376126126126, "grad_norm": 3.3259243162867733, "learning_rate": 1.595812990805601e-07, "loss": 0.3414, "step": 29649 }, { "epoch": 2.78246996996997, "grad_norm": 1.404215258861291, "learning_rate": 1.594444943531931e-07, "loss": 0.3501, "step": 29650 }, { "epoch": 2.782563813813814, "grad_norm": 1.0904500521499805, "learning_rate": 1.5930774734033926e-07, "loss": 0.301, "step": 29651 }, { "epoch": 2.7826576576576576, "grad_norm": 1.0413665351694035, "learning_rate": 1.5917105804362843e-07, "loss": 0.3, "step": 29652 }, { "epoch": 2.7827515015015014, "grad_norm": 1.48099365655994, "learning_rate": 1.5903442646469146e-07, "loss": 0.3211, "step": 29653 }, { "epoch": 2.7828453453453452, "grad_norm": 1.2581782880489898, "learning_rate": 1.58897852605156e-07, "loss": 0.266, "step": 29654 }, { "epoch": 2.782939189189189, "grad_norm": 1.1846384991411043, "learning_rate": 1.5876133646665126e-07, "loss": 0.3271, "step": 29655 }, { "epoch": 2.783033033033033, "grad_norm": 1.0044022241026633, "learning_rate": 1.586248780508054e-07, "loss": 0.3116, "step": 29656 }, { "epoch": 2.783126876876877, "grad_norm": 1.1683589431803956, "learning_rate": 1.5848847735924432e-07, "loss": 0.2769, "step": 29657 }, { "epoch": 2.7832207207207205, "grad_norm": 1.314617610885643, "learning_rate": 1.5835213439359508e-07, "loss": 0.2869, "step": 29658 }, { "epoch": 2.7833145645645647, "grad_norm": 1.4643681332171536, "learning_rate": 1.5821584915548304e-07, "loss": 0.2865, "step": 29659 }, { "epoch": 2.7834084084084085, "grad_norm": 1.0220457181063451, "learning_rate": 1.5807962164653413e-07, "loss": 0.3203, "step": 29660 }, { "epoch": 2.7835022522522523, "grad_norm": 1.073409369820198, "learning_rate": 1.5794345186837034e-07, "loss": 0.2773, "step": 29661 }, { "epoch": 2.783596096096096, "grad_norm": 1.0056257472649162, "learning_rate": 1.5780733982261764e-07, "loss": 0.3059, "step": 29662 }, { "epoch": 2.78368993993994, "grad_norm": 1.0641417133775952, "learning_rate": 1.5767128551089804e-07, "loss": 0.3118, "step": 29663 }, { "epoch": 2.7837837837837838, "grad_norm": 1.2647362580545172, "learning_rate": 1.575352889348336e-07, "loss": 0.3001, "step": 29664 }, { "epoch": 2.7838776276276276, "grad_norm": 1.1151109908309402, "learning_rate": 1.573993500960458e-07, "loss": 0.3168, "step": 29665 }, { "epoch": 2.7839714714714714, "grad_norm": 1.446621142430198, "learning_rate": 1.5726346899615553e-07, "loss": 0.2871, "step": 29666 }, { "epoch": 2.784065315315315, "grad_norm": 1.3305282134635525, "learning_rate": 1.5712764563678207e-07, "loss": 0.2938, "step": 29667 }, { "epoch": 2.784159159159159, "grad_norm": 1.2234536798486653, "learning_rate": 1.5699188001954634e-07, "loss": 0.2866, "step": 29668 }, { "epoch": 2.784253003003003, "grad_norm": 1.233390614732585, "learning_rate": 1.5685617214606652e-07, "loss": 0.3382, "step": 29669 }, { "epoch": 2.784346846846847, "grad_norm": 1.106391418956922, "learning_rate": 1.5672052201795962e-07, "loss": 0.3339, "step": 29670 }, { "epoch": 2.7844406906906904, "grad_norm": 1.2522639085998624, "learning_rate": 1.565849296368449e-07, "loss": 0.2565, "step": 29671 }, { "epoch": 2.7845345345345347, "grad_norm": 1.3869448375960345, "learning_rate": 1.5644939500433777e-07, "loss": 0.3232, "step": 29672 }, { "epoch": 2.7846283783783785, "grad_norm": 1.1409536367210846, "learning_rate": 1.5631391812205464e-07, "loss": 0.2884, "step": 29673 }, { "epoch": 2.7847222222222223, "grad_norm": 1.092264095593974, "learning_rate": 1.5617849899161042e-07, "loss": 0.3051, "step": 29674 }, { "epoch": 2.784816066066066, "grad_norm": 1.415122428295141, "learning_rate": 1.56043137614621e-07, "loss": 0.2908, "step": 29675 }, { "epoch": 2.78490990990991, "grad_norm": 1.1049729633672158, "learning_rate": 1.5590783399269892e-07, "loss": 0.3098, "step": 29676 }, { "epoch": 2.7850037537537538, "grad_norm": 0.9553108022361287, "learning_rate": 1.5577258812745743e-07, "loss": 0.313, "step": 29677 }, { "epoch": 2.7850975975975976, "grad_norm": 1.2855876459397024, "learning_rate": 1.556374000205102e-07, "loss": 0.3395, "step": 29678 }, { "epoch": 2.7851914414414414, "grad_norm": 1.198795398460127, "learning_rate": 1.5550226967346704e-07, "loss": 0.2935, "step": 29679 }, { "epoch": 2.785285285285285, "grad_norm": 1.4049435859395405, "learning_rate": 1.5536719708794168e-07, "loss": 0.3229, "step": 29680 }, { "epoch": 2.785379129129129, "grad_norm": 1.2583546114168644, "learning_rate": 1.5523218226554337e-07, "loss": 0.2807, "step": 29681 }, { "epoch": 2.785472972972973, "grad_norm": 1.0965241303294317, "learning_rate": 1.550972252078814e-07, "loss": 0.318, "step": 29682 }, { "epoch": 2.785566816816817, "grad_norm": 1.1229628308768167, "learning_rate": 1.5496232591656556e-07, "loss": 0.3171, "step": 29683 }, { "epoch": 2.7856606606606604, "grad_norm": 2.9793515837554874, "learning_rate": 1.548274843932046e-07, "loss": 0.2985, "step": 29684 }, { "epoch": 2.7857545045045047, "grad_norm": 1.1458241788167567, "learning_rate": 1.54692700639405e-07, "loss": 0.3149, "step": 29685 }, { "epoch": 2.7858483483483485, "grad_norm": 1.1660536460480462, "learning_rate": 1.5455797465677546e-07, "loss": 0.3083, "step": 29686 }, { "epoch": 2.7859421921921923, "grad_norm": 1.0253065775720072, "learning_rate": 1.5442330644692082e-07, "loss": 0.301, "step": 29687 }, { "epoch": 2.786036036036036, "grad_norm": 1.2290873849414579, "learning_rate": 1.5428869601144815e-07, "loss": 0.3213, "step": 29688 }, { "epoch": 2.78612987987988, "grad_norm": 1.0909865871702684, "learning_rate": 1.541541433519611e-07, "loss": 0.315, "step": 29689 }, { "epoch": 2.7862237237237237, "grad_norm": 1.18068519075551, "learning_rate": 1.5401964847006455e-07, "loss": 0.3128, "step": 29690 }, { "epoch": 2.7863175675675675, "grad_norm": 1.0136433105005869, "learning_rate": 1.5388521136736224e-07, "loss": 0.318, "step": 29691 }, { "epoch": 2.7864114114114114, "grad_norm": 1.3227452704508131, "learning_rate": 1.5375083204545672e-07, "loss": 0.3222, "step": 29692 }, { "epoch": 2.786505255255255, "grad_norm": 1.1199871914644208, "learning_rate": 1.536165105059506e-07, "loss": 0.3115, "step": 29693 }, { "epoch": 2.786599099099099, "grad_norm": 1.3824697267498063, "learning_rate": 1.5348224675044487e-07, "loss": 0.2824, "step": 29694 }, { "epoch": 2.786692942942943, "grad_norm": 1.1004828018797175, "learning_rate": 1.5334804078054098e-07, "loss": 0.2546, "step": 29695 }, { "epoch": 2.786786786786787, "grad_norm": 1.0948723297767435, "learning_rate": 1.532138925978388e-07, "loss": 0.338, "step": 29696 }, { "epoch": 2.7868806306306304, "grad_norm": 1.0668965016192613, "learning_rate": 1.530798022039376e-07, "loss": 0.3161, "step": 29697 }, { "epoch": 2.7869744744744747, "grad_norm": 1.0924148565889402, "learning_rate": 1.5294576960043662e-07, "loss": 0.318, "step": 29698 }, { "epoch": 2.7870683183183185, "grad_norm": 1.1680009634885187, "learning_rate": 1.5281179478893347e-07, "loss": 0.2934, "step": 29699 }, { "epoch": 2.7871621621621623, "grad_norm": 1.2151589036459687, "learning_rate": 1.526778777710264e-07, "loss": 0.362, "step": 29700 }, { "epoch": 2.787256006006006, "grad_norm": 1.3467784748054221, "learning_rate": 1.525440185483107e-07, "loss": 0.3285, "step": 29701 }, { "epoch": 2.78734984984985, "grad_norm": 1.183644608148105, "learning_rate": 1.5241021712238402e-07, "loss": 0.2953, "step": 29702 }, { "epoch": 2.7874436936936937, "grad_norm": 1.1957065472775525, "learning_rate": 1.5227647349483953e-07, "loss": 0.3143, "step": 29703 }, { "epoch": 2.7875375375375375, "grad_norm": 0.955125331632398, "learning_rate": 1.521427876672743e-07, "loss": 0.2907, "step": 29704 }, { "epoch": 2.7876313813813813, "grad_norm": 1.0447604812674012, "learning_rate": 1.5200915964128093e-07, "loss": 0.3234, "step": 29705 }, { "epoch": 2.787725225225225, "grad_norm": 1.023384301770074, "learning_rate": 1.518755894184526e-07, "loss": 0.3302, "step": 29706 }, { "epoch": 2.787819069069069, "grad_norm": 1.1240912642117686, "learning_rate": 1.5174207700038246e-07, "loss": 0.2942, "step": 29707 }, { "epoch": 2.7879129129129128, "grad_norm": 1.057661505547605, "learning_rate": 1.5160862238866257e-07, "loss": 0.2782, "step": 29708 }, { "epoch": 2.788006756756757, "grad_norm": 1.0774246401610328, "learning_rate": 1.514752255848828e-07, "loss": 0.2884, "step": 29709 }, { "epoch": 2.7881006006006004, "grad_norm": 1.0656373583956817, "learning_rate": 1.513418865906352e-07, "loss": 0.3269, "step": 29710 }, { "epoch": 2.7881944444444446, "grad_norm": 1.5980476259343717, "learning_rate": 1.5120860540750904e-07, "loss": 0.3189, "step": 29711 }, { "epoch": 2.7882882882882885, "grad_norm": 1.1400675170805736, "learning_rate": 1.5107538203709303e-07, "loss": 0.31, "step": 29712 }, { "epoch": 2.7883821321321323, "grad_norm": 1.233323929954233, "learning_rate": 1.5094221648097595e-07, "loss": 0.3056, "step": 29713 }, { "epoch": 2.788475975975976, "grad_norm": 1.2988329950512862, "learning_rate": 1.5080910874074593e-07, "loss": 0.3275, "step": 29714 }, { "epoch": 2.78856981981982, "grad_norm": 1.0602681239337577, "learning_rate": 1.5067605881798897e-07, "loss": 0.3085, "step": 29715 }, { "epoch": 2.7886636636636637, "grad_norm": 1.044931875629075, "learning_rate": 1.5054306671429264e-07, "loss": 0.2784, "step": 29716 }, { "epoch": 2.7887575075075075, "grad_norm": 0.9827376426501392, "learning_rate": 1.504101324312418e-07, "loss": 0.2588, "step": 29717 }, { "epoch": 2.7888513513513513, "grad_norm": 1.031932283599824, "learning_rate": 1.502772559704213e-07, "loss": 0.3156, "step": 29718 }, { "epoch": 2.788945195195195, "grad_norm": 1.1893153685422453, "learning_rate": 1.5014443733341654e-07, "loss": 0.3075, "step": 29719 }, { "epoch": 2.789039039039039, "grad_norm": 1.0712908510908705, "learning_rate": 1.5001167652181014e-07, "loss": 0.2558, "step": 29720 }, { "epoch": 2.7891328828828827, "grad_norm": 1.3227459506292198, "learning_rate": 1.4987897353718527e-07, "loss": 0.3393, "step": 29721 }, { "epoch": 2.789226726726727, "grad_norm": 1.1282832305991948, "learning_rate": 1.4974632838112456e-07, "loss": 0.3232, "step": 29722 }, { "epoch": 2.7893205705705704, "grad_norm": 1.094487093019426, "learning_rate": 1.49613741055209e-07, "loss": 0.293, "step": 29723 }, { "epoch": 2.7894144144144146, "grad_norm": 1.0590013422996214, "learning_rate": 1.4948121156102003e-07, "loss": 0.2903, "step": 29724 }, { "epoch": 2.789508258258258, "grad_norm": 1.3185215101891479, "learning_rate": 1.49348739900137e-07, "loss": 0.3342, "step": 29725 }, { "epoch": 2.7896021021021022, "grad_norm": 1.1729286371938332, "learning_rate": 1.4921632607414026e-07, "loss": 0.3296, "step": 29726 }, { "epoch": 2.789695945945946, "grad_norm": 0.9446920197005872, "learning_rate": 1.490839700846075e-07, "loss": 0.3074, "step": 29727 }, { "epoch": 2.78978978978979, "grad_norm": 1.5213770067963066, "learning_rate": 1.4895167193311798e-07, "loss": 0.2932, "step": 29728 }, { "epoch": 2.7898836336336337, "grad_norm": 1.0917977903585154, "learning_rate": 1.4881943162124822e-07, "loss": 0.3071, "step": 29729 }, { "epoch": 2.7899774774774775, "grad_norm": 1.2640956133136054, "learning_rate": 1.4868724915057532e-07, "loss": 0.3125, "step": 29730 }, { "epoch": 2.7900713213213213, "grad_norm": 1.1510405669951056, "learning_rate": 1.4855512452267573e-07, "loss": 0.3382, "step": 29731 }, { "epoch": 2.790165165165165, "grad_norm": 1.2525503176407862, "learning_rate": 1.4842305773912436e-07, "loss": 0.3484, "step": 29732 }, { "epoch": 2.790259009009009, "grad_norm": 1.6953016844123407, "learning_rate": 1.4829104880149548e-07, "loss": 0.3257, "step": 29733 }, { "epoch": 2.7903528528528527, "grad_norm": 1.24774264549971, "learning_rate": 1.4815909771136395e-07, "loss": 0.344, "step": 29734 }, { "epoch": 2.7904466966966965, "grad_norm": 1.2055030861791167, "learning_rate": 1.480272044703024e-07, "loss": 0.3119, "step": 29735 }, { "epoch": 2.7905405405405403, "grad_norm": 1.0846901450674102, "learning_rate": 1.478953690798829e-07, "loss": 0.3164, "step": 29736 }, { "epoch": 2.7906343843843846, "grad_norm": 1.0426583490388739, "learning_rate": 1.4776359154167863e-07, "loss": 0.3205, "step": 29737 }, { "epoch": 2.790728228228228, "grad_norm": 1.1694492179896145, "learning_rate": 1.4763187185726057e-07, "loss": 0.3338, "step": 29738 }, { "epoch": 2.7908220720720722, "grad_norm": 1.190936144263934, "learning_rate": 1.4750021002819747e-07, "loss": 0.3105, "step": 29739 }, { "epoch": 2.790915915915916, "grad_norm": 0.9925008215335933, "learning_rate": 1.473686060560614e-07, "loss": 0.2873, "step": 29740 }, { "epoch": 2.79100975975976, "grad_norm": 1.0828710767507586, "learning_rate": 1.4723705994242054e-07, "loss": 0.3072, "step": 29741 }, { "epoch": 2.7911036036036037, "grad_norm": 1.4867083298755845, "learning_rate": 1.4710557168884309e-07, "loss": 0.3357, "step": 29742 }, { "epoch": 2.7911974474474475, "grad_norm": 0.9668182913898044, "learning_rate": 1.4697414129689724e-07, "loss": 0.3198, "step": 29743 }, { "epoch": 2.7912912912912913, "grad_norm": 1.117019646132621, "learning_rate": 1.4684276876815006e-07, "loss": 0.2974, "step": 29744 }, { "epoch": 2.791385135135135, "grad_norm": 1.0988393262626817, "learning_rate": 1.4671145410416698e-07, "loss": 0.3126, "step": 29745 }, { "epoch": 2.791478978978979, "grad_norm": 1.2775525391518368, "learning_rate": 1.4658019730651508e-07, "loss": 0.3132, "step": 29746 }, { "epoch": 2.7915728228228227, "grad_norm": 1.0256635829599525, "learning_rate": 1.4644899837675862e-07, "loss": 0.311, "step": 29747 }, { "epoch": 2.7916666666666665, "grad_norm": 1.1221601058856938, "learning_rate": 1.4631785731646142e-07, "loss": 0.249, "step": 29748 }, { "epoch": 2.7917605105105103, "grad_norm": 0.9892696802261399, "learning_rate": 1.4618677412718828e-07, "loss": 0.3504, "step": 29749 }, { "epoch": 2.7918543543543546, "grad_norm": 1.4158360968021682, "learning_rate": 1.4605574881050132e-07, "loss": 0.3318, "step": 29750 }, { "epoch": 2.791948198198198, "grad_norm": 1.228896525573167, "learning_rate": 1.459247813679632e-07, "loss": 0.3163, "step": 29751 }, { "epoch": 2.792042042042042, "grad_norm": 1.0401491459920578, "learning_rate": 1.4579387180113535e-07, "loss": 0.37, "step": 29752 }, { "epoch": 2.792135885885886, "grad_norm": 1.0204061534149522, "learning_rate": 1.456630201115783e-07, "loss": 0.3197, "step": 29753 }, { "epoch": 2.79222972972973, "grad_norm": 1.2063232373101231, "learning_rate": 1.4553222630085184e-07, "loss": 0.3201, "step": 29754 }, { "epoch": 2.7923235735735736, "grad_norm": 1.2558502521222443, "learning_rate": 1.4540149037051643e-07, "loss": 0.3123, "step": 29755 }, { "epoch": 2.7924174174174174, "grad_norm": 1.2510132182678635, "learning_rate": 1.4527081232213024e-07, "loss": 0.282, "step": 29756 }, { "epoch": 2.7925112612612613, "grad_norm": 1.2103868786380427, "learning_rate": 1.451401921572515e-07, "loss": 0.3429, "step": 29757 }, { "epoch": 2.792605105105105, "grad_norm": 1.109673612997323, "learning_rate": 1.4500962987743783e-07, "loss": 0.3135, "step": 29758 }, { "epoch": 2.792698948948949, "grad_norm": 0.9799390710170971, "learning_rate": 1.4487912548424576e-07, "loss": 0.2812, "step": 29759 }, { "epoch": 2.7927927927927927, "grad_norm": 1.0231690985799515, "learning_rate": 1.4474867897923072e-07, "loss": 0.3081, "step": 29760 }, { "epoch": 2.7928866366366365, "grad_norm": 1.0708359934323837, "learning_rate": 1.4461829036394925e-07, "loss": 0.3029, "step": 29761 }, { "epoch": 2.7929804804804803, "grad_norm": 1.2593235620977696, "learning_rate": 1.444879596399551e-07, "loss": 0.338, "step": 29762 }, { "epoch": 2.7930743243243246, "grad_norm": 1.359985446222133, "learning_rate": 1.4435768680880312e-07, "loss": 0.3119, "step": 29763 }, { "epoch": 2.793168168168168, "grad_norm": 1.0839033494756334, "learning_rate": 1.4422747187204543e-07, "loss": 0.2959, "step": 29764 }, { "epoch": 2.793262012012012, "grad_norm": 1.1446028406999669, "learning_rate": 1.4409731483123467e-07, "loss": 0.2861, "step": 29765 }, { "epoch": 2.793355855855856, "grad_norm": 1.2398292019335813, "learning_rate": 1.439672156879235e-07, "loss": 0.304, "step": 29766 }, { "epoch": 2.7934496996997, "grad_norm": 1.1228573218094549, "learning_rate": 1.4383717444366285e-07, "loss": 0.3018, "step": 29767 }, { "epoch": 2.7935435435435436, "grad_norm": 1.5224357707241, "learning_rate": 1.437071911000032e-07, "loss": 0.2923, "step": 29768 }, { "epoch": 2.7936373873873874, "grad_norm": 1.1443261021066544, "learning_rate": 1.4357726565849328e-07, "loss": 0.3192, "step": 29769 }, { "epoch": 2.7937312312312312, "grad_norm": 1.6745677849836171, "learning_rate": 1.4344739812068408e-07, "loss": 0.3088, "step": 29770 }, { "epoch": 2.793825075075075, "grad_norm": 1.1932907732817166, "learning_rate": 1.4331758848812328e-07, "loss": 0.3287, "step": 29771 }, { "epoch": 2.793918918918919, "grad_norm": 1.0131384729743935, "learning_rate": 1.4318783676235793e-07, "loss": 0.3441, "step": 29772 }, { "epoch": 2.7940127627627627, "grad_norm": 1.3435206817186893, "learning_rate": 1.4305814294493568e-07, "loss": 0.2944, "step": 29773 }, { "epoch": 2.7941066066066065, "grad_norm": 1.1233926848161395, "learning_rate": 1.4292850703740313e-07, "loss": 0.2618, "step": 29774 }, { "epoch": 2.7942004504504503, "grad_norm": 1.1180092001403574, "learning_rate": 1.4279892904130566e-07, "loss": 0.3122, "step": 29775 }, { "epoch": 2.7942942942942945, "grad_norm": 1.102965454131048, "learning_rate": 1.426694089581887e-07, "loss": 0.29, "step": 29776 }, { "epoch": 2.794388138138138, "grad_norm": 2.0525456467402554, "learning_rate": 1.4253994678959548e-07, "loss": 0.3176, "step": 29777 }, { "epoch": 2.794481981981982, "grad_norm": 1.290116595748073, "learning_rate": 1.4241054253706976e-07, "loss": 0.3504, "step": 29778 }, { "epoch": 2.794575825825826, "grad_norm": 1.2583357693883603, "learning_rate": 1.422811962021553e-07, "loss": 0.3463, "step": 29779 }, { "epoch": 2.79466966966967, "grad_norm": 1.3343990934333014, "learning_rate": 1.4215190778639364e-07, "loss": 0.2936, "step": 29780 }, { "epoch": 2.7947635135135136, "grad_norm": 1.0184107856890574, "learning_rate": 1.4202267729132636e-07, "loss": 0.343, "step": 29781 }, { "epoch": 2.7948573573573574, "grad_norm": 1.0694912175012663, "learning_rate": 1.4189350471849494e-07, "loss": 0.2929, "step": 29782 }, { "epoch": 2.794951201201201, "grad_norm": 1.5880941597914435, "learning_rate": 1.4176439006943877e-07, "loss": 0.3269, "step": 29783 }, { "epoch": 2.795045045045045, "grad_norm": 1.1173637288670326, "learning_rate": 1.4163533334569768e-07, "loss": 0.2761, "step": 29784 }, { "epoch": 2.795138888888889, "grad_norm": 1.3823715116808466, "learning_rate": 1.4150633454880992e-07, "loss": 0.2899, "step": 29785 }, { "epoch": 2.7952327327327327, "grad_norm": 1.4347432020988056, "learning_rate": 1.4137739368031477e-07, "loss": 0.3348, "step": 29786 }, { "epoch": 2.7953265765765765, "grad_norm": 1.3242246265331412, "learning_rate": 1.4124851074174828e-07, "loss": 0.3802, "step": 29787 }, { "epoch": 2.7954204204204203, "grad_norm": 1.4080111197000011, "learning_rate": 1.4111968573464806e-07, "loss": 0.3076, "step": 29788 }, { "epoch": 2.7955142642642645, "grad_norm": 1.1512581656790526, "learning_rate": 1.4099091866054902e-07, "loss": 0.3104, "step": 29789 }, { "epoch": 2.795608108108108, "grad_norm": 1.1583856189369122, "learning_rate": 1.4086220952098717e-07, "loss": 0.3419, "step": 29790 }, { "epoch": 2.795701951951952, "grad_norm": 1.098107712769645, "learning_rate": 1.4073355831749736e-07, "loss": 0.3019, "step": 29791 }, { "epoch": 2.795795795795796, "grad_norm": 1.2081154779911967, "learning_rate": 1.4060496505161336e-07, "loss": 0.2715, "step": 29792 }, { "epoch": 2.7958896396396398, "grad_norm": 1.0501222590506978, "learning_rate": 1.4047642972486786e-07, "loss": 0.306, "step": 29793 }, { "epoch": 2.7959834834834836, "grad_norm": 1.1346713150429573, "learning_rate": 1.4034795233879406e-07, "loss": 0.293, "step": 29794 }, { "epoch": 2.7960773273273274, "grad_norm": 1.1489713541620592, "learning_rate": 1.4021953289492407e-07, "loss": 0.2971, "step": 29795 }, { "epoch": 2.796171171171171, "grad_norm": 1.1589700219582764, "learning_rate": 1.4009117139478723e-07, "loss": 0.3638, "step": 29796 }, { "epoch": 2.796265015015015, "grad_norm": 1.136760950103844, "learning_rate": 1.3996286783991675e-07, "loss": 0.3139, "step": 29797 }, { "epoch": 2.796358858858859, "grad_norm": 1.1004763497664094, "learning_rate": 1.398346222318403e-07, "loss": 0.3319, "step": 29798 }, { "epoch": 2.7964527027027026, "grad_norm": 1.120410764330078, "learning_rate": 1.3970643457208778e-07, "loss": 0.3476, "step": 29799 }, { "epoch": 2.7965465465465464, "grad_norm": 1.0438537208564072, "learning_rate": 1.3957830486218793e-07, "loss": 0.2933, "step": 29800 }, { "epoch": 2.7966403903903903, "grad_norm": 1.2630790647778476, "learning_rate": 1.3945023310366735e-07, "loss": 0.2679, "step": 29801 }, { "epoch": 2.7967342342342345, "grad_norm": 1.2218116348637693, "learning_rate": 1.3932221929805367e-07, "loss": 0.3211, "step": 29802 }, { "epoch": 2.796828078078078, "grad_norm": 1.1585330146749355, "learning_rate": 1.3919426344687348e-07, "loss": 0.2785, "step": 29803 }, { "epoch": 2.796921921921922, "grad_norm": 1.1362788080605177, "learning_rate": 1.390663655516522e-07, "loss": 0.3108, "step": 29804 }, { "epoch": 2.7970157657657655, "grad_norm": 1.1494888548614557, "learning_rate": 1.3893852561391418e-07, "loss": 0.3003, "step": 29805 }, { "epoch": 2.7971096096096097, "grad_norm": 1.2376026633092008, "learning_rate": 1.3881074363518488e-07, "loss": 0.3304, "step": 29806 }, { "epoch": 2.7972034534534536, "grad_norm": 1.220080597917223, "learning_rate": 1.386830196169875e-07, "loss": 0.3021, "step": 29807 }, { "epoch": 2.7972972972972974, "grad_norm": 1.474581159427546, "learning_rate": 1.385553535608436e-07, "loss": 0.3368, "step": 29808 }, { "epoch": 2.797391141141141, "grad_norm": 1.1654986262347244, "learning_rate": 1.3842774546827698e-07, "loss": 0.301, "step": 29809 }, { "epoch": 2.797484984984985, "grad_norm": 1.1298350934875607, "learning_rate": 1.3830019534080864e-07, "loss": 0.2963, "step": 29810 }, { "epoch": 2.797578828828829, "grad_norm": 1.6636115263203992, "learning_rate": 1.3817270317995957e-07, "loss": 0.3477, "step": 29811 }, { "epoch": 2.7976726726726726, "grad_norm": 1.2109418507877714, "learning_rate": 1.3804526898724912e-07, "loss": 0.3216, "step": 29812 }, { "epoch": 2.7977665165165164, "grad_norm": 1.1830341601619647, "learning_rate": 1.379178927641972e-07, "loss": 0.3076, "step": 29813 }, { "epoch": 2.7978603603603602, "grad_norm": 1.105906122076521, "learning_rate": 1.3779057451232204e-07, "loss": 0.3109, "step": 29814 }, { "epoch": 2.797954204204204, "grad_norm": 1.3219410755145797, "learning_rate": 1.3766331423314294e-07, "loss": 0.3236, "step": 29815 }, { "epoch": 2.798048048048048, "grad_norm": 1.1293997931107702, "learning_rate": 1.3753611192817595e-07, "loss": 0.3351, "step": 29816 }, { "epoch": 2.798141891891892, "grad_norm": 1.320717588317879, "learning_rate": 1.3740896759893763e-07, "loss": 0.2858, "step": 29817 }, { "epoch": 2.7982357357357355, "grad_norm": 1.0579263255788778, "learning_rate": 1.3728188124694564e-07, "loss": 0.3138, "step": 29818 }, { "epoch": 2.7983295795795797, "grad_norm": 0.9781286650979749, "learning_rate": 1.3715485287371322e-07, "loss": 0.334, "step": 29819 }, { "epoch": 2.7984234234234235, "grad_norm": 1.184743592289676, "learning_rate": 1.3702788248075583e-07, "loss": 0.3522, "step": 29820 }, { "epoch": 2.7985172672672673, "grad_norm": 1.0815335913257766, "learning_rate": 1.369009700695878e-07, "loss": 0.3069, "step": 29821 }, { "epoch": 2.798611111111111, "grad_norm": 1.5065897948074927, "learning_rate": 1.3677411564172182e-07, "loss": 0.3117, "step": 29822 }, { "epoch": 2.798704954954955, "grad_norm": 1.219176063549194, "learning_rate": 1.3664731919867058e-07, "loss": 0.3199, "step": 29823 }, { "epoch": 2.798798798798799, "grad_norm": 1.3712769232674276, "learning_rate": 1.3652058074194564e-07, "loss": 0.2902, "step": 29824 }, { "epoch": 2.7988926426426426, "grad_norm": 1.1902454467147465, "learning_rate": 1.36393900273058e-07, "loss": 0.2848, "step": 29825 }, { "epoch": 2.7989864864864864, "grad_norm": 1.1011967149563269, "learning_rate": 1.362672777935181e-07, "loss": 0.2914, "step": 29826 }, { "epoch": 2.79908033033033, "grad_norm": 1.0399676434281986, "learning_rate": 1.3614071330483593e-07, "loss": 0.2999, "step": 29827 }, { "epoch": 2.799174174174174, "grad_norm": 1.2318141214986, "learning_rate": 1.3601420680852074e-07, "loss": 0.3042, "step": 29828 }, { "epoch": 2.799268018018018, "grad_norm": 1.343530342682389, "learning_rate": 1.3588775830608025e-07, "loss": 0.3319, "step": 29829 }, { "epoch": 2.799361861861862, "grad_norm": 1.0092592135963414, "learning_rate": 1.3576136779902328e-07, "loss": 0.3228, "step": 29830 }, { "epoch": 2.7994557057057055, "grad_norm": 1.1291126562433003, "learning_rate": 1.3563503528885523e-07, "loss": 0.3152, "step": 29831 }, { "epoch": 2.7995495495495497, "grad_norm": 1.4847048045429656, "learning_rate": 1.355087607770833e-07, "loss": 0.3093, "step": 29832 }, { "epoch": 2.7996433933933935, "grad_norm": 1.1702562492602797, "learning_rate": 1.353825442652129e-07, "loss": 0.3461, "step": 29833 }, { "epoch": 2.7997372372372373, "grad_norm": 1.002944045086082, "learning_rate": 1.3525638575474953e-07, "loss": 0.3108, "step": 29834 }, { "epoch": 2.799831081081081, "grad_norm": 1.2886728139455847, "learning_rate": 1.351302852471964e-07, "loss": 0.2786, "step": 29835 }, { "epoch": 2.799924924924925, "grad_norm": 1.205994444748867, "learning_rate": 1.3500424274405787e-07, "loss": 0.335, "step": 29836 }, { "epoch": 2.8000187687687688, "grad_norm": 1.1945776361251736, "learning_rate": 1.348782582468361e-07, "loss": 0.277, "step": 29837 }, { "epoch": 2.8001126126126126, "grad_norm": 2.1890701682072433, "learning_rate": 1.3475233175703262e-07, "loss": 0.3305, "step": 29838 }, { "epoch": 2.8002064564564564, "grad_norm": 1.3835927804907746, "learning_rate": 1.346264632761507e-07, "loss": 0.3464, "step": 29839 }, { "epoch": 2.8003003003003, "grad_norm": 1.0739841650927096, "learning_rate": 1.345006528056897e-07, "loss": 0.3352, "step": 29840 }, { "epoch": 2.800394144144144, "grad_norm": 1.2567047771412927, "learning_rate": 1.343749003471495e-07, "loss": 0.3246, "step": 29841 }, { "epoch": 2.800487987987988, "grad_norm": 1.2997201498176938, "learning_rate": 1.3424920590203117e-07, "loss": 0.3405, "step": 29842 }, { "epoch": 2.800581831831832, "grad_norm": 1.258440949294754, "learning_rate": 1.3412356947183125e-07, "loss": 0.3047, "step": 29843 }, { "epoch": 2.8006756756756754, "grad_norm": 1.216796409411589, "learning_rate": 1.3399799105804913e-07, "loss": 0.3279, "step": 29844 }, { "epoch": 2.8007695195195197, "grad_norm": 1.0643348418526852, "learning_rate": 1.338724706621819e-07, "loss": 0.2962, "step": 29845 }, { "epoch": 2.8008633633633635, "grad_norm": 1.2611264898553773, "learning_rate": 1.3374700828572562e-07, "loss": 0.3076, "step": 29846 }, { "epoch": 2.8009572072072073, "grad_norm": 0.9463134562304979, "learning_rate": 1.3362160393017632e-07, "loss": 0.3041, "step": 29847 }, { "epoch": 2.801051051051051, "grad_norm": 1.1135282519420213, "learning_rate": 1.3349625759703e-07, "loss": 0.3247, "step": 29848 }, { "epoch": 2.801144894894895, "grad_norm": 1.1074657859139196, "learning_rate": 1.3337096928777992e-07, "loss": 0.3533, "step": 29849 }, { "epoch": 2.8012387387387387, "grad_norm": 1.0796856786735822, "learning_rate": 1.3324573900392047e-07, "loss": 0.3151, "step": 29850 }, { "epoch": 2.8013325825825826, "grad_norm": 1.6458642796254912, "learning_rate": 1.331205667469454e-07, "loss": 0.2986, "step": 29851 }, { "epoch": 2.8014264264264264, "grad_norm": 1.141545427267947, "learning_rate": 1.3299545251834634e-07, "loss": 0.3121, "step": 29852 }, { "epoch": 2.80152027027027, "grad_norm": 1.001988171832021, "learning_rate": 1.3287039631961484e-07, "loss": 0.2986, "step": 29853 }, { "epoch": 2.801614114114114, "grad_norm": 1.083999233240087, "learning_rate": 1.3274539815224308e-07, "loss": 0.3038, "step": 29854 }, { "epoch": 2.801707957957958, "grad_norm": 1.3368242593410757, "learning_rate": 1.3262045801772093e-07, "loss": 0.3008, "step": 29855 }, { "epoch": 2.801801801801802, "grad_norm": 1.3990612151840258, "learning_rate": 1.3249557591753726e-07, "loss": 0.2678, "step": 29856 }, { "epoch": 2.8018956456456454, "grad_norm": 1.0404299085401454, "learning_rate": 1.323707518531825e-07, "loss": 0.2692, "step": 29857 }, { "epoch": 2.8019894894894897, "grad_norm": 1.171457456160333, "learning_rate": 1.322459858261438e-07, "loss": 0.3437, "step": 29858 }, { "epoch": 2.8020833333333335, "grad_norm": 1.032601560556682, "learning_rate": 1.3212127783790828e-07, "loss": 0.3024, "step": 29859 }, { "epoch": 2.8021771771771773, "grad_norm": 1.182032417182762, "learning_rate": 1.3199662788996482e-07, "loss": 0.3443, "step": 29860 }, { "epoch": 2.802271021021021, "grad_norm": 0.9397205753966774, "learning_rate": 1.3187203598379883e-07, "loss": 0.2459, "step": 29861 }, { "epoch": 2.802364864864865, "grad_norm": 1.1141667945012454, "learning_rate": 1.3174750212089414e-07, "loss": 0.3185, "step": 29862 }, { "epoch": 2.8024587087087087, "grad_norm": 0.996382269787917, "learning_rate": 1.3162302630273794e-07, "loss": 0.3181, "step": 29863 }, { "epoch": 2.8025525525525525, "grad_norm": 1.0190521933321823, "learning_rate": 1.3149860853081288e-07, "loss": 0.2903, "step": 29864 }, { "epoch": 2.8026463963963963, "grad_norm": 1.0501853000059531, "learning_rate": 1.3137424880660276e-07, "loss": 0.2976, "step": 29865 }, { "epoch": 2.80274024024024, "grad_norm": 1.1530743447930238, "learning_rate": 1.312499471315909e-07, "loss": 0.3205, "step": 29866 }, { "epoch": 2.802834084084084, "grad_norm": 1.423688394066605, "learning_rate": 1.3112570350725885e-07, "loss": 0.3097, "step": 29867 }, { "epoch": 2.8029279279279278, "grad_norm": 1.6195006310659408, "learning_rate": 1.3100151793508765e-07, "loss": 0.3042, "step": 29868 }, { "epoch": 2.803021771771772, "grad_norm": 1.075300981331334, "learning_rate": 1.308773904165589e-07, "loss": 0.3385, "step": 29869 }, { "epoch": 2.8031156156156154, "grad_norm": 1.4189850888830677, "learning_rate": 1.3075332095315252e-07, "loss": 0.3171, "step": 29870 }, { "epoch": 2.8032094594594597, "grad_norm": 1.0674622494103796, "learning_rate": 1.3062930954634624e-07, "loss": 0.3289, "step": 29871 }, { "epoch": 2.8033033033033035, "grad_norm": 1.2952727714962267, "learning_rate": 1.305053561976205e-07, "loss": 0.2721, "step": 29872 }, { "epoch": 2.8033971471471473, "grad_norm": 1.1059660269440716, "learning_rate": 1.303814609084525e-07, "loss": 0.2927, "step": 29873 }, { "epoch": 2.803490990990991, "grad_norm": 1.189756079265258, "learning_rate": 1.3025762368031935e-07, "loss": 0.339, "step": 29874 }, { "epoch": 2.803584834834835, "grad_norm": 1.089529341828906, "learning_rate": 1.3013384451469767e-07, "loss": 0.3089, "step": 29875 }, { "epoch": 2.8036786786786787, "grad_norm": 1.2927196414983777, "learning_rate": 1.300101234130635e-07, "loss": 0.3083, "step": 29876 }, { "epoch": 2.8037725225225225, "grad_norm": 1.224008882672842, "learning_rate": 1.2988646037689123e-07, "loss": 0.3281, "step": 29877 }, { "epoch": 2.8038663663663663, "grad_norm": 1.1353981377814666, "learning_rate": 1.2976285540765687e-07, "loss": 0.3761, "step": 29878 }, { "epoch": 2.80396021021021, "grad_norm": 1.019711583960707, "learning_rate": 1.2963930850683259e-07, "loss": 0.2763, "step": 29879 }, { "epoch": 2.804054054054054, "grad_norm": 1.361575563995867, "learning_rate": 1.2951581967589167e-07, "loss": 0.311, "step": 29880 }, { "epoch": 2.8041478978978978, "grad_norm": 1.0703622221146933, "learning_rate": 1.2939238891630735e-07, "loss": 0.2872, "step": 29881 }, { "epoch": 2.804241741741742, "grad_norm": 1.528743885967515, "learning_rate": 1.292690162295507e-07, "loss": 0.3428, "step": 29882 }, { "epoch": 2.8043355855855854, "grad_norm": 1.2304328918432996, "learning_rate": 1.2914570161709218e-07, "loss": 0.3104, "step": 29883 }, { "epoch": 2.8044294294294296, "grad_norm": 1.0153118453521075, "learning_rate": 1.2902244508040397e-07, "loss": 0.3402, "step": 29884 }, { "epoch": 2.804523273273273, "grad_norm": 1.2861434728112477, "learning_rate": 1.2889924662095378e-07, "loss": 0.309, "step": 29885 }, { "epoch": 2.8046171171171173, "grad_norm": 1.1328625065986015, "learning_rate": 1.2877610624021098e-07, "loss": 0.3531, "step": 29886 }, { "epoch": 2.804710960960961, "grad_norm": 1.1373707190444813, "learning_rate": 1.2865302393964442e-07, "loss": 0.3149, "step": 29887 }, { "epoch": 2.804804804804805, "grad_norm": 1.0054329563556212, "learning_rate": 1.2852999972072122e-07, "loss": 0.3216, "step": 29888 }, { "epoch": 2.8048986486486487, "grad_norm": 1.1824718690570155, "learning_rate": 1.2840703358490746e-07, "loss": 0.3162, "step": 29889 }, { "epoch": 2.8049924924924925, "grad_norm": 1.264419012436687, "learning_rate": 1.2828412553367087e-07, "loss": 0.2693, "step": 29890 }, { "epoch": 2.8050863363363363, "grad_norm": 1.1526957310722643, "learning_rate": 1.2816127556847578e-07, "loss": 0.2981, "step": 29891 }, { "epoch": 2.80518018018018, "grad_norm": 1.0608587055022345, "learning_rate": 1.2803848369078663e-07, "loss": 0.306, "step": 29892 }, { "epoch": 2.805274024024024, "grad_norm": 1.2164421502943263, "learning_rate": 1.2791574990206834e-07, "loss": 0.3512, "step": 29893 }, { "epoch": 2.8053678678678677, "grad_norm": 1.1509992349093063, "learning_rate": 1.2779307420378418e-07, "loss": 0.3016, "step": 29894 }, { "epoch": 2.8054617117117115, "grad_norm": 1.3063901277669057, "learning_rate": 1.276704565973963e-07, "loss": 0.3365, "step": 29895 }, { "epoch": 2.8055555555555554, "grad_norm": 1.45604528418889, "learning_rate": 1.2754789708436744e-07, "loss": 0.3096, "step": 29896 }, { "epoch": 2.8056493993993996, "grad_norm": 1.562092637371449, "learning_rate": 1.2742539566615808e-07, "loss": 0.3373, "step": 29897 }, { "epoch": 2.805743243243243, "grad_norm": 0.9806739284063439, "learning_rate": 1.273029523442293e-07, "loss": 0.2993, "step": 29898 }, { "epoch": 2.8058370870870872, "grad_norm": 1.2713326365449897, "learning_rate": 1.27180567120041e-07, "loss": 0.3384, "step": 29899 }, { "epoch": 2.805930930930931, "grad_norm": 1.2127382762727479, "learning_rate": 1.2705823999505264e-07, "loss": 0.34, "step": 29900 }, { "epoch": 2.806024774774775, "grad_norm": 2.9496055048319243, "learning_rate": 1.2693597097072185e-07, "loss": 0.3125, "step": 29901 }, { "epoch": 2.8061186186186187, "grad_norm": 1.2202340436153385, "learning_rate": 1.2681376004850697e-07, "loss": 0.3474, "step": 29902 }, { "epoch": 2.8062124624624625, "grad_norm": 1.7434312910031549, "learning_rate": 1.2669160722986572e-07, "loss": 0.3296, "step": 29903 }, { "epoch": 2.8063063063063063, "grad_norm": 1.0320862102748887, "learning_rate": 1.2656951251625303e-07, "loss": 0.3087, "step": 29904 }, { "epoch": 2.80640015015015, "grad_norm": 1.2096333983435936, "learning_rate": 1.2644747590912665e-07, "loss": 0.2992, "step": 29905 }, { "epoch": 2.806493993993994, "grad_norm": 1.3387047290689686, "learning_rate": 1.2632549740993982e-07, "loss": 0.3172, "step": 29906 }, { "epoch": 2.8065878378378377, "grad_norm": 1.1950941821933587, "learning_rate": 1.2620357702014807e-07, "loss": 0.3253, "step": 29907 }, { "epoch": 2.8066816816816815, "grad_norm": 1.2214144190227747, "learning_rate": 1.2608171474120468e-07, "loss": 0.3001, "step": 29908 }, { "epoch": 2.8067755255255253, "grad_norm": 1.1884616996310493, "learning_rate": 1.2595991057456292e-07, "loss": 0.3091, "step": 29909 }, { "epoch": 2.8068693693693696, "grad_norm": 1.5474389393078916, "learning_rate": 1.2583816452167441e-07, "loss": 0.3172, "step": 29910 }, { "epoch": 2.806963213213213, "grad_norm": 1.1212791940017097, "learning_rate": 1.2571647658399133e-07, "loss": 0.3324, "step": 29911 }, { "epoch": 2.807057057057057, "grad_norm": 1.093870149227051, "learning_rate": 1.255948467629642e-07, "loss": 0.3475, "step": 29912 }, { "epoch": 2.807150900900901, "grad_norm": 1.1230978409233296, "learning_rate": 1.2547327506004293e-07, "loss": 0.3221, "step": 29913 }, { "epoch": 2.807244744744745, "grad_norm": 1.0900999557179454, "learning_rate": 1.253517614766786e-07, "loss": 0.3314, "step": 29914 }, { "epoch": 2.8073385885885886, "grad_norm": 1.2509380316000098, "learning_rate": 1.252303060143184e-07, "loss": 0.26, "step": 29915 }, { "epoch": 2.8074324324324325, "grad_norm": 1.3243177466847864, "learning_rate": 1.2510890867441006e-07, "loss": 0.306, "step": 29916 }, { "epoch": 2.8075262762762763, "grad_norm": 1.2138144309017336, "learning_rate": 1.2498756945840296e-07, "loss": 0.3393, "step": 29917 }, { "epoch": 2.80762012012012, "grad_norm": 1.1486973887512222, "learning_rate": 1.2486628836774318e-07, "loss": 0.3312, "step": 29918 }, { "epoch": 2.807713963963964, "grad_norm": 1.240654395100256, "learning_rate": 1.2474506540387565e-07, "loss": 0.2852, "step": 29919 }, { "epoch": 2.8078078078078077, "grad_norm": 1.0617621786485727, "learning_rate": 1.2462390056824648e-07, "loss": 0.3178, "step": 29920 }, { "epoch": 2.8079016516516515, "grad_norm": 2.6830176321458388, "learning_rate": 1.2450279386230114e-07, "loss": 0.2919, "step": 29921 }, { "epoch": 2.8079954954954953, "grad_norm": 1.0660490353600023, "learning_rate": 1.2438174528748236e-07, "loss": 0.3305, "step": 29922 }, { "epoch": 2.8080893393393396, "grad_norm": 1.1492919122186003, "learning_rate": 1.242607548452335e-07, "loss": 0.3205, "step": 29923 }, { "epoch": 2.808183183183183, "grad_norm": 1.1184580727225475, "learning_rate": 1.241398225369983e-07, "loss": 0.3228, "step": 29924 }, { "epoch": 2.808277027027027, "grad_norm": 1.0509395068826914, "learning_rate": 1.240189483642168e-07, "loss": 0.2904, "step": 29925 }, { "epoch": 2.808370870870871, "grad_norm": 1.8399654656129465, "learning_rate": 1.238981323283317e-07, "loss": 0.3458, "step": 29926 }, { "epoch": 2.808464714714715, "grad_norm": 1.7804682280464645, "learning_rate": 1.2377737443078353e-07, "loss": 0.3128, "step": 29927 }, { "epoch": 2.8085585585585586, "grad_norm": 1.4146157953365477, "learning_rate": 1.2365667467301057e-07, "loss": 0.3236, "step": 29928 }, { "epoch": 2.8086524024024024, "grad_norm": 1.1909559580888534, "learning_rate": 1.235360330564539e-07, "loss": 0.3079, "step": 29929 }, { "epoch": 2.8087462462462462, "grad_norm": 1.0519157058686175, "learning_rate": 1.234154495825507e-07, "loss": 0.3176, "step": 29930 }, { "epoch": 2.80884009009009, "grad_norm": 1.8874853927357857, "learning_rate": 1.2329492425273926e-07, "loss": 0.2927, "step": 29931 }, { "epoch": 2.808933933933934, "grad_norm": 1.2390054504127295, "learning_rate": 1.231744570684562e-07, "loss": 0.3157, "step": 29932 }, { "epoch": 2.8090277777777777, "grad_norm": 1.1674332961394207, "learning_rate": 1.2305404803113818e-07, "loss": 0.3106, "step": 29933 }, { "epoch": 2.8091216216216215, "grad_norm": 1.182000484456953, "learning_rate": 1.229336971422207e-07, "loss": 0.3432, "step": 29934 }, { "epoch": 2.8092154654654653, "grad_norm": 1.3090417331568422, "learning_rate": 1.2281340440313873e-07, "loss": 0.2968, "step": 29935 }, { "epoch": 2.8093093093093096, "grad_norm": 1.205554468177026, "learning_rate": 1.2269316981532663e-07, "loss": 0.3331, "step": 29936 }, { "epoch": 2.809403153153153, "grad_norm": 1.4897676494788459, "learning_rate": 1.2257299338021779e-07, "loss": 0.3053, "step": 29937 }, { "epoch": 2.809496996996997, "grad_norm": 3.022370968647512, "learning_rate": 1.224528750992454e-07, "loss": 0.3289, "step": 29938 }, { "epoch": 2.809590840840841, "grad_norm": 1.0435277610711218, "learning_rate": 1.223328149738412e-07, "loss": 0.284, "step": 29939 }, { "epoch": 2.809684684684685, "grad_norm": 1.1488823266076524, "learning_rate": 1.2221281300543675e-07, "loss": 0.3045, "step": 29940 }, { "epoch": 2.8097785285285286, "grad_norm": 1.0089836990084788, "learning_rate": 1.2209286919546316e-07, "loss": 0.3175, "step": 29941 }, { "epoch": 2.8098723723723724, "grad_norm": 1.3541337149045614, "learning_rate": 1.2197298354535038e-07, "loss": 0.2862, "step": 29942 }, { "epoch": 2.8099662162162162, "grad_norm": 1.6719539037106494, "learning_rate": 1.218531560565278e-07, "loss": 0.3222, "step": 29943 }, { "epoch": 2.81006006006006, "grad_norm": 1.114417670464425, "learning_rate": 1.2173338673042435e-07, "loss": 0.3293, "step": 29944 }, { "epoch": 2.810153903903904, "grad_norm": 1.1342200879861404, "learning_rate": 1.216136755684677e-07, "loss": 0.3124, "step": 29945 }, { "epoch": 2.8102477477477477, "grad_norm": 1.1244676054396414, "learning_rate": 1.2149402257208565e-07, "loss": 0.3095, "step": 29946 }, { "epoch": 2.8103415915915915, "grad_norm": 1.2105953935784737, "learning_rate": 1.213744277427048e-07, "loss": 0.2915, "step": 29947 }, { "epoch": 2.8104354354354353, "grad_norm": 1.1754481157564882, "learning_rate": 1.2125489108175015e-07, "loss": 0.3178, "step": 29948 }, { "epoch": 2.8105292792792795, "grad_norm": 1.0492162899835482, "learning_rate": 1.2113541259064777e-07, "loss": 0.3015, "step": 29949 }, { "epoch": 2.810623123123123, "grad_norm": 1.0277927957818878, "learning_rate": 1.2101599227082206e-07, "loss": 0.2753, "step": 29950 }, { "epoch": 2.810716966966967, "grad_norm": 1.0873037880483993, "learning_rate": 1.208966301236969e-07, "loss": 0.311, "step": 29951 }, { "epoch": 2.810810810810811, "grad_norm": 1.1159959312973107, "learning_rate": 1.207773261506956e-07, "loss": 0.3002, "step": 29952 }, { "epoch": 2.8109046546546548, "grad_norm": 1.3853292336084366, "learning_rate": 1.2065808035324034e-07, "loss": 0.2834, "step": 29953 }, { "epoch": 2.8109984984984986, "grad_norm": 1.4335685886806986, "learning_rate": 1.2053889273275332e-07, "loss": 0.2878, "step": 29954 }, { "epoch": 2.8110923423423424, "grad_norm": 1.2505872120577726, "learning_rate": 1.2041976329065507e-07, "loss": 0.3279, "step": 29955 }, { "epoch": 2.811186186186186, "grad_norm": 1.1775907407889918, "learning_rate": 1.2030069202836613e-07, "loss": 0.3585, "step": 29956 }, { "epoch": 2.81128003003003, "grad_norm": 1.172841898513663, "learning_rate": 1.2018167894730704e-07, "loss": 0.2919, "step": 29957 }, { "epoch": 2.811373873873874, "grad_norm": 1.2898359048312762, "learning_rate": 1.2006272404889552e-07, "loss": 0.2946, "step": 29958 }, { "epoch": 2.8114677177177176, "grad_norm": 1.354801479945967, "learning_rate": 1.19943827334551e-07, "loss": 0.281, "step": 29959 }, { "epoch": 2.8115615615615615, "grad_norm": 1.0628238545857691, "learning_rate": 1.1982498880569015e-07, "loss": 0.2884, "step": 29960 }, { "epoch": 2.8116554054054053, "grad_norm": 1.1268266717658961, "learning_rate": 1.1970620846373014e-07, "loss": 0.2912, "step": 29961 }, { "epoch": 2.8117492492492495, "grad_norm": 1.1051424182791973, "learning_rate": 1.1958748631008766e-07, "loss": 0.2632, "step": 29962 }, { "epoch": 2.811843093093093, "grad_norm": 1.0184987638773932, "learning_rate": 1.1946882234617817e-07, "loss": 0.2983, "step": 29963 }, { "epoch": 2.811936936936937, "grad_norm": 1.3444209092552042, "learning_rate": 1.193502165734156e-07, "loss": 0.2933, "step": 29964 }, { "epoch": 2.8120307807807805, "grad_norm": 1.7364966979469754, "learning_rate": 1.1923166899321549e-07, "loss": 0.314, "step": 29965 }, { "epoch": 2.8121246246246248, "grad_norm": 1.132298954028156, "learning_rate": 1.1911317960699054e-07, "loss": 0.3043, "step": 29966 }, { "epoch": 2.8122184684684686, "grad_norm": 2.1228119013292255, "learning_rate": 1.1899474841615355e-07, "loss": 0.3038, "step": 29967 }, { "epoch": 2.8123123123123124, "grad_norm": 1.191950930052355, "learning_rate": 1.1887637542211672e-07, "loss": 0.3218, "step": 29968 }, { "epoch": 2.812406156156156, "grad_norm": 1.0763431835011246, "learning_rate": 1.1875806062629113e-07, "loss": 0.3141, "step": 29969 }, { "epoch": 2.8125, "grad_norm": 1.1600295674367416, "learning_rate": 1.1863980403008791e-07, "loss": 0.286, "step": 29970 }, { "epoch": 2.812593843843844, "grad_norm": 1.135898034474431, "learning_rate": 1.1852160563491699e-07, "loss": 0.3323, "step": 29971 }, { "epoch": 2.8126876876876876, "grad_norm": 1.8253006431609244, "learning_rate": 1.1840346544218729e-07, "loss": 0.2649, "step": 29972 }, { "epoch": 2.8127815315315314, "grad_norm": 1.053455252147269, "learning_rate": 1.1828538345330709e-07, "loss": 0.3669, "step": 29973 }, { "epoch": 2.8128753753753752, "grad_norm": 1.5263797577203884, "learning_rate": 1.1816735966968529e-07, "loss": 0.3308, "step": 29974 }, { "epoch": 2.8129692192192195, "grad_norm": 1.3879311843788384, "learning_rate": 1.1804939409272854e-07, "loss": 0.2847, "step": 29975 }, { "epoch": 2.813063063063063, "grad_norm": 1.15209275484909, "learning_rate": 1.1793148672384291e-07, "loss": 0.3063, "step": 29976 }, { "epoch": 2.813156906906907, "grad_norm": 1.0753474296014391, "learning_rate": 1.1781363756443564e-07, "loss": 0.2975, "step": 29977 }, { "epoch": 2.8132507507507505, "grad_norm": 1.0249812346876555, "learning_rate": 1.176958466159106e-07, "loss": 0.2583, "step": 29978 }, { "epoch": 2.8133445945945947, "grad_norm": 1.2021773266835205, "learning_rate": 1.1757811387967222e-07, "loss": 0.2857, "step": 29979 }, { "epoch": 2.8134384384384385, "grad_norm": 1.3465878113228718, "learning_rate": 1.1746043935712547e-07, "loss": 0.3549, "step": 29980 }, { "epoch": 2.8135322822822824, "grad_norm": 1.1553503519231998, "learning_rate": 1.1734282304967204e-07, "loss": 0.2797, "step": 29981 }, { "epoch": 2.813626126126126, "grad_norm": 1.163498520811334, "learning_rate": 1.1722526495871466e-07, "loss": 0.3165, "step": 29982 }, { "epoch": 2.81371996996997, "grad_norm": 1.0680565117705414, "learning_rate": 1.1710776508565503e-07, "loss": 0.284, "step": 29983 }, { "epoch": 2.813813813813814, "grad_norm": 1.0470181878180382, "learning_rate": 1.1699032343189531e-07, "loss": 0.3192, "step": 29984 }, { "epoch": 2.8139076576576576, "grad_norm": 1.1572185196802798, "learning_rate": 1.1687293999883332e-07, "loss": 0.3465, "step": 29985 }, { "epoch": 2.8140015015015014, "grad_norm": 1.1693557919657929, "learning_rate": 1.1675561478787068e-07, "loss": 0.2839, "step": 29986 }, { "epoch": 2.8140953453453452, "grad_norm": 1.2100651905690725, "learning_rate": 1.1663834780040518e-07, "loss": 0.3093, "step": 29987 }, { "epoch": 2.814189189189189, "grad_norm": 1.0492800952857197, "learning_rate": 1.1652113903783568e-07, "loss": 0.3026, "step": 29988 }, { "epoch": 2.814283033033033, "grad_norm": 1.027957058947227, "learning_rate": 1.1640398850155943e-07, "loss": 0.3571, "step": 29989 }, { "epoch": 2.814376876876877, "grad_norm": 1.0372026001059014, "learning_rate": 1.1628689619297307e-07, "loss": 0.2991, "step": 29990 }, { "epoch": 2.8144707207207205, "grad_norm": 1.01295951739921, "learning_rate": 1.1616986211347214e-07, "loss": 0.2859, "step": 29991 }, { "epoch": 2.8145645645645647, "grad_norm": 1.2145464104555497, "learning_rate": 1.1605288626445387e-07, "loss": 0.2812, "step": 29992 }, { "epoch": 2.8146584084084085, "grad_norm": 0.9744892533435965, "learning_rate": 1.1593596864731161e-07, "loss": 0.303, "step": 29993 }, { "epoch": 2.8147522522522523, "grad_norm": 2.37035014165882, "learning_rate": 1.1581910926343864e-07, "loss": 0.3165, "step": 29994 }, { "epoch": 2.814846096096096, "grad_norm": 1.1992268887365878, "learning_rate": 1.1570230811423e-07, "loss": 0.3048, "step": 29995 }, { "epoch": 2.81493993993994, "grad_norm": 0.957590540874061, "learning_rate": 1.1558556520107789e-07, "loss": 0.3031, "step": 29996 }, { "epoch": 2.8150337837837838, "grad_norm": 1.2389458731610452, "learning_rate": 1.1546888052537397e-07, "loss": 0.3221, "step": 29997 }, { "epoch": 2.8151276276276276, "grad_norm": 1.293024209512896, "learning_rate": 1.1535225408850937e-07, "loss": 0.3182, "step": 29998 }, { "epoch": 2.8152214714714714, "grad_norm": 1.1169680285036485, "learning_rate": 1.1523568589187462e-07, "loss": 0.2882, "step": 29999 }, { "epoch": 2.815315315315315, "grad_norm": 1.0540257536135502, "learning_rate": 1.1511917593685918e-07, "loss": 0.3028, "step": 30000 }, { "epoch": 2.815409159159159, "grad_norm": 1.4481618254916668, "learning_rate": 1.1500272422485304e-07, "loss": 0.2916, "step": 30001 }, { "epoch": 2.815503003003003, "grad_norm": 1.0492839256804178, "learning_rate": 1.1488633075724454e-07, "loss": 0.343, "step": 30002 }, { "epoch": 2.815596846846847, "grad_norm": 1.5129620469506064, "learning_rate": 1.1476999553542089e-07, "loss": 0.3091, "step": 30003 }, { "epoch": 2.8156906906906904, "grad_norm": 1.0402357589170368, "learning_rate": 1.146537185607699e-07, "loss": 0.2811, "step": 30004 }, { "epoch": 2.8157845345345347, "grad_norm": 1.0574373196875952, "learning_rate": 1.1453749983467766e-07, "loss": 0.2538, "step": 30005 }, { "epoch": 2.8158783783783785, "grad_norm": 1.0706538738241569, "learning_rate": 1.1442133935852973e-07, "loss": 0.2891, "step": 30006 }, { "epoch": 2.8159722222222223, "grad_norm": 1.189184785006763, "learning_rate": 1.1430523713371112e-07, "loss": 0.2535, "step": 30007 }, { "epoch": 2.816066066066066, "grad_norm": 1.1827778161168827, "learning_rate": 1.1418919316160627e-07, "loss": 0.3431, "step": 30008 }, { "epoch": 2.81615990990991, "grad_norm": 1.2452591327218427, "learning_rate": 1.1407320744359851e-07, "loss": 0.3005, "step": 30009 }, { "epoch": 2.8162537537537538, "grad_norm": 1.2375345066766805, "learning_rate": 1.139572799810712e-07, "loss": 0.3095, "step": 30010 }, { "epoch": 2.8163475975975976, "grad_norm": 1.2925274406761023, "learning_rate": 1.1384141077540656e-07, "loss": 0.3007, "step": 30011 }, { "epoch": 2.8164414414414414, "grad_norm": 1.014467145458668, "learning_rate": 1.1372559982798514e-07, "loss": 0.2972, "step": 30012 }, { "epoch": 2.816535285285285, "grad_norm": 1.125548365578496, "learning_rate": 1.1360984714018864e-07, "loss": 0.286, "step": 30013 }, { "epoch": 2.816629129129129, "grad_norm": 1.032662964392393, "learning_rate": 1.1349415271339703e-07, "loss": 0.2896, "step": 30014 }, { "epoch": 2.816722972972973, "grad_norm": 1.06716467093415, "learning_rate": 1.133785165489898e-07, "loss": 0.3086, "step": 30015 }, { "epoch": 2.816816816816817, "grad_norm": 1.2553004545765325, "learning_rate": 1.132629386483458e-07, "loss": 0.3104, "step": 30016 }, { "epoch": 2.8169106606606604, "grad_norm": 1.1109627874099501, "learning_rate": 1.1314741901284287e-07, "loss": 0.2728, "step": 30017 }, { "epoch": 2.8170045045045047, "grad_norm": 1.1387469325200714, "learning_rate": 1.1303195764385822e-07, "loss": 0.2693, "step": 30018 }, { "epoch": 2.8170983483483485, "grad_norm": 1.037522879705841, "learning_rate": 1.1291655454276907e-07, "loss": 0.3103, "step": 30019 }, { "epoch": 2.8171921921921923, "grad_norm": 1.1545288436846703, "learning_rate": 1.1280120971095043e-07, "loss": 0.3277, "step": 30020 }, { "epoch": 2.817286036036036, "grad_norm": 1.100652840361961, "learning_rate": 1.12685923149779e-07, "loss": 0.3505, "step": 30021 }, { "epoch": 2.81737987987988, "grad_norm": 1.1552609122412238, "learning_rate": 1.125706948606281e-07, "loss": 0.2594, "step": 30022 }, { "epoch": 2.8174737237237237, "grad_norm": 1.1596382520319488, "learning_rate": 1.1245552484487221e-07, "loss": 0.2942, "step": 30023 }, { "epoch": 2.8175675675675675, "grad_norm": 1.2252025018908506, "learning_rate": 1.1234041310388355e-07, "loss": 0.3199, "step": 30024 }, { "epoch": 2.8176614114114114, "grad_norm": 1.2070104161918007, "learning_rate": 1.1222535963903602e-07, "loss": 0.318, "step": 30025 }, { "epoch": 2.817755255255255, "grad_norm": 1.2108714551275175, "learning_rate": 1.1211036445170075e-07, "loss": 0.2845, "step": 30026 }, { "epoch": 2.817849099099099, "grad_norm": 1.0986122045532254, "learning_rate": 1.1199542754324887e-07, "loss": 0.3037, "step": 30027 }, { "epoch": 2.817942942942943, "grad_norm": 1.1724009375521773, "learning_rate": 1.118805489150504e-07, "loss": 0.3323, "step": 30028 }, { "epoch": 2.818036786786787, "grad_norm": 1.295756395866023, "learning_rate": 1.117657285684759e-07, "loss": 0.285, "step": 30029 }, { "epoch": 2.8181306306306304, "grad_norm": 1.1261736551809676, "learning_rate": 1.1165096650489372e-07, "loss": 0.2976, "step": 30030 }, { "epoch": 2.8182244744744747, "grad_norm": 2.622431000571311, "learning_rate": 1.1153626272567275e-07, "loss": 0.281, "step": 30031 }, { "epoch": 2.8183183183183185, "grad_norm": 1.2900688908807316, "learning_rate": 1.114216172321797e-07, "loss": 0.2626, "step": 30032 }, { "epoch": 2.8184121621621623, "grad_norm": 1.056366693097302, "learning_rate": 1.1130703002578291e-07, "loss": 0.2995, "step": 30033 }, { "epoch": 2.818506006006006, "grad_norm": 1.2569843449994873, "learning_rate": 1.1119250110784741e-07, "loss": 0.3303, "step": 30034 }, { "epoch": 2.81859984984985, "grad_norm": 1.124201794664982, "learning_rate": 1.1107803047973875e-07, "loss": 0.3022, "step": 30035 }, { "epoch": 2.8186936936936937, "grad_norm": 1.1591997461814012, "learning_rate": 1.1096361814282197e-07, "loss": 0.2996, "step": 30036 }, { "epoch": 2.8187875375375375, "grad_norm": 1.117346196082054, "learning_rate": 1.1084926409846208e-07, "loss": 0.2873, "step": 30037 }, { "epoch": 2.8188813813813813, "grad_norm": 1.0541477271735802, "learning_rate": 1.1073496834802134e-07, "loss": 0.3025, "step": 30038 }, { "epoch": 2.818975225225225, "grad_norm": 1.1766238443787211, "learning_rate": 1.1062073089286308e-07, "loss": 0.3194, "step": 30039 }, { "epoch": 2.819069069069069, "grad_norm": 1.165818023848555, "learning_rate": 1.1050655173434899e-07, "loss": 0.2859, "step": 30040 }, { "epoch": 2.8191629129129128, "grad_norm": 1.102245672009985, "learning_rate": 1.1039243087384133e-07, "loss": 0.2981, "step": 30041 }, { "epoch": 2.819256756756757, "grad_norm": 0.9936396449439174, "learning_rate": 1.1027836831269955e-07, "loss": 0.3129, "step": 30042 }, { "epoch": 2.8193506006006004, "grad_norm": 1.119955513623141, "learning_rate": 1.101643640522848e-07, "loss": 0.3225, "step": 30043 }, { "epoch": 2.8194444444444446, "grad_norm": 1.223723844641098, "learning_rate": 1.1005041809395601e-07, "loss": 0.3002, "step": 30044 }, { "epoch": 2.8195382882882885, "grad_norm": 2.3965245562611033, "learning_rate": 1.0993653043907148e-07, "loss": 0.3369, "step": 30045 }, { "epoch": 2.8196321321321323, "grad_norm": 1.1861873581684295, "learning_rate": 1.0982270108898907e-07, "loss": 0.2996, "step": 30046 }, { "epoch": 2.819725975975976, "grad_norm": 1.1721961493431519, "learning_rate": 1.09708930045066e-07, "loss": 0.3148, "step": 30047 }, { "epoch": 2.81981981981982, "grad_norm": 1.3986775751453477, "learning_rate": 1.0959521730865897e-07, "loss": 0.3133, "step": 30048 }, { "epoch": 2.8199136636636637, "grad_norm": 1.3825967699649548, "learning_rate": 1.094815628811241e-07, "loss": 0.3096, "step": 30049 }, { "epoch": 2.8200075075075075, "grad_norm": 1.2300919674462156, "learning_rate": 1.0936796676381589e-07, "loss": 0.3233, "step": 30050 }, { "epoch": 2.8201013513513513, "grad_norm": 1.0339089997637811, "learning_rate": 1.0925442895808824e-07, "loss": 0.2604, "step": 30051 }, { "epoch": 2.820195195195195, "grad_norm": 1.0809541280767754, "learning_rate": 1.0914094946529674e-07, "loss": 0.2885, "step": 30052 }, { "epoch": 2.820289039039039, "grad_norm": 1.1788098020373665, "learning_rate": 1.0902752828679308e-07, "loss": 0.3278, "step": 30053 }, { "epoch": 2.8203828828828827, "grad_norm": 1.1155386122266435, "learning_rate": 1.0891416542392952e-07, "loss": 0.327, "step": 30054 }, { "epoch": 2.820476726726727, "grad_norm": 1.200300005286917, "learning_rate": 1.088008608780583e-07, "loss": 0.3601, "step": 30055 }, { "epoch": 2.8205705705705704, "grad_norm": 1.0883022160961542, "learning_rate": 1.0868761465053001e-07, "loss": 0.3135, "step": 30056 }, { "epoch": 2.8206644144144146, "grad_norm": 1.3640600112217225, "learning_rate": 1.0857442674269469e-07, "loss": 0.3345, "step": 30057 }, { "epoch": 2.820758258258258, "grad_norm": 1.0310143886279146, "learning_rate": 1.0846129715590293e-07, "loss": 0.3331, "step": 30058 }, { "epoch": 2.8208521021021022, "grad_norm": 1.103334975971873, "learning_rate": 1.0834822589150252e-07, "loss": 0.3274, "step": 30059 }, { "epoch": 2.820945945945946, "grad_norm": 1.092315398778304, "learning_rate": 1.0823521295084127e-07, "loss": 0.324, "step": 30060 }, { "epoch": 2.82103978978979, "grad_norm": 1.0583803659229676, "learning_rate": 1.0812225833526757e-07, "loss": 0.3376, "step": 30061 }, { "epoch": 2.8211336336336337, "grad_norm": 1.217871579273827, "learning_rate": 1.080093620461281e-07, "loss": 0.3511, "step": 30062 }, { "epoch": 2.8212274774774775, "grad_norm": 1.1025787969157437, "learning_rate": 1.0789652408476847e-07, "loss": 0.3083, "step": 30063 }, { "epoch": 2.8213213213213213, "grad_norm": 1.202776050146134, "learning_rate": 1.0778374445253481e-07, "loss": 0.3151, "step": 30064 }, { "epoch": 2.821415165165165, "grad_norm": 1.122308997188254, "learning_rate": 1.076710231507716e-07, "loss": 0.3254, "step": 30065 }, { "epoch": 2.821509009009009, "grad_norm": 1.187975183256371, "learning_rate": 1.0755836018082167e-07, "loss": 0.3283, "step": 30066 }, { "epoch": 2.8216028528528527, "grad_norm": 1.695922685073359, "learning_rate": 1.0744575554403002e-07, "loss": 0.3109, "step": 30067 }, { "epoch": 2.8216966966966965, "grad_norm": 1.2245775596061177, "learning_rate": 1.0733320924173785e-07, "loss": 0.3162, "step": 30068 }, { "epoch": 2.8217905405405403, "grad_norm": 1.4898195524865845, "learning_rate": 1.0722072127528849e-07, "loss": 0.2813, "step": 30069 }, { "epoch": 2.8218843843843846, "grad_norm": 1.3273778061509969, "learning_rate": 1.0710829164602199e-07, "loss": 0.3149, "step": 30070 }, { "epoch": 2.821978228228228, "grad_norm": 1.1697798722526376, "learning_rate": 1.0699592035527895e-07, "loss": 0.2784, "step": 30071 }, { "epoch": 2.8220720720720722, "grad_norm": 1.094141897870158, "learning_rate": 1.0688360740439884e-07, "loss": 0.3485, "step": 30072 }, { "epoch": 2.822165915915916, "grad_norm": 1.2768352939729317, "learning_rate": 1.0677135279472229e-07, "loss": 0.3141, "step": 30073 }, { "epoch": 2.82225975975976, "grad_norm": 1.3315415170557658, "learning_rate": 1.0665915652758651e-07, "loss": 0.2812, "step": 30074 }, { "epoch": 2.8223536036036037, "grad_norm": 0.9853803341556042, "learning_rate": 1.0654701860432936e-07, "loss": 0.3278, "step": 30075 }, { "epoch": 2.8224474474474475, "grad_norm": 1.2398560224780715, "learning_rate": 1.064349390262881e-07, "loss": 0.2848, "step": 30076 }, { "epoch": 2.8225412912912913, "grad_norm": 1.320256863902723, "learning_rate": 1.0632291779479942e-07, "loss": 0.3156, "step": 30077 }, { "epoch": 2.822635135135135, "grad_norm": 1.0830528451705426, "learning_rate": 1.0621095491119781e-07, "loss": 0.3322, "step": 30078 }, { "epoch": 2.822728978978979, "grad_norm": 1.0831483100028887, "learning_rate": 1.0609905037681944e-07, "loss": 0.3121, "step": 30079 }, { "epoch": 2.8228228228228227, "grad_norm": 5.404032455425099, "learning_rate": 1.0598720419299824e-07, "loss": 0.3009, "step": 30080 }, { "epoch": 2.8229166666666665, "grad_norm": 1.1751872005148256, "learning_rate": 1.0587541636106757e-07, "loss": 0.264, "step": 30081 }, { "epoch": 2.8230105105105103, "grad_norm": 0.9701112651943976, "learning_rate": 1.0576368688235972e-07, "loss": 0.3364, "step": 30082 }, { "epoch": 2.8231043543543546, "grad_norm": 1.3521666965223682, "learning_rate": 1.0565201575820804e-07, "loss": 0.3123, "step": 30083 }, { "epoch": 2.823198198198198, "grad_norm": 1.0532326277781305, "learning_rate": 1.0554040298994317e-07, "loss": 0.3432, "step": 30084 }, { "epoch": 2.823292042042042, "grad_norm": 6.843150942196366, "learning_rate": 1.0542884857889624e-07, "loss": 0.3489, "step": 30085 }, { "epoch": 2.823385885885886, "grad_norm": 1.211509893793231, "learning_rate": 1.053173525263973e-07, "loss": 0.2982, "step": 30086 }, { "epoch": 2.82347972972973, "grad_norm": 1.1625785040704526, "learning_rate": 1.0520591483377474e-07, "loss": 0.3276, "step": 30087 }, { "epoch": 2.8235735735735736, "grad_norm": 1.0853153003286444, "learning_rate": 1.0509453550235916e-07, "loss": 0.3085, "step": 30088 }, { "epoch": 2.8236674174174174, "grad_norm": 1.0493762462743552, "learning_rate": 1.0498321453347726e-07, "loss": 0.2831, "step": 30089 }, { "epoch": 2.8237612612612613, "grad_norm": 1.1229580427172547, "learning_rate": 1.0487195192845634e-07, "loss": 0.3333, "step": 30090 }, { "epoch": 2.823855105105105, "grad_norm": 1.180450379355474, "learning_rate": 1.0476074768862365e-07, "loss": 0.3388, "step": 30091 }, { "epoch": 2.823948948948949, "grad_norm": 1.0282724650203237, "learning_rate": 1.0464960181530481e-07, "loss": 0.3463, "step": 30092 }, { "epoch": 2.8240427927927927, "grad_norm": 1.0232448958849092, "learning_rate": 1.0453851430982431e-07, "loss": 0.3348, "step": 30093 }, { "epoch": 2.8241366366366365, "grad_norm": 1.3327463581311043, "learning_rate": 1.0442748517350776e-07, "loss": 0.3213, "step": 30094 }, { "epoch": 2.8242304804804803, "grad_norm": 1.1390781490249253, "learning_rate": 1.0431651440767854e-07, "loss": 0.2738, "step": 30095 }, { "epoch": 2.8243243243243246, "grad_norm": 1.086763043096483, "learning_rate": 1.0420560201365948e-07, "loss": 0.2791, "step": 30096 }, { "epoch": 2.824418168168168, "grad_norm": 1.1378920887263735, "learning_rate": 1.0409474799277341e-07, "loss": 0.3112, "step": 30097 }, { "epoch": 2.824512012012012, "grad_norm": 1.04606496869486, "learning_rate": 1.0398395234634151e-07, "loss": 0.3339, "step": 30098 }, { "epoch": 2.824605855855856, "grad_norm": 1.1858483266662652, "learning_rate": 1.0387321507568493e-07, "loss": 0.3159, "step": 30099 }, { "epoch": 2.8246996996997, "grad_norm": 1.1836863682781757, "learning_rate": 1.0376253618212484e-07, "loss": 0.3106, "step": 30100 }, { "epoch": 2.8247935435435436, "grad_norm": 1.03576562441902, "learning_rate": 1.0365191566698019e-07, "loss": 0.3063, "step": 30101 }, { "epoch": 2.8248873873873874, "grad_norm": 1.079162700522589, "learning_rate": 1.0354135353156991e-07, "loss": 0.3084, "step": 30102 }, { "epoch": 2.8249812312312312, "grad_norm": 1.3393630556078533, "learning_rate": 1.0343084977721241e-07, "loss": 0.2861, "step": 30103 }, { "epoch": 2.825075075075075, "grad_norm": 1.0535668504872973, "learning_rate": 1.0332040440522551e-07, "loss": 0.3316, "step": 30104 }, { "epoch": 2.825168918918919, "grad_norm": 1.0687991002303734, "learning_rate": 1.0321001741692483e-07, "loss": 0.3086, "step": 30105 }, { "epoch": 2.8252627627627627, "grad_norm": 1.2450817802805763, "learning_rate": 1.0309968881362876e-07, "loss": 0.2978, "step": 30106 }, { "epoch": 2.8253566066066065, "grad_norm": 1.252190259130874, "learning_rate": 1.0298941859665069e-07, "loss": 0.3221, "step": 30107 }, { "epoch": 2.8254504504504503, "grad_norm": 1.0393895571444383, "learning_rate": 1.0287920676730568e-07, "loss": 0.3118, "step": 30108 }, { "epoch": 2.8255442942942945, "grad_norm": 1.2191085672061464, "learning_rate": 1.0276905332690934e-07, "loss": 0.3349, "step": 30109 }, { "epoch": 2.825638138138138, "grad_norm": 1.2200887659495288, "learning_rate": 1.0265895827677341e-07, "loss": 0.3238, "step": 30110 }, { "epoch": 2.825731981981982, "grad_norm": 1.182300435768304, "learning_rate": 1.0254892161821018e-07, "loss": 0.2862, "step": 30111 }, { "epoch": 2.825825825825826, "grad_norm": 0.9898974383137349, "learning_rate": 1.0243894335253358e-07, "loss": 0.2963, "step": 30112 }, { "epoch": 2.82591966966967, "grad_norm": 1.0122978521303219, "learning_rate": 1.0232902348105422e-07, "loss": 0.3361, "step": 30113 }, { "epoch": 2.8260135135135136, "grad_norm": 1.185599886197986, "learning_rate": 1.0221916200508109e-07, "loss": 0.3195, "step": 30114 }, { "epoch": 2.8261073573573574, "grad_norm": 0.9974521005484119, "learning_rate": 1.0210935892592644e-07, "loss": 0.3387, "step": 30115 }, { "epoch": 2.826201201201201, "grad_norm": 1.068233677602943, "learning_rate": 1.0199961424489813e-07, "loss": 0.303, "step": 30116 }, { "epoch": 2.826295045045045, "grad_norm": 1.1455785191113292, "learning_rate": 1.0188992796330399e-07, "loss": 0.3448, "step": 30117 }, { "epoch": 2.826388888888889, "grad_norm": 1.0892718636661332, "learning_rate": 1.0178030008245354e-07, "loss": 0.3005, "step": 30118 }, { "epoch": 2.8264827327327327, "grad_norm": 1.2111832167891183, "learning_rate": 1.0167073060365296e-07, "loss": 0.2843, "step": 30119 }, { "epoch": 2.8265765765765765, "grad_norm": 1.0764371737489753, "learning_rate": 1.0156121952820897e-07, "loss": 0.3294, "step": 30120 }, { "epoch": 2.8266704204204203, "grad_norm": 1.4054192018990415, "learning_rate": 1.0145176685742719e-07, "loss": 0.2883, "step": 30121 }, { "epoch": 2.8267642642642645, "grad_norm": 1.0562560864072938, "learning_rate": 1.0134237259261214e-07, "loss": 0.2817, "step": 30122 }, { "epoch": 2.826858108108108, "grad_norm": 1.5564008259368778, "learning_rate": 1.0123303673506835e-07, "loss": 0.332, "step": 30123 }, { "epoch": 2.826951951951952, "grad_norm": 1.0797137123113478, "learning_rate": 1.0112375928609974e-07, "loss": 0.2966, "step": 30124 }, { "epoch": 2.827045795795796, "grad_norm": 1.123672534398538, "learning_rate": 1.0101454024700919e-07, "loss": 0.301, "step": 30125 }, { "epoch": 2.8271396396396398, "grad_norm": 0.9582252322809899, "learning_rate": 1.0090537961909896e-07, "loss": 0.2901, "step": 30126 }, { "epoch": 2.8272334834834836, "grad_norm": 1.142886715294658, "learning_rate": 1.0079627740367026e-07, "loss": 0.3347, "step": 30127 }, { "epoch": 2.8273273273273274, "grad_norm": 1.0142754324425287, "learning_rate": 1.0068723360202427e-07, "loss": 0.3317, "step": 30128 }, { "epoch": 2.827421171171171, "grad_norm": 1.367571767601202, "learning_rate": 1.0057824821546103e-07, "loss": 0.3117, "step": 30129 }, { "epoch": 2.827515015015015, "grad_norm": 1.1143266155590024, "learning_rate": 1.0046932124528008e-07, "loss": 0.2903, "step": 30130 }, { "epoch": 2.827608858858859, "grad_norm": 1.1247957621127773, "learning_rate": 1.0036045269277984e-07, "loss": 0.2729, "step": 30131 }, { "epoch": 2.8277027027027026, "grad_norm": 1.2271368054744292, "learning_rate": 1.0025164255925868e-07, "loss": 0.305, "step": 30132 }, { "epoch": 2.8277965465465464, "grad_norm": 1.2485103308508574, "learning_rate": 1.0014289084601392e-07, "loss": 0.3225, "step": 30133 }, { "epoch": 2.8278903903903903, "grad_norm": 1.3883888549732035, "learning_rate": 1.000341975543423e-07, "loss": 0.2989, "step": 30134 }, { "epoch": 2.8279842342342345, "grad_norm": 1.2576425480633677, "learning_rate": 9.992556268553944e-08, "loss": 0.3174, "step": 30135 }, { "epoch": 2.828078078078078, "grad_norm": 1.0594988826553635, "learning_rate": 9.981698624090042e-08, "loss": 0.3061, "step": 30136 }, { "epoch": 2.828171921921922, "grad_norm": 1.1293556947760188, "learning_rate": 9.970846822172087e-08, "loss": 0.3016, "step": 30137 }, { "epoch": 2.8282657657657655, "grad_norm": 1.1333728390300548, "learning_rate": 9.960000862929309e-08, "loss": 0.3253, "step": 30138 }, { "epoch": 2.8283596096096097, "grad_norm": 1.1492781281135878, "learning_rate": 9.949160746491216e-08, "loss": 0.2958, "step": 30139 }, { "epoch": 2.8284534534534536, "grad_norm": 1.3790370114707846, "learning_rate": 9.938326472986926e-08, "loss": 0.3026, "step": 30140 }, { "epoch": 2.8285472972972974, "grad_norm": 1.16963806205928, "learning_rate": 9.927498042545614e-08, "loss": 0.3275, "step": 30141 }, { "epoch": 2.828641141141141, "grad_norm": 1.16759398284795, "learning_rate": 9.916675455296453e-08, "loss": 0.298, "step": 30142 }, { "epoch": 2.828734984984985, "grad_norm": 1.2599842780078996, "learning_rate": 9.9058587113684e-08, "loss": 0.276, "step": 30143 }, { "epoch": 2.828828828828829, "grad_norm": 1.154932359209234, "learning_rate": 9.895047810890568e-08, "loss": 0.3362, "step": 30144 }, { "epoch": 2.8289226726726726, "grad_norm": 1.1668803901906708, "learning_rate": 9.884242753991691e-08, "loss": 0.3188, "step": 30145 }, { "epoch": 2.8290165165165164, "grad_norm": 1.1267169555242456, "learning_rate": 9.87344354080072e-08, "loss": 0.3334, "step": 30146 }, { "epoch": 2.8291103603603602, "grad_norm": 1.3321457372911039, "learning_rate": 9.862650171446275e-08, "loss": 0.3258, "step": 30147 }, { "epoch": 2.829204204204204, "grad_norm": 1.1229352482969115, "learning_rate": 9.851862646057197e-08, "loss": 0.289, "step": 30148 }, { "epoch": 2.829298048048048, "grad_norm": 0.9394848034490304, "learning_rate": 9.841080964762106e-08, "loss": 0.3375, "step": 30149 }, { "epoch": 2.829391891891892, "grad_norm": 1.1653399938020588, "learning_rate": 9.830305127689399e-08, "loss": 0.3151, "step": 30150 }, { "epoch": 2.8294857357357355, "grad_norm": 1.0946417168666431, "learning_rate": 9.819535134967695e-08, "loss": 0.2999, "step": 30151 }, { "epoch": 2.8295795795795797, "grad_norm": 1.098542506847678, "learning_rate": 9.808770986725336e-08, "loss": 0.3278, "step": 30152 }, { "epoch": 2.8296734234234235, "grad_norm": 1.4738637059268458, "learning_rate": 9.798012683090719e-08, "loss": 0.2994, "step": 30153 }, { "epoch": 2.8297672672672673, "grad_norm": 1.1138842260338775, "learning_rate": 9.787260224192074e-08, "loss": 0.357, "step": 30154 }, { "epoch": 2.829861111111111, "grad_norm": 1.0030432381743042, "learning_rate": 9.77651361015769e-08, "loss": 0.3354, "step": 30155 }, { "epoch": 2.829954954954955, "grad_norm": 1.0157124944310454, "learning_rate": 9.765772841115573e-08, "loss": 0.3349, "step": 30156 }, { "epoch": 2.830048798798799, "grad_norm": 3.251966232537652, "learning_rate": 9.755037917193843e-08, "loss": 0.2922, "step": 30157 }, { "epoch": 2.8301426426426426, "grad_norm": 1.0741214646432424, "learning_rate": 9.74430883852051e-08, "loss": 0.345, "step": 30158 }, { "epoch": 2.8302364864864864, "grad_norm": 1.0514854889877503, "learning_rate": 9.733585605223417e-08, "loss": 0.3058, "step": 30159 }, { "epoch": 2.83033033033033, "grad_norm": 0.9666256984611487, "learning_rate": 9.72286821743057e-08, "loss": 0.2899, "step": 30160 }, { "epoch": 2.830424174174174, "grad_norm": 1.1537221924987222, "learning_rate": 9.712156675269647e-08, "loss": 0.324, "step": 30161 }, { "epoch": 2.830518018018018, "grad_norm": 1.1162038121698785, "learning_rate": 9.701450978868321e-08, "loss": 0.3313, "step": 30162 }, { "epoch": 2.830611861861862, "grad_norm": 1.1282252959153978, "learning_rate": 9.690751128354381e-08, "loss": 0.2729, "step": 30163 }, { "epoch": 2.8307057057057055, "grad_norm": 0.9775711239897744, "learning_rate": 9.68005712385528e-08, "loss": 0.3415, "step": 30164 }, { "epoch": 2.8307995495495497, "grad_norm": 1.22495331186032, "learning_rate": 9.669368965498582e-08, "loss": 0.3187, "step": 30165 }, { "epoch": 2.8308933933933935, "grad_norm": 1.3748476482693706, "learning_rate": 9.658686653411687e-08, "loss": 0.2894, "step": 30166 }, { "epoch": 2.8309872372372373, "grad_norm": 1.0766368645350781, "learning_rate": 9.648010187722046e-08, "loss": 0.2929, "step": 30167 }, { "epoch": 2.831081081081081, "grad_norm": 1.3374810723089143, "learning_rate": 9.637339568556837e-08, "loss": 0.2857, "step": 30168 }, { "epoch": 2.831174924924925, "grad_norm": 1.287704823245895, "learning_rate": 9.626674796043345e-08, "loss": 0.3383, "step": 30169 }, { "epoch": 2.8312687687687688, "grad_norm": 0.9861074155249374, "learning_rate": 9.616015870308748e-08, "loss": 0.2874, "step": 30170 }, { "epoch": 2.8313626126126126, "grad_norm": 1.1801040318687075, "learning_rate": 9.605362791480055e-08, "loss": 0.3011, "step": 30171 }, { "epoch": 2.8314564564564564, "grad_norm": 0.9540904953107332, "learning_rate": 9.594715559684387e-08, "loss": 0.29, "step": 30172 }, { "epoch": 2.8315503003003, "grad_norm": 1.3885613283062768, "learning_rate": 9.584074175048586e-08, "loss": 0.3253, "step": 30173 }, { "epoch": 2.831644144144144, "grad_norm": 1.1984724726664826, "learning_rate": 9.573438637699606e-08, "loss": 0.3683, "step": 30174 }, { "epoch": 2.831737987987988, "grad_norm": 1.8436406314353597, "learning_rate": 9.56280894776429e-08, "loss": 0.3204, "step": 30175 }, { "epoch": 2.831831831831832, "grad_norm": 1.5190598455926547, "learning_rate": 9.55218510536926e-08, "loss": 0.3025, "step": 30176 }, { "epoch": 2.8319256756756754, "grad_norm": 1.3385370301585655, "learning_rate": 9.541567110641248e-08, "loss": 0.2881, "step": 30177 }, { "epoch": 2.8320195195195197, "grad_norm": 0.9863300349389258, "learning_rate": 9.530954963706929e-08, "loss": 0.3146, "step": 30178 }, { "epoch": 2.8321133633633635, "grad_norm": 1.0000206651805315, "learning_rate": 9.520348664692702e-08, "loss": 0.3184, "step": 30179 }, { "epoch": 2.8322072072072073, "grad_norm": 1.3257850663991197, "learning_rate": 9.509748213725134e-08, "loss": 0.3065, "step": 30180 }, { "epoch": 2.832301051051051, "grad_norm": 2.0740230008186393, "learning_rate": 9.499153610930512e-08, "loss": 0.3127, "step": 30181 }, { "epoch": 2.832394894894895, "grad_norm": 1.5020143428609103, "learning_rate": 9.488564856435234e-08, "loss": 0.284, "step": 30182 }, { "epoch": 2.8324887387387387, "grad_norm": 1.2657089511038397, "learning_rate": 9.477981950365533e-08, "loss": 0.3111, "step": 30183 }, { "epoch": 2.8325825825825826, "grad_norm": 1.5236918562184174, "learning_rate": 9.467404892847587e-08, "loss": 0.2933, "step": 30184 }, { "epoch": 2.8326764264264264, "grad_norm": 1.2090019444632882, "learning_rate": 9.45683368400746e-08, "loss": 0.2819, "step": 30185 }, { "epoch": 2.83277027027027, "grad_norm": 1.3826306138368243, "learning_rate": 9.446268323971275e-08, "loss": 0.3056, "step": 30186 }, { "epoch": 2.832864114114114, "grad_norm": 1.105437933132153, "learning_rate": 9.43570881286493e-08, "loss": 0.2943, "step": 30187 }, { "epoch": 2.832957957957958, "grad_norm": 1.5540722909534896, "learning_rate": 9.425155150814436e-08, "loss": 0.3191, "step": 30188 }, { "epoch": 2.833051801801802, "grad_norm": 1.1797326484109945, "learning_rate": 9.41460733794547e-08, "loss": 0.2823, "step": 30189 }, { "epoch": 2.8331456456456454, "grad_norm": 1.0386296093333571, "learning_rate": 9.404065374383931e-08, "loss": 0.2626, "step": 30190 }, { "epoch": 2.8332394894894897, "grad_norm": 1.2768866690619212, "learning_rate": 9.393529260255497e-08, "loss": 0.3175, "step": 30191 }, { "epoch": 2.8333333333333335, "grad_norm": 1.281750274074224, "learning_rate": 9.382998995685676e-08, "loss": 0.3108, "step": 30192 }, { "epoch": 2.8334271771771773, "grad_norm": 1.12073123738337, "learning_rate": 9.37247458080015e-08, "loss": 0.3064, "step": 30193 }, { "epoch": 2.833521021021021, "grad_norm": 1.2961830793553113, "learning_rate": 9.361956015724372e-08, "loss": 0.3817, "step": 30194 }, { "epoch": 2.833614864864865, "grad_norm": 1.2348215228380621, "learning_rate": 9.351443300583685e-08, "loss": 0.332, "step": 30195 }, { "epoch": 2.8337087087087087, "grad_norm": 1.2310501151816005, "learning_rate": 9.340936435503489e-08, "loss": 0.317, "step": 30196 }, { "epoch": 2.8338025525525525, "grad_norm": 1.9768038402381882, "learning_rate": 9.33043542060913e-08, "loss": 0.3094, "step": 30197 }, { "epoch": 2.8338963963963963, "grad_norm": 1.0084845425240654, "learning_rate": 9.319940256025617e-08, "loss": 0.2934, "step": 30198 }, { "epoch": 2.83399024024024, "grad_norm": 1.1940267869324972, "learning_rate": 9.309450941878296e-08, "loss": 0.3166, "step": 30199 }, { "epoch": 2.834084084084084, "grad_norm": 1.2579792918198924, "learning_rate": 9.29896747829212e-08, "loss": 0.3404, "step": 30200 }, { "epoch": 2.8341779279279278, "grad_norm": 1.5218212759125622, "learning_rate": 9.288489865392047e-08, "loss": 0.2886, "step": 30201 }, { "epoch": 2.834271771771772, "grad_norm": 1.1020467676527628, "learning_rate": 9.278018103303144e-08, "loss": 0.2795, "step": 30202 }, { "epoch": 2.8343656156156154, "grad_norm": 1.2531263091813318, "learning_rate": 9.267552192150198e-08, "loss": 0.308, "step": 30203 }, { "epoch": 2.8344594594594597, "grad_norm": 1.2881199821135034, "learning_rate": 9.257092132057944e-08, "loss": 0.3259, "step": 30204 }, { "epoch": 2.8345533033033035, "grad_norm": 1.0658424780631917, "learning_rate": 9.24663792315117e-08, "loss": 0.3061, "step": 30205 }, { "epoch": 2.8346471471471473, "grad_norm": 1.1170755894453082, "learning_rate": 9.236189565554443e-08, "loss": 0.3103, "step": 30206 }, { "epoch": 2.834740990990991, "grad_norm": 1.2273961195757834, "learning_rate": 9.225747059392387e-08, "loss": 0.3192, "step": 30207 }, { "epoch": 2.834834834834835, "grad_norm": 1.4419337683769917, "learning_rate": 9.215310404789513e-08, "loss": 0.339, "step": 30208 }, { "epoch": 2.8349286786786787, "grad_norm": 1.2231519439489702, "learning_rate": 9.204879601870276e-08, "loss": 0.3232, "step": 30209 }, { "epoch": 2.8350225225225225, "grad_norm": 1.2139870537014539, "learning_rate": 9.194454650759021e-08, "loss": 0.3299, "step": 30210 }, { "epoch": 2.8351163663663663, "grad_norm": 1.0431591277765926, "learning_rate": 9.18403555158004e-08, "loss": 0.3052, "step": 30211 }, { "epoch": 2.83521021021021, "grad_norm": 1.156729782008998, "learning_rate": 9.173622304457563e-08, "loss": 0.2837, "step": 30212 }, { "epoch": 2.835304054054054, "grad_norm": 1.1711466578226228, "learning_rate": 9.163214909515771e-08, "loss": 0.327, "step": 30213 }, { "epoch": 2.8353978978978978, "grad_norm": 1.0344187595289132, "learning_rate": 9.152813366878732e-08, "loss": 0.2711, "step": 30214 }, { "epoch": 2.835491741741742, "grad_norm": 1.69045900187607, "learning_rate": 9.142417676670457e-08, "loss": 0.2832, "step": 30215 }, { "epoch": 2.8355855855855854, "grad_norm": 1.313537279383179, "learning_rate": 9.132027839014956e-08, "loss": 0.3067, "step": 30216 }, { "epoch": 2.8356794294294296, "grad_norm": 0.9536904309493026, "learning_rate": 9.121643854036022e-08, "loss": 0.3221, "step": 30217 }, { "epoch": 2.835773273273273, "grad_norm": 1.1128597802325861, "learning_rate": 9.111265721857499e-08, "loss": 0.2554, "step": 30218 }, { "epoch": 2.8358671171171173, "grad_norm": 0.9414630074170823, "learning_rate": 9.10089344260312e-08, "loss": 0.3042, "step": 30219 }, { "epoch": 2.835960960960961, "grad_norm": 1.2029618910746598, "learning_rate": 9.090527016396567e-08, "loss": 0.294, "step": 30220 }, { "epoch": 2.836054804804805, "grad_norm": 1.598847805508727, "learning_rate": 9.08016644336146e-08, "loss": 0.2898, "step": 30221 }, { "epoch": 2.8361486486486487, "grad_norm": 1.090327226147073, "learning_rate": 9.069811723621258e-08, "loss": 0.2945, "step": 30222 }, { "epoch": 2.8362424924924925, "grad_norm": 1.062196241735562, "learning_rate": 9.059462857299528e-08, "loss": 0.275, "step": 30223 }, { "epoch": 2.8363363363363363, "grad_norm": 1.2713354275211106, "learning_rate": 9.049119844519561e-08, "loss": 0.2832, "step": 30224 }, { "epoch": 2.83643018018018, "grad_norm": 1.082965000675193, "learning_rate": 9.038782685404701e-08, "loss": 0.331, "step": 30225 }, { "epoch": 2.836524024024024, "grad_norm": 1.1084654627940898, "learning_rate": 9.028451380078296e-08, "loss": 0.3498, "step": 30226 }, { "epoch": 2.8366178678678677, "grad_norm": 1.1695777836818946, "learning_rate": 9.018125928663413e-08, "loss": 0.2768, "step": 30227 }, { "epoch": 2.8367117117117115, "grad_norm": 1.2543253002835442, "learning_rate": 9.00780633128312e-08, "loss": 0.3132, "step": 30228 }, { "epoch": 2.8368055555555554, "grad_norm": 1.0734003533959562, "learning_rate": 8.997492588060707e-08, "loss": 0.2825, "step": 30229 }, { "epoch": 2.8368993993993996, "grad_norm": 1.1391582271121745, "learning_rate": 8.987184699118855e-08, "loss": 0.285, "step": 30230 }, { "epoch": 2.836993243243243, "grad_norm": 1.1405758154312973, "learning_rate": 8.976882664580633e-08, "loss": 0.3077, "step": 30231 }, { "epoch": 2.8370870870870872, "grad_norm": 2.024968689833863, "learning_rate": 8.966586484568774e-08, "loss": 0.3, "step": 30232 }, { "epoch": 2.837180930930931, "grad_norm": 1.1919437199107117, "learning_rate": 8.956296159206179e-08, "loss": 0.3234, "step": 30233 }, { "epoch": 2.837274774774775, "grad_norm": 1.217596742098228, "learning_rate": 8.946011688615363e-08, "loss": 0.3183, "step": 30234 }, { "epoch": 2.8373686186186187, "grad_norm": 1.0441040871921234, "learning_rate": 8.935733072919117e-08, "loss": 0.3158, "step": 30235 }, { "epoch": 2.8374624624624625, "grad_norm": 1.4147475909925233, "learning_rate": 8.925460312239898e-08, "loss": 0.3527, "step": 30236 }, { "epoch": 2.8375563063063063, "grad_norm": 1.0760112908829804, "learning_rate": 8.915193406700162e-08, "loss": 0.3049, "step": 30237 }, { "epoch": 2.83765015015015, "grad_norm": 0.9393981827271739, "learning_rate": 8.90493235642248e-08, "loss": 0.3131, "step": 30238 }, { "epoch": 2.837743993993994, "grad_norm": 1.1020260901754246, "learning_rate": 8.89467716152903e-08, "loss": 0.3323, "step": 30239 }, { "epoch": 2.8378378378378377, "grad_norm": 1.2718447025183535, "learning_rate": 8.884427822142106e-08, "loss": 0.3005, "step": 30240 }, { "epoch": 2.8379316816816815, "grad_norm": 1.181927255695943, "learning_rate": 8.874184338383996e-08, "loss": 0.2556, "step": 30241 }, { "epoch": 2.8380255255255253, "grad_norm": 0.9348756454270744, "learning_rate": 8.86394671037677e-08, "loss": 0.364, "step": 30242 }, { "epoch": 2.8381193693693696, "grad_norm": 1.1974402574906393, "learning_rate": 8.853714938242552e-08, "loss": 0.3011, "step": 30243 }, { "epoch": 2.838213213213213, "grad_norm": 1.1639772097905006, "learning_rate": 8.843489022103302e-08, "loss": 0.3069, "step": 30244 }, { "epoch": 2.838307057057057, "grad_norm": 1.1596146382333894, "learning_rate": 8.833268962080866e-08, "loss": 0.2975, "step": 30245 }, { "epoch": 2.838400900900901, "grad_norm": 1.087850349343794, "learning_rate": 8.8230547582972e-08, "loss": 0.3246, "step": 30246 }, { "epoch": 2.838494744744745, "grad_norm": 1.1586113163419984, "learning_rate": 8.812846410874099e-08, "loss": 0.2946, "step": 30247 }, { "epoch": 2.8385885885885886, "grad_norm": 1.1395839933710128, "learning_rate": 8.802643919933185e-08, "loss": 0.3145, "step": 30248 }, { "epoch": 2.8386824324324325, "grad_norm": 1.136832440838875, "learning_rate": 8.79244728559614e-08, "loss": 0.3232, "step": 30249 }, { "epoch": 2.8387762762762763, "grad_norm": 1.2150899369495387, "learning_rate": 8.782256507984644e-08, "loss": 0.3625, "step": 30250 }, { "epoch": 2.83887012012012, "grad_norm": 1.0939720121498422, "learning_rate": 8.772071587220043e-08, "loss": 0.3488, "step": 30251 }, { "epoch": 2.838963963963964, "grad_norm": 1.2479882223508558, "learning_rate": 8.761892523423854e-08, "loss": 0.3371, "step": 30252 }, { "epoch": 2.8390578078078077, "grad_norm": 1.1084038781225125, "learning_rate": 8.751719316717478e-08, "loss": 0.2834, "step": 30253 }, { "epoch": 2.8391516516516515, "grad_norm": 1.457199040546102, "learning_rate": 8.74155196722215e-08, "loss": 0.2783, "step": 30254 }, { "epoch": 2.8392454954954953, "grad_norm": 1.1302511032553415, "learning_rate": 8.731390475059165e-08, "loss": 0.2575, "step": 30255 }, { "epoch": 2.8393393393393396, "grad_norm": 1.290764588961736, "learning_rate": 8.72123484034959e-08, "loss": 0.2958, "step": 30256 }, { "epoch": 2.839433183183183, "grad_norm": 1.2399221624794539, "learning_rate": 8.711085063214552e-08, "loss": 0.3479, "step": 30257 }, { "epoch": 2.839527027027027, "grad_norm": 1.127958790695688, "learning_rate": 8.70094114377501e-08, "loss": 0.3115, "step": 30258 }, { "epoch": 2.839620870870871, "grad_norm": 0.9609713459506918, "learning_rate": 8.690803082152033e-08, "loss": 0.3207, "step": 30259 }, { "epoch": 2.839714714714715, "grad_norm": 1.641214543834188, "learning_rate": 8.680670878466413e-08, "loss": 0.2791, "step": 30260 }, { "epoch": 2.8398085585585586, "grad_norm": 2.233500702906744, "learning_rate": 8.670544532838998e-08, "loss": 0.3, "step": 30261 }, { "epoch": 2.8399024024024024, "grad_norm": 1.1584569490939933, "learning_rate": 8.66042404539047e-08, "loss": 0.3326, "step": 30262 }, { "epoch": 2.8399962462462462, "grad_norm": 1.2544017672252559, "learning_rate": 8.650309416241564e-08, "loss": 0.3289, "step": 30263 }, { "epoch": 2.84009009009009, "grad_norm": 1.1269206671499077, "learning_rate": 8.640200645512853e-08, "loss": 0.2519, "step": 30264 }, { "epoch": 2.840183933933934, "grad_norm": 1.0436663653625675, "learning_rate": 8.63009773332485e-08, "loss": 0.3161, "step": 30265 }, { "epoch": 2.8402777777777777, "grad_norm": 1.2786222633605275, "learning_rate": 8.620000679798013e-08, "loss": 0.3221, "step": 30266 }, { "epoch": 2.8403716216216215, "grad_norm": 0.9880675931674913, "learning_rate": 8.609909485052748e-08, "loss": 0.3159, "step": 30267 }, { "epoch": 2.8404654654654653, "grad_norm": 1.2992493533647131, "learning_rate": 8.599824149209401e-08, "loss": 0.3128, "step": 30268 }, { "epoch": 2.8405593093093096, "grad_norm": 1.2472629722176405, "learning_rate": 8.589744672388156e-08, "loss": 0.3394, "step": 30269 }, { "epoch": 2.840653153153153, "grad_norm": 1.1308081407571575, "learning_rate": 8.579671054709193e-08, "loss": 0.3055, "step": 30270 }, { "epoch": 2.840746996996997, "grad_norm": 0.8891551908831737, "learning_rate": 8.569603296292695e-08, "loss": 0.3454, "step": 30271 }, { "epoch": 2.840840840840841, "grad_norm": 1.279173940621543, "learning_rate": 8.559541397258619e-08, "loss": 0.2868, "step": 30272 }, { "epoch": 2.840934684684685, "grad_norm": 1.3589702504387389, "learning_rate": 8.549485357726984e-08, "loss": 0.3303, "step": 30273 }, { "epoch": 2.8410285285285286, "grad_norm": 1.3315987896686303, "learning_rate": 8.539435177817635e-08, "loss": 0.2861, "step": 30274 }, { "epoch": 2.8411223723723724, "grad_norm": 1.0694732911922606, "learning_rate": 8.529390857650532e-08, "loss": 0.2814, "step": 30275 }, { "epoch": 2.8412162162162162, "grad_norm": 1.326273026783435, "learning_rate": 8.519352397345249e-08, "loss": 0.3215, "step": 30276 }, { "epoch": 2.84131006006006, "grad_norm": 1.1698002716069629, "learning_rate": 8.50931979702163e-08, "loss": 0.3214, "step": 30277 }, { "epoch": 2.841403903903904, "grad_norm": 1.1088489981971863, "learning_rate": 8.499293056799251e-08, "loss": 0.2636, "step": 30278 }, { "epoch": 2.8414977477477477, "grad_norm": 1.0898439353030924, "learning_rate": 8.489272176797625e-08, "loss": 0.3009, "step": 30279 }, { "epoch": 2.8415915915915915, "grad_norm": 1.1726362493914217, "learning_rate": 8.479257157136268e-08, "loss": 0.3909, "step": 30280 }, { "epoch": 2.8416854354354353, "grad_norm": 1.073856390037714, "learning_rate": 8.469247997934527e-08, "loss": 0.3241, "step": 30281 }, { "epoch": 2.8417792792792795, "grad_norm": 1.344742902539064, "learning_rate": 8.459244699311808e-08, "loss": 0.3548, "step": 30282 }, { "epoch": 2.841873123123123, "grad_norm": 1.1345803511300312, "learning_rate": 8.449247261387406e-08, "loss": 0.3428, "step": 30283 }, { "epoch": 2.841966966966967, "grad_norm": 1.0102834528209994, "learning_rate": 8.439255684280445e-08, "loss": 0.2924, "step": 30284 }, { "epoch": 2.842060810810811, "grad_norm": 1.1004949249715317, "learning_rate": 8.429269968110109e-08, "loss": 0.3214, "step": 30285 }, { "epoch": 2.8421546546546548, "grad_norm": 0.9411310120580804, "learning_rate": 8.419290112995415e-08, "loss": 0.3209, "step": 30286 }, { "epoch": 2.8422484984984986, "grad_norm": 1.3896575587628237, "learning_rate": 8.409316119055433e-08, "loss": 0.3045, "step": 30287 }, { "epoch": 2.8423423423423424, "grad_norm": 1.1923162020986404, "learning_rate": 8.399347986408956e-08, "loss": 0.3171, "step": 30288 }, { "epoch": 2.842436186186186, "grad_norm": 1.2076843531586878, "learning_rate": 8.389385715175002e-08, "loss": 0.2443, "step": 30289 }, { "epoch": 2.84253003003003, "grad_norm": 1.1031256316056286, "learning_rate": 8.379429305472197e-08, "loss": 0.311, "step": 30290 }, { "epoch": 2.842623873873874, "grad_norm": 1.7725588401527148, "learning_rate": 8.369478757419336e-08, "loss": 0.3198, "step": 30291 }, { "epoch": 2.8427177177177176, "grad_norm": 1.2340047099223241, "learning_rate": 8.359534071135045e-08, "loss": 0.2818, "step": 30292 }, { "epoch": 2.8428115615615615, "grad_norm": 1.453377312966219, "learning_rate": 8.349595246737896e-08, "loss": 0.2675, "step": 30293 }, { "epoch": 2.8429054054054053, "grad_norm": 1.1394791285965213, "learning_rate": 8.33966228434635e-08, "loss": 0.3025, "step": 30294 }, { "epoch": 2.8429992492492495, "grad_norm": 1.2976502389450817, "learning_rate": 8.329735184078924e-08, "loss": 0.3118, "step": 30295 }, { "epoch": 2.843093093093093, "grad_norm": 1.2526742814864231, "learning_rate": 8.319813946053968e-08, "loss": 0.2949, "step": 30296 }, { "epoch": 2.843186936936937, "grad_norm": 1.1863044193425818, "learning_rate": 8.309898570389607e-08, "loss": 0.3106, "step": 30297 }, { "epoch": 2.8432807807807805, "grad_norm": 1.1078383890565375, "learning_rate": 8.299989057204305e-08, "loss": 0.3182, "step": 30298 }, { "epoch": 2.8433746246246248, "grad_norm": 1.144733778410352, "learning_rate": 8.290085406616077e-08, "loss": 0.2753, "step": 30299 }, { "epoch": 2.8434684684684686, "grad_norm": 1.1848631607709479, "learning_rate": 8.280187618742996e-08, "loss": 0.3051, "step": 30300 }, { "epoch": 2.8435623123123124, "grad_norm": 1.2742248582849653, "learning_rate": 8.270295693703134e-08, "loss": 0.2782, "step": 30301 }, { "epoch": 2.843656156156156, "grad_norm": 3.5913011272890887, "learning_rate": 8.260409631614452e-08, "loss": 0.3071, "step": 30302 }, { "epoch": 2.84375, "grad_norm": 1.1319959658980787, "learning_rate": 8.250529432594745e-08, "loss": 0.2923, "step": 30303 }, { "epoch": 2.843843843843844, "grad_norm": 1.0949751972896145, "learning_rate": 8.240655096761862e-08, "loss": 0.3, "step": 30304 }, { "epoch": 2.8439376876876876, "grad_norm": 1.1977571812270877, "learning_rate": 8.230786624233488e-08, "loss": 0.314, "step": 30305 }, { "epoch": 2.8440315315315314, "grad_norm": 1.7619196196591231, "learning_rate": 8.220924015127363e-08, "loss": 0.2913, "step": 30306 }, { "epoch": 2.8441253753753752, "grad_norm": 0.9963826388093688, "learning_rate": 8.211067269561002e-08, "loss": 0.3445, "step": 30307 }, { "epoch": 2.8442192192192195, "grad_norm": 1.522683949082522, "learning_rate": 8.201216387652034e-08, "loss": 0.2872, "step": 30308 }, { "epoch": 2.844313063063063, "grad_norm": 1.116740630078527, "learning_rate": 8.191371369517753e-08, "loss": 0.3144, "step": 30309 }, { "epoch": 2.844406906906907, "grad_norm": 0.8941385570413118, "learning_rate": 8.181532215275678e-08, "loss": 0.3314, "step": 30310 }, { "epoch": 2.8445007507507505, "grad_norm": 1.1381515011129166, "learning_rate": 8.171698925043104e-08, "loss": 0.3074, "step": 30311 }, { "epoch": 2.8445945945945947, "grad_norm": 1.0554792153645052, "learning_rate": 8.161871498937157e-08, "loss": 0.3341, "step": 30312 }, { "epoch": 2.8446884384384385, "grad_norm": 1.0375129004771209, "learning_rate": 8.152049937075191e-08, "loss": 0.3226, "step": 30313 }, { "epoch": 2.8447822822822824, "grad_norm": 1.1322246127719178, "learning_rate": 8.14223423957422e-08, "loss": 0.3411, "step": 30314 }, { "epoch": 2.844876126126126, "grad_norm": 1.2035672082251399, "learning_rate": 8.132424406551264e-08, "loss": 0.3593, "step": 30315 }, { "epoch": 2.84496996996997, "grad_norm": 1.1486143507751014, "learning_rate": 8.122620438123341e-08, "loss": 0.3301, "step": 30316 }, { "epoch": 2.845063813813814, "grad_norm": 1.4235656246598765, "learning_rate": 8.112822334407244e-08, "loss": 0.2846, "step": 30317 }, { "epoch": 2.8451576576576576, "grad_norm": 1.2608416019417692, "learning_rate": 8.103030095519826e-08, "loss": 0.3046, "step": 30318 }, { "epoch": 2.8452515015015014, "grad_norm": 1.2151533992558239, "learning_rate": 8.093243721577936e-08, "loss": 0.3194, "step": 30319 }, { "epoch": 2.8453453453453452, "grad_norm": 1.096421991686825, "learning_rate": 8.083463212698205e-08, "loss": 0.332, "step": 30320 }, { "epoch": 2.845439189189189, "grad_norm": 1.0260350281591009, "learning_rate": 8.073688568997207e-08, "loss": 0.2763, "step": 30321 }, { "epoch": 2.845533033033033, "grad_norm": 1.180962878670615, "learning_rate": 8.063919790591513e-08, "loss": 0.3306, "step": 30322 }, { "epoch": 2.845626876876877, "grad_norm": 1.8285181903747887, "learning_rate": 8.054156877597641e-08, "loss": 0.3237, "step": 30323 }, { "epoch": 2.8457207207207205, "grad_norm": 1.4469867386816258, "learning_rate": 8.044399830131888e-08, "loss": 0.3384, "step": 30324 }, { "epoch": 2.8458145645645647, "grad_norm": 1.161487771947445, "learning_rate": 8.034648648310716e-08, "loss": 0.3129, "step": 30325 }, { "epoch": 2.8459084084084085, "grad_norm": 1.2564194781773932, "learning_rate": 8.024903332250312e-08, "loss": 0.3168, "step": 30326 }, { "epoch": 2.8460022522522523, "grad_norm": 1.2754181990073445, "learning_rate": 8.015163882066857e-08, "loss": 0.3274, "step": 30327 }, { "epoch": 2.846096096096096, "grad_norm": 1.1350215247695756, "learning_rate": 8.005430297876537e-08, "loss": 0.3234, "step": 30328 }, { "epoch": 2.84618993993994, "grad_norm": 1.075840511666171, "learning_rate": 7.995702579795427e-08, "loss": 0.3312, "step": 30329 }, { "epoch": 2.8462837837837838, "grad_norm": 1.2390309577922123, "learning_rate": 7.98598072793938e-08, "loss": 0.3208, "step": 30330 }, { "epoch": 2.8463776276276276, "grad_norm": 1.4202163492693478, "learning_rate": 7.976264742424412e-08, "loss": 0.2763, "step": 30331 }, { "epoch": 2.8464714714714714, "grad_norm": 1.0747672706125353, "learning_rate": 7.966554623366374e-08, "loss": 0.2983, "step": 30332 }, { "epoch": 2.846565315315315, "grad_norm": 1.033862678545012, "learning_rate": 7.956850370880953e-08, "loss": 0.2668, "step": 30333 }, { "epoch": 2.846659159159159, "grad_norm": 1.1410076828144065, "learning_rate": 7.947151985083945e-08, "loss": 0.3015, "step": 30334 }, { "epoch": 2.846753003003003, "grad_norm": 1.7969121464835562, "learning_rate": 7.93745946609098e-08, "loss": 0.3307, "step": 30335 }, { "epoch": 2.846846846846847, "grad_norm": 1.1011265239790367, "learning_rate": 7.927772814017576e-08, "loss": 0.2745, "step": 30336 }, { "epoch": 2.8469406906906904, "grad_norm": 1.1982052069446845, "learning_rate": 7.918092028979252e-08, "loss": 0.3539, "step": 30337 }, { "epoch": 2.8470345345345347, "grad_norm": 1.0990633810798414, "learning_rate": 7.908417111091471e-08, "loss": 0.3092, "step": 30338 }, { "epoch": 2.8471283783783785, "grad_norm": 1.086920771057338, "learning_rate": 7.89874806046953e-08, "loss": 0.3209, "step": 30339 }, { "epoch": 2.8472222222222223, "grad_norm": 1.0732851411959907, "learning_rate": 7.889084877228726e-08, "loss": 0.261, "step": 30340 }, { "epoch": 2.847316066066066, "grad_norm": 1.089538905391685, "learning_rate": 7.879427561484243e-08, "loss": 0.2993, "step": 30341 }, { "epoch": 2.84740990990991, "grad_norm": 1.121495750219218, "learning_rate": 7.869776113351268e-08, "loss": 0.2702, "step": 30342 }, { "epoch": 2.8475037537537538, "grad_norm": 0.9799451612868219, "learning_rate": 7.860130532944932e-08, "loss": 0.2914, "step": 30343 }, { "epoch": 2.8475975975975976, "grad_norm": 2.1299150930484023, "learning_rate": 7.85049082038014e-08, "loss": 0.264, "step": 30344 }, { "epoch": 2.8476914414414414, "grad_norm": 1.4701102273905253, "learning_rate": 7.84085697577186e-08, "loss": 0.2717, "step": 30345 }, { "epoch": 2.847785285285285, "grad_norm": 1.2659047469875553, "learning_rate": 7.831228999234997e-08, "loss": 0.306, "step": 30346 }, { "epoch": 2.847879129129129, "grad_norm": 1.1690013063795366, "learning_rate": 7.821606890884348e-08, "loss": 0.3401, "step": 30347 }, { "epoch": 2.847972972972973, "grad_norm": 0.9800970050052422, "learning_rate": 7.81199065083449e-08, "loss": 0.2811, "step": 30348 }, { "epoch": 2.848066816816817, "grad_norm": 1.1291492547526476, "learning_rate": 7.802380279200272e-08, "loss": 0.345, "step": 30349 }, { "epoch": 2.8481606606606604, "grad_norm": 1.0759807423353742, "learning_rate": 7.792775776096217e-08, "loss": 0.2973, "step": 30350 }, { "epoch": 2.8482545045045047, "grad_norm": 1.027757615201206, "learning_rate": 7.783177141636788e-08, "loss": 0.3001, "step": 30351 }, { "epoch": 2.8483483483483485, "grad_norm": 1.1377177023550298, "learning_rate": 7.773584375936561e-08, "loss": 0.3239, "step": 30352 }, { "epoch": 2.8484421921921923, "grad_norm": 1.1701012167607625, "learning_rate": 7.763997479109719e-08, "loss": 0.3362, "step": 30353 }, { "epoch": 2.848536036036036, "grad_norm": 1.0312607731155763, "learning_rate": 7.754416451270675e-08, "loss": 0.3045, "step": 30354 }, { "epoch": 2.84862987987988, "grad_norm": 1.093873387877689, "learning_rate": 7.744841292533668e-08, "loss": 0.316, "step": 30355 }, { "epoch": 2.8487237237237237, "grad_norm": 1.0581355942899027, "learning_rate": 7.735272003012884e-08, "loss": 0.3437, "step": 30356 }, { "epoch": 2.8488175675675675, "grad_norm": 1.129877136579753, "learning_rate": 7.725708582822345e-08, "loss": 0.3313, "step": 30357 }, { "epoch": 2.8489114114114114, "grad_norm": 1.5166006605316875, "learning_rate": 7.71615103207618e-08, "loss": 0.3425, "step": 30358 }, { "epoch": 2.849005255255255, "grad_norm": 1.0422966187810516, "learning_rate": 7.706599350888244e-08, "loss": 0.3162, "step": 30359 }, { "epoch": 2.849099099099099, "grad_norm": 1.233756298875983, "learning_rate": 7.697053539372446e-08, "loss": 0.3258, "step": 30360 }, { "epoch": 2.849192942942943, "grad_norm": 1.4308005349316222, "learning_rate": 7.687513597642638e-08, "loss": 0.2798, "step": 30361 }, { "epoch": 2.849286786786787, "grad_norm": 1.2471696078899956, "learning_rate": 7.677979525812563e-08, "loss": 0.2927, "step": 30362 }, { "epoch": 2.8493806306306304, "grad_norm": 1.1947455715966968, "learning_rate": 7.668451323995851e-08, "loss": 0.308, "step": 30363 }, { "epoch": 2.8494744744744747, "grad_norm": 1.2008604352713323, "learning_rate": 7.658928992306136e-08, "loss": 0.2979, "step": 30364 }, { "epoch": 2.8495683183183185, "grad_norm": 1.1340792920314389, "learning_rate": 7.649412530856992e-08, "loss": 0.285, "step": 30365 }, { "epoch": 2.8496621621621623, "grad_norm": 0.9810472576808926, "learning_rate": 7.639901939761829e-08, "loss": 0.2872, "step": 30366 }, { "epoch": 2.849756006006006, "grad_norm": 1.0782000270240577, "learning_rate": 7.630397219134111e-08, "loss": 0.3072, "step": 30367 }, { "epoch": 2.84984984984985, "grad_norm": 1.0577808146052683, "learning_rate": 7.620898369087083e-08, "loss": 0.2641, "step": 30368 }, { "epoch": 2.8499436936936937, "grad_norm": 1.0743567391671718, "learning_rate": 7.611405389733983e-08, "loss": 0.3079, "step": 30369 }, { "epoch": 2.8500375375375375, "grad_norm": 1.0980202221114568, "learning_rate": 7.601918281188059e-08, "loss": 0.3138, "step": 30370 }, { "epoch": 2.8501313813813813, "grad_norm": 1.1732471842967631, "learning_rate": 7.592437043562495e-08, "loss": 0.2819, "step": 30371 }, { "epoch": 2.850225225225225, "grad_norm": 1.0970254502090864, "learning_rate": 7.582961676970147e-08, "loss": 0.3304, "step": 30372 }, { "epoch": 2.850319069069069, "grad_norm": 1.287061773692318, "learning_rate": 7.5734921815242e-08, "loss": 0.3205, "step": 30373 }, { "epoch": 2.8504129129129128, "grad_norm": 1.1370627270678004, "learning_rate": 7.564028557337399e-08, "loss": 0.2778, "step": 30374 }, { "epoch": 2.850506756756757, "grad_norm": 1.0298789846877638, "learning_rate": 7.554570804522654e-08, "loss": 0.3074, "step": 30375 }, { "epoch": 2.8506006006006004, "grad_norm": 1.0961762358098999, "learning_rate": 7.545118923192707e-08, "loss": 0.3164, "step": 30376 }, { "epoch": 2.8506944444444446, "grad_norm": 1.2957053805534238, "learning_rate": 7.535672913460301e-08, "loss": 0.3007, "step": 30377 }, { "epoch": 2.8507882882882885, "grad_norm": 1.0877875473634324, "learning_rate": 7.526232775438014e-08, "loss": 0.3646, "step": 30378 }, { "epoch": 2.8508821321321323, "grad_norm": 1.0290769952007182, "learning_rate": 7.516798509238422e-08, "loss": 0.3278, "step": 30379 }, { "epoch": 2.850975975975976, "grad_norm": 1.067250431084436, "learning_rate": 7.507370114974044e-08, "loss": 0.3531, "step": 30380 }, { "epoch": 2.85106981981982, "grad_norm": 1.0970627815218352, "learning_rate": 7.497947592757182e-08, "loss": 0.2921, "step": 30381 }, { "epoch": 2.8511636636636637, "grad_norm": 1.0480687480922901, "learning_rate": 7.488530942700245e-08, "loss": 0.3237, "step": 30382 }, { "epoch": 2.8512575075075075, "grad_norm": 1.0376737521974861, "learning_rate": 7.479120164915587e-08, "loss": 0.2871, "step": 30383 }, { "epoch": 2.8513513513513513, "grad_norm": 1.7267185161331469, "learning_rate": 7.469715259515287e-08, "loss": 0.3416, "step": 30384 }, { "epoch": 2.851445195195195, "grad_norm": 1.009999057409522, "learning_rate": 7.460316226611586e-08, "loss": 0.2696, "step": 30385 }, { "epoch": 2.851539039039039, "grad_norm": 1.1299981947596123, "learning_rate": 7.450923066316451e-08, "loss": 0.3023, "step": 30386 }, { "epoch": 2.8516328828828827, "grad_norm": 1.2691203672005245, "learning_rate": 7.44153577874196e-08, "loss": 0.2938, "step": 30387 }, { "epoch": 2.851726726726727, "grad_norm": 1.1108097232638696, "learning_rate": 7.432154364000022e-08, "loss": 0.3318, "step": 30388 }, { "epoch": 2.8518205705705704, "grad_norm": 1.1653521701030545, "learning_rate": 7.422778822202492e-08, "loss": 0.3114, "step": 30389 }, { "epoch": 2.8519144144144146, "grad_norm": 2.6420597037274827, "learning_rate": 7.413409153461115e-08, "loss": 0.2908, "step": 30390 }, { "epoch": 2.852008258258258, "grad_norm": 0.9956335066347047, "learning_rate": 7.404045357887635e-08, "loss": 0.3154, "step": 30391 }, { "epoch": 2.8521021021021022, "grad_norm": 0.9642710260340662, "learning_rate": 7.394687435593684e-08, "loss": 0.2762, "step": 30392 }, { "epoch": 2.852195945945946, "grad_norm": 0.9882966351141547, "learning_rate": 7.385335386690839e-08, "loss": 0.2982, "step": 30393 }, { "epoch": 2.85228978978979, "grad_norm": 1.2827198597977463, "learning_rate": 7.375989211290679e-08, "loss": 0.2958, "step": 30394 }, { "epoch": 2.8523836336336337, "grad_norm": 1.2191915223539707, "learning_rate": 7.366648909504559e-08, "loss": 0.2913, "step": 30395 }, { "epoch": 2.8524774774774775, "grad_norm": 1.2084029588843972, "learning_rate": 7.357314481443834e-08, "loss": 0.2866, "step": 30396 }, { "epoch": 2.8525713213213213, "grad_norm": 1.3010951221113254, "learning_rate": 7.347985927219859e-08, "loss": 0.2748, "step": 30397 }, { "epoch": 2.852665165165165, "grad_norm": 1.2181564509767007, "learning_rate": 7.338663246943823e-08, "loss": 0.3379, "step": 30398 }, { "epoch": 2.852759009009009, "grad_norm": 1.2548180101677981, "learning_rate": 7.329346440726859e-08, "loss": 0.298, "step": 30399 }, { "epoch": 2.8528528528528527, "grad_norm": 1.0606218017344515, "learning_rate": 7.320035508680101e-08, "loss": 0.3148, "step": 30400 }, { "epoch": 2.8529466966966965, "grad_norm": 1.2501922651388693, "learning_rate": 7.310730450914572e-08, "loss": 0.314, "step": 30401 }, { "epoch": 2.8530405405405403, "grad_norm": 1.0357245025108555, "learning_rate": 7.301431267541182e-08, "loss": 0.2619, "step": 30402 }, { "epoch": 2.8531343843843846, "grad_norm": 0.9832337415611386, "learning_rate": 7.292137958670786e-08, "loss": 0.332, "step": 30403 }, { "epoch": 2.853228228228228, "grad_norm": 1.3380257806769067, "learning_rate": 7.282850524414242e-08, "loss": 0.3261, "step": 30404 }, { "epoch": 2.8533220720720722, "grad_norm": 1.2317514080128, "learning_rate": 7.273568964882239e-08, "loss": 0.306, "step": 30405 }, { "epoch": 2.853415915915916, "grad_norm": 1.1039371851163249, "learning_rate": 7.264293280185463e-08, "loss": 0.3521, "step": 30406 }, { "epoch": 2.85350975975976, "grad_norm": 1.1109748056513256, "learning_rate": 7.255023470434552e-08, "loss": 0.2991, "step": 30407 }, { "epoch": 2.8536036036036037, "grad_norm": 1.1289801065599931, "learning_rate": 7.245759535739916e-08, "loss": 0.2756, "step": 30408 }, { "epoch": 2.8536974474474475, "grad_norm": 1.4584412169589995, "learning_rate": 7.236501476212132e-08, "loss": 0.3107, "step": 30409 }, { "epoch": 2.8537912912912913, "grad_norm": 1.0500283519905096, "learning_rate": 7.227249291961558e-08, "loss": 0.3231, "step": 30410 }, { "epoch": 2.853885135135135, "grad_norm": 1.1927137838806765, "learning_rate": 7.218002983098437e-08, "loss": 0.2569, "step": 30411 }, { "epoch": 2.853978978978979, "grad_norm": 1.4172919435452693, "learning_rate": 7.208762549733073e-08, "loss": 0.3152, "step": 30412 }, { "epoch": 2.8540728228228227, "grad_norm": 1.3448280356689695, "learning_rate": 7.199527991975652e-08, "loss": 0.3176, "step": 30413 }, { "epoch": 2.8541666666666665, "grad_norm": 1.355123181984092, "learning_rate": 7.190299309936256e-08, "loss": 0.2719, "step": 30414 }, { "epoch": 2.8542605105105103, "grad_norm": 1.135531611981777, "learning_rate": 7.181076503724904e-08, "loss": 0.2749, "step": 30415 }, { "epoch": 2.8543543543543546, "grad_norm": 1.5310563583316645, "learning_rate": 7.171859573451622e-08, "loss": 0.2902, "step": 30416 }, { "epoch": 2.854448198198198, "grad_norm": 1.0307404055563703, "learning_rate": 7.162648519226211e-08, "loss": 0.2968, "step": 30417 }, { "epoch": 2.854542042042042, "grad_norm": 1.0902384247441692, "learning_rate": 7.153443341158584e-08, "loss": 0.3032, "step": 30418 }, { "epoch": 2.854635885885886, "grad_norm": 1.1254007287214203, "learning_rate": 7.144244039358428e-08, "loss": 0.3157, "step": 30419 }, { "epoch": 2.85472972972973, "grad_norm": 3.7803207739472517, "learning_rate": 7.135050613935435e-08, "loss": 0.2877, "step": 30420 }, { "epoch": 2.8548235735735736, "grad_norm": 1.2317946796058352, "learning_rate": 7.125863064999294e-08, "loss": 0.3541, "step": 30421 }, { "epoch": 2.8549174174174174, "grad_norm": 1.2633858612246445, "learning_rate": 7.116681392659475e-08, "loss": 0.3268, "step": 30422 }, { "epoch": 2.8550112612612613, "grad_norm": 1.0086453519634084, "learning_rate": 7.107505597025444e-08, "loss": 0.3052, "step": 30423 }, { "epoch": 2.855105105105105, "grad_norm": 1.2807094172895137, "learning_rate": 7.098335678206669e-08, "loss": 0.3235, "step": 30424 }, { "epoch": 2.855198948948949, "grad_norm": 1.3556466007407157, "learning_rate": 7.089171636312453e-08, "loss": 0.3214, "step": 30425 }, { "epoch": 2.8552927927927927, "grad_norm": 1.2819149408622117, "learning_rate": 7.080013471452041e-08, "loss": 0.3156, "step": 30426 }, { "epoch": 2.8553866366366365, "grad_norm": 1.1569624638313736, "learning_rate": 7.070861183734679e-08, "loss": 0.2726, "step": 30427 }, { "epoch": 2.8554804804804803, "grad_norm": 1.203578838880328, "learning_rate": 7.061714773269391e-08, "loss": 0.3069, "step": 30428 }, { "epoch": 2.8555743243243246, "grad_norm": 1.2322983472389566, "learning_rate": 7.052574240165311e-08, "loss": 0.3303, "step": 30429 }, { "epoch": 2.855668168168168, "grad_norm": 1.3623373385611843, "learning_rate": 7.043439584531409e-08, "loss": 0.2956, "step": 30430 }, { "epoch": 2.855762012012012, "grad_norm": 1.1691600456655566, "learning_rate": 7.034310806476597e-08, "loss": 0.3001, "step": 30431 }, { "epoch": 2.855855855855856, "grad_norm": 0.9779200243440376, "learning_rate": 7.025187906109732e-08, "loss": 0.3127, "step": 30432 }, { "epoch": 2.8559496996997, "grad_norm": 1.0867690605003342, "learning_rate": 7.01607088353956e-08, "loss": 0.2897, "step": 30433 }, { "epoch": 2.8560435435435436, "grad_norm": 1.2394158099585386, "learning_rate": 7.006959738874774e-08, "loss": 0.3415, "step": 30434 }, { "epoch": 2.8561373873873874, "grad_norm": 1.1901378500269035, "learning_rate": 6.997854472224064e-08, "loss": 0.2986, "step": 30435 }, { "epoch": 2.8562312312312312, "grad_norm": 2.404460888251602, "learning_rate": 6.988755083695953e-08, "loss": 0.2787, "step": 30436 }, { "epoch": 2.856325075075075, "grad_norm": 1.0167461020651605, "learning_rate": 6.97966157339891e-08, "loss": 0.2994, "step": 30437 }, { "epoch": 2.856418918918919, "grad_norm": 1.0932575454278874, "learning_rate": 6.970573941441461e-08, "loss": 0.3051, "step": 30438 }, { "epoch": 2.8565127627627627, "grad_norm": 1.0709465013659223, "learning_rate": 6.961492187931795e-08, "loss": 0.2918, "step": 30439 }, { "epoch": 2.8566066066066065, "grad_norm": 1.2642497651251259, "learning_rate": 6.952416312978328e-08, "loss": 0.3183, "step": 30440 }, { "epoch": 2.8567004504504503, "grad_norm": 1.0584787270185023, "learning_rate": 6.943346316689247e-08, "loss": 0.282, "step": 30441 }, { "epoch": 2.8567942942942945, "grad_norm": 1.8750255151166078, "learning_rate": 6.934282199172637e-08, "loss": 0.3273, "step": 30442 }, { "epoch": 2.856888138138138, "grad_norm": 1.2369210284443324, "learning_rate": 6.92522396053663e-08, "loss": 0.3282, "step": 30443 }, { "epoch": 2.856981981981982, "grad_norm": 1.7479750020653624, "learning_rate": 6.916171600889199e-08, "loss": 0.3218, "step": 30444 }, { "epoch": 2.857075825825826, "grad_norm": 1.1030558122110612, "learning_rate": 6.90712512033831e-08, "loss": 0.3149, "step": 30445 }, { "epoch": 2.85716966966967, "grad_norm": 1.0609954681117508, "learning_rate": 6.898084518991766e-08, "loss": 0.2899, "step": 30446 }, { "epoch": 2.8572635135135136, "grad_norm": 0.9784522127683597, "learning_rate": 6.889049796957426e-08, "loss": 0.3488, "step": 30447 }, { "epoch": 2.8573573573573574, "grad_norm": 0.9716064863641656, "learning_rate": 6.880020954342981e-08, "loss": 0.318, "step": 30448 }, { "epoch": 2.857451201201201, "grad_norm": 3.1205998133797554, "learning_rate": 6.870997991256067e-08, "loss": 0.2588, "step": 30449 }, { "epoch": 2.857545045045045, "grad_norm": 1.1007904858545385, "learning_rate": 6.861980907804322e-08, "loss": 0.3141, "step": 30450 }, { "epoch": 2.857638888888889, "grad_norm": 1.4076575410343466, "learning_rate": 6.852969704095159e-08, "loss": 0.3129, "step": 30451 }, { "epoch": 2.8577327327327327, "grad_norm": 1.1358230347743676, "learning_rate": 6.843964380236157e-08, "loss": 0.2674, "step": 30452 }, { "epoch": 2.8578265765765765, "grad_norm": 2.0305310563045076, "learning_rate": 6.834964936334509e-08, "loss": 0.3175, "step": 30453 }, { "epoch": 2.8579204204204203, "grad_norm": 1.1062840928182123, "learning_rate": 6.825971372497686e-08, "loss": 0.3204, "step": 30454 }, { "epoch": 2.8580142642642645, "grad_norm": 1.0986350999050591, "learning_rate": 6.816983688832823e-08, "loss": 0.3013, "step": 30455 }, { "epoch": 2.858108108108108, "grad_norm": 1.133231259869902, "learning_rate": 6.808001885447112e-08, "loss": 0.314, "step": 30456 }, { "epoch": 2.858201951951952, "grad_norm": 1.207717971602538, "learning_rate": 6.799025962447637e-08, "loss": 0.3408, "step": 30457 }, { "epoch": 2.858295795795796, "grad_norm": 1.1772389343485665, "learning_rate": 6.790055919941418e-08, "loss": 0.3012, "step": 30458 }, { "epoch": 2.8583896396396398, "grad_norm": 1.2711539863086632, "learning_rate": 6.781091758035374e-08, "loss": 0.3197, "step": 30459 }, { "epoch": 2.8584834834834836, "grad_norm": 1.0807584167866688, "learning_rate": 6.772133476836529e-08, "loss": 0.2883, "step": 30460 }, { "epoch": 2.8585773273273274, "grad_norm": 1.1791804816203504, "learning_rate": 6.763181076451519e-08, "loss": 0.285, "step": 30461 }, { "epoch": 2.858671171171171, "grad_norm": 1.1796109964391697, "learning_rate": 6.754234556987149e-08, "loss": 0.3031, "step": 30462 }, { "epoch": 2.858765015015015, "grad_norm": 1.151143075970462, "learning_rate": 6.745293918550111e-08, "loss": 0.3271, "step": 30463 }, { "epoch": 2.858858858858859, "grad_norm": 1.0848896892573565, "learning_rate": 6.736359161246986e-08, "loss": 0.3114, "step": 30464 }, { "epoch": 2.8589527027027026, "grad_norm": 1.0424496761001272, "learning_rate": 6.727430285184245e-08, "loss": 0.3201, "step": 30465 }, { "epoch": 2.8590465465465464, "grad_norm": 1.0933285389722487, "learning_rate": 6.718507290468468e-08, "loss": 0.3414, "step": 30466 }, { "epoch": 2.8591403903903903, "grad_norm": 1.1684665287148237, "learning_rate": 6.709590177206016e-08, "loss": 0.3193, "step": 30467 }, { "epoch": 2.8592342342342345, "grad_norm": 1.1172847474957148, "learning_rate": 6.700678945503081e-08, "loss": 0.33, "step": 30468 }, { "epoch": 2.859328078078078, "grad_norm": 0.9403942227118746, "learning_rate": 6.691773595466077e-08, "loss": 0.3041, "step": 30469 }, { "epoch": 2.859421921921922, "grad_norm": 1.0976172724769333, "learning_rate": 6.682874127201144e-08, "loss": 0.3, "step": 30470 }, { "epoch": 2.8595157657657655, "grad_norm": 1.1443140212477974, "learning_rate": 6.673980540814252e-08, "loss": 0.2966, "step": 30471 }, { "epoch": 2.8596096096096097, "grad_norm": 1.0197585360119745, "learning_rate": 6.665092836411646e-08, "loss": 0.3409, "step": 30472 }, { "epoch": 2.8597034534534536, "grad_norm": 0.934593011145205, "learning_rate": 6.65621101409919e-08, "loss": 0.2689, "step": 30473 }, { "epoch": 2.8597972972972974, "grad_norm": 1.0847883831779361, "learning_rate": 6.647335073982742e-08, "loss": 0.3018, "step": 30474 }, { "epoch": 2.859891141141141, "grad_norm": 1.2142410034893911, "learning_rate": 6.638465016168272e-08, "loss": 0.3035, "step": 30475 }, { "epoch": 2.859984984984985, "grad_norm": 1.079030981751505, "learning_rate": 6.629600840761419e-08, "loss": 0.3178, "step": 30476 }, { "epoch": 2.860078828828829, "grad_norm": 1.0329974333351122, "learning_rate": 6.62074254786782e-08, "loss": 0.3065, "step": 30477 }, { "epoch": 2.8601726726726726, "grad_norm": 1.191008653510988, "learning_rate": 6.611890137593224e-08, "loss": 0.2886, "step": 30478 }, { "epoch": 2.8602665165165164, "grad_norm": 1.195253867375931, "learning_rate": 6.603043610043158e-08, "loss": 0.298, "step": 30479 }, { "epoch": 2.8603603603603602, "grad_norm": 1.153064056324363, "learning_rate": 6.594202965323037e-08, "loss": 0.2801, "step": 30480 }, { "epoch": 2.860454204204204, "grad_norm": 1.1824665732588806, "learning_rate": 6.585368203538333e-08, "loss": 0.3077, "step": 30481 }, { "epoch": 2.860548048048048, "grad_norm": 1.1673165294560848, "learning_rate": 6.576539324794351e-08, "loss": 0.3026, "step": 30482 }, { "epoch": 2.860641891891892, "grad_norm": 1.3375978161376285, "learning_rate": 6.567716329196338e-08, "loss": 0.3309, "step": 30483 }, { "epoch": 2.8607357357357355, "grad_norm": 1.1614139399024495, "learning_rate": 6.558899216849546e-08, "loss": 0.291, "step": 30484 }, { "epoch": 2.8608295795795797, "grad_norm": 0.9170934179523185, "learning_rate": 6.550087987859055e-08, "loss": 0.2848, "step": 30485 }, { "epoch": 2.8609234234234235, "grad_norm": 1.1271128457048523, "learning_rate": 6.54128264232995e-08, "loss": 0.2721, "step": 30486 }, { "epoch": 2.8610172672672673, "grad_norm": 1.1503907534684261, "learning_rate": 6.5324831803672e-08, "loss": 0.3118, "step": 30487 }, { "epoch": 2.861111111111111, "grad_norm": 1.1536915644826464, "learning_rate": 6.523689602075723e-08, "loss": 0.3184, "step": 30488 }, { "epoch": 2.861204954954955, "grad_norm": 1.1509513510313427, "learning_rate": 6.514901907560433e-08, "loss": 0.2929, "step": 30489 }, { "epoch": 2.861298798798799, "grad_norm": 1.1121269797056692, "learning_rate": 6.506120096926028e-08, "loss": 0.3317, "step": 30490 }, { "epoch": 2.8613926426426426, "grad_norm": 1.0801915072872674, "learning_rate": 6.497344170277198e-08, "loss": 0.3352, "step": 30491 }, { "epoch": 2.8614864864864864, "grad_norm": 3.288305731273001, "learning_rate": 6.488574127718583e-08, "loss": 0.3218, "step": 30492 }, { "epoch": 2.86158033033033, "grad_norm": 0.9732832038565218, "learning_rate": 6.479809969354877e-08, "loss": 0.2902, "step": 30493 }, { "epoch": 2.861674174174174, "grad_norm": 1.1388277423612783, "learning_rate": 6.471051695290387e-08, "loss": 0.2551, "step": 30494 }, { "epoch": 2.861768018018018, "grad_norm": 1.1773856511725602, "learning_rate": 6.462299305629693e-08, "loss": 0.2901, "step": 30495 }, { "epoch": 2.861861861861862, "grad_norm": 1.200400603129389, "learning_rate": 6.453552800477047e-08, "loss": 0.3196, "step": 30496 }, { "epoch": 2.8619557057057055, "grad_norm": 1.032637002390016, "learning_rate": 6.44481217993681e-08, "loss": 0.3257, "step": 30497 }, { "epoch": 2.8620495495495497, "grad_norm": 1.0921110244540115, "learning_rate": 6.436077444113176e-08, "loss": 0.3179, "step": 30498 }, { "epoch": 2.8621433933933935, "grad_norm": 1.0618534594278906, "learning_rate": 6.427348593110283e-08, "loss": 0.302, "step": 30499 }, { "epoch": 2.8622372372372373, "grad_norm": 1.172114079681618, "learning_rate": 6.41862562703216e-08, "loss": 0.2821, "step": 30500 }, { "epoch": 2.862331081081081, "grad_norm": 1.1266324997720134, "learning_rate": 6.40990854598289e-08, "loss": 0.3391, "step": 30501 }, { "epoch": 2.862424924924925, "grad_norm": 1.0335154133166677, "learning_rate": 6.40119735006639e-08, "loss": 0.2916, "step": 30502 }, { "epoch": 2.8625187687687688, "grad_norm": 1.0422517726178857, "learning_rate": 6.392492039386467e-08, "loss": 0.2766, "step": 30503 }, { "epoch": 2.8626126126126126, "grad_norm": 1.20248202341517, "learning_rate": 6.383792614046925e-08, "loss": 0.2886, "step": 30504 }, { "epoch": 2.8627064564564564, "grad_norm": 1.2408234494835784, "learning_rate": 6.375099074151626e-08, "loss": 0.3322, "step": 30505 }, { "epoch": 2.8628003003003, "grad_norm": 1.3080979639172434, "learning_rate": 6.366411419804042e-08, "loss": 0.3383, "step": 30506 }, { "epoch": 2.862894144144144, "grad_norm": 1.2394770489509135, "learning_rate": 6.357729651107814e-08, "loss": 0.3111, "step": 30507 }, { "epoch": 2.862987987987988, "grad_norm": 1.258955584100051, "learning_rate": 6.349053768166525e-08, "loss": 0.3074, "step": 30508 }, { "epoch": 2.863081831831832, "grad_norm": 1.18566259410249, "learning_rate": 6.340383771083536e-08, "loss": 0.2998, "step": 30509 }, { "epoch": 2.8631756756756754, "grad_norm": 1.1058985727963844, "learning_rate": 6.331719659962265e-08, "loss": 0.316, "step": 30510 }, { "epoch": 2.8632695195195197, "grad_norm": 1.307325988372884, "learning_rate": 6.323061434905964e-08, "loss": 0.3082, "step": 30511 }, { "epoch": 2.8633633633633635, "grad_norm": 1.168211511886253, "learning_rate": 6.314409096017992e-08, "loss": 0.2792, "step": 30512 }, { "epoch": 2.8634572072072073, "grad_norm": 1.1809773613928007, "learning_rate": 6.305762643401381e-08, "loss": 0.3272, "step": 30513 }, { "epoch": 2.863551051051051, "grad_norm": 1.095708213502168, "learning_rate": 6.297122077159268e-08, "loss": 0.2971, "step": 30514 }, { "epoch": 2.863644894894895, "grad_norm": 1.2086976402003549, "learning_rate": 6.288487397394628e-08, "loss": 0.3755, "step": 30515 }, { "epoch": 2.8637387387387387, "grad_norm": 1.0067987822515132, "learning_rate": 6.279858604210487e-08, "loss": 0.2664, "step": 30516 }, { "epoch": 2.8638325825825826, "grad_norm": 1.878366832405653, "learning_rate": 6.27123569770971e-08, "loss": 0.2982, "step": 30517 }, { "epoch": 2.8639264264264264, "grad_norm": 0.9213436333017561, "learning_rate": 6.262618677995103e-08, "loss": 0.3119, "step": 30518 }, { "epoch": 2.86402027027027, "grad_norm": 1.0579285361227837, "learning_rate": 6.25400754516936e-08, "loss": 0.316, "step": 30519 }, { "epoch": 2.864114114114114, "grad_norm": 1.2121919373728691, "learning_rate": 6.245402299335234e-08, "loss": 0.289, "step": 30520 }, { "epoch": 2.864207957957958, "grad_norm": 2.0329422911402917, "learning_rate": 6.23680294059531e-08, "loss": 0.3346, "step": 30521 }, { "epoch": 2.864301801801802, "grad_norm": 1.0396680291256049, "learning_rate": 6.22820946905206e-08, "loss": 0.343, "step": 30522 }, { "epoch": 2.8643956456456454, "grad_norm": 2.068419242690957, "learning_rate": 6.219621884808014e-08, "loss": 0.3294, "step": 30523 }, { "epoch": 2.8644894894894897, "grad_norm": 1.1182167124940245, "learning_rate": 6.211040187965534e-08, "loss": 0.3486, "step": 30524 }, { "epoch": 2.8645833333333335, "grad_norm": 0.9808232002654478, "learning_rate": 6.202464378626871e-08, "loss": 0.3026, "step": 30525 }, { "epoch": 2.8646771771771773, "grad_norm": 1.621408653804326, "learning_rate": 6.193894456894445e-08, "loss": 0.3349, "step": 30526 }, { "epoch": 2.864771021021021, "grad_norm": 1.130707446333002, "learning_rate": 6.185330422870284e-08, "loss": 0.3021, "step": 30527 }, { "epoch": 2.864864864864865, "grad_norm": 1.2177298695605583, "learning_rate": 6.176772276656474e-08, "loss": 0.2782, "step": 30528 }, { "epoch": 2.8649587087087087, "grad_norm": 1.5173427085873348, "learning_rate": 6.168220018355209e-08, "loss": 0.2568, "step": 30529 }, { "epoch": 2.8650525525525525, "grad_norm": 1.210054920091845, "learning_rate": 6.159673648068354e-08, "loss": 0.34, "step": 30530 }, { "epoch": 2.8651463963963963, "grad_norm": 1.047616971562697, "learning_rate": 6.151133165897826e-08, "loss": 0.3128, "step": 30531 }, { "epoch": 2.86524024024024, "grad_norm": 1.0277598906872956, "learning_rate": 6.14259857194549e-08, "loss": 0.3351, "step": 30532 }, { "epoch": 2.865334084084084, "grad_norm": 1.4900368737855216, "learning_rate": 6.134069866313042e-08, "loss": 0.2931, "step": 30533 }, { "epoch": 2.8654279279279278, "grad_norm": 1.1461567962066208, "learning_rate": 6.125547049102231e-08, "loss": 0.3434, "step": 30534 }, { "epoch": 2.865521771771772, "grad_norm": 1.2738131530474912, "learning_rate": 6.117030120414646e-08, "loss": 0.3186, "step": 30535 }, { "epoch": 2.8656156156156154, "grad_norm": 1.0419496584728307, "learning_rate": 6.108519080351816e-08, "loss": 0.2855, "step": 30536 }, { "epoch": 2.8657094594594597, "grad_norm": 1.3253959628290861, "learning_rate": 6.100013929015269e-08, "loss": 0.3203, "step": 30537 }, { "epoch": 2.8658033033033035, "grad_norm": 1.1098719523806408, "learning_rate": 6.091514666506371e-08, "loss": 0.2943, "step": 30538 }, { "epoch": 2.8658971471471473, "grad_norm": 1.047669797327498, "learning_rate": 6.083021292926484e-08, "loss": 0.3246, "step": 30539 }, { "epoch": 2.865990990990991, "grad_norm": 1.469628319524333, "learning_rate": 6.07453380837686e-08, "loss": 0.316, "step": 30540 }, { "epoch": 2.866084834834835, "grad_norm": 1.1621710041738584, "learning_rate": 6.066052212958695e-08, "loss": 0.3052, "step": 30541 }, { "epoch": 2.8661786786786787, "grad_norm": 1.2883177123787455, "learning_rate": 6.057576506773133e-08, "loss": 0.2959, "step": 30542 }, { "epoch": 2.8662725225225225, "grad_norm": 1.1140451458890146, "learning_rate": 6.049106689921147e-08, "loss": 0.3192, "step": 30543 }, { "epoch": 2.8663663663663663, "grad_norm": 1.202052915783696, "learning_rate": 6.040642762503879e-08, "loss": 0.3063, "step": 30544 }, { "epoch": 2.86646021021021, "grad_norm": 1.1654943353479883, "learning_rate": 6.032184724622136e-08, "loss": 0.2675, "step": 30545 }, { "epoch": 2.866554054054054, "grad_norm": 1.0527301114287917, "learning_rate": 6.023732576376783e-08, "loss": 0.3072, "step": 30546 }, { "epoch": 2.8666478978978978, "grad_norm": 1.054613686362896, "learning_rate": 6.015286317868629e-08, "loss": 0.3051, "step": 30547 }, { "epoch": 2.866741741741742, "grad_norm": 1.6939748295616095, "learning_rate": 6.00684594919837e-08, "loss": 0.3125, "step": 30548 }, { "epoch": 2.8668355855855854, "grad_norm": 1.2746632140604297, "learning_rate": 5.998411470466592e-08, "loss": 0.2922, "step": 30549 }, { "epoch": 2.8669294294294296, "grad_norm": 1.3030863344793893, "learning_rate": 5.989982881773937e-08, "loss": 0.31, "step": 30550 }, { "epoch": 2.867023273273273, "grad_norm": 1.0081975952908633, "learning_rate": 5.981560183220825e-08, "loss": 0.3023, "step": 30551 }, { "epoch": 2.8671171171171173, "grad_norm": 1.1282768689834763, "learning_rate": 5.973143374907675e-08, "loss": 0.2906, "step": 30552 }, { "epoch": 2.867210960960961, "grad_norm": 2.203028797835633, "learning_rate": 5.964732456934908e-08, "loss": 0.2873, "step": 30553 }, { "epoch": 2.867304804804805, "grad_norm": 1.1682212247031616, "learning_rate": 5.956327429402831e-08, "loss": 0.2979, "step": 30554 }, { "epoch": 2.8673986486486487, "grad_norm": 1.1048886485994216, "learning_rate": 5.947928292411531e-08, "loss": 0.3316, "step": 30555 }, { "epoch": 2.8674924924924925, "grad_norm": 1.227279050634867, "learning_rate": 5.939535046061262e-08, "loss": 0.3233, "step": 30556 }, { "epoch": 2.8675863363363363, "grad_norm": 1.1068020493575574, "learning_rate": 5.931147690452111e-08, "loss": 0.3097, "step": 30557 }, { "epoch": 2.86768018018018, "grad_norm": 1.3537398652294133, "learning_rate": 5.92276622568394e-08, "loss": 0.381, "step": 30558 }, { "epoch": 2.867774024024024, "grad_norm": 1.1368674833019108, "learning_rate": 5.9143906518568387e-08, "loss": 0.3003, "step": 30559 }, { "epoch": 2.8678678678678677, "grad_norm": 1.3221448216712106, "learning_rate": 5.906020969070614e-08, "loss": 0.2972, "step": 30560 }, { "epoch": 2.8679617117117115, "grad_norm": 1.1897852384235275, "learning_rate": 5.897657177425076e-08, "loss": 0.2845, "step": 30561 }, { "epoch": 2.8680555555555554, "grad_norm": 1.3141007879577844, "learning_rate": 5.889299277019866e-08, "loss": 0.2899, "step": 30562 }, { "epoch": 2.8681493993993996, "grad_norm": 1.1778029863568584, "learning_rate": 5.8809472679547394e-08, "loss": 0.3062, "step": 30563 }, { "epoch": 2.868243243243243, "grad_norm": 1.6744312249798654, "learning_rate": 5.872601150329171e-08, "loss": 0.3323, "step": 30564 }, { "epoch": 2.8683370870870872, "grad_norm": 1.098809752921782, "learning_rate": 5.864260924242804e-08, "loss": 0.3006, "step": 30565 }, { "epoch": 2.868430930930931, "grad_norm": 1.2997985213830086, "learning_rate": 5.855926589794947e-08, "loss": 0.2917, "step": 30566 }, { "epoch": 2.868524774774775, "grad_norm": 1.047907682785105, "learning_rate": 5.8475981470850763e-08, "loss": 0.3456, "step": 30567 }, { "epoch": 2.8686186186186187, "grad_norm": 1.338300641985333, "learning_rate": 5.839275596212446e-08, "loss": 0.2922, "step": 30568 }, { "epoch": 2.8687124624624625, "grad_norm": 1.053160907074021, "learning_rate": 5.8309589372763095e-08, "loss": 0.2855, "step": 30569 }, { "epoch": 2.8688063063063063, "grad_norm": 1.11106721721028, "learning_rate": 5.8226481703757555e-08, "loss": 0.3397, "step": 30570 }, { "epoch": 2.86890015015015, "grad_norm": 1.0817760518797948, "learning_rate": 5.814343295609981e-08, "loss": 0.3345, "step": 30571 }, { "epoch": 2.868993993993994, "grad_norm": 1.1712775050908397, "learning_rate": 5.806044313077908e-08, "loss": 0.2816, "step": 30572 }, { "epoch": 2.8690878378378377, "grad_norm": 1.1659208425772378, "learning_rate": 5.797751222878567e-08, "loss": 0.3243, "step": 30573 }, { "epoch": 2.8691816816816815, "grad_norm": 1.182370441957557, "learning_rate": 5.7894640251108245e-08, "loss": 0.3489, "step": 30574 }, { "epoch": 2.8692755255255253, "grad_norm": 1.2965903872135407, "learning_rate": 5.781182719873435e-08, "loss": 0.3478, "step": 30575 }, { "epoch": 2.8693693693693696, "grad_norm": 1.0020527583499914, "learning_rate": 5.772907307265152e-08, "loss": 0.2942, "step": 30576 }, { "epoch": 2.869463213213213, "grad_norm": 1.064644919641583, "learning_rate": 5.764637787384675e-08, "loss": 0.3146, "step": 30577 }, { "epoch": 2.869557057057057, "grad_norm": 1.1591643716800404, "learning_rate": 5.756374160330647e-08, "loss": 0.3543, "step": 30578 }, { "epoch": 2.869650900900901, "grad_norm": 1.2797838725122717, "learning_rate": 5.748116426201433e-08, "loss": 0.3077, "step": 30579 }, { "epoch": 2.869744744744745, "grad_norm": 1.3299180364692906, "learning_rate": 5.739864585095678e-08, "loss": 0.2968, "step": 30580 }, { "epoch": 2.8698385885885886, "grad_norm": 1.3223095079273122, "learning_rate": 5.731618637111691e-08, "loss": 0.2859, "step": 30581 }, { "epoch": 2.8699324324324325, "grad_norm": 1.1627782378061104, "learning_rate": 5.7233785823477274e-08, "loss": 0.3071, "step": 30582 }, { "epoch": 2.8700262762762763, "grad_norm": 1.1823852271665174, "learning_rate": 5.715144420902152e-08, "loss": 0.3052, "step": 30583 }, { "epoch": 2.87012012012012, "grad_norm": 1.1956260988745593, "learning_rate": 5.706916152873054e-08, "loss": 0.3614, "step": 30584 }, { "epoch": 2.870213963963964, "grad_norm": 1.0511999838956738, "learning_rate": 5.6986937783585775e-08, "loss": 0.309, "step": 30585 }, { "epoch": 2.8703078078078077, "grad_norm": 1.0444762968859578, "learning_rate": 5.6904772974567534e-08, "loss": 0.3058, "step": 30586 }, { "epoch": 2.8704016516516515, "grad_norm": 1.1867191125502752, "learning_rate": 5.6822667102655604e-08, "loss": 0.2964, "step": 30587 }, { "epoch": 2.8704954954954953, "grad_norm": 1.1815461386283916, "learning_rate": 5.674062016882809e-08, "loss": 0.3351, "step": 30588 }, { "epoch": 2.8705893393393396, "grad_norm": 1.19005912782656, "learning_rate": 5.6658632174064754e-08, "loss": 0.2709, "step": 30589 }, { "epoch": 2.870683183183183, "grad_norm": 1.1165152202610082, "learning_rate": 5.6576703119342044e-08, "loss": 0.2584, "step": 30590 }, { "epoch": 2.870777027027027, "grad_norm": 1.1384151326671625, "learning_rate": 5.6494833005636965e-08, "loss": 0.3243, "step": 30591 }, { "epoch": 2.870870870870871, "grad_norm": 1.084826892900643, "learning_rate": 5.641302183392594e-08, "loss": 0.3225, "step": 30592 }, { "epoch": 2.870964714714715, "grad_norm": 1.018308824551657, "learning_rate": 5.6331269605184315e-08, "loss": 0.3208, "step": 30593 }, { "epoch": 2.8710585585585586, "grad_norm": 0.9766052185914047, "learning_rate": 5.624957632038686e-08, "loss": 0.2876, "step": 30594 }, { "epoch": 2.8711524024024024, "grad_norm": 1.1824773615591058, "learning_rate": 5.616794198050779e-08, "loss": 0.2922, "step": 30595 }, { "epoch": 2.8712462462462462, "grad_norm": 1.0741499329240565, "learning_rate": 5.608636658651967e-08, "loss": 0.3122, "step": 30596 }, { "epoch": 2.87134009009009, "grad_norm": 1.141331421841735, "learning_rate": 5.600485013939616e-08, "loss": 0.3204, "step": 30597 }, { "epoch": 2.871433933933934, "grad_norm": 1.0802262919474483, "learning_rate": 5.592339264010871e-08, "loss": 0.3102, "step": 30598 }, { "epoch": 2.8715277777777777, "grad_norm": 1.6190570128003106, "learning_rate": 5.5841994089628205e-08, "loss": 0.3296, "step": 30599 }, { "epoch": 2.8716216216216215, "grad_norm": 0.9961822535172549, "learning_rate": 5.576065448892498e-08, "loss": 0.2827, "step": 30600 }, { "epoch": 2.8717154654654653, "grad_norm": 1.136415859886509, "learning_rate": 5.5679373838970486e-08, "loss": 0.3355, "step": 30601 }, { "epoch": 2.8718093093093096, "grad_norm": 1.1662404482746578, "learning_rate": 5.5598152140732276e-08, "loss": 0.3097, "step": 30602 }, { "epoch": 2.871903153153153, "grad_norm": 1.8346226239819776, "learning_rate": 5.551698939517902e-08, "loss": 0.2811, "step": 30603 }, { "epoch": 2.871996996996997, "grad_norm": 1.1599473916917933, "learning_rate": 5.5435885603278836e-08, "loss": 0.2975, "step": 30604 }, { "epoch": 2.872090840840841, "grad_norm": 1.3259763717180655, "learning_rate": 5.5354840765998176e-08, "loss": 0.2926, "step": 30605 }, { "epoch": 2.872184684684685, "grad_norm": 1.152855228078436, "learning_rate": 5.5273854884304034e-08, "loss": 0.3017, "step": 30606 }, { "epoch": 2.8722785285285286, "grad_norm": 1.3603533430276045, "learning_rate": 5.5192927959161756e-08, "loss": 0.2968, "step": 30607 }, { "epoch": 2.8723723723723724, "grad_norm": 1.1066547603465202, "learning_rate": 5.5112059991536126e-08, "loss": 0.3067, "step": 30608 }, { "epoch": 2.8724662162162162, "grad_norm": 1.1374356158863776, "learning_rate": 5.5031250982390816e-08, "loss": 0.3051, "step": 30609 }, { "epoch": 2.87256006006006, "grad_norm": 1.0342553524222737, "learning_rate": 5.495050093269061e-08, "loss": 0.3063, "step": 30610 }, { "epoch": 2.872653903903904, "grad_norm": 1.221597606066158, "learning_rate": 5.486980984339752e-08, "loss": 0.2983, "step": 30611 }, { "epoch": 2.8727477477477477, "grad_norm": 1.1829054951311475, "learning_rate": 5.478917771547354e-08, "loss": 0.3188, "step": 30612 }, { "epoch": 2.8728415915915915, "grad_norm": 1.0166162771417806, "learning_rate": 5.470860454988014e-08, "loss": 0.3244, "step": 30613 }, { "epoch": 2.8729354354354353, "grad_norm": 1.7123557028978846, "learning_rate": 5.462809034757821e-08, "loss": 0.3081, "step": 30614 }, { "epoch": 2.8730292792792795, "grad_norm": 1.0863623147093224, "learning_rate": 5.454763510952754e-08, "loss": 0.3144, "step": 30615 }, { "epoch": 2.873123123123123, "grad_norm": 2.2431279027000186, "learning_rate": 5.4467238836687363e-08, "loss": 0.3359, "step": 30616 }, { "epoch": 2.873216966966967, "grad_norm": 1.0373345690359193, "learning_rate": 5.438690153001691e-08, "loss": 0.2958, "step": 30617 }, { "epoch": 2.873310810810811, "grad_norm": 0.9820315149345485, "learning_rate": 5.430662319047264e-08, "loss": 0.3205, "step": 30618 }, { "epoch": 2.8734046546546548, "grad_norm": 1.160542563306599, "learning_rate": 5.422640381901323e-08, "loss": 0.2638, "step": 30619 }, { "epoch": 2.8734984984984986, "grad_norm": 1.2820074861802984, "learning_rate": 5.4146243416594026e-08, "loss": 0.2976, "step": 30620 }, { "epoch": 2.8735923423423424, "grad_norm": 1.1888701594503308, "learning_rate": 5.4066141984171484e-08, "loss": 0.3054, "step": 30621 }, { "epoch": 2.873686186186186, "grad_norm": 1.05233321472883, "learning_rate": 5.39860995227004e-08, "loss": 0.309, "step": 30622 }, { "epoch": 2.87378003003003, "grad_norm": 1.0967623023335915, "learning_rate": 5.3906116033135006e-08, "loss": 0.2862, "step": 30623 }, { "epoch": 2.873873873873874, "grad_norm": 1.3801263184239096, "learning_rate": 5.3826191516429536e-08, "loss": 0.2932, "step": 30624 }, { "epoch": 2.8739677177177176, "grad_norm": 1.4819939347698412, "learning_rate": 5.3746325973536575e-08, "loss": 0.2754, "step": 30625 }, { "epoch": 2.8740615615615615, "grad_norm": 1.2752926031932421, "learning_rate": 5.366651940540812e-08, "loss": 0.3102, "step": 30626 }, { "epoch": 2.8741554054054053, "grad_norm": 1.8601022077676879, "learning_rate": 5.3586771812995655e-08, "loss": 0.3436, "step": 30627 }, { "epoch": 2.8742492492492495, "grad_norm": 1.432152739851237, "learning_rate": 5.3507083197250065e-08, "loss": 0.3201, "step": 30628 }, { "epoch": 2.874343093093093, "grad_norm": 1.019281236774198, "learning_rate": 5.342745355912227e-08, "loss": 0.3028, "step": 30629 }, { "epoch": 2.874436936936937, "grad_norm": 1.2852466166512357, "learning_rate": 5.3347882899560945e-08, "loss": 0.3047, "step": 30630 }, { "epoch": 2.8745307807807805, "grad_norm": 1.1000604456758811, "learning_rate": 5.326837121951478e-08, "loss": 0.3273, "step": 30631 }, { "epoch": 2.8746246246246248, "grad_norm": 1.1499525646992868, "learning_rate": 5.318891851993246e-08, "loss": 0.3344, "step": 30632 }, { "epoch": 2.8747184684684686, "grad_norm": 1.1067409262836874, "learning_rate": 5.3109524801760457e-08, "loss": 0.3373, "step": 30633 }, { "epoch": 2.8748123123123124, "grad_norm": 1.2133715088220098, "learning_rate": 5.303019006594578e-08, "loss": 0.3047, "step": 30634 }, { "epoch": 2.874906156156156, "grad_norm": 1.046295754927732, "learning_rate": 5.295091431343491e-08, "loss": 0.3157, "step": 30635 }, { "epoch": 2.875, "grad_norm": 1.1906458434724068, "learning_rate": 5.2871697545172075e-08, "loss": 0.3122, "step": 30636 }, { "epoch": 2.875093843843844, "grad_norm": 1.048230975720892, "learning_rate": 5.279253976210208e-08, "loss": 0.2913, "step": 30637 }, { "epoch": 2.8751876876876876, "grad_norm": 1.2126198689842245, "learning_rate": 5.271344096516973e-08, "loss": 0.3331, "step": 30638 }, { "epoch": 2.8752815315315314, "grad_norm": 1.9471690146117095, "learning_rate": 5.2634401155316486e-08, "loss": 0.3143, "step": 30639 }, { "epoch": 2.8753753753753752, "grad_norm": 1.1296575971430298, "learning_rate": 5.2555420333485485e-08, "loss": 0.3302, "step": 30640 }, { "epoch": 2.8754692192192195, "grad_norm": 0.9606263726192996, "learning_rate": 5.247649850061875e-08, "loss": 0.3394, "step": 30641 }, { "epoch": 2.875563063063063, "grad_norm": 1.1144918379288502, "learning_rate": 5.239763565765721e-08, "loss": 0.325, "step": 30642 }, { "epoch": 2.875656906906907, "grad_norm": 0.9628297706085925, "learning_rate": 5.231883180554065e-08, "loss": 0.3171, "step": 30643 }, { "epoch": 2.8757507507507505, "grad_norm": 2.492721105819481, "learning_rate": 5.224008694520944e-08, "loss": 0.2935, "step": 30644 }, { "epoch": 2.8758445945945947, "grad_norm": 1.1101713854515018, "learning_rate": 5.216140107760115e-08, "loss": 0.3251, "step": 30645 }, { "epoch": 2.8759384384384385, "grad_norm": 1.1044714826728512, "learning_rate": 5.20827742036556e-08, "loss": 0.317, "step": 30646 }, { "epoch": 2.8760322822822824, "grad_norm": 1.1888194494905024, "learning_rate": 5.200420632430925e-08, "loss": 0.2775, "step": 30647 }, { "epoch": 2.876126126126126, "grad_norm": 1.1942882290224515, "learning_rate": 5.192569744049914e-08, "loss": 0.3137, "step": 30648 }, { "epoch": 2.87621996996997, "grad_norm": 1.0857517331906879, "learning_rate": 5.1847247553161176e-08, "loss": 0.3199, "step": 30649 }, { "epoch": 2.876313813813814, "grad_norm": 1.0607649458178878, "learning_rate": 5.176885666323073e-08, "loss": 0.2954, "step": 30650 }, { "epoch": 2.8764076576576576, "grad_norm": 1.3666437213209524, "learning_rate": 5.1690524771642604e-08, "loss": 0.3035, "step": 30651 }, { "epoch": 2.8765015015015014, "grad_norm": 1.1679158014953772, "learning_rate": 5.161225187933106e-08, "loss": 0.29, "step": 30652 }, { "epoch": 2.8765953453453452, "grad_norm": 1.1014514235874644, "learning_rate": 5.153403798722867e-08, "loss": 0.3264, "step": 30653 }, { "epoch": 2.876689189189189, "grad_norm": 1.1622526571094518, "learning_rate": 5.1455883096268034e-08, "loss": 0.2968, "step": 30654 }, { "epoch": 2.876783033033033, "grad_norm": 1.0312430087182036, "learning_rate": 5.137778720738174e-08, "loss": 0.3285, "step": 30655 }, { "epoch": 2.876876876876877, "grad_norm": 1.2318491310156336, "learning_rate": 5.1299750321500695e-08, "loss": 0.3408, "step": 30656 }, { "epoch": 2.8769707207207205, "grad_norm": 1.875262481254698, "learning_rate": 5.122177243955473e-08, "loss": 0.3413, "step": 30657 }, { "epoch": 2.8770645645645647, "grad_norm": 1.065108224780282, "learning_rate": 5.114385356247476e-08, "loss": 0.2843, "step": 30658 }, { "epoch": 2.8771584084084085, "grad_norm": 1.0329918948672476, "learning_rate": 5.1065993691188364e-08, "loss": 0.2815, "step": 30659 }, { "epoch": 2.8772522522522523, "grad_norm": 1.064991336809971, "learning_rate": 5.0988192826624815e-08, "loss": 0.3184, "step": 30660 }, { "epoch": 2.877346096096096, "grad_norm": 1.091610111008324, "learning_rate": 5.09104509697117e-08, "loss": 0.2951, "step": 30661 }, { "epoch": 2.87743993993994, "grad_norm": 1.8549633317132235, "learning_rate": 5.0832768121375495e-08, "loss": 0.285, "step": 30662 }, { "epoch": 2.8775337837837838, "grad_norm": 1.0617668649842005, "learning_rate": 5.075514428254269e-08, "loss": 0.3326, "step": 30663 }, { "epoch": 2.8776276276276276, "grad_norm": 1.2952912387567461, "learning_rate": 5.06775794541392e-08, "loss": 0.2928, "step": 30664 }, { "epoch": 2.8777214714714714, "grad_norm": 1.2707277407493274, "learning_rate": 5.060007363708874e-08, "loss": 0.2893, "step": 30665 }, { "epoch": 2.877815315315315, "grad_norm": 1.1007113706765463, "learning_rate": 5.052262683231612e-08, "loss": 0.316, "step": 30666 }, { "epoch": 2.877909159159159, "grad_norm": 1.201775207616848, "learning_rate": 5.0445239040745606e-08, "loss": 0.3443, "step": 30667 }, { "epoch": 2.878003003003003, "grad_norm": 1.2216191775745964, "learning_rate": 5.036791026329812e-08, "loss": 0.3356, "step": 30668 }, { "epoch": 2.878096846846847, "grad_norm": 1.0500492279771108, "learning_rate": 5.0290640500896826e-08, "loss": 0.3511, "step": 30669 }, { "epoch": 2.8781906906906904, "grad_norm": 1.182548663256785, "learning_rate": 5.0213429754463194e-08, "loss": 0.2847, "step": 30670 }, { "epoch": 2.8782845345345347, "grad_norm": 0.9934831624129893, "learning_rate": 5.013627802491705e-08, "loss": 0.293, "step": 30671 }, { "epoch": 2.8783783783783785, "grad_norm": 1.0820654618962264, "learning_rate": 5.0059185313178773e-08, "loss": 0.2696, "step": 30672 }, { "epoch": 2.8784722222222223, "grad_norm": 1.0912809579767224, "learning_rate": 4.9982151620167615e-08, "loss": 0.2902, "step": 30673 }, { "epoch": 2.878566066066066, "grad_norm": 1.0888048686558476, "learning_rate": 4.990517694680175e-08, "loss": 0.319, "step": 30674 }, { "epoch": 2.87865990990991, "grad_norm": 1.2911597582525793, "learning_rate": 4.98282612939982e-08, "loss": 0.3081, "step": 30675 }, { "epoch": 2.8787537537537538, "grad_norm": 3.5539133129288243, "learning_rate": 4.975140466267569e-08, "loss": 0.2846, "step": 30676 }, { "epoch": 2.8788475975975976, "grad_norm": 1.178758958526642, "learning_rate": 4.967460705375016e-08, "loss": 0.2941, "step": 30677 }, { "epoch": 2.8789414414414414, "grad_norm": 1.0925078770336538, "learning_rate": 4.9597868468135856e-08, "loss": 0.336, "step": 30678 }, { "epoch": 2.879035285285285, "grad_norm": 1.567754099537434, "learning_rate": 4.9521188906749284e-08, "loss": 0.3051, "step": 30679 }, { "epoch": 2.879129129129129, "grad_norm": 1.1978489076128707, "learning_rate": 4.944456837050471e-08, "loss": 0.3356, "step": 30680 }, { "epoch": 2.879222972972973, "grad_norm": 0.9577671394832846, "learning_rate": 4.936800686031418e-08, "loss": 0.3326, "step": 30681 }, { "epoch": 2.879316816816817, "grad_norm": 1.1314850907330727, "learning_rate": 4.929150437709251e-08, "loss": 0.3104, "step": 30682 }, { "epoch": 2.8794106606606604, "grad_norm": 1.0154012094256561, "learning_rate": 4.921506092175066e-08, "loss": 0.2854, "step": 30683 }, { "epoch": 2.8795045045045047, "grad_norm": 1.2227554079198055, "learning_rate": 4.91386764952001e-08, "loss": 0.3213, "step": 30684 }, { "epoch": 2.8795983483483485, "grad_norm": 1.4703422135700754, "learning_rate": 4.906235109835233e-08, "loss": 0.3097, "step": 30685 }, { "epoch": 2.8796921921921923, "grad_norm": 1.244865994277092, "learning_rate": 4.898608473211608e-08, "loss": 0.2863, "step": 30686 }, { "epoch": 2.879786036036036, "grad_norm": 1.2226220065101878, "learning_rate": 4.8909877397401716e-08, "loss": 0.3087, "step": 30687 }, { "epoch": 2.87987987987988, "grad_norm": 1.0847753322365377, "learning_rate": 4.8833729095117964e-08, "loss": 0.302, "step": 30688 }, { "epoch": 2.8799737237237237, "grad_norm": 1.3013731051615587, "learning_rate": 4.8757639826171875e-08, "loss": 0.344, "step": 30689 }, { "epoch": 2.8800675675675675, "grad_norm": 1.1334546553987068, "learning_rate": 4.868160959147106e-08, "loss": 0.318, "step": 30690 }, { "epoch": 2.8801614114114114, "grad_norm": 0.9637438866726113, "learning_rate": 4.860563839192256e-08, "loss": 0.3372, "step": 30691 }, { "epoch": 2.880255255255255, "grad_norm": 2.344037714661097, "learning_rate": 4.852972622843177e-08, "loss": 0.2653, "step": 30692 }, { "epoch": 2.880349099099099, "grad_norm": 1.0498854415912815, "learning_rate": 4.8453873101903524e-08, "loss": 0.3173, "step": 30693 }, { "epoch": 2.880442942942943, "grad_norm": 1.1614074807645878, "learning_rate": 4.837807901324265e-08, "loss": 0.3177, "step": 30694 }, { "epoch": 2.880536786786787, "grad_norm": 1.1228984211365436, "learning_rate": 4.830234396335287e-08, "loss": 0.2585, "step": 30695 }, { "epoch": 2.8806306306306304, "grad_norm": 1.062832842598234, "learning_rate": 4.8226667953136796e-08, "loss": 0.301, "step": 30696 }, { "epoch": 2.8807244744744747, "grad_norm": 1.1678598222809315, "learning_rate": 4.815105098349759e-08, "loss": 0.3132, "step": 30697 }, { "epoch": 2.8808183183183185, "grad_norm": 1.3766653749765192, "learning_rate": 4.807549305533565e-08, "loss": 0.337, "step": 30698 }, { "epoch": 2.8809121621621623, "grad_norm": 1.2662635060842795, "learning_rate": 4.799999416955192e-08, "loss": 0.3185, "step": 30699 }, { "epoch": 2.881006006006006, "grad_norm": 1.1623811948004392, "learning_rate": 4.79245543270479e-08, "loss": 0.2704, "step": 30700 }, { "epoch": 2.88109984984985, "grad_norm": 0.919451579997608, "learning_rate": 4.784917352872231e-08, "loss": 0.3213, "step": 30701 }, { "epoch": 2.8811936936936937, "grad_norm": 1.1845535370864038, "learning_rate": 4.777385177547278e-08, "loss": 0.2953, "step": 30702 }, { "epoch": 2.8812875375375375, "grad_norm": 1.2176111805365164, "learning_rate": 4.7698589068199685e-08, "loss": 0.2973, "step": 30703 }, { "epoch": 2.8813813813813813, "grad_norm": 1.4266576244125393, "learning_rate": 4.762338540779843e-08, "loss": 0.303, "step": 30704 }, { "epoch": 2.881475225225225, "grad_norm": 1.185148931371819, "learning_rate": 4.7548240795166626e-08, "loss": 0.3064, "step": 30705 }, { "epoch": 2.881569069069069, "grad_norm": 1.1532424609285117, "learning_rate": 4.747315523120022e-08, "loss": 0.3174, "step": 30706 }, { "epoch": 2.8816629129129128, "grad_norm": 2.0998490322330867, "learning_rate": 4.7398128716794054e-08, "loss": 0.3483, "step": 30707 }, { "epoch": 2.881756756756757, "grad_norm": 1.1909142825228, "learning_rate": 4.7323161252842976e-08, "loss": 0.2765, "step": 30708 }, { "epoch": 2.8818506006006004, "grad_norm": 1.6091262398259425, "learning_rate": 4.7248252840240704e-08, "loss": 0.3091, "step": 30709 }, { "epoch": 2.8819444444444446, "grad_norm": 1.073905884329177, "learning_rate": 4.717340347988042e-08, "loss": 0.2773, "step": 30710 }, { "epoch": 2.8820382882882885, "grad_norm": 1.2439910319217207, "learning_rate": 4.709861317265418e-08, "loss": 0.2742, "step": 30711 }, { "epoch": 2.8821321321321323, "grad_norm": 1.0299641300008524, "learning_rate": 4.702388191945462e-08, "loss": 0.3318, "step": 30712 }, { "epoch": 2.882225975975976, "grad_norm": 1.23687451619332, "learning_rate": 4.694920972117212e-08, "loss": 0.3064, "step": 30713 }, { "epoch": 2.88231981981982, "grad_norm": 2.8382346936336824, "learning_rate": 4.687459657869708e-08, "loss": 0.3126, "step": 30714 }, { "epoch": 2.8824136636636637, "grad_norm": 1.1108576994079273, "learning_rate": 4.6800042492919365e-08, "loss": 0.2877, "step": 30715 }, { "epoch": 2.8825075075075075, "grad_norm": 1.2147985664633811, "learning_rate": 4.6725547464727686e-08, "loss": 0.3083, "step": 30716 }, { "epoch": 2.8826013513513513, "grad_norm": 1.0806404807182095, "learning_rate": 4.665111149500967e-08, "loss": 0.3247, "step": 30717 }, { "epoch": 2.882695195195195, "grad_norm": 1.6131165523932185, "learning_rate": 4.657673458465406e-08, "loss": 0.2874, "step": 30718 }, { "epoch": 2.882789039039039, "grad_norm": 0.9985934527996638, "learning_rate": 4.650241673454681e-08, "loss": 0.2887, "step": 30719 }, { "epoch": 2.8828828828828827, "grad_norm": 1.2187356928313626, "learning_rate": 4.642815794557443e-08, "loss": 0.289, "step": 30720 }, { "epoch": 2.882976726726727, "grad_norm": 1.1640196508661937, "learning_rate": 4.635395821862232e-08, "loss": 0.2855, "step": 30721 }, { "epoch": 2.8830705705705704, "grad_norm": 1.163551322668732, "learning_rate": 4.627981755457478e-08, "loss": 0.3281, "step": 30722 }, { "epoch": 2.8831644144144146, "grad_norm": 1.2393658863688868, "learning_rate": 4.620573595431554e-08, "loss": 0.3146, "step": 30723 }, { "epoch": 2.883258258258258, "grad_norm": 1.1166301155981855, "learning_rate": 4.613171341872891e-08, "loss": 0.3421, "step": 30724 }, { "epoch": 2.8833521021021022, "grad_norm": 1.2175544856541372, "learning_rate": 4.6057749948696387e-08, "loss": 0.3265, "step": 30725 }, { "epoch": 2.883445945945946, "grad_norm": 1.0467041504436365, "learning_rate": 4.5983845545100606e-08, "loss": 0.3111, "step": 30726 }, { "epoch": 2.88353978978979, "grad_norm": 1.052871960935994, "learning_rate": 4.5910000208822525e-08, "loss": 0.2956, "step": 30727 }, { "epoch": 2.8836336336336337, "grad_norm": 1.203460206971075, "learning_rate": 4.583621394074256e-08, "loss": 0.3124, "step": 30728 }, { "epoch": 2.8837274774774775, "grad_norm": 1.0771442923078363, "learning_rate": 4.576248674174055e-08, "loss": 0.3081, "step": 30729 }, { "epoch": 2.8838213213213213, "grad_norm": 1.079389140996623, "learning_rate": 4.568881861269525e-08, "loss": 0.3086, "step": 30730 }, { "epoch": 2.883915165165165, "grad_norm": 1.3320945934416637, "learning_rate": 4.5615209554485395e-08, "loss": 0.2925, "step": 30731 }, { "epoch": 2.884009009009009, "grad_norm": 0.975320921887005, "learning_rate": 4.5541659567988616e-08, "loss": 0.3291, "step": 30732 }, { "epoch": 2.8841028528528527, "grad_norm": 1.0072939200638942, "learning_rate": 4.546816865408144e-08, "loss": 0.3279, "step": 30733 }, { "epoch": 2.8841966966966965, "grad_norm": 1.1223043119903016, "learning_rate": 4.539473681364093e-08, "loss": 0.2889, "step": 30734 }, { "epoch": 2.8842905405405403, "grad_norm": 1.0797516921906576, "learning_rate": 4.532136404754195e-08, "loss": 0.3304, "step": 30735 }, { "epoch": 2.8843843843843846, "grad_norm": 1.2075971912688392, "learning_rate": 4.5248050356659354e-08, "loss": 0.3266, "step": 30736 }, { "epoch": 2.884478228228228, "grad_norm": 1.0331386146183956, "learning_rate": 4.517479574186745e-08, "loss": 0.3125, "step": 30737 }, { "epoch": 2.8845720720720722, "grad_norm": 1.0837444635924145, "learning_rate": 4.510160020403942e-08, "loss": 0.3011, "step": 30738 }, { "epoch": 2.884665915915916, "grad_norm": 0.9610040338625386, "learning_rate": 4.50284637440479e-08, "loss": 0.2952, "step": 30739 }, { "epoch": 2.88475975975976, "grad_norm": 1.1890944563051753, "learning_rate": 4.4955386362766085e-08, "loss": 0.3291, "step": 30740 }, { "epoch": 2.8848536036036037, "grad_norm": 1.0947236561252287, "learning_rate": 4.4882368061063276e-08, "loss": 0.3068, "step": 30741 }, { "epoch": 2.8849474474474475, "grad_norm": 1.089674245247211, "learning_rate": 4.480940883981211e-08, "loss": 0.3366, "step": 30742 }, { "epoch": 2.8850412912912913, "grad_norm": 1.395740604357174, "learning_rate": 4.473650869988133e-08, "loss": 0.3179, "step": 30743 }, { "epoch": 2.885135135135135, "grad_norm": 1.140724891997119, "learning_rate": 4.46636676421397e-08, "loss": 0.3221, "step": 30744 }, { "epoch": 2.885228978978979, "grad_norm": 1.1921014012918885, "learning_rate": 4.4590885667457064e-08, "loss": 0.3238, "step": 30745 }, { "epoch": 2.8853228228228227, "grad_norm": 1.1550114732819703, "learning_rate": 4.4518162776700513e-08, "loss": 0.3078, "step": 30746 }, { "epoch": 2.8854166666666665, "grad_norm": 1.2416714090335075, "learning_rate": 4.444549897073713e-08, "loss": 0.2989, "step": 30747 }, { "epoch": 2.8855105105105103, "grad_norm": 1.1530451980566683, "learning_rate": 4.4372894250432897e-08, "loss": 0.2899, "step": 30748 }, { "epoch": 2.8856043543543546, "grad_norm": 1.5768952441574913, "learning_rate": 4.430034861665433e-08, "loss": 0.2636, "step": 30749 }, { "epoch": 2.885698198198198, "grad_norm": 1.060018375427306, "learning_rate": 4.422786207026575e-08, "loss": 0.2839, "step": 30750 }, { "epoch": 2.885792042042042, "grad_norm": 1.4164117643225194, "learning_rate": 4.4155434612131454e-08, "loss": 0.2841, "step": 30751 }, { "epoch": 2.885885885885886, "grad_norm": 1.2570074737598134, "learning_rate": 4.4083066243115204e-08, "loss": 0.3008, "step": 30752 }, { "epoch": 2.88597972972973, "grad_norm": 1.1379393622582445, "learning_rate": 4.401075696408019e-08, "loss": 0.302, "step": 30753 }, { "epoch": 2.8860735735735736, "grad_norm": 1.1644167544513893, "learning_rate": 4.393850677588796e-08, "loss": 0.3014, "step": 30754 }, { "epoch": 2.8861674174174174, "grad_norm": 1.1641770107525597, "learning_rate": 4.386631567940058e-08, "loss": 0.3219, "step": 30755 }, { "epoch": 2.8862612612612613, "grad_norm": 1.2345778448386489, "learning_rate": 4.379418367547794e-08, "loss": 0.3296, "step": 30756 }, { "epoch": 2.886355105105105, "grad_norm": 1.1150508076270873, "learning_rate": 4.372211076498045e-08, "loss": 0.3013, "step": 30757 }, { "epoch": 2.886448948948949, "grad_norm": 1.1871078937659127, "learning_rate": 4.3650096948767985e-08, "loss": 0.3163, "step": 30758 }, { "epoch": 2.8865427927927927, "grad_norm": 1.0012271942071045, "learning_rate": 4.3578142227698736e-08, "loss": 0.3155, "step": 30759 }, { "epoch": 2.8866366366366365, "grad_norm": 1.2430129275963786, "learning_rate": 4.3506246602630363e-08, "loss": 0.3036, "step": 30760 }, { "epoch": 2.8867304804804803, "grad_norm": 2.000293229611576, "learning_rate": 4.343441007442051e-08, "loss": 0.3275, "step": 30761 }, { "epoch": 2.8868243243243246, "grad_norm": 1.081827842757617, "learning_rate": 4.336263264392571e-08, "loss": 0.3399, "step": 30762 }, { "epoch": 2.886918168168168, "grad_norm": 1.0098066202799787, "learning_rate": 4.329091431200139e-08, "loss": 0.3373, "step": 30763 }, { "epoch": 2.887012012012012, "grad_norm": 1.0728380141400002, "learning_rate": 4.321925507950297e-08, "loss": 0.3163, "step": 30764 }, { "epoch": 2.887105855855856, "grad_norm": 1.21361851216721, "learning_rate": 4.314765494728423e-08, "loss": 0.2609, "step": 30765 }, { "epoch": 2.8871996996997, "grad_norm": 1.1230388495598305, "learning_rate": 4.307611391620004e-08, "loss": 0.281, "step": 30766 }, { "epoch": 2.8872935435435436, "grad_norm": 1.044017264616023, "learning_rate": 4.3004631987102476e-08, "loss": 0.3124, "step": 30767 }, { "epoch": 2.8873873873873874, "grad_norm": 1.0211385452958461, "learning_rate": 4.29332091608442e-08, "loss": 0.3341, "step": 30768 }, { "epoch": 2.8874812312312312, "grad_norm": 1.8960432619881866, "learning_rate": 4.286184543827676e-08, "loss": 0.3276, "step": 30769 }, { "epoch": 2.887575075075075, "grad_norm": 2.0606898230991435, "learning_rate": 4.279054082025058e-08, "loss": 0.294, "step": 30770 }, { "epoch": 2.887668918918919, "grad_norm": 1.436635512853738, "learning_rate": 4.271929530761665e-08, "loss": 0.3298, "step": 30771 }, { "epoch": 2.8877627627627627, "grad_norm": 1.0311752914269394, "learning_rate": 4.264810890122428e-08, "loss": 0.3171, "step": 30772 }, { "epoch": 2.8878566066066065, "grad_norm": 1.1085537203488476, "learning_rate": 4.257698160192114e-08, "loss": 0.3331, "step": 30773 }, { "epoch": 2.8879504504504503, "grad_norm": 1.1994942681312444, "learning_rate": 4.2505913410556544e-08, "loss": 0.3175, "step": 30774 }, { "epoch": 2.8880442942942945, "grad_norm": 1.1280104819920067, "learning_rate": 4.243490432797759e-08, "loss": 0.2546, "step": 30775 }, { "epoch": 2.888138138138138, "grad_norm": 1.0967591220213004, "learning_rate": 4.2363954355030266e-08, "loss": 0.2985, "step": 30776 }, { "epoch": 2.888231981981982, "grad_norm": 0.9007201339367471, "learning_rate": 4.2293063492561124e-08, "loss": 0.3187, "step": 30777 }, { "epoch": 2.888325825825826, "grad_norm": 2.1198444513148997, "learning_rate": 4.22222317414156e-08, "loss": 0.2907, "step": 30778 }, { "epoch": 2.88841966966967, "grad_norm": 1.0846221201178143, "learning_rate": 4.2151459102437454e-08, "loss": 0.2919, "step": 30779 }, { "epoch": 2.8885135135135136, "grad_norm": 1.1600488472320676, "learning_rate": 4.2080745576471016e-08, "loss": 0.283, "step": 30780 }, { "epoch": 2.8886073573573574, "grad_norm": 1.2039219310854945, "learning_rate": 4.20100911643595e-08, "loss": 0.313, "step": 30781 }, { "epoch": 2.888701201201201, "grad_norm": 1.232196775743295, "learning_rate": 4.1939495866945565e-08, "loss": 0.2725, "step": 30782 }, { "epoch": 2.888795045045045, "grad_norm": 1.5589073479102284, "learning_rate": 4.186895968507021e-08, "loss": 0.3127, "step": 30783 }, { "epoch": 2.888888888888889, "grad_norm": 1.4036394903625848, "learning_rate": 4.179848261957498e-08, "loss": 0.3334, "step": 30784 }, { "epoch": 2.8889827327327327, "grad_norm": 1.0658409172938117, "learning_rate": 4.172806467129975e-08, "loss": 0.2632, "step": 30785 }, { "epoch": 2.8890765765765765, "grad_norm": 1.0954051008985626, "learning_rate": 4.165770584108442e-08, "loss": 0.3182, "step": 30786 }, { "epoch": 2.8891704204204203, "grad_norm": 1.0227573976790467, "learning_rate": 4.158740612976775e-08, "loss": 0.3511, "step": 30787 }, { "epoch": 2.8892642642642645, "grad_norm": 1.0873538757281385, "learning_rate": 4.151716553818796e-08, "loss": 0.3072, "step": 30788 }, { "epoch": 2.889358108108108, "grad_norm": 1.278603608816836, "learning_rate": 4.1446984067182725e-08, "loss": 0.3536, "step": 30789 }, { "epoch": 2.889451951951952, "grad_norm": 1.1014382484769436, "learning_rate": 4.137686171758859e-08, "loss": 0.2815, "step": 30790 }, { "epoch": 2.889545795795796, "grad_norm": 8.339291242228414, "learning_rate": 4.1306798490242105e-08, "loss": 0.273, "step": 30791 }, { "epoch": 2.8896396396396398, "grad_norm": 1.4239028152560012, "learning_rate": 4.123679438597761e-08, "loss": 0.3131, "step": 30792 }, { "epoch": 2.8897334834834836, "grad_norm": 1.11606092276868, "learning_rate": 4.116684940563109e-08, "loss": 0.3039, "step": 30793 }, { "epoch": 2.8898273273273274, "grad_norm": 0.9791701300835808, "learning_rate": 4.1096963550035785e-08, "loss": 0.3165, "step": 30794 }, { "epoch": 2.889921171171171, "grad_norm": 1.112756015128019, "learning_rate": 4.1027136820024347e-08, "loss": 0.2633, "step": 30795 }, { "epoch": 2.890015015015015, "grad_norm": 1.3824100657838483, "learning_rate": 4.0957369216430566e-08, "loss": 0.3172, "step": 30796 }, { "epoch": 2.890108858858859, "grad_norm": 3.0231223171417567, "learning_rate": 4.088766074008599e-08, "loss": 0.2749, "step": 30797 }, { "epoch": 2.8902027027027026, "grad_norm": 1.1448139151794978, "learning_rate": 4.081801139182051e-08, "loss": 0.3055, "step": 30798 }, { "epoch": 2.8902965465465464, "grad_norm": 1.729846294446347, "learning_rate": 4.0748421172466245e-08, "loss": 0.3274, "step": 30799 }, { "epoch": 2.8903903903903903, "grad_norm": 1.129676563629194, "learning_rate": 4.067889008285253e-08, "loss": 0.3105, "step": 30800 }, { "epoch": 2.8904842342342345, "grad_norm": 1.1781468238185437, "learning_rate": 4.060941812380703e-08, "loss": 0.2711, "step": 30801 }, { "epoch": 2.890578078078078, "grad_norm": 0.9385845918742836, "learning_rate": 4.05400052961602e-08, "loss": 0.3136, "step": 30802 }, { "epoch": 2.890671921921922, "grad_norm": 1.0667642469433924, "learning_rate": 4.0470651600738044e-08, "loss": 0.3519, "step": 30803 }, { "epoch": 2.8907657657657655, "grad_norm": 1.3045219428825687, "learning_rate": 4.040135703836823e-08, "loss": 0.3329, "step": 30804 }, { "epoch": 2.8908596096096097, "grad_norm": 1.0549331371875088, "learning_rate": 4.0332121609876205e-08, "loss": 0.3149, "step": 30805 }, { "epoch": 2.8909534534534536, "grad_norm": 1.209152267100268, "learning_rate": 4.026294531608854e-08, "loss": 0.3122, "step": 30806 }, { "epoch": 2.8910472972972974, "grad_norm": 1.1934403049772722, "learning_rate": 4.0193828157829575e-08, "loss": 0.3401, "step": 30807 }, { "epoch": 2.891141141141141, "grad_norm": 1.0199832337892225, "learning_rate": 4.012477013592364e-08, "loss": 0.3439, "step": 30808 }, { "epoch": 2.891234984984985, "grad_norm": 3.3571712789515327, "learning_rate": 4.005577125119342e-08, "loss": 0.3103, "step": 30809 }, { "epoch": 2.891328828828829, "grad_norm": 1.1781803062986642, "learning_rate": 3.9986831504461584e-08, "loss": 0.2939, "step": 30810 }, { "epoch": 2.8914226726726726, "grad_norm": 1.0527068240710886, "learning_rate": 3.9917950896550815e-08, "loss": 0.3442, "step": 30811 }, { "epoch": 2.8915165165165164, "grad_norm": 1.3047400057331409, "learning_rate": 3.984912942828212e-08, "loss": 0.2841, "step": 30812 }, { "epoch": 2.8916103603603602, "grad_norm": 1.2362486362654792, "learning_rate": 3.9780367100475946e-08, "loss": 0.3292, "step": 30813 }, { "epoch": 2.891704204204204, "grad_norm": 1.086076247687412, "learning_rate": 3.97116639139522e-08, "loss": 0.3315, "step": 30814 }, { "epoch": 2.891798048048048, "grad_norm": 1.0566998753072585, "learning_rate": 3.964301986953023e-08, "loss": 0.3001, "step": 30815 }, { "epoch": 2.891891891891892, "grad_norm": 1.1684858598627654, "learning_rate": 3.957443496802826e-08, "loss": 0.34, "step": 30816 }, { "epoch": 2.8919857357357355, "grad_norm": 1.1238292127653784, "learning_rate": 3.950590921026454e-08, "loss": 0.2944, "step": 30817 }, { "epoch": 2.8920795795795797, "grad_norm": 2.0330022044804905, "learning_rate": 3.943744259705506e-08, "loss": 0.2927, "step": 30818 }, { "epoch": 2.8921734234234235, "grad_norm": 1.303600886352759, "learning_rate": 3.936903512921697e-08, "loss": 0.28, "step": 30819 }, { "epoch": 2.8922672672672673, "grad_norm": 1.2152434998284096, "learning_rate": 3.930068680756627e-08, "loss": 0.3199, "step": 30820 }, { "epoch": 2.892361111111111, "grad_norm": 1.3734868695332054, "learning_rate": 3.923239763291675e-08, "loss": 0.3104, "step": 30821 }, { "epoch": 2.892454954954955, "grad_norm": 1.1696511231630933, "learning_rate": 3.916416760608277e-08, "loss": 0.2835, "step": 30822 }, { "epoch": 2.892548798798799, "grad_norm": 1.3041861265508186, "learning_rate": 3.909599672787923e-08, "loss": 0.3396, "step": 30823 }, { "epoch": 2.8926426426426426, "grad_norm": 1.1094924939113142, "learning_rate": 3.902788499911769e-08, "loss": 0.2803, "step": 30824 }, { "epoch": 2.8927364864864864, "grad_norm": 1.0855507489260972, "learning_rate": 3.8959832420609747e-08, "loss": 0.2528, "step": 30825 }, { "epoch": 2.89283033033033, "grad_norm": 1.2079497609028211, "learning_rate": 3.889183899316862e-08, "loss": 0.3157, "step": 30826 }, { "epoch": 2.892924174174174, "grad_norm": 1.012042649547334, "learning_rate": 3.8823904717603666e-08, "loss": 0.306, "step": 30827 }, { "epoch": 2.893018018018018, "grad_norm": 1.1334073735565455, "learning_rate": 3.87560295947248e-08, "loss": 0.2844, "step": 30828 }, { "epoch": 2.893111861861862, "grad_norm": 1.0689992969249282, "learning_rate": 3.8688213625342475e-08, "loss": 0.3011, "step": 30829 }, { "epoch": 2.8932057057057055, "grad_norm": 1.2330906900897107, "learning_rate": 3.862045681026383e-08, "loss": 0.3289, "step": 30830 }, { "epoch": 2.8932995495495497, "grad_norm": 1.1258727406522933, "learning_rate": 3.855275915029766e-08, "loss": 0.343, "step": 30831 }, { "epoch": 2.8933933933933935, "grad_norm": 1.1467855836608738, "learning_rate": 3.8485120646251095e-08, "loss": 0.3349, "step": 30832 }, { "epoch": 2.8934872372372373, "grad_norm": 1.15702713514867, "learning_rate": 3.841754129893016e-08, "loss": 0.3368, "step": 30833 }, { "epoch": 2.893581081081081, "grad_norm": 1.0424040129756957, "learning_rate": 3.835002110914032e-08, "loss": 0.3086, "step": 30834 }, { "epoch": 2.893674924924925, "grad_norm": 0.9897067669990787, "learning_rate": 3.828256007768816e-08, "loss": 0.3139, "step": 30835 }, { "epoch": 2.8937687687687688, "grad_norm": 1.2136962434098912, "learning_rate": 3.821515820537636e-08, "loss": 0.3032, "step": 30836 }, { "epoch": 2.8938626126126126, "grad_norm": 1.2277139662129128, "learning_rate": 3.8147815493009276e-08, "loss": 0.3114, "step": 30837 }, { "epoch": 2.8939564564564564, "grad_norm": 1.1075967098840247, "learning_rate": 3.8080531941389607e-08, "loss": 0.3384, "step": 30838 }, { "epoch": 2.8940503003003, "grad_norm": 1.3868578971728678, "learning_rate": 3.801330755132005e-08, "loss": 0.3208, "step": 30839 }, { "epoch": 2.894144144144144, "grad_norm": 1.3589687816070457, "learning_rate": 3.794614232360161e-08, "loss": 0.2888, "step": 30840 }, { "epoch": 2.894237987987988, "grad_norm": 1.1568090287358546, "learning_rate": 3.787903625903533e-08, "loss": 0.3176, "step": 30841 }, { "epoch": 2.894331831831832, "grad_norm": 1.0295676545540886, "learning_rate": 3.7811989358421675e-08, "loss": 0.3195, "step": 30842 }, { "epoch": 2.8944256756756754, "grad_norm": 0.9771955272292903, "learning_rate": 3.774500162255945e-08, "loss": 0.3406, "step": 30843 }, { "epoch": 2.8945195195195197, "grad_norm": 1.204977007132553, "learning_rate": 3.767807305224747e-08, "loss": 0.3082, "step": 30844 }, { "epoch": 2.8946133633633635, "grad_norm": 10.907723805692415, "learning_rate": 3.7611203648283966e-08, "loss": 0.2928, "step": 30845 }, { "epoch": 2.8947072072072073, "grad_norm": 1.1912460549236867, "learning_rate": 3.754439341146554e-08, "loss": 0.3279, "step": 30846 }, { "epoch": 2.894801051051051, "grad_norm": 1.3495342761793692, "learning_rate": 3.7477642342590435e-08, "loss": 0.3031, "step": 30847 }, { "epoch": 2.894894894894895, "grad_norm": 2.4044404334329204, "learning_rate": 3.741095044245302e-08, "loss": 0.3036, "step": 30848 }, { "epoch": 2.8949887387387387, "grad_norm": 1.0222056085949176, "learning_rate": 3.734431771184821e-08, "loss": 0.2956, "step": 30849 }, { "epoch": 2.8950825825825826, "grad_norm": 1.3383349389902892, "learning_rate": 3.7277744151572035e-08, "loss": 0.2863, "step": 30850 }, { "epoch": 2.8951764264264264, "grad_norm": 5.442649950157585, "learning_rate": 3.72112297624172e-08, "loss": 0.2985, "step": 30851 }, { "epoch": 2.89527027027027, "grad_norm": 1.2416141028547865, "learning_rate": 3.71447745451764e-08, "loss": 0.3032, "step": 30852 }, { "epoch": 2.895364114114114, "grad_norm": 1.4791497492001981, "learning_rate": 3.707837850064344e-08, "loss": 0.3013, "step": 30853 }, { "epoch": 2.895457957957958, "grad_norm": 1.2468221578548921, "learning_rate": 3.7012041629608805e-08, "loss": 0.3401, "step": 30854 }, { "epoch": 2.895551801801802, "grad_norm": 1.1252621417846989, "learning_rate": 3.6945763932862974e-08, "loss": 0.3176, "step": 30855 }, { "epoch": 2.8956456456456454, "grad_norm": 0.8824341009126881, "learning_rate": 3.6879545411198094e-08, "loss": 0.2681, "step": 30856 }, { "epoch": 2.8957394894894897, "grad_norm": 1.209356552881413, "learning_rate": 3.681338606540186e-08, "loss": 0.2982, "step": 30857 }, { "epoch": 2.8958333333333335, "grad_norm": 1.383677099455774, "learning_rate": 3.674728589626364e-08, "loss": 0.2763, "step": 30858 }, { "epoch": 2.8959271771771773, "grad_norm": 1.0983268915581916, "learning_rate": 3.6681244904572254e-08, "loss": 0.3014, "step": 30859 }, { "epoch": 2.896021021021021, "grad_norm": 1.007887546701199, "learning_rate": 3.66152630911143e-08, "loss": 0.2908, "step": 30860 }, { "epoch": 2.896114864864865, "grad_norm": 1.0592004222071743, "learning_rate": 3.654934045667635e-08, "loss": 0.3041, "step": 30861 }, { "epoch": 2.8962087087087087, "grad_norm": 1.1667537842501539, "learning_rate": 3.648347700204502e-08, "loss": 0.2987, "step": 30862 }, { "epoch": 2.8963025525525525, "grad_norm": 1.2077634279699336, "learning_rate": 3.641767272800523e-08, "loss": 0.3142, "step": 30863 }, { "epoch": 2.8963963963963963, "grad_norm": 1.2948221572228333, "learning_rate": 3.63519276353419e-08, "loss": 0.3402, "step": 30864 }, { "epoch": 2.89649024024024, "grad_norm": 0.9921359629054474, "learning_rate": 3.6286241724838855e-08, "loss": 0.3143, "step": 30865 }, { "epoch": 2.896584084084084, "grad_norm": 1.078362387801831, "learning_rate": 3.6220614997278804e-08, "loss": 0.3168, "step": 30866 }, { "epoch": 2.8966779279279278, "grad_norm": 1.1710753798288325, "learning_rate": 3.6155047453444444e-08, "loss": 0.3063, "step": 30867 }, { "epoch": 2.896771771771772, "grad_norm": 1.0469558101349599, "learning_rate": 3.608953909411794e-08, "loss": 0.3103, "step": 30868 }, { "epoch": 2.8968656156156154, "grad_norm": 1.0302622487149693, "learning_rate": 3.602408992008033e-08, "loss": 0.2734, "step": 30869 }, { "epoch": 2.8969594594594597, "grad_norm": 1.11682958123425, "learning_rate": 3.595869993211154e-08, "loss": 0.314, "step": 30870 }, { "epoch": 2.8970533033033035, "grad_norm": 1.2197658013311823, "learning_rate": 3.5893369130991506e-08, "loss": 0.294, "step": 30871 }, { "epoch": 2.8971471471471473, "grad_norm": 1.0447848637622552, "learning_rate": 3.582809751749905e-08, "loss": 0.3136, "step": 30872 }, { "epoch": 2.897240990990991, "grad_norm": 1.1974898203668014, "learning_rate": 3.5762885092411884e-08, "loss": 0.2757, "step": 30873 }, { "epoch": 2.897334834834835, "grad_norm": 1.2581896193846407, "learning_rate": 3.569773185650882e-08, "loss": 0.3241, "step": 30874 }, { "epoch": 2.8974286786786787, "grad_norm": 1.0542430969772403, "learning_rate": 3.5632637810565915e-08, "loss": 0.293, "step": 30875 }, { "epoch": 2.8975225225225225, "grad_norm": 1.0770282992067117, "learning_rate": 3.556760295535866e-08, "loss": 0.3289, "step": 30876 }, { "epoch": 2.8976163663663663, "grad_norm": 1.2021294704598093, "learning_rate": 3.5502627291663646e-08, "loss": 0.3038, "step": 30877 }, { "epoch": 2.89771021021021, "grad_norm": 1.3311620068653032, "learning_rate": 3.543771082025527e-08, "loss": 0.3085, "step": 30878 }, { "epoch": 2.897804054054054, "grad_norm": 1.0224507776796103, "learning_rate": 3.537285354190678e-08, "loss": 0.2793, "step": 30879 }, { "epoch": 2.8978978978978978, "grad_norm": 1.0454583196050782, "learning_rate": 3.530805545739258e-08, "loss": 0.3089, "step": 30880 }, { "epoch": 2.897991741741742, "grad_norm": 1.796633131050121, "learning_rate": 3.5243316567484806e-08, "loss": 0.2938, "step": 30881 }, { "epoch": 2.8980855855855854, "grad_norm": 0.9813976823625705, "learning_rate": 3.5178636872955084e-08, "loss": 0.3324, "step": 30882 }, { "epoch": 2.8981794294294296, "grad_norm": 1.1661771070280158, "learning_rate": 3.511401637457446e-08, "loss": 0.3105, "step": 30883 }, { "epoch": 2.898273273273273, "grad_norm": 1.0421084075103535, "learning_rate": 3.504945507311452e-08, "loss": 0.305, "step": 30884 }, { "epoch": 2.8983671171171173, "grad_norm": 1.3751171383149914, "learning_rate": 3.498495296934357e-08, "loss": 0.3303, "step": 30885 }, { "epoch": 2.898460960960961, "grad_norm": 1.0752645975131925, "learning_rate": 3.492051006403152e-08, "loss": 0.3056, "step": 30886 }, { "epoch": 2.898554804804805, "grad_norm": 1.0011669955961566, "learning_rate": 3.4856126357946666e-08, "loss": 0.3178, "step": 30887 }, { "epoch": 2.8986486486486487, "grad_norm": 1.2493020198443214, "learning_rate": 3.479180185185671e-08, "loss": 0.2894, "step": 30888 }, { "epoch": 2.8987424924924925, "grad_norm": 1.1674925828190397, "learning_rate": 3.472753654652827e-08, "loss": 0.3028, "step": 30889 }, { "epoch": 2.8988363363363363, "grad_norm": 1.6309275693690295, "learning_rate": 3.466333044272796e-08, "loss": 0.3349, "step": 30890 }, { "epoch": 2.89893018018018, "grad_norm": 0.9666656713206153, "learning_rate": 3.459918354122127e-08, "loss": 0.3079, "step": 30891 }, { "epoch": 2.899024024024024, "grad_norm": 1.17252422010575, "learning_rate": 3.4535095842772595e-08, "loss": 0.2532, "step": 30892 }, { "epoch": 2.8991178678678677, "grad_norm": 1.1420318550282214, "learning_rate": 3.4471067348146314e-08, "loss": 0.3384, "step": 30893 }, { "epoch": 2.8992117117117115, "grad_norm": 1.2176246291893698, "learning_rate": 3.440709805810627e-08, "loss": 0.3288, "step": 30894 }, { "epoch": 2.8993055555555554, "grad_norm": 1.0374139289426987, "learning_rate": 3.4343187973414626e-08, "loss": 0.2516, "step": 30895 }, { "epoch": 2.8993993993993996, "grad_norm": 1.0296291952373218, "learning_rate": 3.427933709483411e-08, "loss": 0.2638, "step": 30896 }, { "epoch": 2.899493243243243, "grad_norm": 1.4824992575790015, "learning_rate": 3.4215545423124665e-08, "loss": 0.3282, "step": 30897 }, { "epoch": 2.8995870870870872, "grad_norm": 1.1766370351047861, "learning_rate": 3.4151812959047906e-08, "loss": 0.2896, "step": 30898 }, { "epoch": 2.899680930930931, "grad_norm": 1.2185719400630863, "learning_rate": 3.4088139703363773e-08, "loss": 0.3375, "step": 30899 }, { "epoch": 2.899774774774775, "grad_norm": 1.0392313087602856, "learning_rate": 3.402452565683112e-08, "loss": 0.2797, "step": 30900 }, { "epoch": 2.8998686186186187, "grad_norm": 1.238655278626968, "learning_rate": 3.3960970820208214e-08, "loss": 0.3138, "step": 30901 }, { "epoch": 2.8999624624624625, "grad_norm": 1.1087450200061042, "learning_rate": 3.38974751942539e-08, "loss": 0.3205, "step": 30902 }, { "epoch": 2.9000563063063063, "grad_norm": 1.0439231801555378, "learning_rate": 3.383403877972369e-08, "loss": 0.3111, "step": 30903 }, { "epoch": 2.90015015015015, "grad_norm": 1.0844157416327016, "learning_rate": 3.377066157737474e-08, "loss": 0.2916, "step": 30904 }, { "epoch": 2.900243993993994, "grad_norm": 1.1676017917977646, "learning_rate": 3.370734358796313e-08, "loss": 0.2852, "step": 30905 }, { "epoch": 2.9003378378378377, "grad_norm": 1.1578507020457862, "learning_rate": 3.364408481224324e-08, "loss": 0.3166, "step": 30906 }, { "epoch": 2.9004316816816815, "grad_norm": 1.2317836285391905, "learning_rate": 3.358088525096948e-08, "loss": 0.2929, "step": 30907 }, { "epoch": 2.9005255255255253, "grad_norm": 1.1163941432587456, "learning_rate": 3.3517744904895125e-08, "loss": 0.2959, "step": 30908 }, { "epoch": 2.9006193693693696, "grad_norm": 1.07233010784325, "learning_rate": 3.3454663774773466e-08, "loss": 0.3079, "step": 30909 }, { "epoch": 2.900713213213213, "grad_norm": 0.9814491741453044, "learning_rate": 3.339164186135613e-08, "loss": 0.3067, "step": 30910 }, { "epoch": 2.900807057057057, "grad_norm": 1.059387262850759, "learning_rate": 3.332867916539473e-08, "loss": 0.3184, "step": 30911 }, { "epoch": 2.900900900900901, "grad_norm": 1.1058584677344332, "learning_rate": 3.326577568764033e-08, "loss": 0.3324, "step": 30912 }, { "epoch": 2.900994744744745, "grad_norm": 1.0991289548069947, "learning_rate": 3.320293142884234e-08, "loss": 0.3101, "step": 30913 }, { "epoch": 2.9010885885885886, "grad_norm": 1.1012410648437168, "learning_rate": 3.3140146389750714e-08, "loss": 0.3635, "step": 30914 }, { "epoch": 2.9011824324324325, "grad_norm": 1.1785479179034972, "learning_rate": 3.307742057111318e-08, "loss": 0.2951, "step": 30915 }, { "epoch": 2.9012762762762763, "grad_norm": 0.9571406266146629, "learning_rate": 3.301475397367859e-08, "loss": 0.2966, "step": 30916 }, { "epoch": 2.90137012012012, "grad_norm": 1.4966488911830962, "learning_rate": 3.295214659819357e-08, "loss": 0.3211, "step": 30917 }, { "epoch": 2.901463963963964, "grad_norm": 1.4278684938224735, "learning_rate": 3.2889598445404734e-08, "loss": 0.2924, "step": 30918 }, { "epoch": 2.9015578078078077, "grad_norm": 1.117120465875893, "learning_rate": 3.282710951605761e-08, "loss": 0.301, "step": 30919 }, { "epoch": 2.9016516516516515, "grad_norm": 0.9790571625514338, "learning_rate": 3.2764679810897704e-08, "loss": 0.3387, "step": 30920 }, { "epoch": 2.9017454954954953, "grad_norm": 1.0807328644335532, "learning_rate": 3.270230933066887e-08, "loss": 0.3412, "step": 30921 }, { "epoch": 2.9018393393393396, "grad_norm": 0.9951293590288836, "learning_rate": 3.263999807611551e-08, "loss": 0.3346, "step": 30922 }, { "epoch": 2.901933183183183, "grad_norm": 1.4262061234953127, "learning_rate": 3.2577746047979805e-08, "loss": 0.3292, "step": 30923 }, { "epoch": 2.902027027027027, "grad_norm": 1.177334269088108, "learning_rate": 3.251555324700395e-08, "loss": 0.3107, "step": 30924 }, { "epoch": 2.902120870870871, "grad_norm": 1.1849684179641078, "learning_rate": 3.245341967393012e-08, "loss": 0.3078, "step": 30925 }, { "epoch": 2.902214714714715, "grad_norm": 1.0660810269636698, "learning_rate": 3.239134532949884e-08, "loss": 0.2658, "step": 30926 }, { "epoch": 2.9023085585585586, "grad_norm": 1.1982026199187812, "learning_rate": 3.232933021445006e-08, "loss": 0.3516, "step": 30927 }, { "epoch": 2.9024024024024024, "grad_norm": 1.234899744147786, "learning_rate": 3.2267374329523207e-08, "loss": 0.3557, "step": 30928 }, { "epoch": 2.9024962462462462, "grad_norm": 1.063280637862615, "learning_rate": 3.220547767545712e-08, "loss": 0.3063, "step": 30929 }, { "epoch": 2.90259009009009, "grad_norm": 1.1938874023145105, "learning_rate": 3.2143640252990104e-08, "loss": 0.2806, "step": 30930 }, { "epoch": 2.902683933933934, "grad_norm": 1.3269873494376778, "learning_rate": 3.2081862062859345e-08, "loss": 0.3235, "step": 30931 }, { "epoch": 2.9027777777777777, "grad_norm": 1.2812455421060025, "learning_rate": 3.202014310580093e-08, "loss": 0.295, "step": 30932 }, { "epoch": 2.9028716216216215, "grad_norm": 1.0278677539233338, "learning_rate": 3.195848338255092e-08, "loss": 0.3167, "step": 30933 }, { "epoch": 2.9029654654654653, "grad_norm": 1.246648129695615, "learning_rate": 3.189688289384485e-08, "loss": 0.3141, "step": 30934 }, { "epoch": 2.9030593093093096, "grad_norm": 1.0697588003695417, "learning_rate": 3.183534164041657e-08, "loss": 0.3386, "step": 30935 }, { "epoch": 2.903153153153153, "grad_norm": 0.9841441291204746, "learning_rate": 3.1773859623000503e-08, "loss": 0.3251, "step": 30936 }, { "epoch": 2.903246996996997, "grad_norm": 1.1746709537769973, "learning_rate": 3.1712436842329386e-08, "loss": 0.3167, "step": 30937 }, { "epoch": 2.903340840840841, "grad_norm": 1.2031985130635907, "learning_rate": 3.165107329913597e-08, "loss": 0.312, "step": 30938 }, { "epoch": 2.903434684684685, "grad_norm": 1.2488080750072492, "learning_rate": 3.1589768994150784e-08, "loss": 0.3067, "step": 30939 }, { "epoch": 2.9035285285285286, "grad_norm": 1.0096598736382087, "learning_rate": 3.152852392810601e-08, "loss": 0.3211, "step": 30940 }, { "epoch": 2.9036223723723724, "grad_norm": 1.1918696064345355, "learning_rate": 3.1467338101731636e-08, "loss": 0.3134, "step": 30941 }, { "epoch": 2.9037162162162162, "grad_norm": 1.3096318147742543, "learning_rate": 3.1406211515757066e-08, "loss": 0.3042, "step": 30942 }, { "epoch": 2.90381006006006, "grad_norm": 1.035625199386531, "learning_rate": 3.1345144170910616e-08, "loss": 0.3388, "step": 30943 }, { "epoch": 2.903903903903904, "grad_norm": 1.2455268707435594, "learning_rate": 3.1284136067920575e-08, "loss": 0.3551, "step": 30944 }, { "epoch": 2.9039977477477477, "grad_norm": 1.0229165886948208, "learning_rate": 3.122318720751472e-08, "loss": 0.3075, "step": 30945 }, { "epoch": 2.9040915915915915, "grad_norm": 1.0252473683089884, "learning_rate": 3.116229759041967e-08, "loss": 0.3028, "step": 30946 }, { "epoch": 2.9041854354354353, "grad_norm": 1.1204479052878495, "learning_rate": 3.1101467217361514e-08, "loss": 0.2941, "step": 30947 }, { "epoch": 2.9042792792792795, "grad_norm": 1.134891590700092, "learning_rate": 3.104069608906524e-08, "loss": 0.2928, "step": 30948 }, { "epoch": 2.904373123123123, "grad_norm": 1.1042045064242894, "learning_rate": 3.0979984206255254e-08, "loss": 0.3617, "step": 30949 }, { "epoch": 2.904466966966967, "grad_norm": 1.572599759427903, "learning_rate": 3.091933156965654e-08, "loss": 0.3321, "step": 30950 }, { "epoch": 2.904560810810811, "grad_norm": 1.6262167523502389, "learning_rate": 3.0858738179990744e-08, "loss": 0.3027, "step": 30951 }, { "epoch": 2.9046546546546548, "grad_norm": 1.2127002467878685, "learning_rate": 3.079820403798117e-08, "loss": 0.3162, "step": 30952 }, { "epoch": 2.9047484984984986, "grad_norm": 2.4927421205570575, "learning_rate": 3.073772914434947e-08, "loss": 0.3125, "step": 30953 }, { "epoch": 2.9048423423423424, "grad_norm": 1.2705665151230032, "learning_rate": 3.067731349981673e-08, "loss": 0.3076, "step": 30954 }, { "epoch": 2.904936186186186, "grad_norm": 1.048160388430452, "learning_rate": 3.0616957105103486e-08, "loss": 0.3311, "step": 30955 }, { "epoch": 2.90503003003003, "grad_norm": 1.2269741102185951, "learning_rate": 3.055665996092916e-08, "loss": 0.3241, "step": 30956 }, { "epoch": 2.905123873873874, "grad_norm": 1.5269346757315037, "learning_rate": 3.0496422068012067e-08, "loss": 0.3152, "step": 30957 }, { "epoch": 2.9052177177177176, "grad_norm": 1.0478917981494098, "learning_rate": 3.043624342707163e-08, "loss": 0.3181, "step": 30958 }, { "epoch": 2.9053115615615615, "grad_norm": 1.0812522925855979, "learning_rate": 3.03761240388245e-08, "loss": 0.2966, "step": 30959 }, { "epoch": 2.9054054054054053, "grad_norm": 1.3074550727297987, "learning_rate": 3.031606390398734e-08, "loss": 0.3305, "step": 30960 }, { "epoch": 2.9054992492492495, "grad_norm": 1.2956018046942162, "learning_rate": 3.025606302327677e-08, "loss": 0.3255, "step": 30961 }, { "epoch": 2.905593093093093, "grad_norm": 1.222403196149252, "learning_rate": 3.0196121397408354e-08, "loss": 0.2968, "step": 30962 }, { "epoch": 2.905686936936937, "grad_norm": 1.1550531639417554, "learning_rate": 3.0136239027096504e-08, "loss": 0.3, "step": 30963 }, { "epoch": 2.9057807807807805, "grad_norm": 1.0691231231119926, "learning_rate": 3.00764159130551e-08, "loss": 0.3034, "step": 30964 }, { "epoch": 2.9058746246246248, "grad_norm": 1.119101541385134, "learning_rate": 3.0016652055997466e-08, "loss": 0.2863, "step": 30965 }, { "epoch": 2.9059684684684686, "grad_norm": 1.274433294738641, "learning_rate": 2.9956947456636356e-08, "loss": 0.2616, "step": 30966 }, { "epoch": 2.9060623123123124, "grad_norm": 1.1190930212523533, "learning_rate": 2.989730211568398e-08, "loss": 0.3338, "step": 30967 }, { "epoch": 2.906156156156156, "grad_norm": 1.0679331138715933, "learning_rate": 2.983771603385033e-08, "loss": 0.3014, "step": 30968 }, { "epoch": 2.90625, "grad_norm": 1.142324933474147, "learning_rate": 2.9778189211846498e-08, "loss": 0.3307, "step": 30969 }, { "epoch": 2.906343843843844, "grad_norm": 1.17489104178974, "learning_rate": 2.9718721650382476e-08, "loss": 0.3529, "step": 30970 }, { "epoch": 2.9064376876876876, "grad_norm": 1.1074249768035758, "learning_rate": 2.965931335016714e-08, "loss": 0.2961, "step": 30971 }, { "epoch": 2.9065315315315314, "grad_norm": 1.065971568201323, "learning_rate": 2.9599964311908814e-08, "loss": 0.3126, "step": 30972 }, { "epoch": 2.9066253753753752, "grad_norm": 1.212298044925838, "learning_rate": 2.954067453631526e-08, "loss": 0.3078, "step": 30973 }, { "epoch": 2.9067192192192195, "grad_norm": 1.0500687886018267, "learning_rate": 2.948144402409259e-08, "loss": 0.275, "step": 30974 }, { "epoch": 2.906813063063063, "grad_norm": 1.357198432980061, "learning_rate": 2.9422272775948e-08, "loss": 0.2826, "step": 30975 }, { "epoch": 2.906906906906907, "grad_norm": 1.1198398885705754, "learning_rate": 2.9363160792587053e-08, "loss": 0.3243, "step": 30976 }, { "epoch": 2.9070007507507505, "grad_norm": 1.1669973564084197, "learning_rate": 2.9304108074713623e-08, "loss": 0.3006, "step": 30977 }, { "epoch": 2.9070945945945947, "grad_norm": 1.3915951486307503, "learning_rate": 2.92451146230327e-08, "loss": 0.3118, "step": 30978 }, { "epoch": 2.9071884384384385, "grad_norm": 1.0872097128273226, "learning_rate": 2.9186180438247057e-08, "loss": 0.3094, "step": 30979 }, { "epoch": 2.9072822822822824, "grad_norm": 1.133228532504828, "learning_rate": 2.912730552106002e-08, "loss": 0.3472, "step": 30980 }, { "epoch": 2.907376126126126, "grad_norm": 1.2003030875396847, "learning_rate": 2.90684898721727e-08, "loss": 0.301, "step": 30981 }, { "epoch": 2.90746996996997, "grad_norm": 1.1297070490220615, "learning_rate": 2.9009733492287308e-08, "loss": 0.3115, "step": 30982 }, { "epoch": 2.907563813813814, "grad_norm": 1.6714673129135516, "learning_rate": 2.895103638210328e-08, "loss": 0.2886, "step": 30983 }, { "epoch": 2.9076576576576576, "grad_norm": 1.0166051263074758, "learning_rate": 2.8892398542321175e-08, "loss": 0.3127, "step": 30984 }, { "epoch": 2.9077515015015014, "grad_norm": 1.114845470832535, "learning_rate": 2.883381997364043e-08, "loss": 0.2765, "step": 30985 }, { "epoch": 2.9078453453453452, "grad_norm": 1.1453814184682254, "learning_rate": 2.8775300676759377e-08, "loss": 0.3335, "step": 30986 }, { "epoch": 2.907939189189189, "grad_norm": 1.3913231388632286, "learning_rate": 2.8716840652374678e-08, "loss": 0.2677, "step": 30987 }, { "epoch": 2.908033033033033, "grad_norm": 1.227165888322647, "learning_rate": 2.8658439901184665e-08, "loss": 0.3231, "step": 30988 }, { "epoch": 2.908126876876877, "grad_norm": 1.1301745911898409, "learning_rate": 2.860009842388545e-08, "loss": 0.3032, "step": 30989 }, { "epoch": 2.9082207207207205, "grad_norm": 1.1666289755400356, "learning_rate": 2.854181622117147e-08, "loss": 0.3272, "step": 30990 }, { "epoch": 2.9083145645645647, "grad_norm": 1.0809142460615362, "learning_rate": 2.8483593293739397e-08, "loss": 0.2896, "step": 30991 }, { "epoch": 2.9084084084084085, "grad_norm": 1.3079442029066504, "learning_rate": 2.8425429642282564e-08, "loss": 0.2926, "step": 30992 }, { "epoch": 2.9085022522522523, "grad_norm": 1.18958051345375, "learning_rate": 2.8367325267493752e-08, "loss": 0.292, "step": 30993 }, { "epoch": 2.908596096096096, "grad_norm": 1.0887642985046768, "learning_rate": 2.8309280170066844e-08, "loss": 0.2724, "step": 30994 }, { "epoch": 2.90868993993994, "grad_norm": 1.1780415728844633, "learning_rate": 2.825129435069407e-08, "loss": 0.2973, "step": 30995 }, { "epoch": 2.9087837837837838, "grad_norm": 1.3274258374527985, "learning_rate": 2.819336781006543e-08, "loss": 0.2924, "step": 30996 }, { "epoch": 2.9088776276276276, "grad_norm": 1.1106034420085231, "learning_rate": 2.8135500548872596e-08, "loss": 0.3053, "step": 30997 }, { "epoch": 2.9089714714714714, "grad_norm": 1.2368159590981012, "learning_rate": 2.807769256780557e-08, "loss": 0.267, "step": 30998 }, { "epoch": 2.909065315315315, "grad_norm": 1.1113604790960294, "learning_rate": 2.8019943867553244e-08, "loss": 0.3164, "step": 30999 }, { "epoch": 2.909159159159159, "grad_norm": 1.084404159807912, "learning_rate": 2.796225444880396e-08, "loss": 0.2918, "step": 31000 }, { "epoch": 2.909253003003003, "grad_norm": 1.8855782374106242, "learning_rate": 2.790462431224661e-08, "loss": 0.2668, "step": 31001 }, { "epoch": 2.909346846846847, "grad_norm": 1.0258391316554873, "learning_rate": 2.784705345856675e-08, "loss": 0.3277, "step": 31002 }, { "epoch": 2.9094406906906904, "grad_norm": 1.0328350993646613, "learning_rate": 2.7789541888452175e-08, "loss": 0.3319, "step": 31003 }, { "epoch": 2.9095345345345347, "grad_norm": 1.0124794697852895, "learning_rate": 2.7732089602588442e-08, "loss": 0.304, "step": 31004 }, { "epoch": 2.9096283783783785, "grad_norm": 1.0151429745014893, "learning_rate": 2.767469660166e-08, "loss": 0.3421, "step": 31005 }, { "epoch": 2.9097222222222223, "grad_norm": 1.0792534034525285, "learning_rate": 2.7617362886350752e-08, "loss": 0.3018, "step": 31006 }, { "epoch": 2.909816066066066, "grad_norm": 1.0463297878676507, "learning_rate": 2.75600884573457e-08, "loss": 0.2424, "step": 31007 }, { "epoch": 2.90990990990991, "grad_norm": 0.9985494854276104, "learning_rate": 2.7502873315326528e-08, "loss": 0.3439, "step": 31008 }, { "epoch": 2.9100037537537538, "grad_norm": 1.974094800461256, "learning_rate": 2.7445717460976018e-08, "loss": 0.3052, "step": 31009 }, { "epoch": 2.9100975975975976, "grad_norm": 1.1386463309206858, "learning_rate": 2.7388620894975293e-08, "loss": 0.2824, "step": 31010 }, { "epoch": 2.9101914414414414, "grad_norm": 0.9702447981528498, "learning_rate": 2.733158361800492e-08, "loss": 0.2715, "step": 31011 }, { "epoch": 2.910285285285285, "grad_norm": 1.1156416044510973, "learning_rate": 2.727460563074602e-08, "loss": 0.3209, "step": 31012 }, { "epoch": 2.910379129129129, "grad_norm": 1.2723271935979605, "learning_rate": 2.721768693387694e-08, "loss": 0.3116, "step": 31013 }, { "epoch": 2.910472972972973, "grad_norm": 1.2037428509285943, "learning_rate": 2.7160827528076582e-08, "loss": 0.3325, "step": 31014 }, { "epoch": 2.910566816816817, "grad_norm": 0.9687843426429967, "learning_rate": 2.7104027414022737e-08, "loss": 0.2894, "step": 31015 }, { "epoch": 2.9106606606606604, "grad_norm": 1.1072621597534316, "learning_rate": 2.7047286592393197e-08, "loss": 0.3545, "step": 31016 }, { "epoch": 2.9107545045045047, "grad_norm": 1.2064092495086867, "learning_rate": 2.6990605063864084e-08, "loss": 0.2898, "step": 31017 }, { "epoch": 2.9108483483483485, "grad_norm": 1.187808096735936, "learning_rate": 2.6933982829111526e-08, "loss": 0.3209, "step": 31018 }, { "epoch": 2.9109421921921923, "grad_norm": 1.164066009979518, "learning_rate": 2.6877419888809986e-08, "loss": 0.3103, "step": 31019 }, { "epoch": 2.911036036036036, "grad_norm": 1.095337265635634, "learning_rate": 2.6820916243634478e-08, "loss": 0.2857, "step": 31020 }, { "epoch": 2.91112987987988, "grad_norm": 1.1609340486385016, "learning_rate": 2.6764471894258348e-08, "loss": 0.2982, "step": 31021 }, { "epoch": 2.9112237237237237, "grad_norm": 1.1748615855222775, "learning_rate": 2.6708086841354952e-08, "loss": 0.3594, "step": 31022 }, { "epoch": 2.9113175675675675, "grad_norm": 1.1398252344270965, "learning_rate": 2.6651761085595974e-08, "loss": 0.3284, "step": 31023 }, { "epoch": 2.9114114114114114, "grad_norm": 0.9435066450312941, "learning_rate": 2.659549462765365e-08, "loss": 0.3133, "step": 31024 }, { "epoch": 2.911505255255255, "grad_norm": 0.9961960803124259, "learning_rate": 2.653928746819856e-08, "loss": 0.3023, "step": 31025 }, { "epoch": 2.911599099099099, "grad_norm": 1.160883915084834, "learning_rate": 2.6483139607901277e-08, "loss": 0.2742, "step": 31026 }, { "epoch": 2.911692942942943, "grad_norm": 1.0451656933323208, "learning_rate": 2.642705104743015e-08, "loss": 0.2835, "step": 31027 }, { "epoch": 2.911786786786787, "grad_norm": 1.147261618254766, "learning_rate": 2.637102178745521e-08, "loss": 0.3159, "step": 31028 }, { "epoch": 2.9118806306306304, "grad_norm": 1.1733574125250275, "learning_rate": 2.631505182864369e-08, "loss": 0.3043, "step": 31029 }, { "epoch": 2.9119744744744747, "grad_norm": 2.8140214070846388, "learning_rate": 2.6259141171663395e-08, "loss": 0.2937, "step": 31030 }, { "epoch": 2.9120683183183185, "grad_norm": 1.084193239966289, "learning_rate": 2.6203289817180455e-08, "loss": 0.3222, "step": 31031 }, { "epoch": 2.9121621621621623, "grad_norm": 1.1435101854637577, "learning_rate": 2.6147497765861008e-08, "loss": 0.2777, "step": 31032 }, { "epoch": 2.912256006006006, "grad_norm": 1.0668259589403783, "learning_rate": 2.609176501837063e-08, "loss": 0.2974, "step": 31033 }, { "epoch": 2.91234984984985, "grad_norm": 1.2172139479201844, "learning_rate": 2.603609157537379e-08, "loss": 0.3105, "step": 31034 }, { "epoch": 2.9124436936936937, "grad_norm": 0.9491968178299134, "learning_rate": 2.5980477437533292e-08, "loss": 0.3005, "step": 31035 }, { "epoch": 2.9125375375375375, "grad_norm": 2.11222439448844, "learning_rate": 2.5924922605513603e-08, "loss": 0.3391, "step": 31036 }, { "epoch": 2.9126313813813813, "grad_norm": 1.368576911028796, "learning_rate": 2.586942707997586e-08, "loss": 0.2968, "step": 31037 }, { "epoch": 2.912725225225225, "grad_norm": 1.2524189662838658, "learning_rate": 2.581399086158287e-08, "loss": 0.2927, "step": 31038 }, { "epoch": 2.912819069069069, "grad_norm": 1.0281182742005206, "learning_rate": 2.575861395099466e-08, "loss": 0.318, "step": 31039 }, { "epoch": 2.9129129129129128, "grad_norm": 1.2441684194809546, "learning_rate": 2.5703296348872364e-08, "loss": 0.2997, "step": 31040 }, { "epoch": 2.913006756756757, "grad_norm": 1.3631044658417395, "learning_rate": 2.5648038055874904e-08, "loss": 0.3179, "step": 31041 }, { "epoch": 2.9131006006006004, "grad_norm": 1.4063825181535714, "learning_rate": 2.5592839072661747e-08, "loss": 0.2987, "step": 31042 }, { "epoch": 2.9131944444444446, "grad_norm": 1.8130383638659544, "learning_rate": 2.5537699399890147e-08, "loss": 0.3697, "step": 31043 }, { "epoch": 2.9132882882882885, "grad_norm": 1.0923781699810877, "learning_rate": 2.5482619038217916e-08, "loss": 0.3001, "step": 31044 }, { "epoch": 2.9133821321321323, "grad_norm": 1.0026864382559417, "learning_rate": 2.5427597988302298e-08, "loss": 0.3211, "step": 31045 }, { "epoch": 2.913475975975976, "grad_norm": 1.0615294760305802, "learning_rate": 2.5372636250798887e-08, "loss": 0.2884, "step": 31046 }, { "epoch": 2.91356981981982, "grad_norm": 1.1857299284775464, "learning_rate": 2.5317733826362713e-08, "loss": 0.2957, "step": 31047 }, { "epoch": 2.9136636636636637, "grad_norm": 1.1495121313176915, "learning_rate": 2.5262890715648804e-08, "loss": 0.3111, "step": 31048 }, { "epoch": 2.9137575075075075, "grad_norm": 1.1557014252112794, "learning_rate": 2.5208106919311083e-08, "loss": 0.298, "step": 31049 }, { "epoch": 2.9138513513513513, "grad_norm": 1.535293590098365, "learning_rate": 2.515338243800236e-08, "loss": 0.3041, "step": 31050 }, { "epoch": 2.913945195195195, "grad_norm": 3.0417817278563044, "learning_rate": 2.509871727237545e-08, "loss": 0.2628, "step": 31051 }, { "epoch": 2.914039039039039, "grad_norm": 1.1897315880812547, "learning_rate": 2.5044111423082053e-08, "loss": 0.3523, "step": 31052 }, { "epoch": 2.9141328828828827, "grad_norm": 1.1160036546380971, "learning_rate": 2.498956489077331e-08, "loss": 0.2839, "step": 31053 }, { "epoch": 2.914226726726727, "grad_norm": 1.2069231286529858, "learning_rate": 2.4935077676099263e-08, "loss": 0.3633, "step": 31054 }, { "epoch": 2.9143205705705704, "grad_norm": 1.0803026283528707, "learning_rate": 2.4880649779710497e-08, "loss": 0.3326, "step": 31055 }, { "epoch": 2.9144144144144146, "grad_norm": 1.2498484847047904, "learning_rate": 2.4826281202254276e-08, "loss": 0.338, "step": 31056 }, { "epoch": 2.914508258258258, "grad_norm": 1.1864053409201687, "learning_rate": 2.4771971944380636e-08, "loss": 0.3402, "step": 31057 }, { "epoch": 2.9146021021021022, "grad_norm": 1.3262112046641548, "learning_rate": 2.471772200673628e-08, "loss": 0.3037, "step": 31058 }, { "epoch": 2.914695945945946, "grad_norm": 1.0985776819039437, "learning_rate": 2.4663531389967356e-08, "loss": 0.2866, "step": 31059 }, { "epoch": 2.91478978978979, "grad_norm": 1.0491346731521263, "learning_rate": 2.460940009472168e-08, "loss": 0.2954, "step": 31060 }, { "epoch": 2.9148836336336337, "grad_norm": 0.97105060898148, "learning_rate": 2.4555328121643185e-08, "loss": 0.3367, "step": 31061 }, { "epoch": 2.9149774774774775, "grad_norm": 0.9514465577710521, "learning_rate": 2.450131547137691e-08, "loss": 0.3425, "step": 31062 }, { "epoch": 2.9150713213213213, "grad_norm": 1.085419791580364, "learning_rate": 2.4447362144567332e-08, "loss": 0.3378, "step": 31063 }, { "epoch": 2.915165165165165, "grad_norm": 1.3486564967838188, "learning_rate": 2.439346814185728e-08, "loss": 0.3363, "step": 31064 }, { "epoch": 2.915259009009009, "grad_norm": 1.0274007025843614, "learning_rate": 2.4339633463889568e-08, "loss": 0.3319, "step": 31065 }, { "epoch": 2.9153528528528527, "grad_norm": 1.0744698410045368, "learning_rate": 2.428585811130646e-08, "loss": 0.3188, "step": 31066 }, { "epoch": 2.9154466966966965, "grad_norm": 1.0591821884320691, "learning_rate": 2.4232142084748e-08, "loss": 0.3606, "step": 31067 }, { "epoch": 2.9155405405405403, "grad_norm": 1.2309237182789723, "learning_rate": 2.41784853848559e-08, "loss": 0.3149, "step": 31068 }, { "epoch": 2.9156343843843846, "grad_norm": 1.3545095667504716, "learning_rate": 2.4124888012269087e-08, "loss": 0.3077, "step": 31069 }, { "epoch": 2.915728228228228, "grad_norm": 0.9709395870374513, "learning_rate": 2.40713499676265e-08, "loss": 0.2963, "step": 31070 }, { "epoch": 2.9158220720720722, "grad_norm": 1.1712469555904823, "learning_rate": 2.401787125156707e-08, "loss": 0.3077, "step": 31071 }, { "epoch": 2.915915915915916, "grad_norm": 0.9960256676302115, "learning_rate": 2.396445186472862e-08, "loss": 0.299, "step": 31072 }, { "epoch": 2.91600975975976, "grad_norm": 1.01517680191653, "learning_rate": 2.3911091807747312e-08, "loss": 0.303, "step": 31073 }, { "epoch": 2.9161036036036037, "grad_norm": 1.0422093889991684, "learning_rate": 2.3857791081259296e-08, "loss": 0.2505, "step": 31074 }, { "epoch": 2.9161974474474475, "grad_norm": 7.79452757825333, "learning_rate": 2.380454968590129e-08, "loss": 0.3006, "step": 31075 }, { "epoch": 2.9162912912912913, "grad_norm": 1.0681380341411684, "learning_rate": 2.3751367622306676e-08, "loss": 0.3256, "step": 31076 }, { "epoch": 2.916385135135135, "grad_norm": 1.3300508840838283, "learning_rate": 2.36982448911105e-08, "loss": 0.3071, "step": 31077 }, { "epoch": 2.916478978978979, "grad_norm": 1.0045038558247918, "learning_rate": 2.3645181492946145e-08, "loss": 0.3121, "step": 31078 }, { "epoch": 2.9165728228228227, "grad_norm": 1.0663624795863718, "learning_rate": 2.359217742844533e-08, "loss": 0.3434, "step": 31079 }, { "epoch": 2.9166666666666665, "grad_norm": 1.1718833967369335, "learning_rate": 2.3539232698240878e-08, "loss": 0.2663, "step": 31080 }, { "epoch": 2.9167605105105103, "grad_norm": 1.186845476008036, "learning_rate": 2.3486347302963953e-08, "loss": 0.3548, "step": 31081 }, { "epoch": 2.9168543543543546, "grad_norm": 1.1045640806425172, "learning_rate": 2.343352124324516e-08, "loss": 0.2655, "step": 31082 }, { "epoch": 2.916948198198198, "grad_norm": 1.0111887024696886, "learning_rate": 2.3380754519714e-08, "loss": 0.2787, "step": 31083 }, { "epoch": 2.917042042042042, "grad_norm": 1.0354016588678345, "learning_rate": 2.3328047132999966e-08, "loss": 0.272, "step": 31084 }, { "epoch": 2.917135885885886, "grad_norm": 1.3518700965864654, "learning_rate": 2.3275399083730887e-08, "loss": 0.3216, "step": 31085 }, { "epoch": 2.91722972972973, "grad_norm": 1.1300322606968505, "learning_rate": 2.3222810372535153e-08, "loss": 0.2868, "step": 31086 }, { "epoch": 2.9173235735735736, "grad_norm": 1.2501081880445122, "learning_rate": 2.317028100004004e-08, "loss": 0.3039, "step": 31087 }, { "epoch": 2.9174174174174174, "grad_norm": 1.1549913912640575, "learning_rate": 2.3117810966871157e-08, "loss": 0.3373, "step": 31088 }, { "epoch": 2.9175112612612613, "grad_norm": 1.1646304667090457, "learning_rate": 2.3065400273654114e-08, "loss": 0.3043, "step": 31089 }, { "epoch": 2.917605105105105, "grad_norm": 1.050221222650175, "learning_rate": 2.3013048921014523e-08, "loss": 0.3398, "step": 31090 }, { "epoch": 2.917698948948949, "grad_norm": 1.1473503470110598, "learning_rate": 2.2960756909575777e-08, "loss": 0.3229, "step": 31091 }, { "epoch": 2.9177927927927927, "grad_norm": 0.972616796917599, "learning_rate": 2.290852423996126e-08, "loss": 0.3002, "step": 31092 }, { "epoch": 2.9178866366366365, "grad_norm": 1.0194658415401296, "learning_rate": 2.2856350912794923e-08, "loss": 0.2789, "step": 31093 }, { "epoch": 2.9179804804804803, "grad_norm": 1.1180583816339777, "learning_rate": 2.280423692869793e-08, "loss": 0.3059, "step": 31094 }, { "epoch": 2.9180743243243246, "grad_norm": 1.2945836283441854, "learning_rate": 2.2752182288291458e-08, "loss": 0.3452, "step": 31095 }, { "epoch": 2.918168168168168, "grad_norm": 0.9412229343453863, "learning_rate": 2.270018699219667e-08, "loss": 0.3545, "step": 31096 }, { "epoch": 2.918262012012012, "grad_norm": 2.697646303552766, "learning_rate": 2.264825104103363e-08, "loss": 0.3114, "step": 31097 }, { "epoch": 2.918355855855856, "grad_norm": 1.0776277682368696, "learning_rate": 2.259637443542073e-08, "loss": 0.3119, "step": 31098 }, { "epoch": 2.9184496996997, "grad_norm": 1.1963550101755964, "learning_rate": 2.2544557175977476e-08, "loss": 0.3224, "step": 31099 }, { "epoch": 2.9185435435435436, "grad_norm": 1.1034293483710869, "learning_rate": 2.249279926332171e-08, "loss": 0.2897, "step": 31100 }, { "epoch": 2.9186373873873874, "grad_norm": 1.094138282077789, "learning_rate": 2.2441100698069595e-08, "loss": 0.2803, "step": 31101 }, { "epoch": 2.9187312312312312, "grad_norm": 1.1853197813171512, "learning_rate": 2.238946148083787e-08, "loss": 0.2706, "step": 31102 }, { "epoch": 2.918825075075075, "grad_norm": 1.0950579537759897, "learning_rate": 2.2337881612242706e-08, "loss": 0.2662, "step": 31103 }, { "epoch": 2.918918918918919, "grad_norm": 1.1371374395045715, "learning_rate": 2.228636109289861e-08, "loss": 0.3275, "step": 31104 }, { "epoch": 2.9190127627627627, "grad_norm": 1.057737747735177, "learning_rate": 2.223489992342065e-08, "loss": 0.3406, "step": 31105 }, { "epoch": 2.9191066066066065, "grad_norm": 1.1248456482421463, "learning_rate": 2.2183498104421663e-08, "loss": 0.3238, "step": 31106 }, { "epoch": 2.9192004504504503, "grad_norm": 1.4189598308560665, "learning_rate": 2.2132155636513942e-08, "loss": 0.3083, "step": 31107 }, { "epoch": 2.9192942942942945, "grad_norm": 1.1439898497214722, "learning_rate": 2.2080872520310882e-08, "loss": 0.3153, "step": 31108 }, { "epoch": 2.919388138138138, "grad_norm": 1.1937065243161948, "learning_rate": 2.2029648756423662e-08, "loss": 0.3043, "step": 31109 }, { "epoch": 2.919481981981982, "grad_norm": 1.1608221867795783, "learning_rate": 2.1978484345462347e-08, "loss": 0.2967, "step": 31110 }, { "epoch": 2.919575825825826, "grad_norm": 0.98460583090668, "learning_rate": 2.192737928803812e-08, "loss": 0.2594, "step": 31111 }, { "epoch": 2.91966966966967, "grad_norm": 1.1014176452586562, "learning_rate": 2.1876333584758824e-08, "loss": 0.3529, "step": 31112 }, { "epoch": 2.9197635135135136, "grad_norm": 1.131100428004969, "learning_rate": 2.1825347236234528e-08, "loss": 0.2993, "step": 31113 }, { "epoch": 2.9198573573573574, "grad_norm": 1.1383585411775994, "learning_rate": 2.177442024307197e-08, "loss": 0.3348, "step": 31114 }, { "epoch": 2.919951201201201, "grad_norm": 0.9735479925252676, "learning_rate": 2.172355260587955e-08, "loss": 0.294, "step": 31115 }, { "epoch": 2.920045045045045, "grad_norm": 1.1538400678591463, "learning_rate": 2.1672744325262894e-08, "loss": 0.2708, "step": 31116 }, { "epoch": 2.920138888888889, "grad_norm": 1.2583940616094087, "learning_rate": 2.1621995401827634e-08, "loss": 0.2882, "step": 31117 }, { "epoch": 2.9202327327327327, "grad_norm": 1.1136211336610564, "learning_rate": 2.1571305836179947e-08, "loss": 0.3129, "step": 31118 }, { "epoch": 2.9203265765765765, "grad_norm": 1.1930091388439203, "learning_rate": 2.152067562892268e-08, "loss": 0.3418, "step": 31119 }, { "epoch": 2.9204204204204203, "grad_norm": 1.1784540802264396, "learning_rate": 2.1470104780660918e-08, "loss": 0.3206, "step": 31120 }, { "epoch": 2.9205142642642645, "grad_norm": 1.0964718206331538, "learning_rate": 2.1419593291996942e-08, "loss": 0.3182, "step": 31121 }, { "epoch": 2.920608108108108, "grad_norm": 1.0445883277737638, "learning_rate": 2.1369141163533057e-08, "loss": 0.3314, "step": 31122 }, { "epoch": 2.920701951951952, "grad_norm": 1.1089073347904026, "learning_rate": 2.1318748395871002e-08, "loss": 0.2901, "step": 31123 }, { "epoch": 2.920795795795796, "grad_norm": 1.061645160660163, "learning_rate": 2.1268414989610852e-08, "loss": 0.2965, "step": 31124 }, { "epoch": 2.9208896396396398, "grad_norm": 1.273334138057341, "learning_rate": 2.1218140945353792e-08, "loss": 0.3134, "step": 31125 }, { "epoch": 2.9209834834834836, "grad_norm": 1.6505651988075591, "learning_rate": 2.116792626369879e-08, "loss": 0.3119, "step": 31126 }, { "epoch": 2.9210773273273274, "grad_norm": 1.3505014802646755, "learning_rate": 2.111777094524481e-08, "loss": 0.2788, "step": 31127 }, { "epoch": 2.921171171171171, "grad_norm": 1.200600036707159, "learning_rate": 2.1067674990589705e-08, "loss": 0.3252, "step": 31128 }, { "epoch": 2.921265015015015, "grad_norm": 1.2158949434041817, "learning_rate": 2.101763840033022e-08, "loss": 0.2812, "step": 31129 }, { "epoch": 2.921358858858859, "grad_norm": 1.1380287789990469, "learning_rate": 2.096766117506366e-08, "loss": 0.3095, "step": 31130 }, { "epoch": 2.9214527027027026, "grad_norm": 1.4144848158804966, "learning_rate": 2.0917743315385654e-08, "loss": 0.3007, "step": 31131 }, { "epoch": 2.9215465465465464, "grad_norm": 1.4499508102754697, "learning_rate": 2.0867884821891284e-08, "loss": 0.2678, "step": 31132 }, { "epoch": 2.9216403903903903, "grad_norm": 1.0440200285840353, "learning_rate": 2.081808569517563e-08, "loss": 0.3394, "step": 31133 }, { "epoch": 2.9217342342342345, "grad_norm": 1.2010747457698823, "learning_rate": 2.076834593583099e-08, "loss": 0.3342, "step": 31134 }, { "epoch": 2.921828078078078, "grad_norm": 1.1908616413138151, "learning_rate": 2.0718665544451898e-08, "loss": 0.3213, "step": 31135 }, { "epoch": 2.921921921921922, "grad_norm": 1.016622468593557, "learning_rate": 2.0669044521630655e-08, "loss": 0.295, "step": 31136 }, { "epoch": 2.9220157657657655, "grad_norm": 1.1505308034009534, "learning_rate": 2.0619482867957896e-08, "loss": 0.2983, "step": 31137 }, { "epoch": 2.9221096096096097, "grad_norm": 1.1549904639773458, "learning_rate": 2.0569980584025374e-08, "loss": 0.2917, "step": 31138 }, { "epoch": 2.9222034534534536, "grad_norm": 1.1818891224559454, "learning_rate": 2.0520537670422614e-08, "loss": 0.327, "step": 31139 }, { "epoch": 2.9222972972972974, "grad_norm": 3.1533408925103683, "learning_rate": 2.04711541277397e-08, "loss": 0.3363, "step": 31140 }, { "epoch": 2.922391141141141, "grad_norm": 1.2250864528957344, "learning_rate": 2.0421829956565608e-08, "loss": 0.2904, "step": 31141 }, { "epoch": 2.922484984984985, "grad_norm": 0.9796802225105729, "learning_rate": 2.0372565157487645e-08, "loss": 0.2952, "step": 31142 }, { "epoch": 2.922578828828829, "grad_norm": 1.3284625712849834, "learning_rate": 2.032335973109423e-08, "loss": 0.3273, "step": 31143 }, { "epoch": 2.9226726726726726, "grad_norm": 1.2231693064681775, "learning_rate": 2.0274213677971e-08, "loss": 0.3158, "step": 31144 }, { "epoch": 2.9227665165165164, "grad_norm": 1.1461514843022764, "learning_rate": 2.0225126998704713e-08, "loss": 0.3249, "step": 31145 }, { "epoch": 2.9228603603603602, "grad_norm": 1.2614455173114272, "learning_rate": 2.01760996938799e-08, "loss": 0.3064, "step": 31146 }, { "epoch": 2.922954204204204, "grad_norm": 0.9444197292643491, "learning_rate": 2.0127131764081652e-08, "loss": 0.3003, "step": 31147 }, { "epoch": 2.923048048048048, "grad_norm": 0.9661134654749924, "learning_rate": 2.0078223209893945e-08, "loss": 0.2965, "step": 31148 }, { "epoch": 2.923141891891892, "grad_norm": 1.0667756957813908, "learning_rate": 2.0029374031899086e-08, "loss": 0.2864, "step": 31149 }, { "epoch": 2.9232357357357355, "grad_norm": 1.1840384136718294, "learning_rate": 1.9980584230681054e-08, "loss": 0.3387, "step": 31150 }, { "epoch": 2.9233295795795797, "grad_norm": 1.1523000389767024, "learning_rate": 1.9931853806819944e-08, "loss": 0.2843, "step": 31151 }, { "epoch": 2.9234234234234235, "grad_norm": 1.251445589051466, "learning_rate": 1.9883182760898066e-08, "loss": 0.3237, "step": 31152 }, { "epoch": 2.9235172672672673, "grad_norm": 1.3269379967350463, "learning_rate": 1.9834571093494402e-08, "loss": 0.3055, "step": 31153 }, { "epoch": 2.923611111111111, "grad_norm": 1.022685528994506, "learning_rate": 1.97860188051896e-08, "loss": 0.2767, "step": 31154 }, { "epoch": 2.923704954954955, "grad_norm": 1.129098983128652, "learning_rate": 1.9737525896562082e-08, "loss": 0.3219, "step": 31155 }, { "epoch": 2.923798798798799, "grad_norm": 1.1751587505792331, "learning_rate": 1.968909236819083e-08, "loss": 0.3485, "step": 31156 }, { "epoch": 2.9238926426426426, "grad_norm": 1.0131712806600852, "learning_rate": 1.9640718220652056e-08, "loss": 0.3122, "step": 31157 }, { "epoch": 2.9239864864864864, "grad_norm": 1.1706473049747381, "learning_rate": 1.9592403454523068e-08, "loss": 0.3003, "step": 31158 }, { "epoch": 2.92408033033033, "grad_norm": 1.2423439881316414, "learning_rate": 1.954414807038063e-08, "loss": 0.278, "step": 31159 }, { "epoch": 2.924174174174174, "grad_norm": 1.0968515730549613, "learning_rate": 1.949595206879873e-08, "loss": 0.2941, "step": 31160 }, { "epoch": 2.924268018018018, "grad_norm": 1.206589780154566, "learning_rate": 1.9447815450353012e-08, "loss": 0.3068, "step": 31161 }, { "epoch": 2.924361861861862, "grad_norm": 1.0397500752051596, "learning_rate": 1.939973821561747e-08, "loss": 0.2994, "step": 31162 }, { "epoch": 2.9244557057057055, "grad_norm": 1.101806850808699, "learning_rate": 1.9351720365164973e-08, "loss": 0.3324, "step": 31163 }, { "epoch": 2.9245495495495497, "grad_norm": 1.0997840224392825, "learning_rate": 1.9303761899568398e-08, "loss": 0.2908, "step": 31164 }, { "epoch": 2.9246433933933935, "grad_norm": 1.242457366866029, "learning_rate": 1.9255862819398398e-08, "loss": 0.3113, "step": 31165 }, { "epoch": 2.9247372372372373, "grad_norm": 1.2129186438938067, "learning_rate": 1.920802312522785e-08, "loss": 0.3002, "step": 31166 }, { "epoch": 2.924831081081081, "grad_norm": 0.9968729924044963, "learning_rate": 1.9160242817625742e-08, "loss": 0.3142, "step": 31167 }, { "epoch": 2.924924924924925, "grad_norm": 1.022801173595879, "learning_rate": 1.9112521897162174e-08, "loss": 0.3215, "step": 31168 }, { "epoch": 2.9250187687687688, "grad_norm": 1.2693496736277785, "learning_rate": 1.906486036440669e-08, "loss": 0.3028, "step": 31169 }, { "epoch": 2.9251126126126126, "grad_norm": 1.0742622852904853, "learning_rate": 1.9017258219926616e-08, "loss": 0.3303, "step": 31170 }, { "epoch": 2.9252064564564564, "grad_norm": 1.101893167143484, "learning_rate": 1.896971546428983e-08, "loss": 0.2819, "step": 31171 }, { "epoch": 2.9253003003003, "grad_norm": 1.0546644876243656, "learning_rate": 1.8922232098064207e-08, "loss": 0.2634, "step": 31172 }, { "epoch": 2.925394144144144, "grad_norm": 1.145921486784795, "learning_rate": 1.8874808121814303e-08, "loss": 0.2762, "step": 31173 }, { "epoch": 2.925487987987988, "grad_norm": 0.9836301348021982, "learning_rate": 1.8827443536106326e-08, "loss": 0.3399, "step": 31174 }, { "epoch": 2.925581831831832, "grad_norm": 1.0867618367585477, "learning_rate": 1.878013834150538e-08, "loss": 0.3076, "step": 31175 }, { "epoch": 2.9256756756756754, "grad_norm": 1.2001224850376961, "learning_rate": 1.873289253857491e-08, "loss": 0.3586, "step": 31176 }, { "epoch": 2.9257695195195197, "grad_norm": 1.0870715561730737, "learning_rate": 1.8685706127878898e-08, "loss": 0.3138, "step": 31177 }, { "epoch": 2.9258633633633635, "grad_norm": 1.0979610975900114, "learning_rate": 1.8638579109979126e-08, "loss": 0.3163, "step": 31178 }, { "epoch": 2.9259572072072073, "grad_norm": 1.0393439795037274, "learning_rate": 1.8591511485437363e-08, "loss": 0.3025, "step": 31179 }, { "epoch": 2.926051051051051, "grad_norm": 1.0508173092857485, "learning_rate": 1.8544503254815938e-08, "loss": 0.3191, "step": 31180 }, { "epoch": 2.926144894894895, "grad_norm": 1.1104384402295373, "learning_rate": 1.84975544186744e-08, "loss": 0.291, "step": 31181 }, { "epoch": 2.9262387387387387, "grad_norm": 1.143739578444291, "learning_rate": 1.8450664977572864e-08, "loss": 0.2992, "step": 31182 }, { "epoch": 2.9263325825825826, "grad_norm": 15.932753427887993, "learning_rate": 1.8403834932070318e-08, "loss": 0.3118, "step": 31183 }, { "epoch": 2.9264264264264264, "grad_norm": 1.0622390074541737, "learning_rate": 1.8357064282725212e-08, "loss": 0.3198, "step": 31184 }, { "epoch": 2.92652027027027, "grad_norm": 1.6824237400385127, "learning_rate": 1.8310353030094875e-08, "loss": 0.2993, "step": 31185 }, { "epoch": 2.926614114114114, "grad_norm": 2.728780847976901, "learning_rate": 1.8263701174736638e-08, "loss": 0.3355, "step": 31186 }, { "epoch": 2.926707957957958, "grad_norm": 1.0797987958141329, "learning_rate": 1.8217108717207276e-08, "loss": 0.2875, "step": 31187 }, { "epoch": 2.926801801801802, "grad_norm": 1.0581296226422212, "learning_rate": 1.8170575658060795e-08, "loss": 0.3193, "step": 31188 }, { "epoch": 2.9268956456456454, "grad_norm": 1.0382672655418346, "learning_rate": 1.8124101997853417e-08, "loss": 0.308, "step": 31189 }, { "epoch": 2.9269894894894897, "grad_norm": 1.1962634755941692, "learning_rate": 1.8077687737138583e-08, "loss": 0.3202, "step": 31190 }, { "epoch": 2.9270833333333335, "grad_norm": 1.0433581478188323, "learning_rate": 1.8031332876469186e-08, "loss": 0.3329, "step": 31191 }, { "epoch": 2.9271771771771773, "grad_norm": 2.2240158782785415, "learning_rate": 1.798503741639923e-08, "loss": 0.3501, "step": 31192 }, { "epoch": 2.927271021021021, "grad_norm": 1.230554730519256, "learning_rate": 1.793880135747994e-08, "loss": 0.2801, "step": 31193 }, { "epoch": 2.927364864864865, "grad_norm": 1.1767918065055851, "learning_rate": 1.789262470026254e-08, "loss": 0.3453, "step": 31194 }, { "epoch": 2.9274587087087087, "grad_norm": 1.1911397615101305, "learning_rate": 1.78465074452977e-08, "loss": 0.3197, "step": 31195 }, { "epoch": 2.9275525525525525, "grad_norm": 1.1497463188193762, "learning_rate": 1.7800449593135537e-08, "loss": 0.2913, "step": 31196 }, { "epoch": 2.9276463963963963, "grad_norm": 1.0931944892050667, "learning_rate": 1.7754451144324504e-08, "loss": 0.3304, "step": 31197 }, { "epoch": 2.92774024024024, "grad_norm": 1.2592445061660953, "learning_rate": 1.7708512099414156e-08, "loss": 0.3217, "step": 31198 }, { "epoch": 2.927834084084084, "grad_norm": 1.0963393094489067, "learning_rate": 1.766263245895128e-08, "loss": 0.3212, "step": 31199 }, { "epoch": 2.9279279279279278, "grad_norm": 1.1438242524540319, "learning_rate": 1.7616812223483215e-08, "loss": 0.2804, "step": 31200 }, { "epoch": 2.928021771771772, "grad_norm": 1.4582758071403632, "learning_rate": 1.7571051393556748e-08, "loss": 0.3338, "step": 31201 }, { "epoch": 2.9281156156156154, "grad_norm": 1.3207628477118447, "learning_rate": 1.7525349969716443e-08, "loss": 0.2868, "step": 31202 }, { "epoch": 2.9282094594594597, "grad_norm": 1.0143621975716028, "learning_rate": 1.7479707952507972e-08, "loss": 0.3246, "step": 31203 }, { "epoch": 2.9283033033033035, "grad_norm": 0.9576913949470652, "learning_rate": 1.7434125342475348e-08, "loss": 0.2755, "step": 31204 }, { "epoch": 2.9283971471471473, "grad_norm": 1.3870135259966287, "learning_rate": 1.7388602140162026e-08, "loss": 0.336, "step": 31205 }, { "epoch": 2.928490990990991, "grad_norm": 0.9258863688017679, "learning_rate": 1.73431383461109e-08, "loss": 0.3106, "step": 31206 }, { "epoch": 2.928584834834835, "grad_norm": 1.1037911342559823, "learning_rate": 1.729773396086376e-08, "loss": 0.328, "step": 31207 }, { "epoch": 2.9286786786786787, "grad_norm": 1.105547617133357, "learning_rate": 1.72523889849624e-08, "loss": 0.3075, "step": 31208 }, { "epoch": 2.9287725225225225, "grad_norm": 1.1527347215022565, "learning_rate": 1.7207103418946936e-08, "loss": 0.2792, "step": 31209 }, { "epoch": 2.9288663663663663, "grad_norm": 1.1726729908325384, "learning_rate": 1.716187726335805e-08, "loss": 0.3398, "step": 31210 }, { "epoch": 2.92896021021021, "grad_norm": 1.4053883659629434, "learning_rate": 1.711671051873476e-08, "loss": 0.3161, "step": 31211 }, { "epoch": 2.929054054054054, "grad_norm": 1.0783545844149511, "learning_rate": 1.707160318561496e-08, "loss": 0.3429, "step": 31212 }, { "epoch": 2.9291478978978978, "grad_norm": 1.2781000544918708, "learning_rate": 1.702655526453656e-08, "loss": 0.2767, "step": 31213 }, { "epoch": 2.929241741741742, "grad_norm": 1.1734076088840197, "learning_rate": 1.698156675603746e-08, "loss": 0.2973, "step": 31214 }, { "epoch": 2.9293355855855854, "grad_norm": 1.214254036695671, "learning_rate": 1.6936637660653344e-08, "loss": 0.3134, "step": 31215 }, { "epoch": 2.9294294294294296, "grad_norm": 1.0988144288775865, "learning_rate": 1.689176797892045e-08, "loss": 0.3206, "step": 31216 }, { "epoch": 2.929523273273273, "grad_norm": 1.0871354076531796, "learning_rate": 1.6846957711373902e-08, "loss": 0.2817, "step": 31217 }, { "epoch": 2.9296171171171173, "grad_norm": 1.250960949794445, "learning_rate": 1.680220685854661e-08, "loss": 0.279, "step": 31218 }, { "epoch": 2.929710960960961, "grad_norm": 1.1283603597345182, "learning_rate": 1.6757515420973702e-08, "loss": 0.2989, "step": 31219 }, { "epoch": 2.929804804804805, "grad_norm": 1.1023902719371361, "learning_rate": 1.6712883399187528e-08, "loss": 0.3481, "step": 31220 }, { "epoch": 2.9298986486486487, "grad_norm": 1.0229591426927649, "learning_rate": 1.6668310793719334e-08, "loss": 0.354, "step": 31221 }, { "epoch": 2.9299924924924925, "grad_norm": 1.046355351655087, "learning_rate": 1.6623797605102022e-08, "loss": 0.3067, "step": 31222 }, { "epoch": 2.9300863363363363, "grad_norm": 0.9313697049718199, "learning_rate": 1.6579343833865723e-08, "loss": 0.3279, "step": 31223 }, { "epoch": 2.93018018018018, "grad_norm": 1.231833787588107, "learning_rate": 1.6534949480540018e-08, "loss": 0.3546, "step": 31224 }, { "epoch": 2.930274024024024, "grad_norm": 1.1184994959459638, "learning_rate": 1.6490614545655036e-08, "loss": 0.3426, "step": 31225 }, { "epoch": 2.9303678678678677, "grad_norm": 1.1564133038926205, "learning_rate": 1.6446339029739243e-08, "loss": 0.2852, "step": 31226 }, { "epoch": 2.9304617117117115, "grad_norm": 1.066138778184693, "learning_rate": 1.6402122933319442e-08, "loss": 0.3071, "step": 31227 }, { "epoch": 2.9305555555555554, "grad_norm": 1.0821175692937293, "learning_rate": 1.635796625692465e-08, "loss": 0.2903, "step": 31228 }, { "epoch": 2.9306493993993996, "grad_norm": 1.088655501334241, "learning_rate": 1.6313869001079452e-08, "loss": 0.2911, "step": 31229 }, { "epoch": 2.930743243243243, "grad_norm": 0.9583125856089358, "learning_rate": 1.6269831166311202e-08, "loss": 0.3234, "step": 31230 }, { "epoch": 2.9308370870870872, "grad_norm": 1.0398681277314235, "learning_rate": 1.6225852753143923e-08, "loss": 0.32, "step": 31231 }, { "epoch": 2.930930930930931, "grad_norm": 1.2113865643317305, "learning_rate": 1.61819337621022e-08, "loss": 0.3274, "step": 31232 }, { "epoch": 2.931024774774775, "grad_norm": 1.0780986299, "learning_rate": 1.613807419371005e-08, "loss": 0.2893, "step": 31233 }, { "epoch": 2.9311186186186187, "grad_norm": 1.1959464198105072, "learning_rate": 1.60942740484904e-08, "loss": 0.3242, "step": 31234 }, { "epoch": 2.9312124624624625, "grad_norm": 1.2959765815129853, "learning_rate": 1.6050533326965047e-08, "loss": 0.3026, "step": 31235 }, { "epoch": 2.9313063063063063, "grad_norm": 1.7402889329382085, "learning_rate": 1.6006852029655796e-08, "loss": 0.283, "step": 31236 }, { "epoch": 2.93140015015015, "grad_norm": 1.3059389103886656, "learning_rate": 1.5963230157083897e-08, "loss": 0.2988, "step": 31237 }, { "epoch": 2.931493993993994, "grad_norm": 1.173252962711598, "learning_rate": 1.5919667709768382e-08, "loss": 0.3313, "step": 31238 }, { "epoch": 2.9315878378378377, "grad_norm": 1.1121058890346323, "learning_rate": 1.587616468822939e-08, "loss": 0.299, "step": 31239 }, { "epoch": 2.9316816816816815, "grad_norm": 1.4229104145837237, "learning_rate": 1.5832721092985947e-08, "loss": 0.3283, "step": 31240 }, { "epoch": 2.9317755255255253, "grad_norm": 1.2433364441339168, "learning_rate": 1.5789336924555422e-08, "loss": 0.2816, "step": 31241 }, { "epoch": 2.9318693693693696, "grad_norm": 1.5823441122761044, "learning_rate": 1.574601218345462e-08, "loss": 0.3294, "step": 31242 }, { "epoch": 2.931963213213213, "grad_norm": 1.156668761675381, "learning_rate": 1.5702746870201456e-08, "loss": 0.3579, "step": 31243 }, { "epoch": 2.932057057057057, "grad_norm": 1.0396846066898222, "learning_rate": 1.5659540985310528e-08, "loss": 0.3105, "step": 31244 }, { "epoch": 2.932150900900901, "grad_norm": 1.3457123891858278, "learning_rate": 1.561639452929753e-08, "loss": 0.2799, "step": 31245 }, { "epoch": 2.932244744744745, "grad_norm": 1.05028996483549, "learning_rate": 1.5573307502677048e-08, "loss": 0.2753, "step": 31246 }, { "epoch": 2.9323385885885886, "grad_norm": 1.0875328951943983, "learning_rate": 1.553027990596312e-08, "loss": 0.2419, "step": 31247 }, { "epoch": 2.9324324324324325, "grad_norm": 1.1789530991948531, "learning_rate": 1.5487311739667556e-08, "loss": 0.2971, "step": 31248 }, { "epoch": 2.9325262762762763, "grad_norm": 1.1450336568364379, "learning_rate": 1.5444403004303833e-08, "loss": 0.3605, "step": 31249 }, { "epoch": 2.93262012012012, "grad_norm": 0.9900439670976521, "learning_rate": 1.540155370038321e-08, "loss": 0.2943, "step": 31250 }, { "epoch": 2.932713963963964, "grad_norm": 1.7691662446358107, "learning_rate": 1.535876382841639e-08, "loss": 0.3343, "step": 31251 }, { "epoch": 2.9328078078078077, "grad_norm": 1.3490697836132337, "learning_rate": 1.531603338891352e-08, "loss": 0.2985, "step": 31252 }, { "epoch": 2.9329016516516515, "grad_norm": 1.0108531111321377, "learning_rate": 1.527336238238475e-08, "loss": 0.3201, "step": 31253 }, { "epoch": 2.9329954954954953, "grad_norm": 3.919566978765906, "learning_rate": 1.5230750809338e-08, "loss": 0.2959, "step": 31254 }, { "epoch": 2.9330893393393396, "grad_norm": 1.0913300670766422, "learning_rate": 1.5188198670281762e-08, "loss": 0.3139, "step": 31255 }, { "epoch": 2.933183183183183, "grad_norm": 1.1411320308635264, "learning_rate": 1.5145705965722844e-08, "loss": 0.3268, "step": 31256 }, { "epoch": 2.933277027027027, "grad_norm": 1.2467662979784104, "learning_rate": 1.5103272696168624e-08, "loss": 0.3226, "step": 31257 }, { "epoch": 2.933370870870871, "grad_norm": 1.0738415381943291, "learning_rate": 1.5060898862125363e-08, "loss": 0.3194, "step": 31258 }, { "epoch": 2.933464714714715, "grad_norm": 1.1525519667451254, "learning_rate": 1.5018584464097098e-08, "loss": 0.3073, "step": 31259 }, { "epoch": 2.9335585585585586, "grad_norm": 1.911978601647059, "learning_rate": 1.497632950258898e-08, "loss": 0.3071, "step": 31260 }, { "epoch": 2.9336524024024024, "grad_norm": 1.0515754897857503, "learning_rate": 1.4934133978105058e-08, "loss": 0.2664, "step": 31261 }, { "epoch": 2.9337462462462462, "grad_norm": 1.084005849098311, "learning_rate": 1.4891997891147703e-08, "loss": 0.3247, "step": 31262 }, { "epoch": 2.93384009009009, "grad_norm": 1.390692349892353, "learning_rate": 1.48499212422204e-08, "loss": 0.3358, "step": 31263 }, { "epoch": 2.933933933933934, "grad_norm": 1.170858607019387, "learning_rate": 1.480790403182386e-08, "loss": 0.287, "step": 31264 }, { "epoch": 2.9340277777777777, "grad_norm": 1.1532883471909101, "learning_rate": 1.4765946260459351e-08, "loss": 0.2972, "step": 31265 }, { "epoch": 2.9341216216216215, "grad_norm": 1.2667763497543962, "learning_rate": 1.4724047928627027e-08, "loss": 0.3198, "step": 31266 }, { "epoch": 2.9342154654654653, "grad_norm": 1.98168564424359, "learning_rate": 1.4682209036827045e-08, "loss": 0.2812, "step": 31267 }, { "epoch": 2.9343093093093096, "grad_norm": 1.5711321014555677, "learning_rate": 1.4640429585557892e-08, "loss": 0.3333, "step": 31268 }, { "epoch": 2.934403153153153, "grad_norm": 1.285948022238532, "learning_rate": 1.459870957531695e-08, "loss": 0.2685, "step": 31269 }, { "epoch": 2.934496996996997, "grad_norm": 1.0114465639548433, "learning_rate": 1.4557049006603264e-08, "loss": 0.3514, "step": 31270 }, { "epoch": 2.934590840840841, "grad_norm": 1.0427657011220504, "learning_rate": 1.4515447879911992e-08, "loss": 0.3148, "step": 31271 }, { "epoch": 2.934684684684685, "grad_norm": 1.3652881579074165, "learning_rate": 1.4473906195739961e-08, "loss": 0.3292, "step": 31272 }, { "epoch": 2.9347785285285286, "grad_norm": 2.031349545025681, "learning_rate": 1.4432423954582331e-08, "loss": 0.311, "step": 31273 }, { "epoch": 2.9348723723723724, "grad_norm": 1.250155676232603, "learning_rate": 1.4391001156933703e-08, "loss": 0.3287, "step": 31274 }, { "epoch": 2.9349662162162162, "grad_norm": 1.0516599905627662, "learning_rate": 1.4349637803287575e-08, "loss": 0.3041, "step": 31275 }, { "epoch": 2.93506006006006, "grad_norm": 1.2339030682691428, "learning_rate": 1.4308333894137993e-08, "loss": 0.3411, "step": 31276 }, { "epoch": 2.935153903903904, "grad_norm": 1.2110774370780286, "learning_rate": 1.4267089429976789e-08, "loss": 0.3365, "step": 31277 }, { "epoch": 2.9352477477477477, "grad_norm": 1.1865549130651827, "learning_rate": 1.4225904411295789e-08, "loss": 0.2912, "step": 31278 }, { "epoch": 2.9353415915915915, "grad_norm": 1.6598805363314892, "learning_rate": 1.4184778838586267e-08, "loss": 0.3004, "step": 31279 }, { "epoch": 2.9354354354354353, "grad_norm": 1.138900561414844, "learning_rate": 1.4143712712338387e-08, "loss": 0.299, "step": 31280 }, { "epoch": 2.9355292792792795, "grad_norm": 1.0845034343843698, "learning_rate": 1.4102706033041757e-08, "loss": 0.2899, "step": 31281 }, { "epoch": 2.935623123123123, "grad_norm": 1.1950654345122533, "learning_rate": 1.4061758801185432e-08, "loss": 0.3229, "step": 31282 }, { "epoch": 2.935716966966967, "grad_norm": 0.968721501201864, "learning_rate": 1.4020871017257354e-08, "loss": 0.3174, "step": 31283 }, { "epoch": 2.935810810810811, "grad_norm": 1.0700454316040566, "learning_rate": 1.3980042681745464e-08, "loss": 0.2934, "step": 31284 }, { "epoch": 2.9359046546546548, "grad_norm": 1.1374170697224277, "learning_rate": 1.3939273795136044e-08, "loss": 0.3364, "step": 31285 }, { "epoch": 2.9359984984984986, "grad_norm": 0.9235724055035068, "learning_rate": 1.3898564357915922e-08, "loss": 0.3075, "step": 31286 }, { "epoch": 2.9360923423423424, "grad_norm": 1.1069416575780602, "learning_rate": 1.3857914370569714e-08, "loss": 0.2856, "step": 31287 }, { "epoch": 2.936186186186186, "grad_norm": 1.0232244802543273, "learning_rate": 1.3817323833582586e-08, "loss": 0.2826, "step": 31288 }, { "epoch": 2.93628003003003, "grad_norm": 1.3083506806183516, "learning_rate": 1.3776792747438595e-08, "loss": 0.3539, "step": 31289 }, { "epoch": 2.936373873873874, "grad_norm": 1.2291116985559396, "learning_rate": 1.3736321112620132e-08, "loss": 0.2789, "step": 31290 }, { "epoch": 2.9364677177177176, "grad_norm": 1.1793874987046273, "learning_rate": 1.3695908929610702e-08, "loss": 0.3258, "step": 31291 }, { "epoch": 2.9365615615615615, "grad_norm": 1.1652399792811718, "learning_rate": 1.365555619889214e-08, "loss": 0.3129, "step": 31292 }, { "epoch": 2.9366554054054053, "grad_norm": 1.1059433655048219, "learning_rate": 1.3615262920944616e-08, "loss": 0.3467, "step": 31293 }, { "epoch": 2.9367492492492495, "grad_norm": 1.181281399151853, "learning_rate": 1.3575029096249414e-08, "loss": 0.2904, "step": 31294 }, { "epoch": 2.936843093093093, "grad_norm": 1.1296924194118438, "learning_rate": 1.353485472528615e-08, "loss": 0.3307, "step": 31295 }, { "epoch": 2.936936936936937, "grad_norm": 1.0504764593672589, "learning_rate": 1.3494739808533886e-08, "loss": 0.2907, "step": 31296 }, { "epoch": 2.9370307807807805, "grad_norm": 1.3556901817385878, "learning_rate": 1.345468434647057e-08, "loss": 0.2883, "step": 31297 }, { "epoch": 2.9371246246246248, "grad_norm": 0.9716097935728725, "learning_rate": 1.34146883395736e-08, "loss": 0.2981, "step": 31298 }, { "epoch": 2.9372184684684686, "grad_norm": 0.995461672862707, "learning_rate": 1.3374751788320373e-08, "loss": 0.3123, "step": 31299 }, { "epoch": 2.9373123123123124, "grad_norm": 1.2015362568343526, "learning_rate": 1.3334874693187171e-08, "loss": 0.3381, "step": 31300 }, { "epoch": 2.937406156156156, "grad_norm": 1.5770597147509842, "learning_rate": 1.329505705464862e-08, "loss": 0.3633, "step": 31301 }, { "epoch": 2.9375, "grad_norm": 1.1018231154303153, "learning_rate": 1.3255298873180445e-08, "loss": 0.2789, "step": 31302 }, { "epoch": 2.937593843843844, "grad_norm": 1.3958517397477403, "learning_rate": 1.3215600149256158e-08, "loss": 0.3209, "step": 31303 }, { "epoch": 2.9376876876876876, "grad_norm": 1.2802583184680654, "learning_rate": 1.317596088334927e-08, "loss": 0.3335, "step": 31304 }, { "epoch": 2.9377815315315314, "grad_norm": 1.1056943738514506, "learning_rate": 1.3136381075931625e-08, "loss": 0.2696, "step": 31305 }, { "epoch": 2.9378753753753752, "grad_norm": 1.5563963937764802, "learning_rate": 1.3096860727476735e-08, "loss": 0.2808, "step": 31306 }, { "epoch": 2.9379692192192195, "grad_norm": 1.0745679265829835, "learning_rate": 1.3057399838454775e-08, "loss": 0.344, "step": 31307 }, { "epoch": 2.938063063063063, "grad_norm": 1.0228159106980221, "learning_rate": 1.301799840933593e-08, "loss": 0.337, "step": 31308 }, { "epoch": 2.938156906906907, "grad_norm": 1.1595295200576061, "learning_rate": 1.2978656440590376e-08, "loss": 0.3425, "step": 31309 }, { "epoch": 2.9382507507507505, "grad_norm": 1.0985577723831086, "learning_rate": 1.2939373932687737e-08, "loss": 0.337, "step": 31310 }, { "epoch": 2.9383445945945947, "grad_norm": 1.2443392204637622, "learning_rate": 1.290015088609542e-08, "loss": 0.3335, "step": 31311 }, { "epoch": 2.9384384384384385, "grad_norm": 1.131029757404059, "learning_rate": 1.2860987301281936e-08, "loss": 0.2921, "step": 31312 }, { "epoch": 2.9385322822822824, "grad_norm": 2.218887844764466, "learning_rate": 1.2821883178713579e-08, "loss": 0.3142, "step": 31313 }, { "epoch": 2.938626126126126, "grad_norm": 1.2124194991858772, "learning_rate": 1.27828385188572e-08, "loss": 0.3073, "step": 31314 }, { "epoch": 2.93871996996997, "grad_norm": 1.0772175478784807, "learning_rate": 1.2743853322177425e-08, "loss": 0.2851, "step": 31315 }, { "epoch": 2.938813813813814, "grad_norm": 1.133916857492134, "learning_rate": 1.2704927589140548e-08, "loss": 0.2676, "step": 31316 }, { "epoch": 2.9389076576576576, "grad_norm": 1.1276559266646726, "learning_rate": 1.2666061320208977e-08, "loss": 0.292, "step": 31317 }, { "epoch": 2.9390015015015014, "grad_norm": 1.5071255910367407, "learning_rate": 1.2627254515847342e-08, "loss": 0.2657, "step": 31318 }, { "epoch": 2.9390953453453452, "grad_norm": 1.0535600420484161, "learning_rate": 1.2588507176517495e-08, "loss": 0.3049, "step": 31319 }, { "epoch": 2.939189189189189, "grad_norm": 1.1680370438772056, "learning_rate": 1.2549819302682398e-08, "loss": 0.3117, "step": 31320 }, { "epoch": 2.939283033033033, "grad_norm": 1.3946234502490902, "learning_rate": 1.2511190894802239e-08, "loss": 0.3402, "step": 31321 }, { "epoch": 2.939376876876877, "grad_norm": 1.2339504422904266, "learning_rate": 1.247262195333887e-08, "loss": 0.314, "step": 31322 }, { "epoch": 2.9394707207207205, "grad_norm": 1.065255499342782, "learning_rate": 1.2434112478750815e-08, "loss": 0.3018, "step": 31323 }, { "epoch": 2.9395645645645647, "grad_norm": 3.7003959130299338, "learning_rate": 1.239566247149826e-08, "loss": 0.2993, "step": 31324 }, { "epoch": 2.9396584084084085, "grad_norm": 1.055324787079991, "learning_rate": 1.2357271932039172e-08, "loss": 0.2877, "step": 31325 }, { "epoch": 2.9397522522522523, "grad_norm": 1.0072322800898927, "learning_rate": 1.2318940860830963e-08, "loss": 0.3221, "step": 31326 }, { "epoch": 2.939846096096096, "grad_norm": 1.1643360620478866, "learning_rate": 1.2280669258331046e-08, "loss": 0.2962, "step": 31327 }, { "epoch": 2.93993993993994, "grad_norm": 1.0070286965235318, "learning_rate": 1.2242457124996276e-08, "loss": 0.3366, "step": 31328 }, { "epoch": 2.9400337837837838, "grad_norm": 1.1089595328519206, "learning_rate": 1.2204304461281291e-08, "loss": 0.3053, "step": 31329 }, { "epoch": 2.9401276276276276, "grad_norm": 1.169649970015513, "learning_rate": 1.216621126764128e-08, "loss": 0.3012, "step": 31330 }, { "epoch": 2.9402214714714714, "grad_norm": 1.0940854563346798, "learning_rate": 1.2128177544530884e-08, "loss": 0.3138, "step": 31331 }, { "epoch": 2.940315315315315, "grad_norm": 1.004529983979927, "learning_rate": 1.2090203292403069e-08, "loss": 0.293, "step": 31332 }, { "epoch": 2.940409159159159, "grad_norm": 1.786334295629488, "learning_rate": 1.2052288511711362e-08, "loss": 0.2977, "step": 31333 }, { "epoch": 2.940503003003003, "grad_norm": 1.0212108851600044, "learning_rate": 1.2014433202906517e-08, "loss": 0.3172, "step": 31334 }, { "epoch": 2.940596846846847, "grad_norm": 1.1755896209355712, "learning_rate": 1.1976637366441501e-08, "loss": 0.2958, "step": 31335 }, { "epoch": 2.9406906906906904, "grad_norm": 1.1346288182300186, "learning_rate": 1.1938901002765402e-08, "loss": 0.3128, "step": 31336 }, { "epoch": 2.9407845345345347, "grad_norm": 1.0730049513920266, "learning_rate": 1.1901224112328969e-08, "loss": 0.3174, "step": 31337 }, { "epoch": 2.9408783783783785, "grad_norm": 1.8488704927908741, "learning_rate": 1.1863606695581287e-08, "loss": 0.3564, "step": 31338 }, { "epoch": 2.9409722222222223, "grad_norm": 0.9825865800874252, "learning_rate": 1.1826048752970886e-08, "loss": 0.3088, "step": 31339 }, { "epoch": 2.941066066066066, "grad_norm": 1.1083543001302798, "learning_rate": 1.1788550284945744e-08, "loss": 0.2896, "step": 31340 }, { "epoch": 2.94115990990991, "grad_norm": 1.0824146388477434, "learning_rate": 1.1751111291952722e-08, "loss": 0.3242, "step": 31341 }, { "epoch": 2.9412537537537538, "grad_norm": 0.977873995409454, "learning_rate": 1.1713731774438131e-08, "loss": 0.2803, "step": 31342 }, { "epoch": 2.9413475975975976, "grad_norm": 1.422751496725781, "learning_rate": 1.167641173284828e-08, "loss": 0.313, "step": 31343 }, { "epoch": 2.9414414414414414, "grad_norm": 1.0964299603617722, "learning_rate": 1.163915116762726e-08, "loss": 0.3073, "step": 31344 }, { "epoch": 2.941535285285285, "grad_norm": 1.1099345361237551, "learning_rate": 1.1601950079219715e-08, "loss": 0.302, "step": 31345 }, { "epoch": 2.941629129129129, "grad_norm": 1.0878287717017696, "learning_rate": 1.1564808468069177e-08, "loss": 0.2584, "step": 31346 }, { "epoch": 2.941722972972973, "grad_norm": 1.0637044680033816, "learning_rate": 1.1527726334618628e-08, "loss": 0.3072, "step": 31347 }, { "epoch": 2.941816816816817, "grad_norm": 1.227183453473684, "learning_rate": 1.1490703679310489e-08, "loss": 0.2924, "step": 31348 }, { "epoch": 2.9419106606606604, "grad_norm": 1.4008876612269199, "learning_rate": 1.145374050258552e-08, "loss": 0.3144, "step": 31349 }, { "epoch": 2.9420045045045047, "grad_norm": 1.0332211582967408, "learning_rate": 1.141683680488448e-08, "loss": 0.3238, "step": 31350 }, { "epoch": 2.9420983483483485, "grad_norm": 1.1125025255668597, "learning_rate": 1.137999258664757e-08, "loss": 0.3144, "step": 31351 }, { "epoch": 2.9421921921921923, "grad_norm": 1.0951289181586623, "learning_rate": 1.1343207848314441e-08, "loss": 0.3228, "step": 31352 }, { "epoch": 2.942286036036036, "grad_norm": 1.0613443692031508, "learning_rate": 1.1306482590323076e-08, "loss": 0.3376, "step": 31353 }, { "epoch": 2.94237987987988, "grad_norm": 1.1743700475529815, "learning_rate": 1.1269816813112011e-08, "loss": 0.3441, "step": 31354 }, { "epoch": 2.9424737237237237, "grad_norm": 1.1839173029471284, "learning_rate": 1.123321051711812e-08, "loss": 0.3317, "step": 31355 }, { "epoch": 2.9425675675675675, "grad_norm": 1.0044895530711049, "learning_rate": 1.1196663702777722e-08, "loss": 0.2999, "step": 31356 }, { "epoch": 2.9426614114114114, "grad_norm": 1.3870620543833063, "learning_rate": 1.1160176370526576e-08, "loss": 0.3329, "step": 31357 }, { "epoch": 2.942755255255255, "grad_norm": 1.392166836048728, "learning_rate": 1.1123748520799893e-08, "loss": 0.3021, "step": 31358 }, { "epoch": 2.942849099099099, "grad_norm": 1.5074088585080407, "learning_rate": 1.1087380154031213e-08, "loss": 0.2957, "step": 31359 }, { "epoch": 2.942942942942943, "grad_norm": 1.1948484775377048, "learning_rate": 1.1051071270655745e-08, "loss": 0.2901, "step": 31360 }, { "epoch": 2.943036786786787, "grad_norm": 1.1940257666442864, "learning_rate": 1.1014821871105363e-08, "loss": 0.3164, "step": 31361 }, { "epoch": 2.9431306306306304, "grad_norm": 1.0615275204727748, "learning_rate": 1.0978631955812502e-08, "loss": 0.3019, "step": 31362 }, { "epoch": 2.9432244744744747, "grad_norm": 1.4517715441451227, "learning_rate": 1.0942501525207926e-08, "loss": 0.3067, "step": 31363 }, { "epoch": 2.9433183183183185, "grad_norm": 1.2113149942250623, "learning_rate": 1.090643057972407e-08, "loss": 0.2992, "step": 31364 }, { "epoch": 2.9434121621621623, "grad_norm": 1.5133954834752672, "learning_rate": 1.0870419119788922e-08, "loss": 0.3106, "step": 31365 }, { "epoch": 2.943506006006006, "grad_norm": 1.6969345484625766, "learning_rate": 1.0834467145833804e-08, "loss": 0.3067, "step": 31366 }, { "epoch": 2.94359984984985, "grad_norm": 1.6391359333046227, "learning_rate": 1.0798574658286153e-08, "loss": 0.2743, "step": 31367 }, { "epoch": 2.9436936936936937, "grad_norm": 1.0927250966564797, "learning_rate": 1.0762741657574516e-08, "loss": 0.2623, "step": 31368 }, { "epoch": 2.9437875375375375, "grad_norm": 1.1751438506528122, "learning_rate": 1.0726968144125772e-08, "loss": 0.3125, "step": 31369 }, { "epoch": 2.9438813813813813, "grad_norm": 1.6142523916661318, "learning_rate": 1.06912541183668e-08, "loss": 0.3182, "step": 31370 }, { "epoch": 2.943975225225225, "grad_norm": 1.3359818811936164, "learning_rate": 1.065559958072282e-08, "loss": 0.3305, "step": 31371 }, { "epoch": 2.944069069069069, "grad_norm": 1.0468614187017622, "learning_rate": 1.06200045316196e-08, "loss": 0.3304, "step": 31372 }, { "epoch": 2.9441629129129128, "grad_norm": 1.0818651814288078, "learning_rate": 1.0584468971481242e-08, "loss": 0.2823, "step": 31373 }, { "epoch": 2.944256756756757, "grad_norm": 1.0052793797470008, "learning_rate": 1.0548992900731302e-08, "loss": 0.3033, "step": 31374 }, { "epoch": 2.9443506006006004, "grad_norm": 1.1712937731258126, "learning_rate": 1.0513576319793328e-08, "loss": 0.2953, "step": 31375 }, { "epoch": 2.9444444444444446, "grad_norm": 1.0848872172978106, "learning_rate": 1.0478219229089204e-08, "loss": 0.2879, "step": 31376 }, { "epoch": 2.9445382882882885, "grad_norm": 1.1291830387981727, "learning_rate": 1.0442921629039703e-08, "loss": 0.2643, "step": 31377 }, { "epoch": 2.9446321321321323, "grad_norm": 1.196082063884283, "learning_rate": 1.0407683520067269e-08, "loss": 0.3181, "step": 31378 }, { "epoch": 2.944725975975976, "grad_norm": 1.5993326181627356, "learning_rate": 1.0372504902591007e-08, "loss": 0.3216, "step": 31379 }, { "epoch": 2.94481981981982, "grad_norm": 1.0261660864286808, "learning_rate": 1.0337385777030585e-08, "loss": 0.2931, "step": 31380 }, { "epoch": 2.9449136636636637, "grad_norm": 1.3010754548803363, "learning_rate": 1.0302326143805108e-08, "loss": 0.3214, "step": 31381 }, { "epoch": 2.9450075075075075, "grad_norm": 1.1826575960191745, "learning_rate": 1.0267326003332024e-08, "loss": 0.2991, "step": 31382 }, { "epoch": 2.9451013513513513, "grad_norm": 1.2543534114398736, "learning_rate": 1.0232385356028773e-08, "loss": 0.3217, "step": 31383 }, { "epoch": 2.945195195195195, "grad_norm": 1.083909770008366, "learning_rate": 1.0197504202312247e-08, "loss": 0.3106, "step": 31384 }, { "epoch": 2.945289039039039, "grad_norm": 1.0031255205689278, "learning_rate": 1.0162682542598223e-08, "loss": 0.2935, "step": 31385 }, { "epoch": 2.9453828828828827, "grad_norm": 1.0505239903482348, "learning_rate": 1.012792037730137e-08, "loss": 0.2967, "step": 31386 }, { "epoch": 2.945476726726727, "grad_norm": 0.9330509033075088, "learning_rate": 1.009321770683691e-08, "loss": 0.3272, "step": 31387 }, { "epoch": 2.9455705705705704, "grad_norm": 1.3941066699961226, "learning_rate": 1.0058574531618404e-08, "loss": 0.3397, "step": 31388 }, { "epoch": 2.9456644144144146, "grad_norm": 1.1545381634240202, "learning_rate": 1.0023990852058296e-08, "loss": 0.2959, "step": 31389 }, { "epoch": 2.945758258258258, "grad_norm": 1.1761599922755974, "learning_rate": 9.989466668570147e-09, "loss": 0.3123, "step": 31390 }, { "epoch": 2.9458521021021022, "grad_norm": 0.9608713693250136, "learning_rate": 9.955001981564183e-09, "loss": 0.2845, "step": 31391 }, { "epoch": 2.945945945945946, "grad_norm": 1.0454077487530193, "learning_rate": 9.920596791452297e-09, "loss": 0.3231, "step": 31392 }, { "epoch": 2.94603978978979, "grad_norm": 1.1301418639065313, "learning_rate": 9.886251098644716e-09, "loss": 0.3167, "step": 31393 }, { "epoch": 2.9461336336336337, "grad_norm": 1.014162805437062, "learning_rate": 9.85196490355056e-09, "loss": 0.2899, "step": 31394 }, { "epoch": 2.9462274774774775, "grad_norm": 1.0140644951937932, "learning_rate": 9.817738206578387e-09, "loss": 0.2982, "step": 31395 }, { "epoch": 2.9463213213213213, "grad_norm": 1.013048506022719, "learning_rate": 9.783571008136761e-09, "loss": 0.3262, "step": 31396 }, { "epoch": 2.946415165165165, "grad_norm": 1.04756306907326, "learning_rate": 9.749463308633134e-09, "loss": 0.331, "step": 31397 }, { "epoch": 2.946509009009009, "grad_norm": 1.2852169744646635, "learning_rate": 9.715415108473846e-09, "loss": 0.2778, "step": 31398 }, { "epoch": 2.9466028528528527, "grad_norm": 1.188514100538724, "learning_rate": 9.681426408065242e-09, "loss": 0.3115, "step": 31399 }, { "epoch": 2.9466966966966965, "grad_norm": 1.2180480566018357, "learning_rate": 9.647497207811996e-09, "loss": 0.3018, "step": 31400 }, { "epoch": 2.9467905405405403, "grad_norm": 1.1604205972696329, "learning_rate": 9.613627508118784e-09, "loss": 0.3081, "step": 31401 }, { "epoch": 2.9468843843843846, "grad_norm": 1.1754863160752935, "learning_rate": 9.579817309390282e-09, "loss": 0.3142, "step": 31402 }, { "epoch": 2.946978228228228, "grad_norm": 0.9909953000893981, "learning_rate": 9.546066612028394e-09, "loss": 0.2981, "step": 31403 }, { "epoch": 2.9470720720720722, "grad_norm": 1.53645557895885, "learning_rate": 9.512375416436127e-09, "loss": 0.3028, "step": 31404 }, { "epoch": 2.947165915915916, "grad_norm": 4.3840693289705195, "learning_rate": 9.478743723015382e-09, "loss": 0.3282, "step": 31405 }, { "epoch": 2.94725975975976, "grad_norm": 1.0904500904036785, "learning_rate": 9.445171532166953e-09, "loss": 0.2984, "step": 31406 }, { "epoch": 2.9473536036036037, "grad_norm": 1.1419103566735471, "learning_rate": 9.411658844291072e-09, "loss": 0.2864, "step": 31407 }, { "epoch": 2.9474474474474475, "grad_norm": 1.0199764682811026, "learning_rate": 9.378205659787421e-09, "loss": 0.303, "step": 31408 }, { "epoch": 2.9475412912912913, "grad_norm": 1.1210442109464287, "learning_rate": 9.34481197905457e-09, "loss": 0.3056, "step": 31409 }, { "epoch": 2.947635135135135, "grad_norm": 1.0764891099529672, "learning_rate": 9.311477802491087e-09, "loss": 0.3419, "step": 31410 }, { "epoch": 2.947728978978979, "grad_norm": 0.9040316402690004, "learning_rate": 9.27820313049388e-09, "loss": 0.3049, "step": 31411 }, { "epoch": 2.9478228228228227, "grad_norm": 1.071128720900216, "learning_rate": 9.244987963460405e-09, "loss": 0.3351, "step": 31412 }, { "epoch": 2.9479166666666665, "grad_norm": 1.2178132207420767, "learning_rate": 9.211832301785906e-09, "loss": 0.323, "step": 31413 }, { "epoch": 2.9480105105105103, "grad_norm": 1.115722230765551, "learning_rate": 9.178736145866729e-09, "loss": 0.3054, "step": 31414 }, { "epoch": 2.9481043543543546, "grad_norm": 1.1693541960335019, "learning_rate": 9.14569949609645e-09, "loss": 0.3274, "step": 31415 }, { "epoch": 2.948198198198198, "grad_norm": 1.1856311084537028, "learning_rate": 9.112722352869197e-09, "loss": 0.3267, "step": 31416 }, { "epoch": 2.948292042042042, "grad_norm": 0.9713369618768535, "learning_rate": 9.079804716578544e-09, "loss": 0.2855, "step": 31417 }, { "epoch": 2.948385885885886, "grad_norm": 1.1107517337585975, "learning_rate": 9.046946587616957e-09, "loss": 0.2886, "step": 31418 }, { "epoch": 2.94847972972973, "grad_norm": 1.157460915125747, "learning_rate": 9.014147966375786e-09, "loss": 0.3085, "step": 31419 }, { "epoch": 2.9485735735735736, "grad_norm": 1.0191625018854629, "learning_rate": 8.981408853246942e-09, "loss": 0.3206, "step": 31420 }, { "epoch": 2.9486674174174174, "grad_norm": 1.1221240627773013, "learning_rate": 8.948729248619558e-09, "loss": 0.2715, "step": 31421 }, { "epoch": 2.9487612612612613, "grad_norm": 1.1428098724095679, "learning_rate": 8.916109152883879e-09, "loss": 0.305, "step": 31422 }, { "epoch": 2.948855105105105, "grad_norm": 1.0294208177359476, "learning_rate": 8.883548566429034e-09, "loss": 0.3334, "step": 31423 }, { "epoch": 2.948948948948949, "grad_norm": 1.0479611076908433, "learning_rate": 8.85104748964305e-09, "loss": 0.3102, "step": 31424 }, { "epoch": 2.9490427927927927, "grad_norm": 1.3234817148389315, "learning_rate": 8.818605922913392e-09, "loss": 0.2945, "step": 31425 }, { "epoch": 2.9491366366366365, "grad_norm": 0.997974025806773, "learning_rate": 8.786223866626975e-09, "loss": 0.3123, "step": 31426 }, { "epoch": 2.9492304804804803, "grad_norm": 1.1864829078624703, "learning_rate": 8.753901321170154e-09, "loss": 0.3339, "step": 31427 }, { "epoch": 2.9493243243243246, "grad_norm": 1.1553259775988096, "learning_rate": 8.721638286927626e-09, "loss": 0.2891, "step": 31428 }, { "epoch": 2.949418168168168, "grad_norm": 1.0941590366452336, "learning_rate": 8.689434764284633e-09, "loss": 0.2926, "step": 31429 }, { "epoch": 2.949512012012012, "grad_norm": 1.1377949605265412, "learning_rate": 8.657290753624759e-09, "loss": 0.2843, "step": 31430 }, { "epoch": 2.949605855855856, "grad_norm": 1.0763696120943285, "learning_rate": 8.625206255331032e-09, "loss": 0.2958, "step": 31431 }, { "epoch": 2.9496996996997, "grad_norm": 1.1295080987179085, "learning_rate": 8.593181269787032e-09, "loss": 0.3067, "step": 31432 }, { "epoch": 2.9497935435435436, "grad_norm": 1.210734511796479, "learning_rate": 8.561215797374122e-09, "loss": 0.3336, "step": 31433 }, { "epoch": 2.9498873873873874, "grad_norm": 1.1815888811649697, "learning_rate": 8.52930983847311e-09, "loss": 0.297, "step": 31434 }, { "epoch": 2.9499812312312312, "grad_norm": 1.0870724021028442, "learning_rate": 8.497463393464245e-09, "loss": 0.359, "step": 31435 }, { "epoch": 2.950075075075075, "grad_norm": 1.103076376764097, "learning_rate": 8.465676462728334e-09, "loss": 0.3217, "step": 31436 }, { "epoch": 2.950168918918919, "grad_norm": 1.1204308152674727, "learning_rate": 8.433949046642853e-09, "loss": 0.3067, "step": 31437 }, { "epoch": 2.9502627627627627, "grad_norm": 1.1101952595501698, "learning_rate": 8.402281145587499e-09, "loss": 0.3063, "step": 31438 }, { "epoch": 2.9503566066066065, "grad_norm": 0.9342967239199205, "learning_rate": 8.370672759938636e-09, "loss": 0.359, "step": 31439 }, { "epoch": 2.9504504504504503, "grad_norm": 1.0033138174324139, "learning_rate": 8.339123890074297e-09, "loss": 0.2977, "step": 31440 }, { "epoch": 2.9505442942942945, "grad_norm": 1.6071210077493328, "learning_rate": 8.307634536369735e-09, "loss": 0.3181, "step": 31441 }, { "epoch": 2.950638138138138, "grad_norm": 1.123377257798355, "learning_rate": 8.276204699200763e-09, "loss": 0.2992, "step": 31442 }, { "epoch": 2.950731981981982, "grad_norm": 1.0483947021848588, "learning_rate": 8.24483437894208e-09, "loss": 0.2879, "step": 31443 }, { "epoch": 2.950825825825826, "grad_norm": 1.0314547083362091, "learning_rate": 8.21352357596783e-09, "loss": 0.2924, "step": 31444 }, { "epoch": 2.95091966966967, "grad_norm": 1.1630119386986129, "learning_rate": 8.18227229065105e-09, "loss": 0.3622, "step": 31445 }, { "epoch": 2.9510135135135136, "grad_norm": 1.019108507635148, "learning_rate": 8.151080523364774e-09, "loss": 0.3171, "step": 31446 }, { "epoch": 2.9511073573573574, "grad_norm": 1.1148260472739058, "learning_rate": 8.119948274480372e-09, "loss": 0.2978, "step": 31447 }, { "epoch": 2.951201201201201, "grad_norm": 1.1856466234153584, "learning_rate": 8.088875544369767e-09, "loss": 0.2905, "step": 31448 }, { "epoch": 2.951295045045045, "grad_norm": 1.03181105566383, "learning_rate": 8.057862333402666e-09, "loss": 0.3364, "step": 31449 }, { "epoch": 2.951388888888889, "grad_norm": 1.0959332802112596, "learning_rate": 8.026908641949327e-09, "loss": 0.2952, "step": 31450 }, { "epoch": 2.9514827327327327, "grad_norm": 1.500918856070315, "learning_rate": 7.9960144703789e-09, "loss": 0.3188, "step": 31451 }, { "epoch": 2.9515765765765765, "grad_norm": 1.6093069741913426, "learning_rate": 7.965179819058867e-09, "loss": 0.2906, "step": 31452 }, { "epoch": 2.9516704204204203, "grad_norm": 1.1074482836489048, "learning_rate": 7.934404688358378e-09, "loss": 0.3294, "step": 31453 }, { "epoch": 2.9517642642642645, "grad_norm": 1.1106137450963456, "learning_rate": 7.903689078643251e-09, "loss": 0.3066, "step": 31454 }, { "epoch": 2.951858108108108, "grad_norm": 1.174353680202359, "learning_rate": 7.873032990279305e-09, "loss": 0.3179, "step": 31455 }, { "epoch": 2.951951951951952, "grad_norm": 1.0693611071472766, "learning_rate": 7.84243642363347e-09, "loss": 0.2855, "step": 31456 }, { "epoch": 2.952045795795796, "grad_norm": 1.0960436428061968, "learning_rate": 7.811899379069343e-09, "loss": 0.3134, "step": 31457 }, { "epoch": 2.9521396396396398, "grad_norm": 1.0111321881118578, "learning_rate": 7.781421856952187e-09, "loss": 0.3177, "step": 31458 }, { "epoch": 2.9522334834834836, "grad_norm": 1.1813401525923106, "learning_rate": 7.751003857643934e-09, "loss": 0.2746, "step": 31459 }, { "epoch": 2.9523273273273274, "grad_norm": 1.4513494409616519, "learning_rate": 7.72064538150874e-09, "loss": 0.2914, "step": 31460 }, { "epoch": 2.952421171171171, "grad_norm": 1.0121072213473954, "learning_rate": 7.69034642890687e-09, "loss": 0.3213, "step": 31461 }, { "epoch": 2.952515015015015, "grad_norm": 1.1243126238512318, "learning_rate": 7.660107000201367e-09, "loss": 0.3358, "step": 31462 }, { "epoch": 2.952608858858859, "grad_norm": 1.2466813882478567, "learning_rate": 7.629927095751944e-09, "loss": 0.3195, "step": 31463 }, { "epoch": 2.9527027027027026, "grad_norm": 1.5022983669647823, "learning_rate": 7.599806715917757e-09, "loss": 0.3026, "step": 31464 }, { "epoch": 2.9527965465465464, "grad_norm": 1.0487053139929534, "learning_rate": 7.56974586105963e-09, "loss": 0.3421, "step": 31465 }, { "epoch": 2.9528903903903903, "grad_norm": 1.1010864863568863, "learning_rate": 7.539744531534499e-09, "loss": 0.2802, "step": 31466 }, { "epoch": 2.9529842342342345, "grad_norm": 1.3723293603388034, "learning_rate": 7.50980272770041e-09, "loss": 0.2947, "step": 31467 }, { "epoch": 2.953078078078078, "grad_norm": 2.2680725402699946, "learning_rate": 7.479920449914857e-09, "loss": 0.355, "step": 31468 }, { "epoch": 2.953171921921922, "grad_norm": 1.1892382248432245, "learning_rate": 7.450097698533665e-09, "loss": 0.3296, "step": 31469 }, { "epoch": 2.9532657657657655, "grad_norm": 1.4766402143851498, "learning_rate": 7.420334473912105e-09, "loss": 0.2999, "step": 31470 }, { "epoch": 2.9533596096096097, "grad_norm": 1.0693007972932602, "learning_rate": 7.390630776406005e-09, "loss": 0.2716, "step": 31471 }, { "epoch": 2.9534534534534536, "grad_norm": 1.0741993482571714, "learning_rate": 7.360986606368969e-09, "loss": 0.308, "step": 31472 }, { "epoch": 2.9535472972972974, "grad_norm": 1.2650882403495067, "learning_rate": 7.331401964154605e-09, "loss": 0.3151, "step": 31473 }, { "epoch": 2.953641141141141, "grad_norm": 1.8165200868853828, "learning_rate": 7.301876850115408e-09, "loss": 0.3488, "step": 31474 }, { "epoch": 2.953734984984985, "grad_norm": 1.0990980404594688, "learning_rate": 7.272411264603319e-09, "loss": 0.2896, "step": 31475 }, { "epoch": 2.953828828828829, "grad_norm": 1.2122095552374361, "learning_rate": 7.243005207970277e-09, "loss": 0.3006, "step": 31476 }, { "epoch": 2.9539226726726726, "grad_norm": 1.341053651189254, "learning_rate": 7.213658680566005e-09, "loss": 0.2884, "step": 31477 }, { "epoch": 2.9540165165165164, "grad_norm": 1.1140376449180405, "learning_rate": 7.184371682741331e-09, "loss": 0.3146, "step": 31478 }, { "epoch": 2.9541103603603602, "grad_norm": 1.2133593417077784, "learning_rate": 7.155144214844867e-09, "loss": 0.3026, "step": 31479 }, { "epoch": 2.954204204204204, "grad_norm": 1.2777747432619582, "learning_rate": 7.125976277225222e-09, "loss": 0.2841, "step": 31480 }, { "epoch": 2.954298048048048, "grad_norm": 1.171766789395674, "learning_rate": 7.09686787023045e-09, "loss": 0.3362, "step": 31481 }, { "epoch": 2.954391891891892, "grad_norm": 1.0908592000338095, "learning_rate": 7.067818994206943e-09, "loss": 0.3101, "step": 31482 }, { "epoch": 2.9544857357357355, "grad_norm": 1.0594806637227852, "learning_rate": 7.038829649501644e-09, "loss": 0.3181, "step": 31483 }, { "epoch": 2.9545795795795797, "grad_norm": 1.1144857580677552, "learning_rate": 7.0098998364598325e-09, "loss": 0.3149, "step": 31484 }, { "epoch": 2.9546734234234235, "grad_norm": 1.0778645498553088, "learning_rate": 6.981029555426788e-09, "loss": 0.2656, "step": 31485 }, { "epoch": 2.9547672672672673, "grad_norm": 0.9811258702386502, "learning_rate": 6.95221880674668e-09, "loss": 0.3051, "step": 31486 }, { "epoch": 2.954861111111111, "grad_norm": 1.2889392567454316, "learning_rate": 6.923467590763122e-09, "loss": 0.3217, "step": 31487 }, { "epoch": 2.954954954954955, "grad_norm": 1.1890681317549292, "learning_rate": 6.894775907818063e-09, "loss": 0.3128, "step": 31488 }, { "epoch": 2.955048798798799, "grad_norm": 1.1615837408658884, "learning_rate": 6.866143758255117e-09, "loss": 0.2902, "step": 31489 }, { "epoch": 2.9551426426426426, "grad_norm": 1.4153227314099681, "learning_rate": 6.837571142414012e-09, "loss": 0.2842, "step": 31490 }, { "epoch": 2.9552364864864864, "grad_norm": 1.108795891841318, "learning_rate": 6.8090580606366976e-09, "loss": 0.2946, "step": 31491 }, { "epoch": 2.95533033033033, "grad_norm": 1.1808208715370245, "learning_rate": 6.780604513262346e-09, "loss": 0.2948, "step": 31492 }, { "epoch": 2.955424174174174, "grad_norm": 1.054437219648898, "learning_rate": 6.752210500630685e-09, "loss": 0.3345, "step": 31493 }, { "epoch": 2.955518018018018, "grad_norm": 1.1725846375508504, "learning_rate": 6.723876023079778e-09, "loss": 0.3563, "step": 31494 }, { "epoch": 2.955611861861862, "grad_norm": 0.9610982959825192, "learning_rate": 6.695601080948244e-09, "loss": 0.2917, "step": 31495 }, { "epoch": 2.9557057057057055, "grad_norm": 1.17338205934894, "learning_rate": 6.6673856745724795e-09, "loss": 0.2763, "step": 31496 }, { "epoch": 2.9557995495495497, "grad_norm": 1.120654779585455, "learning_rate": 6.639229804288882e-09, "loss": 0.3375, "step": 31497 }, { "epoch": 2.9558933933933935, "grad_norm": 1.2782432490079407, "learning_rate": 6.61113347043385e-09, "loss": 0.3442, "step": 31498 }, { "epoch": 2.9559872372372373, "grad_norm": 0.9852185522665515, "learning_rate": 6.583096673342116e-09, "loss": 0.2947, "step": 31499 }, { "epoch": 2.956081081081081, "grad_norm": 1.021387492220985, "learning_rate": 6.555119413347299e-09, "loss": 0.3285, "step": 31500 }, { "epoch": 2.956174924924925, "grad_norm": 1.0773684426265924, "learning_rate": 6.527201690784135e-09, "loss": 0.3085, "step": 31501 }, { "epoch": 2.9562687687687688, "grad_norm": 1.2539959912221335, "learning_rate": 6.499343505984023e-09, "loss": 0.3009, "step": 31502 }, { "epoch": 2.9563626126126126, "grad_norm": 1.4423452580768277, "learning_rate": 6.471544859280587e-09, "loss": 0.3069, "step": 31503 }, { "epoch": 2.9564564564564564, "grad_norm": 1.2272595603155063, "learning_rate": 6.443805751004118e-09, "loss": 0.2589, "step": 31504 }, { "epoch": 2.9565503003003, "grad_norm": 1.0903387491408614, "learning_rate": 6.41612618148657e-09, "loss": 0.3598, "step": 31505 }, { "epoch": 2.956644144144144, "grad_norm": 1.1164350115206998, "learning_rate": 6.388506151056573e-09, "loss": 0.3212, "step": 31506 }, { "epoch": 2.956737987987988, "grad_norm": 1.0881612849444102, "learning_rate": 6.360945660044415e-09, "loss": 0.3056, "step": 31507 }, { "epoch": 2.956831831831832, "grad_norm": 1.1105802820614121, "learning_rate": 6.3334447087787246e-09, "loss": 0.277, "step": 31508 }, { "epoch": 2.9569256756756754, "grad_norm": 1.159886251828491, "learning_rate": 6.3060032975864604e-09, "loss": 0.3407, "step": 31509 }, { "epoch": 2.9570195195195197, "grad_norm": 1.074706877441525, "learning_rate": 6.27862142679625e-09, "loss": 0.2915, "step": 31510 }, { "epoch": 2.9571133633633635, "grad_norm": 1.227389181526736, "learning_rate": 6.251299096732832e-09, "loss": 0.305, "step": 31511 }, { "epoch": 2.9572072072072073, "grad_norm": 1.131765651845117, "learning_rate": 6.2240363077237245e-09, "loss": 0.3135, "step": 31512 }, { "epoch": 2.957301051051051, "grad_norm": 1.1127987832579849, "learning_rate": 6.196833060092555e-09, "loss": 0.3047, "step": 31513 }, { "epoch": 2.957394894894895, "grad_norm": 1.4232730417580381, "learning_rate": 6.1696893541646205e-09, "loss": 0.268, "step": 31514 }, { "epoch": 2.9574887387387387, "grad_norm": 1.1817532227768275, "learning_rate": 6.1426051902629955e-09, "loss": 0.3297, "step": 31515 }, { "epoch": 2.9575825825825826, "grad_norm": 1.1058058262568706, "learning_rate": 6.1155805687113105e-09, "loss": 0.3285, "step": 31516 }, { "epoch": 2.9576764264264264, "grad_norm": 1.1382655203642191, "learning_rate": 6.088615489830974e-09, "loss": 0.272, "step": 31517 }, { "epoch": 2.95777027027027, "grad_norm": 1.2415936281208806, "learning_rate": 6.061709953943395e-09, "loss": 0.3133, "step": 31518 }, { "epoch": 2.957864114114114, "grad_norm": 1.0856937436650478, "learning_rate": 6.034863961370541e-09, "loss": 0.3331, "step": 31519 }, { "epoch": 2.957957957957958, "grad_norm": 1.245753084007402, "learning_rate": 6.008077512431598e-09, "loss": 0.2876, "step": 31520 }, { "epoch": 2.958051801801802, "grad_norm": 0.9898478103021716, "learning_rate": 5.981350607445757e-09, "loss": 0.2932, "step": 31521 }, { "epoch": 2.9581456456456454, "grad_norm": 1.4296595836500763, "learning_rate": 5.954683246732207e-09, "loss": 0.3126, "step": 31522 }, { "epoch": 2.9582394894894897, "grad_norm": 1.1414465911909035, "learning_rate": 5.928075430609026e-09, "loss": 0.3349, "step": 31523 }, { "epoch": 2.9583333333333335, "grad_norm": 1.103826882575128, "learning_rate": 5.901527159392628e-09, "loss": 0.2996, "step": 31524 }, { "epoch": 2.9584271771771773, "grad_norm": 0.9593318548764589, "learning_rate": 5.875038433400537e-09, "loss": 0.3259, "step": 31525 }, { "epoch": 2.958521021021021, "grad_norm": 1.1235817560221375, "learning_rate": 5.848609252948056e-09, "loss": 0.3578, "step": 31526 }, { "epoch": 2.958614864864865, "grad_norm": 1.3560000018719962, "learning_rate": 5.822239618349934e-09, "loss": 0.3691, "step": 31527 }, { "epoch": 2.9587087087087087, "grad_norm": 1.3080849412207507, "learning_rate": 5.795929529921474e-09, "loss": 0.3747, "step": 31528 }, { "epoch": 2.9588025525525525, "grad_norm": 1.2457274126925286, "learning_rate": 5.769678987975758e-09, "loss": 0.2961, "step": 31529 }, { "epoch": 2.9588963963963963, "grad_norm": 1.0619568123000704, "learning_rate": 5.743487992826424e-09, "loss": 0.3136, "step": 31530 }, { "epoch": 2.95899024024024, "grad_norm": 1.1346561248312865, "learning_rate": 5.717356544784891e-09, "loss": 0.3359, "step": 31531 }, { "epoch": 2.959084084084084, "grad_norm": 1.192790493709439, "learning_rate": 5.691284644163131e-09, "loss": 0.3061, "step": 31532 }, { "epoch": 2.9591779279279278, "grad_norm": 1.4283672770099785, "learning_rate": 5.665272291272006e-09, "loss": 0.3304, "step": 31533 }, { "epoch": 2.959271771771772, "grad_norm": 1.6261259234677556, "learning_rate": 5.639319486421269e-09, "loss": 0.2911, "step": 31534 }, { "epoch": 2.9593656156156154, "grad_norm": 1.153224967112515, "learning_rate": 5.613426229921226e-09, "loss": 0.3764, "step": 31535 }, { "epoch": 2.9594594594594597, "grad_norm": 1.2429654801844872, "learning_rate": 5.587592522079965e-09, "loss": 0.3049, "step": 31536 }, { "epoch": 2.9595533033033035, "grad_norm": 1.2257032821073697, "learning_rate": 5.561818363205573e-09, "loss": 0.2883, "step": 31537 }, { "epoch": 2.9596471471471473, "grad_norm": 1.2152396908535095, "learning_rate": 5.536103753605582e-09, "loss": 0.3428, "step": 31538 }, { "epoch": 2.959740990990991, "grad_norm": 1.0109609737102319, "learning_rate": 5.510448693586412e-09, "loss": 0.3257, "step": 31539 }, { "epoch": 2.959834834834835, "grad_norm": 1.2596808033819527, "learning_rate": 5.4848531834539305e-09, "loss": 0.2776, "step": 31540 }, { "epoch": 2.9599286786786787, "grad_norm": 1.1251970897363361, "learning_rate": 5.45931722351345e-09, "loss": 0.2873, "step": 31541 }, { "epoch": 2.9600225225225225, "grad_norm": 1.0579948836969826, "learning_rate": 5.433840814069169e-09, "loss": 0.334, "step": 31542 }, { "epoch": 2.9601163663663663, "grad_norm": 1.0758636530869488, "learning_rate": 5.4084239554252905e-09, "loss": 0.2782, "step": 31543 }, { "epoch": 2.96021021021021, "grad_norm": 1.2457528603604286, "learning_rate": 5.38306664788435e-09, "loss": 0.2541, "step": 31544 }, { "epoch": 2.960304054054054, "grad_norm": 1.092740008197271, "learning_rate": 5.357768891748882e-09, "loss": 0.3482, "step": 31545 }, { "epoch": 2.9603978978978978, "grad_norm": 1.0958990610748527, "learning_rate": 5.3325306873208695e-09, "loss": 0.2626, "step": 31546 }, { "epoch": 2.960491741741742, "grad_norm": 1.0026137891261118, "learning_rate": 5.307352034900626e-09, "loss": 0.303, "step": 31547 }, { "epoch": 2.9605855855855854, "grad_norm": 1.066471263109919, "learning_rate": 5.2822329347890224e-09, "loss": 0.2953, "step": 31548 }, { "epoch": 2.9606794294294296, "grad_norm": 1.1504417571944148, "learning_rate": 5.257173387284709e-09, "loss": 0.2871, "step": 31549 }, { "epoch": 2.960773273273273, "grad_norm": 1.2403052717281438, "learning_rate": 5.2321733926874454e-09, "loss": 0.311, "step": 31550 }, { "epoch": 2.9608671171171173, "grad_norm": 1.1855431416472995, "learning_rate": 5.2072329512947715e-09, "loss": 0.3045, "step": 31551 }, { "epoch": 2.960960960960961, "grad_norm": 1.1015146529062692, "learning_rate": 5.182352063404228e-09, "loss": 0.3231, "step": 31552 }, { "epoch": 2.961054804804805, "grad_norm": 1.2459431157330605, "learning_rate": 5.157530729311688e-09, "loss": 0.2789, "step": 31553 }, { "epoch": 2.9611486486486487, "grad_norm": 1.2270785561750355, "learning_rate": 5.132768949314138e-09, "loss": 0.282, "step": 31554 }, { "epoch": 2.9612424924924925, "grad_norm": 1.2962606453313346, "learning_rate": 5.1080667237068945e-09, "loss": 0.3183, "step": 31555 }, { "epoch": 2.9613363363363363, "grad_norm": 1.3523542886878774, "learning_rate": 5.083424052783614e-09, "loss": 0.2608, "step": 31556 }, { "epoch": 2.96143018018018, "grad_norm": 1.3213234548790125, "learning_rate": 5.058840936838505e-09, "loss": 0.3162, "step": 31557 }, { "epoch": 2.961524024024024, "grad_norm": 1.1124394783483795, "learning_rate": 5.034317376164666e-09, "loss": 0.3356, "step": 31558 }, { "epoch": 2.9616178678678677, "grad_norm": 1.0970521603060701, "learning_rate": 5.00985337105464e-09, "loss": 0.354, "step": 31559 }, { "epoch": 2.9617117117117115, "grad_norm": 1.0577018269263065, "learning_rate": 4.9854489218004175e-09, "loss": 0.2888, "step": 31560 }, { "epoch": 2.9618055555555554, "grad_norm": 1.120499816244923, "learning_rate": 4.961104028691765e-09, "loss": 0.3089, "step": 31561 }, { "epoch": 2.9618993993993996, "grad_norm": 1.1302541526919707, "learning_rate": 4.936818692020673e-09, "loss": 0.2971, "step": 31562 }, { "epoch": 2.961993243243243, "grad_norm": 1.252548699452893, "learning_rate": 4.912592912075242e-09, "loss": 0.2773, "step": 31563 }, { "epoch": 2.9620870870870872, "grad_norm": 1.213403464604703, "learning_rate": 4.888426689144687e-09, "loss": 0.3178, "step": 31564 }, { "epoch": 2.962180930930931, "grad_norm": 1.4578071567744686, "learning_rate": 4.8643200235176655e-09, "loss": 0.3059, "step": 31565 }, { "epoch": 2.962274774774775, "grad_norm": 1.100441924399145, "learning_rate": 4.840272915480615e-09, "loss": 0.3096, "step": 31566 }, { "epoch": 2.9623686186186187, "grad_norm": 1.1385972117298921, "learning_rate": 4.816285365321638e-09, "loss": 0.2604, "step": 31567 }, { "epoch": 2.9624624624624625, "grad_norm": 1.040107671896608, "learning_rate": 4.792357373325507e-09, "loss": 0.3413, "step": 31568 }, { "epoch": 2.9625563063063063, "grad_norm": 1.0207657760152007, "learning_rate": 4.7684889397786595e-09, "loss": 0.2622, "step": 31569 }, { "epoch": 2.96265015015015, "grad_norm": 1.1894702525544363, "learning_rate": 4.744680064964202e-09, "loss": 0.3004, "step": 31570 }, { "epoch": 2.962743993993994, "grad_norm": 1.304546128967119, "learning_rate": 4.7209307491674625e-09, "loss": 0.3012, "step": 31571 }, { "epoch": 2.9628378378378377, "grad_norm": 1.2455480920692832, "learning_rate": 4.6972409926709925e-09, "loss": 0.3272, "step": 31572 }, { "epoch": 2.9629316816816815, "grad_norm": 1.2532824121688093, "learning_rate": 4.673610795756789e-09, "loss": 0.3028, "step": 31573 }, { "epoch": 2.9630255255255253, "grad_norm": 1.0519187033823425, "learning_rate": 4.650040158707403e-09, "loss": 0.3034, "step": 31574 }, { "epoch": 2.9631193693693696, "grad_norm": 1.053788678985139, "learning_rate": 4.626529081803166e-09, "loss": 0.279, "step": 31575 }, { "epoch": 2.963213213213213, "grad_norm": 0.9970463259518777, "learning_rate": 4.603077565324965e-09, "loss": 0.3189, "step": 31576 }, { "epoch": 2.963307057057057, "grad_norm": 1.0589671819816633, "learning_rate": 4.5796856095520205e-09, "loss": 0.2966, "step": 31577 }, { "epoch": 2.963400900900901, "grad_norm": 1.6979568164307552, "learning_rate": 4.556353214763554e-09, "loss": 0.291, "step": 31578 }, { "epoch": 2.963494744744745, "grad_norm": 1.4029312581372548, "learning_rate": 4.533080381237676e-09, "loss": 0.2978, "step": 31579 }, { "epoch": 2.9635885885885886, "grad_norm": 1.4290698435294569, "learning_rate": 4.509867109251387e-09, "loss": 0.3385, "step": 31580 }, { "epoch": 2.9636824324324325, "grad_norm": 0.9965928706606039, "learning_rate": 4.4867133990827985e-09, "loss": 0.3325, "step": 31581 }, { "epoch": 2.9637762762762763, "grad_norm": 1.1275015956229741, "learning_rate": 4.463619251006135e-09, "loss": 0.32, "step": 31582 }, { "epoch": 2.96387012012012, "grad_norm": 1.1572734714555968, "learning_rate": 4.4405846652983975e-09, "loss": 0.3271, "step": 31583 }, { "epoch": 2.963963963963964, "grad_norm": 1.1785015695506258, "learning_rate": 4.417609642232701e-09, "loss": 0.3183, "step": 31584 }, { "epoch": 2.9640578078078077, "grad_norm": 1.1234866823337755, "learning_rate": 4.3946941820843804e-09, "loss": 0.2767, "step": 31585 }, { "epoch": 2.9641516516516515, "grad_norm": 1.294160726597435, "learning_rate": 4.3718382851259955e-09, "loss": 0.3103, "step": 31586 }, { "epoch": 2.9642454954954953, "grad_norm": 1.0484796669668104, "learning_rate": 4.3490419516301065e-09, "loss": 0.3322, "step": 31587 }, { "epoch": 2.9643393393393396, "grad_norm": 1.4887204422094038, "learning_rate": 4.326305181868718e-09, "loss": 0.3113, "step": 31588 }, { "epoch": 2.964433183183183, "grad_norm": 1.2949776637405443, "learning_rate": 4.3036279761127235e-09, "loss": 0.3917, "step": 31589 }, { "epoch": 2.964527027027027, "grad_norm": 0.9683753785861815, "learning_rate": 4.281010334631908e-09, "loss": 0.344, "step": 31590 }, { "epoch": 2.964620870870871, "grad_norm": 1.207766796178404, "learning_rate": 4.258452257697166e-09, "loss": 0.3344, "step": 31591 }, { "epoch": 2.964714714714715, "grad_norm": 1.2655094158316398, "learning_rate": 4.235953745576615e-09, "loss": 0.2855, "step": 31592 }, { "epoch": 2.9648085585585586, "grad_norm": 1.3261191708162572, "learning_rate": 4.213514798538932e-09, "loss": 0.2801, "step": 31593 }, { "epoch": 2.9649024024024024, "grad_norm": 1.1264651540118185, "learning_rate": 4.191135416851677e-09, "loss": 0.294, "step": 31594 }, { "epoch": 2.9649962462462462, "grad_norm": 1.2039422809173743, "learning_rate": 4.168815600781306e-09, "loss": 0.3149, "step": 31595 }, { "epoch": 2.96509009009009, "grad_norm": 1.0503086711559437, "learning_rate": 4.1465553505942726e-09, "loss": 0.2959, "step": 31596 }, { "epoch": 2.965183933933934, "grad_norm": 0.9742751794662282, "learning_rate": 4.124354666555363e-09, "loss": 0.3005, "step": 31597 }, { "epoch": 2.9652777777777777, "grad_norm": 1.2245344360064934, "learning_rate": 4.102213548929923e-09, "loss": 0.2847, "step": 31598 }, { "epoch": 2.9653716216216215, "grad_norm": 1.3035762732112879, "learning_rate": 4.080131997982184e-09, "loss": 0.2777, "step": 31599 }, { "epoch": 2.9654654654654653, "grad_norm": 1.199044538257873, "learning_rate": 4.0581100139747145e-09, "loss": 0.2962, "step": 31600 }, { "epoch": 2.9655593093093096, "grad_norm": 1.0558580799133646, "learning_rate": 4.0361475971700816e-09, "loss": 0.2977, "step": 31601 }, { "epoch": 2.965653153153153, "grad_norm": 1.5907633730803499, "learning_rate": 4.0142447478308535e-09, "loss": 0.3012, "step": 31602 }, { "epoch": 2.965746996996997, "grad_norm": 1.1077044731305863, "learning_rate": 3.992401466217932e-09, "loss": 0.3091, "step": 31603 }, { "epoch": 2.965840840840841, "grad_norm": 1.339320949526338, "learning_rate": 3.97061775259111e-09, "loss": 0.274, "step": 31604 }, { "epoch": 2.965934684684685, "grad_norm": 1.307882602581833, "learning_rate": 3.9488936072107356e-09, "loss": 0.293, "step": 31605 }, { "epoch": 2.9660285285285286, "grad_norm": 1.26152933690955, "learning_rate": 3.927229030336044e-09, "loss": 0.3483, "step": 31606 }, { "epoch": 2.9661223723723724, "grad_norm": 1.467927877129715, "learning_rate": 3.905624022224608e-09, "loss": 0.3161, "step": 31607 }, { "epoch": 2.9662162162162162, "grad_norm": 1.1481610376634948, "learning_rate": 3.884078583134554e-09, "loss": 0.3267, "step": 31608 }, { "epoch": 2.96631006006006, "grad_norm": 1.2219711912456401, "learning_rate": 3.862592713322899e-09, "loss": 0.3104, "step": 31609 }, { "epoch": 2.966403903903904, "grad_norm": 1.156603978623912, "learning_rate": 3.841166413045549e-09, "loss": 0.3118, "step": 31610 }, { "epoch": 2.9664977477477477, "grad_norm": 1.4481908601945397, "learning_rate": 3.8197996825578565e-09, "loss": 0.3547, "step": 31611 }, { "epoch": 2.9665915915915915, "grad_norm": 1.6249541931195859, "learning_rate": 3.798492522114616e-09, "loss": 0.305, "step": 31612 }, { "epoch": 2.9666854354354353, "grad_norm": 1.24473257237735, "learning_rate": 3.77724493197007e-09, "loss": 0.2768, "step": 31613 }, { "epoch": 2.9667792792792795, "grad_norm": 1.2163477157906206, "learning_rate": 3.756056912377348e-09, "loss": 0.3258, "step": 31614 }, { "epoch": 2.966873123123123, "grad_norm": 2.2811304835893673, "learning_rate": 3.734928463589027e-09, "loss": 0.299, "step": 31615 }, { "epoch": 2.966966966966967, "grad_norm": 1.1234694998835988, "learning_rate": 3.713859585857682e-09, "loss": 0.3145, "step": 31616 }, { "epoch": 2.967060810810811, "grad_norm": 1.1776182795406849, "learning_rate": 3.6928502794336684e-09, "loss": 0.2884, "step": 31617 }, { "epoch": 2.9671546546546548, "grad_norm": 1.1760511058839103, "learning_rate": 3.671900544567897e-09, "loss": 0.3394, "step": 31618 }, { "epoch": 2.9672484984984986, "grad_norm": 1.0851399476228096, "learning_rate": 3.651010381509612e-09, "loss": 0.3195, "step": 31619 }, { "epoch": 2.9673423423423424, "grad_norm": 1.108670012907026, "learning_rate": 3.6301797905091694e-09, "loss": 0.3449, "step": 31620 }, { "epoch": 2.967436186186186, "grad_norm": 1.1272754333079043, "learning_rate": 3.609408771813594e-09, "loss": 0.294, "step": 31621 }, { "epoch": 2.96753003003003, "grad_norm": 1.0780038297017769, "learning_rate": 3.5886973256715753e-09, "loss": 0.3081, "step": 31622 }, { "epoch": 2.967623873873874, "grad_norm": 1.2053284431653852, "learning_rate": 3.5680454523290276e-09, "loss": 0.2877, "step": 31623 }, { "epoch": 2.9677177177177176, "grad_norm": 1.204738682159073, "learning_rate": 3.547453152032976e-09, "loss": 0.3094, "step": 31624 }, { "epoch": 2.9678115615615615, "grad_norm": 1.0382568098053482, "learning_rate": 3.526920425028779e-09, "loss": 0.2551, "step": 31625 }, { "epoch": 2.9679054054054053, "grad_norm": 1.1097775760971398, "learning_rate": 3.506447271561242e-09, "loss": 0.3348, "step": 31626 }, { "epoch": 2.9679992492492495, "grad_norm": 1.1003188877383911, "learning_rate": 3.486033691874058e-09, "loss": 0.2756, "step": 31627 }, { "epoch": 2.968093093093093, "grad_norm": 1.1781990626336647, "learning_rate": 3.465679686211476e-09, "loss": 0.3229, "step": 31628 }, { "epoch": 2.968186936936937, "grad_norm": 2.4264491072485477, "learning_rate": 3.4453852548149703e-09, "loss": 0.3049, "step": 31629 }, { "epoch": 2.9682807807807805, "grad_norm": 1.9452843002567428, "learning_rate": 3.425150397927679e-09, "loss": 0.3216, "step": 31630 }, { "epoch": 2.9683746246246248, "grad_norm": 1.2167851695263892, "learning_rate": 3.4049751157899656e-09, "loss": 0.3757, "step": 31631 }, { "epoch": 2.9684684684684686, "grad_norm": 1.049554716840764, "learning_rate": 3.384859408643304e-09, "loss": 0.3198, "step": 31632 }, { "epoch": 2.9685623123123124, "grad_norm": 1.1758661491202052, "learning_rate": 3.3648032767269467e-09, "loss": 0.2826, "step": 31633 }, { "epoch": 2.968656156156156, "grad_norm": 1.178105982920465, "learning_rate": 3.344806720279592e-09, "loss": 0.3014, "step": 31634 }, { "epoch": 2.96875, "grad_norm": 1.1211268100434668, "learning_rate": 3.324869739540493e-09, "loss": 0.3386, "step": 31635 }, { "epoch": 2.968843843843844, "grad_norm": 1.5589334880770458, "learning_rate": 3.3049923347472367e-09, "loss": 0.3263, "step": 31636 }, { "epoch": 2.9689376876876876, "grad_norm": 2.151525138447519, "learning_rate": 3.2851745061368566e-09, "loss": 0.2975, "step": 31637 }, { "epoch": 2.9690315315315314, "grad_norm": 1.1955425141652445, "learning_rate": 3.26541625394472e-09, "loss": 0.2884, "step": 31638 }, { "epoch": 2.9691253753753752, "grad_norm": 1.6295423249264134, "learning_rate": 3.2457175784073037e-09, "loss": 0.2822, "step": 31639 }, { "epoch": 2.9692192192192195, "grad_norm": 1.0113167127316667, "learning_rate": 3.2260784797594202e-09, "loss": 0.2831, "step": 31640 }, { "epoch": 2.969313063063063, "grad_norm": 1.2370972164993894, "learning_rate": 3.2064989582353268e-09, "loss": 0.3043, "step": 31641 }, { "epoch": 2.969406906906907, "grad_norm": 1.5118668971185396, "learning_rate": 3.18697901406817e-09, "loss": 0.3379, "step": 31642 }, { "epoch": 2.9695007507507505, "grad_norm": 1.1607243334203725, "learning_rate": 3.167518647490542e-09, "loss": 0.3309, "step": 31643 }, { "epoch": 2.9695945945945947, "grad_norm": 0.9736614610986891, "learning_rate": 3.1481178587344787e-09, "loss": 0.2813, "step": 31644 }, { "epoch": 2.9696884384384385, "grad_norm": 1.547189132302782, "learning_rate": 3.128776648032017e-09, "loss": 0.3817, "step": 31645 }, { "epoch": 2.9697822822822824, "grad_norm": 0.9981518778728667, "learning_rate": 3.1094950156129734e-09, "loss": 0.2958, "step": 31646 }, { "epoch": 2.969876126126126, "grad_norm": 1.1930775011522465, "learning_rate": 3.0902729617077188e-09, "loss": 0.3083, "step": 31647 }, { "epoch": 2.96996996996997, "grad_norm": 1.128133521634265, "learning_rate": 3.0711104865449593e-09, "loss": 0.3076, "step": 31648 }, { "epoch": 2.970063813813814, "grad_norm": 1.0472268832745986, "learning_rate": 3.052007590353401e-09, "loss": 0.325, "step": 31649 }, { "epoch": 2.9701576576576576, "grad_norm": 1.3702650501513973, "learning_rate": 3.0329642733611943e-09, "loss": 0.2843, "step": 31650 }, { "epoch": 2.9702515015015014, "grad_norm": 1.900015646575143, "learning_rate": 3.013980535794825e-09, "loss": 0.292, "step": 31651 }, { "epoch": 2.9703453453453452, "grad_norm": 1.3155904048914853, "learning_rate": 2.9950563778807784e-09, "loss": 0.2977, "step": 31652 }, { "epoch": 2.970439189189189, "grad_norm": 1.1480835104895222, "learning_rate": 2.976191799844985e-09, "loss": 0.3326, "step": 31653 }, { "epoch": 2.970533033033033, "grad_norm": 1.2968961802989154, "learning_rate": 2.9573868019122655e-09, "loss": 0.3039, "step": 31654 }, { "epoch": 2.970626876876877, "grad_norm": 1.2011870988478253, "learning_rate": 2.938641384306884e-09, "loss": 0.2965, "step": 31655 }, { "epoch": 2.9707207207207205, "grad_norm": 0.9541836916666642, "learning_rate": 2.9199555472519957e-09, "loss": 0.281, "step": 31656 }, { "epoch": 2.9708145645645647, "grad_norm": 1.414366356485776, "learning_rate": 2.901329290970201e-09, "loss": 0.3037, "step": 31657 }, { "epoch": 2.9709084084084085, "grad_norm": 1.4808268103375961, "learning_rate": 2.8827626156846535e-09, "loss": 0.2892, "step": 31658 }, { "epoch": 2.9710022522522523, "grad_norm": 1.0513275299054496, "learning_rate": 2.8642555216157332e-09, "loss": 0.3319, "step": 31659 }, { "epoch": 2.971096096096096, "grad_norm": 1.1595688171229324, "learning_rate": 2.845808008984374e-09, "loss": 0.3026, "step": 31660 }, { "epoch": 2.97118993993994, "grad_norm": 1.3852566883158153, "learning_rate": 2.827420078010401e-09, "loss": 0.3302, "step": 31661 }, { "epoch": 2.9712837837837838, "grad_norm": 1.230387242536946, "learning_rate": 2.8090917289136376e-09, "loss": 0.2909, "step": 31662 }, { "epoch": 2.9713776276276276, "grad_norm": 1.2118603327397381, "learning_rate": 2.790822961912243e-09, "loss": 0.3307, "step": 31663 }, { "epoch": 2.9714714714714714, "grad_norm": 1.1637505268067214, "learning_rate": 2.7726137772238204e-09, "loss": 0.3077, "step": 31664 }, { "epoch": 2.971565315315315, "grad_norm": 1.1996480806903198, "learning_rate": 2.754464175065419e-09, "loss": 0.2953, "step": 31665 }, { "epoch": 2.971659159159159, "grad_norm": 1.021094201396648, "learning_rate": 2.736374155654087e-09, "loss": 0.2577, "step": 31666 }, { "epoch": 2.971753003003003, "grad_norm": 1.1770470533836255, "learning_rate": 2.7183437192046526e-09, "loss": 0.2983, "step": 31667 }, { "epoch": 2.971846846846847, "grad_norm": 1.0075556899609848, "learning_rate": 2.700372865933054e-09, "loss": 0.3025, "step": 31668 }, { "epoch": 2.9719406906906904, "grad_norm": 4.381564753266887, "learning_rate": 2.6824615960530096e-09, "loss": 0.2813, "step": 31669 }, { "epoch": 2.9720345345345347, "grad_norm": 1.101441549637543, "learning_rate": 2.664609909777682e-09, "loss": 0.3277, "step": 31670 }, { "epoch": 2.9721283783783785, "grad_norm": 1.1267700802134635, "learning_rate": 2.6468178073207894e-09, "loss": 0.3116, "step": 31671 }, { "epoch": 2.9722222222222223, "grad_norm": 1.0176444382054104, "learning_rate": 2.629085288893829e-09, "loss": 0.2788, "step": 31672 }, { "epoch": 2.972316066066066, "grad_norm": 1.1331216993940083, "learning_rate": 2.611412354708853e-09, "loss": 0.2993, "step": 31673 }, { "epoch": 2.97240990990991, "grad_norm": 1.196903163603998, "learning_rate": 2.5937990049756944e-09, "loss": 0.3409, "step": 31674 }, { "epoch": 2.9725037537537538, "grad_norm": 1.127410702556952, "learning_rate": 2.5762452399047398e-09, "loss": 0.3035, "step": 31675 }, { "epoch": 2.9725975975975976, "grad_norm": 1.1377652155211166, "learning_rate": 2.5587510597058217e-09, "loss": 0.2983, "step": 31676 }, { "epoch": 2.9726914414414414, "grad_norm": 1.1135653513378492, "learning_rate": 2.541316464586552e-09, "loss": 0.3107, "step": 31677 }, { "epoch": 2.972785285285285, "grad_norm": 1.359520792443634, "learning_rate": 2.523941454755652e-09, "loss": 0.302, "step": 31678 }, { "epoch": 2.972879129129129, "grad_norm": 1.3355886867786375, "learning_rate": 2.5066260304201783e-09, "loss": 0.2613, "step": 31679 }, { "epoch": 2.972972972972973, "grad_norm": 0.9547633908525154, "learning_rate": 2.489370191786078e-09, "loss": 0.2807, "step": 31680 }, { "epoch": 2.973066816816817, "grad_norm": 1.2186591986363018, "learning_rate": 2.472173939059852e-09, "loss": 0.3253, "step": 31681 }, { "epoch": 2.9731606606606604, "grad_norm": 1.2868658267855289, "learning_rate": 2.4550372724457814e-09, "loss": 0.2989, "step": 31682 }, { "epoch": 2.9732545045045047, "grad_norm": 1.2488505818079911, "learning_rate": 2.437960192148148e-09, "loss": 0.3645, "step": 31683 }, { "epoch": 2.9733483483483485, "grad_norm": 1.0181857300604062, "learning_rate": 2.4209426983712315e-09, "loss": 0.3199, "step": 31684 }, { "epoch": 2.9734421921921923, "grad_norm": 1.0566044107837063, "learning_rate": 2.4039847913176485e-09, "loss": 0.2662, "step": 31685 }, { "epoch": 2.973536036036036, "grad_norm": 1.2765232871345342, "learning_rate": 2.3870864711894593e-09, "loss": 0.3015, "step": 31686 }, { "epoch": 2.97362987987988, "grad_norm": 1.0614906570707956, "learning_rate": 2.3702477381881693e-09, "loss": 0.3068, "step": 31687 }, { "epoch": 2.9737237237237237, "grad_norm": 1.2057752616401067, "learning_rate": 2.3534685925141744e-09, "loss": 0.2894, "step": 31688 }, { "epoch": 2.9738175675675675, "grad_norm": 1.373349851628695, "learning_rate": 2.336749034368424e-09, "loss": 0.2835, "step": 31689 }, { "epoch": 2.9739114114114114, "grad_norm": 1.4333543262103732, "learning_rate": 2.320089063950204e-09, "loss": 0.349, "step": 31690 }, { "epoch": 2.974005255255255, "grad_norm": 1.1748409216747764, "learning_rate": 2.3034886814571334e-09, "loss": 0.3127, "step": 31691 }, { "epoch": 2.974099099099099, "grad_norm": 1.1047554837467175, "learning_rate": 2.2869478870879427e-09, "loss": 0.2966, "step": 31692 }, { "epoch": 2.974192942942943, "grad_norm": 1.2069058358591782, "learning_rate": 2.2704666810396957e-09, "loss": 0.3279, "step": 31693 }, { "epoch": 2.974286786786787, "grad_norm": 0.9889739849980828, "learning_rate": 2.2540450635089026e-09, "loss": 0.3255, "step": 31694 }, { "epoch": 2.9743806306306304, "grad_norm": 1.051767410669198, "learning_rate": 2.237683034691518e-09, "loss": 0.2846, "step": 31695 }, { "epoch": 2.9744744744744747, "grad_norm": 1.051003268154272, "learning_rate": 2.22138059478183e-09, "loss": 0.3076, "step": 31696 }, { "epoch": 2.9745683183183185, "grad_norm": 1.1207520526024604, "learning_rate": 2.2051377439752385e-09, "loss": 0.3314, "step": 31697 }, { "epoch": 2.9746621621621623, "grad_norm": 1.4736304063902943, "learning_rate": 2.188954482464922e-09, "loss": 0.2887, "step": 31698 }, { "epoch": 2.974756006006006, "grad_norm": 0.9990790126830011, "learning_rate": 2.17283081044406e-09, "loss": 0.3334, "step": 31699 }, { "epoch": 2.97484984984985, "grad_norm": 1.2763675479455605, "learning_rate": 2.15676672810472e-09, "loss": 0.3255, "step": 31700 }, { "epoch": 2.9749436936936937, "grad_norm": 1.0465131451828018, "learning_rate": 2.140762235638416e-09, "loss": 0.2797, "step": 31701 }, { "epoch": 2.9750375375375375, "grad_norm": 1.220110700207882, "learning_rate": 2.1248173332361068e-09, "loss": 0.315, "step": 31702 }, { "epoch": 2.9751313813813813, "grad_norm": 1.0531237742238464, "learning_rate": 2.1089320210876395e-09, "loss": 0.3168, "step": 31703 }, { "epoch": 2.975225225225225, "grad_norm": 1.1657339561117523, "learning_rate": 2.0931062993828634e-09, "loss": 0.2398, "step": 31704 }, { "epoch": 2.975319069069069, "grad_norm": 1.2057418451392867, "learning_rate": 2.0773401683099603e-09, "loss": 0.2946, "step": 31705 }, { "epoch": 2.9754129129129128, "grad_norm": 1.130826887752496, "learning_rate": 2.0616336280571135e-09, "loss": 0.2902, "step": 31706 }, { "epoch": 2.975506756756757, "grad_norm": 1.1141116601653074, "learning_rate": 2.045986678811951e-09, "loss": 0.2877, "step": 31707 }, { "epoch": 2.9756006006006004, "grad_norm": 0.9311139823441129, "learning_rate": 2.0303993207604344e-09, "loss": 0.2775, "step": 31708 }, { "epoch": 2.9756944444444446, "grad_norm": 1.2132320526682252, "learning_rate": 2.0148715540885265e-09, "loss": 0.3015, "step": 31709 }, { "epoch": 2.9757882882882885, "grad_norm": 1.3708558852635275, "learning_rate": 1.999403378981635e-09, "loss": 0.3151, "step": 31710 }, { "epoch": 2.9758821321321323, "grad_norm": 0.9934624120804567, "learning_rate": 1.983994795624611e-09, "loss": 0.3129, "step": 31711 }, { "epoch": 2.975975975975976, "grad_norm": 2.7617651405145427, "learning_rate": 1.9686458042000866e-09, "loss": 0.3137, "step": 31712 }, { "epoch": 2.97606981981982, "grad_norm": 1.0254858720414877, "learning_rate": 1.953356404891804e-09, "loss": 0.3063, "step": 31713 }, { "epoch": 2.9761636636636637, "grad_norm": 1.215367739829909, "learning_rate": 1.93812659788184e-09, "loss": 0.3036, "step": 31714 }, { "epoch": 2.9762575075075075, "grad_norm": 1.010530757228676, "learning_rate": 1.9229563833522704e-09, "loss": 0.3748, "step": 31715 }, { "epoch": 2.9763513513513513, "grad_norm": 0.9919144456794226, "learning_rate": 1.9078457614829515e-09, "loss": 0.3101, "step": 31716 }, { "epoch": 2.976445195195195, "grad_norm": 1.0730938690318688, "learning_rate": 1.89279473245485e-09, "loss": 0.2966, "step": 31717 }, { "epoch": 2.976539039039039, "grad_norm": 1.1757944059082068, "learning_rate": 1.877803296447267e-09, "loss": 0.2748, "step": 31718 }, { "epoch": 2.9766328828828827, "grad_norm": 1.0781298807420034, "learning_rate": 1.862871453638393e-09, "loss": 0.3035, "step": 31719 }, { "epoch": 2.976726726726727, "grad_norm": 1.5049958760379165, "learning_rate": 1.847999204207529e-09, "loss": 0.3262, "step": 31720 }, { "epoch": 2.9768205705705704, "grad_norm": 1.0742244076411982, "learning_rate": 1.8331865483306454e-09, "loss": 0.2895, "step": 31721 }, { "epoch": 2.9769144144144146, "grad_norm": 1.1172107025477502, "learning_rate": 1.8184334861853782e-09, "loss": 0.2899, "step": 31722 }, { "epoch": 2.977008258258258, "grad_norm": 1.2841117156934485, "learning_rate": 1.8037400179465869e-09, "loss": 0.2761, "step": 31723 }, { "epoch": 2.9771021021021022, "grad_norm": 1.2344903278460715, "learning_rate": 1.7891061437907975e-09, "loss": 0.356, "step": 31724 }, { "epoch": 2.977195945945946, "grad_norm": 1.1664757789490683, "learning_rate": 1.7745318638912045e-09, "loss": 0.3184, "step": 31725 }, { "epoch": 2.97728978978979, "grad_norm": 1.1713929642301415, "learning_rate": 1.7600171784221132e-09, "loss": 0.308, "step": 31726 }, { "epoch": 2.9773836336336337, "grad_norm": 1.4764002876087268, "learning_rate": 1.745562087556718e-09, "loss": 0.3371, "step": 31727 }, { "epoch": 2.9774774774774775, "grad_norm": 1.162603019155849, "learning_rate": 1.731166591467659e-09, "loss": 0.3385, "step": 31728 }, { "epoch": 2.9775713213213213, "grad_norm": 1.1178491837276703, "learning_rate": 1.716830690325355e-09, "loss": 0.3194, "step": 31729 }, { "epoch": 2.977665165165165, "grad_norm": 1.1365532699660317, "learning_rate": 1.7025543843024461e-09, "loss": 0.293, "step": 31730 }, { "epoch": 2.977759009009009, "grad_norm": 1.0869879303877623, "learning_rate": 1.688337673567686e-09, "loss": 0.3059, "step": 31731 }, { "epoch": 2.9778528528528527, "grad_norm": 4.66537134596572, "learning_rate": 1.674180558291494e-09, "loss": 0.3021, "step": 31732 }, { "epoch": 2.9779466966966965, "grad_norm": 1.0559028683524314, "learning_rate": 1.6600830386420685e-09, "loss": 0.3095, "step": 31733 }, { "epoch": 2.9780405405405403, "grad_norm": 1.082011012372519, "learning_rate": 1.646045114787609e-09, "loss": 0.3256, "step": 31734 }, { "epoch": 2.9781343843843846, "grad_norm": 1.3040333981209828, "learning_rate": 1.6320667868957586e-09, "loss": 0.3082, "step": 31735 }, { "epoch": 2.978228228228228, "grad_norm": 1.4154899153844698, "learning_rate": 1.618148055133051e-09, "loss": 0.2746, "step": 31736 }, { "epoch": 2.9783220720720722, "grad_norm": 1.2761124050201444, "learning_rate": 1.6042889196654644e-09, "loss": 0.3438, "step": 31737 }, { "epoch": 2.978415915915916, "grad_norm": 1.1604506120088223, "learning_rate": 1.590489380657867e-09, "loss": 0.2759, "step": 31738 }, { "epoch": 2.97850975975976, "grad_norm": 1.0856332874280854, "learning_rate": 1.576749438275682e-09, "loss": 0.3345, "step": 31739 }, { "epoch": 2.9786036036036037, "grad_norm": 1.113634130546145, "learning_rate": 1.5630690926821124e-09, "loss": 0.3311, "step": 31740 }, { "epoch": 2.9786974474474475, "grad_norm": 1.2035845530045057, "learning_rate": 1.5494483440403607e-09, "loss": 0.3218, "step": 31741 }, { "epoch": 2.9787912912912913, "grad_norm": 0.9751993168121272, "learning_rate": 1.5358871925125196e-09, "loss": 0.2747, "step": 31742 }, { "epoch": 2.978885135135135, "grad_norm": 0.9668861975926225, "learning_rate": 1.5223856382606817e-09, "loss": 0.2465, "step": 31743 }, { "epoch": 2.978978978978979, "grad_norm": 1.6850329140275035, "learning_rate": 1.5089436814458292e-09, "loss": 0.2967, "step": 31744 }, { "epoch": 2.9790728228228227, "grad_norm": 1.2043538008819947, "learning_rate": 1.4955613222283892e-09, "loss": 0.3521, "step": 31745 }, { "epoch": 2.9791666666666665, "grad_norm": 1.2521294441044015, "learning_rate": 1.4822385607671242e-09, "loss": 0.3149, "step": 31746 }, { "epoch": 2.9792605105105103, "grad_norm": 1.1249838829883088, "learning_rate": 1.4689753972219057e-09, "loss": 0.2513, "step": 31747 }, { "epoch": 2.9793543543543546, "grad_norm": 1.0624012782820864, "learning_rate": 1.4557718317503854e-09, "loss": 0.3415, "step": 31748 }, { "epoch": 2.979448198198198, "grad_norm": 1.1558129756146571, "learning_rate": 1.4426278645096603e-09, "loss": 0.2934, "step": 31749 }, { "epoch": 2.979542042042042, "grad_norm": 1.3611567597057161, "learning_rate": 1.4295434956568267e-09, "loss": 0.3329, "step": 31750 }, { "epoch": 2.979635885885886, "grad_norm": 1.278662578937348, "learning_rate": 1.4165187253478708e-09, "loss": 0.295, "step": 31751 }, { "epoch": 2.97972972972973, "grad_norm": 0.935279301142751, "learning_rate": 1.4035535537382239e-09, "loss": 0.3082, "step": 31752 }, { "epoch": 2.9798235735735736, "grad_norm": 3.6981061657489676, "learning_rate": 1.3906479809822071e-09, "loss": 0.3312, "step": 31753 }, { "epoch": 2.9799174174174174, "grad_norm": 1.4999214122719207, "learning_rate": 1.3778020072335863e-09, "loss": 0.2876, "step": 31754 }, { "epoch": 2.9800112612612613, "grad_norm": 1.0561819043159384, "learning_rate": 1.3650156326461273e-09, "loss": 0.2997, "step": 31755 }, { "epoch": 2.980105105105105, "grad_norm": 1.3369235372721318, "learning_rate": 1.3522888573719305e-09, "loss": 0.3121, "step": 31756 }, { "epoch": 2.980198948948949, "grad_norm": 1.2922206881633165, "learning_rate": 1.3396216815619867e-09, "loss": 0.3108, "step": 31757 }, { "epoch": 2.9802927927927927, "grad_norm": 1.25546926556763, "learning_rate": 1.3270141053689512e-09, "loss": 0.2801, "step": 31758 }, { "epoch": 2.9803866366366365, "grad_norm": 0.984904777527924, "learning_rate": 1.314466128941594e-09, "loss": 0.3213, "step": 31759 }, { "epoch": 2.9804804804804803, "grad_norm": 1.043626437043587, "learning_rate": 1.30197775243035e-09, "loss": 0.322, "step": 31760 }, { "epoch": 2.9805743243243246, "grad_norm": 1.0677856875029754, "learning_rate": 1.2895489759839897e-09, "loss": 0.2971, "step": 31761 }, { "epoch": 2.980668168168168, "grad_norm": 1.1354049333304803, "learning_rate": 1.2771797997507273e-09, "loss": 0.3254, "step": 31762 }, { "epoch": 2.980762012012012, "grad_norm": 1.4306480779060398, "learning_rate": 1.2648702238782228e-09, "loss": 0.3016, "step": 31763 }, { "epoch": 2.980855855855856, "grad_norm": 0.9343889718791352, "learning_rate": 1.2526202485124706e-09, "loss": 0.318, "step": 31764 }, { "epoch": 2.9809496996997, "grad_norm": 1.0597538144442942, "learning_rate": 1.2404298738000198e-09, "loss": 0.2702, "step": 31765 }, { "epoch": 2.9810435435435436, "grad_norm": 1.3114197934077165, "learning_rate": 1.228299099886865e-09, "loss": 0.3263, "step": 31766 }, { "epoch": 2.9811373873873874, "grad_norm": 1.2924310259345142, "learning_rate": 1.2162279269162247e-09, "loss": 0.2913, "step": 31767 }, { "epoch": 2.9812312312312312, "grad_norm": 1.2599160096858049, "learning_rate": 1.2042163550335385e-09, "loss": 0.2581, "step": 31768 }, { "epoch": 2.981325075075075, "grad_norm": 1.2028947299248731, "learning_rate": 1.1922643843809144e-09, "loss": 0.3256, "step": 31769 }, { "epoch": 2.981418918918919, "grad_norm": 1.1621152103282388, "learning_rate": 1.1803720151010167e-09, "loss": 0.3062, "step": 31770 }, { "epoch": 2.9815127627627627, "grad_norm": 1.74835758936662, "learning_rate": 1.1685392473365088e-09, "loss": 0.2886, "step": 31771 }, { "epoch": 2.9816066066066065, "grad_norm": 1.0793571080427744, "learning_rate": 1.1567660812272786e-09, "loss": 0.3064, "step": 31772 }, { "epoch": 2.9817004504504503, "grad_norm": 1.1632174122677568, "learning_rate": 1.1450525169148797e-09, "loss": 0.3296, "step": 31773 }, { "epoch": 2.9817942942942945, "grad_norm": 1.5147134941123066, "learning_rate": 1.1333985545375348e-09, "loss": 0.3221, "step": 31774 }, { "epoch": 2.981888138138138, "grad_norm": 1.1117216661115588, "learning_rate": 1.1218041942356871e-09, "loss": 0.318, "step": 31775 }, { "epoch": 2.981981981981982, "grad_norm": 1.2130075072179207, "learning_rate": 1.1102694361464494e-09, "loss": 0.3206, "step": 31776 }, { "epoch": 2.982075825825826, "grad_norm": 1.1238097554166584, "learning_rate": 1.0987942804080442e-09, "loss": 0.2895, "step": 31777 }, { "epoch": 2.98216966966967, "grad_norm": 1.1890508822681511, "learning_rate": 1.087378727157029e-09, "loss": 0.307, "step": 31778 }, { "epoch": 2.9822635135135136, "grad_norm": 1.1991080552950344, "learning_rate": 1.0760227765299614e-09, "loss": 0.2896, "step": 31779 }, { "epoch": 2.9823573573573574, "grad_norm": 1.0252245061716285, "learning_rate": 1.0647264286611781e-09, "loss": 0.3502, "step": 31780 }, { "epoch": 2.982451201201201, "grad_norm": 1.9361714238459733, "learning_rate": 1.0534896836861264e-09, "loss": 0.3023, "step": 31781 }, { "epoch": 2.982545045045045, "grad_norm": 1.1052507874649289, "learning_rate": 1.042312541738588e-09, "loss": 0.2682, "step": 31782 }, { "epoch": 2.982638888888889, "grad_norm": 1.143158441559089, "learning_rate": 1.0311950029523455e-09, "loss": 0.313, "step": 31783 }, { "epoch": 2.9827327327327327, "grad_norm": 1.6303128095330426, "learning_rate": 1.0201370674589594e-09, "loss": 0.35, "step": 31784 }, { "epoch": 2.9828265765765765, "grad_norm": 1.2536317566625268, "learning_rate": 1.0091387353911019e-09, "loss": 0.3436, "step": 31785 }, { "epoch": 2.9829204204204203, "grad_norm": 1.1433197902757397, "learning_rate": 9.982000068797792e-10, "loss": 0.2992, "step": 31786 }, { "epoch": 2.9830142642642645, "grad_norm": 0.9769806986372963, "learning_rate": 9.873208820548875e-10, "loss": 0.3056, "step": 31787 }, { "epoch": 2.983108108108108, "grad_norm": 1.3110454364597974, "learning_rate": 9.765013610468777e-10, "loss": 0.2876, "step": 31788 }, { "epoch": 2.983201951951952, "grad_norm": 1.3779373134037696, "learning_rate": 9.657414439845359e-10, "loss": 0.3124, "step": 31789 }, { "epoch": 2.983295795795796, "grad_norm": 1.0630186886953166, "learning_rate": 9.550411309955376e-10, "loss": 0.308, "step": 31790 }, { "epoch": 2.9833896396396398, "grad_norm": 0.9238380177663614, "learning_rate": 9.444004222081137e-10, "loss": 0.3035, "step": 31791 }, { "epoch": 2.9834834834834836, "grad_norm": 1.08374974143271, "learning_rate": 9.33819317749385e-10, "loss": 0.3288, "step": 31792 }, { "epoch": 2.9835773273273274, "grad_norm": 1.2284654323528126, "learning_rate": 9.232978177442509e-10, "loss": 0.3073, "step": 31793 }, { "epoch": 2.983671171171171, "grad_norm": 1.1508998115635276, "learning_rate": 9.128359223198324e-10, "loss": 0.2925, "step": 31794 }, { "epoch": 2.983765015015015, "grad_norm": 1.2097295852338075, "learning_rate": 9.024336315993643e-10, "loss": 0.3113, "step": 31795 }, { "epoch": 2.983858858858859, "grad_norm": 1.14398238991898, "learning_rate": 8.920909457077464e-10, "loss": 0.2987, "step": 31796 }, { "epoch": 2.9839527027027026, "grad_norm": 1.1220949787263221, "learning_rate": 8.818078647682137e-10, "loss": 0.2716, "step": 31797 }, { "epoch": 2.9840465465465464, "grad_norm": 1.2634330728593004, "learning_rate": 8.715843889034459e-10, "loss": 0.3335, "step": 31798 }, { "epoch": 2.9841403903903903, "grad_norm": 1.1842876026917506, "learning_rate": 8.614205182344571e-10, "loss": 0.3279, "step": 31799 }, { "epoch": 2.9842342342342345, "grad_norm": 1.186290577947624, "learning_rate": 8.513162528839269e-10, "loss": 0.2946, "step": 31800 }, { "epoch": 2.984328078078078, "grad_norm": 1.335627252561786, "learning_rate": 8.412715929712045e-10, "loss": 0.2904, "step": 31801 }, { "epoch": 2.984421921921922, "grad_norm": 0.9439901895013948, "learning_rate": 8.312865386161939e-10, "loss": 0.3322, "step": 31802 }, { "epoch": 2.9845157657657655, "grad_norm": 1.0911499477639153, "learning_rate": 8.213610899382441e-10, "loss": 0.3538, "step": 31803 }, { "epoch": 2.9846096096096097, "grad_norm": 1.3030733477782512, "learning_rate": 8.114952470555937e-10, "loss": 0.3145, "step": 31804 }, { "epoch": 2.9847034534534536, "grad_norm": 0.9525945762801634, "learning_rate": 8.016890100859265e-10, "loss": 0.3338, "step": 31805 }, { "epoch": 2.9847972972972974, "grad_norm": 2.6577918606239788, "learning_rate": 7.919423791458159e-10, "loss": 0.3355, "step": 31806 }, { "epoch": 2.984891141141141, "grad_norm": 1.1765955023248893, "learning_rate": 7.822553543518352e-10, "loss": 0.3244, "step": 31807 }, { "epoch": 2.984984984984985, "grad_norm": 1.253150102386093, "learning_rate": 7.726279358194478e-10, "loss": 0.2938, "step": 31808 }, { "epoch": 2.985078828828829, "grad_norm": 1.5107964583609594, "learning_rate": 7.630601236635615e-10, "loss": 0.3179, "step": 31809 }, { "epoch": 2.9851726726726726, "grad_norm": 1.0487215409728095, "learning_rate": 7.535519179979745e-10, "loss": 0.2917, "step": 31810 }, { "epoch": 2.9852665165165164, "grad_norm": 1.097433563775931, "learning_rate": 7.441033189364843e-10, "loss": 0.3048, "step": 31811 }, { "epoch": 2.9853603603603602, "grad_norm": 1.2951222680264833, "learning_rate": 7.347143265906687e-10, "loss": 0.3341, "step": 31812 }, { "epoch": 2.985454204204204, "grad_norm": 1.2032976628297989, "learning_rate": 7.253849410737701e-10, "loss": 0.2781, "step": 31813 }, { "epoch": 2.985548048048048, "grad_norm": 1.1053246268657793, "learning_rate": 7.161151624962559e-10, "loss": 0.3087, "step": 31814 }, { "epoch": 2.985641891891892, "grad_norm": 1.193444844441594, "learning_rate": 7.069049909691483e-10, "loss": 0.2897, "step": 31815 }, { "epoch": 2.9857357357357355, "grad_norm": 1.0769848542063036, "learning_rate": 6.977544266018044e-10, "loss": 0.3347, "step": 31816 }, { "epoch": 2.9858295795795797, "grad_norm": 1.1156961576302962, "learning_rate": 6.88663469503581e-10, "loss": 0.3079, "step": 31817 }, { "epoch": 2.9859234234234235, "grad_norm": 1.2874144912749643, "learning_rate": 6.7963211978328e-10, "loss": 0.3052, "step": 31818 }, { "epoch": 2.9860172672672673, "grad_norm": 1.107370622607335, "learning_rate": 6.706603775474829e-10, "loss": 0.2587, "step": 31819 }, { "epoch": 2.986111111111111, "grad_norm": 1.538452117298502, "learning_rate": 6.617482429038813e-10, "loss": 0.3147, "step": 31820 }, { "epoch": 2.986204954954955, "grad_norm": 1.3724996764320532, "learning_rate": 6.528957159585015e-10, "loss": 0.3492, "step": 31821 }, { "epoch": 2.986298798798799, "grad_norm": 1.2971693366977286, "learning_rate": 6.441027968173697e-10, "loss": 0.2891, "step": 31822 }, { "epoch": 2.9863926426426426, "grad_norm": 1.1338046994485254, "learning_rate": 6.353694855848469e-10, "loss": 0.3112, "step": 31823 }, { "epoch": 2.9864864864864864, "grad_norm": 1.2485110520943459, "learning_rate": 6.266957823652942e-10, "loss": 0.2993, "step": 31824 }, { "epoch": 2.98658033033033, "grad_norm": 1.22427190084766, "learning_rate": 6.180816872619622e-10, "loss": 0.3396, "step": 31825 }, { "epoch": 2.986674174174174, "grad_norm": 1.1541733369036973, "learning_rate": 6.095272003781017e-10, "loss": 0.2843, "step": 31826 }, { "epoch": 2.986768018018018, "grad_norm": 1.2114070124467573, "learning_rate": 6.010323218147429e-10, "loss": 0.3191, "step": 31827 }, { "epoch": 2.986861861861862, "grad_norm": 1.0744226387549627, "learning_rate": 5.925970516734714e-10, "loss": 0.3185, "step": 31828 }, { "epoch": 2.9869557057057055, "grad_norm": 1.164894170796742, "learning_rate": 5.842213900553174e-10, "loss": 0.3055, "step": 31829 }, { "epoch": 2.9870495495495497, "grad_norm": 1.157774812443728, "learning_rate": 5.759053370602008e-10, "loss": 0.3225, "step": 31830 }, { "epoch": 2.9871433933933935, "grad_norm": 1.042577288404989, "learning_rate": 5.676488927863766e-10, "loss": 0.3148, "step": 31831 }, { "epoch": 2.9872372372372373, "grad_norm": 1.2590680640600518, "learning_rate": 5.594520573332097e-10, "loss": 0.2437, "step": 31832 }, { "epoch": 2.987331081081081, "grad_norm": 1.3427941899462503, "learning_rate": 5.513148307978444e-10, "loss": 0.3643, "step": 31833 }, { "epoch": 2.987424924924925, "grad_norm": 1.1248965064409409, "learning_rate": 5.432372132779807e-10, "loss": 0.3357, "step": 31834 }, { "epoch": 2.9875187687687688, "grad_norm": 1.1705225138872362, "learning_rate": 5.352192048690974e-10, "loss": 0.2959, "step": 31835 }, { "epoch": 2.9876126126126126, "grad_norm": 1.1411644528932643, "learning_rate": 5.272608056677841e-10, "loss": 0.331, "step": 31836 }, { "epoch": 2.9877064564564564, "grad_norm": 1.0173899543731826, "learning_rate": 5.193620157678547e-10, "loss": 0.2399, "step": 31837 }, { "epoch": 2.9878003003003, "grad_norm": 1.1981426927616952, "learning_rate": 5.115228352642331e-10, "loss": 0.3308, "step": 31838 }, { "epoch": 2.987894144144144, "grad_norm": 1.0080012550520343, "learning_rate": 5.037432642496231e-10, "loss": 0.2911, "step": 31839 }, { "epoch": 2.987987987987988, "grad_norm": 1.2478198768208613, "learning_rate": 4.960233028178386e-10, "loss": 0.3475, "step": 31840 }, { "epoch": 2.988081831831832, "grad_norm": 1.0320654716819235, "learning_rate": 4.883629510599175e-10, "loss": 0.3136, "step": 31841 }, { "epoch": 2.9881756756756754, "grad_norm": 1.0529942899059108, "learning_rate": 4.807622090680086e-10, "loss": 0.2906, "step": 31842 }, { "epoch": 2.9882695195195197, "grad_norm": 1.0266006221439592, "learning_rate": 4.732210769325951e-10, "loss": 0.3286, "step": 31843 }, { "epoch": 2.9883633633633635, "grad_norm": 1.1853387851904802, "learning_rate": 4.657395547430499e-10, "loss": 0.3305, "step": 31844 }, { "epoch": 2.9884572072072073, "grad_norm": 1.0267864186867393, "learning_rate": 4.583176425887459e-10, "loss": 0.3487, "step": 31845 }, { "epoch": 2.988551051051051, "grad_norm": 1.0277641620289215, "learning_rate": 4.50955340558501e-10, "loss": 0.3006, "step": 31846 }, { "epoch": 2.988644894894895, "grad_norm": 0.9341741469635375, "learning_rate": 4.436526487400228e-10, "loss": 0.2533, "step": 31847 }, { "epoch": 2.9887387387387387, "grad_norm": 1.036044813259224, "learning_rate": 4.364095672204638e-10, "loss": 0.2762, "step": 31848 }, { "epoch": 2.9888325825825826, "grad_norm": 0.9745259564926856, "learning_rate": 4.2922609608531116e-10, "loss": 0.2932, "step": 31849 }, { "epoch": 2.9889264264264264, "grad_norm": 1.218769019128407, "learning_rate": 4.2210223542171746e-10, "loss": 0.3289, "step": 31850 }, { "epoch": 2.98902027027027, "grad_norm": 1.0396607325910716, "learning_rate": 4.1503798531350447e-10, "loss": 0.3342, "step": 31851 }, { "epoch": 2.989114114114114, "grad_norm": 1.2496636842512376, "learning_rate": 4.0803334584504916e-10, "loss": 0.2972, "step": 31852 }, { "epoch": 2.989207957957958, "grad_norm": 1.2389829302307582, "learning_rate": 4.0108831710017337e-10, "loss": 0.2962, "step": 31853 }, { "epoch": 2.989301801801802, "grad_norm": 1.1978376698106559, "learning_rate": 3.942028991615887e-10, "loss": 0.3415, "step": 31854 }, { "epoch": 2.9893956456456454, "grad_norm": 1.060766069325342, "learning_rate": 3.873770921114517e-10, "loss": 0.3112, "step": 31855 }, { "epoch": 2.9894894894894897, "grad_norm": 1.189511175257371, "learning_rate": 3.806108960308086e-10, "loss": 0.3402, "step": 31856 }, { "epoch": 2.9895833333333335, "grad_norm": 1.330319224831861, "learning_rate": 3.7390431100070567e-10, "loss": 0.3301, "step": 31857 }, { "epoch": 2.9896771771771773, "grad_norm": 1.092260940312415, "learning_rate": 3.6725733710052394e-10, "loss": 0.2831, "step": 31858 }, { "epoch": 2.989771021021021, "grad_norm": 0.9802362344611779, "learning_rate": 3.6066997441019934e-10, "loss": 0.3241, "step": 31859 }, { "epoch": 2.989864864864865, "grad_norm": 4.508835508059203, "learning_rate": 3.5414222300855783e-10, "loss": 0.3126, "step": 31860 }, { "epoch": 2.9899587087087087, "grad_norm": 0.9990662358983412, "learning_rate": 3.4767408297220473e-10, "loss": 0.274, "step": 31861 }, { "epoch": 2.9900525525525525, "grad_norm": 1.0810365015142063, "learning_rate": 3.412655543794108e-10, "loss": 0.3223, "step": 31862 }, { "epoch": 2.9901463963963963, "grad_norm": 1.2929053837234719, "learning_rate": 3.3491663730567116e-10, "loss": 0.2475, "step": 31863 }, { "epoch": 2.99024024024024, "grad_norm": 1.1319441249442395, "learning_rate": 3.2862733182759123e-10, "loss": 0.3458, "step": 31864 }, { "epoch": 2.990334084084084, "grad_norm": 1.1514405177784348, "learning_rate": 3.2239763801955593e-10, "loss": 0.3215, "step": 31865 }, { "epoch": 2.9904279279279278, "grad_norm": 1.0199254376531415, "learning_rate": 3.162275559559502e-10, "loss": 0.3049, "step": 31866 }, { "epoch": 2.990521771771772, "grad_norm": 1.107644660997179, "learning_rate": 3.1011708571060393e-10, "loss": 0.3437, "step": 31867 }, { "epoch": 2.9906156156156154, "grad_norm": 1.2945590482143456, "learning_rate": 3.040662273562367e-10, "loss": 0.3053, "step": 31868 }, { "epoch": 2.9907094594594597, "grad_norm": 1.3720765999631674, "learning_rate": 2.9807498096445784e-10, "loss": 0.2899, "step": 31869 }, { "epoch": 2.9908033033033035, "grad_norm": 1.153707955318845, "learning_rate": 2.9214334660743196e-10, "loss": 0.3213, "step": 31870 }, { "epoch": 2.9908971471471473, "grad_norm": 1.0705882881182862, "learning_rate": 2.862713243556581e-10, "loss": 0.2839, "step": 31871 }, { "epoch": 2.990990990990991, "grad_norm": 1.1689805018419233, "learning_rate": 2.8045891427908036e-10, "loss": 0.2862, "step": 31872 }, { "epoch": 2.991084834834835, "grad_norm": 1.223552551402313, "learning_rate": 2.747061164470877e-10, "loss": 0.2713, "step": 31873 }, { "epoch": 2.9911786786786787, "grad_norm": 1.002958126412433, "learning_rate": 2.690129309279588e-10, "loss": 0.3038, "step": 31874 }, { "epoch": 2.9912725225225225, "grad_norm": 1.1901000573492702, "learning_rate": 2.6337935778997236e-10, "loss": 0.3449, "step": 31875 }, { "epoch": 2.9913663663663663, "grad_norm": 1.2497886895739772, "learning_rate": 2.5780539710029695e-10, "loss": 0.2766, "step": 31876 }, { "epoch": 2.99146021021021, "grad_norm": 0.9891537144792429, "learning_rate": 2.522910489249908e-10, "loss": 0.332, "step": 31877 }, { "epoch": 2.991554054054054, "grad_norm": 1.0486560004151513, "learning_rate": 2.4683631333011215e-10, "loss": 0.2798, "step": 31878 }, { "epoch": 2.9916478978978978, "grad_norm": 1.1345292247620085, "learning_rate": 2.414411903811642e-10, "loss": 0.3264, "step": 31879 }, { "epoch": 2.991741741741742, "grad_norm": 1.2729517519968205, "learning_rate": 2.3610568014142965e-10, "loss": 0.3438, "step": 31880 }, { "epoch": 2.9918355855855854, "grad_norm": 1.1114131934418827, "learning_rate": 2.308297826747463e-10, "loss": 0.2988, "step": 31881 }, { "epoch": 2.9919294294294296, "grad_norm": 1.144305301327585, "learning_rate": 2.2561349804495202e-10, "loss": 0.3097, "step": 31882 }, { "epoch": 2.992023273273273, "grad_norm": 1.2242365476202939, "learning_rate": 2.2045682631310906e-10, "loss": 0.2902, "step": 31883 }, { "epoch": 2.9921171171171173, "grad_norm": 1.1105991190905025, "learning_rate": 2.1535976754138989e-10, "loss": 0.2996, "step": 31884 }, { "epoch": 2.992210960960961, "grad_norm": 1.2077918685603861, "learning_rate": 2.1032232179085675e-10, "loss": 0.2885, "step": 31885 }, { "epoch": 2.992304804804805, "grad_norm": 1.2831881045771036, "learning_rate": 2.0534448912035153e-10, "loss": 0.2668, "step": 31886 }, { "epoch": 2.9923986486486487, "grad_norm": 1.0039767366529198, "learning_rate": 2.0042626959038135e-10, "loss": 0.3139, "step": 31887 }, { "epoch": 2.9924924924924925, "grad_norm": 1.0725935105838067, "learning_rate": 1.9556766325923293e-10, "loss": 0.3084, "step": 31888 }, { "epoch": 2.9925863363363363, "grad_norm": 1.0418228983973101, "learning_rate": 1.9076867018463785e-10, "loss": 0.3308, "step": 31889 }, { "epoch": 2.99268018018018, "grad_norm": 1.1775335858959572, "learning_rate": 1.860292904237726e-10, "loss": 0.3225, "step": 31890 }, { "epoch": 2.992774024024024, "grad_norm": 1.1305952235454073, "learning_rate": 1.8134952403325856e-10, "loss": 0.3063, "step": 31891 }, { "epoch": 2.9928678678678677, "grad_norm": 1.1091930856819687, "learning_rate": 1.76729371069162e-10, "loss": 0.291, "step": 31892 }, { "epoch": 2.9929617117117115, "grad_norm": 1.1915714757445883, "learning_rate": 1.7216883158643894e-10, "loss": 0.3085, "step": 31893 }, { "epoch": 2.9930555555555554, "grad_norm": 2.3405815341783103, "learning_rate": 1.676679056394903e-10, "loss": 0.3402, "step": 31894 }, { "epoch": 2.9931493993993996, "grad_norm": 1.3704938632982988, "learning_rate": 1.6322659328160685e-10, "loss": 0.3218, "step": 31895 }, { "epoch": 2.993243243243243, "grad_norm": 1.1735253385707227, "learning_rate": 1.588448945660792e-10, "loss": 0.294, "step": 31896 }, { "epoch": 2.9933370870870872, "grad_norm": 1.0547703146234195, "learning_rate": 1.5452280954564303e-10, "loss": 0.2902, "step": 31897 }, { "epoch": 2.993430930930931, "grad_norm": 0.9767662273690466, "learning_rate": 1.5026033827081343e-10, "loss": 0.2997, "step": 31898 }, { "epoch": 2.993524774774775, "grad_norm": 1.61165901541895, "learning_rate": 1.460574807926607e-10, "loss": 0.2492, "step": 31899 }, { "epoch": 2.9936186186186187, "grad_norm": 1.0340793724609842, "learning_rate": 1.4191423716169994e-10, "loss": 0.3221, "step": 31900 }, { "epoch": 2.9937124624624625, "grad_norm": 1.157461690104278, "learning_rate": 1.378306074273361e-10, "loss": 0.3099, "step": 31901 }, { "epoch": 2.9938063063063063, "grad_norm": 1.0539486395026127, "learning_rate": 1.3380659163786392e-10, "loss": 0.3333, "step": 31902 }, { "epoch": 2.99390015015015, "grad_norm": 1.0990156699891866, "learning_rate": 1.2984218984157803e-10, "loss": 0.3212, "step": 31903 }, { "epoch": 2.993993993993994, "grad_norm": 1.0168395061008875, "learning_rate": 1.2593740208510786e-10, "loss": 0.3193, "step": 31904 }, { "epoch": 2.9940878378378377, "grad_norm": 1.057521793077529, "learning_rate": 1.2209222841619296e-10, "loss": 0.3551, "step": 31905 }, { "epoch": 2.9941816816816815, "grad_norm": 1.262242740399842, "learning_rate": 1.1830666887979736e-10, "loss": 0.3239, "step": 31906 }, { "epoch": 2.9942755255255253, "grad_norm": 1.1217473449174242, "learning_rate": 1.1458072352144023e-10, "loss": 0.3336, "step": 31907 }, { "epoch": 2.9943693693693696, "grad_norm": 1.0594027478651398, "learning_rate": 1.1091439238553048e-10, "loss": 0.3027, "step": 31908 }, { "epoch": 2.994463213213213, "grad_norm": 1.2712831318402444, "learning_rate": 1.073076755153668e-10, "loss": 0.3069, "step": 31909 }, { "epoch": 2.994557057057057, "grad_norm": 4.725324425794907, "learning_rate": 1.037605729542479e-10, "loss": 0.3063, "step": 31910 }, { "epoch": 2.994650900900901, "grad_norm": 1.4437625716318214, "learning_rate": 1.0027308474436226e-10, "loss": 0.292, "step": 31911 }, { "epoch": 2.994744744744745, "grad_norm": 1.0187364126941763, "learning_rate": 9.684521092734322e-11, "loss": 0.3155, "step": 31912 }, { "epoch": 2.9948385885885886, "grad_norm": 1.041601558046506, "learning_rate": 9.347695154426906e-11, "loss": 0.2877, "step": 31913 }, { "epoch": 2.9949324324324325, "grad_norm": 1.1075496719790185, "learning_rate": 9.016830663510778e-11, "loss": 0.2739, "step": 31914 }, { "epoch": 2.9950262762762763, "grad_norm": 1.0672906291866875, "learning_rate": 8.691927623927232e-11, "loss": 0.2871, "step": 31915 }, { "epoch": 2.99512012012012, "grad_norm": 1.0558963434010955, "learning_rate": 8.372986039562048e-11, "loss": 0.3202, "step": 31916 }, { "epoch": 2.995213963963964, "grad_norm": 1.2441401580263844, "learning_rate": 8.060005914189984e-11, "loss": 0.3114, "step": 31917 }, { "epoch": 2.9953078078078077, "grad_norm": 1.299716936686327, "learning_rate": 7.752987251585797e-11, "loss": 0.2651, "step": 31918 }, { "epoch": 2.9954016516516515, "grad_norm": 1.1385101666848012, "learning_rate": 7.451930055357715e-11, "loss": 0.2948, "step": 31919 }, { "epoch": 2.9954954954954953, "grad_norm": 1.2806610675230017, "learning_rate": 7.156834329169471e-11, "loss": 0.3009, "step": 31920 }, { "epoch": 2.9955893393393396, "grad_norm": 1.1145149691744443, "learning_rate": 6.867700076462758e-11, "loss": 0.3257, "step": 31921 }, { "epoch": 2.995683183183183, "grad_norm": 1.5062080060828416, "learning_rate": 6.584527300734777e-11, "loss": 0.3039, "step": 31922 }, { "epoch": 2.995777027027027, "grad_norm": 1.1548903244685536, "learning_rate": 6.30731600537171e-11, "loss": 0.3103, "step": 31923 }, { "epoch": 2.995870870870871, "grad_norm": 1.0369261334520945, "learning_rate": 6.036066193593204e-11, "loss": 0.312, "step": 31924 }, { "epoch": 2.995964714714715, "grad_norm": 1.3916131110294236, "learning_rate": 5.770777868729926e-11, "loss": 0.3155, "step": 31925 }, { "epoch": 2.9960585585585586, "grad_norm": 0.9691743228057654, "learning_rate": 5.5114510339460134e-11, "loss": 0.2946, "step": 31926 }, { "epoch": 2.9961524024024024, "grad_norm": 1.0176820192470077, "learning_rate": 5.2580856922390675e-11, "loss": 0.3288, "step": 31927 }, { "epoch": 2.9962462462462462, "grad_norm": 1.059544591740614, "learning_rate": 5.0106818467177135e-11, "loss": 0.3304, "step": 31928 }, { "epoch": 2.99634009009009, "grad_norm": 0.9752008217992056, "learning_rate": 4.76923950026853e-11, "loss": 0.3229, "step": 31929 }, { "epoch": 2.996433933933934, "grad_norm": 1.2210748479208193, "learning_rate": 4.533758655778098e-11, "loss": 0.3173, "step": 31930 }, { "epoch": 2.9965277777777777, "grad_norm": 1.8224594842793431, "learning_rate": 4.3042393161329965e-11, "loss": 0.2805, "step": 31931 }, { "epoch": 2.9966216216216215, "grad_norm": 1.5344019457833207, "learning_rate": 4.0806814839977615e-11, "loss": 0.3059, "step": 31932 }, { "epoch": 2.9967154654654653, "grad_norm": 1.081902269159655, "learning_rate": 3.863085162036928e-11, "loss": 0.2984, "step": 31933 }, { "epoch": 2.9968093093093096, "grad_norm": 1.4408065551338916, "learning_rate": 3.6514503529150314e-11, "loss": 0.3263, "step": 31934 }, { "epoch": 2.996903153153153, "grad_norm": 1.895022232782995, "learning_rate": 3.445777059074562e-11, "loss": 0.35, "step": 31935 }, { "epoch": 2.996996996996997, "grad_norm": 1.3493089362979311, "learning_rate": 3.246065282958011e-11, "loss": 0.3442, "step": 31936 }, { "epoch": 2.997090840840841, "grad_norm": 1.221588730358524, "learning_rate": 3.052315027007868e-11, "loss": 0.3602, "step": 31937 }, { "epoch": 2.997184684684685, "grad_norm": 1.2455029928107018, "learning_rate": 2.8645262935556027e-11, "loss": 0.3094, "step": 31938 }, { "epoch": 2.9972785285285286, "grad_norm": 1.142481147881758, "learning_rate": 2.6826990847661495e-11, "loss": 0.3467, "step": 31939 }, { "epoch": 2.9973723723723724, "grad_norm": 1.2305410113035304, "learning_rate": 2.5068334028044426e-11, "loss": 0.2987, "step": 31940 }, { "epoch": 2.9974662162162162, "grad_norm": 1.1694840450205854, "learning_rate": 2.336929249835418e-11, "loss": 0.319, "step": 31941 }, { "epoch": 2.99756006006006, "grad_norm": 1.0909626474871104, "learning_rate": 2.172986627857476e-11, "loss": 0.285, "step": 31942 }, { "epoch": 2.997653903903904, "grad_norm": 1.1600874420641913, "learning_rate": 2.0150055388135082e-11, "loss": 0.2914, "step": 31943 }, { "epoch": 2.9977477477477477, "grad_norm": 1.1317175980156067, "learning_rate": 1.8629859845353816e-11, "loss": 0.3137, "step": 31944 }, { "epoch": 2.9978415915915915, "grad_norm": 1.2399904613951729, "learning_rate": 1.7169279669659866e-11, "loss": 0.2842, "step": 31945 }, { "epoch": 2.9979354354354353, "grad_norm": 1.1697386205396727, "learning_rate": 1.576831487715147e-11, "loss": 0.3165, "step": 31946 }, { "epoch": 2.9980292792792795, "grad_norm": 1.1927044487121528, "learning_rate": 1.4426965485592192e-11, "loss": 0.3046, "step": 31947 }, { "epoch": 2.998123123123123, "grad_norm": 1.101913613499038, "learning_rate": 1.3145231510525159e-11, "loss": 0.3231, "step": 31948 }, { "epoch": 2.998216966966967, "grad_norm": 1.9151404407930912, "learning_rate": 1.1923112966938377e-11, "loss": 0.2913, "step": 31949 }, { "epoch": 2.998310810810811, "grad_norm": 2.4946451308459054, "learning_rate": 1.0760609869264749e-11, "loss": 0.3325, "step": 31950 }, { "epoch": 2.9984046546546548, "grad_norm": 1.3121662750318321, "learning_rate": 9.657722232492284e-12, "loss": 0.3296, "step": 31951 }, { "epoch": 2.9984984984984986, "grad_norm": 1.1227816490020408, "learning_rate": 8.614450068278325e-12, "loss": 0.3052, "step": 31952 }, { "epoch": 2.9985923423423424, "grad_norm": 1.1938248775026343, "learning_rate": 7.630793390500657e-12, "loss": 0.2986, "step": 31953 }, { "epoch": 2.998686186186186, "grad_norm": 1.186475697820867, "learning_rate": 6.706752209706402e-12, "loss": 0.3002, "step": 31954 }, { "epoch": 2.99878003003003, "grad_norm": 1.1853983243277515, "learning_rate": 5.842326538108012e-12, "loss": 0.3251, "step": 31955 }, { "epoch": 2.998873873873874, "grad_norm": 1.161135791997584, "learning_rate": 5.037516384587271e-12, "loss": 0.2796, "step": 31956 }, { "epoch": 2.9989677177177176, "grad_norm": 4.3210621728922, "learning_rate": 4.292321759691298e-12, "loss": 0.3051, "step": 31957 }, { "epoch": 2.9990615615615615, "grad_norm": 1.0067996813179967, "learning_rate": 3.6067426723018774e-12, "loss": 0.3072, "step": 31958 }, { "epoch": 2.9991554054054053, "grad_norm": 1.0830993019571349, "learning_rate": 2.9807791301905697e-12, "loss": 0.2812, "step": 31959 }, { "epoch": 2.9992492492492495, "grad_norm": 1.3705990804871024, "learning_rate": 2.4144311411289368e-12, "loss": 0.3003, "step": 31960 }, { "epoch": 2.999343093093093, "grad_norm": 1.335498278404132, "learning_rate": 1.9076987117783165e-12, "loss": 0.3426, "step": 31961 }, { "epoch": 2.999436936936937, "grad_norm": 1.2841270946783756, "learning_rate": 1.4605818476898238e-12, "loss": 0.2993, "step": 31962 }, { "epoch": 2.9995307807807805, "grad_norm": 1.142022661535227, "learning_rate": 1.0730805549696854e-12, "loss": 0.2942, "step": 31963 }, { "epoch": 2.9996246246246248, "grad_norm": 1.1481302709669037, "learning_rate": 7.451948380587937e-13, "loss": 0.2985, "step": 31964 }, { "epoch": 2.9997184684684686, "grad_norm": 1.025119523857954, "learning_rate": 4.76924700842929e-13, "loss": 0.3095, "step": 31965 }, { "epoch": 2.9998123123123124, "grad_norm": 1.070499592974392, "learning_rate": 2.682701460976489e-13, "loss": 0.3036, "step": 31966 }, { "epoch": 2.999906156156156, "grad_norm": 0.9753136601854796, "learning_rate": 1.1923117659851102e-13, "loss": 0.3512, "step": 31967 }, { "epoch": 3.0, "grad_norm": 1.0926954536937976, "learning_rate": 2.980779401084988e-14, "loss": 0.2825, "step": 31968 }, { "epoch": 3.0, "step": 31968, "total_flos": 6198603321974784.0, "train_loss": 0.4076699030746867, "train_runtime": 56205.518, "train_samples_per_second": 18.2, "train_steps_per_second": 0.569 } ], "logging_steps": 1.0, "max_steps": 31968, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6198603321974784.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }