{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 59577, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005035500276952515, "grad_norm": 15.04002122139045, "learning_rate": 1.6784155756965426e-08, "loss": 1.4618, "step": 10 }, { "epoch": 0.001007100055390503, "grad_norm": 20.03343360478744, "learning_rate": 3.356831151393085e-08, "loss": 1.3278, "step": 20 }, { "epoch": 0.0015106500830857545, "grad_norm": 22.64960725427052, "learning_rate": 5.035246727089628e-08, "loss": 1.2944, "step": 30 }, { "epoch": 0.002014200110781006, "grad_norm": 18.83826220119652, "learning_rate": 6.71366230278617e-08, "loss": 1.501, "step": 40 }, { "epoch": 0.002517750138476258, "grad_norm": 11.998411451651606, "learning_rate": 8.392077878482714e-08, "loss": 1.3966, "step": 50 }, { "epoch": 0.003021300166171509, "grad_norm": 21.60448187877111, "learning_rate": 1.0070493454179255e-07, "loss": 1.3034, "step": 60 }, { "epoch": 0.0035248501938667607, "grad_norm": 18.42813984447708, "learning_rate": 1.1748909029875799e-07, "loss": 1.3763, "step": 70 }, { "epoch": 0.004028400221562012, "grad_norm": 25.016201004984683, "learning_rate": 1.342732460557234e-07, "loss": 1.3767, "step": 80 }, { "epoch": 0.004531950249257264, "grad_norm": 12.87906826126312, "learning_rate": 1.5105740181268883e-07, "loss": 1.2307, "step": 90 }, { "epoch": 0.005035500276952516, "grad_norm": 16.70449913229287, "learning_rate": 1.6784155756965428e-07, "loss": 1.2745, "step": 100 }, { "epoch": 0.0055390503046477664, "grad_norm": 14.922843790756291, "learning_rate": 1.8462571332661968e-07, "loss": 1.2724, "step": 110 }, { "epoch": 0.006042600332343018, "grad_norm": 14.182511716775267, "learning_rate": 2.014098690835851e-07, "loss": 1.2128, "step": 120 }, { "epoch": 0.00654615036003827, "grad_norm": 14.509416874321747, "learning_rate": 2.1819402484055053e-07, "loss": 1.1446, "step": 130 }, { "epoch": 0.007049700387733521, "grad_norm": 13.220458597440858, "learning_rate": 2.3497818059751599e-07, "loss": 1.064, "step": 140 }, { "epoch": 0.007553250415428773, "grad_norm": 9.46249518876751, "learning_rate": 2.517623363544814e-07, "loss": 0.8988, "step": 150 }, { "epoch": 0.008056800443124025, "grad_norm": 8.29228264472673, "learning_rate": 2.685464921114468e-07, "loss": 0.8442, "step": 160 }, { "epoch": 0.008560350470819275, "grad_norm": 7.931044950961125, "learning_rate": 2.853306478684122e-07, "loss": 0.8174, "step": 170 }, { "epoch": 0.009063900498514528, "grad_norm": 8.151285006729815, "learning_rate": 3.0211480362537766e-07, "loss": 0.7193, "step": 180 }, { "epoch": 0.009567450526209779, "grad_norm": 8.397842819102214, "learning_rate": 3.1889895938234306e-07, "loss": 0.7477, "step": 190 }, { "epoch": 0.010071000553905031, "grad_norm": 7.2625048137653065, "learning_rate": 3.3568311513930857e-07, "loss": 0.6641, "step": 200 }, { "epoch": 0.010574550581600282, "grad_norm": 8.525620506880731, "learning_rate": 3.5246727089627397e-07, "loss": 0.6266, "step": 210 }, { "epoch": 0.011078100609295533, "grad_norm": 7.136719104407234, "learning_rate": 3.6925142665323937e-07, "loss": 0.6194, "step": 220 }, { "epoch": 0.011581650636990785, "grad_norm": 7.052271426002321, "learning_rate": 3.860355824102048e-07, "loss": 0.6335, "step": 230 }, { "epoch": 0.012085200664686036, "grad_norm": 7.450701338846066, "learning_rate": 4.028197381671702e-07, "loss": 0.599, "step": 240 }, { "epoch": 0.012588750692381289, "grad_norm": 6.795421674728912, "learning_rate": 4.196038939241356e-07, "loss": 0.5932, "step": 250 }, { "epoch": 0.01309230072007654, "grad_norm": 7.03272037815685, "learning_rate": 4.3638804968110107e-07, "loss": 0.5911, "step": 260 }, { "epoch": 0.01359585074777179, "grad_norm": 7.454226292611131, "learning_rate": 4.5317220543806647e-07, "loss": 0.615, "step": 270 }, { "epoch": 0.014099400775467043, "grad_norm": 7.858654633448549, "learning_rate": 4.6995636119503197e-07, "loss": 0.5427, "step": 280 }, { "epoch": 0.014602950803162294, "grad_norm": 9.072123638087367, "learning_rate": 4.867405169519974e-07, "loss": 0.5596, "step": 290 }, { "epoch": 0.015106500830857546, "grad_norm": 7.1765428239257645, "learning_rate": 5.035246727089628e-07, "loss": 0.5829, "step": 300 }, { "epoch": 0.015610050858552797, "grad_norm": 8.741732483374703, "learning_rate": 5.203088284659282e-07, "loss": 0.554, "step": 310 }, { "epoch": 0.01611360088624805, "grad_norm": 6.547811581375519, "learning_rate": 5.370929842228936e-07, "loss": 0.5811, "step": 320 }, { "epoch": 0.0166171509139433, "grad_norm": 8.254607868337139, "learning_rate": 5.53877139979859e-07, "loss": 0.5228, "step": 330 }, { "epoch": 0.01712070094163855, "grad_norm": 6.787812379986818, "learning_rate": 5.706612957368244e-07, "loss": 0.5607, "step": 340 }, { "epoch": 0.017624250969333802, "grad_norm": 6.461081104801505, "learning_rate": 5.874454514937899e-07, "loss": 0.5301, "step": 350 }, { "epoch": 0.018127800997029056, "grad_norm": 5.632058000561634, "learning_rate": 6.042296072507553e-07, "loss": 0.5481, "step": 360 }, { "epoch": 0.018631351024724307, "grad_norm": 8.852707655741465, "learning_rate": 6.210137630077208e-07, "loss": 0.5217, "step": 370 }, { "epoch": 0.019134901052419558, "grad_norm": 6.460172584600625, "learning_rate": 6.377979187646861e-07, "loss": 0.5052, "step": 380 }, { "epoch": 0.01963845108011481, "grad_norm": 6.541120694495611, "learning_rate": 6.545820745216516e-07, "loss": 0.5838, "step": 390 }, { "epoch": 0.020142001107810063, "grad_norm": 7.2800981577718655, "learning_rate": 6.713662302786171e-07, "loss": 0.5327, "step": 400 }, { "epoch": 0.020645551135505313, "grad_norm": 6.852340769470909, "learning_rate": 6.881503860355824e-07, "loss": 0.5197, "step": 410 }, { "epoch": 0.021149101163200564, "grad_norm": 9.177962123811058, "learning_rate": 7.049345417925479e-07, "loss": 0.539, "step": 420 }, { "epoch": 0.021652651190895815, "grad_norm": 6.228183918439739, "learning_rate": 7.217186975495133e-07, "loss": 0.5102, "step": 430 }, { "epoch": 0.022156201218591066, "grad_norm": 6.801902665248208, "learning_rate": 7.385028533064787e-07, "loss": 0.5182, "step": 440 }, { "epoch": 0.02265975124628632, "grad_norm": 5.612294474373128, "learning_rate": 7.552870090634441e-07, "loss": 0.5023, "step": 450 }, { "epoch": 0.02316330127398157, "grad_norm": 6.569247125780635, "learning_rate": 7.720711648204096e-07, "loss": 0.5295, "step": 460 }, { "epoch": 0.02366685130167682, "grad_norm": 5.305675365049888, "learning_rate": 7.888553205773749e-07, "loss": 0.5187, "step": 470 }, { "epoch": 0.024170401329372072, "grad_norm": 7.149105974633733, "learning_rate": 8.056394763343404e-07, "loss": 0.5299, "step": 480 }, { "epoch": 0.024673951357067323, "grad_norm": 5.4652561137125835, "learning_rate": 8.224236320913059e-07, "loss": 0.4913, "step": 490 }, { "epoch": 0.025177501384762577, "grad_norm": 8.092328645331502, "learning_rate": 8.392077878482712e-07, "loss": 0.525, "step": 500 }, { "epoch": 0.025681051412457828, "grad_norm": 6.288739958497919, "learning_rate": 8.559919436052367e-07, "loss": 0.5732, "step": 510 }, { "epoch": 0.02618460144015308, "grad_norm": 6.017897459487346, "learning_rate": 8.727760993622021e-07, "loss": 0.5042, "step": 520 }, { "epoch": 0.02668815146784833, "grad_norm": 7.315898185645534, "learning_rate": 8.895602551191676e-07, "loss": 0.493, "step": 530 }, { "epoch": 0.02719170149554358, "grad_norm": 7.451778554676811, "learning_rate": 9.063444108761329e-07, "loss": 0.4753, "step": 540 }, { "epoch": 0.027695251523238835, "grad_norm": 6.874482620078298, "learning_rate": 9.231285666330984e-07, "loss": 0.541, "step": 550 }, { "epoch": 0.028198801550934086, "grad_norm": 6.797829854465088, "learning_rate": 9.399127223900639e-07, "loss": 0.5047, "step": 560 }, { "epoch": 0.028702351578629336, "grad_norm": 5.705123880158293, "learning_rate": 9.566968781470292e-07, "loss": 0.4869, "step": 570 }, { "epoch": 0.029205901606324587, "grad_norm": 6.495880184091474, "learning_rate": 9.734810339039948e-07, "loss": 0.5324, "step": 580 }, { "epoch": 0.02970945163401984, "grad_norm": 8.11674951043694, "learning_rate": 9.9026518966096e-07, "loss": 0.51, "step": 590 }, { "epoch": 0.030213001661715092, "grad_norm": 6.474055711822199, "learning_rate": 1.0070493454179256e-06, "loss": 0.5126, "step": 600 }, { "epoch": 0.030716551689410343, "grad_norm": 7.321507054134773, "learning_rate": 1.023833501174891e-06, "loss": 0.5289, "step": 610 }, { "epoch": 0.031220101717105594, "grad_norm": 5.652719661152647, "learning_rate": 1.0406176569318564e-06, "loss": 0.5053, "step": 620 }, { "epoch": 0.031723651744800845, "grad_norm": 6.998821063316577, "learning_rate": 1.0574018126888218e-06, "loss": 0.4673, "step": 630 }, { "epoch": 0.0322272017724961, "grad_norm": 6.255183732691073, "learning_rate": 1.0741859684457872e-06, "loss": 0.5083, "step": 640 }, { "epoch": 0.032730751800191346, "grad_norm": 7.175874006380509, "learning_rate": 1.0909701242027529e-06, "loss": 0.5169, "step": 650 }, { "epoch": 0.0332343018278866, "grad_norm": 6.2391553249903104, "learning_rate": 1.107754279959718e-06, "loss": 0.4736, "step": 660 }, { "epoch": 0.033737851855581855, "grad_norm": 6.173889959859614, "learning_rate": 1.1245384357166837e-06, "loss": 0.5042, "step": 670 }, { "epoch": 0.0342414018832771, "grad_norm": 8.577984701122446, "learning_rate": 1.1413225914736488e-06, "loss": 0.4625, "step": 680 }, { "epoch": 0.034744951910972356, "grad_norm": 5.694668375334038, "learning_rate": 1.1581067472306145e-06, "loss": 0.4911, "step": 690 }, { "epoch": 0.035248501938667604, "grad_norm": 6.8962617174526875, "learning_rate": 1.1748909029875799e-06, "loss": 0.5146, "step": 700 }, { "epoch": 0.03575205196636286, "grad_norm": 5.031733340390489, "learning_rate": 1.1916750587445452e-06, "loss": 0.4848, "step": 710 }, { "epoch": 0.03625560199405811, "grad_norm": 7.422888885848551, "learning_rate": 1.2084592145015106e-06, "loss": 0.5367, "step": 720 }, { "epoch": 0.03675915202175336, "grad_norm": 7.2510143903485, "learning_rate": 1.225243370258476e-06, "loss": 0.5121, "step": 730 }, { "epoch": 0.037262702049448614, "grad_norm": 8.059212953191567, "learning_rate": 1.2420275260154417e-06, "loss": 0.4658, "step": 740 }, { "epoch": 0.03776625207714386, "grad_norm": 7.092575110273428, "learning_rate": 1.2588116817724068e-06, "loss": 0.5204, "step": 750 }, { "epoch": 0.038269802104839115, "grad_norm": 5.351178437790541, "learning_rate": 1.2755958375293722e-06, "loss": 0.5029, "step": 760 }, { "epoch": 0.03877335213253437, "grad_norm": 6.58473954614848, "learning_rate": 1.2923799932863379e-06, "loss": 0.5689, "step": 770 }, { "epoch": 0.03927690216022962, "grad_norm": 5.893130887414932, "learning_rate": 1.3091641490433033e-06, "loss": 0.4902, "step": 780 }, { "epoch": 0.03978045218792487, "grad_norm": 5.675700016823575, "learning_rate": 1.3259483048002687e-06, "loss": 0.4784, "step": 790 }, { "epoch": 0.040284002215620125, "grad_norm": 6.398754488860266, "learning_rate": 1.3427324605572343e-06, "loss": 0.51, "step": 800 }, { "epoch": 0.04078755224331537, "grad_norm": 5.039470942643369, "learning_rate": 1.3595166163141997e-06, "loss": 0.4967, "step": 810 }, { "epoch": 0.04129110227101063, "grad_norm": 5.739055328552068, "learning_rate": 1.3763007720711649e-06, "loss": 0.4866, "step": 820 }, { "epoch": 0.041794652298705874, "grad_norm": 5.873597723678773, "learning_rate": 1.3930849278281303e-06, "loss": 0.4968, "step": 830 }, { "epoch": 0.04229820232640113, "grad_norm": 5.053234319469844, "learning_rate": 1.4098690835850959e-06, "loss": 0.5028, "step": 840 }, { "epoch": 0.04280175235409638, "grad_norm": 6.830062482200297, "learning_rate": 1.4266532393420613e-06, "loss": 0.4638, "step": 850 }, { "epoch": 0.04330530238179163, "grad_norm": 8.049316276860297, "learning_rate": 1.4434373950990267e-06, "loss": 0.5499, "step": 860 }, { "epoch": 0.043808852409486884, "grad_norm": 5.889014736153152, "learning_rate": 1.4602215508559918e-06, "loss": 0.4702, "step": 870 }, { "epoch": 0.04431240243718213, "grad_norm": 7.41782735985873, "learning_rate": 1.4770057066129575e-06, "loss": 0.4985, "step": 880 }, { "epoch": 0.044815952464877386, "grad_norm": 6.89415212288187, "learning_rate": 1.4937898623699229e-06, "loss": 0.476, "step": 890 }, { "epoch": 0.04531950249257264, "grad_norm": 4.877264867504682, "learning_rate": 1.5105740181268883e-06, "loss": 0.4662, "step": 900 }, { "epoch": 0.04582305252026789, "grad_norm": 6.392994531470521, "learning_rate": 1.5273581738838539e-06, "loss": 0.481, "step": 910 }, { "epoch": 0.04632660254796314, "grad_norm": 5.720039232784456, "learning_rate": 1.5441423296408193e-06, "loss": 0.4521, "step": 920 }, { "epoch": 0.04683015257565839, "grad_norm": 5.298190707972381, "learning_rate": 1.5609264853977847e-06, "loss": 0.4439, "step": 930 }, { "epoch": 0.04733370260335364, "grad_norm": 6.358096877566834, "learning_rate": 1.5777106411547499e-06, "loss": 0.4966, "step": 940 }, { "epoch": 0.0478372526310489, "grad_norm": 7.915261961899164, "learning_rate": 1.5944947969117155e-06, "loss": 0.4843, "step": 950 }, { "epoch": 0.048340802658744145, "grad_norm": 6.690561583489533, "learning_rate": 1.6112789526686809e-06, "loss": 0.4729, "step": 960 }, { "epoch": 0.0488443526864394, "grad_norm": 6.071945982890583, "learning_rate": 1.6280631084256463e-06, "loss": 0.5112, "step": 970 }, { "epoch": 0.049347902714134646, "grad_norm": 5.317956034812825, "learning_rate": 1.6448472641826119e-06, "loss": 0.4468, "step": 980 }, { "epoch": 0.0498514527418299, "grad_norm": 7.459962878542078, "learning_rate": 1.6616314199395773e-06, "loss": 0.5277, "step": 990 }, { "epoch": 0.050355002769525155, "grad_norm": 6.872654778636845, "learning_rate": 1.6784155756965425e-06, "loss": 0.5478, "step": 1000 }, { "epoch": 0.0508585527972204, "grad_norm": 6.825184531866471, "learning_rate": 1.6951997314535079e-06, "loss": 0.5027, "step": 1010 }, { "epoch": 0.051362102824915656, "grad_norm": 5.916638343571365, "learning_rate": 1.7119838872104735e-06, "loss": 0.4803, "step": 1020 }, { "epoch": 0.051865652852610904, "grad_norm": 7.074107120791568, "learning_rate": 1.7287680429674389e-06, "loss": 0.5403, "step": 1030 }, { "epoch": 0.05236920288030616, "grad_norm": 7.0480109116465135, "learning_rate": 1.7455521987244043e-06, "loss": 0.4814, "step": 1040 }, { "epoch": 0.05287275290800141, "grad_norm": 6.500720362162611, "learning_rate": 1.7623363544813699e-06, "loss": 0.4725, "step": 1050 }, { "epoch": 0.05337630293569666, "grad_norm": 5.258065323386125, "learning_rate": 1.7791205102383353e-06, "loss": 0.4998, "step": 1060 }, { "epoch": 0.053879852963391914, "grad_norm": 5.893463502025486, "learning_rate": 1.7959046659953005e-06, "loss": 0.5285, "step": 1070 }, { "epoch": 0.05438340299108716, "grad_norm": 6.799809068588263, "learning_rate": 1.8126888217522659e-06, "loss": 0.5179, "step": 1080 }, { "epoch": 0.054886953018782415, "grad_norm": 5.648382537635026, "learning_rate": 1.8294729775092315e-06, "loss": 0.4893, "step": 1090 }, { "epoch": 0.05539050304647767, "grad_norm": 6.46391265008981, "learning_rate": 1.8462571332661969e-06, "loss": 0.4521, "step": 1100 }, { "epoch": 0.05589405307417292, "grad_norm": 7.582490733532606, "learning_rate": 1.8630412890231623e-06, "loss": 0.4796, "step": 1110 }, { "epoch": 0.05639760310186817, "grad_norm": 5.917586475890956, "learning_rate": 1.8798254447801279e-06, "loss": 0.4725, "step": 1120 }, { "epoch": 0.056901153129563425, "grad_norm": 5.50683254295277, "learning_rate": 1.8966096005370933e-06, "loss": 0.477, "step": 1130 }, { "epoch": 0.05740470315725867, "grad_norm": 9.234378127213672, "learning_rate": 1.9133937562940585e-06, "loss": 0.5117, "step": 1140 }, { "epoch": 0.05790825318495393, "grad_norm": 5.190339558809401, "learning_rate": 1.930177912051024e-06, "loss": 0.434, "step": 1150 }, { "epoch": 0.058411803212649174, "grad_norm": 6.9184104293961886, "learning_rate": 1.9469620678079897e-06, "loss": 0.4967, "step": 1160 }, { "epoch": 0.05891535324034443, "grad_norm": 6.628267512466611, "learning_rate": 1.963746223564955e-06, "loss": 0.4761, "step": 1170 }, { "epoch": 0.05941890326803968, "grad_norm": 6.119615052240484, "learning_rate": 1.98053037932192e-06, "loss": 0.5, "step": 1180 }, { "epoch": 0.05992245329573493, "grad_norm": 6.141467383349594, "learning_rate": 1.9973145350788857e-06, "loss": 0.4601, "step": 1190 }, { "epoch": 0.060426003323430184, "grad_norm": 5.5566154570018655, "learning_rate": 2.0140986908358513e-06, "loss": 0.484, "step": 1200 }, { "epoch": 0.06092955335112543, "grad_norm": 5.974689539950695, "learning_rate": 2.0308828465928165e-06, "loss": 0.4868, "step": 1210 }, { "epoch": 0.061433103378820686, "grad_norm": 5.766549049884064, "learning_rate": 2.047667002349782e-06, "loss": 0.4672, "step": 1220 }, { "epoch": 0.06193665340651594, "grad_norm": 5.94052164277274, "learning_rate": 2.0644511581067477e-06, "loss": 0.4749, "step": 1230 }, { "epoch": 0.06244020343421119, "grad_norm": 7.201931145199051, "learning_rate": 2.081235313863713e-06, "loss": 0.4796, "step": 1240 }, { "epoch": 0.06294375346190643, "grad_norm": 6.807635275637123, "learning_rate": 2.098019469620678e-06, "loss": 0.5427, "step": 1250 }, { "epoch": 0.06344730348960169, "grad_norm": 6.732397107773614, "learning_rate": 2.1148036253776437e-06, "loss": 0.4802, "step": 1260 }, { "epoch": 0.06395085351729694, "grad_norm": 7.133160988797256, "learning_rate": 2.1315877811346093e-06, "loss": 0.4716, "step": 1270 }, { "epoch": 0.0644544035449922, "grad_norm": 6.356446789835723, "learning_rate": 2.1483719368915745e-06, "loss": 0.4695, "step": 1280 }, { "epoch": 0.06495795357268745, "grad_norm": 6.091474954956075, "learning_rate": 2.1651560926485397e-06, "loss": 0.4286, "step": 1290 }, { "epoch": 0.06546150360038269, "grad_norm": 5.215594241152414, "learning_rate": 2.1819402484055057e-06, "loss": 0.5043, "step": 1300 }, { "epoch": 0.06596505362807795, "grad_norm": 6.954731328100331, "learning_rate": 2.198724404162471e-06, "loss": 0.4348, "step": 1310 }, { "epoch": 0.0664686036557732, "grad_norm": 7.08032691298512, "learning_rate": 2.215508559919436e-06, "loss": 0.449, "step": 1320 }, { "epoch": 0.06697215368346845, "grad_norm": 7.440227227414681, "learning_rate": 2.2322927156764017e-06, "loss": 0.4903, "step": 1330 }, { "epoch": 0.06747570371116371, "grad_norm": 6.0439693773864835, "learning_rate": 2.2490768714333673e-06, "loss": 0.4675, "step": 1340 }, { "epoch": 0.06797925373885895, "grad_norm": 6.161956193083967, "learning_rate": 2.2658610271903325e-06, "loss": 0.4843, "step": 1350 }, { "epoch": 0.0684828037665542, "grad_norm": 6.686274883060637, "learning_rate": 2.2826451829472977e-06, "loss": 0.5059, "step": 1360 }, { "epoch": 0.06898635379424946, "grad_norm": 5.883860937670313, "learning_rate": 2.2994293387042633e-06, "loss": 0.5144, "step": 1370 }, { "epoch": 0.06948990382194471, "grad_norm": 4.84206695699211, "learning_rate": 2.316213494461229e-06, "loss": 0.4458, "step": 1380 }, { "epoch": 0.06999345384963997, "grad_norm": 6.9650657631524755, "learning_rate": 2.332997650218194e-06, "loss": 0.4748, "step": 1390 }, { "epoch": 0.07049700387733521, "grad_norm": 6.882044486815148, "learning_rate": 2.3497818059751597e-06, "loss": 0.5081, "step": 1400 }, { "epoch": 0.07100055390503046, "grad_norm": 6.248459166269425, "learning_rate": 2.3665659617321253e-06, "loss": 0.4726, "step": 1410 }, { "epoch": 0.07150410393272572, "grad_norm": 6.353865926752929, "learning_rate": 2.3833501174890905e-06, "loss": 0.5201, "step": 1420 }, { "epoch": 0.07200765396042097, "grad_norm": 7.757050071050892, "learning_rate": 2.4001342732460557e-06, "loss": 0.5441, "step": 1430 }, { "epoch": 0.07251120398811622, "grad_norm": 5.457745942821872, "learning_rate": 2.4169184290030213e-06, "loss": 0.4792, "step": 1440 }, { "epoch": 0.07301475401581146, "grad_norm": 6.6555397464404376, "learning_rate": 2.433702584759987e-06, "loss": 0.4858, "step": 1450 }, { "epoch": 0.07351830404350672, "grad_norm": 5.430672112131166, "learning_rate": 2.450486740516952e-06, "loss": 0.4399, "step": 1460 }, { "epoch": 0.07402185407120197, "grad_norm": 7.900509749019717, "learning_rate": 2.4672708962739177e-06, "loss": 0.4709, "step": 1470 }, { "epoch": 0.07452540409889723, "grad_norm": 5.453747466627823, "learning_rate": 2.4840550520308833e-06, "loss": 0.4901, "step": 1480 }, { "epoch": 0.07502895412659248, "grad_norm": 7.45838305173497, "learning_rate": 2.5008392077878485e-06, "loss": 0.4768, "step": 1490 }, { "epoch": 0.07553250415428772, "grad_norm": 7.35578815743402, "learning_rate": 2.5176233635448137e-06, "loss": 0.5141, "step": 1500 }, { "epoch": 0.07603605418198298, "grad_norm": 6.987699732878445, "learning_rate": 2.5344075193017793e-06, "loss": 0.5162, "step": 1510 }, { "epoch": 0.07653960420967823, "grad_norm": 7.234680831729561, "learning_rate": 2.5511916750587445e-06, "loss": 0.4942, "step": 1520 }, { "epoch": 0.07704315423737348, "grad_norm": 7.54534000283757, "learning_rate": 2.5679758308157105e-06, "loss": 0.4984, "step": 1530 }, { "epoch": 0.07754670426506874, "grad_norm": 5.465863928315474, "learning_rate": 2.5847599865726757e-06, "loss": 0.4635, "step": 1540 }, { "epoch": 0.07805025429276398, "grad_norm": 6.423920047178516, "learning_rate": 2.6015441423296413e-06, "loss": 0.4964, "step": 1550 }, { "epoch": 0.07855380432045923, "grad_norm": 6.9603315460532995, "learning_rate": 2.6183282980866065e-06, "loss": 0.4651, "step": 1560 }, { "epoch": 0.07905735434815449, "grad_norm": 6.79517607897699, "learning_rate": 2.6351124538435717e-06, "loss": 0.4764, "step": 1570 }, { "epoch": 0.07956090437584974, "grad_norm": 6.035934189269749, "learning_rate": 2.6518966096005373e-06, "loss": 0.4667, "step": 1580 }, { "epoch": 0.080064454403545, "grad_norm": 5.3254715270975534, "learning_rate": 2.6686807653575025e-06, "loss": 0.4395, "step": 1590 }, { "epoch": 0.08056800443124025, "grad_norm": 5.148333406784627, "learning_rate": 2.6854649211144685e-06, "loss": 0.4682, "step": 1600 }, { "epoch": 0.08107155445893549, "grad_norm": 5.5486527385444875, "learning_rate": 2.7022490768714337e-06, "loss": 0.4484, "step": 1610 }, { "epoch": 0.08157510448663075, "grad_norm": 5.812916102054217, "learning_rate": 2.7190332326283993e-06, "loss": 0.4746, "step": 1620 }, { "epoch": 0.082078654514326, "grad_norm": 7.750938247136075, "learning_rate": 2.7358173883853645e-06, "loss": 0.4422, "step": 1630 }, { "epoch": 0.08258220454202125, "grad_norm": 8.047620812785542, "learning_rate": 2.7526015441423297e-06, "loss": 0.4572, "step": 1640 }, { "epoch": 0.08308575456971651, "grad_norm": 5.842162949505981, "learning_rate": 2.7693856998992953e-06, "loss": 0.5087, "step": 1650 }, { "epoch": 0.08358930459741175, "grad_norm": 6.341460054085477, "learning_rate": 2.7861698556562605e-06, "loss": 0.4671, "step": 1660 }, { "epoch": 0.084092854625107, "grad_norm": 5.595576981838117, "learning_rate": 2.8029540114132257e-06, "loss": 0.4775, "step": 1670 }, { "epoch": 0.08459640465280226, "grad_norm": 6.778307296639348, "learning_rate": 2.8197381671701917e-06, "loss": 0.4672, "step": 1680 }, { "epoch": 0.08509995468049751, "grad_norm": 7.1428165827343255, "learning_rate": 2.836522322927157e-06, "loss": 0.5291, "step": 1690 }, { "epoch": 0.08560350470819277, "grad_norm": 6.163696247183399, "learning_rate": 2.8533064786841225e-06, "loss": 0.4704, "step": 1700 }, { "epoch": 0.086107054735888, "grad_norm": 7.487580443046125, "learning_rate": 2.8700906344410877e-06, "loss": 0.4562, "step": 1710 }, { "epoch": 0.08661060476358326, "grad_norm": 5.771289220995201, "learning_rate": 2.8868747901980533e-06, "loss": 0.488, "step": 1720 }, { "epoch": 0.08711415479127851, "grad_norm": 7.033752026521323, "learning_rate": 2.9036589459550185e-06, "loss": 0.4624, "step": 1730 }, { "epoch": 0.08761770481897377, "grad_norm": 8.378695528837476, "learning_rate": 2.9204431017119837e-06, "loss": 0.4877, "step": 1740 }, { "epoch": 0.08812125484666902, "grad_norm": 6.845903634350184, "learning_rate": 2.9372272574689497e-06, "loss": 0.4898, "step": 1750 }, { "epoch": 0.08862480487436426, "grad_norm": 5.957505021351809, "learning_rate": 2.954011413225915e-06, "loss": 0.4703, "step": 1760 }, { "epoch": 0.08912835490205952, "grad_norm": 7.065611775976185, "learning_rate": 2.9707955689828805e-06, "loss": 0.53, "step": 1770 }, { "epoch": 0.08963190492975477, "grad_norm": 6.606788686022319, "learning_rate": 2.9875797247398457e-06, "loss": 0.5094, "step": 1780 }, { "epoch": 0.09013545495745003, "grad_norm": 6.519026707701169, "learning_rate": 3.0043638804968113e-06, "loss": 0.4319, "step": 1790 }, { "epoch": 0.09063900498514528, "grad_norm": 6.849768355958901, "learning_rate": 3.0211480362537765e-06, "loss": 0.4804, "step": 1800 }, { "epoch": 0.09114255501284052, "grad_norm": 6.645505907304642, "learning_rate": 3.0379321920107417e-06, "loss": 0.5071, "step": 1810 }, { "epoch": 0.09164610504053577, "grad_norm": 5.841663193218122, "learning_rate": 3.0547163477677077e-06, "loss": 0.5263, "step": 1820 }, { "epoch": 0.09214965506823103, "grad_norm": 6.642462935649846, "learning_rate": 3.071500503524673e-06, "loss": 0.474, "step": 1830 }, { "epoch": 0.09265320509592628, "grad_norm": 5.63364523586092, "learning_rate": 3.0882846592816385e-06, "loss": 0.4986, "step": 1840 }, { "epoch": 0.09315675512362154, "grad_norm": 4.385457996175736, "learning_rate": 3.1050688150386037e-06, "loss": 0.4619, "step": 1850 }, { "epoch": 0.09366030515131678, "grad_norm": 6.334380385399969, "learning_rate": 3.1218529707955693e-06, "loss": 0.4673, "step": 1860 }, { "epoch": 0.09416385517901203, "grad_norm": 5.328513732912958, "learning_rate": 3.1386371265525345e-06, "loss": 0.4741, "step": 1870 }, { "epoch": 0.09466740520670729, "grad_norm": 5.96744289991445, "learning_rate": 3.1554212823094997e-06, "loss": 0.4686, "step": 1880 }, { "epoch": 0.09517095523440254, "grad_norm": 5.696046034405557, "learning_rate": 3.1722054380664657e-06, "loss": 0.5064, "step": 1890 }, { "epoch": 0.0956745052620978, "grad_norm": 5.700230796283194, "learning_rate": 3.188989593823431e-06, "loss": 0.4746, "step": 1900 }, { "epoch": 0.09617805528979304, "grad_norm": 5.185143839062262, "learning_rate": 3.2057737495803965e-06, "loss": 0.5099, "step": 1910 }, { "epoch": 0.09668160531748829, "grad_norm": 7.471422868428764, "learning_rate": 3.2225579053373617e-06, "loss": 0.5129, "step": 1920 }, { "epoch": 0.09718515534518354, "grad_norm": 6.552263736734854, "learning_rate": 3.2393420610943273e-06, "loss": 0.515, "step": 1930 }, { "epoch": 0.0976887053728788, "grad_norm": 6.957028248915917, "learning_rate": 3.2561262168512925e-06, "loss": 0.4747, "step": 1940 }, { "epoch": 0.09819225540057405, "grad_norm": 6.3711375603656135, "learning_rate": 3.2729103726082577e-06, "loss": 0.4988, "step": 1950 }, { "epoch": 0.09869580542826929, "grad_norm": 5.863350875368451, "learning_rate": 3.2896945283652238e-06, "loss": 0.4763, "step": 1960 }, { "epoch": 0.09919935545596455, "grad_norm": 5.110001309887617, "learning_rate": 3.306478684122189e-06, "loss": 0.5078, "step": 1970 }, { "epoch": 0.0997029054836598, "grad_norm": 7.730717050939946, "learning_rate": 3.3232628398791546e-06, "loss": 0.5199, "step": 1980 }, { "epoch": 0.10020645551135506, "grad_norm": 5.893579496600908, "learning_rate": 3.3400469956361197e-06, "loss": 0.4996, "step": 1990 }, { "epoch": 0.10071000553905031, "grad_norm": 6.038037185302695, "learning_rate": 3.356831151393085e-06, "loss": 0.4792, "step": 2000 }, { "epoch": 0.10121355556674555, "grad_norm": 6.5254947453293415, "learning_rate": 3.3736153071500505e-06, "loss": 0.5065, "step": 2010 }, { "epoch": 0.1017171055944408, "grad_norm": 7.0163987570602195, "learning_rate": 3.3903994629070157e-06, "loss": 0.4473, "step": 2020 }, { "epoch": 0.10222065562213606, "grad_norm": 5.360988598703461, "learning_rate": 3.4071836186639818e-06, "loss": 0.5095, "step": 2030 }, { "epoch": 0.10272420564983131, "grad_norm": 6.892259306816139, "learning_rate": 3.423967774420947e-06, "loss": 0.5085, "step": 2040 }, { "epoch": 0.10322775567752657, "grad_norm": 6.84721429326687, "learning_rate": 3.4407519301779126e-06, "loss": 0.5136, "step": 2050 }, { "epoch": 0.10373130570522181, "grad_norm": 6.338757274971814, "learning_rate": 3.4575360859348777e-06, "loss": 0.4846, "step": 2060 }, { "epoch": 0.10423485573291706, "grad_norm": 7.551632359607348, "learning_rate": 3.474320241691843e-06, "loss": 0.5488, "step": 2070 }, { "epoch": 0.10473840576061232, "grad_norm": 6.08297022396799, "learning_rate": 3.4911043974488085e-06, "loss": 0.5106, "step": 2080 }, { "epoch": 0.10524195578830757, "grad_norm": 5.287669318495262, "learning_rate": 3.5078885532057737e-06, "loss": 0.4523, "step": 2090 }, { "epoch": 0.10574550581600282, "grad_norm": 5.948570913144505, "learning_rate": 3.5246727089627398e-06, "loss": 0.5056, "step": 2100 }, { "epoch": 0.10624905584369806, "grad_norm": 5.1153176449997595, "learning_rate": 3.541456864719705e-06, "loss": 0.4831, "step": 2110 }, { "epoch": 0.10675260587139332, "grad_norm": 7.086696361579646, "learning_rate": 3.5582410204766706e-06, "loss": 0.4727, "step": 2120 }, { "epoch": 0.10725615589908857, "grad_norm": 6.7495573501765325, "learning_rate": 3.5750251762336357e-06, "loss": 0.4883, "step": 2130 }, { "epoch": 0.10775970592678383, "grad_norm": 6.2815215462590475, "learning_rate": 3.591809331990601e-06, "loss": 0.5566, "step": 2140 }, { "epoch": 0.10826325595447908, "grad_norm": 6.575702080080099, "learning_rate": 3.6085934877475665e-06, "loss": 0.5114, "step": 2150 }, { "epoch": 0.10876680598217432, "grad_norm": 5.720212303863932, "learning_rate": 3.6253776435045317e-06, "loss": 0.4478, "step": 2160 }, { "epoch": 0.10927035600986958, "grad_norm": 6.637638210540649, "learning_rate": 3.6421617992614978e-06, "loss": 0.4684, "step": 2170 }, { "epoch": 0.10977390603756483, "grad_norm": 6.372993978760531, "learning_rate": 3.658945955018463e-06, "loss": 0.5288, "step": 2180 }, { "epoch": 0.11027745606526008, "grad_norm": 5.583079297999234, "learning_rate": 3.6757301107754286e-06, "loss": 0.484, "step": 2190 }, { "epoch": 0.11078100609295534, "grad_norm": 6.808654071533685, "learning_rate": 3.6925142665323938e-06, "loss": 0.5162, "step": 2200 }, { "epoch": 0.1112845561206506, "grad_norm": 6.01401682898285, "learning_rate": 3.709298422289359e-06, "loss": 0.5252, "step": 2210 }, { "epoch": 0.11178810614834583, "grad_norm": 6.789928740846553, "learning_rate": 3.7260825780463246e-06, "loss": 0.5267, "step": 2220 }, { "epoch": 0.11229165617604109, "grad_norm": 5.39250870854594, "learning_rate": 3.7428667338032897e-06, "loss": 0.5261, "step": 2230 }, { "epoch": 0.11279520620373634, "grad_norm": 6.619126732401093, "learning_rate": 3.7596508895602558e-06, "loss": 0.5064, "step": 2240 }, { "epoch": 0.1132987562314316, "grad_norm": 6.597777267942098, "learning_rate": 3.776435045317221e-06, "loss": 0.4893, "step": 2250 }, { "epoch": 0.11380230625912685, "grad_norm": 6.440554916590027, "learning_rate": 3.7932192010741866e-06, "loss": 0.505, "step": 2260 }, { "epoch": 0.11430585628682209, "grad_norm": 7.330126922839561, "learning_rate": 3.8100033568311518e-06, "loss": 0.5227, "step": 2270 }, { "epoch": 0.11480940631451735, "grad_norm": 4.341660125837327, "learning_rate": 3.826787512588117e-06, "loss": 0.4819, "step": 2280 }, { "epoch": 0.1153129563422126, "grad_norm": 5.1728953904589074, "learning_rate": 3.843571668345082e-06, "loss": 0.4601, "step": 2290 }, { "epoch": 0.11581650636990785, "grad_norm": 6.476002272764356, "learning_rate": 3.860355824102048e-06, "loss": 0.4954, "step": 2300 }, { "epoch": 0.11632005639760311, "grad_norm": 6.164976594412468, "learning_rate": 3.877139979859013e-06, "loss": 0.4735, "step": 2310 }, { "epoch": 0.11682360642529835, "grad_norm": 6.751836097450498, "learning_rate": 3.893924135615979e-06, "loss": 0.5458, "step": 2320 }, { "epoch": 0.1173271564529936, "grad_norm": 4.931485342927508, "learning_rate": 3.910708291372945e-06, "loss": 0.4578, "step": 2330 }, { "epoch": 0.11783070648068886, "grad_norm": 6.412423602105977, "learning_rate": 3.92749244712991e-06, "loss": 0.4874, "step": 2340 }, { "epoch": 0.11833425650838411, "grad_norm": 5.430042863600796, "learning_rate": 3.944276602886875e-06, "loss": 0.4843, "step": 2350 }, { "epoch": 0.11883780653607937, "grad_norm": 5.578697223795991, "learning_rate": 3.96106075864384e-06, "loss": 0.5033, "step": 2360 }, { "epoch": 0.1193413565637746, "grad_norm": 4.7205473810962335, "learning_rate": 3.977844914400806e-06, "loss": 0.5022, "step": 2370 }, { "epoch": 0.11984490659146986, "grad_norm": 5.976181680846597, "learning_rate": 3.994629070157771e-06, "loss": 0.5116, "step": 2380 }, { "epoch": 0.12034845661916511, "grad_norm": 5.518395105911466, "learning_rate": 4.011413225914737e-06, "loss": 0.507, "step": 2390 }, { "epoch": 0.12085200664686037, "grad_norm": 6.098699227048054, "learning_rate": 4.028197381671703e-06, "loss": 0.4695, "step": 2400 }, { "epoch": 0.12135555667455562, "grad_norm": 6.95490794362938, "learning_rate": 4.044981537428668e-06, "loss": 0.5091, "step": 2410 }, { "epoch": 0.12185910670225086, "grad_norm": 6.2714642844573625, "learning_rate": 4.061765693185633e-06, "loss": 0.4878, "step": 2420 }, { "epoch": 0.12236265672994612, "grad_norm": 5.205602083228572, "learning_rate": 4.078549848942598e-06, "loss": 0.4755, "step": 2430 }, { "epoch": 0.12286620675764137, "grad_norm": 7.082755114376588, "learning_rate": 4.095334004699564e-06, "loss": 0.5267, "step": 2440 }, { "epoch": 0.12336975678533663, "grad_norm": 4.854384417477672, "learning_rate": 4.112118160456529e-06, "loss": 0.455, "step": 2450 }, { "epoch": 0.12387330681303188, "grad_norm": 5.51137559660665, "learning_rate": 4.128902316213495e-06, "loss": 0.5026, "step": 2460 }, { "epoch": 0.12437685684072712, "grad_norm": 6.159461352905283, "learning_rate": 4.145686471970461e-06, "loss": 0.5, "step": 2470 }, { "epoch": 0.12488040686842238, "grad_norm": 5.929838108483034, "learning_rate": 4.162470627727426e-06, "loss": 0.5008, "step": 2480 }, { "epoch": 0.12538395689611762, "grad_norm": 6.31381416743044, "learning_rate": 4.179254783484391e-06, "loss": 0.4652, "step": 2490 }, { "epoch": 0.12588750692381287, "grad_norm": 6.186804473794271, "learning_rate": 4.196038939241356e-06, "loss": 0.518, "step": 2500 }, { "epoch": 0.12639105695150812, "grad_norm": 5.575153921082633, "learning_rate": 4.212823094998321e-06, "loss": 0.5031, "step": 2510 }, { "epoch": 0.12689460697920338, "grad_norm": 5.864316507368532, "learning_rate": 4.229607250755287e-06, "loss": 0.499, "step": 2520 }, { "epoch": 0.12739815700689863, "grad_norm": 6.9758099904635005, "learning_rate": 4.246391406512253e-06, "loss": 0.4666, "step": 2530 }, { "epoch": 0.1279017070345939, "grad_norm": 6.288701868150937, "learning_rate": 4.263175562269219e-06, "loss": 0.5182, "step": 2540 }, { "epoch": 0.12840525706228914, "grad_norm": 5.445073357982004, "learning_rate": 4.279959718026184e-06, "loss": 0.5791, "step": 2550 }, { "epoch": 0.1289088070899844, "grad_norm": 6.162390823975325, "learning_rate": 4.296743873783149e-06, "loss": 0.5389, "step": 2560 }, { "epoch": 0.12941235711767965, "grad_norm": 7.303902283778273, "learning_rate": 4.313528029540114e-06, "loss": 0.4915, "step": 2570 }, { "epoch": 0.1299159071453749, "grad_norm": 7.529994808342196, "learning_rate": 4.330312185297079e-06, "loss": 0.4632, "step": 2580 }, { "epoch": 0.13041945717307016, "grad_norm": 5.563489134533866, "learning_rate": 4.347096341054045e-06, "loss": 0.5034, "step": 2590 }, { "epoch": 0.13092300720076538, "grad_norm": 7.293882058083905, "learning_rate": 4.363880496811011e-06, "loss": 0.4686, "step": 2600 }, { "epoch": 0.13142655722846064, "grad_norm": 6.61223918433769, "learning_rate": 4.380664652567977e-06, "loss": 0.5287, "step": 2610 }, { "epoch": 0.1319301072561559, "grad_norm": 6.460929554294699, "learning_rate": 4.397448808324942e-06, "loss": 0.5229, "step": 2620 }, { "epoch": 0.13243365728385115, "grad_norm": 5.282644081465679, "learning_rate": 4.414232964081907e-06, "loss": 0.5, "step": 2630 }, { "epoch": 0.1329372073115464, "grad_norm": 6.5085498139656695, "learning_rate": 4.431017119838872e-06, "loss": 0.4871, "step": 2640 }, { "epoch": 0.13344075733924166, "grad_norm": 6.33708852761405, "learning_rate": 4.447801275595837e-06, "loss": 0.5005, "step": 2650 }, { "epoch": 0.1339443073669369, "grad_norm": 5.4529491542784845, "learning_rate": 4.464585431352803e-06, "loss": 0.4575, "step": 2660 }, { "epoch": 0.13444785739463216, "grad_norm": 5.881054956164382, "learning_rate": 4.481369587109769e-06, "loss": 0.494, "step": 2670 }, { "epoch": 0.13495140742232742, "grad_norm": 5.918947662957121, "learning_rate": 4.498153742866735e-06, "loss": 0.5657, "step": 2680 }, { "epoch": 0.13545495745002267, "grad_norm": 5.765878098304069, "learning_rate": 4.5149378986237e-06, "loss": 0.5191, "step": 2690 }, { "epoch": 0.1359585074777179, "grad_norm": 5.938465496673458, "learning_rate": 4.531722054380665e-06, "loss": 0.4961, "step": 2700 }, { "epoch": 0.13646205750541315, "grad_norm": 6.386954414998637, "learning_rate": 4.54850621013763e-06, "loss": 0.5145, "step": 2710 }, { "epoch": 0.1369656075331084, "grad_norm": 5.059259565899698, "learning_rate": 4.565290365894595e-06, "loss": 0.501, "step": 2720 }, { "epoch": 0.13746915756080366, "grad_norm": 6.114702591941518, "learning_rate": 4.582074521651561e-06, "loss": 0.5111, "step": 2730 }, { "epoch": 0.13797270758849892, "grad_norm": 7.583805448119571, "learning_rate": 4.598858677408527e-06, "loss": 0.4618, "step": 2740 }, { "epoch": 0.13847625761619417, "grad_norm": 6.8081460070630575, "learning_rate": 4.615642833165493e-06, "loss": 0.5129, "step": 2750 }, { "epoch": 0.13897980764388942, "grad_norm": 6.490179283478047, "learning_rate": 4.632426988922458e-06, "loss": 0.4815, "step": 2760 }, { "epoch": 0.13948335767158468, "grad_norm": 5.235176838334416, "learning_rate": 4.649211144679423e-06, "loss": 0.4941, "step": 2770 }, { "epoch": 0.13998690769927993, "grad_norm": 7.547045434296421, "learning_rate": 4.665995300436388e-06, "loss": 0.5435, "step": 2780 }, { "epoch": 0.1404904577269752, "grad_norm": 5.762550680468929, "learning_rate": 4.682779456193353e-06, "loss": 0.4736, "step": 2790 }, { "epoch": 0.14099400775467041, "grad_norm": 6.391666455457382, "learning_rate": 4.699563611950319e-06, "loss": 0.4998, "step": 2800 }, { "epoch": 0.14149755778236567, "grad_norm": 5.63906992008094, "learning_rate": 4.716347767707285e-06, "loss": 0.5439, "step": 2810 }, { "epoch": 0.14200110781006092, "grad_norm": 7.174082583120372, "learning_rate": 4.733131923464251e-06, "loss": 0.5062, "step": 2820 }, { "epoch": 0.14250465783775618, "grad_norm": 6.208208062920523, "learning_rate": 4.749916079221216e-06, "loss": 0.5015, "step": 2830 }, { "epoch": 0.14300820786545143, "grad_norm": 5.71717190100412, "learning_rate": 4.766700234978181e-06, "loss": 0.5244, "step": 2840 }, { "epoch": 0.14351175789314669, "grad_norm": 6.353332791961435, "learning_rate": 4.783484390735146e-06, "loss": 0.4679, "step": 2850 }, { "epoch": 0.14401530792084194, "grad_norm": 7.650626266959237, "learning_rate": 4.800268546492111e-06, "loss": 0.5071, "step": 2860 }, { "epoch": 0.1445188579485372, "grad_norm": 6.34234456239508, "learning_rate": 4.817052702249077e-06, "loss": 0.457, "step": 2870 }, { "epoch": 0.14502240797623245, "grad_norm": 7.064631148089866, "learning_rate": 4.833836858006043e-06, "loss": 0.509, "step": 2880 }, { "epoch": 0.1455259580039277, "grad_norm": 5.85938714533859, "learning_rate": 4.850621013763009e-06, "loss": 0.4764, "step": 2890 }, { "epoch": 0.14602950803162293, "grad_norm": 6.177329916714666, "learning_rate": 4.867405169519974e-06, "loss": 0.5167, "step": 2900 }, { "epoch": 0.14653305805931818, "grad_norm": 5.814991403804744, "learning_rate": 4.884189325276939e-06, "loss": 0.5323, "step": 2910 }, { "epoch": 0.14703660808701344, "grad_norm": 6.358956470939861, "learning_rate": 4.900973481033904e-06, "loss": 0.5278, "step": 2920 }, { "epoch": 0.1475401581147087, "grad_norm": 5.968472179326986, "learning_rate": 4.917757636790869e-06, "loss": 0.5044, "step": 2930 }, { "epoch": 0.14804370814240395, "grad_norm": 7.943124576082888, "learning_rate": 4.934541792547835e-06, "loss": 0.5346, "step": 2940 }, { "epoch": 0.1485472581700992, "grad_norm": 5.408542501284775, "learning_rate": 4.951325948304801e-06, "loss": 0.4711, "step": 2950 }, { "epoch": 0.14905080819779445, "grad_norm": 5.167015799180836, "learning_rate": 4.968110104061767e-06, "loss": 0.474, "step": 2960 }, { "epoch": 0.1495543582254897, "grad_norm": 6.038224878663466, "learning_rate": 4.984894259818732e-06, "loss": 0.5104, "step": 2970 }, { "epoch": 0.15005790825318496, "grad_norm": 5.127967640689812, "learning_rate": 5.001678415575697e-06, "loss": 0.4733, "step": 2980 }, { "epoch": 0.15056145828088022, "grad_norm": 5.026902316228042, "learning_rate": 5.018462571332663e-06, "loss": 0.4659, "step": 2990 }, { "epoch": 0.15106500830857544, "grad_norm": 5.531587960490175, "learning_rate": 5.035246727089627e-06, "loss": 0.493, "step": 3000 }, { "epoch": 0.1515685583362707, "grad_norm": 6.1284240626962045, "learning_rate": 5.052030882846593e-06, "loss": 0.4939, "step": 3010 }, { "epoch": 0.15207210836396595, "grad_norm": 5.005701932453974, "learning_rate": 5.068815038603559e-06, "loss": 0.5526, "step": 3020 }, { "epoch": 0.1525756583916612, "grad_norm": 6.381892770518012, "learning_rate": 5.085599194360525e-06, "loss": 0.5415, "step": 3030 }, { "epoch": 0.15307920841935646, "grad_norm": 7.010595904767301, "learning_rate": 5.102383350117489e-06, "loss": 0.4893, "step": 3040 }, { "epoch": 0.15358275844705171, "grad_norm": 5.212570495186879, "learning_rate": 5.119167505874455e-06, "loss": 0.5278, "step": 3050 }, { "epoch": 0.15408630847474697, "grad_norm": 5.983651666861901, "learning_rate": 5.135951661631421e-06, "loss": 0.5173, "step": 3060 }, { "epoch": 0.15458985850244222, "grad_norm": 5.165265378776682, "learning_rate": 5.152735817388385e-06, "loss": 0.5572, "step": 3070 }, { "epoch": 0.15509340853013748, "grad_norm": 5.464228291516905, "learning_rate": 5.1695199731453514e-06, "loss": 0.4872, "step": 3080 }, { "epoch": 0.15559695855783273, "grad_norm": 5.5401379982741545, "learning_rate": 5.186304128902317e-06, "loss": 0.4674, "step": 3090 }, { "epoch": 0.15610050858552796, "grad_norm": 6.262702185641731, "learning_rate": 5.203088284659283e-06, "loss": 0.5359, "step": 3100 }, { "epoch": 0.1566040586132232, "grad_norm": 6.06122841447641, "learning_rate": 5.219872440416247e-06, "loss": 0.5194, "step": 3110 }, { "epoch": 0.15710760864091847, "grad_norm": 6.29787083761567, "learning_rate": 5.236656596173213e-06, "loss": 0.5506, "step": 3120 }, { "epoch": 0.15761115866861372, "grad_norm": 6.145747577076409, "learning_rate": 5.253440751930179e-06, "loss": 0.4923, "step": 3130 }, { "epoch": 0.15811470869630898, "grad_norm": 6.928751115269542, "learning_rate": 5.270224907687143e-06, "loss": 0.4934, "step": 3140 }, { "epoch": 0.15861825872400423, "grad_norm": 7.362515575108414, "learning_rate": 5.2870090634441094e-06, "loss": 0.4847, "step": 3150 }, { "epoch": 0.15912180875169948, "grad_norm": 5.454499167351114, "learning_rate": 5.303793219201075e-06, "loss": 0.4762, "step": 3160 }, { "epoch": 0.15962535877939474, "grad_norm": 5.762786241423695, "learning_rate": 5.320577374958041e-06, "loss": 0.4795, "step": 3170 }, { "epoch": 0.16012890880709, "grad_norm": 5.8675068941652855, "learning_rate": 5.337361530715005e-06, "loss": 0.5592, "step": 3180 }, { "epoch": 0.16063245883478525, "grad_norm": 5.4338373007524075, "learning_rate": 5.354145686471971e-06, "loss": 0.5435, "step": 3190 }, { "epoch": 0.1611360088624805, "grad_norm": 5.601821976476267, "learning_rate": 5.370929842228937e-06, "loss": 0.5086, "step": 3200 }, { "epoch": 0.16163955889017573, "grad_norm": 6.142698538207202, "learning_rate": 5.387713997985901e-06, "loss": 0.5346, "step": 3210 }, { "epoch": 0.16214310891787098, "grad_norm": 5.348908400732374, "learning_rate": 5.4044981537428674e-06, "loss": 0.5199, "step": 3220 }, { "epoch": 0.16264665894556624, "grad_norm": 6.144230157007429, "learning_rate": 5.421282309499833e-06, "loss": 0.5356, "step": 3230 }, { "epoch": 0.1631502089732615, "grad_norm": 4.940278940276285, "learning_rate": 5.438066465256799e-06, "loss": 0.533, "step": 3240 }, { "epoch": 0.16365375900095674, "grad_norm": 7.782913908342249, "learning_rate": 5.454850621013763e-06, "loss": 0.5468, "step": 3250 }, { "epoch": 0.164157309028652, "grad_norm": 5.8453903282838136, "learning_rate": 5.471634776770729e-06, "loss": 0.5129, "step": 3260 }, { "epoch": 0.16466085905634725, "grad_norm": 5.483260940627952, "learning_rate": 5.488418932527693e-06, "loss": 0.5247, "step": 3270 }, { "epoch": 0.1651644090840425, "grad_norm": 7.558212486690225, "learning_rate": 5.505203088284659e-06, "loss": 0.5463, "step": 3280 }, { "epoch": 0.16566795911173776, "grad_norm": 4.759324031899643, "learning_rate": 5.5219872440416254e-06, "loss": 0.5299, "step": 3290 }, { "epoch": 0.16617150913943302, "grad_norm": 5.433509807474973, "learning_rate": 5.538771399798591e-06, "loss": 0.5077, "step": 3300 }, { "epoch": 0.16667505916712824, "grad_norm": 5.292419689015692, "learning_rate": 5.555555555555557e-06, "loss": 0.5377, "step": 3310 }, { "epoch": 0.1671786091948235, "grad_norm": 6.379763286089145, "learning_rate": 5.572339711312521e-06, "loss": 0.5618, "step": 3320 }, { "epoch": 0.16768215922251875, "grad_norm": 5.90892591316792, "learning_rate": 5.589123867069487e-06, "loss": 0.5045, "step": 3330 }, { "epoch": 0.168185709250214, "grad_norm": 6.265116542758552, "learning_rate": 5.605908022826451e-06, "loss": 0.5067, "step": 3340 }, { "epoch": 0.16868925927790926, "grad_norm": 4.794860499745337, "learning_rate": 5.622692178583417e-06, "loss": 0.5621, "step": 3350 }, { "epoch": 0.1691928093056045, "grad_norm": 4.8855312951714485, "learning_rate": 5.6394763343403835e-06, "loss": 0.4731, "step": 3360 }, { "epoch": 0.16969635933329977, "grad_norm": 5.735277819378695, "learning_rate": 5.656260490097349e-06, "loss": 0.4914, "step": 3370 }, { "epoch": 0.17019990936099502, "grad_norm": 5.269001426390499, "learning_rate": 5.673044645854314e-06, "loss": 0.4991, "step": 3380 }, { "epoch": 0.17070345938869028, "grad_norm": 5.882056614716237, "learning_rate": 5.689828801611279e-06, "loss": 0.5609, "step": 3390 }, { "epoch": 0.17120700941638553, "grad_norm": 6.1630661991266775, "learning_rate": 5.706612957368245e-06, "loss": 0.4927, "step": 3400 }, { "epoch": 0.17171055944408076, "grad_norm": 5.821227188749211, "learning_rate": 5.723397113125209e-06, "loss": 0.5068, "step": 3410 }, { "epoch": 0.172214109471776, "grad_norm": 5.531914544716252, "learning_rate": 5.7401812688821754e-06, "loss": 0.5259, "step": 3420 }, { "epoch": 0.17271765949947127, "grad_norm": 5.216929851626883, "learning_rate": 5.7569654246391415e-06, "loss": 0.4847, "step": 3430 }, { "epoch": 0.17322120952716652, "grad_norm": 5.127118347369316, "learning_rate": 5.773749580396107e-06, "loss": 0.5094, "step": 3440 }, { "epoch": 0.17372475955486177, "grad_norm": 6.293225713066407, "learning_rate": 5.790533736153072e-06, "loss": 0.5018, "step": 3450 }, { "epoch": 0.17422830958255703, "grad_norm": 5.143861153099495, "learning_rate": 5.807317891910037e-06, "loss": 0.5452, "step": 3460 }, { "epoch": 0.17473185961025228, "grad_norm": 5.720009607155483, "learning_rate": 5.824102047667003e-06, "loss": 0.5167, "step": 3470 }, { "epoch": 0.17523540963794754, "grad_norm": 5.065042880362157, "learning_rate": 5.840886203423967e-06, "loss": 0.4829, "step": 3480 }, { "epoch": 0.1757389596656428, "grad_norm": 5.730191055950511, "learning_rate": 5.8576703591809334e-06, "loss": 0.4666, "step": 3490 }, { "epoch": 0.17624250969333805, "grad_norm": 4.813522587969165, "learning_rate": 5.8744545149378995e-06, "loss": 0.5258, "step": 3500 }, { "epoch": 0.17674605972103327, "grad_norm": 6.006316764095601, "learning_rate": 5.891238670694865e-06, "loss": 0.5475, "step": 3510 }, { "epoch": 0.17724960974872853, "grad_norm": 5.845653758914876, "learning_rate": 5.90802282645183e-06, "loss": 0.503, "step": 3520 }, { "epoch": 0.17775315977642378, "grad_norm": 6.552302196352518, "learning_rate": 5.924806982208795e-06, "loss": 0.5545, "step": 3530 }, { "epoch": 0.17825670980411903, "grad_norm": 6.401129612859478, "learning_rate": 5.941591137965761e-06, "loss": 0.4935, "step": 3540 }, { "epoch": 0.1787602598318143, "grad_norm": 5.0360230157906445, "learning_rate": 5.958375293722725e-06, "loss": 0.4824, "step": 3550 }, { "epoch": 0.17926380985950954, "grad_norm": 6.922780728851594, "learning_rate": 5.9751594494796914e-06, "loss": 0.5542, "step": 3560 }, { "epoch": 0.1797673598872048, "grad_norm": 4.709520691499262, "learning_rate": 5.9919436052366575e-06, "loss": 0.5199, "step": 3570 }, { "epoch": 0.18027090991490005, "grad_norm": 4.8125641731364395, "learning_rate": 6.008727760993623e-06, "loss": 0.5073, "step": 3580 }, { "epoch": 0.1807744599425953, "grad_norm": 6.582324787588929, "learning_rate": 6.025511916750588e-06, "loss": 0.4634, "step": 3590 }, { "epoch": 0.18127800997029056, "grad_norm": 6.174210949676279, "learning_rate": 6.042296072507553e-06, "loss": 0.4933, "step": 3600 }, { "epoch": 0.1817815599979858, "grad_norm": 4.791949975465981, "learning_rate": 6.059080228264519e-06, "loss": 0.4741, "step": 3610 }, { "epoch": 0.18228511002568104, "grad_norm": 7.5495495959154235, "learning_rate": 6.075864384021483e-06, "loss": 0.5077, "step": 3620 }, { "epoch": 0.1827886600533763, "grad_norm": 6.7774922240258855, "learning_rate": 6.0926485397784494e-06, "loss": 0.5299, "step": 3630 }, { "epoch": 0.18329221008107155, "grad_norm": 6.810443021139677, "learning_rate": 6.1094326955354155e-06, "loss": 0.5343, "step": 3640 }, { "epoch": 0.1837957601087668, "grad_norm": 5.582744805110956, "learning_rate": 6.126216851292381e-06, "loss": 0.504, "step": 3650 }, { "epoch": 0.18429931013646206, "grad_norm": 4.711213401053921, "learning_rate": 6.143001007049346e-06, "loss": 0.4974, "step": 3660 }, { "epoch": 0.1848028601641573, "grad_norm": 5.284571251650306, "learning_rate": 6.159785162806311e-06, "loss": 0.4605, "step": 3670 }, { "epoch": 0.18530641019185257, "grad_norm": 7.866415743794448, "learning_rate": 6.176569318563277e-06, "loss": 0.5331, "step": 3680 }, { "epoch": 0.18580996021954782, "grad_norm": 5.014382550078223, "learning_rate": 6.193353474320241e-06, "loss": 0.4848, "step": 3690 }, { "epoch": 0.18631351024724307, "grad_norm": 5.417326479531604, "learning_rate": 6.2101376300772074e-06, "loss": 0.518, "step": 3700 }, { "epoch": 0.1868170602749383, "grad_norm": 5.6918085705178045, "learning_rate": 6.2269217858341735e-06, "loss": 0.4712, "step": 3710 }, { "epoch": 0.18732061030263356, "grad_norm": 5.982910070696528, "learning_rate": 6.243705941591139e-06, "loss": 0.5371, "step": 3720 }, { "epoch": 0.1878241603303288, "grad_norm": 6.007063254349121, "learning_rate": 6.260490097348104e-06, "loss": 0.535, "step": 3730 }, { "epoch": 0.18832771035802406, "grad_norm": 5.323027214216104, "learning_rate": 6.277274253105069e-06, "loss": 0.5653, "step": 3740 }, { "epoch": 0.18883126038571932, "grad_norm": 4.519165557022807, "learning_rate": 6.294058408862035e-06, "loss": 0.4874, "step": 3750 }, { "epoch": 0.18933481041341457, "grad_norm": 5.98991635454698, "learning_rate": 6.310842564618999e-06, "loss": 0.5509, "step": 3760 }, { "epoch": 0.18983836044110983, "grad_norm": 5.820854151699805, "learning_rate": 6.3276267203759655e-06, "loss": 0.4989, "step": 3770 }, { "epoch": 0.19034191046880508, "grad_norm": 5.579990799972453, "learning_rate": 6.3444108761329315e-06, "loss": 0.5363, "step": 3780 }, { "epoch": 0.19084546049650034, "grad_norm": 6.109709355287478, "learning_rate": 6.361195031889897e-06, "loss": 0.5021, "step": 3790 }, { "epoch": 0.1913490105241956, "grad_norm": 6.505542800211632, "learning_rate": 6.377979187646862e-06, "loss": 0.5267, "step": 3800 }, { "epoch": 0.19185256055189084, "grad_norm": 6.19041733117816, "learning_rate": 6.394763343403827e-06, "loss": 0.4998, "step": 3810 }, { "epoch": 0.19235611057958607, "grad_norm": 5.342002949132524, "learning_rate": 6.411547499160793e-06, "loss": 0.5376, "step": 3820 }, { "epoch": 0.19285966060728132, "grad_norm": 5.85853745584378, "learning_rate": 6.4283316549177574e-06, "loss": 0.4768, "step": 3830 }, { "epoch": 0.19336321063497658, "grad_norm": 5.1034759468388735, "learning_rate": 6.4451158106747235e-06, "loss": 0.5319, "step": 3840 }, { "epoch": 0.19386676066267183, "grad_norm": 6.302993392915875, "learning_rate": 6.4618999664316895e-06, "loss": 0.5012, "step": 3850 }, { "epoch": 0.1943703106903671, "grad_norm": 4.878767411404605, "learning_rate": 6.478684122188655e-06, "loss": 0.4985, "step": 3860 }, { "epoch": 0.19487386071806234, "grad_norm": 5.6978090889885085, "learning_rate": 6.49546827794562e-06, "loss": 0.5206, "step": 3870 }, { "epoch": 0.1953774107457576, "grad_norm": 4.8738197032989525, "learning_rate": 6.512252433702585e-06, "loss": 0.5336, "step": 3880 }, { "epoch": 0.19588096077345285, "grad_norm": 5.41374846124313, "learning_rate": 6.529036589459551e-06, "loss": 0.4794, "step": 3890 }, { "epoch": 0.1963845108011481, "grad_norm": 5.9328758494238105, "learning_rate": 6.5458207452165154e-06, "loss": 0.5515, "step": 3900 }, { "epoch": 0.19688806082884336, "grad_norm": 5.677284385250586, "learning_rate": 6.5626049009734815e-06, "loss": 0.5324, "step": 3910 }, { "epoch": 0.19739161085653859, "grad_norm": 6.50496476859356, "learning_rate": 6.5793890567304475e-06, "loss": 0.5227, "step": 3920 }, { "epoch": 0.19789516088423384, "grad_norm": 6.5688583085389345, "learning_rate": 6.596173212487413e-06, "loss": 0.4974, "step": 3930 }, { "epoch": 0.1983987109119291, "grad_norm": 5.442702277838736, "learning_rate": 6.612957368244378e-06, "loss": 0.5457, "step": 3940 }, { "epoch": 0.19890226093962435, "grad_norm": 5.202021028935191, "learning_rate": 6.629741524001343e-06, "loss": 0.5082, "step": 3950 }, { "epoch": 0.1994058109673196, "grad_norm": 4.9275465884957, "learning_rate": 6.646525679758309e-06, "loss": 0.5497, "step": 3960 }, { "epoch": 0.19990936099501486, "grad_norm": 6.223691491236945, "learning_rate": 6.6633098355152734e-06, "loss": 0.5146, "step": 3970 }, { "epoch": 0.2004129110227101, "grad_norm": 4.709634086845786, "learning_rate": 6.6800939912722395e-06, "loss": 0.5525, "step": 3980 }, { "epoch": 0.20091646105040536, "grad_norm": 5.736655138543908, "learning_rate": 6.6968781470292055e-06, "loss": 0.5276, "step": 3990 }, { "epoch": 0.20142001107810062, "grad_norm": 4.744667902098602, "learning_rate": 6.71366230278617e-06, "loss": 0.559, "step": 4000 }, { "epoch": 0.20192356110579587, "grad_norm": 6.141356223755227, "learning_rate": 6.730446458543136e-06, "loss": 0.577, "step": 4010 }, { "epoch": 0.2024271111334911, "grad_norm": 4.899569677018337, "learning_rate": 6.747230614300101e-06, "loss": 0.5665, "step": 4020 }, { "epoch": 0.20293066116118635, "grad_norm": 5.534520945725192, "learning_rate": 6.764014770057067e-06, "loss": 0.4835, "step": 4030 }, { "epoch": 0.2034342111888816, "grad_norm": 5.219246444309969, "learning_rate": 6.7807989258140314e-06, "loss": 0.5536, "step": 4040 }, { "epoch": 0.20393776121657686, "grad_norm": 5.789595500373856, "learning_rate": 6.7975830815709975e-06, "loss": 0.5706, "step": 4050 }, { "epoch": 0.20444131124427212, "grad_norm": 5.853357909766565, "learning_rate": 6.8143672373279635e-06, "loss": 0.5114, "step": 4060 }, { "epoch": 0.20494486127196737, "grad_norm": 4.5451328872875525, "learning_rate": 6.831151393084928e-06, "loss": 0.4898, "step": 4070 }, { "epoch": 0.20544841129966263, "grad_norm": 4.869916604918561, "learning_rate": 6.847935548841894e-06, "loss": 0.5465, "step": 4080 }, { "epoch": 0.20595196132735788, "grad_norm": 4.8491011628482195, "learning_rate": 6.864719704598859e-06, "loss": 0.5224, "step": 4090 }, { "epoch": 0.20645551135505313, "grad_norm": 5.585747180122849, "learning_rate": 6.881503860355825e-06, "loss": 0.5541, "step": 4100 }, { "epoch": 0.2069590613827484, "grad_norm": 4.055298580960306, "learning_rate": 6.8982880161127895e-06, "loss": 0.4738, "step": 4110 }, { "epoch": 0.20746261141044361, "grad_norm": 4.738436514613372, "learning_rate": 6.9150721718697555e-06, "loss": 0.5302, "step": 4120 }, { "epoch": 0.20796616143813887, "grad_norm": 5.586437186855527, "learning_rate": 6.9318563276267215e-06, "loss": 0.5144, "step": 4130 }, { "epoch": 0.20846971146583412, "grad_norm": 5.22812611147883, "learning_rate": 6.948640483383686e-06, "loss": 0.5308, "step": 4140 }, { "epoch": 0.20897326149352938, "grad_norm": 6.403765915327057, "learning_rate": 6.965424639140652e-06, "loss": 0.5704, "step": 4150 }, { "epoch": 0.20947681152122463, "grad_norm": 4.729718458240657, "learning_rate": 6.982208794897617e-06, "loss": 0.5141, "step": 4160 }, { "epoch": 0.20998036154891989, "grad_norm": 4.623451971735529, "learning_rate": 6.998992950654583e-06, "loss": 0.4894, "step": 4170 }, { "epoch": 0.21048391157661514, "grad_norm": 5.810382925181792, "learning_rate": 7.0157771064115475e-06, "loss": 0.5367, "step": 4180 }, { "epoch": 0.2109874616043104, "grad_norm": 5.589407063069052, "learning_rate": 7.0325612621685135e-06, "loss": 0.5623, "step": 4190 }, { "epoch": 0.21149101163200565, "grad_norm": 4.707592081816766, "learning_rate": 7.0493454179254795e-06, "loss": 0.4533, "step": 4200 }, { "epoch": 0.2119945616597009, "grad_norm": 4.880355122893022, "learning_rate": 7.066129573682444e-06, "loss": 0.4779, "step": 4210 }, { "epoch": 0.21249811168739613, "grad_norm": 5.580319311041198, "learning_rate": 7.08291372943941e-06, "loss": 0.5882, "step": 4220 }, { "epoch": 0.21300166171509138, "grad_norm": 5.834592154420827, "learning_rate": 7.099697885196375e-06, "loss": 0.514, "step": 4230 }, { "epoch": 0.21350521174278664, "grad_norm": 5.6401358410799896, "learning_rate": 7.116482040953341e-06, "loss": 0.5268, "step": 4240 }, { "epoch": 0.2140087617704819, "grad_norm": 5.83233734659936, "learning_rate": 7.1332661967103055e-06, "loss": 0.5265, "step": 4250 }, { "epoch": 0.21451231179817715, "grad_norm": 6.219241274914385, "learning_rate": 7.1500503524672715e-06, "loss": 0.5017, "step": 4260 }, { "epoch": 0.2150158618258724, "grad_norm": 5.519433388443397, "learning_rate": 7.1668345082242375e-06, "loss": 0.5507, "step": 4270 }, { "epoch": 0.21551941185356766, "grad_norm": 5.993516626851333, "learning_rate": 7.183618663981202e-06, "loss": 0.5274, "step": 4280 }, { "epoch": 0.2160229618812629, "grad_norm": 4.797772521959965, "learning_rate": 7.200402819738168e-06, "loss": 0.5247, "step": 4290 }, { "epoch": 0.21652651190895816, "grad_norm": 6.340999235821412, "learning_rate": 7.217186975495133e-06, "loss": 0.5191, "step": 4300 }, { "epoch": 0.21703006193665342, "grad_norm": 4.8476264862198635, "learning_rate": 7.233971131252099e-06, "loss": 0.5062, "step": 4310 }, { "epoch": 0.21753361196434864, "grad_norm": 6.601360561963202, "learning_rate": 7.2507552870090635e-06, "loss": 0.5438, "step": 4320 }, { "epoch": 0.2180371619920439, "grad_norm": 6.6553721516636495, "learning_rate": 7.2675394427660295e-06, "loss": 0.4851, "step": 4330 }, { "epoch": 0.21854071201973915, "grad_norm": 5.246802449228223, "learning_rate": 7.2843235985229955e-06, "loss": 0.5325, "step": 4340 }, { "epoch": 0.2190442620474344, "grad_norm": 6.434631474789665, "learning_rate": 7.30110775427996e-06, "loss": 0.5404, "step": 4350 }, { "epoch": 0.21954781207512966, "grad_norm": 5.121309054682186, "learning_rate": 7.317891910036926e-06, "loss": 0.5619, "step": 4360 }, { "epoch": 0.22005136210282492, "grad_norm": 5.84032827661446, "learning_rate": 7.334676065793891e-06, "loss": 0.4764, "step": 4370 }, { "epoch": 0.22055491213052017, "grad_norm": 6.360334754453779, "learning_rate": 7.351460221550857e-06, "loss": 0.5297, "step": 4380 }, { "epoch": 0.22105846215821542, "grad_norm": 5.893103379882631, "learning_rate": 7.3682443773078215e-06, "loss": 0.5387, "step": 4390 }, { "epoch": 0.22156201218591068, "grad_norm": 5.0573359360313495, "learning_rate": 7.3850285330647875e-06, "loss": 0.4893, "step": 4400 }, { "epoch": 0.22206556221360593, "grad_norm": 5.306384650358763, "learning_rate": 7.4018126888217535e-06, "loss": 0.5104, "step": 4410 }, { "epoch": 0.2225691122413012, "grad_norm": 6.3310976173741516, "learning_rate": 7.418596844578718e-06, "loss": 0.5697, "step": 4420 }, { "epoch": 0.2230726622689964, "grad_norm": 6.319179570386514, "learning_rate": 7.435381000335684e-06, "loss": 0.5414, "step": 4430 }, { "epoch": 0.22357621229669167, "grad_norm": 4.658064384977212, "learning_rate": 7.452165156092649e-06, "loss": 0.5269, "step": 4440 }, { "epoch": 0.22407976232438692, "grad_norm": 5.445649159997292, "learning_rate": 7.468949311849615e-06, "loss": 0.5489, "step": 4450 }, { "epoch": 0.22458331235208218, "grad_norm": 5.866565444231766, "learning_rate": 7.4857334676065795e-06, "loss": 0.5653, "step": 4460 }, { "epoch": 0.22508686237977743, "grad_norm": 5.459726467091553, "learning_rate": 7.5025176233635455e-06, "loss": 0.521, "step": 4470 }, { "epoch": 0.22559041240747268, "grad_norm": 5.796235131554852, "learning_rate": 7.5193017791205116e-06, "loss": 0.486, "step": 4480 }, { "epoch": 0.22609396243516794, "grad_norm": 6.561911779774527, "learning_rate": 7.536085934877476e-06, "loss": 0.5165, "step": 4490 }, { "epoch": 0.2265975124628632, "grad_norm": 5.084446332232328, "learning_rate": 7.552870090634442e-06, "loss": 0.4946, "step": 4500 }, { "epoch": 0.22710106249055845, "grad_norm": 5.2436175337598785, "learning_rate": 7.569654246391407e-06, "loss": 0.5399, "step": 4510 }, { "epoch": 0.2276046125182537, "grad_norm": 6.027832910620593, "learning_rate": 7.586438402148373e-06, "loss": 0.5274, "step": 4520 }, { "epoch": 0.22810816254594893, "grad_norm": 6.106859666783671, "learning_rate": 7.6032225579053375e-06, "loss": 0.502, "step": 4530 }, { "epoch": 0.22861171257364418, "grad_norm": 6.423314497451896, "learning_rate": 7.6200067136623035e-06, "loss": 0.5194, "step": 4540 }, { "epoch": 0.22911526260133944, "grad_norm": 6.724622072358383, "learning_rate": 7.636790869419268e-06, "loss": 0.577, "step": 4550 }, { "epoch": 0.2296188126290347, "grad_norm": 5.661901342546915, "learning_rate": 7.653575025176234e-06, "loss": 0.5758, "step": 4560 }, { "epoch": 0.23012236265672995, "grad_norm": 5.549512812786963, "learning_rate": 7.6703591809332e-06, "loss": 0.5586, "step": 4570 }, { "epoch": 0.2306259126844252, "grad_norm": 4.805627773668941, "learning_rate": 7.687143336690164e-06, "loss": 0.5237, "step": 4580 }, { "epoch": 0.23112946271212045, "grad_norm": 5.710450744143056, "learning_rate": 7.70392749244713e-06, "loss": 0.5301, "step": 4590 }, { "epoch": 0.2316330127398157, "grad_norm": 6.396661341774658, "learning_rate": 7.720711648204096e-06, "loss": 0.5399, "step": 4600 }, { "epoch": 0.23213656276751096, "grad_norm": 5.677518110606364, "learning_rate": 7.73749580396106e-06, "loss": 0.5099, "step": 4610 }, { "epoch": 0.23264011279520622, "grad_norm": 5.957315924806854, "learning_rate": 7.754279959718027e-06, "loss": 0.5159, "step": 4620 }, { "epoch": 0.23314366282290144, "grad_norm": 5.0770585493087035, "learning_rate": 7.771064115474993e-06, "loss": 0.5495, "step": 4630 }, { "epoch": 0.2336472128505967, "grad_norm": 4.526439103635678, "learning_rate": 7.787848271231959e-06, "loss": 0.5363, "step": 4640 }, { "epoch": 0.23415076287829195, "grad_norm": 5.290264549075475, "learning_rate": 7.804632426988923e-06, "loss": 0.5327, "step": 4650 }, { "epoch": 0.2346543129059872, "grad_norm": 4.644101610794485, "learning_rate": 7.82141658274589e-06, "loss": 0.5281, "step": 4660 }, { "epoch": 0.23515786293368246, "grad_norm": 5.367557467862866, "learning_rate": 7.838200738502854e-06, "loss": 0.537, "step": 4670 }, { "epoch": 0.23566141296137771, "grad_norm": 5.069002462233913, "learning_rate": 7.85498489425982e-06, "loss": 0.4928, "step": 4680 }, { "epoch": 0.23616496298907297, "grad_norm": 4.963443808587147, "learning_rate": 7.871769050016784e-06, "loss": 0.5228, "step": 4690 }, { "epoch": 0.23666851301676822, "grad_norm": 5.918901093362061, "learning_rate": 7.88855320577375e-06, "loss": 0.5172, "step": 4700 }, { "epoch": 0.23717206304446348, "grad_norm": 4.812867753185492, "learning_rate": 7.905337361530716e-06, "loss": 0.5226, "step": 4710 }, { "epoch": 0.23767561307215873, "grad_norm": 6.612358634975654, "learning_rate": 7.92212151728768e-06, "loss": 0.58, "step": 4720 }, { "epoch": 0.23817916309985396, "grad_norm": 6.141675633332765, "learning_rate": 7.938905673044646e-06, "loss": 0.4829, "step": 4730 }, { "epoch": 0.2386827131275492, "grad_norm": 5.15721411563674, "learning_rate": 7.955689828801612e-06, "loss": 0.5397, "step": 4740 }, { "epoch": 0.23918626315524447, "grad_norm": 5.580162641036836, "learning_rate": 7.972473984558577e-06, "loss": 0.5275, "step": 4750 }, { "epoch": 0.23968981318293972, "grad_norm": 5.28548386144918, "learning_rate": 7.989258140315543e-06, "loss": 0.5401, "step": 4760 }, { "epoch": 0.24019336321063497, "grad_norm": 5.741865792653495, "learning_rate": 8.006042296072509e-06, "loss": 0.5507, "step": 4770 }, { "epoch": 0.24069691323833023, "grad_norm": 4.830604233704836, "learning_rate": 8.022826451829475e-06, "loss": 0.5328, "step": 4780 }, { "epoch": 0.24120046326602548, "grad_norm": 4.602516237301036, "learning_rate": 8.039610607586439e-06, "loss": 0.4777, "step": 4790 }, { "epoch": 0.24170401329372074, "grad_norm": 4.172623454802348, "learning_rate": 8.056394763343405e-06, "loss": 0.5162, "step": 4800 }, { "epoch": 0.242207563321416, "grad_norm": 5.304753162890781, "learning_rate": 8.07317891910037e-06, "loss": 0.5442, "step": 4810 }, { "epoch": 0.24271111334911125, "grad_norm": 6.1137416079583025, "learning_rate": 8.089963074857336e-06, "loss": 0.5152, "step": 4820 }, { "epoch": 0.24321466337680647, "grad_norm": 6.073741036245814, "learning_rate": 8.1067472306143e-06, "loss": 0.6087, "step": 4830 }, { "epoch": 0.24371821340450173, "grad_norm": 6.3009854125145734, "learning_rate": 8.123531386371266e-06, "loss": 0.5289, "step": 4840 }, { "epoch": 0.24422176343219698, "grad_norm": 5.447762818844736, "learning_rate": 8.140315542128232e-06, "loss": 0.5243, "step": 4850 }, { "epoch": 0.24472531345989224, "grad_norm": 6.980796087987009, "learning_rate": 8.157099697885196e-06, "loss": 0.5305, "step": 4860 }, { "epoch": 0.2452288634875875, "grad_norm": 5.448051828279497, "learning_rate": 8.173883853642162e-06, "loss": 0.5048, "step": 4870 }, { "epoch": 0.24573241351528274, "grad_norm": 5.292320517931281, "learning_rate": 8.190668009399128e-06, "loss": 0.5691, "step": 4880 }, { "epoch": 0.246235963542978, "grad_norm": 5.805730323651911, "learning_rate": 8.207452165156093e-06, "loss": 0.5178, "step": 4890 }, { "epoch": 0.24673951357067325, "grad_norm": 4.737313323952267, "learning_rate": 8.224236320913059e-06, "loss": 0.5435, "step": 4900 }, { "epoch": 0.2472430635983685, "grad_norm": 5.788898499810936, "learning_rate": 8.241020476670025e-06, "loss": 0.5148, "step": 4910 }, { "epoch": 0.24774661362606376, "grad_norm": 4.775556151736951, "learning_rate": 8.25780463242699e-06, "loss": 0.546, "step": 4920 }, { "epoch": 0.248250163653759, "grad_norm": 5.910237676509994, "learning_rate": 8.274588788183955e-06, "loss": 0.5308, "step": 4930 }, { "epoch": 0.24875371368145424, "grad_norm": 5.18864870727716, "learning_rate": 8.291372943940921e-06, "loss": 0.5483, "step": 4940 }, { "epoch": 0.2492572637091495, "grad_norm": 4.880385881173188, "learning_rate": 8.308157099697886e-06, "loss": 0.5618, "step": 4950 }, { "epoch": 0.24976081373684475, "grad_norm": 5.1607639166128045, "learning_rate": 8.324941255454852e-06, "loss": 0.566, "step": 4960 }, { "epoch": 0.25026436376454003, "grad_norm": 4.492719572869051, "learning_rate": 8.341725411211816e-06, "loss": 0.487, "step": 4970 }, { "epoch": 0.25076791379223523, "grad_norm": 6.167471668578935, "learning_rate": 8.358509566968782e-06, "loss": 0.5526, "step": 4980 }, { "epoch": 0.2512714638199305, "grad_norm": 7.1092545841650345, "learning_rate": 8.375293722725748e-06, "loss": 0.5766, "step": 4990 }, { "epoch": 0.25177501384762574, "grad_norm": 4.522585414350648, "learning_rate": 8.392077878482712e-06, "loss": 0.5679, "step": 5000 }, { "epoch": 0.252278563875321, "grad_norm": 4.854900041592555, "learning_rate": 8.408862034239678e-06, "loss": 0.5023, "step": 5010 }, { "epoch": 0.25278211390301625, "grad_norm": 5.111355511345418, "learning_rate": 8.425646189996643e-06, "loss": 0.5032, "step": 5020 }, { "epoch": 0.2532856639307115, "grad_norm": 6.277484066743564, "learning_rate": 8.442430345753609e-06, "loss": 0.4966, "step": 5030 }, { "epoch": 0.25378921395840676, "grad_norm": 5.343112657711042, "learning_rate": 8.459214501510575e-06, "loss": 0.5586, "step": 5040 }, { "epoch": 0.254292763986102, "grad_norm": 5.09849446909653, "learning_rate": 8.47599865726754e-06, "loss": 0.494, "step": 5050 }, { "epoch": 0.25479631401379726, "grad_norm": 4.672432553539562, "learning_rate": 8.492782813024507e-06, "loss": 0.5657, "step": 5060 }, { "epoch": 0.2552998640414925, "grad_norm": 4.492207360281624, "learning_rate": 8.509566968781471e-06, "loss": 0.5269, "step": 5070 }, { "epoch": 0.2558034140691878, "grad_norm": 3.518346345455333, "learning_rate": 8.526351124538437e-06, "loss": 0.5031, "step": 5080 }, { "epoch": 0.256306964096883, "grad_norm": 4.529096826288842, "learning_rate": 8.543135280295402e-06, "loss": 0.518, "step": 5090 }, { "epoch": 0.2568105141245783, "grad_norm": 5.13240581740094, "learning_rate": 8.559919436052368e-06, "loss": 0.6187, "step": 5100 }, { "epoch": 0.25731406415227354, "grad_norm": 4.831463461466899, "learning_rate": 8.576703591809332e-06, "loss": 0.5363, "step": 5110 }, { "epoch": 0.2578176141799688, "grad_norm": 4.701032550143236, "learning_rate": 8.593487747566298e-06, "loss": 0.5977, "step": 5120 }, { "epoch": 0.25832116420766404, "grad_norm": 4.514097190912011, "learning_rate": 8.610271903323264e-06, "loss": 0.4497, "step": 5130 }, { "epoch": 0.2588247142353593, "grad_norm": 5.048285824994238, "learning_rate": 8.627056059080228e-06, "loss": 0.5416, "step": 5140 }, { "epoch": 0.25932826426305455, "grad_norm": 4.175798789680828, "learning_rate": 8.643840214837194e-06, "loss": 0.5262, "step": 5150 }, { "epoch": 0.2598318142907498, "grad_norm": 5.545019705742617, "learning_rate": 8.660624370594159e-06, "loss": 0.521, "step": 5160 }, { "epoch": 0.26033536431844506, "grad_norm": 5.093015010610179, "learning_rate": 8.677408526351125e-06, "loss": 0.5886, "step": 5170 }, { "epoch": 0.2608389143461403, "grad_norm": 6.566444850264756, "learning_rate": 8.69419268210809e-06, "loss": 0.5105, "step": 5180 }, { "epoch": 0.2613424643738355, "grad_norm": 4.98427642039514, "learning_rate": 8.710976837865057e-06, "loss": 0.492, "step": 5190 }, { "epoch": 0.26184601440153077, "grad_norm": 4.796617766201639, "learning_rate": 8.727760993622023e-06, "loss": 0.5134, "step": 5200 }, { "epoch": 0.262349564429226, "grad_norm": 5.4438882787187985, "learning_rate": 8.744545149378987e-06, "loss": 0.5085, "step": 5210 }, { "epoch": 0.2628531144569213, "grad_norm": 5.135129114576111, "learning_rate": 8.761329305135953e-06, "loss": 0.5285, "step": 5220 }, { "epoch": 0.26335666448461653, "grad_norm": 5.357114317829912, "learning_rate": 8.778113460892918e-06, "loss": 0.5504, "step": 5230 }, { "epoch": 0.2638602145123118, "grad_norm": 4.695362286728109, "learning_rate": 8.794897616649884e-06, "loss": 0.5128, "step": 5240 }, { "epoch": 0.26436376454000704, "grad_norm": 5.856955933213522, "learning_rate": 8.811681772406848e-06, "loss": 0.5564, "step": 5250 }, { "epoch": 0.2648673145677023, "grad_norm": 5.188327994331532, "learning_rate": 8.828465928163814e-06, "loss": 0.5617, "step": 5260 }, { "epoch": 0.26537086459539755, "grad_norm": 4.89032729019327, "learning_rate": 8.84525008392078e-06, "loss": 0.545, "step": 5270 }, { "epoch": 0.2658744146230928, "grad_norm": 4.244918181670875, "learning_rate": 8.862034239677744e-06, "loss": 0.47, "step": 5280 }, { "epoch": 0.26637796465078806, "grad_norm": 5.229438155865848, "learning_rate": 8.87881839543471e-06, "loss": 0.5656, "step": 5290 }, { "epoch": 0.2668815146784833, "grad_norm": 5.625605820643767, "learning_rate": 8.895602551191675e-06, "loss": 0.5886, "step": 5300 }, { "epoch": 0.26738506470617857, "grad_norm": 5.206363560546202, "learning_rate": 8.91238670694864e-06, "loss": 0.5682, "step": 5310 }, { "epoch": 0.2678886147338738, "grad_norm": 4.858546619667623, "learning_rate": 8.929170862705607e-06, "loss": 0.5438, "step": 5320 }, { "epoch": 0.2683921647615691, "grad_norm": 4.392815261109625, "learning_rate": 8.945955018462573e-06, "loss": 0.6159, "step": 5330 }, { "epoch": 0.26889571478926433, "grad_norm": 5.734285928785179, "learning_rate": 8.962739174219537e-06, "loss": 0.5687, "step": 5340 }, { "epoch": 0.2693992648169596, "grad_norm": 5.319600124516972, "learning_rate": 8.979523329976503e-06, "loss": 0.5444, "step": 5350 }, { "epoch": 0.26990281484465484, "grad_norm": 5.036019464786518, "learning_rate": 8.99630748573347e-06, "loss": 0.5089, "step": 5360 }, { "epoch": 0.2704063648723501, "grad_norm": 4.907232050748852, "learning_rate": 9.013091641490434e-06, "loss": 0.5941, "step": 5370 }, { "epoch": 0.27090991490004535, "grad_norm": 5.277435894510055, "learning_rate": 9.0298757972474e-06, "loss": 0.5682, "step": 5380 }, { "epoch": 0.27141346492774054, "grad_norm": 5.506820732641515, "learning_rate": 9.046659953004364e-06, "loss": 0.5269, "step": 5390 }, { "epoch": 0.2719170149554358, "grad_norm": 4.9807344722618225, "learning_rate": 9.06344410876133e-06, "loss": 0.5286, "step": 5400 }, { "epoch": 0.27242056498313105, "grad_norm": 5.644268804248017, "learning_rate": 9.080228264518296e-06, "loss": 0.5486, "step": 5410 }, { "epoch": 0.2729241150108263, "grad_norm": 5.244397717011943, "learning_rate": 9.09701242027526e-06, "loss": 0.546, "step": 5420 }, { "epoch": 0.27342766503852156, "grad_norm": 3.844830035540378, "learning_rate": 9.113796576032226e-06, "loss": 0.5386, "step": 5430 }, { "epoch": 0.2739312150662168, "grad_norm": 4.657449386506604, "learning_rate": 9.13058073178919e-06, "loss": 0.5419, "step": 5440 }, { "epoch": 0.27443476509391207, "grad_norm": 4.834830306084943, "learning_rate": 9.147364887546157e-06, "loss": 0.5447, "step": 5450 }, { "epoch": 0.2749383151216073, "grad_norm": 5.025297838094074, "learning_rate": 9.164149043303123e-06, "loss": 0.5898, "step": 5460 }, { "epoch": 0.2754418651493026, "grad_norm": 5.89457908834921, "learning_rate": 9.180933199060089e-06, "loss": 0.5486, "step": 5470 }, { "epoch": 0.27594541517699783, "grad_norm": 5.305405656223947, "learning_rate": 9.197717354817053e-06, "loss": 0.5481, "step": 5480 }, { "epoch": 0.2764489652046931, "grad_norm": 5.527331243169862, "learning_rate": 9.21450151057402e-06, "loss": 0.519, "step": 5490 }, { "epoch": 0.27695251523238834, "grad_norm": 5.4035209187767235, "learning_rate": 9.231285666330985e-06, "loss": 0.5146, "step": 5500 }, { "epoch": 0.2774560652600836, "grad_norm": 6.703103844682733, "learning_rate": 9.24806982208795e-06, "loss": 0.5724, "step": 5510 }, { "epoch": 0.27795961528777885, "grad_norm": 4.939156233537997, "learning_rate": 9.264853977844916e-06, "loss": 0.5425, "step": 5520 }, { "epoch": 0.2784631653154741, "grad_norm": 5.878316705666455, "learning_rate": 9.28163813360188e-06, "loss": 0.5485, "step": 5530 }, { "epoch": 0.27896671534316936, "grad_norm": 5.2582950200311585, "learning_rate": 9.298422289358846e-06, "loss": 0.5409, "step": 5540 }, { "epoch": 0.2794702653708646, "grad_norm": 5.739668246251078, "learning_rate": 9.315206445115812e-06, "loss": 0.5616, "step": 5550 }, { "epoch": 0.27997381539855987, "grad_norm": 4.372582888817549, "learning_rate": 9.331990600872776e-06, "loss": 0.5522, "step": 5560 }, { "epoch": 0.2804773654262551, "grad_norm": 4.379358040807956, "learning_rate": 9.348774756629742e-06, "loss": 0.5378, "step": 5570 }, { "epoch": 0.2809809154539504, "grad_norm": 5.094239512235753, "learning_rate": 9.365558912386707e-06, "loss": 0.5793, "step": 5580 }, { "epoch": 0.2814844654816456, "grad_norm": 4.6379509504113745, "learning_rate": 9.382343068143673e-06, "loss": 0.5453, "step": 5590 }, { "epoch": 0.28198801550934083, "grad_norm": 4.470673780782031, "learning_rate": 9.399127223900639e-06, "loss": 0.5488, "step": 5600 }, { "epoch": 0.2824915655370361, "grad_norm": 5.167333113760108, "learning_rate": 9.415911379657605e-06, "loss": 0.571, "step": 5610 }, { "epoch": 0.28299511556473134, "grad_norm": 5.04022156268273, "learning_rate": 9.43269553541457e-06, "loss": 0.5624, "step": 5620 }, { "epoch": 0.2834986655924266, "grad_norm": 5.120120824755431, "learning_rate": 9.449479691171535e-06, "loss": 0.5256, "step": 5630 }, { "epoch": 0.28400221562012185, "grad_norm": 5.795907343060533, "learning_rate": 9.466263846928501e-06, "loss": 0.5241, "step": 5640 }, { "epoch": 0.2845057656478171, "grad_norm": 5.050930178361378, "learning_rate": 9.483048002685466e-06, "loss": 0.5351, "step": 5650 }, { "epoch": 0.28500931567551235, "grad_norm": 4.693323130341387, "learning_rate": 9.499832158442432e-06, "loss": 0.5424, "step": 5660 }, { "epoch": 0.2855128657032076, "grad_norm": 4.591034604991755, "learning_rate": 9.516616314199396e-06, "loss": 0.5734, "step": 5670 }, { "epoch": 0.28601641573090286, "grad_norm": 5.57906955507601, "learning_rate": 9.533400469956362e-06, "loss": 0.5517, "step": 5680 }, { "epoch": 0.2865199657585981, "grad_norm": 4.854556647511725, "learning_rate": 9.550184625713328e-06, "loss": 0.5553, "step": 5690 }, { "epoch": 0.28702351578629337, "grad_norm": 5.6538469179661694, "learning_rate": 9.566968781470292e-06, "loss": 0.5816, "step": 5700 }, { "epoch": 0.2875270658139886, "grad_norm": 5.15554777694814, "learning_rate": 9.583752937227258e-06, "loss": 0.5449, "step": 5710 }, { "epoch": 0.2880306158416839, "grad_norm": 4.510122698484071, "learning_rate": 9.600537092984223e-06, "loss": 0.5258, "step": 5720 }, { "epoch": 0.28853416586937913, "grad_norm": 4.7691468538836155, "learning_rate": 9.617321248741189e-06, "loss": 0.533, "step": 5730 }, { "epoch": 0.2890377158970744, "grad_norm": 3.433563409055146, "learning_rate": 9.634105404498155e-06, "loss": 0.5434, "step": 5740 }, { "epoch": 0.28954126592476964, "grad_norm": 5.169400142524368, "learning_rate": 9.650889560255121e-06, "loss": 0.5305, "step": 5750 }, { "epoch": 0.2900448159524649, "grad_norm": 4.199035859881939, "learning_rate": 9.667673716012085e-06, "loss": 0.5501, "step": 5760 }, { "epoch": 0.29054836598016015, "grad_norm": 5.253509353283956, "learning_rate": 9.684457871769051e-06, "loss": 0.5936, "step": 5770 }, { "epoch": 0.2910519160078554, "grad_norm": 4.909807843474922, "learning_rate": 9.701242027526017e-06, "loss": 0.5695, "step": 5780 }, { "epoch": 0.29155546603555066, "grad_norm": 5.1920890709695415, "learning_rate": 9.718026183282982e-06, "loss": 0.5635, "step": 5790 }, { "epoch": 0.29205901606324586, "grad_norm": 4.227205384805389, "learning_rate": 9.734810339039948e-06, "loss": 0.5492, "step": 5800 }, { "epoch": 0.2925625660909411, "grad_norm": 4.685977235973294, "learning_rate": 9.751594494796912e-06, "loss": 0.517, "step": 5810 }, { "epoch": 0.29306611611863637, "grad_norm": 5.839779036446028, "learning_rate": 9.768378650553878e-06, "loss": 0.5749, "step": 5820 }, { "epoch": 0.2935696661463316, "grad_norm": 5.342969345498342, "learning_rate": 9.785162806310842e-06, "loss": 0.6072, "step": 5830 }, { "epoch": 0.2940732161740269, "grad_norm": 3.9275908198865404, "learning_rate": 9.801946962067808e-06, "loss": 0.5797, "step": 5840 }, { "epoch": 0.29457676620172213, "grad_norm": 5.684035699942285, "learning_rate": 9.818731117824774e-06, "loss": 0.6031, "step": 5850 }, { "epoch": 0.2950803162294174, "grad_norm": 4.215231283431367, "learning_rate": 9.835515273581739e-06, "loss": 0.5462, "step": 5860 }, { "epoch": 0.29558386625711264, "grad_norm": 5.002953039687786, "learning_rate": 9.852299429338705e-06, "loss": 0.5595, "step": 5870 }, { "epoch": 0.2960874162848079, "grad_norm": 4.2183082282372, "learning_rate": 9.86908358509567e-06, "loss": 0.5236, "step": 5880 }, { "epoch": 0.29659096631250315, "grad_norm": 4.5884678287530765, "learning_rate": 9.885867740852635e-06, "loss": 0.5072, "step": 5890 }, { "epoch": 0.2970945163401984, "grad_norm": 3.6629024929011345, "learning_rate": 9.902651896609601e-06, "loss": 0.5664, "step": 5900 }, { "epoch": 0.29759806636789365, "grad_norm": 4.684847263995845, "learning_rate": 9.919436052366567e-06, "loss": 0.531, "step": 5910 }, { "epoch": 0.2981016163955889, "grad_norm": 5.3351467419866205, "learning_rate": 9.936220208123533e-06, "loss": 0.5337, "step": 5920 }, { "epoch": 0.29860516642328416, "grad_norm": 3.3513491668928057, "learning_rate": 9.953004363880498e-06, "loss": 0.5806, "step": 5930 }, { "epoch": 0.2991087164509794, "grad_norm": 4.747269297784496, "learning_rate": 9.969788519637464e-06, "loss": 0.5318, "step": 5940 }, { "epoch": 0.29961226647867467, "grad_norm": 5.162125546475904, "learning_rate": 9.986572675394428e-06, "loss": 0.5961, "step": 5950 }, { "epoch": 0.3001158165063699, "grad_norm": 5.530970563530301, "learning_rate": 9.999999965670907e-06, "loss": 0.6002, "step": 5960 }, { "epoch": 0.3006193665340652, "grad_norm": 4.775580326241268, "learning_rate": 9.999998764152715e-06, "loss": 0.5203, "step": 5970 }, { "epoch": 0.30112291656176043, "grad_norm": 4.766021263219351, "learning_rate": 9.999995846180362e-06, "loss": 0.5708, "step": 5980 }, { "epoch": 0.3016264665894557, "grad_norm": 4.898664629717499, "learning_rate": 9.999991211754846e-06, "loss": 0.5699, "step": 5990 }, { "epoch": 0.3021300166171509, "grad_norm": 5.653234366171527, "learning_rate": 9.999984860877766e-06, "loss": 0.5313, "step": 6000 }, { "epoch": 0.30263356664484614, "grad_norm": 4.9878130881063285, "learning_rate": 9.999976793551296e-06, "loss": 0.533, "step": 6010 }, { "epoch": 0.3031371166725414, "grad_norm": 5.161030664667951, "learning_rate": 9.999967009778208e-06, "loss": 0.5618, "step": 6020 }, { "epoch": 0.30364066670023665, "grad_norm": 4.564001566533831, "learning_rate": 9.999955509561861e-06, "loss": 0.5527, "step": 6030 }, { "epoch": 0.3041442167279319, "grad_norm": 5.438333298094031, "learning_rate": 9.9999422929062e-06, "loss": 0.5403, "step": 6040 }, { "epoch": 0.30464776675562716, "grad_norm": 5.291422273404398, "learning_rate": 9.999927359815765e-06, "loss": 0.6324, "step": 6050 }, { "epoch": 0.3051513167833224, "grad_norm": 4.975931962524808, "learning_rate": 9.999910710295681e-06, "loss": 0.5307, "step": 6060 }, { "epoch": 0.30565486681101767, "grad_norm": 5.213900400403985, "learning_rate": 9.999892344351665e-06, "loss": 0.563, "step": 6070 }, { "epoch": 0.3061584168387129, "grad_norm": 4.409270199795966, "learning_rate": 9.999872261990022e-06, "loss": 0.5726, "step": 6080 }, { "epoch": 0.3066619668664082, "grad_norm": 4.588412879307183, "learning_rate": 9.999850463217644e-06, "loss": 0.5436, "step": 6090 }, { "epoch": 0.30716551689410343, "grad_norm": 4.854380818144203, "learning_rate": 9.999826948042016e-06, "loss": 0.5751, "step": 6100 }, { "epoch": 0.3076690669217987, "grad_norm": 3.8715555812747025, "learning_rate": 9.999801716471207e-06, "loss": 0.5267, "step": 6110 }, { "epoch": 0.30817261694949394, "grad_norm": 4.8652804881839105, "learning_rate": 9.999774768513885e-06, "loss": 0.5717, "step": 6120 }, { "epoch": 0.3086761669771892, "grad_norm": 3.9638943182925184, "learning_rate": 9.999746104179297e-06, "loss": 0.5826, "step": 6130 }, { "epoch": 0.30917971700488445, "grad_norm": 4.8353541809410485, "learning_rate": 9.999715723477284e-06, "loss": 0.5573, "step": 6140 }, { "epoch": 0.3096832670325797, "grad_norm": 5.733408484455865, "learning_rate": 9.999683626418275e-06, "loss": 0.5974, "step": 6150 }, { "epoch": 0.31018681706027496, "grad_norm": 4.785530963163129, "learning_rate": 9.999649813013288e-06, "loss": 0.561, "step": 6160 }, { "epoch": 0.3106903670879702, "grad_norm": 5.637298854254361, "learning_rate": 9.999614283273933e-06, "loss": 0.5223, "step": 6170 }, { "epoch": 0.31119391711566546, "grad_norm": 5.2077344832126355, "learning_rate": 9.999577037212407e-06, "loss": 0.5319, "step": 6180 }, { "epoch": 0.3116974671433607, "grad_norm": 4.4540775456494055, "learning_rate": 9.999538074841492e-06, "loss": 0.569, "step": 6190 }, { "epoch": 0.3122010171710559, "grad_norm": 4.700435694680445, "learning_rate": 9.999497396174568e-06, "loss": 0.5959, "step": 6200 }, { "epoch": 0.31270456719875117, "grad_norm": 3.860336404563428, "learning_rate": 9.999455001225599e-06, "loss": 0.6208, "step": 6210 }, { "epoch": 0.3132081172264464, "grad_norm": 4.658553044322515, "learning_rate": 9.99941089000914e-06, "loss": 0.5107, "step": 6220 }, { "epoch": 0.3137116672541417, "grad_norm": 4.475383662898462, "learning_rate": 9.999365062540327e-06, "loss": 0.5429, "step": 6230 }, { "epoch": 0.31421521728183693, "grad_norm": 4.152973658323125, "learning_rate": 9.999317518834901e-06, "loss": 0.5834, "step": 6240 }, { "epoch": 0.3147187673095322, "grad_norm": 4.36001131345374, "learning_rate": 9.999268258909178e-06, "loss": 0.55, "step": 6250 }, { "epoch": 0.31522231733722744, "grad_norm": 4.44905099023355, "learning_rate": 9.99921728278007e-06, "loss": 0.5494, "step": 6260 }, { "epoch": 0.3157258673649227, "grad_norm": 4.641561786216865, "learning_rate": 9.999164590465076e-06, "loss": 0.5679, "step": 6270 }, { "epoch": 0.31622941739261795, "grad_norm": 6.617392988768269, "learning_rate": 9.999110181982286e-06, "loss": 0.5372, "step": 6280 }, { "epoch": 0.3167329674203132, "grad_norm": 5.323721595781472, "learning_rate": 9.999054057350376e-06, "loss": 0.5876, "step": 6290 }, { "epoch": 0.31723651744800846, "grad_norm": 4.7370416810531655, "learning_rate": 9.998996216588616e-06, "loss": 0.5174, "step": 6300 }, { "epoch": 0.3177400674757037, "grad_norm": 4.53439981146841, "learning_rate": 9.99893665971686e-06, "loss": 0.5361, "step": 6310 }, { "epoch": 0.31824361750339897, "grad_norm": 4.9224090818999775, "learning_rate": 9.998875386755554e-06, "loss": 0.537, "step": 6320 }, { "epoch": 0.3187471675310942, "grad_norm": 5.224421786872974, "learning_rate": 9.998812397725733e-06, "loss": 0.5867, "step": 6330 }, { "epoch": 0.3192507175587895, "grad_norm": 4.837249195951343, "learning_rate": 9.998747692649017e-06, "loss": 0.5184, "step": 6340 }, { "epoch": 0.31975426758648473, "grad_norm": 4.011054202368474, "learning_rate": 9.998681271547624e-06, "loss": 0.5069, "step": 6350 }, { "epoch": 0.32025781761418, "grad_norm": 4.534808638047522, "learning_rate": 9.998613134444354e-06, "loss": 0.5348, "step": 6360 }, { "epoch": 0.32076136764187524, "grad_norm": 5.200708926676524, "learning_rate": 9.998543281362595e-06, "loss": 0.5767, "step": 6370 }, { "epoch": 0.3212649176695705, "grad_norm": 5.521835546591687, "learning_rate": 9.998471712326331e-06, "loss": 0.5841, "step": 6380 }, { "epoch": 0.32176846769726575, "grad_norm": 5.3142906644299615, "learning_rate": 9.998398427360128e-06, "loss": 0.5714, "step": 6390 }, { "epoch": 0.322272017724961, "grad_norm": 4.160574417828533, "learning_rate": 9.998323426489144e-06, "loss": 0.611, "step": 6400 }, { "epoch": 0.3227755677526562, "grad_norm": 4.940243623496398, "learning_rate": 9.99824670973913e-06, "loss": 0.5356, "step": 6410 }, { "epoch": 0.32327911778035145, "grad_norm": 4.909161688500809, "learning_rate": 9.998168277136418e-06, "loss": 0.6049, "step": 6420 }, { "epoch": 0.3237826678080467, "grad_norm": 4.42595420813798, "learning_rate": 9.998088128707934e-06, "loss": 0.559, "step": 6430 }, { "epoch": 0.32428621783574196, "grad_norm": 5.338504726055159, "learning_rate": 9.998006264481194e-06, "loss": 0.6, "step": 6440 }, { "epoch": 0.3247897678634372, "grad_norm": 4.067815692566063, "learning_rate": 9.9979226844843e-06, "loss": 0.5269, "step": 6450 }, { "epoch": 0.32529331789113247, "grad_norm": 6.2865145104466436, "learning_rate": 9.997837388745945e-06, "loss": 0.5655, "step": 6460 }, { "epoch": 0.3257968679188277, "grad_norm": 4.801916965758547, "learning_rate": 9.997750377295408e-06, "loss": 0.6259, "step": 6470 }, { "epoch": 0.326300417946523, "grad_norm": 5.58439727164783, "learning_rate": 9.997661650162562e-06, "loss": 0.5266, "step": 6480 }, { "epoch": 0.32680396797421823, "grad_norm": 4.155113272848833, "learning_rate": 9.997571207377865e-06, "loss": 0.5071, "step": 6490 }, { "epoch": 0.3273075180019135, "grad_norm": 5.1900785500097975, "learning_rate": 9.997479048972365e-06, "loss": 0.5779, "step": 6500 }, { "epoch": 0.32781106802960874, "grad_norm": 5.223912345521712, "learning_rate": 9.997385174977699e-06, "loss": 0.5719, "step": 6510 }, { "epoch": 0.328314618057304, "grad_norm": 4.197505971496392, "learning_rate": 9.997289585426095e-06, "loss": 0.5637, "step": 6520 }, { "epoch": 0.32881816808499925, "grad_norm": 5.156050300543571, "learning_rate": 9.997192280350365e-06, "loss": 0.5407, "step": 6530 }, { "epoch": 0.3293217181126945, "grad_norm": 5.569016235904057, "learning_rate": 9.997093259783914e-06, "loss": 0.5287, "step": 6540 }, { "epoch": 0.32982526814038976, "grad_norm": 4.102657579809289, "learning_rate": 9.996992523760737e-06, "loss": 0.5783, "step": 6550 }, { "epoch": 0.330328818168085, "grad_norm": 5.775741758467151, "learning_rate": 9.996890072315413e-06, "loss": 0.575, "step": 6560 }, { "epoch": 0.33083236819578027, "grad_norm": 4.221155319513972, "learning_rate": 9.996785905483114e-06, "loss": 0.5113, "step": 6570 }, { "epoch": 0.3313359182234755, "grad_norm": 4.805743462715256, "learning_rate": 9.9966800232996e-06, "loss": 0.5744, "step": 6580 }, { "epoch": 0.3318394682511708, "grad_norm": 5.005080023062524, "learning_rate": 9.996572425801218e-06, "loss": 0.5587, "step": 6590 }, { "epoch": 0.33234301827886603, "grad_norm": 4.083480532862144, "learning_rate": 9.996463113024906e-06, "loss": 0.5698, "step": 6600 }, { "epoch": 0.33284656830656123, "grad_norm": 5.541193545493671, "learning_rate": 9.99635208500819e-06, "loss": 0.5427, "step": 6610 }, { "epoch": 0.3333501183342565, "grad_norm": 5.111828641556347, "learning_rate": 9.996239341789184e-06, "loss": 0.6096, "step": 6620 }, { "epoch": 0.33385366836195174, "grad_norm": 5.379184522086985, "learning_rate": 9.996124883406593e-06, "loss": 0.5697, "step": 6630 }, { "epoch": 0.334357218389647, "grad_norm": 5.804727539808609, "learning_rate": 9.99600870989971e-06, "loss": 0.5473, "step": 6640 }, { "epoch": 0.33486076841734225, "grad_norm": 5.06821566674582, "learning_rate": 9.995890821308416e-06, "loss": 0.5708, "step": 6650 }, { "epoch": 0.3353643184450375, "grad_norm": 4.9889480085747255, "learning_rate": 9.995771217673179e-06, "loss": 0.5783, "step": 6660 }, { "epoch": 0.33586786847273276, "grad_norm": 4.971060600388091, "learning_rate": 9.99564989903506e-06, "loss": 0.503, "step": 6670 }, { "epoch": 0.336371418500428, "grad_norm": 4.313793885565682, "learning_rate": 9.995526865435706e-06, "loss": 0.526, "step": 6680 }, { "epoch": 0.33687496852812326, "grad_norm": 4.726559809920947, "learning_rate": 9.995402116917353e-06, "loss": 0.5312, "step": 6690 }, { "epoch": 0.3373785185558185, "grad_norm": 4.774206811886249, "learning_rate": 9.995275653522826e-06, "loss": 0.5551, "step": 6700 }, { "epoch": 0.3378820685835138, "grad_norm": 4.461491582399786, "learning_rate": 9.99514747529554e-06, "loss": 0.5488, "step": 6710 }, { "epoch": 0.338385618611209, "grad_norm": 3.942123055167431, "learning_rate": 9.995017582279496e-06, "loss": 0.5494, "step": 6720 }, { "epoch": 0.3388891686389043, "grad_norm": 5.556951202875439, "learning_rate": 9.994885974519285e-06, "loss": 0.5521, "step": 6730 }, { "epoch": 0.33939271866659954, "grad_norm": 5.4634925960709655, "learning_rate": 9.99475265206009e-06, "loss": 0.6228, "step": 6740 }, { "epoch": 0.3398962686942948, "grad_norm": 4.518942984628786, "learning_rate": 9.994617614947675e-06, "loss": 0.5785, "step": 6750 }, { "epoch": 0.34039981872199004, "grad_norm": 5.050452275090466, "learning_rate": 9.994480863228398e-06, "loss": 0.5684, "step": 6760 }, { "epoch": 0.3409033687496853, "grad_norm": 4.868310647486722, "learning_rate": 9.994342396949206e-06, "loss": 0.5088, "step": 6770 }, { "epoch": 0.34140691877738055, "grad_norm": 4.163427784731617, "learning_rate": 9.994202216157632e-06, "loss": 0.5837, "step": 6780 }, { "epoch": 0.3419104688050758, "grad_norm": 3.7874640203453627, "learning_rate": 9.9940603209018e-06, "loss": 0.5455, "step": 6790 }, { "epoch": 0.34241401883277106, "grad_norm": 4.7381784632117565, "learning_rate": 9.99391671123042e-06, "loss": 0.541, "step": 6800 }, { "epoch": 0.34291756886046626, "grad_norm": 4.651660861565803, "learning_rate": 9.993771387192792e-06, "loss": 0.5148, "step": 6810 }, { "epoch": 0.3434211188881615, "grad_norm": 5.576141960292618, "learning_rate": 9.993624348838806e-06, "loss": 0.552, "step": 6820 }, { "epoch": 0.34392466891585677, "grad_norm": 5.4067086152947965, "learning_rate": 9.99347559621894e-06, "loss": 0.5814, "step": 6830 }, { "epoch": 0.344428218943552, "grad_norm": 5.739717485169292, "learning_rate": 9.993325129384255e-06, "loss": 0.474, "step": 6840 }, { "epoch": 0.3449317689712473, "grad_norm": 3.937052004477483, "learning_rate": 9.993172948386407e-06, "loss": 0.5765, "step": 6850 }, { "epoch": 0.34543531899894253, "grad_norm": 4.68088320009317, "learning_rate": 9.99301905327764e-06, "loss": 0.5966, "step": 6860 }, { "epoch": 0.3459388690266378, "grad_norm": 5.485315205208892, "learning_rate": 9.992863444110781e-06, "loss": 0.5525, "step": 6870 }, { "epoch": 0.34644241905433304, "grad_norm": 4.669880490381697, "learning_rate": 9.992706120939256e-06, "loss": 0.5622, "step": 6880 }, { "epoch": 0.3469459690820283, "grad_norm": 4.71314880359971, "learning_rate": 9.992547083817064e-06, "loss": 0.5299, "step": 6890 }, { "epoch": 0.34744951910972355, "grad_norm": 4.592795503617274, "learning_rate": 9.992386332798807e-06, "loss": 0.5443, "step": 6900 }, { "epoch": 0.3479530691374188, "grad_norm": 5.925025368948719, "learning_rate": 9.992223867939667e-06, "loss": 0.5521, "step": 6910 }, { "epoch": 0.34845661916511406, "grad_norm": 5.443575863560421, "learning_rate": 9.992059689295417e-06, "loss": 0.5427, "step": 6920 }, { "epoch": 0.3489601691928093, "grad_norm": 4.696965453574973, "learning_rate": 9.991893796922421e-06, "loss": 0.5847, "step": 6930 }, { "epoch": 0.34946371922050457, "grad_norm": 5.585193060831609, "learning_rate": 9.991726190877623e-06, "loss": 0.6386, "step": 6940 }, { "epoch": 0.3499672692481998, "grad_norm": 4.507391022594603, "learning_rate": 9.991556871218564e-06, "loss": 0.521, "step": 6950 }, { "epoch": 0.3504708192758951, "grad_norm": 3.8333400728873155, "learning_rate": 9.991385838003369e-06, "loss": 0.4925, "step": 6960 }, { "epoch": 0.35097436930359033, "grad_norm": 5.467364267392312, "learning_rate": 9.991213091290754e-06, "loss": 0.5682, "step": 6970 }, { "epoch": 0.3514779193312856, "grad_norm": 3.540538926198935, "learning_rate": 9.991038631140018e-06, "loss": 0.5731, "step": 6980 }, { "epoch": 0.35198146935898084, "grad_norm": 4.9776088678074295, "learning_rate": 9.990862457611055e-06, "loss": 0.5481, "step": 6990 }, { "epoch": 0.3524850193866761, "grad_norm": 4.300893666509838, "learning_rate": 9.99068457076434e-06, "loss": 0.516, "step": 7000 }, { "epoch": 0.35298856941437134, "grad_norm": 5.318742740537092, "learning_rate": 9.990504970660944e-06, "loss": 0.5187, "step": 7010 }, { "epoch": 0.35349211944206654, "grad_norm": 5.403008701775604, "learning_rate": 9.990323657362519e-06, "loss": 0.5238, "step": 7020 }, { "epoch": 0.3539956694697618, "grad_norm": 5.183085228784345, "learning_rate": 9.990140630931309e-06, "loss": 0.5775, "step": 7030 }, { "epoch": 0.35449921949745705, "grad_norm": 4.97514811160399, "learning_rate": 9.989955891430148e-06, "loss": 0.5558, "step": 7040 }, { "epoch": 0.3550027695251523, "grad_norm": 5.274636950887935, "learning_rate": 9.98976943892245e-06, "loss": 0.5651, "step": 7050 }, { "epoch": 0.35550631955284756, "grad_norm": 5.037776356519859, "learning_rate": 9.989581273472227e-06, "loss": 0.5322, "step": 7060 }, { "epoch": 0.3560098695805428, "grad_norm": 4.033636573158429, "learning_rate": 9.989391395144072e-06, "loss": 0.5479, "step": 7070 }, { "epoch": 0.35651341960823807, "grad_norm": 4.854106487837427, "learning_rate": 9.989199804003172e-06, "loss": 0.503, "step": 7080 }, { "epoch": 0.3570169696359333, "grad_norm": 4.5460324329879995, "learning_rate": 9.989006500115295e-06, "loss": 0.5656, "step": 7090 }, { "epoch": 0.3575205196636286, "grad_norm": 4.112251839479622, "learning_rate": 9.9888114835468e-06, "loss": 0.5984, "step": 7100 }, { "epoch": 0.35802406969132383, "grad_norm": 5.026563359493389, "learning_rate": 9.988614754364635e-06, "loss": 0.5493, "step": 7110 }, { "epoch": 0.3585276197190191, "grad_norm": 5.086172072426679, "learning_rate": 9.98841631263634e-06, "loss": 0.5684, "step": 7120 }, { "epoch": 0.35903116974671434, "grad_norm": 4.39655081788938, "learning_rate": 9.988216158430033e-06, "loss": 0.5393, "step": 7130 }, { "epoch": 0.3595347197744096, "grad_norm": 5.258685532922941, "learning_rate": 9.988014291814426e-06, "loss": 0.5568, "step": 7140 }, { "epoch": 0.36003826980210485, "grad_norm": 4.412801491263824, "learning_rate": 9.987810712858819e-06, "loss": 0.5558, "step": 7150 }, { "epoch": 0.3605418198298001, "grad_norm": 4.026805692968605, "learning_rate": 9.987605421633097e-06, "loss": 0.5776, "step": 7160 }, { "epoch": 0.36104536985749536, "grad_norm": 4.94616778438055, "learning_rate": 9.987398418207738e-06, "loss": 0.5673, "step": 7170 }, { "epoch": 0.3615489198851906, "grad_norm": 5.264397178920806, "learning_rate": 9.987189702653801e-06, "loss": 0.5844, "step": 7180 }, { "epoch": 0.36205246991288587, "grad_norm": 5.08189537570202, "learning_rate": 9.986979275042938e-06, "loss": 0.5879, "step": 7190 }, { "epoch": 0.3625560199405811, "grad_norm": 4.544918450022487, "learning_rate": 9.986767135447386e-06, "loss": 0.5346, "step": 7200 }, { "epoch": 0.3630595699682764, "grad_norm": 6.547655719531664, "learning_rate": 9.986553283939972e-06, "loss": 0.5525, "step": 7210 }, { "epoch": 0.3635631199959716, "grad_norm": 3.980857107158395, "learning_rate": 9.986337720594109e-06, "loss": 0.5205, "step": 7220 }, { "epoch": 0.3640666700236668, "grad_norm": 4.112833806189114, "learning_rate": 9.986120445483796e-06, "loss": 0.5407, "step": 7230 }, { "epoch": 0.3645702200513621, "grad_norm": 5.224552174637077, "learning_rate": 9.985901458683623e-06, "loss": 0.5781, "step": 7240 }, { "epoch": 0.36507377007905734, "grad_norm": 3.7712625460571596, "learning_rate": 9.985680760268766e-06, "loss": 0.5329, "step": 7250 }, { "epoch": 0.3655773201067526, "grad_norm": 5.373435225468218, "learning_rate": 9.985458350314986e-06, "loss": 0.5051, "step": 7260 }, { "epoch": 0.36608087013444784, "grad_norm": 3.9435114897508523, "learning_rate": 9.985234228898642e-06, "loss": 0.5445, "step": 7270 }, { "epoch": 0.3665844201621431, "grad_norm": 5.382163235716142, "learning_rate": 9.985008396096666e-06, "loss": 0.4938, "step": 7280 }, { "epoch": 0.36708797018983835, "grad_norm": 3.7838851596860303, "learning_rate": 9.984780851986584e-06, "loss": 0.5665, "step": 7290 }, { "epoch": 0.3675915202175336, "grad_norm": 5.57415555990164, "learning_rate": 9.984551596646515e-06, "loss": 0.5763, "step": 7300 }, { "epoch": 0.36809507024522886, "grad_norm": 4.281915193256758, "learning_rate": 9.984320630155155e-06, "loss": 0.5416, "step": 7310 }, { "epoch": 0.3685986202729241, "grad_norm": 3.995404555510499, "learning_rate": 9.984087952591798e-06, "loss": 0.5498, "step": 7320 }, { "epoch": 0.36910217030061937, "grad_norm": 3.7180295808869195, "learning_rate": 9.983853564036315e-06, "loss": 0.5304, "step": 7330 }, { "epoch": 0.3696057203283146, "grad_norm": 4.274207293681294, "learning_rate": 9.983617464569173e-06, "loss": 0.5579, "step": 7340 }, { "epoch": 0.3701092703560099, "grad_norm": 5.188155568869461, "learning_rate": 9.98337965427142e-06, "loss": 0.5506, "step": 7350 }, { "epoch": 0.37061282038370513, "grad_norm": 4.238902996684575, "learning_rate": 9.983140133224695e-06, "loss": 0.5085, "step": 7360 }, { "epoch": 0.3711163704114004, "grad_norm": 4.566791695428378, "learning_rate": 9.982898901511227e-06, "loss": 0.5494, "step": 7370 }, { "epoch": 0.37161992043909564, "grad_norm": 4.121456575123536, "learning_rate": 9.982655959213823e-06, "loss": 0.484, "step": 7380 }, { "epoch": 0.3721234704667909, "grad_norm": 5.750272854353206, "learning_rate": 9.982411306415886e-06, "loss": 0.5702, "step": 7390 }, { "epoch": 0.37262702049448615, "grad_norm": 4.522397829715356, "learning_rate": 9.982164943201404e-06, "loss": 0.5677, "step": 7400 }, { "epoch": 0.3731305705221814, "grad_norm": 4.532942646546316, "learning_rate": 9.981916869654948e-06, "loss": 0.5566, "step": 7410 }, { "epoch": 0.3736341205498766, "grad_norm": 5.204500125315686, "learning_rate": 9.981667085861683e-06, "loss": 0.6083, "step": 7420 }, { "epoch": 0.37413767057757186, "grad_norm": 4.6557884829428255, "learning_rate": 9.981415591907354e-06, "loss": 0.528, "step": 7430 }, { "epoch": 0.3746412206052671, "grad_norm": 3.8765603236522033, "learning_rate": 9.981162387878299e-06, "loss": 0.543, "step": 7440 }, { "epoch": 0.37514477063296237, "grad_norm": 5.41055204836897, "learning_rate": 9.98090747386144e-06, "loss": 0.5801, "step": 7450 }, { "epoch": 0.3756483206606576, "grad_norm": 4.764110240603519, "learning_rate": 9.980650849944287e-06, "loss": 0.5662, "step": 7460 }, { "epoch": 0.3761518706883529, "grad_norm": 4.84649554604395, "learning_rate": 9.980392516214934e-06, "loss": 0.5479, "step": 7470 }, { "epoch": 0.37665542071604813, "grad_norm": 5.40430530662807, "learning_rate": 9.98013247276207e-06, "loss": 0.5538, "step": 7480 }, { "epoch": 0.3771589707437434, "grad_norm": 4.4764259093968715, "learning_rate": 9.979870719674961e-06, "loss": 0.5665, "step": 7490 }, { "epoch": 0.37766252077143864, "grad_norm": 4.955007528762723, "learning_rate": 9.979607257043467e-06, "loss": 0.5542, "step": 7500 }, { "epoch": 0.3781660707991339, "grad_norm": 4.594459925682737, "learning_rate": 9.97934208495803e-06, "loss": 0.5754, "step": 7510 }, { "epoch": 0.37866962082682915, "grad_norm": 4.582275277551856, "learning_rate": 9.979075203509683e-06, "loss": 0.4983, "step": 7520 }, { "epoch": 0.3791731708545244, "grad_norm": 4.395908645880887, "learning_rate": 9.978806612790043e-06, "loss": 0.5218, "step": 7530 }, { "epoch": 0.37967672088221965, "grad_norm": 4.452761923731255, "learning_rate": 9.978536312891316e-06, "loss": 0.4788, "step": 7540 }, { "epoch": 0.3801802709099149, "grad_norm": 5.213934167988472, "learning_rate": 9.978264303906291e-06, "loss": 0.5373, "step": 7550 }, { "epoch": 0.38068382093761016, "grad_norm": 4.418553708208289, "learning_rate": 9.97799058592835e-06, "loss": 0.4695, "step": 7560 }, { "epoch": 0.3811873709653054, "grad_norm": 4.688130142799678, "learning_rate": 9.977715159051453e-06, "loss": 0.5481, "step": 7570 }, { "epoch": 0.38169092099300067, "grad_norm": 4.639865605497406, "learning_rate": 9.977438023370157e-06, "loss": 0.5296, "step": 7580 }, { "epoch": 0.3821944710206959, "grad_norm": 7.4822819537094, "learning_rate": 9.977159178979596e-06, "loss": 0.6261, "step": 7590 }, { "epoch": 0.3826980210483912, "grad_norm": 5.048830377418465, "learning_rate": 9.976878625975497e-06, "loss": 0.5784, "step": 7600 }, { "epoch": 0.38320157107608643, "grad_norm": 5.307092552975513, "learning_rate": 9.97659636445417e-06, "loss": 0.5686, "step": 7610 }, { "epoch": 0.3837051211037817, "grad_norm": 4.545247240872212, "learning_rate": 9.976312394512513e-06, "loss": 0.5251, "step": 7620 }, { "epoch": 0.3842086711314769, "grad_norm": 5.213599583892963, "learning_rate": 9.976026716248011e-06, "loss": 0.51, "step": 7630 }, { "epoch": 0.38471222115917214, "grad_norm": 6.127845213122464, "learning_rate": 9.975739329758736e-06, "loss": 0.5514, "step": 7640 }, { "epoch": 0.3852157711868674, "grad_norm": 4.646824319627755, "learning_rate": 9.975450235143342e-06, "loss": 0.5924, "step": 7650 }, { "epoch": 0.38571932121456265, "grad_norm": 5.7023830088665735, "learning_rate": 9.975159432501074e-06, "loss": 0.5589, "step": 7660 }, { "epoch": 0.3862228712422579, "grad_norm": 4.893170173596403, "learning_rate": 9.974866921931761e-06, "loss": 0.5622, "step": 7670 }, { "epoch": 0.38672642126995316, "grad_norm": 5.89056310184406, "learning_rate": 9.974572703535821e-06, "loss": 0.567, "step": 7680 }, { "epoch": 0.3872299712976484, "grad_norm": 5.446775233742685, "learning_rate": 9.974276777414259e-06, "loss": 0.5291, "step": 7690 }, { "epoch": 0.38773352132534367, "grad_norm": 4.385158089584645, "learning_rate": 9.973979143668655e-06, "loss": 0.5233, "step": 7700 }, { "epoch": 0.3882370713530389, "grad_norm": 5.624708283953548, "learning_rate": 9.973679802401192e-06, "loss": 0.543, "step": 7710 }, { "epoch": 0.3887406213807342, "grad_norm": 3.8954558372003967, "learning_rate": 9.97337875371463e-06, "loss": 0.5349, "step": 7720 }, { "epoch": 0.38924417140842943, "grad_norm": 3.9398589297535755, "learning_rate": 9.973075997712314e-06, "loss": 0.4952, "step": 7730 }, { "epoch": 0.3897477214361247, "grad_norm": 5.3578447978037556, "learning_rate": 9.972771534498177e-06, "loss": 0.5348, "step": 7740 }, { "epoch": 0.39025127146381994, "grad_norm": 4.524032670638024, "learning_rate": 9.972465364176741e-06, "loss": 0.5511, "step": 7750 }, { "epoch": 0.3907548214915152, "grad_norm": 4.151035432129783, "learning_rate": 9.97215748685311e-06, "loss": 0.5697, "step": 7760 }, { "epoch": 0.39125837151921045, "grad_norm": 4.48937670647006, "learning_rate": 9.971847902632974e-06, "loss": 0.568, "step": 7770 }, { "epoch": 0.3917619215469057, "grad_norm": 4.3495092154692765, "learning_rate": 9.971536611622614e-06, "loss": 0.5675, "step": 7780 }, { "epoch": 0.39226547157460095, "grad_norm": 4.491087580255265, "learning_rate": 9.97122361392889e-06, "loss": 0.5713, "step": 7790 }, { "epoch": 0.3927690216022962, "grad_norm": 4.64775426911956, "learning_rate": 9.970908909659256e-06, "loss": 0.5699, "step": 7800 }, { "epoch": 0.39327257162999146, "grad_norm": 3.4231823981545184, "learning_rate": 9.970592498921743e-06, "loss": 0.5551, "step": 7810 }, { "epoch": 0.3937761216576867, "grad_norm": 4.850028788040323, "learning_rate": 9.97027438182497e-06, "loss": 0.5619, "step": 7820 }, { "epoch": 0.3942796716853819, "grad_norm": 4.690745263681754, "learning_rate": 9.969954558478148e-06, "loss": 0.5595, "step": 7830 }, { "epoch": 0.39478322171307717, "grad_norm": 4.536499207626468, "learning_rate": 9.969633028991069e-06, "loss": 0.524, "step": 7840 }, { "epoch": 0.3952867717407724, "grad_norm": 4.356685717576649, "learning_rate": 9.96930979347411e-06, "loss": 0.5203, "step": 7850 }, { "epoch": 0.3957903217684677, "grad_norm": 4.504898642955164, "learning_rate": 9.968984852038233e-06, "loss": 0.5667, "step": 7860 }, { "epoch": 0.39629387179616293, "grad_norm": 3.7073808093601306, "learning_rate": 9.96865820479499e-06, "loss": 0.5086, "step": 7870 }, { "epoch": 0.3967974218238582, "grad_norm": 4.501916430346612, "learning_rate": 9.968329851856517e-06, "loss": 0.5268, "step": 7880 }, { "epoch": 0.39730097185155344, "grad_norm": 4.709433013439439, "learning_rate": 9.967999793335531e-06, "loss": 0.5766, "step": 7890 }, { "epoch": 0.3978045218792487, "grad_norm": 4.069103845224747, "learning_rate": 9.967668029345343e-06, "loss": 0.557, "step": 7900 }, { "epoch": 0.39830807190694395, "grad_norm": 4.392726578048697, "learning_rate": 9.96733455999984e-06, "loss": 0.5561, "step": 7910 }, { "epoch": 0.3988116219346392, "grad_norm": 3.6507726250981034, "learning_rate": 9.9669993854135e-06, "loss": 0.6119, "step": 7920 }, { "epoch": 0.39931517196233446, "grad_norm": 5.134707758953813, "learning_rate": 9.966662505701387e-06, "loss": 0.5273, "step": 7930 }, { "epoch": 0.3998187219900297, "grad_norm": 4.529496011170483, "learning_rate": 9.966323920979148e-06, "loss": 0.5552, "step": 7940 }, { "epoch": 0.40032227201772497, "grad_norm": 4.857486522341726, "learning_rate": 9.965983631363015e-06, "loss": 0.5744, "step": 7950 }, { "epoch": 0.4008258220454202, "grad_norm": 4.261245751440613, "learning_rate": 9.965641636969807e-06, "loss": 0.5809, "step": 7960 }, { "epoch": 0.4013293720731155, "grad_norm": 4.286279694132175, "learning_rate": 9.965297937916929e-06, "loss": 0.5703, "step": 7970 }, { "epoch": 0.40183292210081073, "grad_norm": 4.098440937421438, "learning_rate": 9.964952534322367e-06, "loss": 0.5441, "step": 7980 }, { "epoch": 0.402336472128506, "grad_norm": 4.972587074420484, "learning_rate": 9.964605426304697e-06, "loss": 0.5679, "step": 7990 }, { "epoch": 0.40284002215620124, "grad_norm": 5.1814957986602534, "learning_rate": 9.964256613983079e-06, "loss": 0.5492, "step": 8000 }, { "epoch": 0.4033435721838965, "grad_norm": 4.48698252119615, "learning_rate": 9.963906097477254e-06, "loss": 0.5786, "step": 8010 }, { "epoch": 0.40384712221159175, "grad_norm": 4.119538911954462, "learning_rate": 9.963553876907554e-06, "loss": 0.497, "step": 8020 }, { "epoch": 0.40435067223928695, "grad_norm": 5.671050164879564, "learning_rate": 9.963199952394891e-06, "loss": 0.5776, "step": 8030 }, { "epoch": 0.4048542222669822, "grad_norm": 4.552224871783275, "learning_rate": 9.962844324060764e-06, "loss": 0.5343, "step": 8040 }, { "epoch": 0.40535777229467745, "grad_norm": 4.9366010651487775, "learning_rate": 9.962486992027258e-06, "loss": 0.551, "step": 8050 }, { "epoch": 0.4058613223223727, "grad_norm": 4.689610218838254, "learning_rate": 9.962127956417044e-06, "loss": 0.57, "step": 8060 }, { "epoch": 0.40636487235006796, "grad_norm": 4.370667012879115, "learning_rate": 9.96176721735337e-06, "loss": 0.6166, "step": 8070 }, { "epoch": 0.4068684223777632, "grad_norm": 4.7456020855397, "learning_rate": 9.961404774960081e-06, "loss": 0.5696, "step": 8080 }, { "epoch": 0.40737197240545847, "grad_norm": 4.8593504041820745, "learning_rate": 9.961040629361597e-06, "loss": 0.5059, "step": 8090 }, { "epoch": 0.4078755224331537, "grad_norm": 4.216527539210786, "learning_rate": 9.960674780682923e-06, "loss": 0.544, "step": 8100 }, { "epoch": 0.408379072460849, "grad_norm": 4.119156723555831, "learning_rate": 9.960307229049656e-06, "loss": 0.4996, "step": 8110 }, { "epoch": 0.40888262248854423, "grad_norm": 5.114590990370959, "learning_rate": 9.959937974587971e-06, "loss": 0.5348, "step": 8120 }, { "epoch": 0.4093861725162395, "grad_norm": 4.1426454422923085, "learning_rate": 9.959567017424632e-06, "loss": 0.5086, "step": 8130 }, { "epoch": 0.40988972254393474, "grad_norm": 4.0808587907104545, "learning_rate": 9.959194357686984e-06, "loss": 0.5491, "step": 8140 }, { "epoch": 0.41039327257163, "grad_norm": 4.786973042802253, "learning_rate": 9.958819995502955e-06, "loss": 0.5307, "step": 8150 }, { "epoch": 0.41089682259932525, "grad_norm": 3.8469278857680482, "learning_rate": 9.958443931001063e-06, "loss": 0.5233, "step": 8160 }, { "epoch": 0.4114003726270205, "grad_norm": 4.818406550931295, "learning_rate": 9.958066164310406e-06, "loss": 0.5917, "step": 8170 }, { "epoch": 0.41190392265471576, "grad_norm": 4.668317512298507, "learning_rate": 9.957686695560669e-06, "loss": 0.5612, "step": 8180 }, { "epoch": 0.412407472682411, "grad_norm": 4.912731015544286, "learning_rate": 9.957305524882122e-06, "loss": 0.5901, "step": 8190 }, { "epoch": 0.41291102271010627, "grad_norm": 5.298436997502394, "learning_rate": 9.956922652405611e-06, "loss": 0.566, "step": 8200 }, { "epoch": 0.4134145727378015, "grad_norm": 4.422139853127077, "learning_rate": 9.95653807826258e-06, "loss": 0.5711, "step": 8210 }, { "epoch": 0.4139181227654968, "grad_norm": 4.113265091998366, "learning_rate": 9.956151802585048e-06, "loss": 0.5042, "step": 8220 }, { "epoch": 0.41442167279319203, "grad_norm": 5.36575870070454, "learning_rate": 9.955763825505618e-06, "loss": 0.5883, "step": 8230 }, { "epoch": 0.41492522282088723, "grad_norm": 4.205062365349511, "learning_rate": 9.955374147157477e-06, "loss": 0.5121, "step": 8240 }, { "epoch": 0.4154287728485825, "grad_norm": 12.274525000296563, "learning_rate": 9.954982767674404e-06, "loss": 0.5349, "step": 8250 }, { "epoch": 0.41593232287627774, "grad_norm": 4.392041566136507, "learning_rate": 9.954589687190752e-06, "loss": 0.5903, "step": 8260 }, { "epoch": 0.416435872903973, "grad_norm": 5.059927150905473, "learning_rate": 9.954194905841461e-06, "loss": 0.5366, "step": 8270 }, { "epoch": 0.41693942293166825, "grad_norm": 5.57377517274028, "learning_rate": 9.95379842376206e-06, "loss": 0.5805, "step": 8280 }, { "epoch": 0.4174429729593635, "grad_norm": 4.327097523774863, "learning_rate": 9.953400241088654e-06, "loss": 0.5401, "step": 8290 }, { "epoch": 0.41794652298705876, "grad_norm": 3.6532759803385697, "learning_rate": 9.953000357957936e-06, "loss": 0.56, "step": 8300 }, { "epoch": 0.418450073014754, "grad_norm": 4.638318471606618, "learning_rate": 9.952598774507186e-06, "loss": 0.5676, "step": 8310 }, { "epoch": 0.41895362304244926, "grad_norm": 5.962224311084962, "learning_rate": 9.952195490874257e-06, "loss": 0.5324, "step": 8320 }, { "epoch": 0.4194571730701445, "grad_norm": 3.9081845731232248, "learning_rate": 9.951790507197599e-06, "loss": 0.5366, "step": 8330 }, { "epoch": 0.41996072309783977, "grad_norm": 4.83556651035742, "learning_rate": 9.951383823616236e-06, "loss": 0.5796, "step": 8340 }, { "epoch": 0.420464273125535, "grad_norm": 3.4923956886766807, "learning_rate": 9.95097544026978e-06, "loss": 0.4973, "step": 8350 }, { "epoch": 0.4209678231532303, "grad_norm": 4.817012574499087, "learning_rate": 9.950565357298425e-06, "loss": 0.5283, "step": 8360 }, { "epoch": 0.42147137318092553, "grad_norm": 4.823698159915129, "learning_rate": 9.95015357484295e-06, "loss": 0.5414, "step": 8370 }, { "epoch": 0.4219749232086208, "grad_norm": 4.842071315273465, "learning_rate": 9.949740093044713e-06, "loss": 0.5568, "step": 8380 }, { "epoch": 0.42247847323631604, "grad_norm": 3.754123659960574, "learning_rate": 9.94932491204566e-06, "loss": 0.5247, "step": 8390 }, { "epoch": 0.4229820232640113, "grad_norm": 3.664047475174156, "learning_rate": 9.94890803198832e-06, "loss": 0.5994, "step": 8400 }, { "epoch": 0.42348557329170655, "grad_norm": 4.897709476983425, "learning_rate": 9.948489453015803e-06, "loss": 0.6116, "step": 8410 }, { "epoch": 0.4239891233194018, "grad_norm": 4.745218273192939, "learning_rate": 9.948069175271804e-06, "loss": 0.5419, "step": 8420 }, { "epoch": 0.42449267334709706, "grad_norm": 5.050355129138293, "learning_rate": 9.9476471989006e-06, "loss": 0.51, "step": 8430 }, { "epoch": 0.42499622337479226, "grad_norm": 4.294917737929849, "learning_rate": 9.947223524047053e-06, "loss": 0.5941, "step": 8440 }, { "epoch": 0.4254997734024875, "grad_norm": 5.986289653594223, "learning_rate": 9.946798150856604e-06, "loss": 0.5387, "step": 8450 }, { "epoch": 0.42600332343018277, "grad_norm": 4.224014739399621, "learning_rate": 9.946371079475283e-06, "loss": 0.5643, "step": 8460 }, { "epoch": 0.426506873457878, "grad_norm": 3.4194932824380593, "learning_rate": 9.945942310049699e-06, "loss": 0.5688, "step": 8470 }, { "epoch": 0.4270104234855733, "grad_norm": 4.853395999113369, "learning_rate": 9.945511842727042e-06, "loss": 0.5138, "step": 8480 }, { "epoch": 0.42751397351326853, "grad_norm": 3.9333119429047234, "learning_rate": 9.945079677655089e-06, "loss": 0.5257, "step": 8490 }, { "epoch": 0.4280175235409638, "grad_norm": 4.335386449476934, "learning_rate": 9.944645814982199e-06, "loss": 0.4859, "step": 8500 }, { "epoch": 0.42852107356865904, "grad_norm": 4.340534554045932, "learning_rate": 9.944210254857312e-06, "loss": 0.5948, "step": 8510 }, { "epoch": 0.4290246235963543, "grad_norm": 4.258866151990444, "learning_rate": 9.943772997429955e-06, "loss": 0.5025, "step": 8520 }, { "epoch": 0.42952817362404955, "grad_norm": 4.5113530109422255, "learning_rate": 9.94333404285023e-06, "loss": 0.4992, "step": 8530 }, { "epoch": 0.4300317236517448, "grad_norm": 4.752271164017003, "learning_rate": 9.942893391268828e-06, "loss": 0.5556, "step": 8540 }, { "epoch": 0.43053527367944006, "grad_norm": 5.756885003093479, "learning_rate": 9.942451042837022e-06, "loss": 0.542, "step": 8550 }, { "epoch": 0.4310388237071353, "grad_norm": 4.052865949742971, "learning_rate": 9.942006997706667e-06, "loss": 0.5457, "step": 8560 }, { "epoch": 0.43154237373483056, "grad_norm": 4.968005949746136, "learning_rate": 9.941561256030194e-06, "loss": 0.5499, "step": 8570 }, { "epoch": 0.4320459237625258, "grad_norm": 4.095932930684675, "learning_rate": 9.941113817960628e-06, "loss": 0.4861, "step": 8580 }, { "epoch": 0.4325494737902211, "grad_norm": 4.493782369217652, "learning_rate": 9.940664683651567e-06, "loss": 0.5602, "step": 8590 }, { "epoch": 0.4330530238179163, "grad_norm": 5.015022054499316, "learning_rate": 9.940213853257197e-06, "loss": 0.6055, "step": 8600 }, { "epoch": 0.4335565738456116, "grad_norm": 4.368796104984264, "learning_rate": 9.939761326932282e-06, "loss": 0.5616, "step": 8610 }, { "epoch": 0.43406012387330684, "grad_norm": 4.652737633234526, "learning_rate": 9.939307104832173e-06, "loss": 0.5163, "step": 8620 }, { "epoch": 0.4345636739010021, "grad_norm": 4.078075264879394, "learning_rate": 9.938851187112797e-06, "loss": 0.4809, "step": 8630 }, { "epoch": 0.4350672239286973, "grad_norm": 4.398009780401353, "learning_rate": 9.938393573930667e-06, "loss": 0.5479, "step": 8640 }, { "epoch": 0.43557077395639254, "grad_norm": 4.430368706434948, "learning_rate": 9.93793426544288e-06, "loss": 0.4801, "step": 8650 }, { "epoch": 0.4360743239840878, "grad_norm": 4.284380377930664, "learning_rate": 9.93747326180711e-06, "loss": 0.5636, "step": 8660 }, { "epoch": 0.43657787401178305, "grad_norm": 4.383091071166149, "learning_rate": 9.937010563181618e-06, "loss": 0.5799, "step": 8670 }, { "epoch": 0.4370814240394783, "grad_norm": 3.99595567236134, "learning_rate": 9.936546169725238e-06, "loss": 0.5778, "step": 8680 }, { "epoch": 0.43758497406717356, "grad_norm": 4.6615999564912896, "learning_rate": 9.9360800815974e-06, "loss": 0.5372, "step": 8690 }, { "epoch": 0.4380885240948688, "grad_norm": 4.83541334020499, "learning_rate": 9.935612298958103e-06, "loss": 0.4742, "step": 8700 }, { "epoch": 0.43859207412256407, "grad_norm": 4.8128592659501015, "learning_rate": 9.935142821967935e-06, "loss": 0.5266, "step": 8710 }, { "epoch": 0.4390956241502593, "grad_norm": 4.715592048555458, "learning_rate": 9.934671650788062e-06, "loss": 0.5088, "step": 8720 }, { "epoch": 0.4395991741779546, "grad_norm": 4.287006968131413, "learning_rate": 9.934198785580231e-06, "loss": 0.531, "step": 8730 }, { "epoch": 0.44010272420564983, "grad_norm": 4.05308988522101, "learning_rate": 9.933724226506773e-06, "loss": 0.5071, "step": 8740 }, { "epoch": 0.4406062742333451, "grad_norm": 4.289295984652372, "learning_rate": 9.933247973730604e-06, "loss": 0.5566, "step": 8750 }, { "epoch": 0.44110982426104034, "grad_norm": 4.694478378985095, "learning_rate": 9.932770027415213e-06, "loss": 0.552, "step": 8760 }, { "epoch": 0.4416133742887356, "grad_norm": 4.318334300344476, "learning_rate": 9.932290387724675e-06, "loss": 0.5619, "step": 8770 }, { "epoch": 0.44211692431643085, "grad_norm": 4.423396575492094, "learning_rate": 9.931809054823647e-06, "loss": 0.6022, "step": 8780 }, { "epoch": 0.4426204743441261, "grad_norm": 4.706090397570349, "learning_rate": 9.931326028877366e-06, "loss": 0.5495, "step": 8790 }, { "epoch": 0.44312402437182136, "grad_norm": 4.310861742743537, "learning_rate": 9.93084131005165e-06, "loss": 0.5437, "step": 8800 }, { "epoch": 0.4436275743995166, "grad_norm": 3.6153734766253995, "learning_rate": 9.9303548985129e-06, "loss": 0.5353, "step": 8810 }, { "epoch": 0.44413112442721187, "grad_norm": 4.15472135776358, "learning_rate": 9.929866794428096e-06, "loss": 0.5387, "step": 8820 }, { "epoch": 0.4446346744549071, "grad_norm": 4.958469298034595, "learning_rate": 9.929376997964797e-06, "loss": 0.5708, "step": 8830 }, { "epoch": 0.4451382244826024, "grad_norm": 3.704085441972622, "learning_rate": 9.928885509291151e-06, "loss": 0.5326, "step": 8840 }, { "epoch": 0.4456417745102976, "grad_norm": 3.920962776886184, "learning_rate": 9.928392328575877e-06, "loss": 0.5924, "step": 8850 }, { "epoch": 0.4461453245379928, "grad_norm": 4.9186549888384565, "learning_rate": 9.92789745598828e-06, "loss": 0.4965, "step": 8860 }, { "epoch": 0.4466488745656881, "grad_norm": 4.112370646193245, "learning_rate": 9.927400891698247e-06, "loss": 0.5018, "step": 8870 }, { "epoch": 0.44715242459338334, "grad_norm": 4.481438209401133, "learning_rate": 9.926902635876243e-06, "loss": 0.5718, "step": 8880 }, { "epoch": 0.4476559746210786, "grad_norm": 4.791716856405642, "learning_rate": 9.926402688693317e-06, "loss": 0.584, "step": 8890 }, { "epoch": 0.44815952464877384, "grad_norm": 4.02393334054047, "learning_rate": 9.925901050321091e-06, "loss": 0.5323, "step": 8900 }, { "epoch": 0.4486630746764691, "grad_norm": 3.2857829247086014, "learning_rate": 9.92539772093178e-06, "loss": 0.5029, "step": 8910 }, { "epoch": 0.44916662470416435, "grad_norm": 4.4289504906696555, "learning_rate": 9.924892700698165e-06, "loss": 0.6082, "step": 8920 }, { "epoch": 0.4496701747318596, "grad_norm": 4.051257967587111, "learning_rate": 9.92438598979362e-06, "loss": 0.5505, "step": 8930 }, { "epoch": 0.45017372475955486, "grad_norm": 4.363265673415471, "learning_rate": 9.923877588392093e-06, "loss": 0.5161, "step": 8940 }, { "epoch": 0.4506772747872501, "grad_norm": 4.972854512689043, "learning_rate": 9.923367496668113e-06, "loss": 0.5622, "step": 8950 }, { "epoch": 0.45118082481494537, "grad_norm": 4.2153106595900205, "learning_rate": 9.92285571479679e-06, "loss": 0.5404, "step": 8960 }, { "epoch": 0.4516843748426406, "grad_norm": 4.420824976772757, "learning_rate": 9.922342242953815e-06, "loss": 0.4934, "step": 8970 }, { "epoch": 0.4521879248703359, "grad_norm": 3.87438383070637, "learning_rate": 9.921827081315455e-06, "loss": 0.5491, "step": 8980 }, { "epoch": 0.45269147489803113, "grad_norm": 3.8021236999189374, "learning_rate": 9.921310230058567e-06, "loss": 0.5288, "step": 8990 }, { "epoch": 0.4531950249257264, "grad_norm": 4.95874359928253, "learning_rate": 9.920791689360573e-06, "loss": 0.5628, "step": 9000 }, { "epoch": 0.45369857495342164, "grad_norm": 5.150640803331581, "learning_rate": 9.92027145939949e-06, "loss": 0.5613, "step": 9010 }, { "epoch": 0.4542021249811169, "grad_norm": 3.89417168522616, "learning_rate": 9.919749540353905e-06, "loss": 0.5048, "step": 9020 }, { "epoch": 0.45470567500881215, "grad_norm": 3.96133975403538, "learning_rate": 9.919225932402989e-06, "loss": 0.5647, "step": 9030 }, { "epoch": 0.4552092250365074, "grad_norm": 4.418593377210117, "learning_rate": 9.918700635726491e-06, "loss": 0.5421, "step": 9040 }, { "epoch": 0.4557127750642026, "grad_norm": 4.93635779693576, "learning_rate": 9.918173650504741e-06, "loss": 0.5705, "step": 9050 }, { "epoch": 0.45621632509189786, "grad_norm": 4.288903314281606, "learning_rate": 9.917644976918648e-06, "loss": 0.5763, "step": 9060 }, { "epoch": 0.4567198751195931, "grad_norm": 4.0209979768160755, "learning_rate": 9.917114615149703e-06, "loss": 0.5605, "step": 9070 }, { "epoch": 0.45722342514728836, "grad_norm": 4.078414559336003, "learning_rate": 9.91658256537997e-06, "loss": 0.5094, "step": 9080 }, { "epoch": 0.4577269751749836, "grad_norm": 3.697074302955585, "learning_rate": 9.916048827792102e-06, "loss": 0.5401, "step": 9090 }, { "epoch": 0.4582305252026789, "grad_norm": 4.227056851509717, "learning_rate": 9.915513402569322e-06, "loss": 0.5519, "step": 9100 }, { "epoch": 0.4587340752303741, "grad_norm": 4.940281308419935, "learning_rate": 9.914976289895439e-06, "loss": 0.5456, "step": 9110 }, { "epoch": 0.4592376252580694, "grad_norm": 4.332187960425477, "learning_rate": 9.914437489954838e-06, "loss": 0.5307, "step": 9120 }, { "epoch": 0.45974117528576464, "grad_norm": 4.447718882802147, "learning_rate": 9.913897002932484e-06, "loss": 0.5423, "step": 9130 }, { "epoch": 0.4602447253134599, "grad_norm": 3.9220598113099627, "learning_rate": 9.913354829013922e-06, "loss": 0.5624, "step": 9140 }, { "epoch": 0.46074827534115514, "grad_norm": 5.035419745258107, "learning_rate": 9.912810968385276e-06, "loss": 0.497, "step": 9150 }, { "epoch": 0.4612518253688504, "grad_norm": 5.103069132830674, "learning_rate": 9.912265421233246e-06, "loss": 0.5359, "step": 9160 }, { "epoch": 0.46175537539654565, "grad_norm": 4.552711144977078, "learning_rate": 9.911718187745117e-06, "loss": 0.5736, "step": 9170 }, { "epoch": 0.4622589254242409, "grad_norm": 3.904655315997541, "learning_rate": 9.911169268108745e-06, "loss": 0.5283, "step": 9180 }, { "epoch": 0.46276247545193616, "grad_norm": 5.499221500656912, "learning_rate": 9.910618662512572e-06, "loss": 0.5345, "step": 9190 }, { "epoch": 0.4632660254796314, "grad_norm": 4.3308917299925636, "learning_rate": 9.910066371145615e-06, "loss": 0.5254, "step": 9200 }, { "epoch": 0.46376957550732667, "grad_norm": 4.568090674549481, "learning_rate": 9.90951239419747e-06, "loss": 0.5352, "step": 9210 }, { "epoch": 0.4642731255350219, "grad_norm": 4.355595727133812, "learning_rate": 9.908956731858314e-06, "loss": 0.5637, "step": 9220 }, { "epoch": 0.4647766755627172, "grad_norm": 4.325344915248529, "learning_rate": 9.908399384318898e-06, "loss": 0.5158, "step": 9230 }, { "epoch": 0.46528022559041243, "grad_norm": 6.332274329246023, "learning_rate": 9.907840351770558e-06, "loss": 0.5888, "step": 9240 }, { "epoch": 0.46578377561810763, "grad_norm": 4.474721164024607, "learning_rate": 9.907279634405204e-06, "loss": 0.503, "step": 9250 }, { "epoch": 0.4662873256458029, "grad_norm": 3.7853082604217274, "learning_rate": 9.906717232415322e-06, "loss": 0.4993, "step": 9260 }, { "epoch": 0.46679087567349814, "grad_norm": 5.045627345089198, "learning_rate": 9.906153145993982e-06, "loss": 0.5105, "step": 9270 }, { "epoch": 0.4672944257011934, "grad_norm": 3.8516983834777707, "learning_rate": 9.90558737533483e-06, "loss": 0.5418, "step": 9280 }, { "epoch": 0.46779797572888865, "grad_norm": 3.657377238009112, "learning_rate": 9.90501992063209e-06, "loss": 0.5014, "step": 9290 }, { "epoch": 0.4683015257565839, "grad_norm": 5.0945371597632425, "learning_rate": 9.904450782080563e-06, "loss": 0.5297, "step": 9300 }, { "epoch": 0.46880507578427916, "grad_norm": 4.957882595892696, "learning_rate": 9.903879959875628e-06, "loss": 0.556, "step": 9310 }, { "epoch": 0.4693086258119744, "grad_norm": 4.192870219945079, "learning_rate": 9.903307454213245e-06, "loss": 0.5217, "step": 9320 }, { "epoch": 0.46981217583966967, "grad_norm": 4.565388480186744, "learning_rate": 9.902733265289951e-06, "loss": 0.5338, "step": 9330 }, { "epoch": 0.4703157258673649, "grad_norm": 4.407180277159009, "learning_rate": 9.902157393302858e-06, "loss": 0.5, "step": 9340 }, { "epoch": 0.4708192758950602, "grad_norm": 3.772508655475109, "learning_rate": 9.901579838449657e-06, "loss": 0.5558, "step": 9350 }, { "epoch": 0.47132282592275543, "grad_norm": 4.600765646025735, "learning_rate": 9.90100060092862e-06, "loss": 0.5678, "step": 9360 }, { "epoch": 0.4718263759504507, "grad_norm": 5.725645848903262, "learning_rate": 9.90041968093859e-06, "loss": 0.5744, "step": 9370 }, { "epoch": 0.47232992597814594, "grad_norm": 3.4066774092666092, "learning_rate": 9.899837078678996e-06, "loss": 0.5159, "step": 9380 }, { "epoch": 0.4728334760058412, "grad_norm": 4.258194760157572, "learning_rate": 9.899252794349836e-06, "loss": 0.5086, "step": 9390 }, { "epoch": 0.47333702603353645, "grad_norm": 4.471717375782212, "learning_rate": 9.898666828151691e-06, "loss": 0.4998, "step": 9400 }, { "epoch": 0.4738405760612317, "grad_norm": 4.504367907985565, "learning_rate": 9.89807918028572e-06, "loss": 0.5337, "step": 9410 }, { "epoch": 0.47434412608892695, "grad_norm": 5.11160418725793, "learning_rate": 9.897489850953657e-06, "loss": 0.5762, "step": 9420 }, { "epoch": 0.4748476761166222, "grad_norm": 3.9054653857125765, "learning_rate": 9.896898840357809e-06, "loss": 0.5163, "step": 9430 }, { "epoch": 0.47535122614431746, "grad_norm": 4.892545676863925, "learning_rate": 9.896306148701069e-06, "loss": 0.5106, "step": 9440 }, { "epoch": 0.4758547761720127, "grad_norm": 4.181500038583783, "learning_rate": 9.8957117761869e-06, "loss": 0.5236, "step": 9450 }, { "epoch": 0.4763583261997079, "grad_norm": 4.396139425373519, "learning_rate": 9.895115723019347e-06, "loss": 0.583, "step": 9460 }, { "epoch": 0.47686187622740317, "grad_norm": 4.404600835062334, "learning_rate": 9.894517989403027e-06, "loss": 0.552, "step": 9470 }, { "epoch": 0.4773654262550984, "grad_norm": 3.9666407064902205, "learning_rate": 9.893918575543139e-06, "loss": 0.4977, "step": 9480 }, { "epoch": 0.4778689762827937, "grad_norm": 4.7786303212419154, "learning_rate": 9.893317481645455e-06, "loss": 0.6086, "step": 9490 }, { "epoch": 0.47837252631048893, "grad_norm": 3.948549426451803, "learning_rate": 9.892714707916326e-06, "loss": 0.4885, "step": 9500 }, { "epoch": 0.4788760763381842, "grad_norm": 4.410219456847698, "learning_rate": 9.892110254562678e-06, "loss": 0.5098, "step": 9510 }, { "epoch": 0.47937962636587944, "grad_norm": 4.433635240985968, "learning_rate": 9.891504121792014e-06, "loss": 0.6144, "step": 9520 }, { "epoch": 0.4798831763935747, "grad_norm": 4.20982806394775, "learning_rate": 9.890896309812415e-06, "loss": 0.539, "step": 9530 }, { "epoch": 0.48038672642126995, "grad_norm": 3.982959459075042, "learning_rate": 9.890286818832535e-06, "loss": 0.5295, "step": 9540 }, { "epoch": 0.4808902764489652, "grad_norm": 4.116050713651941, "learning_rate": 9.88967564906161e-06, "loss": 0.536, "step": 9550 }, { "epoch": 0.48139382647666046, "grad_norm": 4.15752361601173, "learning_rate": 9.889062800709446e-06, "loss": 0.5703, "step": 9560 }, { "epoch": 0.4818973765043557, "grad_norm": 4.906662254507785, "learning_rate": 9.888448273986432e-06, "loss": 0.5639, "step": 9570 }, { "epoch": 0.48240092653205097, "grad_norm": 4.772294660572724, "learning_rate": 9.887832069103526e-06, "loss": 0.553, "step": 9580 }, { "epoch": 0.4829044765597462, "grad_norm": 4.034772090382629, "learning_rate": 9.887214186272266e-06, "loss": 0.5555, "step": 9590 }, { "epoch": 0.4834080265874415, "grad_norm": 4.025527858746533, "learning_rate": 9.886594625704766e-06, "loss": 0.4901, "step": 9600 }, { "epoch": 0.48391157661513673, "grad_norm": 4.347046775047892, "learning_rate": 9.885973387613718e-06, "loss": 0.5666, "step": 9610 }, { "epoch": 0.484415126642832, "grad_norm": 4.24414589701676, "learning_rate": 9.885350472212383e-06, "loss": 0.5054, "step": 9620 }, { "epoch": 0.48491867667052724, "grad_norm": 3.6554999661961407, "learning_rate": 9.884725879714604e-06, "loss": 0.4792, "step": 9630 }, { "epoch": 0.4854222266982225, "grad_norm": 4.491098828149228, "learning_rate": 9.8840996103348e-06, "loss": 0.5337, "step": 9640 }, { "epoch": 0.48592577672591775, "grad_norm": 4.134936668796296, "learning_rate": 9.883471664287962e-06, "loss": 0.521, "step": 9650 }, { "epoch": 0.48642932675361295, "grad_norm": 4.347075057126814, "learning_rate": 9.882842041789657e-06, "loss": 0.5538, "step": 9660 }, { "epoch": 0.4869328767813082, "grad_norm": 3.9213045298716405, "learning_rate": 9.88221074305603e-06, "loss": 0.5163, "step": 9670 }, { "epoch": 0.48743642680900345, "grad_norm": 4.630894282031667, "learning_rate": 9.8815777683038e-06, "loss": 0.5622, "step": 9680 }, { "epoch": 0.4879399768366987, "grad_norm": 4.400013036175054, "learning_rate": 9.880943117750261e-06, "loss": 0.516, "step": 9690 }, { "epoch": 0.48844352686439396, "grad_norm": 4.478005120721502, "learning_rate": 9.880306791613283e-06, "loss": 0.5246, "step": 9700 }, { "epoch": 0.4889470768920892, "grad_norm": 3.955636805813399, "learning_rate": 9.879668790111312e-06, "loss": 0.5517, "step": 9710 }, { "epoch": 0.48945062691978447, "grad_norm": 3.8215633416290773, "learning_rate": 9.879029113463366e-06, "loss": 0.5395, "step": 9720 }, { "epoch": 0.4899541769474797, "grad_norm": 4.427147919472376, "learning_rate": 9.878387761889043e-06, "loss": 0.5257, "step": 9730 }, { "epoch": 0.490457726975175, "grad_norm": 4.692273945835363, "learning_rate": 9.87774473560851e-06, "loss": 0.5275, "step": 9740 }, { "epoch": 0.49096127700287023, "grad_norm": 4.5661891533612975, "learning_rate": 9.877100034842515e-06, "loss": 0.5286, "step": 9750 }, { "epoch": 0.4914648270305655, "grad_norm": 4.155355007879778, "learning_rate": 9.876453659812375e-06, "loss": 0.4792, "step": 9760 }, { "epoch": 0.49196837705826074, "grad_norm": 3.628450922905801, "learning_rate": 9.875805610739988e-06, "loss": 0.5332, "step": 9770 }, { "epoch": 0.492471927085956, "grad_norm": 5.1188127819243405, "learning_rate": 9.87515588784782e-06, "loss": 0.5326, "step": 9780 }, { "epoch": 0.49297547711365125, "grad_norm": 3.817661607158162, "learning_rate": 9.874504491358916e-06, "loss": 0.4657, "step": 9790 }, { "epoch": 0.4934790271413465, "grad_norm": 3.9469707458837897, "learning_rate": 9.873851421496896e-06, "loss": 0.4937, "step": 9800 }, { "epoch": 0.49398257716904176, "grad_norm": 3.967753064084268, "learning_rate": 9.873196678485952e-06, "loss": 0.5439, "step": 9810 }, { "epoch": 0.494486127196737, "grad_norm": 4.073745428075176, "learning_rate": 9.872540262550852e-06, "loss": 0.5366, "step": 9820 }, { "epoch": 0.49498967722443227, "grad_norm": 4.0816094315825335, "learning_rate": 9.871882173916938e-06, "loss": 0.5449, "step": 9830 }, { "epoch": 0.4954932272521275, "grad_norm": 4.810536994851646, "learning_rate": 9.871222412810123e-06, "loss": 0.4998, "step": 9840 }, { "epoch": 0.4959967772798228, "grad_norm": 4.109478609007079, "learning_rate": 9.8705609794569e-06, "loss": 0.4919, "step": 9850 }, { "epoch": 0.496500327307518, "grad_norm": 6.398690690054163, "learning_rate": 9.86989787408433e-06, "loss": 0.588, "step": 9860 }, { "epoch": 0.49700387733521323, "grad_norm": 5.23940997516454, "learning_rate": 9.869233096920055e-06, "loss": 0.5125, "step": 9870 }, { "epoch": 0.4975074273629085, "grad_norm": 4.6825979436755905, "learning_rate": 9.868566648192284e-06, "loss": 0.5211, "step": 9880 }, { "epoch": 0.49801097739060374, "grad_norm": 6.17413907077802, "learning_rate": 9.867898528129802e-06, "loss": 0.5271, "step": 9890 }, { "epoch": 0.498514527418299, "grad_norm": 4.107717133515404, "learning_rate": 9.867228736961972e-06, "loss": 0.5253, "step": 9900 }, { "epoch": 0.49901807744599425, "grad_norm": 3.8783692755420844, "learning_rate": 9.866557274918726e-06, "loss": 0.5082, "step": 9910 }, { "epoch": 0.4995216274736895, "grad_norm": 4.3859989308086655, "learning_rate": 9.86588414223057e-06, "loss": 0.567, "step": 9920 }, { "epoch": 0.5000251775013848, "grad_norm": 4.608088488732189, "learning_rate": 9.865209339128583e-06, "loss": 0.532, "step": 9930 }, { "epoch": 0.5005287275290801, "grad_norm": 4.347339697451645, "learning_rate": 9.864532865844424e-06, "loss": 0.521, "step": 9940 }, { "epoch": 0.5010322775567753, "grad_norm": 4.493784088199465, "learning_rate": 9.863854722610314e-06, "loss": 0.5446, "step": 9950 }, { "epoch": 0.5015358275844705, "grad_norm": 5.695965490794858, "learning_rate": 9.863174909659055e-06, "loss": 0.5283, "step": 9960 }, { "epoch": 0.5020393776121658, "grad_norm": 3.9493668005351794, "learning_rate": 9.862493427224023e-06, "loss": 0.5326, "step": 9970 }, { "epoch": 0.502542927639861, "grad_norm": 4.761089363930053, "learning_rate": 9.861810275539164e-06, "loss": 0.5412, "step": 9980 }, { "epoch": 0.5030464776675563, "grad_norm": 4.39505412657747, "learning_rate": 9.861125454838994e-06, "loss": 0.5097, "step": 9990 }, { "epoch": 0.5035500276952515, "grad_norm": 3.9040250070774896, "learning_rate": 9.860438965358612e-06, "loss": 0.4699, "step": 10000 }, { "epoch": 0.5040535777229468, "grad_norm": 4.255341236976534, "learning_rate": 9.859750807333678e-06, "loss": 0.5415, "step": 10010 }, { "epoch": 0.504557127750642, "grad_norm": 4.36312174717761, "learning_rate": 9.859060981000435e-06, "loss": 0.538, "step": 10020 }, { "epoch": 0.5050606777783373, "grad_norm": 5.220426756579555, "learning_rate": 9.858369486595692e-06, "loss": 0.5344, "step": 10030 }, { "epoch": 0.5055642278060325, "grad_norm": 4.155849111113183, "learning_rate": 9.85767632435683e-06, "loss": 0.5379, "step": 10040 }, { "epoch": 0.5060677778337278, "grad_norm": 3.951328571380757, "learning_rate": 9.85698149452181e-06, "loss": 0.4506, "step": 10050 }, { "epoch": 0.506571327861423, "grad_norm": 5.011467228381317, "learning_rate": 9.856284997329158e-06, "loss": 0.4889, "step": 10060 }, { "epoch": 0.5070748778891183, "grad_norm": 4.731937192827502, "learning_rate": 9.855586833017977e-06, "loss": 0.5364, "step": 10070 }, { "epoch": 0.5075784279168135, "grad_norm": 4.013504814695998, "learning_rate": 9.854887001827937e-06, "loss": 0.5079, "step": 10080 }, { "epoch": 0.5080819779445088, "grad_norm": 3.816360694283444, "learning_rate": 9.854185503999288e-06, "loss": 0.4742, "step": 10090 }, { "epoch": 0.508585527972204, "grad_norm": 4.806418536254357, "learning_rate": 9.853482339772846e-06, "loss": 0.5407, "step": 10100 }, { "epoch": 0.5090890779998993, "grad_norm": 4.8890398059589435, "learning_rate": 9.852777509389999e-06, "loss": 0.5331, "step": 10110 }, { "epoch": 0.5095926280275945, "grad_norm": 3.6935898948040844, "learning_rate": 9.852071013092713e-06, "loss": 0.5468, "step": 10120 }, { "epoch": 0.5100961780552898, "grad_norm": 3.972026287285784, "learning_rate": 9.851362851123516e-06, "loss": 0.5279, "step": 10130 }, { "epoch": 0.510599728082985, "grad_norm": 3.5978721286898945, "learning_rate": 9.85065302372552e-06, "loss": 0.5551, "step": 10140 }, { "epoch": 0.5111032781106803, "grad_norm": 3.736438719351624, "learning_rate": 9.8499415311424e-06, "loss": 0.5058, "step": 10150 }, { "epoch": 0.5116068281383755, "grad_norm": 3.828146357622851, "learning_rate": 9.849228373618402e-06, "loss": 0.5454, "step": 10160 }, { "epoch": 0.5121103781660707, "grad_norm": 4.824284574301837, "learning_rate": 9.848513551398348e-06, "loss": 0.5527, "step": 10170 }, { "epoch": 0.512613928193766, "grad_norm": 3.9425135263668296, "learning_rate": 9.847797064727633e-06, "loss": 0.4301, "step": 10180 }, { "epoch": 0.5131174782214613, "grad_norm": 4.337714621592641, "learning_rate": 9.847078913852218e-06, "loss": 0.5511, "step": 10190 }, { "epoch": 0.5136210282491566, "grad_norm": 4.7584532097129255, "learning_rate": 9.846359099018635e-06, "loss": 0.5695, "step": 10200 }, { "epoch": 0.5141245782768518, "grad_norm": 4.488213059731744, "learning_rate": 9.845637620473996e-06, "loss": 0.5682, "step": 10210 }, { "epoch": 0.5146281283045471, "grad_norm": 4.830100499736187, "learning_rate": 9.844914478465972e-06, "loss": 0.541, "step": 10220 }, { "epoch": 0.5151316783322423, "grad_norm": 4.8361186308295006, "learning_rate": 9.844189673242815e-06, "loss": 0.4667, "step": 10230 }, { "epoch": 0.5156352283599376, "grad_norm": 3.952906133545224, "learning_rate": 9.843463205053341e-06, "loss": 0.5311, "step": 10240 }, { "epoch": 0.5161387783876328, "grad_norm": 3.367228027808665, "learning_rate": 9.842735074146944e-06, "loss": 0.5196, "step": 10250 }, { "epoch": 0.5166423284153281, "grad_norm": 4.405400694482531, "learning_rate": 9.842005280773582e-06, "loss": 0.4782, "step": 10260 }, { "epoch": 0.5171458784430233, "grad_norm": 3.32760647369848, "learning_rate": 9.841273825183786e-06, "loss": 0.5272, "step": 10270 }, { "epoch": 0.5176494284707186, "grad_norm": 4.207266083748766, "learning_rate": 9.84054070762866e-06, "loss": 0.4298, "step": 10280 }, { "epoch": 0.5181529784984138, "grad_norm": 4.7869731914729785, "learning_rate": 9.839805928359876e-06, "loss": 0.5092, "step": 10290 }, { "epoch": 0.5186565285261091, "grad_norm": 4.666535367084879, "learning_rate": 9.839069487629674e-06, "loss": 0.4905, "step": 10300 }, { "epoch": 0.5191600785538043, "grad_norm": 4.590933544515374, "learning_rate": 9.83833138569087e-06, "loss": 0.5812, "step": 10310 }, { "epoch": 0.5196636285814996, "grad_norm": 4.54674272856783, "learning_rate": 9.837591622796852e-06, "loss": 0.5086, "step": 10320 }, { "epoch": 0.5201671786091948, "grad_norm": 4.0387821877350705, "learning_rate": 9.836850199201567e-06, "loss": 0.5133, "step": 10330 }, { "epoch": 0.5206707286368901, "grad_norm": 5.598391779187886, "learning_rate": 9.836107115159541e-06, "loss": 0.5295, "step": 10340 }, { "epoch": 0.5211742786645853, "grad_norm": 3.972287760541975, "learning_rate": 9.835362370925868e-06, "loss": 0.5041, "step": 10350 }, { "epoch": 0.5216778286922806, "grad_norm": 4.030254065736541, "learning_rate": 9.834615966756213e-06, "loss": 0.486, "step": 10360 }, { "epoch": 0.5221813787199758, "grad_norm": 4.490882698223031, "learning_rate": 9.83386790290681e-06, "loss": 0.5414, "step": 10370 }, { "epoch": 0.522684928747671, "grad_norm": 4.345022364921494, "learning_rate": 9.833118179634461e-06, "loss": 0.5169, "step": 10380 }, { "epoch": 0.5231884787753663, "grad_norm": 4.1441946658187865, "learning_rate": 9.832366797196542e-06, "loss": 0.5204, "step": 10390 }, { "epoch": 0.5236920288030615, "grad_norm": 4.425615457932682, "learning_rate": 9.831613755850992e-06, "loss": 0.5188, "step": 10400 }, { "epoch": 0.5241955788307568, "grad_norm": 12.716787963163538, "learning_rate": 9.830859055856326e-06, "loss": 0.502, "step": 10410 }, { "epoch": 0.524699128858452, "grad_norm": 4.127043474610017, "learning_rate": 9.830102697471621e-06, "loss": 0.488, "step": 10420 }, { "epoch": 0.5252026788861474, "grad_norm": 4.667591171581116, "learning_rate": 9.829344680956537e-06, "loss": 0.5517, "step": 10430 }, { "epoch": 0.5257062289138426, "grad_norm": 4.38024101020624, "learning_rate": 9.828585006571285e-06, "loss": 0.5283, "step": 10440 }, { "epoch": 0.5262097789415379, "grad_norm": 3.7259323861622327, "learning_rate": 9.82782367457666e-06, "loss": 0.5614, "step": 10450 }, { "epoch": 0.5267133289692331, "grad_norm": 4.457680819445193, "learning_rate": 9.827060685234018e-06, "loss": 0.5061, "step": 10460 }, { "epoch": 0.5272168789969284, "grad_norm": 4.576392857113229, "learning_rate": 9.826296038805285e-06, "loss": 0.5349, "step": 10470 }, { "epoch": 0.5277204290246236, "grad_norm": 3.634326907377669, "learning_rate": 9.825529735552961e-06, "loss": 0.542, "step": 10480 }, { "epoch": 0.5282239790523189, "grad_norm": 4.346457915261354, "learning_rate": 9.824761775740108e-06, "loss": 0.5471, "step": 10490 }, { "epoch": 0.5287275290800141, "grad_norm": 3.830570647442484, "learning_rate": 9.823992159630361e-06, "loss": 0.4826, "step": 10500 }, { "epoch": 0.5292310791077094, "grad_norm": 5.040797840141874, "learning_rate": 9.823220887487923e-06, "loss": 0.5099, "step": 10510 }, { "epoch": 0.5297346291354046, "grad_norm": 4.341574708998716, "learning_rate": 9.82244795957756e-06, "loss": 0.5088, "step": 10520 }, { "epoch": 0.5302381791630999, "grad_norm": 3.94995950637739, "learning_rate": 9.821673376164617e-06, "loss": 0.5091, "step": 10530 }, { "epoch": 0.5307417291907951, "grad_norm": 3.8316514869712823, "learning_rate": 9.820897137514999e-06, "loss": 0.5283, "step": 10540 }, { "epoch": 0.5312452792184904, "grad_norm": 5.234408125018396, "learning_rate": 9.820119243895183e-06, "loss": 0.4813, "step": 10550 }, { "epoch": 0.5317488292461856, "grad_norm": 4.263001085805249, "learning_rate": 9.81933969557221e-06, "loss": 0.5686, "step": 10560 }, { "epoch": 0.5322523792738808, "grad_norm": 3.8783852486211665, "learning_rate": 9.818558492813692e-06, "loss": 0.5619, "step": 10570 }, { "epoch": 0.5327559293015761, "grad_norm": 4.040529354749592, "learning_rate": 9.817775635887813e-06, "loss": 0.5884, "step": 10580 }, { "epoch": 0.5332594793292713, "grad_norm": 4.803289167656607, "learning_rate": 9.816991125063315e-06, "loss": 0.5481, "step": 10590 }, { "epoch": 0.5337630293569666, "grad_norm": 4.073576952114034, "learning_rate": 9.816204960609518e-06, "loss": 0.5261, "step": 10600 }, { "epoch": 0.5342665793846618, "grad_norm": 4.263944730486109, "learning_rate": 9.815417142796305e-06, "loss": 0.4967, "step": 10610 }, { "epoch": 0.5347701294123571, "grad_norm": 3.6939795524481265, "learning_rate": 9.814627671894123e-06, "loss": 0.54, "step": 10620 }, { "epoch": 0.5352736794400523, "grad_norm": 4.3059587121353475, "learning_rate": 9.813836548173993e-06, "loss": 0.5237, "step": 10630 }, { "epoch": 0.5357772294677476, "grad_norm": 4.186185600053734, "learning_rate": 9.8130437719075e-06, "loss": 0.5018, "step": 10640 }, { "epoch": 0.5362807794954428, "grad_norm": 4.1030043218408645, "learning_rate": 9.812249343366796e-06, "loss": 0.5378, "step": 10650 }, { "epoch": 0.5367843295231381, "grad_norm": 4.328719258037523, "learning_rate": 9.811453262824602e-06, "loss": 0.4819, "step": 10660 }, { "epoch": 0.5372878795508333, "grad_norm": 4.431876554471847, "learning_rate": 9.810655530554205e-06, "loss": 0.4898, "step": 10670 }, { "epoch": 0.5377914295785287, "grad_norm": 3.8481474133663167, "learning_rate": 9.80985614682946e-06, "loss": 0.4982, "step": 10680 }, { "epoch": 0.5382949796062239, "grad_norm": 3.1877567908443765, "learning_rate": 9.809055111924788e-06, "loss": 0.5068, "step": 10690 }, { "epoch": 0.5387985296339192, "grad_norm": 4.2405938495815, "learning_rate": 9.808252426115177e-06, "loss": 0.5135, "step": 10700 }, { "epoch": 0.5393020796616144, "grad_norm": 3.8444134219018453, "learning_rate": 9.80744808967618e-06, "loss": 0.4722, "step": 10710 }, { "epoch": 0.5398056296893097, "grad_norm": 3.339008506183769, "learning_rate": 9.80664210288392e-06, "loss": 0.5232, "step": 10720 }, { "epoch": 0.5403091797170049, "grad_norm": 3.9996291373151327, "learning_rate": 9.805834466015084e-06, "loss": 0.5296, "step": 10730 }, { "epoch": 0.5408127297447002, "grad_norm": 4.202044863019739, "learning_rate": 9.805025179346928e-06, "loss": 0.4379, "step": 10740 }, { "epoch": 0.5413162797723954, "grad_norm": 4.659466553253908, "learning_rate": 9.804214243157272e-06, "loss": 0.5234, "step": 10750 }, { "epoch": 0.5418198298000907, "grad_norm": 4.1322416366699315, "learning_rate": 9.803401657724503e-06, "loss": 0.5324, "step": 10760 }, { "epoch": 0.5423233798277859, "grad_norm": 4.610320005415769, "learning_rate": 9.802587423327571e-06, "loss": 0.5444, "step": 10770 }, { "epoch": 0.5428269298554811, "grad_norm": 4.244665263940304, "learning_rate": 9.801771540246002e-06, "loss": 0.5523, "step": 10780 }, { "epoch": 0.5433304798831764, "grad_norm": 4.281338173718613, "learning_rate": 9.800954008759877e-06, "loss": 0.5534, "step": 10790 }, { "epoch": 0.5438340299108716, "grad_norm": 4.711158150193159, "learning_rate": 9.800134829149846e-06, "loss": 0.5381, "step": 10800 }, { "epoch": 0.5443375799385669, "grad_norm": 5.030726962302378, "learning_rate": 9.799314001697128e-06, "loss": 0.4684, "step": 10810 }, { "epoch": 0.5448411299662621, "grad_norm": 4.077860765361999, "learning_rate": 9.798491526683508e-06, "loss": 0.5043, "step": 10820 }, { "epoch": 0.5453446799939574, "grad_norm": 4.0616789448686745, "learning_rate": 9.797667404391328e-06, "loss": 0.509, "step": 10830 }, { "epoch": 0.5458482300216526, "grad_norm": 4.616256154630502, "learning_rate": 9.796841635103506e-06, "loss": 0.4894, "step": 10840 }, { "epoch": 0.5463517800493479, "grad_norm": 3.8811104944454087, "learning_rate": 9.796014219103521e-06, "loss": 0.5424, "step": 10850 }, { "epoch": 0.5468553300770431, "grad_norm": 3.875075376652056, "learning_rate": 9.795185156675415e-06, "loss": 0.4917, "step": 10860 }, { "epoch": 0.5473588801047384, "grad_norm": 3.5631333291303147, "learning_rate": 9.794354448103802e-06, "loss": 0.5094, "step": 10870 }, { "epoch": 0.5478624301324336, "grad_norm": 4.2006758291373565, "learning_rate": 9.79352209367385e-06, "loss": 0.5433, "step": 10880 }, { "epoch": 0.5483659801601289, "grad_norm": 5.309360482048758, "learning_rate": 9.792688093671306e-06, "loss": 0.5134, "step": 10890 }, { "epoch": 0.5488695301878241, "grad_norm": 4.113426153747731, "learning_rate": 9.79185244838247e-06, "loss": 0.5286, "step": 10900 }, { "epoch": 0.5493730802155194, "grad_norm": 4.213591800322113, "learning_rate": 9.791015158094213e-06, "loss": 0.4934, "step": 10910 }, { "epoch": 0.5498766302432146, "grad_norm": 3.115468324795535, "learning_rate": 9.79017622309397e-06, "loss": 0.5118, "step": 10920 }, { "epoch": 0.55038018027091, "grad_norm": 5.156084510124251, "learning_rate": 9.789335643669736e-06, "loss": 0.535, "step": 10930 }, { "epoch": 0.5508837302986052, "grad_norm": 3.110483550427967, "learning_rate": 9.788493420110079e-06, "loss": 0.4485, "step": 10940 }, { "epoch": 0.5513872803263005, "grad_norm": 3.418838304477611, "learning_rate": 9.787649552704124e-06, "loss": 0.5238, "step": 10950 }, { "epoch": 0.5518908303539957, "grad_norm": 3.7348838116598655, "learning_rate": 9.786804041741562e-06, "loss": 0.4958, "step": 10960 }, { "epoch": 0.552394380381691, "grad_norm": 4.772732307335865, "learning_rate": 9.785956887512654e-06, "loss": 0.512, "step": 10970 }, { "epoch": 0.5528979304093862, "grad_norm": 4.847126601285206, "learning_rate": 9.785108090308214e-06, "loss": 0.4911, "step": 10980 }, { "epoch": 0.5534014804370814, "grad_norm": 3.9142484145965257, "learning_rate": 9.78425765041963e-06, "loss": 0.5063, "step": 10990 }, { "epoch": 0.5539050304647767, "grad_norm": 3.5373765849432375, "learning_rate": 9.783405568138853e-06, "loss": 0.5437, "step": 11000 }, { "epoch": 0.5544085804924719, "grad_norm": 4.235270533611015, "learning_rate": 9.782551843758388e-06, "loss": 0.5386, "step": 11010 }, { "epoch": 0.5549121305201672, "grad_norm": 4.078744286018541, "learning_rate": 9.781696477571317e-06, "loss": 0.5203, "step": 11020 }, { "epoch": 0.5554156805478624, "grad_norm": 5.159163655147871, "learning_rate": 9.780839469871278e-06, "loss": 0.4554, "step": 11030 }, { "epoch": 0.5559192305755577, "grad_norm": 4.154857018490995, "learning_rate": 9.77998082095247e-06, "loss": 0.5032, "step": 11040 }, { "epoch": 0.5564227806032529, "grad_norm": 5.300206910707509, "learning_rate": 9.779120531109666e-06, "loss": 0.5535, "step": 11050 }, { "epoch": 0.5569263306309482, "grad_norm": 3.6473143319274683, "learning_rate": 9.77825860063819e-06, "loss": 0.4563, "step": 11060 }, { "epoch": 0.5574298806586434, "grad_norm": 3.863523652386833, "learning_rate": 9.777395029833939e-06, "loss": 0.4637, "step": 11070 }, { "epoch": 0.5579334306863387, "grad_norm": 4.010207620805057, "learning_rate": 9.776529818993366e-06, "loss": 0.541, "step": 11080 }, { "epoch": 0.5584369807140339, "grad_norm": 4.369986332847322, "learning_rate": 9.77566296841349e-06, "loss": 0.5464, "step": 11090 }, { "epoch": 0.5589405307417292, "grad_norm": 4.123821629947784, "learning_rate": 9.774794478391896e-06, "loss": 0.5084, "step": 11100 }, { "epoch": 0.5594440807694244, "grad_norm": 3.6060268436629825, "learning_rate": 9.773924349226726e-06, "loss": 0.5386, "step": 11110 }, { "epoch": 0.5599476307971197, "grad_norm": 3.699332213895446, "learning_rate": 9.773052581216688e-06, "loss": 0.4869, "step": 11120 }, { "epoch": 0.5604511808248149, "grad_norm": 5.973686083505368, "learning_rate": 9.772179174661054e-06, "loss": 0.5286, "step": 11130 }, { "epoch": 0.5609547308525102, "grad_norm": 4.081890279154399, "learning_rate": 9.771304129859653e-06, "loss": 0.4596, "step": 11140 }, { "epoch": 0.5614582808802054, "grad_norm": 4.19020322243874, "learning_rate": 9.770427447112883e-06, "loss": 0.5384, "step": 11150 }, { "epoch": 0.5619618309079008, "grad_norm": 4.921116356760908, "learning_rate": 9.7695491267217e-06, "loss": 0.5701, "step": 11160 }, { "epoch": 0.562465380935596, "grad_norm": 4.116423759034236, "learning_rate": 9.768669168987624e-06, "loss": 0.554, "step": 11170 }, { "epoch": 0.5629689309632911, "grad_norm": 4.1131179498004355, "learning_rate": 9.767787574212733e-06, "loss": 0.4919, "step": 11180 }, { "epoch": 0.5634724809909865, "grad_norm": 4.199990066092335, "learning_rate": 9.766904342699677e-06, "loss": 0.532, "step": 11190 }, { "epoch": 0.5639760310186817, "grad_norm": 4.283762378563982, "learning_rate": 9.766019474751656e-06, "loss": 0.531, "step": 11200 }, { "epoch": 0.564479581046377, "grad_norm": 4.3063623070988015, "learning_rate": 9.765132970672439e-06, "loss": 0.5553, "step": 11210 }, { "epoch": 0.5649831310740722, "grad_norm": 3.770488028879069, "learning_rate": 9.764244830766355e-06, "loss": 0.448, "step": 11220 }, { "epoch": 0.5654866811017675, "grad_norm": 4.360772152866478, "learning_rate": 9.763355055338293e-06, "loss": 0.5627, "step": 11230 }, { "epoch": 0.5659902311294627, "grad_norm": 4.619832672602185, "learning_rate": 9.762463644693707e-06, "loss": 0.4664, "step": 11240 }, { "epoch": 0.566493781157158, "grad_norm": 3.9548524422387255, "learning_rate": 9.761570599138607e-06, "loss": 0.4735, "step": 11250 }, { "epoch": 0.5669973311848532, "grad_norm": 4.961145034411274, "learning_rate": 9.760675918979573e-06, "loss": 0.5792, "step": 11260 }, { "epoch": 0.5675008812125485, "grad_norm": 4.29672737276343, "learning_rate": 9.759779604523735e-06, "loss": 0.4771, "step": 11270 }, { "epoch": 0.5680044312402437, "grad_norm": 4.546037906217386, "learning_rate": 9.758881656078792e-06, "loss": 0.5002, "step": 11280 }, { "epoch": 0.568507981267939, "grad_norm": 4.383659681677952, "learning_rate": 9.757982073953001e-06, "loss": 0.5166, "step": 11290 }, { "epoch": 0.5690115312956342, "grad_norm": 4.004621207825169, "learning_rate": 9.757080858455182e-06, "loss": 0.5304, "step": 11300 }, { "epoch": 0.5695150813233295, "grad_norm": 5.196700417738529, "learning_rate": 9.756178009894712e-06, "loss": 0.5076, "step": 11310 }, { "epoch": 0.5700186313510247, "grad_norm": 3.9972597566428054, "learning_rate": 9.75527352858153e-06, "loss": 0.4997, "step": 11320 }, { "epoch": 0.57052218137872, "grad_norm": 4.576601032367968, "learning_rate": 9.754367414826138e-06, "loss": 0.5656, "step": 11330 }, { "epoch": 0.5710257314064152, "grad_norm": 4.925463545133142, "learning_rate": 9.753459668939598e-06, "loss": 0.5204, "step": 11340 }, { "epoch": 0.5715292814341105, "grad_norm": 4.002487585522616, "learning_rate": 9.752550291233528e-06, "loss": 0.4906, "step": 11350 }, { "epoch": 0.5720328314618057, "grad_norm": 4.4335063165095665, "learning_rate": 9.75163928202011e-06, "loss": 0.5328, "step": 11360 }, { "epoch": 0.572536381489501, "grad_norm": 4.653981040788885, "learning_rate": 9.750726641612085e-06, "loss": 0.511, "step": 11370 }, { "epoch": 0.5730399315171962, "grad_norm": 4.538774882753471, "learning_rate": 9.749812370322756e-06, "loss": 0.4975, "step": 11380 }, { "epoch": 0.5735434815448914, "grad_norm": 4.735763546449622, "learning_rate": 9.748896468465981e-06, "loss": 0.5555, "step": 11390 }, { "epoch": 0.5740470315725867, "grad_norm": 3.597924689932579, "learning_rate": 9.747978936356184e-06, "loss": 0.4992, "step": 11400 }, { "epoch": 0.5745505816002819, "grad_norm": 3.586757469489004, "learning_rate": 9.747059774308343e-06, "loss": 0.5132, "step": 11410 }, { "epoch": 0.5750541316279772, "grad_norm": 3.079587981279696, "learning_rate": 9.746138982638e-06, "loss": 0.5427, "step": 11420 }, { "epoch": 0.5755576816556724, "grad_norm": 4.3735172981972, "learning_rate": 9.745216561661252e-06, "loss": 0.5198, "step": 11430 }, { "epoch": 0.5760612316833678, "grad_norm": 4.118643920202108, "learning_rate": 9.744292511694759e-06, "loss": 0.558, "step": 11440 }, { "epoch": 0.576564781711063, "grad_norm": 5.050507207205871, "learning_rate": 9.743366833055739e-06, "loss": 0.5488, "step": 11450 }, { "epoch": 0.5770683317387583, "grad_norm": 3.8736159740163734, "learning_rate": 9.74243952606197e-06, "loss": 0.5149, "step": 11460 }, { "epoch": 0.5775718817664535, "grad_norm": 3.7216607778319264, "learning_rate": 9.741510591031786e-06, "loss": 0.4982, "step": 11470 }, { "epoch": 0.5780754317941488, "grad_norm": 4.223167606106474, "learning_rate": 9.740580028284083e-06, "loss": 0.555, "step": 11480 }, { "epoch": 0.578578981821844, "grad_norm": 3.9857000757215735, "learning_rate": 9.739647838138316e-06, "loss": 0.5466, "step": 11490 }, { "epoch": 0.5790825318495393, "grad_norm": 5.702088729019709, "learning_rate": 9.738714020914496e-06, "loss": 0.4987, "step": 11500 }, { "epoch": 0.5795860818772345, "grad_norm": 3.651861608329516, "learning_rate": 9.737778576933195e-06, "loss": 0.4101, "step": 11510 }, { "epoch": 0.5800896319049298, "grad_norm": 4.675422918545806, "learning_rate": 9.73684150651554e-06, "loss": 0.5523, "step": 11520 }, { "epoch": 0.580593181932625, "grad_norm": 4.164442387077807, "learning_rate": 9.73590280998322e-06, "loss": 0.5257, "step": 11530 }, { "epoch": 0.5810967319603203, "grad_norm": 4.7071991619320634, "learning_rate": 9.73496248765848e-06, "loss": 0.5498, "step": 11540 }, { "epoch": 0.5816002819880155, "grad_norm": 3.334625427000351, "learning_rate": 9.73402053986413e-06, "loss": 0.4695, "step": 11550 }, { "epoch": 0.5821038320157108, "grad_norm": 3.7654090166527934, "learning_rate": 9.733076966923525e-06, "loss": 0.4557, "step": 11560 }, { "epoch": 0.582607382043406, "grad_norm": 4.605507599952492, "learning_rate": 9.732131769160588e-06, "loss": 0.5058, "step": 11570 }, { "epoch": 0.5831109320711013, "grad_norm": 3.7835583842076987, "learning_rate": 9.731184946899796e-06, "loss": 0.5434, "step": 11580 }, { "epoch": 0.5836144820987965, "grad_norm": 4.046718240844815, "learning_rate": 9.730236500466185e-06, "loss": 0.5238, "step": 11590 }, { "epoch": 0.5841180321264917, "grad_norm": 4.222716324010467, "learning_rate": 9.729286430185349e-06, "loss": 0.5401, "step": 11600 }, { "epoch": 0.584621582154187, "grad_norm": 4.473185380953914, "learning_rate": 9.728334736383435e-06, "loss": 0.4887, "step": 11610 }, { "epoch": 0.5851251321818822, "grad_norm": 4.638157894268515, "learning_rate": 9.727381419387157e-06, "loss": 0.544, "step": 11620 }, { "epoch": 0.5856286822095775, "grad_norm": 4.072718262645167, "learning_rate": 9.726426479523774e-06, "loss": 0.5049, "step": 11630 }, { "epoch": 0.5861322322372727, "grad_norm": 3.3390434012851755, "learning_rate": 9.72546991712111e-06, "loss": 0.4825, "step": 11640 }, { "epoch": 0.586635782264968, "grad_norm": 3.448752680062804, "learning_rate": 9.724511732507547e-06, "loss": 0.4887, "step": 11650 }, { "epoch": 0.5871393322926632, "grad_norm": 3.7064179719851227, "learning_rate": 9.723551926012016e-06, "loss": 0.5158, "step": 11660 }, { "epoch": 0.5876428823203586, "grad_norm": 4.257891036109579, "learning_rate": 9.722590497964014e-06, "loss": 0.5701, "step": 11670 }, { "epoch": 0.5881464323480537, "grad_norm": 3.298535443848873, "learning_rate": 9.721627448693589e-06, "loss": 0.5047, "step": 11680 }, { "epoch": 0.5886499823757491, "grad_norm": 4.4017259372273, "learning_rate": 9.720662778531347e-06, "loss": 0.4811, "step": 11690 }, { "epoch": 0.5891535324034443, "grad_norm": 4.207139080218793, "learning_rate": 9.719696487808451e-06, "loss": 0.5145, "step": 11700 }, { "epoch": 0.5896570824311396, "grad_norm": 4.233720589805015, "learning_rate": 9.71872857685662e-06, "loss": 0.5188, "step": 11710 }, { "epoch": 0.5901606324588348, "grad_norm": 5.020850979833205, "learning_rate": 9.717759046008128e-06, "loss": 0.5403, "step": 11720 }, { "epoch": 0.5906641824865301, "grad_norm": 3.9911476810925137, "learning_rate": 9.716787895595809e-06, "loss": 0.5627, "step": 11730 }, { "epoch": 0.5911677325142253, "grad_norm": 4.017857168740905, "learning_rate": 9.715815125953046e-06, "loss": 0.4777, "step": 11740 }, { "epoch": 0.5916712825419206, "grad_norm": 4.056137061939091, "learning_rate": 9.714840737413784e-06, "loss": 0.5368, "step": 11750 }, { "epoch": 0.5921748325696158, "grad_norm": 4.329659458343446, "learning_rate": 9.71386473031252e-06, "loss": 0.4835, "step": 11760 }, { "epoch": 0.5926783825973111, "grad_norm": 4.1774601893101595, "learning_rate": 9.712887104984312e-06, "loss": 0.5229, "step": 11770 }, { "epoch": 0.5931819326250063, "grad_norm": 4.373557841528901, "learning_rate": 9.711907861764766e-06, "loss": 0.5449, "step": 11780 }, { "epoch": 0.5936854826527015, "grad_norm": 3.9132503825045952, "learning_rate": 9.710927000990052e-06, "loss": 0.4989, "step": 11790 }, { "epoch": 0.5941890326803968, "grad_norm": 3.9591757270804764, "learning_rate": 9.709944522996885e-06, "loss": 0.5285, "step": 11800 }, { "epoch": 0.594692582708092, "grad_norm": 4.295817476726038, "learning_rate": 9.708960428122544e-06, "loss": 0.4745, "step": 11810 }, { "epoch": 0.5951961327357873, "grad_norm": 4.474197495410239, "learning_rate": 9.707974716704861e-06, "loss": 0.5379, "step": 11820 }, { "epoch": 0.5956996827634825, "grad_norm": 5.151328861650969, "learning_rate": 9.70698738908222e-06, "loss": 0.5192, "step": 11830 }, { "epoch": 0.5962032327911778, "grad_norm": 4.8974263860619685, "learning_rate": 9.70599844559356e-06, "loss": 0.4966, "step": 11840 }, { "epoch": 0.596706782818873, "grad_norm": 3.7670047033863514, "learning_rate": 9.705007886578378e-06, "loss": 0.4955, "step": 11850 }, { "epoch": 0.5972103328465683, "grad_norm": 4.705530879160022, "learning_rate": 9.704015712376725e-06, "loss": 0.5403, "step": 11860 }, { "epoch": 0.5977138828742635, "grad_norm": 4.196855420211101, "learning_rate": 9.703021923329204e-06, "loss": 0.512, "step": 11870 }, { "epoch": 0.5982174329019588, "grad_norm": 3.725511627192521, "learning_rate": 9.702026519776975e-06, "loss": 0.5179, "step": 11880 }, { "epoch": 0.598720982929654, "grad_norm": 4.09999131785416, "learning_rate": 9.70102950206175e-06, "loss": 0.5553, "step": 11890 }, { "epoch": 0.5992245329573493, "grad_norm": 3.8212461805604785, "learning_rate": 9.700030870525795e-06, "loss": 0.5843, "step": 11900 }, { "epoch": 0.5997280829850445, "grad_norm": 4.211552035409822, "learning_rate": 9.699030625511931e-06, "loss": 0.5655, "step": 11910 }, { "epoch": 0.6002316330127399, "grad_norm": 3.5830867105910444, "learning_rate": 9.698028767363538e-06, "loss": 0.4218, "step": 11920 }, { "epoch": 0.600735183040435, "grad_norm": 4.99658463260357, "learning_rate": 9.697025296424539e-06, "loss": 0.4307, "step": 11930 }, { "epoch": 0.6012387330681304, "grad_norm": 4.293925553421387, "learning_rate": 9.69602021303942e-06, "loss": 0.506, "step": 11940 }, { "epoch": 0.6017422830958256, "grad_norm": 4.248853589317509, "learning_rate": 9.695013517553213e-06, "loss": 0.5292, "step": 11950 }, { "epoch": 0.6022458331235209, "grad_norm": 4.393008841950864, "learning_rate": 9.694005210311512e-06, "loss": 0.5716, "step": 11960 }, { "epoch": 0.6027493831512161, "grad_norm": 4.098939273329157, "learning_rate": 9.692995291660459e-06, "loss": 0.5794, "step": 11970 }, { "epoch": 0.6032529331789114, "grad_norm": 4.329138031210277, "learning_rate": 9.691983761946746e-06, "loss": 0.5014, "step": 11980 }, { "epoch": 0.6037564832066066, "grad_norm": 3.9320341171591284, "learning_rate": 9.690970621517627e-06, "loss": 0.5255, "step": 11990 }, { "epoch": 0.6042600332343018, "grad_norm": 3.516539243271014, "learning_rate": 9.6899558707209e-06, "loss": 0.5289, "step": 12000 }, { "epoch": 0.6047635832619971, "grad_norm": 3.8499609103790253, "learning_rate": 9.688939509904921e-06, "loss": 0.5077, "step": 12010 }, { "epoch": 0.6052671332896923, "grad_norm": 3.612558766895598, "learning_rate": 9.687921539418599e-06, "loss": 0.5449, "step": 12020 }, { "epoch": 0.6057706833173876, "grad_norm": 4.73774838378498, "learning_rate": 9.686901959611393e-06, "loss": 0.5658, "step": 12030 }, { "epoch": 0.6062742333450828, "grad_norm": 5.457040262075834, "learning_rate": 9.685880770833313e-06, "loss": 0.4978, "step": 12040 }, { "epoch": 0.6067777833727781, "grad_norm": 3.755491279840814, "learning_rate": 9.684857973434929e-06, "loss": 0.4553, "step": 12050 }, { "epoch": 0.6072813334004733, "grad_norm": 5.223171626182039, "learning_rate": 9.683833567767354e-06, "loss": 0.5697, "step": 12060 }, { "epoch": 0.6077848834281686, "grad_norm": 4.191135835495135, "learning_rate": 9.682807554182257e-06, "loss": 0.5179, "step": 12070 }, { "epoch": 0.6082884334558638, "grad_norm": 2.9754969995278713, "learning_rate": 9.681779933031862e-06, "loss": 0.4216, "step": 12080 }, { "epoch": 0.6087919834835591, "grad_norm": 4.482449693937685, "learning_rate": 9.68075070466894e-06, "loss": 0.5243, "step": 12090 }, { "epoch": 0.6092955335112543, "grad_norm": 4.244207842497579, "learning_rate": 9.679719869446815e-06, "loss": 0.5116, "step": 12100 }, { "epoch": 0.6097990835389496, "grad_norm": 4.630903628259296, "learning_rate": 9.678687427719367e-06, "loss": 0.4972, "step": 12110 }, { "epoch": 0.6103026335666448, "grad_norm": 3.780188192012996, "learning_rate": 9.677653379841019e-06, "loss": 0.4158, "step": 12120 }, { "epoch": 0.6108061835943401, "grad_norm": 4.436436286647174, "learning_rate": 9.676617726166754e-06, "loss": 0.5208, "step": 12130 }, { "epoch": 0.6113097336220353, "grad_norm": 5.4137870428280666, "learning_rate": 9.6755804670521e-06, "loss": 0.4871, "step": 12140 }, { "epoch": 0.6118132836497306, "grad_norm": 4.227732740270942, "learning_rate": 9.67454160285314e-06, "loss": 0.5032, "step": 12150 }, { "epoch": 0.6123168336774258, "grad_norm": 4.107099073045305, "learning_rate": 9.673501133926508e-06, "loss": 0.4761, "step": 12160 }, { "epoch": 0.6128203837051212, "grad_norm": 4.457038963954487, "learning_rate": 9.672459060629384e-06, "loss": 0.5433, "step": 12170 }, { "epoch": 0.6133239337328164, "grad_norm": 4.33642997417228, "learning_rate": 9.671415383319507e-06, "loss": 0.4533, "step": 12180 }, { "epoch": 0.6138274837605117, "grad_norm": 4.456664245647666, "learning_rate": 9.670370102355156e-06, "loss": 0.5184, "step": 12190 }, { "epoch": 0.6143310337882069, "grad_norm": 4.947044217616652, "learning_rate": 9.669323218095172e-06, "loss": 0.5441, "step": 12200 }, { "epoch": 0.6148345838159021, "grad_norm": 4.989146272309023, "learning_rate": 9.668274730898937e-06, "loss": 0.5309, "step": 12210 }, { "epoch": 0.6153381338435974, "grad_norm": 4.096202612169277, "learning_rate": 9.667224641126389e-06, "loss": 0.5061, "step": 12220 }, { "epoch": 0.6158416838712926, "grad_norm": 3.944451835506029, "learning_rate": 9.666172949138015e-06, "loss": 0.4885, "step": 12230 }, { "epoch": 0.6163452338989879, "grad_norm": 4.4930125801122, "learning_rate": 9.66511965529485e-06, "loss": 0.5023, "step": 12240 }, { "epoch": 0.6168487839266831, "grad_norm": 4.568808704962638, "learning_rate": 9.66406475995848e-06, "loss": 0.5225, "step": 12250 }, { "epoch": 0.6173523339543784, "grad_norm": 4.824162365279704, "learning_rate": 9.66300826349104e-06, "loss": 0.4887, "step": 12260 }, { "epoch": 0.6178558839820736, "grad_norm": 4.684906142895348, "learning_rate": 9.661950166255218e-06, "loss": 0.5876, "step": 12270 }, { "epoch": 0.6183594340097689, "grad_norm": 3.9480207405793997, "learning_rate": 9.66089046861425e-06, "loss": 0.5469, "step": 12280 }, { "epoch": 0.6188629840374641, "grad_norm": 4.78784360067396, "learning_rate": 9.659829170931917e-06, "loss": 0.5262, "step": 12290 }, { "epoch": 0.6193665340651594, "grad_norm": 3.3966700615772365, "learning_rate": 9.658766273572555e-06, "loss": 0.5072, "step": 12300 }, { "epoch": 0.6198700840928546, "grad_norm": 3.8854205191535596, "learning_rate": 9.657701776901047e-06, "loss": 0.5295, "step": 12310 }, { "epoch": 0.6203736341205499, "grad_norm": 4.78597270882958, "learning_rate": 9.656635681282824e-06, "loss": 0.5124, "step": 12320 }, { "epoch": 0.6208771841482451, "grad_norm": 4.064148937451452, "learning_rate": 9.65556798708387e-06, "loss": 0.5334, "step": 12330 }, { "epoch": 0.6213807341759404, "grad_norm": 3.859284791960524, "learning_rate": 9.65449869467071e-06, "loss": 0.5087, "step": 12340 }, { "epoch": 0.6218842842036356, "grad_norm": 3.8654684269286355, "learning_rate": 9.653427804410424e-06, "loss": 0.5119, "step": 12350 }, { "epoch": 0.6223878342313309, "grad_norm": 3.959833356935022, "learning_rate": 9.652355316670643e-06, "loss": 0.517, "step": 12360 }, { "epoch": 0.6228913842590261, "grad_norm": 4.033018539225871, "learning_rate": 9.651281231819536e-06, "loss": 0.5057, "step": 12370 }, { "epoch": 0.6233949342867214, "grad_norm": 3.2164660311362048, "learning_rate": 9.650205550225831e-06, "loss": 0.4991, "step": 12380 }, { "epoch": 0.6238984843144166, "grad_norm": 5.245201071076702, "learning_rate": 9.649128272258799e-06, "loss": 0.5016, "step": 12390 }, { "epoch": 0.6244020343421118, "grad_norm": 4.703141957102985, "learning_rate": 9.648049398288257e-06, "loss": 0.4877, "step": 12400 }, { "epoch": 0.6249055843698071, "grad_norm": 4.418927470291282, "learning_rate": 9.646968928684575e-06, "loss": 0.4701, "step": 12410 }, { "epoch": 0.6254091343975023, "grad_norm": 3.9977629672842805, "learning_rate": 9.64588686381867e-06, "loss": 0.4838, "step": 12420 }, { "epoch": 0.6259126844251977, "grad_norm": 3.565254176142288, "learning_rate": 9.644803204062002e-06, "loss": 0.5172, "step": 12430 }, { "epoch": 0.6264162344528929, "grad_norm": 4.361288390742426, "learning_rate": 9.64371794978658e-06, "loss": 0.4547, "step": 12440 }, { "epoch": 0.6269197844805882, "grad_norm": 4.379169341882231, "learning_rate": 9.642631101364968e-06, "loss": 0.5157, "step": 12450 }, { "epoch": 0.6274233345082834, "grad_norm": 3.748504798844609, "learning_rate": 9.641542659170266e-06, "loss": 0.4776, "step": 12460 }, { "epoch": 0.6279268845359787, "grad_norm": 3.33717773327765, "learning_rate": 9.640452623576129e-06, "loss": 0.5551, "step": 12470 }, { "epoch": 0.6284304345636739, "grad_norm": 4.059312245185761, "learning_rate": 9.639360994956755e-06, "loss": 0.5348, "step": 12480 }, { "epoch": 0.6289339845913692, "grad_norm": 4.691671221581245, "learning_rate": 9.63826777368689e-06, "loss": 0.5354, "step": 12490 }, { "epoch": 0.6294375346190644, "grad_norm": 4.3846798891706085, "learning_rate": 9.637172960141829e-06, "loss": 0.4846, "step": 12500 }, { "epoch": 0.6299410846467597, "grad_norm": 4.493046671780376, "learning_rate": 9.636076554697408e-06, "loss": 0.5344, "step": 12510 }, { "epoch": 0.6304446346744549, "grad_norm": 3.6557800963959703, "learning_rate": 9.634978557730016e-06, "loss": 0.483, "step": 12520 }, { "epoch": 0.6309481847021502, "grad_norm": 3.328145720820099, "learning_rate": 9.633878969616586e-06, "loss": 0.4657, "step": 12530 }, { "epoch": 0.6314517347298454, "grad_norm": 4.204636632080063, "learning_rate": 9.632777790734593e-06, "loss": 0.546, "step": 12540 }, { "epoch": 0.6319552847575407, "grad_norm": 4.396304872516577, "learning_rate": 9.631675021462066e-06, "loss": 0.5139, "step": 12550 }, { "epoch": 0.6324588347852359, "grad_norm": 3.3007061100223414, "learning_rate": 9.630570662177571e-06, "loss": 0.4747, "step": 12560 }, { "epoch": 0.6329623848129312, "grad_norm": 4.155406277866647, "learning_rate": 9.629464713260229e-06, "loss": 0.4415, "step": 12570 }, { "epoch": 0.6334659348406264, "grad_norm": 3.2615418070233586, "learning_rate": 9.6283571750897e-06, "loss": 0.4988, "step": 12580 }, { "epoch": 0.6339694848683217, "grad_norm": 3.7636388016964943, "learning_rate": 9.62724804804619e-06, "loss": 0.5205, "step": 12590 }, { "epoch": 0.6344730348960169, "grad_norm": 4.814238592826433, "learning_rate": 9.626137332510456e-06, "loss": 0.5126, "step": 12600 }, { "epoch": 0.6349765849237121, "grad_norm": 3.9429090194902576, "learning_rate": 9.625025028863795e-06, "loss": 0.5515, "step": 12610 }, { "epoch": 0.6354801349514074, "grad_norm": 3.6462827999337297, "learning_rate": 9.62391113748805e-06, "loss": 0.5221, "step": 12620 }, { "epoch": 0.6359836849791026, "grad_norm": 4.101893143817026, "learning_rate": 9.622795658765609e-06, "loss": 0.5476, "step": 12630 }, { "epoch": 0.6364872350067979, "grad_norm": 3.6507102647384078, "learning_rate": 9.621678593079408e-06, "loss": 0.5448, "step": 12640 }, { "epoch": 0.6369907850344931, "grad_norm": 3.571452796278452, "learning_rate": 9.620559940812925e-06, "loss": 0.4983, "step": 12650 }, { "epoch": 0.6374943350621884, "grad_norm": 3.8506069346288987, "learning_rate": 9.619439702350182e-06, "loss": 0.492, "step": 12660 }, { "epoch": 0.6379978850898836, "grad_norm": 3.8337851601323023, "learning_rate": 9.61831787807575e-06, "loss": 0.506, "step": 12670 }, { "epoch": 0.638501435117579, "grad_norm": 4.706984800834046, "learning_rate": 9.617194468374734e-06, "loss": 0.5509, "step": 12680 }, { "epoch": 0.6390049851452742, "grad_norm": 4.535320571494101, "learning_rate": 9.616069473632797e-06, "loss": 0.5222, "step": 12690 }, { "epoch": 0.6395085351729695, "grad_norm": 3.285957209110292, "learning_rate": 9.614942894236138e-06, "loss": 0.5178, "step": 12700 }, { "epoch": 0.6400120852006647, "grad_norm": 3.4801710380638617, "learning_rate": 9.6138147305715e-06, "loss": 0.4944, "step": 12710 }, { "epoch": 0.64051563522836, "grad_norm": 3.978229077941688, "learning_rate": 9.612684983026172e-06, "loss": 0.4541, "step": 12720 }, { "epoch": 0.6410191852560552, "grad_norm": 4.4902783191788505, "learning_rate": 9.611553651987988e-06, "loss": 0.5568, "step": 12730 }, { "epoch": 0.6415227352837505, "grad_norm": 4.6018379952954644, "learning_rate": 9.61042073784532e-06, "loss": 0.5325, "step": 12740 }, { "epoch": 0.6420262853114457, "grad_norm": 4.154963151859699, "learning_rate": 9.609286240987089e-06, "loss": 0.4505, "step": 12750 }, { "epoch": 0.642529835339141, "grad_norm": 5.948896419237921, "learning_rate": 9.608150161802757e-06, "loss": 0.4696, "step": 12760 }, { "epoch": 0.6430333853668362, "grad_norm": 3.7304003698108428, "learning_rate": 9.607012500682331e-06, "loss": 0.4854, "step": 12770 }, { "epoch": 0.6435369353945315, "grad_norm": 3.8207067217977038, "learning_rate": 9.605873258016358e-06, "loss": 0.5214, "step": 12780 }, { "epoch": 0.6440404854222267, "grad_norm": 4.350047551638209, "learning_rate": 9.604732434195932e-06, "loss": 0.4922, "step": 12790 }, { "epoch": 0.644544035449922, "grad_norm": 3.8028555034343907, "learning_rate": 9.603590029612687e-06, "loss": 0.5479, "step": 12800 }, { "epoch": 0.6450475854776172, "grad_norm": 3.2973753533734604, "learning_rate": 9.602446044658795e-06, "loss": 0.5341, "step": 12810 }, { "epoch": 0.6455511355053124, "grad_norm": 4.630504482637326, "learning_rate": 9.601300479726983e-06, "loss": 0.4821, "step": 12820 }, { "epoch": 0.6460546855330077, "grad_norm": 4.099382829749752, "learning_rate": 9.600153335210509e-06, "loss": 0.5527, "step": 12830 }, { "epoch": 0.6465582355607029, "grad_norm": 4.176810066142005, "learning_rate": 9.599004611503178e-06, "loss": 0.5756, "step": 12840 }, { "epoch": 0.6470617855883982, "grad_norm": 3.4846050056433002, "learning_rate": 9.597854308999334e-06, "loss": 0.4702, "step": 12850 }, { "epoch": 0.6475653356160934, "grad_norm": 3.861678177364025, "learning_rate": 9.596702428093871e-06, "loss": 0.4857, "step": 12860 }, { "epoch": 0.6480688856437887, "grad_norm": 4.558881174010183, "learning_rate": 9.595548969182216e-06, "loss": 0.5201, "step": 12870 }, { "epoch": 0.6485724356714839, "grad_norm": 4.034078249333521, "learning_rate": 9.59439393266034e-06, "loss": 0.5035, "step": 12880 }, { "epoch": 0.6490759856991792, "grad_norm": 3.5077713783758697, "learning_rate": 9.593237318924758e-06, "loss": 0.4778, "step": 12890 }, { "epoch": 0.6495795357268744, "grad_norm": 4.0220659017975615, "learning_rate": 9.592079128372524e-06, "loss": 0.502, "step": 12900 }, { "epoch": 0.6500830857545697, "grad_norm": 4.164397556291615, "learning_rate": 9.590919361401237e-06, "loss": 0.5436, "step": 12910 }, { "epoch": 0.6505866357822649, "grad_norm": 4.310703757861823, "learning_rate": 9.58975801840903e-06, "loss": 0.5213, "step": 12920 }, { "epoch": 0.6510901858099603, "grad_norm": 4.256064916125856, "learning_rate": 9.588595099794586e-06, "loss": 0.4252, "step": 12930 }, { "epoch": 0.6515937358376555, "grad_norm": 3.9595074968122894, "learning_rate": 9.587430605957119e-06, "loss": 0.5699, "step": 12940 }, { "epoch": 0.6520972858653508, "grad_norm": 3.911886998286689, "learning_rate": 9.586264537296395e-06, "loss": 0.4965, "step": 12950 }, { "epoch": 0.652600835893046, "grad_norm": 3.7876017606606935, "learning_rate": 9.585096894212713e-06, "loss": 0.5065, "step": 12960 }, { "epoch": 0.6531043859207413, "grad_norm": 3.349811202509578, "learning_rate": 9.583927677106912e-06, "loss": 0.5485, "step": 12970 }, { "epoch": 0.6536079359484365, "grad_norm": 4.920441831066773, "learning_rate": 9.582756886380376e-06, "loss": 0.5006, "step": 12980 }, { "epoch": 0.6541114859761318, "grad_norm": 3.833200575169871, "learning_rate": 9.581584522435025e-06, "loss": 0.5609, "step": 12990 }, { "epoch": 0.654615036003827, "grad_norm": 4.938508500236208, "learning_rate": 9.580410585673321e-06, "loss": 0.5476, "step": 13000 }, { "epoch": 0.6551185860315222, "grad_norm": 3.966791815320509, "learning_rate": 9.579235076498268e-06, "loss": 0.4874, "step": 13010 }, { "epoch": 0.6556221360592175, "grad_norm": 4.033361644296855, "learning_rate": 9.578057995313406e-06, "loss": 0.5267, "step": 13020 }, { "epoch": 0.6561256860869127, "grad_norm": 6.032812382043706, "learning_rate": 9.576879342522816e-06, "loss": 0.5217, "step": 13030 }, { "epoch": 0.656629236114608, "grad_norm": 4.616865228264727, "learning_rate": 9.57569911853112e-06, "loss": 0.4919, "step": 13040 }, { "epoch": 0.6571327861423032, "grad_norm": 4.958615281408679, "learning_rate": 9.574517323743478e-06, "loss": 0.5142, "step": 13050 }, { "epoch": 0.6576363361699985, "grad_norm": 4.1991660302983735, "learning_rate": 9.57333395856559e-06, "loss": 0.5257, "step": 13060 }, { "epoch": 0.6581398861976937, "grad_norm": 4.692162236746754, "learning_rate": 9.57214902340369e-06, "loss": 0.5679, "step": 13070 }, { "epoch": 0.658643436225389, "grad_norm": 3.8861647090601164, "learning_rate": 9.570962518664562e-06, "loss": 0.5094, "step": 13080 }, { "epoch": 0.6591469862530842, "grad_norm": 3.9290264955292873, "learning_rate": 9.569774444755518e-06, "loss": 0.5072, "step": 13090 }, { "epoch": 0.6596505362807795, "grad_norm": 3.926210812506941, "learning_rate": 9.568584802084413e-06, "loss": 0.4871, "step": 13100 }, { "epoch": 0.6601540863084747, "grad_norm": 3.2452550268472056, "learning_rate": 9.567393591059643e-06, "loss": 0.5112, "step": 13110 }, { "epoch": 0.66065763633617, "grad_norm": 4.34265279184824, "learning_rate": 9.566200812090138e-06, "loss": 0.5507, "step": 13120 }, { "epoch": 0.6611611863638652, "grad_norm": 4.219905522400619, "learning_rate": 9.56500646558537e-06, "loss": 0.5013, "step": 13130 }, { "epoch": 0.6616647363915605, "grad_norm": 4.887985376301214, "learning_rate": 9.563810551955346e-06, "loss": 0.5317, "step": 13140 }, { "epoch": 0.6621682864192557, "grad_norm": 4.3843100485484605, "learning_rate": 9.562613071610611e-06, "loss": 0.4801, "step": 13150 }, { "epoch": 0.662671836446951, "grad_norm": 4.581387076393426, "learning_rate": 9.561414024962252e-06, "loss": 0.4829, "step": 13160 }, { "epoch": 0.6631753864746462, "grad_norm": 4.334963273214946, "learning_rate": 9.56021341242189e-06, "loss": 0.5745, "step": 13170 }, { "epoch": 0.6636789365023416, "grad_norm": 5.047389532201247, "learning_rate": 9.559011234401684e-06, "loss": 0.5115, "step": 13180 }, { "epoch": 0.6641824865300368, "grad_norm": 4.4407283598312315, "learning_rate": 9.557807491314328e-06, "loss": 0.5221, "step": 13190 }, { "epoch": 0.6646860365577321, "grad_norm": 4.075275866154321, "learning_rate": 9.55660218357306e-06, "loss": 0.5122, "step": 13200 }, { "epoch": 0.6651895865854273, "grad_norm": 4.199688650745599, "learning_rate": 9.555395311591649e-06, "loss": 0.5147, "step": 13210 }, { "epoch": 0.6656931366131225, "grad_norm": 4.115085191385293, "learning_rate": 9.554186875784406e-06, "loss": 0.5358, "step": 13220 }, { "epoch": 0.6661966866408178, "grad_norm": 3.5792206006135032, "learning_rate": 9.552976876566171e-06, "loss": 0.5807, "step": 13230 }, { "epoch": 0.666700236668513, "grad_norm": 3.3562704643050685, "learning_rate": 9.551765314352331e-06, "loss": 0.4851, "step": 13240 }, { "epoch": 0.6672037866962083, "grad_norm": 4.3306706399272015, "learning_rate": 9.550552189558801e-06, "loss": 0.497, "step": 13250 }, { "epoch": 0.6677073367239035, "grad_norm": 4.074320261788061, "learning_rate": 9.549337502602039e-06, "loss": 0.4817, "step": 13260 }, { "epoch": 0.6682108867515988, "grad_norm": 3.64157503288392, "learning_rate": 9.548121253899032e-06, "loss": 0.5088, "step": 13270 }, { "epoch": 0.668714436779294, "grad_norm": 3.511216322839929, "learning_rate": 9.54690344386731e-06, "loss": 0.5069, "step": 13280 }, { "epoch": 0.6692179868069893, "grad_norm": 3.9610159052343215, "learning_rate": 9.545684072924934e-06, "loss": 0.5132, "step": 13290 }, { "epoch": 0.6697215368346845, "grad_norm": 4.299520207869567, "learning_rate": 9.544463141490505e-06, "loss": 0.4872, "step": 13300 }, { "epoch": 0.6702250868623798, "grad_norm": 3.9319443183867673, "learning_rate": 9.543240649983156e-06, "loss": 0.5274, "step": 13310 }, { "epoch": 0.670728636890075, "grad_norm": 3.897095672690902, "learning_rate": 9.54201659882256e-06, "loss": 0.4905, "step": 13320 }, { "epoch": 0.6712321869177703, "grad_norm": 4.418518156965829, "learning_rate": 9.540790988428917e-06, "loss": 0.4878, "step": 13330 }, { "epoch": 0.6717357369454655, "grad_norm": 4.420171351929081, "learning_rate": 9.539563819222973e-06, "loss": 0.5421, "step": 13340 }, { "epoch": 0.6722392869731608, "grad_norm": 4.023948872891642, "learning_rate": 9.538335091626002e-06, "loss": 0.4983, "step": 13350 }, { "epoch": 0.672742837000856, "grad_norm": 3.1843140373949286, "learning_rate": 9.537104806059819e-06, "loss": 0.4849, "step": 13360 }, { "epoch": 0.6732463870285513, "grad_norm": 4.032582603634922, "learning_rate": 9.535872962946763e-06, "loss": 0.4804, "step": 13370 }, { "epoch": 0.6737499370562465, "grad_norm": 4.265219260954903, "learning_rate": 9.534639562709718e-06, "loss": 0.5455, "step": 13380 }, { "epoch": 0.6742534870839418, "grad_norm": 3.621700880505107, "learning_rate": 9.5334046057721e-06, "loss": 0.5284, "step": 13390 }, { "epoch": 0.674757037111637, "grad_norm": 4.169454305680368, "learning_rate": 9.532168092557859e-06, "loss": 0.5092, "step": 13400 }, { "epoch": 0.6752605871393323, "grad_norm": 3.6556981105358575, "learning_rate": 9.530930023491475e-06, "loss": 0.4379, "step": 13410 }, { "epoch": 0.6757641371670275, "grad_norm": 6.474510683860338, "learning_rate": 9.529690398997969e-06, "loss": 0.5225, "step": 13420 }, { "epoch": 0.6762676871947227, "grad_norm": 3.8529796666501284, "learning_rate": 9.528449219502894e-06, "loss": 0.5226, "step": 13430 }, { "epoch": 0.676771237222418, "grad_norm": 3.747785899577379, "learning_rate": 9.52720648543233e-06, "loss": 0.4924, "step": 13440 }, { "epoch": 0.6772747872501133, "grad_norm": 3.9453284400904622, "learning_rate": 9.525962197212902e-06, "loss": 0.4516, "step": 13450 }, { "epoch": 0.6777783372778086, "grad_norm": 3.484628675809776, "learning_rate": 9.524716355271762e-06, "loss": 0.5002, "step": 13460 }, { "epoch": 0.6782818873055038, "grad_norm": 3.5813552376592637, "learning_rate": 9.523468960036592e-06, "loss": 0.5354, "step": 13470 }, { "epoch": 0.6787854373331991, "grad_norm": 4.639872523722359, "learning_rate": 9.522220011935617e-06, "loss": 0.5148, "step": 13480 }, { "epoch": 0.6792889873608943, "grad_norm": 3.9032270784682987, "learning_rate": 9.520969511397586e-06, "loss": 0.5387, "step": 13490 }, { "epoch": 0.6797925373885896, "grad_norm": 3.6074445839518323, "learning_rate": 9.519717458851787e-06, "loss": 0.5387, "step": 13500 }, { "epoch": 0.6802960874162848, "grad_norm": 3.6522497710923263, "learning_rate": 9.518463854728036e-06, "loss": 0.4553, "step": 13510 }, { "epoch": 0.6807996374439801, "grad_norm": 4.009623834859804, "learning_rate": 9.517208699456683e-06, "loss": 0.5198, "step": 13520 }, { "epoch": 0.6813031874716753, "grad_norm": 4.070291915667745, "learning_rate": 9.515951993468616e-06, "loss": 0.5203, "step": 13530 }, { "epoch": 0.6818067374993706, "grad_norm": 4.632450046140433, "learning_rate": 9.514693737195246e-06, "loss": 0.5603, "step": 13540 }, { "epoch": 0.6823102875270658, "grad_norm": 5.426879060462983, "learning_rate": 9.513433931068524e-06, "loss": 0.4932, "step": 13550 }, { "epoch": 0.6828138375547611, "grad_norm": 4.1536582087407785, "learning_rate": 9.512172575520927e-06, "loss": 0.4687, "step": 13560 }, { "epoch": 0.6833173875824563, "grad_norm": 4.437686152391791, "learning_rate": 9.51090967098547e-06, "loss": 0.52, "step": 13570 }, { "epoch": 0.6838209376101516, "grad_norm": 3.627471442122373, "learning_rate": 9.509645217895696e-06, "loss": 0.4705, "step": 13580 }, { "epoch": 0.6843244876378468, "grad_norm": 4.886174007663936, "learning_rate": 9.508379216685678e-06, "loss": 0.4737, "step": 13590 }, { "epoch": 0.6848280376655421, "grad_norm": 4.037982876534903, "learning_rate": 9.507111667790026e-06, "loss": 0.5134, "step": 13600 }, { "epoch": 0.6853315876932373, "grad_norm": 5.670518530400184, "learning_rate": 9.505842571643876e-06, "loss": 0.4902, "step": 13610 }, { "epoch": 0.6858351377209325, "grad_norm": 4.061773227945166, "learning_rate": 9.504571928682896e-06, "loss": 0.5473, "step": 13620 }, { "epoch": 0.6863386877486278, "grad_norm": 4.3198929853730075, "learning_rate": 9.503299739343289e-06, "loss": 0.4885, "step": 13630 }, { "epoch": 0.686842237776323, "grad_norm": 4.833489190310297, "learning_rate": 9.502026004061785e-06, "loss": 0.4815, "step": 13640 }, { "epoch": 0.6873457878040183, "grad_norm": 3.4990811051465642, "learning_rate": 9.500750723275644e-06, "loss": 0.4831, "step": 13650 }, { "epoch": 0.6878493378317135, "grad_norm": 4.140825896089448, "learning_rate": 9.499473897422662e-06, "loss": 0.4747, "step": 13660 }, { "epoch": 0.6883528878594088, "grad_norm": 4.112271477717534, "learning_rate": 9.498195526941158e-06, "loss": 0.4966, "step": 13670 }, { "epoch": 0.688856437887104, "grad_norm": 3.8058896822420265, "learning_rate": 9.496915612269988e-06, "loss": 0.4285, "step": 13680 }, { "epoch": 0.6893599879147994, "grad_norm": 3.6495010862421053, "learning_rate": 9.495634153848533e-06, "loss": 0.4647, "step": 13690 }, { "epoch": 0.6898635379424946, "grad_norm": 3.8720522172776866, "learning_rate": 9.494351152116707e-06, "loss": 0.5478, "step": 13700 }, { "epoch": 0.6903670879701899, "grad_norm": 5.6500554334562665, "learning_rate": 9.493066607514951e-06, "loss": 0.5346, "step": 13710 }, { "epoch": 0.6908706379978851, "grad_norm": 4.846156852420187, "learning_rate": 9.491780520484241e-06, "loss": 0.4971, "step": 13720 }, { "epoch": 0.6913741880255804, "grad_norm": 4.944840821889194, "learning_rate": 9.490492891466076e-06, "loss": 0.4778, "step": 13730 }, { "epoch": 0.6918777380532756, "grad_norm": 3.5448733267191304, "learning_rate": 9.48920372090249e-06, "loss": 0.4639, "step": 13740 }, { "epoch": 0.6923812880809709, "grad_norm": 4.953458545087335, "learning_rate": 9.487913009236039e-06, "loss": 0.5704, "step": 13750 }, { "epoch": 0.6928848381086661, "grad_norm": 4.236905181613399, "learning_rate": 9.486620756909817e-06, "loss": 0.5042, "step": 13760 }, { "epoch": 0.6933883881363614, "grad_norm": 4.3958748369412355, "learning_rate": 9.48532696436744e-06, "loss": 0.4825, "step": 13770 }, { "epoch": 0.6938919381640566, "grad_norm": 4.018922527247076, "learning_rate": 9.484031632053057e-06, "loss": 0.4637, "step": 13780 }, { "epoch": 0.6943954881917519, "grad_norm": 4.595169228455493, "learning_rate": 9.482734760411343e-06, "loss": 0.4656, "step": 13790 }, { "epoch": 0.6948990382194471, "grad_norm": 3.363498898907417, "learning_rate": 9.481436349887503e-06, "loss": 0.4578, "step": 13800 }, { "epoch": 0.6954025882471424, "grad_norm": 5.217351848905198, "learning_rate": 9.480136400927266e-06, "loss": 0.4782, "step": 13810 }, { "epoch": 0.6959061382748376, "grad_norm": 4.153076118019208, "learning_rate": 9.478834913976898e-06, "loss": 0.5369, "step": 13820 }, { "epoch": 0.6964096883025328, "grad_norm": 4.21847088602277, "learning_rate": 9.477531889483184e-06, "loss": 0.4552, "step": 13830 }, { "epoch": 0.6969132383302281, "grad_norm": 4.1811850432306885, "learning_rate": 9.476227327893443e-06, "loss": 0.4916, "step": 13840 }, { "epoch": 0.6974167883579233, "grad_norm": 3.716745489757779, "learning_rate": 9.474921229655518e-06, "loss": 0.5145, "step": 13850 }, { "epoch": 0.6979203383856186, "grad_norm": 3.885070748532551, "learning_rate": 9.47361359521778e-06, "loss": 0.4908, "step": 13860 }, { "epoch": 0.6984238884133138, "grad_norm": 4.630294257427119, "learning_rate": 9.472304425029127e-06, "loss": 0.5373, "step": 13870 }, { "epoch": 0.6989274384410091, "grad_norm": 4.400669694901099, "learning_rate": 9.470993719538988e-06, "loss": 0.5065, "step": 13880 }, { "epoch": 0.6994309884687043, "grad_norm": 4.025566263109718, "learning_rate": 9.469681479197314e-06, "loss": 0.4804, "step": 13890 }, { "epoch": 0.6999345384963996, "grad_norm": 5.360471536150451, "learning_rate": 9.468367704454587e-06, "loss": 0.5625, "step": 13900 }, { "epoch": 0.7004380885240948, "grad_norm": 4.436574882808873, "learning_rate": 9.467052395761813e-06, "loss": 0.5987, "step": 13910 }, { "epoch": 0.7009416385517901, "grad_norm": 4.203783693955365, "learning_rate": 9.465735553570525e-06, "loss": 0.4858, "step": 13920 }, { "epoch": 0.7014451885794853, "grad_norm": 4.494880040760592, "learning_rate": 9.464417178332784e-06, "loss": 0.4433, "step": 13930 }, { "epoch": 0.7019487386071807, "grad_norm": 3.8445626316462733, "learning_rate": 9.463097270501176e-06, "loss": 0.4863, "step": 13940 }, { "epoch": 0.7024522886348759, "grad_norm": 19.016868597531577, "learning_rate": 9.461775830528812e-06, "loss": 0.5291, "step": 13950 }, { "epoch": 0.7029558386625712, "grad_norm": 4.078106375021906, "learning_rate": 9.460452858869334e-06, "loss": 0.4733, "step": 13960 }, { "epoch": 0.7034593886902664, "grad_norm": 4.331231476899788, "learning_rate": 9.459128355976901e-06, "loss": 0.5008, "step": 13970 }, { "epoch": 0.7039629387179617, "grad_norm": 5.0223145928914406, "learning_rate": 9.457802322306206e-06, "loss": 0.4913, "step": 13980 }, { "epoch": 0.7044664887456569, "grad_norm": 3.561887172785957, "learning_rate": 9.456474758312466e-06, "loss": 0.4923, "step": 13990 }, { "epoch": 0.7049700387733522, "grad_norm": 3.7684809378822486, "learning_rate": 9.455145664451418e-06, "loss": 0.4749, "step": 14000 }, { "epoch": 0.7054735888010474, "grad_norm": 4.271817983275043, "learning_rate": 9.453815041179329e-06, "loss": 0.4898, "step": 14010 }, { "epoch": 0.7059771388287427, "grad_norm": 3.5733736262722022, "learning_rate": 9.45248288895299e-06, "loss": 0.4784, "step": 14020 }, { "epoch": 0.7064806888564379, "grad_norm": 4.766707329507839, "learning_rate": 9.451149208229717e-06, "loss": 0.5064, "step": 14030 }, { "epoch": 0.7069842388841331, "grad_norm": 4.801855007950311, "learning_rate": 9.44981399946735e-06, "loss": 0.5068, "step": 14040 }, { "epoch": 0.7074877889118284, "grad_norm": 3.9989546034703114, "learning_rate": 9.448477263124256e-06, "loss": 0.4883, "step": 14050 }, { "epoch": 0.7079913389395236, "grad_norm": 4.669311166508438, "learning_rate": 9.447138999659321e-06, "loss": 0.4749, "step": 14060 }, { "epoch": 0.7084948889672189, "grad_norm": 4.552788804453114, "learning_rate": 9.445799209531962e-06, "loss": 0.4655, "step": 14070 }, { "epoch": 0.7089984389949141, "grad_norm": 5.104473236380121, "learning_rate": 9.444457893202118e-06, "loss": 0.524, "step": 14080 }, { "epoch": 0.7095019890226094, "grad_norm": 4.030396754054832, "learning_rate": 9.443115051130243e-06, "loss": 0.4545, "step": 14090 }, { "epoch": 0.7100055390503046, "grad_norm": 4.66289957708504, "learning_rate": 9.441770683777331e-06, "loss": 0.5452, "step": 14100 }, { "epoch": 0.7105090890779999, "grad_norm": 3.813139738035405, "learning_rate": 9.440424791604888e-06, "loss": 0.506, "step": 14110 }, { "epoch": 0.7110126391056951, "grad_norm": 3.3919574403819075, "learning_rate": 9.439077375074944e-06, "loss": 0.5086, "step": 14120 }, { "epoch": 0.7115161891333904, "grad_norm": 4.3460192482850095, "learning_rate": 9.437728434650057e-06, "loss": 0.4364, "step": 14130 }, { "epoch": 0.7120197391610856, "grad_norm": 3.8409585333193417, "learning_rate": 9.436377970793308e-06, "loss": 0.4918, "step": 14140 }, { "epoch": 0.7125232891887809, "grad_norm": 4.091935180928959, "learning_rate": 9.435025983968295e-06, "loss": 0.5492, "step": 14150 }, { "epoch": 0.7130268392164761, "grad_norm": 4.014535063426086, "learning_rate": 9.433672474639148e-06, "loss": 0.474, "step": 14160 }, { "epoch": 0.7135303892441714, "grad_norm": 3.9090894424095763, "learning_rate": 9.43231744327051e-06, "loss": 0.4851, "step": 14170 }, { "epoch": 0.7140339392718666, "grad_norm": 3.516011160875346, "learning_rate": 9.430960890327551e-06, "loss": 0.4829, "step": 14180 }, { "epoch": 0.714537489299562, "grad_norm": 3.8360524976188892, "learning_rate": 9.429602816275964e-06, "loss": 0.5053, "step": 14190 }, { "epoch": 0.7150410393272572, "grad_norm": 3.9749142530548953, "learning_rate": 9.428243221581967e-06, "loss": 0.5093, "step": 14200 }, { "epoch": 0.7155445893549525, "grad_norm": 3.2783236687898443, "learning_rate": 9.426882106712292e-06, "loss": 0.4626, "step": 14210 }, { "epoch": 0.7160481393826477, "grad_norm": 3.881013572960482, "learning_rate": 9.4255194721342e-06, "loss": 0.4818, "step": 14220 }, { "epoch": 0.7165516894103429, "grad_norm": 3.2296270662473106, "learning_rate": 9.424155318315468e-06, "loss": 0.5551, "step": 14230 }, { "epoch": 0.7170552394380382, "grad_norm": 3.820538243143397, "learning_rate": 9.422789645724402e-06, "loss": 0.5045, "step": 14240 }, { "epoch": 0.7175587894657334, "grad_norm": 3.083004039217407, "learning_rate": 9.421422454829821e-06, "loss": 0.5089, "step": 14250 }, { "epoch": 0.7180623394934287, "grad_norm": 3.3580840176617994, "learning_rate": 9.420053746101072e-06, "loss": 0.4722, "step": 14260 }, { "epoch": 0.7185658895211239, "grad_norm": 4.186262714499817, "learning_rate": 9.418683520008018e-06, "loss": 0.5044, "step": 14270 }, { "epoch": 0.7190694395488192, "grad_norm": 3.71559319837529, "learning_rate": 9.417311777021047e-06, "loss": 0.5052, "step": 14280 }, { "epoch": 0.7195729895765144, "grad_norm": 3.5401257454546173, "learning_rate": 9.415938517611065e-06, "loss": 0.4928, "step": 14290 }, { "epoch": 0.7200765396042097, "grad_norm": 4.496355737780129, "learning_rate": 9.4145637422495e-06, "loss": 0.5143, "step": 14300 }, { "epoch": 0.7205800896319049, "grad_norm": 4.845254309317509, "learning_rate": 9.413187451408299e-06, "loss": 0.4852, "step": 14310 }, { "epoch": 0.7210836396596002, "grad_norm": 4.614235975208731, "learning_rate": 9.41180964555993e-06, "loss": 0.4962, "step": 14320 }, { "epoch": 0.7215871896872954, "grad_norm": 3.9862039523803703, "learning_rate": 9.410430325177384e-06, "loss": 0.518, "step": 14330 }, { "epoch": 0.7220907397149907, "grad_norm": 3.9397697848745787, "learning_rate": 9.409049490734167e-06, "loss": 0.5007, "step": 14340 }, { "epoch": 0.7225942897426859, "grad_norm": 4.4515778418948235, "learning_rate": 9.407667142704307e-06, "loss": 0.5176, "step": 14350 }, { "epoch": 0.7230978397703812, "grad_norm": 3.919156983550222, "learning_rate": 9.40628328156235e-06, "loss": 0.5028, "step": 14360 }, { "epoch": 0.7236013897980764, "grad_norm": 4.385785478711452, "learning_rate": 9.404897907783365e-06, "loss": 0.4716, "step": 14370 }, { "epoch": 0.7241049398257717, "grad_norm": 3.2817922244147786, "learning_rate": 9.403511021842939e-06, "loss": 0.4226, "step": 14380 }, { "epoch": 0.7246084898534669, "grad_norm": 4.0289980494825635, "learning_rate": 9.402122624217176e-06, "loss": 0.526, "step": 14390 }, { "epoch": 0.7251120398811622, "grad_norm": 2.9857839369660444, "learning_rate": 9.4007327153827e-06, "loss": 0.52, "step": 14400 }, { "epoch": 0.7256155899088574, "grad_norm": 3.3057824728786693, "learning_rate": 9.399341295816654e-06, "loss": 0.5296, "step": 14410 }, { "epoch": 0.7261191399365527, "grad_norm": 3.8977767691112333, "learning_rate": 9.3979483659967e-06, "loss": 0.4865, "step": 14420 }, { "epoch": 0.726622689964248, "grad_norm": 3.3892084332088217, "learning_rate": 9.39655392640102e-06, "loss": 0.4994, "step": 14430 }, { "epoch": 0.7271262399919431, "grad_norm": 4.248541428634065, "learning_rate": 9.395157977508308e-06, "loss": 0.5267, "step": 14440 }, { "epoch": 0.7276297900196385, "grad_norm": 3.9265461082239947, "learning_rate": 9.393760519797786e-06, "loss": 0.494, "step": 14450 }, { "epoch": 0.7281333400473337, "grad_norm": 4.679185527918744, "learning_rate": 9.392361553749185e-06, "loss": 0.5671, "step": 14460 }, { "epoch": 0.728636890075029, "grad_norm": 4.384267217650181, "learning_rate": 9.390961079842759e-06, "loss": 0.4655, "step": 14470 }, { "epoch": 0.7291404401027242, "grad_norm": 2.8609812907886947, "learning_rate": 9.389559098559277e-06, "loss": 0.4846, "step": 14480 }, { "epoch": 0.7296439901304195, "grad_norm": 4.777555868543226, "learning_rate": 9.388155610380025e-06, "loss": 0.4613, "step": 14490 }, { "epoch": 0.7301475401581147, "grad_norm": 4.433860802117473, "learning_rate": 9.386750615786811e-06, "loss": 0.4856, "step": 14500 }, { "epoch": 0.73065109018581, "grad_norm": 3.997213395797513, "learning_rate": 9.385344115261957e-06, "loss": 0.4842, "step": 14510 }, { "epoch": 0.7311546402135052, "grad_norm": 4.328138088732923, "learning_rate": 9.383936109288296e-06, "loss": 0.4945, "step": 14520 }, { "epoch": 0.7316581902412005, "grad_norm": 3.278299126060766, "learning_rate": 9.382526598349192e-06, "loss": 0.508, "step": 14530 }, { "epoch": 0.7321617402688957, "grad_norm": 4.1000343597050195, "learning_rate": 9.38111558292851e-06, "loss": 0.4486, "step": 14540 }, { "epoch": 0.732665290296591, "grad_norm": 4.207608778551016, "learning_rate": 9.379703063510643e-06, "loss": 0.4574, "step": 14550 }, { "epoch": 0.7331688403242862, "grad_norm": 3.7759949282481435, "learning_rate": 9.378289040580494e-06, "loss": 0.4194, "step": 14560 }, { "epoch": 0.7336723903519815, "grad_norm": 3.4517923868207103, "learning_rate": 9.376873514623488e-06, "loss": 0.5192, "step": 14570 }, { "epoch": 0.7341759403796767, "grad_norm": 3.742197528407344, "learning_rate": 9.375456486125556e-06, "loss": 0.4696, "step": 14580 }, { "epoch": 0.734679490407372, "grad_norm": 3.914943079996547, "learning_rate": 9.374037955573157e-06, "loss": 0.5065, "step": 14590 }, { "epoch": 0.7351830404350672, "grad_norm": 4.173722958187879, "learning_rate": 9.372617923453255e-06, "loss": 0.5116, "step": 14600 }, { "epoch": 0.7356865904627625, "grad_norm": 3.5150870835938277, "learning_rate": 9.371196390253337e-06, "loss": 0.4363, "step": 14610 }, { "epoch": 0.7361901404904577, "grad_norm": 3.6643739324095703, "learning_rate": 9.369773356461403e-06, "loss": 0.4891, "step": 14620 }, { "epoch": 0.736693690518153, "grad_norm": 3.8921547983216436, "learning_rate": 9.368348822565965e-06, "loss": 0.5265, "step": 14630 }, { "epoch": 0.7371972405458482, "grad_norm": 3.258023127665686, "learning_rate": 9.366922789056055e-06, "loss": 0.4795, "step": 14640 }, { "epoch": 0.7377007905735434, "grad_norm": 6.7473867855029726, "learning_rate": 9.365495256421213e-06, "loss": 0.4944, "step": 14650 }, { "epoch": 0.7382043406012387, "grad_norm": 4.383719715052666, "learning_rate": 9.364066225151504e-06, "loss": 0.4262, "step": 14660 }, { "epoch": 0.7387078906289339, "grad_norm": 4.4064479311577465, "learning_rate": 9.3626356957375e-06, "loss": 0.4701, "step": 14670 }, { "epoch": 0.7392114406566292, "grad_norm": 3.660863509438306, "learning_rate": 9.361203668670283e-06, "loss": 0.4638, "step": 14680 }, { "epoch": 0.7397149906843244, "grad_norm": 5.771411639272416, "learning_rate": 9.359770144441462e-06, "loss": 0.4734, "step": 14690 }, { "epoch": 0.7402185407120198, "grad_norm": 4.808209830119651, "learning_rate": 9.35833512354315e-06, "loss": 0.5678, "step": 14700 }, { "epoch": 0.740722090739715, "grad_norm": 4.535485504318539, "learning_rate": 9.356898606467975e-06, "loss": 0.5025, "step": 14710 }, { "epoch": 0.7412256407674103, "grad_norm": 3.2246985163450805, "learning_rate": 9.355460593709082e-06, "loss": 0.4861, "step": 14720 }, { "epoch": 0.7417291907951055, "grad_norm": 4.294882459489521, "learning_rate": 9.35402108576013e-06, "loss": 0.4816, "step": 14730 }, { "epoch": 0.7422327408228008, "grad_norm": 4.370289498842478, "learning_rate": 9.352580083115285e-06, "loss": 0.4851, "step": 14740 }, { "epoch": 0.742736290850496, "grad_norm": 4.179861292622942, "learning_rate": 9.351137586269232e-06, "loss": 0.5445, "step": 14750 }, { "epoch": 0.7432398408781913, "grad_norm": 3.7779084649206855, "learning_rate": 9.349693595717166e-06, "loss": 0.4161, "step": 14760 }, { "epoch": 0.7437433909058865, "grad_norm": 4.335845928755886, "learning_rate": 9.348248111954799e-06, "loss": 0.5025, "step": 14770 }, { "epoch": 0.7442469409335818, "grad_norm": 4.243605393220046, "learning_rate": 9.346801135478349e-06, "loss": 0.5443, "step": 14780 }, { "epoch": 0.744750490961277, "grad_norm": 4.250922703055237, "learning_rate": 9.34535266678455e-06, "loss": 0.4845, "step": 14790 }, { "epoch": 0.7452540409889723, "grad_norm": 3.6761958636022976, "learning_rate": 9.34390270637065e-06, "loss": 0.4591, "step": 14800 }, { "epoch": 0.7457575910166675, "grad_norm": 4.214278768184453, "learning_rate": 9.342451254734407e-06, "loss": 0.5048, "step": 14810 }, { "epoch": 0.7462611410443628, "grad_norm": 4.2275795291446645, "learning_rate": 9.34099831237409e-06, "loss": 0.5152, "step": 14820 }, { "epoch": 0.746764691072058, "grad_norm": 4.28825158836912, "learning_rate": 9.339543879788482e-06, "loss": 0.4479, "step": 14830 }, { "epoch": 0.7472682410997532, "grad_norm": 4.2927202732037575, "learning_rate": 9.338087957476874e-06, "loss": 0.4586, "step": 14840 }, { "epoch": 0.7477717911274485, "grad_norm": 4.735809131709913, "learning_rate": 9.336630545939076e-06, "loss": 0.4787, "step": 14850 }, { "epoch": 0.7482753411551437, "grad_norm": 4.375101369063303, "learning_rate": 9.335171645675398e-06, "loss": 0.4881, "step": 14860 }, { "epoch": 0.748778891182839, "grad_norm": 4.405959655706465, "learning_rate": 9.333711257186672e-06, "loss": 0.5088, "step": 14870 }, { "epoch": 0.7492824412105342, "grad_norm": 3.996861185656687, "learning_rate": 9.332249380974235e-06, "loss": 0.4848, "step": 14880 }, { "epoch": 0.7497859912382295, "grad_norm": 3.7087570313740503, "learning_rate": 9.330786017539934e-06, "loss": 0.4423, "step": 14890 }, { "epoch": 0.7502895412659247, "grad_norm": 3.6283811385106035, "learning_rate": 9.32932116738613e-06, "loss": 0.4985, "step": 14900 }, { "epoch": 0.75079309129362, "grad_norm": 5.225195660696426, "learning_rate": 9.327854831015692e-06, "loss": 0.4869, "step": 14910 }, { "epoch": 0.7512966413213152, "grad_norm": 4.165909449292095, "learning_rate": 9.326387008932e-06, "loss": 0.5077, "step": 14920 }, { "epoch": 0.7518001913490105, "grad_norm": 4.43453808690768, "learning_rate": 9.324917701638945e-06, "loss": 0.5214, "step": 14930 }, { "epoch": 0.7523037413767057, "grad_norm": 3.8989075476498463, "learning_rate": 9.323446909640925e-06, "loss": 0.4964, "step": 14940 }, { "epoch": 0.7528072914044011, "grad_norm": 4.036568825015419, "learning_rate": 9.321974633442851e-06, "loss": 0.4568, "step": 14950 }, { "epoch": 0.7533108414320963, "grad_norm": 3.3014385721214907, "learning_rate": 9.320500873550142e-06, "loss": 0.4813, "step": 14960 }, { "epoch": 0.7538143914597916, "grad_norm": 3.0577504651749323, "learning_rate": 9.319025630468727e-06, "loss": 0.4191, "step": 14970 }, { "epoch": 0.7543179414874868, "grad_norm": 3.589803514453301, "learning_rate": 9.31754890470504e-06, "loss": 0.5142, "step": 14980 }, { "epoch": 0.7548214915151821, "grad_norm": 5.182981032802746, "learning_rate": 9.316070696766033e-06, "loss": 0.4724, "step": 14990 }, { "epoch": 0.7553250415428773, "grad_norm": 3.0105377159532005, "learning_rate": 9.314591007159157e-06, "loss": 0.4656, "step": 15000 }, { "epoch": 0.7558285915705726, "grad_norm": 3.9477129348600792, "learning_rate": 9.313109836392377e-06, "loss": 0.5281, "step": 15010 }, { "epoch": 0.7563321415982678, "grad_norm": 3.46315812072, "learning_rate": 9.311627184974169e-06, "loss": 0.5059, "step": 15020 }, { "epoch": 0.7568356916259631, "grad_norm": 4.574391327757077, "learning_rate": 9.310143053413508e-06, "loss": 0.4616, "step": 15030 }, { "epoch": 0.7573392416536583, "grad_norm": 4.609659215295381, "learning_rate": 9.308657442219886e-06, "loss": 0.4383, "step": 15040 }, { "epoch": 0.7578427916813535, "grad_norm": 3.365831870956425, "learning_rate": 9.3071703519033e-06, "loss": 0.4523, "step": 15050 }, { "epoch": 0.7583463417090488, "grad_norm": 3.7880257555737935, "learning_rate": 9.305681782974254e-06, "loss": 0.4403, "step": 15060 }, { "epoch": 0.758849891736744, "grad_norm": 4.057377859862432, "learning_rate": 9.30419173594376e-06, "loss": 0.4783, "step": 15070 }, { "epoch": 0.7593534417644393, "grad_norm": 5.1314280149123155, "learning_rate": 9.302700211323338e-06, "loss": 0.5075, "step": 15080 }, { "epoch": 0.7598569917921345, "grad_norm": 3.643321653231634, "learning_rate": 9.301207209625014e-06, "loss": 0.5024, "step": 15090 }, { "epoch": 0.7603605418198298, "grad_norm": 7.513587351750785, "learning_rate": 9.299712731361323e-06, "loss": 0.4701, "step": 15100 }, { "epoch": 0.760864091847525, "grad_norm": 4.270235788301205, "learning_rate": 9.298216777045306e-06, "loss": 0.511, "step": 15110 }, { "epoch": 0.7613676418752203, "grad_norm": 3.358754866695441, "learning_rate": 9.29671934719051e-06, "loss": 0.4598, "step": 15120 }, { "epoch": 0.7618711919029155, "grad_norm": 3.524431778041493, "learning_rate": 9.29522044231099e-06, "loss": 0.4025, "step": 15130 }, { "epoch": 0.7623747419306108, "grad_norm": 4.343258592942491, "learning_rate": 9.293720062921304e-06, "loss": 0.5289, "step": 15140 }, { "epoch": 0.762878291958306, "grad_norm": 3.840709343930494, "learning_rate": 9.29221820953652e-06, "loss": 0.4661, "step": 15150 }, { "epoch": 0.7633818419860013, "grad_norm": 4.079554337628031, "learning_rate": 9.290714882672212e-06, "loss": 0.4867, "step": 15160 }, { "epoch": 0.7638853920136965, "grad_norm": 4.530564217400091, "learning_rate": 9.289210082844455e-06, "loss": 0.46, "step": 15170 }, { "epoch": 0.7643889420413919, "grad_norm": 5.337172625488659, "learning_rate": 9.287703810569836e-06, "loss": 0.4673, "step": 15180 }, { "epoch": 0.764892492069087, "grad_norm": 3.658663292860508, "learning_rate": 9.286196066365445e-06, "loss": 0.4652, "step": 15190 }, { "epoch": 0.7653960420967824, "grad_norm": 5.524071844071395, "learning_rate": 9.284686850748877e-06, "loss": 0.5134, "step": 15200 }, { "epoch": 0.7658995921244776, "grad_norm": 3.133271315016967, "learning_rate": 9.283176164238229e-06, "loss": 0.4363, "step": 15210 }, { "epoch": 0.7664031421521729, "grad_norm": 4.738378500781312, "learning_rate": 9.281664007352107e-06, "loss": 0.5202, "step": 15220 }, { "epoch": 0.7669066921798681, "grad_norm": 3.9716938521489475, "learning_rate": 9.280150380609623e-06, "loss": 0.5449, "step": 15230 }, { "epoch": 0.7674102422075634, "grad_norm": 4.338784021813142, "learning_rate": 9.278635284530388e-06, "loss": 0.4976, "step": 15240 }, { "epoch": 0.7679137922352586, "grad_norm": 3.917263984444507, "learning_rate": 9.277118719634524e-06, "loss": 0.5418, "step": 15250 }, { "epoch": 0.7684173422629538, "grad_norm": 4.352106137454131, "learning_rate": 9.275600686442653e-06, "loss": 0.4684, "step": 15260 }, { "epoch": 0.7689208922906491, "grad_norm": 3.743951015444132, "learning_rate": 9.2740811854759e-06, "loss": 0.5575, "step": 15270 }, { "epoch": 0.7694244423183443, "grad_norm": 3.694520641248651, "learning_rate": 9.272560217255896e-06, "loss": 0.4928, "step": 15280 }, { "epoch": 0.7699279923460396, "grad_norm": 3.943054899836008, "learning_rate": 9.271037782304781e-06, "loss": 0.5225, "step": 15290 }, { "epoch": 0.7704315423737348, "grad_norm": 3.948099075072721, "learning_rate": 9.269513881145187e-06, "loss": 0.4766, "step": 15300 }, { "epoch": 0.7709350924014301, "grad_norm": 3.418670666555807, "learning_rate": 9.267988514300258e-06, "loss": 0.4928, "step": 15310 }, { "epoch": 0.7714386424291253, "grad_norm": 3.7461595015433167, "learning_rate": 9.266461682293637e-06, "loss": 0.5278, "step": 15320 }, { "epoch": 0.7719421924568206, "grad_norm": 3.834874201227211, "learning_rate": 9.264933385649473e-06, "loss": 0.4661, "step": 15330 }, { "epoch": 0.7724457424845158, "grad_norm": 3.53226072148394, "learning_rate": 9.263403624892416e-06, "loss": 0.5544, "step": 15340 }, { "epoch": 0.7729492925122111, "grad_norm": 3.8156566182633065, "learning_rate": 9.261872400547619e-06, "loss": 0.4732, "step": 15350 }, { "epoch": 0.7734528425399063, "grad_norm": 3.7087704028483697, "learning_rate": 9.260339713140735e-06, "loss": 0.4206, "step": 15360 }, { "epoch": 0.7739563925676016, "grad_norm": 3.8916039308510024, "learning_rate": 9.258805563197928e-06, "loss": 0.458, "step": 15370 }, { "epoch": 0.7744599425952968, "grad_norm": 3.385920561470987, "learning_rate": 9.25726995124585e-06, "loss": 0.5188, "step": 15380 }, { "epoch": 0.7749634926229921, "grad_norm": 3.374449141019639, "learning_rate": 9.25573287781167e-06, "loss": 0.5196, "step": 15390 }, { "epoch": 0.7754670426506873, "grad_norm": 4.680734905474887, "learning_rate": 9.254194343423044e-06, "loss": 0.5394, "step": 15400 }, { "epoch": 0.7759705926783826, "grad_norm": 4.3308640133854786, "learning_rate": 9.252654348608144e-06, "loss": 0.4862, "step": 15410 }, { "epoch": 0.7764741427060778, "grad_norm": 3.213055769755146, "learning_rate": 9.251112893895631e-06, "loss": 0.4448, "step": 15420 }, { "epoch": 0.7769776927337732, "grad_norm": 3.364405495140258, "learning_rate": 9.249569979814675e-06, "loss": 0.4266, "step": 15430 }, { "epoch": 0.7774812427614683, "grad_norm": 4.305770866997629, "learning_rate": 9.248025606894943e-06, "loss": 0.4363, "step": 15440 }, { "epoch": 0.7779847927891635, "grad_norm": 4.84890387056578, "learning_rate": 9.246479775666606e-06, "loss": 0.4813, "step": 15450 }, { "epoch": 0.7784883428168589, "grad_norm": 4.927653294825568, "learning_rate": 9.244932486660331e-06, "loss": 0.4564, "step": 15460 }, { "epoch": 0.7789918928445541, "grad_norm": 3.0115459131649693, "learning_rate": 9.24338374040729e-06, "loss": 0.4561, "step": 15470 }, { "epoch": 0.7794954428722494, "grad_norm": 3.3496708712790864, "learning_rate": 9.241833537439156e-06, "loss": 0.5301, "step": 15480 }, { "epoch": 0.7799989928999446, "grad_norm": 4.972457271373133, "learning_rate": 9.240281878288094e-06, "loss": 0.438, "step": 15490 }, { "epoch": 0.7805025429276399, "grad_norm": 4.0375862315127655, "learning_rate": 9.238728763486778e-06, "loss": 0.5187, "step": 15500 }, { "epoch": 0.7810060929553351, "grad_norm": 3.4650213618467394, "learning_rate": 9.23717419356838e-06, "loss": 0.4344, "step": 15510 }, { "epoch": 0.7815096429830304, "grad_norm": 5.222952943843598, "learning_rate": 9.235618169066564e-06, "loss": 0.439, "step": 15520 }, { "epoch": 0.7820131930107256, "grad_norm": 4.496689053144747, "learning_rate": 9.234060690515505e-06, "loss": 0.5034, "step": 15530 }, { "epoch": 0.7825167430384209, "grad_norm": 3.7970531397999303, "learning_rate": 9.232501758449868e-06, "loss": 0.4576, "step": 15540 }, { "epoch": 0.7830202930661161, "grad_norm": 3.373540567848062, "learning_rate": 9.23094137340482e-06, "loss": 0.4771, "step": 15550 }, { "epoch": 0.7835238430938114, "grad_norm": 4.09369522390974, "learning_rate": 9.22937953591603e-06, "loss": 0.5088, "step": 15560 }, { "epoch": 0.7840273931215066, "grad_norm": 4.466535401464083, "learning_rate": 9.227816246519658e-06, "loss": 0.516, "step": 15570 }, { "epoch": 0.7845309431492019, "grad_norm": 3.9264552286142256, "learning_rate": 9.226251505752373e-06, "loss": 0.4936, "step": 15580 }, { "epoch": 0.7850344931768971, "grad_norm": 3.4452317487580677, "learning_rate": 9.22468531415133e-06, "loss": 0.4294, "step": 15590 }, { "epoch": 0.7855380432045924, "grad_norm": 4.096003847424522, "learning_rate": 9.223117672254193e-06, "loss": 0.4949, "step": 15600 }, { "epoch": 0.7860415932322876, "grad_norm": 3.4842302011678923, "learning_rate": 9.221548580599118e-06, "loss": 0.4328, "step": 15610 }, { "epoch": 0.7865451432599829, "grad_norm": 3.734496829145416, "learning_rate": 9.219978039724757e-06, "loss": 0.4389, "step": 15620 }, { "epoch": 0.7870486932876781, "grad_norm": 3.644571617164629, "learning_rate": 9.218406050170267e-06, "loss": 0.4336, "step": 15630 }, { "epoch": 0.7875522433153734, "grad_norm": 3.412589482424236, "learning_rate": 9.216832612475296e-06, "loss": 0.4713, "step": 15640 }, { "epoch": 0.7880557933430686, "grad_norm": 4.165870155758176, "learning_rate": 9.21525772717999e-06, "loss": 0.4328, "step": 15650 }, { "epoch": 0.7885593433707638, "grad_norm": 4.346822296091232, "learning_rate": 9.213681394824992e-06, "loss": 0.5042, "step": 15660 }, { "epoch": 0.7890628933984591, "grad_norm": 3.7161359865702743, "learning_rate": 9.212103615951444e-06, "loss": 0.4546, "step": 15670 }, { "epoch": 0.7895664434261543, "grad_norm": 4.593639017401848, "learning_rate": 9.210524391100985e-06, "loss": 0.4509, "step": 15680 }, { "epoch": 0.7900699934538497, "grad_norm": 4.1536646625407085, "learning_rate": 9.208943720815745e-06, "loss": 0.5391, "step": 15690 }, { "epoch": 0.7905735434815448, "grad_norm": 3.6331399069800905, "learning_rate": 9.207361605638357e-06, "loss": 0.5522, "step": 15700 }, { "epoch": 0.7910770935092402, "grad_norm": 4.190749358743088, "learning_rate": 9.205778046111942e-06, "loss": 0.5169, "step": 15710 }, { "epoch": 0.7915806435369354, "grad_norm": 4.232498246220071, "learning_rate": 9.204193042780127e-06, "loss": 0.4636, "step": 15720 }, { "epoch": 0.7920841935646307, "grad_norm": 4.129183617084641, "learning_rate": 9.202606596187027e-06, "loss": 0.5016, "step": 15730 }, { "epoch": 0.7925877435923259, "grad_norm": 3.1530539146666183, "learning_rate": 9.201018706877255e-06, "loss": 0.4104, "step": 15740 }, { "epoch": 0.7930912936200212, "grad_norm": 3.6708216374747167, "learning_rate": 9.199429375395917e-06, "loss": 0.4682, "step": 15750 }, { "epoch": 0.7935948436477164, "grad_norm": 4.634917434961761, "learning_rate": 9.197838602288617e-06, "loss": 0.5126, "step": 15760 }, { "epoch": 0.7940983936754117, "grad_norm": 3.7032382814245843, "learning_rate": 9.196246388101454e-06, "loss": 0.52, "step": 15770 }, { "epoch": 0.7946019437031069, "grad_norm": 3.499509619142284, "learning_rate": 9.194652733381021e-06, "loss": 0.4438, "step": 15780 }, { "epoch": 0.7951054937308022, "grad_norm": 3.6990116125019425, "learning_rate": 9.193057638674407e-06, "loss": 0.4922, "step": 15790 }, { "epoch": 0.7956090437584974, "grad_norm": 3.6180600525370865, "learning_rate": 9.191461104529188e-06, "loss": 0.4647, "step": 15800 }, { "epoch": 0.7961125937861927, "grad_norm": 4.110944261592694, "learning_rate": 9.189863131493444e-06, "loss": 0.4422, "step": 15810 }, { "epoch": 0.7966161438138879, "grad_norm": 3.538484800831751, "learning_rate": 9.188263720115743e-06, "loss": 0.4979, "step": 15820 }, { "epoch": 0.7971196938415832, "grad_norm": 4.3568605547348795, "learning_rate": 9.186662870945149e-06, "loss": 0.5166, "step": 15830 }, { "epoch": 0.7976232438692784, "grad_norm": 4.057218196867279, "learning_rate": 9.185060584531218e-06, "loss": 0.5201, "step": 15840 }, { "epoch": 0.7981267938969737, "grad_norm": 4.840606732437342, "learning_rate": 9.183456861424002e-06, "loss": 0.5063, "step": 15850 }, { "epoch": 0.7986303439246689, "grad_norm": 3.6109286679872623, "learning_rate": 9.181851702174042e-06, "loss": 0.4801, "step": 15860 }, { "epoch": 0.7991338939523641, "grad_norm": 4.254326790951246, "learning_rate": 9.180245107332378e-06, "loss": 0.4885, "step": 15870 }, { "epoch": 0.7996374439800594, "grad_norm": 4.083534928109781, "learning_rate": 9.178637077450538e-06, "loss": 0.4096, "step": 15880 }, { "epoch": 0.8001409940077546, "grad_norm": 4.50240785596666, "learning_rate": 9.177027613080542e-06, "loss": 0.5224, "step": 15890 }, { "epoch": 0.8006445440354499, "grad_norm": 4.901286750403302, "learning_rate": 9.175416714774908e-06, "loss": 0.5097, "step": 15900 }, { "epoch": 0.8011480940631451, "grad_norm": 3.930796621075801, "learning_rate": 9.17380438308664e-06, "loss": 0.4396, "step": 15910 }, { "epoch": 0.8016516440908404, "grad_norm": 4.160982352153176, "learning_rate": 9.172190618569236e-06, "loss": 0.4989, "step": 15920 }, { "epoch": 0.8021551941185356, "grad_norm": 4.951338824825679, "learning_rate": 9.170575421776692e-06, "loss": 0.5109, "step": 15930 }, { "epoch": 0.802658744146231, "grad_norm": 3.904231804401862, "learning_rate": 9.168958793263485e-06, "loss": 0.471, "step": 15940 }, { "epoch": 0.8031622941739262, "grad_norm": 3.5067534746250697, "learning_rate": 9.16734073358459e-06, "loss": 0.5151, "step": 15950 }, { "epoch": 0.8036658442016215, "grad_norm": 4.579917689234143, "learning_rate": 9.165721243295474e-06, "loss": 0.4666, "step": 15960 }, { "epoch": 0.8041693942293167, "grad_norm": 4.486161882374931, "learning_rate": 9.164100322952093e-06, "loss": 0.4965, "step": 15970 }, { "epoch": 0.804672944257012, "grad_norm": 3.54252111368799, "learning_rate": 9.162477973110892e-06, "loss": 0.456, "step": 15980 }, { "epoch": 0.8051764942847072, "grad_norm": 3.6856576062005586, "learning_rate": 9.160854194328813e-06, "loss": 0.4949, "step": 15990 }, { "epoch": 0.8056800443124025, "grad_norm": 3.990648870080147, "learning_rate": 9.15922898716328e-06, "loss": 0.5172, "step": 16000 }, { "epoch": 0.8061835943400977, "grad_norm": 4.296342301144447, "learning_rate": 9.157602352172214e-06, "loss": 0.4744, "step": 16010 }, { "epoch": 0.806687144367793, "grad_norm": 4.505475750148776, "learning_rate": 9.155974289914022e-06, "loss": 0.4448, "step": 16020 }, { "epoch": 0.8071906943954882, "grad_norm": 3.9976943479311835, "learning_rate": 9.154344800947608e-06, "loss": 0.4785, "step": 16030 }, { "epoch": 0.8076942444231835, "grad_norm": 3.984219800386971, "learning_rate": 9.152713885832355e-06, "loss": 0.4833, "step": 16040 }, { "epoch": 0.8081977944508787, "grad_norm": 3.7962419163152035, "learning_rate": 9.151081545128146e-06, "loss": 0.4193, "step": 16050 }, { "epoch": 0.8087013444785739, "grad_norm": 3.678037181383995, "learning_rate": 9.149447779395346e-06, "loss": 0.5101, "step": 16060 }, { "epoch": 0.8092048945062692, "grad_norm": 3.4926277184719474, "learning_rate": 9.147812589194814e-06, "loss": 0.4213, "step": 16070 }, { "epoch": 0.8097084445339644, "grad_norm": 4.575531409302849, "learning_rate": 9.146175975087892e-06, "loss": 0.4358, "step": 16080 }, { "epoch": 0.8102119945616597, "grad_norm": 4.288893105370528, "learning_rate": 9.14453793763642e-06, "loss": 0.5458, "step": 16090 }, { "epoch": 0.8107155445893549, "grad_norm": 2.8983649980986996, "learning_rate": 9.142898477402716e-06, "loss": 0.4138, "step": 16100 }, { "epoch": 0.8112190946170502, "grad_norm": 4.288991149378233, "learning_rate": 9.141257594949595e-06, "loss": 0.4547, "step": 16110 }, { "epoch": 0.8117226446447454, "grad_norm": 3.8479643664806087, "learning_rate": 9.139615290840357e-06, "loss": 0.4963, "step": 16120 }, { "epoch": 0.8122261946724407, "grad_norm": 3.5850493761335773, "learning_rate": 9.13797156563879e-06, "loss": 0.4798, "step": 16130 }, { "epoch": 0.8127297447001359, "grad_norm": 4.640067488123278, "learning_rate": 9.136326419909169e-06, "loss": 0.5623, "step": 16140 }, { "epoch": 0.8132332947278312, "grad_norm": 4.342568747421614, "learning_rate": 9.13467985421626e-06, "loss": 0.4764, "step": 16150 }, { "epoch": 0.8137368447555264, "grad_norm": 4.803470273458518, "learning_rate": 9.13303186912531e-06, "loss": 0.4052, "step": 16160 }, { "epoch": 0.8142403947832217, "grad_norm": 3.1983770603473847, "learning_rate": 9.13138246520206e-06, "loss": 0.4843, "step": 16170 }, { "epoch": 0.8147439448109169, "grad_norm": 3.3694503954956656, "learning_rate": 9.129731643012736e-06, "loss": 0.5248, "step": 16180 }, { "epoch": 0.8152474948386123, "grad_norm": 5.036895871074628, "learning_rate": 9.128079403124048e-06, "loss": 0.4659, "step": 16190 }, { "epoch": 0.8157510448663075, "grad_norm": 4.3046490729121265, "learning_rate": 9.126425746103196e-06, "loss": 0.5041, "step": 16200 }, { "epoch": 0.8162545948940028, "grad_norm": 4.429679856500993, "learning_rate": 9.124770672517867e-06, "loss": 0.4617, "step": 16210 }, { "epoch": 0.816758144921698, "grad_norm": 4.805688177399547, "learning_rate": 9.123114182936229e-06, "loss": 0.5039, "step": 16220 }, { "epoch": 0.8172616949493933, "grad_norm": 4.440442253549918, "learning_rate": 9.121456277926943e-06, "loss": 0.4409, "step": 16230 }, { "epoch": 0.8177652449770885, "grad_norm": 3.2803433909312023, "learning_rate": 9.119796958059152e-06, "loss": 0.4922, "step": 16240 }, { "epoch": 0.8182687950047838, "grad_norm": 4.097343554946812, "learning_rate": 9.118136223902485e-06, "loss": 0.4742, "step": 16250 }, { "epoch": 0.818772345032479, "grad_norm": 3.687684441066933, "learning_rate": 9.116474076027059e-06, "loss": 0.4826, "step": 16260 }, { "epoch": 0.8192758950601742, "grad_norm": 3.79211088424082, "learning_rate": 9.11481051500347e-06, "loss": 0.4226, "step": 16270 }, { "epoch": 0.8197794450878695, "grad_norm": 3.632366488790448, "learning_rate": 9.113145541402808e-06, "loss": 0.4644, "step": 16280 }, { "epoch": 0.8202829951155647, "grad_norm": 4.57614443002303, "learning_rate": 9.11147915579664e-06, "loss": 0.4652, "step": 16290 }, { "epoch": 0.82078654514326, "grad_norm": 4.103833756686014, "learning_rate": 9.109811358757024e-06, "loss": 0.5142, "step": 16300 }, { "epoch": 0.8212900951709552, "grad_norm": 3.9709085631062, "learning_rate": 9.108142150856495e-06, "loss": 0.4719, "step": 16310 }, { "epoch": 0.8217936451986505, "grad_norm": 3.7878855419722606, "learning_rate": 9.10647153266808e-06, "loss": 0.4426, "step": 16320 }, { "epoch": 0.8222971952263457, "grad_norm": 3.9263058864807308, "learning_rate": 9.104799504765289e-06, "loss": 0.4662, "step": 16330 }, { "epoch": 0.822800745254041, "grad_norm": 3.799787755860677, "learning_rate": 9.10312606772211e-06, "loss": 0.4758, "step": 16340 }, { "epoch": 0.8233042952817362, "grad_norm": 4.47396051075875, "learning_rate": 9.10145122211302e-06, "loss": 0.4519, "step": 16350 }, { "epoch": 0.8238078453094315, "grad_norm": 5.0341027921633374, "learning_rate": 9.09977496851298e-06, "loss": 0.528, "step": 16360 }, { "epoch": 0.8243113953371267, "grad_norm": 5.380225368579687, "learning_rate": 9.098097307497433e-06, "loss": 0.5035, "step": 16370 }, { "epoch": 0.824814945364822, "grad_norm": 3.9976514889271204, "learning_rate": 9.096418239642301e-06, "loss": 0.4389, "step": 16380 }, { "epoch": 0.8253184953925172, "grad_norm": 3.1610586288029525, "learning_rate": 9.094737765523996e-06, "loss": 0.4367, "step": 16390 }, { "epoch": 0.8258220454202125, "grad_norm": 4.304762319517833, "learning_rate": 9.093055885719406e-06, "loss": 0.4888, "step": 16400 }, { "epoch": 0.8263255954479077, "grad_norm": 4.981594045904692, "learning_rate": 9.09137260080591e-06, "loss": 0.462, "step": 16410 }, { "epoch": 0.826829145475603, "grad_norm": 3.1864133544285145, "learning_rate": 9.08968791136136e-06, "loss": 0.4772, "step": 16420 }, { "epoch": 0.8273326955032982, "grad_norm": 4.364890741916748, "learning_rate": 9.088001817964099e-06, "loss": 0.4619, "step": 16430 }, { "epoch": 0.8278362455309936, "grad_norm": 4.040652921130916, "learning_rate": 9.086314321192943e-06, "loss": 0.5, "step": 16440 }, { "epoch": 0.8283397955586888, "grad_norm": 4.368405516142966, "learning_rate": 9.084625421627198e-06, "loss": 0.4628, "step": 16450 }, { "epoch": 0.8288433455863841, "grad_norm": 4.3804045881705305, "learning_rate": 9.082935119846646e-06, "loss": 0.4419, "step": 16460 }, { "epoch": 0.8293468956140793, "grad_norm": 3.685538676097371, "learning_rate": 9.081243416431551e-06, "loss": 0.4584, "step": 16470 }, { "epoch": 0.8298504456417745, "grad_norm": 4.018982091139797, "learning_rate": 9.079550311962662e-06, "loss": 0.4321, "step": 16480 }, { "epoch": 0.8303539956694698, "grad_norm": 3.469748885373502, "learning_rate": 9.077855807021205e-06, "loss": 0.4596, "step": 16490 }, { "epoch": 0.830857545697165, "grad_norm": 3.41184608428652, "learning_rate": 9.07615990218889e-06, "loss": 0.46, "step": 16500 }, { "epoch": 0.8313610957248603, "grad_norm": 4.290820014767949, "learning_rate": 9.074462598047902e-06, "loss": 0.4715, "step": 16510 }, { "epoch": 0.8318646457525555, "grad_norm": 5.063044115336208, "learning_rate": 9.072763895180913e-06, "loss": 0.5309, "step": 16520 }, { "epoch": 0.8323681957802508, "grad_norm": 4.485994001208094, "learning_rate": 9.071063794171072e-06, "loss": 0.513, "step": 16530 }, { "epoch": 0.832871745807946, "grad_norm": 4.108912779204096, "learning_rate": 9.069362295602008e-06, "loss": 0.4872, "step": 16540 }, { "epoch": 0.8333752958356413, "grad_norm": 4.378242030575282, "learning_rate": 9.06765940005783e-06, "loss": 0.4934, "step": 16550 }, { "epoch": 0.8338788458633365, "grad_norm": 3.704855185854316, "learning_rate": 9.065955108123125e-06, "loss": 0.4751, "step": 16560 }, { "epoch": 0.8343823958910318, "grad_norm": 4.2499976243467055, "learning_rate": 9.064249420382963e-06, "loss": 0.5375, "step": 16570 }, { "epoch": 0.834885945918727, "grad_norm": 4.248890660525835, "learning_rate": 9.062542337422891e-06, "loss": 0.4807, "step": 16580 }, { "epoch": 0.8353894959464223, "grad_norm": 5.983615676219076, "learning_rate": 9.060833859828932e-06, "loss": 0.4613, "step": 16590 }, { "epoch": 0.8358930459741175, "grad_norm": 4.0258911657130145, "learning_rate": 9.059123988187597e-06, "loss": 0.3856, "step": 16600 }, { "epoch": 0.8363965960018128, "grad_norm": 4.073331969274475, "learning_rate": 9.057412723085862e-06, "loss": 0.4649, "step": 16610 }, { "epoch": 0.836900146029508, "grad_norm": 4.274941253807248, "learning_rate": 9.055700065111196e-06, "loss": 0.5012, "step": 16620 }, { "epoch": 0.8374036960572033, "grad_norm": 3.62467854077168, "learning_rate": 9.053986014851535e-06, "loss": 0.5439, "step": 16630 }, { "epoch": 0.8379072460848985, "grad_norm": 3.507496838836643, "learning_rate": 9.052270572895294e-06, "loss": 0.5251, "step": 16640 }, { "epoch": 0.8384107961125938, "grad_norm": 4.388893654375442, "learning_rate": 9.050553739831376e-06, "loss": 0.4207, "step": 16650 }, { "epoch": 0.838914346140289, "grad_norm": 3.882219041453529, "learning_rate": 9.048835516249147e-06, "loss": 0.4819, "step": 16660 }, { "epoch": 0.8394178961679842, "grad_norm": 3.387479519621844, "learning_rate": 9.047115902738463e-06, "loss": 0.4838, "step": 16670 }, { "epoch": 0.8399214461956795, "grad_norm": 4.5258350596805155, "learning_rate": 9.045394899889648e-06, "loss": 0.4655, "step": 16680 }, { "epoch": 0.8404249962233747, "grad_norm": 3.8090030259640955, "learning_rate": 9.043672508293509e-06, "loss": 0.4135, "step": 16690 }, { "epoch": 0.84092854625107, "grad_norm": 3.8639631163526222, "learning_rate": 9.041948728541326e-06, "loss": 0.4721, "step": 16700 }, { "epoch": 0.8414320962787653, "grad_norm": 4.239495775651219, "learning_rate": 9.040223561224857e-06, "loss": 0.4914, "step": 16710 }, { "epoch": 0.8419356463064606, "grad_norm": 3.8301248464436775, "learning_rate": 9.038497006936336e-06, "loss": 0.4698, "step": 16720 }, { "epoch": 0.8424391963341558, "grad_norm": 4.616134340265327, "learning_rate": 9.036769066268476e-06, "loss": 0.5117, "step": 16730 }, { "epoch": 0.8429427463618511, "grad_norm": 4.196617551497841, "learning_rate": 9.035039739814459e-06, "loss": 0.4473, "step": 16740 }, { "epoch": 0.8434462963895463, "grad_norm": 4.113467045637611, "learning_rate": 9.033309028167952e-06, "loss": 0.4548, "step": 16750 }, { "epoch": 0.8439498464172416, "grad_norm": 4.300593250111417, "learning_rate": 9.031576931923088e-06, "loss": 0.456, "step": 16760 }, { "epoch": 0.8444533964449368, "grad_norm": 4.538155769520924, "learning_rate": 9.029843451674482e-06, "loss": 0.4477, "step": 16770 }, { "epoch": 0.8449569464726321, "grad_norm": 3.5873935136642094, "learning_rate": 9.028108588017224e-06, "loss": 0.4896, "step": 16780 }, { "epoch": 0.8454604965003273, "grad_norm": 3.2829471065089977, "learning_rate": 9.026372341546873e-06, "loss": 0.4736, "step": 16790 }, { "epoch": 0.8459640465280226, "grad_norm": 4.473268406481929, "learning_rate": 9.02463471285947e-06, "loss": 0.4868, "step": 16800 }, { "epoch": 0.8464675965557178, "grad_norm": 3.45373775579481, "learning_rate": 9.022895702551525e-06, "loss": 0.4836, "step": 16810 }, { "epoch": 0.8469711465834131, "grad_norm": 3.602037459677541, "learning_rate": 9.021155311220025e-06, "loss": 0.4227, "step": 16820 }, { "epoch": 0.8474746966111083, "grad_norm": 3.9928000364908987, "learning_rate": 9.019413539462429e-06, "loss": 0.4415, "step": 16830 }, { "epoch": 0.8479782466388036, "grad_norm": 4.26784970331473, "learning_rate": 9.017670387876676e-06, "loss": 0.546, "step": 16840 }, { "epoch": 0.8484817966664988, "grad_norm": 4.502099159245433, "learning_rate": 9.01592585706117e-06, "loss": 0.534, "step": 16850 }, { "epoch": 0.8489853466941941, "grad_norm": 3.5988490263061403, "learning_rate": 9.014179947614795e-06, "loss": 0.4175, "step": 16860 }, { "epoch": 0.8494888967218893, "grad_norm": 3.0721765726630674, "learning_rate": 9.012432660136902e-06, "loss": 0.4404, "step": 16870 }, { "epoch": 0.8499924467495845, "grad_norm": 5.375245919708063, "learning_rate": 9.010683995227323e-06, "loss": 0.4652, "step": 16880 }, { "epoch": 0.8504959967772798, "grad_norm": 4.039777381643048, "learning_rate": 9.008933953486357e-06, "loss": 0.4068, "step": 16890 }, { "epoch": 0.850999546804975, "grad_norm": 4.263355173572954, "learning_rate": 9.007182535514778e-06, "loss": 0.472, "step": 16900 }, { "epoch": 0.8515030968326703, "grad_norm": 3.749125707507644, "learning_rate": 9.005429741913832e-06, "loss": 0.5015, "step": 16910 }, { "epoch": 0.8520066468603655, "grad_norm": 3.9894892245884557, "learning_rate": 9.003675573285236e-06, "loss": 0.4759, "step": 16920 }, { "epoch": 0.8525101968880608, "grad_norm": 3.5243963632668223, "learning_rate": 9.001920030231181e-06, "loss": 0.4588, "step": 16930 }, { "epoch": 0.853013746915756, "grad_norm": 4.418750939333908, "learning_rate": 9.00016311335433e-06, "loss": 0.441, "step": 16940 }, { "epoch": 0.8535172969434514, "grad_norm": 3.6801563801191564, "learning_rate": 8.998404823257814e-06, "loss": 0.4639, "step": 16950 }, { "epoch": 0.8540208469711466, "grad_norm": 4.258552990781899, "learning_rate": 8.996645160545242e-06, "loss": 0.4219, "step": 16960 }, { "epoch": 0.8545243969988419, "grad_norm": 4.3402341719623925, "learning_rate": 8.994884125820685e-06, "loss": 0.5422, "step": 16970 }, { "epoch": 0.8550279470265371, "grad_norm": 4.959660252295916, "learning_rate": 8.993121719688694e-06, "loss": 0.4946, "step": 16980 }, { "epoch": 0.8555314970542324, "grad_norm": 4.062294419534849, "learning_rate": 8.991357942754286e-06, "loss": 0.4647, "step": 16990 }, { "epoch": 0.8560350470819276, "grad_norm": 3.6669752287006894, "learning_rate": 8.989592795622951e-06, "loss": 0.5931, "step": 17000 }, { "epoch": 0.8565385971096229, "grad_norm": 3.759059007349685, "learning_rate": 8.987826278900645e-06, "loss": 0.4555, "step": 17010 }, { "epoch": 0.8570421471373181, "grad_norm": 4.259037059526735, "learning_rate": 8.986058393193799e-06, "loss": 0.4699, "step": 17020 }, { "epoch": 0.8575456971650134, "grad_norm": 3.1503473634733443, "learning_rate": 8.984289139109311e-06, "loss": 0.4666, "step": 17030 }, { "epoch": 0.8580492471927086, "grad_norm": 3.476977075631136, "learning_rate": 8.982518517254552e-06, "loss": 0.4782, "step": 17040 }, { "epoch": 0.8585527972204039, "grad_norm": 3.8535140320039387, "learning_rate": 8.980746528237359e-06, "loss": 0.4943, "step": 17050 }, { "epoch": 0.8590563472480991, "grad_norm": 4.014987958549475, "learning_rate": 8.97897317266604e-06, "loss": 0.4752, "step": 17060 }, { "epoch": 0.8595598972757944, "grad_norm": 4.329274447331288, "learning_rate": 8.97719845114937e-06, "loss": 0.4636, "step": 17070 }, { "epoch": 0.8600634473034896, "grad_norm": 3.7366553620511898, "learning_rate": 8.9754223642966e-06, "loss": 0.53, "step": 17080 }, { "epoch": 0.8605669973311848, "grad_norm": 3.8058573243810256, "learning_rate": 8.973644912717437e-06, "loss": 0.5093, "step": 17090 }, { "epoch": 0.8610705473588801, "grad_norm": 3.746920874911089, "learning_rate": 8.971866097022071e-06, "loss": 0.4376, "step": 17100 }, { "epoch": 0.8615740973865753, "grad_norm": 3.7370920891122545, "learning_rate": 8.970085917821147e-06, "loss": 0.5248, "step": 17110 }, { "epoch": 0.8620776474142706, "grad_norm": 3.108834084159983, "learning_rate": 8.96830437572579e-06, "loss": 0.5282, "step": 17120 }, { "epoch": 0.8625811974419658, "grad_norm": 3.9382978947066625, "learning_rate": 8.966521471347584e-06, "loss": 0.4589, "step": 17130 }, { "epoch": 0.8630847474696611, "grad_norm": 3.0121519066636417, "learning_rate": 8.964737205298586e-06, "loss": 0.465, "step": 17140 }, { "epoch": 0.8635882974973563, "grad_norm": 4.661881896281337, "learning_rate": 8.962951578191315e-06, "loss": 0.4572, "step": 17150 }, { "epoch": 0.8640918475250516, "grad_norm": 3.9944709509270426, "learning_rate": 8.961164590638764e-06, "loss": 0.4729, "step": 17160 }, { "epoch": 0.8645953975527468, "grad_norm": 4.880663151484441, "learning_rate": 8.959376243254388e-06, "loss": 0.5114, "step": 17170 }, { "epoch": 0.8650989475804421, "grad_norm": 3.461538402657349, "learning_rate": 8.95758653665211e-06, "loss": 0.4369, "step": 17180 }, { "epoch": 0.8656024976081373, "grad_norm": 3.4009406387436885, "learning_rate": 8.95579547144632e-06, "loss": 0.4765, "step": 17190 }, { "epoch": 0.8661060476358327, "grad_norm": 3.7299160674470304, "learning_rate": 8.954003048251876e-06, "loss": 0.5119, "step": 17200 }, { "epoch": 0.8666095976635279, "grad_norm": 3.6626959036650106, "learning_rate": 8.952209267684099e-06, "loss": 0.4509, "step": 17210 }, { "epoch": 0.8671131476912232, "grad_norm": 3.312919155503134, "learning_rate": 8.950414130358779e-06, "loss": 0.4435, "step": 17220 }, { "epoch": 0.8676166977189184, "grad_norm": 3.776438944582603, "learning_rate": 8.94861763689217e-06, "loss": 0.4811, "step": 17230 }, { "epoch": 0.8681202477466137, "grad_norm": 4.276285518548052, "learning_rate": 8.946819787900988e-06, "loss": 0.4728, "step": 17240 }, { "epoch": 0.8686237977743089, "grad_norm": 4.443929979281426, "learning_rate": 8.945020584002425e-06, "loss": 0.4704, "step": 17250 }, { "epoch": 0.8691273478020042, "grad_norm": 3.2125411318129116, "learning_rate": 8.943220025814128e-06, "loss": 0.5046, "step": 17260 }, { "epoch": 0.8696308978296994, "grad_norm": 3.6912450663218626, "learning_rate": 8.94141811395421e-06, "loss": 0.444, "step": 17270 }, { "epoch": 0.8701344478573946, "grad_norm": 3.854107879091701, "learning_rate": 8.939614849041255e-06, "loss": 0.4755, "step": 17280 }, { "epoch": 0.8706379978850899, "grad_norm": 4.877039392697446, "learning_rate": 8.937810231694304e-06, "loss": 0.4273, "step": 17290 }, { "epoch": 0.8711415479127851, "grad_norm": 5.107840951483257, "learning_rate": 8.936004262532871e-06, "loss": 0.4649, "step": 17300 }, { "epoch": 0.8716450979404804, "grad_norm": 3.036405763337428, "learning_rate": 8.934196942176923e-06, "loss": 0.5251, "step": 17310 }, { "epoch": 0.8721486479681756, "grad_norm": 4.664490491519247, "learning_rate": 8.932388271246897e-06, "loss": 0.4156, "step": 17320 }, { "epoch": 0.8726521979958709, "grad_norm": 4.296049029400512, "learning_rate": 8.930578250363697e-06, "loss": 0.4868, "step": 17330 }, { "epoch": 0.8731557480235661, "grad_norm": 4.942448994797158, "learning_rate": 8.928766880148687e-06, "loss": 0.4981, "step": 17340 }, { "epoch": 0.8736592980512614, "grad_norm": 3.769617103726162, "learning_rate": 8.926954161223689e-06, "loss": 0.4432, "step": 17350 }, { "epoch": 0.8741628480789566, "grad_norm": 5.021826725062468, "learning_rate": 8.925140094210997e-06, "loss": 0.4928, "step": 17360 }, { "epoch": 0.8746663981066519, "grad_norm": 4.151055726423558, "learning_rate": 8.923324679733363e-06, "loss": 0.4441, "step": 17370 }, { "epoch": 0.8751699481343471, "grad_norm": 2.6074074099785656, "learning_rate": 8.921507918414001e-06, "loss": 0.4565, "step": 17380 }, { "epoch": 0.8756734981620424, "grad_norm": 4.221381240358132, "learning_rate": 8.919689810876589e-06, "loss": 0.413, "step": 17390 }, { "epoch": 0.8761770481897376, "grad_norm": 4.186012960162786, "learning_rate": 8.91787035774527e-06, "loss": 0.5015, "step": 17400 }, { "epoch": 0.8766805982174329, "grad_norm": 3.257324241837218, "learning_rate": 8.91604955964464e-06, "loss": 0.4518, "step": 17410 }, { "epoch": 0.8771841482451281, "grad_norm": 3.8322211772535546, "learning_rate": 8.914227417199765e-06, "loss": 0.472, "step": 17420 }, { "epoch": 0.8776876982728234, "grad_norm": 2.967797053568076, "learning_rate": 8.912403931036174e-06, "loss": 0.4762, "step": 17430 }, { "epoch": 0.8781912483005186, "grad_norm": 3.8455178560570826, "learning_rate": 8.910579101779846e-06, "loss": 0.449, "step": 17440 }, { "epoch": 0.878694798328214, "grad_norm": 4.305017162775418, "learning_rate": 8.908752930057233e-06, "loss": 0.5416, "step": 17450 }, { "epoch": 0.8791983483559092, "grad_norm": 3.022135567070575, "learning_rate": 8.906925416495244e-06, "loss": 0.4691, "step": 17460 }, { "epoch": 0.8797018983836045, "grad_norm": 3.3529962718440833, "learning_rate": 8.905096561721245e-06, "loss": 0.4522, "step": 17470 }, { "epoch": 0.8802054484112997, "grad_norm": 4.2308468120057094, "learning_rate": 8.903266366363066e-06, "loss": 0.4621, "step": 17480 }, { "epoch": 0.8807089984389949, "grad_norm": 4.028450406108495, "learning_rate": 8.901434831048999e-06, "loss": 0.4853, "step": 17490 }, { "epoch": 0.8812125484666902, "grad_norm": 3.544628973331403, "learning_rate": 8.899601956407789e-06, "loss": 0.4197, "step": 17500 }, { "epoch": 0.8817160984943854, "grad_norm": 4.472814490463127, "learning_rate": 8.897767743068646e-06, "loss": 0.4607, "step": 17510 }, { "epoch": 0.8822196485220807, "grad_norm": 3.1285768173116035, "learning_rate": 8.89593219166124e-06, "loss": 0.4491, "step": 17520 }, { "epoch": 0.8827231985497759, "grad_norm": 3.876911016479199, "learning_rate": 8.894095302815701e-06, "loss": 0.4393, "step": 17530 }, { "epoch": 0.8832267485774712, "grad_norm": 3.461721784037293, "learning_rate": 8.892257077162616e-06, "loss": 0.4567, "step": 17540 }, { "epoch": 0.8837302986051664, "grad_norm": 3.837691661025277, "learning_rate": 8.890417515333028e-06, "loss": 0.4802, "step": 17550 }, { "epoch": 0.8842338486328617, "grad_norm": 4.270520092711927, "learning_rate": 8.888576617958442e-06, "loss": 0.4499, "step": 17560 }, { "epoch": 0.8847373986605569, "grad_norm": 3.3379498811228947, "learning_rate": 8.886734385670823e-06, "loss": 0.4715, "step": 17570 }, { "epoch": 0.8852409486882522, "grad_norm": 4.1858905591750935, "learning_rate": 8.884890819102594e-06, "loss": 0.404, "step": 17580 }, { "epoch": 0.8857444987159474, "grad_norm": 3.235070257183397, "learning_rate": 8.883045918886633e-06, "loss": 0.4392, "step": 17590 }, { "epoch": 0.8862480487436427, "grad_norm": 3.7630284423086926, "learning_rate": 8.881199685656277e-06, "loss": 0.4579, "step": 17600 }, { "epoch": 0.8867515987713379, "grad_norm": 4.445562527977766, "learning_rate": 8.879352120045323e-06, "loss": 0.482, "step": 17610 }, { "epoch": 0.8872551487990332, "grad_norm": 4.4093942907276045, "learning_rate": 8.87750322268802e-06, "loss": 0.4606, "step": 17620 }, { "epoch": 0.8877586988267284, "grad_norm": 3.5740646056192067, "learning_rate": 8.875652994219082e-06, "loss": 0.5244, "step": 17630 }, { "epoch": 0.8882622488544237, "grad_norm": 4.719507762598951, "learning_rate": 8.873801435273673e-06, "loss": 0.513, "step": 17640 }, { "epoch": 0.8887657988821189, "grad_norm": 4.003205454285563, "learning_rate": 8.871948546487416e-06, "loss": 0.4811, "step": 17650 }, { "epoch": 0.8892693489098142, "grad_norm": 3.9065355109726094, "learning_rate": 8.87009432849639e-06, "loss": 0.4477, "step": 17660 }, { "epoch": 0.8897728989375094, "grad_norm": 3.423787016879772, "learning_rate": 8.868238781937137e-06, "loss": 0.4772, "step": 17670 }, { "epoch": 0.8902764489652047, "grad_norm": 5.370293198208425, "learning_rate": 8.866381907446644e-06, "loss": 0.469, "step": 17680 }, { "epoch": 0.8907799989929, "grad_norm": 4.874270136359152, "learning_rate": 8.86452370566236e-06, "loss": 0.4555, "step": 17690 }, { "epoch": 0.8912835490205951, "grad_norm": 3.5691358904137602, "learning_rate": 8.862664177222188e-06, "loss": 0.4821, "step": 17700 }, { "epoch": 0.8917870990482905, "grad_norm": 3.638262814961812, "learning_rate": 8.86080332276449e-06, "loss": 0.499, "step": 17710 }, { "epoch": 0.8922906490759857, "grad_norm": 3.333888463066109, "learning_rate": 8.858941142928077e-06, "loss": 0.4824, "step": 17720 }, { "epoch": 0.892794199103681, "grad_norm": 2.998469783442506, "learning_rate": 8.857077638352222e-06, "loss": 0.4503, "step": 17730 }, { "epoch": 0.8932977491313762, "grad_norm": 3.5196829615823395, "learning_rate": 8.855212809676647e-06, "loss": 0.4476, "step": 17740 }, { "epoch": 0.8938012991590715, "grad_norm": 3.5408353627226297, "learning_rate": 8.85334665754153e-06, "loss": 0.429, "step": 17750 }, { "epoch": 0.8943048491867667, "grad_norm": 4.699433249085469, "learning_rate": 8.851479182587509e-06, "loss": 0.4559, "step": 17760 }, { "epoch": 0.894808399214462, "grad_norm": 3.8128202575316106, "learning_rate": 8.849610385455663e-06, "loss": 0.4624, "step": 17770 }, { "epoch": 0.8953119492421572, "grad_norm": 3.6514905436324248, "learning_rate": 8.84774026678754e-06, "loss": 0.419, "step": 17780 }, { "epoch": 0.8958154992698525, "grad_norm": 3.1436236865305203, "learning_rate": 8.845868827225131e-06, "loss": 0.4791, "step": 17790 }, { "epoch": 0.8963190492975477, "grad_norm": 4.882455457506299, "learning_rate": 8.843996067410888e-06, "loss": 0.4505, "step": 17800 }, { "epoch": 0.896822599325243, "grad_norm": 3.5847072794561874, "learning_rate": 8.842121987987705e-06, "loss": 0.4616, "step": 17810 }, { "epoch": 0.8973261493529382, "grad_norm": 4.39744420664567, "learning_rate": 8.840246589598946e-06, "loss": 0.4503, "step": 17820 }, { "epoch": 0.8978296993806335, "grad_norm": 4.4600349262050925, "learning_rate": 8.838369872888413e-06, "loss": 0.47, "step": 17830 }, { "epoch": 0.8983332494083287, "grad_norm": 4.669826567515701, "learning_rate": 8.836491838500365e-06, "loss": 0.4322, "step": 17840 }, { "epoch": 0.898836799436024, "grad_norm": 4.34945389620147, "learning_rate": 8.834612487079516e-06, "loss": 0.4958, "step": 17850 }, { "epoch": 0.8993403494637192, "grad_norm": 3.6003786426696696, "learning_rate": 8.83273181927103e-06, "loss": 0.3886, "step": 17860 }, { "epoch": 0.8998438994914145, "grad_norm": 3.5922745762669073, "learning_rate": 8.830849835720523e-06, "loss": 0.3853, "step": 17870 }, { "epoch": 0.9003474495191097, "grad_norm": 3.738449611439385, "learning_rate": 8.828966537074063e-06, "loss": 0.5138, "step": 17880 }, { "epoch": 0.9008509995468049, "grad_norm": 3.9296869668741463, "learning_rate": 8.82708192397817e-06, "loss": 0.4473, "step": 17890 }, { "epoch": 0.9013545495745002, "grad_norm": 3.391809453926318, "learning_rate": 8.825195997079812e-06, "loss": 0.4411, "step": 17900 }, { "epoch": 0.9018580996021954, "grad_norm": 4.471634893929689, "learning_rate": 8.823308757026412e-06, "loss": 0.4486, "step": 17910 }, { "epoch": 0.9023616496298907, "grad_norm": 4.340443273637481, "learning_rate": 8.821420204465845e-06, "loss": 0.4621, "step": 17920 }, { "epoch": 0.9028651996575859, "grad_norm": 3.2352877408431664, "learning_rate": 8.81953034004643e-06, "loss": 0.4708, "step": 17930 }, { "epoch": 0.9033687496852812, "grad_norm": 4.23199887932186, "learning_rate": 8.817639164416942e-06, "loss": 0.4972, "step": 17940 }, { "epoch": 0.9038722997129764, "grad_norm": 3.9987757342580244, "learning_rate": 8.815746678226604e-06, "loss": 0.4387, "step": 17950 }, { "epoch": 0.9043758497406718, "grad_norm": 4.066325107320134, "learning_rate": 8.813852882125091e-06, "loss": 0.4596, "step": 17960 }, { "epoch": 0.904879399768367, "grad_norm": 3.7725648083739194, "learning_rate": 8.811957776762523e-06, "loss": 0.447, "step": 17970 }, { "epoch": 0.9053829497960623, "grad_norm": 3.1838679987673646, "learning_rate": 8.810061362789475e-06, "loss": 0.3901, "step": 17980 }, { "epoch": 0.9058864998237575, "grad_norm": 4.687019196813605, "learning_rate": 8.808163640856968e-06, "loss": 0.4968, "step": 17990 }, { "epoch": 0.9063900498514528, "grad_norm": 3.272649883961061, "learning_rate": 8.806264611616473e-06, "loss": 0.4185, "step": 18000 }, { "epoch": 0.906893599879148, "grad_norm": 4.008638608581921, "learning_rate": 8.804364275719907e-06, "loss": 0.4322, "step": 18010 }, { "epoch": 0.9073971499068433, "grad_norm": 3.8792837718572426, "learning_rate": 8.802462633819644e-06, "loss": 0.4454, "step": 18020 }, { "epoch": 0.9079006999345385, "grad_norm": 4.574674294014608, "learning_rate": 8.800559686568493e-06, "loss": 0.4796, "step": 18030 }, { "epoch": 0.9084042499622338, "grad_norm": 3.0105632235351187, "learning_rate": 8.798655434619723e-06, "loss": 0.4434, "step": 18040 }, { "epoch": 0.908907799989929, "grad_norm": 4.313376680735838, "learning_rate": 8.796749878627045e-06, "loss": 0.4731, "step": 18050 }, { "epoch": 0.9094113500176243, "grad_norm": 2.862523315156173, "learning_rate": 8.79484301924462e-06, "loss": 0.4816, "step": 18060 }, { "epoch": 0.9099149000453195, "grad_norm": 3.2584080984098494, "learning_rate": 8.792934857127054e-06, "loss": 0.4512, "step": 18070 }, { "epoch": 0.9104184500730148, "grad_norm": 3.1904313946624883, "learning_rate": 8.791025392929403e-06, "loss": 0.4879, "step": 18080 }, { "epoch": 0.91092200010071, "grad_norm": 3.9318483077095143, "learning_rate": 8.78911462730717e-06, "loss": 0.4425, "step": 18090 }, { "epoch": 0.9114255501284052, "grad_norm": 3.989020631652985, "learning_rate": 8.787202560916299e-06, "loss": 0.4447, "step": 18100 }, { "epoch": 0.9119291001561005, "grad_norm": 3.9980398740554426, "learning_rate": 8.78528919441319e-06, "loss": 0.4827, "step": 18110 }, { "epoch": 0.9124326501837957, "grad_norm": 4.7458544193475, "learning_rate": 8.783374528454681e-06, "loss": 0.551, "step": 18120 }, { "epoch": 0.912936200211491, "grad_norm": 3.173736274243299, "learning_rate": 8.78145856369806e-06, "loss": 0.3749, "step": 18130 }, { "epoch": 0.9134397502391862, "grad_norm": 4.290079108634153, "learning_rate": 8.779541300801063e-06, "loss": 0.5099, "step": 18140 }, { "epoch": 0.9139433002668815, "grad_norm": 4.198501413529432, "learning_rate": 8.777622740421864e-06, "loss": 0.4344, "step": 18150 }, { "epoch": 0.9144468502945767, "grad_norm": 4.037413363586865, "learning_rate": 8.775702883219094e-06, "loss": 0.422, "step": 18160 }, { "epoch": 0.914950400322272, "grad_norm": 3.264927271122248, "learning_rate": 8.773781729851816e-06, "loss": 0.458, "step": 18170 }, { "epoch": 0.9154539503499672, "grad_norm": 3.889109549055471, "learning_rate": 8.771859280979549e-06, "loss": 0.466, "step": 18180 }, { "epoch": 0.9159575003776625, "grad_norm": 4.08283423180821, "learning_rate": 8.76993553726225e-06, "loss": 0.4693, "step": 18190 }, { "epoch": 0.9164610504053577, "grad_norm": 3.6016441060833198, "learning_rate": 8.768010499360323e-06, "loss": 0.4855, "step": 18200 }, { "epoch": 0.9169646004330531, "grad_norm": 3.346148950841285, "learning_rate": 8.766084167934614e-06, "loss": 0.5026, "step": 18210 }, { "epoch": 0.9174681504607483, "grad_norm": 3.4946746791175625, "learning_rate": 8.764156543646421e-06, "loss": 0.3986, "step": 18220 }, { "epoch": 0.9179717004884436, "grad_norm": 3.0880088371264405, "learning_rate": 8.762227627157473e-06, "loss": 0.4578, "step": 18230 }, { "epoch": 0.9184752505161388, "grad_norm": 3.2980036752709387, "learning_rate": 8.760297419129954e-06, "loss": 0.4518, "step": 18240 }, { "epoch": 0.9189788005438341, "grad_norm": 3.8404414560011872, "learning_rate": 8.758365920226489e-06, "loss": 0.4721, "step": 18250 }, { "epoch": 0.9194823505715293, "grad_norm": 3.3755176721191913, "learning_rate": 8.756433131110138e-06, "loss": 0.5056, "step": 18260 }, { "epoch": 0.9199859005992246, "grad_norm": 3.388774229532215, "learning_rate": 8.754499052444412e-06, "loss": 0.4359, "step": 18270 }, { "epoch": 0.9204894506269198, "grad_norm": 4.987995797722149, "learning_rate": 8.752563684893262e-06, "loss": 0.4382, "step": 18280 }, { "epoch": 0.9209930006546151, "grad_norm": 3.5542757507762888, "learning_rate": 8.750627029121085e-06, "loss": 0.4681, "step": 18290 }, { "epoch": 0.9214965506823103, "grad_norm": 3.2521037787895137, "learning_rate": 8.748689085792716e-06, "loss": 0.5406, "step": 18300 }, { "epoch": 0.9220001007100055, "grad_norm": 4.523900096817566, "learning_rate": 8.746749855573434e-06, "loss": 0.5171, "step": 18310 }, { "epoch": 0.9225036507377008, "grad_norm": 3.6719918328887866, "learning_rate": 8.744809339128957e-06, "loss": 0.3613, "step": 18320 }, { "epoch": 0.923007200765396, "grad_norm": 3.969673380039402, "learning_rate": 8.742867537125447e-06, "loss": 0.4987, "step": 18330 }, { "epoch": 0.9235107507930913, "grad_norm": 3.1851811558695036, "learning_rate": 8.740924450229511e-06, "loss": 0.4213, "step": 18340 }, { "epoch": 0.9240143008207865, "grad_norm": 3.3271471582403533, "learning_rate": 8.738980079108187e-06, "loss": 0.4122, "step": 18350 }, { "epoch": 0.9245178508484818, "grad_norm": 3.4580055998663894, "learning_rate": 8.737034424428963e-06, "loss": 0.4053, "step": 18360 }, { "epoch": 0.925021400876177, "grad_norm": 3.973482133986838, "learning_rate": 8.735087486859766e-06, "loss": 0.4767, "step": 18370 }, { "epoch": 0.9255249509038723, "grad_norm": 3.551119992090655, "learning_rate": 8.73313926706896e-06, "loss": 0.4521, "step": 18380 }, { "epoch": 0.9260285009315675, "grad_norm": 4.267125240828465, "learning_rate": 8.73118976572535e-06, "loss": 0.4795, "step": 18390 }, { "epoch": 0.9265320509592628, "grad_norm": 3.936551760017021, "learning_rate": 8.729238983498185e-06, "loss": 0.4307, "step": 18400 }, { "epoch": 0.927035600986958, "grad_norm": 3.8317215658091324, "learning_rate": 8.727286921057149e-06, "loss": 0.5143, "step": 18410 }, { "epoch": 0.9275391510146533, "grad_norm": 3.6215902601179417, "learning_rate": 8.725333579072368e-06, "loss": 0.4363, "step": 18420 }, { "epoch": 0.9280427010423485, "grad_norm": 4.345269611099381, "learning_rate": 8.723378958214407e-06, "loss": 0.5033, "step": 18430 }, { "epoch": 0.9285462510700438, "grad_norm": 3.5454315078470593, "learning_rate": 8.721423059154268e-06, "loss": 0.405, "step": 18440 }, { "epoch": 0.929049801097739, "grad_norm": 3.196893212717048, "learning_rate": 8.719465882563395e-06, "loss": 0.422, "step": 18450 }, { "epoch": 0.9295533511254344, "grad_norm": 3.9087581327645085, "learning_rate": 8.717507429113667e-06, "loss": 0.4663, "step": 18460 }, { "epoch": 0.9300569011531296, "grad_norm": 3.090260125516317, "learning_rate": 8.715547699477407e-06, "loss": 0.4495, "step": 18470 }, { "epoch": 0.9305604511808249, "grad_norm": 5.955369503507473, "learning_rate": 8.713586694327366e-06, "loss": 0.4225, "step": 18480 }, { "epoch": 0.9310640012085201, "grad_norm": 3.6627398033340475, "learning_rate": 8.711624414336748e-06, "loss": 0.4811, "step": 18490 }, { "epoch": 0.9315675512362153, "grad_norm": 3.6497028483760836, "learning_rate": 8.70966086017918e-06, "loss": 0.4482, "step": 18500 }, { "epoch": 0.9320711012639106, "grad_norm": 3.9236455196498112, "learning_rate": 8.707696032528732e-06, "loss": 0.4364, "step": 18510 }, { "epoch": 0.9325746512916058, "grad_norm": 3.620111854131375, "learning_rate": 8.705729932059912e-06, "loss": 0.4695, "step": 18520 }, { "epoch": 0.9330782013193011, "grad_norm": 4.134199947973326, "learning_rate": 8.703762559447668e-06, "loss": 0.465, "step": 18530 }, { "epoch": 0.9335817513469963, "grad_norm": 4.1627974300774495, "learning_rate": 8.701793915367375e-06, "loss": 0.4285, "step": 18540 }, { "epoch": 0.9340853013746916, "grad_norm": 3.588915700917621, "learning_rate": 8.699824000494857e-06, "loss": 0.4993, "step": 18550 }, { "epoch": 0.9345888514023868, "grad_norm": 3.173371548784747, "learning_rate": 8.697852815506365e-06, "loss": 0.4724, "step": 18560 }, { "epoch": 0.9350924014300821, "grad_norm": 3.942460144566235, "learning_rate": 8.695880361078588e-06, "loss": 0.4778, "step": 18570 }, { "epoch": 0.9355959514577773, "grad_norm": 3.8229081500799356, "learning_rate": 8.693906637888652e-06, "loss": 0.4427, "step": 18580 }, { "epoch": 0.9360995014854726, "grad_norm": 4.401207280054776, "learning_rate": 8.69193164661412e-06, "loss": 0.4452, "step": 18590 }, { "epoch": 0.9366030515131678, "grad_norm": 3.3907137755200245, "learning_rate": 8.689955387932987e-06, "loss": 0.4516, "step": 18600 }, { "epoch": 0.9371066015408631, "grad_norm": 3.3300991368133137, "learning_rate": 8.687977862523685e-06, "loss": 0.4664, "step": 18610 }, { "epoch": 0.9376101515685583, "grad_norm": 3.918631856443697, "learning_rate": 8.68599907106508e-06, "loss": 0.5128, "step": 18620 }, { "epoch": 0.9381137015962536, "grad_norm": 3.8783979106485402, "learning_rate": 8.684019014236475e-06, "loss": 0.4512, "step": 18630 }, { "epoch": 0.9386172516239488, "grad_norm": 3.679711062113257, "learning_rate": 8.682037692717602e-06, "loss": 0.5292, "step": 18640 }, { "epoch": 0.9391208016516441, "grad_norm": 3.421597875467834, "learning_rate": 8.680055107188635e-06, "loss": 0.4991, "step": 18650 }, { "epoch": 0.9396243516793393, "grad_norm": 4.863489397424311, "learning_rate": 8.678071258330175e-06, "loss": 0.5047, "step": 18660 }, { "epoch": 0.9401279017070346, "grad_norm": 3.362647210773639, "learning_rate": 8.67608614682326e-06, "loss": 0.5008, "step": 18670 }, { "epoch": 0.9406314517347298, "grad_norm": 4.200603761476642, "learning_rate": 8.674099773349361e-06, "loss": 0.5086, "step": 18680 }, { "epoch": 0.9411350017624251, "grad_norm": 3.3558115911466926, "learning_rate": 8.67211213859038e-06, "loss": 0.4744, "step": 18690 }, { "epoch": 0.9416385517901203, "grad_norm": 4.079679840001445, "learning_rate": 8.670123243228656e-06, "loss": 0.4415, "step": 18700 }, { "epoch": 0.9421421018178155, "grad_norm": 4.402730785942954, "learning_rate": 8.668133087946958e-06, "loss": 0.4833, "step": 18710 }, { "epoch": 0.9426456518455109, "grad_norm": 3.604349166848277, "learning_rate": 8.66614167342849e-06, "loss": 0.4577, "step": 18720 }, { "epoch": 0.9431492018732061, "grad_norm": 3.258183954638728, "learning_rate": 8.664149000356883e-06, "loss": 0.4352, "step": 18730 }, { "epoch": 0.9436527519009014, "grad_norm": 4.017017339546877, "learning_rate": 8.662155069416207e-06, "loss": 0.3779, "step": 18740 }, { "epoch": 0.9441563019285966, "grad_norm": 3.8794545911728435, "learning_rate": 8.660159881290958e-06, "loss": 0.4046, "step": 18750 }, { "epoch": 0.9446598519562919, "grad_norm": 3.9952440985267734, "learning_rate": 8.658163436666067e-06, "loss": 0.4892, "step": 18760 }, { "epoch": 0.9451634019839871, "grad_norm": 4.424125731386068, "learning_rate": 8.656165736226895e-06, "loss": 0.4968, "step": 18770 }, { "epoch": 0.9456669520116824, "grad_norm": 3.938304005409826, "learning_rate": 8.654166780659235e-06, "loss": 0.4841, "step": 18780 }, { "epoch": 0.9461705020393776, "grad_norm": 3.5931617860196328, "learning_rate": 8.65216657064931e-06, "loss": 0.3875, "step": 18790 }, { "epoch": 0.9466740520670729, "grad_norm": 4.695042278585711, "learning_rate": 8.650165106883772e-06, "loss": 0.4506, "step": 18800 }, { "epoch": 0.9471776020947681, "grad_norm": 3.68365400791896, "learning_rate": 8.64816239004971e-06, "loss": 0.4171, "step": 18810 }, { "epoch": 0.9476811521224634, "grad_norm": 3.2260816737191984, "learning_rate": 8.646158420834634e-06, "loss": 0.4991, "step": 18820 }, { "epoch": 0.9481847021501586, "grad_norm": 4.500015630154385, "learning_rate": 8.64415319992649e-06, "loss": 0.5218, "step": 18830 }, { "epoch": 0.9486882521778539, "grad_norm": 3.71608490422296, "learning_rate": 8.642146728013652e-06, "loss": 0.4236, "step": 18840 }, { "epoch": 0.9491918022055491, "grad_norm": 4.561833968398226, "learning_rate": 8.640139005784924e-06, "loss": 0.4427, "step": 18850 }, { "epoch": 0.9496953522332444, "grad_norm": 4.118825005241107, "learning_rate": 8.63813003392954e-06, "loss": 0.4844, "step": 18860 }, { "epoch": 0.9501989022609396, "grad_norm": 3.8672167484155433, "learning_rate": 8.636119813137157e-06, "loss": 0.3941, "step": 18870 }, { "epoch": 0.9507024522886349, "grad_norm": 3.8205271524548396, "learning_rate": 8.634108344097873e-06, "loss": 0.4762, "step": 18880 }, { "epoch": 0.9512060023163301, "grad_norm": 4.353140856130415, "learning_rate": 8.6320956275022e-06, "loss": 0.5028, "step": 18890 }, { "epoch": 0.9517095523440254, "grad_norm": 3.832518228601352, "learning_rate": 8.63008166404109e-06, "loss": 0.4779, "step": 18900 }, { "epoch": 0.9522131023717206, "grad_norm": 4.119805520188355, "learning_rate": 8.628066454405917e-06, "loss": 0.4938, "step": 18910 }, { "epoch": 0.9527166523994158, "grad_norm": 4.181813110970435, "learning_rate": 8.626049999288482e-06, "loss": 0.4876, "step": 18920 }, { "epoch": 0.9532202024271111, "grad_norm": 3.808743786661913, "learning_rate": 8.624032299381017e-06, "loss": 0.4891, "step": 18930 }, { "epoch": 0.9537237524548063, "grad_norm": 3.783736767738782, "learning_rate": 8.622013355376182e-06, "loss": 0.4277, "step": 18940 }, { "epoch": 0.9542273024825016, "grad_norm": 3.9496572421503995, "learning_rate": 8.619993167967062e-06, "loss": 0.4623, "step": 18950 }, { "epoch": 0.9547308525101968, "grad_norm": 3.911989324810318, "learning_rate": 8.617971737847167e-06, "loss": 0.4758, "step": 18960 }, { "epoch": 0.9552344025378922, "grad_norm": 4.393215933162047, "learning_rate": 8.615949065710433e-06, "loss": 0.4911, "step": 18970 }, { "epoch": 0.9557379525655874, "grad_norm": 4.581847220899178, "learning_rate": 8.613925152251232e-06, "loss": 0.4011, "step": 18980 }, { "epoch": 0.9562415025932827, "grad_norm": 3.8273888098116595, "learning_rate": 8.61189999816435e-06, "loss": 0.4739, "step": 18990 }, { "epoch": 0.9567450526209779, "grad_norm": 3.648361783944414, "learning_rate": 8.609873604145004e-06, "loss": 0.4012, "step": 19000 }, { "epoch": 0.9572486026486732, "grad_norm": 4.626859690251317, "learning_rate": 8.607845970888841e-06, "loss": 0.511, "step": 19010 }, { "epoch": 0.9577521526763684, "grad_norm": 3.418559570178924, "learning_rate": 8.605817099091924e-06, "loss": 0.4188, "step": 19020 }, { "epoch": 0.9582557027040637, "grad_norm": 3.8217643357186395, "learning_rate": 8.60378698945075e-06, "loss": 0.41, "step": 19030 }, { "epoch": 0.9587592527317589, "grad_norm": 4.313118519210584, "learning_rate": 8.601755642662235e-06, "loss": 0.41, "step": 19040 }, { "epoch": 0.9592628027594542, "grad_norm": 4.628476335190861, "learning_rate": 8.599723059423721e-06, "loss": 0.4742, "step": 19050 }, { "epoch": 0.9597663527871494, "grad_norm": 3.331775409165422, "learning_rate": 8.59768924043298e-06, "loss": 0.4733, "step": 19060 }, { "epoch": 0.9602699028148447, "grad_norm": 4.0816078578001855, "learning_rate": 8.595654186388198e-06, "loss": 0.4472, "step": 19070 }, { "epoch": 0.9607734528425399, "grad_norm": 3.811992367954675, "learning_rate": 8.593617897987993e-06, "loss": 0.4278, "step": 19080 }, { "epoch": 0.9612770028702352, "grad_norm": 4.4040303215208905, "learning_rate": 8.591580375931406e-06, "loss": 0.5379, "step": 19090 }, { "epoch": 0.9617805528979304, "grad_norm": 4.077331625396225, "learning_rate": 8.589541620917897e-06, "loss": 0.507, "step": 19100 }, { "epoch": 0.9622841029256256, "grad_norm": 3.200989643099646, "learning_rate": 8.587501633647353e-06, "loss": 0.4432, "step": 19110 }, { "epoch": 0.9627876529533209, "grad_norm": 3.6406647082818298, "learning_rate": 8.585460414820083e-06, "loss": 0.4438, "step": 19120 }, { "epoch": 0.9632912029810161, "grad_norm": 3.9132814290821627, "learning_rate": 8.58341796513682e-06, "loss": 0.4967, "step": 19130 }, { "epoch": 0.9637947530087114, "grad_norm": 4.246955877628001, "learning_rate": 8.581374285298718e-06, "loss": 0.465, "step": 19140 }, { "epoch": 0.9642983030364066, "grad_norm": 3.7865385615354645, "learning_rate": 8.579329376007353e-06, "loss": 0.4359, "step": 19150 }, { "epoch": 0.9648018530641019, "grad_norm": 4.123041050922997, "learning_rate": 8.577283237964723e-06, "loss": 0.4201, "step": 19160 }, { "epoch": 0.9653054030917971, "grad_norm": 3.1774021973139064, "learning_rate": 8.57523587187325e-06, "loss": 0.4955, "step": 19170 }, { "epoch": 0.9658089531194924, "grad_norm": 3.982900438625774, "learning_rate": 8.573187278435777e-06, "loss": 0.459, "step": 19180 }, { "epoch": 0.9663125031471876, "grad_norm": 3.829440576900729, "learning_rate": 8.571137458355567e-06, "loss": 0.4025, "step": 19190 }, { "epoch": 0.966816053174883, "grad_norm": 4.159020892631718, "learning_rate": 8.569086412336304e-06, "loss": 0.4968, "step": 19200 }, { "epoch": 0.9673196032025781, "grad_norm": 3.617377636088969, "learning_rate": 8.567034141082091e-06, "loss": 0.4654, "step": 19210 }, { "epoch": 0.9678231532302735, "grad_norm": 3.609815404332094, "learning_rate": 8.564980645297459e-06, "loss": 0.4636, "step": 19220 }, { "epoch": 0.9683267032579687, "grad_norm": 4.2729094229398115, "learning_rate": 8.562925925687352e-06, "loss": 0.5349, "step": 19230 }, { "epoch": 0.968830253285664, "grad_norm": 5.706387298738341, "learning_rate": 8.560869982957134e-06, "loss": 0.475, "step": 19240 }, { "epoch": 0.9693338033133592, "grad_norm": 4.7682308641500795, "learning_rate": 8.558812817812595e-06, "loss": 0.4554, "step": 19250 }, { "epoch": 0.9698373533410545, "grad_norm": 4.614674537995005, "learning_rate": 8.556754430959942e-06, "loss": 0.5092, "step": 19260 }, { "epoch": 0.9703409033687497, "grad_norm": 3.9365469411847065, "learning_rate": 8.554694823105798e-06, "loss": 0.4557, "step": 19270 }, { "epoch": 0.970844453396445, "grad_norm": 3.708104485220574, "learning_rate": 8.55263399495721e-06, "loss": 0.4143, "step": 19280 }, { "epoch": 0.9713480034241402, "grad_norm": 3.1721386885840324, "learning_rate": 8.550571947221637e-06, "loss": 0.4075, "step": 19290 }, { "epoch": 0.9718515534518355, "grad_norm": 2.9478006266834464, "learning_rate": 8.548508680606967e-06, "loss": 0.4059, "step": 19300 }, { "epoch": 0.9723551034795307, "grad_norm": 3.8936616118777785, "learning_rate": 8.546444195821495e-06, "loss": 0.4528, "step": 19310 }, { "epoch": 0.9728586535072259, "grad_norm": 4.441506900831341, "learning_rate": 8.544378493573944e-06, "loss": 0.4918, "step": 19320 }, { "epoch": 0.9733622035349212, "grad_norm": 3.592772929591932, "learning_rate": 8.542311574573449e-06, "loss": 0.4435, "step": 19330 }, { "epoch": 0.9738657535626164, "grad_norm": 3.7582986966791774, "learning_rate": 8.540243439529568e-06, "loss": 0.4911, "step": 19340 }, { "epoch": 0.9743693035903117, "grad_norm": 3.2526242686669833, "learning_rate": 8.538174089152267e-06, "loss": 0.4607, "step": 19350 }, { "epoch": 0.9748728536180069, "grad_norm": 3.9204761725359165, "learning_rate": 8.536103524151939e-06, "loss": 0.4656, "step": 19360 }, { "epoch": 0.9753764036457022, "grad_norm": 4.221248964518193, "learning_rate": 8.53403174523939e-06, "loss": 0.4849, "step": 19370 }, { "epoch": 0.9758799536733974, "grad_norm": 2.781216980040246, "learning_rate": 8.531958753125842e-06, "loss": 0.4525, "step": 19380 }, { "epoch": 0.9763835037010927, "grad_norm": 3.7613453366412775, "learning_rate": 8.529884548522935e-06, "loss": 0.3917, "step": 19390 }, { "epoch": 0.9768870537287879, "grad_norm": 3.44666920395571, "learning_rate": 8.527809132142726e-06, "loss": 0.4701, "step": 19400 }, { "epoch": 0.9773906037564832, "grad_norm": 3.8655078852325744, "learning_rate": 8.525732504697683e-06, "loss": 0.4622, "step": 19410 }, { "epoch": 0.9778941537841784, "grad_norm": 3.983017838280777, "learning_rate": 8.523654666900693e-06, "loss": 0.4423, "step": 19420 }, { "epoch": 0.9783977038118737, "grad_norm": 4.513776118578744, "learning_rate": 8.521575619465066e-06, "loss": 0.4643, "step": 19430 }, { "epoch": 0.9789012538395689, "grad_norm": 3.87007558905279, "learning_rate": 8.519495363104512e-06, "loss": 0.4225, "step": 19440 }, { "epoch": 0.9794048038672643, "grad_norm": 3.6137702645499696, "learning_rate": 8.517413898533168e-06, "loss": 0.4229, "step": 19450 }, { "epoch": 0.9799083538949594, "grad_norm": 4.435338924221378, "learning_rate": 8.515331226465583e-06, "loss": 0.5259, "step": 19460 }, { "epoch": 0.9804119039226548, "grad_norm": 4.098372723191896, "learning_rate": 8.513247347616713e-06, "loss": 0.4873, "step": 19470 }, { "epoch": 0.98091545395035, "grad_norm": 2.283899849151471, "learning_rate": 8.511162262701943e-06, "loss": 0.4484, "step": 19480 }, { "epoch": 0.9814190039780453, "grad_norm": 3.6606227686813706, "learning_rate": 8.509075972437059e-06, "loss": 0.4869, "step": 19490 }, { "epoch": 0.9819225540057405, "grad_norm": 4.050222633970487, "learning_rate": 8.506988477538267e-06, "loss": 0.5228, "step": 19500 }, { "epoch": 0.9824261040334358, "grad_norm": 3.6030847199519602, "learning_rate": 8.504899778722184e-06, "loss": 0.4286, "step": 19510 }, { "epoch": 0.982929654061131, "grad_norm": 3.6146184043632963, "learning_rate": 8.50280987670584e-06, "loss": 0.457, "step": 19520 }, { "epoch": 0.9834332040888262, "grad_norm": 3.3992598486927164, "learning_rate": 8.500718772206684e-06, "loss": 0.4469, "step": 19530 }, { "epoch": 0.9839367541165215, "grad_norm": 4.054817056601935, "learning_rate": 8.49862646594257e-06, "loss": 0.4713, "step": 19540 }, { "epoch": 0.9844403041442167, "grad_norm": 3.1147566102714723, "learning_rate": 8.496532958631767e-06, "loss": 0.4978, "step": 19550 }, { "epoch": 0.984943854171912, "grad_norm": 2.670774296963279, "learning_rate": 8.494438250992959e-06, "loss": 0.408, "step": 19560 }, { "epoch": 0.9854474041996072, "grad_norm": 3.3478909232787197, "learning_rate": 8.492342343745238e-06, "loss": 0.4001, "step": 19570 }, { "epoch": 0.9859509542273025, "grad_norm": 4.782776920224973, "learning_rate": 8.490245237608114e-06, "loss": 0.4754, "step": 19580 }, { "epoch": 0.9864545042549977, "grad_norm": 4.588713168965056, "learning_rate": 8.4881469333015e-06, "loss": 0.4613, "step": 19590 }, { "epoch": 0.986958054282693, "grad_norm": 3.911907052622192, "learning_rate": 8.486047431545726e-06, "loss": 0.3989, "step": 19600 }, { "epoch": 0.9874616043103882, "grad_norm": 3.0166695885437806, "learning_rate": 8.483946733061533e-06, "loss": 0.4211, "step": 19610 }, { "epoch": 0.9879651543380835, "grad_norm": 4.523675456912697, "learning_rate": 8.481844838570071e-06, "loss": 0.5019, "step": 19620 }, { "epoch": 0.9884687043657787, "grad_norm": 3.267744972926673, "learning_rate": 8.4797417487929e-06, "loss": 0.4755, "step": 19630 }, { "epoch": 0.988972254393474, "grad_norm": 5.2098373045647675, "learning_rate": 8.477637464451995e-06, "loss": 0.4451, "step": 19640 }, { "epoch": 0.9894758044211692, "grad_norm": 3.61427707436352, "learning_rate": 8.475531986269736e-06, "loss": 0.4239, "step": 19650 }, { "epoch": 0.9899793544488645, "grad_norm": 3.302451034874651, "learning_rate": 8.473425314968913e-06, "loss": 0.4132, "step": 19660 }, { "epoch": 0.9904829044765597, "grad_norm": 3.2049749582289904, "learning_rate": 8.47131745127273e-06, "loss": 0.4321, "step": 19670 }, { "epoch": 0.990986454504255, "grad_norm": 4.187001784650315, "learning_rate": 8.469208395904794e-06, "loss": 0.521, "step": 19680 }, { "epoch": 0.9914900045319502, "grad_norm": 3.3943139511362044, "learning_rate": 8.467098149589126e-06, "loss": 0.5012, "step": 19690 }, { "epoch": 0.9919935545596456, "grad_norm": 3.2230074184207305, "learning_rate": 8.464986713050156e-06, "loss": 0.4502, "step": 19700 }, { "epoch": 0.9924971045873408, "grad_norm": 3.5405371554638454, "learning_rate": 8.46287408701272e-06, "loss": 0.4049, "step": 19710 }, { "epoch": 0.993000654615036, "grad_norm": 4.692552586004929, "learning_rate": 8.460760272202062e-06, "loss": 0.5313, "step": 19720 }, { "epoch": 0.9935042046427313, "grad_norm": 3.8894884123263616, "learning_rate": 8.458645269343837e-06, "loss": 0.4255, "step": 19730 }, { "epoch": 0.9940077546704265, "grad_norm": 4.091996414897768, "learning_rate": 8.456529079164106e-06, "loss": 0.4519, "step": 19740 }, { "epoch": 0.9945113046981218, "grad_norm": 3.944854666245239, "learning_rate": 8.454411702389337e-06, "loss": 0.4142, "step": 19750 }, { "epoch": 0.995014854725817, "grad_norm": 2.8415363223007537, "learning_rate": 8.452293139746407e-06, "loss": 0.355, "step": 19760 }, { "epoch": 0.9955184047535123, "grad_norm": 3.9519886472875525, "learning_rate": 8.450173391962601e-06, "loss": 0.4516, "step": 19770 }, { "epoch": 0.9960219547812075, "grad_norm": 3.703573384862642, "learning_rate": 8.448052459765606e-06, "loss": 0.4808, "step": 19780 }, { "epoch": 0.9965255048089028, "grad_norm": 3.2222420340541977, "learning_rate": 8.44593034388352e-06, "loss": 0.4441, "step": 19790 }, { "epoch": 0.997029054836598, "grad_norm": 3.6680995222944786, "learning_rate": 8.443807045044846e-06, "loss": 0.4396, "step": 19800 }, { "epoch": 0.9975326048642933, "grad_norm": 3.0829545564293013, "learning_rate": 8.441682563978492e-06, "loss": 0.4479, "step": 19810 }, { "epoch": 0.9980361548919885, "grad_norm": 4.345323272124365, "learning_rate": 8.439556901413776e-06, "loss": 0.4178, "step": 19820 }, { "epoch": 0.9985397049196838, "grad_norm": 3.5474332885752906, "learning_rate": 8.437430058080418e-06, "loss": 0.3939, "step": 19830 }, { "epoch": 0.999043254947379, "grad_norm": 3.553955616143146, "learning_rate": 8.435302034708543e-06, "loss": 0.4319, "step": 19840 }, { "epoch": 0.9995468049750743, "grad_norm": 4.005324560054804, "learning_rate": 8.433172832028683e-06, "loss": 0.4551, "step": 19850 }, { "epoch": 1.0000503550027695, "grad_norm": 2.811166568809425, "learning_rate": 8.43104245077177e-06, "loss": 0.4435, "step": 19860 }, { "epoch": 1.0005539050304648, "grad_norm": 3.4637614978296543, "learning_rate": 8.428910891669148e-06, "loss": 0.3383, "step": 19870 }, { "epoch": 1.0010574550581601, "grad_norm": 4.595910382202429, "learning_rate": 8.426778155452564e-06, "loss": 0.3436, "step": 19880 }, { "epoch": 1.0015610050858552, "grad_norm": 3.579119467720743, "learning_rate": 8.424644242854164e-06, "loss": 0.3537, "step": 19890 }, { "epoch": 1.0020645551135505, "grad_norm": 3.6243924244281525, "learning_rate": 8.4225091546065e-06, "loss": 0.3436, "step": 19900 }, { "epoch": 1.0025681051412458, "grad_norm": 2.8572397380056365, "learning_rate": 8.420372891442532e-06, "loss": 0.2838, "step": 19910 }, { "epoch": 1.0030716551689411, "grad_norm": 3.740074690234363, "learning_rate": 8.418235454095616e-06, "loss": 0.3361, "step": 19920 }, { "epoch": 1.0035752051966362, "grad_norm": 3.6887753482846617, "learning_rate": 8.416096843299515e-06, "loss": 0.3614, "step": 19930 }, { "epoch": 1.0040787552243315, "grad_norm": 4.033575673809318, "learning_rate": 8.413957059788398e-06, "loss": 0.3371, "step": 19940 }, { "epoch": 1.0045823052520269, "grad_norm": 2.936675288704468, "learning_rate": 8.41181610429683e-06, "loss": 0.2739, "step": 19950 }, { "epoch": 1.005085855279722, "grad_norm": 3.852966049545854, "learning_rate": 8.409673977559784e-06, "loss": 0.3114, "step": 19960 }, { "epoch": 1.0055894053074173, "grad_norm": 3.2597322890179607, "learning_rate": 8.40753068031263e-06, "loss": 0.3206, "step": 19970 }, { "epoch": 1.0060929553351126, "grad_norm": 4.03735228782882, "learning_rate": 8.405386213291147e-06, "loss": 0.3449, "step": 19980 }, { "epoch": 1.0065965053628079, "grad_norm": 4.46320150550532, "learning_rate": 8.403240577231504e-06, "loss": 0.2733, "step": 19990 }, { "epoch": 1.007100055390503, "grad_norm": 3.2355861979116156, "learning_rate": 8.401093772870284e-06, "loss": 0.3317, "step": 20000 }, { "epoch": 1.0076036054181983, "grad_norm": 3.2491876451538775, "learning_rate": 8.398945800944465e-06, "loss": 0.3225, "step": 20010 }, { "epoch": 1.0081071554458936, "grad_norm": 3.738674484637829, "learning_rate": 8.396796662191425e-06, "loss": 0.2938, "step": 20020 }, { "epoch": 1.0086107054735889, "grad_norm": 3.6078509034894712, "learning_rate": 8.394646357348942e-06, "loss": 0.3198, "step": 20030 }, { "epoch": 1.009114255501284, "grad_norm": 4.342482698718076, "learning_rate": 8.392494887155199e-06, "loss": 0.3157, "step": 20040 }, { "epoch": 1.0096178055289793, "grad_norm": 3.7636382330426645, "learning_rate": 8.390342252348774e-06, "loss": 0.3726, "step": 20050 }, { "epoch": 1.0101213555566746, "grad_norm": 4.191518047482777, "learning_rate": 8.388188453668649e-06, "loss": 0.3503, "step": 20060 }, { "epoch": 1.01062490558437, "grad_norm": 4.0169154525001725, "learning_rate": 8.386033491854203e-06, "loss": 0.3101, "step": 20070 }, { "epoch": 1.011128455612065, "grad_norm": 4.0733905368925765, "learning_rate": 8.383877367645215e-06, "loss": 0.3374, "step": 20080 }, { "epoch": 1.0116320056397603, "grad_norm": 3.0339947659846396, "learning_rate": 8.38172008178186e-06, "loss": 0.2753, "step": 20090 }, { "epoch": 1.0121355556674556, "grad_norm": 3.3346306343885357, "learning_rate": 8.379561635004719e-06, "loss": 0.2759, "step": 20100 }, { "epoch": 1.012639105695151, "grad_norm": 3.986897050098494, "learning_rate": 8.377402028054764e-06, "loss": 0.3198, "step": 20110 }, { "epoch": 1.013142655722846, "grad_norm": 3.637100566825511, "learning_rate": 8.37524126167337e-06, "loss": 0.3304, "step": 20120 }, { "epoch": 1.0136462057505413, "grad_norm": 3.323009553880261, "learning_rate": 8.373079336602308e-06, "loss": 0.2733, "step": 20130 }, { "epoch": 1.0141497557782366, "grad_norm": 3.156195640734893, "learning_rate": 8.370916253583748e-06, "loss": 0.3409, "step": 20140 }, { "epoch": 1.0146533058059317, "grad_norm": 4.2906421100957175, "learning_rate": 8.368752013360256e-06, "loss": 0.3453, "step": 20150 }, { "epoch": 1.015156855833627, "grad_norm": 3.2227186466080235, "learning_rate": 8.366586616674797e-06, "loss": 0.3178, "step": 20160 }, { "epoch": 1.0156604058613223, "grad_norm": 3.3741300112084573, "learning_rate": 8.364420064270728e-06, "loss": 0.3363, "step": 20170 }, { "epoch": 1.0161639558890176, "grad_norm": 3.3210529613715263, "learning_rate": 8.362252356891811e-06, "loss": 0.337, "step": 20180 }, { "epoch": 1.0166675059167127, "grad_norm": 3.6154010426291165, "learning_rate": 8.3600834952822e-06, "loss": 0.3204, "step": 20190 }, { "epoch": 1.017171055944408, "grad_norm": 4.080417412616901, "learning_rate": 8.357913480186446e-06, "loss": 0.2951, "step": 20200 }, { "epoch": 1.0176746059721034, "grad_norm": 3.439520386555249, "learning_rate": 8.355742312349491e-06, "loss": 0.3144, "step": 20210 }, { "epoch": 1.0181781559997987, "grad_norm": 3.8337392810747923, "learning_rate": 8.353569992516683e-06, "loss": 0.3103, "step": 20220 }, { "epoch": 1.0186817060274937, "grad_norm": 4.437198792499414, "learning_rate": 8.351396521433755e-06, "loss": 0.3257, "step": 20230 }, { "epoch": 1.019185256055189, "grad_norm": 5.659374138373904, "learning_rate": 8.349221899846843e-06, "loss": 0.3389, "step": 20240 }, { "epoch": 1.0196888060828844, "grad_norm": 3.453112401261641, "learning_rate": 8.347046128502471e-06, "loss": 0.2624, "step": 20250 }, { "epoch": 1.0201923561105797, "grad_norm": 4.620202058482151, "learning_rate": 8.344869208147568e-06, "loss": 0.3816, "step": 20260 }, { "epoch": 1.0206959061382748, "grad_norm": 4.419242412455409, "learning_rate": 8.342691139529445e-06, "loss": 0.316, "step": 20270 }, { "epoch": 1.02119945616597, "grad_norm": 3.7082034440815055, "learning_rate": 8.340511923395816e-06, "loss": 0.2944, "step": 20280 }, { "epoch": 1.0217030061936654, "grad_norm": 3.087307663911356, "learning_rate": 8.338331560494784e-06, "loss": 0.2937, "step": 20290 }, { "epoch": 1.0222065562213607, "grad_norm": 4.498225893066057, "learning_rate": 8.33615005157485e-06, "loss": 0.2914, "step": 20300 }, { "epoch": 1.0227101062490558, "grad_norm": 3.8509929508497134, "learning_rate": 8.333967397384906e-06, "loss": 0.3582, "step": 20310 }, { "epoch": 1.023213656276751, "grad_norm": 2.763307042560012, "learning_rate": 8.331783598674236e-06, "loss": 0.2651, "step": 20320 }, { "epoch": 1.0237172063044464, "grad_norm": 3.920917848411702, "learning_rate": 8.329598656192521e-06, "loss": 0.3129, "step": 20330 }, { "epoch": 1.0242207563321415, "grad_norm": 4.648407688217947, "learning_rate": 8.327412570689828e-06, "loss": 0.3138, "step": 20340 }, { "epoch": 1.0247243063598368, "grad_norm": 3.799406779858769, "learning_rate": 8.325225342916623e-06, "loss": 0.3206, "step": 20350 }, { "epoch": 1.025227856387532, "grad_norm": 2.6457519488519883, "learning_rate": 8.323036973623761e-06, "loss": 0.3669, "step": 20360 }, { "epoch": 1.0257314064152274, "grad_norm": 2.670068772931718, "learning_rate": 8.320847463562487e-06, "loss": 0.3514, "step": 20370 }, { "epoch": 1.0262349564429225, "grad_norm": 4.0649237889749985, "learning_rate": 8.318656813484444e-06, "loss": 0.2927, "step": 20380 }, { "epoch": 1.0267385064706178, "grad_norm": 3.9537034060529708, "learning_rate": 8.31646502414166e-06, "loss": 0.3283, "step": 20390 }, { "epoch": 1.0272420564983131, "grad_norm": 4.463002544411004, "learning_rate": 8.314272096286556e-06, "loss": 0.3476, "step": 20400 }, { "epoch": 1.0277456065260084, "grad_norm": 3.227658042228303, "learning_rate": 8.312078030671945e-06, "loss": 0.3148, "step": 20410 }, { "epoch": 1.0282491565537035, "grad_norm": 3.3630887625422043, "learning_rate": 8.30988282805103e-06, "loss": 0.321, "step": 20420 }, { "epoch": 1.0287527065813988, "grad_norm": 4.025916815726136, "learning_rate": 8.307686489177402e-06, "loss": 0.3141, "step": 20430 }, { "epoch": 1.0292562566090941, "grad_norm": 4.4165950065579, "learning_rate": 8.305489014805049e-06, "loss": 0.3374, "step": 20440 }, { "epoch": 1.0297598066367895, "grad_norm": 3.3051849474332524, "learning_rate": 8.303290405688338e-06, "loss": 0.2916, "step": 20450 }, { "epoch": 1.0302633566644845, "grad_norm": 3.9574598219266433, "learning_rate": 8.301090662582035e-06, "loss": 0.3523, "step": 20460 }, { "epoch": 1.0307669066921799, "grad_norm": 3.3853188993077428, "learning_rate": 8.29888978624129e-06, "loss": 0.2892, "step": 20470 }, { "epoch": 1.0312704567198752, "grad_norm": 4.927570285949903, "learning_rate": 8.296687777421648e-06, "loss": 0.2925, "step": 20480 }, { "epoch": 1.0317740067475705, "grad_norm": 4.196845777584824, "learning_rate": 8.294484636879034e-06, "loss": 0.3069, "step": 20490 }, { "epoch": 1.0322775567752656, "grad_norm": 3.5675176726645543, "learning_rate": 8.292280365369768e-06, "loss": 0.3069, "step": 20500 }, { "epoch": 1.0327811068029609, "grad_norm": 3.8858732091383765, "learning_rate": 8.290074963650556e-06, "loss": 0.2951, "step": 20510 }, { "epoch": 1.0332846568306562, "grad_norm": 4.315957455592535, "learning_rate": 8.287868432478494e-06, "loss": 0.3148, "step": 20520 }, { "epoch": 1.0337882068583513, "grad_norm": 2.9882846436495, "learning_rate": 8.285660772611062e-06, "loss": 0.3015, "step": 20530 }, { "epoch": 1.0342917568860466, "grad_norm": 3.2190627712475237, "learning_rate": 8.28345198480613e-06, "loss": 0.266, "step": 20540 }, { "epoch": 1.0347953069137419, "grad_norm": 4.261483253737423, "learning_rate": 8.281242069821954e-06, "loss": 0.3699, "step": 20550 }, { "epoch": 1.0352988569414372, "grad_norm": 3.340881027347274, "learning_rate": 8.279031028417179e-06, "loss": 0.2832, "step": 20560 }, { "epoch": 1.0358024069691323, "grad_norm": 3.1741836723291805, "learning_rate": 8.276818861350837e-06, "loss": 0.328, "step": 20570 }, { "epoch": 1.0363059569968276, "grad_norm": 3.619939442922128, "learning_rate": 8.274605569382341e-06, "loss": 0.349, "step": 20580 }, { "epoch": 1.036809507024523, "grad_norm": 3.39740654651801, "learning_rate": 8.272391153271499e-06, "loss": 0.3735, "step": 20590 }, { "epoch": 1.0373130570522182, "grad_norm": 3.3638809945542247, "learning_rate": 8.270175613778496e-06, "loss": 0.3234, "step": 20600 }, { "epoch": 1.0378166070799133, "grad_norm": 3.3058263680908953, "learning_rate": 8.267958951663906e-06, "loss": 0.3262, "step": 20610 }, { "epoch": 1.0383201571076086, "grad_norm": 5.140815388301588, "learning_rate": 8.265741167688693e-06, "loss": 0.3317, "step": 20620 }, { "epoch": 1.038823707135304, "grad_norm": 3.8149688015674568, "learning_rate": 8.263522262614198e-06, "loss": 0.3607, "step": 20630 }, { "epoch": 1.0393272571629992, "grad_norm": 4.978853579711192, "learning_rate": 8.261302237202153e-06, "loss": 0.3521, "step": 20640 }, { "epoch": 1.0398308071906943, "grad_norm": 3.794211490574185, "learning_rate": 8.259081092214673e-06, "loss": 0.346, "step": 20650 }, { "epoch": 1.0403343572183896, "grad_norm": 2.8578493455461826, "learning_rate": 8.256858828414256e-06, "loss": 0.3009, "step": 20660 }, { "epoch": 1.040837907246085, "grad_norm": 3.7840701840274655, "learning_rate": 8.254635446563784e-06, "loss": 0.3842, "step": 20670 }, { "epoch": 1.0413414572737802, "grad_norm": 3.204498022236873, "learning_rate": 8.252410947426526e-06, "loss": 0.3452, "step": 20680 }, { "epoch": 1.0418450073014753, "grad_norm": 4.354623971824623, "learning_rate": 8.250185331766132e-06, "loss": 0.3682, "step": 20690 }, { "epoch": 1.0423485573291706, "grad_norm": 3.6291719666737197, "learning_rate": 8.247958600346632e-06, "loss": 0.3287, "step": 20700 }, { "epoch": 1.042852107356866, "grad_norm": 3.567945367265495, "learning_rate": 8.245730753932447e-06, "loss": 0.3304, "step": 20710 }, { "epoch": 1.0433556573845613, "grad_norm": 3.1992899586093895, "learning_rate": 8.243501793288377e-06, "loss": 0.3344, "step": 20720 }, { "epoch": 1.0438592074122564, "grad_norm": 4.466745891772104, "learning_rate": 8.241271719179601e-06, "loss": 0.3638, "step": 20730 }, { "epoch": 1.0443627574399517, "grad_norm": 3.507252639292618, "learning_rate": 8.239040532371684e-06, "loss": 0.3017, "step": 20740 }, { "epoch": 1.044866307467647, "grad_norm": 4.037984797483664, "learning_rate": 8.236808233630571e-06, "loss": 0.3089, "step": 20750 }, { "epoch": 1.045369857495342, "grad_norm": 4.477672301184659, "learning_rate": 8.234574823722593e-06, "loss": 0.3493, "step": 20760 }, { "epoch": 1.0458734075230374, "grad_norm": 2.755485442767366, "learning_rate": 8.232340303414456e-06, "loss": 0.3395, "step": 20770 }, { "epoch": 1.0463769575507327, "grad_norm": 3.2164526210117184, "learning_rate": 8.230104673473254e-06, "loss": 0.3242, "step": 20780 }, { "epoch": 1.046880507578428, "grad_norm": 3.0522265383934752, "learning_rate": 8.227867934666455e-06, "loss": 0.3233, "step": 20790 }, { "epoch": 1.047384057606123, "grad_norm": 3.778898901371334, "learning_rate": 8.225630087761914e-06, "loss": 0.3365, "step": 20800 }, { "epoch": 1.0478876076338184, "grad_norm": 3.523404880608766, "learning_rate": 8.22339113352786e-06, "loss": 0.3022, "step": 20810 }, { "epoch": 1.0483911576615137, "grad_norm": 4.424009033549544, "learning_rate": 8.22115107273291e-06, "loss": 0.2798, "step": 20820 }, { "epoch": 1.048894707689209, "grad_norm": 5.392078324046278, "learning_rate": 8.218909906146053e-06, "loss": 0.3333, "step": 20830 }, { "epoch": 1.049398257716904, "grad_norm": 4.321563563151139, "learning_rate": 8.216667634536664e-06, "loss": 0.3324, "step": 20840 }, { "epoch": 1.0499018077445994, "grad_norm": 4.794431666957422, "learning_rate": 8.21442425867449e-06, "loss": 0.3341, "step": 20850 }, { "epoch": 1.0504053577722947, "grad_norm": 4.524681844894312, "learning_rate": 8.212179779329666e-06, "loss": 0.3154, "step": 20860 }, { "epoch": 1.05090890779999, "grad_norm": 4.7764446803913, "learning_rate": 8.2099341972727e-06, "loss": 0.4072, "step": 20870 }, { "epoch": 1.051412457827685, "grad_norm": 4.655104817278162, "learning_rate": 8.20768751327448e-06, "loss": 0.3916, "step": 20880 }, { "epoch": 1.0519160078553804, "grad_norm": 4.432701765337955, "learning_rate": 8.205439728106273e-06, "loss": 0.2928, "step": 20890 }, { "epoch": 1.0524195578830757, "grad_norm": 4.099510975000709, "learning_rate": 8.203190842539721e-06, "loss": 0.3565, "step": 20900 }, { "epoch": 1.052923107910771, "grad_norm": 4.05024167086994, "learning_rate": 8.200940857346848e-06, "loss": 0.2735, "step": 20910 }, { "epoch": 1.0534266579384661, "grad_norm": 3.85676160785677, "learning_rate": 8.198689773300052e-06, "loss": 0.3372, "step": 20920 }, { "epoch": 1.0539302079661614, "grad_norm": 3.3799551898581686, "learning_rate": 8.19643759117211e-06, "loss": 0.3274, "step": 20930 }, { "epoch": 1.0544337579938567, "grad_norm": 4.110330694300255, "learning_rate": 8.194184311736178e-06, "loss": 0.3543, "step": 20940 }, { "epoch": 1.0549373080215518, "grad_norm": 3.519889421925182, "learning_rate": 8.19192993576578e-06, "loss": 0.3197, "step": 20950 }, { "epoch": 1.0554408580492471, "grad_norm": 4.233598158097511, "learning_rate": 8.189674464034831e-06, "loss": 0.3294, "step": 20960 }, { "epoch": 1.0559444080769425, "grad_norm": 4.109759711976573, "learning_rate": 8.187417897317611e-06, "loss": 0.3015, "step": 20970 }, { "epoch": 1.0564479581046378, "grad_norm": 4.538936008655037, "learning_rate": 8.185160236388776e-06, "loss": 0.3562, "step": 20980 }, { "epoch": 1.0569515081323329, "grad_norm": 2.9728602508994992, "learning_rate": 8.182901482023362e-06, "loss": 0.3158, "step": 20990 }, { "epoch": 1.0574550581600282, "grad_norm": 3.332538875403289, "learning_rate": 8.180641634996782e-06, "loss": 0.3135, "step": 21000 }, { "epoch": 1.0579586081877235, "grad_norm": 4.290078947397097, "learning_rate": 8.178380696084817e-06, "loss": 0.2585, "step": 21010 }, { "epoch": 1.0584621582154188, "grad_norm": 3.2422878109677646, "learning_rate": 8.176118666063628e-06, "loss": 0.3186, "step": 21020 }, { "epoch": 1.0589657082431139, "grad_norm": 3.340342594121203, "learning_rate": 8.17385554570975e-06, "loss": 0.3335, "step": 21030 }, { "epoch": 1.0594692582708092, "grad_norm": 3.35197004057162, "learning_rate": 8.171591335800088e-06, "loss": 0.3659, "step": 21040 }, { "epoch": 1.0599728082985045, "grad_norm": 3.2161621731449634, "learning_rate": 8.169326037111933e-06, "loss": 0.2844, "step": 21050 }, { "epoch": 1.0604763583261998, "grad_norm": 5.442140891594486, "learning_rate": 8.167059650422932e-06, "loss": 0.3576, "step": 21060 }, { "epoch": 1.0609799083538949, "grad_norm": 2.852654106138676, "learning_rate": 8.164792176511122e-06, "loss": 0.2756, "step": 21070 }, { "epoch": 1.0614834583815902, "grad_norm": 4.369347939220291, "learning_rate": 8.162523616154903e-06, "loss": 0.339, "step": 21080 }, { "epoch": 1.0619870084092855, "grad_norm": 3.7609747570375536, "learning_rate": 8.160253970133052e-06, "loss": 0.3476, "step": 21090 }, { "epoch": 1.0624905584369808, "grad_norm": 5.057311312879773, "learning_rate": 8.157983239224716e-06, "loss": 0.34, "step": 21100 }, { "epoch": 1.062994108464676, "grad_norm": 4.843204318257215, "learning_rate": 8.155711424209418e-06, "loss": 0.3624, "step": 21110 }, { "epoch": 1.0634976584923712, "grad_norm": 2.846319746443395, "learning_rate": 8.153438525867052e-06, "loss": 0.3096, "step": 21120 }, { "epoch": 1.0640012085200665, "grad_norm": 3.5941931369047264, "learning_rate": 8.151164544977884e-06, "loss": 0.3079, "step": 21130 }, { "epoch": 1.0645047585477618, "grad_norm": 3.688883178969575, "learning_rate": 8.148889482322547e-06, "loss": 0.3447, "step": 21140 }, { "epoch": 1.065008308575457, "grad_norm": 3.728311933795423, "learning_rate": 8.146613338682055e-06, "loss": 0.3394, "step": 21150 }, { "epoch": 1.0655118586031522, "grad_norm": 4.750550497884607, "learning_rate": 8.14433611483778e-06, "loss": 0.3325, "step": 21160 }, { "epoch": 1.0660154086308475, "grad_norm": 5.267640376269819, "learning_rate": 8.14205781157148e-06, "loss": 0.2996, "step": 21170 }, { "epoch": 1.0665189586585426, "grad_norm": 5.686173626603673, "learning_rate": 8.139778429665273e-06, "loss": 0.3182, "step": 21180 }, { "epoch": 1.067022508686238, "grad_norm": 4.562169245689327, "learning_rate": 8.137497969901649e-06, "loss": 0.3535, "step": 21190 }, { "epoch": 1.0675260587139332, "grad_norm": 4.992971572729277, "learning_rate": 8.135216433063467e-06, "loss": 0.3412, "step": 21200 }, { "epoch": 1.0680296087416286, "grad_norm": 2.847340495401351, "learning_rate": 8.132933819933964e-06, "loss": 0.3107, "step": 21210 }, { "epoch": 1.0685331587693236, "grad_norm": 3.174916185601754, "learning_rate": 8.130650131296736e-06, "loss": 0.3205, "step": 21220 }, { "epoch": 1.069036708797019, "grad_norm": 3.459367762894123, "learning_rate": 8.128365367935751e-06, "loss": 0.276, "step": 21230 }, { "epoch": 1.0695402588247143, "grad_norm": 2.861713947233507, "learning_rate": 8.126079530635353e-06, "loss": 0.3226, "step": 21240 }, { "epoch": 1.0700438088524096, "grad_norm": 4.593316385863061, "learning_rate": 8.123792620180246e-06, "loss": 0.3637, "step": 21250 }, { "epoch": 1.0705473588801047, "grad_norm": 3.318797381632519, "learning_rate": 8.121504637355506e-06, "loss": 0.2976, "step": 21260 }, { "epoch": 1.0710509089078, "grad_norm": 3.512085061661162, "learning_rate": 8.119215582946573e-06, "loss": 0.3271, "step": 21270 }, { "epoch": 1.0715544589354953, "grad_norm": 4.501014905018115, "learning_rate": 8.116925457739266e-06, "loss": 0.3454, "step": 21280 }, { "epoch": 1.0720580089631906, "grad_norm": 4.4491888365704035, "learning_rate": 8.114634262519759e-06, "loss": 0.3561, "step": 21290 }, { "epoch": 1.0725615589908857, "grad_norm": 3.574195594025361, "learning_rate": 8.1123419980746e-06, "loss": 0.3106, "step": 21300 }, { "epoch": 1.073065109018581, "grad_norm": 2.615106097970561, "learning_rate": 8.110048665190701e-06, "loss": 0.2935, "step": 21310 }, { "epoch": 1.0735686590462763, "grad_norm": 3.0266661719056596, "learning_rate": 8.107754264655346e-06, "loss": 0.2772, "step": 21320 }, { "epoch": 1.0740722090739716, "grad_norm": 4.595131970838899, "learning_rate": 8.105458797256178e-06, "loss": 0.3061, "step": 21330 }, { "epoch": 1.0745757591016667, "grad_norm": 3.820563292372986, "learning_rate": 8.103162263781213e-06, "loss": 0.3012, "step": 21340 }, { "epoch": 1.075079309129362, "grad_norm": 3.768803373972923, "learning_rate": 8.100864665018827e-06, "loss": 0.2984, "step": 21350 }, { "epoch": 1.0755828591570573, "grad_norm": 3.121046630408341, "learning_rate": 8.098566001757769e-06, "loss": 0.2752, "step": 21360 }, { "epoch": 1.0760864091847524, "grad_norm": 3.394178776567917, "learning_rate": 8.096266274787145e-06, "loss": 0.3355, "step": 21370 }, { "epoch": 1.0765899592124477, "grad_norm": 3.9246667494070033, "learning_rate": 8.093965484896432e-06, "loss": 0.328, "step": 21380 }, { "epoch": 1.077093509240143, "grad_norm": 2.50156970090007, "learning_rate": 8.09166363287547e-06, "loss": 0.2961, "step": 21390 }, { "epoch": 1.0775970592678383, "grad_norm": 2.5692964782808994, "learning_rate": 8.089360719514464e-06, "loss": 0.2665, "step": 21400 }, { "epoch": 1.0781006092955334, "grad_norm": 3.498271297451123, "learning_rate": 8.087056745603985e-06, "loss": 0.312, "step": 21410 }, { "epoch": 1.0786041593232287, "grad_norm": 3.7522699851341623, "learning_rate": 8.084751711934965e-06, "loss": 0.3276, "step": 21420 }, { "epoch": 1.079107709350924, "grad_norm": 3.5661150875164633, "learning_rate": 8.0824456192987e-06, "loss": 0.3232, "step": 21430 }, { "epoch": 1.0796112593786193, "grad_norm": 4.153232903319216, "learning_rate": 8.080138468486854e-06, "loss": 0.3579, "step": 21440 }, { "epoch": 1.0801148094063144, "grad_norm": 4.086323284150432, "learning_rate": 8.077830260291445e-06, "loss": 0.2892, "step": 21450 }, { "epoch": 1.0806183594340097, "grad_norm": 3.810741240035975, "learning_rate": 8.075520995504866e-06, "loss": 0.318, "step": 21460 }, { "epoch": 1.081121909461705, "grad_norm": 4.7845681131428055, "learning_rate": 8.073210674919862e-06, "loss": 0.333, "step": 21470 }, { "epoch": 1.0816254594894004, "grad_norm": 3.9052741895389587, "learning_rate": 8.07089929932955e-06, "loss": 0.341, "step": 21480 }, { "epoch": 1.0821290095170955, "grad_norm": 3.145611680649771, "learning_rate": 8.068586869527399e-06, "loss": 0.3136, "step": 21490 }, { "epoch": 1.0826325595447908, "grad_norm": 3.3932624168237795, "learning_rate": 8.066273386307248e-06, "loss": 0.3323, "step": 21500 }, { "epoch": 1.083136109572486, "grad_norm": 3.0482506718672493, "learning_rate": 8.063958850463293e-06, "loss": 0.2932, "step": 21510 }, { "epoch": 1.0836396596001814, "grad_norm": 4.832026619860318, "learning_rate": 8.061643262790098e-06, "loss": 0.2855, "step": 21520 }, { "epoch": 1.0841432096278765, "grad_norm": 4.805442435230462, "learning_rate": 8.059326624082578e-06, "loss": 0.3274, "step": 21530 }, { "epoch": 1.0846467596555718, "grad_norm": 3.447260923536965, "learning_rate": 8.057008935136015e-06, "loss": 0.3295, "step": 21540 }, { "epoch": 1.085150309683267, "grad_norm": 3.8152000408815496, "learning_rate": 8.054690196746052e-06, "loss": 0.2924, "step": 21550 }, { "epoch": 1.0856538597109622, "grad_norm": 3.4500416566037337, "learning_rate": 8.05237040970869e-06, "loss": 0.3133, "step": 21560 }, { "epoch": 1.0861574097386575, "grad_norm": 5.8011464384697815, "learning_rate": 8.05004957482029e-06, "loss": 0.2829, "step": 21570 }, { "epoch": 1.0866609597663528, "grad_norm": 3.8719219574039303, "learning_rate": 8.047727692877575e-06, "loss": 0.3184, "step": 21580 }, { "epoch": 1.087164509794048, "grad_norm": 4.309878147479325, "learning_rate": 8.045404764677626e-06, "loss": 0.3093, "step": 21590 }, { "epoch": 1.0876680598217432, "grad_norm": 3.352266923556751, "learning_rate": 8.043080791017884e-06, "loss": 0.3208, "step": 21600 }, { "epoch": 1.0881716098494385, "grad_norm": 4.048241585913873, "learning_rate": 8.040755772696144e-06, "loss": 0.3092, "step": 21610 }, { "epoch": 1.0886751598771338, "grad_norm": 3.6560278057323523, "learning_rate": 8.038429710510567e-06, "loss": 0.3218, "step": 21620 }, { "epoch": 1.0891787099048291, "grad_norm": 3.411523345467768, "learning_rate": 8.036102605259667e-06, "loss": 0.3021, "step": 21630 }, { "epoch": 1.0896822599325242, "grad_norm": 4.385604613797053, "learning_rate": 8.033774457742322e-06, "loss": 0.3333, "step": 21640 }, { "epoch": 1.0901858099602195, "grad_norm": 5.935438229174376, "learning_rate": 8.031445268757761e-06, "loss": 0.3699, "step": 21650 }, { "epoch": 1.0906893599879148, "grad_norm": 3.870705829455948, "learning_rate": 8.029115039105573e-06, "loss": 0.2816, "step": 21660 }, { "epoch": 1.0911929100156101, "grad_norm": 4.059269455565144, "learning_rate": 8.026783769585705e-06, "loss": 0.3433, "step": 21670 }, { "epoch": 1.0916964600433052, "grad_norm": 2.0692217356012246, "learning_rate": 8.024451460998461e-06, "loss": 0.2645, "step": 21680 }, { "epoch": 1.0922000100710005, "grad_norm": 4.031552421079525, "learning_rate": 8.022118114144501e-06, "loss": 0.3191, "step": 21690 }, { "epoch": 1.0927035600986958, "grad_norm": 3.219074367084062, "learning_rate": 8.019783729824845e-06, "loss": 0.3421, "step": 21700 }, { "epoch": 1.0932071101263912, "grad_norm": 4.444328382719159, "learning_rate": 8.01744830884086e-06, "loss": 0.3208, "step": 21710 }, { "epoch": 1.0937106601540862, "grad_norm": 4.769457399830183, "learning_rate": 8.015111851994279e-06, "loss": 0.3511, "step": 21720 }, { "epoch": 1.0942142101817816, "grad_norm": 3.659051900242683, "learning_rate": 8.012774360087185e-06, "loss": 0.3745, "step": 21730 }, { "epoch": 1.0947177602094769, "grad_norm": 4.282013454897993, "learning_rate": 8.010435833922022e-06, "loss": 0.3397, "step": 21740 }, { "epoch": 1.095221310237172, "grad_norm": 3.7708816891581765, "learning_rate": 8.008096274301575e-06, "loss": 0.3387, "step": 21750 }, { "epoch": 1.0957248602648673, "grad_norm": 3.365175642360294, "learning_rate": 8.005755682029002e-06, "loss": 0.3607, "step": 21760 }, { "epoch": 1.0962284102925626, "grad_norm": 3.901631507344251, "learning_rate": 8.003414057907806e-06, "loss": 0.3378, "step": 21770 }, { "epoch": 1.0967319603202579, "grad_norm": 3.875063743205961, "learning_rate": 8.001071402741843e-06, "loss": 0.2619, "step": 21780 }, { "epoch": 1.097235510347953, "grad_norm": 3.5128062413064773, "learning_rate": 7.998727717335324e-06, "loss": 0.3419, "step": 21790 }, { "epoch": 1.0977390603756483, "grad_norm": 4.382522098120127, "learning_rate": 7.996383002492818e-06, "loss": 0.2894, "step": 21800 }, { "epoch": 1.0982426104033436, "grad_norm": 3.3505810795067132, "learning_rate": 7.994037259019244e-06, "loss": 0.2901, "step": 21810 }, { "epoch": 1.098746160431039, "grad_norm": 2.896523262080157, "learning_rate": 7.991690487719875e-06, "loss": 0.3055, "step": 21820 }, { "epoch": 1.099249710458734, "grad_norm": 3.7817766562436264, "learning_rate": 7.989342689400332e-06, "loss": 0.3071, "step": 21830 }, { "epoch": 1.0997532604864293, "grad_norm": 3.27189467998929, "learning_rate": 7.986993864866598e-06, "loss": 0.2857, "step": 21840 }, { "epoch": 1.1002568105141246, "grad_norm": 5.019508152165413, "learning_rate": 7.984644014925e-06, "loss": 0.3247, "step": 21850 }, { "epoch": 1.10076036054182, "grad_norm": 4.6080867041799305, "learning_rate": 7.982293140382222e-06, "loss": 0.3422, "step": 21860 }, { "epoch": 1.101263910569515, "grad_norm": 3.5692973838324638, "learning_rate": 7.979941242045296e-06, "loss": 0.3762, "step": 21870 }, { "epoch": 1.1017674605972103, "grad_norm": 3.0675264730702296, "learning_rate": 7.977588320721607e-06, "loss": 0.3147, "step": 21880 }, { "epoch": 1.1022710106249056, "grad_norm": 5.251664547837923, "learning_rate": 7.975234377218895e-06, "loss": 0.4197, "step": 21890 }, { "epoch": 1.102774560652601, "grad_norm": 3.3009533121490358, "learning_rate": 7.972879412345244e-06, "loss": 0.3421, "step": 21900 }, { "epoch": 1.103278110680296, "grad_norm": 3.490882794209531, "learning_rate": 7.970523426909095e-06, "loss": 0.2833, "step": 21910 }, { "epoch": 1.1037816607079913, "grad_norm": 3.305408318429654, "learning_rate": 7.968166421719234e-06, "loss": 0.3677, "step": 21920 }, { "epoch": 1.1042852107356866, "grad_norm": 4.232134869691784, "learning_rate": 7.9658083975848e-06, "loss": 0.3676, "step": 21930 }, { "epoch": 1.1047887607633817, "grad_norm": 4.776862306124834, "learning_rate": 7.963449355315281e-06, "loss": 0.3004, "step": 21940 }, { "epoch": 1.105292310791077, "grad_norm": 3.35200894389394, "learning_rate": 7.961089295720516e-06, "loss": 0.2952, "step": 21950 }, { "epoch": 1.1057958608187723, "grad_norm": 3.4108309162463994, "learning_rate": 7.95872821961069e-06, "loss": 0.3187, "step": 21960 }, { "epoch": 1.1062994108464677, "grad_norm": 2.9635599349267308, "learning_rate": 7.95636612779634e-06, "loss": 0.3271, "step": 21970 }, { "epoch": 1.1068029608741627, "grad_norm": 4.231466317926958, "learning_rate": 7.954003021088354e-06, "loss": 0.2748, "step": 21980 }, { "epoch": 1.107306510901858, "grad_norm": 3.274631269544388, "learning_rate": 7.95163890029796e-06, "loss": 0.2759, "step": 21990 }, { "epoch": 1.1078100609295534, "grad_norm": 4.66673879979555, "learning_rate": 7.94927376623674e-06, "loss": 0.3097, "step": 22000 }, { "epoch": 1.1083136109572487, "grad_norm": 4.01134929274814, "learning_rate": 7.946907619716625e-06, "loss": 0.3241, "step": 22010 }, { "epoch": 1.1088171609849438, "grad_norm": 4.11504447662792, "learning_rate": 7.94454046154989e-06, "loss": 0.3788, "step": 22020 }, { "epoch": 1.109320711012639, "grad_norm": 4.126297562942095, "learning_rate": 7.942172292549163e-06, "loss": 0.3338, "step": 22030 }, { "epoch": 1.1098242610403344, "grad_norm": 4.161752658949382, "learning_rate": 7.939803113527407e-06, "loss": 0.3185, "step": 22040 }, { "epoch": 1.1103278110680297, "grad_norm": 4.210620710140687, "learning_rate": 7.937432925297948e-06, "loss": 0.3009, "step": 22050 }, { "epoch": 1.1108313610957248, "grad_norm": 3.735877499799602, "learning_rate": 7.935061728674445e-06, "loss": 0.3174, "step": 22060 }, { "epoch": 1.11133491112342, "grad_norm": 4.277593140333687, "learning_rate": 7.93268952447091e-06, "loss": 0.3025, "step": 22070 }, { "epoch": 1.1118384611511154, "grad_norm": 4.110274807446537, "learning_rate": 7.930316313501698e-06, "loss": 0.3066, "step": 22080 }, { "epoch": 1.1123420111788107, "grad_norm": 4.120237010502346, "learning_rate": 7.927942096581511e-06, "loss": 0.3213, "step": 22090 }, { "epoch": 1.1128455612065058, "grad_norm": 3.0948757969328202, "learning_rate": 7.925566874525397e-06, "loss": 0.3175, "step": 22100 }, { "epoch": 1.113349111234201, "grad_norm": 4.357099598768344, "learning_rate": 7.923190648148749e-06, "loss": 0.3427, "step": 22110 }, { "epoch": 1.1138526612618964, "grad_norm": 3.723784239703334, "learning_rate": 7.920813418267301e-06, "loss": 0.3612, "step": 22120 }, { "epoch": 1.1143562112895917, "grad_norm": 3.407407892899588, "learning_rate": 7.918435185697138e-06, "loss": 0.2846, "step": 22130 }, { "epoch": 1.1148597613172868, "grad_norm": 3.6475074322250673, "learning_rate": 7.916055951254682e-06, "loss": 0.3456, "step": 22140 }, { "epoch": 1.1153633113449821, "grad_norm": 4.090008473816648, "learning_rate": 7.913675715756706e-06, "loss": 0.3465, "step": 22150 }, { "epoch": 1.1158668613726774, "grad_norm": 3.6462296946079022, "learning_rate": 7.91129448002032e-06, "loss": 0.333, "step": 22160 }, { "epoch": 1.1163704114003727, "grad_norm": 4.287998539661458, "learning_rate": 7.908912244862984e-06, "loss": 0.2983, "step": 22170 }, { "epoch": 1.1168739614280678, "grad_norm": 3.58901416396213, "learning_rate": 7.906529011102495e-06, "loss": 0.3321, "step": 22180 }, { "epoch": 1.1173775114557631, "grad_norm": 3.672807859724051, "learning_rate": 7.904144779556996e-06, "loss": 0.3046, "step": 22190 }, { "epoch": 1.1178810614834584, "grad_norm": 2.8836314820356406, "learning_rate": 7.901759551044974e-06, "loss": 0.3235, "step": 22200 }, { "epoch": 1.1183846115111535, "grad_norm": 3.3036994866948013, "learning_rate": 7.899373326385253e-06, "loss": 0.3165, "step": 22210 }, { "epoch": 1.1188881615388488, "grad_norm": 3.500400400550225, "learning_rate": 7.896986106397004e-06, "loss": 0.342, "step": 22220 }, { "epoch": 1.1193917115665442, "grad_norm": 3.6930776549587643, "learning_rate": 7.894597891899738e-06, "loss": 0.3461, "step": 22230 }, { "epoch": 1.1198952615942395, "grad_norm": 4.495848189473675, "learning_rate": 7.892208683713307e-06, "loss": 0.2863, "step": 22240 }, { "epoch": 1.1203988116219346, "grad_norm": 3.5472862019624136, "learning_rate": 7.889818482657908e-06, "loss": 0.2728, "step": 22250 }, { "epoch": 1.1209023616496299, "grad_norm": 4.17203901341087, "learning_rate": 7.887427289554068e-06, "loss": 0.3199, "step": 22260 }, { "epoch": 1.1214059116773252, "grad_norm": 4.072944420332133, "learning_rate": 7.885035105222667e-06, "loss": 0.3163, "step": 22270 }, { "epoch": 1.1219094617050205, "grad_norm": 3.2917202727581363, "learning_rate": 7.882641930484919e-06, "loss": 0.301, "step": 22280 }, { "epoch": 1.1224130117327156, "grad_norm": 3.7221869227663533, "learning_rate": 7.88024776616238e-06, "loss": 0.2805, "step": 22290 }, { "epoch": 1.1229165617604109, "grad_norm": 4.084375071065176, "learning_rate": 7.877852613076943e-06, "loss": 0.3029, "step": 22300 }, { "epoch": 1.1234201117881062, "grad_norm": 3.9959064291842696, "learning_rate": 7.875456472050844e-06, "loss": 0.3201, "step": 22310 }, { "epoch": 1.1239236618158015, "grad_norm": 4.9446690056114395, "learning_rate": 7.873059343906656e-06, "loss": 0.3476, "step": 22320 }, { "epoch": 1.1244272118434966, "grad_norm": 4.848585492031731, "learning_rate": 7.870661229467291e-06, "loss": 0.3396, "step": 22330 }, { "epoch": 1.124930761871192, "grad_norm": 3.3658724957261454, "learning_rate": 7.868262129556e-06, "loss": 0.3076, "step": 22340 }, { "epoch": 1.1254343118988872, "grad_norm": 4.608028755598247, "learning_rate": 7.86586204499637e-06, "loss": 0.3502, "step": 22350 }, { "epoch": 1.1259378619265825, "grad_norm": 3.8440431144475085, "learning_rate": 7.863460976612335e-06, "loss": 0.3278, "step": 22360 }, { "epoch": 1.1264414119542776, "grad_norm": 3.95143514068305, "learning_rate": 7.86105892522815e-06, "loss": 0.3293, "step": 22370 }, { "epoch": 1.126944961981973, "grad_norm": 3.7417750272178614, "learning_rate": 7.858655891668428e-06, "loss": 0.2692, "step": 22380 }, { "epoch": 1.1274485120096682, "grad_norm": 4.487362280495763, "learning_rate": 7.8562518767581e-06, "loss": 0.3603, "step": 22390 }, { "epoch": 1.1279520620373633, "grad_norm": 3.0565051545286996, "learning_rate": 7.853846881322449e-06, "loss": 0.3121, "step": 22400 }, { "epoch": 1.1284556120650586, "grad_norm": 3.324088963399328, "learning_rate": 7.851440906187082e-06, "loss": 0.295, "step": 22410 }, { "epoch": 1.128959162092754, "grad_norm": 1.9817784643323366, "learning_rate": 7.849033952177953e-06, "loss": 0.3207, "step": 22420 }, { "epoch": 1.1294627121204492, "grad_norm": 4.126275335234165, "learning_rate": 7.846626020121345e-06, "loss": 0.323, "step": 22430 }, { "epoch": 1.1299662621481443, "grad_norm": 3.840371635414329, "learning_rate": 7.844217110843882e-06, "loss": 0.3077, "step": 22440 }, { "epoch": 1.1304698121758396, "grad_norm": 3.096183422387647, "learning_rate": 7.841807225172517e-06, "loss": 0.3328, "step": 22450 }, { "epoch": 1.130973362203535, "grad_norm": 6.084217176592971, "learning_rate": 7.839396363934545e-06, "loss": 0.3507, "step": 22460 }, { "epoch": 1.1314769122312303, "grad_norm": 4.216797041367087, "learning_rate": 7.83698452795759e-06, "loss": 0.3097, "step": 22470 }, { "epoch": 1.1319804622589253, "grad_norm": 4.367888936722606, "learning_rate": 7.834571718069616e-06, "loss": 0.3045, "step": 22480 }, { "epoch": 1.1324840122866207, "grad_norm": 2.888741404199989, "learning_rate": 7.832157935098917e-06, "loss": 0.2712, "step": 22490 }, { "epoch": 1.132987562314316, "grad_norm": 3.749629885474544, "learning_rate": 7.829743179874123e-06, "loss": 0.3195, "step": 22500 }, { "epoch": 1.1334911123420113, "grad_norm": 4.743109367912742, "learning_rate": 7.827327453224197e-06, "loss": 0.3178, "step": 22510 }, { "epoch": 1.1339946623697064, "grad_norm": 4.176830330473834, "learning_rate": 7.824910755978438e-06, "loss": 0.3144, "step": 22520 }, { "epoch": 1.1344982123974017, "grad_norm": 4.022528753646341, "learning_rate": 7.822493088966475e-06, "loss": 0.3307, "step": 22530 }, { "epoch": 1.135001762425097, "grad_norm": 4.1479905677412106, "learning_rate": 7.82007445301827e-06, "loss": 0.3212, "step": 22540 }, { "epoch": 1.1355053124527923, "grad_norm": 4.116129176531848, "learning_rate": 7.817654848964122e-06, "loss": 0.3371, "step": 22550 }, { "epoch": 1.1360088624804874, "grad_norm": 5.033789902489276, "learning_rate": 7.815234277634656e-06, "loss": 0.3189, "step": 22560 }, { "epoch": 1.1365124125081827, "grad_norm": 4.0627652151137905, "learning_rate": 7.812812739860833e-06, "loss": 0.3234, "step": 22570 }, { "epoch": 1.137015962535878, "grad_norm": 3.3941128046529405, "learning_rate": 7.810390236473945e-06, "loss": 0.2893, "step": 22580 }, { "epoch": 1.137519512563573, "grad_norm": 4.516835101527178, "learning_rate": 7.807966768305615e-06, "loss": 0.3678, "step": 22590 }, { "epoch": 1.1380230625912684, "grad_norm": 3.9280096727241594, "learning_rate": 7.805542336187799e-06, "loss": 0.339, "step": 22600 }, { "epoch": 1.1385266126189637, "grad_norm": 3.953093750365866, "learning_rate": 7.803116940952782e-06, "loss": 0.351, "step": 22610 }, { "epoch": 1.139030162646659, "grad_norm": 3.500546688002438, "learning_rate": 7.800690583433178e-06, "loss": 0.3202, "step": 22620 }, { "epoch": 1.139533712674354, "grad_norm": 4.257847683841809, "learning_rate": 7.798263264461938e-06, "loss": 0.309, "step": 22630 }, { "epoch": 1.1400372627020494, "grad_norm": 3.658271256482983, "learning_rate": 7.795834984872332e-06, "loss": 0.3327, "step": 22640 }, { "epoch": 1.1405408127297447, "grad_norm": 4.260668109811015, "learning_rate": 7.793405745497972e-06, "loss": 0.3163, "step": 22650 }, { "epoch": 1.14104436275744, "grad_norm": 3.578048678443574, "learning_rate": 7.790975547172793e-06, "loss": 0.2765, "step": 22660 }, { "epoch": 1.1415479127851351, "grad_norm": 5.088549175727397, "learning_rate": 7.78854439073106e-06, "loss": 0.3403, "step": 22670 }, { "epoch": 1.1420514628128304, "grad_norm": 4.972545045281482, "learning_rate": 7.786112277007363e-06, "loss": 0.3437, "step": 22680 }, { "epoch": 1.1425550128405257, "grad_norm": 4.352120211603007, "learning_rate": 7.783679206836629e-06, "loss": 0.3161, "step": 22690 }, { "epoch": 1.143058562868221, "grad_norm": 4.932283996987125, "learning_rate": 7.781245181054106e-06, "loss": 0.351, "step": 22700 }, { "epoch": 1.1435621128959161, "grad_norm": 3.1546460503965696, "learning_rate": 7.778810200495377e-06, "loss": 0.3458, "step": 22710 }, { "epoch": 1.1440656629236114, "grad_norm": 3.862190952711087, "learning_rate": 7.776374265996344e-06, "loss": 0.2851, "step": 22720 }, { "epoch": 1.1445692129513068, "grad_norm": 2.99731774377971, "learning_rate": 7.773937378393244e-06, "loss": 0.3451, "step": 22730 }, { "epoch": 1.145072762979002, "grad_norm": 6.049614583818582, "learning_rate": 7.771499538522637e-06, "loss": 0.3228, "step": 22740 }, { "epoch": 1.1455763130066972, "grad_norm": 3.384947976813665, "learning_rate": 7.769060747221411e-06, "loss": 0.2762, "step": 22750 }, { "epoch": 1.1460798630343925, "grad_norm": 3.7300458694126672, "learning_rate": 7.766621005326784e-06, "loss": 0.2814, "step": 22760 }, { "epoch": 1.1465834130620878, "grad_norm": 4.223819645898068, "learning_rate": 7.764180313676293e-06, "loss": 0.3212, "step": 22770 }, { "epoch": 1.1470869630897829, "grad_norm": 3.516436429003477, "learning_rate": 7.761738673107808e-06, "loss": 0.3169, "step": 22780 }, { "epoch": 1.1475905131174782, "grad_norm": 3.8127138981754887, "learning_rate": 7.759296084459521e-06, "loss": 0.2944, "step": 22790 }, { "epoch": 1.1480940631451735, "grad_norm": 3.9977557408230857, "learning_rate": 7.75685254856995e-06, "loss": 0.3248, "step": 22800 }, { "epoch": 1.1485976131728688, "grad_norm": 4.752092845203481, "learning_rate": 7.754408066277943e-06, "loss": 0.3194, "step": 22810 }, { "epoch": 1.1491011632005639, "grad_norm": 4.303011212491577, "learning_rate": 7.751962638422662e-06, "loss": 0.3245, "step": 22820 }, { "epoch": 1.1496047132282592, "grad_norm": 3.265958267581098, "learning_rate": 7.749516265843604e-06, "loss": 0.2664, "step": 22830 }, { "epoch": 1.1501082632559545, "grad_norm": 3.969455934492367, "learning_rate": 7.747068949380585e-06, "loss": 0.3324, "step": 22840 }, { "epoch": 1.1506118132836498, "grad_norm": 4.426202092134435, "learning_rate": 7.744620689873748e-06, "loss": 0.3606, "step": 22850 }, { "epoch": 1.151115363311345, "grad_norm": 3.0844082734688123, "learning_rate": 7.742171488163557e-06, "loss": 0.2963, "step": 22860 }, { "epoch": 1.1516189133390402, "grad_norm": 4.301168644176434, "learning_rate": 7.739721345090802e-06, "loss": 0.2985, "step": 22870 }, { "epoch": 1.1521224633667355, "grad_norm": 3.6792699797413384, "learning_rate": 7.737270261496592e-06, "loss": 0.2953, "step": 22880 }, { "epoch": 1.1526260133944308, "grad_norm": 5.154172327961074, "learning_rate": 7.734818238222365e-06, "loss": 0.3236, "step": 22890 }, { "epoch": 1.153129563422126, "grad_norm": 3.958778563338153, "learning_rate": 7.732365276109878e-06, "loss": 0.3459, "step": 22900 }, { "epoch": 1.1536331134498212, "grad_norm": 4.553924486488727, "learning_rate": 7.729911376001209e-06, "loss": 0.3157, "step": 22910 }, { "epoch": 1.1541366634775165, "grad_norm": 4.101548954776382, "learning_rate": 7.72745653873876e-06, "loss": 0.3452, "step": 22920 }, { "epoch": 1.1546402135052118, "grad_norm": 3.8208071196297495, "learning_rate": 7.725000765165254e-06, "loss": 0.338, "step": 22930 }, { "epoch": 1.155143763532907, "grad_norm": 3.471532855924308, "learning_rate": 7.722544056123739e-06, "loss": 0.3372, "step": 22940 }, { "epoch": 1.1556473135606022, "grad_norm": 2.3931379825647383, "learning_rate": 7.720086412457575e-06, "loss": 0.3081, "step": 22950 }, { "epoch": 1.1561508635882976, "grad_norm": 3.869330413546944, "learning_rate": 7.717627835010454e-06, "loss": 0.3121, "step": 22960 }, { "epoch": 1.1566544136159926, "grad_norm": 3.7848001801486144, "learning_rate": 7.715168324626381e-06, "loss": 0.3009, "step": 22970 }, { "epoch": 1.157157963643688, "grad_norm": 4.408217548286508, "learning_rate": 7.712707882149684e-06, "loss": 0.3158, "step": 22980 }, { "epoch": 1.1576615136713833, "grad_norm": 3.81391061639847, "learning_rate": 7.71024650842501e-06, "loss": 0.3197, "step": 22990 }, { "epoch": 1.1581650636990786, "grad_norm": 3.5047967386280874, "learning_rate": 7.707784204297326e-06, "loss": 0.3059, "step": 23000 }, { "epoch": 1.1586686137267739, "grad_norm": 3.5971558387275637, "learning_rate": 7.705320970611922e-06, "loss": 0.3211, "step": 23010 }, { "epoch": 1.159172163754469, "grad_norm": 3.235814188036925, "learning_rate": 7.7028568082144e-06, "loss": 0.3054, "step": 23020 }, { "epoch": 1.1596757137821643, "grad_norm": 3.2744614386672986, "learning_rate": 7.700391717950683e-06, "loss": 0.298, "step": 23030 }, { "epoch": 1.1601792638098596, "grad_norm": 3.9292797345201116, "learning_rate": 7.69792570066702e-06, "loss": 0.3277, "step": 23040 }, { "epoch": 1.1606828138375547, "grad_norm": 4.107621693599381, "learning_rate": 7.695458757209968e-06, "loss": 0.3644, "step": 23050 }, { "epoch": 1.16118636386525, "grad_norm": 3.17242844137373, "learning_rate": 7.692990888426409e-06, "loss": 0.2961, "step": 23060 }, { "epoch": 1.1616899138929453, "grad_norm": 3.1407177857944344, "learning_rate": 7.690522095163535e-06, "loss": 0.291, "step": 23070 }, { "epoch": 1.1621934639206406, "grad_norm": 3.113973605495565, "learning_rate": 7.688052378268867e-06, "loss": 0.3014, "step": 23080 }, { "epoch": 1.1626970139483357, "grad_norm": 4.245959071713204, "learning_rate": 7.68558173859023e-06, "loss": 0.3393, "step": 23090 }, { "epoch": 1.163200563976031, "grad_norm": 4.204961874601193, "learning_rate": 7.68311017697578e-06, "loss": 0.3007, "step": 23100 }, { "epoch": 1.1637041140037263, "grad_norm": 3.7538391579258654, "learning_rate": 7.680637694273972e-06, "loss": 0.3073, "step": 23110 }, { "epoch": 1.1642076640314216, "grad_norm": 3.931834842320645, "learning_rate": 7.678164291333594e-06, "loss": 0.3394, "step": 23120 }, { "epoch": 1.1647112140591167, "grad_norm": 2.248464234640227, "learning_rate": 7.675689969003738e-06, "loss": 0.2996, "step": 23130 }, { "epoch": 1.165214764086812, "grad_norm": 4.9577478573017935, "learning_rate": 7.673214728133822e-06, "loss": 0.3023, "step": 23140 }, { "epoch": 1.1657183141145073, "grad_norm": 4.240375843799218, "learning_rate": 7.67073856957357e-06, "loss": 0.3171, "step": 23150 }, { "epoch": 1.1662218641422024, "grad_norm": 4.359431380984339, "learning_rate": 7.668261494173024e-06, "loss": 0.3543, "step": 23160 }, { "epoch": 1.1667254141698977, "grad_norm": 3.88818831801969, "learning_rate": 7.665783502782542e-06, "loss": 0.323, "step": 23170 }, { "epoch": 1.167228964197593, "grad_norm": 3.928292528045368, "learning_rate": 7.663304596252798e-06, "loss": 0.3359, "step": 23180 }, { "epoch": 1.1677325142252883, "grad_norm": 2.887256784404496, "learning_rate": 7.660824775434777e-06, "loss": 0.3321, "step": 23190 }, { "epoch": 1.1682360642529837, "grad_norm": 4.316879057886267, "learning_rate": 7.65834404117978e-06, "loss": 0.3176, "step": 23200 }, { "epoch": 1.1687396142806787, "grad_norm": 3.518678124137049, "learning_rate": 7.655862394339418e-06, "loss": 0.293, "step": 23210 }, { "epoch": 1.169243164308374, "grad_norm": 4.202750411285686, "learning_rate": 7.653379835765618e-06, "loss": 0.333, "step": 23220 }, { "epoch": 1.1697467143360694, "grad_norm": 3.316922814883244, "learning_rate": 7.650896366310624e-06, "loss": 0.3209, "step": 23230 }, { "epoch": 1.1702502643637644, "grad_norm": 4.406945111818345, "learning_rate": 7.64841198682698e-06, "loss": 0.3548, "step": 23240 }, { "epoch": 1.1707538143914598, "grad_norm": 3.6163154367924717, "learning_rate": 7.645926698167561e-06, "loss": 0.3529, "step": 23250 }, { "epoch": 1.171257364419155, "grad_norm": 3.293623728386688, "learning_rate": 7.643440501185537e-06, "loss": 0.265, "step": 23260 }, { "epoch": 1.1717609144468504, "grad_norm": 4.226360749535605, "learning_rate": 7.640953396734401e-06, "loss": 0.3434, "step": 23270 }, { "epoch": 1.1722644644745455, "grad_norm": 3.124957383006835, "learning_rate": 7.63846538566795e-06, "loss": 0.2896, "step": 23280 }, { "epoch": 1.1727680145022408, "grad_norm": 2.85417669362102, "learning_rate": 7.635976468840299e-06, "loss": 0.3149, "step": 23290 }, { "epoch": 1.173271564529936, "grad_norm": 2.9915531905863193, "learning_rate": 7.633486647105867e-06, "loss": 0.3069, "step": 23300 }, { "epoch": 1.1737751145576314, "grad_norm": 4.507831530940753, "learning_rate": 7.63099592131939e-06, "loss": 0.3775, "step": 23310 }, { "epoch": 1.1742786645853265, "grad_norm": 4.697198732808974, "learning_rate": 7.628504292335911e-06, "loss": 0.3073, "step": 23320 }, { "epoch": 1.1747822146130218, "grad_norm": 4.310076093703273, "learning_rate": 7.626011761010782e-06, "loss": 0.3093, "step": 23330 }, { "epoch": 1.175285764640717, "grad_norm": 3.84181890817437, "learning_rate": 7.623518328199667e-06, "loss": 0.3217, "step": 23340 }, { "epoch": 1.1757893146684122, "grad_norm": 2.614335584920546, "learning_rate": 7.621023994758541e-06, "loss": 0.3207, "step": 23350 }, { "epoch": 1.1762928646961075, "grad_norm": 3.985166842017452, "learning_rate": 7.618528761543682e-06, "loss": 0.3285, "step": 23360 }, { "epoch": 1.1767964147238028, "grad_norm": 3.7892555119309304, "learning_rate": 7.616032629411685e-06, "loss": 0.3724, "step": 23370 }, { "epoch": 1.1772999647514981, "grad_norm": 3.5018787048366993, "learning_rate": 7.613535599219446e-06, "loss": 0.2862, "step": 23380 }, { "epoch": 1.1778035147791934, "grad_norm": 4.439004278940685, "learning_rate": 7.611037671824177e-06, "loss": 0.374, "step": 23390 }, { "epoch": 1.1783070648068885, "grad_norm": 4.682501264085453, "learning_rate": 7.608538848083389e-06, "loss": 0.2937, "step": 23400 }, { "epoch": 1.1788106148345838, "grad_norm": 3.465031498621085, "learning_rate": 7.606039128854907e-06, "loss": 0.3134, "step": 23410 }, { "epoch": 1.1793141648622791, "grad_norm": 3.4348797010246654, "learning_rate": 7.603538514996865e-06, "loss": 0.3162, "step": 23420 }, { "epoch": 1.1798177148899742, "grad_norm": 4.052091221903404, "learning_rate": 7.601037007367698e-06, "loss": 0.3025, "step": 23430 }, { "epoch": 1.1803212649176695, "grad_norm": 2.6308284480507975, "learning_rate": 7.598534606826151e-06, "loss": 0.2998, "step": 23440 }, { "epoch": 1.1808248149453648, "grad_norm": 3.263453587078538, "learning_rate": 7.596031314231276e-06, "loss": 0.3423, "step": 23450 }, { "epoch": 1.1813283649730602, "grad_norm": 3.520456872843584, "learning_rate": 7.593527130442431e-06, "loss": 0.3655, "step": 23460 }, { "epoch": 1.1818319150007552, "grad_norm": 3.3626948735570608, "learning_rate": 7.5910220563192774e-06, "loss": 0.2568, "step": 23470 }, { "epoch": 1.1823354650284505, "grad_norm": 3.8482083754699814, "learning_rate": 7.588516092721787e-06, "loss": 0.3099, "step": 23480 }, { "epoch": 1.1828390150561459, "grad_norm": 3.31012782463634, "learning_rate": 7.586009240510235e-06, "loss": 0.2926, "step": 23490 }, { "epoch": 1.1833425650838412, "grad_norm": 3.1900678336735897, "learning_rate": 7.583501500545196e-06, "loss": 0.2678, "step": 23500 }, { "epoch": 1.1838461151115363, "grad_norm": 4.307003798137489, "learning_rate": 7.58099287368756e-06, "loss": 0.3216, "step": 23510 }, { "epoch": 1.1843496651392316, "grad_norm": 3.8060634658512047, "learning_rate": 7.578483360798514e-06, "loss": 0.2848, "step": 23520 }, { "epoch": 1.1848532151669269, "grad_norm": 3.8140431900135505, "learning_rate": 7.5759729627395495e-06, "loss": 0.2952, "step": 23530 }, { "epoch": 1.185356765194622, "grad_norm": 3.7417756910563913, "learning_rate": 7.5734616803724644e-06, "loss": 0.2785, "step": 23540 }, { "epoch": 1.1858603152223173, "grad_norm": 5.508270133702207, "learning_rate": 7.570949514559358e-06, "loss": 0.2907, "step": 23550 }, { "epoch": 1.1863638652500126, "grad_norm": 3.7310148469782325, "learning_rate": 7.568436466162639e-06, "loss": 0.3069, "step": 23560 }, { "epoch": 1.186867415277708, "grad_norm": 4.293823678665652, "learning_rate": 7.565922536045005e-06, "loss": 0.2993, "step": 23570 }, { "epoch": 1.1873709653054032, "grad_norm": 2.816062493029938, "learning_rate": 7.563407725069474e-06, "loss": 0.2971, "step": 23580 }, { "epoch": 1.1878745153330983, "grad_norm": 3.502626717214896, "learning_rate": 7.560892034099352e-06, "loss": 0.2954, "step": 23590 }, { "epoch": 1.1883780653607936, "grad_norm": 3.6657610932534017, "learning_rate": 7.558375463998258e-06, "loss": 0.321, "step": 23600 }, { "epoch": 1.188881615388489, "grad_norm": 3.1408550703811273, "learning_rate": 7.555858015630102e-06, "loss": 0.3405, "step": 23610 }, { "epoch": 1.189385165416184, "grad_norm": 3.807944399483345, "learning_rate": 7.553339689859106e-06, "loss": 0.3137, "step": 23620 }, { "epoch": 1.1898887154438793, "grad_norm": 2.4101622687240387, "learning_rate": 7.550820487549786e-06, "loss": 0.2967, "step": 23630 }, { "epoch": 1.1903922654715746, "grad_norm": 4.500608304441119, "learning_rate": 7.548300409566962e-06, "loss": 0.35, "step": 23640 }, { "epoch": 1.19089581549927, "grad_norm": 3.1622386062303263, "learning_rate": 7.545779456775752e-06, "loss": 0.2693, "step": 23650 }, { "epoch": 1.191399365526965, "grad_norm": 4.825185472937062, "learning_rate": 7.543257630041579e-06, "loss": 0.2941, "step": 23660 }, { "epoch": 1.1919029155546603, "grad_norm": 3.866492561226142, "learning_rate": 7.540734930230162e-06, "loss": 0.2904, "step": 23670 }, { "epoch": 1.1924064655823556, "grad_norm": 4.237262603403277, "learning_rate": 7.53821135820752e-06, "loss": 0.3222, "step": 23680 }, { "epoch": 1.192910015610051, "grad_norm": 4.6067238167620275, "learning_rate": 7.535686914839972e-06, "loss": 0.3069, "step": 23690 }, { "epoch": 1.193413565637746, "grad_norm": 4.020727739454598, "learning_rate": 7.533161600994141e-06, "loss": 0.3275, "step": 23700 }, { "epoch": 1.1939171156654413, "grad_norm": 3.9004373524931633, "learning_rate": 7.530635417536938e-06, "loss": 0.296, "step": 23710 }, { "epoch": 1.1944206656931367, "grad_norm": 4.566879432314898, "learning_rate": 7.528108365335585e-06, "loss": 0.3594, "step": 23720 }, { "epoch": 1.194924215720832, "grad_norm": 3.8856231511279753, "learning_rate": 7.52558044525759e-06, "loss": 0.311, "step": 23730 }, { "epoch": 1.195427765748527, "grad_norm": 4.082029938889536, "learning_rate": 7.523051658170769e-06, "loss": 0.3342, "step": 23740 }, { "epoch": 1.1959313157762224, "grad_norm": 4.296544720760398, "learning_rate": 7.52052200494323e-06, "loss": 0.351, "step": 23750 }, { "epoch": 1.1964348658039177, "grad_norm": 2.928131440594265, "learning_rate": 7.517991486443381e-06, "loss": 0.2643, "step": 23760 }, { "epoch": 1.196938415831613, "grad_norm": 3.2174992986546167, "learning_rate": 7.515460103539925e-06, "loss": 0.2905, "step": 23770 }, { "epoch": 1.197441965859308, "grad_norm": 4.084750371839485, "learning_rate": 7.512927857101865e-06, "loss": 0.3295, "step": 23780 }, { "epoch": 1.1979455158870034, "grad_norm": 1.9343722326268171, "learning_rate": 7.510394747998495e-06, "loss": 0.2691, "step": 23790 }, { "epoch": 1.1984490659146987, "grad_norm": 4.444405837779115, "learning_rate": 7.50786077709941e-06, "loss": 0.347, "step": 23800 }, { "epoch": 1.1989526159423938, "grad_norm": 3.4920347482058567, "learning_rate": 7.505325945274498e-06, "loss": 0.3153, "step": 23810 }, { "epoch": 1.199456165970089, "grad_norm": 5.639163596830937, "learning_rate": 7.502790253393947e-06, "loss": 0.3385, "step": 23820 }, { "epoch": 1.1999597159977844, "grad_norm": 3.6571837067425874, "learning_rate": 7.500253702328233e-06, "loss": 0.3083, "step": 23830 }, { "epoch": 1.2004632660254797, "grad_norm": 4.137039083466322, "learning_rate": 7.4977162929481336e-06, "loss": 0.2721, "step": 23840 }, { "epoch": 1.2009668160531748, "grad_norm": 3.5607015630179837, "learning_rate": 7.495178026124718e-06, "loss": 0.3116, "step": 23850 }, { "epoch": 1.20147036608087, "grad_norm": 3.9056390713477622, "learning_rate": 7.492638902729348e-06, "loss": 0.3133, "step": 23860 }, { "epoch": 1.2019739161085654, "grad_norm": 3.1290893940291546, "learning_rate": 7.490098923633687e-06, "loss": 0.355, "step": 23870 }, { "epoch": 1.2024774661362607, "grad_norm": 3.150431033130909, "learning_rate": 7.487558089709678e-06, "loss": 0.3338, "step": 23880 }, { "epoch": 1.2029810161639558, "grad_norm": 3.0360366695234986, "learning_rate": 7.485016401829573e-06, "loss": 0.3302, "step": 23890 }, { "epoch": 1.2034845661916511, "grad_norm": 3.342691789887939, "learning_rate": 7.482473860865909e-06, "loss": 0.3133, "step": 23900 }, { "epoch": 1.2039881162193464, "grad_norm": 4.696296494336744, "learning_rate": 7.4799304676915175e-06, "loss": 0.3253, "step": 23910 }, { "epoch": 1.2044916662470417, "grad_norm": 3.551920661486532, "learning_rate": 7.4773862231795205e-06, "loss": 0.3175, "step": 23920 }, { "epoch": 1.2049952162747368, "grad_norm": 2.5800425230122896, "learning_rate": 7.4748411282033354e-06, "loss": 0.2797, "step": 23930 }, { "epoch": 1.2054987663024321, "grad_norm": 3.844182712001926, "learning_rate": 7.472295183636669e-06, "loss": 0.283, "step": 23940 }, { "epoch": 1.2060023163301274, "grad_norm": 4.25486524490029, "learning_rate": 7.469748390353524e-06, "loss": 0.2712, "step": 23950 }, { "epoch": 1.2065058663578228, "grad_norm": 4.774131241809232, "learning_rate": 7.467200749228188e-06, "loss": 0.3264, "step": 23960 }, { "epoch": 1.2070094163855178, "grad_norm": 3.2648891863115126, "learning_rate": 7.464652261135244e-06, "loss": 0.2309, "step": 23970 }, { "epoch": 1.2075129664132132, "grad_norm": 3.2573350572133286, "learning_rate": 7.4621029269495645e-06, "loss": 0.2738, "step": 23980 }, { "epoch": 1.2080165164409085, "grad_norm": 4.953338098861791, "learning_rate": 7.4595527475463146e-06, "loss": 0.3643, "step": 23990 }, { "epoch": 1.2085200664686035, "grad_norm": 4.464136598637844, "learning_rate": 7.457001723800945e-06, "loss": 0.275, "step": 24000 }, { "epoch": 1.2090236164962989, "grad_norm": 4.02451607852562, "learning_rate": 7.4544498565892035e-06, "loss": 0.3426, "step": 24010 }, { "epoch": 1.2095271665239942, "grad_norm": 4.545446199397374, "learning_rate": 7.4518971467871174e-06, "loss": 0.2857, "step": 24020 }, { "epoch": 1.2100307165516895, "grad_norm": 3.6313840570678186, "learning_rate": 7.449343595271011e-06, "loss": 0.3177, "step": 24030 }, { "epoch": 1.2105342665793846, "grad_norm": 3.583476271268746, "learning_rate": 7.446789202917497e-06, "loss": 0.2889, "step": 24040 }, { "epoch": 1.2110378166070799, "grad_norm": 3.338943188049151, "learning_rate": 7.444233970603474e-06, "loss": 0.2952, "step": 24050 }, { "epoch": 1.2115413666347752, "grad_norm": 3.4799085922798745, "learning_rate": 7.441677899206128e-06, "loss": 0.3153, "step": 24060 }, { "epoch": 1.2120449166624705, "grad_norm": 3.652678340952646, "learning_rate": 7.439120989602938e-06, "loss": 0.2981, "step": 24070 }, { "epoch": 1.2125484666901656, "grad_norm": 3.18392473977331, "learning_rate": 7.436563242671666e-06, "loss": 0.304, "step": 24080 }, { "epoch": 1.213052016717861, "grad_norm": 3.518139860603058, "learning_rate": 7.434004659290366e-06, "loss": 0.259, "step": 24090 }, { "epoch": 1.2135555667455562, "grad_norm": 2.1506554112773353, "learning_rate": 7.431445240337373e-06, "loss": 0.3257, "step": 24100 }, { "epoch": 1.2140591167732515, "grad_norm": 3.4536402981495393, "learning_rate": 7.428884986691314e-06, "loss": 0.3269, "step": 24110 }, { "epoch": 1.2145626668009466, "grad_norm": 3.8122366563968453, "learning_rate": 7.4263238992311e-06, "loss": 0.3416, "step": 24120 }, { "epoch": 1.215066216828642, "grad_norm": 3.530481490127056, "learning_rate": 7.423761978835929e-06, "loss": 0.3461, "step": 24130 }, { "epoch": 1.2155697668563372, "grad_norm": 3.510812044093655, "learning_rate": 7.421199226385287e-06, "loss": 0.2607, "step": 24140 }, { "epoch": 1.2160733168840325, "grad_norm": 3.620952719102536, "learning_rate": 7.418635642758942e-06, "loss": 0.331, "step": 24150 }, { "epoch": 1.2165768669117276, "grad_norm": 3.227641786147366, "learning_rate": 7.416071228836949e-06, "loss": 0.353, "step": 24160 }, { "epoch": 1.217080416939423, "grad_norm": 3.334418137702331, "learning_rate": 7.413505985499647e-06, "loss": 0.2633, "step": 24170 }, { "epoch": 1.2175839669671182, "grad_norm": 4.004509417193227, "learning_rate": 7.410939913627665e-06, "loss": 0.3453, "step": 24180 }, { "epoch": 1.2180875169948133, "grad_norm": 3.3798023822314205, "learning_rate": 7.408373014101906e-06, "loss": 0.3, "step": 24190 }, { "epoch": 1.2185910670225086, "grad_norm": 4.24778848371416, "learning_rate": 7.405805287803569e-06, "loss": 0.3635, "step": 24200 }, { "epoch": 1.219094617050204, "grad_norm": 3.121814742504914, "learning_rate": 7.4032367356141265e-06, "loss": 0.3561, "step": 24210 }, { "epoch": 1.2195981670778993, "grad_norm": 4.0817373419522065, "learning_rate": 7.400667358415343e-06, "loss": 0.2856, "step": 24220 }, { "epoch": 1.2201017171055946, "grad_norm": 4.120084922647006, "learning_rate": 7.398097157089259e-06, "loss": 0.2954, "step": 24230 }, { "epoch": 1.2206052671332897, "grad_norm": 3.775277171799177, "learning_rate": 7.395526132518203e-06, "loss": 0.2766, "step": 24240 }, { "epoch": 1.221108817160985, "grad_norm": 4.407467508576891, "learning_rate": 7.392954285584785e-06, "loss": 0.3232, "step": 24250 }, { "epoch": 1.2216123671886803, "grad_norm": 4.649861817485378, "learning_rate": 7.390381617171895e-06, "loss": 0.3196, "step": 24260 }, { "epoch": 1.2221159172163754, "grad_norm": 3.8494198264656454, "learning_rate": 7.387808128162708e-06, "loss": 0.3048, "step": 24270 }, { "epoch": 1.2226194672440707, "grad_norm": 3.0342099762062937, "learning_rate": 7.38523381944068e-06, "loss": 0.3348, "step": 24280 }, { "epoch": 1.223123017271766, "grad_norm": 4.117362045381339, "learning_rate": 7.3826586918895445e-06, "loss": 0.3574, "step": 24290 }, { "epoch": 1.2236265672994613, "grad_norm": 2.74712307524752, "learning_rate": 7.380082746393324e-06, "loss": 0.3029, "step": 24300 }, { "epoch": 1.2241301173271564, "grad_norm": 3.994761702989496, "learning_rate": 7.377505983836313e-06, "loss": 0.3271, "step": 24310 }, { "epoch": 1.2246336673548517, "grad_norm": 2.4707819787036094, "learning_rate": 7.374928405103095e-06, "loss": 0.2587, "step": 24320 }, { "epoch": 1.225137217382547, "grad_norm": 4.9146646880529286, "learning_rate": 7.372350011078524e-06, "loss": 0.2859, "step": 24330 }, { "epoch": 1.2256407674102423, "grad_norm": 4.900027080162678, "learning_rate": 7.369770802647743e-06, "loss": 0.3503, "step": 24340 }, { "epoch": 1.2261443174379374, "grad_norm": 3.796670777816629, "learning_rate": 7.367190780696171e-06, "loss": 0.2976, "step": 24350 }, { "epoch": 1.2266478674656327, "grad_norm": 3.104432728004046, "learning_rate": 7.3646099461095045e-06, "loss": 0.3122, "step": 24360 }, { "epoch": 1.227151417493328, "grad_norm": 3.4682920848921546, "learning_rate": 7.36202829977372e-06, "loss": 0.3471, "step": 24370 }, { "epoch": 1.227654967521023, "grad_norm": 4.749070667180376, "learning_rate": 7.3594458425750755e-06, "loss": 0.3112, "step": 24380 }, { "epoch": 1.2281585175487184, "grad_norm": 1.9254755707651026, "learning_rate": 7.356862575400105e-06, "loss": 0.293, "step": 24390 }, { "epoch": 1.2286620675764137, "grad_norm": 3.5862115538754407, "learning_rate": 7.3542784991356185e-06, "loss": 0.3756, "step": 24400 }, { "epoch": 1.229165617604109, "grad_norm": 4.306783055846633, "learning_rate": 7.351693614668708e-06, "loss": 0.3445, "step": 24410 }, { "epoch": 1.2296691676318043, "grad_norm": 4.2182257690728315, "learning_rate": 7.349107922886739e-06, "loss": 0.2621, "step": 24420 }, { "epoch": 1.2301727176594994, "grad_norm": 4.460302424849426, "learning_rate": 7.3465214246773575e-06, "loss": 0.3341, "step": 24430 }, { "epoch": 1.2306762676871947, "grad_norm": 3.601068010893152, "learning_rate": 7.343934120928483e-06, "loss": 0.2767, "step": 24440 }, { "epoch": 1.23117981771489, "grad_norm": 4.567982189842419, "learning_rate": 7.3413460125283164e-06, "loss": 0.3443, "step": 24450 }, { "epoch": 1.2316833677425851, "grad_norm": 4.979972054205183, "learning_rate": 7.338757100365329e-06, "loss": 0.2877, "step": 24460 }, { "epoch": 1.2321869177702804, "grad_norm": 3.855131717923582, "learning_rate": 7.336167385328273e-06, "loss": 0.3265, "step": 24470 }, { "epoch": 1.2326904677979758, "grad_norm": 5.59772680288827, "learning_rate": 7.333576868306172e-06, "loss": 0.3065, "step": 24480 }, { "epoch": 1.233194017825671, "grad_norm": 3.6867561699677958, "learning_rate": 7.330985550188328e-06, "loss": 0.3494, "step": 24490 }, { "epoch": 1.2336975678533662, "grad_norm": 4.334719862364686, "learning_rate": 7.328393431864315e-06, "loss": 0.3306, "step": 24500 }, { "epoch": 1.2342011178810615, "grad_norm": 3.5796813155883522, "learning_rate": 7.325800514223989e-06, "loss": 0.3245, "step": 24510 }, { "epoch": 1.2347046679087568, "grad_norm": 3.451118790217006, "learning_rate": 7.323206798157469e-06, "loss": 0.3312, "step": 24520 }, { "epoch": 1.235208217936452, "grad_norm": 3.65820677283264, "learning_rate": 7.32061228455516e-06, "loss": 0.3211, "step": 24530 }, { "epoch": 1.2357117679641472, "grad_norm": 4.1680591829012865, "learning_rate": 7.318016974307728e-06, "loss": 0.305, "step": 24540 }, { "epoch": 1.2362153179918425, "grad_norm": 4.057975473173234, "learning_rate": 7.3154208683061255e-06, "loss": 0.3075, "step": 24550 }, { "epoch": 1.2367188680195378, "grad_norm": 4.015734643644191, "learning_rate": 7.312823967441569e-06, "loss": 0.3243, "step": 24560 }, { "epoch": 1.2372224180472329, "grad_norm": 4.890440654049472, "learning_rate": 7.310226272605552e-06, "loss": 0.3609, "step": 24570 }, { "epoch": 1.2377259680749282, "grad_norm": 3.360327046061934, "learning_rate": 7.307627784689839e-06, "loss": 0.3106, "step": 24580 }, { "epoch": 1.2382295181026235, "grad_norm": 2.897138254874222, "learning_rate": 7.305028504586469e-06, "loss": 0.3148, "step": 24590 }, { "epoch": 1.2387330681303188, "grad_norm": 3.9543682544221763, "learning_rate": 7.302428433187747e-06, "loss": 0.3045, "step": 24600 }, { "epoch": 1.2392366181580141, "grad_norm": 3.27445534003525, "learning_rate": 7.29982757138626e-06, "loss": 0.2793, "step": 24610 }, { "epoch": 1.2397401681857092, "grad_norm": 3.9456271270202263, "learning_rate": 7.297225920074854e-06, "loss": 0.3548, "step": 24620 }, { "epoch": 1.2402437182134045, "grad_norm": 3.748822128844157, "learning_rate": 7.294623480146658e-06, "loss": 0.2945, "step": 24630 }, { "epoch": 1.2407472682410998, "grad_norm": 2.4974756720245885, "learning_rate": 7.292020252495061e-06, "loss": 0.2803, "step": 24640 }, { "epoch": 1.241250818268795, "grad_norm": 2.77421787518416, "learning_rate": 7.289416238013731e-06, "loss": 0.3132, "step": 24650 }, { "epoch": 1.2417543682964902, "grad_norm": 4.118900251742071, "learning_rate": 7.286811437596602e-06, "loss": 0.3089, "step": 24660 }, { "epoch": 1.2422579183241855, "grad_norm": 7.431076156321512, "learning_rate": 7.284205852137876e-06, "loss": 0.3511, "step": 24670 }, { "epoch": 1.2427614683518808, "grad_norm": 3.716549279884042, "learning_rate": 7.281599482532028e-06, "loss": 0.336, "step": 24680 }, { "epoch": 1.243265018379576, "grad_norm": 4.157054121520911, "learning_rate": 7.278992329673802e-06, "loss": 0.21, "step": 24690 }, { "epoch": 1.2437685684072712, "grad_norm": 4.198456107474946, "learning_rate": 7.2763843944582095e-06, "loss": 0.3542, "step": 24700 }, { "epoch": 1.2442721184349665, "grad_norm": 2.433891124492269, "learning_rate": 7.27377567778053e-06, "loss": 0.2509, "step": 24710 }, { "epoch": 1.2447756684626619, "grad_norm": 4.65006104827168, "learning_rate": 7.271166180536313e-06, "loss": 0.3796, "step": 24720 }, { "epoch": 1.245279218490357, "grad_norm": 3.1477314739722497, "learning_rate": 7.268555903621376e-06, "loss": 0.2963, "step": 24730 }, { "epoch": 1.2457827685180523, "grad_norm": 3.4435528543738414, "learning_rate": 7.265944847931801e-06, "loss": 0.2901, "step": 24740 }, { "epoch": 1.2462863185457476, "grad_norm": 4.167747309684182, "learning_rate": 7.263333014363942e-06, "loss": 0.3278, "step": 24750 }, { "epoch": 1.2467898685734427, "grad_norm": 3.30792805807146, "learning_rate": 7.260720403814417e-06, "loss": 0.3157, "step": 24760 }, { "epoch": 1.247293418601138, "grad_norm": 3.66024364883428, "learning_rate": 7.258107017180113e-06, "loss": 0.3421, "step": 24770 }, { "epoch": 1.2477969686288333, "grad_norm": 3.808364171616753, "learning_rate": 7.255492855358178e-06, "loss": 0.293, "step": 24780 }, { "epoch": 1.2483005186565286, "grad_norm": 3.6472073682732913, "learning_rate": 7.252877919246033e-06, "loss": 0.3244, "step": 24790 }, { "epoch": 1.248804068684224, "grad_norm": 3.1136820884961085, "learning_rate": 7.2502622097413625e-06, "loss": 0.3064, "step": 24800 }, { "epoch": 1.249307618711919, "grad_norm": 3.2276369314950375, "learning_rate": 7.247645727742112e-06, "loss": 0.3447, "step": 24810 }, { "epoch": 1.2498111687396143, "grad_norm": 3.531999416632163, "learning_rate": 7.2450284741465e-06, "loss": 0.2473, "step": 24820 }, { "epoch": 1.2503147187673096, "grad_norm": 3.2388638409797936, "learning_rate": 7.242410449853003e-06, "loss": 0.3501, "step": 24830 }, { "epoch": 1.2508182687950047, "grad_norm": 3.6409342088192886, "learning_rate": 7.239791655760368e-06, "loss": 0.3001, "step": 24840 }, { "epoch": 1.2513218188227, "grad_norm": 4.4246831368037824, "learning_rate": 7.2371720927676015e-06, "loss": 0.3101, "step": 24850 }, { "epoch": 1.2518253688503953, "grad_norm": 3.687649682556397, "learning_rate": 7.234551761773975e-06, "loss": 0.2602, "step": 24860 }, { "epoch": 1.2523289188780906, "grad_norm": 5.6064192642733035, "learning_rate": 7.231930663679026e-06, "loss": 0.3068, "step": 24870 }, { "epoch": 1.252832468905786, "grad_norm": 3.386957051938966, "learning_rate": 7.22930879938255e-06, "loss": 0.3506, "step": 24880 }, { "epoch": 1.253336018933481, "grad_norm": 3.029530549281378, "learning_rate": 7.226686169784613e-06, "loss": 0.2592, "step": 24890 }, { "epoch": 1.2538395689611763, "grad_norm": 4.222828168569913, "learning_rate": 7.224062775785542e-06, "loss": 0.2982, "step": 24900 }, { "epoch": 1.2543431189888716, "grad_norm": 4.21106650674913, "learning_rate": 7.221438618285918e-06, "loss": 0.3249, "step": 24910 }, { "epoch": 1.2548466690165667, "grad_norm": 3.8675043544608694, "learning_rate": 7.218813698186594e-06, "loss": 0.3675, "step": 24920 }, { "epoch": 1.255350219044262, "grad_norm": 3.47333364828985, "learning_rate": 7.216188016388681e-06, "loss": 0.3273, "step": 24930 }, { "epoch": 1.2558537690719573, "grad_norm": 5.2470064854413145, "learning_rate": 7.213561573793553e-06, "loss": 0.2801, "step": 24940 }, { "epoch": 1.2563573190996524, "grad_norm": 3.24611876762372, "learning_rate": 7.21093437130284e-06, "loss": 0.3292, "step": 24950 }, { "epoch": 1.2568608691273477, "grad_norm": 3.6662927187387666, "learning_rate": 7.20830640981844e-06, "loss": 0.3238, "step": 24960 }, { "epoch": 1.257364419155043, "grad_norm": 3.3880738071159753, "learning_rate": 7.205677690242508e-06, "loss": 0.2712, "step": 24970 }, { "epoch": 1.2578679691827384, "grad_norm": 3.054474703795773, "learning_rate": 7.2030482134774595e-06, "loss": 0.2713, "step": 24980 }, { "epoch": 1.2583715192104337, "grad_norm": 3.7159046988822406, "learning_rate": 7.200417980425969e-06, "loss": 0.3068, "step": 24990 }, { "epoch": 1.2588750692381288, "grad_norm": 3.2122589325334308, "learning_rate": 7.197786991990971e-06, "loss": 0.2775, "step": 25000 }, { "epoch": 1.259378619265824, "grad_norm": 3.5236913028842998, "learning_rate": 7.195155249075661e-06, "loss": 0.2818, "step": 25010 }, { "epoch": 1.2598821692935194, "grad_norm": 3.507131937669152, "learning_rate": 7.192522752583494e-06, "loss": 0.338, "step": 25020 }, { "epoch": 1.2603857193212145, "grad_norm": 3.859830115865318, "learning_rate": 7.18988950341818e-06, "loss": 0.2956, "step": 25030 }, { "epoch": 1.2608892693489098, "grad_norm": 3.5382034199375094, "learning_rate": 7.187255502483691e-06, "loss": 0.3364, "step": 25040 }, { "epoch": 1.261392819376605, "grad_norm": 3.6704991241093348, "learning_rate": 7.184620750684254e-06, "loss": 0.3433, "step": 25050 }, { "epoch": 1.2618963694043004, "grad_norm": 3.6474320875918336, "learning_rate": 7.181985248924356e-06, "loss": 0.3302, "step": 25060 }, { "epoch": 1.2623999194319957, "grad_norm": 3.5388836418775034, "learning_rate": 7.1793489981087405e-06, "loss": 0.3034, "step": 25070 }, { "epoch": 1.2629034694596908, "grad_norm": 2.9141626247221097, "learning_rate": 7.176711999142409e-06, "loss": 0.3293, "step": 25080 }, { "epoch": 1.263407019487386, "grad_norm": 4.929610726628728, "learning_rate": 7.1740742529306205e-06, "loss": 0.3348, "step": 25090 }, { "epoch": 1.2639105695150814, "grad_norm": 4.803024057552593, "learning_rate": 7.171435760378886e-06, "loss": 0.3738, "step": 25100 }, { "epoch": 1.2644141195427765, "grad_norm": 3.4140001168250804, "learning_rate": 7.1687965223929815e-06, "loss": 0.3489, "step": 25110 }, { "epoch": 1.2649176695704718, "grad_norm": 4.078894892633829, "learning_rate": 7.166156539878926e-06, "loss": 0.3373, "step": 25120 }, { "epoch": 1.2654212195981671, "grad_norm": 3.5662391578708785, "learning_rate": 7.163515813743009e-06, "loss": 0.3335, "step": 25130 }, { "epoch": 1.2659247696258622, "grad_norm": 2.8641726949113306, "learning_rate": 7.160874344891761e-06, "loss": 0.3272, "step": 25140 }, { "epoch": 1.2664283196535575, "grad_norm": 3.9834806698483867, "learning_rate": 7.15823213423198e-06, "loss": 0.2727, "step": 25150 }, { "epoch": 1.2669318696812528, "grad_norm": 3.4040530657884758, "learning_rate": 7.155589182670708e-06, "loss": 0.2925, "step": 25160 }, { "epoch": 1.2674354197089481, "grad_norm": 2.8262381635502756, "learning_rate": 7.15294549111525e-06, "loss": 0.2281, "step": 25170 }, { "epoch": 1.2679389697366434, "grad_norm": 4.505718645285074, "learning_rate": 7.150301060473162e-06, "loss": 0.3329, "step": 25180 }, { "epoch": 1.2684425197643385, "grad_norm": 3.5185890463234815, "learning_rate": 7.147655891652249e-06, "loss": 0.3124, "step": 25190 }, { "epoch": 1.2689460697920338, "grad_norm": 4.083046537023338, "learning_rate": 7.145009985560575e-06, "loss": 0.3103, "step": 25200 }, { "epoch": 1.2694496198197291, "grad_norm": 4.1420272324924605, "learning_rate": 7.142363343106458e-06, "loss": 0.2867, "step": 25210 }, { "epoch": 1.2699531698474242, "grad_norm": 3.820184209660987, "learning_rate": 7.139715965198461e-06, "loss": 0.3681, "step": 25220 }, { "epoch": 1.2704567198751195, "grad_norm": 4.963724848426831, "learning_rate": 7.1370678527454115e-06, "loss": 0.3313, "step": 25230 }, { "epoch": 1.2709602699028149, "grad_norm": 4.338715854591606, "learning_rate": 7.134419006656378e-06, "loss": 0.3841, "step": 25240 }, { "epoch": 1.2714638199305102, "grad_norm": 4.6382688221722885, "learning_rate": 7.1317694278406855e-06, "loss": 0.296, "step": 25250 }, { "epoch": 1.2719673699582055, "grad_norm": 3.4746895292946687, "learning_rate": 7.1291191172079126e-06, "loss": 0.2758, "step": 25260 }, { "epoch": 1.2724709199859006, "grad_norm": 3.3798151765951934, "learning_rate": 7.1264680756678836e-06, "loss": 0.303, "step": 25270 }, { "epoch": 1.2729744700135959, "grad_norm": 4.19949844741266, "learning_rate": 7.12381630413068e-06, "loss": 0.306, "step": 25280 }, { "epoch": 1.2734780200412912, "grad_norm": 4.049865109245777, "learning_rate": 7.12116380350663e-06, "loss": 0.3025, "step": 25290 }, { "epoch": 1.2739815700689863, "grad_norm": 2.8554595792904114, "learning_rate": 7.118510574706313e-06, "loss": 0.2919, "step": 25300 }, { "epoch": 1.2744851200966816, "grad_norm": 4.710248371800446, "learning_rate": 7.115856618640557e-06, "loss": 0.3299, "step": 25310 }, { "epoch": 1.2749886701243769, "grad_norm": 4.857450977968714, "learning_rate": 7.113201936220442e-06, "loss": 0.3026, "step": 25320 }, { "epoch": 1.275492220152072, "grad_norm": 4.348725916913219, "learning_rate": 7.110546528357297e-06, "loss": 0.3195, "step": 25330 }, { "epoch": 1.2759957701797673, "grad_norm": 3.6701187494886844, "learning_rate": 7.1078903959627e-06, "loss": 0.2622, "step": 25340 }, { "epoch": 1.2764993202074626, "grad_norm": 3.7765532159169086, "learning_rate": 7.105233539948475e-06, "loss": 0.2957, "step": 25350 }, { "epoch": 1.277002870235158, "grad_norm": 4.021720876586603, "learning_rate": 7.1025759612266964e-06, "loss": 0.2867, "step": 25360 }, { "epoch": 1.2775064202628532, "grad_norm": 3.0252834849613075, "learning_rate": 7.099917660709689e-06, "loss": 0.3112, "step": 25370 }, { "epoch": 1.2780099702905483, "grad_norm": 1.9947795511842963, "learning_rate": 7.097258639310022e-06, "loss": 0.3046, "step": 25380 }, { "epoch": 1.2785135203182436, "grad_norm": 3.7156135252151286, "learning_rate": 7.0945988979405136e-06, "loss": 0.3213, "step": 25390 }, { "epoch": 1.279017070345939, "grad_norm": 3.060469179099016, "learning_rate": 7.091938437514229e-06, "loss": 0.2404, "step": 25400 }, { "epoch": 1.279520620373634, "grad_norm": 3.380209193061396, "learning_rate": 7.089277258944478e-06, "loss": 0.3152, "step": 25410 }, { "epoch": 1.2800241704013293, "grad_norm": 4.013465419464502, "learning_rate": 7.086615363144823e-06, "loss": 0.3243, "step": 25420 }, { "epoch": 1.2805277204290246, "grad_norm": 3.6604321864057177, "learning_rate": 7.083952751029065e-06, "loss": 0.3078, "step": 25430 }, { "epoch": 1.28103127045672, "grad_norm": 3.708305216798379, "learning_rate": 7.081289423511257e-06, "loss": 0.2811, "step": 25440 }, { "epoch": 1.2815348204844152, "grad_norm": 3.924321445620637, "learning_rate": 7.078625381505693e-06, "loss": 0.2821, "step": 25450 }, { "epoch": 1.2820383705121103, "grad_norm": 3.71460992705369, "learning_rate": 7.075960625926918e-06, "loss": 0.311, "step": 25460 }, { "epoch": 1.2825419205398056, "grad_norm": 4.793676469385205, "learning_rate": 7.073295157689713e-06, "loss": 0.3733, "step": 25470 }, { "epoch": 1.283045470567501, "grad_norm": 3.3548865731652247, "learning_rate": 7.070628977709114e-06, "loss": 0.3398, "step": 25480 }, { "epoch": 1.283549020595196, "grad_norm": 5.038722291870915, "learning_rate": 7.067962086900394e-06, "loss": 0.3419, "step": 25490 }, { "epoch": 1.2840525706228914, "grad_norm": 4.11596813287136, "learning_rate": 7.0652944861790726e-06, "loss": 0.3179, "step": 25500 }, { "epoch": 1.2845561206505867, "grad_norm": 3.4378847500771332, "learning_rate": 7.062626176460913e-06, "loss": 0.2837, "step": 25510 }, { "epoch": 1.2850596706782818, "grad_norm": 3.5232861480967617, "learning_rate": 7.059957158661923e-06, "loss": 0.3581, "step": 25520 }, { "epoch": 1.285563220705977, "grad_norm": 3.58169856511166, "learning_rate": 7.057287433698349e-06, "loss": 0.3063, "step": 25530 }, { "epoch": 1.2860667707336724, "grad_norm": 4.593940422125501, "learning_rate": 7.054617002486686e-06, "loss": 0.2977, "step": 25540 }, { "epoch": 1.2865703207613677, "grad_norm": 3.7236616179712203, "learning_rate": 7.051945865943669e-06, "loss": 0.2934, "step": 25550 }, { "epoch": 1.287073870789063, "grad_norm": 4.028855365670892, "learning_rate": 7.0492740249862735e-06, "loss": 0.291, "step": 25560 }, { "epoch": 1.287577420816758, "grad_norm": 4.705444020710517, "learning_rate": 7.046601480531717e-06, "loss": 0.3494, "step": 25570 }, { "epoch": 1.2880809708444534, "grad_norm": 3.7009179430985424, "learning_rate": 7.043928233497463e-06, "loss": 0.2639, "step": 25580 }, { "epoch": 1.2885845208721487, "grad_norm": 4.023058053216174, "learning_rate": 7.041254284801212e-06, "loss": 0.3454, "step": 25590 }, { "epoch": 1.2890880708998438, "grad_norm": 2.8281479023940443, "learning_rate": 7.038579635360905e-06, "loss": 0.3181, "step": 25600 }, { "epoch": 1.289591620927539, "grad_norm": 3.2184383297544494, "learning_rate": 7.035904286094726e-06, "loss": 0.2625, "step": 25610 }, { "epoch": 1.2900951709552344, "grad_norm": 3.9069547004247767, "learning_rate": 7.0332282379210955e-06, "loss": 0.3434, "step": 25620 }, { "epoch": 1.2905987209829297, "grad_norm": 3.9354073552756534, "learning_rate": 7.030551491758682e-06, "loss": 0.2763, "step": 25630 }, { "epoch": 1.291102271010625, "grad_norm": 4.028202437986912, "learning_rate": 7.027874048526382e-06, "loss": 0.3291, "step": 25640 }, { "epoch": 1.2916058210383201, "grad_norm": 3.8129570227714726, "learning_rate": 7.025195909143342e-06, "loss": 0.3053, "step": 25650 }, { "epoch": 1.2921093710660154, "grad_norm": 3.9369907625106912, "learning_rate": 7.022517074528941e-06, "loss": 0.2953, "step": 25660 }, { "epoch": 1.2926129210937107, "grad_norm": 3.9722949256984674, "learning_rate": 7.019837545602798e-06, "loss": 0.3393, "step": 25670 }, { "epoch": 1.2931164711214058, "grad_norm": 4.865912475707343, "learning_rate": 7.0171573232847714e-06, "loss": 0.3594, "step": 25680 }, { "epoch": 1.2936200211491011, "grad_norm": 3.374259004790934, "learning_rate": 7.014476408494958e-06, "loss": 0.2669, "step": 25690 }, { "epoch": 1.2941235711767964, "grad_norm": 4.528549778237079, "learning_rate": 7.011794802153691e-06, "loss": 0.3116, "step": 25700 }, { "epoch": 1.2946271212044917, "grad_norm": 3.8671538339914533, "learning_rate": 7.0091125051815415e-06, "loss": 0.3205, "step": 25710 }, { "epoch": 1.2951306712321868, "grad_norm": 2.8151838958788296, "learning_rate": 7.006429518499317e-06, "loss": 0.349, "step": 25720 }, { "epoch": 1.2956342212598821, "grad_norm": 3.3575283724525438, "learning_rate": 7.003745843028065e-06, "loss": 0.2648, "step": 25730 }, { "epoch": 1.2961377712875775, "grad_norm": 4.080292091251255, "learning_rate": 7.001061479689063e-06, "loss": 0.2787, "step": 25740 }, { "epoch": 1.2966413213152728, "grad_norm": 2.591606662599855, "learning_rate": 6.998376429403831e-06, "loss": 0.3021, "step": 25750 }, { "epoch": 1.2971448713429679, "grad_norm": 4.284203064208133, "learning_rate": 6.995690693094121e-06, "loss": 0.3584, "step": 25760 }, { "epoch": 1.2976484213706632, "grad_norm": 4.362301185237212, "learning_rate": 6.9930042716819265e-06, "loss": 0.3241, "step": 25770 }, { "epoch": 1.2981519713983585, "grad_norm": 2.8806194629684407, "learning_rate": 6.990317166089464e-06, "loss": 0.2907, "step": 25780 }, { "epoch": 1.2986555214260536, "grad_norm": 3.9680536088176437, "learning_rate": 6.987629377239199e-06, "loss": 0.3693, "step": 25790 }, { "epoch": 1.2991590714537489, "grad_norm": 2.4897457116887836, "learning_rate": 6.984940906053821e-06, "loss": 0.2705, "step": 25800 }, { "epoch": 1.2996626214814442, "grad_norm": 3.1975450157074756, "learning_rate": 6.982251753456259e-06, "loss": 0.2884, "step": 25810 }, { "epoch": 1.3001661715091395, "grad_norm": 3.835370517371359, "learning_rate": 6.979561920369675e-06, "loss": 0.3073, "step": 25820 }, { "epoch": 1.3006697215368348, "grad_norm": 4.036514041263253, "learning_rate": 6.976871407717466e-06, "loss": 0.31, "step": 25830 }, { "epoch": 1.3011732715645299, "grad_norm": 3.3065092087091763, "learning_rate": 6.974180216423257e-06, "loss": 0.2721, "step": 25840 }, { "epoch": 1.3016768215922252, "grad_norm": 4.5269367818425135, "learning_rate": 6.97148834741091e-06, "loss": 0.393, "step": 25850 }, { "epoch": 1.3021803716199205, "grad_norm": 3.0011478702304064, "learning_rate": 6.968795801604522e-06, "loss": 0.2459, "step": 25860 }, { "epoch": 1.3026839216476156, "grad_norm": 4.7675261355439975, "learning_rate": 6.966102579928417e-06, "loss": 0.3215, "step": 25870 }, { "epoch": 1.303187471675311, "grad_norm": 4.1587908670117, "learning_rate": 6.963408683307155e-06, "loss": 0.3027, "step": 25880 }, { "epoch": 1.3036910217030062, "grad_norm": 4.391256057681645, "learning_rate": 6.960714112665523e-06, "loss": 0.3153, "step": 25890 }, { "epoch": 1.3041945717307015, "grad_norm": 4.498950171198475, "learning_rate": 6.9580188689285475e-06, "loss": 0.3315, "step": 25900 }, { "epoch": 1.3046981217583968, "grad_norm": 3.9173864824163727, "learning_rate": 6.95532295302148e-06, "loss": 0.3202, "step": 25910 }, { "epoch": 1.305201671786092, "grad_norm": 3.6832675748550607, "learning_rate": 6.952626365869801e-06, "loss": 0.3495, "step": 25920 }, { "epoch": 1.3057052218137872, "grad_norm": 3.0366117008381024, "learning_rate": 6.9499291083992256e-06, "loss": 0.3279, "step": 25930 }, { "epoch": 1.3062087718414825, "grad_norm": 4.031334960716485, "learning_rate": 6.9472311815357006e-06, "loss": 0.3342, "step": 25940 }, { "epoch": 1.3067123218691776, "grad_norm": 4.671740949761697, "learning_rate": 6.944532586205396e-06, "loss": 0.2878, "step": 25950 }, { "epoch": 1.307215871896873, "grad_norm": 3.7177615538658717, "learning_rate": 6.941833323334717e-06, "loss": 0.2934, "step": 25960 }, { "epoch": 1.3077194219245682, "grad_norm": 4.0217348640998285, "learning_rate": 6.939133393850297e-06, "loss": 0.3338, "step": 25970 }, { "epoch": 1.3082229719522633, "grad_norm": 4.994633139441192, "learning_rate": 6.936432798678995e-06, "loss": 0.2731, "step": 25980 }, { "epoch": 1.3087265219799586, "grad_norm": 3.812371891347254, "learning_rate": 6.933731538747901e-06, "loss": 0.3538, "step": 25990 }, { "epoch": 1.309230072007654, "grad_norm": 3.25505948219118, "learning_rate": 6.931029614984335e-06, "loss": 0.352, "step": 26000 }, { "epoch": 1.3097336220353493, "grad_norm": 4.162116341239659, "learning_rate": 6.9283270283158395e-06, "loss": 0.2876, "step": 26010 }, { "epoch": 1.3102371720630446, "grad_norm": 3.7026013350355975, "learning_rate": 6.925623779670193e-06, "loss": 0.3345, "step": 26020 }, { "epoch": 1.3107407220907397, "grad_norm": 3.9415459674620643, "learning_rate": 6.922919869975391e-06, "loss": 0.3359, "step": 26030 }, { "epoch": 1.311244272118435, "grad_norm": 3.8041944245373247, "learning_rate": 6.920215300159665e-06, "loss": 0.285, "step": 26040 }, { "epoch": 1.3117478221461303, "grad_norm": 4.4170954944516785, "learning_rate": 6.917510071151467e-06, "loss": 0.3224, "step": 26050 }, { "epoch": 1.3122513721738254, "grad_norm": 4.6033660082071926, "learning_rate": 6.9148041838794784e-06, "loss": 0.3003, "step": 26060 }, { "epoch": 1.3127549222015207, "grad_norm": 3.4085407839107784, "learning_rate": 6.912097639272605e-06, "loss": 0.2664, "step": 26070 }, { "epoch": 1.313258472229216, "grad_norm": 3.3563198340362046, "learning_rate": 6.909390438259981e-06, "loss": 0.3463, "step": 26080 }, { "epoch": 1.3137620222569113, "grad_norm": 2.3902315791845115, "learning_rate": 6.906682581770961e-06, "loss": 0.2678, "step": 26090 }, { "epoch": 1.3142655722846066, "grad_norm": 4.018737639592074, "learning_rate": 6.90397407073513e-06, "loss": 0.3431, "step": 26100 }, { "epoch": 1.3147691223123017, "grad_norm": 3.2276904412846568, "learning_rate": 6.901264906082293e-06, "loss": 0.2709, "step": 26110 }, { "epoch": 1.315272672339997, "grad_norm": 4.110999850235334, "learning_rate": 6.8985550887424846e-06, "loss": 0.3074, "step": 26120 }, { "epoch": 1.3157762223676923, "grad_norm": 3.48829914019239, "learning_rate": 6.895844619645957e-06, "loss": 0.311, "step": 26130 }, { "epoch": 1.3162797723953874, "grad_norm": 4.432677480289227, "learning_rate": 6.893133499723194e-06, "loss": 0.3503, "step": 26140 }, { "epoch": 1.3167833224230827, "grad_norm": 5.156825967922963, "learning_rate": 6.8904217299048926e-06, "loss": 0.3136, "step": 26150 }, { "epoch": 1.317286872450778, "grad_norm": 3.078828373652771, "learning_rate": 6.887709311121984e-06, "loss": 0.3005, "step": 26160 }, { "epoch": 1.3177904224784731, "grad_norm": 4.727334245512328, "learning_rate": 6.884996244305614e-06, "loss": 0.3288, "step": 26170 }, { "epoch": 1.3182939725061684, "grad_norm": 3.7979820315922352, "learning_rate": 6.882282530387155e-06, "loss": 0.2942, "step": 26180 }, { "epoch": 1.3187975225338637, "grad_norm": 3.899402917636554, "learning_rate": 6.879568170298201e-06, "loss": 0.2951, "step": 26190 }, { "epoch": 1.319301072561559, "grad_norm": 3.733324413153611, "learning_rate": 6.8768531649705635e-06, "loss": 0.3489, "step": 26200 }, { "epoch": 1.3198046225892544, "grad_norm": 4.379187759762188, "learning_rate": 6.8741375153362846e-06, "loss": 0.339, "step": 26210 }, { "epoch": 1.3203081726169494, "grad_norm": 4.676420072237397, "learning_rate": 6.871421222327618e-06, "loss": 0.3169, "step": 26220 }, { "epoch": 1.3208117226446447, "grad_norm": 4.3871545808740535, "learning_rate": 6.868704286877044e-06, "loss": 0.3378, "step": 26230 }, { "epoch": 1.32131527267234, "grad_norm": 4.174500316361026, "learning_rate": 6.86598670991726e-06, "loss": 0.3653, "step": 26240 }, { "epoch": 1.3218188227000351, "grad_norm": 3.3335632065369594, "learning_rate": 6.863268492381192e-06, "loss": 0.2567, "step": 26250 }, { "epoch": 1.3223223727277305, "grad_norm": 3.5290942956124276, "learning_rate": 6.8605496352019695e-06, "loss": 0.2793, "step": 26260 }, { "epoch": 1.3228259227554258, "grad_norm": 3.6687290141832265, "learning_rate": 6.85783013931296e-06, "loss": 0.2891, "step": 26270 }, { "epoch": 1.323329472783121, "grad_norm": 3.3162847326813583, "learning_rate": 6.855110005647736e-06, "loss": 0.3006, "step": 26280 }, { "epoch": 1.3238330228108164, "grad_norm": 4.650829982792614, "learning_rate": 6.852389235140099e-06, "loss": 0.315, "step": 26290 }, { "epoch": 1.3243365728385115, "grad_norm": 2.992950927414748, "learning_rate": 6.84966782872406e-06, "loss": 0.268, "step": 26300 }, { "epoch": 1.3248401228662068, "grad_norm": 3.9304673413391233, "learning_rate": 6.846945787333859e-06, "loss": 0.3577, "step": 26310 }, { "epoch": 1.325343672893902, "grad_norm": 3.417243719177521, "learning_rate": 6.844223111903943e-06, "loss": 0.3197, "step": 26320 }, { "epoch": 1.3258472229215972, "grad_norm": 5.016883467258597, "learning_rate": 6.8414998033689835e-06, "loss": 0.356, "step": 26330 }, { "epoch": 1.3263507729492925, "grad_norm": 3.903599161726211, "learning_rate": 6.838775862663869e-06, "loss": 0.276, "step": 26340 }, { "epoch": 1.3268543229769878, "grad_norm": 2.8931021152816805, "learning_rate": 6.836051290723702e-06, "loss": 0.2716, "step": 26350 }, { "epoch": 1.3273578730046829, "grad_norm": 3.099384419061321, "learning_rate": 6.833326088483802e-06, "loss": 0.299, "step": 26360 }, { "epoch": 1.3278614230323782, "grad_norm": 3.567343812144862, "learning_rate": 6.83060025687971e-06, "loss": 0.3788, "step": 26370 }, { "epoch": 1.3283649730600735, "grad_norm": 3.9786050443512764, "learning_rate": 6.827873796847177e-06, "loss": 0.3155, "step": 26380 }, { "epoch": 1.3288685230877688, "grad_norm": 4.781949046949167, "learning_rate": 6.825146709322172e-06, "loss": 0.3401, "step": 26390 }, { "epoch": 1.3293720731154641, "grad_norm": 3.5730683946296393, "learning_rate": 6.822418995240878e-06, "loss": 0.2906, "step": 26400 }, { "epoch": 1.3298756231431592, "grad_norm": 4.453667705455238, "learning_rate": 6.819690655539697e-06, "loss": 0.3352, "step": 26410 }, { "epoch": 1.3303791731708545, "grad_norm": 4.106189942782792, "learning_rate": 6.816961691155242e-06, "loss": 0.2765, "step": 26420 }, { "epoch": 1.3308827231985498, "grad_norm": 3.6211743283441415, "learning_rate": 6.814232103024342e-06, "loss": 0.341, "step": 26430 }, { "epoch": 1.331386273226245, "grad_norm": 4.3748970582229365, "learning_rate": 6.8115018920840405e-06, "loss": 0.3125, "step": 26440 }, { "epoch": 1.3318898232539402, "grad_norm": 4.507914114424057, "learning_rate": 6.808771059271592e-06, "loss": 0.2983, "step": 26450 }, { "epoch": 1.3323933732816355, "grad_norm": 3.4770606780660303, "learning_rate": 6.806039605524467e-06, "loss": 0.2924, "step": 26460 }, { "epoch": 1.3328969233093308, "grad_norm": 3.880119168585951, "learning_rate": 6.803307531780351e-06, "loss": 0.2718, "step": 26470 }, { "epoch": 1.3334004733370262, "grad_norm": 3.7585999868106255, "learning_rate": 6.800574838977139e-06, "loss": 0.2926, "step": 26480 }, { "epoch": 1.3339040233647212, "grad_norm": 4.582973590342653, "learning_rate": 6.797841528052937e-06, "loss": 0.3459, "step": 26490 }, { "epoch": 1.3344075733924166, "grad_norm": 3.6480664754610452, "learning_rate": 6.795107599946071e-06, "loss": 0.3487, "step": 26500 }, { "epoch": 1.3349111234201119, "grad_norm": 3.8219894693162884, "learning_rate": 6.792373055595069e-06, "loss": 0.2858, "step": 26510 }, { "epoch": 1.335414673447807, "grad_norm": 2.959903037030843, "learning_rate": 6.789637895938678e-06, "loss": 0.3116, "step": 26520 }, { "epoch": 1.3359182234755023, "grad_norm": 2.917196392606192, "learning_rate": 6.786902121915852e-06, "loss": 0.3252, "step": 26530 }, { "epoch": 1.3364217735031976, "grad_norm": 4.675281278302282, "learning_rate": 6.784165734465759e-06, "loss": 0.3636, "step": 26540 }, { "epoch": 1.3369253235308927, "grad_norm": 4.378181426823413, "learning_rate": 6.781428734527773e-06, "loss": 0.313, "step": 26550 }, { "epoch": 1.337428873558588, "grad_norm": 3.50408905570339, "learning_rate": 6.778691123041485e-06, "loss": 0.2985, "step": 26560 }, { "epoch": 1.3379324235862833, "grad_norm": 3.1127620921499224, "learning_rate": 6.77595290094669e-06, "loss": 0.2598, "step": 26570 }, { "epoch": 1.3384359736139786, "grad_norm": 4.031564892080851, "learning_rate": 6.7732140691833935e-06, "loss": 0.2828, "step": 26580 }, { "epoch": 1.338939523641674, "grad_norm": 3.872867000434766, "learning_rate": 6.770474628691815e-06, "loss": 0.2578, "step": 26590 }, { "epoch": 1.339443073669369, "grad_norm": 3.964076613324515, "learning_rate": 6.767734580412376e-06, "loss": 0.3221, "step": 26600 }, { "epoch": 1.3399466236970643, "grad_norm": 2.8784365944725656, "learning_rate": 6.764993925285712e-06, "loss": 0.2824, "step": 26610 }, { "epoch": 1.3404501737247596, "grad_norm": 4.532705530766434, "learning_rate": 6.7622526642526655e-06, "loss": 0.3581, "step": 26620 }, { "epoch": 1.3409537237524547, "grad_norm": 3.4208039770409844, "learning_rate": 6.759510798254286e-06, "loss": 0.3163, "step": 26630 }, { "epoch": 1.34145727378015, "grad_norm": 3.5621586494754705, "learning_rate": 6.756768328231831e-06, "loss": 0.3484, "step": 26640 }, { "epoch": 1.3419608238078453, "grad_norm": 3.851727424112072, "learning_rate": 6.7540252551267634e-06, "loss": 0.3112, "step": 26650 }, { "epoch": 1.3424643738355406, "grad_norm": 2.584887324228926, "learning_rate": 6.751281579880762e-06, "loss": 0.3252, "step": 26660 }, { "epoch": 1.342967923863236, "grad_norm": 3.5193807305439733, "learning_rate": 6.748537303435697e-06, "loss": 0.2712, "step": 26670 }, { "epoch": 1.343471473890931, "grad_norm": 3.503934616098699, "learning_rate": 6.745792426733659e-06, "loss": 0.2682, "step": 26680 }, { "epoch": 1.3439750239186263, "grad_norm": 5.338264911792508, "learning_rate": 6.743046950716937e-06, "loss": 0.2661, "step": 26690 }, { "epoch": 1.3444785739463216, "grad_norm": 2.371253040617512, "learning_rate": 6.740300876328031e-06, "loss": 0.2833, "step": 26700 }, { "epoch": 1.3449821239740167, "grad_norm": 3.834852063846145, "learning_rate": 6.73755420450964e-06, "loss": 0.3196, "step": 26710 }, { "epoch": 1.345485674001712, "grad_norm": 3.6512576239681804, "learning_rate": 6.734806936204672e-06, "loss": 0.3517, "step": 26720 }, { "epoch": 1.3459892240294073, "grad_norm": 3.2304043940732763, "learning_rate": 6.732059072356241e-06, "loss": 0.2893, "step": 26730 }, { "epoch": 1.3464927740571024, "grad_norm": 2.6804480073835677, "learning_rate": 6.729310613907662e-06, "loss": 0.2885, "step": 26740 }, { "epoch": 1.3469963240847977, "grad_norm": 3.227936093271767, "learning_rate": 6.726561561802459e-06, "loss": 0.3145, "step": 26750 }, { "epoch": 1.347499874112493, "grad_norm": 4.987696772846174, "learning_rate": 6.723811916984351e-06, "loss": 0.3024, "step": 26760 }, { "epoch": 1.3480034241401884, "grad_norm": 4.059948837915421, "learning_rate": 6.721061680397269e-06, "loss": 0.337, "step": 26770 }, { "epoch": 1.3485069741678837, "grad_norm": 4.014294834568524, "learning_rate": 6.718310852985347e-06, "loss": 0.3081, "step": 26780 }, { "epoch": 1.3490105241955788, "grad_norm": 4.623330321521252, "learning_rate": 6.715559435692915e-06, "loss": 0.3151, "step": 26790 }, { "epoch": 1.349514074223274, "grad_norm": 4.026829217749829, "learning_rate": 6.712807429464512e-06, "loss": 0.2792, "step": 26800 }, { "epoch": 1.3500176242509694, "grad_norm": 3.962141242705722, "learning_rate": 6.710054835244875e-06, "loss": 0.2922, "step": 26810 }, { "epoch": 1.3505211742786645, "grad_norm": 3.7979281000047385, "learning_rate": 6.707301653978945e-06, "loss": 0.3244, "step": 26820 }, { "epoch": 1.3510247243063598, "grad_norm": 3.1179316012670153, "learning_rate": 6.704547886611866e-06, "loss": 0.3555, "step": 26830 }, { "epoch": 1.351528274334055, "grad_norm": 4.007571144553102, "learning_rate": 6.701793534088979e-06, "loss": 0.2974, "step": 26840 }, { "epoch": 1.3520318243617504, "grad_norm": 4.141862645720995, "learning_rate": 6.699038597355829e-06, "loss": 0.3076, "step": 26850 }, { "epoch": 1.3525353743894457, "grad_norm": 4.191198320059311, "learning_rate": 6.69628307735816e-06, "loss": 0.282, "step": 26860 }, { "epoch": 1.3530389244171408, "grad_norm": 3.5657918132235937, "learning_rate": 6.693526975041919e-06, "loss": 0.3172, "step": 26870 }, { "epoch": 1.353542474444836, "grad_norm": 4.025218419482776, "learning_rate": 6.6907702913532495e-06, "loss": 0.3367, "step": 26880 }, { "epoch": 1.3540460244725314, "grad_norm": 4.212988659755159, "learning_rate": 6.688013027238496e-06, "loss": 0.4073, "step": 26890 }, { "epoch": 1.3545495745002265, "grad_norm": 3.708580629680762, "learning_rate": 6.685255183644204e-06, "loss": 0.3297, "step": 26900 }, { "epoch": 1.3550531245279218, "grad_norm": 2.4418686590929206, "learning_rate": 6.682496761517112e-06, "loss": 0.2608, "step": 26910 }, { "epoch": 1.3555566745556171, "grad_norm": 4.2638230739466705, "learning_rate": 6.6797377618041644e-06, "loss": 0.3412, "step": 26920 }, { "epoch": 1.3560602245833124, "grad_norm": 3.3105384813357612, "learning_rate": 6.676978185452501e-06, "loss": 0.2896, "step": 26930 }, { "epoch": 1.3565637746110075, "grad_norm": 4.164393417538681, "learning_rate": 6.674218033409458e-06, "loss": 0.3291, "step": 26940 }, { "epoch": 1.3570673246387028, "grad_norm": 3.0801699606630675, "learning_rate": 6.671457306622572e-06, "loss": 0.2833, "step": 26950 }, { "epoch": 1.3575708746663981, "grad_norm": 3.1310346799496553, "learning_rate": 6.668696006039572e-06, "loss": 0.3228, "step": 26960 }, { "epoch": 1.3580744246940935, "grad_norm": 4.410508431357053, "learning_rate": 6.665934132608394e-06, "loss": 0.2899, "step": 26970 }, { "epoch": 1.3585779747217885, "grad_norm": 3.550800280947041, "learning_rate": 6.6631716872771554e-06, "loss": 0.3245, "step": 26980 }, { "epoch": 1.3590815247494838, "grad_norm": 3.68477967343427, "learning_rate": 6.660408670994186e-06, "loss": 0.3151, "step": 26990 }, { "epoch": 1.3595850747771792, "grad_norm": 4.192753024593741, "learning_rate": 6.657645084708e-06, "loss": 0.2977, "step": 27000 }, { "epoch": 1.3600886248048742, "grad_norm": 4.239168921252605, "learning_rate": 6.6548809293673135e-06, "loss": 0.3167, "step": 27010 }, { "epoch": 1.3605921748325696, "grad_norm": 3.4573907104852246, "learning_rate": 6.6521162059210344e-06, "loss": 0.2825, "step": 27020 }, { "epoch": 1.3610957248602649, "grad_norm": 4.406958418881923, "learning_rate": 6.649350915318268e-06, "loss": 0.3045, "step": 27030 }, { "epoch": 1.3615992748879602, "grad_norm": 3.887911046697641, "learning_rate": 6.6465850585083145e-06, "loss": 0.3289, "step": 27040 }, { "epoch": 1.3621028249156555, "grad_norm": 4.458570391587803, "learning_rate": 6.643818636440666e-06, "loss": 0.3149, "step": 27050 }, { "epoch": 1.3626063749433506, "grad_norm": 3.635520350795762, "learning_rate": 6.641051650065009e-06, "loss": 0.2875, "step": 27060 }, { "epoch": 1.3631099249710459, "grad_norm": 3.897151426603238, "learning_rate": 6.638284100331227e-06, "loss": 0.2984, "step": 27070 }, { "epoch": 1.3636134749987412, "grad_norm": 2.3969371667032964, "learning_rate": 6.635515988189393e-06, "loss": 0.3074, "step": 27080 }, { "epoch": 1.3641170250264363, "grad_norm": 2.8570108745733815, "learning_rate": 6.632747314589777e-06, "loss": 0.2581, "step": 27090 }, { "epoch": 1.3646205750541316, "grad_norm": 5.501012823079438, "learning_rate": 6.629978080482837e-06, "loss": 0.3119, "step": 27100 }, { "epoch": 1.365124125081827, "grad_norm": 3.4050473202816445, "learning_rate": 6.627208286819228e-06, "loss": 0.3329, "step": 27110 }, { "epoch": 1.3656276751095222, "grad_norm": 4.146981854488733, "learning_rate": 6.624437934549793e-06, "loss": 0.2913, "step": 27120 }, { "epoch": 1.3661312251372175, "grad_norm": 2.9243038160545116, "learning_rate": 6.62166702462557e-06, "loss": 0.2818, "step": 27130 }, { "epoch": 1.3666347751649126, "grad_norm": 3.7312934562548774, "learning_rate": 6.618895557997789e-06, "loss": 0.2745, "step": 27140 }, { "epoch": 1.367138325192608, "grad_norm": 3.5806031943842482, "learning_rate": 6.616123535617864e-06, "loss": 0.3656, "step": 27150 }, { "epoch": 1.3676418752203032, "grad_norm": 4.094345741698907, "learning_rate": 6.61335095843741e-06, "loss": 0.2881, "step": 27160 }, { "epoch": 1.3681454252479983, "grad_norm": 3.0467893776340285, "learning_rate": 6.610577827408226e-06, "loss": 0.2984, "step": 27170 }, { "epoch": 1.3686489752756936, "grad_norm": 2.5678361322018275, "learning_rate": 6.607804143482304e-06, "loss": 0.3051, "step": 27180 }, { "epoch": 1.369152525303389, "grad_norm": 2.5286258641303494, "learning_rate": 6.60502990761182e-06, "loss": 0.3198, "step": 27190 }, { "epoch": 1.369656075331084, "grad_norm": 4.0133256390345124, "learning_rate": 6.602255120749149e-06, "loss": 0.3847, "step": 27200 }, { "epoch": 1.3701596253587793, "grad_norm": 4.28996645359215, "learning_rate": 6.599479783846849e-06, "loss": 0.3524, "step": 27210 }, { "epoch": 1.3706631753864746, "grad_norm": 3.602635672830609, "learning_rate": 6.596703897857666e-06, "loss": 0.3144, "step": 27220 }, { "epoch": 1.37116672541417, "grad_norm": 4.17351733632136, "learning_rate": 6.5939274637345375e-06, "loss": 0.2706, "step": 27230 }, { "epoch": 1.3716702754418653, "grad_norm": 3.4460838598636614, "learning_rate": 6.5911504824305895e-06, "loss": 0.3089, "step": 27240 }, { "epoch": 1.3721738254695603, "grad_norm": 2.817706671268001, "learning_rate": 6.5883729548991325e-06, "loss": 0.2671, "step": 27250 }, { "epoch": 1.3726773754972557, "grad_norm": 3.28632420609523, "learning_rate": 6.585594882093667e-06, "loss": 0.2868, "step": 27260 }, { "epoch": 1.373180925524951, "grad_norm": 4.159776426709629, "learning_rate": 6.5828162649678804e-06, "loss": 0.3176, "step": 27270 }, { "epoch": 1.373684475552646, "grad_norm": 3.6933643290205644, "learning_rate": 6.580037104475648e-06, "loss": 0.3289, "step": 27280 }, { "epoch": 1.3741880255803414, "grad_norm": 4.0948311198748915, "learning_rate": 6.577257401571028e-06, "loss": 0.2916, "step": 27290 }, { "epoch": 1.3746915756080367, "grad_norm": 3.1687652731758407, "learning_rate": 6.574477157208268e-06, "loss": 0.3386, "step": 27300 }, { "epoch": 1.375195125635732, "grad_norm": 2.362667476254903, "learning_rate": 6.571696372341802e-06, "loss": 0.263, "step": 27310 }, { "epoch": 1.3756986756634273, "grad_norm": 4.0632651967414795, "learning_rate": 6.568915047926246e-06, "loss": 0.2999, "step": 27320 }, { "epoch": 1.3762022256911224, "grad_norm": 4.170712544591792, "learning_rate": 6.566133184916403e-06, "loss": 0.2906, "step": 27330 }, { "epoch": 1.3767057757188177, "grad_norm": 2.499390365727617, "learning_rate": 6.563350784267265e-06, "loss": 0.3867, "step": 27340 }, { "epoch": 1.377209325746513, "grad_norm": 3.592338428001334, "learning_rate": 6.560567846934003e-06, "loss": 0.2955, "step": 27350 }, { "epoch": 1.377712875774208, "grad_norm": 3.327294512542395, "learning_rate": 6.557784373871971e-06, "loss": 0.2945, "step": 27360 }, { "epoch": 1.3782164258019034, "grad_norm": 2.868448007125645, "learning_rate": 6.555000366036715e-06, "loss": 0.2743, "step": 27370 }, { "epoch": 1.3787199758295987, "grad_norm": 5.568467910535111, "learning_rate": 6.552215824383959e-06, "loss": 0.3404, "step": 27380 }, { "epoch": 1.3792235258572938, "grad_norm": 4.061625396209968, "learning_rate": 6.549430749869606e-06, "loss": 0.3303, "step": 27390 }, { "epoch": 1.379727075884989, "grad_norm": 3.7538854000209554, "learning_rate": 6.54664514344975e-06, "loss": 0.3031, "step": 27400 }, { "epoch": 1.3802306259126844, "grad_norm": 2.9258239104922596, "learning_rate": 6.543859006080666e-06, "loss": 0.3163, "step": 27410 }, { "epoch": 1.3807341759403797, "grad_norm": 4.013567642622096, "learning_rate": 6.541072338718808e-06, "loss": 0.2918, "step": 27420 }, { "epoch": 1.381237725968075, "grad_norm": 3.91453618140995, "learning_rate": 6.538285142320812e-06, "loss": 0.2683, "step": 27430 }, { "epoch": 1.3817412759957701, "grad_norm": 4.307172304792056, "learning_rate": 6.5354974178435e-06, "loss": 0.338, "step": 27440 }, { "epoch": 1.3822448260234654, "grad_norm": 3.714590959097417, "learning_rate": 6.532709166243872e-06, "loss": 0.298, "step": 27450 }, { "epoch": 1.3827483760511607, "grad_norm": 3.4599522125410527, "learning_rate": 6.529920388479107e-06, "loss": 0.3222, "step": 27460 }, { "epoch": 1.3832519260788558, "grad_norm": 3.9949975359344077, "learning_rate": 6.52713108550657e-06, "loss": 0.265, "step": 27470 }, { "epoch": 1.3837554761065511, "grad_norm": 3.44101997358772, "learning_rate": 6.524341258283802e-06, "loss": 0.2741, "step": 27480 }, { "epoch": 1.3842590261342465, "grad_norm": 2.447715541725601, "learning_rate": 6.521550907768526e-06, "loss": 0.2653, "step": 27490 }, { "epoch": 1.3847625761619418, "grad_norm": 4.061473845899893, "learning_rate": 6.5187600349186435e-06, "loss": 0.3006, "step": 27500 }, { "epoch": 1.385266126189637, "grad_norm": 3.5859441756683683, "learning_rate": 6.515968640692235e-06, "loss": 0.3292, "step": 27510 }, { "epoch": 1.3857696762173322, "grad_norm": 3.3578498698087196, "learning_rate": 6.513176726047562e-06, "loss": 0.2911, "step": 27520 }, { "epoch": 1.3862732262450275, "grad_norm": 3.352894822799803, "learning_rate": 6.510384291943064e-06, "loss": 0.2752, "step": 27530 }, { "epoch": 1.3867767762727228, "grad_norm": 2.661731743033067, "learning_rate": 6.507591339337356e-06, "loss": 0.2592, "step": 27540 }, { "epoch": 1.3872803263004179, "grad_norm": 3.235883014058295, "learning_rate": 6.5047978691892345e-06, "loss": 0.2921, "step": 27550 }, { "epoch": 1.3877838763281132, "grad_norm": 4.114612292314407, "learning_rate": 6.502003882457674e-06, "loss": 0.3315, "step": 27560 }, { "epoch": 1.3882874263558085, "grad_norm": 3.593271917119345, "learning_rate": 6.499209380101823e-06, "loss": 0.3281, "step": 27570 }, { "epoch": 1.3887909763835036, "grad_norm": 3.7251638368472997, "learning_rate": 6.4964143630810076e-06, "loss": 0.2636, "step": 27580 }, { "epoch": 1.3892945264111989, "grad_norm": 4.522773697534528, "learning_rate": 6.493618832354735e-06, "loss": 0.3114, "step": 27590 }, { "epoch": 1.3897980764388942, "grad_norm": 4.272422054744327, "learning_rate": 6.490822788882683e-06, "loss": 0.3146, "step": 27600 }, { "epoch": 1.3903016264665895, "grad_norm": 2.7100982146120858, "learning_rate": 6.488026233624708e-06, "loss": 0.3178, "step": 27610 }, { "epoch": 1.3908051764942848, "grad_norm": 4.038505719992834, "learning_rate": 6.485229167540843e-06, "loss": 0.3052, "step": 27620 }, { "epoch": 1.39130872652198, "grad_norm": 2.837380781051585, "learning_rate": 6.482431591591296e-06, "loss": 0.2986, "step": 27630 }, { "epoch": 1.3918122765496752, "grad_norm": 3.609369714750828, "learning_rate": 6.479633506736447e-06, "loss": 0.2467, "step": 27640 }, { "epoch": 1.3923158265773705, "grad_norm": 3.8326767463792764, "learning_rate": 6.476834913936856e-06, "loss": 0.3178, "step": 27650 }, { "epoch": 1.3928193766050656, "grad_norm": 3.3791252095344757, "learning_rate": 6.474035814153252e-06, "loss": 0.3253, "step": 27660 }, { "epoch": 1.393322926632761, "grad_norm": 3.90753677537265, "learning_rate": 6.471236208346543e-06, "loss": 0.3179, "step": 27670 }, { "epoch": 1.3938264766604562, "grad_norm": 3.1267893611767597, "learning_rate": 6.468436097477806e-06, "loss": 0.2309, "step": 27680 }, { "epoch": 1.3943300266881515, "grad_norm": 4.141499938471435, "learning_rate": 6.4656354825082945e-06, "loss": 0.3011, "step": 27690 }, { "epoch": 1.3948335767158468, "grad_norm": 3.361228436875656, "learning_rate": 6.462834364399435e-06, "loss": 0.3473, "step": 27700 }, { "epoch": 1.395337126743542, "grad_norm": 4.573724671230489, "learning_rate": 6.460032744112823e-06, "loss": 0.2736, "step": 27710 }, { "epoch": 1.3958406767712372, "grad_norm": 4.8840541944420695, "learning_rate": 6.457230622610233e-06, "loss": 0.2935, "step": 27720 }, { "epoch": 1.3963442267989326, "grad_norm": 19.717116389065122, "learning_rate": 6.454428000853606e-06, "loss": 0.3439, "step": 27730 }, { "epoch": 1.3968477768266276, "grad_norm": 4.320160128801883, "learning_rate": 6.451624879805058e-06, "loss": 0.3799, "step": 27740 }, { "epoch": 1.397351326854323, "grad_norm": 3.038543010517683, "learning_rate": 6.44882126042687e-06, "loss": 0.2931, "step": 27750 }, { "epoch": 1.3978548768820183, "grad_norm": 3.978385795359883, "learning_rate": 6.4460171436815064e-06, "loss": 0.315, "step": 27760 }, { "epoch": 1.3983584269097133, "grad_norm": 6.893429075667047, "learning_rate": 6.443212530531591e-06, "loss": 0.3045, "step": 27770 }, { "epoch": 1.3988619769374087, "grad_norm": 2.8571054176091963, "learning_rate": 6.440407421939922e-06, "loss": 0.3102, "step": 27780 }, { "epoch": 1.399365526965104, "grad_norm": 3.7600129062879315, "learning_rate": 6.437601818869468e-06, "loss": 0.3427, "step": 27790 }, { "epoch": 1.3998690769927993, "grad_norm": 3.5556298752045916, "learning_rate": 6.434795722283368e-06, "loss": 0.3116, "step": 27800 }, { "epoch": 1.4003726270204946, "grad_norm": 5.045149984014081, "learning_rate": 6.431989133144927e-06, "loss": 0.2702, "step": 27810 }, { "epoch": 1.4008761770481897, "grad_norm": 2.883412737882893, "learning_rate": 6.429182052417625e-06, "loss": 0.2847, "step": 27820 }, { "epoch": 1.401379727075885, "grad_norm": 3.622994339567862, "learning_rate": 6.4263744810651056e-06, "loss": 0.2596, "step": 27830 }, { "epoch": 1.4018832771035803, "grad_norm": 2.3988571165814307, "learning_rate": 6.4235664200511815e-06, "loss": 0.3564, "step": 27840 }, { "epoch": 1.4023868271312754, "grad_norm": 3.3674620469117165, "learning_rate": 6.420757870339836e-06, "loss": 0.2783, "step": 27850 }, { "epoch": 1.4028903771589707, "grad_norm": 3.4731073766975804, "learning_rate": 6.417948832895219e-06, "loss": 0.3525, "step": 27860 }, { "epoch": 1.403393927186666, "grad_norm": 3.848610245664179, "learning_rate": 6.415139308681646e-06, "loss": 0.3114, "step": 27870 }, { "epoch": 1.4038974772143613, "grad_norm": 3.457616400397194, "learning_rate": 6.412329298663602e-06, "loss": 0.3324, "step": 27880 }, { "epoch": 1.4044010272420566, "grad_norm": 3.807231472077361, "learning_rate": 6.409518803805739e-06, "loss": 0.3416, "step": 27890 }, { "epoch": 1.4049045772697517, "grad_norm": 4.509375270170479, "learning_rate": 6.4067078250728735e-06, "loss": 0.2916, "step": 27900 }, { "epoch": 1.405408127297447, "grad_norm": 4.048024174710451, "learning_rate": 6.403896363429986e-06, "loss": 0.2795, "step": 27910 }, { "epoch": 1.4059116773251423, "grad_norm": 4.603914125285747, "learning_rate": 6.401084419842231e-06, "loss": 0.3509, "step": 27920 }, { "epoch": 1.4064152273528374, "grad_norm": 3.056024639048296, "learning_rate": 6.398271995274919e-06, "loss": 0.2891, "step": 27930 }, { "epoch": 1.4069187773805327, "grad_norm": 2.888303330328863, "learning_rate": 6.395459090693533e-06, "loss": 0.2848, "step": 27940 }, { "epoch": 1.407422327408228, "grad_norm": 3.0746535100706285, "learning_rate": 6.392645707063716e-06, "loss": 0.2766, "step": 27950 }, { "epoch": 1.4079258774359231, "grad_norm": 4.24264983900441, "learning_rate": 6.389831845351275e-06, "loss": 0.2829, "step": 27960 }, { "epoch": 1.4084294274636184, "grad_norm": 2.7745858724949866, "learning_rate": 6.387017506522187e-06, "loss": 0.3043, "step": 27970 }, { "epoch": 1.4089329774913137, "grad_norm": 3.8445236411734083, "learning_rate": 6.3842026915425885e-06, "loss": 0.2764, "step": 27980 }, { "epoch": 1.409436527519009, "grad_norm": 3.6957521968929776, "learning_rate": 6.381387401378777e-06, "loss": 0.2935, "step": 27990 }, { "epoch": 1.4099400775467044, "grad_norm": 4.219977441507719, "learning_rate": 6.378571636997218e-06, "loss": 0.2901, "step": 28000 }, { "epoch": 1.4104436275743994, "grad_norm": 4.560470003160688, "learning_rate": 6.375755399364538e-06, "loss": 0.2989, "step": 28010 }, { "epoch": 1.4109471776020948, "grad_norm": 5.037184049285643, "learning_rate": 6.3729386894475236e-06, "loss": 0.3097, "step": 28020 }, { "epoch": 1.41145072762979, "grad_norm": 3.9246494951127704, "learning_rate": 6.3701215082131295e-06, "loss": 0.3177, "step": 28030 }, { "epoch": 1.4119542776574852, "grad_norm": 3.954098820467192, "learning_rate": 6.367303856628465e-06, "loss": 0.2636, "step": 28040 }, { "epoch": 1.4124578276851805, "grad_norm": 4.0868110966445945, "learning_rate": 6.364485735660807e-06, "loss": 0.3111, "step": 28050 }, { "epoch": 1.4129613777128758, "grad_norm": 3.286675351398396, "learning_rate": 6.361667146277588e-06, "loss": 0.2966, "step": 28060 }, { "epoch": 1.413464927740571, "grad_norm": 4.5093209641299, "learning_rate": 6.358848089446408e-06, "loss": 0.3216, "step": 28070 }, { "epoch": 1.4139684777682664, "grad_norm": 2.7708533673470748, "learning_rate": 6.356028566135018e-06, "loss": 0.2824, "step": 28080 }, { "epoch": 1.4144720277959615, "grad_norm": 3.5130940376409834, "learning_rate": 6.35320857731134e-06, "loss": 0.2766, "step": 28090 }, { "epoch": 1.4149755778236568, "grad_norm": 2.1747871782374415, "learning_rate": 6.350388123943447e-06, "loss": 0.2873, "step": 28100 }, { "epoch": 1.415479127851352, "grad_norm": 2.846477820047703, "learning_rate": 6.34756720699958e-06, "loss": 0.2812, "step": 28110 }, { "epoch": 1.4159826778790472, "grad_norm": 3.7533971425040913, "learning_rate": 6.344745827448127e-06, "loss": 0.3128, "step": 28120 }, { "epoch": 1.4164862279067425, "grad_norm": 3.3604332598076008, "learning_rate": 6.341923986257648e-06, "loss": 0.3229, "step": 28130 }, { "epoch": 1.4169897779344378, "grad_norm": 4.053625186457295, "learning_rate": 6.3391016843968525e-06, "loss": 0.3029, "step": 28140 }, { "epoch": 1.4174933279621331, "grad_norm": 3.1399654608619665, "learning_rate": 6.336278922834612e-06, "loss": 0.302, "step": 28150 }, { "epoch": 1.4179968779898282, "grad_norm": 3.5593820116385784, "learning_rate": 6.333455702539953e-06, "loss": 0.2864, "step": 28160 }, { "epoch": 1.4185004280175235, "grad_norm": 3.9530392149445475, "learning_rate": 6.330632024482065e-06, "loss": 0.343, "step": 28170 }, { "epoch": 1.4190039780452188, "grad_norm": 4.994534120284078, "learning_rate": 6.327807889630289e-06, "loss": 0.3191, "step": 28180 }, { "epoch": 1.4195075280729141, "grad_norm": 4.403951432927447, "learning_rate": 6.324983298954124e-06, "loss": 0.3832, "step": 28190 }, { "epoch": 1.4200110781006092, "grad_norm": 4.447919479870438, "learning_rate": 6.322158253423227e-06, "loss": 0.3176, "step": 28200 }, { "epoch": 1.4205146281283045, "grad_norm": 3.358504754122989, "learning_rate": 6.319332754007412e-06, "loss": 0.3486, "step": 28210 }, { "epoch": 1.4210181781559998, "grad_norm": 2.7085349234925786, "learning_rate": 6.3165068016766455e-06, "loss": 0.3451, "step": 28220 }, { "epoch": 1.421521728183695, "grad_norm": 3.5048321001851126, "learning_rate": 6.313680397401052e-06, "loss": 0.2912, "step": 28230 }, { "epoch": 1.4220252782113902, "grad_norm": 3.41667137418146, "learning_rate": 6.31085354215091e-06, "loss": 0.3102, "step": 28240 }, { "epoch": 1.4225288282390856, "grad_norm": 2.9020106985061473, "learning_rate": 6.308026236896654e-06, "loss": 0.3022, "step": 28250 }, { "epoch": 1.4230323782667809, "grad_norm": 3.317510723588134, "learning_rate": 6.30519848260887e-06, "loss": 0.2719, "step": 28260 }, { "epoch": 1.4235359282944762, "grad_norm": 3.846183972963548, "learning_rate": 6.302370280258302e-06, "loss": 0.2586, "step": 28270 }, { "epoch": 1.4240394783221713, "grad_norm": 4.305330062725501, "learning_rate": 6.299541630815848e-06, "loss": 0.3252, "step": 28280 }, { "epoch": 1.4245430283498666, "grad_norm": 3.7560541413131614, "learning_rate": 6.296712535252555e-06, "loss": 0.2636, "step": 28290 }, { "epoch": 1.4250465783775619, "grad_norm": 3.0857671179098514, "learning_rate": 6.293882994539626e-06, "loss": 0.3286, "step": 28300 }, { "epoch": 1.425550128405257, "grad_norm": 3.89784700967892, "learning_rate": 6.291053009648418e-06, "loss": 0.3338, "step": 28310 }, { "epoch": 1.4260536784329523, "grad_norm": 4.5713085338037525, "learning_rate": 6.288222581550438e-06, "loss": 0.2923, "step": 28320 }, { "epoch": 1.4265572284606476, "grad_norm": 4.212727758544674, "learning_rate": 6.285391711217347e-06, "loss": 0.3179, "step": 28330 }, { "epoch": 1.427060778488343, "grad_norm": 4.683915948782142, "learning_rate": 6.282560399620956e-06, "loss": 0.3211, "step": 28340 }, { "epoch": 1.4275643285160382, "grad_norm": 4.025512700357127, "learning_rate": 6.279728647733231e-06, "loss": 0.2819, "step": 28350 }, { "epoch": 1.4280678785437333, "grad_norm": 2.941034270351989, "learning_rate": 6.2768964565262834e-06, "loss": 0.3193, "step": 28360 }, { "epoch": 1.4285714285714286, "grad_norm": 4.932106413831223, "learning_rate": 6.27406382697238e-06, "loss": 0.2708, "step": 28370 }, { "epoch": 1.429074978599124, "grad_norm": 4.461916546871861, "learning_rate": 6.271230760043938e-06, "loss": 0.3593, "step": 28380 }, { "epoch": 1.429578528626819, "grad_norm": 4.500596023742651, "learning_rate": 6.268397256713522e-06, "loss": 0.3025, "step": 28390 }, { "epoch": 1.4300820786545143, "grad_norm": 3.583997370860703, "learning_rate": 6.2655633179538485e-06, "loss": 0.3345, "step": 28400 }, { "epoch": 1.4305856286822096, "grad_norm": 3.9432456167273795, "learning_rate": 6.262728944737784e-06, "loss": 0.3, "step": 28410 }, { "epoch": 1.4310891787099047, "grad_norm": 4.7226000896920475, "learning_rate": 6.259894138038342e-06, "loss": 0.3024, "step": 28420 }, { "epoch": 1.4315927287376, "grad_norm": 3.5926582369155255, "learning_rate": 6.257058898828685e-06, "loss": 0.2941, "step": 28430 }, { "epoch": 1.4320962787652953, "grad_norm": 4.308769446583816, "learning_rate": 6.254223228082126e-06, "loss": 0.3115, "step": 28440 }, { "epoch": 1.4325998287929906, "grad_norm": 2.693232705671848, "learning_rate": 6.251387126772126e-06, "loss": 0.2602, "step": 28450 }, { "epoch": 1.433103378820686, "grad_norm": 4.450525922177874, "learning_rate": 6.24855059587229e-06, "loss": 0.3592, "step": 28460 }, { "epoch": 1.433606928848381, "grad_norm": 3.1947001550520366, "learning_rate": 6.245713636356376e-06, "loss": 0.2964, "step": 28470 }, { "epoch": 1.4341104788760763, "grad_norm": 2.726645823933083, "learning_rate": 6.242876249198285e-06, "loss": 0.2596, "step": 28480 }, { "epoch": 1.4346140289037717, "grad_norm": 3.707429804615271, "learning_rate": 6.240038435372068e-06, "loss": 0.2808, "step": 28490 }, { "epoch": 1.4351175789314667, "grad_norm": 4.099678371438592, "learning_rate": 6.237200195851918e-06, "loss": 0.3196, "step": 28500 }, { "epoch": 1.435621128959162, "grad_norm": 3.34949034947126, "learning_rate": 6.234361531612179e-06, "loss": 0.336, "step": 28510 }, { "epoch": 1.4361246789868574, "grad_norm": 4.725027656227982, "learning_rate": 6.231522443627337e-06, "loss": 0.2768, "step": 28520 }, { "epoch": 1.4366282290145527, "grad_norm": 5.121930071811916, "learning_rate": 6.228682932872025e-06, "loss": 0.3014, "step": 28530 }, { "epoch": 1.437131779042248, "grad_norm": 3.004684296409958, "learning_rate": 6.2258430003210245e-06, "loss": 0.3125, "step": 28540 }, { "epoch": 1.437635329069943, "grad_norm": 5.259914331351875, "learning_rate": 6.2230026469492555e-06, "loss": 0.3129, "step": 28550 }, { "epoch": 1.4381388790976384, "grad_norm": 4.890729385258488, "learning_rate": 6.220161873731786e-06, "loss": 0.3391, "step": 28560 }, { "epoch": 1.4386424291253337, "grad_norm": 4.021794676857036, "learning_rate": 6.217320681643828e-06, "loss": 0.3713, "step": 28570 }, { "epoch": 1.4391459791530288, "grad_norm": 3.8919421052041883, "learning_rate": 6.214479071660736e-06, "loss": 0.3092, "step": 28580 }, { "epoch": 1.439649529180724, "grad_norm": 3.912978241614037, "learning_rate": 6.211637044758012e-06, "loss": 0.3403, "step": 28590 }, { "epoch": 1.4401530792084194, "grad_norm": 3.639826579558629, "learning_rate": 6.208794601911295e-06, "loss": 0.2865, "step": 28600 }, { "epoch": 1.4406566292361145, "grad_norm": 3.1456925152203046, "learning_rate": 6.205951744096369e-06, "loss": 0.2827, "step": 28610 }, { "epoch": 1.4411601792638098, "grad_norm": 2.854596928575976, "learning_rate": 6.2031084722891645e-06, "loss": 0.2812, "step": 28620 }, { "epoch": 1.441663729291505, "grad_norm": 4.540404915845113, "learning_rate": 6.200264787465749e-06, "loss": 0.2934, "step": 28630 }, { "epoch": 1.4421672793192004, "grad_norm": 3.093609103472885, "learning_rate": 6.197420690602332e-06, "loss": 0.294, "step": 28640 }, { "epoch": 1.4426708293468957, "grad_norm": 3.130936820681925, "learning_rate": 6.19457618267527e-06, "loss": 0.3309, "step": 28650 }, { "epoch": 1.4431743793745908, "grad_norm": 3.25512170324195, "learning_rate": 6.191731264661055e-06, "loss": 0.298, "step": 28660 }, { "epoch": 1.4436779294022861, "grad_norm": 3.076597089795293, "learning_rate": 6.188885937536322e-06, "loss": 0.3432, "step": 28670 }, { "epoch": 1.4441814794299814, "grad_norm": 3.950754328267733, "learning_rate": 6.186040202277842e-06, "loss": 0.2652, "step": 28680 }, { "epoch": 1.4446850294576765, "grad_norm": 3.4804502762575127, "learning_rate": 6.183194059862537e-06, "loss": 0.291, "step": 28690 }, { "epoch": 1.4451885794853718, "grad_norm": 4.520512249929671, "learning_rate": 6.180347511267456e-06, "loss": 0.3253, "step": 28700 }, { "epoch": 1.4456921295130671, "grad_norm": 3.7946974656305574, "learning_rate": 6.177500557469796e-06, "loss": 0.2846, "step": 28710 }, { "epoch": 1.4461956795407624, "grad_norm": 4.217717795429432, "learning_rate": 6.174653199446888e-06, "loss": 0.3172, "step": 28720 }, { "epoch": 1.4466992295684578, "grad_norm": 3.1572598328250003, "learning_rate": 6.1718054381762095e-06, "loss": 0.285, "step": 28730 }, { "epoch": 1.4472027795961528, "grad_norm": 3.8250565196468758, "learning_rate": 6.168957274635366e-06, "loss": 0.3077, "step": 28740 }, { "epoch": 1.4477063296238482, "grad_norm": 4.391314123875808, "learning_rate": 6.166108709802107e-06, "loss": 0.2981, "step": 28750 }, { "epoch": 1.4482098796515435, "grad_norm": 3.133700887164687, "learning_rate": 6.16325974465432e-06, "loss": 0.3251, "step": 28760 }, { "epoch": 1.4487134296792386, "grad_norm": 3.519743294810279, "learning_rate": 6.160410380170029e-06, "loss": 0.2722, "step": 28770 }, { "epoch": 1.4492169797069339, "grad_norm": 3.7730308593168465, "learning_rate": 6.1575606173273925e-06, "loss": 0.322, "step": 28780 }, { "epoch": 1.4497205297346292, "grad_norm": 3.3237849179200505, "learning_rate": 6.154710457104713e-06, "loss": 0.3156, "step": 28790 }, { "epoch": 1.4502240797623243, "grad_norm": 3.923993671874384, "learning_rate": 6.15185990048042e-06, "loss": 0.3297, "step": 28800 }, { "epoch": 1.4507276297900196, "grad_norm": 3.542447298701915, "learning_rate": 6.149008948433086e-06, "loss": 0.2681, "step": 28810 }, { "epoch": 1.4512311798177149, "grad_norm": 4.659793261495263, "learning_rate": 6.146157601941417e-06, "loss": 0.2855, "step": 28820 }, { "epoch": 1.4517347298454102, "grad_norm": 3.858669181508099, "learning_rate": 6.143305861984253e-06, "loss": 0.3058, "step": 28830 }, { "epoch": 1.4522382798731055, "grad_norm": 3.8963423923192817, "learning_rate": 6.140453729540571e-06, "loss": 0.2732, "step": 28840 }, { "epoch": 1.4527418299008006, "grad_norm": 5.727112722912702, "learning_rate": 6.137601205589483e-06, "loss": 0.3463, "step": 28850 }, { "epoch": 1.453245379928496, "grad_norm": 4.118447360510199, "learning_rate": 6.134748291110234e-06, "loss": 0.2889, "step": 28860 }, { "epoch": 1.4537489299561912, "grad_norm": 2.5342003090576006, "learning_rate": 6.131894987082203e-06, "loss": 0.2868, "step": 28870 }, { "epoch": 1.4542524799838863, "grad_norm": 4.684440484027327, "learning_rate": 6.1290412944849045e-06, "loss": 0.3618, "step": 28880 }, { "epoch": 1.4547560300115816, "grad_norm": 4.184906837148051, "learning_rate": 6.126187214297983e-06, "loss": 0.3372, "step": 28890 }, { "epoch": 1.455259580039277, "grad_norm": 1.6136369967836903, "learning_rate": 6.123332747501223e-06, "loss": 0.2599, "step": 28900 }, { "epoch": 1.4557631300669722, "grad_norm": 3.437311249367683, "learning_rate": 6.1204778950745305e-06, "loss": 0.2497, "step": 28910 }, { "epoch": 1.4562666800946675, "grad_norm": 5.049296505622157, "learning_rate": 6.117622657997956e-06, "loss": 0.3064, "step": 28920 }, { "epoch": 1.4567702301223626, "grad_norm": 4.385254558294588, "learning_rate": 6.114767037251675e-06, "loss": 0.3322, "step": 28930 }, { "epoch": 1.457273780150058, "grad_norm": 3.594923479701713, "learning_rate": 6.111911033815994e-06, "loss": 0.3013, "step": 28940 }, { "epoch": 1.4577773301777532, "grad_norm": 2.715111576470561, "learning_rate": 6.109054648671353e-06, "loss": 0.2756, "step": 28950 }, { "epoch": 1.4582808802054483, "grad_norm": 3.737637834413121, "learning_rate": 6.106197882798327e-06, "loss": 0.3484, "step": 28960 }, { "epoch": 1.4587844302331436, "grad_norm": 4.522171436209408, "learning_rate": 6.1033407371776155e-06, "loss": 0.2673, "step": 28970 }, { "epoch": 1.459287980260839, "grad_norm": 2.7209162819466934, "learning_rate": 6.10048321279005e-06, "loss": 0.3229, "step": 28980 }, { "epoch": 1.459791530288534, "grad_norm": 3.7555560724827743, "learning_rate": 6.097625310616591e-06, "loss": 0.3119, "step": 28990 }, { "epoch": 1.4602950803162293, "grad_norm": 3.6213193156121184, "learning_rate": 6.094767031638337e-06, "loss": 0.2842, "step": 29000 }, { "epoch": 1.4607986303439247, "grad_norm": 4.297995342105557, "learning_rate": 6.091908376836502e-06, "loss": 0.3012, "step": 29010 }, { "epoch": 1.46130218037162, "grad_norm": 3.1905463473057107, "learning_rate": 6.089049347192439e-06, "loss": 0.2487, "step": 29020 }, { "epoch": 1.4618057303993153, "grad_norm": 4.574182785569621, "learning_rate": 6.086189943687626e-06, "loss": 0.3078, "step": 29030 }, { "epoch": 1.4623092804270104, "grad_norm": 4.204762722026066, "learning_rate": 6.083330167303674e-06, "loss": 0.3273, "step": 29040 }, { "epoch": 1.4628128304547057, "grad_norm": 2.832717362944739, "learning_rate": 6.080470019022313e-06, "loss": 0.2942, "step": 29050 }, { "epoch": 1.463316380482401, "grad_norm": 3.747061085413123, "learning_rate": 6.07760949982541e-06, "loss": 0.2822, "step": 29060 }, { "epoch": 1.463819930510096, "grad_norm": 3.6554515041150752, "learning_rate": 6.074748610694953e-06, "loss": 0.2548, "step": 29070 }, { "epoch": 1.4643234805377914, "grad_norm": 3.619075670474566, "learning_rate": 6.07188735261306e-06, "loss": 0.2861, "step": 29080 }, { "epoch": 1.4648270305654867, "grad_norm": 3.8138107418624454, "learning_rate": 6.0690257265619735e-06, "loss": 0.2995, "step": 29090 }, { "epoch": 1.465330580593182, "grad_norm": 3.0771438201994568, "learning_rate": 6.066163733524066e-06, "loss": 0.3066, "step": 29100 }, { "epoch": 1.4658341306208773, "grad_norm": 4.666237997088634, "learning_rate": 6.063301374481832e-06, "loss": 0.2992, "step": 29110 }, { "epoch": 1.4663376806485724, "grad_norm": 4.049762839028336, "learning_rate": 6.060438650417895e-06, "loss": 0.3376, "step": 29120 }, { "epoch": 1.4668412306762677, "grad_norm": 3.7024276777660643, "learning_rate": 6.057575562315001e-06, "loss": 0.2984, "step": 29130 }, { "epoch": 1.467344780703963, "grad_norm": 3.6521188109897653, "learning_rate": 6.054712111156024e-06, "loss": 0.2678, "step": 29140 }, { "epoch": 1.467848330731658, "grad_norm": 3.684244387289537, "learning_rate": 6.051848297923957e-06, "loss": 0.3184, "step": 29150 }, { "epoch": 1.4683518807593534, "grad_norm": 3.027596383487331, "learning_rate": 6.048984123601923e-06, "loss": 0.3046, "step": 29160 }, { "epoch": 1.4688554307870487, "grad_norm": 4.603757496499363, "learning_rate": 6.0461195891731685e-06, "loss": 0.3404, "step": 29170 }, { "epoch": 1.4693589808147438, "grad_norm": 3.8388271749535066, "learning_rate": 6.0432546956210605e-06, "loss": 0.2659, "step": 29180 }, { "epoch": 1.4698625308424391, "grad_norm": 3.509503449683088, "learning_rate": 6.040389443929091e-06, "loss": 0.2631, "step": 29190 }, { "epoch": 1.4703660808701344, "grad_norm": 3.643071072673201, "learning_rate": 6.037523835080874e-06, "loss": 0.2643, "step": 29200 }, { "epoch": 1.4708696308978297, "grad_norm": 3.29965235353002, "learning_rate": 6.034657870060151e-06, "loss": 0.3059, "step": 29210 }, { "epoch": 1.471373180925525, "grad_norm": 3.9583795408191196, "learning_rate": 6.031791549850777e-06, "loss": 0.3232, "step": 29220 }, { "epoch": 1.4718767309532201, "grad_norm": 3.6540987088105354, "learning_rate": 6.028924875436737e-06, "loss": 0.2545, "step": 29230 }, { "epoch": 1.4723802809809154, "grad_norm": 3.043417192873067, "learning_rate": 6.0260578478021324e-06, "loss": 0.2921, "step": 29240 }, { "epoch": 1.4728838310086108, "grad_norm": 3.0272861689325152, "learning_rate": 6.023190467931188e-06, "loss": 0.3026, "step": 29250 }, { "epoch": 1.4733873810363058, "grad_norm": 3.695574858477805, "learning_rate": 6.020322736808248e-06, "loss": 0.2875, "step": 29260 }, { "epoch": 1.4738909310640012, "grad_norm": 4.264688091803442, "learning_rate": 6.017454655417783e-06, "loss": 0.3488, "step": 29270 }, { "epoch": 1.4743944810916965, "grad_norm": 4.501314398090253, "learning_rate": 6.014586224744374e-06, "loss": 0.3064, "step": 29280 }, { "epoch": 1.4748980311193918, "grad_norm": 3.3960557454235243, "learning_rate": 6.011717445772731e-06, "loss": 0.3123, "step": 29290 }, { "epoch": 1.475401581147087, "grad_norm": 3.6940890304243417, "learning_rate": 6.008848319487678e-06, "loss": 0.2992, "step": 29300 }, { "epoch": 1.4759051311747822, "grad_norm": 4.113254618689733, "learning_rate": 6.005978846874161e-06, "loss": 0.3378, "step": 29310 }, { "epoch": 1.4764086812024775, "grad_norm": 4.1007573950321445, "learning_rate": 6.003109028917241e-06, "loss": 0.3009, "step": 29320 }, { "epoch": 1.4769122312301728, "grad_norm": 4.26983440508492, "learning_rate": 6.000238866602104e-06, "loss": 0.3102, "step": 29330 }, { "epoch": 1.4774157812578679, "grad_norm": 2.7617791272293193, "learning_rate": 5.997368360914048e-06, "loss": 0.3162, "step": 29340 }, { "epoch": 1.4779193312855632, "grad_norm": 3.744479015024899, "learning_rate": 5.994497512838495e-06, "loss": 0.3274, "step": 29350 }, { "epoch": 1.4784228813132585, "grad_norm": 2.9881277107746635, "learning_rate": 5.991626323360976e-06, "loss": 0.3376, "step": 29360 }, { "epoch": 1.4789264313409538, "grad_norm": 2.9065964160584494, "learning_rate": 5.98875479346715e-06, "loss": 0.2902, "step": 29370 }, { "epoch": 1.479429981368649, "grad_norm": 3.871325655905098, "learning_rate": 5.985882924142783e-06, "loss": 0.3148, "step": 29380 }, { "epoch": 1.4799335313963442, "grad_norm": 3.7569738046268237, "learning_rate": 5.983010716373763e-06, "loss": 0.2848, "step": 29390 }, { "epoch": 1.4804370814240395, "grad_norm": 2.7476325791442258, "learning_rate": 5.9801381711460905e-06, "loss": 0.2608, "step": 29400 }, { "epoch": 1.4809406314517348, "grad_norm": 3.7190179295142323, "learning_rate": 5.977265289445889e-06, "loss": 0.2977, "step": 29410 }, { "epoch": 1.48144418147943, "grad_norm": 3.94313051661991, "learning_rate": 5.974392072259389e-06, "loss": 0.3571, "step": 29420 }, { "epoch": 1.4819477315071252, "grad_norm": 4.116927978713113, "learning_rate": 5.971518520572942e-06, "loss": 0.3271, "step": 29430 }, { "epoch": 1.4824512815348205, "grad_norm": 3.313194918081749, "learning_rate": 5.968644635373011e-06, "loss": 0.2577, "step": 29440 }, { "epoch": 1.4829548315625156, "grad_norm": 3.8411850461414425, "learning_rate": 5.9657704176461755e-06, "loss": 0.3032, "step": 29450 }, { "epoch": 1.483458381590211, "grad_norm": 4.521598316523058, "learning_rate": 5.962895868379126e-06, "loss": 0.2737, "step": 29460 }, { "epoch": 1.4839619316179062, "grad_norm": 5.042567127606195, "learning_rate": 5.96002098855867e-06, "loss": 0.3567, "step": 29470 }, { "epoch": 1.4844654816456015, "grad_norm": 3.8830723896677175, "learning_rate": 5.95714577917173e-06, "loss": 0.2422, "step": 29480 }, { "epoch": 1.4849690316732969, "grad_norm": 4.413115285170891, "learning_rate": 5.954270241205338e-06, "loss": 0.3239, "step": 29490 }, { "epoch": 1.485472581700992, "grad_norm": 4.216536927103525, "learning_rate": 5.951394375646639e-06, "loss": 0.307, "step": 29500 }, { "epoch": 1.4859761317286873, "grad_norm": 4.606747590989987, "learning_rate": 5.948518183482893e-06, "loss": 0.3114, "step": 29510 }, { "epoch": 1.4864796817563826, "grad_norm": 4.442530587592485, "learning_rate": 5.945641665701471e-06, "loss": 0.3361, "step": 29520 }, { "epoch": 1.4869832317840777, "grad_norm": 2.0662097145506775, "learning_rate": 5.942764823289853e-06, "loss": 0.2823, "step": 29530 }, { "epoch": 1.487486781811773, "grad_norm": 2.4617859181442046, "learning_rate": 5.939887657235634e-06, "loss": 0.2462, "step": 29540 }, { "epoch": 1.4879903318394683, "grad_norm": 3.8128143752417403, "learning_rate": 5.937010168526519e-06, "loss": 0.2995, "step": 29550 }, { "epoch": 1.4884938818671636, "grad_norm": 5.273961796457555, "learning_rate": 5.934132358150325e-06, "loss": 0.3578, "step": 29560 }, { "epoch": 1.488997431894859, "grad_norm": 2.544250182576635, "learning_rate": 5.931254227094976e-06, "loss": 0.2683, "step": 29570 }, { "epoch": 1.489500981922554, "grad_norm": 2.842450465728315, "learning_rate": 5.928375776348511e-06, "loss": 0.2875, "step": 29580 }, { "epoch": 1.4900045319502493, "grad_norm": 3.4252767422881796, "learning_rate": 5.925497006899074e-06, "loss": 0.2887, "step": 29590 }, { "epoch": 1.4905080819779446, "grad_norm": 3.6196301866905456, "learning_rate": 5.92261791973492e-06, "loss": 0.2639, "step": 29600 }, { "epoch": 1.4910116320056397, "grad_norm": 3.976243008504522, "learning_rate": 5.919738515844413e-06, "loss": 0.3302, "step": 29610 }, { "epoch": 1.491515182033335, "grad_norm": 4.552768117953444, "learning_rate": 5.916858796216031e-06, "loss": 0.2938, "step": 29620 }, { "epoch": 1.4920187320610303, "grad_norm": 4.026920796707537, "learning_rate": 5.913978761838348e-06, "loss": 0.2621, "step": 29630 }, { "epoch": 1.4925222820887254, "grad_norm": 3.4624646559322443, "learning_rate": 5.91109841370006e-06, "loss": 0.324, "step": 29640 }, { "epoch": 1.4930258321164207, "grad_norm": 3.83541246235187, "learning_rate": 5.908217752789962e-06, "loss": 0.2982, "step": 29650 }, { "epoch": 1.493529382144116, "grad_norm": 4.779090134647459, "learning_rate": 5.90533678009696e-06, "loss": 0.2672, "step": 29660 }, { "epoch": 1.4940329321718113, "grad_norm": 3.7413803379101047, "learning_rate": 5.902455496610062e-06, "loss": 0.3025, "step": 29670 }, { "epoch": 1.4945364821995066, "grad_norm": 4.136731138526986, "learning_rate": 5.89957390331839e-06, "loss": 0.305, "step": 29680 }, { "epoch": 1.4950400322272017, "grad_norm": 3.180038692241235, "learning_rate": 5.896692001211169e-06, "loss": 0.2537, "step": 29690 }, { "epoch": 1.495543582254897, "grad_norm": 3.795801367911769, "learning_rate": 5.893809791277727e-06, "loss": 0.3092, "step": 29700 }, { "epoch": 1.4960471322825923, "grad_norm": 3.979597125849306, "learning_rate": 5.890927274507503e-06, "loss": 0.3211, "step": 29710 }, { "epoch": 1.4965506823102874, "grad_norm": 3.122560740452159, "learning_rate": 5.888044451890036e-06, "loss": 0.2855, "step": 29720 }, { "epoch": 1.4970542323379827, "grad_norm": 3.2134216706966057, "learning_rate": 5.8851613244149766e-06, "loss": 0.2517, "step": 29730 }, { "epoch": 1.497557782365678, "grad_norm": 2.6526314931510346, "learning_rate": 5.882277893072075e-06, "loss": 0.2813, "step": 29740 }, { "epoch": 1.4980613323933734, "grad_norm": 3.765595757256919, "learning_rate": 5.879394158851185e-06, "loss": 0.327, "step": 29750 }, { "epoch": 1.4985648824210687, "grad_norm": 3.983507241443811, "learning_rate": 5.876510122742268e-06, "loss": 0.317, "step": 29760 }, { "epoch": 1.4990684324487638, "grad_norm": 3.2892390441131694, "learning_rate": 5.873625785735389e-06, "loss": 0.2912, "step": 29770 }, { "epoch": 1.499571982476459, "grad_norm": 3.9860376631370613, "learning_rate": 5.87074114882071e-06, "loss": 0.2647, "step": 29780 }, { "epoch": 1.5000755325041544, "grad_norm": 4.043772297073386, "learning_rate": 5.867856212988505e-06, "loss": 0.278, "step": 29790 }, { "epoch": 1.5005790825318495, "grad_norm": 4.023142801132038, "learning_rate": 5.864970979229144e-06, "loss": 0.3155, "step": 29800 }, { "epoch": 1.5010826325595448, "grad_norm": 3.5077384384223578, "learning_rate": 5.862085448533104e-06, "loss": 0.2726, "step": 29810 }, { "epoch": 1.50158618258724, "grad_norm": 3.735444493493266, "learning_rate": 5.8591996218909575e-06, "loss": 0.3277, "step": 29820 }, { "epoch": 1.5020897326149352, "grad_norm": 2.9588472224104194, "learning_rate": 5.856313500293387e-06, "loss": 0.2817, "step": 29830 }, { "epoch": 1.5025932826426307, "grad_norm": 4.49148850100315, "learning_rate": 5.853427084731168e-06, "loss": 0.2816, "step": 29840 }, { "epoch": 1.5030968326703258, "grad_norm": 4.563193177008002, "learning_rate": 5.850540376195184e-06, "loss": 0.3913, "step": 29850 }, { "epoch": 1.503600382698021, "grad_norm": 3.46058746970458, "learning_rate": 5.847653375676414e-06, "loss": 0.2616, "step": 29860 }, { "epoch": 1.5041039327257164, "grad_norm": 3.7893471406982133, "learning_rate": 5.8447660841659386e-06, "loss": 0.306, "step": 29870 }, { "epoch": 1.5046074827534115, "grad_norm": 5.448568181233486, "learning_rate": 5.841878502654939e-06, "loss": 0.3349, "step": 29880 }, { "epoch": 1.5051110327811068, "grad_norm": 2.7571240055705784, "learning_rate": 5.838990632134698e-06, "loss": 0.2533, "step": 29890 }, { "epoch": 1.5056145828088021, "grad_norm": 3.922232208162377, "learning_rate": 5.836102473596592e-06, "loss": 0.3101, "step": 29900 }, { "epoch": 1.5061181328364972, "grad_norm": 4.152040453100821, "learning_rate": 5.833214028032102e-06, "loss": 0.3435, "step": 29910 }, { "epoch": 1.5066216828641925, "grad_norm": 3.8134940913293884, "learning_rate": 5.830325296432803e-06, "loss": 0.3338, "step": 29920 }, { "epoch": 1.5071252328918878, "grad_norm": 3.9566989152423573, "learning_rate": 5.8274362797903735e-06, "loss": 0.3109, "step": 29930 }, { "epoch": 1.507628782919583, "grad_norm": 3.9241753102594816, "learning_rate": 5.824546979096582e-06, "loss": 0.3412, "step": 29940 }, { "epoch": 1.5081323329472784, "grad_norm": 4.390403654088484, "learning_rate": 5.821657395343304e-06, "loss": 0.288, "step": 29950 }, { "epoch": 1.5086358829749735, "grad_norm": 3.9226506127600733, "learning_rate": 5.8187675295225045e-06, "loss": 0.2932, "step": 29960 }, { "epoch": 1.5091394330026688, "grad_norm": 3.3616145522846, "learning_rate": 5.81587738262625e-06, "loss": 0.2515, "step": 29970 }, { "epoch": 1.5096429830303641, "grad_norm": 2.3665774289502783, "learning_rate": 5.812986955646699e-06, "loss": 0.2972, "step": 29980 }, { "epoch": 1.5101465330580592, "grad_norm": 3.5111655971850992, "learning_rate": 5.810096249576112e-06, "loss": 0.3372, "step": 29990 }, { "epoch": 1.5106500830857545, "grad_norm": 3.7612121071814872, "learning_rate": 5.80720526540684e-06, "loss": 0.3175, "step": 30000 }, { "epoch": 1.5111536331134499, "grad_norm": 3.714571216739201, "learning_rate": 5.8043140041313325e-06, "loss": 0.2843, "step": 30010 }, { "epoch": 1.511657183141145, "grad_norm": 3.177877848211641, "learning_rate": 5.801422466742133e-06, "loss": 0.2467, "step": 30020 }, { "epoch": 1.5121607331688405, "grad_norm": 5.353378849886052, "learning_rate": 5.798530654231878e-06, "loss": 0.3365, "step": 30030 }, { "epoch": 1.5126642831965356, "grad_norm": 3.2206172345988278, "learning_rate": 5.795638567593305e-06, "loss": 0.2523, "step": 30040 }, { "epoch": 1.5131678332242309, "grad_norm": 3.4168221271567796, "learning_rate": 5.792746207819238e-06, "loss": 0.2802, "step": 30050 }, { "epoch": 1.5136713832519262, "grad_norm": 3.3584220254905888, "learning_rate": 5.7898535759025975e-06, "loss": 0.2678, "step": 30060 }, { "epoch": 1.5141749332796213, "grad_norm": 2.578958395475349, "learning_rate": 5.786960672836399e-06, "loss": 0.2861, "step": 30070 }, { "epoch": 1.5146784833073166, "grad_norm": 4.414545998788012, "learning_rate": 5.784067499613749e-06, "loss": 0.327, "step": 30080 }, { "epoch": 1.515182033335012, "grad_norm": 3.7103509722004766, "learning_rate": 5.781174057227848e-06, "loss": 0.2653, "step": 30090 }, { "epoch": 1.515685583362707, "grad_norm": 4.161206585320679, "learning_rate": 5.778280346671989e-06, "loss": 0.2631, "step": 30100 }, { "epoch": 1.5161891333904023, "grad_norm": 3.488684078805748, "learning_rate": 5.775386368939556e-06, "loss": 0.3642, "step": 30110 }, { "epoch": 1.5166926834180976, "grad_norm": 2.1345793927557355, "learning_rate": 5.772492125024025e-06, "loss": 0.2355, "step": 30120 }, { "epoch": 1.5171962334457927, "grad_norm": 3.6874967023338376, "learning_rate": 5.769597615918964e-06, "loss": 0.308, "step": 30130 }, { "epoch": 1.5176997834734882, "grad_norm": 3.252252023746854, "learning_rate": 5.766702842618033e-06, "loss": 0.256, "step": 30140 }, { "epoch": 1.5182033335011833, "grad_norm": 4.673208773207425, "learning_rate": 5.763807806114978e-06, "loss": 0.3288, "step": 30150 }, { "epoch": 1.5187068835288786, "grad_norm": 3.5548934506822514, "learning_rate": 5.7609125074036425e-06, "loss": 0.3185, "step": 30160 }, { "epoch": 1.519210433556574, "grad_norm": 3.0684005976820807, "learning_rate": 5.758016947477955e-06, "loss": 0.298, "step": 30170 }, { "epoch": 1.519713983584269, "grad_norm": 10.123779679710266, "learning_rate": 5.755121127331933e-06, "loss": 0.3005, "step": 30180 }, { "epoch": 1.5202175336119643, "grad_norm": 3.842192969661612, "learning_rate": 5.752225047959686e-06, "loss": 0.2945, "step": 30190 }, { "epoch": 1.5207210836396596, "grad_norm": 4.130751354317966, "learning_rate": 5.749328710355414e-06, "loss": 0.2752, "step": 30200 }, { "epoch": 1.5212246336673547, "grad_norm": 3.7495875116618533, "learning_rate": 5.746432115513401e-06, "loss": 0.2974, "step": 30210 }, { "epoch": 1.5217281836950503, "grad_norm": 4.219894071861173, "learning_rate": 5.743535264428024e-06, "loss": 0.2468, "step": 30220 }, { "epoch": 1.5222317337227453, "grad_norm": 4.744951647905398, "learning_rate": 5.740638158093742e-06, "loss": 0.2985, "step": 30230 }, { "epoch": 1.5227352837504406, "grad_norm": 3.240780116433571, "learning_rate": 5.73774079750511e-06, "loss": 0.3016, "step": 30240 }, { "epoch": 1.523238833778136, "grad_norm": 4.44372922669623, "learning_rate": 5.734843183656761e-06, "loss": 0.3006, "step": 30250 }, { "epoch": 1.523742383805831, "grad_norm": 3.0549483459135027, "learning_rate": 5.7319453175434225e-06, "loss": 0.2909, "step": 30260 }, { "epoch": 1.5242459338335264, "grad_norm": 3.8329793154697462, "learning_rate": 5.729047200159905e-06, "loss": 0.3052, "step": 30270 }, { "epoch": 1.5247494838612217, "grad_norm": 3.6235870323997075, "learning_rate": 5.726148832501106e-06, "loss": 0.2838, "step": 30280 }, { "epoch": 1.5252530338889168, "grad_norm": 3.8269409643663312, "learning_rate": 5.723250215562007e-06, "loss": 0.3481, "step": 30290 }, { "epoch": 1.525756583916612, "grad_norm": 3.531692992724791, "learning_rate": 5.720351350337678e-06, "loss": 0.3051, "step": 30300 }, { "epoch": 1.5262601339443074, "grad_norm": 3.7532837913583132, "learning_rate": 5.7174522378232755e-06, "loss": 0.2992, "step": 30310 }, { "epoch": 1.5267636839720025, "grad_norm": 4.734712103595816, "learning_rate": 5.714552879014035e-06, "loss": 0.3126, "step": 30320 }, { "epoch": 1.527267233999698, "grad_norm": 4.070136631995312, "learning_rate": 5.711653274905282e-06, "loss": 0.2515, "step": 30330 }, { "epoch": 1.527770784027393, "grad_norm": 4.085067024999472, "learning_rate": 5.708753426492422e-06, "loss": 0.2875, "step": 30340 }, { "epoch": 1.5282743340550884, "grad_norm": 3.8240135307878593, "learning_rate": 5.705853334770951e-06, "loss": 0.2992, "step": 30350 }, { "epoch": 1.5287778840827837, "grad_norm": 4.18481472332677, "learning_rate": 5.7029530007364384e-06, "loss": 0.3904, "step": 30360 }, { "epoch": 1.5292814341104788, "grad_norm": 4.199334416677623, "learning_rate": 5.700052425384548e-06, "loss": 0.3229, "step": 30370 }, { "epoch": 1.529784984138174, "grad_norm": 4.080203214874504, "learning_rate": 5.697151609711017e-06, "loss": 0.2898, "step": 30380 }, { "epoch": 1.5302885341658694, "grad_norm": 3.690244913997835, "learning_rate": 5.694250554711671e-06, "loss": 0.3617, "step": 30390 }, { "epoch": 1.5307920841935645, "grad_norm": 4.242819415905098, "learning_rate": 5.691349261382415e-06, "loss": 0.3288, "step": 30400 }, { "epoch": 1.53129563422126, "grad_norm": 4.320534258531322, "learning_rate": 5.688447730719239e-06, "loss": 0.2543, "step": 30410 }, { "epoch": 1.5317991842489551, "grad_norm": 2.935504583318846, "learning_rate": 5.685545963718208e-06, "loss": 0.2888, "step": 30420 }, { "epoch": 1.5323027342766504, "grad_norm": 3.241377519569252, "learning_rate": 5.682643961375475e-06, "loss": 0.3069, "step": 30430 }, { "epoch": 1.5328062843043457, "grad_norm": 4.430147589468046, "learning_rate": 5.67974172468727e-06, "loss": 0.3323, "step": 30440 }, { "epoch": 1.5333098343320408, "grad_norm": 3.7047554815405483, "learning_rate": 5.676839254649907e-06, "loss": 0.3215, "step": 30450 }, { "epoch": 1.5338133843597361, "grad_norm": 3.9525596002228616, "learning_rate": 5.673936552259774e-06, "loss": 0.2504, "step": 30460 }, { "epoch": 1.5343169343874314, "grad_norm": 4.161019334127642, "learning_rate": 5.6710336185133445e-06, "loss": 0.3428, "step": 30470 }, { "epoch": 1.5348204844151265, "grad_norm": 4.795940832272832, "learning_rate": 5.668130454407168e-06, "loss": 0.3026, "step": 30480 }, { "epoch": 1.5353240344428218, "grad_norm": 3.4161062323953737, "learning_rate": 5.6652270609378754e-06, "loss": 0.2697, "step": 30490 }, { "epoch": 1.5358275844705171, "grad_norm": 4.051254155985797, "learning_rate": 5.662323439102174e-06, "loss": 0.3047, "step": 30500 }, { "epoch": 1.5363311344982122, "grad_norm": 3.793071499061478, "learning_rate": 5.659419589896854e-06, "loss": 0.3778, "step": 30510 }, { "epoch": 1.5368346845259078, "grad_norm": 4.0989961638919254, "learning_rate": 5.656515514318778e-06, "loss": 0.2935, "step": 30520 }, { "epoch": 1.5373382345536029, "grad_norm": 2.97576602208688, "learning_rate": 5.653611213364888e-06, "loss": 0.3291, "step": 30530 }, { "epoch": 1.5378417845812982, "grad_norm": 4.823233296314914, "learning_rate": 5.650706688032205e-06, "loss": 0.298, "step": 30540 }, { "epoch": 1.5383453346089935, "grad_norm": 2.873012971848975, "learning_rate": 5.647801939317827e-06, "loss": 0.3211, "step": 30550 }, { "epoch": 1.5388488846366886, "grad_norm": 3.632777639685845, "learning_rate": 5.644896968218927e-06, "loss": 0.3147, "step": 30560 }, { "epoch": 1.5393524346643839, "grad_norm": 3.245436564815796, "learning_rate": 5.641991775732756e-06, "loss": 0.2943, "step": 30570 }, { "epoch": 1.5398559846920792, "grad_norm": 3.2467847201540296, "learning_rate": 5.6390863628566385e-06, "loss": 0.2755, "step": 30580 }, { "epoch": 1.5403595347197743, "grad_norm": 3.933172202054104, "learning_rate": 5.636180730587978e-06, "loss": 0.274, "step": 30590 }, { "epoch": 1.5408630847474698, "grad_norm": 4.038663468553192, "learning_rate": 5.633274879924251e-06, "loss": 0.272, "step": 30600 }, { "epoch": 1.541366634775165, "grad_norm": 3.646648829148728, "learning_rate": 5.63036881186301e-06, "loss": 0.2601, "step": 30610 }, { "epoch": 1.5418701848028602, "grad_norm": 4.616011069349801, "learning_rate": 5.627462527401882e-06, "loss": 0.3179, "step": 30620 }, { "epoch": 1.5423737348305555, "grad_norm": 2.884586635116102, "learning_rate": 5.624556027538566e-06, "loss": 0.2728, "step": 30630 }, { "epoch": 1.5428772848582506, "grad_norm": 4.027331630359802, "learning_rate": 5.62164931327084e-06, "loss": 0.3238, "step": 30640 }, { "epoch": 1.543380834885946, "grad_norm": 3.219623200709874, "learning_rate": 5.618742385596551e-06, "loss": 0.3178, "step": 30650 }, { "epoch": 1.5438843849136412, "grad_norm": 3.4704951969547655, "learning_rate": 5.61583524551362e-06, "loss": 0.3228, "step": 30660 }, { "epoch": 1.5443879349413363, "grad_norm": 3.463191616589382, "learning_rate": 5.612927894020042e-06, "loss": 0.2892, "step": 30670 }, { "epoch": 1.5448914849690318, "grad_norm": 3.7312108222205693, "learning_rate": 5.610020332113888e-06, "loss": 0.3536, "step": 30680 }, { "epoch": 1.545395034996727, "grad_norm": 2.0891975210567564, "learning_rate": 5.607112560793293e-06, "loss": 0.2525, "step": 30690 }, { "epoch": 1.545898585024422, "grad_norm": 2.7712536683427724, "learning_rate": 5.60420458105647e-06, "loss": 0.2864, "step": 30700 }, { "epoch": 1.5464021350521175, "grad_norm": 2.2821267642628458, "learning_rate": 5.601296393901703e-06, "loss": 0.2138, "step": 30710 }, { "epoch": 1.5469056850798126, "grad_norm": 4.533578299439686, "learning_rate": 5.598388000327347e-06, "loss": 0.2809, "step": 30720 }, { "epoch": 1.547409235107508, "grad_norm": 4.168655211708476, "learning_rate": 5.595479401331823e-06, "loss": 0.2945, "step": 30730 }, { "epoch": 1.5479127851352033, "grad_norm": 3.3497624383128914, "learning_rate": 5.5925705979136305e-06, "loss": 0.2548, "step": 30740 }, { "epoch": 1.5484163351628983, "grad_norm": 3.84796373610443, "learning_rate": 5.589661591071332e-06, "loss": 0.3346, "step": 30750 }, { "epoch": 1.5489198851905936, "grad_norm": 4.095337465643764, "learning_rate": 5.5867523818035695e-06, "loss": 0.2684, "step": 30760 }, { "epoch": 1.549423435218289, "grad_norm": 5.14317730446733, "learning_rate": 5.58384297110904e-06, "loss": 0.2669, "step": 30770 }, { "epoch": 1.549926985245984, "grad_norm": 3.2968247322041315, "learning_rate": 5.580933359986524e-06, "loss": 0.2793, "step": 30780 }, { "epoch": 1.5504305352736796, "grad_norm": 2.9064966500667495, "learning_rate": 5.57802354943486e-06, "loss": 0.3078, "step": 30790 }, { "epoch": 1.5509340853013747, "grad_norm": 4.796394306451247, "learning_rate": 5.575113540452963e-06, "loss": 0.2681, "step": 30800 }, { "epoch": 1.55143763532907, "grad_norm": 4.216335786483377, "learning_rate": 5.572203334039811e-06, "loss": 0.3122, "step": 30810 }, { "epoch": 1.5519411853567653, "grad_norm": 3.1263528794297675, "learning_rate": 5.569292931194451e-06, "loss": 0.302, "step": 30820 }, { "epoch": 1.5524447353844604, "grad_norm": 3.4100886611675394, "learning_rate": 5.566382332916e-06, "loss": 0.2454, "step": 30830 }, { "epoch": 1.5529482854121557, "grad_norm": 3.548952000910308, "learning_rate": 5.563471540203638e-06, "loss": 0.2582, "step": 30840 }, { "epoch": 1.553451835439851, "grad_norm": 3.8660387138587917, "learning_rate": 5.560560554056614e-06, "loss": 0.2711, "step": 30850 }, { "epoch": 1.553955385467546, "grad_norm": 4.099001504322411, "learning_rate": 5.557649375474244e-06, "loss": 0.2911, "step": 30860 }, { "epoch": 1.5544589354952416, "grad_norm": 2.7058821070635126, "learning_rate": 5.554738005455906e-06, "loss": 0.3133, "step": 30870 }, { "epoch": 1.5549624855229367, "grad_norm": 3.8771810560669593, "learning_rate": 5.551826445001052e-06, "loss": 0.2764, "step": 30880 }, { "epoch": 1.555466035550632, "grad_norm": 4.0351027673763715, "learning_rate": 5.548914695109191e-06, "loss": 0.2784, "step": 30890 }, { "epoch": 1.5559695855783273, "grad_norm": 4.132350705200247, "learning_rate": 5.5460027567799015e-06, "loss": 0.3244, "step": 30900 }, { "epoch": 1.5564731356060224, "grad_norm": 3.334188353080005, "learning_rate": 5.543090631012824e-06, "loss": 0.314, "step": 30910 }, { "epoch": 1.5569766856337177, "grad_norm": 4.170604279199568, "learning_rate": 5.540178318807665e-06, "loss": 0.3137, "step": 30920 }, { "epoch": 1.557480235661413, "grad_norm": 4.305703461602351, "learning_rate": 5.537265821164197e-06, "loss": 0.2529, "step": 30930 }, { "epoch": 1.5579837856891081, "grad_norm": 4.71205698102661, "learning_rate": 5.534353139082252e-06, "loss": 0.2811, "step": 30940 }, { "epoch": 1.5584873357168034, "grad_norm": 4.455055397535545, "learning_rate": 5.531440273561729e-06, "loss": 0.2755, "step": 30950 }, { "epoch": 1.5589908857444987, "grad_norm": 4.325430829406704, "learning_rate": 5.528527225602586e-06, "loss": 0.3119, "step": 30960 }, { "epoch": 1.5594944357721938, "grad_norm": 3.7631172649493103, "learning_rate": 5.525613996204847e-06, "loss": 0.2766, "step": 30970 }, { "epoch": 1.5599979857998894, "grad_norm": 3.7748938779521355, "learning_rate": 5.5227005863685964e-06, "loss": 0.2784, "step": 30980 }, { "epoch": 1.5605015358275844, "grad_norm": 4.646344500454016, "learning_rate": 5.519786997093984e-06, "loss": 0.2688, "step": 30990 }, { "epoch": 1.5610050858552798, "grad_norm": 3.6137011961421837, "learning_rate": 5.5168732293812155e-06, "loss": 0.3242, "step": 31000 }, { "epoch": 1.561508635882975, "grad_norm": 4.193707445460522, "learning_rate": 5.513959284230563e-06, "loss": 0.3297, "step": 31010 }, { "epoch": 1.5620121859106701, "grad_norm": 4.152706192322288, "learning_rate": 5.5110451626423555e-06, "loss": 0.2975, "step": 31020 }, { "epoch": 1.5625157359383655, "grad_norm": 5.024455773188647, "learning_rate": 5.5081308656169876e-06, "loss": 0.3056, "step": 31030 }, { "epoch": 1.5630192859660608, "grad_norm": 3.9315910884938554, "learning_rate": 5.505216394154906e-06, "loss": 0.3098, "step": 31040 }, { "epoch": 1.5635228359937559, "grad_norm": 3.3858365292582384, "learning_rate": 5.5023017492566265e-06, "loss": 0.3115, "step": 31050 }, { "epoch": 1.5640263860214514, "grad_norm": 4.070111832474517, "learning_rate": 5.499386931922717e-06, "loss": 0.2766, "step": 31060 }, { "epoch": 1.5645299360491465, "grad_norm": 2.883099614542977, "learning_rate": 5.496471943153814e-06, "loss": 0.2983, "step": 31070 }, { "epoch": 1.5650334860768418, "grad_norm": 4.174309104222459, "learning_rate": 5.493556783950599e-06, "loss": 0.2634, "step": 31080 }, { "epoch": 1.565537036104537, "grad_norm": 3.3687434060624533, "learning_rate": 5.490641455313823e-06, "loss": 0.2418, "step": 31090 }, { "epoch": 1.5660405861322322, "grad_norm": 2.8971602350875636, "learning_rate": 5.487725958244293e-06, "loss": 0.2511, "step": 31100 }, { "epoch": 1.5665441361599275, "grad_norm": 3.7724728550772926, "learning_rate": 5.484810293742871e-06, "loss": 0.2597, "step": 31110 }, { "epoch": 1.5670476861876228, "grad_norm": 5.279270362565412, "learning_rate": 5.481894462810479e-06, "loss": 0.286, "step": 31120 }, { "epoch": 1.567551236215318, "grad_norm": 3.2764056608574545, "learning_rate": 5.478978466448095e-06, "loss": 0.3121, "step": 31130 }, { "epoch": 1.5680547862430132, "grad_norm": 3.179307262602381, "learning_rate": 5.4760623056567545e-06, "loss": 0.2852, "step": 31140 }, { "epoch": 1.5685583362707085, "grad_norm": 3.1397092982421912, "learning_rate": 5.4731459814375476e-06, "loss": 0.3135, "step": 31150 }, { "epoch": 1.5690618862984036, "grad_norm": 4.090076032226045, "learning_rate": 5.470229494791622e-06, "loss": 0.2316, "step": 31160 }, { "epoch": 1.5695654363260991, "grad_norm": 5.364664578218689, "learning_rate": 5.467312846720184e-06, "loss": 0.2912, "step": 31170 }, { "epoch": 1.5700689863537942, "grad_norm": 2.914233752657139, "learning_rate": 5.464396038224489e-06, "loss": 0.2758, "step": 31180 }, { "epoch": 1.5705725363814895, "grad_norm": 2.868409308397618, "learning_rate": 5.461479070305853e-06, "loss": 0.254, "step": 31190 }, { "epoch": 1.5710760864091848, "grad_norm": 2.900552802523254, "learning_rate": 5.458561943965643e-06, "loss": 0.2772, "step": 31200 }, { "epoch": 1.57157963643688, "grad_norm": 2.9201446000495928, "learning_rate": 5.455644660205284e-06, "loss": 0.2796, "step": 31210 }, { "epoch": 1.5720831864645752, "grad_norm": 3.9981691384653595, "learning_rate": 5.452727220026251e-06, "loss": 0.2905, "step": 31220 }, { "epoch": 1.5725867364922705, "grad_norm": 4.5979880992639846, "learning_rate": 5.4498096244300734e-06, "loss": 0.2876, "step": 31230 }, { "epoch": 1.5730902865199656, "grad_norm": 3.4511807760000215, "learning_rate": 5.44689187441834e-06, "loss": 0.2904, "step": 31240 }, { "epoch": 1.5735938365476612, "grad_norm": 2.639152668583416, "learning_rate": 5.443973970992685e-06, "loss": 0.2431, "step": 31250 }, { "epoch": 1.5740973865753562, "grad_norm": 2.878088520698497, "learning_rate": 5.441055915154797e-06, "loss": 0.3163, "step": 31260 }, { "epoch": 1.5746009366030516, "grad_norm": 2.268121938134334, "learning_rate": 5.43813770790642e-06, "loss": 0.2878, "step": 31270 }, { "epoch": 1.5751044866307469, "grad_norm": 3.811590161259703, "learning_rate": 5.435219350249349e-06, "loss": 0.3253, "step": 31280 }, { "epoch": 1.575608036658442, "grad_norm": 3.470714419545421, "learning_rate": 5.432300843185425e-06, "loss": 0.2707, "step": 31290 }, { "epoch": 1.5761115866861373, "grad_norm": 3.486304824305347, "learning_rate": 5.429382187716551e-06, "loss": 0.2802, "step": 31300 }, { "epoch": 1.5766151367138326, "grad_norm": 3.0330053116767157, "learning_rate": 5.42646338484467e-06, "loss": 0.3281, "step": 31310 }, { "epoch": 1.5771186867415277, "grad_norm": 3.236116042910395, "learning_rate": 5.423544435571784e-06, "loss": 0.2673, "step": 31320 }, { "epoch": 1.577622236769223, "grad_norm": 3.8931417199155844, "learning_rate": 5.420625340899939e-06, "loss": 0.2617, "step": 31330 }, { "epoch": 1.5781257867969183, "grad_norm": 3.65098344697575, "learning_rate": 5.417706101831237e-06, "loss": 0.3131, "step": 31340 }, { "epoch": 1.5786293368246134, "grad_norm": 2.5597467419192657, "learning_rate": 5.414786719367822e-06, "loss": 0.2851, "step": 31350 }, { "epoch": 1.579132886852309, "grad_norm": 2.225306195835779, "learning_rate": 5.411867194511895e-06, "loss": 0.2921, "step": 31360 }, { "epoch": 1.579636436880004, "grad_norm": 3.693565034540425, "learning_rate": 5.4089475282657e-06, "loss": 0.321, "step": 31370 }, { "epoch": 1.5801399869076993, "grad_norm": 3.996553758531123, "learning_rate": 5.406027721631535e-06, "loss": 0.2475, "step": 31380 }, { "epoch": 1.5806435369353946, "grad_norm": 4.158564205035365, "learning_rate": 5.403107775611739e-06, "loss": 0.2595, "step": 31390 }, { "epoch": 1.5811470869630897, "grad_norm": 3.6559336550175314, "learning_rate": 5.400187691208707e-06, "loss": 0.2729, "step": 31400 }, { "epoch": 1.581650636990785, "grad_norm": 3.714827780149409, "learning_rate": 5.397267469424874e-06, "loss": 0.3127, "step": 31410 }, { "epoch": 1.5821541870184803, "grad_norm": 2.938288120164897, "learning_rate": 5.394347111262728e-06, "loss": 0.2735, "step": 31420 }, { "epoch": 1.5826577370461754, "grad_norm": 3.6177522553841306, "learning_rate": 5.3914266177248e-06, "loss": 0.306, "step": 31430 }, { "epoch": 1.583161287073871, "grad_norm": 3.446130393067769, "learning_rate": 5.388505989813669e-06, "loss": 0.2838, "step": 31440 }, { "epoch": 1.583664837101566, "grad_norm": 3.4441362188769915, "learning_rate": 5.385585228531961e-06, "loss": 0.2569, "step": 31450 }, { "epoch": 1.5841683871292613, "grad_norm": 4.190665075398184, "learning_rate": 5.382664334882347e-06, "loss": 0.2679, "step": 31460 }, { "epoch": 1.5846719371569566, "grad_norm": 4.285087734296159, "learning_rate": 5.379743309867542e-06, "loss": 0.2899, "step": 31470 }, { "epoch": 1.5851754871846517, "grad_norm": 4.436600214978002, "learning_rate": 5.376822154490308e-06, "loss": 0.2596, "step": 31480 }, { "epoch": 1.585679037212347, "grad_norm": 4.020011003913946, "learning_rate": 5.37390086975345e-06, "loss": 0.299, "step": 31490 }, { "epoch": 1.5861825872400424, "grad_norm": 3.169955757052248, "learning_rate": 5.370979456659821e-06, "loss": 0.2645, "step": 31500 }, { "epoch": 1.5866861372677374, "grad_norm": 4.208022800275371, "learning_rate": 5.368057916212314e-06, "loss": 0.2795, "step": 31510 }, { "epoch": 1.5871896872954327, "grad_norm": 3.1276213126259953, "learning_rate": 5.365136249413867e-06, "loss": 0.297, "step": 31520 }, { "epoch": 1.587693237323128, "grad_norm": 3.176219441107989, "learning_rate": 5.362214457267462e-06, "loss": 0.2745, "step": 31530 }, { "epoch": 1.5881967873508231, "grad_norm": 3.7230756895483497, "learning_rate": 5.359292540776123e-06, "loss": 0.3078, "step": 31540 }, { "epoch": 1.5887003373785187, "grad_norm": 3.995699893319162, "learning_rate": 5.356370500942919e-06, "loss": 0.2657, "step": 31550 }, { "epoch": 1.5892038874062138, "grad_norm": 4.164666708893669, "learning_rate": 5.353448338770959e-06, "loss": 0.3167, "step": 31560 }, { "epoch": 1.589707437433909, "grad_norm": 4.617485435984801, "learning_rate": 5.350526055263395e-06, "loss": 0.2945, "step": 31570 }, { "epoch": 1.5902109874616044, "grad_norm": 4.755229959500301, "learning_rate": 5.34760365142342e-06, "loss": 0.2914, "step": 31580 }, { "epoch": 1.5907145374892995, "grad_norm": 3.774595593720854, "learning_rate": 5.344681128254269e-06, "loss": 0.2506, "step": 31590 }, { "epoch": 1.5912180875169948, "grad_norm": 4.0193768740043, "learning_rate": 5.341758486759216e-06, "loss": 0.3336, "step": 31600 }, { "epoch": 1.59172163754469, "grad_norm": 4.223501714048607, "learning_rate": 5.338835727941579e-06, "loss": 0.3224, "step": 31610 }, { "epoch": 1.5922251875723852, "grad_norm": 4.181144946766752, "learning_rate": 5.335912852804716e-06, "loss": 0.3167, "step": 31620 }, { "epoch": 1.5927287376000807, "grad_norm": 3.8051618860413456, "learning_rate": 5.3329898623520196e-06, "loss": 0.3068, "step": 31630 }, { "epoch": 1.5932322876277758, "grad_norm": 2.227390872842114, "learning_rate": 5.330066757586928e-06, "loss": 0.2596, "step": 31640 }, { "epoch": 1.593735837655471, "grad_norm": 3.3903469007643734, "learning_rate": 5.327143539512919e-06, "loss": 0.2491, "step": 31650 }, { "epoch": 1.5942393876831664, "grad_norm": 3.3342122696915197, "learning_rate": 5.324220209133501e-06, "loss": 0.3193, "step": 31660 }, { "epoch": 1.5947429377108615, "grad_norm": 3.3008909977471563, "learning_rate": 5.321296767452232e-06, "loss": 0.2972, "step": 31670 }, { "epoch": 1.5952464877385568, "grad_norm": 2.8734357309603316, "learning_rate": 5.318373215472701e-06, "loss": 0.2412, "step": 31680 }, { "epoch": 1.5957500377662521, "grad_norm": 3.5524450549112045, "learning_rate": 5.315449554198538e-06, "loss": 0.2891, "step": 31690 }, { "epoch": 1.5962535877939472, "grad_norm": 4.167002663969948, "learning_rate": 5.312525784633405e-06, "loss": 0.3447, "step": 31700 }, { "epoch": 1.5967571378216425, "grad_norm": 4.034743832621884, "learning_rate": 5.309601907781011e-06, "loss": 0.2491, "step": 31710 }, { "epoch": 1.5972606878493378, "grad_norm": 3.8887732227101615, "learning_rate": 5.306677924645095e-06, "loss": 0.2768, "step": 31720 }, { "epoch": 1.597764237877033, "grad_norm": 3.774038078681839, "learning_rate": 5.303753836229431e-06, "loss": 0.2954, "step": 31730 }, { "epoch": 1.5982677879047285, "grad_norm": 2.787859989160211, "learning_rate": 5.300829643537835e-06, "loss": 0.3195, "step": 31740 }, { "epoch": 1.5987713379324235, "grad_norm": 5.041269987568089, "learning_rate": 5.297905347574155e-06, "loss": 0.3111, "step": 31750 }, { "epoch": 1.5992748879601189, "grad_norm": 4.633279671127817, "learning_rate": 5.2949809493422745e-06, "loss": 0.3471, "step": 31760 }, { "epoch": 1.5997784379878142, "grad_norm": 4.35159788409814, "learning_rate": 5.2920564498461135e-06, "loss": 0.2825, "step": 31770 }, { "epoch": 1.6002819880155092, "grad_norm": 3.980695555870856, "learning_rate": 5.289131850089627e-06, "loss": 0.3151, "step": 31780 }, { "epoch": 1.6007855380432046, "grad_norm": 3.521063607886436, "learning_rate": 5.286207151076802e-06, "loss": 0.2886, "step": 31790 }, { "epoch": 1.6012890880708999, "grad_norm": 3.020787267931635, "learning_rate": 5.2832823538116606e-06, "loss": 0.3072, "step": 31800 }, { "epoch": 1.601792638098595, "grad_norm": 3.5219432276549023, "learning_rate": 5.280357459298261e-06, "loss": 0.3081, "step": 31810 }, { "epoch": 1.6022961881262905, "grad_norm": 3.4158743481869367, "learning_rate": 5.277432468540692e-06, "loss": 0.2855, "step": 31820 }, { "epoch": 1.6027997381539856, "grad_norm": 4.216820846767757, "learning_rate": 5.274507382543077e-06, "loss": 0.3066, "step": 31830 }, { "epoch": 1.6033032881816809, "grad_norm": 3.338240558850918, "learning_rate": 5.271582202309571e-06, "loss": 0.3066, "step": 31840 }, { "epoch": 1.6038068382093762, "grad_norm": 4.161401780807707, "learning_rate": 5.26865692884436e-06, "loss": 0.2842, "step": 31850 }, { "epoch": 1.6043103882370713, "grad_norm": 4.33228642465826, "learning_rate": 5.2657315631516685e-06, "loss": 0.2744, "step": 31860 }, { "epoch": 1.6048139382647666, "grad_norm": 3.795719728812397, "learning_rate": 5.262806106235742e-06, "loss": 0.3326, "step": 31870 }, { "epoch": 1.605317488292462, "grad_norm": 3.297793033019915, "learning_rate": 5.259880559100867e-06, "loss": 0.3069, "step": 31880 }, { "epoch": 1.605821038320157, "grad_norm": 3.660442878181688, "learning_rate": 5.256954922751358e-06, "loss": 0.3281, "step": 31890 }, { "epoch": 1.6063245883478525, "grad_norm": 4.2111189890489875, "learning_rate": 5.254029198191556e-06, "loss": 0.289, "step": 31900 }, { "epoch": 1.6068281383755476, "grad_norm": 3.9610875610553586, "learning_rate": 5.251103386425838e-06, "loss": 0.3209, "step": 31910 }, { "epoch": 1.6073316884032427, "grad_norm": 4.020355106298558, "learning_rate": 5.248177488458609e-06, "loss": 0.3088, "step": 31920 }, { "epoch": 1.6078352384309382, "grad_norm": 3.8517904362345314, "learning_rate": 5.245251505294302e-06, "loss": 0.2816, "step": 31930 }, { "epoch": 1.6083387884586333, "grad_norm": 3.6776569208299263, "learning_rate": 5.242325437937381e-06, "loss": 0.2975, "step": 31940 }, { "epoch": 1.6088423384863286, "grad_norm": 4.2285681984148935, "learning_rate": 5.239399287392337e-06, "loss": 0.2616, "step": 31950 }, { "epoch": 1.609345888514024, "grad_norm": 2.5293040161174583, "learning_rate": 5.236473054663696e-06, "loss": 0.2612, "step": 31960 }, { "epoch": 1.609849438541719, "grad_norm": 3.370592021543729, "learning_rate": 5.2335467407559994e-06, "loss": 0.2899, "step": 31970 }, { "epoch": 1.6103529885694143, "grad_norm": 3.298309585318761, "learning_rate": 5.230620346673829e-06, "loss": 0.3295, "step": 31980 }, { "epoch": 1.6108565385971096, "grad_norm": 3.4827219627691868, "learning_rate": 5.227693873421788e-06, "loss": 0.2922, "step": 31990 }, { "epoch": 1.6113600886248047, "grad_norm": 4.2342361070887495, "learning_rate": 5.224767322004509e-06, "loss": 0.2698, "step": 32000 }, { "epoch": 1.6118636386525003, "grad_norm": 4.4841839335394065, "learning_rate": 5.221840693426649e-06, "loss": 0.2709, "step": 32010 }, { "epoch": 1.6123671886801954, "grad_norm": 3.79912111128003, "learning_rate": 5.2189139886928954e-06, "loss": 0.2816, "step": 32020 }, { "epoch": 1.6128707387078907, "grad_norm": 4.2837393820463445, "learning_rate": 5.215987208807957e-06, "loss": 0.2992, "step": 32030 }, { "epoch": 1.613374288735586, "grad_norm": 4.000703233567678, "learning_rate": 5.213060354776571e-06, "loss": 0.2481, "step": 32040 }, { "epoch": 1.613877838763281, "grad_norm": 4.963460311247981, "learning_rate": 5.210133427603501e-06, "loss": 0.3091, "step": 32050 }, { "epoch": 1.6143813887909764, "grad_norm": 2.225851468710197, "learning_rate": 5.207206428293534e-06, "loss": 0.3527, "step": 32060 }, { "epoch": 1.6148849388186717, "grad_norm": 3.1195839779297687, "learning_rate": 5.204279357851483e-06, "loss": 0.2784, "step": 32070 }, { "epoch": 1.6153884888463668, "grad_norm": 4.7005087807647445, "learning_rate": 5.201352217282183e-06, "loss": 0.3243, "step": 32080 }, { "epoch": 1.6158920388740623, "grad_norm": 4.453899780828226, "learning_rate": 5.198425007590496e-06, "loss": 0.3303, "step": 32090 }, { "epoch": 1.6163955889017574, "grad_norm": 3.1759511917355376, "learning_rate": 5.195497729781306e-06, "loss": 0.2186, "step": 32100 }, { "epoch": 1.6168991389294527, "grad_norm": 2.674709284801814, "learning_rate": 5.19257038485952e-06, "loss": 0.2312, "step": 32110 }, { "epoch": 1.617402688957148, "grad_norm": 4.294506018318587, "learning_rate": 5.189642973830071e-06, "loss": 0.2854, "step": 32120 }, { "epoch": 1.617906238984843, "grad_norm": 3.735172246105332, "learning_rate": 5.186715497697913e-06, "loss": 0.2412, "step": 32130 }, { "epoch": 1.6184097890125384, "grad_norm": 4.692234056466051, "learning_rate": 5.183787957468018e-06, "loss": 0.3281, "step": 32140 }, { "epoch": 1.6189133390402337, "grad_norm": 3.683100187821147, "learning_rate": 5.180860354145389e-06, "loss": 0.2602, "step": 32150 }, { "epoch": 1.6194168890679288, "grad_norm": 4.027708686649672, "learning_rate": 5.177932688735041e-06, "loss": 0.278, "step": 32160 }, { "epoch": 1.619920439095624, "grad_norm": 3.8248527337334135, "learning_rate": 5.17500496224202e-06, "loss": 0.3564, "step": 32170 }, { "epoch": 1.6204239891233194, "grad_norm": 4.203696647040926, "learning_rate": 5.172077175671382e-06, "loss": 0.3231, "step": 32180 }, { "epoch": 1.6209275391510145, "grad_norm": 3.419040976604884, "learning_rate": 5.169149330028214e-06, "loss": 0.2805, "step": 32190 }, { "epoch": 1.62143108917871, "grad_norm": 3.0349283989320863, "learning_rate": 5.166221426317617e-06, "loss": 0.3008, "step": 32200 }, { "epoch": 1.6219346392064051, "grad_norm": 4.22578717242003, "learning_rate": 5.163293465544714e-06, "loss": 0.2623, "step": 32210 }, { "epoch": 1.6224381892341004, "grad_norm": 2.9338912050452515, "learning_rate": 5.160365448714647e-06, "loss": 0.2747, "step": 32220 }, { "epoch": 1.6229417392617957, "grad_norm": 3.366659230104762, "learning_rate": 5.157437376832579e-06, "loss": 0.2996, "step": 32230 }, { "epoch": 1.6234452892894908, "grad_norm": 3.2109046736401003, "learning_rate": 5.154509250903689e-06, "loss": 0.2687, "step": 32240 }, { "epoch": 1.6239488393171861, "grad_norm": 2.4805726391759566, "learning_rate": 5.1515810719331775e-06, "loss": 0.3199, "step": 32250 }, { "epoch": 1.6244523893448815, "grad_norm": 4.5415401796561605, "learning_rate": 5.148652840926258e-06, "loss": 0.2535, "step": 32260 }, { "epoch": 1.6249559393725765, "grad_norm": 2.98306139784828, "learning_rate": 5.145724558888172e-06, "loss": 0.2444, "step": 32270 }, { "epoch": 1.625459489400272, "grad_norm": 3.442738737166425, "learning_rate": 5.142796226824167e-06, "loss": 0.2677, "step": 32280 }, { "epoch": 1.6259630394279672, "grad_norm": 2.998696930992765, "learning_rate": 5.139867845739515e-06, "loss": 0.2446, "step": 32290 }, { "epoch": 1.6264665894556625, "grad_norm": 3.2062033390067937, "learning_rate": 5.1369394166394994e-06, "loss": 0.2249, "step": 32300 }, { "epoch": 1.6269701394833578, "grad_norm": 3.287195095042056, "learning_rate": 5.134010940529429e-06, "loss": 0.3147, "step": 32310 }, { "epoch": 1.6274736895110529, "grad_norm": 3.071218970058455, "learning_rate": 5.131082418414618e-06, "loss": 0.2553, "step": 32320 }, { "epoch": 1.6279772395387482, "grad_norm": 2.93513804363694, "learning_rate": 5.128153851300405e-06, "loss": 0.3228, "step": 32330 }, { "epoch": 1.6284807895664435, "grad_norm": 3.2828149770861366, "learning_rate": 5.125225240192137e-06, "loss": 0.2634, "step": 32340 }, { "epoch": 1.6289843395941386, "grad_norm": 3.2732391996197734, "learning_rate": 5.122296586095184e-06, "loss": 0.2807, "step": 32350 }, { "epoch": 1.6294878896218339, "grad_norm": 3.0921738256431968, "learning_rate": 5.119367890014921e-06, "loss": 0.2979, "step": 32360 }, { "epoch": 1.6299914396495292, "grad_norm": 3.3862369205906995, "learning_rate": 5.116439152956747e-06, "loss": 0.3164, "step": 32370 }, { "epoch": 1.6304949896772243, "grad_norm": 3.3420718432498497, "learning_rate": 5.11351037592607e-06, "loss": 0.328, "step": 32380 }, { "epoch": 1.6309985397049198, "grad_norm": 3.1029946514133178, "learning_rate": 5.110581559928311e-06, "loss": 0.2682, "step": 32390 }, { "epoch": 1.631502089732615, "grad_norm": 4.453341002436182, "learning_rate": 5.1076527059689075e-06, "loss": 0.3701, "step": 32400 }, { "epoch": 1.6320056397603102, "grad_norm": 2.887747552126521, "learning_rate": 5.104723815053307e-06, "loss": 0.2754, "step": 32410 }, { "epoch": 1.6325091897880055, "grad_norm": 2.670927016376241, "learning_rate": 5.101794888186974e-06, "loss": 0.2424, "step": 32420 }, { "epoch": 1.6330127398157006, "grad_norm": 3.3250141761427088, "learning_rate": 5.098865926375377e-06, "loss": 0.2241, "step": 32430 }, { "epoch": 1.633516289843396, "grad_norm": 3.326276806723091, "learning_rate": 5.095936930624008e-06, "loss": 0.3001, "step": 32440 }, { "epoch": 1.6340198398710912, "grad_norm": 4.430032673106127, "learning_rate": 5.093007901938362e-06, "loss": 0.2958, "step": 32450 }, { "epoch": 1.6345233898987863, "grad_norm": 3.1390189645817435, "learning_rate": 5.090078841323947e-06, "loss": 0.2409, "step": 32460 }, { "epoch": 1.6350269399264818, "grad_norm": 4.234030850076254, "learning_rate": 5.0871497497862845e-06, "loss": 0.2676, "step": 32470 }, { "epoch": 1.635530489954177, "grad_norm": 3.6648845725942927, "learning_rate": 5.0842206283309055e-06, "loss": 0.3239, "step": 32480 }, { "epoch": 1.6360340399818722, "grad_norm": 3.597467779278107, "learning_rate": 5.081291477963349e-06, "loss": 0.3074, "step": 32490 }, { "epoch": 1.6365375900095676, "grad_norm": 3.176124387941761, "learning_rate": 5.0783622996891656e-06, "loss": 0.2844, "step": 32500 }, { "epoch": 1.6370411400372626, "grad_norm": 3.4947139736643478, "learning_rate": 5.0754330945139175e-06, "loss": 0.2807, "step": 32510 }, { "epoch": 1.637544690064958, "grad_norm": 4.132167282342506, "learning_rate": 5.072503863443172e-06, "loss": 0.3136, "step": 32520 }, { "epoch": 1.6380482400926533, "grad_norm": 4.083520564129904, "learning_rate": 5.069574607482509e-06, "loss": 0.2551, "step": 32530 }, { "epoch": 1.6385517901203484, "grad_norm": 4.020481052743045, "learning_rate": 5.066645327637516e-06, "loss": 0.2985, "step": 32540 }, { "epoch": 1.6390553401480437, "grad_norm": 3.7970902461982905, "learning_rate": 5.063716024913788e-06, "loss": 0.301, "step": 32550 }, { "epoch": 1.639558890175739, "grad_norm": 3.1324321636583696, "learning_rate": 5.060786700316926e-06, "loss": 0.2678, "step": 32560 }, { "epoch": 1.640062440203434, "grad_norm": 4.112242788002289, "learning_rate": 5.0578573548525425e-06, "loss": 0.2797, "step": 32570 }, { "epoch": 1.6405659902311296, "grad_norm": 4.0077930442746945, "learning_rate": 5.054927989526255e-06, "loss": 0.3361, "step": 32580 }, { "epoch": 1.6410695402588247, "grad_norm": 3.6566808241060222, "learning_rate": 5.051998605343686e-06, "loss": 0.293, "step": 32590 }, { "epoch": 1.64157309028652, "grad_norm": 3.5483549954748916, "learning_rate": 5.04906920331047e-06, "loss": 0.3047, "step": 32600 }, { "epoch": 1.6420766403142153, "grad_norm": 3.7909758530029762, "learning_rate": 5.04613978443224e-06, "loss": 0.3263, "step": 32610 }, { "epoch": 1.6425801903419104, "grad_norm": 3.0991909060707616, "learning_rate": 5.043210349714645e-06, "loss": 0.262, "step": 32620 }, { "epoch": 1.6430837403696057, "grad_norm": 4.945072494715582, "learning_rate": 5.0402809001633245e-06, "loss": 0.3069, "step": 32630 }, { "epoch": 1.643587290397301, "grad_norm": 3.6077290020409376, "learning_rate": 5.037351436783941e-06, "loss": 0.3375, "step": 32640 }, { "epoch": 1.644090840424996, "grad_norm": 3.6030171565206635, "learning_rate": 5.034421960582147e-06, "loss": 0.2852, "step": 32650 }, { "epoch": 1.6445943904526916, "grad_norm": 4.292765494076072, "learning_rate": 5.031492472563606e-06, "loss": 0.3225, "step": 32660 }, { "epoch": 1.6450979404803867, "grad_norm": 4.59369611593486, "learning_rate": 5.028562973733984e-06, "loss": 0.3358, "step": 32670 }, { "epoch": 1.645601490508082, "grad_norm": 4.390155953671751, "learning_rate": 5.025633465098955e-06, "loss": 0.3163, "step": 32680 }, { "epoch": 1.6461050405357773, "grad_norm": 3.7790795583155132, "learning_rate": 5.022703947664189e-06, "loss": 0.2688, "step": 32690 }, { "epoch": 1.6466085905634724, "grad_norm": 4.233760706017616, "learning_rate": 5.019774422435365e-06, "loss": 0.3574, "step": 32700 }, { "epoch": 1.6471121405911677, "grad_norm": 4.227747704584299, "learning_rate": 5.016844890418161e-06, "loss": 0.299, "step": 32710 }, { "epoch": 1.647615690618863, "grad_norm": 2.3090212075209746, "learning_rate": 5.013915352618258e-06, "loss": 0.2846, "step": 32720 }, { "epoch": 1.6481192406465581, "grad_norm": 3.9987771748060124, "learning_rate": 5.010985810041342e-06, "loss": 0.2742, "step": 32730 }, { "epoch": 1.6486227906742534, "grad_norm": 4.460598853871327, "learning_rate": 5.008056263693096e-06, "loss": 0.3168, "step": 32740 }, { "epoch": 1.6491263407019487, "grad_norm": 4.31419240360175, "learning_rate": 5.0051267145792095e-06, "loss": 0.3663, "step": 32750 }, { "epoch": 1.6496298907296438, "grad_norm": 4.037531713379572, "learning_rate": 5.002197163705368e-06, "loss": 0.2821, "step": 32760 }, { "epoch": 1.6501334407573394, "grad_norm": 3.0258383840307648, "learning_rate": 4.99926761207726e-06, "loss": 0.2519, "step": 32770 }, { "epoch": 1.6506369907850345, "grad_norm": 3.492856788529311, "learning_rate": 4.9963380607005745e-06, "loss": 0.2496, "step": 32780 }, { "epoch": 1.6511405408127298, "grad_norm": 3.5451155947743884, "learning_rate": 4.9934085105809995e-06, "loss": 0.3009, "step": 32790 }, { "epoch": 1.651644090840425, "grad_norm": 3.0243778226511897, "learning_rate": 4.990478962724221e-06, "loss": 0.27, "step": 32800 }, { "epoch": 1.6521476408681202, "grad_norm": 3.1537118873265815, "learning_rate": 4.98754941813593e-06, "loss": 0.2657, "step": 32810 }, { "epoch": 1.6526511908958155, "grad_norm": 2.379570220520369, "learning_rate": 4.984619877821809e-06, "loss": 0.2918, "step": 32820 }, { "epoch": 1.6531547409235108, "grad_norm": 3.6652868425947003, "learning_rate": 4.981690342787546e-06, "loss": 0.2994, "step": 32830 }, { "epoch": 1.6536582909512059, "grad_norm": 4.016568955489568, "learning_rate": 4.978760814038823e-06, "loss": 0.2774, "step": 32840 }, { "epoch": 1.6541618409789014, "grad_norm": 5.072665881875107, "learning_rate": 4.975831292581318e-06, "loss": 0.2803, "step": 32850 }, { "epoch": 1.6546653910065965, "grad_norm": 3.103639989962455, "learning_rate": 4.9729017794207095e-06, "loss": 0.3146, "step": 32860 }, { "epoch": 1.6551689410342918, "grad_norm": 3.907182507483946, "learning_rate": 4.9699722755626765e-06, "loss": 0.2337, "step": 32870 }, { "epoch": 1.655672491061987, "grad_norm": 2.8077664691577784, "learning_rate": 4.9670427820128855e-06, "loss": 0.2452, "step": 32880 }, { "epoch": 1.6561760410896822, "grad_norm": 4.328955436276393, "learning_rate": 4.96411329977701e-06, "loss": 0.2729, "step": 32890 }, { "epoch": 1.6566795911173775, "grad_norm": 4.5143761929257815, "learning_rate": 4.961183829860713e-06, "loss": 0.2805, "step": 32900 }, { "epoch": 1.6571831411450728, "grad_norm": 3.606619903511628, "learning_rate": 4.9582543732696555e-06, "loss": 0.2803, "step": 32910 }, { "epoch": 1.657686691172768, "grad_norm": 2.5291103648604767, "learning_rate": 4.955324931009491e-06, "loss": 0.3048, "step": 32920 }, { "epoch": 1.6581902412004632, "grad_norm": 3.3485607526768018, "learning_rate": 4.952395504085872e-06, "loss": 0.265, "step": 32930 }, { "epoch": 1.6586937912281585, "grad_norm": 3.612036022868601, "learning_rate": 4.949466093504443e-06, "loss": 0.2766, "step": 32940 }, { "epoch": 1.6591973412558536, "grad_norm": 4.011400286086611, "learning_rate": 4.946536700270848e-06, "loss": 0.26, "step": 32950 }, { "epoch": 1.6597008912835491, "grad_norm": 3.364021752251953, "learning_rate": 4.943607325390717e-06, "loss": 0.3073, "step": 32960 }, { "epoch": 1.6602044413112442, "grad_norm": 3.7500119278459763, "learning_rate": 4.9406779698696775e-06, "loss": 0.3331, "step": 32970 }, { "epoch": 1.6607079913389395, "grad_norm": 4.110438810236988, "learning_rate": 4.937748634713354e-06, "loss": 0.2739, "step": 32980 }, { "epoch": 1.6612115413666348, "grad_norm": 3.5037619788859447, "learning_rate": 4.934819320927358e-06, "loss": 0.2855, "step": 32990 }, { "epoch": 1.66171509139433, "grad_norm": 2.578584253359916, "learning_rate": 4.931890029517296e-06, "loss": 0.2723, "step": 33000 }, { "epoch": 1.6622186414220252, "grad_norm": 4.492291133018604, "learning_rate": 4.928960761488769e-06, "loss": 0.298, "step": 33010 }, { "epoch": 1.6627221914497206, "grad_norm": 3.629791984931382, "learning_rate": 4.926031517847368e-06, "loss": 0.2689, "step": 33020 }, { "epoch": 1.6632257414774156, "grad_norm": 3.6968845090544415, "learning_rate": 4.923102299598675e-06, "loss": 0.3049, "step": 33030 }, { "epoch": 1.6637292915051112, "grad_norm": 3.0686273120402223, "learning_rate": 4.920173107748264e-06, "loss": 0.2236, "step": 33040 }, { "epoch": 1.6642328415328063, "grad_norm": 3.535517980723317, "learning_rate": 4.9172439433017e-06, "loss": 0.2614, "step": 33050 }, { "epoch": 1.6647363915605016, "grad_norm": 3.553756796933406, "learning_rate": 4.914314807264539e-06, "loss": 0.2658, "step": 33060 }, { "epoch": 1.6652399415881969, "grad_norm": 3.8183516801599526, "learning_rate": 4.911385700642325e-06, "loss": 0.2451, "step": 33070 }, { "epoch": 1.665743491615892, "grad_norm": 3.334593986955417, "learning_rate": 4.908456624440596e-06, "loss": 0.3115, "step": 33080 }, { "epoch": 1.6662470416435873, "grad_norm": 4.216326774915821, "learning_rate": 4.905527579664875e-06, "loss": 0.2808, "step": 33090 }, { "epoch": 1.6667505916712826, "grad_norm": 3.845738311480325, "learning_rate": 4.90259856732068e-06, "loss": 0.2454, "step": 33100 }, { "epoch": 1.6672541416989777, "grad_norm": 2.765115126960448, "learning_rate": 4.89966958841351e-06, "loss": 0.2986, "step": 33110 }, { "epoch": 1.6677576917266732, "grad_norm": 3.9336057313660215, "learning_rate": 4.896740643948863e-06, "loss": 0.2904, "step": 33120 }, { "epoch": 1.6682612417543683, "grad_norm": 4.509243329159719, "learning_rate": 4.8938117349322115e-06, "loss": 0.2586, "step": 33130 }, { "epoch": 1.6687647917820634, "grad_norm": 6.137988730085213, "learning_rate": 4.890882862369028e-06, "loss": 0.3253, "step": 33140 }, { "epoch": 1.669268341809759, "grad_norm": 3.0793915200139366, "learning_rate": 4.8879540272647645e-06, "loss": 0.2135, "step": 33150 }, { "epoch": 1.669771891837454, "grad_norm": 4.017393643809317, "learning_rate": 4.885025230624868e-06, "loss": 0.3512, "step": 33160 }, { "epoch": 1.6702754418651493, "grad_norm": 3.213835440187959, "learning_rate": 4.882096473454764e-06, "loss": 0.2724, "step": 33170 }, { "epoch": 1.6707789918928446, "grad_norm": 4.230742410900917, "learning_rate": 4.879167756759871e-06, "loss": 0.3303, "step": 33180 }, { "epoch": 1.6712825419205397, "grad_norm": 2.8169741593447046, "learning_rate": 4.8762390815455915e-06, "loss": 0.3374, "step": 33190 }, { "epoch": 1.671786091948235, "grad_norm": 2.301111770677176, "learning_rate": 4.873310448817309e-06, "loss": 0.2743, "step": 33200 }, { "epoch": 1.6722896419759303, "grad_norm": 4.100329383979794, "learning_rate": 4.8703818595803975e-06, "loss": 0.2867, "step": 33210 }, { "epoch": 1.6727931920036254, "grad_norm": 4.873717333357981, "learning_rate": 4.867453314840218e-06, "loss": 0.2684, "step": 33220 }, { "epoch": 1.673296742031321, "grad_norm": 4.670642696810068, "learning_rate": 4.8645248156021105e-06, "loss": 0.2592, "step": 33230 }, { "epoch": 1.673800292059016, "grad_norm": 2.996120102938874, "learning_rate": 4.861596362871404e-06, "loss": 0.2701, "step": 33240 }, { "epoch": 1.6743038420867113, "grad_norm": 2.424299330002596, "learning_rate": 4.858667957653411e-06, "loss": 0.257, "step": 33250 }, { "epoch": 1.6748073921144067, "grad_norm": 4.324365748449715, "learning_rate": 4.8557396009534224e-06, "loss": 0.2838, "step": 33260 }, { "epoch": 1.6753109421421017, "grad_norm": 2.2679425118076724, "learning_rate": 4.852811293776717e-06, "loss": 0.2663, "step": 33270 }, { "epoch": 1.675814492169797, "grad_norm": 3.6019673252983613, "learning_rate": 4.84988303712856e-06, "loss": 0.2854, "step": 33280 }, { "epoch": 1.6763180421974924, "grad_norm": 3.0453737940337446, "learning_rate": 4.846954832014192e-06, "loss": 0.2979, "step": 33290 }, { "epoch": 1.6768215922251875, "grad_norm": 3.795427525054684, "learning_rate": 4.844026679438839e-06, "loss": 0.337, "step": 33300 }, { "epoch": 1.677325142252883, "grad_norm": 3.787589610162385, "learning_rate": 4.841098580407711e-06, "loss": 0.2719, "step": 33310 }, { "epoch": 1.677828692280578, "grad_norm": 3.9625087553649636, "learning_rate": 4.838170535925999e-06, "loss": 0.313, "step": 33320 }, { "epoch": 1.6783322423082734, "grad_norm": 2.879374053001673, "learning_rate": 4.835242546998869e-06, "loss": 0.2795, "step": 33330 }, { "epoch": 1.6788357923359687, "grad_norm": 3.6040698019027593, "learning_rate": 4.8323146146314785e-06, "loss": 0.2895, "step": 33340 }, { "epoch": 1.6793393423636638, "grad_norm": 3.952978372909759, "learning_rate": 4.8293867398289566e-06, "loss": 0.2883, "step": 33350 }, { "epoch": 1.679842892391359, "grad_norm": 4.426431720210586, "learning_rate": 4.826458923596417e-06, "loss": 0.3156, "step": 33360 }, { "epoch": 1.6803464424190544, "grad_norm": 3.8324162232758243, "learning_rate": 4.823531166938953e-06, "loss": 0.2722, "step": 33370 }, { "epoch": 1.6808499924467495, "grad_norm": 4.132240619762297, "learning_rate": 4.820603470861636e-06, "loss": 0.2817, "step": 33380 }, { "epoch": 1.6813535424744448, "grad_norm": 3.4794307358465866, "learning_rate": 4.8176758363695195e-06, "loss": 0.3018, "step": 33390 }, { "epoch": 1.68185709250214, "grad_norm": 2.6567646932872244, "learning_rate": 4.814748264467631e-06, "loss": 0.2717, "step": 33400 }, { "epoch": 1.6823606425298352, "grad_norm": 3.3593968015717715, "learning_rate": 4.811820756160982e-06, "loss": 0.291, "step": 33410 }, { "epoch": 1.6828641925575307, "grad_norm": 2.1890787284175643, "learning_rate": 4.808893312454556e-06, "loss": 0.2823, "step": 33420 }, { "epoch": 1.6833677425852258, "grad_norm": 3.950265620953664, "learning_rate": 4.805965934353322e-06, "loss": 0.2635, "step": 33430 }, { "epoch": 1.6838712926129211, "grad_norm": 3.8628572247326325, "learning_rate": 4.803038622862219e-06, "loss": 0.3049, "step": 33440 }, { "epoch": 1.6843748426406164, "grad_norm": 3.6309221308499473, "learning_rate": 4.800111378986168e-06, "loss": 0.2595, "step": 33450 }, { "epoch": 1.6848783926683115, "grad_norm": 2.978136617222185, "learning_rate": 4.797184203730066e-06, "loss": 0.3282, "step": 33460 }, { "epoch": 1.6853819426960068, "grad_norm": 3.7802829078808533, "learning_rate": 4.794257098098785e-06, "loss": 0.3396, "step": 33470 }, { "epoch": 1.6858854927237021, "grad_norm": 3.423368191373169, "learning_rate": 4.791330063097171e-06, "loss": 0.3193, "step": 33480 }, { "epoch": 1.6863890427513972, "grad_norm": 6.199525354709581, "learning_rate": 4.7884030997300536e-06, "loss": 0.2763, "step": 33490 }, { "epoch": 1.6868925927790928, "grad_norm": 3.2114339670756813, "learning_rate": 4.7854762090022274e-06, "loss": 0.2539, "step": 33500 }, { "epoch": 1.6873961428067878, "grad_norm": 3.9282934769574336, "learning_rate": 4.782549391918473e-06, "loss": 0.2952, "step": 33510 }, { "epoch": 1.6878996928344832, "grad_norm": 3.6159329204222086, "learning_rate": 4.779622649483537e-06, "loss": 0.2583, "step": 33520 }, { "epoch": 1.6884032428621785, "grad_norm": 2.4957220786526784, "learning_rate": 4.776695982702143e-06, "loss": 0.3156, "step": 33530 }, { "epoch": 1.6889067928898736, "grad_norm": 2.9974934607524806, "learning_rate": 4.773769392578989e-06, "loss": 0.252, "step": 33540 }, { "epoch": 1.6894103429175689, "grad_norm": 4.3153365761186, "learning_rate": 4.770842880118749e-06, "loss": 0.2572, "step": 33550 }, { "epoch": 1.6899138929452642, "grad_norm": 3.4815811082995287, "learning_rate": 4.767916446326066e-06, "loss": 0.2689, "step": 33560 }, { "epoch": 1.6904174429729593, "grad_norm": 4.616446648386362, "learning_rate": 4.764990092205558e-06, "loss": 0.2599, "step": 33570 }, { "epoch": 1.6909209930006546, "grad_norm": 4.4475504253528895, "learning_rate": 4.762063818761818e-06, "loss": 0.302, "step": 33580 }, { "epoch": 1.6914245430283499, "grad_norm": 4.469994672380663, "learning_rate": 4.759137626999407e-06, "loss": 0.2397, "step": 33590 }, { "epoch": 1.691928093056045, "grad_norm": 4.228416420446894, "learning_rate": 4.756211517922863e-06, "loss": 0.2935, "step": 33600 }, { "epoch": 1.6924316430837405, "grad_norm": 4.2144785144104935, "learning_rate": 4.7532854925366904e-06, "loss": 0.2479, "step": 33610 }, { "epoch": 1.6929351931114356, "grad_norm": 3.4349426949704585, "learning_rate": 4.750359551845366e-06, "loss": 0.2864, "step": 33620 }, { "epoch": 1.693438743139131, "grad_norm": 5.872019793964709, "learning_rate": 4.747433696853339e-06, "loss": 0.3164, "step": 33630 }, { "epoch": 1.6939422931668262, "grad_norm": 3.843963016358384, "learning_rate": 4.744507928565031e-06, "loss": 0.2861, "step": 33640 }, { "epoch": 1.6944458431945213, "grad_norm": 4.758131770980383, "learning_rate": 4.74158224798483e-06, "loss": 0.2532, "step": 33650 }, { "epoch": 1.6949493932222166, "grad_norm": 3.8116699516673744, "learning_rate": 4.738656656117097e-06, "loss": 0.2754, "step": 33660 }, { "epoch": 1.695452943249912, "grad_norm": 3.32018502852362, "learning_rate": 4.735731153966161e-06, "loss": 0.2615, "step": 33670 }, { "epoch": 1.695956493277607, "grad_norm": 2.7527870894728452, "learning_rate": 4.732805742536318e-06, "loss": 0.2667, "step": 33680 }, { "epoch": 1.6964600433053025, "grad_norm": 3.7819154786827327, "learning_rate": 4.729880422831836e-06, "loss": 0.2775, "step": 33690 }, { "epoch": 1.6969635933329976, "grad_norm": 3.0026223300043924, "learning_rate": 4.726955195856953e-06, "loss": 0.3021, "step": 33700 }, { "epoch": 1.697467143360693, "grad_norm": 3.772777361192197, "learning_rate": 4.72403006261587e-06, "loss": 0.2808, "step": 33710 }, { "epoch": 1.6979706933883882, "grad_norm": 3.658063250398491, "learning_rate": 4.72110502411276e-06, "loss": 0.2738, "step": 33720 }, { "epoch": 1.6984742434160833, "grad_norm": 2.009370097229364, "learning_rate": 4.718180081351762e-06, "loss": 0.2653, "step": 33730 }, { "epoch": 1.6989777934437786, "grad_norm": 3.6034722524373852, "learning_rate": 4.715255235336984e-06, "loss": 0.2841, "step": 33740 }, { "epoch": 1.699481343471474, "grad_norm": 5.463346445569194, "learning_rate": 4.712330487072496e-06, "loss": 0.2832, "step": 33750 }, { "epoch": 1.699984893499169, "grad_norm": 3.1149302330753637, "learning_rate": 4.709405837562339e-06, "loss": 0.3235, "step": 33760 }, { "epoch": 1.7004884435268643, "grad_norm": 3.1856423249918984, "learning_rate": 4.7064812878105175e-06, "loss": 0.2909, "step": 33770 }, { "epoch": 1.7009919935545597, "grad_norm": 4.687740533146439, "learning_rate": 4.703556838821005e-06, "loss": 0.2195, "step": 33780 }, { "epoch": 1.7014955435822547, "grad_norm": 3.5815477100123037, "learning_rate": 4.7006324915977365e-06, "loss": 0.3037, "step": 33790 }, { "epoch": 1.7019990936099503, "grad_norm": 4.778319521067857, "learning_rate": 4.697708247144615e-06, "loss": 0.3327, "step": 33800 }, { "epoch": 1.7025026436376454, "grad_norm": 2.93310291641841, "learning_rate": 4.694784106465508e-06, "loss": 0.3136, "step": 33810 }, { "epoch": 1.7030061936653407, "grad_norm": 2.0891703089770046, "learning_rate": 4.6918600705642435e-06, "loss": 0.2588, "step": 33820 }, { "epoch": 1.703509743693036, "grad_norm": 3.613556689601287, "learning_rate": 4.688936140444617e-06, "loss": 0.2965, "step": 33830 }, { "epoch": 1.704013293720731, "grad_norm": 3.486708430714043, "learning_rate": 4.68601231711039e-06, "loss": 0.3156, "step": 33840 }, { "epoch": 1.7045168437484264, "grad_norm": 3.43430482382915, "learning_rate": 4.6830886015652816e-06, "loss": 0.2295, "step": 33850 }, { "epoch": 1.7050203937761217, "grad_norm": 4.520140884487215, "learning_rate": 4.680164994812977e-06, "loss": 0.3101, "step": 33860 }, { "epoch": 1.7055239438038168, "grad_norm": 3.5445096951032364, "learning_rate": 4.677241497857127e-06, "loss": 0.2807, "step": 33870 }, { "epoch": 1.7060274938315123, "grad_norm": 4.991334952739358, "learning_rate": 4.674318111701338e-06, "loss": 0.2997, "step": 33880 }, { "epoch": 1.7065310438592074, "grad_norm": 2.0559555991502494, "learning_rate": 4.671394837349182e-06, "loss": 0.2678, "step": 33890 }, { "epoch": 1.7070345938869027, "grad_norm": 4.598112292925229, "learning_rate": 4.668471675804194e-06, "loss": 0.2859, "step": 33900 }, { "epoch": 1.707538143914598, "grad_norm": 3.1768784536268035, "learning_rate": 4.6655486280698695e-06, "loss": 0.2527, "step": 33910 }, { "epoch": 1.708041693942293, "grad_norm": 3.1061638689173785, "learning_rate": 4.662625695149662e-06, "loss": 0.3127, "step": 33920 }, { "epoch": 1.7085452439699884, "grad_norm": 3.903518121623115, "learning_rate": 4.659702878046989e-06, "loss": 0.2684, "step": 33930 }, { "epoch": 1.7090487939976837, "grad_norm": 3.8253972229494098, "learning_rate": 4.6567801777652295e-06, "loss": 0.3243, "step": 33940 }, { "epoch": 1.7095523440253788, "grad_norm": 3.1069765087674357, "learning_rate": 4.653857595307714e-06, "loss": 0.3319, "step": 33950 }, { "epoch": 1.7100558940530741, "grad_norm": 2.989566006239223, "learning_rate": 4.650935131677744e-06, "loss": 0.2661, "step": 33960 }, { "epoch": 1.7105594440807694, "grad_norm": 4.071965104454164, "learning_rate": 4.648012787878573e-06, "loss": 0.3106, "step": 33970 }, { "epoch": 1.7110629941084645, "grad_norm": 3.907943670581092, "learning_rate": 4.645090564913413e-06, "loss": 0.2621, "step": 33980 }, { "epoch": 1.71156654413616, "grad_norm": 4.567521160653774, "learning_rate": 4.64216846378544e-06, "loss": 0.3132, "step": 33990 }, { "epoch": 1.7120700941638551, "grad_norm": 2.715479318828299, "learning_rate": 4.639246485497782e-06, "loss": 0.2216, "step": 34000 }, { "epoch": 1.7125736441915504, "grad_norm": 4.602639061749298, "learning_rate": 4.636324631053532e-06, "loss": 0.3602, "step": 34010 }, { "epoch": 1.7130771942192458, "grad_norm": 3.623654271967794, "learning_rate": 4.63340290145573e-06, "loss": 0.2719, "step": 34020 }, { "epoch": 1.7135807442469408, "grad_norm": 3.708696480798092, "learning_rate": 4.630481297707383e-06, "loss": 0.2463, "step": 34030 }, { "epoch": 1.7140842942746362, "grad_norm": 4.668013519744246, "learning_rate": 4.627559820811449e-06, "loss": 0.3505, "step": 34040 }, { "epoch": 1.7145878443023315, "grad_norm": 3.3152674548015186, "learning_rate": 4.624638471770847e-06, "loss": 0.2599, "step": 34050 }, { "epoch": 1.7150913943300266, "grad_norm": 2.984651371230675, "learning_rate": 4.621717251588447e-06, "loss": 0.2543, "step": 34060 }, { "epoch": 1.715594944357722, "grad_norm": 3.8452999667270897, "learning_rate": 4.618796161267079e-06, "loss": 0.2987, "step": 34070 }, { "epoch": 1.7160984943854172, "grad_norm": 3.884469313149557, "learning_rate": 4.615875201809528e-06, "loss": 0.3097, "step": 34080 }, { "epoch": 1.7166020444131125, "grad_norm": 4.131784590726875, "learning_rate": 4.612954374218531e-06, "loss": 0.2641, "step": 34090 }, { "epoch": 1.7171055944408078, "grad_norm": 4.886481159000456, "learning_rate": 4.6100336794967795e-06, "loss": 0.3084, "step": 34100 }, { "epoch": 1.7176091444685029, "grad_norm": 4.245750854949555, "learning_rate": 4.6071131186469255e-06, "loss": 0.3073, "step": 34110 }, { "epoch": 1.7181126944961982, "grad_norm": 5.1033346963789805, "learning_rate": 4.604192692671568e-06, "loss": 0.3267, "step": 34120 }, { "epoch": 1.7186162445238935, "grad_norm": 4.18262540936145, "learning_rate": 4.601272402573265e-06, "loss": 0.2987, "step": 34130 }, { "epoch": 1.7191197945515886, "grad_norm": 2.8849472219714825, "learning_rate": 4.5983522493545246e-06, "loss": 0.234, "step": 34140 }, { "epoch": 1.719623344579284, "grad_norm": 3.8492910645458, "learning_rate": 4.59543223401781e-06, "loss": 0.2386, "step": 34150 }, { "epoch": 1.7201268946069792, "grad_norm": 3.9783238592803634, "learning_rate": 4.592512357565533e-06, "loss": 0.2715, "step": 34160 }, { "epoch": 1.7206304446346743, "grad_norm": 2.2824929452306657, "learning_rate": 4.589592621000063e-06, "loss": 0.222, "step": 34170 }, { "epoch": 1.7211339946623698, "grad_norm": 3.153393829308998, "learning_rate": 4.5866730253237186e-06, "loss": 0.2544, "step": 34180 }, { "epoch": 1.721637544690065, "grad_norm": 3.4620180277909043, "learning_rate": 4.583753571538769e-06, "loss": 0.27, "step": 34190 }, { "epoch": 1.7221410947177602, "grad_norm": 4.080817601399158, "learning_rate": 4.5808342606474385e-06, "loss": 0.2919, "step": 34200 }, { "epoch": 1.7226446447454555, "grad_norm": 2.952887477954902, "learning_rate": 4.577915093651899e-06, "loss": 0.2798, "step": 34210 }, { "epoch": 1.7231481947731506, "grad_norm": 5.058083634883743, "learning_rate": 4.5749960715542756e-06, "loss": 0.297, "step": 34220 }, { "epoch": 1.723651744800846, "grad_norm": 4.042675809738229, "learning_rate": 4.572077195356639e-06, "loss": 0.291, "step": 34230 }, { "epoch": 1.7241552948285412, "grad_norm": 2.5735113430716554, "learning_rate": 4.569158466061015e-06, "loss": 0.2326, "step": 34240 }, { "epoch": 1.7246588448562363, "grad_norm": 3.8601573462533705, "learning_rate": 4.566239884669375e-06, "loss": 0.276, "step": 34250 }, { "epoch": 1.7251623948839319, "grad_norm": 3.9561477374989185, "learning_rate": 4.563321452183645e-06, "loss": 0.3092, "step": 34260 }, { "epoch": 1.725665944911627, "grad_norm": 3.28712643789316, "learning_rate": 4.560403169605692e-06, "loss": 0.2648, "step": 34270 }, { "epoch": 1.7261694949393223, "grad_norm": 4.670930907023864, "learning_rate": 4.557485037937338e-06, "loss": 0.291, "step": 34280 }, { "epoch": 1.7266730449670176, "grad_norm": 4.827458075087706, "learning_rate": 4.554567058180354e-06, "loss": 0.3103, "step": 34290 }, { "epoch": 1.7271765949947127, "grad_norm": 3.7513155196876364, "learning_rate": 4.551649231336451e-06, "loss": 0.2855, "step": 34300 }, { "epoch": 1.727680145022408, "grad_norm": 2.779021769344307, "learning_rate": 4.548731558407295e-06, "loss": 0.2211, "step": 34310 }, { "epoch": 1.7281836950501033, "grad_norm": 4.0404224394341615, "learning_rate": 4.545814040394495e-06, "loss": 0.2534, "step": 34320 }, { "epoch": 1.7286872450777984, "grad_norm": 4.5665617835406795, "learning_rate": 4.54289667829961e-06, "loss": 0.3249, "step": 34330 }, { "epoch": 1.729190795105494, "grad_norm": 3.508406491320851, "learning_rate": 4.539979473124144e-06, "loss": 0.2802, "step": 34340 }, { "epoch": 1.729694345133189, "grad_norm": 4.424219246830668, "learning_rate": 4.537062425869547e-06, "loss": 0.3106, "step": 34350 }, { "epoch": 1.730197895160884, "grad_norm": 3.4533054409934176, "learning_rate": 4.534145537537214e-06, "loss": 0.2682, "step": 34360 }, { "epoch": 1.7307014451885796, "grad_norm": 2.785969481048045, "learning_rate": 4.531228809128486e-06, "loss": 0.252, "step": 34370 }, { "epoch": 1.7312049952162747, "grad_norm": 4.138828041338382, "learning_rate": 4.528312241644651e-06, "loss": 0.2432, "step": 34380 }, { "epoch": 1.73170854524397, "grad_norm": 5.065288488693364, "learning_rate": 4.525395836086937e-06, "loss": 0.2992, "step": 34390 }, { "epoch": 1.7322120952716653, "grad_norm": 4.555639188158577, "learning_rate": 4.522479593456524e-06, "loss": 0.2833, "step": 34400 }, { "epoch": 1.7327156452993604, "grad_norm": 3.4437150664249887, "learning_rate": 4.519563514754529e-06, "loss": 0.273, "step": 34410 }, { "epoch": 1.7332191953270557, "grad_norm": 3.3061105590779767, "learning_rate": 4.5166476009820135e-06, "loss": 0.2949, "step": 34420 }, { "epoch": 1.733722745354751, "grad_norm": 5.451937249355933, "learning_rate": 4.513731853139989e-06, "loss": 0.2565, "step": 34430 }, { "epoch": 1.734226295382446, "grad_norm": 2.9294694408874165, "learning_rate": 4.510816272229402e-06, "loss": 0.2675, "step": 34440 }, { "epoch": 1.7347298454101416, "grad_norm": 4.006340937666044, "learning_rate": 4.507900859251144e-06, "loss": 0.2839, "step": 34450 }, { "epoch": 1.7352333954378367, "grad_norm": 5.833889956510161, "learning_rate": 4.504985615206052e-06, "loss": 0.3129, "step": 34460 }, { "epoch": 1.735736945465532, "grad_norm": 4.067525400290032, "learning_rate": 4.502070541094903e-06, "loss": 0.2509, "step": 34470 }, { "epoch": 1.7362404954932273, "grad_norm": 3.5340939414919137, "learning_rate": 4.499155637918412e-06, "loss": 0.3053, "step": 34480 }, { "epoch": 1.7367440455209224, "grad_norm": 4.255211800263875, "learning_rate": 4.496240906677245e-06, "loss": 0.2711, "step": 34490 }, { "epoch": 1.7372475955486177, "grad_norm": 4.625861405098378, "learning_rate": 4.493326348371998e-06, "loss": 0.2649, "step": 34500 }, { "epoch": 1.737751145576313, "grad_norm": 3.88401575478977, "learning_rate": 4.490411964003212e-06, "loss": 0.2866, "step": 34510 }, { "epoch": 1.7382546956040081, "grad_norm": 6.04575679131376, "learning_rate": 4.487497754571371e-06, "loss": 0.3117, "step": 34520 }, { "epoch": 1.7387582456317037, "grad_norm": 3.953603044482443, "learning_rate": 4.484583721076896e-06, "loss": 0.3144, "step": 34530 }, { "epoch": 1.7392617956593988, "grad_norm": 3.753898896819467, "learning_rate": 4.4816698645201474e-06, "loss": 0.2753, "step": 34540 }, { "epoch": 1.739765345687094, "grad_norm": 4.155579502762742, "learning_rate": 4.478756185901427e-06, "loss": 0.2702, "step": 34550 }, { "epoch": 1.7402688957147894, "grad_norm": 2.5961530719948938, "learning_rate": 4.475842686220974e-06, "loss": 0.2473, "step": 34560 }, { "epoch": 1.7407724457424845, "grad_norm": 4.261763533583835, "learning_rate": 4.472929366478966e-06, "loss": 0.2866, "step": 34570 }, { "epoch": 1.7412759957701798, "grad_norm": 3.5736632050421377, "learning_rate": 4.470016227675517e-06, "loss": 0.2901, "step": 34580 }, { "epoch": 1.741779545797875, "grad_norm": 3.6649064136651455, "learning_rate": 4.467103270810685e-06, "loss": 0.2346, "step": 34590 }, { "epoch": 1.7422830958255702, "grad_norm": 4.460341891493144, "learning_rate": 4.464190496884459e-06, "loss": 0.2683, "step": 34600 }, { "epoch": 1.7427866458532655, "grad_norm": 3.160652604332505, "learning_rate": 4.461277906896769e-06, "loss": 0.2617, "step": 34610 }, { "epoch": 1.7432901958809608, "grad_norm": 3.7191172754536077, "learning_rate": 4.45836550184748e-06, "loss": 0.3052, "step": 34620 }, { "epoch": 1.7437937459086559, "grad_norm": 3.8972495948454067, "learning_rate": 4.455453282736397e-06, "loss": 0.2801, "step": 34630 }, { "epoch": 1.7442972959363514, "grad_norm": 3.381360532641231, "learning_rate": 4.452541250563254e-06, "loss": 0.2592, "step": 34640 }, { "epoch": 1.7448008459640465, "grad_norm": 4.2018064384149465, "learning_rate": 4.449629406327727e-06, "loss": 0.3016, "step": 34650 }, { "epoch": 1.7453043959917418, "grad_norm": 3.661825200546271, "learning_rate": 4.446717751029425e-06, "loss": 0.3362, "step": 34660 }, { "epoch": 1.7458079460194371, "grad_norm": 3.7770141267035267, "learning_rate": 4.443806285667895e-06, "loss": 0.2605, "step": 34670 }, { "epoch": 1.7463114960471322, "grad_norm": 4.564778241015681, "learning_rate": 4.440895011242614e-06, "loss": 0.3173, "step": 34680 }, { "epoch": 1.7468150460748275, "grad_norm": 5.407976332796167, "learning_rate": 4.437983928752998e-06, "loss": 0.2449, "step": 34690 }, { "epoch": 1.7473185961025228, "grad_norm": 2.7681633338808846, "learning_rate": 4.435073039198396e-06, "loss": 0.2165, "step": 34700 }, { "epoch": 1.747822146130218, "grad_norm": 3.9517667249433006, "learning_rate": 4.432162343578087e-06, "loss": 0.3017, "step": 34710 }, { "epoch": 1.7483256961579134, "grad_norm": 3.434980874026655, "learning_rate": 4.4292518428912876e-06, "loss": 0.275, "step": 34720 }, { "epoch": 1.7488292461856085, "grad_norm": 4.286189506615804, "learning_rate": 4.4263415381371465e-06, "loss": 0.3334, "step": 34730 }, { "epoch": 1.7493327962133038, "grad_norm": 3.0318289134228187, "learning_rate": 4.423431430314745e-06, "loss": 0.3104, "step": 34740 }, { "epoch": 1.7498363462409992, "grad_norm": 3.7741844665538875, "learning_rate": 4.420521520423096e-06, "loss": 0.3135, "step": 34750 }, { "epoch": 1.7503398962686942, "grad_norm": 3.6825862446112985, "learning_rate": 4.4176118094611466e-06, "loss": 0.3007, "step": 34760 }, { "epoch": 1.7508434462963895, "grad_norm": 4.175433066464296, "learning_rate": 4.4147022984277744e-06, "loss": 0.3014, "step": 34770 }, { "epoch": 1.7513469963240849, "grad_norm": 2.3502859570644943, "learning_rate": 4.411792988321784e-06, "loss": 0.3019, "step": 34780 }, { "epoch": 1.75185054635178, "grad_norm": 4.9233174359425265, "learning_rate": 4.40888388014192e-06, "loss": 0.259, "step": 34790 }, { "epoch": 1.7523540963794753, "grad_norm": 2.8839745640993497, "learning_rate": 4.40597497488685e-06, "loss": 0.2434, "step": 34800 }, { "epoch": 1.7528576464071706, "grad_norm": 4.306681467003879, "learning_rate": 4.403066273555175e-06, "loss": 0.2976, "step": 34810 }, { "epoch": 1.7533611964348657, "grad_norm": 3.3727886833099037, "learning_rate": 4.400157777145428e-06, "loss": 0.256, "step": 34820 }, { "epoch": 1.7538647464625612, "grad_norm": 3.6540065557497052, "learning_rate": 4.397249486656065e-06, "loss": 0.293, "step": 34830 }, { "epoch": 1.7543682964902563, "grad_norm": 3.3512923475400713, "learning_rate": 4.3943414030854825e-06, "loss": 0.2617, "step": 34840 }, { "epoch": 1.7548718465179516, "grad_norm": 3.968221134936253, "learning_rate": 4.391433527431994e-06, "loss": 0.2513, "step": 34850 }, { "epoch": 1.755375396545647, "grad_norm": 2.801366663141094, "learning_rate": 4.388525860693847e-06, "loss": 0.259, "step": 34860 }, { "epoch": 1.755878946573342, "grad_norm": 3.919905727863658, "learning_rate": 4.385618403869217e-06, "loss": 0.2572, "step": 34870 }, { "epoch": 1.7563824966010373, "grad_norm": 3.631409630745882, "learning_rate": 4.38271115795621e-06, "loss": 0.284, "step": 34880 }, { "epoch": 1.7568860466287326, "grad_norm": 2.2821545661631517, "learning_rate": 4.379804123952854e-06, "loss": 0.2937, "step": 34890 }, { "epoch": 1.7573895966564277, "grad_norm": 4.077715185346426, "learning_rate": 4.37689730285711e-06, "loss": 0.3426, "step": 34900 }, { "epoch": 1.7578931466841232, "grad_norm": 3.5415525513392514, "learning_rate": 4.373990695666865e-06, "loss": 0.2598, "step": 34910 }, { "epoch": 1.7583966967118183, "grad_norm": 3.333949413973517, "learning_rate": 4.3710843033799255e-06, "loss": 0.2493, "step": 34920 }, { "epoch": 1.7589002467395136, "grad_norm": 4.275056445961447, "learning_rate": 4.368178126994032e-06, "loss": 0.2869, "step": 34930 }, { "epoch": 1.759403796767209, "grad_norm": 2.1406790509218423, "learning_rate": 4.365272167506849e-06, "loss": 0.3011, "step": 34940 }, { "epoch": 1.759907346794904, "grad_norm": 3.7523523470529097, "learning_rate": 4.362366425915965e-06, "loss": 0.3194, "step": 34950 }, { "epoch": 1.7604108968225993, "grad_norm": 3.340790436991974, "learning_rate": 4.359460903218897e-06, "loss": 0.2375, "step": 34960 }, { "epoch": 1.7609144468502946, "grad_norm": 4.754654418260819, "learning_rate": 4.3565556004130825e-06, "loss": 0.2248, "step": 34970 }, { "epoch": 1.7614179968779897, "grad_norm": 3.153448471341629, "learning_rate": 4.353650518495887e-06, "loss": 0.2394, "step": 34980 }, { "epoch": 1.761921546905685, "grad_norm": 3.322632356375471, "learning_rate": 4.350745658464595e-06, "loss": 0.2639, "step": 34990 }, { "epoch": 1.7624250969333803, "grad_norm": 3.192090901979793, "learning_rate": 4.347841021316423e-06, "loss": 0.3229, "step": 35000 }, { "epoch": 1.7629286469610754, "grad_norm": 4.051030318544335, "learning_rate": 4.344936608048503e-06, "loss": 0.3076, "step": 35010 }, { "epoch": 1.763432196988771, "grad_norm": 4.342786287272131, "learning_rate": 4.342032419657898e-06, "loss": 0.2535, "step": 35020 }, { "epoch": 1.763935747016466, "grad_norm": 3.8072091527264087, "learning_rate": 4.3391284571415865e-06, "loss": 0.2551, "step": 35030 }, { "epoch": 1.7644392970441614, "grad_norm": 3.251717366706231, "learning_rate": 4.336224721496472e-06, "loss": 0.2637, "step": 35040 }, { "epoch": 1.7649428470718567, "grad_norm": 2.549356568045726, "learning_rate": 4.333321213719383e-06, "loss": 0.298, "step": 35050 }, { "epoch": 1.7654463970995518, "grad_norm": 2.7096368836206035, "learning_rate": 4.330417934807065e-06, "loss": 0.2986, "step": 35060 }, { "epoch": 1.765949947127247, "grad_norm": 3.9735887648776487, "learning_rate": 4.327514885756188e-06, "loss": 0.2181, "step": 35070 }, { "epoch": 1.7664534971549424, "grad_norm": 5.6111742448196935, "learning_rate": 4.324612067563343e-06, "loss": 0.2771, "step": 35080 }, { "epoch": 1.7669570471826375, "grad_norm": 3.2088040296978497, "learning_rate": 4.32170948122504e-06, "loss": 0.3228, "step": 35090 }, { "epoch": 1.767460597210333, "grad_norm": 2.689271120518637, "learning_rate": 4.318807127737712e-06, "loss": 0.238, "step": 35100 }, { "epoch": 1.767964147238028, "grad_norm": 4.2070819682787635, "learning_rate": 4.31590500809771e-06, "loss": 0.2415, "step": 35110 }, { "epoch": 1.7684676972657234, "grad_norm": 4.0328122971071405, "learning_rate": 4.313003123301305e-06, "loss": 0.2633, "step": 35120 }, { "epoch": 1.7689712472934187, "grad_norm": 5.040768419620407, "learning_rate": 4.310101474344688e-06, "loss": 0.282, "step": 35130 }, { "epoch": 1.7694747973211138, "grad_norm": 3.154468570294471, "learning_rate": 4.307200062223967e-06, "loss": 0.3084, "step": 35140 }, { "epoch": 1.769978347348809, "grad_norm": 4.386945112116783, "learning_rate": 4.304298887935173e-06, "loss": 0.2906, "step": 35150 }, { "epoch": 1.7704818973765044, "grad_norm": 4.192927291939196, "learning_rate": 4.301397952474251e-06, "loss": 0.2991, "step": 35160 }, { "epoch": 1.7709854474041995, "grad_norm": 1.9183827179601036, "learning_rate": 4.298497256837067e-06, "loss": 0.3, "step": 35170 }, { "epoch": 1.7714889974318948, "grad_norm": 3.49413210153715, "learning_rate": 4.295596802019405e-06, "loss": 0.2268, "step": 35180 }, { "epoch": 1.7719925474595901, "grad_norm": 4.463600973801658, "learning_rate": 4.292696589016962e-06, "loss": 0.2919, "step": 35190 }, { "epoch": 1.7724960974872852, "grad_norm": 2.5548725565624935, "learning_rate": 4.2897966188253525e-06, "loss": 0.28, "step": 35200 }, { "epoch": 1.7729996475149807, "grad_norm": 3.277915233710533, "learning_rate": 4.2868968924401155e-06, "loss": 0.2565, "step": 35210 }, { "epoch": 1.7735031975426758, "grad_norm": 2.836634780149708, "learning_rate": 4.283997410856697e-06, "loss": 0.2518, "step": 35220 }, { "epoch": 1.7740067475703711, "grad_norm": 3.191484672985009, "learning_rate": 4.281098175070465e-06, "loss": 0.2838, "step": 35230 }, { "epoch": 1.7745102975980664, "grad_norm": 3.2329303674703063, "learning_rate": 4.278199186076698e-06, "loss": 0.2566, "step": 35240 }, { "epoch": 1.7750138476257615, "grad_norm": 4.207353567769288, "learning_rate": 4.275300444870599e-06, "loss": 0.2466, "step": 35250 }, { "epoch": 1.7755173976534568, "grad_norm": 3.348421869726278, "learning_rate": 4.272401952447271e-06, "loss": 0.2594, "step": 35260 }, { "epoch": 1.7760209476811522, "grad_norm": 2.9575401746117853, "learning_rate": 4.269503709801745e-06, "loss": 0.2769, "step": 35270 }, { "epoch": 1.7765244977088472, "grad_norm": 2.975864996015137, "learning_rate": 4.266605717928959e-06, "loss": 0.2636, "step": 35280 }, { "epoch": 1.7770280477365428, "grad_norm": 3.220272098225139, "learning_rate": 4.26370797782377e-06, "loss": 0.2575, "step": 35290 }, { "epoch": 1.7775315977642379, "grad_norm": 3.6596499522804327, "learning_rate": 4.260810490480944e-06, "loss": 0.2884, "step": 35300 }, { "epoch": 1.7780351477919332, "grad_norm": 3.879609625081688, "learning_rate": 4.257913256895162e-06, "loss": 0.2647, "step": 35310 }, { "epoch": 1.7785386978196285, "grad_norm": 3.338758494499564, "learning_rate": 4.25501627806102e-06, "loss": 0.2426, "step": 35320 }, { "epoch": 1.7790422478473236, "grad_norm": 3.448031171262277, "learning_rate": 4.252119554973023e-06, "loss": 0.21, "step": 35330 }, { "epoch": 1.7795457978750189, "grad_norm": 3.379951657201088, "learning_rate": 4.249223088625589e-06, "loss": 0.2485, "step": 35340 }, { "epoch": 1.7800493479027142, "grad_norm": 4.008696767846077, "learning_rate": 4.246326880013049e-06, "loss": 0.3111, "step": 35350 }, { "epoch": 1.7805528979304093, "grad_norm": 4.442156587676682, "learning_rate": 4.243430930129647e-06, "loss": 0.2354, "step": 35360 }, { "epoch": 1.7810564479581046, "grad_norm": 4.554187304680029, "learning_rate": 4.2405352399695325e-06, "loss": 0.2373, "step": 35370 }, { "epoch": 1.7815599979858, "grad_norm": 4.385029152419299, "learning_rate": 4.237639810526773e-06, "loss": 0.2916, "step": 35380 }, { "epoch": 1.782063548013495, "grad_norm": 2.8440749668401066, "learning_rate": 4.234744642795344e-06, "loss": 0.2833, "step": 35390 }, { "epoch": 1.7825670980411905, "grad_norm": 3.259482357341624, "learning_rate": 4.231849737769126e-06, "loss": 0.2682, "step": 35400 }, { "epoch": 1.7830706480688856, "grad_norm": 3.515501071332856, "learning_rate": 4.228955096441917e-06, "loss": 0.2644, "step": 35410 }, { "epoch": 1.783574198096581, "grad_norm": 2.9861592487158086, "learning_rate": 4.22606071980742e-06, "loss": 0.3044, "step": 35420 }, { "epoch": 1.7840777481242762, "grad_norm": 3.506069957273524, "learning_rate": 4.223166608859247e-06, "loss": 0.2473, "step": 35430 }, { "epoch": 1.7845812981519713, "grad_norm": 2.5904636689723506, "learning_rate": 4.220272764590922e-06, "loss": 0.239, "step": 35440 }, { "epoch": 1.7850848481796666, "grad_norm": 3.854127390264106, "learning_rate": 4.217379187995874e-06, "loss": 0.2238, "step": 35450 }, { "epoch": 1.785588398207362, "grad_norm": 3.306317735828203, "learning_rate": 4.214485880067445e-06, "loss": 0.2542, "step": 35460 }, { "epoch": 1.786091948235057, "grad_norm": 2.586783634777788, "learning_rate": 4.211592841798877e-06, "loss": 0.2314, "step": 35470 }, { "epoch": 1.7865954982627525, "grad_norm": 3.9494299595046973, "learning_rate": 4.208700074183324e-06, "loss": 0.2681, "step": 35480 }, { "epoch": 1.7870990482904476, "grad_norm": 3.4728704766913583, "learning_rate": 4.205807578213849e-06, "loss": 0.256, "step": 35490 }, { "epoch": 1.787602598318143, "grad_norm": 3.1491749840080443, "learning_rate": 4.2029153548834185e-06, "loss": 0.2638, "step": 35500 }, { "epoch": 1.7881061483458383, "grad_norm": 4.647902993993602, "learning_rate": 4.200023405184907e-06, "loss": 0.2469, "step": 35510 }, { "epoch": 1.7886096983735333, "grad_norm": 4.680117352500816, "learning_rate": 4.197131730111095e-06, "loss": 0.263, "step": 35520 }, { "epoch": 1.7891132484012287, "grad_norm": 3.44831792317389, "learning_rate": 4.194240330654668e-06, "loss": 0.2998, "step": 35530 }, { "epoch": 1.789616798428924, "grad_norm": 4.595327649098219, "learning_rate": 4.191349207808216e-06, "loss": 0.3385, "step": 35540 }, { "epoch": 1.790120348456619, "grad_norm": 5.66706252925874, "learning_rate": 4.188458362564236e-06, "loss": 0.3001, "step": 35550 }, { "epoch": 1.7906238984843146, "grad_norm": 4.179348606733465, "learning_rate": 4.185567795915129e-06, "loss": 0.2729, "step": 35560 }, { "epoch": 1.7911274485120097, "grad_norm": 4.256068615716322, "learning_rate": 4.182677508853199e-06, "loss": 0.3191, "step": 35570 }, { "epoch": 1.7916309985397048, "grad_norm": 3.6606115284105134, "learning_rate": 4.179787502370658e-06, "loss": 0.2762, "step": 35580 }, { "epoch": 1.7921345485674003, "grad_norm": 4.595010066466332, "learning_rate": 4.176897777459617e-06, "loss": 0.2489, "step": 35590 }, { "epoch": 1.7926380985950954, "grad_norm": 4.31327070277091, "learning_rate": 4.174008335112094e-06, "loss": 0.2485, "step": 35600 }, { "epoch": 1.7931416486227907, "grad_norm": 4.275151269466794, "learning_rate": 4.1711191763200055e-06, "loss": 0.2861, "step": 35610 }, { "epoch": 1.793645198650486, "grad_norm": 4.283201856813805, "learning_rate": 4.168230302075176e-06, "loss": 0.2561, "step": 35620 }, { "epoch": 1.794148748678181, "grad_norm": 3.864071119591166, "learning_rate": 4.165341713369327e-06, "loss": 0.2588, "step": 35630 }, { "epoch": 1.7946522987058764, "grad_norm": 3.979448036052916, "learning_rate": 4.162453411194088e-06, "loss": 0.2584, "step": 35640 }, { "epoch": 1.7951558487335717, "grad_norm": 4.602213527930637, "learning_rate": 4.159565396540986e-06, "loss": 0.2517, "step": 35650 }, { "epoch": 1.7956593987612668, "grad_norm": 3.0081906414869093, "learning_rate": 4.156677670401451e-06, "loss": 0.2736, "step": 35660 }, { "epoch": 1.7961629487889623, "grad_norm": 3.1885346366549783, "learning_rate": 4.153790233766809e-06, "loss": 0.225, "step": 35670 }, { "epoch": 1.7966664988166574, "grad_norm": 5.126870315471896, "learning_rate": 4.150903087628295e-06, "loss": 0.3337, "step": 35680 }, { "epoch": 1.7971700488443527, "grad_norm": 3.137262687975683, "learning_rate": 4.148016232977039e-06, "loss": 0.2401, "step": 35690 }, { "epoch": 1.797673598872048, "grad_norm": 3.750131951838781, "learning_rate": 4.14512967080407e-06, "loss": 0.2518, "step": 35700 }, { "epoch": 1.7981771488997431, "grad_norm": 3.109249365181066, "learning_rate": 4.142243402100322e-06, "loss": 0.2749, "step": 35710 }, { "epoch": 1.7986806989274384, "grad_norm": 4.348389279531469, "learning_rate": 4.139357427856622e-06, "loss": 0.2755, "step": 35720 }, { "epoch": 1.7991842489551337, "grad_norm": 2.751531599493906, "learning_rate": 4.136471749063702e-06, "loss": 0.2469, "step": 35730 }, { "epoch": 1.7996877989828288, "grad_norm": 6.1568512256656, "learning_rate": 4.133586366712186e-06, "loss": 0.3061, "step": 35740 }, { "epoch": 1.8001913490105244, "grad_norm": 2.6072758668938496, "learning_rate": 4.130701281792599e-06, "loss": 0.2543, "step": 35750 }, { "epoch": 1.8006948990382194, "grad_norm": 3.6281693248238844, "learning_rate": 4.127816495295367e-06, "loss": 0.2925, "step": 35760 }, { "epoch": 1.8011984490659148, "grad_norm": 3.4398351993680074, "learning_rate": 4.124932008210811e-06, "loss": 0.2439, "step": 35770 }, { "epoch": 1.80170199909361, "grad_norm": 3.3577549667942237, "learning_rate": 4.122047821529147e-06, "loss": 0.2855, "step": 35780 }, { "epoch": 1.8022055491213052, "grad_norm": 4.908193222755403, "learning_rate": 4.119163936240493e-06, "loss": 0.2711, "step": 35790 }, { "epoch": 1.8027090991490005, "grad_norm": 3.6334252755175522, "learning_rate": 4.116280353334859e-06, "loss": 0.2617, "step": 35800 }, { "epoch": 1.8032126491766958, "grad_norm": 3.1486346658495306, "learning_rate": 4.113397073802153e-06, "loss": 0.2564, "step": 35810 }, { "epoch": 1.8037161992043909, "grad_norm": 4.173530968118713, "learning_rate": 4.110514098632177e-06, "loss": 0.2377, "step": 35820 }, { "epoch": 1.8042197492320862, "grad_norm": 3.5868905284970114, "learning_rate": 4.1076314288146334e-06, "loss": 0.2908, "step": 35830 }, { "epoch": 1.8047232992597815, "grad_norm": 5.271085253451843, "learning_rate": 4.104749065339113e-06, "loss": 0.3142, "step": 35840 }, { "epoch": 1.8052268492874766, "grad_norm": 3.090455110033254, "learning_rate": 4.101867009195109e-06, "loss": 0.2448, "step": 35850 }, { "epoch": 1.805730399315172, "grad_norm": 3.347975797928239, "learning_rate": 4.098985261372002e-06, "loss": 0.3038, "step": 35860 }, { "epoch": 1.8062339493428672, "grad_norm": 2.2558728740746345, "learning_rate": 4.096103822859073e-06, "loss": 0.231, "step": 35870 }, { "epoch": 1.8067374993705625, "grad_norm": 3.9224441544035855, "learning_rate": 4.093222694645489e-06, "loss": 0.2565, "step": 35880 }, { "epoch": 1.8072410493982578, "grad_norm": 3.6040828751836567, "learning_rate": 4.090341877720318e-06, "loss": 0.2592, "step": 35890 }, { "epoch": 1.807744599425953, "grad_norm": 4.871619889874801, "learning_rate": 4.087461373072518e-06, "loss": 0.3491, "step": 35900 }, { "epoch": 1.8082481494536482, "grad_norm": 5.035829059492268, "learning_rate": 4.08458118169094e-06, "loss": 0.2878, "step": 35910 }, { "epoch": 1.8087516994813435, "grad_norm": 3.332470592211229, "learning_rate": 4.081701304564328e-06, "loss": 0.2561, "step": 35920 }, { "epoch": 1.8092552495090386, "grad_norm": 4.14549005010516, "learning_rate": 4.078821742681317e-06, "loss": 0.2768, "step": 35930 }, { "epoch": 1.8097587995367341, "grad_norm": 3.44589132063972, "learning_rate": 4.075942497030436e-06, "loss": 0.2157, "step": 35940 }, { "epoch": 1.8102623495644292, "grad_norm": 3.345641304229834, "learning_rate": 4.073063568600102e-06, "loss": 0.2783, "step": 35950 }, { "epoch": 1.8107658995921245, "grad_norm": 3.755604895764151, "learning_rate": 4.070184958378625e-06, "loss": 0.2651, "step": 35960 }, { "epoch": 1.8112694496198198, "grad_norm": 4.951677783720622, "learning_rate": 4.067306667354206e-06, "loss": 0.2351, "step": 35970 }, { "epoch": 1.811772999647515, "grad_norm": 3.677287228846167, "learning_rate": 4.064428696514937e-06, "loss": 0.2814, "step": 35980 }, { "epoch": 1.8122765496752102, "grad_norm": 2.7783625700240977, "learning_rate": 4.061551046848798e-06, "loss": 0.2301, "step": 35990 }, { "epoch": 1.8127800997029055, "grad_norm": 4.415909586423844, "learning_rate": 4.058673719343662e-06, "loss": 0.2729, "step": 36000 }, { "epoch": 1.8132836497306006, "grad_norm": 4.831179813202426, "learning_rate": 4.055796714987289e-06, "loss": 0.3082, "step": 36010 }, { "epoch": 1.813787199758296, "grad_norm": 3.434268070420146, "learning_rate": 4.052920034767325e-06, "loss": 0.2807, "step": 36020 }, { "epoch": 1.8142907497859913, "grad_norm": 3.960008774270685, "learning_rate": 4.0500436796713125e-06, "loss": 0.2272, "step": 36030 }, { "epoch": 1.8147942998136863, "grad_norm": 4.210887477230468, "learning_rate": 4.047167650686675e-06, "loss": 0.2923, "step": 36040 }, { "epoch": 1.8152978498413819, "grad_norm": 2.8667182716586983, "learning_rate": 4.044291948800729e-06, "loss": 0.2701, "step": 36050 }, { "epoch": 1.815801399869077, "grad_norm": 3.6782766722863833, "learning_rate": 4.041416575000676e-06, "loss": 0.2549, "step": 36060 }, { "epoch": 1.8163049498967723, "grad_norm": 2.7261321278757387, "learning_rate": 4.038541530273606e-06, "loss": 0.2864, "step": 36070 }, { "epoch": 1.8168084999244676, "grad_norm": 3.164853402384712, "learning_rate": 4.035666815606499e-06, "loss": 0.2874, "step": 36080 }, { "epoch": 1.8173120499521627, "grad_norm": 3.7387262651566404, "learning_rate": 4.032792431986211e-06, "loss": 0.2877, "step": 36090 }, { "epoch": 1.817815599979858, "grad_norm": 5.521003460808752, "learning_rate": 4.029918380399498e-06, "loss": 0.3267, "step": 36100 }, { "epoch": 1.8183191500075533, "grad_norm": 3.7448688268099333, "learning_rate": 4.027044661832993e-06, "loss": 0.2569, "step": 36110 }, { "epoch": 1.8188227000352484, "grad_norm": 3.8257983711854604, "learning_rate": 4.024171277273218e-06, "loss": 0.2099, "step": 36120 }, { "epoch": 1.819326250062944, "grad_norm": 3.1116948377954885, "learning_rate": 4.02129822770658e-06, "loss": 0.2905, "step": 36130 }, { "epoch": 1.819829800090639, "grad_norm": 3.5791326835701205, "learning_rate": 4.018425514119372e-06, "loss": 0.295, "step": 36140 }, { "epoch": 1.8203333501183343, "grad_norm": 3.077471758383684, "learning_rate": 4.01555313749777e-06, "loss": 0.2975, "step": 36150 }, { "epoch": 1.8208369001460296, "grad_norm": 2.249420791359161, "learning_rate": 4.012681098827834e-06, "loss": 0.249, "step": 36160 }, { "epoch": 1.8213404501737247, "grad_norm": 3.9228939197221684, "learning_rate": 4.009809399095507e-06, "loss": 0.2467, "step": 36170 }, { "epoch": 1.82184400020142, "grad_norm": 4.2342158775374505, "learning_rate": 4.006938039286621e-06, "loss": 0.2516, "step": 36180 }, { "epoch": 1.8223475502291153, "grad_norm": 3.2221532662962553, "learning_rate": 4.0040670203868846e-06, "loss": 0.208, "step": 36190 }, { "epoch": 1.8228511002568104, "grad_norm": 3.853361952940347, "learning_rate": 4.001196343381896e-06, "loss": 0.2927, "step": 36200 }, { "epoch": 1.8233546502845057, "grad_norm": 3.2835886032057826, "learning_rate": 3.99832600925713e-06, "loss": 0.2407, "step": 36210 }, { "epoch": 1.823858200312201, "grad_norm": 3.6296292696396395, "learning_rate": 3.9954560189979476e-06, "loss": 0.2506, "step": 36220 }, { "epoch": 1.8243617503398961, "grad_norm": 2.908984860939718, "learning_rate": 3.992586373589588e-06, "loss": 0.2153, "step": 36230 }, { "epoch": 1.8248653003675916, "grad_norm": 4.473140193111611, "learning_rate": 3.989717074017177e-06, "loss": 0.2868, "step": 36240 }, { "epoch": 1.8253688503952867, "grad_norm": 4.432732622466252, "learning_rate": 3.9868481212657185e-06, "loss": 0.2176, "step": 36250 }, { "epoch": 1.825872400422982, "grad_norm": 3.971746961547288, "learning_rate": 3.9839795163200956e-06, "loss": 0.2898, "step": 36260 }, { "epoch": 1.8263759504506774, "grad_norm": 2.7019650995707227, "learning_rate": 3.981111260165078e-06, "loss": 0.3273, "step": 36270 }, { "epoch": 1.8268795004783724, "grad_norm": 4.603431106739049, "learning_rate": 3.978243353785311e-06, "loss": 0.2841, "step": 36280 }, { "epoch": 1.8273830505060678, "grad_norm": 2.3001470875787158, "learning_rate": 3.9753757981653186e-06, "loss": 0.263, "step": 36290 }, { "epoch": 1.827886600533763, "grad_norm": 3.1511869337361866, "learning_rate": 3.972508594289508e-06, "loss": 0.2898, "step": 36300 }, { "epoch": 1.8283901505614581, "grad_norm": 3.457587510288782, "learning_rate": 3.9696417431421656e-06, "loss": 0.2666, "step": 36310 }, { "epoch": 1.8288937005891537, "grad_norm": 2.9454655138751273, "learning_rate": 3.966775245707452e-06, "loss": 0.2963, "step": 36320 }, { "epoch": 1.8293972506168488, "grad_norm": 4.080050203235404, "learning_rate": 3.963909102969414e-06, "loss": 0.2553, "step": 36330 }, { "epoch": 1.829900800644544, "grad_norm": 5.403865722487024, "learning_rate": 3.961043315911968e-06, "loss": 0.2831, "step": 36340 }, { "epoch": 1.8304043506722394, "grad_norm": 4.128893549952058, "learning_rate": 3.958177885518917e-06, "loss": 0.2374, "step": 36350 }, { "epoch": 1.8309079006999345, "grad_norm": 3.406203360001845, "learning_rate": 3.955312812773934e-06, "loss": 0.2978, "step": 36360 }, { "epoch": 1.8314114507276298, "grad_norm": 4.26366274372265, "learning_rate": 3.952448098660574e-06, "loss": 0.2833, "step": 36370 }, { "epoch": 1.831915000755325, "grad_norm": 3.4180196914816356, "learning_rate": 3.949583744162265e-06, "loss": 0.2697, "step": 36380 }, { "epoch": 1.8324185507830202, "grad_norm": 3.531266932961258, "learning_rate": 3.946719750262318e-06, "loss": 0.2582, "step": 36390 }, { "epoch": 1.8329221008107155, "grad_norm": 4.410078083345713, "learning_rate": 3.943856117943912e-06, "loss": 0.2484, "step": 36400 }, { "epoch": 1.8334256508384108, "grad_norm": 3.421058245817306, "learning_rate": 3.940992848190109e-06, "loss": 0.2558, "step": 36410 }, { "epoch": 1.833929200866106, "grad_norm": 2.3679317223695375, "learning_rate": 3.938129941983844e-06, "loss": 0.2472, "step": 36420 }, { "epoch": 1.8344327508938014, "grad_norm": 3.059201571920226, "learning_rate": 3.9352674003079225e-06, "loss": 0.2363, "step": 36430 }, { "epoch": 1.8349363009214965, "grad_norm": 4.754007836345663, "learning_rate": 3.932405224145032e-06, "loss": 0.3096, "step": 36440 }, { "epoch": 1.8354398509491918, "grad_norm": 3.268495427978658, "learning_rate": 3.929543414477731e-06, "loss": 0.271, "step": 36450 }, { "epoch": 1.8359434009768871, "grad_norm": 4.974404811684886, "learning_rate": 3.926681972288452e-06, "loss": 0.2886, "step": 36460 }, { "epoch": 1.8364469510045822, "grad_norm": 2.7924319907111537, "learning_rate": 3.923820898559505e-06, "loss": 0.2316, "step": 36470 }, { "epoch": 1.8369505010322775, "grad_norm": 4.91377693245516, "learning_rate": 3.920960194273068e-06, "loss": 0.3425, "step": 36480 }, { "epoch": 1.8374540510599728, "grad_norm": 4.114029080619395, "learning_rate": 3.918099860411196e-06, "loss": 0.2032, "step": 36490 }, { "epoch": 1.837957601087668, "grad_norm": 3.693527597989325, "learning_rate": 3.915239897955814e-06, "loss": 0.2948, "step": 36500 }, { "epoch": 1.8384611511153635, "grad_norm": 4.129251567056167, "learning_rate": 3.912380307888722e-06, "loss": 0.277, "step": 36510 }, { "epoch": 1.8389647011430585, "grad_norm": 4.000211353757683, "learning_rate": 3.909521091191591e-06, "loss": 0.2748, "step": 36520 }, { "epoch": 1.8394682511707539, "grad_norm": 3.403416995853059, "learning_rate": 3.906662248845966e-06, "loss": 0.2913, "step": 36530 }, { "epoch": 1.8399718011984492, "grad_norm": 2.90209059150617, "learning_rate": 3.90380378183326e-06, "loss": 0.2744, "step": 36540 }, { "epoch": 1.8404753512261443, "grad_norm": 3.638248474720126, "learning_rate": 3.900945691134758e-06, "loss": 0.3, "step": 36550 }, { "epoch": 1.8409789012538396, "grad_norm": 3.537489500575475, "learning_rate": 3.89808797773162e-06, "loss": 0.2776, "step": 36560 }, { "epoch": 1.8414824512815349, "grad_norm": 6.800262261674424, "learning_rate": 3.895230642604869e-06, "loss": 0.3223, "step": 36570 }, { "epoch": 1.84198600130923, "grad_norm": 3.7034631317222257, "learning_rate": 3.892373686735403e-06, "loss": 0.2903, "step": 36580 }, { "epoch": 1.8424895513369253, "grad_norm": 2.6527250582411344, "learning_rate": 3.889517111103991e-06, "loss": 0.2861, "step": 36590 }, { "epoch": 1.8429931013646206, "grad_norm": 3.072637022461203, "learning_rate": 3.8866609166912675e-06, "loss": 0.233, "step": 36600 }, { "epoch": 1.8434966513923157, "grad_norm": 3.0268447991842202, "learning_rate": 3.883805104477739e-06, "loss": 0.2293, "step": 36610 }, { "epoch": 1.8440002014200112, "grad_norm": 4.411662673189677, "learning_rate": 3.88094967544378e-06, "loss": 0.2701, "step": 36620 }, { "epoch": 1.8445037514477063, "grad_norm": 3.88170038044426, "learning_rate": 3.878094630569634e-06, "loss": 0.2458, "step": 36630 }, { "epoch": 1.8450073014754016, "grad_norm": 4.367510512089848, "learning_rate": 3.875239970835411e-06, "loss": 0.3245, "step": 36640 }, { "epoch": 1.845510851503097, "grad_norm": 3.555227467208306, "learning_rate": 3.872385697221088e-06, "loss": 0.2237, "step": 36650 }, { "epoch": 1.846014401530792, "grad_norm": 1.6686140728997538, "learning_rate": 3.869531810706515e-06, "loss": 0.2367, "step": 36660 }, { "epoch": 1.8465179515584873, "grad_norm": 2.032597947504462, "learning_rate": 3.866678312271401e-06, "loss": 0.2557, "step": 36670 }, { "epoch": 1.8470215015861826, "grad_norm": 3.9398576843482767, "learning_rate": 3.863825202895331e-06, "loss": 0.2972, "step": 36680 }, { "epoch": 1.8475250516138777, "grad_norm": 3.5286588349193564, "learning_rate": 3.860972483557747e-06, "loss": 0.3029, "step": 36690 }, { "epoch": 1.8480286016415732, "grad_norm": 4.160723835763243, "learning_rate": 3.858120155237966e-06, "loss": 0.2642, "step": 36700 }, { "epoch": 1.8485321516692683, "grad_norm": 4.084910011890182, "learning_rate": 3.855268218915162e-06, "loss": 0.3569, "step": 36710 }, { "epoch": 1.8490357016969636, "grad_norm": 3.6315460096195347, "learning_rate": 3.852416675568382e-06, "loss": 0.2962, "step": 36720 }, { "epoch": 1.849539251724659, "grad_norm": 3.351669956378896, "learning_rate": 3.849565526176533e-06, "loss": 0.2851, "step": 36730 }, { "epoch": 1.850042801752354, "grad_norm": 3.45705461810727, "learning_rate": 3.846714771718391e-06, "loss": 0.2498, "step": 36740 }, { "epoch": 1.8505463517800493, "grad_norm": 3.8851745758725995, "learning_rate": 3.84386441317259e-06, "loss": 0.2759, "step": 36750 }, { "epoch": 1.8510499018077446, "grad_norm": 3.980423790311694, "learning_rate": 3.841014451517636e-06, "loss": 0.2761, "step": 36760 }, { "epoch": 1.8515534518354397, "grad_norm": 4.23041004987295, "learning_rate": 3.838164887731896e-06, "loss": 0.3092, "step": 36770 }, { "epoch": 1.8520570018631353, "grad_norm": 3.7763854311720864, "learning_rate": 3.835315722793596e-06, "loss": 0.2952, "step": 36780 }, { "epoch": 1.8525605518908304, "grad_norm": 4.490551942616512, "learning_rate": 3.832466957680828e-06, "loss": 0.2946, "step": 36790 }, { "epoch": 1.8530641019185254, "grad_norm": 2.6965924167748927, "learning_rate": 3.82961859337155e-06, "loss": 0.2502, "step": 36800 }, { "epoch": 1.853567651946221, "grad_norm": 3.7891729901536353, "learning_rate": 3.826770630843578e-06, "loss": 0.2629, "step": 36810 }, { "epoch": 1.854071201973916, "grad_norm": 3.5129227804065972, "learning_rate": 3.823923071074591e-06, "loss": 0.2412, "step": 36820 }, { "epoch": 1.8545747520016114, "grad_norm": 2.083451789500484, "learning_rate": 3.821075915042133e-06, "loss": 0.2794, "step": 36830 }, { "epoch": 1.8550783020293067, "grad_norm": 3.4058027844858207, "learning_rate": 3.818229163723606e-06, "loss": 0.2527, "step": 36840 }, { "epoch": 1.8555818520570018, "grad_norm": 3.5161679011513334, "learning_rate": 3.8153828180962704e-06, "loss": 0.2952, "step": 36850 }, { "epoch": 1.856085402084697, "grad_norm": 3.5285984350295356, "learning_rate": 3.8125368791372553e-06, "loss": 0.2867, "step": 36860 }, { "epoch": 1.8565889521123924, "grad_norm": 3.320144981557349, "learning_rate": 3.8096913478235436e-06, "loss": 0.3093, "step": 36870 }, { "epoch": 1.8570925021400875, "grad_norm": 4.194045728744753, "learning_rate": 3.80684622513198e-06, "loss": 0.2807, "step": 36880 }, { "epoch": 1.857596052167783, "grad_norm": 2.386497962138328, "learning_rate": 3.804001512039271e-06, "loss": 0.2779, "step": 36890 }, { "epoch": 1.858099602195478, "grad_norm": 4.38519362433952, "learning_rate": 3.8011572095219805e-06, "loss": 0.2808, "step": 36900 }, { "epoch": 1.8586031522231734, "grad_norm": 2.1266946793114325, "learning_rate": 3.7983133185565285e-06, "loss": 0.2347, "step": 36910 }, { "epoch": 1.8591067022508687, "grad_norm": 3.532857856302676, "learning_rate": 3.795469840119201e-06, "loss": 0.2445, "step": 36920 }, { "epoch": 1.8596102522785638, "grad_norm": 3.479880908329824, "learning_rate": 3.792626775186136e-06, "loss": 0.2722, "step": 36930 }, { "epoch": 1.8601138023062591, "grad_norm": 4.200889491590403, "learning_rate": 3.789784124733332e-06, "loss": 0.2787, "step": 36940 }, { "epoch": 1.8606173523339544, "grad_norm": 4.84283774597504, "learning_rate": 3.786941889736647e-06, "loss": 0.2912, "step": 36950 }, { "epoch": 1.8611209023616495, "grad_norm": 4.3173934223899675, "learning_rate": 3.7841000711717913e-06, "loss": 0.3032, "step": 36960 }, { "epoch": 1.861624452389345, "grad_norm": 2.3193630654793442, "learning_rate": 3.781258670014341e-06, "loss": 0.2583, "step": 36970 }, { "epoch": 1.8621280024170401, "grad_norm": 2.953400465574219, "learning_rate": 3.778417687239716e-06, "loss": 0.2653, "step": 36980 }, { "epoch": 1.8626315524447354, "grad_norm": 4.322496995831765, "learning_rate": 3.7755771238232042e-06, "loss": 0.2624, "step": 36990 }, { "epoch": 1.8631351024724307, "grad_norm": 4.021838094899172, "learning_rate": 3.772736980739944e-06, "loss": 0.287, "step": 37000 }, { "epoch": 1.8636386525001258, "grad_norm": 4.030773444101021, "learning_rate": 3.769897258964932e-06, "loss": 0.2238, "step": 37010 }, { "epoch": 1.8641422025278211, "grad_norm": 2.855683350246444, "learning_rate": 3.7670579594730166e-06, "loss": 0.2457, "step": 37020 }, { "epoch": 1.8646457525555165, "grad_norm": 3.1457781313325337, "learning_rate": 3.764219083238906e-06, "loss": 0.2592, "step": 37030 }, { "epoch": 1.8651493025832115, "grad_norm": 3.173673179733738, "learning_rate": 3.761380631237161e-06, "loss": 0.2403, "step": 37040 }, { "epoch": 1.8656528526109069, "grad_norm": 3.888173352756108, "learning_rate": 3.7585426044421936e-06, "loss": 0.2615, "step": 37050 }, { "epoch": 1.8661564026386022, "grad_norm": 2.8889291374012496, "learning_rate": 3.755705003828273e-06, "loss": 0.2848, "step": 37060 }, { "epoch": 1.8666599526662973, "grad_norm": 4.105246162858527, "learning_rate": 3.7528678303695237e-06, "loss": 0.2456, "step": 37070 }, { "epoch": 1.8671635026939928, "grad_norm": 3.5437347205958702, "learning_rate": 3.7500310850399195e-06, "loss": 0.2795, "step": 37080 }, { "epoch": 1.8676670527216879, "grad_norm": 3.566998841348102, "learning_rate": 3.7471947688132914e-06, "loss": 0.2949, "step": 37090 }, { "epoch": 1.8681706027493832, "grad_norm": 2.845011791660616, "learning_rate": 3.74435888266332e-06, "loss": 0.2774, "step": 37100 }, { "epoch": 1.8686741527770785, "grad_norm": 4.63238529823603, "learning_rate": 3.741523427563541e-06, "loss": 0.3094, "step": 37110 }, { "epoch": 1.8691777028047736, "grad_norm": 2.8669810518203347, "learning_rate": 3.7386884044873363e-06, "loss": 0.2675, "step": 37120 }, { "epoch": 1.8696812528324689, "grad_norm": 3.1312804986442417, "learning_rate": 3.735853814407946e-06, "loss": 0.2315, "step": 37130 }, { "epoch": 1.8701848028601642, "grad_norm": 4.132126713988071, "learning_rate": 3.7330196582984584e-06, "loss": 0.2515, "step": 37140 }, { "epoch": 1.8706883528878593, "grad_norm": 4.0482790338470185, "learning_rate": 3.7301859371318157e-06, "loss": 0.2749, "step": 37150 }, { "epoch": 1.8711919029155548, "grad_norm": 4.653811003906798, "learning_rate": 3.7273526518808067e-06, "loss": 0.2922, "step": 37160 }, { "epoch": 1.87169545294325, "grad_norm": 4.3127734136344085, "learning_rate": 3.724519803518072e-06, "loss": 0.2492, "step": 37170 }, { "epoch": 1.8721990029709452, "grad_norm": 2.874623657843621, "learning_rate": 3.7216873930161057e-06, "loss": 0.2824, "step": 37180 }, { "epoch": 1.8727025529986405, "grad_norm": 3.947805750161696, "learning_rate": 3.718855421347245e-06, "loss": 0.2999, "step": 37190 }, { "epoch": 1.8732061030263356, "grad_norm": 2.2316682786636832, "learning_rate": 3.716023889483682e-06, "loss": 0.2801, "step": 37200 }, { "epoch": 1.873709653054031, "grad_norm": 3.947658531603055, "learning_rate": 3.7131927983974548e-06, "loss": 0.2624, "step": 37210 }, { "epoch": 1.8742132030817262, "grad_norm": 2.6585284902607302, "learning_rate": 3.7103621490604526e-06, "loss": 0.2451, "step": 37220 }, { "epoch": 1.8747167531094213, "grad_norm": 4.075805671029093, "learning_rate": 3.707531942444409e-06, "loss": 0.2394, "step": 37230 }, { "epoch": 1.8752203031371166, "grad_norm": 3.667743887824254, "learning_rate": 3.704702179520912e-06, "loss": 0.2726, "step": 37240 }, { "epoch": 1.875723853164812, "grad_norm": 3.76648703667996, "learning_rate": 3.7018728612613925e-06, "loss": 0.2697, "step": 37250 }, { "epoch": 1.876227403192507, "grad_norm": 3.3556420015383384, "learning_rate": 3.6990439886371276e-06, "loss": 0.2288, "step": 37260 }, { "epoch": 1.8767309532202026, "grad_norm": 2.8834585309828267, "learning_rate": 3.6962155626192444e-06, "loss": 0.2482, "step": 37270 }, { "epoch": 1.8772345032478976, "grad_norm": 4.584980923880804, "learning_rate": 3.693387584178717e-06, "loss": 0.3128, "step": 37280 }, { "epoch": 1.877738053275593, "grad_norm": 2.593417278820943, "learning_rate": 3.690560054286363e-06, "loss": 0.3062, "step": 37290 }, { "epoch": 1.8782416033032883, "grad_norm": 5.6072444021013155, "learning_rate": 3.68773297391285e-06, "loss": 0.2809, "step": 37300 }, { "epoch": 1.8787451533309834, "grad_norm": 3.628104231657266, "learning_rate": 3.684906344028687e-06, "loss": 0.2189, "step": 37310 }, { "epoch": 1.8792487033586787, "grad_norm": 3.2088431900957812, "learning_rate": 3.6820801656042337e-06, "loss": 0.2608, "step": 37320 }, { "epoch": 1.879752253386374, "grad_norm": 4.42799923421767, "learning_rate": 3.679254439609686e-06, "loss": 0.261, "step": 37330 }, { "epoch": 1.880255803414069, "grad_norm": 3.9576682902031664, "learning_rate": 3.676429167015094e-06, "loss": 0.2373, "step": 37340 }, { "epoch": 1.8807593534417646, "grad_norm": 3.3810473315216676, "learning_rate": 3.6736043487903457e-06, "loss": 0.258, "step": 37350 }, { "epoch": 1.8812629034694597, "grad_norm": 4.674761936865155, "learning_rate": 3.6707799859051786e-06, "loss": 0.2695, "step": 37360 }, { "epoch": 1.881766453497155, "grad_norm": 2.919685268336298, "learning_rate": 3.667956079329168e-06, "loss": 0.2385, "step": 37370 }, { "epoch": 1.8822700035248503, "grad_norm": 5.247512098076715, "learning_rate": 3.6651326300317356e-06, "loss": 0.3217, "step": 37380 }, { "epoch": 1.8827735535525454, "grad_norm": 2.8885440277505747, "learning_rate": 3.66230963898215e-06, "loss": 0.337, "step": 37390 }, { "epoch": 1.8832771035802407, "grad_norm": 3.3898938360021513, "learning_rate": 3.659487107149513e-06, "loss": 0.258, "step": 37400 }, { "epoch": 1.883780653607936, "grad_norm": 4.0957929909114545, "learning_rate": 3.6566650355027754e-06, "loss": 0.2931, "step": 37410 }, { "epoch": 1.884284203635631, "grad_norm": 4.461434317412147, "learning_rate": 3.6538434250107314e-06, "loss": 0.2303, "step": 37420 }, { "epoch": 1.8847877536633264, "grad_norm": 2.1209966076905054, "learning_rate": 3.6510222766420118e-06, "loss": 0.2669, "step": 37430 }, { "epoch": 1.8852913036910217, "grad_norm": 4.397813306067862, "learning_rate": 3.6482015913650907e-06, "loss": 0.2991, "step": 37440 }, { "epoch": 1.8857948537187168, "grad_norm": 3.430116854808074, "learning_rate": 3.6453813701482865e-06, "loss": 0.2361, "step": 37450 }, { "epoch": 1.8862984037464123, "grad_norm": 3.202790090367417, "learning_rate": 3.6425616139597553e-06, "loss": 0.2715, "step": 37460 }, { "epoch": 1.8868019537741074, "grad_norm": 4.249583654820414, "learning_rate": 3.6397423237674893e-06, "loss": 0.3064, "step": 37470 }, { "epoch": 1.8873055038018027, "grad_norm": 3.941946864212677, "learning_rate": 3.63692350053933e-06, "loss": 0.3439, "step": 37480 }, { "epoch": 1.887809053829498, "grad_norm": 3.537345994664024, "learning_rate": 3.6341051452429528e-06, "loss": 0.2895, "step": 37490 }, { "epoch": 1.8883126038571931, "grad_norm": 4.600924685836564, "learning_rate": 3.6312872588458713e-06, "loss": 0.2836, "step": 37500 }, { "epoch": 1.8888161538848884, "grad_norm": 2.807626721559316, "learning_rate": 3.628469842315443e-06, "loss": 0.2377, "step": 37510 }, { "epoch": 1.8893197039125837, "grad_norm": 4.04135298224093, "learning_rate": 3.625652896618861e-06, "loss": 0.2954, "step": 37520 }, { "epoch": 1.8898232539402788, "grad_norm": 4.183872358354171, "learning_rate": 3.6228364227231557e-06, "loss": 0.295, "step": 37530 }, { "epoch": 1.8903268039679744, "grad_norm": 4.182497492224683, "learning_rate": 3.620020421595196e-06, "loss": 0.2793, "step": 37540 }, { "epoch": 1.8908303539956695, "grad_norm": 3.9454065523988873, "learning_rate": 3.6172048942016934e-06, "loss": 0.2965, "step": 37550 }, { "epoch": 1.8913339040233648, "grad_norm": 4.2125667662574395, "learning_rate": 3.614389841509189e-06, "loss": 0.2368, "step": 37560 }, { "epoch": 1.89183745405106, "grad_norm": 1.692258842727291, "learning_rate": 3.611575264484068e-06, "loss": 0.2243, "step": 37570 }, { "epoch": 1.8923410040787552, "grad_norm": 4.530278790434773, "learning_rate": 3.608761164092546e-06, "loss": 0.2635, "step": 37580 }, { "epoch": 1.8928445541064505, "grad_norm": 3.092718621167026, "learning_rate": 3.605947541300684e-06, "loss": 0.2549, "step": 37590 }, { "epoch": 1.8933481041341458, "grad_norm": 4.294810955375549, "learning_rate": 3.603134397074365e-06, "loss": 0.28, "step": 37600 }, { "epoch": 1.8938516541618409, "grad_norm": 2.7792880518348846, "learning_rate": 3.600321732379321e-06, "loss": 0.282, "step": 37610 }, { "epoch": 1.8943552041895362, "grad_norm": 3.1286905803975245, "learning_rate": 3.597509548181112e-06, "loss": 0.2797, "step": 37620 }, { "epoch": 1.8948587542172315, "grad_norm": 3.2423375819905225, "learning_rate": 3.5946978454451376e-06, "loss": 0.2842, "step": 37630 }, { "epoch": 1.8953623042449266, "grad_norm": 4.05343872314993, "learning_rate": 3.5918866251366268e-06, "loss": 0.2964, "step": 37640 }, { "epoch": 1.895865854272622, "grad_norm": 5.096706928126322, "learning_rate": 3.58907588822065e-06, "loss": 0.3235, "step": 37650 }, { "epoch": 1.8963694043003172, "grad_norm": 4.039210039219629, "learning_rate": 3.586265635662105e-06, "loss": 0.2542, "step": 37660 }, { "epoch": 1.8968729543280125, "grad_norm": 5.591356780471932, "learning_rate": 3.5834558684257265e-06, "loss": 0.3138, "step": 37670 }, { "epoch": 1.8973765043557078, "grad_norm": 3.809484961167066, "learning_rate": 3.5806465874760806e-06, "loss": 0.2936, "step": 37680 }, { "epoch": 1.897880054383403, "grad_norm": 4.840639939050087, "learning_rate": 3.57783779377757e-06, "loss": 0.2806, "step": 37690 }, { "epoch": 1.8983836044110982, "grad_norm": 3.318241630590811, "learning_rate": 3.575029488294427e-06, "loss": 0.2494, "step": 37700 }, { "epoch": 1.8988871544387935, "grad_norm": 3.520687167807423, "learning_rate": 3.5722216719907175e-06, "loss": 0.2971, "step": 37710 }, { "epoch": 1.8993907044664886, "grad_norm": 3.2732783305723006, "learning_rate": 3.569414345830341e-06, "loss": 0.2644, "step": 37720 }, { "epoch": 1.8998942544941841, "grad_norm": 3.000207992967733, "learning_rate": 3.566607510777026e-06, "loss": 0.2633, "step": 37730 }, { "epoch": 1.9003978045218792, "grad_norm": 2.928432200566469, "learning_rate": 3.5638011677943307e-06, "loss": 0.2516, "step": 37740 }, { "epoch": 1.9009013545495745, "grad_norm": 3.2424091659523993, "learning_rate": 3.560995317845651e-06, "loss": 0.2714, "step": 37750 }, { "epoch": 1.9014049045772698, "grad_norm": 2.028916602525942, "learning_rate": 3.5581899618942084e-06, "loss": 0.2347, "step": 37760 }, { "epoch": 1.901908454604965, "grad_norm": 4.099457427161437, "learning_rate": 3.5553851009030545e-06, "loss": 0.2725, "step": 37770 }, { "epoch": 1.9024120046326602, "grad_norm": 3.111103804819698, "learning_rate": 3.552580735835075e-06, "loss": 0.2302, "step": 37780 }, { "epoch": 1.9029155546603556, "grad_norm": 4.396895227849494, "learning_rate": 3.5497768676529814e-06, "loss": 0.2687, "step": 37790 }, { "epoch": 1.9034191046880506, "grad_norm": 2.5143465832089222, "learning_rate": 3.546973497319319e-06, "loss": 0.2432, "step": 37800 }, { "epoch": 1.903922654715746, "grad_norm": 3.446657614243191, "learning_rate": 3.5441706257964557e-06, "loss": 0.2537, "step": 37810 }, { "epoch": 1.9044262047434413, "grad_norm": 3.493475562851769, "learning_rate": 3.541368254046594e-06, "loss": 0.2421, "step": 37820 }, { "epoch": 1.9049297547711364, "grad_norm": 4.4507199022068304, "learning_rate": 3.53856638303176e-06, "loss": 0.2658, "step": 37830 }, { "epoch": 1.9054333047988319, "grad_norm": 3.5421547753789415, "learning_rate": 3.5357650137138143e-06, "loss": 0.284, "step": 37840 }, { "epoch": 1.905936854826527, "grad_norm": 2.191868255887453, "learning_rate": 3.5329641470544383e-06, "loss": 0.235, "step": 37850 }, { "epoch": 1.9064404048542223, "grad_norm": 4.3889220644251274, "learning_rate": 3.530163784015147e-06, "loss": 0.2963, "step": 37860 }, { "epoch": 1.9069439548819176, "grad_norm": 4.275219183905075, "learning_rate": 3.527363925557279e-06, "loss": 0.2178, "step": 37870 }, { "epoch": 1.9074475049096127, "grad_norm": 4.750217133850979, "learning_rate": 3.524564572641999e-06, "loss": 0.2845, "step": 37880 }, { "epoch": 1.907951054937308, "grad_norm": 3.4948632279463374, "learning_rate": 3.5217657262302984e-06, "loss": 0.2908, "step": 37890 }, { "epoch": 1.9084546049650033, "grad_norm": 3.8852831811775324, "learning_rate": 3.5189673872829976e-06, "loss": 0.2628, "step": 37900 }, { "epoch": 1.9089581549926984, "grad_norm": 4.32154428893823, "learning_rate": 3.51616955676074e-06, "loss": 0.2716, "step": 37910 }, { "epoch": 1.909461705020394, "grad_norm": 4.00530781437127, "learning_rate": 3.5133722356239976e-06, "loss": 0.2636, "step": 37920 }, { "epoch": 1.909965255048089, "grad_norm": 3.632830132231333, "learning_rate": 3.510575424833064e-06, "loss": 0.2612, "step": 37930 }, { "epoch": 1.9104688050757843, "grad_norm": 3.0244176192240086, "learning_rate": 3.5077791253480586e-06, "loss": 0.2172, "step": 37940 }, { "epoch": 1.9109723551034796, "grad_norm": 3.5827466840029962, "learning_rate": 3.5049833381289237e-06, "loss": 0.2711, "step": 37950 }, { "epoch": 1.9114759051311747, "grad_norm": 2.343759787423102, "learning_rate": 3.5021880641354313e-06, "loss": 0.2764, "step": 37960 }, { "epoch": 1.91197945515887, "grad_norm": 3.379752896229452, "learning_rate": 3.49939330432717e-06, "loss": 0.2207, "step": 37970 }, { "epoch": 1.9124830051865653, "grad_norm": 4.147676932163377, "learning_rate": 3.496599059663559e-06, "loss": 0.2687, "step": 37980 }, { "epoch": 1.9129865552142604, "grad_norm": 2.7717412442431217, "learning_rate": 3.4938053311038355e-06, "loss": 0.2405, "step": 37990 }, { "epoch": 1.913490105241956, "grad_norm": 3.2455975389297307, "learning_rate": 3.49101211960706e-06, "loss": 0.2252, "step": 38000 }, { "epoch": 1.913993655269651, "grad_norm": 4.394398161581752, "learning_rate": 3.48821942613212e-06, "loss": 0.2897, "step": 38010 }, { "epoch": 1.9144972052973461, "grad_norm": 4.005603178109216, "learning_rate": 3.4854272516377184e-06, "loss": 0.2449, "step": 38020 }, { "epoch": 1.9150007553250417, "grad_norm": 4.105412679843568, "learning_rate": 3.4826355970823835e-06, "loss": 0.2914, "step": 38030 }, { "epoch": 1.9155043053527367, "grad_norm": 4.073115022311974, "learning_rate": 3.4798444634244666e-06, "loss": 0.2374, "step": 38040 }, { "epoch": 1.916007855380432, "grad_norm": 2.9957333464940437, "learning_rate": 3.4770538516221386e-06, "loss": 0.2591, "step": 38050 }, { "epoch": 1.9165114054081274, "grad_norm": 3.046154875033715, "learning_rate": 3.474263762633389e-06, "loss": 0.2512, "step": 38060 }, { "epoch": 1.9170149554358225, "grad_norm": 3.0810358070603368, "learning_rate": 3.4714741974160325e-06, "loss": 0.269, "step": 38070 }, { "epoch": 1.9175185054635178, "grad_norm": 3.2759160806327006, "learning_rate": 3.4686851569277013e-06, "loss": 0.2335, "step": 38080 }, { "epoch": 1.918022055491213, "grad_norm": 2.1927133886644796, "learning_rate": 3.4658966421258467e-06, "loss": 0.3011, "step": 38090 }, { "epoch": 1.9185256055189082, "grad_norm": 2.8531688153090133, "learning_rate": 3.463108653967739e-06, "loss": 0.24, "step": 38100 }, { "epoch": 1.9190291555466037, "grad_norm": 2.667475137587778, "learning_rate": 3.460321193410472e-06, "loss": 0.2729, "step": 38110 }, { "epoch": 1.9195327055742988, "grad_norm": 4.298391639941149, "learning_rate": 3.457534261410953e-06, "loss": 0.2645, "step": 38120 }, { "epoch": 1.920036255601994, "grad_norm": 2.8935531345043293, "learning_rate": 3.454747858925913e-06, "loss": 0.24, "step": 38130 }, { "epoch": 1.9205398056296894, "grad_norm": 3.1111786867120452, "learning_rate": 3.4519619869118993e-06, "loss": 0.284, "step": 38140 }, { "epoch": 1.9210433556573845, "grad_norm": 3.6793932983369957, "learning_rate": 3.449176646325273e-06, "loss": 0.2563, "step": 38150 }, { "epoch": 1.9215469056850798, "grad_norm": 3.8662531717592215, "learning_rate": 3.446391838122217e-06, "loss": 0.285, "step": 38160 }, { "epoch": 1.922050455712775, "grad_norm": 4.118443188002961, "learning_rate": 3.443607563258733e-06, "loss": 0.3017, "step": 38170 }, { "epoch": 1.9225540057404702, "grad_norm": 4.605719894200376, "learning_rate": 3.440823822690635e-06, "loss": 0.3118, "step": 38180 }, { "epoch": 1.9230575557681657, "grad_norm": 3.737362739627099, "learning_rate": 3.438040617373557e-06, "loss": 0.2769, "step": 38190 }, { "epoch": 1.9235611057958608, "grad_norm": 4.805231888805698, "learning_rate": 3.4352579482629477e-06, "loss": 0.2494, "step": 38200 }, { "epoch": 1.9240646558235561, "grad_norm": 4.103163963022464, "learning_rate": 3.432475816314075e-06, "loss": 0.243, "step": 38210 }, { "epoch": 1.9245682058512514, "grad_norm": 3.902681038644814, "learning_rate": 3.429694222482013e-06, "loss": 0.2651, "step": 38220 }, { "epoch": 1.9250717558789465, "grad_norm": 4.245921038271616, "learning_rate": 3.4269131677216634e-06, "loss": 0.2119, "step": 38230 }, { "epoch": 1.9255753059066418, "grad_norm": 3.421027742981172, "learning_rate": 3.4241326529877334e-06, "loss": 0.2906, "step": 38240 }, { "epoch": 1.9260788559343371, "grad_norm": 3.556724665735493, "learning_rate": 3.4213526792347513e-06, "loss": 0.2616, "step": 38250 }, { "epoch": 1.9265824059620322, "grad_norm": 3.336217858876241, "learning_rate": 3.418573247417055e-06, "loss": 0.2186, "step": 38260 }, { "epoch": 1.9270859559897275, "grad_norm": 3.9864019549219543, "learning_rate": 3.4157943584887986e-06, "loss": 0.2843, "step": 38270 }, { "epoch": 1.9275895060174228, "grad_norm": 3.1909160448868676, "learning_rate": 3.413016013403952e-06, "loss": 0.2537, "step": 38280 }, { "epoch": 1.928093056045118, "grad_norm": 5.4463002150098, "learning_rate": 3.410238213116292e-06, "loss": 0.2795, "step": 38290 }, { "epoch": 1.9285966060728135, "grad_norm": 3.6539550445227107, "learning_rate": 3.407460958579413e-06, "loss": 0.2572, "step": 38300 }, { "epoch": 1.9291001561005086, "grad_norm": 2.448823024696292, "learning_rate": 3.4046842507467223e-06, "loss": 0.3, "step": 38310 }, { "epoch": 1.9296037061282039, "grad_norm": 2.975969889127878, "learning_rate": 3.4019080905714384e-06, "loss": 0.2843, "step": 38320 }, { "epoch": 1.9301072561558992, "grad_norm": 3.2534270771834723, "learning_rate": 3.3991324790065903e-06, "loss": 0.2576, "step": 38330 }, { "epoch": 1.9306108061835943, "grad_norm": 4.575949835139559, "learning_rate": 3.3963574170050227e-06, "loss": 0.2812, "step": 38340 }, { "epoch": 1.9311143562112896, "grad_norm": 4.713349376239888, "learning_rate": 3.3935829055193893e-06, "loss": 0.2489, "step": 38350 }, { "epoch": 1.9316179062389849, "grad_norm": 3.16459884112525, "learning_rate": 3.3908089455021516e-06, "loss": 0.2984, "step": 38360 }, { "epoch": 1.93212145626668, "grad_norm": 3.7865553287743627, "learning_rate": 3.3880355379055875e-06, "loss": 0.2857, "step": 38370 }, { "epoch": 1.9326250062943755, "grad_norm": 2.6572875722381637, "learning_rate": 3.3852626836817813e-06, "loss": 0.3133, "step": 38380 }, { "epoch": 1.9331285563220706, "grad_norm": 4.173542641555243, "learning_rate": 3.3824903837826285e-06, "loss": 0.2354, "step": 38390 }, { "epoch": 1.933632106349766, "grad_norm": 2.887009613170704, "learning_rate": 3.3797186391598364e-06, "loss": 0.2261, "step": 38400 }, { "epoch": 1.9341356563774612, "grad_norm": 3.7774886814416546, "learning_rate": 3.376947450764917e-06, "loss": 0.257, "step": 38410 }, { "epoch": 1.9346392064051563, "grad_norm": 3.98613126280888, "learning_rate": 3.3741768195491985e-06, "loss": 0.2782, "step": 38420 }, { "epoch": 1.9351427564328516, "grad_norm": 2.9914308072363434, "learning_rate": 3.371406746463809e-06, "loss": 0.2474, "step": 38430 }, { "epoch": 1.935646306460547, "grad_norm": 2.8517817154945737, "learning_rate": 3.3686372324596905e-06, "loss": 0.2145, "step": 38440 }, { "epoch": 1.936149856488242, "grad_norm": 3.7060351772688764, "learning_rate": 3.365868278487592e-06, "loss": 0.2425, "step": 38450 }, { "epoch": 1.9366534065159373, "grad_norm": 4.012017175288634, "learning_rate": 3.3630998854980723e-06, "loss": 0.2649, "step": 38460 }, { "epoch": 1.9371569565436326, "grad_norm": 4.305261730701495, "learning_rate": 3.360332054441492e-06, "loss": 0.2588, "step": 38470 }, { "epoch": 1.9376605065713277, "grad_norm": 3.703404959664014, "learning_rate": 3.357564786268026e-06, "loss": 0.2126, "step": 38480 }, { "epoch": 1.9381640565990232, "grad_norm": 3.385584309533776, "learning_rate": 3.3547980819276516e-06, "loss": 0.2585, "step": 38490 }, { "epoch": 1.9386676066267183, "grad_norm": 2.8181716789954776, "learning_rate": 3.3520319423701507e-06, "loss": 0.2651, "step": 38500 }, { "epoch": 1.9391711566544136, "grad_norm": 2.753367731278834, "learning_rate": 3.3492663685451155e-06, "loss": 0.2107, "step": 38510 }, { "epoch": 1.939674706682109, "grad_norm": 3.1816700401056224, "learning_rate": 3.3465013614019428e-06, "loss": 0.2923, "step": 38520 }, { "epoch": 1.940178256709804, "grad_norm": 3.4131120405612028, "learning_rate": 3.3437369218898334e-06, "loss": 0.2303, "step": 38530 }, { "epoch": 1.9406818067374993, "grad_norm": 3.9144883816361697, "learning_rate": 3.3409730509577958e-06, "loss": 0.2359, "step": 38540 }, { "epoch": 1.9411853567651947, "grad_norm": 3.2998857356242137, "learning_rate": 3.3382097495546407e-06, "loss": 0.2748, "step": 38550 }, { "epoch": 1.9416889067928897, "grad_norm": 4.121914980662622, "learning_rate": 3.3354470186289863e-06, "loss": 0.302, "step": 38560 }, { "epoch": 1.9421924568205853, "grad_norm": 3.787216521250226, "learning_rate": 3.332684859129249e-06, "loss": 0.2471, "step": 38570 }, { "epoch": 1.9426960068482804, "grad_norm": 3.785537278256837, "learning_rate": 3.329923272003656e-06, "loss": 0.2816, "step": 38580 }, { "epoch": 1.9431995568759757, "grad_norm": 3.399436751667279, "learning_rate": 3.3271622582002337e-06, "loss": 0.2563, "step": 38590 }, { "epoch": 1.943703106903671, "grad_norm": 3.6745565693286397, "learning_rate": 3.3244018186668147e-06, "loss": 0.2828, "step": 38600 }, { "epoch": 1.944206656931366, "grad_norm": 2.2778799275400785, "learning_rate": 3.3216419543510325e-06, "loss": 0.2736, "step": 38610 }, { "epoch": 1.9447102069590614, "grad_norm": 2.9159365727987003, "learning_rate": 3.3188826662003217e-06, "loss": 0.2253, "step": 38620 }, { "epoch": 1.9452137569867567, "grad_norm": 3.416795665437251, "learning_rate": 3.316123955161924e-06, "loss": 0.2316, "step": 38630 }, { "epoch": 1.9457173070144518, "grad_norm": 3.634884793710409, "learning_rate": 3.3133658221828768e-06, "loss": 0.2359, "step": 38640 }, { "epoch": 1.946220857042147, "grad_norm": 2.49669125267324, "learning_rate": 3.3106082682100227e-06, "loss": 0.2208, "step": 38650 }, { "epoch": 1.9467244070698424, "grad_norm": 4.394372686469125, "learning_rate": 3.3078512941900054e-06, "loss": 0.3047, "step": 38660 }, { "epoch": 1.9472279570975375, "grad_norm": 3.6872760388935393, "learning_rate": 3.305094901069269e-06, "loss": 0.2863, "step": 38670 }, { "epoch": 1.947731507125233, "grad_norm": 4.335912807516516, "learning_rate": 3.302339089794058e-06, "loss": 0.2716, "step": 38680 }, { "epoch": 1.948235057152928, "grad_norm": 3.1153686664314124, "learning_rate": 3.2995838613104197e-06, "loss": 0.2823, "step": 38690 }, { "epoch": 1.9487386071806234, "grad_norm": 3.5337505004705134, "learning_rate": 3.2968292165641955e-06, "loss": 0.241, "step": 38700 }, { "epoch": 1.9492421572083187, "grad_norm": 3.984699520012302, "learning_rate": 3.294075156501032e-06, "loss": 0.2713, "step": 38710 }, { "epoch": 1.9497457072360138, "grad_norm": 3.9872604434403387, "learning_rate": 3.2913216820663716e-06, "loss": 0.2629, "step": 38720 }, { "epoch": 1.9502492572637091, "grad_norm": 5.313294312965085, "learning_rate": 3.2885687942054583e-06, "loss": 0.3082, "step": 38730 }, { "epoch": 1.9507528072914044, "grad_norm": 3.216435389007966, "learning_rate": 3.2858164938633324e-06, "loss": 0.2726, "step": 38740 }, { "epoch": 1.9512563573190995, "grad_norm": 3.1402509164996117, "learning_rate": 3.283064781984835e-06, "loss": 0.245, "step": 38750 }, { "epoch": 1.951759907346795, "grad_norm": 4.767753525569597, "learning_rate": 3.2803136595146046e-06, "loss": 0.2987, "step": 38760 }, { "epoch": 1.9522634573744901, "grad_norm": 2.0044521788884007, "learning_rate": 3.2775631273970737e-06, "loss": 0.218, "step": 38770 }, { "epoch": 1.9527670074021855, "grad_norm": 4.005312671518698, "learning_rate": 3.2748131865764755e-06, "loss": 0.2237, "step": 38780 }, { "epoch": 1.9532705574298808, "grad_norm": 3.567946504295816, "learning_rate": 3.2720638379968413e-06, "loss": 0.293, "step": 38790 }, { "epoch": 1.9537741074575758, "grad_norm": 1.3314898154105055, "learning_rate": 3.269315082601996e-06, "loss": 0.235, "step": 38800 }, { "epoch": 1.9542776574852712, "grad_norm": 2.565669513374058, "learning_rate": 3.2665669213355635e-06, "loss": 0.2662, "step": 38810 }, { "epoch": 1.9547812075129665, "grad_norm": 4.4019897790470255, "learning_rate": 3.2638193551409624e-06, "loss": 0.2632, "step": 38820 }, { "epoch": 1.9552847575406616, "grad_norm": 4.395583976186834, "learning_rate": 3.2610723849614084e-06, "loss": 0.2916, "step": 38830 }, { "epoch": 1.9557883075683569, "grad_norm": 5.270475414653019, "learning_rate": 3.258326011739908e-06, "loss": 0.2788, "step": 38840 }, { "epoch": 1.9562918575960522, "grad_norm": 4.224663531694433, "learning_rate": 3.255580236419269e-06, "loss": 0.2681, "step": 38850 }, { "epoch": 1.9567954076237473, "grad_norm": 2.458972907635498, "learning_rate": 3.2528350599420893e-06, "loss": 0.3081, "step": 38860 }, { "epoch": 1.9572989576514428, "grad_norm": 4.471634832648068, "learning_rate": 3.2500904832507653e-06, "loss": 0.2699, "step": 38870 }, { "epoch": 1.9578025076791379, "grad_norm": 2.9038091512603845, "learning_rate": 3.247346507287484e-06, "loss": 0.2385, "step": 38880 }, { "epoch": 1.9583060577068332, "grad_norm": 4.314340575820749, "learning_rate": 3.2446031329942263e-06, "loss": 0.267, "step": 38890 }, { "epoch": 1.9588096077345285, "grad_norm": 3.2451528881811993, "learning_rate": 3.2418603613127707e-06, "loss": 0.2511, "step": 38900 }, { "epoch": 1.9593131577622236, "grad_norm": 5.060910758616146, "learning_rate": 3.239118193184683e-06, "loss": 0.258, "step": 38910 }, { "epoch": 1.959816707789919, "grad_norm": 3.6893485558815353, "learning_rate": 3.2363766295513247e-06, "loss": 0.2969, "step": 38920 }, { "epoch": 1.9603202578176142, "grad_norm": 4.3223665476064115, "learning_rate": 3.233635671353852e-06, "loss": 0.2471, "step": 38930 }, { "epoch": 1.9608238078453093, "grad_norm": 3.997046747043084, "learning_rate": 3.230895319533209e-06, "loss": 0.276, "step": 38940 }, { "epoch": 1.9613273578730048, "grad_norm": 3.1916023525175556, "learning_rate": 3.228155575030133e-06, "loss": 0.3009, "step": 38950 }, { "epoch": 1.9618309079007, "grad_norm": 3.9808058739795147, "learning_rate": 3.2254164387851557e-06, "loss": 0.2715, "step": 38960 }, { "epoch": 1.9623344579283952, "grad_norm": 2.1552038914633393, "learning_rate": 3.2226779117385976e-06, "loss": 0.252, "step": 38970 }, { "epoch": 1.9628380079560905, "grad_norm": 4.467074436488231, "learning_rate": 3.2199399948305665e-06, "loss": 0.2751, "step": 38980 }, { "epoch": 1.9633415579837856, "grad_norm": 4.042200985488079, "learning_rate": 3.2172026890009684e-06, "loss": 0.2787, "step": 38990 }, { "epoch": 1.963845108011481, "grad_norm": 3.0354997392319953, "learning_rate": 3.2144659951894943e-06, "loss": 0.2943, "step": 39000 }, { "epoch": 1.9643486580391762, "grad_norm": 3.3358584850634196, "learning_rate": 3.211729914335624e-06, "loss": 0.2581, "step": 39010 }, { "epoch": 1.9648522080668713, "grad_norm": 4.255656012142144, "learning_rate": 3.2089944473786334e-06, "loss": 0.2836, "step": 39020 }, { "epoch": 1.9653557580945666, "grad_norm": 2.4717602754998524, "learning_rate": 3.2062595952575804e-06, "loss": 0.2513, "step": 39030 }, { "epoch": 1.965859308122262, "grad_norm": 4.067959716925399, "learning_rate": 3.203525358911318e-06, "loss": 0.3028, "step": 39040 }, { "epoch": 1.966362858149957, "grad_norm": 4.347787430346103, "learning_rate": 3.2007917392784804e-06, "loss": 0.2651, "step": 39050 }, { "epoch": 1.9668664081776526, "grad_norm": 2.4761351033617727, "learning_rate": 3.198058737297497e-06, "loss": 0.2772, "step": 39060 }, { "epoch": 1.9673699582053477, "grad_norm": 2.833273708602303, "learning_rate": 3.1953263539065814e-06, "loss": 0.2633, "step": 39070 }, { "epoch": 1.967873508233043, "grad_norm": 2.922110186063651, "learning_rate": 3.1925945900437385e-06, "loss": 0.242, "step": 39080 }, { "epoch": 1.9683770582607383, "grad_norm": 2.9911746108902473, "learning_rate": 3.1898634466467536e-06, "loss": 0.2621, "step": 39090 }, { "epoch": 1.9688806082884334, "grad_norm": 4.372732800317098, "learning_rate": 3.187132924653208e-06, "loss": 0.2902, "step": 39100 }, { "epoch": 1.9693841583161287, "grad_norm": 4.0978519253447, "learning_rate": 3.1844030250004644e-06, "loss": 0.3166, "step": 39110 }, { "epoch": 1.969887708343824, "grad_norm": 3.2657328047375938, "learning_rate": 3.18167374862567e-06, "loss": 0.2836, "step": 39120 }, { "epoch": 1.970391258371519, "grad_norm": 3.567051746768233, "learning_rate": 3.178945096465761e-06, "loss": 0.2687, "step": 39130 }, { "epoch": 1.9708948083992146, "grad_norm": 3.5416380264152245, "learning_rate": 3.17621706945746e-06, "loss": 0.2871, "step": 39140 }, { "epoch": 1.9713983584269097, "grad_norm": 3.8215915458863674, "learning_rate": 3.173489668537274e-06, "loss": 0.2906, "step": 39150 }, { "epoch": 1.971901908454605, "grad_norm": 3.9544717412478287, "learning_rate": 3.170762894641495e-06, "loss": 0.2904, "step": 39160 }, { "epoch": 1.9724054584823003, "grad_norm": 3.5013119529191554, "learning_rate": 3.168036748706199e-06, "loss": 0.2783, "step": 39170 }, { "epoch": 1.9729090085099954, "grad_norm": 3.1531464894331864, "learning_rate": 3.1653112316672495e-06, "loss": 0.2255, "step": 39180 }, { "epoch": 1.9734125585376907, "grad_norm": 3.626766613042368, "learning_rate": 3.1625863444602866e-06, "loss": 0.3002, "step": 39190 }, { "epoch": 1.973916108565386, "grad_norm": 2.475410069208654, "learning_rate": 3.1598620880207444e-06, "loss": 0.2373, "step": 39200 }, { "epoch": 1.974419658593081, "grad_norm": 3.7615069732346527, "learning_rate": 3.1571384632838332e-06, "loss": 0.2246, "step": 39210 }, { "epoch": 1.9749232086207766, "grad_norm": 3.3628162556063157, "learning_rate": 3.1544154711845474e-06, "loss": 0.2628, "step": 39220 }, { "epoch": 1.9754267586484717, "grad_norm": 1.6430734891848953, "learning_rate": 3.151693112657669e-06, "loss": 0.1639, "step": 39230 }, { "epoch": 1.9759303086761668, "grad_norm": 3.7845271550092425, "learning_rate": 3.1489713886377555e-06, "loss": 0.2441, "step": 39240 }, { "epoch": 1.9764338587038623, "grad_norm": 3.0514517057934056, "learning_rate": 3.1462503000591534e-06, "loss": 0.3027, "step": 39250 }, { "epoch": 1.9769374087315574, "grad_norm": 3.0670947828098423, "learning_rate": 3.1435298478559844e-06, "loss": 0.2751, "step": 39260 }, { "epoch": 1.9774409587592527, "grad_norm": 2.3307310074767487, "learning_rate": 3.1408100329621566e-06, "loss": 0.2364, "step": 39270 }, { "epoch": 1.977944508786948, "grad_norm": 3.5356896010571353, "learning_rate": 3.1380908563113565e-06, "loss": 0.2879, "step": 39280 }, { "epoch": 1.9784480588146431, "grad_norm": 4.01564923404701, "learning_rate": 3.135372318837055e-06, "loss": 0.2543, "step": 39290 }, { "epoch": 1.9789516088423384, "grad_norm": 3.432706708828547, "learning_rate": 3.1326544214724985e-06, "loss": 0.2567, "step": 39300 }, { "epoch": 1.9794551588700338, "grad_norm": 3.6833330453587, "learning_rate": 3.129937165150721e-06, "loss": 0.2577, "step": 39310 }, { "epoch": 1.9799587088977288, "grad_norm": 4.077571355981712, "learning_rate": 3.1272205508045272e-06, "loss": 0.2768, "step": 39320 }, { "epoch": 1.9804622589254244, "grad_norm": 2.022829454383306, "learning_rate": 3.1245045793665087e-06, "loss": 0.3047, "step": 39330 }, { "epoch": 1.9809658089531195, "grad_norm": 4.066022313388873, "learning_rate": 3.1217892517690314e-06, "loss": 0.2752, "step": 39340 }, { "epoch": 1.9814693589808148, "grad_norm": 2.8796680238082835, "learning_rate": 3.1190745689442452e-06, "loss": 0.2739, "step": 39350 }, { "epoch": 1.98197290900851, "grad_norm": 1.9236992750310369, "learning_rate": 3.116360531824074e-06, "loss": 0.3029, "step": 39360 }, { "epoch": 1.9824764590362052, "grad_norm": 3.083935973790084, "learning_rate": 3.1136471413402247e-06, "loss": 0.2514, "step": 39370 }, { "epoch": 1.9829800090639005, "grad_norm": 3.4455649379562177, "learning_rate": 3.110934398424179e-06, "loss": 0.2776, "step": 39380 }, { "epoch": 1.9834835590915958, "grad_norm": 4.20986866574089, "learning_rate": 3.1082223040071953e-06, "loss": 0.2753, "step": 39390 }, { "epoch": 1.9839871091192909, "grad_norm": 3.720132707587243, "learning_rate": 3.10551085902031e-06, "loss": 0.3006, "step": 39400 }, { "epoch": 1.9844906591469864, "grad_norm": 4.140022483926423, "learning_rate": 3.102800064394341e-06, "loss": 0.3069, "step": 39410 }, { "epoch": 1.9849942091746815, "grad_norm": 4.413275156543127, "learning_rate": 3.1000899210598754e-06, "loss": 0.2785, "step": 39420 }, { "epoch": 1.9854977592023768, "grad_norm": 11.308050004307987, "learning_rate": 3.0973804299472855e-06, "loss": 0.2611, "step": 39430 }, { "epoch": 1.9860013092300721, "grad_norm": 2.509846134036447, "learning_rate": 3.0946715919867122e-06, "loss": 0.2076, "step": 39440 }, { "epoch": 1.9865048592577672, "grad_norm": 2.8611238130098515, "learning_rate": 3.0919634081080755e-06, "loss": 0.213, "step": 39450 }, { "epoch": 1.9870084092854625, "grad_norm": 2.257138755968329, "learning_rate": 3.089255879241069e-06, "loss": 0.2634, "step": 39460 }, { "epoch": 1.9875119593131578, "grad_norm": 3.5191130648668847, "learning_rate": 3.086549006315165e-06, "loss": 0.2436, "step": 39470 }, { "epoch": 1.988015509340853, "grad_norm": 4.815102413391241, "learning_rate": 3.083842790259606e-06, "loss": 0.2659, "step": 39480 }, { "epoch": 1.9885190593685482, "grad_norm": 3.477191810531505, "learning_rate": 3.081137232003413e-06, "loss": 0.2443, "step": 39490 }, { "epoch": 1.9890226093962435, "grad_norm": 3.3045444429320496, "learning_rate": 3.0784323324753807e-06, "loss": 0.24, "step": 39500 }, { "epoch": 1.9895261594239386, "grad_norm": 3.5270852043905516, "learning_rate": 3.0757280926040733e-06, "loss": 0.2576, "step": 39510 }, { "epoch": 1.9900297094516342, "grad_norm": 2.6469901964760125, "learning_rate": 3.073024513317836e-06, "loss": 0.2323, "step": 39520 }, { "epoch": 1.9905332594793292, "grad_norm": 3.964090427909344, "learning_rate": 3.070321595544779e-06, "loss": 0.3062, "step": 39530 }, { "epoch": 1.9910368095070246, "grad_norm": 3.6231270442701016, "learning_rate": 3.0676193402127907e-06, "loss": 0.231, "step": 39540 }, { "epoch": 1.9915403595347199, "grad_norm": 3.757393801468329, "learning_rate": 3.0649177482495318e-06, "loss": 0.2941, "step": 39550 }, { "epoch": 1.992043909562415, "grad_norm": 2.8986859325756886, "learning_rate": 3.062216820582434e-06, "loss": 0.2616, "step": 39560 }, { "epoch": 1.9925474595901103, "grad_norm": 4.9349229598298185, "learning_rate": 3.0595165581386998e-06, "loss": 0.317, "step": 39570 }, { "epoch": 1.9930510096178056, "grad_norm": 2.6758987118132316, "learning_rate": 3.0568169618453064e-06, "loss": 0.3041, "step": 39580 }, { "epoch": 1.9935545596455007, "grad_norm": 5.437042559660488, "learning_rate": 3.0541180326290015e-06, "loss": 0.2972, "step": 39590 }, { "epoch": 1.9940581096731962, "grad_norm": 3.1534661212544557, "learning_rate": 3.0514197714163006e-06, "loss": 0.2404, "step": 39600 }, { "epoch": 1.9945616597008913, "grad_norm": 3.0887586756213152, "learning_rate": 3.048722179133492e-06, "loss": 0.2848, "step": 39610 }, { "epoch": 1.9950652097285866, "grad_norm": 3.2419490564218347, "learning_rate": 3.046025256706637e-06, "loss": 0.2649, "step": 39620 }, { "epoch": 1.995568759756282, "grad_norm": 4.850502149420929, "learning_rate": 3.0433290050615626e-06, "loss": 0.2416, "step": 39630 }, { "epoch": 1.996072309783977, "grad_norm": 6.3163483456990654, "learning_rate": 3.04063342512387e-06, "loss": 0.3109, "step": 39640 }, { "epoch": 1.9965758598116723, "grad_norm": 3.2552581598290913, "learning_rate": 3.0379385178189238e-06, "loss": 0.2203, "step": 39650 }, { "epoch": 1.9970794098393676, "grad_norm": 3.2189863055547594, "learning_rate": 3.0352442840718664e-06, "loss": 0.33, "step": 39660 }, { "epoch": 1.9975829598670627, "grad_norm": 4.544924276070614, "learning_rate": 3.032550724807598e-06, "loss": 0.2552, "step": 39670 }, { "epoch": 1.998086509894758, "grad_norm": 3.523294706631946, "learning_rate": 3.0298578409507957e-06, "loss": 0.2859, "step": 39680 }, { "epoch": 1.9985900599224533, "grad_norm": 3.9594834719352803, "learning_rate": 3.0271656334259015e-06, "loss": 0.2384, "step": 39690 }, { "epoch": 1.9990936099501484, "grad_norm": 4.513662427186129, "learning_rate": 3.024474103157127e-06, "loss": 0.3276, "step": 39700 }, { "epoch": 1.999597159977844, "grad_norm": 3.4716308858654075, "learning_rate": 3.0217832510684474e-06, "loss": 0.2348, "step": 39710 }, { "epoch": 2.000100710005539, "grad_norm": 2.0439397830458956, "learning_rate": 3.0190930780836116e-06, "loss": 0.2516, "step": 39720 }, { "epoch": 2.000604260033234, "grad_norm": 2.7687748074827243, "learning_rate": 3.0164035851261302e-06, "loss": 0.1783, "step": 39730 }, { "epoch": 2.0011078100609296, "grad_norm": 2.2823601670832354, "learning_rate": 3.0137147731192806e-06, "loss": 0.1258, "step": 39740 }, { "epoch": 2.0016113600886247, "grad_norm": 1.793319923650784, "learning_rate": 3.0110266429861064e-06, "loss": 0.1363, "step": 39750 }, { "epoch": 2.0021149101163203, "grad_norm": 3.473609486406201, "learning_rate": 3.008339195649421e-06, "loss": 0.1448, "step": 39760 }, { "epoch": 2.0026184601440153, "grad_norm": 2.093689012203041, "learning_rate": 3.0056524320318e-06, "loss": 0.1274, "step": 39770 }, { "epoch": 2.0031220101717104, "grad_norm": 2.787803585818093, "learning_rate": 3.002966353055583e-06, "loss": 0.1461, "step": 39780 }, { "epoch": 2.003625560199406, "grad_norm": 3.017941933651312, "learning_rate": 3.0002809596428793e-06, "loss": 0.1237, "step": 39790 }, { "epoch": 2.004129110227101, "grad_norm": 2.5266145164661418, "learning_rate": 2.9975962527155593e-06, "loss": 0.1418, "step": 39800 }, { "epoch": 2.004632660254796, "grad_norm": 3.201144433716653, "learning_rate": 2.9949122331952565e-06, "loss": 0.1538, "step": 39810 }, { "epoch": 2.0051362102824917, "grad_norm": 1.7198626200526541, "learning_rate": 2.9922289020033724e-06, "loss": 0.1494, "step": 39820 }, { "epoch": 2.0056397603101868, "grad_norm": 3.460686568857936, "learning_rate": 2.9895462600610702e-06, "loss": 0.1246, "step": 39830 }, { "epoch": 2.0061433103378823, "grad_norm": 3.755342169335307, "learning_rate": 2.986864308289275e-06, "loss": 0.1674, "step": 39840 }, { "epoch": 2.0066468603655774, "grad_norm": 4.1994721214118815, "learning_rate": 2.9841830476086785e-06, "loss": 0.137, "step": 39850 }, { "epoch": 2.0071504103932725, "grad_norm": 2.1852471687381763, "learning_rate": 2.981502478939731e-06, "loss": 0.1562, "step": 39860 }, { "epoch": 2.007653960420968, "grad_norm": 3.4057433837809894, "learning_rate": 2.9788226032026506e-06, "loss": 0.1867, "step": 39870 }, { "epoch": 2.008157510448663, "grad_norm": 3.1403585911543015, "learning_rate": 2.9761434213174112e-06, "loss": 0.1469, "step": 39880 }, { "epoch": 2.008661060476358, "grad_norm": 3.172247670944385, "learning_rate": 2.973464934203753e-06, "loss": 0.1426, "step": 39890 }, { "epoch": 2.0091646105040537, "grad_norm": 3.970175871719209, "learning_rate": 2.9707871427811746e-06, "loss": 0.173, "step": 39900 }, { "epoch": 2.009668160531749, "grad_norm": 2.4406142040339462, "learning_rate": 2.96811004796894e-06, "loss": 0.135, "step": 39910 }, { "epoch": 2.010171710559444, "grad_norm": 4.397877537960706, "learning_rate": 2.965433650686069e-06, "loss": 0.1304, "step": 39920 }, { "epoch": 2.0106752605871394, "grad_norm": 2.505281844802843, "learning_rate": 2.9627579518513484e-06, "loss": 0.1172, "step": 39930 }, { "epoch": 2.0111788106148345, "grad_norm": 3.780086083744716, "learning_rate": 2.9600829523833174e-06, "loss": 0.1279, "step": 39940 }, { "epoch": 2.01168236064253, "grad_norm": 2.93759554399652, "learning_rate": 2.95740865320028e-06, "loss": 0.1432, "step": 39950 }, { "epoch": 2.012185910670225, "grad_norm": 4.199961640800475, "learning_rate": 2.9547350552202985e-06, "loss": 0.1261, "step": 39960 }, { "epoch": 2.01268946069792, "grad_norm": 2.9322431245887395, "learning_rate": 2.9520621593611966e-06, "loss": 0.1371, "step": 39970 }, { "epoch": 2.0131930107256157, "grad_norm": 4.415445602860253, "learning_rate": 2.949389966540553e-06, "loss": 0.1464, "step": 39980 }, { "epoch": 2.013696560753311, "grad_norm": 4.3695675894542605, "learning_rate": 2.946718477675709e-06, "loss": 0.1781, "step": 39990 }, { "epoch": 2.014200110781006, "grad_norm": 3.403215919007243, "learning_rate": 2.944047693683763e-06, "loss": 0.1464, "step": 40000 }, { "epoch": 2.0147036608087014, "grad_norm": 3.290751750486016, "learning_rate": 2.941377615481569e-06, "loss": 0.1566, "step": 40010 }, { "epoch": 2.0152072108363965, "grad_norm": 3.2905715369014725, "learning_rate": 2.9387082439857395e-06, "loss": 0.1211, "step": 40020 }, { "epoch": 2.015710760864092, "grad_norm": 4.104160125487364, "learning_rate": 2.9360395801126485e-06, "loss": 0.1261, "step": 40030 }, { "epoch": 2.016214310891787, "grad_norm": 3.1173771117304416, "learning_rate": 2.9333716247784217e-06, "loss": 0.1309, "step": 40040 }, { "epoch": 2.0167178609194822, "grad_norm": 3.8662886334471938, "learning_rate": 2.930704378898946e-06, "loss": 0.1811, "step": 40050 }, { "epoch": 2.0172214109471778, "grad_norm": 1.9527202745685917, "learning_rate": 2.9280378433898616e-06, "loss": 0.1143, "step": 40060 }, { "epoch": 2.017724960974873, "grad_norm": 3.5148868299062737, "learning_rate": 2.9253720191665667e-06, "loss": 0.1507, "step": 40070 }, { "epoch": 2.018228511002568, "grad_norm": 3.1434107150001007, "learning_rate": 2.9227069071442126e-06, "loss": 0.1606, "step": 40080 }, { "epoch": 2.0187320610302635, "grad_norm": 2.2554225844010825, "learning_rate": 2.9200425082377105e-06, "loss": 0.1144, "step": 40090 }, { "epoch": 2.0192356110579586, "grad_norm": 3.169339965551852, "learning_rate": 2.917378823361722e-06, "loss": 0.1595, "step": 40100 }, { "epoch": 2.0197391610856537, "grad_norm": 3.163161617819, "learning_rate": 2.9147158534306667e-06, "loss": 0.1426, "step": 40110 }, { "epoch": 2.020242711113349, "grad_norm": 5.584078607828085, "learning_rate": 2.9120535993587184e-06, "loss": 0.1775, "step": 40120 }, { "epoch": 2.0207462611410443, "grad_norm": 3.2694417678460908, "learning_rate": 2.909392062059807e-06, "loss": 0.1459, "step": 40130 }, { "epoch": 2.02124981116874, "grad_norm": 2.839528923043524, "learning_rate": 2.9067312424476113e-06, "loss": 0.1355, "step": 40140 }, { "epoch": 2.021753361196435, "grad_norm": 3.3192601074834727, "learning_rate": 2.9040711414355648e-06, "loss": 0.1357, "step": 40150 }, { "epoch": 2.02225691122413, "grad_norm": 3.152083473784874, "learning_rate": 2.9014117599368585e-06, "loss": 0.1325, "step": 40160 }, { "epoch": 2.0227604612518255, "grad_norm": 3.489669889207864, "learning_rate": 2.8987530988644357e-06, "loss": 0.1344, "step": 40170 }, { "epoch": 2.0232640112795206, "grad_norm": 4.487474623522766, "learning_rate": 2.896095159130986e-06, "loss": 0.1678, "step": 40180 }, { "epoch": 2.0237675613072157, "grad_norm": 3.55271461177067, "learning_rate": 2.8934379416489594e-06, "loss": 0.121, "step": 40190 }, { "epoch": 2.024271111334911, "grad_norm": 2.782682744298756, "learning_rate": 2.890781447330553e-06, "loss": 0.1075, "step": 40200 }, { "epoch": 2.0247746613626063, "grad_norm": 2.8127497718204655, "learning_rate": 2.88812567708772e-06, "loss": 0.1291, "step": 40210 }, { "epoch": 2.025278211390302, "grad_norm": 4.0251315592572885, "learning_rate": 2.8854706318321595e-06, "loss": 0.1226, "step": 40220 }, { "epoch": 2.025781761417997, "grad_norm": 2.5947739137311827, "learning_rate": 2.882816312475323e-06, "loss": 0.1381, "step": 40230 }, { "epoch": 2.026285311445692, "grad_norm": 4.4916678398471666, "learning_rate": 2.8801627199284167e-06, "loss": 0.1571, "step": 40240 }, { "epoch": 2.0267888614733875, "grad_norm": 3.9444263393066374, "learning_rate": 2.877509855102396e-06, "loss": 0.1396, "step": 40250 }, { "epoch": 2.0272924115010826, "grad_norm": 4.295771270150575, "learning_rate": 2.8748577189079617e-06, "loss": 0.1323, "step": 40260 }, { "epoch": 2.0277959615287777, "grad_norm": 4.747679675026631, "learning_rate": 2.872206312255569e-06, "loss": 0.1408, "step": 40270 }, { "epoch": 2.0282995115564733, "grad_norm": 3.853661692603644, "learning_rate": 2.8695556360554256e-06, "loss": 0.1667, "step": 40280 }, { "epoch": 2.0288030615841683, "grad_norm": 2.940542323472785, "learning_rate": 2.8669056912174812e-06, "loss": 0.1255, "step": 40290 }, { "epoch": 2.0293066116118634, "grad_norm": 3.9460065317582322, "learning_rate": 2.8642564786514367e-06, "loss": 0.1658, "step": 40300 }, { "epoch": 2.029810161639559, "grad_norm": 3.5453346909141605, "learning_rate": 2.8616079992667435e-06, "loss": 0.1661, "step": 40310 }, { "epoch": 2.030313711667254, "grad_norm": 3.6202252301748197, "learning_rate": 2.8589602539726015e-06, "loss": 0.1382, "step": 40320 }, { "epoch": 2.0308172616949496, "grad_norm": 2.1319787913483994, "learning_rate": 2.8563132436779596e-06, "loss": 0.154, "step": 40330 }, { "epoch": 2.0313208117226447, "grad_norm": 2.614117152071004, "learning_rate": 2.8536669692915075e-06, "loss": 0.1515, "step": 40340 }, { "epoch": 2.0318243617503398, "grad_norm": 2.9132459708371274, "learning_rate": 2.8510214317216922e-06, "loss": 0.1323, "step": 40350 }, { "epoch": 2.0323279117780353, "grad_norm": 3.663223897753877, "learning_rate": 2.848376631876698e-06, "loss": 0.1545, "step": 40360 }, { "epoch": 2.0328314618057304, "grad_norm": 3.5862438160804, "learning_rate": 2.845732570664465e-06, "loss": 0.1401, "step": 40370 }, { "epoch": 2.0333350118334255, "grad_norm": 2.9342533980981838, "learning_rate": 2.8430892489926713e-06, "loss": 0.1442, "step": 40380 }, { "epoch": 2.033838561861121, "grad_norm": 3.2103403756946816, "learning_rate": 2.840446667768748e-06, "loss": 0.1374, "step": 40390 }, { "epoch": 2.034342111888816, "grad_norm": 2.5204695449758576, "learning_rate": 2.837804827899869e-06, "loss": 0.1211, "step": 40400 }, { "epoch": 2.0348456619165116, "grad_norm": 2.1383690672720235, "learning_rate": 2.835163730292953e-06, "loss": 0.1264, "step": 40410 }, { "epoch": 2.0353492119442067, "grad_norm": 3.6509929602249636, "learning_rate": 2.8325233758546666e-06, "loss": 0.1348, "step": 40420 }, { "epoch": 2.035852761971902, "grad_norm": 2.144269089314269, "learning_rate": 2.8298837654914164e-06, "loss": 0.1446, "step": 40430 }, { "epoch": 2.0363563119995973, "grad_norm": 2.634770698296518, "learning_rate": 2.8272449001093605e-06, "loss": 0.1382, "step": 40440 }, { "epoch": 2.0368598620272924, "grad_norm": 3.3852426161350446, "learning_rate": 2.824606780614393e-06, "loss": 0.1478, "step": 40450 }, { "epoch": 2.0373634120549875, "grad_norm": 4.247258361984148, "learning_rate": 2.8219694079121583e-06, "loss": 0.1278, "step": 40460 }, { "epoch": 2.037866962082683, "grad_norm": 4.563230659243651, "learning_rate": 2.8193327829080434e-06, "loss": 0.1806, "step": 40470 }, { "epoch": 2.038370512110378, "grad_norm": 2.6987246978901127, "learning_rate": 2.8166969065071793e-06, "loss": 0.1313, "step": 40480 }, { "epoch": 2.038874062138073, "grad_norm": 3.9050723532236034, "learning_rate": 2.8140617796144367e-06, "loss": 0.1381, "step": 40490 }, { "epoch": 2.0393776121657687, "grad_norm": 2.535615975206391, "learning_rate": 2.8114274031344274e-06, "loss": 0.1306, "step": 40500 }, { "epoch": 2.039881162193464, "grad_norm": 4.0340517785394745, "learning_rate": 2.8087937779715124e-06, "loss": 0.1169, "step": 40510 }, { "epoch": 2.0403847122211594, "grad_norm": 3.717355740507716, "learning_rate": 2.8061609050297926e-06, "loss": 0.1536, "step": 40520 }, { "epoch": 2.0408882622488544, "grad_norm": 1.8114921818921972, "learning_rate": 2.8035287852131055e-06, "loss": 0.1143, "step": 40530 }, { "epoch": 2.0413918122765495, "grad_norm": 2.551490465972688, "learning_rate": 2.800897419425036e-06, "loss": 0.1255, "step": 40540 }, { "epoch": 2.041895362304245, "grad_norm": 1.7435347476121354, "learning_rate": 2.7982668085689104e-06, "loss": 0.1324, "step": 40550 }, { "epoch": 2.04239891233194, "grad_norm": 5.042831943765564, "learning_rate": 2.795636953547791e-06, "loss": 0.1539, "step": 40560 }, { "epoch": 2.0429024623596352, "grad_norm": 3.1978318166600466, "learning_rate": 2.7930078552644814e-06, "loss": 0.1454, "step": 40570 }, { "epoch": 2.0434060123873308, "grad_norm": 4.3236779454736425, "learning_rate": 2.7903795146215286e-06, "loss": 0.1182, "step": 40580 }, { "epoch": 2.043909562415026, "grad_norm": 3.3521112541842744, "learning_rate": 2.7877519325212187e-06, "loss": 0.1344, "step": 40590 }, { "epoch": 2.0444131124427214, "grad_norm": 2.6477662996635014, "learning_rate": 2.785125109865579e-06, "loss": 0.1192, "step": 40600 }, { "epoch": 2.0449166624704165, "grad_norm": 2.9014339625901813, "learning_rate": 2.782499047556369e-06, "loss": 0.134, "step": 40610 }, { "epoch": 2.0454202124981116, "grad_norm": 4.071396398922559, "learning_rate": 2.7798737464950964e-06, "loss": 0.1412, "step": 40620 }, { "epoch": 2.045923762525807, "grad_norm": 2.9826927819506293, "learning_rate": 2.777249207583e-06, "loss": 0.1373, "step": 40630 }, { "epoch": 2.046427312553502, "grad_norm": 3.0437879973151847, "learning_rate": 2.774625431721063e-06, "loss": 0.1418, "step": 40640 }, { "epoch": 2.0469308625811973, "grad_norm": 3.1166417513397104, "learning_rate": 2.7720024198100005e-06, "loss": 0.1368, "step": 40650 }, { "epoch": 2.047434412608893, "grad_norm": 3.739749813122944, "learning_rate": 2.76938017275027e-06, "loss": 0.1201, "step": 40660 }, { "epoch": 2.047937962636588, "grad_norm": 2.880789995440842, "learning_rate": 2.7667586914420674e-06, "loss": 0.1309, "step": 40670 }, { "epoch": 2.048441512664283, "grad_norm": 1.6955283255526405, "learning_rate": 2.7641379767853203e-06, "loss": 0.0877, "step": 40680 }, { "epoch": 2.0489450626919785, "grad_norm": 4.007409422394392, "learning_rate": 2.7615180296796997e-06, "loss": 0.1307, "step": 40690 }, { "epoch": 2.0494486127196736, "grad_norm": 2.1740951534637856, "learning_rate": 2.7588988510246043e-06, "loss": 0.1244, "step": 40700 }, { "epoch": 2.049952162747369, "grad_norm": 2.0979943731366357, "learning_rate": 2.756280441719178e-06, "loss": 0.1032, "step": 40710 }, { "epoch": 2.050455712775064, "grad_norm": 2.44011919015089, "learning_rate": 2.7536628026622992e-06, "loss": 0.1379, "step": 40720 }, { "epoch": 2.0509592628027593, "grad_norm": 3.8523878162978367, "learning_rate": 2.7510459347525743e-06, "loss": 0.1217, "step": 40730 }, { "epoch": 2.051462812830455, "grad_norm": 3.568379343985307, "learning_rate": 2.7484298388883523e-06, "loss": 0.135, "step": 40740 }, { "epoch": 2.05196636285815, "grad_norm": 3.664725923605917, "learning_rate": 2.7458145159677185e-06, "loss": 0.1342, "step": 40750 }, { "epoch": 2.052469912885845, "grad_norm": 2.3993895072810854, "learning_rate": 2.743199966888486e-06, "loss": 0.1439, "step": 40760 }, { "epoch": 2.0529734629135405, "grad_norm": 2.377279393738005, "learning_rate": 2.7405861925482048e-06, "loss": 0.1333, "step": 40770 }, { "epoch": 2.0534770129412356, "grad_norm": 2.838516704555152, "learning_rate": 2.737973193844161e-06, "loss": 0.1128, "step": 40780 }, { "epoch": 2.053980562968931, "grad_norm": 3.471164530519512, "learning_rate": 2.7353609716733765e-06, "loss": 0.1597, "step": 40790 }, { "epoch": 2.0544841129966263, "grad_norm": 2.6367505624893384, "learning_rate": 2.732749526932599e-06, "loss": 0.1225, "step": 40800 }, { "epoch": 2.0549876630243213, "grad_norm": 3.980457881143852, "learning_rate": 2.7301388605183154e-06, "loss": 0.1374, "step": 40810 }, { "epoch": 2.055491213052017, "grad_norm": 3.2364061370285024, "learning_rate": 2.7275289733267435e-06, "loss": 0.1458, "step": 40820 }, { "epoch": 2.055994763079712, "grad_norm": 4.162399680959606, "learning_rate": 2.7249198662538382e-06, "loss": 0.1475, "step": 40830 }, { "epoch": 2.056498313107407, "grad_norm": 4.446338745976791, "learning_rate": 2.722311540195275e-06, "loss": 0.1703, "step": 40840 }, { "epoch": 2.0570018631351026, "grad_norm": 3.8750534967038175, "learning_rate": 2.7197039960464713e-06, "loss": 0.1261, "step": 40850 }, { "epoch": 2.0575054131627977, "grad_norm": 2.7440010115894706, "learning_rate": 2.7170972347025737e-06, "loss": 0.1312, "step": 40860 }, { "epoch": 2.0580089631904928, "grad_norm": 4.54955555388068, "learning_rate": 2.7144912570584626e-06, "loss": 0.1544, "step": 40870 }, { "epoch": 2.0585125132181883, "grad_norm": 3.852245278368183, "learning_rate": 2.711886064008742e-06, "loss": 0.133, "step": 40880 }, { "epoch": 2.0590160632458834, "grad_norm": 3.6923164883468367, "learning_rate": 2.7092816564477518e-06, "loss": 0.1273, "step": 40890 }, { "epoch": 2.059519613273579, "grad_norm": 4.334932501586113, "learning_rate": 2.7066780352695647e-06, "loss": 0.1513, "step": 40900 }, { "epoch": 2.060023163301274, "grad_norm": 2.776915889549735, "learning_rate": 2.7040752013679784e-06, "loss": 0.1457, "step": 40910 }, { "epoch": 2.060526713328969, "grad_norm": 3.4249521364659747, "learning_rate": 2.7014731556365194e-06, "loss": 0.1576, "step": 40920 }, { "epoch": 2.0610302633566646, "grad_norm": 3.2337615225112017, "learning_rate": 2.698871898968448e-06, "loss": 0.1415, "step": 40930 }, { "epoch": 2.0615338133843597, "grad_norm": 1.954299381554565, "learning_rate": 2.6962714322567535e-06, "loss": 0.1279, "step": 40940 }, { "epoch": 2.062037363412055, "grad_norm": 3.3638152846502734, "learning_rate": 2.6936717563941534e-06, "loss": 0.137, "step": 40950 }, { "epoch": 2.0625409134397503, "grad_norm": 1.8286888891454873, "learning_rate": 2.6910728722730887e-06, "loss": 0.1478, "step": 40960 }, { "epoch": 2.0630444634674454, "grad_norm": 2.527322216134793, "learning_rate": 2.6884747807857376e-06, "loss": 0.1348, "step": 40970 }, { "epoch": 2.063548013495141, "grad_norm": 3.1646584037458503, "learning_rate": 2.6858774828239964e-06, "loss": 0.1236, "step": 40980 }, { "epoch": 2.064051563522836, "grad_norm": 3.1746706408905006, "learning_rate": 2.6832809792794984e-06, "loss": 0.1549, "step": 40990 }, { "epoch": 2.064555113550531, "grad_norm": 2.3233567804426043, "learning_rate": 2.6806852710435956e-06, "loss": 0.163, "step": 41000 }, { "epoch": 2.0650586635782266, "grad_norm": 3.629949382451387, "learning_rate": 2.6780903590073727e-06, "loss": 0.1562, "step": 41010 }, { "epoch": 2.0655622136059217, "grad_norm": 3.523936274002547, "learning_rate": 2.6754962440616415e-06, "loss": 0.1311, "step": 41020 }, { "epoch": 2.066065763633617, "grad_norm": 4.744023429725912, "learning_rate": 2.6729029270969353e-06, "loss": 0.145, "step": 41030 }, { "epoch": 2.0665693136613124, "grad_norm": 4.145954761062938, "learning_rate": 2.670310409003518e-06, "loss": 0.1536, "step": 41040 }, { "epoch": 2.0670728636890074, "grad_norm": 3.043770850953555, "learning_rate": 2.667718690671376e-06, "loss": 0.1363, "step": 41050 }, { "epoch": 2.0675764137167025, "grad_norm": 4.4338827292073395, "learning_rate": 2.6651277729902248e-06, "loss": 0.1514, "step": 41060 }, { "epoch": 2.068079963744398, "grad_norm": 3.7622403494320817, "learning_rate": 2.6625376568494997e-06, "loss": 0.1332, "step": 41070 }, { "epoch": 2.068583513772093, "grad_norm": 3.1081808033982825, "learning_rate": 2.659948343138366e-06, "loss": 0.16, "step": 41080 }, { "epoch": 2.0690870637997887, "grad_norm": 3.7444170024409464, "learning_rate": 2.657359832745712e-06, "loss": 0.1432, "step": 41090 }, { "epoch": 2.0695906138274838, "grad_norm": 3.6201531421733, "learning_rate": 2.65477212656015e-06, "loss": 0.1681, "step": 41100 }, { "epoch": 2.070094163855179, "grad_norm": 4.0971414597535745, "learning_rate": 2.6521852254700163e-06, "loss": 0.1423, "step": 41110 }, { "epoch": 2.0705977138828744, "grad_norm": 2.2129433168322015, "learning_rate": 2.6495991303633674e-06, "loss": 0.112, "step": 41120 }, { "epoch": 2.0711012639105695, "grad_norm": 2.9766648947783274, "learning_rate": 2.647013842127988e-06, "loss": 0.1424, "step": 41130 }, { "epoch": 2.0716048139382646, "grad_norm": 2.2070752714732746, "learning_rate": 2.644429361651387e-06, "loss": 0.1433, "step": 41140 }, { "epoch": 2.07210836396596, "grad_norm": 3.6004288319898423, "learning_rate": 2.6418456898207888e-06, "loss": 0.1497, "step": 41150 }, { "epoch": 2.072611913993655, "grad_norm": 3.8152465210735396, "learning_rate": 2.6392628275231456e-06, "loss": 0.1494, "step": 41160 }, { "epoch": 2.0731154640213507, "grad_norm": 4.050434259149585, "learning_rate": 2.636680775645134e-06, "loss": 0.173, "step": 41170 }, { "epoch": 2.073619014049046, "grad_norm": 2.990199620799318, "learning_rate": 2.6340995350731455e-06, "loss": 0.1324, "step": 41180 }, { "epoch": 2.074122564076741, "grad_norm": 2.821847981518062, "learning_rate": 2.6315191066932954e-06, "loss": 0.1583, "step": 41190 }, { "epoch": 2.0746261141044364, "grad_norm": 4.49776285772367, "learning_rate": 2.6289394913914234e-06, "loss": 0.15, "step": 41200 }, { "epoch": 2.0751296641321315, "grad_norm": 3.4373778869591254, "learning_rate": 2.6263606900530877e-06, "loss": 0.1358, "step": 41210 }, { "epoch": 2.0756332141598266, "grad_norm": 3.753757282952526, "learning_rate": 2.623782703563569e-06, "loss": 0.1752, "step": 41220 }, { "epoch": 2.076136764187522, "grad_norm": 2.705128929544809, "learning_rate": 2.6212055328078633e-06, "loss": 0.145, "step": 41230 }, { "epoch": 2.076640314215217, "grad_norm": 3.168939445009091, "learning_rate": 2.6186291786706942e-06, "loss": 0.1664, "step": 41240 }, { "epoch": 2.0771438642429123, "grad_norm": 3.169320745705243, "learning_rate": 2.6160536420364957e-06, "loss": 0.1364, "step": 41250 }, { "epoch": 2.077647414270608, "grad_norm": 3.823121073973961, "learning_rate": 2.61347892378943e-06, "loss": 0.1493, "step": 41260 }, { "epoch": 2.078150964298303, "grad_norm": 3.178059174707921, "learning_rate": 2.6109050248133717e-06, "loss": 0.1482, "step": 41270 }, { "epoch": 2.0786545143259985, "grad_norm": 3.3738353086159614, "learning_rate": 2.6083319459919186e-06, "loss": 0.1445, "step": 41280 }, { "epoch": 2.0791580643536935, "grad_norm": 2.1313792964426064, "learning_rate": 2.605759688208387e-06, "loss": 0.1339, "step": 41290 }, { "epoch": 2.0796616143813886, "grad_norm": 4.189719054241153, "learning_rate": 2.603188252345805e-06, "loss": 0.1241, "step": 41300 }, { "epoch": 2.080165164409084, "grad_norm": 3.740306922408268, "learning_rate": 2.6006176392869276e-06, "loss": 0.1514, "step": 41310 }, { "epoch": 2.0806687144367793, "grad_norm": 3.2776351437318114, "learning_rate": 2.5980478499142202e-06, "loss": 0.1431, "step": 41320 }, { "epoch": 2.0811722644644743, "grad_norm": 3.072872873512233, "learning_rate": 2.595478885109868e-06, "loss": 0.1431, "step": 41330 }, { "epoch": 2.08167581449217, "grad_norm": 3.87108063000348, "learning_rate": 2.592910745755777e-06, "loss": 0.1557, "step": 41340 }, { "epoch": 2.082179364519865, "grad_norm": 4.576515679107204, "learning_rate": 2.590343432733562e-06, "loss": 0.1669, "step": 41350 }, { "epoch": 2.0826829145475605, "grad_norm": 3.571260488057347, "learning_rate": 2.5877769469245585e-06, "loss": 0.156, "step": 41360 }, { "epoch": 2.0831864645752556, "grad_norm": 4.345473601753904, "learning_rate": 2.5852112892098215e-06, "loss": 0.1178, "step": 41370 }, { "epoch": 2.0836900146029507, "grad_norm": 2.323134349966992, "learning_rate": 2.5826464604701156e-06, "loss": 0.1643, "step": 41380 }, { "epoch": 2.084193564630646, "grad_norm": 1.857863519258545, "learning_rate": 2.580082461585921e-06, "loss": 0.1331, "step": 41390 }, { "epoch": 2.0846971146583413, "grad_norm": 3.6478434854925177, "learning_rate": 2.577519293437437e-06, "loss": 0.1365, "step": 41400 }, { "epoch": 2.0852006646860364, "grad_norm": 3.2119665477150017, "learning_rate": 2.574956956904579e-06, "loss": 0.1321, "step": 41410 }, { "epoch": 2.085704214713732, "grad_norm": 2.634340112539796, "learning_rate": 2.572395452866969e-06, "loss": 0.1237, "step": 41420 }, { "epoch": 2.086207764741427, "grad_norm": 3.3838672104386713, "learning_rate": 2.569834782203949e-06, "loss": 0.1511, "step": 41430 }, { "epoch": 2.0867113147691225, "grad_norm": 5.850414756840966, "learning_rate": 2.5672749457945767e-06, "loss": 0.1341, "step": 41440 }, { "epoch": 2.0872148647968176, "grad_norm": 3.06348324446965, "learning_rate": 2.5647159445176227e-06, "loss": 0.138, "step": 41450 }, { "epoch": 2.0877184148245127, "grad_norm": 2.429203550954885, "learning_rate": 2.562157779251561e-06, "loss": 0.1318, "step": 41460 }, { "epoch": 2.0882219648522082, "grad_norm": 2.5606162102194308, "learning_rate": 2.559600450874591e-06, "loss": 0.1324, "step": 41470 }, { "epoch": 2.0887255148799033, "grad_norm": 3.7972284362380053, "learning_rate": 2.5570439602646203e-06, "loss": 0.1132, "step": 41480 }, { "epoch": 2.0892290649075984, "grad_norm": 4.955030995648123, "learning_rate": 2.5544883082992705e-06, "loss": 0.1544, "step": 41490 }, { "epoch": 2.089732614935294, "grad_norm": 3.4272673550663186, "learning_rate": 2.55193349585587e-06, "loss": 0.143, "step": 41500 }, { "epoch": 2.090236164962989, "grad_norm": 1.392395551685313, "learning_rate": 2.549379523811465e-06, "loss": 0.15, "step": 41510 }, { "epoch": 2.090739714990684, "grad_norm": 0.7363178102961988, "learning_rate": 2.5468263930428123e-06, "loss": 0.1507, "step": 41520 }, { "epoch": 2.0912432650183796, "grad_norm": 3.010121572815587, "learning_rate": 2.5442741044263765e-06, "loss": 0.1552, "step": 41530 }, { "epoch": 2.0917468150460747, "grad_norm": 2.3301175896177426, "learning_rate": 2.541722658838334e-06, "loss": 0.1186, "step": 41540 }, { "epoch": 2.0922503650737703, "grad_norm": 4.260767486110094, "learning_rate": 2.539172057154574e-06, "loss": 0.1468, "step": 41550 }, { "epoch": 2.0927539151014654, "grad_norm": 3.0269428864322085, "learning_rate": 2.5366223002506968e-06, "loss": 0.1338, "step": 41560 }, { "epoch": 2.0932574651291604, "grad_norm": 2.467036598843361, "learning_rate": 2.5340733890020074e-06, "loss": 0.13, "step": 41570 }, { "epoch": 2.093761015156856, "grad_norm": 3.712823119895623, "learning_rate": 2.531525324283526e-06, "loss": 0.1344, "step": 41580 }, { "epoch": 2.094264565184551, "grad_norm": 3.4668261419485313, "learning_rate": 2.5289781069699813e-06, "loss": 0.1489, "step": 41590 }, { "epoch": 2.094768115212246, "grad_norm": 2.0728081877066233, "learning_rate": 2.526431737935806e-06, "loss": 0.1321, "step": 41600 }, { "epoch": 2.0952716652399417, "grad_norm": 3.0062909668049524, "learning_rate": 2.5238862180551506e-06, "loss": 0.1182, "step": 41610 }, { "epoch": 2.0957752152676368, "grad_norm": 3.0750067781268706, "learning_rate": 2.5213415482018635e-06, "loss": 0.1366, "step": 41620 }, { "epoch": 2.0962787652953323, "grad_norm": 2.8580739380413194, "learning_rate": 2.5187977292495093e-06, "loss": 0.1325, "step": 41630 }, { "epoch": 2.0967823153230274, "grad_norm": 2.661372485378144, "learning_rate": 2.516254762071361e-06, "loss": 0.1061, "step": 41640 }, { "epoch": 2.0972858653507225, "grad_norm": 3.865376456152637, "learning_rate": 2.5137126475403897e-06, "loss": 0.1053, "step": 41650 }, { "epoch": 2.097789415378418, "grad_norm": 2.6508662854181866, "learning_rate": 2.5111713865292863e-06, "loss": 0.1049, "step": 41660 }, { "epoch": 2.098292965406113, "grad_norm": 1.5293586324667352, "learning_rate": 2.508630979910438e-06, "loss": 0.1249, "step": 41670 }, { "epoch": 2.098796515433808, "grad_norm": 3.3964487866571, "learning_rate": 2.506091428555947e-06, "loss": 0.1716, "step": 41680 }, { "epoch": 2.0993000654615037, "grad_norm": 2.987405429604687, "learning_rate": 2.5035527333376142e-06, "loss": 0.1436, "step": 41690 }, { "epoch": 2.099803615489199, "grad_norm": 4.7204442796666575, "learning_rate": 2.5010148951269535e-06, "loss": 0.1294, "step": 41700 }, { "epoch": 2.100307165516894, "grad_norm": 4.072996012711047, "learning_rate": 2.498477914795181e-06, "loss": 0.1698, "step": 41710 }, { "epoch": 2.1008107155445894, "grad_norm": 3.1287415228559614, "learning_rate": 2.4959417932132206e-06, "loss": 0.1287, "step": 41720 }, { "epoch": 2.1013142655722845, "grad_norm": 4.483123065620355, "learning_rate": 2.493406531251699e-06, "loss": 0.1504, "step": 41730 }, { "epoch": 2.10181781559998, "grad_norm": 2.073523218236399, "learning_rate": 2.490872129780946e-06, "loss": 0.1176, "step": 41740 }, { "epoch": 2.102321365627675, "grad_norm": 2.823207378393523, "learning_rate": 2.488338589671e-06, "loss": 0.1327, "step": 41750 }, { "epoch": 2.10282491565537, "grad_norm": 2.7159444861078823, "learning_rate": 2.485805911791605e-06, "loss": 0.1238, "step": 41760 }, { "epoch": 2.1033284656830658, "grad_norm": 3.9172649945530367, "learning_rate": 2.4832740970122023e-06, "loss": 0.1416, "step": 41770 }, { "epoch": 2.103832015710761, "grad_norm": 3.852621659354735, "learning_rate": 2.4807431462019427e-06, "loss": 0.1419, "step": 41780 }, { "epoch": 2.104335565738456, "grad_norm": 3.2090161889066984, "learning_rate": 2.4782130602296804e-06, "loss": 0.1099, "step": 41790 }, { "epoch": 2.1048391157661515, "grad_norm": 3.155771482930191, "learning_rate": 2.47568383996397e-06, "loss": 0.1479, "step": 41800 }, { "epoch": 2.1053426657938465, "grad_norm": 3.099085038822039, "learning_rate": 2.473155486273066e-06, "loss": 0.1184, "step": 41810 }, { "epoch": 2.105846215821542, "grad_norm": 4.036769562492115, "learning_rate": 2.4706280000249333e-06, "loss": 0.1382, "step": 41820 }, { "epoch": 2.106349765849237, "grad_norm": 3.0796253187065883, "learning_rate": 2.4681013820872337e-06, "loss": 0.1375, "step": 41830 }, { "epoch": 2.1068533158769323, "grad_norm": 2.885145958298426, "learning_rate": 2.465575633327334e-06, "loss": 0.1418, "step": 41840 }, { "epoch": 2.107356865904628, "grad_norm": 3.250365155328245, "learning_rate": 2.4630507546122977e-06, "loss": 0.1518, "step": 41850 }, { "epoch": 2.107860415932323, "grad_norm": 3.616316879676583, "learning_rate": 2.460526746808896e-06, "loss": 0.1535, "step": 41860 }, { "epoch": 2.108363965960018, "grad_norm": 3.8692531733565394, "learning_rate": 2.4580036107835946e-06, "loss": 0.1541, "step": 41870 }, { "epoch": 2.1088675159877135, "grad_norm": 3.4630747212608926, "learning_rate": 2.455481347402566e-06, "loss": 0.1389, "step": 41880 }, { "epoch": 2.1093710660154086, "grad_norm": 4.237422387339394, "learning_rate": 2.4529599575316783e-06, "loss": 0.1551, "step": 41890 }, { "epoch": 2.1098746160431037, "grad_norm": 1.8673042859072353, "learning_rate": 2.450439442036502e-06, "loss": 0.1609, "step": 41900 }, { "epoch": 2.110378166070799, "grad_norm": 3.798079615459439, "learning_rate": 2.447919801782309e-06, "loss": 0.0989, "step": 41910 }, { "epoch": 2.1108817160984943, "grad_norm": 3.321187735229677, "learning_rate": 2.4454010376340666e-06, "loss": 0.1258, "step": 41920 }, { "epoch": 2.11138526612619, "grad_norm": 3.3401868821306313, "learning_rate": 2.442883150456446e-06, "loss": 0.1465, "step": 41930 }, { "epoch": 2.111888816153885, "grad_norm": 2.8885637170188243, "learning_rate": 2.440366141113812e-06, "loss": 0.1522, "step": 41940 }, { "epoch": 2.11239236618158, "grad_norm": 3.662719026941365, "learning_rate": 2.437850010470234e-06, "loss": 0.1277, "step": 41950 }, { "epoch": 2.1128959162092755, "grad_norm": 2.371930351305607, "learning_rate": 2.4353347593894744e-06, "loss": 0.131, "step": 41960 }, { "epoch": 2.1133994662369706, "grad_norm": 3.705846166570805, "learning_rate": 2.4328203887349964e-06, "loss": 0.1343, "step": 41970 }, { "epoch": 2.1139030162646657, "grad_norm": 3.781789198816763, "learning_rate": 2.430306899369962e-06, "loss": 0.1654, "step": 41980 }, { "epoch": 2.1144065662923612, "grad_norm": 5.122575090946738, "learning_rate": 2.42779429215723e-06, "loss": 0.1396, "step": 41990 }, { "epoch": 2.1149101163200563, "grad_norm": 3.3405292512130216, "learning_rate": 2.425282567959355e-06, "loss": 0.1017, "step": 42000 }, { "epoch": 2.115413666347752, "grad_norm": 2.8759610018060036, "learning_rate": 2.422771727638586e-06, "loss": 0.1408, "step": 42010 }, { "epoch": 2.115917216375447, "grad_norm": 4.20595859511918, "learning_rate": 2.4202617720568738e-06, "loss": 0.136, "step": 42020 }, { "epoch": 2.116420766403142, "grad_norm": 4.438908495100991, "learning_rate": 2.4177527020758655e-06, "loss": 0.1342, "step": 42030 }, { "epoch": 2.1169243164308376, "grad_norm": 2.43717547434257, "learning_rate": 2.4152445185568984e-06, "loss": 0.1323, "step": 42040 }, { "epoch": 2.1174278664585326, "grad_norm": 2.238378541672254, "learning_rate": 2.412737222361011e-06, "loss": 0.1488, "step": 42050 }, { "epoch": 2.1179314164862277, "grad_norm": 1.198311765122823, "learning_rate": 2.4102308143489352e-06, "loss": 0.1304, "step": 42060 }, { "epoch": 2.1184349665139233, "grad_norm": 3.686812490320439, "learning_rate": 2.4077252953811015e-06, "loss": 0.1162, "step": 42070 }, { "epoch": 2.1189385165416184, "grad_norm": 4.8048230607961395, "learning_rate": 2.4052206663176243e-06, "loss": 0.1581, "step": 42080 }, { "epoch": 2.119442066569314, "grad_norm": 3.9346039953928824, "learning_rate": 2.402716928018324e-06, "loss": 0.1401, "step": 42090 }, { "epoch": 2.119945616597009, "grad_norm": 2.1521902383114284, "learning_rate": 2.400214081342712e-06, "loss": 0.1524, "step": 42100 }, { "epoch": 2.120449166624704, "grad_norm": 4.325909426953897, "learning_rate": 2.3977121271499943e-06, "loss": 0.1655, "step": 42110 }, { "epoch": 2.1209527166523996, "grad_norm": 3.293194375490672, "learning_rate": 2.395211066299064e-06, "loss": 0.1378, "step": 42120 }, { "epoch": 2.1214562666800947, "grad_norm": 3.879210441704224, "learning_rate": 2.392710899648516e-06, "loss": 0.1487, "step": 42130 }, { "epoch": 2.1219598167077898, "grad_norm": 4.285455863623051, "learning_rate": 2.390211628056636e-06, "loss": 0.1401, "step": 42140 }, { "epoch": 2.1224633667354853, "grad_norm": 2.590195392579706, "learning_rate": 2.3877132523813997e-06, "loss": 0.1312, "step": 42150 }, { "epoch": 2.1229669167631804, "grad_norm": 4.231556278715779, "learning_rate": 2.3852157734804743e-06, "loss": 0.1322, "step": 42160 }, { "epoch": 2.1234704667908755, "grad_norm": 3.488227566412862, "learning_rate": 2.3827191922112236e-06, "loss": 0.1311, "step": 42170 }, { "epoch": 2.123974016818571, "grad_norm": 2.292350149190175, "learning_rate": 2.380223509430704e-06, "loss": 0.1345, "step": 42180 }, { "epoch": 2.124477566846266, "grad_norm": 2.261978029088724, "learning_rate": 2.377728725995656e-06, "loss": 0.1173, "step": 42190 }, { "epoch": 2.1249811168739616, "grad_norm": 2.6718493186254784, "learning_rate": 2.3752348427625187e-06, "loss": 0.1466, "step": 42200 }, { "epoch": 2.1254846669016567, "grad_norm": 3.908025697074087, "learning_rate": 2.3727418605874207e-06, "loss": 0.171, "step": 42210 }, { "epoch": 2.125988216929352, "grad_norm": 4.7822654953085335, "learning_rate": 2.370249780326177e-06, "loss": 0.1498, "step": 42220 }, { "epoch": 2.1264917669570473, "grad_norm": 3.9868088816914393, "learning_rate": 2.3677586028343e-06, "loss": 0.1226, "step": 42230 }, { "epoch": 2.1269953169847424, "grad_norm": 2.1619110078554837, "learning_rate": 2.3652683289669837e-06, "loss": 0.1524, "step": 42240 }, { "epoch": 2.1274988670124375, "grad_norm": 4.413992547571095, "learning_rate": 2.3627789595791195e-06, "loss": 0.1567, "step": 42250 }, { "epoch": 2.128002417040133, "grad_norm": 3.299017796939573, "learning_rate": 2.360290495525286e-06, "loss": 0.1403, "step": 42260 }, { "epoch": 2.128505967067828, "grad_norm": 2.7001249321651044, "learning_rate": 2.3578029376597473e-06, "loss": 0.1452, "step": 42270 }, { "epoch": 2.1290095170955237, "grad_norm": 4.154936639732716, "learning_rate": 2.3553162868364634e-06, "loss": 0.1503, "step": 42280 }, { "epoch": 2.1295130671232188, "grad_norm": 4.493944672543835, "learning_rate": 2.3528305439090744e-06, "loss": 0.1264, "step": 42290 }, { "epoch": 2.130016617150914, "grad_norm": 3.1176979578255275, "learning_rate": 2.350345709730917e-06, "loss": 0.1496, "step": 42300 }, { "epoch": 2.1305201671786094, "grad_norm": 4.188068013127355, "learning_rate": 2.3478617851550093e-06, "loss": 0.1565, "step": 42310 }, { "epoch": 2.1310237172063045, "grad_norm": 2.4686699785284816, "learning_rate": 2.3453787710340615e-06, "loss": 0.151, "step": 42320 }, { "epoch": 2.1315272672339995, "grad_norm": 3.356518403105754, "learning_rate": 2.3428966682204694e-06, "loss": 0.1065, "step": 42330 }, { "epoch": 2.132030817261695, "grad_norm": 2.473831919258712, "learning_rate": 2.3404154775663214e-06, "loss": 0.1095, "step": 42340 }, { "epoch": 2.13253436728939, "grad_norm": 2.6806383377582454, "learning_rate": 2.3379351999233786e-06, "loss": 0.1089, "step": 42350 }, { "epoch": 2.1330379173170853, "grad_norm": 3.9621593169265883, "learning_rate": 2.3354558361431018e-06, "loss": 0.154, "step": 42360 }, { "epoch": 2.133541467344781, "grad_norm": 2.786350925741992, "learning_rate": 2.3329773870766355e-06, "loss": 0.1012, "step": 42370 }, { "epoch": 2.134045017372476, "grad_norm": 0.7275479526710834, "learning_rate": 2.330499853574809e-06, "loss": 0.1209, "step": 42380 }, { "epoch": 2.1345485674001714, "grad_norm": 3.7025045714698743, "learning_rate": 2.328023236488134e-06, "loss": 0.1098, "step": 42390 }, { "epoch": 2.1350521174278665, "grad_norm": 2.982589917347904, "learning_rate": 2.3255475366668124e-06, "loss": 0.14, "step": 42400 }, { "epoch": 2.1355556674555616, "grad_norm": 3.7394308276287953, "learning_rate": 2.3230727549607313e-06, "loss": 0.1601, "step": 42410 }, { "epoch": 2.136059217483257, "grad_norm": 3.3413284706405024, "learning_rate": 2.3205988922194595e-06, "loss": 0.1542, "step": 42420 }, { "epoch": 2.136562767510952, "grad_norm": 4.972510294699205, "learning_rate": 2.3181259492922487e-06, "loss": 0.1467, "step": 42430 }, { "epoch": 2.1370663175386473, "grad_norm": 3.411914215477552, "learning_rate": 2.3156539270280406e-06, "loss": 0.1261, "step": 42440 }, { "epoch": 2.137569867566343, "grad_norm": 3.3077676005125864, "learning_rate": 2.3131828262754584e-06, "loss": 0.1441, "step": 42450 }, { "epoch": 2.138073417594038, "grad_norm": 2.493640377389605, "learning_rate": 2.310712647882807e-06, "loss": 0.1392, "step": 42460 }, { "epoch": 2.1385769676217334, "grad_norm": 4.304178680366594, "learning_rate": 2.3082433926980763e-06, "loss": 0.1666, "step": 42470 }, { "epoch": 2.1390805176494285, "grad_norm": 3.2981255098101907, "learning_rate": 2.3057750615689415e-06, "loss": 0.1265, "step": 42480 }, { "epoch": 2.1395840676771236, "grad_norm": 4.681842094149064, "learning_rate": 2.3033076553427552e-06, "loss": 0.1563, "step": 42490 }, { "epoch": 2.140087617704819, "grad_norm": 4.104317445007288, "learning_rate": 2.300841174866558e-06, "loss": 0.099, "step": 42500 }, { "epoch": 2.1405911677325142, "grad_norm": 4.500100302104651, "learning_rate": 2.2983756209870678e-06, "loss": 0.1615, "step": 42510 }, { "epoch": 2.1410947177602093, "grad_norm": 3.819351210394659, "learning_rate": 2.295910994550688e-06, "loss": 0.1341, "step": 42520 }, { "epoch": 2.141598267787905, "grad_norm": 3.3108168413479864, "learning_rate": 2.293447296403505e-06, "loss": 0.1234, "step": 42530 }, { "epoch": 2.1421018178156, "grad_norm": 4.765745227323709, "learning_rate": 2.290984527391279e-06, "loss": 0.1656, "step": 42540 }, { "epoch": 2.142605367843295, "grad_norm": 6.884757642850713, "learning_rate": 2.288522688359461e-06, "loss": 0.1318, "step": 42550 }, { "epoch": 2.1431089178709906, "grad_norm": 2.9759833435298106, "learning_rate": 2.286061780153174e-06, "loss": 0.1449, "step": 42560 }, { "epoch": 2.1436124678986856, "grad_norm": 3.4219084634981356, "learning_rate": 2.2836018036172286e-06, "loss": 0.1318, "step": 42570 }, { "epoch": 2.144116017926381, "grad_norm": 3.4483122906304726, "learning_rate": 2.28114275959611e-06, "loss": 0.1136, "step": 42580 }, { "epoch": 2.1446195679540763, "grad_norm": 3.2847037783325246, "learning_rate": 2.2786846489339865e-06, "loss": 0.1265, "step": 42590 }, { "epoch": 2.1451231179817714, "grad_norm": 3.1434155496271154, "learning_rate": 2.2762274724747053e-06, "loss": 0.1176, "step": 42600 }, { "epoch": 2.145626668009467, "grad_norm": 5.188467758727896, "learning_rate": 2.273771231061795e-06, "loss": 0.163, "step": 42610 }, { "epoch": 2.146130218037162, "grad_norm": 2.752705783158034, "learning_rate": 2.2713159255384582e-06, "loss": 0.1397, "step": 42620 }, { "epoch": 2.146633768064857, "grad_norm": 2.4509740322580345, "learning_rate": 2.268861556747578e-06, "loss": 0.1474, "step": 42630 }, { "epoch": 2.1471373180925526, "grad_norm": 3.9912123429660222, "learning_rate": 2.2664081255317187e-06, "loss": 0.1546, "step": 42640 }, { "epoch": 2.1476408681202477, "grad_norm": 3.290465469623147, "learning_rate": 2.263955632733123e-06, "loss": 0.1064, "step": 42650 }, { "epoch": 2.148144418147943, "grad_norm": 4.255331409979032, "learning_rate": 2.261504079193705e-06, "loss": 0.1573, "step": 42660 }, { "epoch": 2.1486479681756383, "grad_norm": 2.7719115498194813, "learning_rate": 2.2590534657550627e-06, "loss": 0.1447, "step": 42670 }, { "epoch": 2.1491515182033334, "grad_norm": 3.073014908997348, "learning_rate": 2.2566037932584715e-06, "loss": 0.1123, "step": 42680 }, { "epoch": 2.149655068231029, "grad_norm": 2.7299389786937733, "learning_rate": 2.2541550625448804e-06, "loss": 0.1241, "step": 42690 }, { "epoch": 2.150158618258724, "grad_norm": 4.162042887751293, "learning_rate": 2.251707274454914e-06, "loss": 0.1143, "step": 42700 }, { "epoch": 2.150662168286419, "grad_norm": 3.453020027566297, "learning_rate": 2.2492604298288774e-06, "loss": 0.1094, "step": 42710 }, { "epoch": 2.1511657183141146, "grad_norm": 3.293807947986643, "learning_rate": 2.2468145295067505e-06, "loss": 0.1475, "step": 42720 }, { "epoch": 2.1516692683418097, "grad_norm": 4.39145958075178, "learning_rate": 2.24436957432819e-06, "loss": 0.166, "step": 42730 }, { "epoch": 2.152172818369505, "grad_norm": 5.170023853840461, "learning_rate": 2.2419255651325238e-06, "loss": 0.1751, "step": 42740 }, { "epoch": 2.1526763683972003, "grad_norm": 3.1967250196860353, "learning_rate": 2.2394825027587596e-06, "loss": 0.1369, "step": 42750 }, { "epoch": 2.1531799184248954, "grad_norm": 2.770016742270712, "learning_rate": 2.2370403880455804e-06, "loss": 0.1258, "step": 42760 }, { "epoch": 2.153683468452591, "grad_norm": 3.6156397988350473, "learning_rate": 2.2345992218313406e-06, "loss": 0.1686, "step": 42770 }, { "epoch": 2.154187018480286, "grad_norm": 2.5332044084876952, "learning_rate": 2.2321590049540678e-06, "loss": 0.1412, "step": 42780 }, { "epoch": 2.154690568507981, "grad_norm": 4.35617986192353, "learning_rate": 2.229719738251468e-06, "loss": 0.1517, "step": 42790 }, { "epoch": 2.1551941185356767, "grad_norm": 4.652787365900891, "learning_rate": 2.2272814225609213e-06, "loss": 0.1045, "step": 42800 }, { "epoch": 2.1556976685633717, "grad_norm": 3.6795108102114487, "learning_rate": 2.2248440587194764e-06, "loss": 0.1437, "step": 42810 }, { "epoch": 2.156201218591067, "grad_norm": 3.10556506499886, "learning_rate": 2.2224076475638597e-06, "loss": 0.0995, "step": 42820 }, { "epoch": 2.1567047686187624, "grad_norm": 3.1971623554230395, "learning_rate": 2.21997218993047e-06, "loss": 0.131, "step": 42830 }, { "epoch": 2.1572083186464575, "grad_norm": 3.4110134515452217, "learning_rate": 2.217537686655377e-06, "loss": 0.1332, "step": 42840 }, { "epoch": 2.157711868674153, "grad_norm": 5.038688071543759, "learning_rate": 2.2151041385743215e-06, "loss": 0.1675, "step": 42850 }, { "epoch": 2.158215418701848, "grad_norm": 2.748909368855059, "learning_rate": 2.21267154652272e-06, "loss": 0.1489, "step": 42860 }, { "epoch": 2.158718968729543, "grad_norm": 4.013347000963995, "learning_rate": 2.210239911335659e-06, "loss": 0.1247, "step": 42870 }, { "epoch": 2.1592225187572387, "grad_norm": 3.9394071177961956, "learning_rate": 2.207809233847899e-06, "loss": 0.1308, "step": 42880 }, { "epoch": 2.159726068784934, "grad_norm": 3.1748287937851445, "learning_rate": 2.2053795148938664e-06, "loss": 0.1323, "step": 42890 }, { "epoch": 2.160229618812629, "grad_norm": 3.401755531782661, "learning_rate": 2.2029507553076645e-06, "loss": 0.1127, "step": 42900 }, { "epoch": 2.1607331688403244, "grad_norm": 4.022983720777622, "learning_rate": 2.200522955923061e-06, "loss": 0.1594, "step": 42910 }, { "epoch": 2.1612367188680195, "grad_norm": 2.7242498904481605, "learning_rate": 2.1980961175735006e-06, "loss": 0.12, "step": 42920 }, { "epoch": 2.1617402688957146, "grad_norm": 2.7975227545721877, "learning_rate": 2.1956702410920923e-06, "loss": 0.1446, "step": 42930 }, { "epoch": 2.16224381892341, "grad_norm": 2.4718501319486217, "learning_rate": 2.1932453273116177e-06, "loss": 0.1316, "step": 42940 }, { "epoch": 2.162747368951105, "grad_norm": 3.038920792647705, "learning_rate": 2.1908213770645293e-06, "loss": 0.1382, "step": 42950 }, { "epoch": 2.1632509189788007, "grad_norm": 3.1397723556113224, "learning_rate": 2.1883983911829497e-06, "loss": 0.1332, "step": 42960 }, { "epoch": 2.163754469006496, "grad_norm": 4.379761149454711, "learning_rate": 2.1859763704986614e-06, "loss": 0.1435, "step": 42970 }, { "epoch": 2.164258019034191, "grad_norm": 2.9693005186645234, "learning_rate": 2.1835553158431255e-06, "loss": 0.1324, "step": 42980 }, { "epoch": 2.1647615690618864, "grad_norm": 4.1185227625656005, "learning_rate": 2.181135228047468e-06, "loss": 0.1417, "step": 42990 }, { "epoch": 2.1652651190895815, "grad_norm": 3.2343967412565955, "learning_rate": 2.1787161079424847e-06, "loss": 0.1405, "step": 43000 }, { "epoch": 2.1657686691172766, "grad_norm": 4.288393400186244, "learning_rate": 2.1762979563586338e-06, "loss": 0.1278, "step": 43010 }, { "epoch": 2.166272219144972, "grad_norm": 5.753862150693164, "learning_rate": 2.1738807741260474e-06, "loss": 0.1315, "step": 43020 }, { "epoch": 2.1667757691726672, "grad_norm": 1.8496451681872426, "learning_rate": 2.1714645620745235e-06, "loss": 0.1211, "step": 43030 }, { "epoch": 2.1672793192003628, "grad_norm": 3.1015870542054134, "learning_rate": 2.1690493210335234e-06, "loss": 0.1481, "step": 43040 }, { "epoch": 2.167782869228058, "grad_norm": 2.51538959374272, "learning_rate": 2.1666350518321768e-06, "loss": 0.1274, "step": 43050 }, { "epoch": 2.168286419255753, "grad_norm": 2.9418266627800436, "learning_rate": 2.164221755299281e-06, "loss": 0.1854, "step": 43060 }, { "epoch": 2.1687899692834485, "grad_norm": 3.5511131796498043, "learning_rate": 2.1618094322633016e-06, "loss": 0.1335, "step": 43070 }, { "epoch": 2.1692935193111436, "grad_norm": 4.203852660890615, "learning_rate": 2.1593980835523633e-06, "loss": 0.1164, "step": 43080 }, { "epoch": 2.1697970693388386, "grad_norm": 2.15009101352803, "learning_rate": 2.156987709994261e-06, "loss": 0.1557, "step": 43090 }, { "epoch": 2.170300619366534, "grad_norm": 4.343216818013564, "learning_rate": 2.1545783124164575e-06, "loss": 0.1827, "step": 43100 }, { "epoch": 2.1708041693942293, "grad_norm": 2.972409119247049, "learning_rate": 2.152169891646072e-06, "loss": 0.1158, "step": 43110 }, { "epoch": 2.1713077194219244, "grad_norm": 2.9607155548046005, "learning_rate": 2.1497624485098985e-06, "loss": 0.147, "step": 43120 }, { "epoch": 2.17181126944962, "grad_norm": 4.230799499113205, "learning_rate": 2.1473559838343855e-06, "loss": 0.1336, "step": 43130 }, { "epoch": 2.172314819477315, "grad_norm": 4.693623077130751, "learning_rate": 2.1449504984456533e-06, "loss": 0.1289, "step": 43140 }, { "epoch": 2.1728183695050105, "grad_norm": 3.6843929940497055, "learning_rate": 2.1425459931694838e-06, "loss": 0.1281, "step": 43150 }, { "epoch": 2.1733219195327056, "grad_norm": 2.2394187649658392, "learning_rate": 2.1401424688313187e-06, "loss": 0.1315, "step": 43160 }, { "epoch": 2.1738254695604007, "grad_norm": 4.59416351064255, "learning_rate": 2.1377399262562707e-06, "loss": 0.1581, "step": 43170 }, { "epoch": 2.174329019588096, "grad_norm": 1.96963798603135, "learning_rate": 2.1353383662691063e-06, "loss": 0.1459, "step": 43180 }, { "epoch": 2.1748325696157913, "grad_norm": 2.7560602437528128, "learning_rate": 2.1329377896942626e-06, "loss": 0.1375, "step": 43190 }, { "epoch": 2.1753361196434864, "grad_norm": 2.7695289256920343, "learning_rate": 2.1305381973558328e-06, "loss": 0.1225, "step": 43200 }, { "epoch": 2.175839669671182, "grad_norm": 3.8081582639110882, "learning_rate": 2.1281395900775758e-06, "loss": 0.1421, "step": 43210 }, { "epoch": 2.176343219698877, "grad_norm": 4.846852660281739, "learning_rate": 2.1257419686829136e-06, "loss": 0.1301, "step": 43220 }, { "epoch": 2.1768467697265725, "grad_norm": 2.690626631202312, "learning_rate": 2.123345333994928e-06, "loss": 0.1188, "step": 43230 }, { "epoch": 2.1773503197542676, "grad_norm": 1.390025683821934, "learning_rate": 2.1209496868363607e-06, "loss": 0.1131, "step": 43240 }, { "epoch": 2.1778538697819627, "grad_norm": 3.5893413991077576, "learning_rate": 2.1185550280296134e-06, "loss": 0.1493, "step": 43250 }, { "epoch": 2.1783574198096582, "grad_norm": 4.196678548515386, "learning_rate": 2.116161358396754e-06, "loss": 0.1471, "step": 43260 }, { "epoch": 2.1788609698373533, "grad_norm": 3.7907664555878355, "learning_rate": 2.1137686787595074e-06, "loss": 0.117, "step": 43270 }, { "epoch": 2.1793645198650484, "grad_norm": 4.22116592632532, "learning_rate": 2.1113769899392563e-06, "loss": 0.1333, "step": 43280 }, { "epoch": 2.179868069892744, "grad_norm": 3.435888158993796, "learning_rate": 2.1089862927570474e-06, "loss": 0.1257, "step": 43290 }, { "epoch": 2.180371619920439, "grad_norm": 3.0093480853666783, "learning_rate": 2.106596588033587e-06, "loss": 0.1216, "step": 43300 }, { "epoch": 2.180875169948134, "grad_norm": 2.0176798789308665, "learning_rate": 2.104207876589237e-06, "loss": 0.1141, "step": 43310 }, { "epoch": 2.1813787199758297, "grad_norm": 4.821539870872426, "learning_rate": 2.101820159244019e-06, "loss": 0.1099, "step": 43320 }, { "epoch": 2.1818822700035247, "grad_norm": 3.8648216647190288, "learning_rate": 2.0994334368176163e-06, "loss": 0.1644, "step": 43330 }, { "epoch": 2.1823858200312203, "grad_norm": 3.247395764118988, "learning_rate": 2.097047710129369e-06, "loss": 0.1234, "step": 43340 }, { "epoch": 2.1828893700589154, "grad_norm": 3.055014610870258, "learning_rate": 2.094662979998277e-06, "loss": 0.1462, "step": 43350 }, { "epoch": 2.1833929200866105, "grad_norm": 2.4706263603662397, "learning_rate": 2.092279247242993e-06, "loss": 0.1392, "step": 43360 }, { "epoch": 2.183896470114306, "grad_norm": 4.75330583421536, "learning_rate": 2.0898965126818333e-06, "loss": 0.154, "step": 43370 }, { "epoch": 2.184400020142001, "grad_norm": 2.101149023540863, "learning_rate": 2.08751477713277e-06, "loss": 0.1271, "step": 43380 }, { "epoch": 2.184903570169696, "grad_norm": 2.560614413545416, "learning_rate": 2.085134041413429e-06, "loss": 0.1272, "step": 43390 }, { "epoch": 2.1854071201973917, "grad_norm": 2.010177771292234, "learning_rate": 2.082754306341095e-06, "loss": 0.151, "step": 43400 }, { "epoch": 2.185910670225087, "grad_norm": 1.9582765688506227, "learning_rate": 2.0803755727327097e-06, "loss": 0.1314, "step": 43410 }, { "epoch": 2.1864142202527823, "grad_norm": 3.251210170313237, "learning_rate": 2.077997841404874e-06, "loss": 0.1216, "step": 43420 }, { "epoch": 2.1869177702804774, "grad_norm": 3.073507145315072, "learning_rate": 2.0756211131738365e-06, "loss": 0.1115, "step": 43430 }, { "epoch": 2.1874213203081725, "grad_norm": 3.937543074326027, "learning_rate": 2.0732453888555094e-06, "loss": 0.1508, "step": 43440 }, { "epoch": 2.187924870335868, "grad_norm": 3.6439557507126596, "learning_rate": 2.0708706692654577e-06, "loss": 0.1305, "step": 43450 }, { "epoch": 2.188428420363563, "grad_norm": 2.9131711765962467, "learning_rate": 2.0684969552189007e-06, "loss": 0.0971, "step": 43460 }, { "epoch": 2.188931970391258, "grad_norm": 1.9516628488056655, "learning_rate": 2.06612424753071e-06, "loss": 0.1428, "step": 43470 }, { "epoch": 2.1894355204189537, "grad_norm": 3.785561229603572, "learning_rate": 2.0637525470154172e-06, "loss": 0.1706, "step": 43480 }, { "epoch": 2.189939070446649, "grad_norm": 4.568476657799639, "learning_rate": 2.0613818544872048e-06, "loss": 0.138, "step": 43490 }, { "epoch": 2.190442620474344, "grad_norm": 1.248179830568718, "learning_rate": 2.0590121707599126e-06, "loss": 0.1039, "step": 43500 }, { "epoch": 2.1909461705020394, "grad_norm": 4.546181313448173, "learning_rate": 2.056643496647028e-06, "loss": 0.1408, "step": 43510 }, { "epoch": 2.1914497205297345, "grad_norm": 2.5015253766902963, "learning_rate": 2.0542758329616975e-06, "loss": 0.1204, "step": 43520 }, { "epoch": 2.19195327055743, "grad_norm": 4.140870195075864, "learning_rate": 2.0519091805167156e-06, "loss": 0.1301, "step": 43530 }, { "epoch": 2.192456820585125, "grad_norm": 4.1831945424867705, "learning_rate": 2.049543540124537e-06, "loss": 0.1301, "step": 43540 }, { "epoch": 2.1929603706128202, "grad_norm": 4.94163440511217, "learning_rate": 2.04717891259726e-06, "loss": 0.1331, "step": 43550 }, { "epoch": 2.1934639206405158, "grad_norm": 3.2682565805858848, "learning_rate": 2.0448152987466423e-06, "loss": 0.1472, "step": 43560 }, { "epoch": 2.193967470668211, "grad_norm": 3.1930602717291525, "learning_rate": 2.0424526993840926e-06, "loss": 0.1186, "step": 43570 }, { "epoch": 2.194471020695906, "grad_norm": 2.9942390903371754, "learning_rate": 2.0400911153206683e-06, "loss": 0.1663, "step": 43580 }, { "epoch": 2.1949745707236015, "grad_norm": 4.822497228122426, "learning_rate": 2.0377305473670765e-06, "loss": 0.1361, "step": 43590 }, { "epoch": 2.1954781207512966, "grad_norm": 2.5098234926272296, "learning_rate": 2.035370996333682e-06, "loss": 0.0985, "step": 43600 }, { "epoch": 2.195981670778992, "grad_norm": 4.424169472077294, "learning_rate": 2.033012463030497e-06, "loss": 0.1616, "step": 43610 }, { "epoch": 2.196485220806687, "grad_norm": 2.0931222089959585, "learning_rate": 2.0306549482671863e-06, "loss": 0.1434, "step": 43620 }, { "epoch": 2.1969887708343823, "grad_norm": 2.665391599282798, "learning_rate": 2.028298452853059e-06, "loss": 0.1587, "step": 43630 }, { "epoch": 2.197492320862078, "grad_norm": 2.9371368369240716, "learning_rate": 2.025942977597082e-06, "loss": 0.1565, "step": 43640 }, { "epoch": 2.197995870889773, "grad_norm": 4.672262552227011, "learning_rate": 2.0235885233078685e-06, "loss": 0.122, "step": 43650 }, { "epoch": 2.198499420917468, "grad_norm": 3.272078903331918, "learning_rate": 2.0212350907936807e-06, "loss": 0.1343, "step": 43660 }, { "epoch": 2.1990029709451635, "grad_norm": 2.822351683935089, "learning_rate": 2.0188826808624283e-06, "loss": 0.1416, "step": 43670 }, { "epoch": 2.1995065209728586, "grad_norm": 2.586127044000489, "learning_rate": 2.016531294321674e-06, "loss": 0.1114, "step": 43680 }, { "epoch": 2.2000100710005537, "grad_norm": 2.515576503319196, "learning_rate": 2.0141809319786298e-06, "loss": 0.1399, "step": 43690 }, { "epoch": 2.200513621028249, "grad_norm": 2.682351242671729, "learning_rate": 2.0118315946401494e-06, "loss": 0.1503, "step": 43700 }, { "epoch": 2.2010171710559443, "grad_norm": 2.9920998461709867, "learning_rate": 2.0094832831127414e-06, "loss": 0.132, "step": 43710 }, { "epoch": 2.20152072108364, "grad_norm": 3.3734022911005725, "learning_rate": 2.0071359982025613e-06, "loss": 0.1091, "step": 43720 }, { "epoch": 2.202024271111335, "grad_norm": 3.3771833245864205, "learning_rate": 2.0047897407154073e-06, "loss": 0.1221, "step": 43730 }, { "epoch": 2.20252782113903, "grad_norm": 2.4999768087456604, "learning_rate": 2.002444511456731e-06, "loss": 0.1231, "step": 43740 }, { "epoch": 2.2030313711667255, "grad_norm": 3.047619920214586, "learning_rate": 2.000100311231626e-06, "loss": 0.1361, "step": 43750 }, { "epoch": 2.2035349211944206, "grad_norm": 5.207311644382773, "learning_rate": 1.997757140844836e-06, "loss": 0.1517, "step": 43760 }, { "epoch": 2.2040384712221157, "grad_norm": 3.0568634184311345, "learning_rate": 1.995415001100752e-06, "loss": 0.1472, "step": 43770 }, { "epoch": 2.2045420212498112, "grad_norm": 3.9510164019239387, "learning_rate": 1.993073892803406e-06, "loss": 0.1655, "step": 43780 }, { "epoch": 2.2050455712775063, "grad_norm": 2.6830052227050705, "learning_rate": 1.9907338167564827e-06, "loss": 0.118, "step": 43790 }, { "epoch": 2.205549121305202, "grad_norm": 4.4063139490623495, "learning_rate": 1.9883947737633053e-06, "loss": 0.1297, "step": 43800 }, { "epoch": 2.206052671332897, "grad_norm": 2.6178368802453216, "learning_rate": 1.986056764626849e-06, "loss": 0.1371, "step": 43810 }, { "epoch": 2.206556221360592, "grad_norm": 4.075243526550963, "learning_rate": 1.9837197901497285e-06, "loss": 0.1318, "step": 43820 }, { "epoch": 2.2070597713882876, "grad_norm": 3.9121165057629086, "learning_rate": 1.981383851134207e-06, "loss": 0.1207, "step": 43830 }, { "epoch": 2.2075633214159827, "grad_norm": 3.0580987704989195, "learning_rate": 1.9790489483821916e-06, "loss": 0.1281, "step": 43840 }, { "epoch": 2.2080668714436777, "grad_norm": 3.3639578621768256, "learning_rate": 1.9767150826952353e-06, "loss": 0.122, "step": 43850 }, { "epoch": 2.2085704214713733, "grad_norm": 3.625340901475028, "learning_rate": 1.97438225487453e-06, "loss": 0.1451, "step": 43860 }, { "epoch": 2.2090739714990684, "grad_norm": 3.6693340124375355, "learning_rate": 1.972050465720914e-06, "loss": 0.146, "step": 43870 }, { "epoch": 2.2095775215267635, "grad_norm": 2.8912395214838145, "learning_rate": 1.9697197160348697e-06, "loss": 0.1314, "step": 43880 }, { "epoch": 2.210081071554459, "grad_norm": 3.016397744132022, "learning_rate": 1.9673900066165247e-06, "loss": 0.1382, "step": 43890 }, { "epoch": 2.210584621582154, "grad_norm": 3.8496852821948706, "learning_rate": 1.9650613382656437e-06, "loss": 0.1397, "step": 43900 }, { "epoch": 2.2110881716098496, "grad_norm": 2.711291250902028, "learning_rate": 1.9627337117816393e-06, "loss": 0.116, "step": 43910 }, { "epoch": 2.2115917216375447, "grad_norm": 6.367812466956464, "learning_rate": 1.9604071279635656e-06, "loss": 0.1411, "step": 43920 }, { "epoch": 2.21209527166524, "grad_norm": 3.6076217620324385, "learning_rate": 1.9580815876101167e-06, "loss": 0.1524, "step": 43930 }, { "epoch": 2.2125988216929353, "grad_norm": 3.7626627199596, "learning_rate": 1.955757091519627e-06, "loss": 0.141, "step": 43940 }, { "epoch": 2.2131023717206304, "grad_norm": 1.2569498677689601, "learning_rate": 1.953433640490077e-06, "loss": 0.129, "step": 43950 }, { "epoch": 2.2136059217483255, "grad_norm": 4.388304349407275, "learning_rate": 1.9511112353190888e-06, "loss": 0.1326, "step": 43960 }, { "epoch": 2.214109471776021, "grad_norm": 3.475643230471548, "learning_rate": 1.9487898768039185e-06, "loss": 0.1597, "step": 43970 }, { "epoch": 2.214613021803716, "grad_norm": 3.090018181851883, "learning_rate": 1.9464695657414693e-06, "loss": 0.119, "step": 43980 }, { "epoch": 2.2151165718314116, "grad_norm": 3.9151871198719395, "learning_rate": 1.944150302928283e-06, "loss": 0.1448, "step": 43990 }, { "epoch": 2.2156201218591067, "grad_norm": 2.6714911780123782, "learning_rate": 1.9418320891605437e-06, "loss": 0.1317, "step": 44000 }, { "epoch": 2.216123671886802, "grad_norm": 2.2533619663757265, "learning_rate": 1.9395149252340704e-06, "loss": 0.1429, "step": 44010 }, { "epoch": 2.2166272219144973, "grad_norm": 3.248227911012307, "learning_rate": 1.9371988119443243e-06, "loss": 0.17, "step": 44020 }, { "epoch": 2.2171307719421924, "grad_norm": 2.9935499330075666, "learning_rate": 1.9348837500864054e-06, "loss": 0.13, "step": 44030 }, { "epoch": 2.2176343219698875, "grad_norm": 2.247200843549535, "learning_rate": 1.932569740455057e-06, "loss": 0.1197, "step": 44040 }, { "epoch": 2.218137871997583, "grad_norm": 3.288178992342702, "learning_rate": 1.930256783844654e-06, "loss": 0.1196, "step": 44050 }, { "epoch": 2.218641422025278, "grad_norm": 2.6985113308600073, "learning_rate": 1.927944881049214e-06, "loss": 0.1388, "step": 44060 }, { "epoch": 2.2191449720529732, "grad_norm": 1.9349264413703182, "learning_rate": 1.9256340328623956e-06, "loss": 0.1368, "step": 44070 }, { "epoch": 2.2196485220806688, "grad_norm": 3.2103738484443785, "learning_rate": 1.923324240077489e-06, "loss": 0.1584, "step": 44080 }, { "epoch": 2.220152072108364, "grad_norm": 2.9992032428390942, "learning_rate": 1.9210155034874233e-06, "loss": 0.1363, "step": 44090 }, { "epoch": 2.2206556221360594, "grad_norm": 4.9352619009879755, "learning_rate": 1.9187078238847696e-06, "loss": 0.124, "step": 44100 }, { "epoch": 2.2211591721637545, "grad_norm": 3.0023639783199902, "learning_rate": 1.9164012020617325e-06, "loss": 0.1074, "step": 44110 }, { "epoch": 2.2216627221914496, "grad_norm": 3.063375264672423, "learning_rate": 1.9140956388101558e-06, "loss": 0.1459, "step": 44120 }, { "epoch": 2.222166272219145, "grad_norm": 4.109581068720675, "learning_rate": 1.911791134921516e-06, "loss": 0.1224, "step": 44130 }, { "epoch": 2.22266982224684, "grad_norm": 3.3069970859154774, "learning_rate": 1.9094876911869305e-06, "loss": 0.1366, "step": 44140 }, { "epoch": 2.2231733722745357, "grad_norm": 2.4177925043299053, "learning_rate": 1.9071853083971481e-06, "loss": 0.137, "step": 44150 }, { "epoch": 2.223676922302231, "grad_norm": 1.3308363366697802, "learning_rate": 1.9048839873425596e-06, "loss": 0.1123, "step": 44160 }, { "epoch": 2.224180472329926, "grad_norm": 3.6553747838583313, "learning_rate": 1.9025837288131832e-06, "loss": 0.1457, "step": 44170 }, { "epoch": 2.2246840223576214, "grad_norm": 3.6665697289719783, "learning_rate": 1.9002845335986792e-06, "loss": 0.1219, "step": 44180 }, { "epoch": 2.2251875723853165, "grad_norm": 2.1897637723633254, "learning_rate": 1.8979864024883421e-06, "loss": 0.1383, "step": 44190 }, { "epoch": 2.2256911224130116, "grad_norm": 5.667223270611247, "learning_rate": 1.8956893362710971e-06, "loss": 0.1684, "step": 44200 }, { "epoch": 2.226194672440707, "grad_norm": 5.21513883665338, "learning_rate": 1.8933933357355056e-06, "loss": 0.1286, "step": 44210 }, { "epoch": 2.226698222468402, "grad_norm": 1.8711828257766372, "learning_rate": 1.8910984016697642e-06, "loss": 0.141, "step": 44220 }, { "epoch": 2.2272017724960973, "grad_norm": 2.351888637851684, "learning_rate": 1.8888045348617035e-06, "loss": 0.1479, "step": 44230 }, { "epoch": 2.227705322523793, "grad_norm": 3.8718258486830064, "learning_rate": 1.8865117360987889e-06, "loss": 0.1421, "step": 44240 }, { "epoch": 2.228208872551488, "grad_norm": 3.693522803611279, "learning_rate": 1.8842200061681136e-06, "loss": 0.1314, "step": 44250 }, { "epoch": 2.2287124225791834, "grad_norm": 2.2888818461092435, "learning_rate": 1.8819293458564102e-06, "loss": 0.1433, "step": 44260 }, { "epoch": 2.2292159726068785, "grad_norm": 3.2650789293507496, "learning_rate": 1.8796397559500424e-06, "loss": 0.1651, "step": 44270 }, { "epoch": 2.2297195226345736, "grad_norm": 3.1734875592195104, "learning_rate": 1.8773512372350045e-06, "loss": 0.1311, "step": 44280 }, { "epoch": 2.230223072662269, "grad_norm": 1.8867043533001102, "learning_rate": 1.875063790496922e-06, "loss": 0.1201, "step": 44290 }, { "epoch": 2.2307266226899642, "grad_norm": 1.2192032592897697, "learning_rate": 1.8727774165210567e-06, "loss": 0.1178, "step": 44300 }, { "epoch": 2.2312301727176593, "grad_norm": 2.4224595611154673, "learning_rate": 1.870492116092301e-06, "loss": 0.1237, "step": 44310 }, { "epoch": 2.231733722745355, "grad_norm": 4.129165482936372, "learning_rate": 1.8682078899951755e-06, "loss": 0.1383, "step": 44320 }, { "epoch": 2.23223727277305, "grad_norm": 4.1786708834070385, "learning_rate": 1.8659247390138357e-06, "loss": 0.1427, "step": 44330 }, { "epoch": 2.2327408228007455, "grad_norm": 3.960272816070497, "learning_rate": 1.8636426639320675e-06, "loss": 0.1378, "step": 44340 }, { "epoch": 2.2332443728284406, "grad_norm": 2.765217147254702, "learning_rate": 1.8613616655332856e-06, "loss": 0.118, "step": 44350 }, { "epoch": 2.2337479228561357, "grad_norm": 4.243199656942344, "learning_rate": 1.8590817446005338e-06, "loss": 0.1486, "step": 44360 }, { "epoch": 2.234251472883831, "grad_norm": 2.9281323289276626, "learning_rate": 1.85680290191649e-06, "loss": 0.1265, "step": 44370 }, { "epoch": 2.2347550229115263, "grad_norm": 3.784612827908907, "learning_rate": 1.854525138263461e-06, "loss": 0.1532, "step": 44380 }, { "epoch": 2.2352585729392214, "grad_norm": 3.462413013078894, "learning_rate": 1.8522484544233837e-06, "loss": 0.1201, "step": 44390 }, { "epoch": 2.235762122966917, "grad_norm": 5.026312908341357, "learning_rate": 1.8499728511778186e-06, "loss": 0.1319, "step": 44400 }, { "epoch": 2.236265672994612, "grad_norm": 4.222647984243271, "learning_rate": 1.8476983293079643e-06, "loss": 0.1737, "step": 44410 }, { "epoch": 2.236769223022307, "grad_norm": 3.211252411979008, "learning_rate": 1.8454248895946397e-06, "loss": 0.1017, "step": 44420 }, { "epoch": 2.2372727730500026, "grad_norm": 3.226838759846116, "learning_rate": 1.8431525328182992e-06, "loss": 0.1583, "step": 44430 }, { "epoch": 2.2377763230776977, "grad_norm": 2.3790900022833834, "learning_rate": 1.8408812597590187e-06, "loss": 0.1281, "step": 44440 }, { "epoch": 2.2382798731053932, "grad_norm": 4.089472304973639, "learning_rate": 1.8386110711965077e-06, "loss": 0.1111, "step": 44450 }, { "epoch": 2.2387834231330883, "grad_norm": 3.355535769226372, "learning_rate": 1.8363419679101013e-06, "loss": 0.1283, "step": 44460 }, { "epoch": 2.2392869731607834, "grad_norm": 3.8874568509114025, "learning_rate": 1.8340739506787636e-06, "loss": 0.1388, "step": 44470 }, { "epoch": 2.239790523188479, "grad_norm": 4.474681862300977, "learning_rate": 1.8318070202810822e-06, "loss": 0.148, "step": 44480 }, { "epoch": 2.240294073216174, "grad_norm": 3.583281507493458, "learning_rate": 1.829541177495273e-06, "loss": 0.1185, "step": 44490 }, { "epoch": 2.240797623243869, "grad_norm": 3.2265629241677614, "learning_rate": 1.8272764230991801e-06, "loss": 0.1423, "step": 44500 }, { "epoch": 2.2413011732715646, "grad_norm": 2.5023654841748604, "learning_rate": 1.8250127578702752e-06, "loss": 0.141, "step": 44510 }, { "epoch": 2.2418047232992597, "grad_norm": 4.10847337653572, "learning_rate": 1.822750182585651e-06, "loss": 0.1382, "step": 44520 }, { "epoch": 2.2423082733269553, "grad_norm": 2.512248450800673, "learning_rate": 1.82048869802203e-06, "loss": 0.14, "step": 44530 }, { "epoch": 2.2428118233546503, "grad_norm": 3.6116346120615885, "learning_rate": 1.8182283049557608e-06, "loss": 0.1467, "step": 44540 }, { "epoch": 2.2433153733823454, "grad_norm": 4.218114578281818, "learning_rate": 1.8159690041628148e-06, "loss": 0.1331, "step": 44550 }, { "epoch": 2.243818923410041, "grad_norm": 3.190950927301852, "learning_rate": 1.8137107964187878e-06, "loss": 0.1195, "step": 44560 }, { "epoch": 2.244322473437736, "grad_norm": 4.6739506160683195, "learning_rate": 1.811453682498903e-06, "loss": 0.152, "step": 44570 }, { "epoch": 2.244826023465431, "grad_norm": 4.327965001076499, "learning_rate": 1.8091976631780096e-06, "loss": 0.127, "step": 44580 }, { "epoch": 2.2453295734931267, "grad_norm": 3.567728974995276, "learning_rate": 1.806942739230575e-06, "loss": 0.1667, "step": 44590 }, { "epoch": 2.2458331235208218, "grad_norm": 2.460114298671363, "learning_rate": 1.8046889114306953e-06, "loss": 0.1255, "step": 44600 }, { "epoch": 2.246336673548517, "grad_norm": 1.650499006703945, "learning_rate": 1.802436180552089e-06, "loss": 0.1265, "step": 44610 }, { "epoch": 2.2468402235762124, "grad_norm": 2.8243489470091547, "learning_rate": 1.8001845473681007e-06, "loss": 0.1242, "step": 44620 }, { "epoch": 2.2473437736039075, "grad_norm": 3.3957324447579786, "learning_rate": 1.7979340126516937e-06, "loss": 0.1373, "step": 44630 }, { "epoch": 2.247847323631603, "grad_norm": 5.4764039790943295, "learning_rate": 1.7956845771754539e-06, "loss": 0.1379, "step": 44640 }, { "epoch": 2.248350873659298, "grad_norm": 3.1716089889805574, "learning_rate": 1.7934362417115942e-06, "loss": 0.1188, "step": 44650 }, { "epoch": 2.248854423686993, "grad_norm": 3.733885658770444, "learning_rate": 1.7911890070319494e-06, "loss": 0.1218, "step": 44660 }, { "epoch": 2.2493579737146887, "grad_norm": 5.299753252154292, "learning_rate": 1.7889428739079705e-06, "loss": 0.154, "step": 44670 }, { "epoch": 2.249861523742384, "grad_norm": 4.643176969640856, "learning_rate": 1.786697843110738e-06, "loss": 0.1421, "step": 44680 }, { "epoch": 2.250365073770079, "grad_norm": 4.254303652390596, "learning_rate": 1.7844539154109508e-06, "loss": 0.1577, "step": 44690 }, { "epoch": 2.2508686237977744, "grad_norm": 4.935999037888757, "learning_rate": 1.7822110915789281e-06, "loss": 0.1281, "step": 44700 }, { "epoch": 2.2513721738254695, "grad_norm": 4.408766674538977, "learning_rate": 1.779969372384609e-06, "loss": 0.1358, "step": 44710 }, { "epoch": 2.251875723853165, "grad_norm": 2.9727271467984444, "learning_rate": 1.7777287585975566e-06, "loss": 0.1273, "step": 44720 }, { "epoch": 2.25237927388086, "grad_norm": 5.744149010697055, "learning_rate": 1.7754892509869536e-06, "loss": 0.169, "step": 44730 }, { "epoch": 2.252882823908555, "grad_norm": 4.236018691825704, "learning_rate": 1.7732508503216045e-06, "loss": 0.1442, "step": 44740 }, { "epoch": 2.2533863739362507, "grad_norm": 3.381352573363546, "learning_rate": 1.7710135573699283e-06, "loss": 0.1474, "step": 44750 }, { "epoch": 2.253889923963946, "grad_norm": 4.317156176445079, "learning_rate": 1.7687773728999708e-06, "loss": 0.1363, "step": 44760 }, { "epoch": 2.254393473991641, "grad_norm": 4.637242481819302, "learning_rate": 1.7665422976793905e-06, "loss": 0.1518, "step": 44770 }, { "epoch": 2.2548970240193364, "grad_norm": 1.8525884198441342, "learning_rate": 1.7643083324754717e-06, "loss": 0.1167, "step": 44780 }, { "epoch": 2.2554005740470315, "grad_norm": 1.5437549756776707, "learning_rate": 1.7620754780551107e-06, "loss": 0.1007, "step": 44790 }, { "epoch": 2.2559041240747266, "grad_norm": 3.6880285115484197, "learning_rate": 1.7598437351848279e-06, "loss": 0.1543, "step": 44800 }, { "epoch": 2.256407674102422, "grad_norm": 3.036500869790723, "learning_rate": 1.7576131046307632e-06, "loss": 0.1455, "step": 44810 }, { "epoch": 2.2569112241301172, "grad_norm": 2.514520447711137, "learning_rate": 1.7553835871586684e-06, "loss": 0.1331, "step": 44820 }, { "epoch": 2.2574147741578123, "grad_norm": 3.3809132422500316, "learning_rate": 1.7531551835339162e-06, "loss": 0.1564, "step": 44830 }, { "epoch": 2.257918324185508, "grad_norm": 3.4821619133450175, "learning_rate": 1.750927894521498e-06, "loss": 0.1634, "step": 44840 }, { "epoch": 2.258421874213203, "grad_norm": 3.3977225528371684, "learning_rate": 1.748701720886023e-06, "loss": 0.1505, "step": 44850 }, { "epoch": 2.2589254242408985, "grad_norm": 4.589410578946551, "learning_rate": 1.7464766633917168e-06, "loss": 0.1389, "step": 44860 }, { "epoch": 2.2594289742685936, "grad_norm": 4.214718217484414, "learning_rate": 1.7442527228024193e-06, "loss": 0.1235, "step": 44870 }, { "epoch": 2.2599325242962887, "grad_norm": 3.579639846669055, "learning_rate": 1.7420298998815903e-06, "loss": 0.1299, "step": 44880 }, { "epoch": 2.260436074323984, "grad_norm": 3.020134113167985, "learning_rate": 1.7398081953923057e-06, "loss": 0.1212, "step": 44890 }, { "epoch": 2.2609396243516793, "grad_norm": 3.4930561471904134, "learning_rate": 1.7375876100972562e-06, "loss": 0.1659, "step": 44900 }, { "epoch": 2.261443174379375, "grad_norm": 2.9096955007182155, "learning_rate": 1.7353681447587456e-06, "loss": 0.156, "step": 44910 }, { "epoch": 2.26194672440707, "grad_norm": 2.4560977012825114, "learning_rate": 1.7331498001386983e-06, "loss": 0.1738, "step": 44920 }, { "epoch": 2.262450274434765, "grad_norm": 3.630648993728038, "learning_rate": 1.730932576998654e-06, "loss": 0.1201, "step": 44930 }, { "epoch": 2.2629538244624605, "grad_norm": 3.0808927433881723, "learning_rate": 1.728716476099761e-06, "loss": 0.1413, "step": 44940 }, { "epoch": 2.2634573744901556, "grad_norm": 3.3607341826203787, "learning_rate": 1.7265014982027895e-06, "loss": 0.1407, "step": 44950 }, { "epoch": 2.2639609245178507, "grad_norm": 3.9252137540634484, "learning_rate": 1.7242876440681216e-06, "loss": 0.1541, "step": 44960 }, { "epoch": 2.2644644745455462, "grad_norm": 3.535441116305115, "learning_rate": 1.7220749144557525e-06, "loss": 0.1196, "step": 44970 }, { "epoch": 2.2649680245732413, "grad_norm": 3.6540607068661286, "learning_rate": 1.7198633101252903e-06, "loss": 0.1492, "step": 44980 }, { "epoch": 2.2654715746009364, "grad_norm": 4.571810489969075, "learning_rate": 1.7176528318359603e-06, "loss": 0.1575, "step": 44990 }, { "epoch": 2.265975124628632, "grad_norm": 2.1926698601572223, "learning_rate": 1.7154434803466003e-06, "loss": 0.1339, "step": 45000 }, { "epoch": 2.266478674656327, "grad_norm": 3.567427431396885, "learning_rate": 1.7132352564156613e-06, "loss": 0.1284, "step": 45010 }, { "epoch": 2.2669822246840226, "grad_norm": 4.7613594924102305, "learning_rate": 1.711028160801203e-06, "loss": 0.1391, "step": 45020 }, { "epoch": 2.2674857747117176, "grad_norm": 2.653349187754818, "learning_rate": 1.7088221942609057e-06, "loss": 0.1344, "step": 45030 }, { "epoch": 2.2679893247394127, "grad_norm": 4.791224406749362, "learning_rate": 1.706617357552054e-06, "loss": 0.148, "step": 45040 }, { "epoch": 2.2684928747671083, "grad_norm": 4.070442750808703, "learning_rate": 1.7044136514315506e-06, "loss": 0.1166, "step": 45050 }, { "epoch": 2.2689964247948033, "grad_norm": 2.9216079924761424, "learning_rate": 1.7022110766559058e-06, "loss": 0.1445, "step": 45060 }, { "epoch": 2.2694999748224984, "grad_norm": 3.356870290401694, "learning_rate": 1.700009633981245e-06, "loss": 0.1295, "step": 45070 }, { "epoch": 2.270003524850194, "grad_norm": 2.949806987239511, "learning_rate": 1.6978093241633042e-06, "loss": 0.1361, "step": 45080 }, { "epoch": 2.270507074877889, "grad_norm": 8.113655109925254, "learning_rate": 1.6956101479574278e-06, "loss": 0.138, "step": 45090 }, { "epoch": 2.2710106249055846, "grad_norm": 4.000024560149944, "learning_rate": 1.693412106118575e-06, "loss": 0.1291, "step": 45100 }, { "epoch": 2.2715141749332797, "grad_norm": 2.884500238237374, "learning_rate": 1.691215199401311e-06, "loss": 0.1282, "step": 45110 }, { "epoch": 2.2720177249609748, "grad_norm": 2.4008530344524517, "learning_rate": 1.689019428559816e-06, "loss": 0.1124, "step": 45120 }, { "epoch": 2.2725212749886703, "grad_norm": 1.0639109396849906, "learning_rate": 1.6868247943478788e-06, "loss": 0.1053, "step": 45130 }, { "epoch": 2.2730248250163654, "grad_norm": 3.0017866180467743, "learning_rate": 1.6846312975188944e-06, "loss": 0.125, "step": 45140 }, { "epoch": 2.2735283750440605, "grad_norm": 4.0586297167852505, "learning_rate": 1.6824389388258728e-06, "loss": 0.1375, "step": 45150 }, { "epoch": 2.274031925071756, "grad_norm": 2.55963077557785, "learning_rate": 1.6802477190214312e-06, "loss": 0.1398, "step": 45160 }, { "epoch": 2.274535475099451, "grad_norm": 2.8746169472730445, "learning_rate": 1.678057638857795e-06, "loss": 0.1361, "step": 45170 }, { "epoch": 2.275039025127146, "grad_norm": 2.9621532095285956, "learning_rate": 1.675868699086796e-06, "loss": 0.1425, "step": 45180 }, { "epoch": 2.2755425751548417, "grad_norm": 2.882841934544176, "learning_rate": 1.6736809004598804e-06, "loss": 0.1166, "step": 45190 }, { "epoch": 2.276046125182537, "grad_norm": 3.7267529278363494, "learning_rate": 1.6714942437281002e-06, "loss": 0.1424, "step": 45200 }, { "epoch": 2.2765496752102323, "grad_norm": 4.038011134117337, "learning_rate": 1.6693087296421118e-06, "loss": 0.1322, "step": 45210 }, { "epoch": 2.2770532252379274, "grad_norm": 2.2953156361001, "learning_rate": 1.667124358952184e-06, "loss": 0.1619, "step": 45220 }, { "epoch": 2.2775567752656225, "grad_norm": 2.9752543723082123, "learning_rate": 1.664941132408191e-06, "loss": 0.1104, "step": 45230 }, { "epoch": 2.278060325293318, "grad_norm": 2.7465533973121237, "learning_rate": 1.662759050759617e-06, "loss": 0.131, "step": 45240 }, { "epoch": 2.278563875321013, "grad_norm": 2.4664167935126007, "learning_rate": 1.6605781147555488e-06, "loss": 0.1068, "step": 45250 }, { "epoch": 2.279067425348708, "grad_norm": 3.045870010202238, "learning_rate": 1.6583983251446807e-06, "loss": 0.1076, "step": 45260 }, { "epoch": 2.2795709753764037, "grad_norm": 3.1216737968254438, "learning_rate": 1.6562196826753152e-06, "loss": 0.1362, "step": 45270 }, { "epoch": 2.280074525404099, "grad_norm": 1.6475715199765193, "learning_rate": 1.6540421880953633e-06, "loss": 0.1572, "step": 45280 }, { "epoch": 2.2805780754317944, "grad_norm": 2.8847683081747535, "learning_rate": 1.6518658421523353e-06, "loss": 0.155, "step": 45290 }, { "epoch": 2.2810816254594894, "grad_norm": 4.853606045611642, "learning_rate": 1.6496906455933526e-06, "loss": 0.1315, "step": 45300 }, { "epoch": 2.2815851754871845, "grad_norm": 4.709704755488117, "learning_rate": 1.647516599165142e-06, "loss": 0.1395, "step": 45310 }, { "epoch": 2.28208872551488, "grad_norm": 3.433758632597374, "learning_rate": 1.645343703614033e-06, "loss": 0.1373, "step": 45320 }, { "epoch": 2.282592275542575, "grad_norm": 3.513465991934213, "learning_rate": 1.643171959685958e-06, "loss": 0.141, "step": 45330 }, { "epoch": 2.2830958255702702, "grad_norm": 3.339730256174199, "learning_rate": 1.6410013681264596e-06, "loss": 0.1357, "step": 45340 }, { "epoch": 2.2835993755979658, "grad_norm": 4.183929915801808, "learning_rate": 1.638831929680682e-06, "loss": 0.1333, "step": 45350 }, { "epoch": 2.284102925625661, "grad_norm": 4.06702800645076, "learning_rate": 1.6366636450933749e-06, "loss": 0.1433, "step": 45360 }, { "epoch": 2.284606475653356, "grad_norm": 3.9446227489732033, "learning_rate": 1.6344965151088898e-06, "loss": 0.1199, "step": 45370 }, { "epoch": 2.2851100256810515, "grad_norm": 2.8880109991433884, "learning_rate": 1.6323305404711803e-06, "loss": 0.1545, "step": 45380 }, { "epoch": 2.2856135757087466, "grad_norm": 2.2680701802016396, "learning_rate": 1.6301657219238082e-06, "loss": 0.1163, "step": 45390 }, { "epoch": 2.286117125736442, "grad_norm": 4.726421344142252, "learning_rate": 1.6280020602099367e-06, "loss": 0.1521, "step": 45400 }, { "epoch": 2.286620675764137, "grad_norm": 4.312353369345016, "learning_rate": 1.6258395560723289e-06, "loss": 0.1502, "step": 45410 }, { "epoch": 2.2871242257918323, "grad_norm": 3.1043361374296365, "learning_rate": 1.6236782102533543e-06, "loss": 0.1362, "step": 45420 }, { "epoch": 2.287627775819528, "grad_norm": 2.85593439512501, "learning_rate": 1.6215180234949846e-06, "loss": 0.1311, "step": 45430 }, { "epoch": 2.288131325847223, "grad_norm": 3.8720841595753606, "learning_rate": 1.6193589965387907e-06, "loss": 0.143, "step": 45440 }, { "epoch": 2.288634875874918, "grad_norm": 2.37675651642407, "learning_rate": 1.6172011301259456e-06, "loss": 0.1346, "step": 45450 }, { "epoch": 2.2891384259026135, "grad_norm": 3.2504687165514436, "learning_rate": 1.615044424997227e-06, "loss": 0.1606, "step": 45460 }, { "epoch": 2.2896419759303086, "grad_norm": 3.968438104114212, "learning_rate": 1.6128888818930127e-06, "loss": 0.131, "step": 45470 }, { "epoch": 2.290145525958004, "grad_norm": 5.205787746982953, "learning_rate": 1.6107345015532794e-06, "loss": 0.1413, "step": 45480 }, { "epoch": 2.2906490759856992, "grad_norm": 2.162272983944554, "learning_rate": 1.6085812847176063e-06, "loss": 0.1344, "step": 45490 }, { "epoch": 2.2911526260133943, "grad_norm": 2.30900946799888, "learning_rate": 1.6064292321251745e-06, "loss": 0.1619, "step": 45500 }, { "epoch": 2.29165617604109, "grad_norm": 3.8278257429897127, "learning_rate": 1.6042783445147653e-06, "loss": 0.1479, "step": 45510 }, { "epoch": 2.292159726068785, "grad_norm": 1.5698455159071918, "learning_rate": 1.6021286226247573e-06, "loss": 0.1094, "step": 45520 }, { "epoch": 2.29266327609648, "grad_norm": 4.628825826761962, "learning_rate": 1.5999800671931287e-06, "loss": 0.1641, "step": 45530 }, { "epoch": 2.2931668261241756, "grad_norm": 2.1993875270308023, "learning_rate": 1.5978326789574606e-06, "loss": 0.1407, "step": 45540 }, { "epoch": 2.2936703761518706, "grad_norm": 2.2322974013905292, "learning_rate": 1.5956864586549337e-06, "loss": 0.1316, "step": 45550 }, { "epoch": 2.2941739261795657, "grad_norm": 3.7632540573289437, "learning_rate": 1.5935414070223225e-06, "loss": 0.1134, "step": 45560 }, { "epoch": 2.2946774762072613, "grad_norm": 3.809302183124438, "learning_rate": 1.5913975247960062e-06, "loss": 0.1459, "step": 45570 }, { "epoch": 2.2951810262349563, "grad_norm": 3.5155515742860635, "learning_rate": 1.5892548127119606e-06, "loss": 0.1819, "step": 45580 }, { "epoch": 2.295684576262652, "grad_norm": 2.885611607566435, "learning_rate": 1.587113271505758e-06, "loss": 0.1205, "step": 45590 }, { "epoch": 2.296188126290347, "grad_norm": 2.5814063596297476, "learning_rate": 1.5849729019125687e-06, "loss": 0.144, "step": 45600 }, { "epoch": 2.296691676318042, "grad_norm": 3.078188788728162, "learning_rate": 1.5828337046671632e-06, "loss": 0.1449, "step": 45610 }, { "epoch": 2.2971952263457376, "grad_norm": 3.0377139042199857, "learning_rate": 1.5806956805039092e-06, "loss": 0.1193, "step": 45620 }, { "epoch": 2.2976987763734327, "grad_norm": 4.725906418859227, "learning_rate": 1.5785588301567728e-06, "loss": 0.1562, "step": 45630 }, { "epoch": 2.2982023264011278, "grad_norm": 2.6684088416451646, "learning_rate": 1.5764231543593111e-06, "loss": 0.131, "step": 45640 }, { "epoch": 2.2987058764288233, "grad_norm": 3.859996087407642, "learning_rate": 1.5742886538446855e-06, "loss": 0.1344, "step": 45650 }, { "epoch": 2.2992094264565184, "grad_norm": 3.3823543408149694, "learning_rate": 1.5721553293456481e-06, "loss": 0.1455, "step": 45660 }, { "epoch": 2.299712976484214, "grad_norm": 4.304029332664452, "learning_rate": 1.5700231815945533e-06, "loss": 0.1581, "step": 45670 }, { "epoch": 2.300216526511909, "grad_norm": 4.133391851952886, "learning_rate": 1.5678922113233437e-06, "loss": 0.1467, "step": 45680 }, { "epoch": 2.300720076539604, "grad_norm": 4.8288273340502466, "learning_rate": 1.5657624192635645e-06, "loss": 0.1241, "step": 45690 }, { "epoch": 2.3012236265672996, "grad_norm": 2.400584794485629, "learning_rate": 1.5636338061463552e-06, "loss": 0.137, "step": 45700 }, { "epoch": 2.3017271765949947, "grad_norm": 4.343502156800722, "learning_rate": 1.561506372702446e-06, "loss": 0.1279, "step": 45710 }, { "epoch": 2.30223072662269, "grad_norm": 2.539823150717177, "learning_rate": 1.5593801196621693e-06, "loss": 0.1349, "step": 45720 }, { "epoch": 2.3027342766503853, "grad_norm": 3.459502454575693, "learning_rate": 1.557255047755445e-06, "loss": 0.1227, "step": 45730 }, { "epoch": 2.3032378266780804, "grad_norm": 2.7783297501240347, "learning_rate": 1.5551311577117923e-06, "loss": 0.1426, "step": 45740 }, { "epoch": 2.3037413767057755, "grad_norm": 2.859292050450431, "learning_rate": 1.553008450260325e-06, "loss": 0.1299, "step": 45750 }, { "epoch": 2.304244926733471, "grad_norm": 1.9552519525087078, "learning_rate": 1.550886926129746e-06, "loss": 0.1712, "step": 45760 }, { "epoch": 2.304748476761166, "grad_norm": 3.4496839584007724, "learning_rate": 1.5487665860483576e-06, "loss": 0.1195, "step": 45770 }, { "epoch": 2.3052520267888617, "grad_norm": 2.850109397972167, "learning_rate": 1.5466474307440543e-06, "loss": 0.158, "step": 45780 }, { "epoch": 2.3057555768165567, "grad_norm": 4.404817068833799, "learning_rate": 1.5445294609443212e-06, "loss": 0.159, "step": 45790 }, { "epoch": 2.306259126844252, "grad_norm": 3.8917562975129427, "learning_rate": 1.5424126773762366e-06, "loss": 0.1436, "step": 45800 }, { "epoch": 2.3067626768719474, "grad_norm": 3.475155102247764, "learning_rate": 1.5402970807664747e-06, "loss": 0.1431, "step": 45810 }, { "epoch": 2.3072662268996424, "grad_norm": 3.0365902857616223, "learning_rate": 1.5381826718413024e-06, "loss": 0.1255, "step": 45820 }, { "epoch": 2.307769776927338, "grad_norm": 1.5812968372857428, "learning_rate": 1.5360694513265734e-06, "loss": 0.1291, "step": 45830 }, { "epoch": 2.308273326955033, "grad_norm": 2.0159263174905786, "learning_rate": 1.533957419947739e-06, "loss": 0.1205, "step": 45840 }, { "epoch": 2.308776876982728, "grad_norm": 4.027312994692753, "learning_rate": 1.531846578429841e-06, "loss": 0.125, "step": 45850 }, { "epoch": 2.3092804270104237, "grad_norm": 3.1743839899940336, "learning_rate": 1.5297369274975143e-06, "loss": 0.1277, "step": 45860 }, { "epoch": 2.3097839770381188, "grad_norm": 3.5366247063358225, "learning_rate": 1.527628467874977e-06, "loss": 0.1259, "step": 45870 }, { "epoch": 2.310287527065814, "grad_norm": 2.8149284360260074, "learning_rate": 1.525521200286047e-06, "loss": 0.1538, "step": 45880 }, { "epoch": 2.3107910770935094, "grad_norm": 3.3273460315925902, "learning_rate": 1.5234151254541308e-06, "loss": 0.129, "step": 45890 }, { "epoch": 2.3112946271212045, "grad_norm": 4.747485957236957, "learning_rate": 1.5213102441022255e-06, "loss": 0.1382, "step": 45900 }, { "epoch": 2.3117981771488996, "grad_norm": 4.281860974691014, "learning_rate": 1.519206556952915e-06, "loss": 0.1685, "step": 45910 }, { "epoch": 2.312301727176595, "grad_norm": 3.7776736799606354, "learning_rate": 1.517104064728378e-06, "loss": 0.1433, "step": 45920 }, { "epoch": 2.31280527720429, "grad_norm": 3.575940064614472, "learning_rate": 1.5150027681503816e-06, "loss": 0.0952, "step": 45930 }, { "epoch": 2.3133088272319853, "grad_norm": 3.6920028037278882, "learning_rate": 1.5129026679402813e-06, "loss": 0.1322, "step": 45940 }, { "epoch": 2.313812377259681, "grad_norm": 3.2011372837859375, "learning_rate": 1.51080376481902e-06, "loss": 0.1568, "step": 45950 }, { "epoch": 2.314315927287376, "grad_norm": 4.280323392796795, "learning_rate": 1.508706059507134e-06, "loss": 0.1375, "step": 45960 }, { "epoch": 2.3148194773150714, "grad_norm": 3.738770703233956, "learning_rate": 1.5066095527247466e-06, "loss": 0.1274, "step": 45970 }, { "epoch": 2.3153230273427665, "grad_norm": 3.5634873715965703, "learning_rate": 1.5045142451915712e-06, "loss": 0.1768, "step": 45980 }, { "epoch": 2.3158265773704616, "grad_norm": 2.7501970021479476, "learning_rate": 1.5024201376269055e-06, "loss": 0.128, "step": 45990 }, { "epoch": 2.316330127398157, "grad_norm": 3.1260291186959477, "learning_rate": 1.500327230749637e-06, "loss": 0.1481, "step": 46000 }, { "epoch": 2.3168336774258522, "grad_norm": 3.077906900564687, "learning_rate": 1.4982355252782426e-06, "loss": 0.1315, "step": 46010 }, { "epoch": 2.3173372274535478, "grad_norm": 3.176388929548773, "learning_rate": 1.4961450219307877e-06, "loss": 0.1409, "step": 46020 }, { "epoch": 2.317840777481243, "grad_norm": 4.484017872158828, "learning_rate": 1.4940557214249196e-06, "loss": 0.1715, "step": 46030 }, { "epoch": 2.318344327508938, "grad_norm": 4.220537047502362, "learning_rate": 1.491967624477878e-06, "loss": 0.1482, "step": 46040 }, { "epoch": 2.3188478775366335, "grad_norm": 3.419373448007797, "learning_rate": 1.4898807318064895e-06, "loss": 0.1227, "step": 46050 }, { "epoch": 2.3193514275643285, "grad_norm": 1.2990376129699939, "learning_rate": 1.4877950441271631e-06, "loss": 0.1032, "step": 46060 }, { "epoch": 2.3198549775920236, "grad_norm": 3.8006199101143157, "learning_rate": 1.4857105621558954e-06, "loss": 0.1442, "step": 46070 }, { "epoch": 2.320358527619719, "grad_norm": 3.8387655930267703, "learning_rate": 1.4836272866082719e-06, "loss": 0.1455, "step": 46080 }, { "epoch": 2.3208620776474143, "grad_norm": 2.5515175244684998, "learning_rate": 1.4815452181994628e-06, "loss": 0.1363, "step": 46090 }, { "epoch": 2.3213656276751093, "grad_norm": 4.047381918001676, "learning_rate": 1.4794643576442208e-06, "loss": 0.162, "step": 46100 }, { "epoch": 2.321869177702805, "grad_norm": 3.9547908777132763, "learning_rate": 1.477384705656888e-06, "loss": 0.1204, "step": 46110 }, { "epoch": 2.3223727277305, "grad_norm": 3.714981950916613, "learning_rate": 1.4753062629513903e-06, "loss": 0.1105, "step": 46120 }, { "epoch": 2.322876277758195, "grad_norm": 2.994052977884354, "learning_rate": 1.4732290302412388e-06, "loss": 0.1243, "step": 46130 }, { "epoch": 2.3233798277858906, "grad_norm": 3.996692231020343, "learning_rate": 1.4711530082395282e-06, "loss": 0.1466, "step": 46140 }, { "epoch": 2.3238833778135857, "grad_norm": 3.6626458246350886, "learning_rate": 1.4690781976589358e-06, "loss": 0.1442, "step": 46150 }, { "epoch": 2.324386927841281, "grad_norm": 4.680747995594279, "learning_rate": 1.4670045992117266e-06, "loss": 0.1211, "step": 46160 }, { "epoch": 2.3248904778689763, "grad_norm": 4.866242659688147, "learning_rate": 1.4649322136097504e-06, "loss": 0.1221, "step": 46170 }, { "epoch": 2.3253940278966714, "grad_norm": 2.6660619479553076, "learning_rate": 1.4628610415644346e-06, "loss": 0.1329, "step": 46180 }, { "epoch": 2.325897577924367, "grad_norm": 5.77284558452244, "learning_rate": 1.4607910837867956e-06, "loss": 0.1231, "step": 46190 }, { "epoch": 2.326401127952062, "grad_norm": 4.326282159238643, "learning_rate": 1.4587223409874323e-06, "loss": 0.1233, "step": 46200 }, { "epoch": 2.3269046779797575, "grad_norm": 3.9740612021036856, "learning_rate": 1.4566548138765246e-06, "loss": 0.1569, "step": 46210 }, { "epoch": 2.3274082280074526, "grad_norm": 6.440120002961761, "learning_rate": 1.4545885031638335e-06, "loss": 0.1276, "step": 46220 }, { "epoch": 2.3279117780351477, "grad_norm": 3.472184833285704, "learning_rate": 1.4525234095587059e-06, "loss": 0.1408, "step": 46230 }, { "epoch": 2.3284153280628432, "grad_norm": 3.994855397715134, "learning_rate": 1.4504595337700705e-06, "loss": 0.1494, "step": 46240 }, { "epoch": 2.3289188780905383, "grad_norm": 3.0977746502332364, "learning_rate": 1.4483968765064387e-06, "loss": 0.1291, "step": 46250 }, { "epoch": 2.3294224281182334, "grad_norm": 2.3641926948950003, "learning_rate": 1.446335438475898e-06, "loss": 0.1199, "step": 46260 }, { "epoch": 2.329925978145929, "grad_norm": 4.203165681098992, "learning_rate": 1.4442752203861255e-06, "loss": 0.1459, "step": 46270 }, { "epoch": 2.330429528173624, "grad_norm": 3.0737581793220623, "learning_rate": 1.4422162229443716e-06, "loss": 0.13, "step": 46280 }, { "epoch": 2.330933078201319, "grad_norm": 4.363500288790634, "learning_rate": 1.4401584468574748e-06, "loss": 0.1118, "step": 46290 }, { "epoch": 2.3314366282290147, "grad_norm": 3.709765400184266, "learning_rate": 1.4381018928318475e-06, "loss": 0.1344, "step": 46300 }, { "epoch": 2.3319401782567097, "grad_norm": 1.6495996661419343, "learning_rate": 1.436046561573488e-06, "loss": 0.1138, "step": 46310 }, { "epoch": 2.332443728284405, "grad_norm": 4.0999516185194285, "learning_rate": 1.433992453787974e-06, "loss": 0.1332, "step": 46320 }, { "epoch": 2.3329472783121004, "grad_norm": 3.028002435570233, "learning_rate": 1.4319395701804589e-06, "loss": 0.1575, "step": 46330 }, { "epoch": 2.3334508283397954, "grad_norm": 4.503760497254519, "learning_rate": 1.429887911455683e-06, "loss": 0.1353, "step": 46340 }, { "epoch": 2.333954378367491, "grad_norm": 3.5559318950945284, "learning_rate": 1.4278374783179577e-06, "loss": 0.1484, "step": 46350 }, { "epoch": 2.334457928395186, "grad_norm": 4.101892574007383, "learning_rate": 1.4257882714711823e-06, "loss": 0.103, "step": 46360 }, { "epoch": 2.334961478422881, "grad_norm": 2.6690589369277236, "learning_rate": 1.423740291618827e-06, "loss": 0.1278, "step": 46370 }, { "epoch": 2.3354650284505767, "grad_norm": 1.6405784752207309, "learning_rate": 1.4216935394639469e-06, "loss": 0.1454, "step": 46380 }, { "epoch": 2.3359685784782718, "grad_norm": 2.9893428925014387, "learning_rate": 1.419648015709173e-06, "loss": 0.1012, "step": 46390 }, { "epoch": 2.3364721285059673, "grad_norm": 3.267447209331598, "learning_rate": 1.417603721056716e-06, "loss": 0.1052, "step": 46400 }, { "epoch": 2.3369756785336624, "grad_norm": 2.0659275823953998, "learning_rate": 1.4155606562083634e-06, "loss": 0.1242, "step": 46410 }, { "epoch": 2.3374792285613575, "grad_norm": 2.766176141440385, "learning_rate": 1.4135188218654789e-06, "loss": 0.1754, "step": 46420 }, { "epoch": 2.337982778589053, "grad_norm": 3.765929870044934, "learning_rate": 1.4114782187290067e-06, "loss": 0.1287, "step": 46430 }, { "epoch": 2.338486328616748, "grad_norm": 4.0166699543478686, "learning_rate": 1.409438847499469e-06, "loss": 0.1285, "step": 46440 }, { "epoch": 2.338989878644443, "grad_norm": 3.2769431528867257, "learning_rate": 1.4074007088769604e-06, "loss": 0.138, "step": 46450 }, { "epoch": 2.3394934286721387, "grad_norm": 3.3210215541448433, "learning_rate": 1.4053638035611572e-06, "loss": 0.1371, "step": 46460 }, { "epoch": 2.339996978699834, "grad_norm": 6.450352698926564, "learning_rate": 1.4033281322513104e-06, "loss": 0.1368, "step": 46470 }, { "epoch": 2.340500528727529, "grad_norm": 4.012265359973654, "learning_rate": 1.4012936956462496e-06, "loss": 0.1534, "step": 46480 }, { "epoch": 2.3410040787552244, "grad_norm": 3.231355029323873, "learning_rate": 1.3992604944443737e-06, "loss": 0.1303, "step": 46490 }, { "epoch": 2.3415076287829195, "grad_norm": 3.9015859232261962, "learning_rate": 1.397228529343665e-06, "loss": 0.1348, "step": 46500 }, { "epoch": 2.3420111788106146, "grad_norm": 3.3918373973193474, "learning_rate": 1.3951978010416784e-06, "loss": 0.1327, "step": 46510 }, { "epoch": 2.34251472883831, "grad_norm": 5.028140749233951, "learning_rate": 1.3931683102355453e-06, "loss": 0.1378, "step": 46520 }, { "epoch": 2.343018278866005, "grad_norm": 3.7322287595674966, "learning_rate": 1.3911400576219691e-06, "loss": 0.1513, "step": 46530 }, { "epoch": 2.3435218288937008, "grad_norm": 3.482058885153516, "learning_rate": 1.3891130438972323e-06, "loss": 0.1153, "step": 46540 }, { "epoch": 2.344025378921396, "grad_norm": 3.8352971904836686, "learning_rate": 1.3870872697571912e-06, "loss": 0.1388, "step": 46550 }, { "epoch": 2.344528928949091, "grad_norm": 3.9014768312747705, "learning_rate": 1.385062735897275e-06, "loss": 0.1629, "step": 46560 }, { "epoch": 2.3450324789767865, "grad_norm": 4.316568906730699, "learning_rate": 1.383039443012485e-06, "loss": 0.1597, "step": 46570 }, { "epoch": 2.3455360290044815, "grad_norm": 1.8481168559273915, "learning_rate": 1.3810173917974018e-06, "loss": 0.1203, "step": 46580 }, { "epoch": 2.346039579032177, "grad_norm": 4.6528030601470824, "learning_rate": 1.3789965829461787e-06, "loss": 0.1513, "step": 46590 }, { "epoch": 2.346543129059872, "grad_norm": 3.5933034020224626, "learning_rate": 1.3769770171525372e-06, "loss": 0.1129, "step": 46600 }, { "epoch": 2.3470466790875673, "grad_norm": 4.591136876682728, "learning_rate": 1.3749586951097799e-06, "loss": 0.1443, "step": 46610 }, { "epoch": 2.347550229115263, "grad_norm": 2.3058416992168906, "learning_rate": 1.3729416175107745e-06, "loss": 0.1564, "step": 46620 }, { "epoch": 2.348053779142958, "grad_norm": 2.3981772760093225, "learning_rate": 1.3709257850479678e-06, "loss": 0.1333, "step": 46630 }, { "epoch": 2.348557329170653, "grad_norm": 3.911085121069995, "learning_rate": 1.3689111984133774e-06, "loss": 0.108, "step": 46640 }, { "epoch": 2.3490608791983485, "grad_norm": 4.413365757419023, "learning_rate": 1.3668978582985904e-06, "loss": 0.1224, "step": 46650 }, { "epoch": 2.3495644292260436, "grad_norm": 3.397879098330865, "learning_rate": 1.364885765394769e-06, "loss": 0.1097, "step": 46660 }, { "epoch": 2.3500679792537387, "grad_norm": 4.180153740062513, "learning_rate": 1.3628749203926482e-06, "loss": 0.1308, "step": 46670 }, { "epoch": 2.350571529281434, "grad_norm": 3.291257385866311, "learning_rate": 1.360865323982532e-06, "loss": 0.1253, "step": 46680 }, { "epoch": 2.3510750793091293, "grad_norm": 2.873554071325268, "learning_rate": 1.3588569768542937e-06, "loss": 0.134, "step": 46690 }, { "epoch": 2.3515786293368244, "grad_norm": 2.547888978636819, "learning_rate": 1.3568498796973833e-06, "loss": 0.1257, "step": 46700 }, { "epoch": 2.35208217936452, "grad_norm": 3.3915730570413922, "learning_rate": 1.35484403320082e-06, "loss": 0.1494, "step": 46710 }, { "epoch": 2.352585729392215, "grad_norm": 2.4508299164562173, "learning_rate": 1.3528394380531895e-06, "loss": 0.1308, "step": 46720 }, { "epoch": 2.3530892794199105, "grad_norm": 3.2068421577929254, "learning_rate": 1.3508360949426536e-06, "loss": 0.1326, "step": 46730 }, { "epoch": 2.3535928294476056, "grad_norm": 3.123040050360423, "learning_rate": 1.3488340045569403e-06, "loss": 0.12, "step": 46740 }, { "epoch": 2.3540963794753007, "grad_norm": 3.1936551391836705, "learning_rate": 1.346833167583353e-06, "loss": 0.1138, "step": 46750 }, { "epoch": 2.3545999295029962, "grad_norm": 3.4320851807580235, "learning_rate": 1.3448335847087551e-06, "loss": 0.1422, "step": 46760 }, { "epoch": 2.3551034795306913, "grad_norm": 3.818741805075062, "learning_rate": 1.3428352566195873e-06, "loss": 0.1129, "step": 46770 }, { "epoch": 2.355607029558387, "grad_norm": 2.4357861881063125, "learning_rate": 1.3408381840018586e-06, "loss": 0.1075, "step": 46780 }, { "epoch": 2.356110579586082, "grad_norm": 3.9125278225168927, "learning_rate": 1.3388423675411461e-06, "loss": 0.1802, "step": 46790 }, { "epoch": 2.356614129613777, "grad_norm": 3.386790185010442, "learning_rate": 1.3368478079225937e-06, "loss": 0.1364, "step": 46800 }, { "epoch": 2.3571176796414726, "grad_norm": 2.763347283389057, "learning_rate": 1.3348545058309164e-06, "loss": 0.1192, "step": 46810 }, { "epoch": 2.3576212296691677, "grad_norm": 2.845097354800918, "learning_rate": 1.3328624619503984e-06, "loss": 0.1506, "step": 46820 }, { "epoch": 2.3581247796968627, "grad_norm": 2.5977775442708055, "learning_rate": 1.3308716769648883e-06, "loss": 0.1225, "step": 46830 }, { "epoch": 2.3586283297245583, "grad_norm": 2.664261774332354, "learning_rate": 1.3288821515578039e-06, "loss": 0.1227, "step": 46840 }, { "epoch": 2.3591318797522534, "grad_norm": 2.7467972843992037, "learning_rate": 1.3268938864121321e-06, "loss": 0.125, "step": 46850 }, { "epoch": 2.3596354297799484, "grad_norm": 3.4445999542837913, "learning_rate": 1.3249068822104267e-06, "loss": 0.1266, "step": 46860 }, { "epoch": 2.360138979807644, "grad_norm": 3.093155653328923, "learning_rate": 1.322921139634809e-06, "loss": 0.1291, "step": 46870 }, { "epoch": 2.360642529835339, "grad_norm": 3.355285507357386, "learning_rate": 1.3209366593669637e-06, "loss": 0.1045, "step": 46880 }, { "epoch": 2.361146079863034, "grad_norm": 2.61877905150985, "learning_rate": 1.3189534420881483e-06, "loss": 0.1411, "step": 46890 }, { "epoch": 2.3616496298907297, "grad_norm": 3.751237985452269, "learning_rate": 1.3169714884791796e-06, "loss": 0.1244, "step": 46900 }, { "epoch": 2.3621531799184248, "grad_norm": 3.2202081044144544, "learning_rate": 1.314990799220448e-06, "loss": 0.1309, "step": 46910 }, { "epoch": 2.3626567299461203, "grad_norm": 5.839656947800248, "learning_rate": 1.3130113749919022e-06, "loss": 0.144, "step": 46920 }, { "epoch": 2.3631602799738154, "grad_norm": 2.844655647251374, "learning_rate": 1.3110332164730626e-06, "loss": 0.1205, "step": 46930 }, { "epoch": 2.3636638300015105, "grad_norm": 3.1044383181328388, "learning_rate": 1.309056324343015e-06, "loss": 0.1416, "step": 46940 }, { "epoch": 2.364167380029206, "grad_norm": 2.692527771514115, "learning_rate": 1.3070806992804047e-06, "loss": 0.1303, "step": 46950 }, { "epoch": 2.364670930056901, "grad_norm": 4.224894918508952, "learning_rate": 1.3051063419634496e-06, "loss": 0.1153, "step": 46960 }, { "epoch": 2.3651744800845966, "grad_norm": 5.4639483098460975, "learning_rate": 1.3031332530699248e-06, "loss": 0.1519, "step": 46970 }, { "epoch": 2.3656780301122917, "grad_norm": 3.9711252707369433, "learning_rate": 1.3011614332771777e-06, "loss": 0.1243, "step": 46980 }, { "epoch": 2.366181580139987, "grad_norm": 4.081555737929225, "learning_rate": 1.2991908832621119e-06, "loss": 0.136, "step": 46990 }, { "epoch": 2.3666851301676823, "grad_norm": 2.4968780282255945, "learning_rate": 1.297221603701202e-06, "loss": 0.1394, "step": 47000 }, { "epoch": 2.3671886801953774, "grad_norm": 2.7065891592986615, "learning_rate": 1.295253595270483e-06, "loss": 0.1157, "step": 47010 }, { "epoch": 2.3676922302230725, "grad_norm": 2.709297273450933, "learning_rate": 1.2932868586455566e-06, "loss": 0.14, "step": 47020 }, { "epoch": 2.368195780250768, "grad_norm": 3.57263283615959, "learning_rate": 1.2913213945015834e-06, "loss": 0.1533, "step": 47030 }, { "epoch": 2.368699330278463, "grad_norm": 3.3345047759074125, "learning_rate": 1.2893572035132884e-06, "loss": 0.1285, "step": 47040 }, { "epoch": 2.369202880306158, "grad_norm": 3.336881415835464, "learning_rate": 1.287394286354962e-06, "loss": 0.1204, "step": 47050 }, { "epoch": 2.3697064303338538, "grad_norm": 4.292469637529462, "learning_rate": 1.2854326437004566e-06, "loss": 0.1389, "step": 47060 }, { "epoch": 2.370209980361549, "grad_norm": 3.24991730159203, "learning_rate": 1.2834722762231844e-06, "loss": 0.1111, "step": 47070 }, { "epoch": 2.370713530389244, "grad_norm": 4.822036939213141, "learning_rate": 1.281513184596122e-06, "loss": 0.1199, "step": 47080 }, { "epoch": 2.3712170804169395, "grad_norm": 4.675638975752864, "learning_rate": 1.279555369491809e-06, "loss": 0.1321, "step": 47090 }, { "epoch": 2.3717206304446345, "grad_norm": 3.3085401495836306, "learning_rate": 1.2775988315823478e-06, "loss": 0.1236, "step": 47100 }, { "epoch": 2.37222418047233, "grad_norm": 4.512710902994025, "learning_rate": 1.2756435715393939e-06, "loss": 0.1544, "step": 47110 }, { "epoch": 2.372727730500025, "grad_norm": 3.0671531166006716, "learning_rate": 1.2736895900341745e-06, "loss": 0.1267, "step": 47120 }, { "epoch": 2.3732312805277203, "grad_norm": 2.806891875917823, "learning_rate": 1.271736887737472e-06, "loss": 0.1315, "step": 47130 }, { "epoch": 2.373734830555416, "grad_norm": 4.7030943977222135, "learning_rate": 1.2697854653196345e-06, "loss": 0.1422, "step": 47140 }, { "epoch": 2.374238380583111, "grad_norm": 0.7964153651531191, "learning_rate": 1.2678353234505636e-06, "loss": 0.1307, "step": 47150 }, { "epoch": 2.3747419306108064, "grad_norm": 3.689938966308206, "learning_rate": 1.2658864627997268e-06, "loss": 0.1181, "step": 47160 }, { "epoch": 2.3752454806385015, "grad_norm": 3.6303053978166786, "learning_rate": 1.2639388840361521e-06, "loss": 0.1425, "step": 47170 }, { "epoch": 2.3757490306661966, "grad_norm": 3.9686098880391114, "learning_rate": 1.261992587828424e-06, "loss": 0.1656, "step": 47180 }, { "epoch": 2.376252580693892, "grad_norm": 3.595428532552561, "learning_rate": 1.2600475748446873e-06, "loss": 0.1237, "step": 47190 }, { "epoch": 2.376756130721587, "grad_norm": 3.0547194565145377, "learning_rate": 1.2581038457526478e-06, "loss": 0.1239, "step": 47200 }, { "epoch": 2.3772596807492823, "grad_norm": 3.3161417600623033, "learning_rate": 1.2561614012195721e-06, "loss": 0.1228, "step": 47210 }, { "epoch": 2.377763230776978, "grad_norm": 3.3844256858586266, "learning_rate": 1.2542202419122806e-06, "loss": 0.1396, "step": 47220 }, { "epoch": 2.378266780804673, "grad_norm": 2.460016723543124, "learning_rate": 1.252280368497159e-06, "loss": 0.1267, "step": 47230 }, { "epoch": 2.378770330832368, "grad_norm": 3.240062780328324, "learning_rate": 1.2503417816401447e-06, "loss": 0.1335, "step": 47240 }, { "epoch": 2.3792738808600635, "grad_norm": 3.601655294525053, "learning_rate": 1.2484044820067394e-06, "loss": 0.1394, "step": 47250 }, { "epoch": 2.3797774308877586, "grad_norm": 3.807203679995527, "learning_rate": 1.2464684702620007e-06, "loss": 0.1431, "step": 47260 }, { "epoch": 2.3802809809154537, "grad_norm": 4.111348252860971, "learning_rate": 1.2445337470705416e-06, "loss": 0.1118, "step": 47270 }, { "epoch": 2.3807845309431492, "grad_norm": 2.9767797316406934, "learning_rate": 1.2426003130965365e-06, "loss": 0.1092, "step": 47280 }, { "epoch": 2.3812880809708443, "grad_norm": 3.9786540665919694, "learning_rate": 1.240668169003717e-06, "loss": 0.14, "step": 47290 }, { "epoch": 2.38179163099854, "grad_norm": 3.554718927637483, "learning_rate": 1.238737315455369e-06, "loss": 0.1308, "step": 47300 }, { "epoch": 2.382295181026235, "grad_norm": 3.4502681356275064, "learning_rate": 1.2368077531143363e-06, "loss": 0.1381, "step": 47310 }, { "epoch": 2.38279873105393, "grad_norm": 3.439604433298989, "learning_rate": 1.2348794826430205e-06, "loss": 0.1478, "step": 47320 }, { "epoch": 2.3833022810816256, "grad_norm": 2.7557421379439795, "learning_rate": 1.2329525047033814e-06, "loss": 0.1218, "step": 47330 }, { "epoch": 2.3838058311093206, "grad_norm": 2.594681373971869, "learning_rate": 1.2310268199569297e-06, "loss": 0.138, "step": 47340 }, { "epoch": 2.384309381137016, "grad_norm": 3.157780087113289, "learning_rate": 1.229102429064738e-06, "loss": 0.1306, "step": 47350 }, { "epoch": 2.3848129311647113, "grad_norm": 1.4020905868459814, "learning_rate": 1.227179332687431e-06, "loss": 0.1203, "step": 47360 }, { "epoch": 2.3853164811924064, "grad_norm": 1.075842552497666, "learning_rate": 1.225257531485194e-06, "loss": 0.1262, "step": 47370 }, { "epoch": 2.385820031220102, "grad_norm": 3.068849115467974, "learning_rate": 1.223337026117757e-06, "loss": 0.1424, "step": 47380 }, { "epoch": 2.386323581247797, "grad_norm": 3.281406778917231, "learning_rate": 1.2214178172444158e-06, "loss": 0.1595, "step": 47390 }, { "epoch": 2.386827131275492, "grad_norm": 3.1827640537452697, "learning_rate": 1.2194999055240175e-06, "loss": 0.1057, "step": 47400 }, { "epoch": 2.3873306813031876, "grad_norm": 3.6358339897344045, "learning_rate": 1.2175832916149648e-06, "loss": 0.1696, "step": 47410 }, { "epoch": 2.3878342313308827, "grad_norm": 2.898959131527066, "learning_rate": 1.2156679761752117e-06, "loss": 0.1505, "step": 47420 }, { "epoch": 2.3883377813585778, "grad_norm": 2.308663693045989, "learning_rate": 1.2137539598622682e-06, "loss": 0.1401, "step": 47430 }, { "epoch": 2.3888413313862733, "grad_norm": 3.6648074654459566, "learning_rate": 1.2118412433332022e-06, "loss": 0.1465, "step": 47440 }, { "epoch": 2.3893448814139684, "grad_norm": 4.094693103272367, "learning_rate": 1.2099298272446296e-06, "loss": 0.1553, "step": 47450 }, { "epoch": 2.389848431441664, "grad_norm": 3.2769310974496744, "learning_rate": 1.208019712252721e-06, "loss": 0.1128, "step": 47460 }, { "epoch": 2.390351981469359, "grad_norm": 2.274529916227727, "learning_rate": 1.206110899013202e-06, "loss": 0.1308, "step": 47470 }, { "epoch": 2.390855531497054, "grad_norm": 4.17723234580648, "learning_rate": 1.2042033881813536e-06, "loss": 0.1347, "step": 47480 }, { "epoch": 2.3913590815247496, "grad_norm": 6.341951409244652, "learning_rate": 1.2022971804120037e-06, "loss": 0.1141, "step": 47490 }, { "epoch": 2.3918626315524447, "grad_norm": 2.4687890701205784, "learning_rate": 1.2003922763595372e-06, "loss": 0.1691, "step": 47500 }, { "epoch": 2.39236618158014, "grad_norm": 3.6924741077352583, "learning_rate": 1.1984886766778913e-06, "loss": 0.1427, "step": 47510 }, { "epoch": 2.3928697316078353, "grad_norm": 3.3845762463336366, "learning_rate": 1.196586382020553e-06, "loss": 0.1707, "step": 47520 }, { "epoch": 2.3933732816355304, "grad_norm": 2.3844377345147825, "learning_rate": 1.1946853930405649e-06, "loss": 0.1735, "step": 47530 }, { "epoch": 2.393876831663226, "grad_norm": 4.550917084377138, "learning_rate": 1.1927857103905162e-06, "loss": 0.1494, "step": 47540 }, { "epoch": 2.394380381690921, "grad_norm": 2.803430935904246, "learning_rate": 1.1908873347225525e-06, "loss": 0.1121, "step": 47550 }, { "epoch": 2.394883931718616, "grad_norm": 4.188925171818083, "learning_rate": 1.1889902666883701e-06, "loss": 0.1389, "step": 47560 }, { "epoch": 2.3953874817463117, "grad_norm": 3.455688205436102, "learning_rate": 1.187094506939212e-06, "loss": 0.1355, "step": 47570 }, { "epoch": 2.3958910317740068, "grad_norm": 2.8585205318137064, "learning_rate": 1.1852000561258791e-06, "loss": 0.1162, "step": 47580 }, { "epoch": 2.396394581801702, "grad_norm": 3.224579751296697, "learning_rate": 1.1833069148987158e-06, "loss": 0.1461, "step": 47590 }, { "epoch": 2.3968981318293974, "grad_norm": 3.617406276469027, "learning_rate": 1.1814150839076233e-06, "loss": 0.1484, "step": 47600 }, { "epoch": 2.3974016818570925, "grad_norm": 2.0263653895834493, "learning_rate": 1.1795245638020463e-06, "loss": 0.1391, "step": 47610 }, { "epoch": 2.3979052318847875, "grad_norm": 1.4925857635095965, "learning_rate": 1.177635355230986e-06, "loss": 0.1141, "step": 47620 }, { "epoch": 2.398408781912483, "grad_norm": 3.413990780725865, "learning_rate": 1.1757474588429897e-06, "loss": 0.1197, "step": 47630 }, { "epoch": 2.398912331940178, "grad_norm": 3.095531293965198, "learning_rate": 1.1738608752861563e-06, "loss": 0.1714, "step": 47640 }, { "epoch": 2.3994158819678737, "grad_norm": 4.208723934368594, "learning_rate": 1.1719756052081322e-06, "loss": 0.1149, "step": 47650 }, { "epoch": 2.399919431995569, "grad_norm": 4.028290327743583, "learning_rate": 1.1700916492561115e-06, "loss": 0.1561, "step": 47660 }, { "epoch": 2.400422982023264, "grad_norm": 3.8095283935737, "learning_rate": 1.1682090080768405e-06, "loss": 0.1225, "step": 47670 }, { "epoch": 2.4009265320509594, "grad_norm": 3.3986124570974936, "learning_rate": 1.1663276823166148e-06, "loss": 0.1225, "step": 47680 }, { "epoch": 2.4014300820786545, "grad_norm": 4.390550438553147, "learning_rate": 1.1644476726212728e-06, "loss": 0.1302, "step": 47690 }, { "epoch": 2.4019336321063496, "grad_norm": 2.537787926518645, "learning_rate": 1.1625689796362066e-06, "loss": 0.1352, "step": 47700 }, { "epoch": 2.402437182134045, "grad_norm": 4.028429411593774, "learning_rate": 1.160691604006356e-06, "loss": 0.1414, "step": 47710 }, { "epoch": 2.40294073216174, "grad_norm": 3.7618407191307797, "learning_rate": 1.1588155463762058e-06, "loss": 0.1094, "step": 47720 }, { "epoch": 2.4034442821894357, "grad_norm": 3.7573413393485144, "learning_rate": 1.156940807389787e-06, "loss": 0.1525, "step": 47730 }, { "epoch": 2.403947832217131, "grad_norm": 4.228980269720516, "learning_rate": 1.155067387690683e-06, "loss": 0.1556, "step": 47740 }, { "epoch": 2.404451382244826, "grad_norm": 4.565298975360557, "learning_rate": 1.153195287922022e-06, "loss": 0.1465, "step": 47750 }, { "epoch": 2.4049549322725214, "grad_norm": 4.880146964045605, "learning_rate": 1.151324508726479e-06, "loss": 0.1402, "step": 47760 }, { "epoch": 2.4054584823002165, "grad_norm": 1.593104442427356, "learning_rate": 1.1494550507462737e-06, "loss": 0.1288, "step": 47770 }, { "epoch": 2.4059620323279116, "grad_norm": 3.5560044523304866, "learning_rate": 1.147586914623175e-06, "loss": 0.131, "step": 47780 }, { "epoch": 2.406465582355607, "grad_norm": 3.154902429011257, "learning_rate": 1.1457201009984987e-06, "loss": 0.1155, "step": 47790 }, { "epoch": 2.4069691323833022, "grad_norm": 2.890144454796894, "learning_rate": 1.1438546105131032e-06, "loss": 0.0982, "step": 47800 }, { "epoch": 2.4074726824109973, "grad_norm": 2.654762966636521, "learning_rate": 1.141990443807393e-06, "loss": 0.1026, "step": 47810 }, { "epoch": 2.407976232438693, "grad_norm": 2.851765733432491, "learning_rate": 1.140127601521322e-06, "loss": 0.1521, "step": 47820 }, { "epoch": 2.408479782466388, "grad_norm": 4.567232457194952, "learning_rate": 1.1382660842943871e-06, "loss": 0.1736, "step": 47830 }, { "epoch": 2.4089833324940835, "grad_norm": 4.065557891008132, "learning_rate": 1.1364058927656284e-06, "loss": 0.117, "step": 47840 }, { "epoch": 2.4094868825217786, "grad_norm": 4.611185218984225, "learning_rate": 1.1345470275736354e-06, "loss": 0.1377, "step": 47850 }, { "epoch": 2.4099904325494736, "grad_norm": 3.2794967636423094, "learning_rate": 1.132689489356536e-06, "loss": 0.1283, "step": 47860 }, { "epoch": 2.410493982577169, "grad_norm": 2.42890905980382, "learning_rate": 1.1308332787520105e-06, "loss": 0.1377, "step": 47870 }, { "epoch": 2.4109975326048643, "grad_norm": 3.992849086072969, "learning_rate": 1.128978396397275e-06, "loss": 0.1331, "step": 47880 }, { "epoch": 2.4115010826325594, "grad_norm": 4.057824727204683, "learning_rate": 1.1271248429290955e-06, "loss": 0.1217, "step": 47890 }, { "epoch": 2.412004632660255, "grad_norm": 2.3951829816644596, "learning_rate": 1.1252726189837803e-06, "loss": 0.1244, "step": 47900 }, { "epoch": 2.41250818268795, "grad_norm": 3.441504349416387, "learning_rate": 1.123421725197183e-06, "loss": 0.1425, "step": 47910 }, { "epoch": 2.4130117327156455, "grad_norm": 4.774169466954615, "learning_rate": 1.121572162204696e-06, "loss": 0.1339, "step": 47920 }, { "epoch": 2.4135152827433406, "grad_norm": 3.775695083391438, "learning_rate": 1.1197239306412567e-06, "loss": 0.1263, "step": 47930 }, { "epoch": 2.4140188327710357, "grad_norm": 3.173213974652411, "learning_rate": 1.117877031141348e-06, "loss": 0.1411, "step": 47940 }, { "epoch": 2.414522382798731, "grad_norm": 4.72861642803294, "learning_rate": 1.1160314643389942e-06, "loss": 0.1136, "step": 47950 }, { "epoch": 2.4150259328264263, "grad_norm": 4.360938841281821, "learning_rate": 1.1141872308677597e-06, "loss": 0.1226, "step": 47960 }, { "epoch": 2.4155294828541214, "grad_norm": 2.6906809151558084, "learning_rate": 1.112344331360754e-06, "loss": 0.1339, "step": 47970 }, { "epoch": 2.416033032881817, "grad_norm": 3.0137724112283397, "learning_rate": 1.1105027664506284e-06, "loss": 0.1418, "step": 47980 }, { "epoch": 2.416536582909512, "grad_norm": 3.5897516995231964, "learning_rate": 1.108662536769577e-06, "loss": 0.1367, "step": 47990 }, { "epoch": 2.417040132937207, "grad_norm": 4.726303601056609, "learning_rate": 1.1068236429493295e-06, "loss": 0.1294, "step": 48000 }, { "epoch": 2.4175436829649026, "grad_norm": 3.506418623369738, "learning_rate": 1.1049860856211636e-06, "loss": 0.118, "step": 48010 }, { "epoch": 2.4180472329925977, "grad_norm": 3.072595984056646, "learning_rate": 1.1031498654158962e-06, "loss": 0.1111, "step": 48020 }, { "epoch": 2.4185507830202932, "grad_norm": 3.7004937083792133, "learning_rate": 1.1013149829638863e-06, "loss": 0.1409, "step": 48030 }, { "epoch": 2.4190543330479883, "grad_norm": 2.483049753582695, "learning_rate": 1.0994814388950303e-06, "loss": 0.1424, "step": 48040 }, { "epoch": 2.4195578830756834, "grad_norm": 2.768544532968917, "learning_rate": 1.0976492338387679e-06, "loss": 0.1239, "step": 48050 }, { "epoch": 2.420061433103379, "grad_norm": 3.307773048634072, "learning_rate": 1.0958183684240798e-06, "loss": 0.1414, "step": 48060 }, { "epoch": 2.420564983131074, "grad_norm": 3.9226841170326066, "learning_rate": 1.0939888432794843e-06, "loss": 0.1398, "step": 48070 }, { "epoch": 2.421068533158769, "grad_norm": 4.203703035644466, "learning_rate": 1.0921606590330392e-06, "loss": 0.1317, "step": 48080 }, { "epoch": 2.4215720831864647, "grad_norm": 2.7732948882397395, "learning_rate": 1.0903338163123451e-06, "loss": 0.1568, "step": 48090 }, { "epoch": 2.4220756332141598, "grad_norm": 2.3612274074647828, "learning_rate": 1.088508315744542e-06, "loss": 0.1209, "step": 48100 }, { "epoch": 2.4225791832418553, "grad_norm": 1.8852975190445285, "learning_rate": 1.0866841579563048e-06, "loss": 0.1347, "step": 48110 }, { "epoch": 2.4230827332695504, "grad_norm": 4.309176697291869, "learning_rate": 1.084861343573852e-06, "loss": 0.1204, "step": 48120 }, { "epoch": 2.4235862832972455, "grad_norm": 3.8622908706457957, "learning_rate": 1.0830398732229397e-06, "loss": 0.1351, "step": 48130 }, { "epoch": 2.424089833324941, "grad_norm": 3.479071837800286, "learning_rate": 1.0812197475288606e-06, "loss": 0.1262, "step": 48140 }, { "epoch": 2.424593383352636, "grad_norm": 3.9638959321302076, "learning_rate": 1.0794009671164484e-06, "loss": 0.113, "step": 48150 }, { "epoch": 2.425096933380331, "grad_norm": 2.621781221493099, "learning_rate": 1.0775835326100732e-06, "loss": 0.1443, "step": 48160 }, { "epoch": 2.4256004834080267, "grad_norm": 5.528179829659585, "learning_rate": 1.0757674446336435e-06, "loss": 0.1392, "step": 48170 }, { "epoch": 2.426104033435722, "grad_norm": 2.7255312722006164, "learning_rate": 1.0739527038106078e-06, "loss": 0.1096, "step": 48180 }, { "epoch": 2.426607583463417, "grad_norm": 2.6133459339296023, "learning_rate": 1.0721393107639476e-06, "loss": 0.1393, "step": 48190 }, { "epoch": 2.4271111334911124, "grad_norm": 4.123437852856629, "learning_rate": 1.0703272661161868e-06, "loss": 0.1317, "step": 48200 }, { "epoch": 2.4276146835188075, "grad_norm": 2.3362378878300785, "learning_rate": 1.0685165704893807e-06, "loss": 0.1245, "step": 48210 }, { "epoch": 2.428118233546503, "grad_norm": 5.614075477457392, "learning_rate": 1.066707224505128e-06, "loss": 0.1439, "step": 48220 }, { "epoch": 2.428621783574198, "grad_norm": 3.7703679300173376, "learning_rate": 1.0648992287845578e-06, "loss": 0.1265, "step": 48230 }, { "epoch": 2.429125333601893, "grad_norm": 3.7055922324741277, "learning_rate": 1.0630925839483397e-06, "loss": 0.1434, "step": 48240 }, { "epoch": 2.4296288836295887, "grad_norm": 4.045501838706315, "learning_rate": 1.0612872906166793e-06, "loss": 0.1465, "step": 48250 }, { "epoch": 2.430132433657284, "grad_norm": 3.312199342315469, "learning_rate": 1.0594833494093181e-06, "loss": 0.146, "step": 48260 }, { "epoch": 2.4306359836849794, "grad_norm": 3.1806690951618832, "learning_rate": 1.0576807609455314e-06, "loss": 0.132, "step": 48270 }, { "epoch": 2.4311395337126744, "grad_norm": 3.6599324563564606, "learning_rate": 1.0558795258441296e-06, "loss": 0.14, "step": 48280 }, { "epoch": 2.4316430837403695, "grad_norm": 2.584824917200079, "learning_rate": 1.054079644723463e-06, "loss": 0.1329, "step": 48290 }, { "epoch": 2.432146633768065, "grad_norm": 2.97755826760296, "learning_rate": 1.0522811182014147e-06, "loss": 0.12, "step": 48300 }, { "epoch": 2.43265018379576, "grad_norm": 2.938488347466197, "learning_rate": 1.0504839468954004e-06, "loss": 0.1368, "step": 48310 }, { "epoch": 2.4331537338234552, "grad_norm": 2.189570525619003, "learning_rate": 1.0486881314223735e-06, "loss": 0.1317, "step": 48320 }, { "epoch": 2.4336572838511508, "grad_norm": 3.8563557527718815, "learning_rate": 1.0468936723988231e-06, "loss": 0.1112, "step": 48330 }, { "epoch": 2.434160833878846, "grad_norm": 3.0438644868129643, "learning_rate": 1.0451005704407692e-06, "loss": 0.1434, "step": 48340 }, { "epoch": 2.434664383906541, "grad_norm": 2.801699094726681, "learning_rate": 1.0433088261637652e-06, "loss": 0.1428, "step": 48350 }, { "epoch": 2.4351679339342365, "grad_norm": 3.1923699484273236, "learning_rate": 1.0415184401829037e-06, "loss": 0.151, "step": 48360 }, { "epoch": 2.4356714839619316, "grad_norm": 3.3742274742424057, "learning_rate": 1.039729413112806e-06, "loss": 0.0993, "step": 48370 }, { "epoch": 2.4361750339896266, "grad_norm": 2.3735155663176575, "learning_rate": 1.0379417455676317e-06, "loss": 0.1397, "step": 48380 }, { "epoch": 2.436678584017322, "grad_norm": 3.1832655115738224, "learning_rate": 1.0361554381610672e-06, "loss": 0.1158, "step": 48390 }, { "epoch": 2.4371821340450173, "grad_norm": 4.531090880978055, "learning_rate": 1.0343704915063379e-06, "loss": 0.1265, "step": 48400 }, { "epoch": 2.437685684072713, "grad_norm": 3.894647554583781, "learning_rate": 1.0325869062161997e-06, "loss": 0.1146, "step": 48410 }, { "epoch": 2.438189234100408, "grad_norm": 3.5731297599731926, "learning_rate": 1.0308046829029418e-06, "loss": 0.1388, "step": 48420 }, { "epoch": 2.438692784128103, "grad_norm": 2.401847664102958, "learning_rate": 1.0290238221783822e-06, "loss": 0.1327, "step": 48430 }, { "epoch": 2.4391963341557985, "grad_norm": 2.8961825053749526, "learning_rate": 1.027244324653876e-06, "loss": 0.1452, "step": 48440 }, { "epoch": 2.4396998841834936, "grad_norm": 3.931184911371189, "learning_rate": 1.0254661909403103e-06, "loss": 0.1647, "step": 48450 }, { "epoch": 2.440203434211189, "grad_norm": 3.917830371175145, "learning_rate": 1.023689421648099e-06, "loss": 0.1591, "step": 48460 }, { "epoch": 2.440706984238884, "grad_norm": 1.7814695262204123, "learning_rate": 1.0219140173871945e-06, "loss": 0.0838, "step": 48470 }, { "epoch": 2.4412105342665793, "grad_norm": 4.176341663281862, "learning_rate": 1.0201399787670734e-06, "loss": 0.1367, "step": 48480 }, { "epoch": 2.441714084294275, "grad_norm": 3.1749738616335397, "learning_rate": 1.0183673063967503e-06, "loss": 0.1556, "step": 48490 }, { "epoch": 2.44221763432197, "grad_norm": 2.087351017204985, "learning_rate": 1.0165960008847643e-06, "loss": 0.1228, "step": 48500 }, { "epoch": 2.442721184349665, "grad_norm": 4.266723589731928, "learning_rate": 1.0148260628391904e-06, "loss": 0.14, "step": 48510 }, { "epoch": 2.4432247343773605, "grad_norm": 4.285130787889263, "learning_rate": 1.0130574928676311e-06, "loss": 0.1254, "step": 48520 }, { "epoch": 2.4437282844050556, "grad_norm": 1.8926017148320884, "learning_rate": 1.0112902915772233e-06, "loss": 0.1288, "step": 48530 }, { "epoch": 2.4442318344327507, "grad_norm": 3.2485835282378583, "learning_rate": 1.009524459574629e-06, "loss": 0.1454, "step": 48540 }, { "epoch": 2.4447353844604462, "grad_norm": 3.2229984118419206, "learning_rate": 1.0077599974660407e-06, "loss": 0.1379, "step": 48550 }, { "epoch": 2.4452389344881413, "grad_norm": 1.8684772639110967, "learning_rate": 1.0059969058571845e-06, "loss": 0.1155, "step": 48560 }, { "epoch": 2.4457424845158364, "grad_norm": 3.548242599048, "learning_rate": 1.0042351853533139e-06, "loss": 0.1384, "step": 48570 }, { "epoch": 2.446246034543532, "grad_norm": 2.748207293054369, "learning_rate": 1.0024748365592096e-06, "loss": 0.1451, "step": 48580 }, { "epoch": 2.446749584571227, "grad_norm": 5.290853711174246, "learning_rate": 1.000715860079185e-06, "loss": 0.1425, "step": 48590 }, { "epoch": 2.4472531345989226, "grad_norm": 2.6928343109654205, "learning_rate": 9.989582565170809e-07, "loss": 0.1325, "step": 48600 }, { "epoch": 2.4477566846266177, "grad_norm": 2.570811404465497, "learning_rate": 9.972020264762666e-07, "loss": 0.1488, "step": 48610 }, { "epoch": 2.4482602346543128, "grad_norm": 3.374708275941492, "learning_rate": 9.954471705596375e-07, "loss": 0.1135, "step": 48620 }, { "epoch": 2.4487637846820083, "grad_norm": 2.5041886796510187, "learning_rate": 9.936936893696219e-07, "loss": 0.1255, "step": 48630 }, { "epoch": 2.4492673347097034, "grad_norm": 5.727794070619962, "learning_rate": 9.919415835081731e-07, "loss": 0.1417, "step": 48640 }, { "epoch": 2.449770884737399, "grad_norm": 4.386487931788923, "learning_rate": 9.901908535767757e-07, "loss": 0.1409, "step": 48650 }, { "epoch": 2.450274434765094, "grad_norm": 3.9531427814366515, "learning_rate": 9.884415001764352e-07, "loss": 0.1211, "step": 48660 }, { "epoch": 2.450777984792789, "grad_norm": 1.53650286002277, "learning_rate": 9.866935239076914e-07, "loss": 0.1511, "step": 48670 }, { "epoch": 2.4512815348204846, "grad_norm": 4.853176239649393, "learning_rate": 9.849469253706095e-07, "loss": 0.1764, "step": 48680 }, { "epoch": 2.4517850848481797, "grad_norm": 3.966026758573782, "learning_rate": 9.832017051647791e-07, "loss": 0.1213, "step": 48690 }, { "epoch": 2.452288634875875, "grad_norm": 4.765526532606667, "learning_rate": 9.814578638893179e-07, "loss": 0.146, "step": 48700 }, { "epoch": 2.4527921849035703, "grad_norm": 3.779341644392341, "learning_rate": 9.797154021428717e-07, "loss": 0.1089, "step": 48710 }, { "epoch": 2.4532957349312654, "grad_norm": 4.742021550776487, "learning_rate": 9.779743205236131e-07, "loss": 0.1415, "step": 48720 }, { "epoch": 2.4537992849589605, "grad_norm": 3.829687436918556, "learning_rate": 9.762346196292372e-07, "loss": 0.1454, "step": 48730 }, { "epoch": 2.454302834986656, "grad_norm": 4.817277207500396, "learning_rate": 9.744963000569685e-07, "loss": 0.1382, "step": 48740 }, { "epoch": 2.454806385014351, "grad_norm": 3.0582162641331827, "learning_rate": 9.727593624035574e-07, "loss": 0.1165, "step": 48750 }, { "epoch": 2.455309935042046, "grad_norm": 3.1295791802735202, "learning_rate": 9.710238072652771e-07, "loss": 0.1226, "step": 48760 }, { "epoch": 2.4558134850697417, "grad_norm": 1.7090703010888457, "learning_rate": 9.6928963523793e-07, "loss": 0.1613, "step": 48770 }, { "epoch": 2.456317035097437, "grad_norm": 2.269860198202668, "learning_rate": 9.675568469168388e-07, "loss": 0.1402, "step": 48780 }, { "epoch": 2.4568205851251324, "grad_norm": 1.7061517544494826, "learning_rate": 9.658254428968562e-07, "loss": 0.0986, "step": 48790 }, { "epoch": 2.4573241351528274, "grad_norm": 3.196789629682461, "learning_rate": 9.64095423772357e-07, "loss": 0.1257, "step": 48800 }, { "epoch": 2.4578276851805225, "grad_norm": 5.6566022918086825, "learning_rate": 9.623667901372402e-07, "loss": 0.1457, "step": 48810 }, { "epoch": 2.458331235208218, "grad_norm": 3.4509289230831555, "learning_rate": 9.606395425849318e-07, "loss": 0.1415, "step": 48820 }, { "epoch": 2.458834785235913, "grad_norm": 3.056227323560499, "learning_rate": 9.589136817083777e-07, "loss": 0.133, "step": 48830 }, { "epoch": 2.4593383352636087, "grad_norm": 4.887961194920394, "learning_rate": 9.571892081000534e-07, "loss": 0.1285, "step": 48840 }, { "epoch": 2.4598418852913038, "grad_norm": 3.6680364223433943, "learning_rate": 9.554661223519512e-07, "loss": 0.1491, "step": 48850 }, { "epoch": 2.460345435318999, "grad_norm": 3.1799529914174163, "learning_rate": 9.537444250555933e-07, "loss": 0.1445, "step": 48860 }, { "epoch": 2.4608489853466944, "grad_norm": 6.25275882411771, "learning_rate": 9.520241168020222e-07, "loss": 0.1103, "step": 48870 }, { "epoch": 2.4613525353743895, "grad_norm": 3.895428365490208, "learning_rate": 9.503051981818057e-07, "loss": 0.1279, "step": 48880 }, { "epoch": 2.4618560854020846, "grad_norm": 3.0113217718960374, "learning_rate": 9.48587669785031e-07, "loss": 0.1397, "step": 48890 }, { "epoch": 2.46235963542978, "grad_norm": 2.977648308911908, "learning_rate": 9.468715322013094e-07, "loss": 0.1556, "step": 48900 }, { "epoch": 2.462863185457475, "grad_norm": 3.196842237937296, "learning_rate": 9.451567860197763e-07, "loss": 0.1523, "step": 48910 }, { "epoch": 2.4633667354851703, "grad_norm": 5.993463355907834, "learning_rate": 9.434434318290903e-07, "loss": 0.1539, "step": 48920 }, { "epoch": 2.463870285512866, "grad_norm": 3.221008947318196, "learning_rate": 9.417314702174268e-07, "loss": 0.1369, "step": 48930 }, { "epoch": 2.464373835540561, "grad_norm": 3.247821908504534, "learning_rate": 9.400209017724888e-07, "loss": 0.1285, "step": 48940 }, { "epoch": 2.464877385568256, "grad_norm": 4.328721182292053, "learning_rate": 9.383117270814996e-07, "loss": 0.1213, "step": 48950 }, { "epoch": 2.4653809355959515, "grad_norm": 1.8443343758507884, "learning_rate": 9.366039467312021e-07, "loss": 0.126, "step": 48960 }, { "epoch": 2.4658844856236466, "grad_norm": 3.4800458268391776, "learning_rate": 9.348975613078609e-07, "loss": 0.1459, "step": 48970 }, { "epoch": 2.466388035651342, "grad_norm": 3.580167015640462, "learning_rate": 9.331925713972639e-07, "loss": 0.1261, "step": 48980 }, { "epoch": 2.466891585679037, "grad_norm": 3.159753356110611, "learning_rate": 9.314889775847197e-07, "loss": 0.1368, "step": 48990 }, { "epoch": 2.4673951357067323, "grad_norm": 1.1525599659441046, "learning_rate": 9.297867804550536e-07, "loss": 0.0991, "step": 49000 }, { "epoch": 2.467898685734428, "grad_norm": 3.970147515149097, "learning_rate": 9.280859805926168e-07, "loss": 0.1094, "step": 49010 }, { "epoch": 2.468402235762123, "grad_norm": 3.4158539833453547, "learning_rate": 9.263865785812786e-07, "loss": 0.1555, "step": 49020 }, { "epoch": 2.4689057857898185, "grad_norm": 3.342009054634714, "learning_rate": 9.246885750044266e-07, "loss": 0.1378, "step": 49030 }, { "epoch": 2.4694093358175135, "grad_norm": 2.951828848043422, "learning_rate": 9.229919704449724e-07, "loss": 0.1149, "step": 49040 }, { "epoch": 2.4699128858452086, "grad_norm": 3.7459077976174395, "learning_rate": 9.212967654853416e-07, "loss": 0.1303, "step": 49050 }, { "epoch": 2.470416435872904, "grad_norm": 2.0071143432627903, "learning_rate": 9.196029607074847e-07, "loss": 0.1509, "step": 49060 }, { "epoch": 2.4709199859005992, "grad_norm": 4.055930226323956, "learning_rate": 9.179105566928708e-07, "loss": 0.1416, "step": 49070 }, { "epoch": 2.4714235359282943, "grad_norm": 3.243041040068999, "learning_rate": 9.162195540224839e-07, "loss": 0.1372, "step": 49080 }, { "epoch": 2.47192708595599, "grad_norm": 2.8880111083200988, "learning_rate": 9.14529953276832e-07, "loss": 0.1263, "step": 49090 }, { "epoch": 2.472430635983685, "grad_norm": 3.456328320475716, "learning_rate": 9.12841755035938e-07, "loss": 0.1408, "step": 49100 }, { "epoch": 2.47293418601138, "grad_norm": 3.6871312260066627, "learning_rate": 9.111549598793478e-07, "loss": 0.1364, "step": 49110 }, { "epoch": 2.4734377360390756, "grad_norm": 4.056413577441344, "learning_rate": 9.094695683861193e-07, "loss": 0.1145, "step": 49120 }, { "epoch": 2.4739412860667707, "grad_norm": 3.8314356476378255, "learning_rate": 9.077855811348341e-07, "loss": 0.1577, "step": 49130 }, { "epoch": 2.4744448360944657, "grad_norm": 3.671162907124836, "learning_rate": 9.06102998703589e-07, "loss": 0.117, "step": 49140 }, { "epoch": 2.4749483861221613, "grad_norm": 3.1069332029638463, "learning_rate": 9.044218216700013e-07, "loss": 0.1465, "step": 49150 }, { "epoch": 2.4754519361498564, "grad_norm": 2.7659825365588957, "learning_rate": 9.027420506112023e-07, "loss": 0.1555, "step": 49160 }, { "epoch": 2.475955486177552, "grad_norm": 3.690464022192152, "learning_rate": 9.010636861038402e-07, "loss": 0.1452, "step": 49170 }, { "epoch": 2.476459036205247, "grad_norm": 3.678178674986595, "learning_rate": 8.993867287240848e-07, "loss": 0.1503, "step": 49180 }, { "epoch": 2.476962586232942, "grad_norm": 3.764440559338952, "learning_rate": 8.977111790476211e-07, "loss": 0.1254, "step": 49190 }, { "epoch": 2.4774661362606376, "grad_norm": 1.9271396561219882, "learning_rate": 8.960370376496469e-07, "loss": 0.1409, "step": 49200 }, { "epoch": 2.4779696862883327, "grad_norm": 5.039480212212664, "learning_rate": 8.943643051048817e-07, "loss": 0.1494, "step": 49210 }, { "epoch": 2.4784732363160282, "grad_norm": 3.48508363425285, "learning_rate": 8.926929819875607e-07, "loss": 0.1339, "step": 49220 }, { "epoch": 2.4789767863437233, "grad_norm": 2.6132008322621623, "learning_rate": 8.910230688714322e-07, "loss": 0.1257, "step": 49230 }, { "epoch": 2.4794803363714184, "grad_norm": 4.121034704272695, "learning_rate": 8.893545663297615e-07, "loss": 0.1258, "step": 49240 }, { "epoch": 2.479983886399114, "grad_norm": 3.428221754558722, "learning_rate": 8.876874749353309e-07, "loss": 0.1412, "step": 49250 }, { "epoch": 2.480487436426809, "grad_norm": 2.818351350710746, "learning_rate": 8.860217952604383e-07, "loss": 0.1426, "step": 49260 }, { "epoch": 2.480990986454504, "grad_norm": 4.558908750753191, "learning_rate": 8.843575278768979e-07, "loss": 0.1444, "step": 49270 }, { "epoch": 2.4814945364821996, "grad_norm": 2.858114670010808, "learning_rate": 8.826946733560343e-07, "loss": 0.1252, "step": 49280 }, { "epoch": 2.4819980865098947, "grad_norm": 3.539788036899181, "learning_rate": 8.81033232268692e-07, "loss": 0.1284, "step": 49290 }, { "epoch": 2.48250163653759, "grad_norm": 4.120609302347175, "learning_rate": 8.793732051852299e-07, "loss": 0.1512, "step": 49300 }, { "epoch": 2.4830051865652853, "grad_norm": 3.6203456368440134, "learning_rate": 8.777145926755193e-07, "loss": 0.1158, "step": 49310 }, { "epoch": 2.4835087365929804, "grad_norm": 3.132876068852272, "learning_rate": 8.760573953089452e-07, "loss": 0.1309, "step": 49320 }, { "epoch": 2.4840122866206755, "grad_norm": 2.3512505063733466, "learning_rate": 8.744016136544093e-07, "loss": 0.1519, "step": 49330 }, { "epoch": 2.484515836648371, "grad_norm": 1.6809512198638128, "learning_rate": 8.727472482803284e-07, "loss": 0.1202, "step": 49340 }, { "epoch": 2.485019386676066, "grad_norm": 2.602559457006807, "learning_rate": 8.710942997546279e-07, "loss": 0.1382, "step": 49350 }, { "epoch": 2.4855229367037617, "grad_norm": 3.692454534176833, "learning_rate": 8.694427686447515e-07, "loss": 0.1603, "step": 49360 }, { "epoch": 2.4860264867314568, "grad_norm": 2.709385643979661, "learning_rate": 8.677926555176564e-07, "loss": 0.1126, "step": 49370 }, { "epoch": 2.486530036759152, "grad_norm": 4.18436411011211, "learning_rate": 8.661439609398098e-07, "loss": 0.1428, "step": 49380 }, { "epoch": 2.4870335867868474, "grad_norm": 4.271230457913669, "learning_rate": 8.644966854771924e-07, "loss": 0.134, "step": 49390 }, { "epoch": 2.4875371368145425, "grad_norm": 3.20576489061064, "learning_rate": 8.628508296953008e-07, "loss": 0.1393, "step": 49400 }, { "epoch": 2.488040686842238, "grad_norm": 3.0324328937029854, "learning_rate": 8.612063941591414e-07, "loss": 0.1648, "step": 49410 }, { "epoch": 2.488544236869933, "grad_norm": 2.532813148504832, "learning_rate": 8.595633794332359e-07, "loss": 0.1349, "step": 49420 }, { "epoch": 2.489047786897628, "grad_norm": 2.5180629484088364, "learning_rate": 8.57921786081613e-07, "loss": 0.1164, "step": 49430 }, { "epoch": 2.4895513369253237, "grad_norm": 2.558448919894354, "learning_rate": 8.562816146678204e-07, "loss": 0.1302, "step": 49440 }, { "epoch": 2.490054886953019, "grad_norm": 3.0954782741042597, "learning_rate": 8.546428657549105e-07, "loss": 0.1317, "step": 49450 }, { "epoch": 2.490558436980714, "grad_norm": 3.2845714872663168, "learning_rate": 8.530055399054543e-07, "loss": 0.1412, "step": 49460 }, { "epoch": 2.4910619870084094, "grad_norm": 3.6310581096733494, "learning_rate": 8.513696376815278e-07, "loss": 0.1148, "step": 49470 }, { "epoch": 2.4915655370361045, "grad_norm": 1.262323774531941, "learning_rate": 8.497351596447229e-07, "loss": 0.1076, "step": 49480 }, { "epoch": 2.4920690870637996, "grad_norm": 3.7396562085989986, "learning_rate": 8.481021063561406e-07, "loss": 0.0962, "step": 49490 }, { "epoch": 2.492572637091495, "grad_norm": 3.6461587629364267, "learning_rate": 8.464704783763955e-07, "loss": 0.1466, "step": 49500 }, { "epoch": 2.49307618711919, "grad_norm": 3.8515240326207287, "learning_rate": 8.448402762656088e-07, "loss": 0.1472, "step": 49510 }, { "epoch": 2.4935797371468853, "grad_norm": 3.8708415714681514, "learning_rate": 8.432115005834124e-07, "loss": 0.1354, "step": 49520 }, { "epoch": 2.494083287174581, "grad_norm": 3.9559026678760794, "learning_rate": 8.415841518889523e-07, "loss": 0.124, "step": 49530 }, { "epoch": 2.494586837202276, "grad_norm": 4.132454260846118, "learning_rate": 8.399582307408827e-07, "loss": 0.1181, "step": 49540 }, { "epoch": 2.4950903872299715, "grad_norm": 4.438633332949682, "learning_rate": 8.383337376973666e-07, "loss": 0.1246, "step": 49550 }, { "epoch": 2.4955939372576665, "grad_norm": 2.835986290734649, "learning_rate": 8.367106733160774e-07, "loss": 0.1228, "step": 49560 }, { "epoch": 2.4960974872853616, "grad_norm": 3.0352903595210257, "learning_rate": 8.350890381542004e-07, "loss": 0.1172, "step": 49570 }, { "epoch": 2.496601037313057, "grad_norm": 4.044336584945397, "learning_rate": 8.334688327684271e-07, "loss": 0.1228, "step": 49580 }, { "epoch": 2.4971045873407522, "grad_norm": 3.3152463260537255, "learning_rate": 8.31850057714958e-07, "loss": 0.1362, "step": 49590 }, { "epoch": 2.497608137368448, "grad_norm": 2.5336251061864083, "learning_rate": 8.302327135495048e-07, "loss": 0.1153, "step": 49600 }, { "epoch": 2.498111687396143, "grad_norm": 3.3520408007232456, "learning_rate": 8.286168008272877e-07, "loss": 0.128, "step": 49610 }, { "epoch": 2.498615237423838, "grad_norm": 3.126925867669613, "learning_rate": 8.270023201030342e-07, "loss": 0.1204, "step": 49620 }, { "epoch": 2.4991187874515335, "grad_norm": 3.1133596574446183, "learning_rate": 8.253892719309803e-07, "loss": 0.1249, "step": 49630 }, { "epoch": 2.4996223374792286, "grad_norm": 2.813653495887764, "learning_rate": 8.23777656864873e-07, "loss": 0.141, "step": 49640 }, { "epoch": 2.5001258875069237, "grad_norm": 4.065427393950052, "learning_rate": 8.221674754579622e-07, "loss": 0.1096, "step": 49650 }, { "epoch": 2.500629437534619, "grad_norm": 2.4324382735595464, "learning_rate": 8.205587282630112e-07, "loss": 0.1219, "step": 49660 }, { "epoch": 2.5011329875623143, "grad_norm": 4.435111210676798, "learning_rate": 8.189514158322865e-07, "loss": 0.1368, "step": 49670 }, { "epoch": 2.5016365375900094, "grad_norm": 4.137334342187147, "learning_rate": 8.173455387175639e-07, "loss": 0.1536, "step": 49680 }, { "epoch": 2.502140087617705, "grad_norm": 2.7757817475348965, "learning_rate": 8.157410974701279e-07, "loss": 0.1341, "step": 49690 }, { "epoch": 2.5026436376454, "grad_norm": 2.8153965336146443, "learning_rate": 8.141380926407666e-07, "loss": 0.0963, "step": 49700 }, { "epoch": 2.503147187673095, "grad_norm": 3.3846740495543965, "learning_rate": 8.125365247797795e-07, "loss": 0.1153, "step": 49710 }, { "epoch": 2.5036507377007906, "grad_norm": 3.0981810358748496, "learning_rate": 8.109363944369675e-07, "loss": 0.1362, "step": 49720 }, { "epoch": 2.5041542877284857, "grad_norm": 4.750681513763579, "learning_rate": 8.093377021616433e-07, "loss": 0.1326, "step": 49730 }, { "epoch": 2.5046578377561812, "grad_norm": 2.6218063416966553, "learning_rate": 8.077404485026213e-07, "loss": 0.1307, "step": 49740 }, { "epoch": 2.5051613877838763, "grad_norm": 2.945392936900291, "learning_rate": 8.061446340082251e-07, "loss": 0.1073, "step": 49750 }, { "epoch": 2.505664937811572, "grad_norm": 1.8018017912429576, "learning_rate": 8.045502592262839e-07, "loss": 0.1436, "step": 49760 }, { "epoch": 2.506168487839267, "grad_norm": 5.456531889999404, "learning_rate": 8.029573247041322e-07, "loss": 0.1519, "step": 49770 }, { "epoch": 2.506672037866962, "grad_norm": 2.8731803045740745, "learning_rate": 8.013658309886096e-07, "loss": 0.1217, "step": 49780 }, { "epoch": 2.5071755878946576, "grad_norm": 2.73088800902563, "learning_rate": 7.997757786260596e-07, "loss": 0.1527, "step": 49790 }, { "epoch": 2.5076791379223526, "grad_norm": 2.9071678519354927, "learning_rate": 7.981871681623349e-07, "loss": 0.0959, "step": 49800 }, { "epoch": 2.5081826879500477, "grad_norm": 2.177309285247602, "learning_rate": 7.966000001427909e-07, "loss": 0.1449, "step": 49810 }, { "epoch": 2.5086862379777433, "grad_norm": 2.5866039511620222, "learning_rate": 7.950142751122869e-07, "loss": 0.1289, "step": 49820 }, { "epoch": 2.5091897880054383, "grad_norm": 3.6288746400123046, "learning_rate": 7.934299936151884e-07, "loss": 0.1155, "step": 49830 }, { "epoch": 2.5096933380331334, "grad_norm": 2.9030902006212265, "learning_rate": 7.918471561953661e-07, "loss": 0.1418, "step": 49840 }, { "epoch": 2.510196888060829, "grad_norm": 3.2773832470132884, "learning_rate": 7.902657633961924e-07, "loss": 0.1249, "step": 49850 }, { "epoch": 2.510700438088524, "grad_norm": 2.917080257947354, "learning_rate": 7.886858157605443e-07, "loss": 0.1286, "step": 49860 }, { "epoch": 2.511203988116219, "grad_norm": 3.550077442192912, "learning_rate": 7.871073138308044e-07, "loss": 0.1463, "step": 49870 }, { "epoch": 2.5117075381439147, "grad_norm": 1.8704890725050927, "learning_rate": 7.855302581488583e-07, "loss": 0.1339, "step": 49880 }, { "epoch": 2.5122110881716098, "grad_norm": 3.204350221048971, "learning_rate": 7.839546492560957e-07, "loss": 0.134, "step": 49890 }, { "epoch": 2.512714638199305, "grad_norm": 4.626802390385158, "learning_rate": 7.823804876934065e-07, "loss": 0.1268, "step": 49900 }, { "epoch": 2.5132181882270004, "grad_norm": 3.2555343092159403, "learning_rate": 7.808077740011877e-07, "loss": 0.1423, "step": 49910 }, { "epoch": 2.5137217382546955, "grad_norm": 3.6145107186801604, "learning_rate": 7.792365087193383e-07, "loss": 0.1344, "step": 49920 }, { "epoch": 2.514225288282391, "grad_norm": 4.094141904283299, "learning_rate": 7.776666923872583e-07, "loss": 0.1277, "step": 49930 }, { "epoch": 2.514728838310086, "grad_norm": 3.1232465857347944, "learning_rate": 7.760983255438503e-07, "loss": 0.1744, "step": 49940 }, { "epoch": 2.5152323883377816, "grad_norm": 3.5868470248326663, "learning_rate": 7.745314087275219e-07, "loss": 0.1207, "step": 49950 }, { "epoch": 2.5157359383654767, "grad_norm": 4.448627631451728, "learning_rate": 7.729659424761821e-07, "loss": 0.1287, "step": 49960 }, { "epoch": 2.516239488393172, "grad_norm": 2.8522611287492166, "learning_rate": 7.714019273272388e-07, "loss": 0.1378, "step": 49970 }, { "epoch": 2.5167430384208673, "grad_norm": 3.657751174955089, "learning_rate": 7.69839363817606e-07, "loss": 0.1284, "step": 49980 }, { "epoch": 2.5172465884485624, "grad_norm": 4.564196246680037, "learning_rate": 7.682782524836985e-07, "loss": 0.1474, "step": 49990 }, { "epoch": 2.5177501384762575, "grad_norm": 3.219039555457858, "learning_rate": 7.667185938614302e-07, "loss": 0.1739, "step": 50000 }, { "epoch": 2.518253688503953, "grad_norm": 4.3494534461974625, "learning_rate": 7.651603884862163e-07, "loss": 0.1464, "step": 50010 }, { "epoch": 2.518757238531648, "grad_norm": 3.139177541879767, "learning_rate": 7.636036368929767e-07, "loss": 0.116, "step": 50020 }, { "epoch": 2.519260788559343, "grad_norm": 3.193723824821315, "learning_rate": 7.620483396161293e-07, "loss": 0.1281, "step": 50030 }, { "epoch": 2.5197643385870387, "grad_norm": 3.009929496601653, "learning_rate": 7.60494497189595e-07, "loss": 0.1296, "step": 50040 }, { "epoch": 2.520267888614734, "grad_norm": 3.472451449138381, "learning_rate": 7.589421101467908e-07, "loss": 0.1432, "step": 50050 }, { "epoch": 2.520771438642429, "grad_norm": 3.043966853805542, "learning_rate": 7.573911790206406e-07, "loss": 0.1431, "step": 50060 }, { "epoch": 2.5212749886701245, "grad_norm": 4.105837718737554, "learning_rate": 7.558417043435618e-07, "loss": 0.1422, "step": 50070 }, { "epoch": 2.5217785386978195, "grad_norm": 4.59787480817549, "learning_rate": 7.542936866474776e-07, "loss": 0.1423, "step": 50080 }, { "epoch": 2.5222820887255146, "grad_norm": 2.7828998418357007, "learning_rate": 7.527471264638064e-07, "loss": 0.1477, "step": 50090 }, { "epoch": 2.52278563875321, "grad_norm": 4.241690223852967, "learning_rate": 7.512020243234686e-07, "loss": 0.1391, "step": 50100 }, { "epoch": 2.5232891887809052, "grad_norm": 0.7648458887520068, "learning_rate": 7.496583807568852e-07, "loss": 0.0887, "step": 50110 }, { "epoch": 2.5237927388086008, "grad_norm": 3.6155398085947374, "learning_rate": 7.481161962939731e-07, "loss": 0.1321, "step": 50120 }, { "epoch": 2.524296288836296, "grad_norm": 3.435651085544168, "learning_rate": 7.465754714641521e-07, "loss": 0.1216, "step": 50130 }, { "epoch": 2.5247998388639914, "grad_norm": 2.9925682961593036, "learning_rate": 7.450362067963368e-07, "loss": 0.1185, "step": 50140 }, { "epoch": 2.5253033888916865, "grad_norm": 3.1651412317090912, "learning_rate": 7.434984028189446e-07, "loss": 0.1319, "step": 50150 }, { "epoch": 2.5258069389193816, "grad_norm": 2.9184181351299365, "learning_rate": 7.419620600598892e-07, "loss": 0.1012, "step": 50160 }, { "epoch": 2.526310488947077, "grad_norm": 1.2531083516419244, "learning_rate": 7.404271790465827e-07, "loss": 0.1193, "step": 50170 }, { "epoch": 2.526814038974772, "grad_norm": 3.5248302256936364, "learning_rate": 7.38893760305936e-07, "loss": 0.1424, "step": 50180 }, { "epoch": 2.5273175890024673, "grad_norm": 3.49130733521284, "learning_rate": 7.373618043643588e-07, "loss": 0.1395, "step": 50190 }, { "epoch": 2.527821139030163, "grad_norm": 3.8623820154134645, "learning_rate": 7.358313117477566e-07, "loss": 0.142, "step": 50200 }, { "epoch": 2.528324689057858, "grad_norm": 4.071342364756708, "learning_rate": 7.343022829815327e-07, "loss": 0.1384, "step": 50210 }, { "epoch": 2.528828239085553, "grad_norm": 4.279554969030672, "learning_rate": 7.327747185905903e-07, "loss": 0.1501, "step": 50220 }, { "epoch": 2.5293317891132485, "grad_norm": 3.2081350729637794, "learning_rate": 7.31248619099329e-07, "loss": 0.1435, "step": 50230 }, { "epoch": 2.5298353391409436, "grad_norm": 1.806595512571992, "learning_rate": 7.297239850316423e-07, "loss": 0.1366, "step": 50240 }, { "epoch": 2.5303388891686387, "grad_norm": 4.373321862760685, "learning_rate": 7.28200816910925e-07, "loss": 0.1469, "step": 50250 }, { "epoch": 2.5308424391963342, "grad_norm": 5.1442024745477015, "learning_rate": 7.266791152600677e-07, "loss": 0.1479, "step": 50260 }, { "epoch": 2.5313459892240293, "grad_norm": 2.7710560809160705, "learning_rate": 7.251588806014559e-07, "loss": 0.1656, "step": 50270 }, { "epoch": 2.5318495392517244, "grad_norm": 1.3622527605330983, "learning_rate": 7.23640113456971e-07, "loss": 0.141, "step": 50280 }, { "epoch": 2.53235308927942, "grad_norm": 5.089263781612481, "learning_rate": 7.22122814347993e-07, "loss": 0.1573, "step": 50290 }, { "epoch": 2.532856639307115, "grad_norm": 3.674397516361046, "learning_rate": 7.206069837953971e-07, "loss": 0.1435, "step": 50300 }, { "epoch": 2.5333601893348106, "grad_norm": 1.8325539736238308, "learning_rate": 7.190926223195554e-07, "loss": 0.1206, "step": 50310 }, { "epoch": 2.5338637393625056, "grad_norm": 2.6554978920947665, "learning_rate": 7.175797304403314e-07, "loss": 0.1199, "step": 50320 }, { "epoch": 2.534367289390201, "grad_norm": 2.821759879466252, "learning_rate": 7.160683086770909e-07, "loss": 0.1248, "step": 50330 }, { "epoch": 2.5348708394178963, "grad_norm": 4.358003360065237, "learning_rate": 7.145583575486875e-07, "loss": 0.1372, "step": 50340 }, { "epoch": 2.5353743894455913, "grad_norm": 2.651757540056817, "learning_rate": 7.130498775734767e-07, "loss": 0.1408, "step": 50350 }, { "epoch": 2.535877939473287, "grad_norm": 2.7661424076062775, "learning_rate": 7.115428692693033e-07, "loss": 0.1358, "step": 50360 }, { "epoch": 2.536381489500982, "grad_norm": 4.040806217627721, "learning_rate": 7.100373331535116e-07, "loss": 0.1155, "step": 50370 }, { "epoch": 2.536885039528677, "grad_norm": 3.373716914312955, "learning_rate": 7.08533269742937e-07, "loss": 0.1504, "step": 50380 }, { "epoch": 2.5373885895563726, "grad_norm": 5.6292036173755635, "learning_rate": 7.070306795539134e-07, "loss": 0.1404, "step": 50390 }, { "epoch": 2.5378921395840677, "grad_norm": 2.6636347406738627, "learning_rate": 7.055295631022647e-07, "loss": 0.1409, "step": 50400 }, { "epoch": 2.5383956896117628, "grad_norm": 0.7312227819312211, "learning_rate": 7.040299209033091e-07, "loss": 0.113, "step": 50410 }, { "epoch": 2.5388992396394583, "grad_norm": 3.343227202459289, "learning_rate": 7.025317534718612e-07, "loss": 0.1251, "step": 50420 }, { "epoch": 2.5394027896671534, "grad_norm": 4.243199506089884, "learning_rate": 7.010350613222306e-07, "loss": 0.127, "step": 50430 }, { "epoch": 2.5399063396948485, "grad_norm": 2.8041905209725115, "learning_rate": 6.99539844968214e-07, "loss": 0.1292, "step": 50440 }, { "epoch": 2.540409889722544, "grad_norm": 4.282741031687532, "learning_rate": 6.98046104923108e-07, "loss": 0.1288, "step": 50450 }, { "epoch": 2.540913439750239, "grad_norm": 4.173766364906859, "learning_rate": 6.965538416997014e-07, "loss": 0.1287, "step": 50460 }, { "epoch": 2.541416989777934, "grad_norm": 3.2508543840648367, "learning_rate": 6.950630558102722e-07, "loss": 0.1134, "step": 50470 }, { "epoch": 2.5419205398056297, "grad_norm": 4.665775135783813, "learning_rate": 6.935737477665933e-07, "loss": 0.1187, "step": 50480 }, { "epoch": 2.542424089833325, "grad_norm": 3.6478345960972915, "learning_rate": 6.920859180799322e-07, "loss": 0.1258, "step": 50490 }, { "epoch": 2.5429276398610203, "grad_norm": 3.172515743757613, "learning_rate": 6.905995672610471e-07, "loss": 0.1283, "step": 50500 }, { "epoch": 2.5434311898887154, "grad_norm": 2.2162349417982945, "learning_rate": 6.891146958201878e-07, "loss": 0.1243, "step": 50510 }, { "epoch": 2.543934739916411, "grad_norm": 2.7670809511584173, "learning_rate": 6.876313042670979e-07, "loss": 0.1176, "step": 50520 }, { "epoch": 2.544438289944106, "grad_norm": 1.7525440861138182, "learning_rate": 6.861493931110119e-07, "loss": 0.1513, "step": 50530 }, { "epoch": 2.544941839971801, "grad_norm": 2.224184737849494, "learning_rate": 6.846689628606578e-07, "loss": 0.1155, "step": 50540 }, { "epoch": 2.5454453899994967, "grad_norm": 2.9037614638371427, "learning_rate": 6.831900140242532e-07, "loss": 0.1531, "step": 50550 }, { "epoch": 2.5459489400271917, "grad_norm": 5.690526364167842, "learning_rate": 6.81712547109506e-07, "loss": 0.134, "step": 50560 }, { "epoch": 2.546452490054887, "grad_norm": 3.530115721319504, "learning_rate": 6.802365626236185e-07, "loss": 0.1396, "step": 50570 }, { "epoch": 2.5469560400825824, "grad_norm": 3.004112824046679, "learning_rate": 6.787620610732837e-07, "loss": 0.1137, "step": 50580 }, { "epoch": 2.5474595901102774, "grad_norm": 3.0593740410461416, "learning_rate": 6.772890429646828e-07, "loss": 0.1387, "step": 50590 }, { "epoch": 2.5479631401379725, "grad_norm": 3.800861246488124, "learning_rate": 6.758175088034902e-07, "loss": 0.1461, "step": 50600 }, { "epoch": 2.548466690165668, "grad_norm": 4.709294902472402, "learning_rate": 6.743474590948718e-07, "loss": 0.1453, "step": 50610 }, { "epoch": 2.548970240193363, "grad_norm": 4.455968788097993, "learning_rate": 6.728788943434805e-07, "loss": 0.1223, "step": 50620 }, { "epoch": 2.5494737902210582, "grad_norm": 3.754245874228506, "learning_rate": 6.714118150534609e-07, "loss": 0.1284, "step": 50630 }, { "epoch": 2.5499773402487538, "grad_norm": 3.567247961965183, "learning_rate": 6.699462217284492e-07, "loss": 0.1438, "step": 50640 }, { "epoch": 2.550480890276449, "grad_norm": 2.116096347233671, "learning_rate": 6.684821148715692e-07, "loss": 0.1057, "step": 50650 }, { "epoch": 2.550984440304144, "grad_norm": 2.6780709775198206, "learning_rate": 6.670194949854375e-07, "loss": 0.1432, "step": 50660 }, { "epoch": 2.5514879903318395, "grad_norm": 4.315842184489202, "learning_rate": 6.655583625721557e-07, "loss": 0.1382, "step": 50670 }, { "epoch": 2.5519915403595346, "grad_norm": 3.2415167800111004, "learning_rate": 6.640987181333197e-07, "loss": 0.1371, "step": 50680 }, { "epoch": 2.55249509038723, "grad_norm": 3.8359149948297486, "learning_rate": 6.6264056217001e-07, "loss": 0.1207, "step": 50690 }, { "epoch": 2.552998640414925, "grad_norm": 3.730622998249473, "learning_rate": 6.611838951827998e-07, "loss": 0.1514, "step": 50700 }, { "epoch": 2.5535021904426207, "grad_norm": 3.4163355657868304, "learning_rate": 6.597287176717482e-07, "loss": 0.1164, "step": 50710 }, { "epoch": 2.554005740470316, "grad_norm": 3.356819730466131, "learning_rate": 6.582750301364055e-07, "loss": 0.1346, "step": 50720 }, { "epoch": 2.554509290498011, "grad_norm": 2.902280957091221, "learning_rate": 6.568228330758097e-07, "loss": 0.1589, "step": 50730 }, { "epoch": 2.5550128405257064, "grad_norm": 3.9396687422578993, "learning_rate": 6.553721269884861e-07, "loss": 0.1259, "step": 50740 }, { "epoch": 2.5555163905534015, "grad_norm": 2.343967234507552, "learning_rate": 6.5392291237245e-07, "loss": 0.1162, "step": 50750 }, { "epoch": 2.5560199405810966, "grad_norm": 3.4192891018266898, "learning_rate": 6.524751897252013e-07, "loss": 0.1294, "step": 50760 }, { "epoch": 2.556523490608792, "grad_norm": 3.968053656707846, "learning_rate": 6.51028959543732e-07, "loss": 0.1391, "step": 50770 }, { "epoch": 2.5570270406364872, "grad_norm": 3.9544973457167925, "learning_rate": 6.495842223245203e-07, "loss": 0.1376, "step": 50780 }, { "epoch": 2.5575305906641823, "grad_norm": 2.770417277332806, "learning_rate": 6.481409785635295e-07, "loss": 0.1064, "step": 50790 }, { "epoch": 2.558034140691878, "grad_norm": 2.9790372651056702, "learning_rate": 6.46699228756213e-07, "loss": 0.1317, "step": 50800 }, { "epoch": 2.558537690719573, "grad_norm": 2.974218054946437, "learning_rate": 6.452589733975112e-07, "loss": 0.1671, "step": 50810 }, { "epoch": 2.559041240747268, "grad_norm": 3.0062289757905765, "learning_rate": 6.438202129818499e-07, "loss": 0.1389, "step": 50820 }, { "epoch": 2.5595447907749636, "grad_norm": 4.808306025603254, "learning_rate": 6.423829480031413e-07, "loss": 0.1185, "step": 50830 }, { "epoch": 2.5600483408026586, "grad_norm": 5.144338957327568, "learning_rate": 6.409471789547867e-07, "loss": 0.1097, "step": 50840 }, { "epoch": 2.5605518908303537, "grad_norm": 2.7652289826568857, "learning_rate": 6.395129063296735e-07, "loss": 0.1275, "step": 50850 }, { "epoch": 2.5610554408580493, "grad_norm": 5.73925312883311, "learning_rate": 6.380801306201722e-07, "loss": 0.1358, "step": 50860 }, { "epoch": 2.5615589908857443, "grad_norm": 2.459425029621209, "learning_rate": 6.366488523181435e-07, "loss": 0.1505, "step": 50870 }, { "epoch": 2.56206254091344, "grad_norm": 3.711324699286197, "learning_rate": 6.352190719149321e-07, "loss": 0.1532, "step": 50880 }, { "epoch": 2.562566090941135, "grad_norm": 2.9582898044175563, "learning_rate": 6.337907899013684e-07, "loss": 0.1351, "step": 50890 }, { "epoch": 2.5630696409688305, "grad_norm": 2.65980380029382, "learning_rate": 6.323640067677672e-07, "loss": 0.1232, "step": 50900 }, { "epoch": 2.5635731909965256, "grad_norm": 3.058698265358002, "learning_rate": 6.309387230039315e-07, "loss": 0.1628, "step": 50910 }, { "epoch": 2.5640767410242207, "grad_norm": 3.534721692636101, "learning_rate": 6.295149390991484e-07, "loss": 0.15, "step": 50920 }, { "epoch": 2.564580291051916, "grad_norm": 4.1255187205220745, "learning_rate": 6.280926555421901e-07, "loss": 0.1147, "step": 50930 }, { "epoch": 2.5650838410796113, "grad_norm": 3.215067910885128, "learning_rate": 6.266718728213128e-07, "loss": 0.1321, "step": 50940 }, { "epoch": 2.5655873911073064, "grad_norm": 4.256112389125472, "learning_rate": 6.252525914242597e-07, "loss": 0.1246, "step": 50950 }, { "epoch": 2.566090941135002, "grad_norm": 2.14188270313599, "learning_rate": 6.238348118382554e-07, "loss": 0.1242, "step": 50960 }, { "epoch": 2.566594491162697, "grad_norm": 3.3123742309417104, "learning_rate": 6.224185345500122e-07, "loss": 0.1082, "step": 50970 }, { "epoch": 2.567098041190392, "grad_norm": 3.6224026042284865, "learning_rate": 6.210037600457242e-07, "loss": 0.1551, "step": 50980 }, { "epoch": 2.5676015912180876, "grad_norm": 3.2846515948540342, "learning_rate": 6.19590488811071e-07, "loss": 0.1241, "step": 50990 }, { "epoch": 2.5681051412457827, "grad_norm": 3.345676902739108, "learning_rate": 6.181787213312157e-07, "loss": 0.1345, "step": 51000 }, { "epoch": 2.568608691273478, "grad_norm": 3.275492867496446, "learning_rate": 6.167684580908062e-07, "loss": 0.1372, "step": 51010 }, { "epoch": 2.5691122413011733, "grad_norm": 2.8268240937609646, "learning_rate": 6.15359699573973e-07, "loss": 0.0938, "step": 51020 }, { "epoch": 2.5696157913288684, "grad_norm": 4.172777556801914, "learning_rate": 6.139524462643276e-07, "loss": 0.1543, "step": 51030 }, { "epoch": 2.5701193413565635, "grad_norm": 2.494350572064774, "learning_rate": 6.125466986449696e-07, "loss": 0.1336, "step": 51040 }, { "epoch": 2.570622891384259, "grad_norm": 3.347924153052813, "learning_rate": 6.111424571984797e-07, "loss": 0.1203, "step": 51050 }, { "epoch": 2.571126441411954, "grad_norm": 5.022550145719344, "learning_rate": 6.097397224069196e-07, "loss": 0.1313, "step": 51060 }, { "epoch": 2.5716299914396497, "grad_norm": 2.556328240798898, "learning_rate": 6.083384947518361e-07, "loss": 0.1169, "step": 51070 }, { "epoch": 2.5721335414673447, "grad_norm": 3.656220076299645, "learning_rate": 6.069387747142591e-07, "loss": 0.1334, "step": 51080 }, { "epoch": 2.5726370914950403, "grad_norm": 2.4232922078772403, "learning_rate": 6.055405627746985e-07, "loss": 0.124, "step": 51090 }, { "epoch": 2.5731406415227354, "grad_norm": 4.634665955375076, "learning_rate": 6.041438594131471e-07, "loss": 0.1323, "step": 51100 }, { "epoch": 2.5736441915504304, "grad_norm": 3.590952792308509, "learning_rate": 6.027486651090813e-07, "loss": 0.129, "step": 51110 }, { "epoch": 2.574147741578126, "grad_norm": 2.156929579626345, "learning_rate": 6.013549803414598e-07, "loss": 0.1243, "step": 51120 }, { "epoch": 2.574651291605821, "grad_norm": 3.4390765812428095, "learning_rate": 5.999628055887202e-07, "loss": 0.1211, "step": 51130 }, { "epoch": 2.575154841633516, "grad_norm": 3.7219718607013945, "learning_rate": 5.985721413287837e-07, "loss": 0.0922, "step": 51140 }, { "epoch": 2.5756583916612117, "grad_norm": 4.332713897643452, "learning_rate": 5.971829880390529e-07, "loss": 0.1138, "step": 51150 }, { "epoch": 2.5761619416889068, "grad_norm": 2.895131458935439, "learning_rate": 5.957953461964128e-07, "loss": 0.1108, "step": 51160 }, { "epoch": 2.576665491716602, "grad_norm": 1.706848650846916, "learning_rate": 5.94409216277227e-07, "loss": 0.1228, "step": 51170 }, { "epoch": 2.5771690417442974, "grad_norm": 6.532610825394472, "learning_rate": 5.930245987573408e-07, "loss": 0.1335, "step": 51180 }, { "epoch": 2.5776725917719925, "grad_norm": 4.168725061759106, "learning_rate": 5.916414941120807e-07, "loss": 0.1253, "step": 51190 }, { "epoch": 2.5781761417996876, "grad_norm": 4.107824706157052, "learning_rate": 5.902599028162564e-07, "loss": 0.1194, "step": 51200 }, { "epoch": 2.578679691827383, "grad_norm": 3.9439626848695575, "learning_rate": 5.88879825344153e-07, "loss": 0.106, "step": 51210 }, { "epoch": 2.579183241855078, "grad_norm": 3.0897792300307727, "learning_rate": 5.875012621695392e-07, "loss": 0.1305, "step": 51220 }, { "epoch": 2.5796867918827733, "grad_norm": 4.66670344805605, "learning_rate": 5.861242137656648e-07, "loss": 0.1488, "step": 51230 }, { "epoch": 2.580190341910469, "grad_norm": 2.868827641019338, "learning_rate": 5.84748680605256e-07, "loss": 0.1224, "step": 51240 }, { "epoch": 2.580693891938164, "grad_norm": 4.094795775192755, "learning_rate": 5.833746631605214e-07, "loss": 0.1395, "step": 51250 }, { "epoch": 2.5811974419658594, "grad_norm": 3.642141218962433, "learning_rate": 5.820021619031486e-07, "loss": 0.1533, "step": 51260 }, { "epoch": 2.5817009919935545, "grad_norm": 2.9801924299882954, "learning_rate": 5.80631177304305e-07, "loss": 0.1413, "step": 51270 }, { "epoch": 2.58220454202125, "grad_norm": 2.4335561825236174, "learning_rate": 5.792617098346381e-07, "loss": 0.1329, "step": 51280 }, { "epoch": 2.582708092048945, "grad_norm": 3.297526486968431, "learning_rate": 5.778937599642725e-07, "loss": 0.134, "step": 51290 }, { "epoch": 2.5832116420766402, "grad_norm": 3.653876650202346, "learning_rate": 5.765273281628136e-07, "loss": 0.1158, "step": 51300 }, { "epoch": 2.5837151921043358, "grad_norm": 2.368404682766187, "learning_rate": 5.751624148993439e-07, "loss": 0.126, "step": 51310 }, { "epoch": 2.584218742132031, "grad_norm": 3.912325762402839, "learning_rate": 5.737990206424276e-07, "loss": 0.133, "step": 51320 }, { "epoch": 2.584722292159726, "grad_norm": 3.968241084494943, "learning_rate": 5.724371458601036e-07, "loss": 0.1219, "step": 51330 }, { "epoch": 2.5852258421874215, "grad_norm": 2.919197770721398, "learning_rate": 5.710767910198916e-07, "loss": 0.1418, "step": 51340 }, { "epoch": 2.5857293922151166, "grad_norm": 5.005330884479191, "learning_rate": 5.697179565887912e-07, "loss": 0.142, "step": 51350 }, { "epoch": 2.5862329422428116, "grad_norm": 2.2725183262272197, "learning_rate": 5.683606430332744e-07, "loss": 0.1236, "step": 51360 }, { "epoch": 2.586736492270507, "grad_norm": 3.4116159631499134, "learning_rate": 5.670048508192976e-07, "loss": 0.1464, "step": 51370 }, { "epoch": 2.5872400422982023, "grad_norm": 4.018037970562904, "learning_rate": 5.656505804122902e-07, "loss": 0.1412, "step": 51380 }, { "epoch": 2.5877435923258973, "grad_norm": 2.9185693133673998, "learning_rate": 5.642978322771625e-07, "loss": 0.1293, "step": 51390 }, { "epoch": 2.588247142353593, "grad_norm": 3.924436082885266, "learning_rate": 5.629466068782985e-07, "loss": 0.1258, "step": 51400 }, { "epoch": 2.588750692381288, "grad_norm": 2.717165138184654, "learning_rate": 5.615969046795627e-07, "loss": 0.1234, "step": 51410 }, { "epoch": 2.5892542424089835, "grad_norm": 3.24685710327786, "learning_rate": 5.602487261442951e-07, "loss": 0.1237, "step": 51420 }, { "epoch": 2.5897577924366786, "grad_norm": 3.9510186152482234, "learning_rate": 5.589020717353155e-07, "loss": 0.1291, "step": 51430 }, { "epoch": 2.5902613424643737, "grad_norm": 3.9651191155896055, "learning_rate": 5.575569419149162e-07, "loss": 0.1234, "step": 51440 }, { "epoch": 2.590764892492069, "grad_norm": 2.6225297355701644, "learning_rate": 5.562133371448669e-07, "loss": 0.1253, "step": 51450 }, { "epoch": 2.5912684425197643, "grad_norm": 3.8553761897377874, "learning_rate": 5.548712578864168e-07, "loss": 0.155, "step": 51460 }, { "epoch": 2.59177199254746, "grad_norm": 3.6681580384762467, "learning_rate": 5.535307046002891e-07, "loss": 0.1286, "step": 51470 }, { "epoch": 2.592275542575155, "grad_norm": 2.921371987038666, "learning_rate": 5.521916777466829e-07, "loss": 0.1184, "step": 51480 }, { "epoch": 2.59277909260285, "grad_norm": 3.1615892005295807, "learning_rate": 5.508541777852744e-07, "loss": 0.0986, "step": 51490 }, { "epoch": 2.5932826426305455, "grad_norm": 3.178852121076554, "learning_rate": 5.495182051752162e-07, "loss": 0.1402, "step": 51500 }, { "epoch": 2.5937861926582406, "grad_norm": 1.647490126028173, "learning_rate": 5.481837603751344e-07, "loss": 0.1548, "step": 51510 }, { "epoch": 2.5942897426859357, "grad_norm": 3.9811463198073658, "learning_rate": 5.468508438431314e-07, "loss": 0.1295, "step": 51520 }, { "epoch": 2.5947932927136312, "grad_norm": 2.4862840779741053, "learning_rate": 5.455194560367854e-07, "loss": 0.1208, "step": 51530 }, { "epoch": 2.5952968427413263, "grad_norm": 3.2174126169534225, "learning_rate": 5.441895974131506e-07, "loss": 0.1572, "step": 51540 }, { "epoch": 2.5958003927690214, "grad_norm": 4.081532835584358, "learning_rate": 5.428612684287559e-07, "loss": 0.1254, "step": 51550 }, { "epoch": 2.596303942796717, "grad_norm": 3.2942537851174385, "learning_rate": 5.41534469539603e-07, "loss": 0.1222, "step": 51560 }, { "epoch": 2.596807492824412, "grad_norm": 3.290071156308279, "learning_rate": 5.402092012011718e-07, "loss": 0.1457, "step": 51570 }, { "epoch": 2.597311042852107, "grad_norm": 3.714481490719863, "learning_rate": 5.388854638684121e-07, "loss": 0.153, "step": 51580 }, { "epoch": 2.5978145928798027, "grad_norm": 4.87274806402534, "learning_rate": 5.375632579957535e-07, "loss": 0.1234, "step": 51590 }, { "epoch": 2.5983181429074977, "grad_norm": 2.69619762129169, "learning_rate": 5.362425840370955e-07, "loss": 0.117, "step": 51600 }, { "epoch": 2.5988216929351933, "grad_norm": 4.134164186003352, "learning_rate": 5.349234424458144e-07, "loss": 0.1171, "step": 51610 }, { "epoch": 2.5993252429628884, "grad_norm": 2.6845567605721006, "learning_rate": 5.336058336747602e-07, "loss": 0.1257, "step": 51620 }, { "epoch": 2.5998287929905834, "grad_norm": 4.171488672357661, "learning_rate": 5.322897581762543e-07, "loss": 0.1331, "step": 51630 }, { "epoch": 2.600332343018279, "grad_norm": 3.858137977083198, "learning_rate": 5.309752164020954e-07, "loss": 0.1301, "step": 51640 }, { "epoch": 2.600835893045974, "grad_norm": 3.6804020585374593, "learning_rate": 5.296622088035514e-07, "loss": 0.1179, "step": 51650 }, { "epoch": 2.6013394430736696, "grad_norm": 3.0689709418940545, "learning_rate": 5.283507358313672e-07, "loss": 0.167, "step": 51660 }, { "epoch": 2.6018429931013647, "grad_norm": 4.947298180463698, "learning_rate": 5.270407979357611e-07, "loss": 0.1528, "step": 51670 }, { "epoch": 2.6023465431290598, "grad_norm": 4.39343302994593, "learning_rate": 5.257323955664201e-07, "loss": 0.1106, "step": 51680 }, { "epoch": 2.6028500931567553, "grad_norm": 2.695885487986396, "learning_rate": 5.244255291725075e-07, "loss": 0.0978, "step": 51690 }, { "epoch": 2.6033536431844504, "grad_norm": 2.855475595260842, "learning_rate": 5.231201992026608e-07, "loss": 0.1345, "step": 51700 }, { "epoch": 2.6038571932121455, "grad_norm": 1.9043827315266535, "learning_rate": 5.218164061049863e-07, "loss": 0.1348, "step": 51710 }, { "epoch": 2.604360743239841, "grad_norm": 3.6843811536375277, "learning_rate": 5.205141503270639e-07, "loss": 0.1229, "step": 51720 }, { "epoch": 2.604864293267536, "grad_norm": 3.3739280626001666, "learning_rate": 5.192134323159459e-07, "loss": 0.1243, "step": 51730 }, { "epoch": 2.605367843295231, "grad_norm": 3.4294638185158166, "learning_rate": 5.17914252518159e-07, "loss": 0.1232, "step": 51740 }, { "epoch": 2.6058713933229267, "grad_norm": 2.104281662871457, "learning_rate": 5.166166113796977e-07, "loss": 0.1186, "step": 51750 }, { "epoch": 2.606374943350622, "grad_norm": 2.582463887717679, "learning_rate": 5.153205093460312e-07, "loss": 0.1603, "step": 51760 }, { "epoch": 2.606878493378317, "grad_norm": 3.3832744537535957, "learning_rate": 5.140259468621e-07, "loss": 0.1203, "step": 51770 }, { "epoch": 2.6073820434060124, "grad_norm": 3.8961449788847284, "learning_rate": 5.127329243723167e-07, "loss": 0.1447, "step": 51780 }, { "epoch": 2.6078855934337075, "grad_norm": 2.68280165695295, "learning_rate": 5.114414423205611e-07, "loss": 0.0981, "step": 51790 }, { "epoch": 2.608389143461403, "grad_norm": 3.9521615308571714, "learning_rate": 5.101515011501889e-07, "loss": 0.1384, "step": 51800 }, { "epoch": 2.608892693489098, "grad_norm": 1.463732168276141, "learning_rate": 5.088631013040252e-07, "loss": 0.117, "step": 51810 }, { "epoch": 2.6093962435167937, "grad_norm": 2.237616546918669, "learning_rate": 5.07576243224367e-07, "loss": 0.1398, "step": 51820 }, { "epoch": 2.6098997935444888, "grad_norm": 2.5688175664564445, "learning_rate": 5.062909273529792e-07, "loss": 0.1151, "step": 51830 }, { "epoch": 2.610403343572184, "grad_norm": 3.538456726075555, "learning_rate": 5.050071541310997e-07, "loss": 0.1251, "step": 51840 }, { "epoch": 2.6109068935998794, "grad_norm": 3.745318561491886, "learning_rate": 5.037249239994369e-07, "loss": 0.1386, "step": 51850 }, { "epoch": 2.6114104436275745, "grad_norm": 2.507743523999104, "learning_rate": 5.024442373981686e-07, "loss": 0.1218, "step": 51860 }, { "epoch": 2.6119139936552696, "grad_norm": 2.7444858735603703, "learning_rate": 5.011650947669416e-07, "loss": 0.1443, "step": 51870 }, { "epoch": 2.612417543682965, "grad_norm": 3.4651029241829168, "learning_rate": 4.998874965448746e-07, "loss": 0.1359, "step": 51880 }, { "epoch": 2.61292109371066, "grad_norm": 4.571610148876709, "learning_rate": 4.986114431705558e-07, "loss": 0.1197, "step": 51890 }, { "epoch": 2.6134246437383553, "grad_norm": 3.6087103678446546, "learning_rate": 4.973369350820434e-07, "loss": 0.1493, "step": 51900 }, { "epoch": 2.613928193766051, "grad_norm": 4.250021503058258, "learning_rate": 4.960639727168626e-07, "loss": 0.0959, "step": 51910 }, { "epoch": 2.614431743793746, "grad_norm": 4.012144944889152, "learning_rate": 4.94792556512012e-07, "loss": 0.1268, "step": 51920 }, { "epoch": 2.614935293821441, "grad_norm": 3.376989720651483, "learning_rate": 4.935226869039555e-07, "loss": 0.124, "step": 51930 }, { "epoch": 2.6154388438491365, "grad_norm": 4.181111918586176, "learning_rate": 4.922543643286288e-07, "loss": 0.1152, "step": 51940 }, { "epoch": 2.6159423938768316, "grad_norm": 3.119076652718177, "learning_rate": 4.909875892214344e-07, "loss": 0.1166, "step": 51950 }, { "epoch": 2.6164459439045267, "grad_norm": 2.2173243887950687, "learning_rate": 4.897223620172453e-07, "loss": 0.1239, "step": 51960 }, { "epoch": 2.616949493932222, "grad_norm": 4.390396759229608, "learning_rate": 4.88458683150404e-07, "loss": 0.1622, "step": 51970 }, { "epoch": 2.6174530439599173, "grad_norm": 2.1702076893610136, "learning_rate": 4.871965530547173e-07, "loss": 0.117, "step": 51980 }, { "epoch": 2.617956593987613, "grad_norm": 4.744344500742061, "learning_rate": 4.859359721634654e-07, "loss": 0.1544, "step": 51990 }, { "epoch": 2.618460144015308, "grad_norm": 4.312831763684049, "learning_rate": 4.846769409093921e-07, "loss": 0.1155, "step": 52000 }, { "epoch": 2.6189636940430034, "grad_norm": 2.7238389256178825, "learning_rate": 4.834194597247133e-07, "loss": 0.1098, "step": 52010 }, { "epoch": 2.6194672440706985, "grad_norm": 3.772622210709917, "learning_rate": 4.821635290411092e-07, "loss": 0.1211, "step": 52020 }, { "epoch": 2.6199707940983936, "grad_norm": 3.060901738307697, "learning_rate": 4.809091492897306e-07, "loss": 0.1383, "step": 52030 }, { "epoch": 2.620474344126089, "grad_norm": 4.823678707909192, "learning_rate": 4.796563209011939e-07, "loss": 0.1374, "step": 52040 }, { "epoch": 2.6209778941537842, "grad_norm": 2.9798331818135875, "learning_rate": 4.784050443055848e-07, "loss": 0.1153, "step": 52050 }, { "epoch": 2.6214814441814793, "grad_norm": 3.509886102321555, "learning_rate": 4.771553199324552e-07, "loss": 0.1378, "step": 52060 }, { "epoch": 2.621984994209175, "grad_norm": 3.8629188546657907, "learning_rate": 4.7590714821082185e-07, "loss": 0.1345, "step": 52070 }, { "epoch": 2.62248854423687, "grad_norm": 3.9870912923508417, "learning_rate": 4.7466052956917306e-07, "loss": 0.1361, "step": 52080 }, { "epoch": 2.622992094264565, "grad_norm": 3.9014764380404783, "learning_rate": 4.7341546443546104e-07, "loss": 0.1291, "step": 52090 }, { "epoch": 2.6234956442922606, "grad_norm": 2.904815484652446, "learning_rate": 4.7217195323710497e-07, "loss": 0.1466, "step": 52100 }, { "epoch": 2.6239991943199557, "grad_norm": 2.9738395489575646, "learning_rate": 4.7092999640099115e-07, "loss": 0.1271, "step": 52110 }, { "epoch": 2.6245027443476507, "grad_norm": 3.2883711274643526, "learning_rate": 4.6968959435347297e-07, "loss": 0.11, "step": 52120 }, { "epoch": 2.6250062943753463, "grad_norm": 3.6944426005696402, "learning_rate": 4.6845074752036866e-07, "loss": 0.096, "step": 52130 }, { "epoch": 2.6255098444030414, "grad_norm": 2.4112649652228635, "learning_rate": 4.6721345632696144e-07, "loss": 0.1216, "step": 52140 }, { "epoch": 2.6260133944307364, "grad_norm": 3.280230197249276, "learning_rate": 4.659777211980038e-07, "loss": 0.145, "step": 52150 }, { "epoch": 2.626516944458432, "grad_norm": 2.15295929776483, "learning_rate": 4.647435425577118e-07, "loss": 0.1065, "step": 52160 }, { "epoch": 2.627020494486127, "grad_norm": 3.997899939122418, "learning_rate": 4.6351092082976946e-07, "loss": 0.1356, "step": 52170 }, { "epoch": 2.6275240445138226, "grad_norm": 3.1764707000204027, "learning_rate": 4.622798564373221e-07, "loss": 0.1069, "step": 52180 }, { "epoch": 2.6280275945415177, "grad_norm": 2.8693079368265626, "learning_rate": 4.61050349802985e-07, "loss": 0.1239, "step": 52190 }, { "epoch": 2.628531144569213, "grad_norm": 3.263780533512294, "learning_rate": 4.5982240134883503e-07, "loss": 0.1097, "step": 52200 }, { "epoch": 2.6290346945969083, "grad_norm": 3.624266163754997, "learning_rate": 4.585960114964172e-07, "loss": 0.1283, "step": 52210 }, { "epoch": 2.6295382446246034, "grad_norm": 3.4873376402501344, "learning_rate": 4.5737118066673803e-07, "loss": 0.1228, "step": 52220 }, { "epoch": 2.630041794652299, "grad_norm": 4.466433096832931, "learning_rate": 4.561479092802723e-07, "loss": 0.1537, "step": 52230 }, { "epoch": 2.630545344679994, "grad_norm": 3.9467179447638974, "learning_rate": 4.54926197756958e-07, "loss": 0.1416, "step": 52240 }, { "epoch": 2.631048894707689, "grad_norm": 4.196954543923742, "learning_rate": 4.5370604651619687e-07, "loss": 0.1383, "step": 52250 }, { "epoch": 2.6315524447353846, "grad_norm": 3.6460032957456603, "learning_rate": 4.524874559768572e-07, "loss": 0.1706, "step": 52260 }, { "epoch": 2.6320559947630797, "grad_norm": 1.4807081723196167, "learning_rate": 4.512704265572676e-07, "loss": 0.1064, "step": 52270 }, { "epoch": 2.632559544790775, "grad_norm": 3.771317866584129, "learning_rate": 4.500549586752251e-07, "loss": 0.1278, "step": 52280 }, { "epoch": 2.6330630948184703, "grad_norm": 4.0926583356086095, "learning_rate": 4.4884105274798917e-07, "loss": 0.1414, "step": 52290 }, { "epoch": 2.6335666448461654, "grad_norm": 2.571142502545099, "learning_rate": 4.476287091922804e-07, "loss": 0.1044, "step": 52300 }, { "epoch": 2.6340701948738605, "grad_norm": 3.2809418811842237, "learning_rate": 4.46417928424287e-07, "loss": 0.1367, "step": 52310 }, { "epoch": 2.634573744901556, "grad_norm": 2.413155467832089, "learning_rate": 4.4520871085965975e-07, "loss": 0.0928, "step": 52320 }, { "epoch": 2.635077294929251, "grad_norm": 3.3959742337515544, "learning_rate": 4.4400105691351113e-07, "loss": 0.0963, "step": 52330 }, { "epoch": 2.6355808449569462, "grad_norm": 3.969977649842944, "learning_rate": 4.4279496700041666e-07, "loss": 0.124, "step": 52340 }, { "epoch": 2.6360843949846418, "grad_norm": 4.615290825834764, "learning_rate": 4.415904415344174e-07, "loss": 0.1611, "step": 52350 }, { "epoch": 2.636587945012337, "grad_norm": 3.583551613510951, "learning_rate": 4.4038748092901595e-07, "loss": 0.1349, "step": 52360 }, { "epoch": 2.6370914950400324, "grad_norm": 3.8457100182005175, "learning_rate": 4.391860855971769e-07, "loss": 0.1425, "step": 52370 }, { "epoch": 2.6375950450677275, "grad_norm": 3.265965265679711, "learning_rate": 4.3798625595132927e-07, "loss": 0.1232, "step": 52380 }, { "epoch": 2.638098595095423, "grad_norm": 3.8687064408342313, "learning_rate": 4.36787992403363e-07, "loss": 0.1421, "step": 52390 }, { "epoch": 2.638602145123118, "grad_norm": 3.212280294959656, "learning_rate": 4.3559129536463353e-07, "loss": 0.0956, "step": 52400 }, { "epoch": 2.639105695150813, "grad_norm": 4.434738242776092, "learning_rate": 4.343961652459516e-07, "loss": 0.1479, "step": 52410 }, { "epoch": 2.6396092451785087, "grad_norm": 4.461893922296695, "learning_rate": 4.332026024575975e-07, "loss": 0.126, "step": 52420 }, { "epoch": 2.640112795206204, "grad_norm": 2.9105431264612043, "learning_rate": 4.3201060740931e-07, "loss": 0.0923, "step": 52430 }, { "epoch": 2.640616345233899, "grad_norm": 2.5639859571561443, "learning_rate": 4.308201805102907e-07, "loss": 0.1199, "step": 52440 }, { "epoch": 2.6411198952615944, "grad_norm": 3.2849769017000723, "learning_rate": 4.2963132216920054e-07, "loss": 0.1416, "step": 52450 }, { "epoch": 2.6416234452892895, "grad_norm": 2.8636188530628752, "learning_rate": 4.284440327941658e-07, "loss": 0.145, "step": 52460 }, { "epoch": 2.6421269953169846, "grad_norm": 3.266989333451298, "learning_rate": 4.2725831279277153e-07, "loss": 0.1352, "step": 52470 }, { "epoch": 2.64263054534468, "grad_norm": 3.2583131933319027, "learning_rate": 4.2607416257206436e-07, "loss": 0.137, "step": 52480 }, { "epoch": 2.643134095372375, "grad_norm": 2.5509980602986264, "learning_rate": 4.2489158253855187e-07, "loss": 0.1142, "step": 52490 }, { "epoch": 2.6436376454000703, "grad_norm": 3.1313197124600696, "learning_rate": 4.237105730982038e-07, "loss": 0.1242, "step": 52500 }, { "epoch": 2.644141195427766, "grad_norm": 3.287932932680053, "learning_rate": 4.225311346564498e-07, "loss": 0.1254, "step": 52510 }, { "epoch": 2.644644745455461, "grad_norm": 3.369708634679517, "learning_rate": 4.213532676181803e-07, "loss": 0.1517, "step": 52520 }, { "epoch": 2.645148295483156, "grad_norm": 3.1554127981621964, "learning_rate": 4.201769723877458e-07, "loss": 0.1471, "step": 52530 }, { "epoch": 2.6456518455108515, "grad_norm": 3.0501553479281855, "learning_rate": 4.1900224936895886e-07, "loss": 0.1282, "step": 52540 }, { "epoch": 2.6461553955385466, "grad_norm": 1.9416935110226294, "learning_rate": 4.1782909896509016e-07, "loss": 0.1076, "step": 52550 }, { "epoch": 2.646658945566242, "grad_norm": 3.0903926366643777, "learning_rate": 4.166575215788732e-07, "loss": 0.1463, "step": 52560 }, { "epoch": 2.6471624955939372, "grad_norm": 4.113041895110331, "learning_rate": 4.1548751761249785e-07, "loss": 0.1387, "step": 52570 }, { "epoch": 2.6476660456216328, "grad_norm": 2.296689297517314, "learning_rate": 4.1431908746761676e-07, "loss": 0.108, "step": 52580 }, { "epoch": 2.648169595649328, "grad_norm": 2.1084075587595796, "learning_rate": 4.131522315453418e-07, "loss": 0.1124, "step": 52590 }, { "epoch": 2.648673145677023, "grad_norm": 3.8447333149370655, "learning_rate": 4.1198695024624314e-07, "loss": 0.1393, "step": 52600 }, { "epoch": 2.6491766957047185, "grad_norm": 2.8782243052727265, "learning_rate": 4.1082324397035187e-07, "loss": 0.1179, "step": 52610 }, { "epoch": 2.6496802457324136, "grad_norm": 3.24628335627322, "learning_rate": 4.096611131171574e-07, "loss": 0.1394, "step": 52620 }, { "epoch": 2.6501837957601087, "grad_norm": 3.6595901034784193, "learning_rate": 4.0850055808560994e-07, "loss": 0.1493, "step": 52630 }, { "epoch": 2.650687345787804, "grad_norm": 2.961029028387536, "learning_rate": 4.073415792741153e-07, "loss": 0.1176, "step": 52640 }, { "epoch": 2.6511908958154993, "grad_norm": 2.332960687229944, "learning_rate": 4.0618417708054136e-07, "loss": 0.1408, "step": 52650 }, { "epoch": 2.6516944458431944, "grad_norm": 3.2809516532129477, "learning_rate": 4.0502835190221415e-07, "loss": 0.117, "step": 52660 }, { "epoch": 2.65219799587089, "grad_norm": 2.1182718354415626, "learning_rate": 4.0387410413591844e-07, "loss": 0.1285, "step": 52670 }, { "epoch": 2.652701545898585, "grad_norm": 3.7517254048956956, "learning_rate": 4.027214341778968e-07, "loss": 0.112, "step": 52680 }, { "epoch": 2.65320509592628, "grad_norm": 4.671258452404152, "learning_rate": 4.0157034242384876e-07, "loss": 0.1541, "step": 52690 }, { "epoch": 2.6537086459539756, "grad_norm": 4.28949741202772, "learning_rate": 4.004208292689349e-07, "loss": 0.1289, "step": 52700 }, { "epoch": 2.6542121959816707, "grad_norm": 3.6513717150727825, "learning_rate": 3.992728951077729e-07, "loss": 0.157, "step": 52710 }, { "epoch": 2.6547157460093658, "grad_norm": 3.6216429716989325, "learning_rate": 3.9812654033443754e-07, "loss": 0.1348, "step": 52720 }, { "epoch": 2.6552192960370613, "grad_norm": 3.6501410298169144, "learning_rate": 3.969817653424618e-07, "loss": 0.1145, "step": 52730 }, { "epoch": 2.6557228460647564, "grad_norm": 2.2665288946016964, "learning_rate": 3.95838570524838e-07, "loss": 0.1143, "step": 52740 }, { "epoch": 2.656226396092452, "grad_norm": 4.070878156544372, "learning_rate": 3.946969562740138e-07, "loss": 0.1571, "step": 52750 }, { "epoch": 2.656729946120147, "grad_norm": 3.3199731017389666, "learning_rate": 3.935569229818931e-07, "loss": 0.1288, "step": 52760 }, { "epoch": 2.6572334961478425, "grad_norm": 3.203064874615494, "learning_rate": 3.9241847103984096e-07, "loss": 0.1235, "step": 52770 }, { "epoch": 2.6577370461755376, "grad_norm": 1.9140197048033614, "learning_rate": 3.9128160083867774e-07, "loss": 0.1233, "step": 52780 }, { "epoch": 2.6582405962032327, "grad_norm": 3.5762276108690636, "learning_rate": 3.9014631276868055e-07, "loss": 0.1368, "step": 52790 }, { "epoch": 2.6587441462309283, "grad_norm": 3.6055211864984815, "learning_rate": 3.890126072195821e-07, "loss": 0.1329, "step": 52800 }, { "epoch": 2.6592476962586233, "grad_norm": 4.245713096293712, "learning_rate": 3.8788048458057503e-07, "loss": 0.1489, "step": 52810 }, { "epoch": 2.6597512462863184, "grad_norm": 2.5909219230104203, "learning_rate": 3.8674994524030505e-07, "loss": 0.1224, "step": 52820 }, { "epoch": 2.660254796314014, "grad_norm": 3.3860173728387677, "learning_rate": 3.8562098958687786e-07, "loss": 0.1326, "step": 52830 }, { "epoch": 2.660758346341709, "grad_norm": 3.194663138453404, "learning_rate": 3.844936180078518e-07, "loss": 0.1289, "step": 52840 }, { "epoch": 2.661261896369404, "grad_norm": 5.256857376052878, "learning_rate": 3.833678308902439e-07, "loss": 0.1545, "step": 52850 }, { "epoch": 2.6617654463970997, "grad_norm": 1.9212156323973009, "learning_rate": 3.8224362862052834e-07, "loss": 0.1424, "step": 52860 }, { "epoch": 2.6622689964247948, "grad_norm": 3.7952851697619976, "learning_rate": 3.811210115846309e-07, "loss": 0.1452, "step": 52870 }, { "epoch": 2.66277254645249, "grad_norm": 3.0206690102855926, "learning_rate": 3.7999998016793825e-07, "loss": 0.1423, "step": 52880 }, { "epoch": 2.6632760964801854, "grad_norm": 2.772202435253115, "learning_rate": 3.788805347552876e-07, "loss": 0.1462, "step": 52890 }, { "epoch": 2.6637796465078805, "grad_norm": 2.7756500898013936, "learning_rate": 3.777626757309777e-07, "loss": 0.1026, "step": 52900 }, { "epoch": 2.6642831965355755, "grad_norm": 4.12850653370838, "learning_rate": 3.7664640347875594e-07, "loss": 0.1648, "step": 52910 }, { "epoch": 2.664786746563271, "grad_norm": 4.209135850084018, "learning_rate": 3.7553171838183077e-07, "loss": 0.1332, "step": 52920 }, { "epoch": 2.665290296590966, "grad_norm": 3.2780436906990915, "learning_rate": 3.744186208228623e-07, "loss": 0.1288, "step": 52930 }, { "epoch": 2.6657938466186617, "grad_norm": 1.6725374020437382, "learning_rate": 3.733071111839681e-07, "loss": 0.1413, "step": 52940 }, { "epoch": 2.666297396646357, "grad_norm": 2.98951572581262, "learning_rate": 3.7219718984671915e-07, "loss": 0.1389, "step": 52950 }, { "epoch": 2.6668009466740523, "grad_norm": 2.2730639771373236, "learning_rate": 3.710888571921395e-07, "loss": 0.1383, "step": 52960 }, { "epoch": 2.6673044967017474, "grad_norm": 2.0665078618796686, "learning_rate": 3.699821136007109e-07, "loss": 0.1252, "step": 52970 }, { "epoch": 2.6678080467294425, "grad_norm": 2.7333531413711696, "learning_rate": 3.6887695945236947e-07, "loss": 0.1326, "step": 52980 }, { "epoch": 2.668311596757138, "grad_norm": 2.2164358433889384, "learning_rate": 3.6777339512650225e-07, "loss": 0.1207, "step": 52990 }, { "epoch": 2.668815146784833, "grad_norm": 3.482836694958697, "learning_rate": 3.6667142100195454e-07, "loss": 0.1339, "step": 53000 }, { "epoch": 2.669318696812528, "grad_norm": 3.0637896048083775, "learning_rate": 3.6557103745702373e-07, "loss": 0.1386, "step": 53010 }, { "epoch": 2.6698222468402237, "grad_norm": 3.768541967769658, "learning_rate": 3.6447224486946267e-07, "loss": 0.1657, "step": 53020 }, { "epoch": 2.670325796867919, "grad_norm": 4.78116704229951, "learning_rate": 3.63375043616474e-07, "loss": 0.136, "step": 53030 }, { "epoch": 2.670829346895614, "grad_norm": 4.280512814966917, "learning_rate": 3.622794340747188e-07, "loss": 0.1192, "step": 53040 }, { "epoch": 2.6713328969233094, "grad_norm": 2.057052009763296, "learning_rate": 3.6118541662030935e-07, "loss": 0.1368, "step": 53050 }, { "epoch": 2.6718364469510045, "grad_norm": 2.681183244050382, "learning_rate": 3.6009299162881374e-07, "loss": 0.1283, "step": 53060 }, { "epoch": 2.6723399969786996, "grad_norm": 6.4873371060573595, "learning_rate": 3.590021594752485e-07, "loss": 0.1368, "step": 53070 }, { "epoch": 2.672843547006395, "grad_norm": 4.379348719514105, "learning_rate": 3.579129205340881e-07, "loss": 0.1471, "step": 53080 }, { "epoch": 2.6733470970340902, "grad_norm": 3.882380943067348, "learning_rate": 3.568252751792589e-07, "loss": 0.1459, "step": 53090 }, { "epoch": 2.6738506470617853, "grad_norm": 3.5570247979353424, "learning_rate": 3.557392237841389e-07, "loss": 0.1462, "step": 53100 }, { "epoch": 2.674354197089481, "grad_norm": 2.45348885777789, "learning_rate": 3.5465476672155883e-07, "loss": 0.1053, "step": 53110 }, { "epoch": 2.674857747117176, "grad_norm": 3.2587421153342673, "learning_rate": 3.535719043638042e-07, "loss": 0.1224, "step": 53120 }, { "epoch": 2.6753612971448715, "grad_norm": 4.4459984698830155, "learning_rate": 3.52490637082612e-07, "loss": 0.1389, "step": 53130 }, { "epoch": 2.6758648471725666, "grad_norm": 4.491688253006044, "learning_rate": 3.5141096524916983e-07, "loss": 0.0998, "step": 53140 }, { "epoch": 2.676368397200262, "grad_norm": 3.421407521183263, "learning_rate": 3.503328892341207e-07, "loss": 0.1077, "step": 53150 }, { "epoch": 2.676871947227957, "grad_norm": 4.097959113874258, "learning_rate": 3.49256409407559e-07, "loss": 0.1342, "step": 53160 }, { "epoch": 2.6773754972556523, "grad_norm": 3.003356867161627, "learning_rate": 3.4818152613902814e-07, "loss": 0.1044, "step": 53170 }, { "epoch": 2.677879047283348, "grad_norm": 2.976950183396446, "learning_rate": 3.471082397975284e-07, "loss": 0.1315, "step": 53180 }, { "epoch": 2.678382597311043, "grad_norm": 3.2562756596344156, "learning_rate": 3.4603655075150624e-07, "loss": 0.1494, "step": 53190 }, { "epoch": 2.678886147338738, "grad_norm": 4.9004535807365235, "learning_rate": 3.449664593688651e-07, "loss": 0.1494, "step": 53200 }, { "epoch": 2.6793896973664335, "grad_norm": 2.7694828435742713, "learning_rate": 3.438979660169578e-07, "loss": 0.1246, "step": 53210 }, { "epoch": 2.6798932473941286, "grad_norm": 4.057943773998003, "learning_rate": 3.428310710625865e-07, "loss": 0.1331, "step": 53220 }, { "epoch": 2.6803967974218237, "grad_norm": 3.4042653525472315, "learning_rate": 3.4176577487200824e-07, "loss": 0.1283, "step": 53230 }, { "epoch": 2.680900347449519, "grad_norm": 3.9346938929028816, "learning_rate": 3.407020778109282e-07, "loss": 0.1185, "step": 53240 }, { "epoch": 2.6814038974772143, "grad_norm": 3.986571984773047, "learning_rate": 3.396399802445055e-07, "loss": 0.1583, "step": 53250 }, { "epoch": 2.6819074475049094, "grad_norm": 3.5181434707833987, "learning_rate": 3.385794825373467e-07, "loss": 0.1223, "step": 53260 }, { "epoch": 2.682410997532605, "grad_norm": 3.372989085544673, "learning_rate": 3.375205850535124e-07, "loss": 0.1405, "step": 53270 }, { "epoch": 2.6829145475603, "grad_norm": 3.5994521570061937, "learning_rate": 3.364632881565116e-07, "loss": 0.1298, "step": 53280 }, { "epoch": 2.683418097587995, "grad_norm": 3.3253769428950277, "learning_rate": 3.354075922093075e-07, "loss": 0.1124, "step": 53290 }, { "epoch": 2.6839216476156906, "grad_norm": 2.4877844958229747, "learning_rate": 3.343534975743068e-07, "loss": 0.1055, "step": 53300 }, { "epoch": 2.6844251976433857, "grad_norm": 2.0586257122763114, "learning_rate": 3.3330100461337223e-07, "loss": 0.119, "step": 53310 }, { "epoch": 2.6849287476710813, "grad_norm": 3.0239071810494043, "learning_rate": 3.3225011368781533e-07, "loss": 0.1263, "step": 53320 }, { "epoch": 2.6854322976987763, "grad_norm": 3.3718021685321875, "learning_rate": 3.312008251583987e-07, "loss": 0.1258, "step": 53330 }, { "epoch": 2.685935847726472, "grad_norm": 1.4084935668914584, "learning_rate": 3.301531393853308e-07, "loss": 0.1548, "step": 53340 }, { "epoch": 2.686439397754167, "grad_norm": 3.1498912138090343, "learning_rate": 3.2910705672827446e-07, "loss": 0.1179, "step": 53350 }, { "epoch": 2.686942947781862, "grad_norm": 2.8539369995158705, "learning_rate": 3.280625775463409e-07, "loss": 0.16, "step": 53360 }, { "epoch": 2.6874464978095576, "grad_norm": 3.7243701312292217, "learning_rate": 3.270197021980892e-07, "loss": 0.1417, "step": 53370 }, { "epoch": 2.6879500478372527, "grad_norm": 3.8331249723604714, "learning_rate": 3.259784310415287e-07, "loss": 0.1452, "step": 53380 }, { "epoch": 2.6884535978649478, "grad_norm": 1.9462275202999963, "learning_rate": 3.2493876443411843e-07, "loss": 0.1103, "step": 53390 }, { "epoch": 2.6889571478926433, "grad_norm": 1.6412226024595336, "learning_rate": 3.239007027327673e-07, "loss": 0.1108, "step": 53400 }, { "epoch": 2.6894606979203384, "grad_norm": 3.236208459167144, "learning_rate": 3.228642462938325e-07, "loss": 0.134, "step": 53410 }, { "epoch": 2.6899642479480335, "grad_norm": 2.1814119409832844, "learning_rate": 3.218293954731189e-07, "loss": 0.1233, "step": 53420 }, { "epoch": 2.690467797975729, "grad_norm": 2.9968851491376074, "learning_rate": 3.207961506258833e-07, "loss": 0.1084, "step": 53430 }, { "epoch": 2.690971348003424, "grad_norm": 3.0331305821387806, "learning_rate": 3.197645121068271e-07, "loss": 0.1098, "step": 53440 }, { "epoch": 2.691474898031119, "grad_norm": 3.4448756369844777, "learning_rate": 3.1873448027010477e-07, "loss": 0.1384, "step": 53450 }, { "epoch": 2.6919784480588147, "grad_norm": 2.35593745530292, "learning_rate": 3.17706055469314e-07, "loss": 0.1182, "step": 53460 }, { "epoch": 2.69248199808651, "grad_norm": 2.908937904606744, "learning_rate": 3.166792380575062e-07, "loss": 0.1004, "step": 53470 }, { "epoch": 2.692985548114205, "grad_norm": 5.553338415005756, "learning_rate": 3.1565402838717786e-07, "loss": 0.1324, "step": 53480 }, { "epoch": 2.6934890981419004, "grad_norm": 2.654213478669284, "learning_rate": 3.146304268102729e-07, "loss": 0.1235, "step": 53490 }, { "epoch": 2.6939926481695955, "grad_norm": 2.6257751112490433, "learning_rate": 3.136084336781858e-07, "loss": 0.1156, "step": 53500 }, { "epoch": 2.694496198197291, "grad_norm": 2.8499782503718016, "learning_rate": 3.1258804934175713e-07, "loss": 0.1409, "step": 53510 }, { "epoch": 2.694999748224986, "grad_norm": 2.404342173706103, "learning_rate": 3.11569274151276e-07, "loss": 0.1298, "step": 53520 }, { "epoch": 2.6955032982526816, "grad_norm": 3.988478774854794, "learning_rate": 3.1055210845647723e-07, "loss": 0.175, "step": 53530 }, { "epoch": 2.6960068482803767, "grad_norm": 3.4934592786357928, "learning_rate": 3.095365526065458e-07, "loss": 0.1328, "step": 53540 }, { "epoch": 2.696510398308072, "grad_norm": 2.5281904227700562, "learning_rate": 3.085226069501124e-07, "loss": 0.128, "step": 53550 }, { "epoch": 2.6970139483357674, "grad_norm": 1.976096538370156, "learning_rate": 3.075102718352563e-07, "loss": 0.1325, "step": 53560 }, { "epoch": 2.6975174983634624, "grad_norm": 3.5351588775827865, "learning_rate": 3.064995476095023e-07, "loss": 0.1315, "step": 53570 }, { "epoch": 2.6980210483911575, "grad_norm": 4.89763998833217, "learning_rate": 3.0549043461982176e-07, "loss": 0.1508, "step": 53580 }, { "epoch": 2.698524598418853, "grad_norm": 3.146042991044506, "learning_rate": 3.044829332126348e-07, "loss": 0.1497, "step": 53590 }, { "epoch": 2.699028148446548, "grad_norm": 2.369223135029886, "learning_rate": 3.034770437338086e-07, "loss": 0.1356, "step": 53600 }, { "epoch": 2.6995316984742432, "grad_norm": 3.8189241241686456, "learning_rate": 3.024727665286542e-07, "loss": 0.1269, "step": 53610 }, { "epoch": 2.7000352485019388, "grad_norm": 4.047949083371686, "learning_rate": 3.0147010194193193e-07, "loss": 0.1304, "step": 53620 }, { "epoch": 2.700538798529634, "grad_norm": 2.4278645837212336, "learning_rate": 3.0046905031784756e-07, "loss": 0.1299, "step": 53630 }, { "epoch": 2.701042348557329, "grad_norm": 3.177693906375314, "learning_rate": 2.994696120000523e-07, "loss": 0.1169, "step": 53640 }, { "epoch": 2.7015458985850245, "grad_norm": 2.8779287560114364, "learning_rate": 2.9847178733164397e-07, "loss": 0.1152, "step": 53650 }, { "epoch": 2.7020494486127196, "grad_norm": 3.569285672468046, "learning_rate": 2.974755766551668e-07, "loss": 0.1398, "step": 53660 }, { "epoch": 2.7025529986404146, "grad_norm": 3.664314864355953, "learning_rate": 2.964809803126117e-07, "loss": 0.1486, "step": 53670 }, { "epoch": 2.70305654866811, "grad_norm": 4.094898936329762, "learning_rate": 2.954879986454151e-07, "loss": 0.1047, "step": 53680 }, { "epoch": 2.7035600986958053, "grad_norm": 3.719619699257869, "learning_rate": 2.9449663199445644e-07, "loss": 0.1614, "step": 53690 }, { "epoch": 2.704063648723501, "grad_norm": 3.018353000209018, "learning_rate": 2.9350688070006463e-07, "loss": 0.1358, "step": 53700 }, { "epoch": 2.704567198751196, "grad_norm": 2.210866996662321, "learning_rate": 2.925187451020123e-07, "loss": 0.118, "step": 53710 }, { "epoch": 2.7050707487788914, "grad_norm": 3.257979337482155, "learning_rate": 2.91532225539517e-07, "loss": 0.1354, "step": 53720 }, { "epoch": 2.7055742988065865, "grad_norm": 3.9614878900256745, "learning_rate": 2.905473223512406e-07, "loss": 0.1461, "step": 53730 }, { "epoch": 2.7060778488342816, "grad_norm": 3.152794542664037, "learning_rate": 2.8956403587529323e-07, "loss": 0.1132, "step": 53740 }, { "epoch": 2.706581398861977, "grad_norm": 3.945315255269679, "learning_rate": 2.8858236644922764e-07, "loss": 0.1261, "step": 53750 }, { "epoch": 2.707084948889672, "grad_norm": 3.462483882819807, "learning_rate": 2.8760231441004207e-07, "loss": 0.1183, "step": 53760 }, { "epoch": 2.7075884989173673, "grad_norm": 3.612655982682547, "learning_rate": 2.8662388009417895e-07, "loss": 0.1262, "step": 53770 }, { "epoch": 2.708092048945063, "grad_norm": 4.169175946950111, "learning_rate": 2.856470638375269e-07, "loss": 0.1282, "step": 53780 }, { "epoch": 2.708595598972758, "grad_norm": 1.5652187388206964, "learning_rate": 2.8467186597541716e-07, "loss": 0.0838, "step": 53790 }, { "epoch": 2.709099149000453, "grad_norm": 2.303397262157488, "learning_rate": 2.8369828684262743e-07, "loss": 0.101, "step": 53800 }, { "epoch": 2.7096026990281485, "grad_norm": 3.8050706056256822, "learning_rate": 2.827263267733771e-07, "loss": 0.1417, "step": 53810 }, { "epoch": 2.7101062490558436, "grad_norm": 3.2801931250336587, "learning_rate": 2.8175598610133193e-07, "loss": 0.1537, "step": 53820 }, { "epoch": 2.7106097990835387, "grad_norm": 2.804899832138392, "learning_rate": 2.8078726515960165e-07, "loss": 0.1042, "step": 53830 }, { "epoch": 2.7111133491112342, "grad_norm": 3.02866448553447, "learning_rate": 2.798201642807386e-07, "loss": 0.141, "step": 53840 }, { "epoch": 2.7116168991389293, "grad_norm": 3.4548507727864335, "learning_rate": 2.7885468379673987e-07, "loss": 0.1423, "step": 53850 }, { "epoch": 2.712120449166625, "grad_norm": 2.5939017519725813, "learning_rate": 2.77890824039046e-07, "loss": 0.1278, "step": 53860 }, { "epoch": 2.71262399919432, "grad_norm": 3.6963175209545818, "learning_rate": 2.7692858533854227e-07, "loss": 0.1202, "step": 53870 }, { "epoch": 2.713127549222015, "grad_norm": 2.8550756980701077, "learning_rate": 2.759679680255545e-07, "loss": 0.1102, "step": 53880 }, { "epoch": 2.7136310992497106, "grad_norm": 2.1737680930753895, "learning_rate": 2.7500897242985546e-07, "loss": 0.1649, "step": 53890 }, { "epoch": 2.7141346492774057, "grad_norm": 3.532036997100095, "learning_rate": 2.740515988806591e-07, "loss": 0.1488, "step": 53900 }, { "epoch": 2.714638199305101, "grad_norm": 3.3586233722258325, "learning_rate": 2.730958477066248e-07, "loss": 0.1534, "step": 53910 }, { "epoch": 2.7151417493327963, "grad_norm": 2.1869433004197028, "learning_rate": 2.7214171923584996e-07, "loss": 0.1037, "step": 53920 }, { "epoch": 2.7156452993604914, "grad_norm": 2.5145593995730553, "learning_rate": 2.7118921379588045e-07, "loss": 0.105, "step": 53930 }, { "epoch": 2.716148849388187, "grad_norm": 1.5438920157484033, "learning_rate": 2.7023833171370196e-07, "loss": 0.1037, "step": 53940 }, { "epoch": 2.716652399415882, "grad_norm": 3.7024092991078588, "learning_rate": 2.6928907331574437e-07, "loss": 0.1133, "step": 53950 }, { "epoch": 2.717155949443577, "grad_norm": 2.755128536201728, "learning_rate": 2.683414389278788e-07, "loss": 0.1089, "step": 53960 }, { "epoch": 2.7176594994712726, "grad_norm": 3.7648197759151034, "learning_rate": 2.6739542887541936e-07, "loss": 0.1403, "step": 53970 }, { "epoch": 2.7181630494989677, "grad_norm": 3.340859706918714, "learning_rate": 2.664510434831241e-07, "loss": 0.1408, "step": 53980 }, { "epoch": 2.718666599526663, "grad_norm": 2.680952183790929, "learning_rate": 2.655082830751909e-07, "loss": 0.1127, "step": 53990 }, { "epoch": 2.7191701495543583, "grad_norm": 4.169803072375636, "learning_rate": 2.6456714797525983e-07, "loss": 0.1129, "step": 54000 }, { "epoch": 2.7196736995820534, "grad_norm": 3.454091524301666, "learning_rate": 2.636276385064157e-07, "loss": 0.1586, "step": 54010 }, { "epoch": 2.7201772496097485, "grad_norm": 3.5971501088029267, "learning_rate": 2.6268975499118333e-07, "loss": 0.1331, "step": 54020 }, { "epoch": 2.720680799637444, "grad_norm": 2.5846717638737795, "learning_rate": 2.6175349775152845e-07, "loss": 0.1361, "step": 54030 }, { "epoch": 2.721184349665139, "grad_norm": 4.22763570376573, "learning_rate": 2.608188671088607e-07, "loss": 0.1494, "step": 54040 }, { "epoch": 2.7216878996928346, "grad_norm": 2.7165251224904345, "learning_rate": 2.5988586338403066e-07, "loss": 0.1112, "step": 54050 }, { "epoch": 2.7221914497205297, "grad_norm": 2.3889905658300945, "learning_rate": 2.589544868973287e-07, "loss": 0.1207, "step": 54060 }, { "epoch": 2.722694999748225, "grad_norm": 2.484386037702849, "learning_rate": 2.5802473796848917e-07, "loss": 0.115, "step": 54070 }, { "epoch": 2.7231985497759204, "grad_norm": 1.8092203394540205, "learning_rate": 2.570966169166855e-07, "loss": 0.141, "step": 54080 }, { "epoch": 2.7237020998036154, "grad_norm": 2.9374211418481293, "learning_rate": 2.5617012406053355e-07, "loss": 0.1617, "step": 54090 }, { "epoch": 2.724205649831311, "grad_norm": 3.6977994531406657, "learning_rate": 2.55245259718091e-07, "loss": 0.1367, "step": 54100 }, { "epoch": 2.724709199859006, "grad_norm": 4.377136054371525, "learning_rate": 2.543220242068528e-07, "loss": 0.1252, "step": 54110 }, { "epoch": 2.725212749886701, "grad_norm": 2.3088264000711307, "learning_rate": 2.5340041784376045e-07, "loss": 0.1315, "step": 54120 }, { "epoch": 2.7257162999143967, "grad_norm": 4.552401852322472, "learning_rate": 2.524804409451903e-07, "loss": 0.1224, "step": 54130 }, { "epoch": 2.7262198499420918, "grad_norm": 3.0526287318187206, "learning_rate": 2.515620938269642e-07, "loss": 0.14, "step": 54140 }, { "epoch": 2.726723399969787, "grad_norm": 4.429245708463113, "learning_rate": 2.506453768043404e-07, "loss": 0.163, "step": 54150 }, { "epoch": 2.7272269499974824, "grad_norm": 3.088090623806019, "learning_rate": 2.4973029019202067e-07, "loss": 0.1284, "step": 54160 }, { "epoch": 2.7277305000251775, "grad_norm": 1.7569609331443736, "learning_rate": 2.4881683430414526e-07, "loss": 0.1223, "step": 54170 }, { "epoch": 2.7282340500528726, "grad_norm": 2.6806554390570136, "learning_rate": 2.479050094542973e-07, "loss": 0.1319, "step": 54180 }, { "epoch": 2.728737600080568, "grad_norm": 2.7023891570305345, "learning_rate": 2.469948159554963e-07, "loss": 0.143, "step": 54190 }, { "epoch": 2.729241150108263, "grad_norm": 1.8409005049153395, "learning_rate": 2.4608625412020246e-07, "loss": 0.0946, "step": 54200 }, { "epoch": 2.7297447001359583, "grad_norm": 2.886619650987153, "learning_rate": 2.4517932426031844e-07, "loss": 0.1252, "step": 54210 }, { "epoch": 2.730248250163654, "grad_norm": 1.7757653831445606, "learning_rate": 2.442740266871851e-07, "loss": 0.0889, "step": 54220 }, { "epoch": 2.730751800191349, "grad_norm": 3.3610692835679936, "learning_rate": 2.4337036171158124e-07, "loss": 0.1172, "step": 54230 }, { "epoch": 2.7312553502190444, "grad_norm": 4.2792853686232375, "learning_rate": 2.424683296437286e-07, "loss": 0.1195, "step": 54240 }, { "epoch": 2.7317589002467395, "grad_norm": 1.0519485276722722, "learning_rate": 2.4156793079328613e-07, "loss": 0.1163, "step": 54250 }, { "epoch": 2.732262450274435, "grad_norm": 3.566886188235774, "learning_rate": 2.4066916546935216e-07, "loss": 0.1333, "step": 54260 }, { "epoch": 2.73276600030213, "grad_norm": 2.806282044635318, "learning_rate": 2.397720339804649e-07, "loss": 0.1369, "step": 54270 }, { "epoch": 2.733269550329825, "grad_norm": 3.1908064680005435, "learning_rate": 2.3887653663460075e-07, "loss": 0.1313, "step": 54280 }, { "epoch": 2.7337731003575207, "grad_norm": 3.682196751302265, "learning_rate": 2.3798267373917661e-07, "loss": 0.1135, "step": 54290 }, { "epoch": 2.734276650385216, "grad_norm": 3.0556295892817142, "learning_rate": 2.370904456010481e-07, "loss": 0.1355, "step": 54300 }, { "epoch": 2.734780200412911, "grad_norm": 3.713190343980509, "learning_rate": 2.3619985252650745e-07, "loss": 0.1157, "step": 54310 }, { "epoch": 2.7352837504406065, "grad_norm": 3.1756867506282336, "learning_rate": 2.3531089482128843e-07, "loss": 0.1413, "step": 54320 }, { "epoch": 2.7357873004683015, "grad_norm": 3.4357429806499336, "learning_rate": 2.3442357279056194e-07, "loss": 0.1272, "step": 54330 }, { "epoch": 2.7362908504959966, "grad_norm": 4.104647491716876, "learning_rate": 2.3353788673893761e-07, "loss": 0.1454, "step": 54340 }, { "epoch": 2.736794400523692, "grad_norm": 2.953502018833409, "learning_rate": 2.326538369704623e-07, "loss": 0.1292, "step": 54350 }, { "epoch": 2.7372979505513872, "grad_norm": 4.059180807283703, "learning_rate": 2.3177142378862316e-07, "loss": 0.1412, "step": 54360 }, { "epoch": 2.7378015005790823, "grad_norm": 2.0977077453570208, "learning_rate": 2.3089064749634515e-07, "loss": 0.1354, "step": 54370 }, { "epoch": 2.738305050606778, "grad_norm": 3.410111771747539, "learning_rate": 2.3001150839598973e-07, "loss": 0.1212, "step": 54380 }, { "epoch": 2.738808600634473, "grad_norm": 2.989704150888174, "learning_rate": 2.2913400678935772e-07, "loss": 0.1721, "step": 54390 }, { "epoch": 2.739312150662168, "grad_norm": 3.2307928081894794, "learning_rate": 2.2825814297768811e-07, "loss": 0.1162, "step": 54400 }, { "epoch": 2.7398157006898636, "grad_norm": 4.498131618219736, "learning_rate": 2.2738391726165653e-07, "loss": 0.1549, "step": 54410 }, { "epoch": 2.7403192507175587, "grad_norm": 4.859066757111399, "learning_rate": 2.2651132994137626e-07, "loss": 0.109, "step": 54420 }, { "epoch": 2.740822800745254, "grad_norm": 2.410977298244876, "learning_rate": 2.2564038131639876e-07, "loss": 0.1127, "step": 54430 }, { "epoch": 2.7413263507729493, "grad_norm": 4.440018923862468, "learning_rate": 2.247710716857132e-07, "loss": 0.1257, "step": 54440 }, { "epoch": 2.741829900800645, "grad_norm": 3.2807714177267746, "learning_rate": 2.239034013477459e-07, "loss": 0.1474, "step": 54450 }, { "epoch": 2.74233345082834, "grad_norm": 3.176199534470513, "learning_rate": 2.2303737060035967e-07, "loss": 0.1252, "step": 54460 }, { "epoch": 2.742837000856035, "grad_norm": 3.7922228899799144, "learning_rate": 2.2217297974085505e-07, "loss": 0.1365, "step": 54470 }, { "epoch": 2.7433405508837305, "grad_norm": 3.271951364502955, "learning_rate": 2.2131022906596965e-07, "loss": 0.119, "step": 54480 }, { "epoch": 2.7438441009114256, "grad_norm": 2.682391026539072, "learning_rate": 2.2044911887187825e-07, "loss": 0.1129, "step": 54490 }, { "epoch": 2.7443476509391207, "grad_norm": 2.670776379526727, "learning_rate": 2.1958964945419102e-07, "loss": 0.0989, "step": 54500 }, { "epoch": 2.7448512009668162, "grad_norm": 2.779781394834129, "learning_rate": 2.1873182110795698e-07, "loss": 0.119, "step": 54510 }, { "epoch": 2.7453547509945113, "grad_norm": 2.317788156264517, "learning_rate": 2.178756341276611e-07, "loss": 0.1122, "step": 54520 }, { "epoch": 2.7458583010222064, "grad_norm": 2.7666524307032487, "learning_rate": 2.1702108880722494e-07, "loss": 0.1387, "step": 54530 }, { "epoch": 2.746361851049902, "grad_norm": 4.052471274095125, "learning_rate": 2.1616818544000438e-07, "loss": 0.1319, "step": 54540 }, { "epoch": 2.746865401077597, "grad_norm": 3.0421637728967457, "learning_rate": 2.1531692431879415e-07, "loss": 0.1167, "step": 54550 }, { "epoch": 2.747368951105292, "grad_norm": 3.3895269492933875, "learning_rate": 2.1446730573582486e-07, "loss": 0.102, "step": 54560 }, { "epoch": 2.7478725011329876, "grad_norm": 2.918840997000092, "learning_rate": 2.1361932998276326e-07, "loss": 0.1317, "step": 54570 }, { "epoch": 2.7483760511606827, "grad_norm": 3.6257664636105, "learning_rate": 2.1277299735071032e-07, "loss": 0.128, "step": 54580 }, { "epoch": 2.748879601188378, "grad_norm": 2.7420535742179735, "learning_rate": 2.1192830813020537e-07, "loss": 0.1209, "step": 54590 }, { "epoch": 2.7493831512160734, "grad_norm": 4.717793902197668, "learning_rate": 2.110852626112231e-07, "loss": 0.1463, "step": 54600 }, { "epoch": 2.7498867012437684, "grad_norm": 4.106164763846851, "learning_rate": 2.102438610831725e-07, "loss": 0.125, "step": 54610 }, { "epoch": 2.750390251271464, "grad_norm": 3.4476642466434013, "learning_rate": 2.0940410383489874e-07, "loss": 0.1402, "step": 54620 }, { "epoch": 2.750893801299159, "grad_norm": 2.399680145814071, "learning_rate": 2.0856599115468278e-07, "loss": 0.1119, "step": 54630 }, { "epoch": 2.7513973513268546, "grad_norm": 2.669643199347116, "learning_rate": 2.0772952333024288e-07, "loss": 0.1442, "step": 54640 }, { "epoch": 2.7519009013545497, "grad_norm": 3.47400794246201, "learning_rate": 2.0689470064872874e-07, "loss": 0.1539, "step": 54650 }, { "epoch": 2.7524044513822448, "grad_norm": 4.1726846759805225, "learning_rate": 2.0606152339672837e-07, "loss": 0.1461, "step": 54660 }, { "epoch": 2.7529080014099403, "grad_norm": 3.594724779809063, "learning_rate": 2.0522999186026404e-07, "loss": 0.1194, "step": 54670 }, { "epoch": 2.7534115514376354, "grad_norm": 3.489215477230806, "learning_rate": 2.0440010632479245e-07, "loss": 0.1221, "step": 54680 }, { "epoch": 2.7539151014653305, "grad_norm": 2.9577361892530036, "learning_rate": 2.0357186707520682e-07, "loss": 0.1209, "step": 54690 }, { "epoch": 2.754418651493026, "grad_norm": 5.151408015856331, "learning_rate": 2.0274527439583302e-07, "loss": 0.1114, "step": 54700 }, { "epoch": 2.754922201520721, "grad_norm": 3.3178588187538396, "learning_rate": 2.0192032857043298e-07, "loss": 0.118, "step": 54710 }, { "epoch": 2.755425751548416, "grad_norm": 3.4779684092376364, "learning_rate": 2.0109702988220347e-07, "loss": 0.1258, "step": 54720 }, { "epoch": 2.7559293015761117, "grad_norm": 4.226044765285331, "learning_rate": 2.0027537861377511e-07, "loss": 0.1344, "step": 54730 }, { "epoch": 2.756432851603807, "grad_norm": 3.4269952147294838, "learning_rate": 1.9945537504721446e-07, "loss": 0.0982, "step": 54740 }, { "epoch": 2.756936401631502, "grad_norm": 2.9239346720930053, "learning_rate": 1.986370194640197e-07, "loss": 0.1035, "step": 54750 }, { "epoch": 2.7574399516591974, "grad_norm": 4.258537860617184, "learning_rate": 1.978203121451261e-07, "loss": 0.1463, "step": 54760 }, { "epoch": 2.7579435016868925, "grad_norm": 3.706586053026954, "learning_rate": 1.9700525337090048e-07, "loss": 0.1362, "step": 54770 }, { "epoch": 2.7584470517145876, "grad_norm": 3.798004099639001, "learning_rate": 1.9619184342114573e-07, "loss": 0.1385, "step": 54780 }, { "epoch": 2.758950601742283, "grad_norm": 3.5622001517090673, "learning_rate": 1.95380082575099e-07, "loss": 0.1378, "step": 54790 }, { "epoch": 2.759454151769978, "grad_norm": 3.1452003322150377, "learning_rate": 1.9456997111143018e-07, "loss": 0.1498, "step": 54800 }, { "epoch": 2.7599577017976737, "grad_norm": 3.6369818622191024, "learning_rate": 1.9376150930824233e-07, "loss": 0.15, "step": 54810 }, { "epoch": 2.760461251825369, "grad_norm": 2.469653891320747, "learning_rate": 1.9295469744307294e-07, "loss": 0.1153, "step": 54820 }, { "epoch": 2.7609648018530644, "grad_norm": 2.2519332780105903, "learning_rate": 1.9214953579289374e-07, "loss": 0.1137, "step": 54830 }, { "epoch": 2.7614683518807595, "grad_norm": 3.4097584769695324, "learning_rate": 1.9134602463411034e-07, "loss": 0.1363, "step": 54840 }, { "epoch": 2.7619719019084545, "grad_norm": 2.1346000279350617, "learning_rate": 1.9054416424255874e-07, "loss": 0.1333, "step": 54850 }, { "epoch": 2.76247545193615, "grad_norm": 3.876725517054935, "learning_rate": 1.897439548935115e-07, "loss": 0.121, "step": 54860 }, { "epoch": 2.762979001963845, "grad_norm": 4.088358915988732, "learning_rate": 1.889453968616739e-07, "loss": 0.166, "step": 54870 }, { "epoch": 2.7634825519915402, "grad_norm": 3.9340013101840627, "learning_rate": 1.8814849042118332e-07, "loss": 0.1384, "step": 54880 }, { "epoch": 2.763986102019236, "grad_norm": 3.2229282329012308, "learning_rate": 1.8735323584560872e-07, "loss": 0.1305, "step": 54890 }, { "epoch": 2.764489652046931, "grad_norm": 3.486746853324245, "learning_rate": 1.8655963340795557e-07, "loss": 0.1109, "step": 54900 }, { "epoch": 2.764993202074626, "grad_norm": 3.9838313078943304, "learning_rate": 1.8576768338065988e-07, "loss": 0.1297, "step": 54910 }, { "epoch": 2.7654967521023215, "grad_norm": 2.0126444814290885, "learning_rate": 1.849773860355919e-07, "loss": 0.1146, "step": 54920 }, { "epoch": 2.7660003021300166, "grad_norm": 3.7653329198673893, "learning_rate": 1.8418874164405187e-07, "loss": 0.1223, "step": 54930 }, { "epoch": 2.7665038521577117, "grad_norm": 4.932125527853174, "learning_rate": 1.834017504767749e-07, "loss": 0.1249, "step": 54940 }, { "epoch": 2.767007402185407, "grad_norm": 2.9978317750299883, "learning_rate": 1.8261641280392817e-07, "loss": 0.1383, "step": 54950 }, { "epoch": 2.7675109522131023, "grad_norm": 3.4383233405082896, "learning_rate": 1.818327288951116e-07, "loss": 0.1509, "step": 54960 }, { "epoch": 2.7680145022407974, "grad_norm": 4.150145937955468, "learning_rate": 1.81050699019355e-07, "loss": 0.1178, "step": 54970 }, { "epoch": 2.768518052268493, "grad_norm": 2.927420532678357, "learning_rate": 1.8027032344512309e-07, "loss": 0.1197, "step": 54980 }, { "epoch": 2.769021602296188, "grad_norm": 4.3964596714900495, "learning_rate": 1.7949160244031204e-07, "loss": 0.1166, "step": 54990 }, { "epoch": 2.7695251523238835, "grad_norm": 3.0493655157968473, "learning_rate": 1.787145362722492e-07, "loss": 0.1098, "step": 55000 }, { "epoch": 2.7700287023515786, "grad_norm": 1.6644944216842743, "learning_rate": 1.7793912520769395e-07, "loss": 0.1299, "step": 55010 }, { "epoch": 2.770532252379274, "grad_norm": 4.5082940294397655, "learning_rate": 1.7716536951283946e-07, "loss": 0.1675, "step": 55020 }, { "epoch": 2.7710358024069692, "grad_norm": 5.545936215396198, "learning_rate": 1.7639326945330771e-07, "loss": 0.1431, "step": 55030 }, { "epoch": 2.7715393524346643, "grad_norm": 2.500994686290897, "learning_rate": 1.7562282529415385e-07, "loss": 0.1251, "step": 55040 }, { "epoch": 2.77204290246236, "grad_norm": 3.7671647647029083, "learning_rate": 1.748540372998636e-07, "loss": 0.1451, "step": 55050 }, { "epoch": 2.772546452490055, "grad_norm": 2.8848827648617124, "learning_rate": 1.7408690573435638e-07, "loss": 0.1205, "step": 55060 }, { "epoch": 2.77305000251775, "grad_norm": 4.323044968369179, "learning_rate": 1.7332143086098098e-07, "loss": 0.1257, "step": 55070 }, { "epoch": 2.7735535525454456, "grad_norm": 2.516623978028847, "learning_rate": 1.7255761294251773e-07, "loss": 0.1121, "step": 55080 }, { "epoch": 2.7740571025731406, "grad_norm": 3.1537114449273247, "learning_rate": 1.7179545224117856e-07, "loss": 0.1341, "step": 55090 }, { "epoch": 2.7745606526008357, "grad_norm": 5.736415825479438, "learning_rate": 1.7103494901860584e-07, "loss": 0.1583, "step": 55100 }, { "epoch": 2.7750642026285313, "grad_norm": 2.6779529233262886, "learning_rate": 1.7027610353587464e-07, "loss": 0.1349, "step": 55110 }, { "epoch": 2.7755677526562264, "grad_norm": 2.653030374642613, "learning_rate": 1.6951891605348768e-07, "loss": 0.126, "step": 55120 }, { "epoch": 2.7760713026839214, "grad_norm": 3.9103339608597434, "learning_rate": 1.687633868313826e-07, "loss": 0.1321, "step": 55130 }, { "epoch": 2.776574852711617, "grad_norm": 2.8290774161985737, "learning_rate": 1.6800951612892469e-07, "loss": 0.1283, "step": 55140 }, { "epoch": 2.777078402739312, "grad_norm": 2.7793142290159163, "learning_rate": 1.6725730420491137e-07, "loss": 0.1229, "step": 55150 }, { "epoch": 2.777581952767007, "grad_norm": 1.9544911263124152, "learning_rate": 1.6650675131756943e-07, "loss": 0.1228, "step": 55160 }, { "epoch": 2.7780855027947027, "grad_norm": 3.9162330294852494, "learning_rate": 1.657578577245572e-07, "loss": 0.1317, "step": 55170 }, { "epoch": 2.7785890528223978, "grad_norm": 3.4583025500334283, "learning_rate": 1.6501062368296293e-07, "loss": 0.1346, "step": 55180 }, { "epoch": 2.7790926028500933, "grad_norm": 2.2267455856687097, "learning_rate": 1.6426504944930644e-07, "loss": 0.116, "step": 55190 }, { "epoch": 2.7795961528777884, "grad_norm": 2.6341885787911368, "learning_rate": 1.6352113527953518e-07, "loss": 0.1172, "step": 55200 }, { "epoch": 2.780099702905484, "grad_norm": 4.022095084021262, "learning_rate": 1.627788814290282e-07, "loss": 0.1482, "step": 55210 }, { "epoch": 2.780603252933179, "grad_norm": 4.438591250852114, "learning_rate": 1.620382881525956e-07, "loss": 0.1507, "step": 55220 }, { "epoch": 2.781106802960874, "grad_norm": 1.7770497318485825, "learning_rate": 1.6129935570447565e-07, "loss": 0.1215, "step": 55230 }, { "epoch": 2.7816103529885696, "grad_norm": 3.220885461180971, "learning_rate": 1.6056208433833709e-07, "loss": 0.1267, "step": 55240 }, { "epoch": 2.7821139030162647, "grad_norm": 3.190685797539234, "learning_rate": 1.5982647430727805e-07, "loss": 0.1285, "step": 55250 }, { "epoch": 2.78261745304396, "grad_norm": 2.7196918596381807, "learning_rate": 1.5909252586382763e-07, "loss": 0.1424, "step": 55260 }, { "epoch": 2.7831210030716553, "grad_norm": 4.1833929844491005, "learning_rate": 1.5836023925994315e-07, "loss": 0.1459, "step": 55270 }, { "epoch": 2.7836245530993504, "grad_norm": 2.2356518182445053, "learning_rate": 1.5762961474701187e-07, "loss": 0.1064, "step": 55280 }, { "epoch": 2.7841281031270455, "grad_norm": 3.2664904765043024, "learning_rate": 1.5690065257585153e-07, "loss": 0.1203, "step": 55290 }, { "epoch": 2.784631653154741, "grad_norm": 2.856812895628124, "learning_rate": 1.5617335299670688e-07, "loss": 0.1314, "step": 55300 }, { "epoch": 2.785135203182436, "grad_norm": 3.4894776090493385, "learning_rate": 1.5544771625925381e-07, "loss": 0.1543, "step": 55310 }, { "epoch": 2.785638753210131, "grad_norm": 4.411536874748575, "learning_rate": 1.5472374261259693e-07, "loss": 0.1615, "step": 55320 }, { "epoch": 2.7861423032378267, "grad_norm": 4.332678205112951, "learning_rate": 1.5400143230526965e-07, "loss": 0.13, "step": 55330 }, { "epoch": 2.786645853265522, "grad_norm": 3.6023129218553644, "learning_rate": 1.5328078558523528e-07, "loss": 0.1265, "step": 55340 }, { "epoch": 2.787149403293217, "grad_norm": 4.803510235593303, "learning_rate": 1.5256180269988375e-07, "loss": 0.103, "step": 55350 }, { "epoch": 2.7876529533209125, "grad_norm": 2.5081524263655335, "learning_rate": 1.51844483896037e-07, "loss": 0.1143, "step": 55360 }, { "epoch": 2.7881565033486075, "grad_norm": 3.1497461397094506, "learning_rate": 1.5112882941994257e-07, "loss": 0.1479, "step": 55370 }, { "epoch": 2.788660053376303, "grad_norm": 2.551885409014737, "learning_rate": 1.5041483951727943e-07, "loss": 0.1229, "step": 55380 }, { "epoch": 2.789163603403998, "grad_norm": 3.3844436742154462, "learning_rate": 1.497025144331532e-07, "loss": 0.1408, "step": 55390 }, { "epoch": 2.7896671534316937, "grad_norm": 3.2318626076862658, "learning_rate": 1.489918544120983e-07, "loss": 0.1506, "step": 55400 }, { "epoch": 2.790170703459389, "grad_norm": 2.3629577113698135, "learning_rate": 1.4828285969807789e-07, "loss": 0.126, "step": 55410 }, { "epoch": 2.790674253487084, "grad_norm": 4.496496567562995, "learning_rate": 1.4757553053448503e-07, "loss": 0.1411, "step": 55420 }, { "epoch": 2.7911778035147794, "grad_norm": 3.3648499168073323, "learning_rate": 1.4686986716413776e-07, "loss": 0.1424, "step": 55430 }, { "epoch": 2.7916813535424745, "grad_norm": 2.4961714025088257, "learning_rate": 1.4616586982928394e-07, "loss": 0.1248, "step": 55440 }, { "epoch": 2.7921849035701696, "grad_norm": 3.784084589844437, "learning_rate": 1.454635387715997e-07, "loss": 0.1135, "step": 55450 }, { "epoch": 2.792688453597865, "grad_norm": 3.7203962246084252, "learning_rate": 1.447628742321894e-07, "loss": 0.1287, "step": 55460 }, { "epoch": 2.79319200362556, "grad_norm": 2.9983603558297496, "learning_rate": 1.4406387645158393e-07, "loss": 0.1184, "step": 55470 }, { "epoch": 2.7936955536532553, "grad_norm": 4.523009203771879, "learning_rate": 1.433665456697436e-07, "loss": 0.1386, "step": 55480 }, { "epoch": 2.794199103680951, "grad_norm": 2.882555583647693, "learning_rate": 1.4267088212605583e-07, "loss": 0.1292, "step": 55490 }, { "epoch": 2.794702653708646, "grad_norm": 3.593563798810259, "learning_rate": 1.4197688605933512e-07, "loss": 0.114, "step": 55500 }, { "epoch": 2.795206203736341, "grad_norm": 3.297537618669366, "learning_rate": 1.4128455770782424e-07, "loss": 0.132, "step": 55510 }, { "epoch": 2.7957097537640365, "grad_norm": 3.814861218223084, "learning_rate": 1.4059389730919248e-07, "loss": 0.0995, "step": 55520 }, { "epoch": 2.7962133037917316, "grad_norm": 4.551421930874804, "learning_rate": 1.399049051005391e-07, "loss": 0.1633, "step": 55530 }, { "epoch": 2.7967168538194267, "grad_norm": 4.510247732922067, "learning_rate": 1.392175813183866e-07, "loss": 0.1303, "step": 55540 }, { "epoch": 2.7972204038471222, "grad_norm": 4.5278233788787725, "learning_rate": 1.385319261986884e-07, "loss": 0.1259, "step": 55550 }, { "epoch": 2.7977239538748173, "grad_norm": 2.2569118673902078, "learning_rate": 1.3784793997682343e-07, "loss": 0.148, "step": 55560 }, { "epoch": 2.798227503902513, "grad_norm": 3.5535706092888413, "learning_rate": 1.3716562288759895e-07, "loss": 0.1432, "step": 55570 }, { "epoch": 2.798731053930208, "grad_norm": 3.4370935967856107, "learning_rate": 1.364849751652464e-07, "loss": 0.1255, "step": 55580 }, { "epoch": 2.7992346039579035, "grad_norm": 4.936059630484256, "learning_rate": 1.3580599704342667e-07, "loss": 0.1194, "step": 55590 }, { "epoch": 2.7997381539855986, "grad_norm": 3.604456833094381, "learning_rate": 1.3512868875522667e-07, "loss": 0.1248, "step": 55600 }, { "epoch": 2.8002417040132936, "grad_norm": 4.007039546770037, "learning_rate": 1.3445305053316093e-07, "loss": 0.1433, "step": 55610 }, { "epoch": 2.800745254040989, "grad_norm": 3.5321027945874497, "learning_rate": 1.337790826091684e-07, "loss": 0.1167, "step": 55620 }, { "epoch": 2.8012488040686843, "grad_norm": 3.010125628354399, "learning_rate": 1.3310678521461784e-07, "loss": 0.1488, "step": 55630 }, { "epoch": 2.8017523540963793, "grad_norm": 2.8803957058767864, "learning_rate": 1.324361585803019e-07, "loss": 0.1486, "step": 55640 }, { "epoch": 2.802255904124075, "grad_norm": 3.315954937591406, "learning_rate": 1.3176720293644141e-07, "loss": 0.1093, "step": 55650 }, { "epoch": 2.80275945415177, "grad_norm": 4.401380728684812, "learning_rate": 1.3109991851268156e-07, "loss": 0.1139, "step": 55660 }, { "epoch": 2.803263004179465, "grad_norm": 1.9158041757010427, "learning_rate": 1.304343055380952e-07, "loss": 0.1347, "step": 55670 }, { "epoch": 2.8037665542071606, "grad_norm": 4.613489683675308, "learning_rate": 1.2977036424118183e-07, "loss": 0.1235, "step": 55680 }, { "epoch": 2.8042701042348557, "grad_norm": 5.519928433909799, "learning_rate": 1.2910809484986743e-07, "loss": 0.1423, "step": 55690 }, { "epoch": 2.8047736542625508, "grad_norm": 3.639016118076774, "learning_rate": 1.284474975915012e-07, "loss": 0.1033, "step": 55700 }, { "epoch": 2.8052772042902463, "grad_norm": 2.2701727894242127, "learning_rate": 1.277885726928607e-07, "loss": 0.1324, "step": 55710 }, { "epoch": 2.8057807543179414, "grad_norm": 3.4833842354757274, "learning_rate": 1.2713132038014886e-07, "loss": 0.1096, "step": 55720 }, { "epoch": 2.8062843043456365, "grad_norm": 1.5807256114646584, "learning_rate": 1.2647574087899516e-07, "loss": 0.0939, "step": 55730 }, { "epoch": 2.806787854373332, "grad_norm": 3.018284309930308, "learning_rate": 1.258218344144535e-07, "loss": 0.1237, "step": 55740 }, { "epoch": 2.807291404401027, "grad_norm": 4.276055148568038, "learning_rate": 1.2516960121100375e-07, "loss": 0.1431, "step": 55750 }, { "epoch": 2.8077949544287226, "grad_norm": 2.004933476796328, "learning_rate": 1.2451904149255233e-07, "loss": 0.1015, "step": 55760 }, { "epoch": 2.8082985044564177, "grad_norm": 3.6932010124700483, "learning_rate": 1.2387015548243065e-07, "loss": 0.1192, "step": 55770 }, { "epoch": 2.8088020544841132, "grad_norm": 3.1257620894334215, "learning_rate": 1.232229434033938e-07, "loss": 0.1511, "step": 55780 }, { "epoch": 2.8093056045118083, "grad_norm": 3.2458550392467354, "learning_rate": 1.225774054776252e-07, "loss": 0.1472, "step": 55790 }, { "epoch": 2.8098091545395034, "grad_norm": 4.494473299102171, "learning_rate": 1.21933541926732e-07, "loss": 0.1418, "step": 55800 }, { "epoch": 2.810312704567199, "grad_norm": 2.509648152756181, "learning_rate": 1.2129135297174688e-07, "loss": 0.1131, "step": 55810 }, { "epoch": 2.810816254594894, "grad_norm": 3.0184773140655845, "learning_rate": 1.2065083883312624e-07, "loss": 0.1473, "step": 55820 }, { "epoch": 2.811319804622589, "grad_norm": 3.0696508319222473, "learning_rate": 1.2001199973075417e-07, "loss": 0.1271, "step": 55830 }, { "epoch": 2.8118233546502847, "grad_norm": 3.1658515286475146, "learning_rate": 1.1937483588393805e-07, "loss": 0.1255, "step": 55840 }, { "epoch": 2.8123269046779797, "grad_norm": 2.3287428398873526, "learning_rate": 1.1873934751141069e-07, "loss": 0.1065, "step": 55850 }, { "epoch": 2.812830454705675, "grad_norm": 3.4333384335493027, "learning_rate": 1.1810553483132814e-07, "loss": 0.1096, "step": 55860 }, { "epoch": 2.8133340047333704, "grad_norm": 3.249526923316938, "learning_rate": 1.1747339806127355e-07, "loss": 0.1537, "step": 55870 }, { "epoch": 2.8138375547610655, "grad_norm": 2.379891132535463, "learning_rate": 1.1684293741825392e-07, "loss": 0.12, "step": 55880 }, { "epoch": 2.8143411047887605, "grad_norm": 2.0183754937606584, "learning_rate": 1.1621415311869999e-07, "loss": 0.1413, "step": 55890 }, { "epoch": 2.814844654816456, "grad_norm": 4.952090800958864, "learning_rate": 1.1558704537846798e-07, "loss": 0.1498, "step": 55900 }, { "epoch": 2.815348204844151, "grad_norm": 3.1164923785301966, "learning_rate": 1.1496161441283904e-07, "loss": 0.1235, "step": 55910 }, { "epoch": 2.8158517548718462, "grad_norm": 3.322441045619722, "learning_rate": 1.1433786043651696e-07, "loss": 0.1044, "step": 55920 }, { "epoch": 2.816355304899542, "grad_norm": 4.540773657251279, "learning_rate": 1.1371578366363045e-07, "loss": 0.1198, "step": 55930 }, { "epoch": 2.816858854927237, "grad_norm": 4.171825486884533, "learning_rate": 1.1309538430773425e-07, "loss": 0.1386, "step": 55940 }, { "epoch": 2.8173624049549324, "grad_norm": 3.5042663713152606, "learning_rate": 1.1247666258180411e-07, "loss": 0.1058, "step": 55950 }, { "epoch": 2.8178659549826275, "grad_norm": 2.6041440392891113, "learning_rate": 1.1185961869824347e-07, "loss": 0.1211, "step": 55960 }, { "epoch": 2.818369505010323, "grad_norm": 4.756382602459243, "learning_rate": 1.112442528688762e-07, "loss": 0.1501, "step": 55970 }, { "epoch": 2.818873055038018, "grad_norm": 3.1801140169551347, "learning_rate": 1.106305653049533e-07, "loss": 0.1304, "step": 55980 }, { "epoch": 2.819376605065713, "grad_norm": 2.103317463440555, "learning_rate": 1.1001855621714685e-07, "loss": 0.1172, "step": 55990 }, { "epoch": 2.8198801550934087, "grad_norm": 3.6178540547926046, "learning_rate": 1.0940822581555488e-07, "loss": 0.1423, "step": 56000 }, { "epoch": 2.820383705121104, "grad_norm": 2.980911228960565, "learning_rate": 1.0879957430969757e-07, "loss": 0.1026, "step": 56010 }, { "epoch": 2.820887255148799, "grad_norm": 2.8345521497485207, "learning_rate": 1.0819260190851943e-07, "loss": 0.1323, "step": 56020 }, { "epoch": 2.8213908051764944, "grad_norm": 6.133403681650629, "learning_rate": 1.0758730882038937e-07, "loss": 0.175, "step": 56030 }, { "epoch": 2.8218943552041895, "grad_norm": 3.0413736486533365, "learning_rate": 1.0698369525309893e-07, "loss": 0.1057, "step": 56040 }, { "epoch": 2.8223979052318846, "grad_norm": 4.315561161730263, "learning_rate": 1.0638176141386292e-07, "loss": 0.1158, "step": 56050 }, { "epoch": 2.82290145525958, "grad_norm": 3.8826020608171703, "learning_rate": 1.0578150750931882e-07, "loss": 0.1424, "step": 56060 }, { "epoch": 2.8234050052872752, "grad_norm": 2.868443860425445, "learning_rate": 1.0518293374552957e-07, "loss": 0.1503, "step": 56070 }, { "epoch": 2.8239085553149703, "grad_norm": 2.517568230599459, "learning_rate": 1.045860403279797e-07, "loss": 0.1298, "step": 56080 }, { "epoch": 2.824412105342666, "grad_norm": 3.168430962601342, "learning_rate": 1.0399082746157752e-07, "loss": 0.1094, "step": 56090 }, { "epoch": 2.824915655370361, "grad_norm": 3.6206322565493867, "learning_rate": 1.0339729535065346e-07, "loss": 0.1364, "step": 56100 }, { "epoch": 2.825419205398056, "grad_norm": 3.3020043318119656, "learning_rate": 1.0280544419896287e-07, "loss": 0.1268, "step": 56110 }, { "epoch": 2.8259227554257516, "grad_norm": 3.927896926520089, "learning_rate": 1.0221527420968214e-07, "loss": 0.1495, "step": 56120 }, { "epoch": 2.8264263054534466, "grad_norm": 3.0196196160306368, "learning_rate": 1.0162678558541084e-07, "loss": 0.1286, "step": 56130 }, { "epoch": 2.826929855481142, "grad_norm": 3.3169048171732207, "learning_rate": 1.0103997852817294e-07, "loss": 0.1305, "step": 56140 }, { "epoch": 2.8274334055088373, "grad_norm": 1.8787133720608316, "learning_rate": 1.0045485323941285e-07, "loss": 0.1328, "step": 56150 }, { "epoch": 2.827936955536533, "grad_norm": 4.523912874813866, "learning_rate": 9.987140991999933e-08, "loss": 0.1399, "step": 56160 }, { "epoch": 2.828440505564228, "grad_norm": 2.7969493886575685, "learning_rate": 9.928964877022329e-08, "loss": 0.0881, "step": 56170 }, { "epoch": 2.828944055591923, "grad_norm": 3.0727252701166834, "learning_rate": 9.870956998979774e-08, "loss": 0.1309, "step": 56180 }, { "epoch": 2.8294476056196185, "grad_norm": 3.3521433349619025, "learning_rate": 9.813117377785841e-08, "loss": 0.1067, "step": 56190 }, { "epoch": 2.8299511556473136, "grad_norm": 4.7934984948286585, "learning_rate": 9.755446033296423e-08, "loss": 0.1428, "step": 56200 }, { "epoch": 2.8304547056750087, "grad_norm": 4.484088670440737, "learning_rate": 9.697942985309461e-08, "loss": 0.1356, "step": 56210 }, { "epoch": 2.830958255702704, "grad_norm": 2.952414752627776, "learning_rate": 9.640608253565276e-08, "loss": 0.1006, "step": 56220 }, { "epoch": 2.8314618057303993, "grad_norm": 1.691831116763301, "learning_rate": 9.5834418577464e-08, "loss": 0.1319, "step": 56230 }, { "epoch": 2.8319653557580944, "grad_norm": 3.8875046621845986, "learning_rate": 9.526443817477416e-08, "loss": 0.1291, "step": 56240 }, { "epoch": 2.83246890578579, "grad_norm": 2.8150566293434376, "learning_rate": 9.469614152325334e-08, "loss": 0.1497, "step": 56250 }, { "epoch": 2.832972455813485, "grad_norm": 3.568430199787251, "learning_rate": 9.412952881799331e-08, "loss": 0.1313, "step": 56260 }, { "epoch": 2.83347600584118, "grad_norm": 3.3190481360426234, "learning_rate": 9.356460025350567e-08, "loss": 0.1172, "step": 56270 }, { "epoch": 2.8339795558688756, "grad_norm": 4.962577705420876, "learning_rate": 9.300135602372584e-08, "loss": 0.1255, "step": 56280 }, { "epoch": 2.8344831058965707, "grad_norm": 4.6611206020007705, "learning_rate": 9.243979632200973e-08, "loss": 0.1273, "step": 56290 }, { "epoch": 2.8349866559242662, "grad_norm": 4.187417909118727, "learning_rate": 9.187992134113699e-08, "loss": 0.1231, "step": 56300 }, { "epoch": 2.8354902059519613, "grad_norm": 5.6595030031325475, "learning_rate": 9.132173127330723e-08, "loss": 0.1538, "step": 56310 }, { "epoch": 2.8359937559796564, "grad_norm": 3.6421397623997223, "learning_rate": 9.076522631014162e-08, "loss": 0.1229, "step": 56320 }, { "epoch": 2.836497306007352, "grad_norm": 2.5842823396666814, "learning_rate": 9.021040664268288e-08, "loss": 0.1228, "step": 56330 }, { "epoch": 2.837000856035047, "grad_norm": 4.426573685380801, "learning_rate": 8.965727246139644e-08, "loss": 0.144, "step": 56340 }, { "epoch": 2.8375044060627426, "grad_norm": 2.862046927440823, "learning_rate": 8.910582395616818e-08, "loss": 0.1328, "step": 56350 }, { "epoch": 2.8380079560904377, "grad_norm": 1.902174917431416, "learning_rate": 8.855606131630445e-08, "loss": 0.1189, "step": 56360 }, { "epoch": 2.8385115061181327, "grad_norm": 2.585375831247531, "learning_rate": 8.80079847305343e-08, "loss": 0.152, "step": 56370 }, { "epoch": 2.8390150561458283, "grad_norm": 3.494280590457905, "learning_rate": 8.74615943870083e-08, "loss": 0.116, "step": 56380 }, { "epoch": 2.8395186061735234, "grad_norm": 3.536523319035623, "learning_rate": 8.691689047329644e-08, "loss": 0.1188, "step": 56390 }, { "epoch": 2.8400221562012185, "grad_norm": 3.145179292328385, "learning_rate": 8.637387317639024e-08, "loss": 0.1385, "step": 56400 }, { "epoch": 2.840525706228914, "grad_norm": 1.3734931033657336, "learning_rate": 8.583254268270336e-08, "loss": 0.1029, "step": 56410 }, { "epoch": 2.841029256256609, "grad_norm": 3.3370599661226072, "learning_rate": 8.529289917806938e-08, "loss": 0.1449, "step": 56420 }, { "epoch": 2.841532806284304, "grad_norm": 3.4595226920320803, "learning_rate": 8.475494284774289e-08, "loss": 0.1453, "step": 56430 }, { "epoch": 2.8420363563119997, "grad_norm": 2.866783934895678, "learning_rate": 8.421867387639949e-08, "loss": 0.1354, "step": 56440 }, { "epoch": 2.8425399063396948, "grad_norm": 3.598502496310955, "learning_rate": 8.368409244813525e-08, "loss": 0.1278, "step": 56450 }, { "epoch": 2.84304345636739, "grad_norm": 1.7989400196639378, "learning_rate": 8.315119874646837e-08, "loss": 0.1042, "step": 56460 }, { "epoch": 2.8435470063950854, "grad_norm": 3.0067417259133102, "learning_rate": 8.261999295433476e-08, "loss": 0.1163, "step": 56470 }, { "epoch": 2.8440505564227805, "grad_norm": 2.3628593638893514, "learning_rate": 8.209047525409298e-08, "loss": 0.1164, "step": 56480 }, { "epoch": 2.844554106450476, "grad_norm": 2.051233178768961, "learning_rate": 8.156264582752204e-08, "loss": 0.123, "step": 56490 }, { "epoch": 2.845057656478171, "grad_norm": 4.2311016913606165, "learning_rate": 8.103650485582093e-08, "loss": 0.1525, "step": 56500 }, { "epoch": 2.845561206505866, "grad_norm": 3.3196886758931785, "learning_rate": 8.051205251960847e-08, "loss": 0.1581, "step": 56510 }, { "epoch": 2.8460647565335617, "grad_norm": 3.280025951080115, "learning_rate": 7.998928899892511e-08, "loss": 0.1192, "step": 56520 }, { "epoch": 2.846568306561257, "grad_norm": 3.3833755849271268, "learning_rate": 7.946821447323061e-08, "loss": 0.1296, "step": 56530 }, { "epoch": 2.8470718565889523, "grad_norm": 4.537602683310282, "learning_rate": 7.894882912140522e-08, "loss": 0.1173, "step": 56540 }, { "epoch": 2.8475754066166474, "grad_norm": 3.1313315911217137, "learning_rate": 7.84311331217491e-08, "loss": 0.1382, "step": 56550 }, { "epoch": 2.8480789566443425, "grad_norm": 2.6612078833771298, "learning_rate": 7.79151266519823e-08, "loss": 0.1692, "step": 56560 }, { "epoch": 2.848582506672038, "grad_norm": 2.9585434975674723, "learning_rate": 7.740080988924481e-08, "loss": 0.1379, "step": 56570 }, { "epoch": 2.849086056699733, "grad_norm": 3.9286355815632876, "learning_rate": 7.68881830100987e-08, "loss": 0.1213, "step": 56580 }, { "epoch": 2.8495896067274282, "grad_norm": 2.4495659904700426, "learning_rate": 7.637724619052212e-08, "loss": 0.1405, "step": 56590 }, { "epoch": 2.8500931567551238, "grad_norm": 1.9817530421882112, "learning_rate": 7.58679996059164e-08, "loss": 0.1176, "step": 56600 }, { "epoch": 2.850596706782819, "grad_norm": 3.849037193037262, "learning_rate": 7.536044343110005e-08, "loss": 0.1456, "step": 56610 }, { "epoch": 2.851100256810514, "grad_norm": 3.48637596352066, "learning_rate": 7.485457784031369e-08, "loss": 0.1309, "step": 56620 }, { "epoch": 2.8516038068382095, "grad_norm": 2.4044999447027617, "learning_rate": 7.435040300721507e-08, "loss": 0.1028, "step": 56630 }, { "epoch": 2.8521073568659046, "grad_norm": 3.6101804872288703, "learning_rate": 7.384791910488409e-08, "loss": 0.0984, "step": 56640 }, { "epoch": 2.8526109068935996, "grad_norm": 1.9163473228262302, "learning_rate": 7.334712630581886e-08, "loss": 0.0993, "step": 56650 }, { "epoch": 2.853114456921295, "grad_norm": 2.9213326404836133, "learning_rate": 7.284802478193575e-08, "loss": 0.1293, "step": 56660 }, { "epoch": 2.8536180069489903, "grad_norm": 3.21115751020408, "learning_rate": 7.235061470457327e-08, "loss": 0.1339, "step": 56670 }, { "epoch": 2.854121556976686, "grad_norm": 3.5733760800489307, "learning_rate": 7.185489624448649e-08, "loss": 0.1551, "step": 56680 }, { "epoch": 2.854625107004381, "grad_norm": 2.2052820173745618, "learning_rate": 7.136086957185206e-08, "loss": 0.1152, "step": 56690 }, { "epoch": 2.8551286570320764, "grad_norm": 4.346773946054021, "learning_rate": 7.086853485626432e-08, "loss": 0.1245, "step": 56700 }, { "epoch": 2.8556322070597715, "grad_norm": 2.863267583148636, "learning_rate": 7.037789226673752e-08, "loss": 0.1184, "step": 56710 }, { "epoch": 2.8561357570874666, "grad_norm": 2.598683812658432, "learning_rate": 6.98889419717047e-08, "loss": 0.1068, "step": 56720 }, { "epoch": 2.856639307115162, "grad_norm": 3.2410214521426783, "learning_rate": 6.940168413901827e-08, "loss": 0.1169, "step": 56730 }, { "epoch": 2.857142857142857, "grad_norm": 1.9661033597127155, "learning_rate": 6.891611893594941e-08, "loss": 0.1434, "step": 56740 }, { "epoch": 2.8576464071705523, "grad_norm": 2.4784240767511707, "learning_rate": 6.8432246529187e-08, "loss": 0.1392, "step": 56750 }, { "epoch": 2.858149957198248, "grad_norm": 3.4300005699485405, "learning_rate": 6.795006708484209e-08, "loss": 0.1472, "step": 56760 }, { "epoch": 2.858653507225943, "grad_norm": 1.5016860820027218, "learning_rate": 6.746958076844112e-08, "loss": 0.1217, "step": 56770 }, { "epoch": 2.859157057253638, "grad_norm": 3.3762268790490553, "learning_rate": 6.699078774493107e-08, "loss": 0.1431, "step": 56780 }, { "epoch": 2.8596606072813335, "grad_norm": 2.8196854711157466, "learning_rate": 6.651368817867709e-08, "loss": 0.1188, "step": 56790 }, { "epoch": 2.8601641573090286, "grad_norm": 3.293713040347403, "learning_rate": 6.603828223346321e-08, "loss": 0.1306, "step": 56800 }, { "epoch": 2.8606677073367237, "grad_norm": 3.9054345674979873, "learning_rate": 6.556457007249328e-08, "loss": 0.1101, "step": 56810 }, { "epoch": 2.8611712573644192, "grad_norm": 4.593876111671023, "learning_rate": 6.509255185838614e-08, "loss": 0.1386, "step": 56820 }, { "epoch": 2.8616748073921143, "grad_norm": 6.429119104778183, "learning_rate": 6.462222775318217e-08, "loss": 0.1331, "step": 56830 }, { "epoch": 2.8621783574198094, "grad_norm": 1.9302030347167936, "learning_rate": 6.415359791833941e-08, "loss": 0.1214, "step": 56840 }, { "epoch": 2.862681907447505, "grad_norm": 4.004814572709658, "learning_rate": 6.368666251473532e-08, "loss": 0.146, "step": 56850 }, { "epoch": 2.8631854574752, "grad_norm": 4.028731218009408, "learning_rate": 6.322142170266277e-08, "loss": 0.1197, "step": 56860 }, { "epoch": 2.8636890075028956, "grad_norm": 2.699442089832529, "learning_rate": 6.275787564183566e-08, "loss": 0.14, "step": 56870 }, { "epoch": 2.8641925575305907, "grad_norm": 5.9197454731214645, "learning_rate": 6.229602449138561e-08, "loss": 0.1541, "step": 56880 }, { "epoch": 2.864696107558286, "grad_norm": 2.6937508371253185, "learning_rate": 6.183586840986078e-08, "loss": 0.1262, "step": 56890 }, { "epoch": 2.8651996575859813, "grad_norm": 3.529379571722453, "learning_rate": 6.137740755522925e-08, "loss": 0.1357, "step": 56900 }, { "epoch": 2.8657032076136764, "grad_norm": 2.3575891654605616, "learning_rate": 6.09206420848768e-08, "loss": 0.1201, "step": 56910 }, { "epoch": 2.866206757641372, "grad_norm": 4.279326182351351, "learning_rate": 6.046557215560578e-08, "loss": 0.1418, "step": 56920 }, { "epoch": 2.866710307669067, "grad_norm": 3.705055839194739, "learning_rate": 6.0012197923639e-08, "loss": 0.13, "step": 56930 }, { "epoch": 2.867213857696762, "grad_norm": 2.9921152728860982, "learning_rate": 5.9560519544614725e-08, "loss": 0.1113, "step": 56940 }, { "epoch": 2.8677174077244576, "grad_norm": 3.2004847621019774, "learning_rate": 5.9110537173589497e-08, "loss": 0.1408, "step": 56950 }, { "epoch": 2.8682209577521527, "grad_norm": 2.9759382451156053, "learning_rate": 5.8662250965039744e-08, "loss": 0.1323, "step": 56960 }, { "epoch": 2.8687245077798478, "grad_norm": 2.966898406665505, "learning_rate": 5.821566107285681e-08, "loss": 0.1165, "step": 56970 }, { "epoch": 2.8692280578075433, "grad_norm": 5.155066159023131, "learning_rate": 5.777076765035139e-08, "loss": 0.1338, "step": 56980 }, { "epoch": 2.8697316078352384, "grad_norm": 2.3676791498529752, "learning_rate": 5.732757085025187e-08, "loss": 0.1194, "step": 56990 }, { "epoch": 2.8702351578629335, "grad_norm": 3.1116212924689512, "learning_rate": 5.688607082470266e-08, "loss": 0.115, "step": 57000 }, { "epoch": 2.870738707890629, "grad_norm": 3.4144213164459547, "learning_rate": 5.644626772526751e-08, "loss": 0.1212, "step": 57010 }, { "epoch": 2.871242257918324, "grad_norm": 3.340567440670716, "learning_rate": 5.600816170292622e-08, "loss": 0.1042, "step": 57020 }, { "epoch": 2.871745807946019, "grad_norm": 3.5611561545925756, "learning_rate": 5.5571752908076815e-08, "loss": 0.1213, "step": 57030 }, { "epoch": 2.8722493579737147, "grad_norm": 4.011624376280303, "learning_rate": 5.5137041490535007e-08, "loss": 0.1427, "step": 57040 }, { "epoch": 2.87275290800141, "grad_norm": 2.3879732180878115, "learning_rate": 5.470402759953253e-08, "loss": 0.1347, "step": 57050 }, { "epoch": 2.8732564580291053, "grad_norm": 3.230598201921651, "learning_rate": 5.427271138371937e-08, "loss": 0.1167, "step": 57060 }, { "epoch": 2.8737600080568004, "grad_norm": 3.124874889510691, "learning_rate": 5.3843092991162635e-08, "loss": 0.1336, "step": 57070 }, { "epoch": 2.874263558084496, "grad_norm": 3.201234964337587, "learning_rate": 5.3415172569346583e-08, "loss": 0.1306, "step": 57080 }, { "epoch": 2.874767108112191, "grad_norm": 3.80899999546469, "learning_rate": 5.298895026517203e-08, "loss": 0.1355, "step": 57090 }, { "epoch": 2.875270658139886, "grad_norm": 3.3221151100583715, "learning_rate": 5.25644262249575e-08, "loss": 0.1066, "step": 57100 }, { "epoch": 2.8757742081675817, "grad_norm": 3.910948373717605, "learning_rate": 5.2141600594437514e-08, "loss": 0.1267, "step": 57110 }, { "epoch": 2.8762777581952768, "grad_norm": 3.4703342452751684, "learning_rate": 5.172047351876541e-08, "loss": 0.1126, "step": 57120 }, { "epoch": 2.876781308222972, "grad_norm": 2.88788286522327, "learning_rate": 5.130104514250889e-08, "loss": 0.0983, "step": 57130 }, { "epoch": 2.8772848582506674, "grad_norm": 3.9914440889832377, "learning_rate": 5.0883315609655535e-08, "loss": 0.1305, "step": 57140 }, { "epoch": 2.8777884082783625, "grad_norm": 2.769005294441565, "learning_rate": 5.046728506360676e-08, "loss": 0.1302, "step": 57150 }, { "epoch": 2.8782919583060576, "grad_norm": 3.9769992300185155, "learning_rate": 5.0052953647183325e-08, "loss": 0.1517, "step": 57160 }, { "epoch": 2.878795508333753, "grad_norm": 3.3051175091801883, "learning_rate": 4.964032150261977e-08, "loss": 0.1427, "step": 57170 }, { "epoch": 2.879299058361448, "grad_norm": 3.307641681157846, "learning_rate": 4.922938877157057e-08, "loss": 0.13, "step": 57180 }, { "epoch": 2.8798026083891433, "grad_norm": 3.072672231019948, "learning_rate": 4.8820155595104004e-08, "loss": 0.1073, "step": 57190 }, { "epoch": 2.880306158416839, "grad_norm": 3.077911819367376, "learning_rate": 4.841262211370712e-08, "loss": 0.1016, "step": 57200 }, { "epoch": 2.880809708444534, "grad_norm": 4.321216719408084, "learning_rate": 4.800678846728135e-08, "loss": 0.1044, "step": 57210 }, { "epoch": 2.881313258472229, "grad_norm": 1.9065393443952081, "learning_rate": 4.760265479514692e-08, "loss": 0.1273, "step": 57220 }, { "epoch": 2.8818168084999245, "grad_norm": 1.5459344671916373, "learning_rate": 4.72002212360384e-08, "loss": 0.1359, "step": 57230 }, { "epoch": 2.8823203585276196, "grad_norm": 2.193392936112493, "learning_rate": 4.679948792810807e-08, "loss": 0.1173, "step": 57240 }, { "epoch": 2.882823908555315, "grad_norm": 2.1540272762945483, "learning_rate": 4.640045500892365e-08, "loss": 0.1329, "step": 57250 }, { "epoch": 2.88332745858301, "grad_norm": 3.2913094633507596, "learning_rate": 4.600312261546946e-08, "loss": 0.1366, "step": 57260 }, { "epoch": 2.8838310086107057, "grad_norm": 2.7103631782634685, "learning_rate": 4.56074908841464e-08, "loss": 0.136, "step": 57270 }, { "epoch": 2.884334558638401, "grad_norm": 3.3811228319734328, "learning_rate": 4.521355995077137e-08, "loss": 0.1433, "step": 57280 }, { "epoch": 2.884838108666096, "grad_norm": 2.537363353735156, "learning_rate": 4.482132995057675e-08, "loss": 0.1186, "step": 57290 }, { "epoch": 2.8853416586937914, "grad_norm": 3.273239832571079, "learning_rate": 4.4430801018212064e-08, "loss": 0.1176, "step": 57300 }, { "epoch": 2.8858452087214865, "grad_norm": 2.559942936527777, "learning_rate": 4.404197328774174e-08, "loss": 0.1148, "step": 57310 }, { "epoch": 2.8863487587491816, "grad_norm": 4.58378167326654, "learning_rate": 4.3654846892647875e-08, "loss": 0.1261, "step": 57320 }, { "epoch": 2.886852308776877, "grad_norm": 3.5532172137469415, "learning_rate": 4.32694219658264e-08, "loss": 0.1255, "step": 57330 }, { "epoch": 2.8873558588045722, "grad_norm": 4.169006442736007, "learning_rate": 4.288569863959036e-08, "loss": 0.1223, "step": 57340 }, { "epoch": 2.8878594088322673, "grad_norm": 2.9662384727757733, "learning_rate": 4.250367704566882e-08, "loss": 0.1253, "step": 57350 }, { "epoch": 2.888362958859963, "grad_norm": 2.609800575199973, "learning_rate": 4.2123357315206335e-08, "loss": 0.1007, "step": 57360 }, { "epoch": 2.888866508887658, "grad_norm": 3.7467429524767173, "learning_rate": 4.17447395787629e-08, "loss": 0.1246, "step": 57370 }, { "epoch": 2.889370058915353, "grad_norm": 7.681862744482097, "learning_rate": 4.136782396631456e-08, "loss": 0.116, "step": 57380 }, { "epoch": 2.8898736089430486, "grad_norm": 2.793795162617028, "learning_rate": 4.0992610607253904e-08, "loss": 0.1393, "step": 57390 }, { "epoch": 2.8903771589707437, "grad_norm": 3.139422629092417, "learning_rate": 4.061909963038679e-08, "loss": 0.12, "step": 57400 }, { "epoch": 2.8908807089984387, "grad_norm": 1.9384352867288912, "learning_rate": 4.024729116393733e-08, "loss": 0.1274, "step": 57410 }, { "epoch": 2.8913842590261343, "grad_norm": 1.335459358488485, "learning_rate": 3.9877185335542854e-08, "loss": 0.1108, "step": 57420 }, { "epoch": 2.8918878090538294, "grad_norm": 2.7535933857568202, "learning_rate": 3.950878227225896e-08, "loss": 0.1279, "step": 57430 }, { "epoch": 2.892391359081525, "grad_norm": 2.2608843355103336, "learning_rate": 3.914208210055392e-08, "loss": 0.1139, "step": 57440 }, { "epoch": 2.89289490910922, "grad_norm": 2.8940210207867154, "learning_rate": 3.8777084946312604e-08, "loss": 0.12, "step": 57450 }, { "epoch": 2.8933984591369155, "grad_norm": 2.963849192993698, "learning_rate": 3.841379093483477e-08, "loss": 0.1114, "step": 57460 }, { "epoch": 2.8939020091646106, "grad_norm": 3.2662776373165654, "learning_rate": 3.8052200190837886e-08, "loss": 0.1136, "step": 57470 }, { "epoch": 2.8944055591923057, "grad_norm": 4.6273543625741675, "learning_rate": 3.769231283845043e-08, "loss": 0.114, "step": 57480 }, { "epoch": 2.894909109220001, "grad_norm": 3.188909236644005, "learning_rate": 3.733412900121969e-08, "loss": 0.1273, "step": 57490 }, { "epoch": 2.8954126592476963, "grad_norm": 3.407642189821366, "learning_rate": 3.69776488021073e-08, "loss": 0.123, "step": 57500 }, { "epoch": 2.8959162092753914, "grad_norm": 4.271704770787686, "learning_rate": 3.662287236348927e-08, "loss": 0.1432, "step": 57510 }, { "epoch": 2.896419759303087, "grad_norm": 3.2167536001213484, "learning_rate": 3.6269799807155945e-08, "loss": 0.1444, "step": 57520 }, { "epoch": 2.896923309330782, "grad_norm": 2.9272111563490615, "learning_rate": 3.5918431254315375e-08, "loss": 0.1141, "step": 57530 }, { "epoch": 2.897426859358477, "grad_norm": 0.8253292996597679, "learning_rate": 3.5568766825589405e-08, "loss": 0.1249, "step": 57540 }, { "epoch": 2.8979304093861726, "grad_norm": 2.287409471019677, "learning_rate": 3.5220806641013126e-08, "loss": 0.1396, "step": 57550 }, { "epoch": 2.8984339594138677, "grad_norm": 2.661778361499046, "learning_rate": 3.4874550820039875e-08, "loss": 0.1514, "step": 57560 }, { "epoch": 2.898937509441563, "grad_norm": 5.674872624372199, "learning_rate": 3.452999948153457e-08, "loss": 0.1208, "step": 57570 }, { "epoch": 2.8994410594692583, "grad_norm": 3.132140534180183, "learning_rate": 3.418715274377982e-08, "loss": 0.1193, "step": 57580 }, { "epoch": 2.8999446094969534, "grad_norm": 1.7871539884826115, "learning_rate": 3.3846010724470913e-08, "loss": 0.094, "step": 57590 }, { "epoch": 2.9004481595246485, "grad_norm": 3.7506151733261563, "learning_rate": 3.350657354071918e-08, "loss": 0.1233, "step": 57600 }, { "epoch": 2.900951709552344, "grad_norm": 3.2520943455618028, "learning_rate": 3.3168841309049735e-08, "loss": 0.142, "step": 57610 }, { "epoch": 2.901455259580039, "grad_norm": 2.5535706296349923, "learning_rate": 3.283281414540429e-08, "loss": 0.1004, "step": 57620 }, { "epoch": 2.9019588096077347, "grad_norm": 3.877926179802747, "learning_rate": 3.249849216513723e-08, "loss": 0.1624, "step": 57630 }, { "epoch": 2.9024623596354298, "grad_norm": 3.0586077573203534, "learning_rate": 3.2165875483017304e-08, "loss": 0.0995, "step": 57640 }, { "epoch": 2.9029659096631253, "grad_norm": 2.235177243429761, "learning_rate": 3.18349642132304e-08, "loss": 0.1115, "step": 57650 }, { "epoch": 2.9034694596908204, "grad_norm": 2.042903484001305, "learning_rate": 3.150575846937454e-08, "loss": 0.1264, "step": 57660 }, { "epoch": 2.9039730097185155, "grad_norm": 3.045285600331469, "learning_rate": 3.1178258364462646e-08, "loss": 0.0971, "step": 57670 }, { "epoch": 2.904476559746211, "grad_norm": 3.159528205676542, "learning_rate": 3.0852464010923674e-08, "loss": 0.138, "step": 57680 }, { "epoch": 2.904980109773906, "grad_norm": 2.4102989198497755, "learning_rate": 3.052837552059873e-08, "loss": 0.1077, "step": 57690 }, { "epoch": 2.905483659801601, "grad_norm": 3.1047444852491037, "learning_rate": 3.0205993004744914e-08, "loss": 0.1443, "step": 57700 }, { "epoch": 2.9059872098292967, "grad_norm": 3.7034382766861005, "learning_rate": 2.9885316574033706e-08, "loss": 0.1206, "step": 57710 }, { "epoch": 2.906490759856992, "grad_norm": 3.578290364039419, "learning_rate": 2.9566346338549824e-08, "loss": 0.1151, "step": 57720 }, { "epoch": 2.906994309884687, "grad_norm": 2.0536844149822504, "learning_rate": 2.924908240779234e-08, "loss": 0.1185, "step": 57730 }, { "epoch": 2.9074978599123824, "grad_norm": 3.8000011744407485, "learning_rate": 2.8933524890676356e-08, "loss": 0.1436, "step": 57740 }, { "epoch": 2.9080014099400775, "grad_norm": 3.3627606420580336, "learning_rate": 2.861967389552911e-08, "loss": 0.1367, "step": 57750 }, { "epoch": 2.9085049599677726, "grad_norm": 3.6130705002716628, "learning_rate": 2.8307529530092746e-08, "loss": 0.141, "step": 57760 }, { "epoch": 2.909008509995468, "grad_norm": 4.313417317036013, "learning_rate": 2.7997091901524332e-08, "loss": 0.1211, "step": 57770 }, { "epoch": 2.909512060023163, "grad_norm": 3.064902478677094, "learning_rate": 2.7688361116393615e-08, "loss": 0.0975, "step": 57780 }, { "epoch": 2.9100156100508583, "grad_norm": 4.233165252095146, "learning_rate": 2.7381337280684706e-08, "loss": 0.1321, "step": 57790 }, { "epoch": 2.910519160078554, "grad_norm": 5.613416697661822, "learning_rate": 2.7076020499796628e-08, "loss": 0.1433, "step": 57800 }, { "epoch": 2.911022710106249, "grad_norm": 2.101547830611353, "learning_rate": 2.677241087854221e-08, "loss": 0.0959, "step": 57810 }, { "epoch": 2.9115262601339444, "grad_norm": 3.028789179158617, "learning_rate": 2.647050852114752e-08, "loss": 0.1428, "step": 57820 }, { "epoch": 2.9120298101616395, "grad_norm": 3.965972631649981, "learning_rate": 2.6170313531252433e-08, "loss": 0.1337, "step": 57830 }, { "epoch": 2.912533360189335, "grad_norm": 2.3640917923154223, "learning_rate": 2.5871826011911737e-08, "loss": 0.1441, "step": 57840 }, { "epoch": 2.91303691021703, "grad_norm": 4.134767852992861, "learning_rate": 2.5575046065593468e-08, "loss": 0.112, "step": 57850 }, { "epoch": 2.9135404602447252, "grad_norm": 2.858711721710218, "learning_rate": 2.5279973794179458e-08, "loss": 0.1295, "step": 57860 }, { "epoch": 2.9140440102724208, "grad_norm": 3.206526197961276, "learning_rate": 2.4986609298965346e-08, "loss": 0.1434, "step": 57870 }, { "epoch": 2.914547560300116, "grad_norm": 3.1744641851423308, "learning_rate": 2.469495268066002e-08, "loss": 0.1236, "step": 57880 }, { "epoch": 2.915051110327811, "grad_norm": 3.3678566962329977, "learning_rate": 2.4405004039386725e-08, "loss": 0.1118, "step": 57890 }, { "epoch": 2.9155546603555065, "grad_norm": 4.362852876309882, "learning_rate": 2.4116763474683058e-08, "loss": 0.1493, "step": 57900 }, { "epoch": 2.9160582103832016, "grad_norm": 3.819390899373679, "learning_rate": 2.3830231085498202e-08, "loss": 0.1146, "step": 57910 }, { "epoch": 2.9165617604108967, "grad_norm": 3.655627591129152, "learning_rate": 2.3545406970196806e-08, "loss": 0.1499, "step": 57920 }, { "epoch": 2.917065310438592, "grad_norm": 2.5338362927699136, "learning_rate": 2.326229122655621e-08, "loss": 0.1075, "step": 57930 }, { "epoch": 2.9175688604662873, "grad_norm": 3.4251909105444533, "learning_rate": 2.2980883951767564e-08, "loss": 0.1105, "step": 57940 }, { "epoch": 2.9180724104939824, "grad_norm": 4.298596149489845, "learning_rate": 2.2701185242435254e-08, "loss": 0.1348, "step": 57950 }, { "epoch": 2.918575960521678, "grad_norm": 2.843846551909658, "learning_rate": 2.2423195194577475e-08, "loss": 0.1258, "step": 57960 }, { "epoch": 2.919079510549373, "grad_norm": 2.3700635303130495, "learning_rate": 2.2146913903625665e-08, "loss": 0.1354, "step": 57970 }, { "epoch": 2.919583060577068, "grad_norm": 2.860736097574157, "learning_rate": 2.1872341464425073e-08, "loss": 0.1404, "step": 57980 }, { "epoch": 2.9200866106047636, "grad_norm": 1.885184386745675, "learning_rate": 2.1599477971233074e-08, "loss": 0.1154, "step": 57990 }, { "epoch": 2.9205901606324587, "grad_norm": 3.769473844358302, "learning_rate": 2.1328323517721405e-08, "loss": 0.1439, "step": 58000 }, { "epoch": 2.921093710660154, "grad_norm": 3.4754900956317547, "learning_rate": 2.1058878196975607e-08, "loss": 0.11, "step": 58010 }, { "epoch": 2.9215972606878493, "grad_norm": 3.8187540371692803, "learning_rate": 2.0791142101493357e-08, "loss": 0.1206, "step": 58020 }, { "epoch": 2.922100810715545, "grad_norm": 3.8558764878768854, "learning_rate": 2.0525115323185574e-08, "loss": 0.1216, "step": 58030 }, { "epoch": 2.92260436074324, "grad_norm": 3.8300282957651146, "learning_rate": 2.0260797953377543e-08, "loss": 0.105, "step": 58040 }, { "epoch": 2.923107910770935, "grad_norm": 3.5928458016658857, "learning_rate": 1.9998190082807234e-08, "loss": 0.1173, "step": 58050 }, { "epoch": 2.9236114607986305, "grad_norm": 2.7985582232450743, "learning_rate": 1.973729180162476e-08, "loss": 0.1101, "step": 58060 }, { "epoch": 2.9241150108263256, "grad_norm": 2.034540602069825, "learning_rate": 1.9478103199394028e-08, "loss": 0.1331, "step": 58070 }, { "epoch": 2.9246185608540207, "grad_norm": 3.4064837144377345, "learning_rate": 1.922062436509331e-08, "loss": 0.1386, "step": 58080 }, { "epoch": 2.9251221108817163, "grad_norm": 2.759242194450164, "learning_rate": 1.8964855387111346e-08, "loss": 0.1456, "step": 58090 }, { "epoch": 2.9256256609094113, "grad_norm": 2.7459200123007097, "learning_rate": 1.8710796353252347e-08, "loss": 0.1336, "step": 58100 }, { "epoch": 2.9261292109371064, "grad_norm": 4.695360342494793, "learning_rate": 1.84584473507321e-08, "loss": 0.1596, "step": 58110 }, { "epoch": 2.926632760964802, "grad_norm": 3.4259075789152678, "learning_rate": 1.8207808466179645e-08, "loss": 0.1282, "step": 58120 }, { "epoch": 2.927136310992497, "grad_norm": 2.7604338060242424, "learning_rate": 1.7958879785637263e-08, "loss": 0.1038, "step": 58130 }, { "epoch": 2.927639861020192, "grad_norm": 2.409157523685872, "learning_rate": 1.7711661394559376e-08, "loss": 0.1378, "step": 58140 }, { "epoch": 2.9281434110478877, "grad_norm": 2.3593483857500446, "learning_rate": 1.746615337781421e-08, "loss": 0.11, "step": 58150 }, { "epoch": 2.9286469610755828, "grad_norm": 2.9800562062489546, "learning_rate": 1.7222355819682678e-08, "loss": 0.1367, "step": 58160 }, { "epoch": 2.929150511103278, "grad_norm": 3.153258357090502, "learning_rate": 1.6980268803857835e-08, "loss": 0.1201, "step": 58170 }, { "epoch": 2.9296540611309734, "grad_norm": 1.493296151642398, "learning_rate": 1.6739892413446534e-08, "loss": 0.1045, "step": 58180 }, { "epoch": 2.9301576111586685, "grad_norm": 2.6271577654656357, "learning_rate": 1.650122673096666e-08, "loss": 0.1208, "step": 58190 }, { "epoch": 2.930661161186364, "grad_norm": 3.161093718364667, "learning_rate": 1.6264271838350998e-08, "loss": 0.1316, "step": 58200 }, { "epoch": 2.931164711214059, "grad_norm": 2.1132222897559254, "learning_rate": 1.602902781694393e-08, "loss": 0.1272, "step": 58210 }, { "epoch": 2.9316682612417546, "grad_norm": 3.924806557412466, "learning_rate": 1.5795494747502528e-08, "loss": 0.1113, "step": 58220 }, { "epoch": 2.9321718112694497, "grad_norm": 3.7785024241778435, "learning_rate": 1.5563672710195987e-08, "loss": 0.1235, "step": 58230 }, { "epoch": 2.932675361297145, "grad_norm": 4.302747345363737, "learning_rate": 1.533356178460732e-08, "loss": 0.1391, "step": 58240 }, { "epoch": 2.9331789113248403, "grad_norm": 1.364324151363751, "learning_rate": 1.510516204973167e-08, "loss": 0.1309, "step": 58250 }, { "epoch": 2.9336824613525354, "grad_norm": 3.0625811068245787, "learning_rate": 1.4878473583975761e-08, "loss": 0.121, "step": 58260 }, { "epoch": 2.9341860113802305, "grad_norm": 3.317308985141301, "learning_rate": 1.4653496465160122e-08, "loss": 0.1209, "step": 58270 }, { "epoch": 2.934689561407926, "grad_norm": 3.4208946512117273, "learning_rate": 1.4430230770517973e-08, "loss": 0.0958, "step": 58280 }, { "epoch": 2.935193111435621, "grad_norm": 2.881337764904893, "learning_rate": 1.4208676576693559e-08, "loss": 0.1179, "step": 58290 }, { "epoch": 2.935696661463316, "grad_norm": 3.439584401706656, "learning_rate": 1.3988833959744374e-08, "loss": 0.1217, "step": 58300 }, { "epoch": 2.9362002114910117, "grad_norm": 2.7103352769529003, "learning_rate": 1.377070299514116e-08, "loss": 0.1214, "step": 58310 }, { "epoch": 2.936703761518707, "grad_norm": 2.4707908392140006, "learning_rate": 1.355428375776624e-08, "loss": 0.134, "step": 58320 }, { "epoch": 2.937207311546402, "grad_norm": 2.6203786715647963, "learning_rate": 1.3339576321912962e-08, "loss": 0.1263, "step": 58330 }, { "epoch": 2.9377108615740974, "grad_norm": 3.8900426979227727, "learning_rate": 1.3126580761290142e-08, "loss": 0.1283, "step": 58340 }, { "epoch": 2.9382144116017925, "grad_norm": 2.4808909572768596, "learning_rate": 1.2915297149016515e-08, "loss": 0.1076, "step": 58350 }, { "epoch": 2.9387179616294876, "grad_norm": 2.601395926374061, "learning_rate": 1.270572555762406e-08, "loss": 0.1157, "step": 58360 }, { "epoch": 2.939221511657183, "grad_norm": 4.165438855606076, "learning_rate": 1.2497866059055785e-08, "loss": 0.1257, "step": 58370 }, { "epoch": 2.9397250616848782, "grad_norm": 1.7804509405090398, "learning_rate": 1.229171872466961e-08, "loss": 0.1567, "step": 58380 }, { "epoch": 2.9402286117125738, "grad_norm": 2.705771345179667, "learning_rate": 1.2087283625232816e-08, "loss": 0.1165, "step": 58390 }, { "epoch": 2.940732161740269, "grad_norm": 3.698937063956245, "learning_rate": 1.1884560830927038e-08, "loss": 0.135, "step": 58400 }, { "epoch": 2.9412357117679644, "grad_norm": 3.7285319428703043, "learning_rate": 1.1683550411343835e-08, "loss": 0.1229, "step": 58410 }, { "epoch": 2.9417392617956595, "grad_norm": 3.1941770322501513, "learning_rate": 1.1484252435489673e-08, "loss": 0.1357, "step": 58420 }, { "epoch": 2.9422428118233546, "grad_norm": 3.508791668395184, "learning_rate": 1.1286666971780935e-08, "loss": 0.155, "step": 58430 }, { "epoch": 2.94274636185105, "grad_norm": 3.4430763420475534, "learning_rate": 1.1090794088047252e-08, "loss": 0.1292, "step": 58440 }, { "epoch": 2.943249911878745, "grad_norm": 3.9295318384579887, "learning_rate": 1.0896633851529836e-08, "loss": 0.1192, "step": 58450 }, { "epoch": 2.9437534619064403, "grad_norm": 3.915005670880136, "learning_rate": 1.0704186328882038e-08, "loss": 0.1291, "step": 58460 }, { "epoch": 2.944257011934136, "grad_norm": 4.008221048128152, "learning_rate": 1.0513451586169343e-08, "loss": 0.1238, "step": 58470 }, { "epoch": 2.944760561961831, "grad_norm": 3.298886363031334, "learning_rate": 1.032442968886993e-08, "loss": 0.1033, "step": 58480 }, { "epoch": 2.945264111989526, "grad_norm": 2.325286355686762, "learning_rate": 1.013712070187245e-08, "loss": 0.1288, "step": 58490 }, { "epoch": 2.9457676620172215, "grad_norm": 5.816274627811076, "learning_rate": 9.951524689478242e-09, "loss": 0.1444, "step": 58500 }, { "epoch": 2.9462712120449166, "grad_norm": 2.697294544674939, "learning_rate": 9.767641715401898e-09, "loss": 0.1084, "step": 58510 }, { "epoch": 2.9467747620726117, "grad_norm": 4.295013073557885, "learning_rate": 9.585471842767924e-09, "loss": 0.1215, "step": 58520 }, { "epoch": 2.947278312100307, "grad_norm": 3.68054226381392, "learning_rate": 9.405015134113516e-09, "loss": 0.1275, "step": 58530 }, { "epoch": 2.9477818621280023, "grad_norm": 4.370340610243151, "learning_rate": 9.226271651388008e-09, "loss": 0.1206, "step": 58540 }, { "epoch": 2.9482854121556974, "grad_norm": 2.4651962330212274, "learning_rate": 9.049241455952318e-09, "loss": 0.155, "step": 58550 }, { "epoch": 2.948788962183393, "grad_norm": 4.274262756360321, "learning_rate": 8.873924608579499e-09, "loss": 0.1085, "step": 58560 }, { "epoch": 2.949292512211088, "grad_norm": 3.169006647756826, "learning_rate": 8.700321169454184e-09, "loss": 0.1342, "step": 58570 }, { "epoch": 2.9497960622387835, "grad_norm": 2.5096603153575394, "learning_rate": 8.528431198172593e-09, "loss": 0.1119, "step": 58580 }, { "epoch": 2.9502996122664786, "grad_norm": 3.0665633595130304, "learning_rate": 8.358254753743633e-09, "loss": 0.1298, "step": 58590 }, { "epoch": 2.950803162294174, "grad_norm": 3.7600660983746916, "learning_rate": 8.189791894586684e-09, "loss": 0.1522, "step": 58600 }, { "epoch": 2.9513067123218693, "grad_norm": 3.5182026760133023, "learning_rate": 8.023042678533822e-09, "loss": 0.1673, "step": 58610 }, { "epoch": 2.9518102623495643, "grad_norm": 3.6415835930579807, "learning_rate": 7.85800716282814e-09, "loss": 0.1361, "step": 58620 }, { "epoch": 2.95231381237726, "grad_norm": 2.3949504311253142, "learning_rate": 7.694685404125435e-09, "loss": 0.1229, "step": 58630 }, { "epoch": 2.952817362404955, "grad_norm": 3.7766950443103458, "learning_rate": 7.533077458492522e-09, "loss": 0.1503, "step": 58640 }, { "epoch": 2.95332091243265, "grad_norm": 4.470862703806027, "learning_rate": 7.373183381407245e-09, "loss": 0.1348, "step": 58650 }, { "epoch": 2.9538244624603456, "grad_norm": 2.980704795773174, "learning_rate": 7.215003227760697e-09, "loss": 0.1055, "step": 58660 }, { "epoch": 2.9543280124880407, "grad_norm": 2.573933548534423, "learning_rate": 7.0585370518538865e-09, "loss": 0.1239, "step": 58670 }, { "epoch": 2.9548315625157358, "grad_norm": 2.4866396983417025, "learning_rate": 6.903784907401068e-09, "loss": 0.1392, "step": 58680 }, { "epoch": 2.9553351125434313, "grad_norm": 5.132291002865386, "learning_rate": 6.750746847526412e-09, "loss": 0.1219, "step": 58690 }, { "epoch": 2.9558386625711264, "grad_norm": 3.3367826565109002, "learning_rate": 6.599422924766785e-09, "loss": 0.1276, "step": 58700 }, { "epoch": 2.9563422125988215, "grad_norm": 3.02649046217157, "learning_rate": 6.449813191070631e-09, "loss": 0.1276, "step": 58710 }, { "epoch": 2.956845762626517, "grad_norm": 2.914534665520763, "learning_rate": 6.301917697796867e-09, "loss": 0.1222, "step": 58720 }, { "epoch": 2.957349312654212, "grad_norm": 1.7465927793351814, "learning_rate": 6.155736495717657e-09, "loss": 0.12, "step": 58730 }, { "epoch": 2.9578528626819076, "grad_norm": 4.8865734253761355, "learning_rate": 6.011269635015082e-09, "loss": 0.1335, "step": 58740 }, { "epoch": 2.9583564127096027, "grad_norm": 3.426346795182971, "learning_rate": 5.868517165283916e-09, "loss": 0.1504, "step": 58750 }, { "epoch": 2.958859962737298, "grad_norm": 3.462277847338255, "learning_rate": 5.727479135528291e-09, "loss": 0.1296, "step": 58760 }, { "epoch": 2.9593635127649933, "grad_norm": 3.4018101813618165, "learning_rate": 5.588155594167255e-09, "loss": 0.1385, "step": 58770 }, { "epoch": 2.9598670627926884, "grad_norm": 3.2971120746329117, "learning_rate": 5.45054658902755e-09, "loss": 0.1454, "step": 58780 }, { "epoch": 2.960370612820384, "grad_norm": 3.1055587481113367, "learning_rate": 5.314652167350276e-09, "loss": 0.1333, "step": 58790 }, { "epoch": 2.960874162848079, "grad_norm": 3.5327977385586844, "learning_rate": 5.18047237578645e-09, "loss": 0.1207, "step": 58800 }, { "epoch": 2.961377712875774, "grad_norm": 3.5576927392960207, "learning_rate": 5.048007260398668e-09, "loss": 0.1075, "step": 58810 }, { "epoch": 2.9618812629034696, "grad_norm": 1.1305566217677585, "learning_rate": 4.917256866660558e-09, "loss": 0.1277, "step": 58820 }, { "epoch": 2.9623848129311647, "grad_norm": 1.667916812100194, "learning_rate": 4.788221239458435e-09, "loss": 0.118, "step": 58830 }, { "epoch": 2.96288836295886, "grad_norm": 3.286483703776619, "learning_rate": 4.6609004230885325e-09, "loss": 0.132, "step": 58840 }, { "epoch": 2.9633919129865554, "grad_norm": 3.0509038816592993, "learning_rate": 4.535294461259221e-09, "loss": 0.113, "step": 58850 }, { "epoch": 2.9638954630142504, "grad_norm": 1.8296355312735204, "learning_rate": 4.411403397089342e-09, "loss": 0.1252, "step": 58860 }, { "epoch": 2.9643990130419455, "grad_norm": 3.910453559720335, "learning_rate": 4.2892272731104304e-09, "loss": 0.1514, "step": 58870 }, { "epoch": 2.964902563069641, "grad_norm": 3.3520365468729625, "learning_rate": 4.16876613126338e-09, "loss": 0.1245, "step": 58880 }, { "epoch": 2.965406113097336, "grad_norm": 4.037842594081615, "learning_rate": 4.050020012902334e-09, "loss": 0.148, "step": 58890 }, { "epoch": 2.9659096631250312, "grad_norm": 3.8864271772235224, "learning_rate": 3.932988958790795e-09, "loss": 0.1371, "step": 58900 }, { "epoch": 2.9664132131527268, "grad_norm": 5.1669405832742505, "learning_rate": 3.8176730091055155e-09, "loss": 0.1238, "step": 58910 }, { "epoch": 2.966916763180422, "grad_norm": 2.923963743956044, "learning_rate": 3.704072203433162e-09, "loss": 0.1373, "step": 58920 }, { "epoch": 2.9674203132081174, "grad_norm": 2.9090660001891933, "learning_rate": 3.5921865807714285e-09, "loss": 0.1316, "step": 58930 }, { "epoch": 2.9679238632358125, "grad_norm": 4.275358196685367, "learning_rate": 3.4820161795295905e-09, "loss": 0.1176, "step": 58940 }, { "epoch": 2.9684274132635076, "grad_norm": 2.4962895421385114, "learning_rate": 3.3735610375285057e-09, "loss": 0.1263, "step": 58950 }, { "epoch": 2.968930963291203, "grad_norm": 3.605920714005356, "learning_rate": 3.266821192000058e-09, "loss": 0.1364, "step": 58960 }, { "epoch": 2.969434513318898, "grad_norm": 0.9594038582397248, "learning_rate": 3.161796679586604e-09, "loss": 0.0802, "step": 58970 }, { "epoch": 2.9699380633465937, "grad_norm": 2.285234121125452, "learning_rate": 3.058487536342081e-09, "loss": 0.1474, "step": 58980 }, { "epoch": 2.970441613374289, "grad_norm": 3.4753342123786783, "learning_rate": 2.9568937977320077e-09, "loss": 0.1175, "step": 58990 }, { "epoch": 2.970945163401984, "grad_norm": 2.2346176259479225, "learning_rate": 2.857015498631821e-09, "loss": 0.1305, "step": 59000 }, { "epoch": 2.9714487134296794, "grad_norm": 3.564033952101522, "learning_rate": 2.7588526733296484e-09, "loss": 0.1547, "step": 59010 }, { "epoch": 2.9719522634573745, "grad_norm": 4.190252825700175, "learning_rate": 2.662405355524089e-09, "loss": 0.1356, "step": 59020 }, { "epoch": 2.9724558134850696, "grad_norm": 3.8682351024448995, "learning_rate": 2.5676735783236597e-09, "loss": 0.125, "step": 59030 }, { "epoch": 2.972959363512765, "grad_norm": 2.5838454329063034, "learning_rate": 2.474657374249012e-09, "loss": 0.1041, "step": 59040 }, { "epoch": 2.97346291354046, "grad_norm": 3.712575804423282, "learning_rate": 2.3833567752329367e-09, "loss": 0.1429, "step": 59050 }, { "epoch": 2.9739664635681553, "grad_norm": 4.240364748798087, "learning_rate": 2.2937718126164745e-09, "loss": 0.1285, "step": 59060 }, { "epoch": 2.974470013595851, "grad_norm": 4.460167713826576, "learning_rate": 2.2059025171544677e-09, "loss": 0.1224, "step": 59070 }, { "epoch": 2.974973563623546, "grad_norm": 3.18650266738589, "learning_rate": 2.119748919011677e-09, "loss": 0.1216, "step": 59080 }, { "epoch": 2.975477113651241, "grad_norm": 2.3050469285222106, "learning_rate": 2.035311047763333e-09, "loss": 0.1115, "step": 59090 }, { "epoch": 2.9759806636789365, "grad_norm": 2.648671704251947, "learning_rate": 1.952588932396249e-09, "loss": 0.1301, "step": 59100 }, { "epoch": 2.9764842137066316, "grad_norm": 2.9571767537416997, "learning_rate": 1.8715826013082638e-09, "loss": 0.1016, "step": 59110 }, { "epoch": 2.976987763734327, "grad_norm": 2.6064263841976736, "learning_rate": 1.7922920823082446e-09, "loss": 0.1134, "step": 59120 }, { "epoch": 2.9774913137620223, "grad_norm": 2.5203396002677168, "learning_rate": 1.714717402616084e-09, "loss": 0.1104, "step": 59130 }, { "epoch": 2.977994863789718, "grad_norm": 3.4232601974753085, "learning_rate": 1.6388585888621467e-09, "loss": 0.1333, "step": 59140 }, { "epoch": 2.978498413817413, "grad_norm": 3.5711741107748223, "learning_rate": 1.564715667087824e-09, "loss": 0.1271, "step": 59150 }, { "epoch": 2.979001963845108, "grad_norm": 4.095196271140959, "learning_rate": 1.4922886627460887e-09, "loss": 0.1207, "step": 59160 }, { "epoch": 2.9795055138728035, "grad_norm": 3.353598239473309, "learning_rate": 1.4215776007003856e-09, "loss": 0.1245, "step": 59170 }, { "epoch": 2.9800090639004986, "grad_norm": 4.039612009113359, "learning_rate": 1.3525825052251863e-09, "loss": 0.1509, "step": 59180 }, { "epoch": 2.9805126139281937, "grad_norm": 3.0694627492419304, "learning_rate": 1.2853034000059883e-09, "loss": 0.135, "step": 59190 }, { "epoch": 2.981016163955889, "grad_norm": 3.503942019826135, "learning_rate": 1.2197403081387616e-09, "loss": 0.141, "step": 59200 }, { "epoch": 2.9815197139835843, "grad_norm": 2.6661990854582798, "learning_rate": 1.1558932521310574e-09, "loss": 0.1204, "step": 59210 }, { "epoch": 2.9820232640112794, "grad_norm": 3.776005480224921, "learning_rate": 1.0937622539008986e-09, "loss": 0.1302, "step": 59220 }, { "epoch": 2.982526814038975, "grad_norm": 2.386924227847947, "learning_rate": 1.033347334777335e-09, "loss": 0.129, "step": 59230 }, { "epoch": 2.98303036406667, "grad_norm": 3.181105676157016, "learning_rate": 9.746485154998874e-10, "loss": 0.1362, "step": 59240 }, { "epoch": 2.983533914094365, "grad_norm": 3.6447626312110333, "learning_rate": 9.176658162196595e-10, "loss": 0.1438, "step": 59250 }, { "epoch": 2.9840374641220606, "grad_norm": 1.4793109998355414, "learning_rate": 8.623992564987805e-10, "loss": 0.1271, "step": 59260 }, { "epoch": 2.9845410141497557, "grad_norm": 4.203177142680415, "learning_rate": 8.088488553087415e-10, "loss": 0.1223, "step": 59270 }, { "epoch": 2.985044564177451, "grad_norm": 3.3897074845760558, "learning_rate": 7.570146310337256e-10, "loss": 0.1215, "step": 59280 }, { "epoch": 2.9855481142051463, "grad_norm": 2.877874541968353, "learning_rate": 7.068966014672773e-10, "loss": 0.1373, "step": 59290 }, { "epoch": 2.9860516642328414, "grad_norm": 2.8637186794107063, "learning_rate": 6.584947838150779e-10, "loss": 0.1436, "step": 59300 }, { "epoch": 2.986555214260537, "grad_norm": 2.400743943213951, "learning_rate": 6.118091946927252e-10, "loss": 0.1497, "step": 59310 }, { "epoch": 2.987058764288232, "grad_norm": 3.3791145115844605, "learning_rate": 5.668398501273986e-10, "loss": 0.1169, "step": 59320 }, { "epoch": 2.9875623143159276, "grad_norm": 2.2612071742701647, "learning_rate": 5.235867655556393e-10, "loss": 0.1348, "step": 59330 }, { "epoch": 2.9880658643436226, "grad_norm": 2.473976439752346, "learning_rate": 4.820499558272351e-10, "loss": 0.1191, "step": 59340 }, { "epoch": 2.9885694143713177, "grad_norm": 2.909001283775111, "learning_rate": 4.4222943519967033e-10, "loss": 0.1278, "step": 59350 }, { "epoch": 2.9890729643990133, "grad_norm": 4.191108986816149, "learning_rate": 4.0412521734478625e-10, "loss": 0.1459, "step": 59360 }, { "epoch": 2.9895765144267084, "grad_norm": 3.200141988134579, "learning_rate": 3.6773731534212045e-10, "loss": 0.1016, "step": 59370 }, { "epoch": 2.9900800644544034, "grad_norm": 4.169361903821447, "learning_rate": 3.330657416839023e-10, "loss": 0.136, "step": 59380 }, { "epoch": 2.990583614482099, "grad_norm": 3.398786653640277, "learning_rate": 3.0011050827227774e-10, "loss": 0.1332, "step": 59390 }, { "epoch": 2.991087164509794, "grad_norm": 4.289368112743992, "learning_rate": 2.688716264209745e-10, "loss": 0.118, "step": 59400 }, { "epoch": 2.991590714537489, "grad_norm": 4.222418585931807, "learning_rate": 2.3934910685308175e-10, "loss": 0.1498, "step": 59410 }, { "epoch": 2.9920942645651847, "grad_norm": 3.1922093622825423, "learning_rate": 2.1154295970438054e-10, "loss": 0.1234, "step": 59420 }, { "epoch": 2.9925978145928798, "grad_norm": 3.1525320959331755, "learning_rate": 1.854531945200133e-10, "loss": 0.1086, "step": 59430 }, { "epoch": 2.993101364620575, "grad_norm": 2.824557829789729, "learning_rate": 1.610798202567043e-10, "loss": 0.1158, "step": 59440 }, { "epoch": 2.9936049146482704, "grad_norm": 1.7556823924468605, "learning_rate": 1.3842284528053916e-10, "loss": 0.1337, "step": 59450 }, { "epoch": 2.9941084646759655, "grad_norm": 4.019121600484944, "learning_rate": 1.1748227737085061e-10, "loss": 0.1357, "step": 59460 }, { "epoch": 2.9946120147036606, "grad_norm": 4.010324140087046, "learning_rate": 9.825812371577758e-11, "loss": 0.1448, "step": 59470 }, { "epoch": 2.995115564731356, "grad_norm": 1.517542635055765, "learning_rate": 8.075039091448578e-11, "loss": 0.1542, "step": 59480 }, { "epoch": 2.995619114759051, "grad_norm": 3.408190021673133, "learning_rate": 6.495908497772263e-11, "loss": 0.148, "step": 59490 }, { "epoch": 2.9961226647867467, "grad_norm": 2.915141169768917, "learning_rate": 5.0884211326152066e-11, "loss": 0.1144, "step": 59500 }, { "epoch": 2.996626214814442, "grad_norm": 3.1205822123409526, "learning_rate": 3.85257747920198e-11, "loss": 0.115, "step": 59510 }, { "epoch": 2.9971297648421373, "grad_norm": 3.5072527023479134, "learning_rate": 2.7883779616932894e-11, "loss": 0.1325, "step": 59520 }, { "epoch": 2.9976333148698324, "grad_norm": 3.3484994428724457, "learning_rate": 1.8958229455190435e-11, "loss": 0.1272, "step": 59530 }, { "epoch": 2.9981368648975275, "grad_norm": 4.272563518971965, "learning_rate": 1.1749127370452861e-11, "loss": 0.1371, "step": 59540 }, { "epoch": 2.998640414925223, "grad_norm": 3.2690406102334815, "learning_rate": 6.256475837407294e-12, "loss": 0.1259, "step": 59550 }, { "epoch": 2.999143964952918, "grad_norm": 1.8665991656873278, "learning_rate": 2.4802767412124283e-12, "loss": 0.124, "step": 59560 }, { "epoch": 2.999647514980613, "grad_norm": 4.114386690732922, "learning_rate": 4.205313797189803e-13, "loss": 0.1363, "step": 59570 }, { "epoch": 3.0, "step": 59577, "total_flos": 929666213609472.0, "train_loss": 0.3142914358571927, "train_runtime": 121857.0336, "train_samples_per_second": 3.911, "train_steps_per_second": 0.489 } ], "logging_steps": 10, "max_steps": 59577, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 929666213609472.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }